001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.IOException;
022 import java.net.URI;
023 import java.net.URISyntaxException;
024 import java.util.regex.Pattern;
025
026 import org.apache.avro.reflect.Stringable;
027 import org.apache.commons.lang.StringUtils;
028 import org.apache.hadoop.HadoopIllegalArgumentException;
029 import org.apache.hadoop.classification.InterfaceAudience;
030 import org.apache.hadoop.classification.InterfaceStability;
031 import org.apache.hadoop.conf.Configuration;
032
033 /** Names a file or directory in a {@link FileSystem}.
034 * Path strings use slash as the directory separator. A path string is
035 * absolute if it begins with a slash.
036 */
037 @Stringable
038 @InterfaceAudience.Public
039 @InterfaceStability.Stable
040 public class Path implements Comparable {
041
042 /** The directory separator, a slash. */
043 public static final String SEPARATOR = "/";
044 public static final char SEPARATOR_CHAR = '/';
045
046 public static final String CUR_DIR = ".";
047
048 public static final boolean WINDOWS
049 = System.getProperty("os.name").startsWith("Windows");
050
051 /**
052 * Pre-compiled regular expressions to detect path formats.
053 */
054 private static final Pattern hasUriScheme =
055 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
056 private static final Pattern hasDriveLetterSpecifier =
057 Pattern.compile("^/?[a-zA-Z]:");
058
059 private URI uri; // a hierarchical uri
060
061 /**
062 * Pathnames with scheme and relative path are illegal.
063 * @param path to be checked
064 */
065 void checkNotSchemeWithRelative() {
066 if (toUri().isAbsolute() && !isUriPathAbsolute()) {
067 throw new HadoopIllegalArgumentException(
068 "Unsupported name: has scheme but relative path-part");
069 }
070 }
071
072 void checkNotRelative() {
073 if (!isAbsolute() && toUri().getScheme() == null) {
074 throw new HadoopIllegalArgumentException("Path is relative");
075 }
076 }
077
078 public static Path getPathWithoutSchemeAndAuthority(Path path) {
079 // This code depends on Path.toString() to remove the leading slash before
080 // the drive specification on Windows.
081 Path newPath = path.isUriPathAbsolute() ?
082 new Path(null, null, path.toUri().getPath()) :
083 path;
084 return newPath;
085 }
086
087 /** Resolve a child path against a parent path. */
088 public Path(String parent, String child) {
089 this(new Path(parent), new Path(child));
090 }
091
092 /** Resolve a child path against a parent path. */
093 public Path(Path parent, String child) {
094 this(parent, new Path(child));
095 }
096
097 /** Resolve a child path against a parent path. */
098 public Path(String parent, Path child) {
099 this(new Path(parent), child);
100 }
101
102 /** Resolve a child path against a parent path. */
103 public Path(Path parent, Path child) {
104 // Add a slash to parent's path so resolution is compatible with URI's
105 URI parentUri = parent.uri;
106 String parentPath = parentUri.getPath();
107 if (!(parentPath.equals("/") || parentPath.isEmpty())) {
108 try {
109 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
110 parentUri.getPath()+"/", null, parentUri.getFragment());
111 } catch (URISyntaxException e) {
112 throw new IllegalArgumentException(e);
113 }
114 }
115 URI resolved = parentUri.resolve(child.uri);
116 initialize(resolved.getScheme(), resolved.getAuthority(),
117 resolved.getPath(), resolved.getFragment());
118 }
119
120 private void checkPathArg( String path ) throws IllegalArgumentException {
121 // disallow construction of a Path from an empty string
122 if ( path == null ) {
123 throw new IllegalArgumentException(
124 "Can not create a Path from a null string");
125 }
126 if( path.length() == 0 ) {
127 throw new IllegalArgumentException(
128 "Can not create a Path from an empty string");
129 }
130 }
131
132 /** Construct a path from a String. Path strings are URIs, but with
133 * unescaped elements and some additional normalization. */
134 public Path(String pathString) throws IllegalArgumentException {
135 checkPathArg( pathString );
136
137 // We can't use 'new URI(String)' directly, since it assumes things are
138 // escaped, which we don't require of Paths.
139
140 // add a slash in front of paths with Windows drive letters
141 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
142 pathString = "/" + pathString;
143 }
144
145 // parse uri components
146 String scheme = null;
147 String authority = null;
148
149 int start = 0;
150
151 // parse uri scheme, if any
152 int colon = pathString.indexOf(':');
153 int slash = pathString.indexOf('/');
154 if ((colon != -1) &&
155 ((slash == -1) || (colon < slash))) { // has a scheme
156 scheme = pathString.substring(0, colon);
157 start = colon+1;
158 }
159
160 // parse uri authority, if any
161 if (pathString.startsWith("//", start) &&
162 (pathString.length()-start > 2)) { // has authority
163 int nextSlash = pathString.indexOf('/', start+2);
164 int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
165 authority = pathString.substring(start+2, authEnd);
166 start = authEnd;
167 }
168
169 // uri path is the rest of the string -- query & fragment not supported
170 String path = pathString.substring(start, pathString.length());
171
172 initialize(scheme, authority, path, null);
173 }
174
175 /**
176 * Construct a path from a URI
177 */
178 public Path(URI aUri) {
179 uri = aUri.normalize();
180 }
181
182 /** Construct a Path from components. */
183 public Path(String scheme, String authority, String path) {
184 checkPathArg( path );
185
186 // add a slash in front of paths with Windows drive letters
187 if (hasWindowsDrive(path) && path.charAt(0) != '/') {
188 path = "/" + path;
189 }
190
191 // add "./" in front of Linux relative paths so that a path containing
192 // a colon e.q. "a:b" will not be interpreted as scheme "a".
193 if (!WINDOWS && path.charAt(0) != '/') {
194 path = "./" + path;
195 }
196
197 initialize(scheme, authority, path, null);
198 }
199
200 private void initialize(String scheme, String authority, String path,
201 String fragment) {
202 try {
203 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
204 .normalize();
205 } catch (URISyntaxException e) {
206 throw new IllegalArgumentException(e);
207 }
208 }
209
210 /**
211 * Merge 2 paths such that the second path is appended relative to the first.
212 * The returned path has the scheme and authority of the first path. On
213 * Windows, the drive specification in the second path is discarded.
214 *
215 * @param path1 Path first path
216 * @param path2 Path second path, to be appended relative to path1
217 * @return Path merged path
218 */
219 public static Path mergePaths(Path path1, Path path2) {
220 String path2Str = path2.toUri().getPath();
221 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
222 // Add path components explicitly, because simply concatenating two path
223 // string is not safe, for example:
224 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
225 return new Path(path1.toUri().getScheme(),
226 path1.toUri().getAuthority(),
227 path1.toUri().getPath() + path2Str);
228 }
229
230 /**
231 * Normalize a path string to use non-duplicated forward slashes as
232 * the path separator and remove any trailing path separators.
233 * @param scheme Supplies the URI scheme. Used to deduce whether we
234 * should replace backslashes or not.
235 * @param path Supplies the scheme-specific part
236 * @return Normalized path string.
237 */
238 private static String normalizePath(String scheme, String path) {
239 // Remove double forward slashes.
240 path = StringUtils.replace(path, "//", "/");
241
242 // Remove backslashes if this looks like a Windows path. Avoid
243 // the substitution if it looks like a non-local URI.
244 if (WINDOWS &&
245 (hasWindowsDrive(path) ||
246 (scheme == null) ||
247 (scheme.isEmpty()) ||
248 (scheme.equals("file")))) {
249 path = StringUtils.replace(path, "\\", "/");
250 }
251
252 // trim trailing slash from non-root path (ignoring windows drive)
253 int minLength = startPositionWithoutWindowsDrive(path) + 1;
254 if (path.length() > minLength && path.endsWith(SEPARATOR)) {
255 path = path.substring(0, path.length()-1);
256 }
257
258 return path;
259 }
260
261 private static boolean hasWindowsDrive(String path) {
262 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
263 }
264
265 private static int startPositionWithoutWindowsDrive(String path) {
266 if (hasWindowsDrive(path)) {
267 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2;
268 } else {
269 return 0;
270 }
271 }
272
273 /**
274 * Determine whether a given path string represents an absolute path on
275 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
276 *
277 * @param pathString Supplies the path string to evaluate.
278 * @param slashed true if the given path is prefixed with "/".
279 * @return true if the supplied path looks like an absolute path with a Windows
280 * drive-specifier.
281 */
282 public static boolean isWindowsAbsolutePath(final String pathString,
283 final boolean slashed) {
284 int start = startPositionWithoutWindowsDrive(pathString);
285 return start > 0
286 && pathString.length() > start
287 && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
288 (pathString.charAt(start) == '\\'));
289 }
290
291 /** Convert this to a URI. */
292 public URI toUri() { return uri; }
293
294 /** Return the FileSystem that owns this Path. */
295 public FileSystem getFileSystem(Configuration conf) throws IOException {
296 return FileSystem.get(this.toUri(), conf);
297 }
298
299 /**
300 * Is an absolute path (ie a slash relative path part)
301 * AND a scheme is null AND authority is null.
302 */
303 public boolean isAbsoluteAndSchemeAuthorityNull() {
304 return (isUriPathAbsolute() &&
305 uri.getScheme() == null && uri.getAuthority() == null);
306 }
307
308 /**
309 * True if the path component (i.e. directory) of this URI is absolute.
310 */
311 public boolean isUriPathAbsolute() {
312 int start = startPositionWithoutWindowsDrive(uri.getPath());
313 return uri.getPath().startsWith(SEPARATOR, start);
314 }
315
316 /** True if the path component of this URI is absolute. */
317 /**
318 * There is some ambiguity here. An absolute path is a slash
319 * relative name without a scheme or an authority.
320 * So either this method was incorrectly named or its
321 * implementation is incorrect. This method returns true
322 * even if there is a scheme and authority.
323 */
324 public boolean isAbsolute() {
325 return isUriPathAbsolute();
326 }
327
328 /**
329 * @return true if and only if this path represents the root of a file system
330 */
331 public boolean isRoot() {
332 return getParent() == null;
333 }
334
335 /** Returns the final component of this path.*/
336 public String getName() {
337 String path = uri.getPath();
338 int slash = path.lastIndexOf(SEPARATOR);
339 return path.substring(slash+1);
340 }
341
342 /** Returns the parent of a path or null if at root. */
343 public Path getParent() {
344 String path = uri.getPath();
345 int lastSlash = path.lastIndexOf('/');
346 int start = startPositionWithoutWindowsDrive(path);
347 if ((path.length() == start) || // empty path
348 (lastSlash == start && path.length() == start+1)) { // at root
349 return null;
350 }
351 String parent;
352 if (lastSlash==-1) {
353 parent = CUR_DIR;
354 } else {
355 parent = path.substring(0, lastSlash==start?start+1:lastSlash);
356 }
357 return new Path(uri.getScheme(), uri.getAuthority(), parent);
358 }
359
360 /** Adds a suffix to the final name in the path.*/
361 public Path suffix(String suffix) {
362 return new Path(getParent(), getName()+suffix);
363 }
364
365 @Override
366 public String toString() {
367 // we can't use uri.toString(), which escapes everything, because we want
368 // illegal characters unescaped in the string, for glob processing, etc.
369 StringBuilder buffer = new StringBuilder();
370 if (uri.getScheme() != null) {
371 buffer.append(uri.getScheme());
372 buffer.append(":");
373 }
374 if (uri.getAuthority() != null) {
375 buffer.append("//");
376 buffer.append(uri.getAuthority());
377 }
378 if (uri.getPath() != null) {
379 String path = uri.getPath();
380 if (path.indexOf('/')==0 &&
381 hasWindowsDrive(path) && // has windows drive
382 uri.getScheme() == null && // but no scheme
383 uri.getAuthority() == null) // or authority
384 path = path.substring(1); // remove slash before drive
385 buffer.append(path);
386 }
387 if (uri.getFragment() != null) {
388 buffer.append("#");
389 buffer.append(uri.getFragment());
390 }
391 return buffer.toString();
392 }
393
394 @Override
395 public boolean equals(Object o) {
396 if (!(o instanceof Path)) {
397 return false;
398 }
399 Path that = (Path)o;
400 return this.uri.equals(that.uri);
401 }
402
403 @Override
404 public int hashCode() {
405 return uri.hashCode();
406 }
407
408 @Override
409 public int compareTo(Object o) {
410 Path that = (Path)o;
411 return this.uri.compareTo(that.uri);
412 }
413
414 /** Return the number of elements in this path. */
415 public int depth() {
416 String path = uri.getPath();
417 int depth = 0;
418 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
419 while (slash != -1) {
420 depth++;
421 slash = path.indexOf(SEPARATOR, slash+1);
422 }
423 return depth;
424 }
425
426 /**
427 * Returns a qualified path object.
428 *
429 * Deprecated - use {@link #makeQualified(URI, Path)}
430 */
431 @Deprecated
432 public Path makeQualified(FileSystem fs) {
433 return makeQualified(fs.getUri(), fs.getWorkingDirectory());
434 }
435
436 /** Returns a qualified path object. */
437 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
438 public Path makeQualified(URI defaultUri, Path workingDir ) {
439 Path path = this;
440 if (!isAbsolute()) {
441 path = new Path(workingDir, this);
442 }
443
444 URI pathUri = path.toUri();
445
446 String scheme = pathUri.getScheme();
447 String authority = pathUri.getAuthority();
448 String fragment = pathUri.getFragment();
449
450 if (scheme != null &&
451 (authority != null || defaultUri.getAuthority() == null))
452 return path;
453
454 if (scheme == null) {
455 scheme = defaultUri.getScheme();
456 }
457
458 if (authority == null) {
459 authority = defaultUri.getAuthority();
460 if (authority == null) {
461 authority = "";
462 }
463 }
464
465 URI newUri = null;
466 try {
467 newUri = new URI(scheme, authority ,
468 normalizePath(scheme, pathUri.getPath()), null, fragment);
469 } catch (URISyntaxException e) {
470 throw new IllegalArgumentException(e);
471 }
472 return new Path(newUri);
473 }
474 }