001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024import java.util.regex.Pattern;
025
026import org.apache.avro.reflect.Stringable;
027import org.apache.commons.lang.StringUtils;
028import org.apache.hadoop.HadoopIllegalArgumentException;
029import org.apache.hadoop.classification.InterfaceAudience;
030import org.apache.hadoop.classification.InterfaceStability;
031import org.apache.hadoop.conf.Configuration;
032
033/**
034 * Names a file or directory in a {@link FileSystem}.
035 * Path strings use slash as the directory separator.
036 */
037@Stringable
038@InterfaceAudience.Public
039@InterfaceStability.Stable
040public class Path implements Comparable {
041
042  /**
043   * The directory separator, a slash.
044   */
045  public static final String SEPARATOR = "/";
046
047  /**
048   * The directory separator, a slash, as a character.
049   */
050  public static final char SEPARATOR_CHAR = '/';
051  
052  /**
053   * The current directory, ".".
054   */
055  public static final String CUR_DIR = ".";
056  
057  /**
058   * Whether the current host is a Windows machine.
059   */
060  public static final boolean WINDOWS =
061      System.getProperty("os.name").startsWith("Windows");
062
063  /**
064   *  Pre-compiled regular expressions to detect path formats.
065   */
066  private static final Pattern HAS_DRIVE_LETTER_SPECIFIER =
067      Pattern.compile("^/?[a-zA-Z]:");
068
069  private URI uri; // a hierarchical uri
070
071  /**
072   * Test whether this Path uses a scheme and is relative.
073   * Pathnames with scheme and relative path are illegal.
074   */
075  void checkNotSchemeWithRelative() {
076    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
077      throw new HadoopIllegalArgumentException(
078          "Unsupported name: has scheme but relative path-part");
079    }
080  }
081
082  void checkNotRelative() {
083    if (!isAbsolute() && toUri().getScheme() == null) {
084      throw new HadoopIllegalArgumentException("Path is relative");
085    }
086  }
087
088  /**
089   * Return a version of the given Path without the scheme information.
090   *
091   * @param path the source Path
092   * @return a copy of this Path without the scheme information
093   */
094  public static Path getPathWithoutSchemeAndAuthority(Path path) {
095    // This code depends on Path.toString() to remove the leading slash before
096    // the drive specification on Windows.
097    Path newPath = path.isUriPathAbsolute() ?
098      new Path(null, null, path.toUri().getPath()) :
099      path;
100    return newPath;
101  }
102
103  /**
104   * Create a new Path based on the child path resolved against the parent path.
105   *
106   * @param parent the parent path
107   * @param child the child path
108   */
109  public Path(String parent, String child) {
110    this(new Path(parent), new Path(child));
111  }
112
113  /**
114   * Create a new Path based on the child path resolved against the parent path.
115   *
116   * @param parent the parent path
117   * @param child the child path
118   */
119  public Path(Path parent, String child) {
120    this(parent, new Path(child));
121  }
122
123  /**
124   * Create a new Path based on the child path resolved against the parent path.
125   *
126   * @param parent the parent path
127   * @param child the child path
128   */
129  public Path(String parent, Path child) {
130    this(new Path(parent), child);
131  }
132
133  /**
134   * Create a new Path based on the child path resolved against the parent path.
135   *
136   * @param parent the parent path
137   * @param child the child path
138   */
139  public Path(Path parent, Path child) {
140    // Add a slash to parent's path so resolution is compatible with URI's
141    URI parentUri = parent.uri;
142    String parentPath = parentUri.getPath();
143    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
144      try {
145        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
146                      parentUri.getPath()+"/", null, parentUri.getFragment());
147      } catch (URISyntaxException e) {
148        throw new IllegalArgumentException(e);
149      }
150    }
151    URI resolved = parentUri.resolve(child.uri);
152    initialize(resolved.getScheme(), resolved.getAuthority(),
153               resolved.getPath(), resolved.getFragment());
154  }
155
156  private void checkPathArg( String path ) throws IllegalArgumentException {
157    // disallow construction of a Path from an empty string
158    if ( path == null ) {
159      throw new IllegalArgumentException(
160          "Can not create a Path from a null string");
161    }
162    if( path.length() == 0 ) {
163       throw new IllegalArgumentException(
164           "Can not create a Path from an empty string");
165    }   
166  }
167  
168  /**
169   * Construct a path from a String.  Path strings are URIs, but with
170   * unescaped elements and some additional normalization.
171   *
172   * @param pathString the path string
173   */
174  public Path(String pathString) throws IllegalArgumentException {
175    checkPathArg( pathString );
176    
177    // We can't use 'new URI(String)' directly, since it assumes things are
178    // escaped, which we don't require of Paths. 
179    
180    // add a slash in front of paths with Windows drive letters
181    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
182      pathString = "/" + pathString;
183    }
184
185    // parse uri components
186    String scheme = null;
187    String authority = null;
188
189    int start = 0;
190
191    // parse uri scheme, if any
192    int colon = pathString.indexOf(':');
193    int slash = pathString.indexOf('/');
194    if ((colon != -1) &&
195        ((slash == -1) || (colon < slash))) {     // has a scheme
196      scheme = pathString.substring(0, colon);
197      start = colon+1;
198    }
199
200    // parse uri authority, if any
201    if (pathString.startsWith("//", start) &&
202        (pathString.length()-start > 2)) {       // has authority
203      int nextSlash = pathString.indexOf('/', start+2);
204      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
205      authority = pathString.substring(start+2, authEnd);
206      start = authEnd;
207    }
208
209    // uri path is the rest of the string -- query & fragment not supported
210    String path = pathString.substring(start, pathString.length());
211
212    initialize(scheme, authority, path, null);
213  }
214
215  /**
216   * Construct a path from a URI
217   *
218   * @param aUri the source URI
219   */
220  public Path(URI aUri) {
221    uri = aUri.normalize();
222  }
223  
224  /**
225   * Construct a Path from components.
226   *
227   * @param scheme the scheme
228   * @param authority the authority
229   * @param path the path
230   */
231  public Path(String scheme, String authority, String path) {
232    checkPathArg( path );
233
234    // add a slash in front of paths with Windows drive letters
235    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
236      path = "/" + path;
237    }
238
239    // add "./" in front of Linux relative paths so that a path containing
240    // a colon e.q. "a:b" will not be interpreted as scheme "a".
241    if (!WINDOWS && path.charAt(0) != '/') {
242      path = "./" + path;
243    }
244
245    initialize(scheme, authority, path, null);
246  }
247
248  private void initialize(String scheme, String authority, String path,
249      String fragment) {
250    try {
251      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
252        .normalize();
253    } catch (URISyntaxException e) {
254      throw new IllegalArgumentException(e);
255    }
256  }
257
258  /**
259   * Merge 2 paths such that the second path is appended relative to the first.
260   * The returned path has the scheme and authority of the first path.  On
261   * Windows, the drive specification in the second path is discarded.
262   * 
263   * @param path1 the first path
264   * @param path2 the second path, to be appended relative to path1
265   * @return the merged path
266   */
267  public static Path mergePaths(Path path1, Path path2) {
268    String path2Str = path2.toUri().getPath();
269    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
270    // Add path components explicitly, because simply concatenating two path
271    // string is not safe, for example:
272    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
273    return new Path(path1.toUri().getScheme(), 
274        path1.toUri().getAuthority(), 
275        path1.toUri().getPath() + path2Str);
276  }
277
278  /**
279   * Normalize a path string to use non-duplicated forward slashes as
280   * the path separator and remove any trailing path separators.
281   *
282   * @param scheme the URI scheme. Used to deduce whether we
283   * should replace backslashes or not
284   * @param path the scheme-specific part
285   * @return the normalized path string
286   */
287  private static String normalizePath(String scheme, String path) {
288    // Remove double forward slashes.
289    path = StringUtils.replace(path, "//", "/");
290
291    // Remove backslashes if this looks like a Windows path. Avoid
292    // the substitution if it looks like a non-local URI.
293    if (WINDOWS &&
294        (hasWindowsDrive(path) ||
295         (scheme == null) ||
296         (scheme.isEmpty()) ||
297         (scheme.equals("file")))) {
298      path = StringUtils.replace(path, "\\", "/");
299    }
300    
301    // trim trailing slash from non-root path (ignoring windows drive)
302    int minLength = startPositionWithoutWindowsDrive(path) + 1;
303    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
304      path = path.substring(0, path.length()-1);
305    }
306    
307    return path;
308  }
309
310  private static boolean hasWindowsDrive(String path) {
311    return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find());
312  }
313
314  private static int startPositionWithoutWindowsDrive(String path) {
315    if (hasWindowsDrive(path)) {
316      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
317    } else {
318      return 0;
319    }
320  }
321  
322  /**
323   * Determine whether a given path string represents an absolute path on
324   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
325   *
326   * @param pathString the path string to evaluate
327   * @param slashed true if the given path is prefixed with "/"
328   * @return true if the supplied path looks like an absolute path with a Windows
329   * drive-specifier
330   */
331  public static boolean isWindowsAbsolutePath(final String pathString,
332                                              final boolean slashed) {
333    int start = startPositionWithoutWindowsDrive(pathString);
334    return start > 0
335        && pathString.length() > start
336        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
337            (pathString.charAt(start) == '\\'));
338  }
339
340  /**
341   * Convert this Path to a URI.
342   *
343   * @return this Path as a URI
344   */
345  public URI toUri() { return uri; }
346
347  /**
348   * Return the FileSystem that owns this Path.
349   *
350   * @param conf the configuration to use when resolving the FileSystem
351   * @return the FileSystem that owns this Path
352   * @throws java.io.IOException thrown if there's an issue resolving the
353   * FileSystem
354   */
355  public FileSystem getFileSystem(Configuration conf) throws IOException {
356    return FileSystem.get(this.toUri(), conf);
357  }
358
359  /**
360   * Returns true if the path component (i.e. directory) of this URI is
361   * absolute <strong>and</strong> the scheme is null, <b>and</b> the authority
362   * is null.
363   *
364   * @return whether the path is absolute and the URI has no scheme nor
365   * authority parts
366   */
367  public boolean isAbsoluteAndSchemeAuthorityNull() {
368    return  (isUriPathAbsolute() && 
369        uri.getScheme() == null && uri.getAuthority() == null);
370  }
371  
372  /**
373   * Returns true if the path component (i.e. directory) of this URI is
374   * absolute.
375   *
376   * @return whether this URI's path is absolute
377   */
378  public boolean isUriPathAbsolute() {
379    int start = startPositionWithoutWindowsDrive(uri.getPath());
380    return uri.getPath().startsWith(SEPARATOR, start);
381   }
382  
383  /**
384   * Returns true if the path component (i.e. directory) of this URI is
385   * absolute.  This method is a wrapper for {@link #isUriPathAbsolute()}.
386   *
387   * @return whether this URI's path is absolute
388   */
389  public boolean isAbsolute() {
390     return isUriPathAbsolute();
391  }
392
393  /**
394   * Returns true if and only if this path represents the root of a file system.
395   *
396   * @return true if and only if this path represents the root of a file system
397   */
398  public boolean isRoot() {
399    return getParent() == null;
400  }
401
402  /**
403   * Returns the final component of this path.
404   *
405   * @return the final component of this path
406   */
407  public String getName() {
408    String path = uri.getPath();
409    int slash = path.lastIndexOf(SEPARATOR);
410    return path.substring(slash+1);
411  }
412
413  /**
414   * Returns the parent of a path or null if at root.
415   * @return the parent of a path or null if at root
416   */
417  public Path getParent() {
418    String path = uri.getPath();
419    int lastSlash = path.lastIndexOf('/');
420    int start = startPositionWithoutWindowsDrive(path);
421    if ((path.length() == start) ||               // empty path
422        (lastSlash == start && path.length() == start+1)) { // at root
423      return null;
424    }
425    String parent;
426    if (lastSlash==-1) {
427      parent = CUR_DIR;
428    } else {
429      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
430    }
431    return new Path(uri.getScheme(), uri.getAuthority(), parent);
432  }
433
434  /**
435   * Adds a suffix to the final name in the path.
436   *
437   * @param suffix the suffix to add
438   * @return a new path with the suffix added
439   */
440  public Path suffix(String suffix) {
441    return new Path(getParent(), getName()+suffix);
442  }
443
444  @Override
445  public String toString() {
446    // we can't use uri.toString(), which escapes everything, because we want
447    // illegal characters unescaped in the string, for glob processing, etc.
448    StringBuilder buffer = new StringBuilder();
449    if (uri.getScheme() != null) {
450      buffer.append(uri.getScheme());
451      buffer.append(":");
452    }
453    if (uri.getAuthority() != null) {
454      buffer.append("//");
455      buffer.append(uri.getAuthority());
456    }
457    if (uri.getPath() != null) {
458      String path = uri.getPath();
459      if (path.indexOf('/')==0 &&
460          hasWindowsDrive(path) &&                // has windows drive
461          uri.getScheme() == null &&              // but no scheme
462          uri.getAuthority() == null)             // or authority
463        path = path.substring(1);                 // remove slash before drive
464      buffer.append(path);
465    }
466    if (uri.getFragment() != null) {
467      buffer.append("#");
468      buffer.append(uri.getFragment());
469    }
470    return buffer.toString();
471  }
472
473  @Override
474  public boolean equals(Object o) {
475    if (!(o instanceof Path)) {
476      return false;
477    }
478    Path that = (Path)o;
479    return this.uri.equals(that.uri);
480  }
481
482  @Override
483  public int hashCode() {
484    return uri.hashCode();
485  }
486
487  @Override
488  public int compareTo(Object o) {
489    Path that = (Path)o;
490    return this.uri.compareTo(that.uri);
491  }
492  
493  /**
494   * Returns the number of elements in this path.
495   * @return the number of elements in this path
496   */
497  public int depth() {
498    String path = uri.getPath();
499    int depth = 0;
500    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
501    while (slash != -1) {
502      depth++;
503      slash = path.indexOf(SEPARATOR, slash+1);
504    }
505    return depth;
506  }
507
508  /**
509   * Returns a qualified path object for the {@link FileSystem}'s working
510   * directory.
511   *  
512   * @param fs the target FileSystem
513   * @return a qualified path object for the FileSystem's working directory
514   * @deprecated use {@link #makeQualified(URI, Path)}
515   */
516  @Deprecated
517  public Path makeQualified(FileSystem fs) {
518    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
519  }
520  
521  /**
522   * Returns a qualified path object.
523   *
524   * @param defaultUri if this path is missing the scheme or authority
525   * components, borrow them from this URI
526   * @param workingDir if this path isn't absolute, treat it as relative to this
527   * working directory
528   * @return this path if it contains a scheme and authority and is absolute, or
529   * a new path that includes a path and authority and is fully qualified
530   */
531  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
532  public Path makeQualified(URI defaultUri, Path workingDir ) {
533    Path path = this;
534    if (!isAbsolute()) {
535      path = new Path(workingDir, this);
536    }
537
538    URI pathUri = path.toUri();
539      
540    String scheme = pathUri.getScheme();
541    String authority = pathUri.getAuthority();
542    String fragment = pathUri.getFragment();
543
544    if (scheme != null &&
545        (authority != null || defaultUri.getAuthority() == null))
546      return path;
547
548    if (scheme == null) {
549      scheme = defaultUri.getScheme();
550    }
551
552    if (authority == null) {
553      authority = defaultUri.getAuthority();
554      if (authority == null) {
555        authority = "";
556      }
557    }
558    
559    URI newUri = null;
560    try {
561      newUri = new URI(scheme, authority , 
562        normalizePath(scheme, pathUri.getPath()), null, fragment);
563    } catch (URISyntaxException e) {
564      throw new IllegalArgumentException(e);
565    }
566    return new Path(newUri);
567  }
568}