001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024import java.util.regex.Pattern;
025
026import org.apache.avro.reflect.Stringable;
027import org.apache.commons.lang.StringUtils;
028import org.apache.hadoop.HadoopIllegalArgumentException;
029import org.apache.hadoop.classification.InterfaceAudience;
030import org.apache.hadoop.classification.InterfaceStability;
031import org.apache.hadoop.conf.Configuration;
032
033/** Names a file or directory in a {@link FileSystem}.
034 * Path strings use slash as the directory separator.  A path string is
035 * absolute if it begins with a slash.
036 */
037@Stringable
038@InterfaceAudience.Public
039@InterfaceStability.Stable
040public class Path implements Comparable {
041
042  /** The directory separator, a slash. */
043  public static final String SEPARATOR = "/";
044  public static final char SEPARATOR_CHAR = '/';
045  
046  public static final String CUR_DIR = ".";
047  
048  public static final boolean WINDOWS
049    = System.getProperty("os.name").startsWith("Windows");
050
051  /**
052   *  Pre-compiled regular expressions to detect path formats.
053   */
054  private static final Pattern hasUriScheme =
055      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
056  private static final Pattern hasDriveLetterSpecifier =
057      Pattern.compile("^/?[a-zA-Z]:");
058
059  private URI uri;                                // a hierarchical uri
060
061  /**
062   * Pathnames with scheme and relative path are illegal.
063   * @param path to be checked
064   */
065  void checkNotSchemeWithRelative() {
066    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
067      throw new HadoopIllegalArgumentException(
068          "Unsupported name: has scheme but relative path-part");
069    }
070  }
071
072  void checkNotRelative() {
073    if (!isAbsolute() && toUri().getScheme() == null) {
074      throw new HadoopIllegalArgumentException("Path is relative");
075    }
076  }
077
078  public static Path getPathWithoutSchemeAndAuthority(Path path) {
079    // This code depends on Path.toString() to remove the leading slash before
080    // the drive specification on Windows.
081    Path newPath = path.isUriPathAbsolute() ?
082      new Path(null, null, path.toUri().getPath()) :
083      path;
084    return newPath;
085  }
086
087  /** Resolve a child path against a parent path. */
088  public Path(String parent, String child) {
089    this(new Path(parent), new Path(child));
090  }
091
092  /** Resolve a child path against a parent path. */
093  public Path(Path parent, String child) {
094    this(parent, new Path(child));
095  }
096
097  /** Resolve a child path against a parent path. */
098  public Path(String parent, Path child) {
099    this(new Path(parent), child);
100  }
101
102  /** Resolve a child path against a parent path. */
103  public Path(Path parent, Path child) {
104    // Add a slash to parent's path so resolution is compatible with URI's
105    URI parentUri = parent.uri;
106    String parentPath = parentUri.getPath();
107    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
108      try {
109        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
110                      parentUri.getPath()+"/", null, parentUri.getFragment());
111      } catch (URISyntaxException e) {
112        throw new IllegalArgumentException(e);
113      }
114    }
115    URI resolved = parentUri.resolve(child.uri);
116    initialize(resolved.getScheme(), resolved.getAuthority(),
117               resolved.getPath(), resolved.getFragment());
118  }
119
120  private void checkPathArg( String path ) throws IllegalArgumentException {
121    // disallow construction of a Path from an empty string
122    if ( path == null ) {
123      throw new IllegalArgumentException(
124          "Can not create a Path from a null string");
125    }
126    if( path.length() == 0 ) {
127       throw new IllegalArgumentException(
128           "Can not create a Path from an empty string");
129    }   
130  }
131  
132  /** Construct a path from a String.  Path strings are URIs, but with
133   * unescaped elements and some additional normalization. */
134  public Path(String pathString) throws IllegalArgumentException {
135    checkPathArg( pathString );
136    
137    // We can't use 'new URI(String)' directly, since it assumes things are
138    // escaped, which we don't require of Paths. 
139    
140    // add a slash in front of paths with Windows drive letters
141    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
142      pathString = "/" + pathString;
143    }
144
145    // parse uri components
146    String scheme = null;
147    String authority = null;
148
149    int start = 0;
150
151    // parse uri scheme, if any
152    int colon = pathString.indexOf(':');
153    int slash = pathString.indexOf('/');
154    if ((colon != -1) &&
155        ((slash == -1) || (colon < slash))) {     // has a scheme
156      scheme = pathString.substring(0, colon);
157      start = colon+1;
158    }
159
160    // parse uri authority, if any
161    if (pathString.startsWith("//", start) &&
162        (pathString.length()-start > 2)) {       // has authority
163      int nextSlash = pathString.indexOf('/', start+2);
164      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
165      authority = pathString.substring(start+2, authEnd);
166      start = authEnd;
167    }
168
169    // uri path is the rest of the string -- query & fragment not supported
170    String path = pathString.substring(start, pathString.length());
171
172    initialize(scheme, authority, path, null);
173  }
174
175  /**
176   * Construct a path from a URI
177   */
178  public Path(URI aUri) {
179    uri = aUri.normalize();
180  }
181  
182  /** Construct a Path from components. */
183  public Path(String scheme, String authority, String path) {
184    checkPathArg( path );
185
186    // add a slash in front of paths with Windows drive letters
187    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
188      path = "/" + path;
189    }
190
191    // add "./" in front of Linux relative paths so that a path containing
192    // a colon e.q. "a:b" will not be interpreted as scheme "a".
193    if (!WINDOWS && path.charAt(0) != '/') {
194      path = "./" + path;
195    }
196
197    initialize(scheme, authority, path, null);
198  }
199
200  private void initialize(String scheme, String authority, String path,
201      String fragment) {
202    try {
203      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
204        .normalize();
205    } catch (URISyntaxException e) {
206      throw new IllegalArgumentException(e);
207    }
208  }
209
210  /**
211   * Merge 2 paths such that the second path is appended relative to the first.
212   * The returned path has the scheme and authority of the first path.  On
213   * Windows, the drive specification in the second path is discarded.
214   * 
215   * @param path1 Path first path
216   * @param path2 Path second path, to be appended relative to path1
217   * @return Path merged path
218   */
219  public static Path mergePaths(Path path1, Path path2) {
220    String path2Str = path2.toUri().getPath();
221    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
222    // Add path components explicitly, because simply concatenating two path
223    // string is not safe, for example:
224    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
225    return new Path(path1.toUri().getScheme(), 
226        path1.toUri().getAuthority(), 
227        path1.toUri().getPath() + path2Str);
228  }
229
230  /**
231   * Normalize a path string to use non-duplicated forward slashes as
232   * the path separator and remove any trailing path separators.
233   * @param scheme Supplies the URI scheme. Used to deduce whether we
234   *               should replace backslashes or not.
235   * @param path Supplies the scheme-specific part
236   * @return Normalized path string.
237   */
238  private static String normalizePath(String scheme, String path) {
239    // Remove double forward slashes.
240    path = StringUtils.replace(path, "//", "/");
241
242    // Remove backslashes if this looks like a Windows path. Avoid
243    // the substitution if it looks like a non-local URI.
244    if (WINDOWS &&
245        (hasWindowsDrive(path) ||
246         (scheme == null) ||
247         (scheme.isEmpty()) ||
248         (scheme.equals("file")))) {
249      path = StringUtils.replace(path, "\\", "/");
250    }
251    
252    // trim trailing slash from non-root path (ignoring windows drive)
253    int minLength = startPositionWithoutWindowsDrive(path) + 1;
254    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
255      path = path.substring(0, path.length()-1);
256    }
257    
258    return path;
259  }
260
261  private static boolean hasWindowsDrive(String path) {
262    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
263  }
264
265  private static int startPositionWithoutWindowsDrive(String path) {
266    if (hasWindowsDrive(path)) {
267      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
268    } else {
269      return 0;
270    }
271  }
272  
273  /**
274   * Determine whether a given path string represents an absolute path on
275   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
276   *
277   * @param pathString Supplies the path string to evaluate.
278   * @param slashed true if the given path is prefixed with "/".
279   * @return true if the supplied path looks like an absolute path with a Windows
280   * drive-specifier.
281   */
282  public static boolean isWindowsAbsolutePath(final String pathString,
283                                              final boolean slashed) {
284    int start = startPositionWithoutWindowsDrive(pathString);
285    return start > 0
286        && pathString.length() > start
287        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
288            (pathString.charAt(start) == '\\'));
289  }
290
291  /** Convert this to a URI. */
292  public URI toUri() { return uri; }
293
294  /** Return the FileSystem that owns this Path. */
295  public FileSystem getFileSystem(Configuration conf) throws IOException {
296    return FileSystem.get(this.toUri(), conf);
297  }
298
299  /**
300   * Is an absolute path (ie a slash relative path part)
301   *  AND  a scheme is null AND  authority is null.
302   */
303  public boolean isAbsoluteAndSchemeAuthorityNull() {
304    return  (isUriPathAbsolute() && 
305        uri.getScheme() == null && uri.getAuthority() == null);
306  }
307  
308  /**
309   *  True if the path component (i.e. directory) of this URI is absolute.
310   */
311  public boolean isUriPathAbsolute() {
312    int start = startPositionWithoutWindowsDrive(uri.getPath());
313    return uri.getPath().startsWith(SEPARATOR, start);
314   }
315  
316  /** True if the path component of this URI is absolute. */
317  /**
318   * There is some ambiguity here. An absolute path is a slash
319   * relative name without a scheme or an authority.
320   * So either this method was incorrectly named or its
321   * implementation is incorrect. This method returns true
322   * even if there is a scheme and authority.
323   */
324  public boolean isAbsolute() {
325     return isUriPathAbsolute();
326  }
327
328  /**
329   * @return true if and only if this path represents the root of a file system
330   */
331  public boolean isRoot() {
332    return getParent() == null;
333  }
334
335  /** Returns the final component of this path.*/
336  public String getName() {
337    String path = uri.getPath();
338    int slash = path.lastIndexOf(SEPARATOR);
339    return path.substring(slash+1);
340  }
341
342  /** Returns the parent of a path or null if at root. */
343  public Path getParent() {
344    String path = uri.getPath();
345    int lastSlash = path.lastIndexOf('/');
346    int start = startPositionWithoutWindowsDrive(path);
347    if ((path.length() == start) ||               // empty path
348        (lastSlash == start && path.length() == start+1)) { // at root
349      return null;
350    }
351    String parent;
352    if (lastSlash==-1) {
353      parent = CUR_DIR;
354    } else {
355      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
356    }
357    return new Path(uri.getScheme(), uri.getAuthority(), parent);
358  }
359
360  /** Adds a suffix to the final name in the path.*/
361  public Path suffix(String suffix) {
362    return new Path(getParent(), getName()+suffix);
363  }
364
365  @Override
366  public String toString() {
367    // we can't use uri.toString(), which escapes everything, because we want
368    // illegal characters unescaped in the string, for glob processing, etc.
369    StringBuilder buffer = new StringBuilder();
370    if (uri.getScheme() != null) {
371      buffer.append(uri.getScheme());
372      buffer.append(":");
373    }
374    if (uri.getAuthority() != null) {
375      buffer.append("//");
376      buffer.append(uri.getAuthority());
377    }
378    if (uri.getPath() != null) {
379      String path = uri.getPath();
380      if (path.indexOf('/')==0 &&
381          hasWindowsDrive(path) &&                // has windows drive
382          uri.getScheme() == null &&              // but no scheme
383          uri.getAuthority() == null)             // or authority
384        path = path.substring(1);                 // remove slash before drive
385      buffer.append(path);
386    }
387    if (uri.getFragment() != null) {
388      buffer.append("#");
389      buffer.append(uri.getFragment());
390    }
391    return buffer.toString();
392  }
393
394  @Override
395  public boolean equals(Object o) {
396    if (!(o instanceof Path)) {
397      return false;
398    }
399    Path that = (Path)o;
400    return this.uri.equals(that.uri);
401  }
402
403  @Override
404  public int hashCode() {
405    return uri.hashCode();
406  }
407
408  @Override
409  public int compareTo(Object o) {
410    Path that = (Path)o;
411    return this.uri.compareTo(that.uri);
412  }
413  
414  /** Return the number of elements in this path. */
415  public int depth() {
416    String path = uri.getPath();
417    int depth = 0;
418    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
419    while (slash != -1) {
420      depth++;
421      slash = path.indexOf(SEPARATOR, slash+1);
422    }
423    return depth;
424  }
425
426  /**
427   *  Returns a qualified path object.
428   *  
429   *  Deprecated - use {@link #makeQualified(URI, Path)}
430   */
431  @Deprecated
432  public Path makeQualified(FileSystem fs) {
433    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
434  }
435  
436  /** Returns a qualified path object. */
437  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
438  public Path makeQualified(URI defaultUri, Path workingDir ) {
439    Path path = this;
440    if (!isAbsolute()) {
441      path = new Path(workingDir, this);
442    }
443
444    URI pathUri = path.toUri();
445      
446    String scheme = pathUri.getScheme();
447    String authority = pathUri.getAuthority();
448    String fragment = pathUri.getFragment();
449
450    if (scheme != null &&
451        (authority != null || defaultUri.getAuthority() == null))
452      return path;
453
454    if (scheme == null) {
455      scheme = defaultUri.getScheme();
456    }
457
458    if (authority == null) {
459      authority = defaultUri.getAuthority();
460      if (authority == null) {
461        authority = "";
462      }
463    }
464    
465    URI newUri = null;
466    try {
467      newUri = new URI(scheme, authority , 
468        normalizePath(scheme, pathUri.getPath()), null, fragment);
469    } catch (URISyntaxException e) {
470      throw new IllegalArgumentException(e);
471    }
472    return new Path(newUri);
473  }
474}