001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024import java.util.regex.Pattern;
025
026import org.apache.avro.reflect.Stringable;
027import org.apache.commons.lang.StringUtils;
028import org.apache.hadoop.HadoopIllegalArgumentException;
029import org.apache.hadoop.classification.InterfaceAudience;
030import org.apache.hadoop.classification.InterfaceStability;
031import org.apache.hadoop.conf.Configuration;
032
033/** Names a file or directory in a {@link FileSystem}.
034 * Path strings use slash as the directory separator.  A path string is
035 * absolute if it begins with a slash.
036 */
037@Stringable
038@InterfaceAudience.Public
039@InterfaceStability.Stable
040public class Path implements Comparable {
041
042  /** The directory separator, a slash. */
043  public static final String SEPARATOR = "/";
044  public static final char SEPARATOR_CHAR = '/';
045  
046  public static final String CUR_DIR = ".";
047  
048  public static final boolean WINDOWS
049    = System.getProperty("os.name").startsWith("Windows");
050
051  /**
052   *  Pre-compiled regular expressions to detect path formats.
053   */
054  private static final Pattern hasUriScheme =
055      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
056  private static final Pattern hasDriveLetterSpecifier =
057      Pattern.compile("^/?[a-zA-Z]:");
058
059  private URI uri;                                // a hierarchical uri
060
061  /**
062   * Pathnames with scheme and relative path are illegal.
063   */
064  void checkNotSchemeWithRelative() {
065    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
066      throw new HadoopIllegalArgumentException(
067          "Unsupported name: has scheme but relative path-part");
068    }
069  }
070
071  void checkNotRelative() {
072    if (!isAbsolute() && toUri().getScheme() == null) {
073      throw new HadoopIllegalArgumentException("Path is relative");
074    }
075  }
076
077  public static Path getPathWithoutSchemeAndAuthority(Path path) {
078    // This code depends on Path.toString() to remove the leading slash before
079    // the drive specification on Windows.
080    Path newPath = path.isUriPathAbsolute() ?
081      new Path(null, null, path.toUri().getPath()) :
082      path;
083    return newPath;
084  }
085
086  /** Resolve a child path against a parent path. */
087  public Path(String parent, String child) {
088    this(new Path(parent), new Path(child));
089  }
090
091  /** Resolve a child path against a parent path. */
092  public Path(Path parent, String child) {
093    this(parent, new Path(child));
094  }
095
096  /** Resolve a child path against a parent path. */
097  public Path(String parent, Path child) {
098    this(new Path(parent), child);
099  }
100
101  /** Resolve a child path against a parent path. */
102  public Path(Path parent, Path child) {
103    // Add a slash to parent's path so resolution is compatible with URI's
104    URI parentUri = parent.uri;
105    String parentPath = parentUri.getPath();
106    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
107      try {
108        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
109                      parentUri.getPath()+"/", null, parentUri.getFragment());
110      } catch (URISyntaxException e) {
111        throw new IllegalArgumentException(e);
112      }
113    }
114    URI resolved = parentUri.resolve(child.uri);
115    initialize(resolved.getScheme(), resolved.getAuthority(),
116               resolved.getPath(), resolved.getFragment());
117  }
118
119  private void checkPathArg( String path ) throws IllegalArgumentException {
120    // disallow construction of a Path from an empty string
121    if ( path == null ) {
122      throw new IllegalArgumentException(
123          "Can not create a Path from a null string");
124    }
125    if( path.length() == 0 ) {
126       throw new IllegalArgumentException(
127           "Can not create a Path from an empty string");
128    }   
129  }
130  
131  /** Construct a path from a String.  Path strings are URIs, but with
132   * unescaped elements and some additional normalization. */
133  public Path(String pathString) throws IllegalArgumentException {
134    checkPathArg( pathString );
135    
136    // We can't use 'new URI(String)' directly, since it assumes things are
137    // escaped, which we don't require of Paths. 
138    
139    // add a slash in front of paths with Windows drive letters
140    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
141      pathString = "/" + pathString;
142    }
143
144    // parse uri components
145    String scheme = null;
146    String authority = null;
147
148    int start = 0;
149
150    // parse uri scheme, if any
151    int colon = pathString.indexOf(':');
152    int slash = pathString.indexOf('/');
153    if ((colon != -1) &&
154        ((slash == -1) || (colon < slash))) {     // has a scheme
155      scheme = pathString.substring(0, colon);
156      start = colon+1;
157    }
158
159    // parse uri authority, if any
160    if (pathString.startsWith("//", start) &&
161        (pathString.length()-start > 2)) {       // has authority
162      int nextSlash = pathString.indexOf('/', start+2);
163      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
164      authority = pathString.substring(start+2, authEnd);
165      start = authEnd;
166    }
167
168    // uri path is the rest of the string -- query & fragment not supported
169    String path = pathString.substring(start, pathString.length());
170
171    initialize(scheme, authority, path, null);
172  }
173
174  /**
175   * Construct a path from a URI
176   */
177  public Path(URI aUri) {
178    uri = aUri.normalize();
179  }
180  
181  /** Construct a Path from components. */
182  public Path(String scheme, String authority, String path) {
183    checkPathArg( path );
184
185    // add a slash in front of paths with Windows drive letters
186    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
187      path = "/" + path;
188    }
189
190    // add "./" in front of Linux relative paths so that a path containing
191    // a colon e.q. "a:b" will not be interpreted as scheme "a".
192    if (!WINDOWS && path.charAt(0) != '/') {
193      path = "./" + path;
194    }
195
196    initialize(scheme, authority, path, null);
197  }
198
199  private void initialize(String scheme, String authority, String path,
200      String fragment) {
201    try {
202      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
203        .normalize();
204    } catch (URISyntaxException e) {
205      throw new IllegalArgumentException(e);
206    }
207  }
208
209  /**
210   * Merge 2 paths such that the second path is appended relative to the first.
211   * The returned path has the scheme and authority of the first path.  On
212   * Windows, the drive specification in the second path is discarded.
213   * 
214   * @param path1 Path first path
215   * @param path2 Path second path, to be appended relative to path1
216   * @return Path merged path
217   */
218  public static Path mergePaths(Path path1, Path path2) {
219    String path2Str = path2.toUri().getPath();
220    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
221    // Add path components explicitly, because simply concatenating two path
222    // string is not safe, for example:
223    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
224    return new Path(path1.toUri().getScheme(), 
225        path1.toUri().getAuthority(), 
226        path1.toUri().getPath() + path2Str);
227  }
228
229  /**
230   * Normalize a path string to use non-duplicated forward slashes as
231   * the path separator and remove any trailing path separators.
232   * @param scheme Supplies the URI scheme. Used to deduce whether we
233   *               should replace backslashes or not.
234   * @param path Supplies the scheme-specific part
235   * @return Normalized path string.
236   */
237  private static String normalizePath(String scheme, String path) {
238    // Remove double forward slashes.
239    path = StringUtils.replace(path, "//", "/");
240
241    // Remove backslashes if this looks like a Windows path. Avoid
242    // the substitution if it looks like a non-local URI.
243    if (WINDOWS &&
244        (hasWindowsDrive(path) ||
245         (scheme == null) ||
246         (scheme.isEmpty()) ||
247         (scheme.equals("file")))) {
248      path = StringUtils.replace(path, "\\", "/");
249    }
250    
251    // trim trailing slash from non-root path (ignoring windows drive)
252    int minLength = startPositionWithoutWindowsDrive(path) + 1;
253    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
254      path = path.substring(0, path.length()-1);
255    }
256    
257    return path;
258  }
259
260  private static boolean hasWindowsDrive(String path) {
261    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
262  }
263
264  private static int startPositionWithoutWindowsDrive(String path) {
265    if (hasWindowsDrive(path)) {
266      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
267    } else {
268      return 0;
269    }
270  }
271  
272  /**
273   * Determine whether a given path string represents an absolute path on
274   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
275   *
276   * @param pathString Supplies the path string to evaluate.
277   * @param slashed true if the given path is prefixed with "/".
278   * @return true if the supplied path looks like an absolute path with a Windows
279   * drive-specifier.
280   */
281  public static boolean isWindowsAbsolutePath(final String pathString,
282                                              final boolean slashed) {
283    int start = startPositionWithoutWindowsDrive(pathString);
284    return start > 0
285        && pathString.length() > start
286        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
287            (pathString.charAt(start) == '\\'));
288  }
289
290  /** Convert this to a URI. */
291  public URI toUri() { return uri; }
292
293  /** Return the FileSystem that owns this Path. */
294  public FileSystem getFileSystem(Configuration conf) throws IOException {
295    return FileSystem.get(this.toUri(), conf);
296  }
297
298  /**
299   * Is an absolute path (ie a slash relative path part)
300   *  AND  a scheme is null AND  authority is null.
301   */
302  public boolean isAbsoluteAndSchemeAuthorityNull() {
303    return  (isUriPathAbsolute() && 
304        uri.getScheme() == null && uri.getAuthority() == null);
305  }
306  
307  /**
308   *  True if the path component (i.e. directory) of this URI is absolute.
309   */
310  public boolean isUriPathAbsolute() {
311    int start = startPositionWithoutWindowsDrive(uri.getPath());
312    return uri.getPath().startsWith(SEPARATOR, start);
313   }
314  
315  /** True if the path component of this URI is absolute. */
316  /**
317   * There is some ambiguity here. An absolute path is a slash
318   * relative name without a scheme or an authority.
319   * So either this method was incorrectly named or its
320   * implementation is incorrect. This method returns true
321   * even if there is a scheme and authority.
322   */
323  public boolean isAbsolute() {
324     return isUriPathAbsolute();
325  }
326
327  /**
328   * @return true if and only if this path represents the root of a file system
329   */
330  public boolean isRoot() {
331    return getParent() == null;
332  }
333
334  /** Returns the final component of this path.*/
335  public String getName() {
336    String path = uri.getPath();
337    int slash = path.lastIndexOf(SEPARATOR);
338    return path.substring(slash+1);
339  }
340
341  /** Returns the parent of a path or null if at root. */
342  public Path getParent() {
343    String path = uri.getPath();
344    int lastSlash = path.lastIndexOf('/');
345    int start = startPositionWithoutWindowsDrive(path);
346    if ((path.length() == start) ||               // empty path
347        (lastSlash == start && path.length() == start+1)) { // at root
348      return null;
349    }
350    String parent;
351    if (lastSlash==-1) {
352      parent = CUR_DIR;
353    } else {
354      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
355    }
356    return new Path(uri.getScheme(), uri.getAuthority(), parent);
357  }
358
359  /** Adds a suffix to the final name in the path.*/
360  public Path suffix(String suffix) {
361    return new Path(getParent(), getName()+suffix);
362  }
363
364  @Override
365  public String toString() {
366    // we can't use uri.toString(), which escapes everything, because we want
367    // illegal characters unescaped in the string, for glob processing, etc.
368    StringBuilder buffer = new StringBuilder();
369    if (uri.getScheme() != null) {
370      buffer.append(uri.getScheme());
371      buffer.append(":");
372    }
373    if (uri.getAuthority() != null) {
374      buffer.append("//");
375      buffer.append(uri.getAuthority());
376    }
377    if (uri.getPath() != null) {
378      String path = uri.getPath();
379      if (path.indexOf('/')==0 &&
380          hasWindowsDrive(path) &&                // has windows drive
381          uri.getScheme() == null &&              // but no scheme
382          uri.getAuthority() == null)             // or authority
383        path = path.substring(1);                 // remove slash before drive
384      buffer.append(path);
385    }
386    if (uri.getFragment() != null) {
387      buffer.append("#");
388      buffer.append(uri.getFragment());
389    }
390    return buffer.toString();
391  }
392
393  @Override
394  public boolean equals(Object o) {
395    if (!(o instanceof Path)) {
396      return false;
397    }
398    Path that = (Path)o;
399    return this.uri.equals(that.uri);
400  }
401
402  @Override
403  public int hashCode() {
404    return uri.hashCode();
405  }
406
407  @Override
408  public int compareTo(Object o) {
409    Path that = (Path)o;
410    return this.uri.compareTo(that.uri);
411  }
412  
413  /** Return the number of elements in this path. */
414  public int depth() {
415    String path = uri.getPath();
416    int depth = 0;
417    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
418    while (slash != -1) {
419      depth++;
420      slash = path.indexOf(SEPARATOR, slash+1);
421    }
422    return depth;
423  }
424
425  /**
426   *  Returns a qualified path object.
427   *  
428   *  Deprecated - use {@link #makeQualified(URI, Path)}
429   */
430  @Deprecated
431  public Path makeQualified(FileSystem fs) {
432    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
433  }
434  
435  /** Returns a qualified path object. */
436  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
437  public Path makeQualified(URI defaultUri, Path workingDir ) {
438    Path path = this;
439    if (!isAbsolute()) {
440      path = new Path(workingDir, this);
441    }
442
443    URI pathUri = path.toUri();
444      
445    String scheme = pathUri.getScheme();
446    String authority = pathUri.getAuthority();
447    String fragment = pathUri.getFragment();
448
449    if (scheme != null &&
450        (authority != null || defaultUri.getAuthority() == null))
451      return path;
452
453    if (scheme == null) {
454      scheme = defaultUri.getScheme();
455    }
456
457    if (authority == null) {
458      authority = defaultUri.getAuthority();
459      if (authority == null) {
460        authority = "";
461      }
462    }
463    
464    URI newUri = null;
465    try {
466      newUri = new URI(scheme, authority , 
467        normalizePath(scheme, pathUri.getPath()), null, fragment);
468    } catch (URISyntaxException e) {
469      throw new IllegalArgumentException(e);
470    }
471    return new Path(newUri);
472  }
473}