001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.IOException;
022    import java.net.URI;
023    import java.net.URISyntaxException;
024    import java.util.regex.Pattern;
025    
026    import org.apache.avro.reflect.Stringable;
027    import org.apache.commons.lang.StringUtils;
028    import org.apache.hadoop.HadoopIllegalArgumentException;
029    import org.apache.hadoop.classification.InterfaceAudience;
030    import org.apache.hadoop.classification.InterfaceStability;
031    import org.apache.hadoop.conf.Configuration;
032    
033    /** Names a file or directory in a {@link FileSystem}.
034     * Path strings use slash as the directory separator.  A path string is
035     * absolute if it begins with a slash.
036     */
037    @Stringable
038    @InterfaceAudience.Public
039    @InterfaceStability.Stable
040    public class Path implements Comparable {
041    
042      /** The directory separator, a slash. */
043      public static final String SEPARATOR = "/";
044      public static final char SEPARATOR_CHAR = '/';
045      
046      public static final String CUR_DIR = ".";
047      
048      public static final boolean WINDOWS
049        = System.getProperty("os.name").startsWith("Windows");
050    
051      /**
052       *  Pre-compiled regular expressions to detect path formats.
053       */
054      private static final Pattern hasUriScheme =
055          Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
056      private static final Pattern hasDriveLetterSpecifier =
057          Pattern.compile("^/?[a-zA-Z]:");
058    
059      private URI uri;                                // a hierarchical uri
060    
061      /**
062       * Pathnames with scheme and relative path are illegal.
063       * @param path to be checked
064       */
065      void checkNotSchemeWithRelative() {
066        if (toUri().isAbsolute() && !isUriPathAbsolute()) {
067          throw new HadoopIllegalArgumentException(
068              "Unsupported name: has scheme but relative path-part");
069        }
070      }
071    
072      void checkNotRelative() {
073        if (!isAbsolute() && toUri().getScheme() == null) {
074          throw new HadoopIllegalArgumentException("Path is relative");
075        }
076      }
077    
078      public static Path getPathWithoutSchemeAndAuthority(Path path) {
079        // This code depends on Path.toString() to remove the leading slash before
080        // the drive specification on Windows.
081        Path newPath = path.isUriPathAbsolute() ?
082          new Path(null, null, path.toUri().getPath()) :
083          path;
084        return newPath;
085      }
086    
087      /** Resolve a child path against a parent path. */
088      public Path(String parent, String child) {
089        this(new Path(parent), new Path(child));
090      }
091    
092      /** Resolve a child path against a parent path. */
093      public Path(Path parent, String child) {
094        this(parent, new Path(child));
095      }
096    
097      /** Resolve a child path against a parent path. */
098      public Path(String parent, Path child) {
099        this(new Path(parent), child);
100      }
101    
102      /** Resolve a child path against a parent path. */
103      public Path(Path parent, Path child) {
104        // Add a slash to parent's path so resolution is compatible with URI's
105        URI parentUri = parent.uri;
106        String parentPath = parentUri.getPath();
107        if (!(parentPath.equals("/") || parentPath.isEmpty())) {
108          try {
109            parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
110                          parentUri.getPath()+"/", null, parentUri.getFragment());
111          } catch (URISyntaxException e) {
112            throw new IllegalArgumentException(e);
113          }
114        }
115        URI resolved = parentUri.resolve(child.uri);
116        initialize(resolved.getScheme(), resolved.getAuthority(),
117                   resolved.getPath(), resolved.getFragment());
118      }
119    
120      private void checkPathArg( String path ) throws IllegalArgumentException {
121        // disallow construction of a Path from an empty string
122        if ( path == null ) {
123          throw new IllegalArgumentException(
124              "Can not create a Path from a null string");
125        }
126        if( path.length() == 0 ) {
127           throw new IllegalArgumentException(
128               "Can not create a Path from an empty string");
129        }   
130      }
131      
132      /** Construct a path from a String.  Path strings are URIs, but with
133       * unescaped elements and some additional normalization. */
134      public Path(String pathString) throws IllegalArgumentException {
135        checkPathArg( pathString );
136        
137        // We can't use 'new URI(String)' directly, since it assumes things are
138        // escaped, which we don't require of Paths. 
139        
140        // add a slash in front of paths with Windows drive letters
141        if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
142          pathString = "/" + pathString;
143        }
144    
145        // parse uri components
146        String scheme = null;
147        String authority = null;
148    
149        int start = 0;
150    
151        // parse uri scheme, if any
152        int colon = pathString.indexOf(':');
153        int slash = pathString.indexOf('/');
154        if ((colon != -1) &&
155            ((slash == -1) || (colon < slash))) {     // has a scheme
156          scheme = pathString.substring(0, colon);
157          start = colon+1;
158        }
159    
160        // parse uri authority, if any
161        if (pathString.startsWith("//", start) &&
162            (pathString.length()-start > 2)) {       // has authority
163          int nextSlash = pathString.indexOf('/', start+2);
164          int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
165          authority = pathString.substring(start+2, authEnd);
166          start = authEnd;
167        }
168    
169        // uri path is the rest of the string -- query & fragment not supported
170        String path = pathString.substring(start, pathString.length());
171    
172        initialize(scheme, authority, path, null);
173      }
174    
175      /**
176       * Construct a path from a URI
177       */
178      public Path(URI aUri) {
179        uri = aUri.normalize();
180      }
181      
182      /** Construct a Path from components. */
183      public Path(String scheme, String authority, String path) {
184        checkPathArg( path );
185    
186        // add a slash in front of paths with Windows drive letters
187        if (hasWindowsDrive(path) && path.charAt(0) != '/') {
188          path = "/" + path;
189        }
190    
191        // add "./" in front of Linux relative paths so that a path containing
192        // a colon e.q. "a:b" will not be interpreted as scheme "a".
193        if (!WINDOWS && path.charAt(0) != '/') {
194          path = "./" + path;
195        }
196    
197        initialize(scheme, authority, path, null);
198      }
199    
200      private void initialize(String scheme, String authority, String path,
201          String fragment) {
202        try {
203          this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
204            .normalize();
205        } catch (URISyntaxException e) {
206          throw new IllegalArgumentException(e);
207        }
208      }
209    
210      /**
211       * Merge 2 paths such that the second path is appended relative to the first.
212       * The returned path has the scheme and authority of the first path.  On
213       * Windows, the drive specification in the second path is discarded.
214       * 
215       * @param path1 Path first path
216       * @param path2 Path second path, to be appended relative to path1
217       * @return Path merged path
218       */
219      public static Path mergePaths(Path path1, Path path2) {
220        String path2Str = path2.toUri().getPath();
221        path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
222        // Add path components explicitly, because simply concatenating two path
223        // string is not safe, for example:
224        // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
225        return new Path(path1.toUri().getScheme(), 
226            path1.toUri().getAuthority(), 
227            path1.toUri().getPath() + path2Str);
228      }
229    
230      /**
231       * Normalize a path string to use non-duplicated forward slashes as
232       * the path separator and remove any trailing path separators.
233       * @param scheme Supplies the URI scheme. Used to deduce whether we
234       *               should replace backslashes or not.
235       * @param path Supplies the scheme-specific part
236       * @return Normalized path string.
237       */
238      private static String normalizePath(String scheme, String path) {
239        // Remove double forward slashes.
240        path = StringUtils.replace(path, "//", "/");
241    
242        // Remove backslashes if this looks like a Windows path. Avoid
243        // the substitution if it looks like a non-local URI.
244        if (WINDOWS &&
245            (hasWindowsDrive(path) ||
246             (scheme == null) ||
247             (scheme.isEmpty()) ||
248             (scheme.equals("file")))) {
249          path = StringUtils.replace(path, "\\", "/");
250        }
251        
252        // trim trailing slash from non-root path (ignoring windows drive)
253        int minLength = startPositionWithoutWindowsDrive(path) + 1;
254        if (path.length() > minLength && path.endsWith(SEPARATOR)) {
255          path = path.substring(0, path.length()-1);
256        }
257        
258        return path;
259      }
260    
261      private static boolean hasWindowsDrive(String path) {
262        return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
263      }
264    
265      private static int startPositionWithoutWindowsDrive(String path) {
266        if (hasWindowsDrive(path)) {
267          return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
268        } else {
269          return 0;
270        }
271      }
272      
273      /**
274       * Determine whether a given path string represents an absolute path on
275       * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
276       *
277       * @param pathString Supplies the path string to evaluate.
278       * @param slashed true if the given path is prefixed with "/".
279       * @return true if the supplied path looks like an absolute path with a Windows
280       * drive-specifier.
281       */
282      public static boolean isWindowsAbsolutePath(final String pathString,
283                                                  final boolean slashed) {
284        int start = startPositionWithoutWindowsDrive(pathString);
285        return start > 0
286            && pathString.length() > start
287            && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
288                (pathString.charAt(start) == '\\'));
289      }
290    
291      /** Convert this to a URI. */
292      public URI toUri() { return uri; }
293    
294      /** Return the FileSystem that owns this Path. */
295      public FileSystem getFileSystem(Configuration conf) throws IOException {
296        return FileSystem.get(this.toUri(), conf);
297      }
298    
299      /**
300       * Is an absolute path (ie a slash relative path part)
301       *  AND  a scheme is null AND  authority is null.
302       */
303      public boolean isAbsoluteAndSchemeAuthorityNull() {
304        return  (isUriPathAbsolute() && 
305            uri.getScheme() == null && uri.getAuthority() == null);
306      }
307      
308      /**
309       *  True if the path component (i.e. directory) of this URI is absolute.
310       */
311      public boolean isUriPathAbsolute() {
312        int start = startPositionWithoutWindowsDrive(uri.getPath());
313        return uri.getPath().startsWith(SEPARATOR, start);
314       }
315      
316      /** True if the path component of this URI is absolute. */
317      /**
318       * There is some ambiguity here. An absolute path is a slash
319       * relative name without a scheme or an authority.
320       * So either this method was incorrectly named or its
321       * implementation is incorrect. This method returns true
322       * even if there is a scheme and authority.
323       */
324      public boolean isAbsolute() {
325         return isUriPathAbsolute();
326      }
327    
328      /**
329       * @return true if and only if this path represents the root of a file system
330       */
331      public boolean isRoot() {
332        return getParent() == null;
333      }
334    
335      /** Returns the final component of this path.*/
336      public String getName() {
337        String path = uri.getPath();
338        int slash = path.lastIndexOf(SEPARATOR);
339        return path.substring(slash+1);
340      }
341    
342      /** Returns the parent of a path or null if at root. */
343      public Path getParent() {
344        String path = uri.getPath();
345        int lastSlash = path.lastIndexOf('/');
346        int start = startPositionWithoutWindowsDrive(path);
347        if ((path.length() == start) ||               // empty path
348            (lastSlash == start && path.length() == start+1)) { // at root
349          return null;
350        }
351        String parent;
352        if (lastSlash==-1) {
353          parent = CUR_DIR;
354        } else {
355          parent = path.substring(0, lastSlash==start?start+1:lastSlash);
356        }
357        return new Path(uri.getScheme(), uri.getAuthority(), parent);
358      }
359    
360      /** Adds a suffix to the final name in the path.*/
361      public Path suffix(String suffix) {
362        return new Path(getParent(), getName()+suffix);
363      }
364    
365      @Override
366      public String toString() {
367        // we can't use uri.toString(), which escapes everything, because we want
368        // illegal characters unescaped in the string, for glob processing, etc.
369        StringBuilder buffer = new StringBuilder();
370        if (uri.getScheme() != null) {
371          buffer.append(uri.getScheme());
372          buffer.append(":");
373        }
374        if (uri.getAuthority() != null) {
375          buffer.append("//");
376          buffer.append(uri.getAuthority());
377        }
378        if (uri.getPath() != null) {
379          String path = uri.getPath();
380          if (path.indexOf('/')==0 &&
381              hasWindowsDrive(path) &&                // has windows drive
382              uri.getScheme() == null &&              // but no scheme
383              uri.getAuthority() == null)             // or authority
384            path = path.substring(1);                 // remove slash before drive
385          buffer.append(path);
386        }
387        if (uri.getFragment() != null) {
388          buffer.append("#");
389          buffer.append(uri.getFragment());
390        }
391        return buffer.toString();
392      }
393    
394      @Override
395      public boolean equals(Object o) {
396        if (!(o instanceof Path)) {
397          return false;
398        }
399        Path that = (Path)o;
400        return this.uri.equals(that.uri);
401      }
402    
403      @Override
404      public int hashCode() {
405        return uri.hashCode();
406      }
407    
408      @Override
409      public int compareTo(Object o) {
410        Path that = (Path)o;
411        return this.uri.compareTo(that.uri);
412      }
413      
414      /** Return the number of elements in this path. */
415      public int depth() {
416        String path = uri.getPath();
417        int depth = 0;
418        int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
419        while (slash != -1) {
420          depth++;
421          slash = path.indexOf(SEPARATOR, slash+1);
422        }
423        return depth;
424      }
425    
426      /**
427       *  Returns a qualified path object.
428       *  
429       *  Deprecated - use {@link #makeQualified(URI, Path)}
430       */
431      @Deprecated
432      public Path makeQualified(FileSystem fs) {
433        return makeQualified(fs.getUri(), fs.getWorkingDirectory());
434      }
435      
436      /** Returns a qualified path object. */
437      @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
438      public Path makeQualified(URI defaultUri, Path workingDir ) {
439        Path path = this;
440        if (!isAbsolute()) {
441          path = new Path(workingDir, this);
442        }
443    
444        URI pathUri = path.toUri();
445          
446        String scheme = pathUri.getScheme();
447        String authority = pathUri.getAuthority();
448        String fragment = pathUri.getFragment();
449    
450        if (scheme != null &&
451            (authority != null || defaultUri.getAuthority() == null))
452          return path;
453    
454        if (scheme == null) {
455          scheme = defaultUri.getScheme();
456        }
457    
458        if (authority == null) {
459          authority = defaultUri.getAuthority();
460          if (authority == null) {
461            authority = "";
462          }
463        }
464        
465        URI newUri = null;
466        try {
467          newUri = new URI(scheme, authority , 
468            normalizePath(scheme, pathUri.getPath()), null, fragment);
469        } catch (URISyntaxException e) {
470          throw new IllegalArgumentException(e);
471        }
472        return new Path(newUri);
473      }
474    }