001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.IOException; 022import java.net.URI; 023import java.net.URISyntaxException; 024import java.util.regex.Pattern; 025 026import org.apache.avro.reflect.Stringable; 027import org.apache.commons.lang.StringUtils; 028import org.apache.hadoop.HadoopIllegalArgumentException; 029import org.apache.hadoop.classification.InterfaceAudience; 030import org.apache.hadoop.classification.InterfaceStability; 031import org.apache.hadoop.conf.Configuration; 032 033/** Names a file or directory in a {@link FileSystem}. 034 * Path strings use slash as the directory separator. A path string is 035 * absolute if it begins with a slash. 036 */ 037@Stringable 038@InterfaceAudience.Public 039@InterfaceStability.Stable 040public class Path implements Comparable { 041 042 /** The directory separator, a slash. */ 043 public static final String SEPARATOR = "/"; 044 public static final char SEPARATOR_CHAR = '/'; 045 046 public static final String CUR_DIR = "."; 047 048 public static final boolean WINDOWS 049 = System.getProperty("os.name").startsWith("Windows"); 050 051 /** 052 * Pre-compiled regular expressions to detect path formats. 053 */ 054 private static final Pattern hasUriScheme = 055 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:"); 056 private static final Pattern hasDriveLetterSpecifier = 057 Pattern.compile("^/?[a-zA-Z]:"); 058 059 private URI uri; // a hierarchical uri 060 061 /** 062 * Pathnames with scheme and relative path are illegal. 063 * @param path to be checked 064 */ 065 void checkNotSchemeWithRelative() { 066 if (toUri().isAbsolute() && !isUriPathAbsolute()) { 067 throw new HadoopIllegalArgumentException( 068 "Unsupported name: has scheme but relative path-part"); 069 } 070 } 071 072 void checkNotRelative() { 073 if (!isAbsolute() && toUri().getScheme() == null) { 074 throw new HadoopIllegalArgumentException("Path is relative"); 075 } 076 } 077 078 public static Path getPathWithoutSchemeAndAuthority(Path path) { 079 // This code depends on Path.toString() to remove the leading slash before 080 // the drive specification on Windows. 081 Path newPath = path.isUriPathAbsolute() ? 082 new Path(null, null, path.toUri().getPath()) : 083 path; 084 return newPath; 085 } 086 087 /** Resolve a child path against a parent path. */ 088 public Path(String parent, String child) { 089 this(new Path(parent), new Path(child)); 090 } 091 092 /** Resolve a child path against a parent path. */ 093 public Path(Path parent, String child) { 094 this(parent, new Path(child)); 095 } 096 097 /** Resolve a child path against a parent path. */ 098 public Path(String parent, Path child) { 099 this(new Path(parent), child); 100 } 101 102 /** Resolve a child path against a parent path. */ 103 public Path(Path parent, Path child) { 104 // Add a slash to parent's path so resolution is compatible with URI's 105 URI parentUri = parent.uri; 106 String parentPath = parentUri.getPath(); 107 if (!(parentPath.equals("/") || parentPath.isEmpty())) { 108 try { 109 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), 110 parentUri.getPath()+"/", null, parentUri.getFragment()); 111 } catch (URISyntaxException e) { 112 throw new IllegalArgumentException(e); 113 } 114 } 115 URI resolved = parentUri.resolve(child.uri); 116 initialize(resolved.getScheme(), resolved.getAuthority(), 117 resolved.getPath(), resolved.getFragment()); 118 } 119 120 private void checkPathArg( String path ) throws IllegalArgumentException { 121 // disallow construction of a Path from an empty string 122 if ( path == null ) { 123 throw new IllegalArgumentException( 124 "Can not create a Path from a null string"); 125 } 126 if( path.length() == 0 ) { 127 throw new IllegalArgumentException( 128 "Can not create a Path from an empty string"); 129 } 130 } 131 132 /** Construct a path from a String. Path strings are URIs, but with 133 * unescaped elements and some additional normalization. */ 134 public Path(String pathString) throws IllegalArgumentException { 135 checkPathArg( pathString ); 136 137 // We can't use 'new URI(String)' directly, since it assumes things are 138 // escaped, which we don't require of Paths. 139 140 // add a slash in front of paths with Windows drive letters 141 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { 142 pathString = "/" + pathString; 143 } 144 145 // parse uri components 146 String scheme = null; 147 String authority = null; 148 149 int start = 0; 150 151 // parse uri scheme, if any 152 int colon = pathString.indexOf(':'); 153 int slash = pathString.indexOf('/'); 154 if ((colon != -1) && 155 ((slash == -1) || (colon < slash))) { // has a scheme 156 scheme = pathString.substring(0, colon); 157 start = colon+1; 158 } 159 160 // parse uri authority, if any 161 if (pathString.startsWith("//", start) && 162 (pathString.length()-start > 2)) { // has authority 163 int nextSlash = pathString.indexOf('/', start+2); 164 int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); 165 authority = pathString.substring(start+2, authEnd); 166 start = authEnd; 167 } 168 169 // uri path is the rest of the string -- query & fragment not supported 170 String path = pathString.substring(start, pathString.length()); 171 172 initialize(scheme, authority, path, null); 173 } 174 175 /** 176 * Construct a path from a URI 177 */ 178 public Path(URI aUri) { 179 uri = aUri.normalize(); 180 } 181 182 /** Construct a Path from components. */ 183 public Path(String scheme, String authority, String path) { 184 checkPathArg( path ); 185 186 // add a slash in front of paths with Windows drive letters 187 if (hasWindowsDrive(path) && path.charAt(0) != '/') { 188 path = "/" + path; 189 } 190 191 // add "./" in front of Linux relative paths so that a path containing 192 // a colon e.q. "a:b" will not be interpreted as scheme "a". 193 if (!WINDOWS && path.charAt(0) != '/') { 194 path = "./" + path; 195 } 196 197 initialize(scheme, authority, path, null); 198 } 199 200 private void initialize(String scheme, String authority, String path, 201 String fragment) { 202 try { 203 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) 204 .normalize(); 205 } catch (URISyntaxException e) { 206 throw new IllegalArgumentException(e); 207 } 208 } 209 210 /** 211 * Merge 2 paths such that the second path is appended relative to the first. 212 * The returned path has the scheme and authority of the first path. On 213 * Windows, the drive specification in the second path is discarded. 214 * 215 * @param path1 Path first path 216 * @param path2 Path second path, to be appended relative to path1 217 * @return Path merged path 218 */ 219 public static Path mergePaths(Path path1, Path path2) { 220 String path2Str = path2.toUri().getPath(); 221 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); 222 // Add path components explicitly, because simply concatenating two path 223 // string is not safe, for example: 224 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path 225 return new Path(path1.toUri().getScheme(), 226 path1.toUri().getAuthority(), 227 path1.toUri().getPath() + path2Str); 228 } 229 230 /** 231 * Normalize a path string to use non-duplicated forward slashes as 232 * the path separator and remove any trailing path separators. 233 * @param scheme Supplies the URI scheme. Used to deduce whether we 234 * should replace backslashes or not. 235 * @param path Supplies the scheme-specific part 236 * @return Normalized path string. 237 */ 238 private static String normalizePath(String scheme, String path) { 239 // Remove double forward slashes. 240 path = StringUtils.replace(path, "//", "/"); 241 242 // Remove backslashes if this looks like a Windows path. Avoid 243 // the substitution if it looks like a non-local URI. 244 if (WINDOWS && 245 (hasWindowsDrive(path) || 246 (scheme == null) || 247 (scheme.isEmpty()) || 248 (scheme.equals("file")))) { 249 path = StringUtils.replace(path, "\\", "/"); 250 } 251 252 // trim trailing slash from non-root path (ignoring windows drive) 253 int minLength = startPositionWithoutWindowsDrive(path) + 1; 254 if (path.length() > minLength && path.endsWith(SEPARATOR)) { 255 path = path.substring(0, path.length()-1); 256 } 257 258 return path; 259 } 260 261 private static boolean hasWindowsDrive(String path) { 262 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find()); 263 } 264 265 private static int startPositionWithoutWindowsDrive(String path) { 266 if (hasWindowsDrive(path)) { 267 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; 268 } else { 269 return 0; 270 } 271 } 272 273 /** 274 * Determine whether a given path string represents an absolute path on 275 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. 276 * 277 * @param pathString Supplies the path string to evaluate. 278 * @param slashed true if the given path is prefixed with "/". 279 * @return true if the supplied path looks like an absolute path with a Windows 280 * drive-specifier. 281 */ 282 public static boolean isWindowsAbsolutePath(final String pathString, 283 final boolean slashed) { 284 int start = startPositionWithoutWindowsDrive(pathString); 285 return start > 0 286 && pathString.length() > start 287 && ((pathString.charAt(start) == SEPARATOR_CHAR) || 288 (pathString.charAt(start) == '\\')); 289 } 290 291 /** Convert this to a URI. */ 292 public URI toUri() { return uri; } 293 294 /** Return the FileSystem that owns this Path. */ 295 public FileSystem getFileSystem(Configuration conf) throws IOException { 296 return FileSystem.get(this.toUri(), conf); 297 } 298 299 /** 300 * Is an absolute path (ie a slash relative path part) 301 * AND a scheme is null AND authority is null. 302 */ 303 public boolean isAbsoluteAndSchemeAuthorityNull() { 304 return (isUriPathAbsolute() && 305 uri.getScheme() == null && uri.getAuthority() == null); 306 } 307 308 /** 309 * True if the path component (i.e. directory) of this URI is absolute. 310 */ 311 public boolean isUriPathAbsolute() { 312 int start = startPositionWithoutWindowsDrive(uri.getPath()); 313 return uri.getPath().startsWith(SEPARATOR, start); 314 } 315 316 /** True if the path component of this URI is absolute. */ 317 /** 318 * There is some ambiguity here. An absolute path is a slash 319 * relative name without a scheme or an authority. 320 * So either this method was incorrectly named or its 321 * implementation is incorrect. This method returns true 322 * even if there is a scheme and authority. 323 */ 324 public boolean isAbsolute() { 325 return isUriPathAbsolute(); 326 } 327 328 /** 329 * @return true if and only if this path represents the root of a file system 330 */ 331 public boolean isRoot() { 332 return getParent() == null; 333 } 334 335 /** Returns the final component of this path.*/ 336 public String getName() { 337 String path = uri.getPath(); 338 int slash = path.lastIndexOf(SEPARATOR); 339 return path.substring(slash+1); 340 } 341 342 /** Returns the parent of a path or null if at root. */ 343 public Path getParent() { 344 String path = uri.getPath(); 345 int lastSlash = path.lastIndexOf('/'); 346 int start = startPositionWithoutWindowsDrive(path); 347 if ((path.length() == start) || // empty path 348 (lastSlash == start && path.length() == start+1)) { // at root 349 return null; 350 } 351 String parent; 352 if (lastSlash==-1) { 353 parent = CUR_DIR; 354 } else { 355 parent = path.substring(0, lastSlash==start?start+1:lastSlash); 356 } 357 return new Path(uri.getScheme(), uri.getAuthority(), parent); 358 } 359 360 /** Adds a suffix to the final name in the path.*/ 361 public Path suffix(String suffix) { 362 return new Path(getParent(), getName()+suffix); 363 } 364 365 @Override 366 public String toString() { 367 // we can't use uri.toString(), which escapes everything, because we want 368 // illegal characters unescaped in the string, for glob processing, etc. 369 StringBuilder buffer = new StringBuilder(); 370 if (uri.getScheme() != null) { 371 buffer.append(uri.getScheme()); 372 buffer.append(":"); 373 } 374 if (uri.getAuthority() != null) { 375 buffer.append("//"); 376 buffer.append(uri.getAuthority()); 377 } 378 if (uri.getPath() != null) { 379 String path = uri.getPath(); 380 if (path.indexOf('/')==0 && 381 hasWindowsDrive(path) && // has windows drive 382 uri.getScheme() == null && // but no scheme 383 uri.getAuthority() == null) // or authority 384 path = path.substring(1); // remove slash before drive 385 buffer.append(path); 386 } 387 if (uri.getFragment() != null) { 388 buffer.append("#"); 389 buffer.append(uri.getFragment()); 390 } 391 return buffer.toString(); 392 } 393 394 @Override 395 public boolean equals(Object o) { 396 if (!(o instanceof Path)) { 397 return false; 398 } 399 Path that = (Path)o; 400 return this.uri.equals(that.uri); 401 } 402 403 @Override 404 public int hashCode() { 405 return uri.hashCode(); 406 } 407 408 @Override 409 public int compareTo(Object o) { 410 Path that = (Path)o; 411 return this.uri.compareTo(that.uri); 412 } 413 414 /** Return the number of elements in this path. */ 415 public int depth() { 416 String path = uri.getPath(); 417 int depth = 0; 418 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; 419 while (slash != -1) { 420 depth++; 421 slash = path.indexOf(SEPARATOR, slash+1); 422 } 423 return depth; 424 } 425 426 /** 427 * Returns a qualified path object. 428 * 429 * Deprecated - use {@link #makeQualified(URI, Path)} 430 */ 431 @Deprecated 432 public Path makeQualified(FileSystem fs) { 433 return makeQualified(fs.getUri(), fs.getWorkingDirectory()); 434 } 435 436 /** Returns a qualified path object. */ 437 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 438 public Path makeQualified(URI defaultUri, Path workingDir ) { 439 Path path = this; 440 if (!isAbsolute()) { 441 path = new Path(workingDir, this); 442 } 443 444 URI pathUri = path.toUri(); 445 446 String scheme = pathUri.getScheme(); 447 String authority = pathUri.getAuthority(); 448 String fragment = pathUri.getFragment(); 449 450 if (scheme != null && 451 (authority != null || defaultUri.getAuthority() == null)) 452 return path; 453 454 if (scheme == null) { 455 scheme = defaultUri.getScheme(); 456 } 457 458 if (authority == null) { 459 authority = defaultUri.getAuthority(); 460 if (authority == null) { 461 authority = ""; 462 } 463 } 464 465 URI newUri = null; 466 try { 467 newUri = new URI(scheme, authority , 468 normalizePath(scheme, pathUri.getPath()), null, fragment); 469 } catch (URISyntaxException e) { 470 throw new IllegalArgumentException(e); 471 } 472 return new Path(newUri); 473 } 474}