001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.IOException; 022import java.net.URI; 023import java.net.URISyntaxException; 024import java.util.regex.Pattern; 025 026import org.apache.avro.reflect.Stringable; 027import org.apache.commons.lang.StringUtils; 028import org.apache.hadoop.HadoopIllegalArgumentException; 029import org.apache.hadoop.classification.InterfaceAudience; 030import org.apache.hadoop.classification.InterfaceStability; 031import org.apache.hadoop.conf.Configuration; 032 033/** 034 * Names a file or directory in a {@link FileSystem}. 035 * Path strings use slash as the directory separator. 036 */ 037@Stringable 038@InterfaceAudience.Public 039@InterfaceStability.Stable 040public class Path implements Comparable { 041 042 /** 043 * The directory separator, a slash. 044 */ 045 public static final String SEPARATOR = "/"; 046 047 /** 048 * The directory separator, a slash, as a character. 049 */ 050 public static final char SEPARATOR_CHAR = '/'; 051 052 /** 053 * The current directory, ".". 054 */ 055 public static final String CUR_DIR = "."; 056 057 /** 058 * Whether the current host is a Windows machine. 059 */ 060 public static final boolean WINDOWS = 061 System.getProperty("os.name").startsWith("Windows"); 062 063 /** 064 * Pre-compiled regular expressions to detect path formats. 065 */ 066 private static final Pattern HAS_DRIVE_LETTER_SPECIFIER = 067 Pattern.compile("^/?[a-zA-Z]:"); 068 069 private URI uri; // a hierarchical uri 070 071 /** 072 * Test whether this Path uses a scheme and is relative. 073 * Pathnames with scheme and relative path are illegal. 074 */ 075 void checkNotSchemeWithRelative() { 076 if (toUri().isAbsolute() && !isUriPathAbsolute()) { 077 throw new HadoopIllegalArgumentException( 078 "Unsupported name: has scheme but relative path-part"); 079 } 080 } 081 082 void checkNotRelative() { 083 if (!isAbsolute() && toUri().getScheme() == null) { 084 throw new HadoopIllegalArgumentException("Path is relative"); 085 } 086 } 087 088 /** 089 * Return a version of the given Path without the scheme information. 090 * 091 * @param path the source Path 092 * @return a copy of this Path without the scheme information 093 */ 094 public static Path getPathWithoutSchemeAndAuthority(Path path) { 095 // This code depends on Path.toString() to remove the leading slash before 096 // the drive specification on Windows. 097 Path newPath = path.isUriPathAbsolute() ? 098 new Path(null, null, path.toUri().getPath()) : 099 path; 100 return newPath; 101 } 102 103 /** 104 * Create a new Path based on the child path resolved against the parent path. 105 * 106 * @param parent the parent path 107 * @param child the child path 108 */ 109 public Path(String parent, String child) { 110 this(new Path(parent), new Path(child)); 111 } 112 113 /** 114 * Create a new Path based on the child path resolved against the parent path. 115 * 116 * @param parent the parent path 117 * @param child the child path 118 */ 119 public Path(Path parent, String child) { 120 this(parent, new Path(child)); 121 } 122 123 /** 124 * Create a new Path based on the child path resolved against the parent path. 125 * 126 * @param parent the parent path 127 * @param child the child path 128 */ 129 public Path(String parent, Path child) { 130 this(new Path(parent), child); 131 } 132 133 /** 134 * Create a new Path based on the child path resolved against the parent path. 135 * 136 * @param parent the parent path 137 * @param child the child path 138 */ 139 public Path(Path parent, Path child) { 140 // Add a slash to parent's path so resolution is compatible with URI's 141 URI parentUri = parent.uri; 142 String parentPath = parentUri.getPath(); 143 if (!(parentPath.equals("/") || parentPath.isEmpty())) { 144 try { 145 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), 146 parentUri.getPath()+"/", null, parentUri.getFragment()); 147 } catch (URISyntaxException e) { 148 throw new IllegalArgumentException(e); 149 } 150 } 151 URI resolved = parentUri.resolve(child.uri); 152 initialize(resolved.getScheme(), resolved.getAuthority(), 153 resolved.getPath(), resolved.getFragment()); 154 } 155 156 private void checkPathArg( String path ) throws IllegalArgumentException { 157 // disallow construction of a Path from an empty string 158 if ( path == null ) { 159 throw new IllegalArgumentException( 160 "Can not create a Path from a null string"); 161 } 162 if( path.length() == 0 ) { 163 throw new IllegalArgumentException( 164 "Can not create a Path from an empty string"); 165 } 166 } 167 168 /** 169 * Construct a path from a String. Path strings are URIs, but with 170 * unescaped elements and some additional normalization. 171 * 172 * @param pathString the path string 173 */ 174 public Path(String pathString) throws IllegalArgumentException { 175 checkPathArg( pathString ); 176 177 // We can't use 'new URI(String)' directly, since it assumes things are 178 // escaped, which we don't require of Paths. 179 180 // add a slash in front of paths with Windows drive letters 181 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { 182 pathString = "/" + pathString; 183 } 184 185 // parse uri components 186 String scheme = null; 187 String authority = null; 188 189 int start = 0; 190 191 // parse uri scheme, if any 192 int colon = pathString.indexOf(':'); 193 int slash = pathString.indexOf('/'); 194 if ((colon != -1) && 195 ((slash == -1) || (colon < slash))) { // has a scheme 196 scheme = pathString.substring(0, colon); 197 start = colon+1; 198 } 199 200 // parse uri authority, if any 201 if (pathString.startsWith("//", start) && 202 (pathString.length()-start > 2)) { // has authority 203 int nextSlash = pathString.indexOf('/', start+2); 204 int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); 205 authority = pathString.substring(start+2, authEnd); 206 start = authEnd; 207 } 208 209 // uri path is the rest of the string -- query & fragment not supported 210 String path = pathString.substring(start, pathString.length()); 211 212 initialize(scheme, authority, path, null); 213 } 214 215 /** 216 * Construct a path from a URI 217 * 218 * @param aUri the source URI 219 */ 220 public Path(URI aUri) { 221 uri = aUri.normalize(); 222 } 223 224 /** 225 * Construct a Path from components. 226 * 227 * @param scheme the scheme 228 * @param authority the authority 229 * @param path the path 230 */ 231 public Path(String scheme, String authority, String path) { 232 checkPathArg( path ); 233 234 // add a slash in front of paths with Windows drive letters 235 if (hasWindowsDrive(path) && path.charAt(0) != '/') { 236 path = "/" + path; 237 } 238 239 // add "./" in front of Linux relative paths so that a path containing 240 // a colon e.q. "a:b" will not be interpreted as scheme "a". 241 if (!WINDOWS && path.charAt(0) != '/') { 242 path = "./" + path; 243 } 244 245 initialize(scheme, authority, path, null); 246 } 247 248 private void initialize(String scheme, String authority, String path, 249 String fragment) { 250 try { 251 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) 252 .normalize(); 253 } catch (URISyntaxException e) { 254 throw new IllegalArgumentException(e); 255 } 256 } 257 258 /** 259 * Merge 2 paths such that the second path is appended relative to the first. 260 * The returned path has the scheme and authority of the first path. On 261 * Windows, the drive specification in the second path is discarded. 262 * 263 * @param path1 the first path 264 * @param path2 the second path, to be appended relative to path1 265 * @return the merged path 266 */ 267 public static Path mergePaths(Path path1, Path path2) { 268 String path2Str = path2.toUri().getPath(); 269 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); 270 // Add path components explicitly, because simply concatenating two path 271 // string is not safe, for example: 272 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path 273 return new Path(path1.toUri().getScheme(), 274 path1.toUri().getAuthority(), 275 path1.toUri().getPath() + path2Str); 276 } 277 278 /** 279 * Normalize a path string to use non-duplicated forward slashes as 280 * the path separator and remove any trailing path separators. 281 * 282 * @param scheme the URI scheme. Used to deduce whether we 283 * should replace backslashes or not 284 * @param path the scheme-specific part 285 * @return the normalized path string 286 */ 287 private static String normalizePath(String scheme, String path) { 288 // Remove double forward slashes. 289 path = StringUtils.replace(path, "//", "/"); 290 291 // Remove backslashes if this looks like a Windows path. Avoid 292 // the substitution if it looks like a non-local URI. 293 if (WINDOWS && 294 (hasWindowsDrive(path) || 295 (scheme == null) || 296 (scheme.isEmpty()) || 297 (scheme.equals("file")))) { 298 path = StringUtils.replace(path, "\\", "/"); 299 } 300 301 // trim trailing slash from non-root path (ignoring windows drive) 302 int minLength = startPositionWithoutWindowsDrive(path) + 1; 303 if (path.length() > minLength && path.endsWith(SEPARATOR)) { 304 path = path.substring(0, path.length()-1); 305 } 306 307 return path; 308 } 309 310 private static boolean hasWindowsDrive(String path) { 311 return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find()); 312 } 313 314 private static int startPositionWithoutWindowsDrive(String path) { 315 if (hasWindowsDrive(path)) { 316 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; 317 } else { 318 return 0; 319 } 320 } 321 322 /** 323 * Determine whether a given path string represents an absolute path on 324 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. 325 * 326 * @param pathString the path string to evaluate 327 * @param slashed true if the given path is prefixed with "/" 328 * @return true if the supplied path looks like an absolute path with a Windows 329 * drive-specifier 330 */ 331 public static boolean isWindowsAbsolutePath(final String pathString, 332 final boolean slashed) { 333 int start = startPositionWithoutWindowsDrive(pathString); 334 return start > 0 335 && pathString.length() > start 336 && ((pathString.charAt(start) == SEPARATOR_CHAR) || 337 (pathString.charAt(start) == '\\')); 338 } 339 340 /** 341 * Convert this Path to a URI. 342 * 343 * @return this Path as a URI 344 */ 345 public URI toUri() { return uri; } 346 347 /** 348 * Return the FileSystem that owns this Path. 349 * 350 * @param conf the configuration to use when resolving the FileSystem 351 * @return the FileSystem that owns this Path 352 * @throws java.io.IOException thrown if there's an issue resolving the 353 * FileSystem 354 */ 355 public FileSystem getFileSystem(Configuration conf) throws IOException { 356 return FileSystem.get(this.toUri(), conf); 357 } 358 359 /** 360 * Returns true if the path component (i.e. directory) of this URI is 361 * absolute <strong>and</strong> the scheme is null, <b>and</b> the authority 362 * is null. 363 * 364 * @return whether the path is absolute and the URI has no scheme nor 365 * authority parts 366 */ 367 public boolean isAbsoluteAndSchemeAuthorityNull() { 368 return (isUriPathAbsolute() && 369 uri.getScheme() == null && uri.getAuthority() == null); 370 } 371 372 /** 373 * Returns true if the path component (i.e. directory) of this URI is 374 * absolute. 375 * 376 * @return whether this URI's path is absolute 377 */ 378 public boolean isUriPathAbsolute() { 379 int start = startPositionWithoutWindowsDrive(uri.getPath()); 380 return uri.getPath().startsWith(SEPARATOR, start); 381 } 382 383 /** 384 * Returns true if the path component (i.e. directory) of this URI is 385 * absolute. This method is a wrapper for {@link #isUriPathAbsolute()}. 386 * 387 * @return whether this URI's path is absolute 388 */ 389 public boolean isAbsolute() { 390 return isUriPathAbsolute(); 391 } 392 393 /** 394 * Returns true if and only if this path represents the root of a file system. 395 * 396 * @return true if and only if this path represents the root of a file system 397 */ 398 public boolean isRoot() { 399 return getParent() == null; 400 } 401 402 /** 403 * Returns the final component of this path. 404 * 405 * @return the final component of this path 406 */ 407 public String getName() { 408 String path = uri.getPath(); 409 int slash = path.lastIndexOf(SEPARATOR); 410 return path.substring(slash+1); 411 } 412 413 /** 414 * Returns the parent of a path or null if at root. 415 * @return the parent of a path or null if at root 416 */ 417 public Path getParent() { 418 String path = uri.getPath(); 419 int lastSlash = path.lastIndexOf('/'); 420 int start = startPositionWithoutWindowsDrive(path); 421 if ((path.length() == start) || // empty path 422 (lastSlash == start && path.length() == start+1)) { // at root 423 return null; 424 } 425 String parent; 426 if (lastSlash==-1) { 427 parent = CUR_DIR; 428 } else { 429 parent = path.substring(0, lastSlash==start?start+1:lastSlash); 430 } 431 return new Path(uri.getScheme(), uri.getAuthority(), parent); 432 } 433 434 /** 435 * Adds a suffix to the final name in the path. 436 * 437 * @param suffix the suffix to add 438 * @return a new path with the suffix added 439 */ 440 public Path suffix(String suffix) { 441 return new Path(getParent(), getName()+suffix); 442 } 443 444 @Override 445 public String toString() { 446 // we can't use uri.toString(), which escapes everything, because we want 447 // illegal characters unescaped in the string, for glob processing, etc. 448 StringBuilder buffer = new StringBuilder(); 449 if (uri.getScheme() != null) { 450 buffer.append(uri.getScheme()); 451 buffer.append(":"); 452 } 453 if (uri.getAuthority() != null) { 454 buffer.append("//"); 455 buffer.append(uri.getAuthority()); 456 } 457 if (uri.getPath() != null) { 458 String path = uri.getPath(); 459 if (path.indexOf('/')==0 && 460 hasWindowsDrive(path) && // has windows drive 461 uri.getScheme() == null && // but no scheme 462 uri.getAuthority() == null) // or authority 463 path = path.substring(1); // remove slash before drive 464 buffer.append(path); 465 } 466 if (uri.getFragment() != null) { 467 buffer.append("#"); 468 buffer.append(uri.getFragment()); 469 } 470 return buffer.toString(); 471 } 472 473 @Override 474 public boolean equals(Object o) { 475 if (!(o instanceof Path)) { 476 return false; 477 } 478 Path that = (Path)o; 479 return this.uri.equals(that.uri); 480 } 481 482 @Override 483 public int hashCode() { 484 return uri.hashCode(); 485 } 486 487 @Override 488 public int compareTo(Object o) { 489 Path that = (Path)o; 490 return this.uri.compareTo(that.uri); 491 } 492 493 /** 494 * Returns the number of elements in this path. 495 * @return the number of elements in this path 496 */ 497 public int depth() { 498 String path = uri.getPath(); 499 int depth = 0; 500 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; 501 while (slash != -1) { 502 depth++; 503 slash = path.indexOf(SEPARATOR, slash+1); 504 } 505 return depth; 506 } 507 508 /** 509 * Returns a qualified path object for the {@link FileSystem}'s working 510 * directory. 511 * 512 * @param fs the target FileSystem 513 * @return a qualified path object for the FileSystem's working directory 514 * @deprecated use {@link #makeQualified(URI, Path)} 515 */ 516 @Deprecated 517 public Path makeQualified(FileSystem fs) { 518 return makeQualified(fs.getUri(), fs.getWorkingDirectory()); 519 } 520 521 /** 522 * Returns a qualified path object. 523 * 524 * @param defaultUri if this path is missing the scheme or authority 525 * components, borrow them from this URI 526 * @param workingDir if this path isn't absolute, treat it as relative to this 527 * working directory 528 * @return this path if it contains a scheme and authority and is absolute, or 529 * a new path that includes a path and authority and is fully qualified 530 */ 531 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 532 public Path makeQualified(URI defaultUri, Path workingDir ) { 533 Path path = this; 534 if (!isAbsolute()) { 535 path = new Path(workingDir, this); 536 } 537 538 URI pathUri = path.toUri(); 539 540 String scheme = pathUri.getScheme(); 541 String authority = pathUri.getAuthority(); 542 String fragment = pathUri.getFragment(); 543 544 if (scheme != null && 545 (authority != null || defaultUri.getAuthority() == null)) 546 return path; 547 548 if (scheme == null) { 549 scheme = defaultUri.getScheme(); 550 } 551 552 if (authority == null) { 553 authority = defaultUri.getAuthority(); 554 if (authority == null) { 555 authority = ""; 556 } 557 } 558 559 URI newUri = null; 560 try { 561 newUri = new URI(scheme, authority , 562 normalizePath(scheme, pathUri.getPath()), null, fragment); 563 } catch (URISyntaxException e) { 564 throw new IllegalArgumentException(e); 565 } 566 return new Path(newUri); 567 } 568}