/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.LightWeightResizableGSet;

import com.google.common.annotations.VisibleForTesting;

/**
 * This class is used by datanodes to maintain metadata of their replicas.
 * It provides a general interface for the meta information of a replica.
 */
@InterfaceAudience.Private
public abstract class ReplicaInfo extends Block
    implements Replica, LightWeightResizableGSet.LinkedElement {

  /** For implementing the {@link LightWeightResizableGSet.LinkedElement} interface. */
  private LightWeightResizableGSet.LinkedElement next;

  /** Volume where the replica belongs. */
  private FsVolumeSpi volume;

  /**
   * Base directory containing numerically-identified subdirectories and
   * possibly blocks.
   */
  private File baseDir;

  /**
   * Whether or not this replica's parent directory includes subdirs, in which
   * case we can generate them based on the replica's block ID.
   */
  private boolean hasSubdirs;

  private static final Map<String, File> internedBaseDirs =
      new HashMap<String, File>();

  /**
   * Constructor.
   * @param block a block
   * @param vol volume where the replica is located
   * @param dir directory path where block and meta files are located
   */
  ReplicaInfo(Block block, FsVolumeSpi vol, File dir) {
    this(block.getBlockId(), block.getNumBytes(),
        block.getGenerationStamp(), vol, dir);
  }

  /**
   * Constructor.
   * @param blockId block id
   * @param len replica length
   * @param genStamp replica generation stamp
   * @param vol volume where the replica is located
   * @param dir directory path where block and meta files are located
   */
  ReplicaInfo(long blockId, long len, long genStamp,
      FsVolumeSpi vol, File dir) {
    super(blockId, len, genStamp);
    this.volume = vol;
    setDirInternal(dir);
  }

  /**
   * Copy constructor.
   * @param from where to copy from
   */
  ReplicaInfo(ReplicaInfo from) {
    this(from, from.getVolume(), from.getDir());
  }

  /**
   * Get the full path of this replica's data file.
   * @return the full path of this replica's data file
   */
  public File getBlockFile() {
    return new File(getDir(), getBlockName());
  }

  /**
   * Get the full path of this replica's meta file.
   * @return the full path of this replica's meta file
   */
  public File getMetaFile() {
    return new File(getDir(),
        DatanodeUtil.getMetaName(getBlockName(), getGenerationStamp()));
  }
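  // Illustrative example of the two paths above (block id and generation
  // stamp made up): a replica with block id 1073741825 and genstamp 1001 is
  // named "blk_1073741825" by getBlockName(), so under a directory <dir> the
  // two files resolve to:
  //   <dir>/blk_1073741825            - block data file
  //   <dir>/blk_1073741825_1001.meta  - checksum (meta) file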
  /**
   * Get the volume where this replica is located on disk.
   * @return the volume where this replica is located on disk
   */
  public FsVolumeSpi getVolume() {
    return volume;
  }

  /**
   * Set the volume where this replica is located on disk.
   */
  void setVolume(FsVolumeSpi vol) {
    this.volume = vol;
  }

  /**
   * Get the storageUuid of the volume that stores this replica.
   */
  @Override
  public String getStorageUuid() {
    return volume.getStorageID();
  }

  /**
   * Return the parent directory path where this replica is located.
   * @return the parent directory path where this replica is located
   */
  File getDir() {
    return hasSubdirs ? DatanodeUtil.idToBlockDir(baseDir,
        getBlockId()) : baseDir;
  }

  /**
   * Set the parent directory where this replica is located.
   * @param dir the parent directory where the replica is located
   */
  public void setDir(File dir) {
    setDirInternal(dir);
  }

  private void setDirInternal(File dir) {
    if (dir == null) {
      baseDir = null;
      return;
    }

    ReplicaDirInfo dirInfo = parseBaseDir(dir);
    this.hasSubdirs = dirInfo.hasSubdirs;

    synchronized (internedBaseDirs) {
      if (!internedBaseDirs.containsKey(dirInfo.baseDirPath)) {
        // Create a new String path of this file and make a brand new File
        // object to guarantee we drop the reference to the underlying char[]
        // storage.
        File baseDir = new File(dirInfo.baseDirPath);
        internedBaseDirs.put(dirInfo.baseDirPath, baseDir);
      }
      this.baseDir = internedBaseDirs.get(dirInfo.baseDirPath);
    }
  }

  @VisibleForTesting
  public static class ReplicaDirInfo {
    public String baseDirPath;
    public boolean hasSubdirs;

    public ReplicaDirInfo(String baseDirPath, boolean hasSubdirs) {
      this.baseDirPath = baseDirPath;
      this.hasSubdirs = hasSubdirs;
    }
  }

  @VisibleForTesting
  public static ReplicaDirInfo parseBaseDir(File dir) {
    File currentDir = dir;
    boolean hasSubdirs = false;
    while (currentDir.getName().startsWith(DataStorage.BLOCK_SUBDIR_PREFIX)) {
      hasSubdirs = true;
      currentDir = currentDir.getParentFile();
    }

    return new ReplicaDirInfo(currentDir.getAbsolutePath(), hasSubdirs);
  }
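  // Illustrative sketch of the parse-and-intern scheme above (the path is
  // made up): for
  //   parseBaseDir(new File("/data/current/finalized/subdir0/subdir5"))
  // the loop strips each "subdir" component and returns
  //   baseDirPath = "/data/current/finalized", hasSubdirs = true.
  // getDir() later rebuilds the subdir path from the block ID via
  // DatanodeUtil.idToBlockDir(), and setDirInternal() interns the base
  // directory so all replicas on a volume share one File instance for it.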
  /**
   * Number of bytes reserved for this replica on disk.
   */
  public long getBytesReserved() {
    return 0;
  }

  /**
   * Number of bytes originally reserved for this replica. The actual
   * reservation is adjusted as data is written to disk.
   *
   * @return the number of bytes originally reserved for this replica.
   */
  public long getOriginalBytesReserved() {
    return 0;
  }

  /**
   * Copy the specified file into a temporary file, then rename the
   * temporary file to the original name. This causes any hardlinks
   * to the original file to be removed. The temporary files are
   * created in the same directory and will be recovered (especially
   * on Windows) on datanode restart.
   */
  private void breakHardlinks(File file, Block b) throws IOException {
    File tmpFile = DatanodeUtil.createTmpFile(b,
        DatanodeUtil.getUnlinkTmpFile(file));
    try {
      try (FileInputStream in = new FileInputStream(file);
           FileOutputStream out = new FileOutputStream(tmpFile)) {
        IOUtils.copyBytes(in, out, 16 * 1024);
      }
      if (file.length() != tmpFile.length()) {
        throw new IOException("Copy of file " + file + " size " + file.length()
            + " into file " + tmpFile
            + " resulted in a size of " + tmpFile.length());
      }
      FileUtil.replaceFile(tmpFile, file);
    } catch (IOException e) {
      if (!tmpFile.delete()) {
        DataNode.LOG.info("detachFile failed to delete temporary file " +
            tmpFile);
      }
      throw e;
    }
  }

  /**
   * This function "breaks hardlinks" to the current replica file.
   *
   * When doing a DataNode upgrade, we create a bunch of hardlinks to each
   * block file. This cleverly ensures that both the old and the new storage
   * directories can contain the same block file, without using additional
   * space for the data.
   *
   * However, when we want to append to the replica file, we need to "break"
   * the hardlink to ensure that the old snapshot continues to contain the old
   * data length. If we failed to do that, we could roll back to the previous/
   * directory during a downgrade, and find that the block contents were
   * longer than they were at the time of the upgrade.
   *
   * @return true only if data was copied.
   * @throws IOException
   */
  public boolean breakHardLinksIfNeeded() throws IOException {
    File file = getBlockFile();
    if (file == null || getVolume() == null) {
      throw new IOException("detachBlock:Block not found. " + this);
    }
    File meta = getMetaFile();

    boolean copied = false;
    int linkCount = HardLink.getLinkCount(file);
    if (linkCount > 1) {
      DataNode.LOG.info("Breaking hardlink for " + linkCount + "x-linked " +
          "block " + this);
      breakHardlinks(file, this);
      copied = true;
    }
    if (HardLink.getLinkCount(meta) > 1) {
      breakHardlinks(meta, this);
      copied = true;
    }
    return copied;
  }

  @Override  // Object
  public String toString() {
    return getClass().getSimpleName()
        + ", " + super.toString()
        + ", " + getState()
        + "\n  getNumBytes()     = " + getNumBytes()
        + "\n  getBytesOnDisk()  = " + getBytesOnDisk()
        + "\n  getVisibleLength()= " + getVisibleLength()
        + "\n  getVolume()       = " + getVolume()
        + "\n  getBlockFile()    = " + getBlockFile();
  }

  @Override
  public boolean isOnTransientStorage() {
    return volume.isTransientStorage();
  }

  @Override
  public LightWeightResizableGSet.LinkedElement getNext() {
    return next;
  }

  @Override
  public void setNext(LightWeightResizableGSet.LinkedElement next) {
    this.next = next;
  }
}