001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.datanode;
019
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileOutputStream;
023import java.io.IOException;
024import java.util.HashMap;
025import java.util.Map;
026
027import org.apache.hadoop.classification.InterfaceAudience;
028import org.apache.hadoop.fs.FileUtil;
029import org.apache.hadoop.fs.HardLink;
030import org.apache.hadoop.hdfs.protocol.Block;
031import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
032import org.apache.hadoop.io.IOUtils;
033import org.apache.hadoop.util.LightWeightResizableGSet;
034
035import com.google.common.annotations.VisibleForTesting;
036
037/**
038 * This class is used by datanodes to maintain meta data of its replicas.
039 * It provides a general interface for meta information of a replica.
040 */
041@InterfaceAudience.Private
042abstract public class ReplicaInfo extends Block
043    implements Replica, LightWeightResizableGSet.LinkedElement {
044
045  /** For implementing {@link LightWeightResizableGSet.LinkedElement} interface */
046  private LightWeightResizableGSet.LinkedElement next;
047
048  /** volume where the replica belongs */
049  private FsVolumeSpi volume;
050  
051  /** directory where block & meta files belong */
052  
053  /**
054   * Base directory containing numerically-identified sub directories and
055   * possibly blocks.
056   */
057  private File baseDir;
058  
059  /**
060   * Whether or not this replica's parent directory includes subdirs, in which
061   * case we can generate them based on the replica's block ID
062   */
063  private boolean hasSubdirs;
064  
065  private static final Map<String, File> internedBaseDirs = new HashMap<String, File>();
066
067  /**
068   * Constructor
069   * @param block a block
070   * @param vol volume where replica is located
071   * @param dir directory path where block and meta files are located
072   */
073  ReplicaInfo(Block block, FsVolumeSpi vol, File dir) {
074    this(block.getBlockId(), block.getNumBytes(), 
075        block.getGenerationStamp(), vol, dir);
076  }
077  
078  /**
079   * Constructor
080   * @param blockId block id
081   * @param len replica length
082   * @param genStamp replica generation stamp
083   * @param vol volume where replica is located
084   * @param dir directory path where block and meta files are located
085   */
086  ReplicaInfo(long blockId, long len, long genStamp,
087      FsVolumeSpi vol, File dir) {
088    super(blockId, len, genStamp);
089    this.volume = vol;
090    setDirInternal(dir);
091  }
092
093  /**
094   * Copy constructor.
095   * @param from where to copy from
096   */
097  ReplicaInfo(ReplicaInfo from) {
098    this(from, from.getVolume(), from.getDir());
099  }
100  
101  /**
102   * Get the full path of this replica's data file
103   * @return the full path of this replica's data file
104   */
105  public File getBlockFile() {
106    return new File(getDir(), getBlockName());
107  }
108  
109  /**
110   * Get the full path of this replica's meta file
111   * @return the full path of this replica's meta file
112   */
113  public File getMetaFile() {
114    return new File(getDir(),
115        DatanodeUtil.getMetaName(getBlockName(), getGenerationStamp()));
116  }
117  
118  /**
119   * Get the volume where this replica is located on disk
120   * @return the volume where this replica is located on disk
121   */
122  public FsVolumeSpi getVolume() {
123    return volume;
124  }
125  
126  /**
127   * Set the volume where this replica is located on disk
128   */
129  void setVolume(FsVolumeSpi vol) {
130    this.volume = vol;
131  }
132
133  /**
134   * Get the storageUuid of the volume that stores this replica.
135   */
136  @Override
137  public String getStorageUuid() {
138    return volume.getStorageID();
139  }
140  
141  /**
142   * Return the parent directory path where this replica is located
143   * @return the parent directory path where this replica is located
144   */
145  File getDir() {
146    return hasSubdirs ? DatanodeUtil.idToBlockDir(baseDir,
147        getBlockId()) : baseDir;
148  }
149
150  /**
151   * Set the parent directory where this replica is located
152   * @param dir the parent directory where the replica is located
153   */
154  public void setDir(File dir) {
155    setDirInternal(dir);
156  }
157
158  private void setDirInternal(File dir) {
159    if (dir == null) {
160      baseDir = null;
161      return;
162    }
163
164    ReplicaDirInfo dirInfo = parseBaseDir(dir);
165    this.hasSubdirs = dirInfo.hasSubidrs;
166    
167    synchronized (internedBaseDirs) {
168      if (!internedBaseDirs.containsKey(dirInfo.baseDirPath)) {
169        // Create a new String path of this file and make a brand new File object
170        // to guarantee we drop the reference to the underlying char[] storage.
171        File baseDir = new File(dirInfo.baseDirPath);
172        internedBaseDirs.put(dirInfo.baseDirPath, baseDir);
173      }
174      this.baseDir = internedBaseDirs.get(dirInfo.baseDirPath);
175    }
176  }
177
178  @VisibleForTesting
179  public static class ReplicaDirInfo {
180    public String baseDirPath;
181    public boolean hasSubidrs;
182
183    public ReplicaDirInfo (String baseDirPath, boolean hasSubidrs) {
184      this.baseDirPath = baseDirPath;
185      this.hasSubidrs = hasSubidrs;
186    }
187  }
188  
189  @VisibleForTesting
190  public static ReplicaDirInfo parseBaseDir(File dir) {
191    
192    File currentDir = dir;
193    boolean hasSubdirs = false;
194    while (currentDir.getName().startsWith(DataStorage.BLOCK_SUBDIR_PREFIX)) {
195      hasSubdirs = true;
196      currentDir = currentDir.getParentFile();
197    }
198    
199    return new ReplicaDirInfo(currentDir.getAbsolutePath(), hasSubdirs);
200  }
201
202  /**
203   * Number of bytes reserved for this replica on disk.
204   */
205  public long getBytesReserved() {
206    return 0;
207  }
208
209  /**
210   * Number of bytes originally reserved for this replica. The actual
211   * reservation is adjusted as data is written to disk.
212   *
213   * @return the number of bytes originally reserved for this replica.
214   */
215  public long getOriginalBytesReserved() {
216    return 0;
217  }
218
219  /**
220   * Copy specified file into a temporary file. Then rename the
221   * temporary file to the original name. This will cause any
222   * hardlinks to the original file to be removed. The temporary
223   * files are created in the same directory. The temporary files will
224   * be recovered (especially on Windows) on datanode restart.
225   */
226  private void breakHardlinks(File file, Block b) throws IOException {
227    File tmpFile = DatanodeUtil.createTmpFile(b, DatanodeUtil.getUnlinkTmpFile(file));
228    try {
229      FileInputStream in = new FileInputStream(file);
230      try {
231        FileOutputStream out = new FileOutputStream(tmpFile);
232        try {
233          IOUtils.copyBytes(in, out, 16 * 1024);
234        } finally {
235          out.close();
236        }
237      } finally {
238        in.close();
239      }
240      if (file.length() != tmpFile.length()) {
241        throw new IOException("Copy of file " + file + " size " + file.length()+
242                              " into file " + tmpFile +
243                              " resulted in a size of " + tmpFile.length());
244      }
245      FileUtil.replaceFile(tmpFile, file);
246    } catch (IOException e) {
247      boolean done = tmpFile.delete();
248      if (!done) {
249        DataNode.LOG.info("detachFile failed to delete temporary file " +
250                          tmpFile);
251      }
252      throw e;
253    }
254  }
255
256  /**
257   * This function "breaks hardlinks" to the current replica file.
258   *
259   * When doing a DataNode upgrade, we create a bunch of hardlinks to each block
260   * file.  This cleverly ensures that both the old and the new storage
261   * directories can contain the same block file, without using additional space
262   * for the data.
263   *
264   * However, when we want to append to the replica file, we need to "break" the
265   * hardlink to ensure that the old snapshot continues to contain the old data
266   * length.  If we failed to do that, we could roll back to the previous/
267   * directory during a downgrade, and find that the block contents were longer
268   * than they were at the time of upgrade.
269   *
270   * @return true only if data was copied.
271   * @throws IOException
272   */
273  public boolean breakHardLinksIfNeeded() throws IOException {
274    File file = getBlockFile();
275    if (file == null || getVolume() == null) {
276      throw new IOException("detachBlock:Block not found. " + this);
277    }
278    File meta = getMetaFile();
279
280    int linkCount = HardLink.getLinkCount(file);
281    if (linkCount > 1) {
282      DataNode.LOG.info("Breaking hardlink for " + linkCount + "x-linked " +
283          "block " + this);
284      breakHardlinks(file, this);
285    }
286    if (HardLink.getLinkCount(meta) > 1) {
287      breakHardlinks(meta, this);
288    }
289    return true;
290  }
291
292  @Override  //Object
293  public String toString() {
294    return getClass().getSimpleName()
295        + ", " + super.toString()
296        + ", " + getState()
297        + "\n  getNumBytes()     = " + getNumBytes()
298        + "\n  getBytesOnDisk()  = " + getBytesOnDisk()
299        + "\n  getVisibleLength()= " + getVisibleLength()
300        + "\n  getVolume()       = " + getVolume()
301        + "\n  getBlockFile()    = " + getBlockFile();
302  }
303
304  @Override
305  public boolean isOnTransientStorage() {
306    return volume.isTransientStorage();
307  }
308
309  @Override
310  public LightWeightResizableGSet.LinkedElement getNext() {
311    return next;
312  }
313
314  @Override
315  public void setNext(LightWeightResizableGSet.LinkedElement next) {
316    this.next = next;
317  }
318}