001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.util;
019
020import java.io.BufferedReader;
021import java.io.File;
022import java.io.FileInputStream;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.security.DigestInputStream;
028import java.security.MessageDigest;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034import org.apache.hadoop.io.IOUtils;
035import org.apache.hadoop.io.MD5Hash;
036import org.apache.hadoop.util.StringUtils;
037
038import com.google.common.base.Charsets;
039
040/**
041 * Static functions for dealing with files of the same format
042 * that the Unix "md5sum" utility writes.
043 */
044public abstract class MD5FileUtils {
045  private static final Log LOG = LogFactory.getLog(
046      MD5FileUtils.class);
047
048  public static final String MD5_SUFFIX = ".md5";
049  private static final Pattern LINE_REGEX =
050    Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");
051  
052  /**
053   * Verify that the previously saved md5 for the given file matches
054   * expectedMd5.
055   * @throws IOException 
056   */
057  public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
058      throws IOException {
059    MD5Hash storedHash = readStoredMd5ForFile(dataFile);
060    // Check the hash itself
061    if (!expectedMD5.equals(storedHash)) {
062      throw new IOException(
063          "File " + dataFile + " did not match stored MD5 checksum " +
064          " (stored: " + storedHash + ", computed: " + expectedMD5);
065    }
066  }
067  
068  /**
069   * Read the md5 file stored alongside the given data file
070   * and match the md5 file content.
071   * @param dataFile the file containing data
072   * @return a matcher with two matched groups
073   *   where group(1) is the md5 string and group(2) is the data file path.
074   */
075  private static Matcher readStoredMd5(File md5File) throws IOException {
076    BufferedReader reader =
077        new BufferedReader(new InputStreamReader(new FileInputStream(
078            md5File), Charsets.UTF_8));
079    String md5Line;
080    try {
081      md5Line = reader.readLine();
082      if (md5Line == null) { md5Line = ""; }
083      md5Line = md5Line.trim();
084    } catch (IOException ioe) {
085      throw new IOException("Error reading md5 file at " + md5File, ioe);
086    } finally {
087      IOUtils.cleanup(LOG, reader);
088    }
089    
090    Matcher matcher = LINE_REGEX.matcher(md5Line);
091    if (!matcher.matches()) {
092      throw new IOException("Invalid MD5 file " + md5File + ": the content \""
093          + md5Line + "\" does not match the expected pattern.");
094    }
095    return matcher;
096  }
097
098  /**
099   * Read the md5 checksum stored alongside the given data file.
100   * @param dataFile the file containing data
101   * @return the checksum stored in dataFile.md5
102   */
103  public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
104    final File md5File = getDigestFileForFile(dataFile);
105    if (!md5File.exists()) {
106      return null;
107    }
108
109    final Matcher matcher = readStoredMd5(md5File);
110    String storedHash = matcher.group(1);
111    File referencedFile = new File(matcher.group(2));
112
113    // Sanity check: Make sure that the file referenced in the .md5 file at
114    // least has the same name as the file we expect
115    if (!referencedFile.getName().equals(dataFile.getName())) {
116      throw new IOException(
117          "MD5 file at " + md5File + " references file named " +
118          referencedFile.getName() + " but we expected it to reference " +
119          dataFile);
120    }
121    return new MD5Hash(storedHash);
122  }
123  
124  /**
125   * Read dataFile and compute its MD5 checksum.
126   */
127  public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
128    InputStream in = new FileInputStream(dataFile);
129    try {
130      MessageDigest digester = MD5Hash.getDigester();
131      DigestInputStream dis = new DigestInputStream(in, digester);
132      IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);
133      
134      return new MD5Hash(digester.digest());
135    } finally {
136      IOUtils.closeStream(in);
137    }
138  }
139
140  /**
141   * Save the ".md5" file that lists the md5sum of another file.
142   * @param dataFile the original file whose md5 was computed
143   * @param digest the computed digest
144   * @throws IOException
145   */
146  public static void saveMD5File(File dataFile, MD5Hash digest)
147      throws IOException {
148    final String digestString = StringUtils.byteToHexString(digest.getDigest());
149    saveMD5File(dataFile, digestString);
150  }
151
152  private static void saveMD5File(File dataFile, String digestString)
153      throws IOException {
154    File md5File = getDigestFileForFile(dataFile);
155    String md5Line = digestString + " *" + dataFile.getName() + "\n";
156
157    AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
158    afos.write(md5Line.getBytes(Charsets.UTF_8));
159    afos.close();
160
161    if (LOG.isDebugEnabled()) {
162      LOG.debug("Saved MD5 " + digestString + " to " + md5File);
163    }
164  }
165
166  public static void renameMD5File(File oldDataFile, File newDataFile)
167      throws IOException {
168    final File fromFile = getDigestFileForFile(oldDataFile);
169    if (!fromFile.exists()) {
170      throw new FileNotFoundException(fromFile + " does not exist.");
171    }
172
173    final String digestString = readStoredMd5(fromFile).group(1);
174    saveMD5File(newDataFile, digestString);
175
176    if (!fromFile.delete()) {
177      LOG.warn("deleting  " + fromFile.getAbsolutePath() + " FAILED");
178    }
179  }
180
181  /**
182   * @return a reference to the file with .md5 suffix that will
183   * contain the md5 checksum for the given data file.
184   */
185  public static File getDigestFileForFile(File file) {
186    return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
187  }
188}