/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Charsets;

/**
 * Static functions for dealing with files of the same format
 * that the Unix "md5sum" utility writes.
 */
public abstract class MD5FileUtils {
  private static final Log LOG = LogFactory.getLog(
      MD5FileUtils.class);

  /** Suffix appended to a data file's name to form its md5 file's name. */
  public static final String MD5_SUFFIX = ".md5";

  /**
   * Expected shape of the first line of an md5 file: 32 lower-case hex
   * digits (group 1), a space, then either a space or a '*', followed by
   * the referenced file name (group 2).
   */
  private static final Pattern LINE_REGEX =
      Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");

  /**
   * Verify that the previously saved md5 for the given file matches
   * expectedMD5.
   * @param dataFile the data file whose saved md5 should be checked
   * @param expectedMD5 the checksum the saved md5 is compared against
   * @throws IOException if the saved md5 cannot be read, or if it is not
   *         equal to expectedMD5 (this includes the case where no md5 file
   *         exists and the stored hash is therefore null)
   */
  public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
      throws IOException {
    MD5Hash storedHash = readStoredMd5ForFile(dataFile);
    // Check the hash itself
    if (!expectedMD5.equals(storedHash)) {
      throw new IOException(
          "File " + dataFile + " did not match stored MD5 checksum " +
          " (stored: " + storedHash + ", computed: " + expectedMD5 + ")");
    }
  }

  /**
   * Read the first line of the given md5 file and match it against the
   * expected md5sum line format. A file with no content is treated as an
   * empty line and is therefore rejected as invalid.
   * @param md5File the md5 file to read
   * @return a matcher with two matched groups
   *   where group(1) is the md5 string and group(2) is the data file path.
   * @throws IOException if the file cannot be read or its first line does
   *         not match the expected pattern
   */
  private static Matcher readStoredMd5(File md5File) throws IOException {
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(
            md5File), Charsets.UTF_8));
    String md5Line;
    try {
      md5Line = reader.readLine();
      // readLine() returns null at end of stream, i.e. for an empty file.
      if (md5Line == null) { md5Line = ""; }
      md5Line = md5Line.trim();
    } catch (IOException ioe) {
      // Re-wrap so that the message names the md5 file being read.
      throw new IOException("Error reading md5 file at " + md5File, ioe);
    } finally {
      IOUtils.cleanup(LOG, reader);
    }

    Matcher matcher = LINE_REGEX.matcher(md5Line);
    if (!matcher.matches()) {
      throw new IOException("Invalid MD5 file " + md5File + ": the content \""
          + md5Line + "\" does not match the expected pattern.");
    }
    return matcher;
  }

  /**
   * Read the md5 checksum stored alongside the given data file.
   * @param dataFile the file containing data
   * @return the checksum stored in dataFile.md5, or null if that md5 file
   *         does not exist
   * @throws IOException if the md5 file cannot be read, is malformed, or
   *         references a file whose name differs from dataFile's name
   */
  public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
    final File md5File = getDigestFileForFile(dataFile);
    if (!md5File.exists()) {
      return null;
    }

    final Matcher matcher = readStoredMd5(md5File);
    String storedHash = matcher.group(1);
    File referencedFile = new File(matcher.group(2));

    // Sanity check: Make sure that the file referenced in the .md5 file at
    // least has the same name as the file we expect
    if (!referencedFile.getName().equals(dataFile.getName())) {
      throw new IOException(
          "MD5 file at " + md5File + " references file named " +
          referencedFile.getName() + " but we expected it to reference " +
          dataFile);
    }
    return new MD5Hash(storedHash);
  }

  /**
   * Read dataFile and compute its MD5 checksum.
   * @param dataFile the file to read
   * @return the MD5 digest of the bytes read from dataFile
   * @throws IOException if the file cannot be opened or read
   */
  public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
    InputStream in = new FileInputStream(dataFile);
    try {
      MessageDigest digester = MD5Hash.getDigester();
      DigestInputStream dis = new DigestInputStream(in, digester);
      // Only the digest matters, so the copied bytes go to a null sink.
      IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);

      return new MD5Hash(digester.digest());
    } finally {
      IOUtils.closeStream(in);
    }
  }

  /**
   * Save the ".md5" file that lists the md5sum of another file.
   * @param dataFile the original file whose md5 was computed
   * @param digest the computed digest
   * @throws IOException if the md5 file cannot be written
   */
  public static void saveMD5File(File dataFile, MD5Hash digest)
      throws IOException {
    final String digestString = StringUtils.byteToHexString(digest.getDigest());
    saveMD5File(dataFile, digestString);
  }

  /**
   * Write the md5 file for dataFile as a single line consisting of the
   * digest string, a space, a '*', the data file's name and a newline.
   * @param dataFile the data file the md5 file refers to
   * @param digestString the digest text to store
   * @throws IOException if the md5 file cannot be written
   */
  private static void saveMD5File(File dataFile, String digestString)
      throws IOException {
    File md5File = getDigestFileForFile(dataFile);
    String md5Line = digestString + " *" + dataFile.getName() + "\n";

    // NOTE(review): close() is intentionally left outside a finally block.
    // AtomicFileOutputStream presumably publishes the file on close(), so
    // closing after a failed write could commit partial content -- confirm
    // against AtomicFileOutputStream before restructuring this.
    AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
    afos.write(md5Line.getBytes(Charsets.UTF_8));
    afos.close();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Saved MD5 " + digestString + " to " + md5File);
    }
  }

  /**
   * Move the stored md5 of oldDataFile so that it belongs to newDataFile.
   * The digest is read from the old md5 file and written to a new md5 file
   * that references newDataFile's name; the old md5 file is then deleted.
   * A failure to delete the old md5 file is only logged as a warning.
   * @param oldDataFile the data file whose md5 file currently exists
   * @param newDataFile the data file the md5 should be saved for
   * @throws FileNotFoundException if the md5 file of oldDataFile does not
   *         exist
   * @throws IOException if the old md5 file cannot be read or is malformed,
   *         or the new md5 file cannot be written
   */
  public static void renameMD5File(File oldDataFile, File newDataFile)
      throws IOException {
    final File fromFile = getDigestFileForFile(oldDataFile);
    if (!fromFile.exists()) {
      throw new FileNotFoundException(fromFile + " does not exist.");
    }

    final String digestString = readStoredMd5(fromFile).group(1);
    saveMD5File(newDataFile, digestString);

    if (!fromFile.delete()) {
      LOG.warn("deleting " + fromFile.getAbsolutePath() + " FAILED");
    }
  }

  /**
   * @return a reference to the file with .md5 suffix that will
   * contain the md5 checksum for the given data file.
   */
  public static File getDigestFileForFile(File file) {
    return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
  }
}