001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.io; 020 021 import java.io.IOException; 022 import java.io.DataInput; 023 import java.io.DataOutput; 024 import java.io.InputStream; 025 import java.util.Arrays; 026 import java.security.*; 027 028 import org.apache.hadoop.classification.InterfaceAudience; 029 import org.apache.hadoop.classification.InterfaceStability; 030 031 /** A Writable for MD5 hash values. 032 */ 033 @InterfaceAudience.Public 034 @InterfaceStability.Stable 035 public class MD5Hash implements WritableComparable<MD5Hash> { 036 public static final int MD5_LEN = 16; 037 038 private static ThreadLocal<MessageDigest> DIGESTER_FACTORY = new ThreadLocal<MessageDigest>() { 039 @Override 040 protected MessageDigest initialValue() { 041 try { 042 return MessageDigest.getInstance("MD5"); 043 } catch (NoSuchAlgorithmException e) { 044 throw new RuntimeException(e); 045 } 046 } 047 }; 048 049 private byte[] digest; 050 051 /** Constructs an MD5Hash. */ 052 public MD5Hash() { 053 this.digest = new byte[MD5_LEN]; 054 } 055 056 /** Constructs an MD5Hash from a hex string. */ 057 public MD5Hash(String hex) { 058 setDigest(hex); 059 } 060 061 /** Constructs an MD5Hash with a specified value. */ 062 public MD5Hash(byte[] digest) { 063 if (digest.length != MD5_LEN) 064 throw new IllegalArgumentException("Wrong length: " + digest.length); 065 this.digest = digest; 066 } 067 068 // javadoc from Writable 069 @Override 070 public void readFields(DataInput in) throws IOException { 071 in.readFully(digest); 072 } 073 074 /** Constructs, reads and returns an instance. */ 075 public static MD5Hash read(DataInput in) throws IOException { 076 MD5Hash result = new MD5Hash(); 077 result.readFields(in); 078 return result; 079 } 080 081 // javadoc from Writable 082 @Override 083 public void write(DataOutput out) throws IOException { 084 out.write(digest); 085 } 086 087 /** Copy the contents of another instance into this instance. */ 088 public void set(MD5Hash that) { 089 System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN); 090 } 091 092 /** Returns the digest bytes. */ 093 public byte[] getDigest() { return digest; } 094 095 /** Construct a hash value for a byte array. */ 096 public static MD5Hash digest(byte[] data) { 097 return digest(data, 0, data.length); 098 } 099 100 /** 101 * Create a thread local MD5 digester 102 */ 103 public static MessageDigest getDigester() { 104 MessageDigest digester = DIGESTER_FACTORY.get(); 105 digester.reset(); 106 return digester; 107 } 108 109 /** Construct a hash value for the content from the InputStream. */ 110 public static MD5Hash digest(InputStream in) throws IOException { 111 final byte[] buffer = new byte[4*1024]; 112 113 final MessageDigest digester = getDigester(); 114 for(int n; (n = in.read(buffer)) != -1; ) { 115 digester.update(buffer, 0, n); 116 } 117 118 return new MD5Hash(digester.digest()); 119 } 120 121 /** Construct a hash value for a byte array. */ 122 public static MD5Hash digest(byte[] data, int start, int len) { 123 byte[] digest; 124 MessageDigest digester = getDigester(); 125 digester.update(data, start, len); 126 digest = digester.digest(); 127 return new MD5Hash(digest); 128 } 129 130 /** Construct a hash value for a String. */ 131 public static MD5Hash digest(String string) { 132 return digest(UTF8.getBytes(string)); 133 } 134 135 /** Construct a hash value for a String. */ 136 public static MD5Hash digest(UTF8 utf8) { 137 return digest(utf8.getBytes(), 0, utf8.getLength()); 138 } 139 140 /** Construct a half-sized version of this MD5. Fits in a long **/ 141 public long halfDigest() { 142 long value = 0; 143 for (int i = 0; i < 8; i++) 144 value |= ((digest[i] & 0xffL) << (8*(7-i))); 145 return value; 146 } 147 148 /** 149 * Return a 32-bit digest of the MD5. 150 * @return the first 4 bytes of the md5 151 */ 152 public int quarterDigest() { 153 int value = 0; 154 for (int i = 0; i < 4; i++) 155 value |= ((digest[i] & 0xff) << (8*(3-i))); 156 return value; 157 } 158 159 /** Returns true iff <code>o</code> is an MD5Hash whose digest contains the 160 * same values. */ 161 @Override 162 public boolean equals(Object o) { 163 if (!(o instanceof MD5Hash)) 164 return false; 165 MD5Hash other = (MD5Hash)o; 166 return Arrays.equals(this.digest, other.digest); 167 } 168 169 /** Returns a hash code value for this object. 170 * Only uses the first 4 bytes, since md5s are evenly distributed. 171 */ 172 @Override 173 public int hashCode() { 174 return quarterDigest(); 175 } 176 177 178 /** Compares this object with the specified object for order.*/ 179 @Override 180 public int compareTo(MD5Hash that) { 181 return WritableComparator.compareBytes(this.digest, 0, MD5_LEN, 182 that.digest, 0, MD5_LEN); 183 } 184 185 /** A WritableComparator optimized for MD5Hash keys. */ 186 public static class Comparator extends WritableComparator { 187 public Comparator() { 188 super(MD5Hash.class); 189 } 190 191 @Override 192 public int compare(byte[] b1, int s1, int l1, 193 byte[] b2, int s2, int l2) { 194 return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN); 195 } 196 } 197 198 static { // register this comparator 199 WritableComparator.define(MD5Hash.class, new Comparator()); 200 } 201 202 private static final char[] HEX_DIGITS = 203 {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; 204 205 /** Returns a string representation of this object. */ 206 @Override 207 public String toString() { 208 StringBuilder buf = new StringBuilder(MD5_LEN*2); 209 for (int i = 0; i < MD5_LEN; i++) { 210 int b = digest[i]; 211 buf.append(HEX_DIGITS[(b >> 4) & 0xf]); 212 buf.append(HEX_DIGITS[b & 0xf]); 213 } 214 return buf.toString(); 215 } 216 217 /** Sets the digest value from a hex string. */ 218 public void setDigest(String hex) { 219 if (hex.length() != MD5_LEN*2) 220 throw new IllegalArgumentException("Wrong length: " + hex.length()); 221 byte[] digest = new byte[MD5_LEN]; 222 for (int i = 0; i < MD5_LEN; i++) { 223 int j = i << 1; 224 digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 | 225 charToNibble(hex.charAt(j+1))); 226 } 227 this.digest = digest; 228 } 229 230 private static final int charToNibble(char c) { 231 if (c >= '0' && c <= '9') { 232 return c - '0'; 233 } else if (c >= 'a' && c <= 'f') { 234 return 0xa + (c - 'a'); 235 } else if (c >= 'A' && c <= 'F') { 236 return 0xA + (c - 'A'); 237 } else { 238 throw new RuntimeException("Not a hex character: " + c); 239 } 240 } 241 242 243 }