001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io;
020    
021    import java.io.IOException;
022    import java.io.DataInput;
023    import java.io.DataOutput;
024    import java.io.InputStream;
025    import java.util.Arrays;
026    import java.security.*;
027    
028    import org.apache.hadoop.classification.InterfaceAudience;
029    import org.apache.hadoop.classification.InterfaceStability;
030    
031    /** A Writable for MD5 hash values.
032     */
033    @InterfaceAudience.Public
034    @InterfaceStability.Stable
035    public class MD5Hash implements WritableComparable<MD5Hash> {
036      public static final int MD5_LEN = 16;
037    
038      private static ThreadLocal<MessageDigest> DIGESTER_FACTORY = new ThreadLocal<MessageDigest>() {
039        @Override
040        protected MessageDigest initialValue() {
041          try {
042            return MessageDigest.getInstance("MD5");
043          } catch (NoSuchAlgorithmException e) {
044            throw new RuntimeException(e);
045          }
046        }
047      };
048    
049      private byte[] digest;
050    
051      /** Constructs an MD5Hash. */
052      public MD5Hash() {
053        this.digest = new byte[MD5_LEN];
054      }
055    
056      /** Constructs an MD5Hash from a hex string. */
057      public MD5Hash(String hex) {
058        setDigest(hex);
059      }
060      
061      /** Constructs an MD5Hash with a specified value. */
062      public MD5Hash(byte[] digest) {
063        if (digest.length != MD5_LEN)
064          throw new IllegalArgumentException("Wrong length: " + digest.length);
065        this.digest = digest;
066      }
067      
068      // javadoc from Writable
069      @Override
070      public void readFields(DataInput in) throws IOException {
071        in.readFully(digest);
072      }
073    
074      /** Constructs, reads and returns an instance. */
075      public static MD5Hash read(DataInput in) throws IOException {
076        MD5Hash result = new MD5Hash();
077        result.readFields(in);
078        return result;
079      }
080    
081      // javadoc from Writable
082      @Override
083      public void write(DataOutput out) throws IOException {
084        out.write(digest);
085      }
086    
087      /** Copy the contents of another instance into this instance. */
088      public void set(MD5Hash that) {
089        System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN);
090      }
091    
092      /** Returns the digest bytes. */
093      public byte[] getDigest() { return digest; }
094    
095      /** Construct a hash value for a byte array. */
096      public static MD5Hash digest(byte[] data) {
097        return digest(data, 0, data.length);
098      }
099    
100      /**
101       * Create a thread local MD5 digester
102       */
103      public static MessageDigest getDigester() {
104        MessageDigest digester = DIGESTER_FACTORY.get();
105        digester.reset();
106        return digester;
107      }
108    
109      /** Construct a hash value for the content from the InputStream. */
110      public static MD5Hash digest(InputStream in) throws IOException {
111        final byte[] buffer = new byte[4*1024]; 
112    
113        final MessageDigest digester = getDigester();
114        for(int n; (n = in.read(buffer)) != -1; ) {
115          digester.update(buffer, 0, n);
116        }
117    
118        return new MD5Hash(digester.digest());
119      }
120    
121      /** Construct a hash value for a byte array. */
122      public static MD5Hash digest(byte[] data, int start, int len) {
123        byte[] digest;
124        MessageDigest digester = getDigester();
125        digester.update(data, start, len);
126        digest = digester.digest();
127        return new MD5Hash(digest);
128      }
129    
130      /** Construct a hash value for a String. */
131      public static MD5Hash digest(String string) {
132        return digest(UTF8.getBytes(string));
133      }
134    
135      /** Construct a hash value for a String. */
136      public static MD5Hash digest(UTF8 utf8) {
137        return digest(utf8.getBytes(), 0, utf8.getLength());
138      }
139    
140      /** Construct a half-sized version of this MD5.  Fits in a long **/
141      public long halfDigest() {
142        long value = 0;
143        for (int i = 0; i < 8; i++)
144          value |= ((digest[i] & 0xffL) << (8*(7-i)));
145        return value;
146      }
147    
148      /**
149       * Return a 32-bit digest of the MD5.
150       * @return the first 4 bytes of the md5
151       */
152      public int quarterDigest() {
153        int value = 0;
154        for (int i = 0; i < 4; i++)
155          value |= ((digest[i] & 0xff) << (8*(3-i)));
156        return value;    
157      }
158    
159      /** Returns true iff <code>o</code> is an MD5Hash whose digest contains the
160       * same values.  */
161      @Override
162      public boolean equals(Object o) {
163        if (!(o instanceof MD5Hash))
164          return false;
165        MD5Hash other = (MD5Hash)o;
166        return Arrays.equals(this.digest, other.digest);
167      }
168    
169      /** Returns a hash code value for this object.
170       * Only uses the first 4 bytes, since md5s are evenly distributed.
171       */
172      @Override
173      public int hashCode() {
174        return quarterDigest();
175      }
176    
177    
178      /** Compares this object with the specified object for order.*/
179      @Override
180      public int compareTo(MD5Hash that) {
181        return WritableComparator.compareBytes(this.digest, 0, MD5_LEN,
182                                               that.digest, 0, MD5_LEN);
183      }
184    
185      /** A WritableComparator optimized for MD5Hash keys. */
186      public static class Comparator extends WritableComparator {
187        public Comparator() {
188          super(MD5Hash.class);
189        }
190    
191        @Override
192        public int compare(byte[] b1, int s1, int l1,
193                           byte[] b2, int s2, int l2) {
194          return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN);
195        }
196      }
197    
198      static {                                        // register this comparator
199        WritableComparator.define(MD5Hash.class, new Comparator());
200      }
201    
202      private static final char[] HEX_DIGITS =
203      {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
204    
205      /** Returns a string representation of this object. */
206      @Override
207      public String toString() {
208        StringBuilder buf = new StringBuilder(MD5_LEN*2);
209        for (int i = 0; i < MD5_LEN; i++) {
210          int b = digest[i];
211          buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
212          buf.append(HEX_DIGITS[b & 0xf]);
213        }
214        return buf.toString();
215      }
216    
217      /** Sets the digest value from a hex string. */
218      public void setDigest(String hex) {
219        if (hex.length() != MD5_LEN*2)
220          throw new IllegalArgumentException("Wrong length: " + hex.length());
221        byte[] digest = new byte[MD5_LEN];
222        for (int i = 0; i < MD5_LEN; i++) {
223          int j = i << 1;
224          digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 |
225                             charToNibble(hex.charAt(j+1)));
226        }
227        this.digest = digest;
228      }
229    
230      private static final int charToNibble(char c) {
231        if (c >= '0' && c <= '9') {
232          return c - '0';
233        } else if (c >= 'a' && c <= 'f') {
234          return 0xa + (c - 'a');
235        } else if (c >= 'A' && c <= 'F') {
236          return 0xA + (c - 'A');
237        } else {
238          throw new RuntimeException("Not a hex character: " + c);
239        }
240      }
241    
242    
243    }