001    /**
002     *
003     * Copyright (c) 2005, European Commission project OneLab under contract 034819 
004     * (http://www.one-lab.org)
005     * 
006     * All rights reserved.
007     * Redistribution and use in source and binary forms, with or 
008     * without modification, are permitted provided that the following 
009     * conditions are met:
010     *  - Redistributions of source code must retain the above copyright 
011     *    notice, this list of conditions and the following disclaimer.
012     *  - Redistributions in binary form must reproduce the above copyright 
013     *    notice, this list of conditions and the following disclaimer in 
014     *    the documentation and/or other materials provided with the distribution.
015     *  - Neither the name of the University Catholique de Louvain - UCL
016     *    nor the names of its contributors may be used to endorse or 
017     *    promote products derived from this software without specific prior 
018     *    written permission.
019     *    
020     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
021     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
022     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
023     * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
024     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
025     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
026     * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
027     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
028     * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
029     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
030     * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
031     * POSSIBILITY OF SUCH DAMAGE.
032     */
033    
034    /**
035     * Licensed to the Apache Software Foundation (ASF) under one
036     * or more contributor license agreements.  See the NOTICE file
037     * distributed with this work for additional information
038     * regarding copyright ownership.  The ASF licenses this file
039     * to you under the Apache License, Version 2.0 (the
040     * "License"); you may not use this file except in compliance
041     * with the License.  You may obtain a copy of the License at
042     *
043     *     http://www.apache.org/licenses/LICENSE-2.0
044     *
045     * Unless required by applicable law or agreed to in writing, software
046     * distributed under the License is distributed on an "AS IS" BASIS,
047     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
048     * See the License for the specific language governing permissions and
049     * limitations under the License.
050     */
051    package org.apache.hadoop.util.bloom;
052    
053    import org.apache.hadoop.classification.InterfaceAudience;
054    import org.apache.hadoop.classification.InterfaceStability;
055    import org.apache.hadoop.util.hash.Hash;
056    
057    /**
058     * Implements a hash object that returns a certain number of hashed values.
059     * 
060     * @see Key The general behavior of a key being stored in a filter
061     * @see Filter The general behavior of a filter
062     */
063    @InterfaceAudience.Public
064    @InterfaceStability.Stable
065    public final class HashFunction {
066      /** The number of hashed values. */
067      private int nbHash;
068    
069      /** The maximum highest returned value. */
070      private int maxValue;
071    
072      /** Hashing algorithm to use. */
073      private Hash hashFunction;
074      
075      /**
076       * Constructor.
077       * <p>
078       * Builds a hash function that must obey to a given maximum number of returned values and a highest value.
079       * @param maxValue The maximum highest returned value.
080       * @param nbHash The number of resulting hashed values.
081       * @param hashType type of the hashing function (see {@link Hash}).
082       */
083      public HashFunction(int maxValue, int nbHash, int hashType) {
084        if (maxValue <= 0) {
085          throw new IllegalArgumentException("maxValue must be > 0");
086        }
087        
088        if (nbHash <= 0) {
089          throw new IllegalArgumentException("nbHash must be > 0");
090        }
091    
092        this.maxValue = maxValue;
093        this.nbHash = nbHash;
094        this.hashFunction = Hash.getInstance(hashType);
095        if (this.hashFunction == null)
096          throw new IllegalArgumentException("hashType must be known");
097      }
098    
099      /** Clears <i>this</i> hash function. A NOOP */
100      public void clear() {
101      }
102    
103      /**
104       * Hashes a specified key into several integers.
105       * @param k The specified key.
106       * @return The array of hashed values.
107       */
108      public int[] hash(Key k){
109          byte[] b = k.getBytes();
110          if (b == null) {
111            throw new NullPointerException("buffer reference is null");
112          }
113          if (b.length == 0) {
114            throw new IllegalArgumentException("key length must be > 0");
115          }
116          int[] result = new int[nbHash];
117          for (int i = 0, initval = 0; i < nbHash; i++) {
118              initval = hashFunction.hash(b, initval);
119              result[i] = Math.abs(initval % maxValue);
120          }
121          return result;
122      }
123    }