001    /**
002     *
003     * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
004     * All rights reserved.
005     * Redistribution and use in source and binary forms, with or 
006     * without modification, are permitted provided that the following 
007     * conditions are met:
008     *  - Redistributions of source code must retain the above copyright 
009     *    notice, this list of conditions and the following disclaimer.
010     *  - Redistributions in binary form must reproduce the above copyright 
011     *    notice, this list of conditions and the following disclaimer in 
012     *    the documentation and/or other materials provided with the distribution.
013     *  - Neither the name of the University Catholique de Louvain - UCL
014     *    nor the names of its contributors may be used to endorse or 
015     *    promote products derived from this software without specific prior 
016     *    written permission.
017     *    
018     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
019     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
020     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
021     * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
022     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
023     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
024     * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
025     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
026     * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
027     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
028     * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
029     * POSSIBILITY OF SUCH DAMAGE.
030     */
031    
032    /**
033     * Licensed to the Apache Software Foundation (ASF) under one
034     * or more contributor license agreements.  See the NOTICE file
035     * distributed with this work for additional information
036     * regarding copyright ownership.  The ASF licenses this file
037     * to you under the Apache License, Version 2.0 (the
038     * "License"); you may not use this file except in compliance
039     * with the License.  You may obtain a copy of the License at
040     *
041     *     http://www.apache.org/licenses/LICENSE-2.0
042     *
043     * Unless required by applicable law or agreed to in writing, software
044     * distributed under the License is distributed on an "AS IS" BASIS,
045     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
046     * See the License for the specific language governing permissions and
047     * limitations under the License.
048     */
049    
050    package org.apache.hadoop.util.bloom;
051    
052    import java.io.DataInput;
053    import java.io.DataOutput;
054    import java.io.IOException;
055    
056    import org.apache.hadoop.classification.InterfaceAudience;
057    import org.apache.hadoop.classification.InterfaceStability;
058    
059    /**
060     * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
061     * <p>
062     * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
063     * each of the <code>s</code> rows is a standard Bloom filter. The creation 
064     * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
065     * bit matrix, i.e., it is composed of a single standard Bloom filter.
066     * It assumes that <code>n<sub>r</sub></code> elements are recorded in the 
067     * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
068     * the cardinality of the set <code>A</code> to record in the filter).  
069     * <p>
070     * As the size of <code>A</code> grows during the execution of the application,
071     * several keys must be inserted in the DBF.  When inserting a key into the DBF,
072     * one must first get an active Bloom filter in the matrix.  A Bloom filter is
073     * active when the number of recorded keys, <code>n<sub>r</sub></code>, is 
074     * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
075     * If an active Bloom filter is found, the key is inserted and 
076     * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
077     * is no active Bloom filter, a new one is created (i.e., a new row is added to
078     * the matrix) according to the current size of <code>A</code> and the element
079     * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
080     * this new Bloom filter is set to one.  A given key is said to belong to the
081     * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
082     * <p>
083     * Originally created by
084     * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
085     *
086     * @see Filter The general behavior of a filter
087     * @see BloomFilter A Bloom filter
088     * 
089     * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a>
090     */
091    @InterfaceAudience.Public
092    @InterfaceStability.Stable
093    public class DynamicBloomFilter extends Filter {
094      /** 
095       * Threshold for the maximum number of key to record in a dynamic Bloom filter row.
096       */
097      private int nr;
098    
099      /**
100       * The number of keys recorded in the current standard active Bloom filter.
101       */
102      private int currentNbRecord;
103    
104      /**
105       * The matrix of Bloom filter.
106       */
107      private BloomFilter[] matrix;
108    
109      /**
110       * Zero-args constructor for the serialization.
111       */
112      public DynamicBloomFilter() { }
113    
114      /**
115       * Constructor.
116       * <p>
117       * Builds an empty Dynamic Bloom filter.
118       * @param vectorSize The number of bits in the vector.
119       * @param nbHash The number of hash function to consider.
120       * @param hashType type of the hashing function (see
121       * {@link org.apache.hadoop.util.hash.Hash}).
122       * @param nr The threshold for the maximum number of keys to record in a
123       * dynamic Bloom filter row.
124       */
125      public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
126        super(vectorSize, nbHash, hashType);
127    
128        this.nr = nr;
129        this.currentNbRecord = 0;
130    
131        matrix = new BloomFilter[1];
132        matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
133      }
134    
135      @Override
136      public void add(Key key) {
137        if (key == null) {
138          throw new NullPointerException("Key can not be null");
139        }
140    
141        BloomFilter bf = getActiveStandardBF();
142    
143        if (bf == null) {
144          addRow();
145          bf = matrix[matrix.length - 1];
146          currentNbRecord = 0;
147        }
148    
149        bf.add(key);
150    
151        currentNbRecord++;
152      }
153    
154      @Override
155      public void and(Filter filter) {
156        if (filter == null
157            || !(filter instanceof DynamicBloomFilter)
158            || filter.vectorSize != this.vectorSize
159            || filter.nbHash != this.nbHash) {
160          throw new IllegalArgumentException("filters cannot be and-ed");
161        }
162    
163        DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
164    
165        if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
166          throw new IllegalArgumentException("filters cannot be and-ed");
167        }
168    
169        for (int i = 0; i < matrix.length; i++) {
170          matrix[i].and(dbf.matrix[i]);
171        }
172      }
173    
174      @Override
175      public boolean membershipTest(Key key) {
176        if (key == null) {
177          return true;
178        }
179    
180        for (int i = 0; i < matrix.length; i++) {
181          if (matrix[i].membershipTest(key)) {
182            return true;
183          }
184        }
185    
186        return false;
187      }
188    
189      @Override
190      public void not() {
191        for (int i = 0; i < matrix.length; i++) {
192          matrix[i].not();
193        }
194      }
195    
196      @Override
197      public void or(Filter filter) {
198        if (filter == null
199            || !(filter instanceof DynamicBloomFilter)
200            || filter.vectorSize != this.vectorSize
201            || filter.nbHash != this.nbHash) {
202          throw new IllegalArgumentException("filters cannot be or-ed");
203        }
204    
205        DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
206    
207        if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
208          throw new IllegalArgumentException("filters cannot be or-ed");
209        }
210        for (int i = 0; i < matrix.length; i++) {
211          matrix[i].or(dbf.matrix[i]);
212        }
213      }
214    
215      @Override
216      public void xor(Filter filter) {
217        if (filter == null
218            || !(filter instanceof DynamicBloomFilter)
219            || filter.vectorSize != this.vectorSize
220            || filter.nbHash != this.nbHash) {
221          throw new IllegalArgumentException("filters cannot be xor-ed");
222        }
223        DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
224    
225        if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
226          throw new IllegalArgumentException("filters cannot be xor-ed");
227        }
228    
229        for(int i = 0; i<matrix.length; i++) {
230            matrix[i].xor(dbf.matrix[i]);
231        }
232      }
233    
234      @Override
235      public String toString() {
236        StringBuilder res = new StringBuilder();
237    
238        for (int i = 0; i < matrix.length; i++) {
239          res.append(matrix[i]);
240          res.append(Character.LINE_SEPARATOR);
241        }
242        return res.toString();
243      }
244    
245      // Writable
246    
247      @Override
248      public void write(DataOutput out) throws IOException {
249        super.write(out);
250        out.writeInt(nr);
251        out.writeInt(currentNbRecord);
252        out.writeInt(matrix.length);
253        for (int i = 0; i < matrix.length; i++) {
254          matrix[i].write(out);
255        }
256      }
257    
258      @Override
259      public void readFields(DataInput in) throws IOException {
260        super.readFields(in);
261        nr = in.readInt();
262        currentNbRecord = in.readInt();
263        int len = in.readInt();
264        matrix = new BloomFilter[len];
265        for (int i = 0; i < matrix.length; i++) {
266          matrix[i] = new BloomFilter();
267          matrix[i].readFields(in);
268        }
269      }
270    
271      /**
272       * Adds a new row to <i>this</i> dynamic Bloom filter.
273       */
274      private void addRow() {
275        BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
276    
277        for (int i = 0; i < matrix.length; i++) {
278          tmp[i] = matrix[i];
279        }
280    
281        tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
282    
283        matrix = tmp;
284      }
285    
286      /**
287       * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
288       * @return BloomFilter The active standard Bloom filter.
289       *                     <code>Null</code> otherwise.
290       */
291      private BloomFilter getActiveStandardBF() {
292        if (currentNbRecord >= nr) {
293          return null;
294        }
295    
296        return matrix[matrix.length - 1];
297      }
298    }