001/**
002 *
003 * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
004 * All rights reserved.
005 * Redistribution and use in source and binary forms, with or 
006 * without modification, are permitted provided that the following 
007 * conditions are met:
008 *  - Redistributions of source code must retain the above copyright 
009 *    notice, this list of conditions and the following disclaimer.
010 *  - Redistributions in binary form must reproduce the above copyright 
011 *    notice, this list of conditions and the following disclaimer in 
012 *    the documentation and/or other materials provided with the distribution.
013 *  - Neither the name of the University Catholique de Louvain - UCL
014 *    nor the names of its contributors may be used to endorse or 
015 *    promote products derived from this software without specific prior 
016 *    written permission.
017 *    
018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
019 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
020 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
021 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
022 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
024 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
026 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
027 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
028 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
029 * POSSIBILITY OF SUCH DAMAGE.
030 */
031
032/**
033 * Licensed to the Apache Software Foundation (ASF) under one
034 * or more contributor license agreements.  See the NOTICE file
035 * distributed with this work for additional information
036 * regarding copyright ownership.  The ASF licenses this file
037 * to you under the Apache License, Version 2.0 (the
038 * "License"); you may not use this file except in compliance
039 * with the License.  You may obtain a copy of the License at
040 *
041 *     http://www.apache.org/licenses/LICENSE-2.0
042 *
043 * Unless required by applicable law or agreed to in writing, software
044 * distributed under the License is distributed on an "AS IS" BASIS,
045 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
046 * See the License for the specific language governing permissions and
047 * limitations under the License.
048 */
049
050package org.apache.hadoop.util.bloom;
051
052import java.io.DataInput;
053import java.io.DataOutput;
054import java.io.IOException;
055
056import org.apache.hadoop.classification.InterfaceAudience;
057import org.apache.hadoop.classification.InterfaceStability;
058
059/**
060 * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
061 * <p>
062 * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
063 * each of the <code>s</code> rows is a standard Bloom filter. The creation 
064 * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
065 * bit matrix, i.e., it is composed of a single standard Bloom filter.
066 * It assumes that <code>n<sub>r</sub></code> elements are recorded in the 
067 * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
068 * the cardinality of the set <code>A</code> to record in the filter).  
069 * <p>
070 * As the size of <code>A</code> grows during the execution of the application,
071 * several keys must be inserted in the DBF.  When inserting a key into the DBF,
072 * one must first get an active Bloom filter in the matrix.  A Bloom filter is
073 * active when the number of recorded keys, <code>n<sub>r</sub></code>, is 
074 * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
075 * If an active Bloom filter is found, the key is inserted and 
076 * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
077 * is no active Bloom filter, a new one is created (i.e., a new row is added to
078 * the matrix) according to the current size of <code>A</code> and the element
079 * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
080 * this new Bloom filter is set to one.  A given key is said to belong to the
081 * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
082 * <p>
083 * Originally created by
084 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
085 *
086 * @see Filter The general behavior of a filter
087 * @see BloomFilter A Bloom filter
088 * 
089 * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a>
090 */
091@InterfaceAudience.Public
092@InterfaceStability.Stable
093public class DynamicBloomFilter extends Filter {
094  /** 
095   * Threshold for the maximum number of key to record in a dynamic Bloom filter row.
096   */
097  private int nr;
098
099  /**
100   * The number of keys recorded in the current standard active Bloom filter.
101   */
102  private int currentNbRecord;
103
104  /**
105   * The matrix of Bloom filter.
106   */
107  private BloomFilter[] matrix;
108
109  /**
110   * Zero-args constructor for the serialization.
111   */
112  public DynamicBloomFilter() { }
113
114  /**
115   * Constructor.
116   * <p>
117   * Builds an empty Dynamic Bloom filter.
118   * @param vectorSize The number of bits in the vector.
119   * @param nbHash The number of hash function to consider.
120   * @param hashType type of the hashing function (see
121   * {@link org.apache.hadoop.util.hash.Hash}).
122   * @param nr The threshold for the maximum number of keys to record in a
123   * dynamic Bloom filter row.
124   */
125  public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
126    super(vectorSize, nbHash, hashType);
127
128    this.nr = nr;
129    this.currentNbRecord = 0;
130
131    matrix = new BloomFilter[1];
132    matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
133  }
134
135  @Override
136  public void add(Key key) {
137    if (key == null) {
138      throw new NullPointerException("Key can not be null");
139    }
140
141    BloomFilter bf = getActiveStandardBF();
142
143    if (bf == null) {
144      addRow();
145      bf = matrix[matrix.length - 1];
146      currentNbRecord = 0;
147    }
148
149    bf.add(key);
150
151    currentNbRecord++;
152  }
153
154  @Override
155  public void and(Filter filter) {
156    if (filter == null
157        || !(filter instanceof DynamicBloomFilter)
158        || filter.vectorSize != this.vectorSize
159        || filter.nbHash != this.nbHash) {
160      throw new IllegalArgumentException("filters cannot be and-ed");
161    }
162
163    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
164
165    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
166      throw new IllegalArgumentException("filters cannot be and-ed");
167    }
168
169    for (int i = 0; i < matrix.length; i++) {
170      matrix[i].and(dbf.matrix[i]);
171    }
172  }
173
174  @Override
175  public boolean membershipTest(Key key) {
176    if (key == null) {
177      return true;
178    }
179
180    for (int i = 0; i < matrix.length; i++) {
181      if (matrix[i].membershipTest(key)) {
182        return true;
183      }
184    }
185
186    return false;
187  }
188
189  @Override
190  public void not() {
191    for (int i = 0; i < matrix.length; i++) {
192      matrix[i].not();
193    }
194  }
195
196  @Override
197  public void or(Filter filter) {
198    if (filter == null
199        || !(filter instanceof DynamicBloomFilter)
200        || filter.vectorSize != this.vectorSize
201        || filter.nbHash != this.nbHash) {
202      throw new IllegalArgumentException("filters cannot be or-ed");
203    }
204
205    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
206
207    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
208      throw new IllegalArgumentException("filters cannot be or-ed");
209    }
210    for (int i = 0; i < matrix.length; i++) {
211      matrix[i].or(dbf.matrix[i]);
212    }
213  }
214
215  @Override
216  public void xor(Filter filter) {
217    if (filter == null
218        || !(filter instanceof DynamicBloomFilter)
219        || filter.vectorSize != this.vectorSize
220        || filter.nbHash != this.nbHash) {
221      throw new IllegalArgumentException("filters cannot be xor-ed");
222    }
223    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
224
225    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
226      throw new IllegalArgumentException("filters cannot be xor-ed");
227    }
228
229    for(int i = 0; i<matrix.length; i++) {
230        matrix[i].xor(dbf.matrix[i]);
231    }
232  }
233
234  @Override
235  public String toString() {
236    StringBuilder res = new StringBuilder();
237
238    for (int i = 0; i < matrix.length; i++) {
239      res.append(matrix[i]);
240      res.append(Character.LINE_SEPARATOR);
241    }
242    return res.toString();
243  }
244
245  // Writable
246
247  @Override
248  public void write(DataOutput out) throws IOException {
249    super.write(out);
250    out.writeInt(nr);
251    out.writeInt(currentNbRecord);
252    out.writeInt(matrix.length);
253    for (int i = 0; i < matrix.length; i++) {
254      matrix[i].write(out);
255    }
256  }
257
258  @Override
259  public void readFields(DataInput in) throws IOException {
260    super.readFields(in);
261    nr = in.readInt();
262    currentNbRecord = in.readInt();
263    int len = in.readInt();
264    matrix = new BloomFilter[len];
265    for (int i = 0; i < matrix.length; i++) {
266      matrix[i] = new BloomFilter();
267      matrix[i].readFields(in);
268    }
269  }
270
271  /**
272   * Adds a new row to <i>this</i> dynamic Bloom filter.
273   */
274  private void addRow() {
275    BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
276
277    for (int i = 0; i < matrix.length; i++) {
278      tmp[i] = matrix[i];
279    }
280
281    tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
282
283    matrix = tmp;
284  }
285
286  /**
287   * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
288   * @return BloomFilter The active standard Bloom filter.
289   *                     <code>Null</code> otherwise.
290   */
291  private BloomFilter getActiveStandardBF() {
292    if (currentNbRecord >= nr) {
293      return null;
294    }
295
296    return matrix[matrix.length - 1];
297  }
298}