001/** 002 * 003 * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) 004 * All rights reserved. 005 * Redistribution and use in source and binary forms, with or 006 * without modification, are permitted provided that the following 007 * conditions are met: 008 * - Redistributions of source code must retain the above copyright 009 * notice, this list of conditions and the following disclaimer. 010 * - Redistributions in binary form must reproduce the above copyright 011 * notice, this list of conditions and the following disclaimer in 012 * the documentation and/or other materials provided with the distribution. 013 * - Neither the name of the University Catholique de Louvain - UCL 014 * nor the names of its contributors may be used to endorse or 015 * promote products derived from this software without specific prior 016 * written permission. 017 * 018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 019 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 020 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 021 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 022 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 024 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 026 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 027 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 028 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 029 * POSSIBILITY OF SUCH DAMAGE. 030 */ 031 032/** 033 * Licensed to the Apache Software Foundation (ASF) under one 034 * or more contributor license agreements. See the NOTICE file 035 * distributed with this work for additional information 036 * regarding copyright ownership. The ASF licenses this file 037 * to you under the Apache License, Version 2.0 (the 038 * "License"); you may not use this file except in compliance 039 * with the License. You may obtain a copy of the License at 040 * 041 * http://www.apache.org/licenses/LICENSE-2.0 042 * 043 * Unless required by applicable law or agreed to in writing, software 044 * distributed under the License is distributed on an "AS IS" BASIS, 045 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 046 * See the License for the specific language governing permissions and 047 * limitations under the License. 048 */ 049 050package org.apache.hadoop.util.bloom; 051 052import java.io.DataInput; 053import java.io.DataOutput; 054import java.io.IOException; 055 056import org.apache.hadoop.classification.InterfaceAudience; 057import org.apache.hadoop.classification.InterfaceStability; 058 059/** 060 * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper. 061 * <p> 062 * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but 063 * each of the <code>s</code> rows is a standard Bloom filter. The creation 064 * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code> 065 * bit matrix, i.e., it is composed of a single standard Bloom filter. 066 * It assumes that <code>n<sub>r</sub></code> elements are recorded in the 067 * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is 068 * the cardinality of the set <code>A</code> to record in the filter). 069 * <p> 070 * As the size of <code>A</code> grows during the execution of the application, 071 * several keys must be inserted in the DBF. When inserting a key into the DBF, 072 * one must first get an active Bloom filter in the matrix. A Bloom filter is 073 * active when the number of recorded keys, <code>n<sub>r</sub></code>, is 074 * strictly less than the current cardinality of <code>A</code>, <code>n</code>. 075 * If an active Bloom filter is found, the key is inserted and 076 * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there 077 * is no active Bloom filter, a new one is created (i.e., a new row is added to 078 * the matrix) according to the current size of <code>A</code> and the element 079 * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of 080 * this new Bloom filter is set to one. A given key is said to belong to the 081 * DBF if the <code>k</code> positions are set to one in one of the matrix rows. 082 * <p> 083 * Originally created by 084 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>. 085 * 086 * @see Filter The general behavior of a filter 087 * @see BloomFilter A Bloom filter 088 * 089 * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a> 090 */ 091@InterfaceAudience.Public 092@InterfaceStability.Stable 093public class DynamicBloomFilter extends Filter { 094 /** 095 * Threshold for the maximum number of key to record in a dynamic Bloom filter row. 096 */ 097 private int nr; 098 099 /** 100 * The number of keys recorded in the current standard active Bloom filter. 101 */ 102 private int currentNbRecord; 103 104 /** 105 * The matrix of Bloom filter. 106 */ 107 private BloomFilter[] matrix; 108 109 /** 110 * Zero-args constructor for the serialization. 111 */ 112 public DynamicBloomFilter() { } 113 114 /** 115 * Constructor. 116 * <p> 117 * Builds an empty Dynamic Bloom filter. 118 * @param vectorSize The number of bits in the vector. 119 * @param nbHash The number of hash function to consider. 120 * @param hashType type of the hashing function (see 121 * {@link org.apache.hadoop.util.hash.Hash}). 122 * @param nr The threshold for the maximum number of keys to record in a 123 * dynamic Bloom filter row. 124 */ 125 public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) { 126 super(vectorSize, nbHash, hashType); 127 128 this.nr = nr; 129 this.currentNbRecord = 0; 130 131 matrix = new BloomFilter[1]; 132 matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType); 133 } 134 135 @Override 136 public void add(Key key) { 137 if (key == null) { 138 throw new NullPointerException("Key can not be null"); 139 } 140 141 BloomFilter bf = getActiveStandardBF(); 142 143 if (bf == null) { 144 addRow(); 145 bf = matrix[matrix.length - 1]; 146 currentNbRecord = 0; 147 } 148 149 bf.add(key); 150 151 currentNbRecord++; 152 } 153 154 @Override 155 public void and(Filter filter) { 156 if (filter == null 157 || !(filter instanceof DynamicBloomFilter) 158 || filter.vectorSize != this.vectorSize 159 || filter.nbHash != this.nbHash) { 160 throw new IllegalArgumentException("filters cannot be and-ed"); 161 } 162 163 DynamicBloomFilter dbf = (DynamicBloomFilter)filter; 164 165 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { 166 throw new IllegalArgumentException("filters cannot be and-ed"); 167 } 168 169 for (int i = 0; i < matrix.length; i++) { 170 matrix[i].and(dbf.matrix[i]); 171 } 172 } 173 174 @Override 175 public boolean membershipTest(Key key) { 176 if (key == null) { 177 return true; 178 } 179 180 for (int i = 0; i < matrix.length; i++) { 181 if (matrix[i].membershipTest(key)) { 182 return true; 183 } 184 } 185 186 return false; 187 } 188 189 @Override 190 public void not() { 191 for (int i = 0; i < matrix.length; i++) { 192 matrix[i].not(); 193 } 194 } 195 196 @Override 197 public void or(Filter filter) { 198 if (filter == null 199 || !(filter instanceof DynamicBloomFilter) 200 || filter.vectorSize != this.vectorSize 201 || filter.nbHash != this.nbHash) { 202 throw new IllegalArgumentException("filters cannot be or-ed"); 203 } 204 205 DynamicBloomFilter dbf = (DynamicBloomFilter)filter; 206 207 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { 208 throw new IllegalArgumentException("filters cannot be or-ed"); 209 } 210 for (int i = 0; i < matrix.length; i++) { 211 matrix[i].or(dbf.matrix[i]); 212 } 213 } 214 215 @Override 216 public void xor(Filter filter) { 217 if (filter == null 218 || !(filter instanceof DynamicBloomFilter) 219 || filter.vectorSize != this.vectorSize 220 || filter.nbHash != this.nbHash) { 221 throw new IllegalArgumentException("filters cannot be xor-ed"); 222 } 223 DynamicBloomFilter dbf = (DynamicBloomFilter)filter; 224 225 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { 226 throw new IllegalArgumentException("filters cannot be xor-ed"); 227 } 228 229 for(int i = 0; i<matrix.length; i++) { 230 matrix[i].xor(dbf.matrix[i]); 231 } 232 } 233 234 @Override 235 public String toString() { 236 StringBuilder res = new StringBuilder(); 237 238 for (int i = 0; i < matrix.length; i++) { 239 res.append(matrix[i]); 240 res.append(Character.LINE_SEPARATOR); 241 } 242 return res.toString(); 243 } 244 245 // Writable 246 247 @Override 248 public void write(DataOutput out) throws IOException { 249 super.write(out); 250 out.writeInt(nr); 251 out.writeInt(currentNbRecord); 252 out.writeInt(matrix.length); 253 for (int i = 0; i < matrix.length; i++) { 254 matrix[i].write(out); 255 } 256 } 257 258 @Override 259 public void readFields(DataInput in) throws IOException { 260 super.readFields(in); 261 nr = in.readInt(); 262 currentNbRecord = in.readInt(); 263 int len = in.readInt(); 264 matrix = new BloomFilter[len]; 265 for (int i = 0; i < matrix.length; i++) { 266 matrix[i] = new BloomFilter(); 267 matrix[i].readFields(in); 268 } 269 } 270 271 /** 272 * Adds a new row to <i>this</i> dynamic Bloom filter. 273 */ 274 private void addRow() { 275 BloomFilter[] tmp = new BloomFilter[matrix.length + 1]; 276 277 for (int i = 0; i < matrix.length; i++) { 278 tmp[i] = matrix[i]; 279 } 280 281 tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType); 282 283 matrix = tmp; 284 } 285 286 /** 287 * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter. 288 * @return BloomFilter The active standard Bloom filter. 289 * <code>Null</code> otherwise. 290 */ 291 private BloomFilter getActiveStandardBF() { 292 if (currentNbRecord >= nr) { 293 return null; 294 } 295 296 return matrix[matrix.length - 1]; 297 } 298}