...

Source file src/github.com/syndtr/goleveldb/leveldb/filter/bloom.go

Documentation: github.com/syndtr/goleveldb/leveldb/filter

     1  // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
     2  // All rights reserved.
     3  //
     4  // Use of this source code is governed by a BSD-style license that can be
     5  // found in the LICENSE file.
     6  
     7  package filter
     8  
     9  import (
    10  	"github.com/syndtr/goleveldb/leveldb/util"
    11  )
    12  
    13  func bloomHash(key []byte) uint32 {
    14  	return util.Hash(key, 0xbc9f1d34)
    15  }
    16  
    17  type bloomFilter int
    18  
    19  // Name: The bloom filter serializes its parameters and is backward compatible
    20  // with respect to them. Therefor, its parameters are not added to its
    21  // name.
    22  func (bloomFilter) Name() string {
    23  	return "leveldb.BuiltinBloomFilter"
    24  }
    25  
    26  func (f bloomFilter) Contains(filter, key []byte) bool {
    27  	nBytes := len(filter) - 1
    28  	if nBytes < 1 {
    29  		return false
    30  	}
    31  	nBits := uint32(nBytes * 8)
    32  
    33  	// Use the encoded k so that we can read filters generated by
    34  	// bloom filters created using different parameters.
    35  	k := filter[nBytes]
    36  	if k > 30 {
    37  		// Reserved for potentially new encodings for short bloom filters.
    38  		// Consider it a match.
    39  		return true
    40  	}
    41  
    42  	kh := bloomHash(key)
    43  	delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
    44  	for j := uint8(0); j < k; j++ {
    45  		bitpos := kh % nBits
    46  		if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
    47  			return false
    48  		}
    49  		kh += delta
    50  	}
    51  	return true
    52  }
    53  
    54  func (f bloomFilter) NewGenerator() FilterGenerator {
    55  	// Round down to reduce probing cost a little bit.
    56  	k := uint8(f * 69 / 100) // 0.69 =~ ln(2)
    57  	if k < 1 {
    58  		k = 1
    59  	} else if k > 30 {
    60  		k = 30
    61  	}
    62  	return &bloomFilterGenerator{
    63  		n: int(f),
    64  		k: k,
    65  	}
    66  }
    67  
    68  type bloomFilterGenerator struct {
    69  	n int
    70  	k uint8
    71  
    72  	keyHashes []uint32
    73  }
    74  
    75  func (g *bloomFilterGenerator) Add(key []byte) {
    76  	// Use double-hashing to generate a sequence of hash values.
    77  	// See analysis in [Kirsch,Mitzenmacher 2006].
    78  	g.keyHashes = append(g.keyHashes, bloomHash(key))
    79  }
    80  
    81  func (g *bloomFilterGenerator) Generate(b Buffer) {
    82  	// Compute bloom filter size (in both bits and bytes)
    83  	nBits := uint32(len(g.keyHashes) * g.n)
    84  	// For small n, we can see a very high false positive rate.  Fix it
    85  	// by enforcing a minimum bloom filter length.
    86  	if nBits < 64 {
    87  		nBits = 64
    88  	}
    89  	nBytes := (nBits + 7) / 8
    90  	nBits = nBytes * 8
    91  
    92  	dest := b.Alloc(int(nBytes) + 1)
    93  	dest[nBytes] = g.k
    94  	for _, kh := range g.keyHashes {
    95  		delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
    96  		for j := uint8(0); j < g.k; j++ {
    97  			bitpos := kh % nBits
    98  			dest[bitpos/8] |= (1 << (bitpos % 8))
    99  			kh += delta
   100  		}
   101  	}
   102  
   103  	g.keyHashes = g.keyHashes[:0]
   104  }
   105  
   106  // NewBloomFilter creates a new initialized bloom filter for given
   107  // bitsPerKey.
   108  //
   109  // Since bitsPerKey is persisted individually for each bloom filter
   110  // serialization, bloom filters are backwards compatible with respect to
   111  // changing bitsPerKey. This means that no big performance penalty will
   112  // be experienced when changing the parameter. See documentation for
   113  // opt.Options.Filter for more information.
   114  func NewBloomFilter(bitsPerKey int) Filter {
   115  	return bloomFilter(bitsPerKey)
   116  }
   117  

View as plain text