#include"BloomFilter.h"
#include"Hash.h"
//uint32_t Hash(const char* data,size_t n,uint32_t seed);
static uint32_t BloomHash(const string& key){
return Hash(key.data(), key.size(), 0xbc9f1d34);
}
//需要考虑string里面是否有data这样的返回数;
BloomFilter::BloomFilter(int bits_per_key){
k_ = static_cast<size_t>(bits_per_key*0.69);
if(k_<1) k_ = 1;
if(k_>30) k_ =30;
}
const char* BloomFilter::Name() const {
return "leveldb.BuiltinBloomFilter2";
}
void BloomFilter::CreateFilter(const string* key,int n, std::string* dst)const {
size_t bits = n*bits_per_key_;
//首先需要知道全部的字节数
if(bits < 64) bits = 64;
size_t bytes = (bits+7)/8 ; //可以得到实际的位数;
bits = bytes*8;
const size_t init_size = dst->size(); //这样是因为需要为dst实际的申请内存;
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(k_)); //这个是实际的k_么? ?
//如何访问dst,转化为char*
char* array = &(*dst)[init_size]; //获得起始地址;
//不允许变动的数据一定写成const;
for(int i=0; i<n ; i++){
uint32_t h = BloomHash(key[i]);
const uint32_t delma = (h>>17) | (h<<15);
for(size_t j=0; j<k_; j++){
const uint32_t bitpos = h% bits; //key所在的实际位置;
array[bitpos/8] |= 1<<(bitpos%8);
h +=delma;
}
}
}
bool BloomFilter::KeyMayMatch(const string& key, const string& bloom_filter) const{
const size_t len = bloom_filter.size();
if(len<2) return false;
const char* array = bloom_filter.data();
const size_t bits = (len - 1) * 8; //第一个位置存放的是k_;
const size_t k = array[len-1]; //记录的实际位置;
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
uint32_t h = BloomHash(key);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k; j++) {
const uint32_t bitpos = h % bits;
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
h += delta;
}
return true;
}