diff --git a/Makefile.in b/Makefile.in
index 73326b1..e54fbd5 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -36,6 +36,7 @@ all: $(PROGRAMS)
SOURCE=\
base/base64.cc \
+ base/endian_utils.cc \
base/error_state.cc \
\
caching/hint_array.cc \
@@ -46,8 +47,9 @@ SOURCE=\
caching/restore_emitter.cc \
caching/xml_format.cc \
\
+ era/era_detail.cc \
+ \
persistent-data/checksum.cc \
- persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@@ -55,6 +57,7 @@ SOURCE=\
persistent-data/transaction_manager.cc \
\
persistent-data/data-structures/bitset.cc \
+ persistent-data/data-structures/bloom_filter.cc \
persistent-data/data-structures/btree.cc \
\
persistent-data/space_map.cc \
@@ -153,8 +156,9 @@ THIN_REPAIR_SOURCE=$(SOURCE)
THIN_RESTORE_SOURCE=$(SOURCE)
THIN_CHECK_SOURCE=\
base/error_state.cc \
+ base/endian_utils.cc \
+ \
persistent-data/checksum.cc \
- persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@@ -165,6 +169,7 @@ THIN_CHECK_SOURCE=\
persistent-data/space-maps/recursive.cc \
persistent-data/space-maps/careful_alloc.cc \
persistent-data/transaction_manager.cc \
+ \
thin-provisioning/device_tree.cc \
thin-provisioning/mapping_tree.cc \
thin-provisioning/metadata.cc \
@@ -172,8 +177,9 @@ THIN_CHECK_SOURCE=\
thin-provisioning/superblock.cc
THIN_RMAP_SOURCE=\
+ base/endian_utils.cc \
+ \
persistent-data/checksum.cc \
- persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@@ -232,8 +238,9 @@ thin_metadata_size: thin-provisioning/thin_metadata_size.o
CACHE_CHECK_SOURCE=\
base/base64.cc \
base/error_state.cc \
+ base/endian_utils.cc \
+ \
persistent-data/checksum.cc \
- persistent-data/endian_utils.cc \
persistent-data/error_set.cc \
persistent-data/file_utils.cc \
persistent-data/hex_dump.cc \
@@ -245,6 +252,7 @@ CACHE_CHECK_SOURCE=\
persistent-data/space-maps/recursive.cc \
persistent-data/space-maps/careful_alloc.cc \
persistent-data/transaction_manager.cc \
+ \
caching/hint_array.cc \
caching/superblock.cc \
caching/mapping_array.cc \
diff --git a/persistent-data/endian_utils.cc b/base/endian_utils.cc
similarity index 100%
rename from persistent-data/endian_utils.cc
rename to base/endian_utils.cc
diff --git a/persistent-data/endian_utils.h b/base/endian_utils.h
similarity index 97%
rename from persistent-data/endian_utils.h
rename to base/endian_utils.h
index 39276e6..3ad75ae 100644
--- a/persistent-data/endian_utils.h
+++ b/base/endian_utils.h
@@ -16,8 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
-#ifndef ENDIAN_H
-#define ENDIAN_H
+#ifndef BASE_ENDIAN_H
+#define BASE_ENDIAN_H
#include
#include
@@ -25,7 +25,6 @@
//----------------------------------------------------------------
-// FIXME: rename to endian
namespace base {
// These are just little wrapper types to make the compiler
diff --git a/caching/mapping_array.cc b/caching/mapping_array.cc
index d31c2c9..c6af4ef 100644
--- a/caching/mapping_array.cc
+++ b/caching/mapping_array.cc
@@ -1,5 +1,5 @@
+#include "base/endian_utils.h"
#include "caching/mapping_array.h"
-#include "persistent-data/endian_utils.h"
#include
diff --git a/caching/metadata.h b/caching/metadata.h
index f15543e..46c2b93 100644
--- a/caching/metadata.h
+++ b/caching/metadata.h
@@ -1,10 +1,11 @@
#ifndef CACHE_METADATA_H
#define CACHE_METADATA_H
+#include "base/endian_utils.h"
+
#include "persistent-data/block.h"
#include "persistent-data/data-structures/array.h"
#include "persistent-data/data-structures/bitset.h"
-#include "persistent-data/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/transaction_manager.h"
diff --git a/caching/superblock.h b/caching/superblock.h
index 2c2cf30..d1a24aa 100644
--- a/caching/superblock.h
+++ b/caching/superblock.h
@@ -1,7 +1,7 @@
#ifndef CACHE_SUPERBLOCK_H
#define CACHE_SUPERBLOCK_H
-#include "persistent-data/endian_utils.h"
+#include "base/endian_utils.h"
#include "persistent-data/data-structures/btree.h"
#include
diff --git a/era/era_detail.cc b/era/era_detail.cc
new file mode 100644
index 0000000..43fbc51
--- /dev/null
+++ b/era/era_detail.cc
@@ -0,0 +1,36 @@
+#include "era/era_detail.h"
+
+#include
+
+using namespace base;
+using namespace era;
+
+//----------------------------------------------------------------
+
+namespace {
+	// Stub: presumably meant to fold both hash selectors and the probe
+	// count into one le32; until that exists every call throws.
+	le32 pack_hash_detail(uint32_t hash1, uint32_t hash2, uint32_t nr_probes)
+	{ throw std::runtime_error("not implemented"); }
+	// Stub for the reverse direction; throws before touching any output.
+	void unpack_hash_detail(le32 packed, uint32_t &hash1, uint32_t &hash2, uint32_t &nr_probes)
+	{ throw std::runtime_error("not implemented"); }
+}
+
+// Converts an on-disk record to its in-core form; the CPU type is named explicitly because to_cpu cannot deduce it.
+void era_detail_traits::unpack(disk_type const &disk, value_type &value)
+{
+	value.nr_bits = to_cpu<uint32_t>(disk.nr_bits);
+	unpack_hash_detail(disk.hash_fns_and_probes, value.hash1, value.hash2, value.nr_probes);
+	value.bloom_root = to_cpu<uint64_t>(disk.bloom_root);
+}
+
+// Converts an in-core record to its on-disk form; the disk type is named explicitly because to_disk cannot deduce it.
+void era_detail_traits::pack(value_type const &value, disk_type &disk)
+{
+	disk.nr_bits = to_disk<le32>(value.nr_bits);
+	disk.hash_fns_and_probes = pack_hash_detail(value.hash1, value.hash2, value.nr_probes);
+	disk.bloom_root = to_disk<le64>(value.bloom_root);
+}
+
+//----------------------------------------------------------------
diff --git a/era/era_detail.h b/era/era_detail.h
new file mode 100644
index 0000000..adaec57
--- /dev/null
+++ b/era/era_detail.h
@@ -0,0 +1,36 @@
+#ifndef ERA_DETAIL_H
+#define ERA_DETAIL_H
+
+#include "base/endian_utils.h"
+
+//----------------------------------------------------------------
+
+namespace era {
+ struct era_detail_disk { // on-disk record built from the le32/le64 wrapper types, packed
+ base::le32 nr_bits;
+ base::le32 hash_fns_and_probes; // single word standing in for hash1, hash2 and nr_probes
+ base::le64 bloom_root;
+ } __attribute__ ((packed));
+ // In-core counterpart of era_detail_disk, using plain integer fields.
+ struct era_detail {
+ uint32_t nr_bits;
+
+ uint32_t hash1;
+ uint32_t hash2;
+ uint32_t nr_probes;
+
+ uint64_t bloom_root;
+ };
+ // Names the two record types and declares the conversions between them.
+ struct era_detail_traits {
+ typedef era_detail_disk disk_type;
+ typedef era_detail value_type;
+
+ static void unpack(disk_type const &disk, value_type &value); // disk -> in-core
+ static void pack(value_type const &value, disk_type &disk); // in-core -> disk
+ };
+}
+
+//----------------------------------------------------------------
+
+#endif
diff --git a/persistent-data/data-structures/array_block.h b/persistent-data/data-structures/array_block.h
index 1bb3d4c..638e164 100644
--- a/persistent-data/data-structures/array_block.h
+++ b/persistent-data/data-structures/array_block.h
@@ -19,7 +19,7 @@
#ifndef ARRAY_BLOCK_H
#define ARRAY_BLOCK_H
-#include "persistent-data/endian_utils.h"
+#include "base/endian_utils.h"
//----------------------------------------------------------------
diff --git a/persistent-data/data-structures/bitset.cc b/persistent-data/data-structures/bitset.cc
index 1570fc1..6bab965 100644
--- a/persistent-data/data-structures/bitset.cc
+++ b/persistent-data/data-structures/bitset.cc
@@ -46,6 +46,10 @@ namespace persistent_data {
return array_.get_root();
}
+		// Returns nr_bits_, the bound that the out-of-range check
+		// compares bit indices against.
+		unsigned get_nr_bits() const { return nr_bits_; }
+
void grow(unsigned new_nr_bits, bool default_value) {
pad_last_block(default_value);
resize_array(new_nr_bits, default_value);
@@ -184,7 +188,7 @@ namespace persistent_data {
if (n >= nr_bits_) {
std::ostringstream str;
str << "bitset index out of bounds ("
- << n << " >= " << nr_bits_ << endl;
+ << n << " >= " << nr_bits_ << ")";
throw runtime_error(str.str());
}
}
@@ -214,6 +218,12 @@ persistent_data::bitset::get_root() const
return impl_->get_root();
}
+unsigned
+persistent_data::bitset::get_nr_bits() const	// qualified like the neighbouring definitions
+{
+	return impl_->get_nr_bits();	// impl_ holds the actual count
+}
+
void
persistent_data::bitset::grow(unsigned new_nr_bits, bool default_value)
{
diff --git a/persistent-data/data-structures/bitset.h b/persistent-data/data-structures/bitset.h
index a6e90ae..70688aa 100644
--- a/persistent-data/data-structures/bitset.h
+++ b/persistent-data/data-structures/bitset.h
@@ -16,8 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
-#ifndef BITSET_H
-#define BITSET_H
+#ifndef PERSISTENT_DATA_DATA_STRUCTURES_BITSET_H
+#define PERSISTENT_DATA_DATA_STRUCTURES_BITSET_H
#include "persistent-data/run.h"
@@ -54,6 +54,7 @@ namespace persistent_data {
bitset(tm_ptr tm);
bitset(tm_ptr tm, block_address root, unsigned nr_bits);
block_address get_root() const;
+ unsigned get_nr_bits() const;
void grow(unsigned new_nr_bits, bool default_value);
void destroy();
diff --git a/persistent-data/data-structures/bloom_filter.cc b/persistent-data/data-structures/bloom_filter.cc
new file mode 100644
index 0000000..3ca6ffb
--- /dev/null
+++ b/persistent-data/data-structures/bloom_filter.cc
@@ -0,0 +1,146 @@
+#include "persistent-data/data-structures/bloom_filter.h"
+
+#include
+
+using namespace persistent_data;
+
+//----------------------------------------------------------------
+
+namespace {
+	static const uint64_t m1 = 0x9e37fffffffc0001UL;	// odd 64-bit multiplier used by hash1
+	static const unsigned bits = 18;	// low bits of the product that hash1 shifts away
+	// Multiplies the address by m1, drops the low bits and truncates to 32 bits.
+	static uint32_t hash1(block_address const &b) {
+		return (b * m1) >> bits;
+	}
+
+	// Same xor-shift/multiply steps and constants as MurmurHash3's 32-bit finalizer, fed the low 32 bits of b.
+	static uint32_t hash2(block_address const &b) {
+		uint32_t n = b;
+		n = n ^ (n >> 16);
+		n = n * 0x85ebca6bu;
+		n = n ^ (n >> 13);
+		n = n * 0xc2b2ae35u;
+		n = n ^ (n >> 16);
+		return n;
+	}
+
+	// Throws std::runtime_error unless nr_bits is a non-zero power of two (zero passes the bit trick alone).
+	void check_power_of_two(unsigned nr_bits) {
+		if (nr_bits == 0 || (nr_bits & (nr_bits - 1)))
+			throw std::runtime_error("bloom filter needs a power of two nr_bits");
+	}
+}
+
+//----------------------------------------------------------------
+
+bloom_filter::bloom_filter(tm_ptr tm, unsigned nr_bits, unsigned nr_probes)
+	: tm_(tm),
+	  nr_bits_(nr_bits),	// declared in the class, so it has to be set here
+	  bits_(tm), nr_probes_(nr_probes),
+	  mask_(nr_bits - 1)	// all ones below nr_bits once it is known to be a power of two
+{
+	// Creating a fresh filter: reject a bad size, then add nr_bits bits that default to false.
+	check_power_of_two(nr_bits);
+	bits_.grow(nr_bits, false);
+}
+
+// Reopens a filter whose bitset is rooted at root; nr_bits and nr_probes are taken as given, not read back.
+bloom_filter::bloom_filter(tm_ptr tm, block_address root, unsigned nr_bits, unsigned nr_probes)
+	: tm_(tm),
+	  nr_bits_(nr_bits),	// declared in the class, so it has to be set here
+	  bits_(tm, root, nr_bits), nr_probes_(nr_probes),
+	  mask_(nr_bits - 1)
+{
+	check_power_of_two(nr_bits);	// throws std::runtime_error on a bad size
+}
+
+// Returns the root reported by the backing bitset, the value the reopening constructor takes.
+block_address bloom_filter::get_root() const
+{
+	return bits_.get_root();
+}
+
+// Returns true only when every probe bit for b is set, so true may still be a false positive.
+bool bloom_filter::test(uint64_t b)
+{
+	vector<unsigned> probes(nr_probes_);
+	fill_probes(b, probes);
+
+	// One clear probe bit is enough to answer "not present".
+	for (unsigned p = 0; p < nr_probes_; p++)
+		if (!bits_.get(probes[p]))
+			return false;
+	return true;
+}
+
+// Records b by setting each of its nr_probes_ probe bits.
+void bloom_filter::set(uint64_t b)
+{
+	vector<unsigned> probes(nr_probes_);
+	fill_probes(b, probes);
+
+	for (unsigned p = 0; p < nr_probes_; p++)
+		bits_.set(probes[p], true);
+}
+
+// Delegates to the backing bitset's flush().
+void bloom_filter::flush()
+{
+	bits_.flush();
+}
+
+// Fills probes[0..nr_probes_) with the bit indices for b: the first is h1, each later
+// one adds h2 to its predecessor, and h2 itself grows by the probe number each step.
+void bloom_filter::fill_probes(block_address b, vector<unsigned> &probes) const
+{
+	uint32_t h1 = hash1(b) & mask_;
+	uint32_t h2 = hash2(b) & mask_;
+	// Storing before stepping keeps nr_probes_ == 0 from writing into an empty vector.
+	for (unsigned p = 0; p < nr_probes_; p++) {
+		probes[p] = h1;
+		h1 = (h1 + h2) & mask_;
+		h2 = (h2 + p + 1) & mask_;
+	}
+}
+
+// Debug aid. Prints the residency line, then one "length: count" line per
+// distinct length of a maximal run of equal bits (set or clear), in
+// increasing order of length.
+void bloom_filter::print_debug(ostream &out)
+{
+	print_residency(out);
+
+	map<unsigned, unsigned> runs;
+	unsigned const nr_bits = bits_.get_nr_bits();
+
+	for (unsigned i = 0; i < nr_bits;) {
+		bool v = bits_.get(i);
+		unsigned run_length = 1;
+
+		// Swallow the following bits for as long as they match v.
+		while (++i < nr_bits && bits_.get(i) == v)
+			run_length++;
+
+		// operator[] creates a missing entry as zero before the increment.
+		runs[run_length]++;
+	}
+
+	// std::map iterates in key order, which gives the sorted histogram.
+	map<unsigned, unsigned>::const_iterator it;
+	for (it = runs.begin(); it != runs.end(); ++it)
+		out << it->first << ": " << it->second << endl;
+}
+
+// Writes "residency: <set bits>/<total bits>" after visiting every bit once.
+void bloom_filter::print_residency(ostream &out)
+{
+	unsigned const total = bits_.get_nr_bits();
+	unsigned nr_set = 0;
+	for (unsigned bit = 0; bit != total; ++bit)
+		if (bits_.get(bit))
+			++nr_set;
+	out << "residency: " << nr_set << "/" << total << endl;
+}
+
+//----------------------------------------------------------------
diff --git a/persistent-data/data-structures/bloom_filter.h b/persistent-data/data-structures/bloom_filter.h
new file mode 100644
index 0000000..6703a7d
--- /dev/null
+++ b/persistent-data/data-structures/bloom_filter.h
@@ -0,0 +1,47 @@
+#ifndef PERSISTENT_DATA_DATA_STRUCTURES_BLOOM_FILTER_H
+#define PERSISTENT_DATA_DATA_STRUCTURES_BLOOM_FILTER_H
+
+#include "persistent-data/transaction_manager.h"
+#include "persistent-data/data-structures/bitset.h"
+
+#include
+
+//----------------------------------------------------------------
+
+namespace persistent_data {
+	// Bloom filter keyed by block address, with its bits kept in a persistent_data::bitset.
+	class bloom_filter {
+	public:
+		typedef boost::shared_ptr<bloom_filter> ptr;
+		typedef persistent_data::transaction_manager::ptr tm_ptr;
+
+		// Creates an empty filter; nr_bits must be a power of two.
+		bloom_filter(tm_ptr tm,
+			     unsigned nr_bits, unsigned nr_probes);
+
+		// Reopens a filter rooted at root; nr_bits must again be a power of two.
+		bloom_filter(tm_ptr tm, block_address root,
+			     unsigned nr_bits, unsigned nr_probes);
+
+		block_address get_root() const;
+		bool test(uint64_t b); // not const due to caching effects in bitset
+		void set(uint64_t b);
+		void flush();
+
+		void print_debug(std::ostream &out);
+
+	private:
+		void print_residency(std::ostream &out);
+		void fill_probes(block_address b, std::vector<unsigned> &probes) const;
+
+		tm_ptr tm_;
+		unsigned nr_bits_;
+		persistent_data::bitset bits_;
+		unsigned nr_probes_;
+		uint64_t mask_;
+	};
+}
+
+//----------------------------------------------------------------
+
+#endif
diff --git a/persistent-data/data-structures/btree.h b/persistent-data/data-structures/btree.h
index 26e687a..93c3284 100644
--- a/persistent-data/data-structures/btree.h
+++ b/persistent-data/data-structures/btree.h
@@ -19,7 +19,7 @@
#ifndef BTREE_H
#define BTREE_H
-#include "persistent-data/endian_utils.h"
+#include "base/endian_utils.h"
#include "persistent-data/transaction_manager.h"
#include "persistent-data/data-structures/ref_counter.h"
diff --git a/persistent-data/space-maps/disk.cc b/persistent-data/space-maps/disk.cc
index df3ed43..0c851f6 100644
--- a/persistent-data/space-maps/disk.cc
+++ b/persistent-data/space-maps/disk.cc
@@ -16,6 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
+#include "base/endian_utils.h"
+
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/space-maps/disk_structures.h"
#include "persistent-data/space-maps/recursive.h"
@@ -23,7 +25,6 @@
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/checksum.h"
-#include "persistent-data/endian_utils.h"
#include "persistent-data/math_utils.h"
#include "persistent-data/transaction_manager.h"
diff --git a/persistent-data/space-maps/disk_structures.h b/persistent-data/space-maps/disk_structures.h
index a92f490..1429d36 100644
--- a/persistent-data/space-maps/disk_structures.h
+++ b/persistent-data/space-maps/disk_structures.h
@@ -19,7 +19,7 @@
#ifndef SPACE_MAP_DISK_STRUCTURES_H
#define SPACE_MAP_DISK_STRUCTURES_H
-#include "persistent-data/endian_utils.h"
+#include "base/endian_utils.h"
// FIXME: what's this included for?
#include "persistent-data/data-structures/btree.h"
diff --git a/thin-provisioning/metadata.h b/thin-provisioning/metadata.h
index 9749acb..c0913a0 100644
--- a/thin-provisioning/metadata.h
+++ b/thin-provisioning/metadata.h
@@ -19,9 +19,10 @@
#ifndef METADATA_LL_H
#define METADATA_LL_H
+#include "base/endian_utils.h"
+
#include "persistent-data/block.h"
#include "persistent-data/data-structures/btree.h"
-#include "persistent-data/endian_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/transaction_manager.h"
diff --git a/thin-provisioning/superblock.h b/thin-provisioning/superblock.h
index d6d78e3..3a3d90a 100644
--- a/thin-provisioning/superblock.h
+++ b/thin-provisioning/superblock.h
@@ -1,8 +1,9 @@
#ifndef THIN_SUPERBLOCK_H
#define THIN_SUPERBLOCK_H
+#include "base/endian_utils.h"
+
#include "persistent-data/block.h"
-#include "persistent-data/endian_utils.h"
#include "persistent-data/data-structures/ref_counter.h"
//----------------------------------------------------------------
diff --git a/unit-tests/Makefile.in b/unit-tests/Makefile.in
index d2506ec..585ddff 100644
--- a/unit-tests/Makefile.in
+++ b/unit-tests/Makefile.in
@@ -50,6 +50,7 @@ TEST_SOURCE=\
unit-tests/base64_t.cc \
unit-tests/bitset_t.cc \
unit-tests/block_t.cc \
+ unit-tests/bloom_filter_t.cc \
unit-tests/btree_t.cc \
unit-tests/btree_counter_t.cc \
unit-tests/btree_damage_visitor_t.cc \
diff --git a/unit-tests/bloom_filter_t.cc b/unit-tests/bloom_filter_t.cc
new file mode 100644
index 0000000..bb879ed
--- /dev/null
+++ b/unit-tests/bloom_filter_t.cc
@@ -0,0 +1,153 @@
+#include "gmock/gmock.h"
+#include "persistent-data/data-structures/bloom_filter.h"
+#include "persistent-data/transaction_manager.h"
+#include "persistent-data/space-maps/core.h"
+#include "persistent-data/data-structures/array_block.h"
+#include "test_utils.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace persistent_data;
+using namespace std;
+using namespace test;
+using namespace testing;
+
+//----------------------------------------------------------------
+
+namespace {
+	block_address const BLOCK_SIZE = 4096;
+	block_address const NR_BLOCKS = 102400;
+	block_address const SUPERBLOCK = 0;
+
+	//--------------------------------
+
+	// Fixture giving each test a block manager, a core space map and a transaction manager.
+	class BloomFilterTests : public Test {
+	public:
+		BloomFilterTests()
+			: bm_(create_bm(NR_BLOCKS)),
+			  sm_(setup_core_map()),
+			  tm_(new transaction_manager(bm_, sm_)) {
+		}
+		// Returns count distinct addresses drawn uniformly from [0, max]. The generator
+		// is default-seeded on every call, so equal arguments give the same set.
+		set<block_address> generate_random_blocks(unsigned count,
+							   block_address max = std::numeric_limits<block_address>::max()) {
+			set<block_address> r;
+
+			using namespace boost::random;
+
+			mt19937 rng;
+			uniform_int_distribution<block_address> uniform_dist(0, max);
+
+			while (r.size() < count)
+				r.insert(uniform_dist(rng));
+
+			return r;
+		}
+		// Takes the superblock write reference and drops it again at end of scope.
+		void commit() {
+			block_manager<>::write_ref superblock(bm_->superblock(SUPERBLOCK));
+		}
+		// Builds the in-core space map with the superblock already counted once.
+		space_map::ptr setup_core_map() {
+			space_map::ptr sm(new core_map(NR_BLOCKS));
+			sm->inc(SUPERBLOCK);
+			return sm;
+		}
+
+		with_temp_directory dir_;
+		block_manager<>::ptr bm_;
+		space_map::ptr sm_;
+		transaction_manager::ptr tm_;
+	};
+}
+
+//----------------------------------------------------------------
+
+TEST_F(BloomFilterTests, nr_bits_must_be_a_power_of_two)
+{
+ ASSERT_THROW(bloom_filter f(tm_, 1023, 3), runtime_error); // 1023 is not a power of two
+}
+
+TEST_F(BloomFilterTests, can_create_a_bloom_filter)
+{
+ bloom_filter f(tm_, 1024, 3); // the test passes as long as construction does not throw
+}
+
+TEST_F(BloomFilterTests, no_false_negatives)
+{
+	bloom_filter f(tm_, 4096, 6);
+	set<block_address> bs = generate_random_blocks(1000);
+
+	set<block_address>::const_iterator it;
+	for (it = bs.begin(); it != bs.end(); ++it)
+		f.set(*it);
+	// Every address that was set must now test as present.
+	for (it = bs.begin(); it != bs.end(); ++it)
+		ASSERT_THAT(f.test(*it), Eq(true));
+}
+
+TEST_F(BloomFilterTests, reload_works)
+{
+	block_address root;
+	set<block_address> bs = generate_random_blocks(1000);
+	// First filter: populate it, flush, remember its root, then commit.
+	{
+		bloom_filter f(tm_, 4096, 6);
+
+		set<block_address>::const_iterator it;
+		for (it = bs.begin(); it != bs.end(); ++it)
+			f.set(*it);
+
+		f.flush();
+		root = f.get_root();
+		commit();
+	}
+	// Second filter: reopened from the saved root with the same size and probe count.
+	{
+		bloom_filter f(tm_, root, 4096, 6);
+
+		set<block_address>::const_iterator it;
+		for (it = bs.begin(); it != bs.end(); ++it)
+			ASSERT_THAT(f.test(*it), Eq(true));
+	}
+}
+
+TEST_F(BloomFilterTests, count_false_positives)
+{
+	block_address nr_blocks = 1024 * 1024;
+	block_address written_blocks = nr_blocks / 100;
+	// Informational only: the rate is printed to cerr and nothing is asserted.
+	unsigned shift = 1;
+	// Pick the smallest power of two that gives at least 16 bits per written block.
+	while ((1ull << shift) < (16 * written_blocks))
+		shift++;
+	cerr << "bitset size: " << ((1 << shift) / (8 * 1024)) << "k" << endl;
+
+	bloom_filter f(tm_, 1 << shift, 6);
+
+	set<block_address> bs = generate_random_blocks(written_blocks, nr_blocks);
+	set<block_address>::const_iterator it;
+
+	for (it = bs.begin(); it != bs.end(); ++it)
+		f.set(*it);
+
+	// f.print_debug(cerr);
+	// A false positive is an address that was never set yet still tests as present.
+	unsigned count = 0;
+	for (unsigned i = 0; i < nr_blocks; i++)
+		if (!bs.count(i) && f.test(i))
+			count++;
+
+	cerr << count << " false positives out of " << nr_blocks << ", "
+	     << static_cast<double>(count * 100) / static_cast<double>(nr_blocks)
+	     << "%" << endl;
+}
+
+//----------------------------------------------------------------