From a6839795853d186a1a905c49b30526cd1f5072e4 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 22 Jul 2011 16:09:56 +0100 Subject: [PATCH] space map disk --- .gitignore | 4 +- Makefile | 35 ++-- block.h | 12 ++ block.tcc | 64 ++++++- block_t.cc | 28 ++- btree.h | 33 +++- btree.tcc | 84 ++++++--- btree_t.cc | 4 +- endian.cc | 46 +++++ endian.h | 10 +- endian_t.cc | 62 +++++++ main.cc | 5 +- math.h | 16 ++ metadata.cc | 197 +++++++++++---------- metadata.h | 191 +++++++++++--------- metadata_disk_structures.cc | 95 ++++++++++ metadata_disk_structures.h | 117 +++++++++++++ metadata_t.cc | 41 +++++ space_map.h | 4 +- space_map_disk.cc | 7 + space_map_disk.h | 338 ++++++++++++++++++++++++++++++++++++ space_map_disk_structures.h | 98 +++++++++++ space_map_disk_t.cc | 108 ++++++++++++ space_map_t.cc | 5 - transaction_manager.h | 6 +- 25 files changed, 1389 insertions(+), 221 deletions(-) create mode 100644 endian.cc create mode 100644 endian_t.cc create mode 100644 math.h create mode 100644 metadata_disk_structures.cc create mode 100644 metadata_disk_structures.h create mode 100644 metadata_t.cc create mode 100644 space_map_disk.cc create mode 100644 space_map_disk.h create mode 100644 space_map_disk_structures.h create mode 100644 space_map_disk_t.cc diff --git a/.gitignore b/.gitignore index 8dc898b..4f9a738 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *~ *.o -multisnap_display \ No newline at end of file +*_t +*.d +test.data \ No newline at end of file diff --git a/Makefile b/Makefile index c55c45b..a20c7ae 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,30 @@ SOURCE=\ - metadata.cc + endian.cc \ + metadata.cc \ + metadata_disk_structures.cc \ + space_map_disk.cc \ -PROGRAM_SOURCE=\ +TEST_SOURCE=\ block_t.cc \ btree_t.cc \ + endian_t.cc \ + metadata_t.cc \ space_map_t.cc \ - transaction_manager_t.cc + space_map_disk_t.cc \ + transaction_manager_t.cc \ OBJECTS=$(subst .cc,.o,$(SOURCE)) +TEST_PROGRAMS=$(subst .cc,,$(TEST_SOURCE)) CPPFLAGS=-Wall -std=c++0x -g INCLUDES= LIBS=-lstdc++ -.PHONEY: unit-tests +.PHONEY: unit-tests test-programs -unit-tests: block_t btree_t space_map_t transaction_manager_t - ./block_t - ./btree_t - ./space_map_t - ./transaction_manager_t +test-programs: $(TEST_PROGRAMS) + +unit-tests: $(TEST_PROGRAMS) + for p in $(TEST_PROGRAMS); do echo Running $$p; ./$$p; done .SUFFIXES: .cc .o .d @@ -42,8 +48,17 @@ btree_t: btree_t.o space_map_t: space_map_t.o g++ $(CPPFLAGS) -o $@ $+ $(LIBS) +space_map_disk_t: space_map_disk_t.o $(OBJECTS) + g++ $(CPPFLAGS) -o $@ $+ $(LIBS) + transaction_manager_t: transaction_manager_t.o g++ $(CPPFLAGS) -o $@ $+ $(LIBS) +metadata_t: metadata_t.o $(OBJECTS) + g++ $(CPPFLAGS) -o $@ $+ $(LIBS) + +endian_t: endian_t.o $(OBJECTS) + g++ $(CPPFLAGS) -o $@ $+ $(LIBS) + include $(subst .cc,.d,$(SOURCE)) -include $(subst .cc,.d,$(PROGRAM_SOURCE)) \ No newline at end of file +include $(subst .cc,.d,$(TEST_SOURCE)) \ No newline at end of file diff --git a/block.h b/block.h index 5437cf8..879f82f 100644 --- a/block.h +++ b/block.h @@ -156,6 +156,8 @@ namespace persistent_data { // held. void flush(); + block_address get_nr_blocks() const; + private: void check(block_address b) const; @@ -165,11 +167,21 @@ namespace persistent_data { void read_release(block *b) const; void write_release(block *b); + enum lock_type { + READ_LOCK, + WRITE_LOCK + }; + + void register_lock(block_address b, lock_type t) const; + void unregister_lock(block_address b, lock_type t) const; + int fd_; block_address nr_blocks_; mutable unsigned lock_count_; mutable unsigned superblock_count_; mutable unsigned ordinary_count_; + + mutable std::map > held_locks_; }; } diff --git a/block.tcc b/block.tcc index 9c195c1..1979bd2 100644 --- a/block.tcc +++ b/block.tcc @@ -6,7 +6,6 @@ #include #include -#include #include using namespace boost; @@ -76,10 +75,11 @@ block_manager::read_lock(block_address location) const buffer buf; read_buffer(location, buf); - + register_lock(location, READ_LOCK); return read_ref( typename block::ptr( - new block(location, buf, lock_count_, ordinary_count_))); + new block(location, buf, lock_count_, ordinary_count_), + bind(&block_manager::read_release, this, _1))); } template @@ -97,6 +97,7 @@ block_manager::write_lock(block_address location) buffer buf; read_buffer(location, buf); + register_lock(location, WRITE_LOCK); return write_ref( typename block::ptr( new block(location, buf, lock_count_, ordinary_count_), @@ -113,6 +114,7 @@ block_manager::write_lock_zero(block_address location) zero_buffer(buf); typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -125,7 +127,9 @@ block_manager::read_lock(block_address location, buffer buf; read_buffer(location, buf); - typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v)); + typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v), + bind(&block_manager::read_release, this, _1)); + register_lock(location, READ_LOCK); return read_ref(b); } @@ -148,6 +152,7 @@ block_manager::write_lock(block_address location, read_buffer(location, buf); typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -162,6 +167,7 @@ block_manager::write_lock_zero(block_address location, zero_buffer(buf); typename block::ptr b(new block(location, buf, lock_count_, ordinary_count_, false, v), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -178,6 +184,7 @@ block_manager::superblock(block_address location) read_buffer(location, buf); typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -194,6 +201,7 @@ block_manager::superblock_zero(block_address location) zero_buffer(buf); typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -211,6 +219,7 @@ block_manager::superblock(block_address location, read_buffer(location, buf); typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -228,6 +237,7 @@ block_manager::superblock_zero(block_address location, zero_buffer(buf); typename block::ptr b(new block(location, buf, lock_count_, superblock_count_, true, v), bind(&block_manager::write_release, this, _1)); + register_lock(location, WRITE_LOCK); return write_ref(b); } @@ -292,7 +302,7 @@ template void block_manager::zero_buffer(block_manager::buffer &buffer) const { - memset(buffer, 0, BlockSize); + ::memset(buffer, 0, BlockSize); } // FIXME: we don't need this anymore @@ -300,6 +310,7 @@ template void block_manager::read_release(block *b) const { + unregister_lock(b->location_, READ_LOCK); delete b; } @@ -316,6 +327,7 @@ block_manager::write_release(block *b) (*b->validator_)->prepare(*b); write_buffer(b->location_, b->data_); + unregister_lock(b->location_, WRITE_LOCK); delete b; } @@ -327,4 +339,46 @@ block_manager::check(block_address b) const throw std::runtime_error("block address out of bounds"); } +template +block_address +block_manager::get_nr_blocks() const +{ + return nr_blocks_; +} + +// FIXME: how do we unregister if block construction throws? +template +void +block_manager::register_lock(block_address b, lock_type t) const +{ + auto it = held_locks_.find(b); + if (it == held_locks_.end()) + held_locks_.insert(make_pair(b, make_pair(t, 1))); + else { + if (it->second.first != t) + throw std::runtime_error("lock type mismatch when locking"); + + if (it->second.first == WRITE_LOCK) + throw std::runtime_error("cannot hold concurrent write locks"); + + it->second.second++; + } +} + +template +void +block_manager::unregister_lock(block_address b, lock_type t) const +{ + auto it = held_locks_.find(b); + if (it == held_locks_.end()) + throw std::runtime_error("lock not held"); + + if (it->second.first != t) + throw std::runtime_error("lock type mismatch when unlocking"); + + it->second.second--; + if (it->second.second == 0) + held_locks_.erase(it); +} + //---------------------------------------------------------------- diff --git a/block_t.cc b/block_t.cc index 7c03e81..2e68a24 100644 --- a/block_t.cc +++ b/block_t.cc @@ -167,6 +167,32 @@ BOOST_AUTO_TEST_CASE(flush_throws_if_held_locks) BOOST_CHECK_THROW(bm->flush(), runtime_error); } -// cannot write lock the same block more than once +BOOST_AUTO_TEST_CASE(no_concurrent_write_locks) +{ + auto bm = create_bm(); + auto wr = bm->write_lock(0); + BOOST_CHECK_THROW(bm->write_lock(0), runtime_error); +} + +BOOST_AUTO_TEST_CASE(concurrent_read_locks) +{ + auto bm = create_bm(); + auto rr = bm->read_lock(0); + bm->read_lock(0); +} + +BOOST_AUTO_TEST_CASE(read_then_write) +{ + auto bm = create_bm(); + bm->read_lock(0); + bm->write_lock(0); +} + +BOOST_AUTO_TEST_CASE(write_then_read) +{ + auto bm = create_bm(); + bm->write_lock(0); + bm->read_lock(0); +} //---------------------------------------------------------------- diff --git a/btree.h b/btree.h index 7005d45..73b957a 100644 --- a/btree.h +++ b/btree.h @@ -12,9 +12,17 @@ namespace persistent_data { + template + class NoOpRefCounter { + public: + void inc(ValueType const &v) {} + void dec(ValueType const &v) {} + }; + struct uint64_traits { typedef base::__le64 disk_type; typedef uint64_t value_type; + typedef NoOpRefCounter ref_counter; static void unpack(disk_type const &disk, value_type &value) { value = base::to_cpu(disk); @@ -72,6 +80,8 @@ namespace persistent_data { unsigned get_max_entries() const; void set_max_entries(unsigned n); + + // FIXME: remove this, and get the constructor to do it. void set_max_entries(); // calculates the max for you. uint64_t key_at(unsigned i) const; @@ -102,6 +112,14 @@ namespace persistent_data { optional exact_search(uint64_t key) const; int lower_bound(uint64_t key) const; + template + void inc_children(RefCounter &rc); + + // FIXME: remove + void *raw() { + return raw_; + } + private: static unsigned calc_max_entries(void); @@ -206,6 +224,10 @@ namespace persistent_data { return to_node(spine_.front()); } + block_address get_parent_location() const { + return spine_.front().get_location(); + } + block_address get_root() const { return root_; } @@ -216,6 +238,7 @@ namespace persistent_data { block_address root_; }; + // FIXME: make a member of btree template optional lookup_raw(ro_spine &spine, block_address block, uint64_t key) { @@ -252,9 +275,13 @@ namespace persistent_data { typedef typename block_manager::read_ref read_ref; typedef typename block_manager::write_ref write_ref; - btree(typename persistent_data::transaction_manager::ptr tm); + btree(typename persistent_data::transaction_manager::ptr tm, + typename ValueTraits::ref_counter rc); + btree(typename transaction_manager::ptr tm, - block_address root); + block_address root, + typename ValueTraits::ref_counter rc); + ~btree(); maybe_value lookup(key const &key) const; @@ -297,6 +324,8 @@ namespace persistent_data { typename persistent_data::transaction_manager::ptr tm_; bool destroy_; block_address root_; + NoOpRefCounter internal_rc_; + typename ValueTraits::ref_counter rc_; }; }; diff --git a/btree.tcc b/btree.tcc index 1f14807..7b84efe 100644 --- a/btree.tcc +++ b/btree.tcc @@ -155,8 +155,8 @@ node_ref::copy_entries(node_ref const &rhs, throw runtime_error("too many entries"); set_nr_entries(n + count); - ::memcpy(rhs.key_ptr(begin), key_ptr(n), sizeof(uint64_t) * count); - ::memcpy(rhs.value_ptr(begin), value_ptr(n), sizeof(typename ValueTraits::disk_type) * count); + ::memcpy(key_ptr(n), rhs.key_ptr(begin), sizeof(uint64_t) * count); + ::memcpy(value_ptr(n), rhs.value_ptr(begin), sizeof(typename ValueTraits::disk_type) * count); } template @@ -228,12 +228,30 @@ node_ref::value_ptr(unsigned i) const sizeof(typename ValueTraits::disk_type) * i; } +template +template +void +node_ref::inc_children(RefCounter &rc) +{ + unsigned nr_entries = get_nr_entries(); + for (unsigned i = 0; i < nr_entries; i++) { + typename ValueTraits::value_type v; + typename ValueTraits::disk_type d; + ::memcpy(&d, value_ptr(i), sizeof(d)); + ValueTraits::unpack(d, v); + rc.inc(v); + } +} + //---------------------------------------------------------------- template -btree::btree(typename transaction_manager::ptr tm) +btree:: +btree(typename transaction_manager::ptr tm, + typename ValueTraits::ref_counter rc) : tm_(tm), - destroy_(false) + destroy_(false), + rc_(rc) { using namespace btree_detail; @@ -248,11 +266,14 @@ btree::btree(typename transaction_manager -btree::btree(typename transaction_manager::ptr tm, - block_address root) +btree:: +btree(typename transaction_manager::ptr tm, + block_address root, + typename ValueTraits::ref_counter rc) : tm_(tm), destroy_(false), - root_(root) + root_(root), + rc_(rc) { } @@ -318,7 +339,7 @@ insert(key const &key, auto n = spine.template get_node(); if (need_insert) { - btree new_tree(tm_); + btree new_tree(tm_, rc_); n.insert_at(index, key[level], new_tree.get_root()); } @@ -335,20 +356,18 @@ insert(key const &key, n.set_value(index, value); } -#if 0 template void btree::remove(key const &key) { using namespace btree_detail; - } template block_address btree::get_root() const { - + return root_; } template @@ -356,25 +375,40 @@ void btree::set_root(block_address root) { using namespace btree_detail; - + root_ = root; } template -block_address -btree::get_root() const -{ - using namespace btree_detail; - -} - -template -ptr +typename btree::ptr btree::clone() const { using namespace btree_detail; + ro_spine spine(tm_); + spine.step(root_); + auto new_root = tm_->new_block(); + + auto o = spine.template get_node(); + if (o.get_type() == INTERNAL) { + auto n = to_node(new_root); + ::memcpy(n.raw(), o.raw(), BlockSize); + + typename uint64_traits::ref_counter rc(internal_rc_); + n.inc_children(rc); + } else { + auto n = to_node(new_root); + ::memcpy(n.raw(), o.raw(), BlockSize); + + typename ValueTraits::ref_counter rc(rc_); + n.inc_children(rc); + } + + return btree::ptr( + new btree( + tm_, new_root.get_location(), rc_)); } +#if 0 template void btree::destroy() @@ -414,7 +448,6 @@ split_beneath(btree_detail::shadow_spine &spine, node_type type; unsigned nr_left, nr_right; - auto left = tm_->new_block(); auto l = to_node(left); l.set_nr_entries(0); @@ -457,9 +490,10 @@ split_beneath(btree_detail::shadow_spine &spine, template template void -btree::split_sibling(btree_detail::shadow_spine &spine, - block_address parent_index, - uint64_t key) +btree:: +split_sibling(btree_detail::shadow_spine &spine, + block_address parent_index, + uint64_t key) { using namespace btree_detail; diff --git a/btree_t.cc b/btree_t.cc index 5094155..46fd7d2 100644 --- a/btree_t.cc +++ b/btree_t.cc @@ -24,9 +24,11 @@ namespace { btree<1, uint64_traits, 4096>::ptr create_btree() { + typename uint64_traits::ref_counter rc; + return btree<1, uint64_traits, 4096>::ptr( new btree<1, uint64_traits, 4096>( - create_tm())); + create_tm(), rc)); } } diff --git a/endian.cc b/endian.cc new file mode 100644 index 0000000..797edcc --- /dev/null +++ b/endian.cc @@ -0,0 +1,46 @@ +#include "endian.h" + +using namespace base; + +//---------------------------------------------------------------- + +bool +base::test_bit_le(void const *bits, unsigned b) +{ + __le64 const *w = reinterpret_cast<__le64 const *>(bits); + w += b / 64; + + uint64_t v = to_cpu(*w); + uint64_t mask = 1; + mask = mask << (b % 64); + return (v & mask) ? true : false; +} + +void +base::set_bit_le(void *bits, unsigned b) +{ + __le64 *w = reinterpret_cast<__le64 *>(bits); + w += b / 64; + + uint64_t v = to_cpu(*w); + uint64_t mask = 1; + mask = mask << (b % 64); + v |= mask; + *w = to_disk<__le64>(v); +} + +void +base::clear_bit_le(void *bits, unsigned b) +{ + __le64 *w = reinterpret_cast<__le64 *>(bits); + w += b / 64; + + uint64_t v = to_cpu(*w); + uint64_t mask = 1; + mask = mask << (b % 64); + mask = ~mask; + v &= mask; + *w = to_disk<__le64>(v); +} + +//---------------------------------------------------------------- diff --git a/endian.h b/endian.h index 269dc3b..51e2d7b 100644 --- a/endian.h +++ b/endian.h @@ -1,16 +1,17 @@ #ifndef ENDIAN_H #define ENDIAN_H +#include #include //---------------------------------------------------------------- +// FIXME: rename to endian namespace base { // These are just little wrapper types to make the compiler // understand that the le types are not assignable to the // corresponding cpu type. - struct __le16 { explicit __le16(uint16_t v = 0) : v_(v) { @@ -37,6 +38,7 @@ namespace base { //-------------------------------- + // FIXME: actually do the conversions ! template CPUType to_cpu(DiskType const &d) { BOOST_STATIC_ASSERT(sizeof(d) == 0); @@ -76,6 +78,12 @@ namespace base { inline __le64 to_disk<__le64, uint64_t>(uint64_t const &v) { return __le64(v); } + + //-------------------------------- + + bool test_bit_le(void const *bits, unsigned b); + void set_bit_le(void *bits, unsigned b); + void clear_bit_le(void *bits, unsigned b); } //---------------------------------------------------------------- diff --git a/endian_t.cc b/endian_t.cc new file mode 100644 index 0000000..13c04d9 --- /dev/null +++ b/endian_t.cc @@ -0,0 +1,62 @@ +#include "space_map_disk.h" +#include "core_map.h" + +#define BOOST_TEST_MODULE EndianTests +#include + +using namespace base; +using namespace boost; +using namespace persistent_data; +using namespace std; + +//---------------------------------------------------------------- + +BOOST_AUTO_TEST_CASE(bitmaps) +{ + unsigned NR_BITS = 10247; + vector data((NR_BITS + 63) / 64, 0); + + // check all bits are zero + void *bits = &data[0]; + for (unsigned i = 0; i < NR_BITS; i++) + BOOST_CHECK(!test_bit_le(bits, i)); + + // set all bits to one + for (unsigned i = 0; i < NR_BITS; i++) + set_bit_le(bits, i); + + // check they're all 1 now + for (unsigned i = 0; i < NR_BITS; i++) + BOOST_CHECK(test_bit_le(bits, i)); + + // clear every third bit + for (unsigned i = 0; i < NR_BITS; i += 3) + clear_bit_le(bits, i); + + // check everything is as we expect + for (unsigned i = 0; i < NR_BITS; i++) { + if ((i % 3) == 0) + BOOST_CHECK(!test_bit_le(bits, i)); + else + BOOST_CHECK(test_bit_le(bits, i)); + } +} + +BOOST_AUTO_TEST_CASE(bitmaps_alternate_words) +{ + unsigned NR_BITS = 10247; + vector data((NR_BITS + 63) / 64, 0); + + // check all bits are zero + void *bits = &data[0]; + for (unsigned i = 0; i < 128; i++) + BOOST_CHECK(!test_bit_le(bits, i)); + + for (unsigned i = 0; i < 64; i++) + set_bit_le(bits, i); + + for (unsigned i = 64; i < 128; i++) + BOOST_CHECK(!test_bit_le(bits, i)); +} + +//---------------------------------------------------------------- diff --git a/main.cc b/main.cc index 0b3b13c..7c8aaab 100644 --- a/main.cc +++ b/main.cc @@ -1,9 +1,12 @@ #include "block.h" +#include "metadata.h" #include +using namespace thinp; + int main(int argc, char **argv) { - std::cout << "Hello, world!" << std::endl; + metadata m("foo.metadata", 128, 1024); return 0; } diff --git a/math.h b/math.h new file mode 100644 index 0000000..d7f01fd --- /dev/null +++ b/math.h @@ -0,0 +1,16 @@ +#ifndef THINP_MATH_H +#define THINP_MATH_H + +//---------------------------------------------------------------- + +namespace base { + // Only works for integral types + template + T div_up(T const &v, T const &divisor) { + return (v + (divisor - 1)) / divisor; + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/metadata.cc b/metadata.cc index 10701f5..1808c1b 100644 --- a/metadata.cc +++ b/metadata.cc @@ -3,96 +3,106 @@ #include using namespace persistent_data; -using namespace multisnap; +using namespace thin_provisioning; //---------------------------------------------------------------- namespace { - typedef uint8_t __le8; - typedef uint8_t __u8; - typedef uint32_t __le32; - typedef uint64_t __le64; - - - uint32_t const SUPERBLOCK_MAGIC = 27022010; block_address const SUPERBLOCK_LOCATION = 0; uint32_t const VERSION = 1; unsigned const METADATA_CACHE_SIZE = 1024; unsigned const SECTOR_TO_BLOCK_SHIFT = 3; - unsigned const SPACE_MAP_ROOT_SIZE = 128; - - struct multisnap_super_block { - __le32 csum_; - __le32 flags_; - __le64 blocknr_; /* this block number, dm_block_t */ - - __u8 uuid_[16]; - __le64 magic_; - __le32 version_; - __le32 time_; - - __le64 trans_id_; - /* root for userspace's transaction (for migration and friends) */ - __le64 held_root_; - - __u8 data_space_map_root_[SPACE_MAP_ROOT_SIZE]; - __u8 metadata_space_map_root_[SPACE_MAP_ROOT_SIZE]; - - /* 2 level btree mapping (dev_id, (dev block, time)) -> data block */ - __le64 data_mapping_root_; - - /* device detail root mapping dev_id -> device_details */ - __le64 device_details_root_; - - __le32 data_block_size_; /* in 512-byte sectors */ - - __le32 metadata_block_size_; /* in 512-byte sectors */ - __le64 metadata_nr_blocks_; - - __le32 compat_flags_; - __le32 incompat_flags_; - } __attribute__ ((packed)); - - struct device_details { - __le64 dev_size_; - __le64 mapped_blocks_; - __le64 transaction_id_; /* when created */ - __le32 creation_time_; - __le32 snapshotted_time_; - } __attribute__ ((packed)); } //---------------------------------------------------------------- -metadata::thin::maybe_address -metadata::thin::lookup(block_address thin_block) +thin::thin(thin_dev_t dev, metadata *metadata) + : dev_(dev), + metadata_(metadata) +{ +} + +thin_dev_t +thin::get_dev_t() const +{ + return dev_; +} + +thin::maybe_address +thin::lookup(block_address thin_block) { uint64_t key[2] = {dev_, thin_block}; return metadata_->mappings_.lookup(key); } void -metadata::thin::insert(block_address thin_block, block_address data_block) +thin::insert(block_address thin_block, block_address data_block) { uint64_t key[2] = {dev_, thin_block}; return metadata_->mappings_.insert(key, data_block); } void -metadata::thin::remove(block_address thin_block) +thin::remove(block_address thin_block) { uint64_t key[2] = {dev_, thin_block}; metadata_->mappings_.remove(key); } -#if 0 + +void +thin::set_snapshot_time(uint32_t time) +{ + uint64_t key[1] = { dev_ }; + auto mdetail = metadata_->details_.lookup(key); + if (!mdetail) + throw runtime_error("no such device"); + + mdetail->snapshotted_time_ = time; + metadata_->details_.insert(key, *mdetail); +} + +block_address +thin::get_mapped_blocks() const +{ + uint64_t key[1] = { dev_ }; + auto mdetail = metadata_->details_.lookup(key); + if (!mdetail) + throw runtime_error("no such device"); + + return mdetail->mapped_blocks_; +} + +void +thin::set_mapped_blocks(block_address count) +{ + uint64_t key[1] = { dev_ }; + auto mdetail = metadata_->details_.lookup(key); + if (!mdetail) + throw runtime_error("no such device"); + + mdetail->mapped_blocks_ = count; + metadata_->details_.insert(key, *mdetail); +} + //-------------------------------- -metadata::metadata(std::string const &metadata_dev, +metadata::metadata(transaction_manager::ptr tm, + block_address superblock, sector_t data_block_size, - block_address nr_data_blocks) + block_address nr_data_blocks, + bool create) + : superblock_(superblock), + tm_(tm), + details_(tm, typename device_details_traits::ref_counter()), + mappings_top_level_(tm, mtree_ref_counter(tm)), + mappings_(tm, space_map_ref_counter(data_sm_)) { - + ::memset(&sb_, 0, sizeof(sb_)); + sb_.data_mapping_root_ = mappings_.get_root(); + sb_.device_details_root_ = details_.get_root(); + sb_.metadata_block_size_ = MD_BLOCK_SIZE; + sb_.metadata_nr_blocks_ = tm->get_bm()->get_nr_blocks(); } metadata::~metadata() @@ -103,24 +113,29 @@ metadata::~metadata() void metadata::commit() { + sb_.data_mapping_root_ = mappings_.get_root(); + sb_.device_details_root_ = details_.get_root(); + auto superblock = tm_->get_bm()->superblock(superblock_); + auto disk = reinterpret_cast(superblock.data()); + superblock_traits::pack(sb_, *disk); } -#endif + void -metadata::create_thin(dev_t dev) +metadata::create_thin(thin_dev_t dev) { uint64_t key[1] = {dev}; if (device_exists(dev)) throw std::runtime_error("Device already exists"); - single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_)); + single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_, space_map_ref_counter(data_sm_))); mappings_top_level_.insert(key, new_tree->get_root()); mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly } void -metadata::create_snap(dev_t dev, dev_t origin) +metadata::create_snap(thin_dev_t dev, thin_dev_t origin) { uint64_t snap_key[1] = {dev}; uint64_t origin_key[1] = {origin}; @@ -129,89 +144,95 @@ metadata::create_snap(dev_t dev, dev_t origin) if (!mtree_root) throw std::runtime_error("unknown origin"); - single_mapping_tree otree(tm_, *mtree_root); + single_mapping_tree otree(tm_, *mtree_root, + space_map_ref_counter(data_sm_)); single_mapping_tree::ptr clone(otree.clone()); mappings_top_level_.insert(snap_key, clone->get_root()); mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly - time_++; + sb_.time_++; - auto o = open(origin); - auto s = open(dev); - o->set_snapshot_time(time_); - s->set_snapshot_time(time_); + auto o = open_thin(origin); + auto s = open_thin(dev); + o->set_snapshot_time(sb_.time_); + s->set_snapshot_time(sb_.time_); s->set_mapped_blocks(o->get_mapped_blocks()); } void -metadata::del(dev_t dev) +metadata::del(thin_dev_t dev) { uint64_t key[1] = {dev}; mappings_top_level_.remove(key); } -#if 0 void metadata::set_transaction_id(uint64_t id) { - + sb_.trans_id_ = id; } uint64_t metadata::get_transaction_id() const { - + return sb_.trans_id_; } block_address metadata::get_held_root() const { - -} - -thin_ptr -metadata::open_device(dev_t) -{ - + return sb_.held_root_; } block_address metadata::alloc_data_block() { - + return data_sm_->new_block(); } void metadata::free_data_block(block_address b) { - + data_sm_->dec(b); } block_address metadata::get_nr_free_data_blocks() const { - + return data_sm_->get_nr_free(); } sector_t metadata::get_data_block_size() const { - + return sb_.data_block_size_; } block_address metadata::get_data_dev_size() const { - + return data_sm_->get_nr_blocks(); } -#endif -bool -metadata::device_exists(dev_t dev) const + +thin::ptr +metadata::open_thin(thin_dev_t dev) { uint64_t key[1] = {dev}; - auto mval = details_.lookup(key); - return mval; + auto mdetails = details_.lookup(key); + if (!mdetails) + throw runtime_error("no such device"); + + thin *ptr = new thin(dev, this); + thin::ptr r(ptr); + return r; +} + +bool +metadata::device_exists(thin_dev_t dev) const +{ + uint64_t key[1] = {dev}; + return details_.lookup(key); } //---------------------------------------------------------------- diff --git a/metadata.h b/metadata.h index 7dec5dd..332018a 100644 --- a/metadata.h +++ b/metadata.h @@ -4,6 +4,8 @@ #include "block.h" #include "transaction_manager.h" #include "btree.h" +#include "endian.h" +#include "metadata_disk_structures.h" #include @@ -11,76 +13,124 @@ //---------------------------------------------------------------- -// FIXME: make a const -#define BLOCK_SIZE 4096 +namespace thin_provisioning { + unsigned const MD_BLOCK_SIZE = 4096; + + // FIXME: don't use namespaces in a header + using namespace base; + using namespace persistent_data; -namespace multisnap { typedef uint64_t sector_t; + typedef uint32_t thin_dev_t; - struct device_details_disk { - __le64 dev_size; - __le64 mapped_blocks; - __le64 transaction_id; /* when created */ - __le32 creation_time; - __le32 snapshotted_time; - } __attribute__ ((packed)); + //------------------------------------------------ - struct device_details { - uint64_t dev_size; - uint64_t mapped_blocks; - uint64_t transaction_id; /* when created */ - uint32_t creation_time; - uint32_t snapshotted_time; - }; - - struct detail_traits { - typedef device_details_disk disk_type; - typedef device_details value_type; - - static value_type construct(void *data) { - struct device_details_disk disk; - struct device_details cpu; - - ::memcpy(&disk, data, sizeof(disk)); - cpu.dev_size = to_cpu(disk.dev_size); - cpu.mapped_blocks = to_cpu(disk.mapped_blocks); - cpu.transaction_id = to_cpu(disk.transaction_id); - cpu.creation_time = to_cpu(disk.creation_time); - cpu.snapshotted_time = to_cpu(disk.snapshotted_time); - - return cpu; - } - }; -#if 0 - class dev_traits { + class space_map_ref_counter { public: + space_map_ref_counter(space_map::ptr sm) + : sm_(sm) { + } + + void inc(block_address b) { + sm_->inc(b); + } + + void dec(block_address b) { + sm_->dec(b); + } + + private: + space_map::ptr sm_; + }; + + struct block_traits { typedef base::__le64 disk_type; - typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> value_type; + typedef uint64_t value_type; + typedef space_map_ref_counter ref_counter; - static value_type construct(void *data) { - uint64_t root = uint64_traits::construct(data); + static void unpack(disk_type const &disk, value_type &value) { + value = base::to_cpu(disk); + } - return value_type + static void pack(value_type const &value, disk_type &disk) { + disk = base::to_disk(value); } }; -#endif + + //------------------------------------------------ + + template + class mtree_ref_counter { + public: + mtree_ref_counter(typename transaction_manager::ptr tm) + : tm_(tm) { + } + + void inc(block_address b) { + } + + void dec(block_address b) { + } + + private: + typename transaction_manager::ptr tm_; + }; + + template + struct mtree_traits { + typedef base::__le64 disk_type; + typedef uint64_t value_type; + typedef mtree_ref_counter ref_counter; + + static void unpack(disk_type const &disk, value_type &value) { + value = base::to_cpu(disk); + } + + static void pack(value_type const &value, disk_type &disk) { + disk = base::to_disk(value); + } + }; + + class metadata; + class thin { + public: + typedef boost::shared_ptr ptr; + typedef boost::optional maybe_address; + + thin_dev_t get_dev_t() const; + maybe_address lookup(block_address thin_block); + void insert(block_address thin_block, block_address data_block); + void remove(block_address thin_block); + + void set_snapshot_time(uint32_t time); + + block_address get_mapped_blocks() const; + void set_mapped_blocks(block_address count); + + private: + friend class metadata; + thin(thin_dev_t dev, metadata *metadata); + + thin_dev_t dev_; + metadata *metadata_; + }; class metadata { public: typedef boost::shared_ptr ptr; - typedef persistent_data::block_address block_address; - metadata(std::string const &metadata_dev, + metadata(transaction_manager::ptr tm, + block_address superblock, sector_t data_block_size, - persistent_data::block_address nr_data_blocks); + block_address nr_data_blocks, + bool create); ~metadata(); void commit(); - typedef uint32_t dev_t; - void create_thin(dev_t dev); - void create_snap(dev_t dev, dev_t origin); - void del(dev_t); + void create_thin(thin_dev_t dev); + void create_snap(thin_dev_t dev, thin_dev_t origin); + void del(thin_dev_t); void set_transaction_id(uint64_t id); uint64_t get_transaction_id() const; @@ -95,46 +145,29 @@ namespace multisnap { sector_t get_data_block_size() const; block_address get_data_dev_size() const; - class thin { - public: - typedef boost::shared_ptr ptr; - - dev_t get_dev_t() const; - - typedef boost::optional maybe_address; - maybe_address lookup(block_address thin_block); - void insert(block_address thin_block, block_address data_block); - void remove(block_address thin_block); - - void set_snapshot_time(uint32_t time); - - persistent_data::block_address get_mapped_blocks() const; - void set_mapped_blocks(persistent_data::block_address count); - - private: - dev_t dev_; - metadata::ptr metadata_; - }; - - thin::ptr open(dev_t); + thin::ptr open_thin(thin_dev_t); private: friend class thin; - bool device_exists(dev_t dev) const; + bool device_exists(thin_dev_t dev) const; - uint32_t time_; + block_address superblock_; - persistent_data::transaction_manager::ptr tm_; + typedef persistent_data::transaction_manager::ptr tm_ptr; - typedef persistent_data::btree<1, detail_traits, BLOCK_SIZE> detail_tree; - typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> dev_tree; - typedef persistent_data::btree<2, uint64_traits, BLOCK_SIZE> mapping_tree; - typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> single_mapping_tree; + typedef persistent_data::btree<1, device_details_traits, MD_BLOCK_SIZE> detail_tree; + typedef persistent_data::btree<1, mtree_traits, MD_BLOCK_SIZE> dev_tree; + typedef persistent_data::btree<2, block_traits, MD_BLOCK_SIZE> mapping_tree; + typedef persistent_data::btree<1, block_traits, MD_BLOCK_SIZE> single_mapping_tree; + tm_ptr tm_; + space_map::ptr metadata_sm_; + space_map::ptr data_sm_; detail_tree details_; dev_tree mappings_top_level_; mapping_tree mappings_; + superblock sb_; }; }; diff --git a/metadata_disk_structures.cc b/metadata_disk_structures.cc new file mode 100644 index 0000000..1dff20a --- /dev/null +++ b/metadata_disk_structures.cc @@ -0,0 +1,95 @@ +#include "metadata_disk_structures.h" + +#include + +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +void +device_details_traits::unpack(device_details_disk const &disk, device_details &value) +{ + value.dev_size_ = to_cpu(disk.dev_size_); + value.mapped_blocks_ = to_cpu(disk.mapped_blocks_); + value.transaction_id_ = to_cpu(disk.transaction_id_); + value.creation_time_ = to_cpu(disk.creation_time_); + value.snapshotted_time_ = to_cpu(disk.snapshotted_time_); +} + +void +device_details_traits::pack(device_details const &value, device_details_disk &disk) +{ + disk.dev_size_ = to_disk<__le64>(value.dev_size_); + disk.mapped_blocks_ = to_disk<__le64>(value.mapped_blocks_); + disk.transaction_id_ = to_disk<__le64>(value.transaction_id_); + disk.creation_time_ = to_disk<__le32>(value.creation_time_); + disk.snapshotted_time_ = to_disk<__le32>(value.snapshotted_time_); +} + +void +superblock_traits::unpack(superblock_disk const &disk, superblock &value) +{ + value.csum_ = to_cpu(disk.csum_); + value.flags_ = to_cpu(disk.csum_); + value.blocknr_ = to_cpu(disk.blocknr_); + + ::memcpy(value.uuid_, disk.uuid_, sizeof(value.uuid_)); + value.magic_ = to_cpu(disk.magic_); + value.version_ = to_cpu(disk.version_); + value.time_ = to_cpu(disk.time_); + + value.trans_id_ = to_cpu(disk.trans_id_); + value.held_root_ = to_cpu(disk.held_root_); + + ::memcpy(value.data_space_map_root_, + disk.data_space_map_root_, + sizeof(value.data_space_map_root_)); + ::memcpy(value.metadata_space_map_root_, + disk.metadata_space_map_root_, + sizeof(value.metadata_space_map_root_)); + + value.data_mapping_root_ = to_cpu(disk.data_mapping_root_); + value.device_details_root_ = to_cpu(disk.device_details_root_); + value.data_block_size_ = to_cpu(disk.data_block_size_); + + value.metadata_block_size_ = to_cpu(disk.metadata_block_size_); + value.metadata_nr_blocks_ = to_cpu(disk.metadata_nr_blocks_); + + value.compat_flags_ = to_cpu(disk.compat_flags_); + value.incompat_flags_ = to_cpu(disk.incompat_flags_); +} + +void +superblock_traits::pack(superblock const &value, superblock_disk &disk) +{ + disk.csum_ = to_disk<__le32>(value.csum_); + disk.flags_ = to_disk<__le32>(value.csum_); + disk.blocknr_ = to_disk<__le64>(value.blocknr_); + + ::memcpy(disk.uuid_, value.uuid_, sizeof(disk.uuid_)); + disk.magic_ = to_disk<__le64>(value.magic_); + disk.version_ = to_disk<__le32>(value.version_); + disk.time_ = to_disk<__le32>(value.time_); + + disk.trans_id_ = to_disk<__le64>(value.trans_id_); + disk.held_root_ = to_disk<__le64>(value.held_root_); + + ::memcpy(disk.data_space_map_root_, + value.data_space_map_root_, + sizeof(disk.data_space_map_root_)); + ::memcpy(disk.metadata_space_map_root_, + value.metadata_space_map_root_, + sizeof(disk.metadata_space_map_root_)); + + disk.data_mapping_root_ = to_disk<__le64>(value.data_mapping_root_); + disk.device_details_root_ = to_disk<__le64>(value.device_details_root_); + disk.data_block_size_ = to_disk<__le32>(value.data_block_size_); + + disk.metadata_block_size_ = to_disk<__le32>(value.metadata_block_size_); + disk.metadata_nr_blocks_ = to_disk<__le64>(value.metadata_nr_blocks_); + + disk.compat_flags_ = to_disk<__le32>(value.compat_flags_); + disk.incompat_flags_ = to_disk<__le32>(value.incompat_flags_); +} + +//---------------------------------------------------------------- diff --git a/metadata_disk_structures.h b/metadata_disk_structures.h new file mode 100644 index 0000000..089b9d6 --- /dev/null +++ b/metadata_disk_structures.h @@ -0,0 +1,117 @@ +#ifndef METADATA_DISK_STRUCTURES_H +#define METADATA_DISK_STRUCTURES_H + +#include "endian.h" +#include "btree.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + using namespace base; // FIXME: don't use namespaces in headers. + + struct device_details_disk { + __le64 dev_size_; + __le64 mapped_blocks_; + __le64 transaction_id_; /* when created */ + __le32 creation_time_; + __le32 snapshotted_time_; + } __attribute__ ((packed)); + + struct device_details { + uint64_t dev_size_; + uint64_t mapped_blocks_; + uint64_t transaction_id_; /* when created */ + uint32_t creation_time_; + uint32_t snapshotted_time_; + }; + + struct device_details_traits { + typedef device_details_disk disk_type; + typedef device_details value_type; + typedef persistent_data::NoOpRefCounter ref_counter; + + static void unpack(device_details_disk const &disk, device_details &value); + static void pack(device_details const &value, device_details_disk &disk); + }; + + unsigned const SPACE_MAP_ROOT_SIZE = 128; + + typedef unsigned char __u8; + + struct superblock_disk { + __le32 csum_; + __le32 flags_; + __le64 blocknr_; + + __u8 uuid_[16]; + __le64 magic_; + __le32 version_; + __le32 time_; + + __le64 trans_id_; + /* root for userspace's transaction (for migration and friends) */ + __le64 held_root_; + + __u8 data_space_map_root_[SPACE_MAP_ROOT_SIZE]; + __u8 metadata_space_map_root_[SPACE_MAP_ROOT_SIZE]; + + /* 2 level btree mapping (dev_id, (dev block, time)) -> data block */ + __le64 data_mapping_root_; + + /* device detail root mapping dev_id -> device_details */ + __le64 device_details_root_; + + __le32 data_block_size_; /* in 512-byte sectors */ + + __le32 metadata_block_size_; /* in 512-byte sectors */ + __le64 metadata_nr_blocks_; + + __le32 compat_flags_; + __le32 incompat_flags_; + } __attribute__ ((packed)); + + struct superblock { + uint32_t csum_; + uint32_t flags_; + uint64_t blocknr_; + + unsigned char uuid_[16]; + uint64_t magic_; + uint32_t version_; + uint32_t time_; + + uint64_t trans_id_; + /* root for userspace's transaction (for migration and friends) */ + uint64_t held_root_; + + unsigned char data_space_map_root_[SPACE_MAP_ROOT_SIZE]; + unsigned char metadata_space_map_root_[SPACE_MAP_ROOT_SIZE]; + + /* 2 level btree mapping (dev_id, (dev block, time)) -> data block */ + uint64_t data_mapping_root_; + + /* device detail root mapping dev_id -> device_details */ + uint64_t device_details_root_; + + uint32_t data_block_size_; /* in 512-byte sectors */ + + uint32_t metadata_block_size_; /* in 512-byte sectors */ + uint64_t metadata_nr_blocks_; + + uint32_t compat_flags_; + uint32_t incompat_flags_; + }; + + struct superblock_traits { + typedef superblock_disk disk_type; + typedef superblock value_type; + typedef NoOpRefCounter ref_counter; + + static void unpack(superblock_disk const &disk, superblock &value); + static void pack(superblock const &value, superblock_disk &disk); + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/metadata_t.cc b/metadata_t.cc new file mode 100644 index 0000000..8ae9f25 --- /dev/null +++ b/metadata_t.cc @@ -0,0 +1,41 @@ +#include "metadata.h" +#include "core_map.h" + +#define BOOST_TEST_MODULE MetadataTests +#include + +using namespace std; +using namespace boost; +using namespace persistent_data; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + block_address const NR_BLOCKS = 1024; + block_address const SUPERBLOCK = 0; + + transaction_manager<4096>::ptr + create_tm() { + block_manager<4096>::ptr bm(new block_manager<4096>("./test.data", NR_BLOCKS)); + space_map::ptr sm(new core_map(NR_BLOCKS)); + transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); + return tm; + } + + metadata::ptr + create_metadata() { + auto tm = create_tm(); + return metadata::ptr( + new metadata(tm, 0, 128, 1024000, true)); + } +} + +//---------------------------------------------------------------- + +BOOST_AUTO_TEST_CASE(create_metadata_object) +{ + auto m = create_metadata(); +} + +//---------------------------------------------------------------- diff --git a/space_map.h b/space_map.h index e1f9478..e6aa82d 100644 --- a/space_map.h +++ b/space_map.h @@ -29,8 +29,10 @@ namespace persistent_data { virtual bool count_possibly_greater_than_one(block_address b) const = 0; }; - class persistent_space_map { + class persistent_space_map : public space_map { public: + typedef boost::shared_ptr ptr; + virtual size_t root_size() = 0; virtual void copy_root(void *dest, size_t len) = 0; }; diff --git a/space_map_disk.cc b/space_map_disk.cc new file mode 100644 index 0000000..cab39f9 --- /dev/null +++ b/space_map_disk.cc @@ -0,0 +1,7 @@ +#include "space_map_disk.h" + + +//---------------------------------------------------------------- + + +//---------------------------------------------------------------- diff --git a/space_map_disk.h b/space_map_disk.h new file mode 100644 index 0000000..7ea69ce --- /dev/null +++ b/space_map_disk.h @@ -0,0 +1,338 @@ +#ifndef SPACE_MAP_DISK_H +#define SPACE_MAP_DISK_H + +#include "space_map.h" +#include "transaction_manager.h" +#include "endian.h" +#include "space_map_disk_structures.h" +#include "math.h" + +//---------------------------------------------------------------- + +namespace persistent_data { + + namespace sm_disk_detail { + using namespace base; + using namespace persistent_data; + + template + class bitmap { + public: + bitmap(typename transaction_manager::ptr tm, + index_entry const &ie) + : tm_(tm), + ie_(ie) { + } + + ref_t lookup(unsigned b) const { + auto rr = tm_->read_lock(ie_.blocknr_); + void const *bits = bitmap_data(rr); + ref_t b1 = test_bit_le(bits, b * 2); + ref_t b2 = test_bit_le(bits, b * 2 + 1); + ref_t result = b2 ? 1 : 0; + result |= b1 ? 0b10 : 0; + return result; + } + + void insert(unsigned b, ref_t n) { + auto wr = tm_->shadow(ie_.blocknr_).first; + void *bits = bitmap_data(wr); + bool was_free = !test_bit_le(bits, b * 2) && !test_bit_le(bits, b * 2 + 1); + if (n == 1 || n == 3) + set_bit_le(bits, b * 2 + 1); + else + clear_bit_le(bits, b * 2 + 1); + + if (n == 2 || n == 3) + set_bit_le(bits, b * 2); + else + clear_bit_le(bits, b * 2); + + ie_.blocknr_ = wr.get_location(); + + if (was_free && n > 0) { + ie_.nr_free_--; + if (b == ie_.none_free_before_) + ie_.none_free_before_++; + } + + if (!was_free && n == 0) { + ie_.nr_free_++; + if (b < ie_.none_free_before_) + ie_.none_free_before_ = b; + } + } + + unsigned find_free(unsigned end) { + for (unsigned i = ie_.none_free_before_; i < end; i++) { + if (lookup(i) == 0) { + insert(i, 1); + return i; + } + } + + throw std::runtime_error("no free entry in bitmap"); + } + + index_entry const &get_ie() const { + return ie_; + } + + private: + void *bitmap_data(typename transaction_manager::write_ref &wr) { + bitmap_header *h = reinterpret_cast(&wr.data()[0]); + return h + 1; + } + + void const *bitmap_data(typename transaction_manager::read_ref &rr) const { + bitmap_header const *h = reinterpret_cast(&rr.data()[0]); + return h + 1; + } + + typename transaction_manager::ptr tm_; + index_entry ie_; + }; + + struct ref_count_traits { + typedef __le32 disk_type; + typedef uint32_t value_type; + typedef NoOpRefCounter ref_counter; + + static void unpack(disk_type const &d, value_type &v) { + v = to_cpu(d); + } + + static void pack(value_type const &v, disk_type &d) { + d = to_disk(v); + } + }; + + template + class sm_disk : public persistent_space_map { + public: + typedef boost::shared_ptr > ptr; + + sm_disk(typename transaction_manager::ptr tm, + block_address nr_blocks) + : tm_(tm), + entries_per_block_((BlockSize - sizeof(bitmap_header)) * 4), + nr_blocks_(0), + nr_allocated_(0), + bitmaps_(tm_, typename sm_disk_detail::index_entry_traits::ref_counter()), + ref_counts_(tm_, ref_count_traits::ref_counter()) { + + extend(nr_blocks); + } + + sm_disk(typename transaction_manager::ptr tm, + sm_root const &root) + : tm_(tm), + nr_blocks_(root.nr_blocks_), + nr_allocated_(root.nr_allocated_), + bitmaps_(tm_, root.bitmap_root_, typename sm_disk::index_entry_traits::ref_counter()), + ref_counts_(tm_, root.ref_count_root_, typename ref_count_traits::ref_counter()) { + } + + block_address get_nr_blocks() const { + return nr_blocks_; + } + + block_address get_nr_free() const { + return nr_blocks_ - nr_allocated_; + } + + ref_t get_count(block_address b) const { + auto count = lookup_bitmap(b); + if (count == 3) + return lookup_ref_count(b); + + return count; + } + + void set_count(block_address b, ref_t c) { + ref_t old = get_count(b); + + if (c == old) + return; + + if (c > 2) { + if (old < 3) + insert_bitmap(b, 3); + insert_ref_count(b, c); + } else { + if (old > 2) + remove_ref_count(b); + insert_bitmap(b, c); + } + + if (old == 0) + nr_allocated_++; + else if (c == 0) + nr_allocated_--; + } + + void commit() { + } + + void inc(block_address b) { + // FIXME: 2 get_counts + ref_t old = get_count(b); + set_count(b, old + 1); + } + + void dec(block_address b) { + ref_t old = get_count(b); + set_count(b, old - 1); + } + + block_address new_block() { + // silly to always start searching from the + // beginning. + block_address nr_indexes = div_up(nr_blocks_, entries_per_block_); + for (block_address index = 0; index < nr_indexes; index++) { + uint64_t key[1] = {index}; + auto mie = bitmaps_.lookup(key); + + if (!mie) + throw runtime_error("bitmap entry missing from btree"); + + bitmap bm(tm_, *mie); + block_address b = bm.find_free((index == nr_indexes - 1) ? + nr_blocks_ % entries_per_block_ : entries_per_block_); + bitmaps_.insert(key, bm.get_ie()); + nr_allocated_++; + b = (index * entries_per_block_) + b; + assert(get_count(b) == 1); + return b; + } + + throw runtime_error("out of space"); + } + + bool count_possibly_greater_than_one(block_address b) const { + return get_count(b) > 1; + } + + size_t root_size() { + return sizeof(sm_root_disk); + } + + void copy_root(void *dest, size_t len) { + sm_root_disk d; + sm_root v; + + if (len < sizeof(d)) + throw runtime_error("root too small"); + + v.nr_blocks_ = nr_blocks_; + v.nr_allocated_ = nr_allocated_; + v.bitmap_root_ = bitmaps_.get_root(); + v.ref_count_root_ = ref_counts_.get_root(); + sm_root_traits::pack(v, d); + ::memcpy(dest, &d, sizeof(d)); + } + + private: + void extend(block_address extra_blocks) { + block_address nr_blocks = nr_blocks_ + extra_blocks; + + block_address bitmap_count = div_up(nr_blocks, entries_per_block_); + block_address old_bitmap_count = div_up(nr_blocks_, entries_per_block_); + for (block_address i = old_bitmap_count; i < bitmap_count; i++) { + auto wr = tm_->new_block(); + + struct index_entry ie; + ie.blocknr_ = wr.get_location(); + ie.nr_free_ = i == (bitmap_count - 1) ? + (nr_blocks % entries_per_block_) : entries_per_block_; + ie.none_free_before_ = 0; + + uint64_t key[1] = {i}; + bitmaps_.insert(key, ie); + } + + nr_blocks_ = nr_blocks; + } + + ref_t lookup_bitmap(block_address b) const { + uint64_t key[1] = {b / entries_per_block_}; + auto mindex = bitmaps_.lookup(key); + if (!mindex) + throw runtime_error("Couldn't lookup bitmap"); + + bitmap bm(tm_, *mindex); + return bm.lookup(b % entries_per_block_); + } + + void insert_bitmap(block_address b, unsigned n) { + if (n > 3) + throw runtime_error("bitmap can only hold 2 bit values"); + + uint64_t key[1] = {b / entries_per_block_}; + auto mindex = bitmaps_.lookup(key); + if (!mindex) + throw runtime_error("Couldn't lookup bitmap"); + + bitmap bm(tm_, *mindex); + bm.insert(b % entries_per_block_, n); + bitmaps_.insert(key, bm.get_ie()); + } + + ref_t lookup_ref_count(block_address b) const { + uint64_t key[1] = {b}; + auto mvalue = ref_counts_.lookup(key); + if (!mvalue) + throw runtime_error("ref count not in tree"); + return *mvalue; + } + + void insert_ref_count(block_address b, ref_t count) { + uint64_t key[1] = {b}; + ref_counts_.insert(key, count); + } + + void remove_ref_count(block_address b) { + uint64_t key[1] = {b}; + ref_counts_.remove(key); + } + + typename transaction_manager::ptr tm_; + uint32_t entries_per_block_; + block_address nr_blocks_; + block_address nr_allocated_; + + btree<1, index_entry_traits, BlockSize> bitmaps_; + btree<1, ref_count_traits, BlockSize> ref_counts_; + }; + } + + template + persistent_space_map::ptr + create_disk_sm(typename transaction_manager::ptr tm, + block_address nr_blocks) + { + using namespace sm_disk_detail; + return typename persistent_space_map::ptr( + new sm_disk(tm, nr_blocks)); + } + + template + persistent_space_map::ptr + open_disk_sm(typename transaction_manager::ptr tm, + void *root) + { + using namespace sm_disk_detail; + + sm_root_disk d; + sm_root v; + + ::memcpy(&d, root, sizeof(d)); + sm_root_traits::unpack(d, v); + return typename persistent_space_map::ptr( + new sm_disk(tm, v)); + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/space_map_disk_structures.h b/space_map_disk_structures.h new file mode 100644 index 0000000..54f688d --- /dev/null +++ b/space_map_disk_structures.h @@ -0,0 +1,98 @@ +#ifndef SPACE_MAP_DISK_STRUCTURES_H +#define SPACE_MAP_DISK_STRUCTURES_H + +#include "endian.h" +#include "btree.h" + +//---------------------------------------------------------------- + +namespace persistent_data { + using namespace base; + + namespace sm_disk_detail { + struct index_entry_disk { + __le64 blocknr_; + __le32 nr_free_; + __le32 none_free_before_; + } __attribute__ ((packed)); + + struct index_entry { + uint64_t blocknr_; + uint32_t nr_free_; + uint32_t none_free_before_; + }; + + struct index_entry_traits { + typedef index_entry_disk disk_type; + typedef index_entry value_type; + typedef NoOpRefCounter ref_counter; + + static void unpack(disk_type const &disk, value_type &value) { + value.blocknr_ = to_cpu(disk.blocknr_); + value.nr_free_ = to_cpu(disk.nr_free_); + value.none_free_before_ = to_cpu(disk.none_free_before_); + } + + static void pack(value_type const &value, disk_type &disk) { + disk.blocknr_ = to_disk<__le64>(value.blocknr_); + disk.nr_free_ = to_disk<__le32>(value.nr_free_); + disk.none_free_before_ = to_disk<__le32>(value.none_free_before_); + } + }; + + unsigned const MAX_METADATA_BITMAPS = 255; + unsigned const ENTRIES_PER_BYTE = 4; + + struct metadata_index { + __le32 csum_; + __le32 padding_; + __le64 blocknr_; + + struct index_entry index[MAX_METADATA_BITMAPS]; + } __attribute__ ((packed)); + + struct sm_root_disk { + __le64 nr_blocks_; + __le64 nr_allocated_; + __le64 bitmap_root_; + __le64 ref_count_root_; + } __attribute__ ((packed)); + + struct sm_root { + uint64_t nr_blocks_; + uint64_t nr_allocated_; + uint64_t bitmap_root_; + uint64_t ref_count_root_; + }; + + struct sm_root_traits { + typedef sm_root_disk disk_type; + typedef sm_root value_type; + typedef NoOpRefCounter ref_counter; + + static void unpack(disk_type const &disk, value_type &value) { + value.nr_blocks_ = to_cpu(disk.nr_blocks_); + value.nr_allocated_ = to_cpu(disk.nr_allocated_); + value.bitmap_root_ = to_cpu(disk.bitmap_root_); + value.ref_count_root_ = to_cpu(disk.ref_count_root_); + } + + static void pack(value_type const &value, disk_type &disk) { + disk.nr_blocks_ = to_disk<__le64>(value.nr_blocks_); + disk.nr_allocated_ = to_disk<__le64>(value.nr_allocated_); + disk.bitmap_root_ = to_disk<__le64>(value.bitmap_root_); + disk.ref_count_root_ = to_disk<__le64>(value.ref_count_root_); + } + }; + + struct bitmap_header { + __le32 csum; + __le32 not_used; + __le64 blocknr; + } __attribute__ ((packed)); + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/space_map_disk_t.cc b/space_map_disk_t.cc new file mode 100644 index 0000000..015ec60 --- /dev/null +++ b/space_map_disk_t.cc @@ -0,0 +1,108 @@ +#include "space_map_disk.h" +#include "core_map.h" + +#define BOOST_TEST_MODULE SpaceMapDiskTests +#include + +using namespace std; +using namespace boost; +using namespace persistent_data; + +//---------------------------------------------------------------- + +namespace { + block_address const NR_BLOCKS = 1023; + block_address const SUPERBLOCK = 0; + unsigned const BLOCK_SIZE = 4096; + + transaction_manager::ptr + create_tm() { + block_manager::ptr bm( + new block_manager("./test.data", NR_BLOCKS)); + space_map::ptr sm(new core_map(1024)); + transaction_manager::ptr tm( + new transaction_manager(bm, sm)); + return tm; + } + + persistent_space_map::ptr + create_sm_disk() { + auto tm = create_tm(); + return persistent_data::create_disk_sm(tm, NR_BLOCKS); + } +} + +//---------------------------------------------------------------- + +BOOST_AUTO_TEST_CASE(reopen_an_sm) +{ + auto sm = create_sm_disk(); +} + +BOOST_AUTO_TEST_CASE(test_get_nr_blocks) +{ + auto sm = create_sm_disk(); + BOOST_CHECK_EQUAL(sm->get_nr_blocks(), NR_BLOCKS); +} + +BOOST_AUTO_TEST_CASE(test_get_nr_free) +{ + auto sm = create_sm_disk(); + BOOST_CHECK_EQUAL(sm->get_nr_free(), NR_BLOCKS); + + for (unsigned i = 0; i < NR_BLOCKS; i++) { + sm->new_block(); + BOOST_CHECK_EQUAL(sm->get_nr_free(), NR_BLOCKS - i - 1); + } + + for (unsigned i = 0; i < NR_BLOCKS; i++) { + sm->dec(i); + BOOST_CHECK_EQUAL(sm->get_nr_free(), i + 1); + } +} + +BOOST_AUTO_TEST_CASE(test_throws_no_space) +{ + auto sm = create_sm_disk(); + for (unsigned i = 0; i < NR_BLOCKS; i++) + sm->new_block(); + + BOOST_CHECK_THROW(sm->new_block(), std::runtime_error); +} + +BOOST_AUTO_TEST_CASE(test_inc_and_dec) +{ + auto sm = create_sm_disk(); + block_address b = 63; + + for (unsigned i = 0; i < 50; i++) { + BOOST_CHECK_EQUAL(sm->get_count(b), i); + sm->inc(b); + } + + for (unsigned i = 50; i > 0; i--) { + BOOST_CHECK_EQUAL(sm->get_count(b), i); + sm->dec(b); + } +} + +BOOST_AUTO_TEST_CASE(test_not_allocated_twice) +{ + auto sm = create_sm_disk(); + block_address b = sm->new_block(); + + try { + for (;;) + BOOST_CHECK(sm->new_block() != b); + } catch (...) { + } +} + +BOOST_AUTO_TEST_CASE(test_set_count) +{ + auto sm = create_sm_disk(); + sm->set_count(43, 5); + BOOST_CHECK_EQUAL(sm->get_count(43), 5); +} + +//---------------------------------------------------------------- diff --git a/space_map_t.cc b/space_map_t.cc index 3024629..56d9c48 100644 --- a/space_map_t.cc +++ b/space_map_t.cc @@ -11,11 +11,6 @@ unsigned const NR_BLOCKS = 1024; //---------------------------------------------------------------- -namespace { -} - -//---------------------------------------------------------------- - BOOST_AUTO_TEST_CASE(test_get_nr_blocks) { core_map sm(NR_BLOCKS); diff --git a/transaction_manager.h b/transaction_manager.h index 2ff65c3..859f082 100644 --- a/transaction_manager.h +++ b/transaction_manager.h @@ -11,7 +11,7 @@ namespace persistent_data { template - class transaction_manager : public boost::noncopyable { + class transaction_manager : boost::noncopyable { public: typedef boost::shared_ptr > ptr; typedef typename block_manager::read_ref read_ref; @@ -45,6 +45,10 @@ namespace persistent_data { return sm_; } + typename block_manager::ptr get_bm() { + return bm_; + } + private: void add_shadow(block_address b); void remove_shadow(block_address b);