diff --git a/Makefile b/Makefile index c50d653..a3cddf4 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,18 @@ SOURCE=\ - block.cc \ main.cc \ - metadata.cc \ - transaction_manager.cc + metadata.cc OBJECTS=$(subst .cc,.o,$(SOURCE)) CPPFLAGS=-Wall -std=c++0x INCLUDES= LIBS=-lstdc++ +.SUFFIXES: .cc .o .d -.SUFFIXES: .cc .o +%.d: %.cc + $(CC) -MM $(CPPFLAGS) $< > $@.$$$$; \ + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ .cc.o: g++ -c $(CPPFLAGS) $(INCLUDES) -o $@ $< @@ -18,7 +20,4 @@ LIBS=-lstdc++ multisnap_display: $(OBJECTS) g++ -o $@ $+ $(LIBS) -main.o: block.h -block.o: block.h -transaction_manager.o: transaction_manager.h block.h -metadata.o: block.h transaction_manager.h btree.h metadata.h \ No newline at end of file +include $(subst .cc,.d,$(SOURCE)) \ No newline at end of file diff --git a/block.h b/block.h index d6f4fca..7ca2d41 100644 --- a/block.h +++ b/block.h @@ -30,6 +30,8 @@ namespace persistent_data { class block_validator { public: + typedef boost::shared_ptr ptr; + virtual ~block_validator() {} virtual void check(block const &b) const = 0; @@ -37,7 +39,7 @@ namespace persistent_data { }; struct block { - typedef boost::optional maybe_validator; + typedef boost::optional maybe_validator; block(block_address location, maybe_validator v = maybe_validator()) @@ -47,7 +49,7 @@ namespace persistent_data { block_address location_; buffer data_; - boost::optional validator_; + maybe_validator validator_; }; typedef boost::shared_ptr block_ptr; @@ -67,6 +69,8 @@ namespace persistent_data { // locked. class write_ref : public read_ref { public: + write_ref(block_ptr b); + using read_ref::data; buffer &data(); }; @@ -86,16 +90,20 @@ namespace persistent_data { // Validator variants read_ref - read_lock(block_address location, block_validator const &v); + read_lock(block_address location, + typename block_validator::ptr const &v); boost::optional - read_try_lock(block_address location, block_validator const &v); + read_try_lock(block_address location, + typename block_validator::ptr const &v); write_ref - write_lock(block_address location, block_validator const &v); + write_lock(block_address location, + typename block_validator::ptr const &v); write_ref - write_lock_zero(block_address location, block_validator const &v); + write_lock_zero(block_address location, + typename block_validator::ptr const &v); // Use this to commit changes void flush(write_ref super_block); @@ -111,6 +119,8 @@ namespace persistent_data { }; } +#include "block.tcc" + //---------------------------------------------------------------- #endif diff --git a/block.tcc b/block.tcc index 4acb60a..2547610 100644 --- a/block.tcc +++ b/block.tcc @@ -5,11 +5,13 @@ #include #include +#include #include #include using namespace boost; using namespace persistent_data; +using namespace std; //---------------------------------------------------------------- @@ -33,6 +35,12 @@ block_manager::read_ref::data() const return block_->data_; } +template +block_manager::write_ref::write_ref(block_manager::block_ptr b) + : read_ref(b) +{ +} + template typename block_manager::buffer & block_manager::write_ref::data() @@ -61,7 +69,7 @@ typename block_manager::read_ref block_manager::read_lock(block_address location) { block_ptr b(new block(location)); - read_block(b); + read_block(*b); return read_ref(b); } @@ -76,8 +84,8 @@ template typename block_manager::write_ref block_manager::write_lock(block_address location) { - block_ptr b(new block(location), write_and_release); - read_block(b); + block_ptr b(new block(location), bind(&block_manager::write_and_release, this, _1)); + read_block(*b); return write_ref(b); } @@ -85,25 +93,25 @@ template typename block_manager::write_ref block_manager::write_lock_zero(block_address location) { - block_ptr b(new block(location), write_and_release); - zero_block(b); + block_ptr b(new block(location), bind(&block_manager::write_and_release, this, _1)); + zero_block(*b); return write_ref(b); } template typename block_manager::read_ref block_manager::read_lock(block_address location, - block_manager::block_validator const &v) + typename block_manager::block_validator::ptr const &v) { block_ptr b(new block(location, v)); - read_block(b); + read_block(*b); return read_ref(b); } template optional::read_ref> block_manager::read_try_lock(block_address location, - block_manager::block_validator const &v) + typename block_manager::block_validator::ptr const &v) { return read_lock(location, v); } @@ -111,20 +119,20 @@ block_manager::read_try_lock(block_address location, template typename block_manager::write_ref block_manager::write_lock(block_address location, - block_manager::block_validator const &v) + typename block_manager::block_validator::ptr const &v) { block_ptr b(new block(location, v), write_and_release); - read_block(b); + read_block(*b); return write_ref(b); } template typename block_manager::write_ref block_manager::write_lock_zero(block_address location, - block_manager::block_validator const &v) + typename block_manager::block_validator::ptr const &v) { block_ptr b(new block(location, v), write_and_release); - zero_block(b); + zero_block(*b); return write_ref(b); } @@ -140,16 +148,16 @@ template void block_manager::read_block(block &b) { - std::cerr << "reading block: " << b->location << std::endl; + std::cerr << "reading block: " << b.location_ << std::endl; off_t r; - r = ::lseek(fd_, BlockSize * b->location_, SEEK_SET); + r = ::lseek(fd_, BlockSize * b.location_, SEEK_SET); if (r == (off_t) -1) throw std::runtime_error("lseek failed"); ssize_t n; size_t remaining = BlockSize; - unsigned char *buf = b->data_; + unsigned char *buf = b.data_; do { n = ::read(fd_, buf, remaining); if (n > 0) { @@ -167,18 +175,18 @@ void block_manager::write_block(block const &b) { - std::cerr << "writing block: " << b->location << std::endl; + std::cerr << "writing block: " << b.location_ << std::endl; off_t r; - r = ::lseek(fd_, BlockSize * b->location_, SEEK_SET); + r = ::lseek(fd_, BlockSize * b.location_, SEEK_SET); if (r == (off_t) -1) throw std::runtime_error("lseek failed"); ssize_t n; size_t remaining = BlockSize; - unsigned char *buf = b->data_; + unsigned char const *buf = b.data_; do { - n = ::read(fd_, buf, remaining); + n = ::write(fd_, buf, remaining); if (n > 0) { remaining -= n; buf += n; @@ -193,14 +201,14 @@ template void block_manager::zero_block(block &b) { - memset(b->data_, 0, BlockSize); + memset(b.data_, 0, BlockSize); } template void block_manager::write_and_release(block *b) { - write_block(b); + write_block(*b); delete b; } diff --git a/btree.h b/btree.h index 021be15..b0ac709 100644 --- a/btree.h +++ b/btree.h @@ -1,6 +1,7 @@ #ifndef BTREE_H #define BTREE_H +#include "endian.h" #include "transaction_manager.h" //---------------------------------------------------------------- @@ -17,8 +18,8 @@ namespace persistent_data { typedef typename block_manager::read_ref read_ref; typedef typename block_manager::write_ref write_ref; - btree(boost::shared_ptr > tm); - btree(boost::shared_ptr > tm, + btree(typename persistent_data::transaction_manager::ptr tm); + btree(typename transaction_manager::ptr tm, block_address root); ~btree(); @@ -42,8 +43,23 @@ namespace persistent_data { bool destroy_; block_address root_; }; + + struct uint64_traits { + typedef base::__le64 disk_type; + typedef uint64_t value_type; + + static value_type construct(void *data) { + // extra memcpy because I'm paranoid about alignment issues + base::__le64 disk; + + ::memcpy(&disk, data, sizeof(disk)); + return base::to_cpu(disk); + } + }; }; +#include "btree.tcc" + //---------------------------------------------------------------- #endif diff --git a/btree.tcc b/btree.tcc index 3f6d477..d060136 100644 --- a/btree.tcc +++ b/btree.tcc @@ -1,14 +1,20 @@ #include "btree.h" +#include "endian.h" +#include "transaction_manager.h" + +#include #include #include +// FIXME: can't have using clauses in a header +using namespace base; using namespace boost; using namespace persistent_data; //---------------------------------------------------------------- -namespace { +namespace btree_detail { //------------------------------------------------ // On disk data layout for btree nodes enum node_flags { @@ -25,7 +31,7 @@ namespace { __le32 max_entries; } __attribute__((packed)); - struct node { + struct disk_node { struct node_header header; __le64 keys[0]; } __attribute__((packed)); @@ -34,34 +40,76 @@ namespace { //------------------------------------------------ // Class that acts as an interface over the raw little endian btree // node data. - class node { + class node_ref { public: enum type { INTERNAL, LEAF }; - type get_type() const; - void set_type(type t); + node_ref(disk_node *raw) + : raw_(raw) { + } - unsigned get_nr_entries() const; - void set_nr_entries(unsigned n); + type get_type() const { + uint32_t flags = to_cpu(raw_->header.flags); + if (flags & INTERNAL_NODE) + return INTERNAL; + else if (flags & LEAF_NODE) + return LEAF; + else + throw runtime_error("unknow node type"); + } - unsigned get_max_entries() const; - void set_max_entries(unsigned n); + void set_type(type t){ + uint32_t flags = to_cpu(raw_->header.flags); + switch (t) { + case INTERNAL: + flags |= INTERNAL_NODE; + break; - uint64_t key_at(unsigned i) const; + case LEAF: + flags |= LEAF_NODE; + break; + } + raw_->header.flags = to_disk<__le32>(flags); + } + + unsigned get_nr_entries() const { + return to_cpu(raw_->header.nr_entries); + } + + void set_nr_entries(unsigned n) { + raw_->header.nr_entries = to_disk<__le32>(n); + } + + unsigned get_max_entries() const { + return to_cpu(raw_->header.max_entries); + } + + void set_max_entries(unsigned n) { + raw_->header.max_entries = to_disk<__le32>(n); + } + + uint64_t key_at(unsigned i) const { + return to_cpu(raw_->keys[i]); + } template - typename ValueTraits::value_type value_at(unsigned i) const; + typename ValueTraits::value_type value_at(unsigned i) const { + void *value_base = &raw_->keys[to_cpu(raw_->header.max_entries)]; + void *value_ptr = static_cast(value_base) + + sizeof(typename ValueTraits::disk_type) * i; + return ValueTraits::construct(value_ptr); + } private: - struct node *raw_; + disk_node *raw_; }; //------------------------------------------------ // Various searches - int bsearch(node const &n, uint64_t key, int want_hi) + int bsearch(node_ref const &n, uint64_t key, int want_hi) { int lo = -1, hi = n.get_nr_entries(); @@ -81,7 +129,7 @@ namespace { return want_hi ? hi : lo; } - optional exact_search(node const &n, uint64_t key) { + optional exact_search(node_ref const &n, uint64_t key) { int i = bsearch(n, key, 0); if (i < 0 || static_cast(i) >= n.get_nr_entries()) return optional(); @@ -92,7 +140,19 @@ namespace { //------------------------------------------------ // template - node &to_node(typename block_manager::write_ref b); + node_ref to_node(typename block_manager::read_ref &b) + { + // FIXME: this should return a const read_ref somehow. + return node_ref( + reinterpret_cast(const_cast(b.data()))); + } + + template + node_ref to_node(typename block_manager::write_ref &b) + { + return node_ref( + reinterpret_cast(const_cast(b.data()))); + } unsigned calc_max_entries(uint32_t bs); @@ -101,62 +161,103 @@ namespace { template class ro_spine : private noncopyable { public: - void step(block_address b); - node get_node() const; + ro_spine(typename transaction_manager::ptr tm) + : tm_(tm) { + } + + void step(block_address b) { + spine_.push_back(tm_->read_lock(b)); + if (spine_.size() > 2) + spine_.pop_front(); + } + + node_ref get_node() { + return to_node(spine_.back()); + } private: - + typename transaction_manager::ptr tm_; + std::list::read_ref> spine_; }; - class internal_traits { + template + class shadow_spine : private noncopyable { public: - typedef uint64_t value_type; + shadow_spine(typename transaction_manager::ptr tm) + : tm_(tm) { + } + + void step(block_address b) { + spine_.push_back(tm_->shadow(b)); + if (spine_.size() == 1) + root_ = spine_.front().get_location(); + else if (spine_.size() > 2) + spine_.pop_front(); + } + + node_ref get_node() { + return to_node(spine_.back()); + } + + node_ref get_parent() { + if (spine_.size() < 2) + throw std::runtime_error("no parent"); + + return to_node(spine_.front()); + } + + node_ref get_root() { + return root_; + } + + private: + typename transaction_manager::ptr tm_; + std::list::write_ref> spine_; + block_address root_; }; - template + template //, typename Search> optional lookup_raw(ro_spine &spine, block_address block, uint64_t key) { using namespace boost; typedef typename ValueTraits::value_type leaf_type; - typedef typename internal_traits::value_type internal_type; - - Search find; for (;;) { spine.step(block); - node &n = spine.node(); + node_ref const &n = spine.get_node(); - auto mi = find(n, key); + auto mi = exact_search(n, key); if (!mi) return optional(); - if (n.get_type() == node::LEAF) - return optional(n.value_at(*mi)); + if (n.get_type() == node_ref::LEAF) + return optional(n.value_at(*mi)); - block = n.value_at(*mi); + block = n.value_at(*mi); } } } - template -btree::btree(shared_ptr > tm) +btree::btree(typename transaction_manager::ptr tm) : tm_(tm), destroy_(false) { - write_ref root = tm_.new_block(); + using namespace btree_detail; - node &n = to_node(root); - n.set_type(node::LEAF); + write_ref root = tm_->new_block(); + + node_ref n = to_node(root); + n.set_type(node_ref::LEAF); n.set_nr_entries(0); n.set_max_entries(calc_max_entries(BlockSize)); - root_ = root.location(); + root_ = root.get_location(); } template -btree::btree(shared_ptr > tm, +btree::btree(typename transaction_manager::ptr tm, block_address root) : tm_(tm), destroy_(false), @@ -174,24 +275,29 @@ template typename btree::maybe_value btree::lookup(key const &key) const { - ro_spine spine; + using namespace btree_detail; + + ro_spine spine(tm_); block_address root = root_; for (unsigned level = 0; level < Levels - 1; ++level) { - auto mroot = lookup_raw(spine, root, key[level]); + optional mroot = + lookup_raw(spine, root, key[level]); if (!mroot) return maybe_value(); root = *mroot; } - return lookup_raw(spine, root, key[Levels - 1]); + return lookup_raw(spine, root, key[Levels - 1]); } template typename btree::maybe_pair btree::lookup_le(key const &key) const { + using namespace btree_detail; + return maybe_pair(); } @@ -199,6 +305,8 @@ template typename btree::maybe_pair btree::lookup_ge(key const &key) const { + using namespace btree_detail; + return maybe_pair(); } @@ -207,6 +315,7 @@ template void btree::insert(key const &key, typename ValueTraits::value_type const &value) { + using namespace btree_detail; } @@ -214,6 +323,7 @@ template void btree::remove(key const &key) { + using namespace btree_detail; } @@ -228,6 +338,7 @@ template void btree::set_root(block_address root) { + using namespace btree_detail; } @@ -235,6 +346,7 @@ template block_address btree::get_root() const { + using namespace btree_detail; } @@ -242,6 +354,7 @@ template ptr btree::clone() const { + using namespace btree_detail; } @@ -249,6 +362,7 @@ template void btree::destroy() { + using namespace btree_detail; } #endif diff --git a/endian.h b/endian.h new file mode 100644 index 0000000..dd2b42a --- /dev/null +++ b/endian.h @@ -0,0 +1,83 @@ +#ifndef ENDIAN_H +#define ENDIAN_H + +#include + +//---------------------------------------------------------------- + +namespace base { + + // These are just little wrapper types to make the compiler + // understand that the le types are not assignable to the + // corresponding cpu type. + + struct __le16 { + explicit __le16(uint16_t v = 0.0) + : v_(v) { + } + + uint16_t v_; + } __attribute__((packed)); + + struct __le32 { + explicit __le32(uint32_t v = 0.0) + : v_(v) { + } + + uint32_t v_; + } __attribute__((packed)); + + struct __le64 { + explicit __le64(uint64_t v = 0.0) + : v_(v) { + } + + uint64_t v_; + } __attribute__((packed)); + + //-------------------------------- + + template + CPUType to_cpu(DiskType const &d) { + BOOST_STATIC_ASSERT(sizeof(d) == 0); + } + + template + DiskType to_disk(CPUType const &v) { + BOOST_STATIC_ASSERT(sizeof(v) == 0); + } + + template <> + uint16_t to_cpu(__le16 const &d) { + return d.v_; + } + + template <> + __le16 to_disk<__le16, uint16_t>(uint16_t const &v) { + return __le16(v); + } + + template <> + uint32_t to_cpu(__le32 const &d) { + return d.v_; + } + + template <> + __le32 to_disk<__le32, uint32_t>(uint32_t const &v) { + return __le32(v); + } + + template <> + uint64_t to_cpu(__le64 const &d) { + return d.v_; + } + + template <> + __le64 to_disk<__le64, uint64_t>(uint64_t const &v) { + return __le64(v); + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/metadata.cc b/metadata.cc index 260bdda..10701f5 100644 --- a/metadata.cc +++ b/metadata.cc @@ -115,7 +115,7 @@ metadata::create_thin(dev_t dev) throw std::runtime_error("Device already exists"); single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_)); - mappings_top_level_.insert(key, *new_tree); + mappings_top_level_.insert(key, new_tree->get_root()); mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly } @@ -125,12 +125,14 @@ metadata::create_snap(dev_t dev, dev_t origin) uint64_t snap_key[1] = {dev}; uint64_t origin_key[1] = {origin}; - auto mtree = mappings_top_level_.lookup(origin_key); - if (!mtree) + auto mtree_root = mappings_top_level_.lookup(origin_key); + if (!mtree_root) throw std::runtime_error("unknown origin"); - single_mapping_tree::ptr clone(mtree->clone()); - mappings_top_level_.insert(snap_key, *clone); + single_mapping_tree otree(tm_, *mtree_root); + + single_mapping_tree::ptr clone(otree.clone()); + mappings_top_level_.insert(snap_key, clone->get_root()); mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly time_++; diff --git a/metadata.h b/metadata.h index df86af4..7dec5dd 100644 --- a/metadata.h +++ b/metadata.h @@ -17,6 +17,54 @@ namespace multisnap { typedef uint64_t sector_t; + struct device_details_disk { + __le64 dev_size; + __le64 mapped_blocks; + __le64 transaction_id; /* when created */ + __le32 creation_time; + __le32 snapshotted_time; + } __attribute__ ((packed)); + + struct device_details { + uint64_t dev_size; + uint64_t mapped_blocks; + uint64_t transaction_id; /* when created */ + uint32_t creation_time; + uint32_t snapshotted_time; + }; + + struct detail_traits { + typedef device_details_disk disk_type; + typedef device_details value_type; + + static value_type construct(void *data) { + struct device_details_disk disk; + struct device_details cpu; + + ::memcpy(&disk, data, sizeof(disk)); + cpu.dev_size = to_cpu(disk.dev_size); + cpu.mapped_blocks = to_cpu(disk.mapped_blocks); + cpu.transaction_id = to_cpu(disk.transaction_id); + cpu.creation_time = to_cpu(disk.creation_time); + cpu.snapshotted_time = to_cpu(disk.snapshotted_time); + + return cpu; + } + }; +#if 0 + class dev_traits { + public: + typedef base::__le64 disk_type; + typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> value_type; + + static value_type construct(void *data) { + uint64_t root = uint64_traits::construct(data); + + return value_type + } + }; +#endif + class metadata { public: typedef boost::shared_ptr ptr; @@ -75,29 +123,14 @@ namespace multisnap { bool device_exists(dev_t dev) const; - class detail_traits { - public: - typedef uint64_t value_type; - }; - - class map_traits { - public: - typedef block_address value_type; - }; - - class dev_traits { - public: - typedef persistent_data::btree<1, map_traits, BLOCK_SIZE> value_type; - }; - uint32_t time_; persistent_data::transaction_manager::ptr tm_; typedef persistent_data::btree<1, detail_traits, BLOCK_SIZE> detail_tree; - typedef persistent_data::btree<1, dev_traits, BLOCK_SIZE> dev_tree; - typedef persistent_data::btree<2, map_traits, BLOCK_SIZE> mapping_tree; - typedef persistent_data::btree<1, map_traits, BLOCK_SIZE> single_mapping_tree; + typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> dev_tree; + typedef persistent_data::btree<2, uint64_traits, BLOCK_SIZE> mapping_tree; + typedef persistent_data::btree<1, uint64_traits, BLOCK_SIZE> single_mapping_tree; detail_tree details_; dev_tree mappings_top_level_; diff --git a/transaction_manager.h b/transaction_manager.h index fb6af05..91ab028 100644 --- a/transaction_manager.h +++ b/transaction_manager.h @@ -54,6 +54,8 @@ namespace persistent_data { }; } +#include "transaction_manager.tcc" + //---------------------------------------------------------------- #endif