#ifndef BTREE_H #define BTREE_H #include "endian.h" #include "transaction_manager.h" #include #include #include //---------------------------------------------------------------- namespace persistent_data { struct uint64_traits { typedef base::__le64 disk_type; typedef uint64_t value_type; static void unpack(disk_type const &disk, value_type &value) { value = base::to_cpu(disk); } static void pack(value_type const &value, disk_type &disk) { disk = base::to_disk(value); } }; namespace btree_detail { using namespace base; using namespace std; using namespace boost; //------------------------------------------------ // On disk data layout for btree nodes enum node_flags { INTERNAL_NODE = 1, LEAF_NODE = 1 << 1 }; struct node_header { __le32 csum; __le32 flags; __le64 blocknr; /* which block this node is supposed to live in */ __le32 nr_entries; __le32 max_entries; } __attribute__((packed)); struct disk_node { struct node_header header; __le64 keys[0]; } __attribute__((packed)); enum node_type { INTERNAL, LEAF }; //------------------------------------------------ // Class that acts as an interface over the raw little endian btree // node data. template class node_ref { public: explicit node_ref(disk_node *raw); node_type get_type() const; void set_type(node_type t); unsigned get_nr_entries() const; void set_nr_entries(unsigned n); unsigned get_max_entries() const; void set_max_entries(unsigned n); void set_max_entries(); // calculates the max for you. uint64_t key_at(unsigned i) const; void set_key(unsigned i, uint64_t k); typename ValueTraits::value_type value_at(unsigned i) const; void set_value(unsigned i, typename ValueTraits::value_type const &v); // Increments the nr_entries field void insert_at(unsigned i, uint64_t key, typename ValueTraits::value_type const &v); // Does not increment nr_entries void overwrite_at(unsigned i, uint64_t key, typename ValueTraits::value_type const &v); // Copies entries from another node, appends them // to the back of this node. Adjusts nr_entries. void copy_entries(node_ref const &rhs, unsigned begin, unsigned end); // Various searches int bsearch(uint64_t key, int want_hi) const; optional exact_search(uint64_t key) const; int lower_bound(uint64_t key) const; private: static unsigned calc_max_entries(void); void *key_ptr(unsigned i) const; void *value_ptr(unsigned i) const; disk_node *raw_; }; //------------------------------------------------ // template node_ref to_node(typename block_manager::read_ref &b) { // FIXME: this should return a const read_ref somehow. return node_ref( reinterpret_cast( const_cast(b.data()))); } template node_ref to_node(typename block_manager::write_ref &b) { return node_ref( reinterpret_cast( const_cast(b.data()))); } template class ro_spine : private noncopyable { public: ro_spine(typename transaction_manager::ptr tm) : tm_(tm) { } void step(block_address b) { spine_.push_back(tm_->read_lock(b)); if (spine_.size() > 2) spine_.pop_front(); } template node_ref get_node() { return to_node(spine_.back()); } private: typename transaction_manager::ptr tm_; std::list::read_ref> spine_; }; template class shadow_spine : private noncopyable { public: shadow_spine(typename transaction_manager::ptr tm) : tm_(tm) { } // true if the children of the shadow need incrementing bool step(block_address b) { auto p = tm_->shadow(b); try { step(p.first); } catch (...) { tm_->get_sm()->dec(p.first.get_location()); throw; } return p.second; } void step(typename transaction_manager::write_ref b) { spine_.push_back(b); if (spine_.size() == 1) root_ = spine_.front().get_location(); else if (spine_.size() > 2) spine_.pop_front(); } void pop() { spine_.pop_back(); } template node_ref get_node() { return to_node(spine_.back()); } block_address get_block() const { return spine_.back().get_location(); } bool has_parent() const { return spine_.size() > 1; } node_ref get_parent() { if (spine_.size() < 2) throw std::runtime_error("no parent"); return to_node(spine_.front()); } block_address get_root() const { return root_; } private: typename transaction_manager::ptr tm_; std::list::write_ref> spine_; block_address root_; }; template optional lookup_raw(ro_spine &spine, block_address block, uint64_t key) { using namespace boost; typedef typename ValueTraits::value_type leaf_type; for (;;) { spine.step(block); auto leaf = spine.template get_node(); auto mi = leaf.exact_search(key); if (!mi) return optional(); if (leaf.get_type() == btree_detail::LEAF) return optional(leaf.value_at(*mi)); auto internal = spine.template get_node(); block = internal.value_at(*mi); } } } template class btree { public: typedef boost::shared_ptr > ptr; typedef uint64_t key[Levels]; typedef typename ValueTraits::value_type value_type; typedef boost::optional maybe_value; typedef boost::optional > maybe_pair; typedef typename block_manager::read_ref read_ref; typedef typename block_manager::write_ref write_ref; btree(typename persistent_data::transaction_manager::ptr tm); btree(typename transaction_manager::ptr tm, block_address root); ~btree(); maybe_value lookup(key const &key) const; maybe_pair lookup_le(key const &key) const; maybe_pair lookup_ge(key const &key) const; void insert(key const &key, typename ValueTraits::value_type const &value); void remove(key const &key); void set_root(block_address root); block_address get_root() const; ptr clone() const; // free the on disk btree when the destructor is called void destroy(); private: template void split_node(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key, bool top); template void split_beneath(btree_detail::shadow_spine &spine, uint64_t key); template void split_sibling(btree_detail::shadow_spine &spine, block_address parent_index, uint64_t key); template bool insert_location(btree_detail::shadow_spine &spine, block_address block, uint64_t key, int *index); typename persistent_data::transaction_manager::ptr tm_; bool destroy_; block_address root_; }; }; #include "btree.tcc" //---------------------------------------------------------------- #endif