2011-06-23 19:17:08 +05:30
|
|
|
#include "btree.h"
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
#include "endian.h"
|
|
|
|
#include "transaction_manager.h"
|
|
|
|
|
|
|
|
#include <list>
|
2011-06-23 19:17:08 +05:30
|
|
|
#include <boost/noncopyable.hpp>
|
|
|
|
#include <boost/optional.hpp>
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
// FIXME: using directives must not appear in a header; they leak into every file that includes it
|
|
|
|
using namespace base;
|
2011-06-23 19:17:08 +05:30
|
|
|
using namespace boost;
|
|
|
|
using namespace persistent_data;
|
|
|
|
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
namespace btree_detail {
|
2011-06-23 19:17:08 +05:30
|
|
|
//------------------------------------------------
|
|
|
|
// On disk data layout for btree nodes
|
|
|
|
// Bit values kept in node_header::flags that record what kind of
// node a block holds.
enum node_flags {
	INTERNAL_NODE = 1 << 0,
	LEAF_NODE = 1 << 1
};
|
|
|
|
|
|
|
|
struct node_header {
|
|
|
|
__le32 csum;
|
|
|
|
__le32 flags;
|
|
|
|
__le64 blocknr; /* which block this node is supposed to live in */
|
|
|
|
|
|
|
|
__le32 nr_entries;
|
|
|
|
__le32 max_entries;
|
|
|
|
} __attribute__((packed));
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
struct disk_node {
|
2011-06-23 19:17:08 +05:30
|
|
|
struct node_header header;
|
|
|
|
__le64 keys[0];
|
|
|
|
} __attribute__((packed));
|
|
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------
|
|
|
|
// Class that acts as an interface over the raw little endian btree
|
|
|
|
// node data.
|
2011-06-27 15:15:30 +05:30
|
|
|
class node_ref {
|
2011-06-23 19:17:08 +05:30
|
|
|
public:
|
|
|
|
enum type {
|
|
|
|
INTERNAL,
|
|
|
|
LEAF
|
|
|
|
};
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
node_ref(disk_node *raw)
|
|
|
|
: raw_(raw) {
|
|
|
|
}
|
|
|
|
|
|
|
|
type get_type() const {
|
|
|
|
uint32_t flags = to_cpu<uint32_t>(raw_->header.flags);
|
|
|
|
if (flags & INTERNAL_NODE)
|
|
|
|
return INTERNAL;
|
|
|
|
else if (flags & LEAF_NODE)
|
|
|
|
return LEAF;
|
|
|
|
else
|
|
|
|
throw runtime_error("unknow node type");
|
|
|
|
}
|
|
|
|
|
|
|
|
void set_type(type t){
|
|
|
|
uint32_t flags = to_cpu<uint32_t>(raw_->header.flags);
|
|
|
|
switch (t) {
|
|
|
|
case INTERNAL:
|
|
|
|
flags |= INTERNAL_NODE;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case LEAF:
|
|
|
|
flags |= LEAF_NODE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
raw_->header.flags = to_disk<__le32>(flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned get_nr_entries() const {
|
|
|
|
return to_cpu<uint32_t>(raw_->header.nr_entries);
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
void set_nr_entries(unsigned n) {
|
|
|
|
raw_->header.nr_entries = to_disk<__le32>(n);
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
unsigned get_max_entries() const {
|
|
|
|
return to_cpu<uint32_t>(raw_->header.max_entries);
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
void set_max_entries(unsigned n) {
|
|
|
|
raw_->header.max_entries = to_disk<__le32>(n);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t key_at(unsigned i) const {
|
|
|
|
return to_cpu<uint64_t>(raw_->keys[i]);
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
|
|
|
template <typename ValueTraits>
|
2011-06-27 15:15:30 +05:30
|
|
|
typename ValueTraits::value_type value_at(unsigned i) const {
|
|
|
|
void *value_base = &raw_->keys[to_cpu<uint32_t>(raw_->header.max_entries)];
|
|
|
|
void *value_ptr = static_cast<unsigned char *>(value_base) +
|
|
|
|
sizeof(typename ValueTraits::disk_type) * i;
|
|
|
|
return ValueTraits::construct(value_ptr);
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
|
|
|
private:
|
2011-06-27 15:15:30 +05:30
|
|
|
disk_node *raw_;
|
2011-06-23 19:17:08 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
//------------------------------------------------
|
|
|
|
// Various searches
|
2011-06-27 15:15:30 +05:30
|
|
|
// Binary search over the keys [0, nr_entries) of node n, which must be
// sorted in ascending order.
//
// Returns the index of an entry whose key equals |key| if one is
// found.  Otherwise the search ends with lo and hi adjacent, where
// lo is the index of the last key < |key| (-1 if there is none) and hi
// is the index of the first key > |key| (nr_entries if there is none);
// hi is returned when want_hi is non-zero, lo otherwise.
//
// Declared inline because this definition lives in a file that is
// included like a header; a plain non-template function here would be
// defined once per including translation unit.
inline int bsearch(node_ref const &n, uint64_t key, int want_hi)
{
	int lo = -1, hi = n.get_nr_entries();

	while (hi - lo > 1) {
		int mid = lo + ((hi - lo) / 2);
		uint64_t mid_key = n.key_at(mid);

		if (mid_key == key)
			return mid;

		if (mid_key < key)
			lo = mid;
		else
			hi = mid;
	}

	return want_hi ? hi : lo;
}
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
// Finds the entry of n to use for |key|: the index returned by
// bsearch(n, key, 0), i.e. an entry with an equal key, or failing that
// the last entry whose key is smaller.  An empty optional is returned
// when that index falls outside [0, nr_entries), which happens when
// every key in the node is greater than |key| or the node is empty.
//
// NOTE: despite the name this does not check that key_at(i) == key;
// callers that need a true exact match must compare the key
// themselves.
//
// Declared inline because this definition lives in a file that is
// included like a header; a plain non-template function here would be
// defined once per including translation unit.
inline optional<unsigned> exact_search(node_ref const &n, uint64_t key) {
	int i = bsearch(n, key, 0);
	if (i < 0 || static_cast<unsigned>(i) >= n.get_nr_entries())
		return optional<unsigned>();

	return optional<unsigned>(i);
}
|
|
|
|
|
|
|
|
//------------------------------------------------
|
|
|
|
//
|
|
|
|
template <uint32_t BlockSize>
|
2011-06-27 15:15:30 +05:30
|
|
|
node_ref to_node(typename block_manager<BlockSize>::read_ref &b)
|
|
|
|
{
|
|
|
|
// FIXME: this should return a const read_ref somehow.
|
|
|
|
return node_ref(
|
|
|
|
reinterpret_cast<disk_node *>(const_cast<unsigned char *>(b.data())));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <uint32_t BlockSize>
|
|
|
|
node_ref to_node(typename block_manager<BlockSize>::write_ref &b)
|
|
|
|
{
|
|
|
|
return node_ref(
|
|
|
|
reinterpret_cast<disk_node *>(const_cast<unsigned char *>(b.data())));
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
|
|
|
// Declared here, defined elsewhere.  The btree constructor passes the
// block size and stores the result as the max_entries of a new node.
unsigned calc_max_entries(uint32_t bs);
|
|
|
|
|
|
|
|
// Spines
|
|
|
|
template <uint32_t BlockSize>
|
|
|
|
class ro_spine : private noncopyable {
|
|
|
|
public:
|
2011-06-27 15:15:30 +05:30
|
|
|
ro_spine(typename transaction_manager<BlockSize>::ptr tm)
|
|
|
|
: tm_(tm) {
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
void step(block_address b) {
|
|
|
|
spine_.push_back(tm_->read_lock(b));
|
|
|
|
if (spine_.size() > 2)
|
|
|
|
spine_.pop_front();
|
|
|
|
}
|
|
|
|
|
|
|
|
node_ref get_node() {
|
|
|
|
return to_node<BlockSize>(spine_.back());
|
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
private:
|
|
|
|
typename transaction_manager<BlockSize>::ptr tm_;
|
|
|
|
std::list<typename block_manager<BlockSize>::read_ref> spine_;
|
2011-06-23 19:17:08 +05:30
|
|
|
};
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
template <uint32_t BlockSize>
|
|
|
|
class shadow_spine : private noncopyable {
|
2011-06-23 19:17:08 +05:30
|
|
|
public:
|
2011-06-27 15:15:30 +05:30
|
|
|
shadow_spine(typename transaction_manager<BlockSize>::ptr tm)
|
|
|
|
: tm_(tm) {
|
|
|
|
}
|
|
|
|
|
|
|
|
void step(block_address b) {
|
|
|
|
spine_.push_back(tm_->shadow(b));
|
|
|
|
if (spine_.size() == 1)
|
|
|
|
root_ = spine_.front().get_location();
|
|
|
|
else if (spine_.size() > 2)
|
|
|
|
spine_.pop_front();
|
|
|
|
}
|
|
|
|
|
|
|
|
node_ref get_node() {
|
|
|
|
return to_node<BlockSize>(spine_.back());
|
|
|
|
}
|
|
|
|
|
|
|
|
node_ref get_parent() {
|
|
|
|
if (spine_.size() < 2)
|
|
|
|
throw std::runtime_error("no parent");
|
|
|
|
|
|
|
|
return to_node<BlockSize>(spine_.front());
|
|
|
|
}
|
|
|
|
|
|
|
|
node_ref get_root() {
|
|
|
|
return root_;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
typename transaction_manager<BlockSize>::ptr tm_;
|
|
|
|
std::list<typename block_manager<BlockSize>::write_ref> spine_;
|
|
|
|
block_address root_;
|
2011-06-23 19:17:08 +05:30
|
|
|
};
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
template <typename ValueTraits, uint32_t BlockSize> //, typename Search>
|
2011-06-23 19:17:08 +05:30
|
|
|
optional<typename ValueTraits::value_type>
|
|
|
|
lookup_raw(ro_spine<BlockSize> &spine, block_address block, uint64_t key) {
|
|
|
|
|
|
|
|
using namespace boost;
|
|
|
|
typedef typename ValueTraits::value_type leaf_type;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
spine.step(block);
|
2011-06-27 15:15:30 +05:30
|
|
|
node_ref const &n = spine.get_node();
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
auto mi = exact_search(n, key);
|
2011-06-23 19:17:08 +05:30
|
|
|
if (!mi)
|
|
|
|
return optional<leaf_type>();
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
if (n.get_type() == node_ref::LEAF)
|
|
|
|
return optional<leaf_type>(n.value_at<ValueTraits>(*mi));
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
block = n.value_at<uint64_traits>(*mi);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
2011-06-27 15:15:30 +05:30
|
|
|
btree<Levels, ValueTraits, BlockSize>::btree(typename transaction_manager<BlockSize>::ptr tm)
|
2011-06-23 19:17:08 +05:30
|
|
|
: tm_(tm),
|
|
|
|
destroy_(false)
|
|
|
|
{
|
2011-06-27 15:15:30 +05:30
|
|
|
using namespace btree_detail;
|
|
|
|
|
|
|
|
write_ref root = tm_->new_block();
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
node_ref n = to_node<BlockSize>(root);
|
|
|
|
n.set_type(node_ref::LEAF);
|
2011-06-23 19:17:08 +05:30
|
|
|
n.set_nr_entries(0);
|
|
|
|
n.set_max_entries(calc_max_entries(BlockSize));
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
root_ = root.get_location();
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
2011-06-27 15:15:30 +05:30
|
|
|
btree<Levels, ValueTraits, BlockSize>::btree(typename transaction_manager<BlockSize>::ptr tm,
|
2011-06-23 19:17:08 +05:30
|
|
|
block_address root)
|
|
|
|
: tm_(tm),
|
|
|
|
destroy_(false),
|
|
|
|
root_(root)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
|
|
|
btree<Levels, ValueTraits, BlockSize>::~btree()
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
|
|
|
typename btree<Levels, ValueTraits, BlockSize>::maybe_value
|
|
|
|
btree<Levels, ValueTraits, BlockSize>::lookup(key const &key) const
|
|
|
|
{
|
2011-06-27 15:15:30 +05:30
|
|
|
using namespace btree_detail;
|
|
|
|
|
|
|
|
ro_spine<BlockSize> spine(tm_);
|
2011-06-23 19:17:08 +05:30
|
|
|
block_address root = root_;
|
|
|
|
|
|
|
|
for (unsigned level = 0; level < Levels - 1; ++level) {
|
2011-06-27 15:15:30 +05:30
|
|
|
optional<block_address> mroot =
|
|
|
|
lookup_raw<uint64_traits, BlockSize>(spine, root, key[level]);
|
2011-06-23 19:17:08 +05:30
|
|
|
if (!mroot)
|
|
|
|
return maybe_value();
|
|
|
|
|
|
|
|
root = *mroot;
|
|
|
|
}
|
|
|
|
|
2011-06-27 15:15:30 +05:30
|
|
|
return lookup_raw<ValueTraits, BlockSize>(spine, root, key[Levels - 1]);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
|
|
|
typename btree<Levels, ValueTraits, BlockSize>::maybe_pair
|
|
|
|
btree<Levels, ValueTraits, BlockSize>::lookup_le(key const &key) const
|
|
|
|
{
|
2011-06-27 15:15:30 +05:30
|
|
|
using namespace btree_detail;
|
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
return maybe_pair();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
|
|
|
|
typename btree<Levels, ValueTraits, BlockSize>::maybe_pair
|
|
|
|
btree<Levels, ValueTraits, BlockSize>::lookup_ge(key const &key) const
|
|
|
|
{
|
2011-06-27 15:15:30 +05:30
|
|
|
using namespace btree_detail;
|
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
return maybe_pair();
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
// Placeholders for the btree operations that have not been written
// yet.  The whole section is compiled out.  Compared with the earlier
// version of these stubs: the second, duplicate definition of
// get_root() has been removed, get_root()/set_root() simply read and
// write root_, and clone()'s return type is qualified.

// TODO: not implemented.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void
btree<Levels, ValueTraits, BlockSize>::insert(key const &key, typename ValueTraits::value_type const &value)
{
	using namespace btree_detail;
}

// TODO: not implemented.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void
btree<Levels, ValueTraits, BlockSize>::remove(key const &key)
{
	using namespace btree_detail;
}

// Address of the block currently used as the root of the tree.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
block_address
btree<Levels, ValueTraits, BlockSize>::get_root() const
{
	return root_;
}

// Makes |root| the root of the tree; nothing is read or written on
// disk.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void
btree<Levels, ValueTraits, BlockSize>::set_root(block_address root)
{
	root_ = root;
}

// TODO: not implemented; a value must be returned before this section
// can be enabled.
// NOTE(review): the return type is presumably the ptr typedef nested
// in btree - confirm against btree.h.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
typename btree<Levels, ValueTraits, BlockSize>::ptr
btree<Levels, ValueTraits, BlockSize>::clone() const
{
	using namespace btree_detail;
}

// TODO: not implemented.
template <unsigned Levels, typename ValueTraits, uint32_t BlockSize>
void
btree<Levels, ValueTraits, BlockSize>::destroy()
{
	using namespace btree_detail;
}
#endif
|
|
|
|
//----------------------------------------------------------------
|