Introduce persistent-data/data-structures/
This commit is contained in:
154
persistent-data/data-structures/array.h
Normal file
154
persistent-data/data-structures/array.h
Normal file
@ -0,0 +1,154 @@
|
||||
// Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
||||
//
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef ARRAY_H
|
||||
#define ARRAY_H
|
||||
|
||||
#include "btree.h"

#include <stdexcept>
#include <string.h>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace persistent_data {
|
||||
struct array_block_disk {
|
||||
__le32 csum;
|
||||
__le32 max_entries;
|
||||
__le32 nr_entries;
|
||||
__le32 value_size;
|
||||
__le64 blocknr;
|
||||
} __attribute__((packed));
|
||||
|
||||
template <typename ValueTraits>
|
||||
class ro_array_block {
|
||||
public:
|
||||
typedef typename ValueTraits::value_type value_type;
|
||||
typedef block_manager<>::read_ref read_ref;
|
||||
|
||||
ro_array_block(read_ref rr);
|
||||
|
||||
unsigned nr_entries() const;
|
||||
value_type get(unsigned index) const;
|
||||
|
||||
private:
|
||||
const void *element_at(unsigned int index) const;
|
||||
|
||||
read_ref rr_;
|
||||
};
|
||||
|
||||
template <typename ValueTraits>
|
||||
class array_block : public ro_array_block<ValueTraits> {
|
||||
public:
|
||||
typedef typename ValueTraits::value_type value_type;
|
||||
typedef block_manager<>::write_ref write_ref;
|
||||
|
||||
array_block(write_ref wr);
|
||||
|
||||
// No virtual methods, so no need for a virtual destructor.
|
||||
// Not really sure inheritance is the right relationship
|
||||
// though.
|
||||
|
||||
void set(unsigned index, value_type const &v);
|
||||
void inc_all_entries(typename ValueTraits::ref_counter &rc);
|
||||
void dec_all_entries(typename ValueTraits::ref_counter &rc);
|
||||
|
||||
block_address address() const;
|
||||
|
||||
// FIXME: why isn't this visible?
|
||||
//using ro_array_block<ValueTraits>::nr_entries();
|
||||
|
||||
private:
|
||||
void *element_at(unsigned int index);
|
||||
|
||||
write_ref wr_;
|
||||
};
|
||||
|
||||
// Reference counter over 64-bit values whose inc/dec currently do nothing.
class BlockRefCounter {
public:
	void inc(uint64_t const &) {
	}

	void dec(uint64_t const &) {
	}
};
|
||||
|
||||
template <typename ValueTraits>
|
||||
struct array_block_traits {
|
||||
typedef base::__le64 disk_type;
|
||||
typedef array_block<ValueTraits> value_type;
|
||||
typedef NoOpRefCounter<value_type> ref_counter;
|
||||
|
||||
static void unpack(disk_type const &disk, value_type &value) {
|
||||
value = base::to_cpu<uint64_t>(disk);
|
||||
}
|
||||
|
||||
static void pack(value_type const &value, disk_type &disk) {
|
||||
disk = base::to_disk<base::__le64>(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ValueTraits>
|
||||
class array {
|
||||
public:
|
||||
typedef typename persistent_data::transaction_manager::ptr tm_ptr;
|
||||
|
||||
typedef boost::shared_ptr<array<ValueTraits> > ptr;
|
||||
typedef typename ValueTraits::value_type value_type;
|
||||
|
||||
array(tm_ptr tm,
|
||||
typename ValueTraits::ref_counter rc,
|
||||
unsigned nr_entries,
|
||||
value_type const &default_value);
|
||||
|
||||
array(tm_ptr tm,
|
||||
typename ValueTraits::ref_counter rc,
|
||||
block_address root);
|
||||
|
||||
void set_root(block_address root);
|
||||
block_address get_root() const;
|
||||
|
||||
void destroy();
|
||||
|
||||
void grow(unsigned old_size, unsigned new_size, value_type const &v);
|
||||
void shrink(unsigned old_size, unsigned new_size);
|
||||
|
||||
value_type const &get(unsigned index) const;
|
||||
void set(unsigned index, value_type const &value);
|
||||
|
||||
|
||||
private:
|
||||
array_block<ValueTraits> new_ablock();
|
||||
ro_array_block<ValueTraits> get_ablock(unsigned block_index) const;
|
||||
array_block<ValueTraits> shadow_ablock(unsigned block_index);
|
||||
|
||||
void fill_tail_block(array_block<ValueTraits> &ab,
|
||||
value_type v,
|
||||
unsigned nr_entries);
|
||||
void insert_full_blocks(unsigned begin_index, unsigned end_index,
|
||||
value_type v);
|
||||
void insert_tail_block(unsigned index, unsigned nr_entries, value_type v);
|
||||
|
||||
|
||||
tm_ptr tm_;
|
||||
bool destroy_;
|
||||
unsigned entries_per_block_; // FIXME: initialise
|
||||
btree<1, array_block_traits<ValueTraits> > block_tree_;
|
||||
typename ValueTraits::ref_counter rc_;
|
||||
};
|
||||
}
|
||||
|
||||
#include "array.tcc"
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
290
persistent-data/data-structures/array.tcc
Normal file
290
persistent-data/data-structures/array.tcc
Normal file
@ -0,0 +1,290 @@
|
||||
// Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
||||
//
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
using namespace base;
|
||||
using namespace persistent_data;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
uint32_t const ARRAY_CSUM_XOR = 595846735;
|
||||
|
||||
struct array_block_validator : public block_manager<>::validator {
|
||||
virtual void check(buffer<> const &b, block_address location) const {
|
||||
array_block_disk const *data = reinterpret_cast<array_block_disk const *>(&b);
|
||||
crc32c sum(ARRAY_CSUM_XOR);
|
||||
sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t));
|
||||
if (sum.get_sum() != to_cpu<uint32_t>(data->csum))
|
||||
throw checksum_error("bad checksum in array block node");
|
||||
|
||||
if (to_cpu<uint64_t>(data->blocknr) != location)
|
||||
throw checksum_error("bad block nr in array block");
|
||||
}
|
||||
|
||||
virtual void prepare(buffer<> &b, block_address location) const {
|
||||
array_block_disk *data = reinterpret_cast<array_block_disk *>(&b);
|
||||
data->blocknr = to_disk<base::__le64, uint64_t>(location);
|
||||
|
||||
crc32c sum(ARRAY_CSUM_XOR);
|
||||
sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t));
|
||||
data->csum = to_disk<base::__le32>(sum.get_sum());
|
||||
}
|
||||
};
|
||||
|
||||
// Splits an entry count into the number of completely filled blocks and the
// number of entries left over for a final, partially filled block.
struct array_dim {
	array_dim(unsigned nr_entries, unsigned entries_per_block) {
		nr_full_blocks = nr_entries / entries_per_block;
		nr_entries_in_last_block = nr_entries % entries_per_block;
	}

	unsigned nr_full_blocks;
	unsigned nr_entries_in_last_block;
};
|
||||
|
||||
unsigned calc_max_entries(size_t value_size, size_t block_size)
|
||||
{
|
||||
return (block_size - sizeof(struct array_block_disk)) / value_size;
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
template <typename ValueTraits>
|
||||
array<ValueTraits>::array(typename persistent_data::transaction_manager::ptr tm,
|
||||
typename ValueTraits::ref_counter rc,
|
||||
unsigned nr_entries,
|
||||
value_type const &default_value)
|
||||
: tm_(tm),
|
||||
destroy_(false),
|
||||
block_tree_(tm, array_block_traits<ValueTraits>()),
|
||||
entries_per_block_(calc_max_entries(sizeof(value_type), MD_BLOCK_SIZE)),
|
||||
rc_(rc)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
array<ValueTraits>::array(typename persistent_data::transaction_manager::ptr tm,
|
||||
typename ValueTraits::ref_counter rc,
|
||||
block_address root)
|
||||
: tm_(tm),
|
||||
destroy_(false),
|
||||
block_tree_(tm, root, array_block_traits<ValueTraits>()),
|
||||
entries_per_block_(calc_max_entries(sizeof(value_type), MD_BLOCK_SIZE)),
|
||||
rc_(rc)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::set_root(block_address root)
|
||||
{
|
||||
block_tree_.set_root(root);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
block_address
|
||||
array<ValueTraits>::get_root() const
|
||||
{
|
||||
return block_tree_.get_root();
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::destroy()
|
||||
{
|
||||
block_tree_.destroy();
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::grow(unsigned old_size, unsigned new_size,
|
||||
typename ValueTraits::value_type const &v)
|
||||
{
|
||||
array_dim old_dim(old_size, entries_per_block_);
|
||||
array_dim new_dim(new_size, entries_per_block_);
|
||||
|
||||
if (new_dim.nr_full_blocks > old_dim.nr_full_blocks) {
|
||||
if (old_dim.nr_entries_in_last_block > 0) {
|
||||
array_block<ValueTraits> ab = shadow_ablock(old_dim.nr_full_blocks);
|
||||
fill_tail_block(ab, v, entries_per_block_);
|
||||
}
|
||||
|
||||
insert_full_blocks(old_dim.nr_full_blocks, new_dim.nr_full_blocks + 1, v);
|
||||
insert_tail_block(new_dim.nr_full_blocks, new_dim.nr_entries_in_last_block, v);
|
||||
} else {
|
||||
array_block<ValueTraits> ab = get_ablock(new_dim.nr_full_blocks - 1u);
|
||||
fill_tail_block(ab, v, new_dim.nr_entries_in_last_block);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::shrink(unsigned old_size, unsigned new_size)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
typename array<ValueTraits>::value_type const &
|
||||
array<ValueTraits>::get(unsigned index) const
|
||||
{
|
||||
array_block<ValueTraits> ab = get_ablock(index / entries_per_block_);
|
||||
return ab.get(index % entries_per_block_);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::set(unsigned index, value_type const &value)
|
||||
{
|
||||
array_block<ValueTraits> ab = shadow_ablock(index / entries_per_block_);
|
||||
ab.set(index % entries_per_block_, value);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
ro_array_block<ValueTraits>
|
||||
array<ValueTraits>::get_ablock(unsigned block_index) const
|
||||
{
|
||||
return ro_array_block<ValueTraits>(tm_->read_lock(block_index));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
array_block<ValueTraits>
|
||||
array<ValueTraits>::shadow_ablock(unsigned block_index)
|
||||
{
|
||||
typedef typename block_manager<>::write_ref write_ref;
|
||||
|
||||
transaction_manager::validator v(new array_block_validator);
|
||||
std::pair<write_ref, bool> p = tm_->shadow(block_index, v);
|
||||
array_block<ValueTraits> ab(p.first);
|
||||
|
||||
if (p.second)
|
||||
ab.inc_all_entries();
|
||||
|
||||
uint64_t key[1];
|
||||
key[0] = block_index;
|
||||
block_tree_.insert(key, ab.get_location());
|
||||
|
||||
return ab;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::fill_tail_block(array_block<ValueTraits> &ab,
|
||||
value_type v,
|
||||
unsigned nr_entries)
|
||||
{
|
||||
for (unsigned i = ab.nr_entries(); i < nr_entries; i++)
|
||||
ab.set(i, v);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::insert_full_blocks(unsigned begin_index,
|
||||
unsigned end_index,
|
||||
value_type v)
|
||||
{
|
||||
array_block<ValueTraits> ab = new_ablock();
|
||||
space_map::ptr sm = tm_->get_sm();
|
||||
|
||||
for (unsigned i = 0; i < entries_per_block_; i++)
|
||||
ab.set(i, v);
|
||||
|
||||
for (uint64_t b = begin_index; b < end_index; b++) {
|
||||
block_tree_.insert(b, ab);
|
||||
sm->inc(ab.address());
|
||||
}
|
||||
|
||||
sm->dec(ab.adress());
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array<ValueTraits>::insert_tail_block(unsigned index,
|
||||
unsigned nr_entries,
|
||||
value_type v)
|
||||
{
|
||||
array_block<ValueTraits> ab = new_ablock();
|
||||
|
||||
for (unsigned i = 0; i < nr_entries; i++)
|
||||
ab.set(i, v);
|
||||
|
||||
block_tree_.insert(index, ab);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
template <typename ValueTraits>
|
||||
ro_array_block<ValueTraits>::ro_array_block(read_ref rr)
|
||||
: rr_(rr)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
unsigned
|
||||
ro_array_block<ValueTraits>::nr_entries() const
|
||||
{
|
||||
array_block_disk const *data =
|
||||
reinterpret_cast<array_block_disk const *>(&rr_.data());
|
||||
|
||||
return to_cpu<uint32_t>(data->nr_entries);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
typename ValueTraits::value_type
|
||||
ro_array_block<ValueTraits>::get(unsigned index) const
|
||||
{
|
||||
value_type v;
|
||||
ValueTraits::unpack(element_at(index), v);
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
array_block<ValueTraits>::array_block(write_ref wr)
|
||||
: wr_(wr)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array_block<ValueTraits>::set(unsigned index, value_type const &v)
|
||||
{
|
||||
void *elt = element_at(index);
|
||||
ValueTraits::pack(v, element_at(index));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array_block<ValueTraits>::inc_all_entries(typename ValueTraits::ref_counter &rc)
|
||||
{
|
||||
unsigned nr = ro_array_block<ValueTraits>::nr_entries();
|
||||
|
||||
for (unsigned i = 0; i < nr; i++)
|
||||
rc.inc(ro_array_block<ValueTraits>::get(i));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
array_block<ValueTraits>::dec_all_entries(typename ValueTraits::ref_counter &rc)
|
||||
{
|
||||
unsigned nr = ro_array_block<ValueTraits>::nr_entries();
|
||||
|
||||
for (unsigned i = 0; i < nr; i++)
|
||||
rc.dec(ro_array_block<ValueTraits>::get(i));
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
371
persistent-data/data-structures/btree.h
Normal file
371
persistent-data/data-structures/btree.h
Normal file
@ -0,0 +1,371 @@
|
||||
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
|
||||
//
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef BTREE_H
|
||||
#define BTREE_H
|
||||
|
||||
#include "persistent-data/endian_utils.h"
|
||||
#include "persistent-data/transaction_manager.h"
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <list>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace persistent_data {
|
||||
|
||||
// Reference counter for value types that need no reference counting: both
// operations ignore their argument.
template <typename ValueType>
class NoOpRefCounter {
public:
	void inc(ValueType const &) {
	}

	void dec(ValueType const &) {
	}
};
|
||||
|
||||
struct uint64_traits {
|
||||
typedef base::__le64 disk_type;
|
||||
typedef uint64_t value_type;
|
||||
typedef NoOpRefCounter<uint64_t> ref_counter;
|
||||
|
||||
static void unpack(disk_type const &disk, value_type &value) {
|
||||
value = base::to_cpu<uint64_t>(disk);
|
||||
}
|
||||
|
||||
static void pack(value_type const &value, disk_type &disk) {
|
||||
disk = base::to_disk<base::__le64>(value);
|
||||
}
|
||||
};
|
||||
|
||||
namespace btree_detail {
|
||||
using namespace base;
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
|
||||
// Seed handed to crc32c when checksumming btree nodes.
uint32_t const BTREE_CSUM_XOR = 121107;

//------------------------------------------------
// On disk data layout for btree nodes

// Bits stored in node_header::flags.
enum node_flags {
	INTERNAL_NODE = 1 << 0,
	LEAF_NODE = 1 << 1
};
|
||||
|
||||
struct node_header {
|
||||
__le32 csum;
|
||||
__le32 flags;
|
||||
__le64 blocknr; /* which block this node is supposed to live in */
|
||||
|
||||
__le32 nr_entries;
|
||||
__le32 max_entries;
|
||||
__le32 value_size;
|
||||
__le32 padding;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct disk_node {
|
||||
struct node_header header;
|
||||
__le64 keys[0];
|
||||
} __attribute__((packed));
|
||||
|
||||
// In-core classification of a node, as reported by node_ref::get_type().
enum node_type {
	INTERNAL = 0,
	LEAF = 1
};
|
||||
|
||||
//------------------------------------------------
|
||||
// Class that acts as an interface over the raw little endian btree
|
||||
// node data.
|
||||
template <typename ValueTraits>
|
||||
class node_ref {
|
||||
public:
|
||||
explicit node_ref(block_address b, disk_node *raw);
|
||||
|
||||
uint32_t get_checksum() const;
|
||||
|
||||
block_address get_location() const {
|
||||
return location_;
|
||||
}
|
||||
|
||||
block_address get_block_nr() const;
|
||||
|
||||
node_type get_type() const;
|
||||
void set_type(node_type t);
|
||||
|
||||
unsigned get_nr_entries() const;
|
||||
void set_nr_entries(unsigned n);
|
||||
|
||||
unsigned get_max_entries() const;
|
||||
void set_max_entries(unsigned n);
|
||||
|
||||
// FIXME: remove this, and get the constructor to do it.
|
||||
void set_max_entries(); // calculates the max for you.
|
||||
|
||||
size_t get_value_size() const;
|
||||
void set_value_size(size_t);
|
||||
|
||||
uint64_t key_at(unsigned i) const;
|
||||
void set_key(unsigned i, uint64_t k);
|
||||
|
||||
typename ValueTraits::value_type value_at(unsigned i) const;
|
||||
void set_value(unsigned i,
|
||||
typename ValueTraits::value_type const &v);
|
||||
|
||||
// Increments the nr_entries field
|
||||
void insert_at(unsigned i,
|
||||
uint64_t key,
|
||||
typename ValueTraits::value_type const &v);
|
||||
|
||||
// Does not increment nr_entries
|
||||
void overwrite_at(unsigned i,
|
||||
uint64_t key,
|
||||
typename ValueTraits::value_type const &v);
|
||||
|
||||
// Copies entries from another node, appends them
|
||||
// to the back of this node. Adjusts nr_entries.
|
||||
void copy_entries(node_ref const &rhs,
|
||||
unsigned begin,
|
||||
unsigned end);
|
||||
|
||||
// Various searches
|
||||
int bsearch(uint64_t key, int want_hi) const;
|
||||
optional<unsigned> exact_search(uint64_t key) const;
|
||||
int lower_bound(uint64_t key) const;
|
||||
|
||||
template <typename RefCounter>
|
||||
void inc_children(RefCounter &rc);
|
||||
|
||||
disk_node *raw() {
|
||||
return raw_;
|
||||
}
|
||||
|
||||
disk_node const *raw() const {
|
||||
return raw_;
|
||||
}
|
||||
|
||||
private:
|
||||
static unsigned calc_max_entries(void);
|
||||
|
||||
void *key_ptr(unsigned i) const;
|
||||
void *value_ptr(unsigned i) const;
|
||||
|
||||
block_address location_;
|
||||
disk_node *raw_;
|
||||
};
|
||||
|
||||
//------------------------------------------------
|
||||
//
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits>
|
||||
to_node(typename block_manager<>::read_ref &b)
|
||||
{
|
||||
// FIXME: this should return a const read_ref somehow.
|
||||
return node_ref<ValueTraits>(
|
||||
b.get_location(),
|
||||
reinterpret_cast<disk_node *>(
|
||||
const_cast<unsigned char *>(b.data().raw())));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits>
|
||||
to_node(typename block_manager<>::write_ref &b)
|
||||
{
|
||||
return node_ref<ValueTraits>(
|
||||
b.get_location(),
|
||||
reinterpret_cast<disk_node *>(
|
||||
const_cast<unsigned char *>(b.data().raw())));
|
||||
}
|
||||
|
||||
class ro_spine : private noncopyable {
|
||||
public:
|
||||
ro_spine(transaction_manager::ptr tm)
|
||||
: tm_(tm) {
|
||||
}
|
||||
|
||||
void step(block_address b);
|
||||
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits> get_node() {
|
||||
return to_node<ValueTraits>(spine_.back());
|
||||
}
|
||||
|
||||
private:
|
||||
transaction_manager::ptr tm_;
|
||||
std::list<block_manager<>::read_ref> spine_;
|
||||
};
|
||||
|
||||
class shadow_spine : private noncopyable {
|
||||
public:
|
||||
typedef transaction_manager::read_ref read_ref;
|
||||
typedef transaction_manager::write_ref write_ref;
|
||||
|
||||
shadow_spine(transaction_manager::ptr tm)
|
||||
: tm_(tm) {
|
||||
}
|
||||
|
||||
// true if the children of the shadow need incrementing
|
||||
bool step(block_address b);
|
||||
void step(transaction_manager::write_ref b) {
|
||||
spine_.push_back(b);
|
||||
if (spine_.size() == 1)
|
||||
root_ = spine_.front().get_location();
|
||||
else if (spine_.size() > 2)
|
||||
spine_.pop_front();
|
||||
}
|
||||
|
||||
void pop() {
|
||||
spine_.pop_back();
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits> get_node() {
|
||||
return to_node<ValueTraits>(spine_.back());
|
||||
}
|
||||
|
||||
block_address get_block() const {
|
||||
return spine_.back().get_location();
|
||||
}
|
||||
|
||||
bool has_parent() const {
|
||||
return spine_.size() > 1;
|
||||
}
|
||||
|
||||
node_ref<uint64_traits> get_parent() {
|
||||
if (spine_.size() < 2)
|
||||
throw std::runtime_error("no parent");
|
||||
|
||||
return to_node<uint64_traits>(spine_.front());
|
||||
}
|
||||
|
||||
block_address get_parent_location() const {
|
||||
return spine_.front().get_location();
|
||||
}
|
||||
|
||||
block_address get_root() const {
|
||||
return root_;
|
||||
}
|
||||
|
||||
private:
|
||||
transaction_manager::ptr tm_;
|
||||
std::list<block_manager<>::write_ref> spine_;
|
||||
block_address root_;
|
||||
};
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
class btree {
|
||||
public:
|
||||
typedef boost::shared_ptr<btree<Levels, ValueTraits> > ptr;
|
||||
|
||||
typedef uint64_t key[Levels];
|
||||
typedef typename ValueTraits::value_type value_type;
|
||||
typedef boost::optional<value_type> maybe_value;
|
||||
typedef boost::optional<std::pair<unsigned, value_type> > maybe_pair;
|
||||
typedef typename block_manager<>::read_ref read_ref;
|
||||
typedef typename block_manager<>::write_ref write_ref;
|
||||
typedef typename btree_detail::node_ref<ValueTraits> leaf_node;
|
||||
typedef typename btree_detail::node_ref<uint64_traits> internal_node;
|
||||
|
||||
btree(typename persistent_data::transaction_manager::ptr tm,
|
||||
typename ValueTraits::ref_counter rc);
|
||||
|
||||
btree(typename transaction_manager::ptr tm,
|
||||
block_address root,
|
||||
typename ValueTraits::ref_counter rc);
|
||||
|
||||
~btree();
|
||||
|
||||
maybe_value lookup(key const &key) const;
|
||||
maybe_pair lookup_le(key const &key) const;
|
||||
maybe_pair lookup_ge(key const &key) const;
|
||||
|
||||
void insert(key const &key, typename ValueTraits::value_type const &value);
|
||||
void remove(key const &key);
|
||||
|
||||
void set_root(block_address root);
|
||||
block_address get_root() const;
|
||||
|
||||
ptr clone() const;
|
||||
|
||||
// free the on disk btree when the destructor is called
|
||||
void destroy();
|
||||
|
||||
|
||||
// Derive a class from this base class if you need to
|
||||
// inspect the individual nodes that make up a btree.
|
||||
class visitor {
|
||||
public:
|
||||
virtual ~visitor() {}
|
||||
typedef boost::shared_ptr<visitor> ptr;
|
||||
|
||||
// The bool return values indicate whether the walk
|
||||
// should be continued into sub trees of the node (true == continue).
|
||||
virtual bool visit_internal(unsigned level, bool sub_root, boost::optional<uint64_t> key,
|
||||
internal_node const &n) = 0;
|
||||
virtual bool visit_internal_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
|
||||
internal_node const &n) = 0;
|
||||
virtual bool visit_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
|
||||
leaf_node const &n) = 0;
|
||||
|
||||
virtual void visit_complete() {}
|
||||
};
|
||||
|
||||
// Walks the tree in depth first order
|
||||
void visit(typename visitor::ptr visitor) const;
|
||||
|
||||
private:
|
||||
template <typename ValueTraits2, typename Search>
|
||||
optional<typename ValueTraits2::value_type>
|
||||
lookup_raw(btree_detail::ro_spine &spine, block_address block, uint64_t key) const;
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void split_node(btree_detail::shadow_spine &spine,
|
||||
block_address parent_index,
|
||||
uint64_t key,
|
||||
bool top);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void split_beneath(btree_detail::shadow_spine &spine, uint64_t key);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
void split_sibling(btree_detail::shadow_spine &spine,
|
||||
block_address parent_index,
|
||||
uint64_t key);
|
||||
|
||||
template <typename ValueTraits2>
|
||||
bool
|
||||
insert_location(btree_detail::shadow_spine &spine,
|
||||
block_address block,
|
||||
uint64_t key,
|
||||
int *index);
|
||||
|
||||
void walk_tree(typename visitor::ptr visitor,
|
||||
unsigned level, bool root, boost::optional<uint64_t> key,
|
||||
block_address b) const;
|
||||
|
||||
typename persistent_data::transaction_manager::ptr tm_;
|
||||
bool destroy_;
|
||||
block_address root_;
|
||||
NoOpRefCounter<uint64_t> internal_rc_;
|
||||
typename ValueTraits::ref_counter rc_;
|
||||
};
|
||||
};
|
||||
|
||||
#include "btree.tcc"
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
782
persistent-data/data-structures/btree.tcc
Normal file
782
persistent-data/data-structures/btree.tcc
Normal file
@ -0,0 +1,782 @@
|
||||
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
|
||||
//
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
#include "persistent-data/errors.h"
|
||||
#include "persistent-data/checksum.h"
|
||||
#include "persistent-data/transaction_manager.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace base;
|
||||
using namespace btree_detail;
|
||||
using namespace persistent_data;
|
||||
using namespace std;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
// Block validator for btree nodes.  Both methods treat the address of the
// buffer object as the start of a disk_node, and checksum
// MD_BLOCK_SIZE - sizeof(uint32_t) bytes starting at the header's flags
// field.
struct btree_node_validator : public block_manager<>::validator {
	// Throws checksum_error if the stored checksum, or the stored block
	// number, does not match.
	virtual void check(buffer<> const &b, block_address location) const {
		node_header const *hdr = &reinterpret_cast<disk_node const *>(&b)->header;

		crc32c crc(BTREE_CSUM_XOR);
		crc.append(&hdr->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (crc.get_sum() != to_cpu<uint32_t>(hdr->csum))
			throw checksum_error("bad checksum in btree node");

		if (to_cpu<uint64_t>(hdr->blocknr) != location)
			throw checksum_error("bad block nr in btree node");
	}

	// Stamps the block number into the header and then recomputes the
	// checksum over it.
	virtual void prepare(buffer<> &b, block_address location) const {
		node_header *hdr = &reinterpret_cast<disk_node *>(&b)->header;

		// blocknr lies inside the checksummed region, so it has to
		// be written before the sum is taken.
		hdr->blocknr = to_disk<base::__le64, uint64_t>(location);

		crc32c crc(BTREE_CSUM_XOR);
		crc.append(&hdr->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
		hdr->csum = to_disk<base::__le32>(crc.get_sum());
	}
};
|
||||
|
||||
// Returns a newly allocated btree node validator on every call.
block_manager<>::validator::ptr
btree_validator()
{
	block_manager<>::validator::ptr v(new btree_node_validator);

	return v;
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
inline void
|
||||
ro_spine::step(block_address b)
|
||||
{
|
||||
spine_.push_back(tm_->read_lock(b, btree_validator()));
|
||||
if (spine_.size() > 2)
|
||||
spine_.pop_front();
|
||||
}
|
||||
|
||||
inline bool
|
||||
shadow_spine::step(block_address b)
|
||||
{
|
||||
pair<write_ref, bool> p = tm_->shadow(b, btree_validator());
|
||||
try {
|
||||
step(p.first);
|
||||
} catch (...) {
|
||||
tm_->get_sm()->dec(p.first.get_location());
|
||||
throw;
|
||||
}
|
||||
return p.second;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
template <typename ValueTraits>
|
||||
node_ref<ValueTraits>::node_ref(block_address location, disk_node *raw)
|
||||
: location_(location),
|
||||
raw_(raw)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
uint32_t
|
||||
node_ref<ValueTraits>::get_checksum() const
|
||||
{
|
||||
return to_cpu<uint32_t>(raw_->header.csum);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
block_address
|
||||
node_ref<ValueTraits>::get_block_nr() const
|
||||
{
|
||||
return to_cpu<uint64_t>(raw_->header.blocknr);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
btree_detail::node_type
|
||||
node_ref<ValueTraits>::get_type() const
|
||||
{
|
||||
uint32_t flags = to_cpu<uint32_t>(raw_->header.flags);
|
||||
if (flags & INTERNAL_NODE) {
|
||||
if (flags & LEAF_NODE)
|
||||
throw runtime_error("btree node is both internal and leaf");
|
||||
return INTERNAL;
|
||||
|
||||
} else if (flags & LEAF_NODE)
|
||||
return LEAF;
|
||||
else
|
||||
throw runtime_error("unknown node type");
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_type(node_type t)
|
||||
{
|
||||
uint32_t flags = to_cpu<uint32_t>(raw_->header.flags);
|
||||
switch (t) {
|
||||
case INTERNAL:
|
||||
flags = INTERNAL_NODE;
|
||||
break;
|
||||
|
||||
case LEAF:
|
||||
flags = LEAF_NODE;
|
||||
break;
|
||||
}
|
||||
raw_->header.flags = to_disk<__le32>(flags);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
unsigned
|
||||
node_ref<ValueTraits>::get_nr_entries() const
|
||||
{
|
||||
return to_cpu<uint32_t>(raw_->header.nr_entries);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_nr_entries(unsigned n)
|
||||
{
|
||||
raw_->header.nr_entries = to_disk<__le32>(n);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
unsigned
|
||||
node_ref<ValueTraits>::get_max_entries() const
|
||||
{
|
||||
return to_cpu<uint32_t>(raw_->header.max_entries);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_max_entries(unsigned n)
|
||||
{
|
||||
raw_->header.max_entries = to_disk<__le32>(n);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_max_entries()
|
||||
{
|
||||
set_max_entries(calc_max_entries());
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
size_t
|
||||
node_ref<ValueTraits>::get_value_size() const
|
||||
{
|
||||
return to_cpu<uint32_t>(raw_->header.value_size);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_value_size(size_t s)
|
||||
{
|
||||
raw_->header.value_size = to_disk<__le32>(static_cast<uint32_t>(s));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
uint64_t
|
||||
node_ref<ValueTraits>::key_at(unsigned i) const
|
||||
{
|
||||
if (i >= get_nr_entries())
|
||||
throw runtime_error("key index out of bounds");
|
||||
return to_cpu<uint64_t>(raw_->keys[i]);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_key(unsigned i, uint64_t k)
|
||||
{
|
||||
raw_->keys[i] = to_disk<__le64>(k);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
typename ValueTraits::value_type
|
||||
node_ref<ValueTraits>::value_at(unsigned i) const
|
||||
{
|
||||
if (i >= get_nr_entries())
|
||||
throw runtime_error("value index out of bounds");
|
||||
|
||||
// We have to copy because of alignment issues.
|
||||
typename ValueTraits::disk_type d;
|
||||
::memcpy(&d, value_ptr(i), sizeof(d));
|
||||
|
||||
typename ValueTraits::value_type v;
|
||||
ValueTraits::unpack(d, v);
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::set_value(unsigned i,
|
||||
typename ValueTraits::value_type const &v)
|
||||
{
|
||||
typename ValueTraits::disk_type d;
|
||||
ValueTraits::pack(v, d);
|
||||
::memcpy(value_ptr(i), &d, sizeof(d));
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::insert_at(unsigned i,
|
||||
uint64_t key,
|
||||
typename ValueTraits::value_type const &v)
|
||||
{
|
||||
unsigned n = get_nr_entries();
|
||||
if ((n + 1) > get_max_entries())
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
set_nr_entries(n + 1);
|
||||
::memmove(key_ptr(i + 1), key_ptr(i), sizeof(uint64_t) * (n - i));
|
||||
::memmove(value_ptr(i + 1), value_ptr(i), sizeof(typename ValueTraits::disk_type) * (n - i));
|
||||
overwrite_at(i, key, v);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::overwrite_at(unsigned i,
|
||||
uint64_t key,
|
||||
typename ValueTraits::value_type const &v)
|
||||
{
|
||||
set_key(i, key);
|
||||
set_value(i, v);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
node_ref<ValueTraits>::copy_entries(node_ref const &rhs,
|
||||
unsigned begin,
|
||||
unsigned end)
|
||||
{
|
||||
unsigned count = end - begin;
|
||||
unsigned n = get_nr_entries();
|
||||
if ((n + count) > get_max_entries())
|
||||
throw runtime_error("too many entries");
|
||||
|
||||
::memcpy(key_ptr(n), rhs.key_ptr(begin), sizeof(uint64_t) * count);
|
||||
::memcpy(value_ptr(n), rhs.value_ptr(begin), sizeof(typename ValueTraits::disk_type) * count);
|
||||
set_nr_entries(n + count);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
int
|
||||
node_ref<ValueTraits>::bsearch(uint64_t key, int want_hi) const
|
||||
{
|
||||
int lo = -1, hi = get_nr_entries();
|
||||
|
||||
while(hi - lo > 1) {
|
||||
int mid = lo + ((hi - lo) / 2);
|
||||
uint64_t mid_key = key_at(mid);
|
||||
|
||||
if (mid_key == key)
|
||||
return mid;
|
||||
|
||||
if (mid_key < key)
|
||||
lo = mid;
|
||||
else
|
||||
hi = mid;
|
||||
}
|
||||
|
||||
return want_hi ? hi : lo;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
optional<unsigned>
|
||||
node_ref<ValueTraits>::exact_search(uint64_t key) const
|
||||
{
|
||||
int i = bsearch(key, 0);
|
||||
if (i < 0 || static_cast<unsigned>(i) >= get_nr_entries())
|
||||
return optional<unsigned>();
|
||||
|
||||
if (key != key_at(i))
|
||||
return optional<unsigned>();
|
||||
|
||||
return optional<unsigned>(i);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
int
|
||||
node_ref<ValueTraits>::lower_bound(uint64_t key) const
|
||||
{
|
||||
return bsearch(key, 0);
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
unsigned
|
||||
node_ref<ValueTraits>::calc_max_entries(void)
|
||||
{
|
||||
uint32_t total;
|
||||
|
||||
// key + value
|
||||
size_t elt_size = sizeof(uint64_t) + sizeof(typename ValueTraits::disk_type);
|
||||
total = (MD_BLOCK_SIZE - sizeof(struct node_header)) / elt_size;
|
||||
return (total / 3) * 3; // rounds down
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void *
|
||||
node_ref<ValueTraits>::key_ptr(unsigned i) const
|
||||
{
|
||||
return raw_->keys + i;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
void *
|
||||
node_ref<ValueTraits>::value_ptr(unsigned i) const
|
||||
{
|
||||
void *value_base = &raw_->keys[to_cpu<uint32_t>(raw_->header.max_entries)];
|
||||
return static_cast<unsigned char *>(value_base) +
|
||||
sizeof(typename ValueTraits::disk_type) * i;
|
||||
}
|
||||
|
||||
template <typename ValueTraits>
|
||||
template <typename RefCounter>
|
||||
void
|
||||
node_ref<ValueTraits>::inc_children(RefCounter &rc)
|
||||
{
|
||||
unsigned nr_entries = get_nr_entries();
|
||||
for (unsigned i = 0; i < nr_entries; i++) {
|
||||
typename ValueTraits::value_type v;
|
||||
typename ValueTraits::disk_type d;
|
||||
::memcpy(&d, value_ptr(i), sizeof(d));
|
||||
ValueTraits::unpack(d, v);
|
||||
rc.inc(v);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
btree<Levels, ValueTraits>::
|
||||
btree(typename transaction_manager::ptr tm,
|
||||
typename ValueTraits::ref_counter rc)
|
||||
: tm_(tm),
|
||||
destroy_(false),
|
||||
rc_(rc)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
write_ref root = tm_->new_block(btree_validator());
|
||||
|
||||
leaf_node n = to_node<ValueTraits>(root);
|
||||
n.set_type(btree_detail::LEAF);
|
||||
n.set_nr_entries(0);
|
||||
n.set_max_entries();
|
||||
n.set_value_size(sizeof(typename ValueTraits::disk_type));
|
||||
|
||||
root_ = root.get_location();
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
btree<Levels, ValueTraits>::
|
||||
btree(typename transaction_manager::ptr tm,
|
||||
block_address root,
|
||||
typename ValueTraits::ref_counter rc)
|
||||
: tm_(tm),
|
||||
destroy_(false),
|
||||
root_(root),
|
||||
rc_(rc)
|
||||
{
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
btree<Levels, ValueTraits>::~btree()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename ValueTraits>
|
||||
struct lower_bound_search {
|
||||
static optional<unsigned> search(btree_detail::node_ref<ValueTraits> n, uint64_t key) {
|
||||
return n.lower_bound(key);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ValueTraits>
|
||||
struct exact_search {
|
||||
static optional<unsigned> search(btree_detail::node_ref<ValueTraits> n, uint64_t key) {
|
||||
return n.exact_search(key);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
typename btree<Levels, ValueTraits>::maybe_value
|
||||
btree<Levels, ValueTraits>::lookup(key const &key) const
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
ro_spine spine(tm_);
|
||||
block_address root = root_;
|
||||
|
||||
for (unsigned level = 0; level < Levels - 1; ++level) {
|
||||
optional<block_address> mroot =
|
||||
lookup_raw<uint64_traits, lower_bound_search<uint64_traits> >(spine, root, key[level]);
|
||||
if (!mroot)
|
||||
return maybe_value();
|
||||
|
||||
root = *mroot;
|
||||
}
|
||||
|
||||
return lookup_raw<ValueTraits, exact_search<ValueTraits> >(spine, root, key[Levels - 1]);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
typename btree<Levels, ValueTraits>::maybe_pair
|
||||
btree<Levels, ValueTraits>::lookup_le(key const &key) const
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
return maybe_pair();
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
typename btree<Levels, ValueTraits>::maybe_pair
|
||||
btree<Levels, ValueTraits>::lookup_ge(key const &key) const
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
return maybe_pair();
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::
|
||||
insert(key const &key,
|
||||
typename ValueTraits::value_type const &value)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
block_address block = root_;
|
||||
int index = 0; // FIXME: ???
|
||||
shadow_spine spine(tm_);
|
||||
|
||||
for (unsigned level = 0; level < Levels - 1; ++level) {
|
||||
bool need_insert = insert_location<uint64_traits>(spine, block, key[level], &index);
|
||||
|
||||
internal_node n = spine.template get_node<uint64_traits>();
|
||||
if (need_insert) {
|
||||
btree<Levels - 1, ValueTraits> new_tree(tm_, rc_);
|
||||
n.insert_at(index, key[level], new_tree.get_root());
|
||||
}
|
||||
|
||||
block = n.value_at(index);
|
||||
}
|
||||
|
||||
bool need_insert = insert_location<ValueTraits>(spine, block, key[Levels - 1], &index);
|
||||
|
||||
leaf_node n = spine.template get_node<ValueTraits>();
|
||||
if (need_insert)
|
||||
n.insert_at(index, key[Levels - 1], value);
|
||||
else
|
||||
// FIXME: check if we're overwriting with the same value.
|
||||
n.set_value(index, value);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::remove(key const &key)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
block_address
|
||||
btree<Levels, ValueTraits>::get_root() const
|
||||
{
|
||||
return root_;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::set_root(block_address root)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
root_ = root;
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
typename btree<Levels, ValueTraits>::ptr
|
||||
btree<Levels, ValueTraits>::clone() const
|
||||
{
|
||||
tm_->get_sm()->inc(root_);
|
||||
return ptr(new btree<Levels, ValueTraits>(tm_, root_, rc_));
|
||||
}
|
||||
|
||||
#if 0
// Compiled out: placeholder for tearing down the tree.  The body is
// empty.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::destroy()
{
	using namespace btree_detail;
}
#endif
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits, typename Search>
|
||||
optional<typename ValueTraits::value_type>
|
||||
btree<Levels, _>::
|
||||
lookup_raw(ro_spine &spine, block_address block, uint64_t key) const
|
||||
{
|
||||
using namespace boost;
|
||||
typedef typename ValueTraits::value_type leaf_type;
|
||||
|
||||
for (;;) {
|
||||
spine.step(block);
|
||||
node_ref<ValueTraits> leaf = spine.template get_node<ValueTraits>();
|
||||
|
||||
optional<unsigned> mi;
|
||||
if (leaf.get_type() == btree_detail::LEAF) {
|
||||
mi = Search::search(leaf, key);
|
||||
if (!mi)
|
||||
return optional<leaf_type>();
|
||||
return optional<leaf_type>(leaf.value_at(*mi));
|
||||
|
||||
}
|
||||
|
||||
mi = leaf.lower_bound(key);
|
||||
if (!mi || *mi < 0)
|
||||
return optional<leaf_type>();
|
||||
|
||||
node_ref<uint64_traits> internal = spine.template get_node<uint64_traits>();
|
||||
block = internal.value_at(*mi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
split_node(btree_detail::shadow_spine &spine,
|
||||
block_address parent_index,
|
||||
uint64_t key,
|
||||
bool top)
|
||||
{
|
||||
node_ref<ValueTraits> n = spine.template get_node<ValueTraits>();
|
||||
if (n.get_nr_entries() == n.get_max_entries()) {
|
||||
if (top)
|
||||
split_beneath<ValueTraits>(spine, key);
|
||||
else
|
||||
split_sibling<ValueTraits>(spine, parent_index, key);
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
split_beneath(btree_detail::shadow_spine &spine,
|
||||
uint64_t key)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
node_type type;
|
||||
unsigned nr_left, nr_right;
|
||||
|
||||
write_ref left = tm_->new_block(btree_validator());
|
||||
node_ref<ValueTraits> l = to_node<ValueTraits>(left);
|
||||
l.set_nr_entries(0);
|
||||
l.set_max_entries();
|
||||
l.set_value_size(sizeof(typename ValueTraits::disk_type));
|
||||
|
||||
write_ref right = tm_->new_block(btree_validator());
|
||||
node_ref<ValueTraits> r = to_node<ValueTraits>(right);
|
||||
r.set_nr_entries(0);
|
||||
r.set_max_entries();
|
||||
r.set_value_size(sizeof(typename ValueTraits::disk_type));
|
||||
|
||||
{
|
||||
node_ref<ValueTraits> p = spine.template get_node<ValueTraits>();
|
||||
|
||||
if (p.get_value_size() != sizeof(typename ValueTraits::disk_type))
|
||||
throw std::runtime_error("bad value_size");
|
||||
|
||||
nr_left = p.get_nr_entries() / 2;
|
||||
nr_right = p.get_nr_entries() - nr_left;
|
||||
type = p.get_type();
|
||||
|
||||
l.set_type(type);
|
||||
l.copy_entries(p, 0, nr_left);
|
||||
|
||||
r.set_type(type);
|
||||
r.copy_entries(p, nr_left, nr_left + nr_right);
|
||||
}
|
||||
|
||||
{
|
||||
// The parent may have changed value type, so we re-get it.
|
||||
internal_node p = spine.template get_node<uint64_traits>();
|
||||
p.set_type(btree_detail::INTERNAL);
|
||||
p.set_max_entries();
|
||||
p.set_nr_entries(2);
|
||||
p.set_value_size(sizeof(typename uint64_traits::disk_type));
|
||||
|
||||
p.overwrite_at(0, l.key_at(0), left.get_location());
|
||||
p.overwrite_at(1, r.key_at(0), right.get_location());
|
||||
}
|
||||
|
||||
if (key < r.key_at(0))
|
||||
spine.step(left);
|
||||
else
|
||||
spine.step(right);
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
void
|
||||
btree<Levels, _>::
|
||||
split_sibling(btree_detail::shadow_spine &spine,
|
||||
block_address parent_index,
|
||||
uint64_t key)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
node_ref<ValueTraits> l = spine.template get_node<ValueTraits>();
|
||||
block_address left = spine.get_block();
|
||||
|
||||
write_ref right = tm_->new_block(btree_validator());
|
||||
node_ref<ValueTraits> r = to_node<ValueTraits>(right);
|
||||
|
||||
unsigned nr_left = l.get_nr_entries() / 2;
|
||||
unsigned nr_right = l.get_nr_entries() - nr_left;
|
||||
|
||||
r.set_nr_entries(0);
|
||||
r.set_max_entries();
|
||||
r.set_type(l.get_type());
|
||||
r.set_value_size(sizeof(typename ValueTraits::disk_type));
|
||||
r.copy_entries(l, nr_left, nr_left + nr_right);
|
||||
l.set_nr_entries(nr_left);
|
||||
|
||||
internal_node p = spine.get_parent();
|
||||
p.overwrite_at(parent_index, l.key_at(0), left);
|
||||
p.insert_at(parent_index + 1, r.key_at(0), right.get_location());
|
||||
|
||||
spine.pop();
|
||||
if (key < r.key_at(0))
|
||||
spine.step(left);
|
||||
else
|
||||
spine.step(right);
|
||||
}
|
||||
|
||||
// Returns true if we need a new insertion, rather than overwrite.
|
||||
template <unsigned Levels, typename _>
|
||||
template <typename ValueTraits>
|
||||
bool
|
||||
btree<Levels, _>::
|
||||
insert_location(btree_detail::shadow_spine &spine,
|
||||
block_address block,
|
||||
uint64_t key,
|
||||
int *index)
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
bool top = true; // this isn't the same as spine.has_parent()
|
||||
int i = *index;
|
||||
bool inc = false;
|
||||
|
||||
for (;;) {
|
||||
inc = spine.step(block);
|
||||
#if 0
|
||||
if (inc)
|
||||
inc_children<ValueTraits>();
|
||||
#endif
|
||||
|
||||
// patch up the parent to point to the new shadow
|
||||
if (spine.has_parent()) {
|
||||
internal_node p = spine.get_parent();
|
||||
p.set_value(i, spine.get_block());
|
||||
}
|
||||
|
||||
internal_node internal = spine.template get_node<uint64_traits>();
|
||||
|
||||
// Split the node if we're full
|
||||
if (internal.get_type() == INTERNAL)
|
||||
split_node<uint64_traits>(spine, i, key, top);
|
||||
else
|
||||
split_node<ValueTraits>(spine, i, key, top);
|
||||
|
||||
internal = spine.template get_node<uint64_traits>();
|
||||
i = internal.lower_bound(key);
|
||||
if (internal.get_type() == btree_detail::LEAF)
|
||||
break;
|
||||
|
||||
if (i < 0) {
|
||||
internal.set_key(0, key);
|
||||
i = 0;
|
||||
}
|
||||
|
||||
block = internal.value_at(i);
|
||||
top = false;
|
||||
}
|
||||
|
||||
node_ref<ValueTraits> leaf = spine.template get_node<ValueTraits>();
|
||||
// FIXME: gross
|
||||
if (i < 0 || leaf.key_at(i) != key)
|
||||
i++;
|
||||
|
||||
// do decrement the old value if it already exists
|
||||
// FIXME: I'm not sure about this, I don't understand the |inc| reference
|
||||
if (static_cast<unsigned>(i) < leaf.get_nr_entries() && leaf.key_at(i) == key && inc) {
|
||||
// dec old entry
|
||||
}
|
||||
*index = i;
|
||||
|
||||
return ((static_cast<unsigned>(i) >= leaf.get_nr_entries()) ||
|
||||
(leaf.key_at(i) != key));
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::visit(typename visitor::ptr visitor) const
|
||||
{
|
||||
walk_tree(visitor, 0, true, boost::optional<uint64_t>(), root_);
|
||||
visitor->visit_complete();
|
||||
}
|
||||
|
||||
template <unsigned Levels, typename ValueTraits>
|
||||
void
|
||||
btree<Levels, ValueTraits>::
|
||||
walk_tree(typename visitor::ptr visitor,
|
||||
unsigned level, bool sub_root,
|
||||
boost::optional<uint64_t> key,
|
||||
block_address b) const
|
||||
{
|
||||
using namespace btree_detail;
|
||||
|
||||
read_ref blk = tm_->read_lock(b);
|
||||
internal_node o = to_node<uint64_traits>(blk);
|
||||
if (o.get_type() == INTERNAL) {
|
||||
if (visitor->visit_internal(level, sub_root, key, o))
|
||||
for (unsigned i = 0; i < o.get_nr_entries(); i++)
|
||||
walk_tree(visitor, level, false, o.key_at(i), o.value_at(i));
|
||||
|
||||
} else if (level < Levels - 1) {
|
||||
if (visitor->visit_internal_leaf(level, sub_root, key, o))
|
||||
for (unsigned i = 0; i < o.get_nr_entries(); i++)
|
||||
walk_tree(visitor, level + 1, true, boost::optional<uint64_t>(o.key_at(i)), o.value_at(i));
|
||||
|
||||
} else {
|
||||
leaf_node ov = to_node<ValueTraits>(blk);
|
||||
visitor->visit_leaf(level, sub_root, key, ov);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
305
persistent-data/data-structures/btree_checker.h
Normal file
305
persistent-data/data-structures/btree_checker.h
Normal file
@ -0,0 +1,305 @@
|
||||
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
|
||||
//
|
||||
// This file is part of the thin-provisioning-tools source.
|
||||
//
|
||||
// thin-provisioning-tools is free software: you can redistribute it
|
||||
// and/or modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation, either version 3 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// thin-provisioning-tools is distributed in the hope that it will be
|
||||
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with thin-provisioning-tools. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef BTREE_CHECKER_H
|
||||
#define BTREE_CHECKER_H
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
#include "persistent-data/block_counter.h"
|
||||
#include "persistent-data/checksum.h"
|
||||
#include "persistent-data/error_set.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
using namespace persistent_data;
|
||||
using namespace std;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace persistent_data {
|
||||
//----------------------------------------------------------------
|
||||
// This class implements consistency checking for the btrees in
|
||||
// general. Derive from this if you want some additional checks.
|
||||
// It's worth summarising what is checked:
|
||||
//
|
||||
// Implemented
|
||||
// -----------
|
||||
//
|
||||
// - block_nr
|
||||
// - nr_entries < max_entries
|
||||
// - max_entries fits in block
|
||||
// - max_entries is divisible by 3
|
||||
// - nr_entries > minimum (except for root nodes)
|
||||
//
|
||||
// Not implemented
|
||||
// ---------------
|
||||
//
|
||||
// - leaf | internal flags (this can be inferred from siblings)
|
||||
//----------------------------------------------------------------
|
||||
template <uint32_t Levels, typename ValueTraits>
|
||||
class btree_checker : public btree<Levels, ValueTraits>::visitor {
|
||||
public:
|
||||
btree_checker(block_counter &counter, bool avoid_repeated_visits = true)
|
||||
: counter_(counter),
|
||||
errs_(new error_set("btree errors")),
|
||||
avoid_repeated_visits_(avoid_repeated_visits) {
|
||||
}
|
||||
|
||||
bool visit_internal(unsigned level,
|
||||
bool sub_root,
|
||||
optional<uint64_t> key,
|
||||
btree_detail::node_ref<uint64_traits> const &n) {
|
||||
return check_internal(level, sub_root, key, n);
|
||||
}
|
||||
|
||||
bool visit_internal_leaf(unsigned level,
|
||||
bool sub_root,
|
||||
optional<uint64_t> key,
|
||||
btree_detail::node_ref<uint64_traits> const &n) {
|
||||
return check_leaf(level, sub_root, key, n);
|
||||
}
|
||||
|
||||
bool visit_leaf(unsigned level,
|
||||
bool sub_root,
|
||||
optional<uint64_t> key,
|
||||
btree_detail::node_ref<ValueTraits> const &n) {
|
||||
return check_leaf(level, sub_root, key, n);
|
||||
}
|
||||
|
||||
error_set::ptr get_errors() const {
|
||||
return errs_;
|
||||
}
|
||||
|
||||
protected:
|
||||
block_counter &get_counter() {
|
||||
return counter_;
|
||||
}
|
||||
|
||||
private:
|
||||
bool check_internal(unsigned level,
|
||||
bool sub_root,
|
||||
optional<uint64_t> key,
|
||||
btree_detail::node_ref<uint64_traits> const &n) {
|
||||
if (!already_visited(n) &&
|
||||
check_sum(n) &&
|
||||
check_block_nr(n) &&
|
||||
check_max_entries(n) &&
|
||||
check_nr_entries(n, sub_root) &&
|
||||
check_ordered_keys(n) &&
|
||||
check_parent_key(sub_root ? optional<uint64_t>() : key, n)) {
|
||||
if (sub_root)
|
||||
new_root(level);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ValueTraits2>
|
||||
bool check_leaf(unsigned level,
|
||||
bool sub_root,
|
||||
optional<uint64_t> key,
|
||||
btree_detail::node_ref<ValueTraits2> const &n) {
|
||||
if (!already_visited(n) &&
|
||||
check_sum(n) &&
|
||||
check_block_nr(n) &&
|
||||
check_max_entries(n) &&
|
||||
check_nr_entries(n, sub_root) &&
|
||||
check_ordered_keys(n) &&
|
||||
check_parent_key(sub_root ? optional<uint64_t>() : key, n)) {
|
||||
if (sub_root)
|
||||
new_root(level);
|
||||
|
||||
return check_leaf_key(level, n);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
template <typename node>
|
||||
bool already_visited(node const &n) {
|
||||
block_address b = n.get_location();
|
||||
|
||||
counter_.inc(b);
|
||||
|
||||
if (avoid_repeated_visits_) {
|
||||
if (seen_.count(b) > 0)
|
||||
return true;
|
||||
|
||||
seen_.insert(b);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_sum(node const &n) const {
|
||||
crc32c sum(BTREE_CSUM_XOR);
|
||||
|
||||
disk_node const *data = n.raw();
|
||||
sum.append(&data->header.flags, MD_BLOCK_SIZE - sizeof(uint32_t));
|
||||
if (sum.get_sum() != n.get_checksum()) {
|
||||
std::ostringstream out;
|
||||
out << "checksum error for block " << n.get_block_nr()
|
||||
<< ", sum was " << sum.get_sum()
|
||||
<< ", on disk " << n.get_checksum();
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_block_nr(node const &n) const {
|
||||
if (n.get_location() != n.get_block_nr()) {
|
||||
std::ostringstream out;
|
||||
out << "block number mismatch: actually "
|
||||
<< n.get_location()
|
||||
<< ", claims " << n.get_block_nr();
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_max_entries(node const &n) const {
|
||||
size_t elt_size = sizeof(uint64_t) + n.get_value_size();
|
||||
if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) {
|
||||
std::ostringstream out;
|
||||
out << "max entries too large: " << n.get_max_entries();
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (n.get_max_entries() % 3) {
|
||||
std::ostringstream out;
|
||||
out << "max entries is not divisible by 3: " << n.get_max_entries();
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_nr_entries(node const &n, bool is_root) const {
|
||||
if (n.get_nr_entries() > n.get_max_entries()) {
|
||||
std::ostringstream out;
|
||||
out << "bad nr_entries: "
|
||||
<< n.get_nr_entries() << " < "
|
||||
<< n.get_max_entries();
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
block_address min = n.get_max_entries() / 3;
|
||||
if (!is_root && (n.get_nr_entries() < min)) {
|
||||
ostringstream out;
|
||||
out << "too few entries in btree: "
|
||||
<< n.get_nr_entries()
|
||||
<< ", expected at least "
|
||||
<< min
|
||||
<< "(max_entries = " << n.get_max_entries() << ")";
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_ordered_keys(node const &n) const {
|
||||
unsigned nr_entries = n.get_nr_entries();
|
||||
|
||||
if (nr_entries == 0)
|
||||
return true; // can only happen if a root node
|
||||
|
||||
uint64_t last_key = n.key_at(0);
|
||||
|
||||
for (unsigned i = 1; i < nr_entries; i++) {
|
||||
uint64_t k = n.key_at(i);
|
||||
if (k <= last_key) {
|
||||
ostringstream out;
|
||||
out << "keys are out of order, " << k << " <= " << last_key;
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
last_key = k;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_parent_key(boost::optional<uint64_t> key, node const &n) const {
|
||||
if (!key)
|
||||
return true;
|
||||
|
||||
if (*key > n.key_at(0)) {
|
||||
ostringstream out;
|
||||
out << "parent key mismatch: parent was " << *key
|
||||
<< ", but lowest in node was " << n.key_at(0);
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename node>
|
||||
bool check_leaf_key(unsigned level, node const &n) {
|
||||
if (n.get_nr_entries() == 0)
|
||||
return true; // can only happen if a root node
|
||||
|
||||
if (last_leaf_key_[level] && *last_leaf_key_[level] >= n.key_at(0)) {
|
||||
ostringstream out;
|
||||
out << "the last key of the previous leaf was " << *last_leaf_key_[level]
|
||||
<< " and the first key of this leaf is " << n.key_at(0);
|
||||
errs_->add_child(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
last_leaf_key_[level] = n.key_at(n.get_nr_entries() - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
void new_root(unsigned level) {
|
||||
// we're starting a new subtree, so should
|
||||
// reset the last_leaf value.
|
||||
last_leaf_key_[level] = boost::optional<uint64_t>();
|
||||
}
|
||||
|
||||
block_counter &counter_;
|
||||
std::set<block_address> seen_;
|
||||
error_set::ptr errs_;
|
||||
boost::optional<uint64_t> last_leaf_key_[Levels];
|
||||
bool avoid_repeated_visits_;
|
||||
};
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user