From add23c1709d1e77e99456745b4c0c0a88d5ba101 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 13 Mar 2013 13:28:03 +0000 Subject: [PATCH] Array data structure --- persistent-data/data-structures/array.h | 351 ++++++++++++++++------ persistent-data/data-structures/array.tcc | 290 ------------------ unit-tests/Makefile.in | 4 + unit-tests/array_t.cc | 122 ++++++++ 4 files changed, 383 insertions(+), 384 deletions(-) delete mode 100644 persistent-data/data-structures/array.tcc create mode 100644 unit-tests/array_t.cc diff --git a/persistent-data/data-structures/array.h b/persistent-data/data-structures/array.h index 7043742..bedb406 100644 --- a/persistent-data/data-structures/array.h +++ b/persistent-data/data-structures/array.h @@ -19,140 +19,303 @@ #ifndef ARRAY_H #define ARRAY_H -#include "btree.h" +#include "persistent-data/math_utils.h" +#include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/array_block.h" //---------------------------------------------------------------- +// FIXME: we need an array checker + namespace persistent_data { - struct array_block_disk { - __le32 csum; - __le32 max_entries; - __le32 nr_entries; - __le32 value_size; - __le64 blocknr; - } __attribute__((packed)); + namespace array_detail { + uint32_t const ARRAY_CSUM_XOR = 595846735; - template - class ro_array_block { - public: - typedef typename ValueTraits::value_type value_type; - typedef block_manager<>::read_ref read_ref; + struct array_block_validator : public block_manager<>::validator { + virtual void check(buffer<> const &b, block_address location) const { + array_block_disk const *data = reinterpret_cast(&b); + crc32c sum(ARRAY_CSUM_XOR); + sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(data->csum)) + throw checksum_error("bad checksum in array block node"); - ro_array_block(read_ref rr); + if (to_cpu(data->blocknr) != location) + throw checksum_error("bad block nr in array block"); + } - unsigned nr_entries() const; - value_type get(unsigned index) const; + virtual void prepare(buffer<> &b, block_address location) const { + array_block_disk *data = reinterpret_cast(&b); + data->blocknr = to_disk(location); - private: - const void *element_at(unsigned int index) const; + crc32c sum(ARRAY_CSUM_XOR); + sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); + data->csum = to_disk(sum.get_sum()); + } + }; - read_ref rr_; - }; + struct array_dim { + array_dim(unsigned nr_entries, unsigned entries_per_block) + : nr_full_blocks(nr_entries / entries_per_block), + nr_entries_in_last_block(nr_entries % entries_per_block) { + } - template - class array_block : public ro_array_block { - public: - typedef typename ValueTraits::value_type value_type; - typedef block_manager<>::write_ref write_ref; + unsigned nr_full_blocks; + unsigned nr_entries_in_last_block; + }; - array_block(write_ref wr); - - // No virtual methods, so no need for a virtual destructor. - // Not really sure inheritance is the right relationship - // though. - - void set(unsigned index, value_type const &v); - void inc_all_entries(typename ValueTraits::ref_counter &rc); - void dec_all_entries(typename ValueTraits::ref_counter &rc); - - block_address address() const; - - // FIXME: why isn't this visible? - //using ro_array_block::nr_entries(); - - private: - void *element_at(unsigned int index); - - write_ref wr_; - }; - - class BlockRefCounter { - public: - void inc(uint64_t const &v) {} - void dec(uint64_t const &v) {} - }; - - template - struct array_block_traits { - typedef base::__le64 disk_type; - typedef array_block value_type; - typedef NoOpRefCounter ref_counter; - - static void unpack(disk_type const &disk, value_type &value) { - value = base::to_cpu(disk); + unsigned calc_max_entries(size_t value_size, size_t block_size) + { + return (block_size - sizeof(struct array_block_disk)) / value_size; } - - static void pack(value_type const &value, disk_type &disk) { - disk = base::to_disk(value); - } - }; + } template class array { public: + class block_ref_counter : public ref_counter { + public: + block_ref_counter(space_map::ptr sm, + array &a) + : sm_(sm), + a_(a) { + } + + virtual void set(uint64_t b, uint32_t rc) { + sm_->set_count(b, rc); + if (rc == 0) + dec_values(b); + } + + virtual void inc(uint64_t b) { + sm_->inc(b); + } + + virtual void dec(uint64_t b) { + sm_->dec(b); + if (sm_->get_count(b) == 0) + dec_values(b); + } + + private: + void dec_values(uint64_t b) { + a_.dec_ablock_entries(b); + } + + space_map::ptr sm_; + array &a_; + }; + friend class block_ref_counter; + + struct block_traits { + typedef base::__le64 disk_type; + typedef block_address value_type; + typedef block_ref_counter ref_counter; + + static void unpack(disk_type const &disk, value_type &value) { + value = base::to_cpu(disk); + } + + static void pack(value_type const &value, disk_type &disk) { + disk = base::to_disk(value); + } + }; + typedef typename persistent_data::transaction_manager::ptr tm_ptr; + typedef block_manager<>::write_ref write_ref; + typedef block_manager<>::read_ref read_ref; + + typedef array_block::write_ref> wblock; + typedef array_block::read_ref> rblock; + typedef boost::shared_ptr > ptr; typedef typename ValueTraits::value_type value_type; array(tm_ptr tm, - typename ValueTraits::ref_counter rc, - unsigned nr_entries, - value_type const &default_value); + typename ValueTraits::ref_counter rc) + : tm_(tm), + entries_per_block_(rblock::calc_max_entries()), + nr_entries_(0), + block_rc_(tm->get_sm(), *this), + block_tree_(tm, block_rc_), + rc_(rc) { + } array(tm_ptr tm, typename ValueTraits::ref_counter rc, - block_address root); + unsigned nr_entries, + block_address root) + : tm_(tm), + entries_per_block_(rblock::calc_max_entries()), + nr_entries_(nr_entries), + block_rc_(tm->get_sm(), *this), + block_tree_(tm, root, block_rc_), + rc_(rc) { + } - void set_root(block_address root); - block_address get_root() const; + unsigned get_nr_entries() const { + return nr_entries_; + } - void destroy(); + // FIXME: why is this needed? + void set_root(block_address root) { + block_tree_.set_root(root); + } - void grow(unsigned old_size, unsigned new_size, value_type const &v); - void shrink(unsigned old_size, unsigned new_size); + block_address get_root() const { + return block_tree_.get_root(); + } - value_type const &get(unsigned index) const; - void set(unsigned index, value_type const &value); + void destroy() { + block_tree_.destroy(); // FIXME: not implemented + } + void grow(unsigned new_nr_entries, value_type const &v) { + resizer r(*this, nr_entries_, new_nr_entries, entries_per_block_, v); + r.grow(new_nr_entries, v); + } + + value_type get(unsigned index) const { + rblock b = get_ablock(index / entries_per_block_); + return b.get(index % entries_per_block_); + } + + void set(unsigned index, value_type const &value) { + wblock b = shadow_ablock(index / entries_per_block_); + b.set(index % entries_per_block_, value); + } private: - array_block new_ablock(); - ro_array_block get_ablock(unsigned block_index) const; - array_block shadow_ablock(unsigned block_index); - void fill_tail_block(array_block &ab, - value_type v, - unsigned nr_entries); - void insert_full_blocks(unsigned begin_index, unsigned end_index, - value_type v); - void insert_tail_block(unsigned index, unsigned nr_entries, value_type v); + struct resizer { + resizer(array &a, + unsigned old_size, + unsigned new_size, + unsigned entries_per_block, + typename ValueTraits::value_type const &v) + : a_(a), + old_dim_(old_size, entries_per_block), + new_dim_(new_size, entries_per_block), + entries_per_block_(entries_per_block), + v_(v) { + } + void grow(unsigned new_nr_entries, value_type const &v) { + if (new_dim_.nr_full_blocks > old_dim_.nr_full_blocks) + grow_needs_more_blocks(); + + else if (old_dim_.nr_entries_in_last_block > 0) + grow_extend_tail_block(new_dim_.nr_entries_in_last_block); + + else + grow_add_tail_block(); + } + + private: + void insert_full_ablocks(unsigned begin_index, unsigned end_index) { + while (begin_index != end_index) { + wblock b = a_.new_ablock(begin_index); + b.grow(entries_per_block_, v_); + + begin_index++; + } + } + + void grow_add_tail_block() { + wblock b = a_.new_ablock(new_dim_.nr_full_blocks); + b.grow(new_dim_.nr_entries_in_last_block, v_); + } + + void grow_needs_more_blocks() { + if (old_dim_.nr_entries_in_last_block > 0) + grow_extend_tail_block(entries_per_block_); + + insert_full_ablocks(old_dim_.nr_full_blocks + (old_dim_.nr_entries_in_last_block ? 1 : 0), + new_dim_.nr_full_blocks); + + if (new_dim_.nr_entries_in_last_block > 0) + grow_add_tail_block(); + } + + void grow_extend_tail_block(unsigned new_nr_entries) { + uint64_t last_block = div_up(a_.nr_entries_, entries_per_block_); + wblock b = a_.shadow_ablock(last_block); + b.grow(new_nr_entries % entries_per_block_, v_); + } + + array &a_; + array_detail::array_dim old_dim_; + array_detail::array_dim new_dim_; + unsigned entries_per_block_; + + typename ValueTraits::value_type const &v_; + }; + + friend class resizer; + + //-------------------------------- + + block_manager<>::validator::ptr validator() const { + return block_manager<>::validator::ptr( + new block_manager<>::noop_validator()); + } + + block_address lookup_block_address(unsigned array_index) const { + uint64_t key[1] = {array_index}; + boost::optional addr = block_tree_.lookup(key); + if (!addr) { + std::ostringstream str; + str << "lookup of array block " << array_index << " failed"; + throw runtime_error(str.str()); + } + + return *addr; + } + + wblock new_ablock(unsigned ablock_index) { + uint64_t key[1] = {ablock_index}; + write_ref b = tm_->new_block(validator()); + block_address location = b.get_location(); + + wblock wb(b, rc_); + wb.setup_empty(); + block_tree_.insert(key, location); + return wblock(b, rc_); + } + + rblock get_ablock(unsigned ablock_index) const { + block_address addr = lookup_block_address(ablock_index); + return rblock(tm_->read_lock(addr, validator()), rc_); + } + + wblock shadow_ablock(unsigned ablock_index) { + uint64_t key[1] = {ablock_index}; + block_address addr = lookup_block_address(ablock_index); + std::pair p = tm_->shadow(addr, validator()); + wblock wb = wblock(p.first, rc_); + + if (p.second) + wb.inc_all_entries(); + + block_tree_.insert(key, p.first.get_location()); + + return wb; + } + + void dec_ablock_entries(block_address addr) { + rblock b(tm_->read_lock(addr, validator()), rc_); + b.dec_all_entries(); + } tm_ptr tm_; - bool destroy_; - unsigned entries_per_block_; // FIXME: initialise - - typedef array_block_traits btree_traits; - btree<1, btree_traits> block_tree_; + unsigned entries_per_block_; + unsigned nr_entries_; + block_ref_counter block_rc_; + btree<1, block_traits> block_tree_; typename ValueTraits::ref_counter rc_; }; - - // FIXME: we need an array checker } -#include "array.tcc" - //---------------------------------------------------------------- #endif diff --git a/persistent-data/data-structures/array.tcc b/persistent-data/data-structures/array.tcc deleted file mode 100644 index 0cf638c..0000000 --- a/persistent-data/data-structures/array.tcc +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright (C) 2012 Red Hat, Inc. All rights reserved. -// -// This file is part of the thin-provisioning-tools source. -// -// thin-provisioning-tools is free software: you can redistribute it -// and/or modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// thin-provisioning-tools is distributed in the hope that it will be -// useful, but WITHOUT ANY WARRANTY; without even the implied warranty -// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with thin-provisioning-tools. If not, see -// . - -using namespace base; -using namespace persistent_data; - -//---------------------------------------------------------------- - -namespace { - uint32_t const ARRAY_CSUM_XOR = 595846735; - - struct array_block_validator : public block_manager<>::validator { - virtual void check(buffer<> const &b, block_address location) const { - array_block_disk const *data = reinterpret_cast(&b); - crc32c sum(ARRAY_CSUM_XOR); - sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); - if (sum.get_sum() != to_cpu(data->csum)) - throw checksum_error("bad checksum in array block node"); - - if (to_cpu(data->blocknr) != location) - throw checksum_error("bad block nr in array block"); - } - - virtual void prepare(buffer<> &b, block_address location) const { - array_block_disk *data = reinterpret_cast(&b); - data->blocknr = to_disk(location); - - crc32c sum(ARRAY_CSUM_XOR); - sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); - data->csum = to_disk(sum.get_sum()); - } - }; - - struct array_dim { - array_dim(unsigned nr_entries, unsigned entries_per_block) - : nr_full_blocks(nr_entries / entries_per_block), - nr_entries_in_last_block(nr_entries % entries_per_block) { - } - - unsigned nr_full_blocks; - unsigned nr_entries_in_last_block; - }; - - unsigned calc_max_entries(size_t value_size, size_t block_size) - { - return (block_size - sizeof(struct array_block_disk)) / value_size; - } -} - -//---------------------------------------------------------------- - -template -array::array(typename persistent_data::transaction_manager::ptr tm, - typename ValueTraits::ref_counter rc, - unsigned nr_entries, - value_type const &default_value) - : tm_(tm), - destroy_(false), - entries_per_block_(calc_max_entries(sizeof(value_type), MD_BLOCK_SIZE)), - block_tree_(tm, typename btree_traits::ref_counter()), - rc_(rc) -{ -} - -template -array::array(typename persistent_data::transaction_manager::ptr tm, - typename ValueTraits::ref_counter rc, - block_address root) - : tm_(tm), - destroy_(false), - entries_per_block_(calc_max_entries(sizeof(value_type), MD_BLOCK_SIZE)), - block_tree_(tm, root, typename btree_traits::ref_counter()), - rc_(rc) -{ -} - -template -void -array::set_root(block_address root) -{ - block_tree_.set_root(root); -} - -template -block_address -array::get_root() const -{ - return block_tree_.get_root(); -} - -template -void -array::destroy() -{ - block_tree_.destroy(); -} - -template -void -array::grow(unsigned old_size, unsigned new_size, - typename ValueTraits::value_type const &v) -{ - array_dim old_dim(old_size, entries_per_block_); - array_dim new_dim(new_size, entries_per_block_); - - if (new_dim.nr_full_blocks > old_dim.nr_full_blocks) { - if (old_dim.nr_entries_in_last_block > 0) { - array_block ab = shadow_ablock(old_dim.nr_full_blocks); - fill_tail_block(ab, v, entries_per_block_); - } - - insert_full_blocks(old_dim.nr_full_blocks, new_dim.nr_full_blocks + 1, v); - insert_tail_block(new_dim.nr_full_blocks, new_dim.nr_entries_in_last_block, v); - } else { - array_block ab = get_ablock(new_dim.nr_full_blocks - 1u); - fill_tail_block(ab, v, new_dim.nr_entries_in_last_block); - } -} - -template -void -array::shrink(unsigned old_size, unsigned new_size) -{ - -} - -template -typename array::value_type const & -array::get(unsigned index) const -{ - array_block ab = get_ablock(index / entries_per_block_); - return ab.get(index % entries_per_block_); -} - -template -void -array::set(unsigned index, value_type const &value) -{ - array_block ab = shadow_ablock(index / entries_per_block_); - ab.set(index % entries_per_block_, value); -} - -template -ro_array_block -array::get_ablock(unsigned block_index) const -{ - return ro_array_block(tm_->read_lock(block_index)); -} - -template -array_block -array::shadow_ablock(unsigned block_index) -{ - typedef typename block_manager<>::write_ref write_ref; - - transaction_manager::validator v(new array_block_validator); - std::pair p = tm_->shadow(block_index, v); - array_block ab(p.first); - - if (p.second) - ab.inc_all_entries(); - - uint64_t key[1]; - key[0] = block_index; - block_tree_.insert(key, ab.get_location()); - - return ab; -} - -template -void -array::fill_tail_block(array_block &ab, - value_type v, - unsigned nr_entries) -{ - for (unsigned i = ab.nr_entries(); i < nr_entries; i++) - ab.set(i, v); -} - -template -void -array::insert_full_blocks(unsigned begin_index, - unsigned end_index, - value_type v) -{ - array_block ab = new_ablock(); - space_map::ptr sm = tm_->get_sm(); - - for (unsigned i = 0; i < entries_per_block_; i++) - ab.set(i, v); - - for (uint64_t b = begin_index; b < end_index; b++) { - block_tree_.insert(b, ab); - sm->inc(ab.address()); - } - - sm->dec(ab.adress()); -} - -template -void -array::insert_tail_block(unsigned index, - unsigned nr_entries, - value_type v) -{ - array_block ab = new_ablock(); - - for (unsigned i = 0; i < nr_entries; i++) - ab.set(i, v); - - block_tree_.insert(index, ab); -} - -//---------------------------------------------------------------- - -template -ro_array_block::ro_array_block(read_ref rr) - : rr_(rr) -{ -} - -template -unsigned -ro_array_block::nr_entries() const -{ - array_block_disk const *data = - reinterpret_cast(&rr_.data()); - - return to_cpu(data->nr_entries); -} - -template -typename ValueTraits::value_type -ro_array_block::get(unsigned index) const -{ - value_type v; - ValueTraits::unpack(element_at(index), v); - return v; -} - -template -array_block::array_block(write_ref wr) - : wr_(wr) -{ -} - -template -void -array_block::set(unsigned index, value_type const &v) -{ - void *elt = element_at(index); - ValueTraits::pack(v, element_at(index)); -} - -template -void -array_block::inc_all_entries(typename ValueTraits::ref_counter &rc) -{ - unsigned nr = ro_array_block::nr_entries(); - - for (unsigned i = 0; i < nr; i++) - rc.inc(ro_array_block::get(i)); -} - -template -void -array_block::dec_all_entries(typename ValueTraits::ref_counter &rc) -{ - unsigned nr = ro_array_block::nr_entries(); - - for (unsigned i = 0; i < nr; i++) - rc.dec(ro_array_block::get(i)); -} - -//---------------------------------------------------------------- diff --git a/unit-tests/Makefile.in b/unit-tests/Makefile.in index cd4507d..630231d 100644 --- a/unit-tests/Makefile.in +++ b/unit-tests/Makefile.in @@ -18,6 +18,7 @@ TEST_SOURCE=\ unit-tests/array_block_t.cc \ + unit-tests/array_t.cc \ unit-tests/buffer_t.cc \ unit-tests/cache_t.cc \ unit-tests/block_t.cc \ @@ -33,6 +34,9 @@ unit-test: $(TEST_PROGRAMS) .PHONY: unit-test +unit-tests/array_t: unit-tests/array_t.o $(OBJECTS) + g++ $(CXXFLAGS) $(INCLUDES) -o $@ $+ $(LIBS) $(LIBEXPAT) + unit-tests/array_block_t: unit-tests/array_block_t.o $(OBJECTS) g++ $(CXXFLAGS) $(INCLUDES) -o $@ $+ $(LIBS) $(LIBEXPAT) diff --git a/unit-tests/array_t.cc b/unit-tests/array_t.cc new file mode 100644 index 0000000..30109b3 --- /dev/null +++ b/unit-tests/array_t.cc @@ -0,0 +1,122 @@ +// Copyright (C) 2013 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +#include "persistent-data/transaction_manager.h" +#include "persistent-data/space-maps/core.h" +#include "persistent-data/data-structures/array.h" + +#define BOOST_TEST_MODULE ArrayTests +#include + +using namespace std; +using namespace boost; +using namespace persistent_data; + +//---------------------------------------------------------------- + +namespace { + block_address const NR_BLOCKS = 102400; + typedef array array64; + + transaction_manager::ptr + create_tm() { + block_manager<>::ptr bm(new block_manager<>("./test.data", NR_BLOCKS, 4, block_io<>::READ_WRITE)); + space_map::ptr sm(new core_map(NR_BLOCKS)); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); + return tm; + } + + typename array64::ptr + create_array(unsigned nr_entries, uint64_t default_value) { + uint64_traits::ref_counter rc; + + typename array64::ptr a(new array64(create_tm(), rc)); + + if (nr_entries) + a->grow(nr_entries, default_value); + + return a; + } +} + +//---------------------------------------------------------------- + +BOOST_AUTO_TEST_CASE(can_create_an_empty_array) +{ + array::ptr tree = create_array(0, 0); + + BOOST_CHECK_THROW(tree->get(0), runtime_error); +} + +BOOST_AUTO_TEST_CASE(get_elements) +{ + unsigned const COUNT = 10000; + array::ptr tree = create_array(COUNT, 123); + + for (unsigned i = 0; i < COUNT; i++) { + BOOST_CHECK_EQUAL(tree->get(i), 123); + } + + BOOST_CHECK_THROW(tree->get(COUNT), runtime_error); +} + +BOOST_AUTO_TEST_CASE(set_elements) +{ + unsigned const COUNT = 10000; + array::ptr tree = create_array(COUNT, 123); + + for (unsigned i = 0; i < COUNT; i++) + tree->set(i, 124); + + for (unsigned i = 0; i < COUNT; i++) + BOOST_CHECK_EQUAL(tree->get(i), 124); + + BOOST_CHECK_THROW(tree->get(COUNT), runtime_error); +} + +template +unsigned array_size(T (&)[size]) { + return size; +} + +BOOST_AUTO_TEST_CASE(grow) +{ + unsigned const STEPS[] = { + 17, 71, 137, 277, 439, 683, 967 + }; + + for (unsigned s = 0; s < array_size(STEPS); s++) { + + unsigned step = STEPS[s]; + cerr << "testing grow with step size " << step << endl; + + unsigned const COUNT = 10000; + array::ptr a = create_array(0, 123); + + for (unsigned i = 0; i < COUNT; i = min(i + step, COUNT)) { + a->grow(i + step, i); + + for (unsigned j = i; j < i + step; j++) + BOOST_CHECK_EQUAL(a->get(j), i); + + BOOST_CHECK_THROW(a->get(i + step), runtime_error); + } + } +} + +//----------------------------------------------------------------