2011-06-23 19:17:08 +05:30
|
|
|
#include "metadata.h"
|
|
|
|
|
2011-08-24 15:15:39 +05:30
|
|
|
#include "btree_validator.h"
|
2011-08-23 16:25:37 +05:30
|
|
|
#include "core_map.h"
|
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
#include <stdexcept>
|
2011-08-23 16:25:37 +05:30
|
|
|
#include <sstream>
|
|
|
|
#include <iostream>
|
2011-08-24 15:15:39 +05:30
|
|
|
#include <set>
|
|
|
|
#include <map>
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-08-23 16:25:37 +05:30
|
|
|
using namespace std;
|
2011-06-23 19:17:08 +05:30
|
|
|
using namespace persistent_data;
|
2011-07-22 20:39:56 +05:30
|
|
|
using namespace thin_provisioning;
|
2011-06-23 19:17:08 +05:30
|
|
|
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
uint32_t const SUPERBLOCK_MAGIC = 27022010;
|
|
|
|
block_address const SUPERBLOCK_LOCATION = 0;
|
|
|
|
uint32_t const VERSION = 1;
|
|
|
|
unsigned const METADATA_CACHE_SIZE = 1024;
|
|
|
|
unsigned const SECTOR_TO_BLOCK_SHIFT = 3;
|
2011-08-23 16:25:37 +05:30
|
|
|
|
|
|
|
// FIXME: get the file size
|
|
|
|
unsigned const NR_BLOCKS = 1024;
|
|
|
|
|
|
|
|
// Opens the device at |dev_path| and wraps it in a transaction manager.
// The block manager and the in-core space map are both sized with the
// hard-coded NR_BLOCKS.
transaction_manager<4096>::ptr
open_tm(string const &dev_path)
{
	typedef block_manager<4096> bm_type;
	typedef transaction_manager<4096> tm_type;

	bm_type::ptr blocks(new bm_type(dev_path, NR_BLOCKS));
	space_map::ptr core(new core_map(NR_BLOCKS));

	return tm_type::ptr(new tm_type(blocks, core));
}
|
|
|
|
|
|
|
|
// Read-locks the block at SUPERBLOCK_LOCATION, reinterprets its bytes as
// a superblock_disk and unpacks that into an in-core superblock, which
// is returned by value.
superblock read_superblock(block_manager<4096>::ptr bm)
{
	block_manager<4096>::read_ref locked = bm->read_lock(SUPERBLOCK_LOCATION);
	superblock_disk const *on_disk =
		reinterpret_cast<superblock_disk const *>(&locked.data());

	superblock unpacked;
	superblock_traits::unpack(*on_disk, unpacked);
	return unpacked;
}
|
|
|
|
|
|
|
|
// As well as the standard btree checks, we build up a set of what
|
|
|
|
// devices having mappings defined, which can later be cross
|
2011-08-24 15:15:39 +05:30
|
|
|
// referenced with the details tree. A separate block_counter is
|
|
|
|
// used to later verify the data space map.
|
2011-08-23 16:25:37 +05:30
|
|
|
class mapping_validator : public btree_validator<2, block_traits, MD_BLOCK_SIZE> {
|
|
|
|
public:
|
|
|
|
typedef boost::shared_ptr<mapping_validator> ptr;
|
|
|
|
|
2011-08-24 15:15:39 +05:30
|
|
|
mapping_validator(block_counter &metadata_counter, block_counter &data_counter)
|
|
|
|
: btree_validator<2, block_traits, MD_BLOCK_SIZE>(metadata_counter),
|
|
|
|
data_counter_(data_counter) {
|
|
|
|
}
|
|
|
|
|
|
|
|
void visit_internal_leaf(unsigned level, bool is_root,
|
2011-08-23 16:25:37 +05:30
|
|
|
btree_detail::node_ref<uint64_traits, MD_BLOCK_SIZE> const &n) {
|
2011-08-24 15:15:39 +05:30
|
|
|
btree_validator<2, block_traits, MD_BLOCK_SIZE>::visit_internal_leaf(level, is_root, n);
|
2011-08-23 16:25:37 +05:30
|
|
|
|
|
|
|
for (unsigned i = 0; i < n.get_nr_entries(); i++)
|
|
|
|
devices_.insert(n.key_at(i));
|
|
|
|
}
|
|
|
|
|
2011-08-24 15:15:39 +05:30
|
|
|
void visit_leaf(unsigned level, bool is_root,
|
|
|
|
btree_detail::node_ref<block_traits, MD_BLOCK_SIZE> const &n) {
|
|
|
|
btree_validator<2, block_traits, MD_BLOCK_SIZE>::visit_leaf(level, is_root, n);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < n.get_nr_entries(); i++)
|
|
|
|
data_counter_.inc(n.value_at(i).block_);
|
|
|
|
}
|
|
|
|
|
2011-08-24 18:57:45 +05:30
|
|
|
set<uint64_t> const &get_devices() const {
|
2011-08-23 16:25:37 +05:30
|
|
|
return devices_;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2011-08-24 15:15:39 +05:30
|
|
|
block_counter &data_counter_;
|
2011-08-23 16:25:37 +05:30
|
|
|
set<uint64_t> devices_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Validator for the device details tree.  Besides the base btree
// checks it collects the key of every leaf entry, giving the set of
// devices that own a details record.
class details_validator : public btree_validator<1, device_details_traits, MD_BLOCK_SIZE> {
	typedef btree_validator<1, device_details_traits, MD_BLOCK_SIZE> validator_base;

public:
	typedef boost::shared_ptr<details_validator> ptr;

	// |counter| is forwarded unchanged to the base validator.
	details_validator(block_counter &counter)
		: validator_base(counter) {
	}

	// Runs the base class check, then records each key in the leaf.
	void visit_leaf(unsigned level, bool is_root,
			btree_detail::node_ref<device_details_traits, MD_BLOCK_SIZE> const &n) {
		validator_base::visit_leaf(level, is_root, n);

		unsigned const nr_entries = n.get_nr_entries();
		for (unsigned entry = 0; entry != nr_entries; ++entry)
			devices_.insert(n.key_at(entry));
	}

	// Keys collected by visit_leaf() so far.
	set<uint64_t> const &get_devices() const {
		return devices_;
	}

private:
	set<uint64_t> devices_;
};
|
2011-07-22 20:39:56 +05:30
|
|
|
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
//----------------------------------------------------------------
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
// Stores the device id and a raw, non-owning pointer to the metadata
// object that the other thin:: methods operate through.
thin::thin(thin_dev_t dev, metadata *metadata)
	: dev_(dev), metadata_(metadata)
{
}
|
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
// Returns the device id this handle was constructed with.
thin_dev_t
thin::get_dev_t() const
{
	return dev_;
}
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
// Looks |thin_block| up in the two-level mapping tree, keyed by
// (device id, thin block), and returns whatever the tree reports.
thin::maybe_address
thin::lookup(block_address thin_block)
{
	uint64_t path[2] = {dev_, thin_block};

	return metadata_->mappings_.lookup(path);
}
|
|
|
|
|
|
|
|
void
|
2011-07-22 20:39:56 +05:30
|
|
|
thin::insert(block_address thin_block, block_address data_block)
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t key[2] = {dev_, thin_block};
|
2011-08-24 15:15:39 +05:30
|
|
|
block_time bt;
|
|
|
|
bt.block_ = data_block;
|
|
|
|
bt.time_ = 0; // FIXME: use current time.
|
|
|
|
return metadata_->mappings_.insert(key, bt);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2011-07-22 20:39:56 +05:30
|
|
|
thin::remove(block_address thin_block)
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t key[2] = {dev_, thin_block};
|
|
|
|
metadata_->mappings_.remove(key);
|
|
|
|
}
|
2011-07-22 20:39:56 +05:30
|
|
|
|
|
|
|
void
|
|
|
|
thin::set_snapshot_time(uint32_t time)
|
|
|
|
{
|
|
|
|
uint64_t key[1] = { dev_ };
|
2011-08-24 18:57:45 +05:30
|
|
|
optional<device_details> mdetail = metadata_->details_.lookup(key);
|
2011-07-22 20:39:56 +05:30
|
|
|
if (!mdetail)
|
|
|
|
throw runtime_error("no such device");
|
|
|
|
|
|
|
|
mdetail->snapshotted_time_ = time;
|
|
|
|
metadata_->details_.insert(key, *mdetail);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the mapped_blocks_ field of this device's details record.
// Throws runtime_error("no such device") when there is no such record.
block_address
thin::get_mapped_blocks() const
{
	uint64_t key[1] = { dev_ };
	optional<device_details> details = metadata_->details_.lookup(key);

	if (details)
		return details->mapped_blocks_;

	throw runtime_error("no such device");
}
|
|
|
|
|
|
|
|
void
|
|
|
|
thin::set_mapped_blocks(block_address count)
|
|
|
|
{
|
|
|
|
uint64_t key[1] = { dev_ };
|
2011-08-24 18:57:45 +05:30
|
|
|
optional<device_details> mdetail = metadata_->details_.lookup(key);
|
2011-07-22 20:39:56 +05:30
|
|
|
if (!mdetail)
|
|
|
|
throw runtime_error("no such device");
|
|
|
|
|
|
|
|
mdetail->mapped_blocks_ = count;
|
|
|
|
metadata_->details_.insert(key, *mdetail);
|
|
|
|
}
|
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
//--------------------------------
|
|
|
|
|
2011-08-23 16:25:37 +05:30
|
|
|
// Opens existing metadata on the device at |dev_path|.
//
// The transaction manager is created first, the superblock is read
// through its block manager, and every other member is then opened from
// a root stored in that superblock.  Note that mappings_top_level_ and
// mappings_ are both opened on sb_.data_mapping_root_; they differ only
// in the ref counter they are given.
//
// NOTE(review): members are initialised in declaration order, which is
// not visible from this file -- confirm it matches the list below.
metadata::metadata(std::string const &dev_path)
	: tm_(open_tm(dev_path)),
	  sb_(read_superblock(tm_->get_bm())),
	  metadata_sm_(open_metadata_sm<MD_BLOCK_SIZE>(
			       tm_, static_cast<void *>(&sb_.metadata_space_map_root_))),
	  data_sm_(open_disk_sm<MD_BLOCK_SIZE>(
			   tm_, static_cast<void *>(&sb_.data_space_map_root_))),
	  details_(tm_, sb_.device_details_root_, device_details_traits::ref_counter()),
	  mappings_top_level_(tm_, sb_.data_mapping_root_,
			      mtree_ref_counter<MD_BLOCK_SIZE>(tm_)),
	  mappings_(tm_, sb_.data_mapping_root_, block_time_ref_counter(data_sm_))
{
	// Disabled code that would build a fresh superblock instead of
	// using the one read from disk.
#if 0
	::memset(&sb_, 0, sizeof(sb_));
	sb_.data_mapping_root_ = mappings_.get_root();
	sb_.device_details_root_ = details_.get_root();
	sb_.metadata_block_size_ = MD_BLOCK_SIZE;
	sb_.metadata_nr_blocks_ = tm_->get_bm()->get_nr_blocks();
#endif
}
|
|
|
|
|
|
|
|
// Empty body: no explicit teardown (and no implicit commit) happens here.
metadata::~metadata()
{
}
|
|
|
|
|
|
|
|
void
|
|
|
|
metadata::commit()
|
|
|
|
{
|
2011-07-22 20:39:56 +05:30
|
|
|
sb_.data_mapping_root_ = mappings_.get_root();
|
|
|
|
sb_.device_details_root_ = details_.get_root();
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-08-24 18:57:45 +05:30
|
|
|
write_ref superblock = tm_->get_bm()->superblock(SUPERBLOCK_LOCATION);
|
|
|
|
superblock_disk *disk = reinterpret_cast<superblock_disk *>(superblock.data());
|
2011-07-22 20:39:56 +05:30
|
|
|
superblock_traits::pack(sb_, *disk);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
2011-07-22 20:39:56 +05:30
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
void
|
2011-07-22 20:39:56 +05:30
|
|
|
metadata::create_thin(thin_dev_t dev)
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t key[1] = {dev};
|
|
|
|
|
|
|
|
if (device_exists(dev))
|
|
|
|
throw std::runtime_error("Device already exists");
|
|
|
|
|
2011-08-24 15:15:39 +05:30
|
|
|
single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_, block_time_ref_counter(data_sm_)));
|
2011-06-27 15:15:30 +05:30
|
|
|
mappings_top_level_.insert(key, new_tree->get_root());
|
2011-06-23 19:17:08 +05:30
|
|
|
mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2011-07-22 20:39:56 +05:30
|
|
|
metadata::create_snap(thin_dev_t dev, thin_dev_t origin)
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t snap_key[1] = {dev};
|
|
|
|
uint64_t origin_key[1] = {origin};
|
|
|
|
|
2011-08-24 18:57:45 +05:30
|
|
|
optional<uint64_t> mtree_root = mappings_top_level_.lookup(origin_key);
|
2011-06-27 15:15:30 +05:30
|
|
|
if (!mtree_root)
|
2011-06-23 19:17:08 +05:30
|
|
|
throw std::runtime_error("unknown origin");
|
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
single_mapping_tree otree(tm_, *mtree_root,
|
2011-08-24 15:15:39 +05:30
|
|
|
block_time_ref_counter(data_sm_));
|
2011-06-27 15:15:30 +05:30
|
|
|
|
|
|
|
single_mapping_tree::ptr clone(otree.clone());
|
|
|
|
mappings_top_level_.insert(snap_key, clone->get_root());
|
2011-06-23 19:17:08 +05:30
|
|
|
mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
|
|
|
|
|
2011-07-22 20:39:56 +05:30
|
|
|
sb_.time_++;
|
2011-06-23 19:17:08 +05:30
|
|
|
|
2011-08-24 18:57:45 +05:30
|
|
|
thin::ptr o = open_thin(origin);
|
|
|
|
thin::ptr s = open_thin(dev);
|
2011-07-22 20:39:56 +05:30
|
|
|
o->set_snapshot_time(sb_.time_);
|
|
|
|
s->set_snapshot_time(sb_.time_);
|
2011-06-23 19:17:08 +05:30
|
|
|
s->set_mapped_blocks(o->get_mapped_blocks());
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2011-07-22 20:39:56 +05:30
|
|
|
metadata::del(thin_dev_t dev)
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t key[1] = {dev};
|
|
|
|
mappings_top_level_.remove(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
metadata::set_transaction_id(uint64_t id)
|
|
|
|
{
|
2011-07-22 20:39:56 +05:30
|
|
|
sb_.trans_id_ = id;
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
metadata::get_transaction_id() const
|
|
|
|
{
|
2011-07-22 20:39:56 +05:30
|
|
|
return sb_.trans_id_;
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the held_root_ field of the in-core superblock.
block_address
metadata::get_held_root() const
{
	return sb_.held_root_;
}
|
|
|
|
|
|
|
|
// Asks the data space map for a new block and returns its address.
block_address
metadata::alloc_data_block()
{
	return data_sm_->new_block();
}
|
|
|
|
|
|
|
|
void
|
|
|
|
metadata::free_data_block(block_address b)
|
|
|
|
{
|
2011-07-22 20:39:56 +05:30
|
|
|
data_sm_->dec(b);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the number of free blocks reported by the data space map.
block_address
metadata::get_nr_free_data_blocks() const
{
	return data_sm_->get_nr_free();
}
|
|
|
|
|
|
|
|
// Returns the data_block_size_ field of the in-core superblock.
sector_t
metadata::get_data_block_size() const
{
	return sb_.data_block_size_;
}
|
|
|
|
|
|
|
|
// Returns the total number of blocks tracked by the data space map.
block_address
metadata::get_data_dev_size() const
{
	return data_sm_->get_nr_blocks();
}
|
|
|
|
|
|
|
|
// Returns a handle for device |dev| bound to this metadata object.
// Throws runtime_error("no such device") when the details tree has no
// record for |dev|; the record itself is only used for that check.
thin::ptr
metadata::open_thin(thin_dev_t dev)
{
	uint64_t key[1] = {dev};
	optional<device_details> details = details_.lookup(key);

	if (!details)
		throw runtime_error("no such device");

	return thin::ptr(new thin(dev, this));
}
|
2011-07-22 20:39:56 +05:30
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
bool
|
2011-07-22 20:39:56 +05:30
|
|
|
metadata::device_exists(thin_dev_t dev) const
|
2011-06-23 19:17:08 +05:30
|
|
|
{
|
|
|
|
uint64_t key[1] = {dev};
|
2011-07-22 20:39:56 +05:30
|
|
|
return details_.lookup(key);
|
2011-06-23 19:17:08 +05:30
|
|
|
}
|
|
|
|
|
2011-08-25 19:35:23 +05:30
|
|
|
namespace {
|
|
|
|
// FIXME: this doesn't check for non-zero counts in the sm that are
|
|
|
|
// actually zero.
|
|
|
|
optional<error_set::ptr>
|
|
|
|
check_ref_counts(string const &desc, block_counter const &actual,
|
|
|
|
space_map::ptr sm) {
|
|
|
|
error_set::ptr errors(new error_set(desc));
|
|
|
|
|
|
|
|
bool bad = false;
|
|
|
|
block_counter::count_map const &counts = actual.get_counts();
|
|
|
|
block_counter::count_map::const_iterator it, end = counts.end();
|
|
|
|
for (it = counts.begin(); it != end; ++it) {
|
|
|
|
uint32_t ref_count = sm->get_count(it->first);
|
|
|
|
if (ref_count != it->second) {
|
|
|
|
ostringstream out;
|
|
|
|
out << it->first << ": was " << ref_count
|
|
|
|
<< ", expected " << it->second;
|
|
|
|
errors->add_child(out.str());
|
|
|
|
bad = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return bad ? optional<error_set::ptr>(errors) : optional<error_set::ptr>();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-08-24 15:15:39 +05:30
|
|
|
// Validates the metadata and returns the problems found, or an empty
// optional when nothing was added to the error set.
//
// Steps:
//  1. Walk the mapping tree, counting metadata and data blocks and
//     collecting the devices that have mappings.
//  2. Walk the details tree, counting metadata blocks and collecting
//     the devices that have details records.
//  3. Report every device that has mappings but no details record.
//  4. Let the data space map add its own blocks to the metadata counter.
//  5. Compare both counters against the corresponding space maps.
//
// A missing details record is reported through the returned error set
// rather than by throwing, so one inconsistency no longer aborts the
// check and hides the reference count results.
boost::optional<error_set::ptr>
metadata::check()
{
	error_set::ptr errors(new error_set("Errors in metadata"));

	block_counter metadata_counter, data_counter;

	mapping_validator::ptr mv(new mapping_validator(metadata_counter,
							data_counter));
	mappings_.visit(mv);
	set<uint64_t> const &mapped_devs = mv->get_devices();

	details_validator::ptr dv(new details_validator(metadata_counter));
	details_.visit(dv);
	set<uint64_t> const &details_devs = dv->get_devices();

	for (set<uint64_t>::const_iterator it = mapped_devs.begin(); it != mapped_devs.end(); ++it)
		if (details_devs.count(*it) == 0) {
			ostringstream out;
			out << "mapping exists for device " << *it
			    << ", yet there is no entry in the details tree.";
			errors->add_child(out.str());
		}

	data_sm_->check(metadata_counter);

	errors->add_child(check_ref_counts("Errors in metadata block reference counts",
					   metadata_counter, metadata_sm_));
	errors->add_child(check_ref_counts("Errors in data block reference counts",
					   data_counter, data_sm_));

	return (errors->get_children().size() > 0) ?
		optional<error_set::ptr>(errors) :
		optional<error_set::ptr>();
}
|
|
|
|
|
2011-06-23 19:17:08 +05:30
|
|
|
//----------------------------------------------------------------
|