2011-10-10 14:10:30 +01:00

460 lines
11 KiB
C++

#include "metadata.h"
#include "btree_checker.h"
#include "core_map.h"
#include "math_utils.h"
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <set>
#include <map>
#include <fcntl.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace base;
using namespace std;
using namespace persistent_data;
using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
// File-local constants.  Of these, only SUPERBLOCK_LOCATION is
// referenced in the code visible in this part of the file.
// NOTE(review): presumably the magic number stored in the superblock - confirm.
uint32_t const SUPERBLOCK_MAGIC = 27022010;
// Block at which the superblock is read and written.
block_address const SUPERBLOCK_LOCATION = 0;
// NOTE(review): presumably the metadata format version - confirm.
uint32_t const VERSION = 1;
unsigned const METADATA_CACHE_SIZE = 1024;
unsigned const SECTOR_TO_BLOCK_SHIFT = 3;
// Returns the size of the file or block device at |path|, expressed
// in units of MD_BLOCK_SIZE (computed with div_down).
//
// A regular file is sized from stat(); a block device has to be
// opened and queried with the BLKGETSIZE64 ioctl.
//
// Throws std::runtime_error if the path can't be stat'ed, if it is
// neither a regular file nor a block device, or if the block device
// can't be opened or queried.
block_address get_nr_blocks(string const &path) {
	struct stat info;
	if (::stat(path.c_str(), &info))
		throw runtime_error("Couldn't stat dev path");

	if (S_ISREG(info.st_mode))
		return div_down<block_address>(info.st_size, MD_BLOCK_SIZE);

	if (!S_ISBLK(info.st_mode))
		throw runtime_error("bad path");

	// To get the size of a block device we need to open it, and
	// then make an ioctl call.
	int fd = ::open(path.c_str(), O_RDONLY);
	if (fd < 0)
		throw runtime_error("couldn't open block device to ascertain size");

	// BLKGETSIZE64 stores a 64 bit byte count, so give it a
	// uint64_t explicitly rather than relying on how block_address
	// happens to be defined.
	uint64_t size_in_bytes = 0;
	int r = ::ioctl(fd, BLKGETSIZE64, &size_in_bytes);

	// The descriptor is only needed for the ioctl; release it
	// before deciding whether to throw.
	::close(fd);
	if (r)
		throw runtime_error("ioctl BLKGETSIZE64 failed");

	return div_down<block_address>(size_in_bytes, MD_BLOCK_SIZE);
}
// Builds a transaction manager for the metadata device at |dev_path|.
// The device size (in blocks) is used both for the block manager and
// for the core_map that serves as the space map.
transaction_manager::ptr
open_tm(string const &dev_path) {
	block_address const device_blocks = get_nr_blocks(dev_path);

	block_manager<>::ptr bm(new block_manager<>(dev_path, device_blocks));
	space_map::ptr sm(new core_map(device_blocks));

	return transaction_manager::ptr(new transaction_manager(bm, sm));
}
// Reads the block at SUPERBLOCK_LOCATION, verifies its checksum and
// returns the unpacked, in-core superblock.
//
// The checksum is calculated from the flags_ field onwards, over
// MD_BLOCK_SIZE - sizeof(uint32_t) bytes, and compared with the
// csum_ field.  Throws std::runtime_error on a mismatch.
superblock read_superblock(block_manager<>::ptr bm) {
	block_manager<>::read_ref r = bm->read_lock(SUPERBLOCK_LOCATION);
	superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(&r.data());

	crc32c sum(160774);
	sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t));

	uint32_t const stored = to_cpu<uint32_t>(sbd->csum_);
	if (sum.get_sum() != stored) {
		ostringstream out;
		out << "bad checksum in superblock, calculated "
		    << sum.get_sum()
		    << ", superblock contains " << stored;
		throw runtime_error(out.str());
	}

	superblock sb;
	superblock_traits::unpack(*sbd, sb);
	return sb;
}
// Checker for the two-level mapping tree.  As well as running the
// standard btree checks, it builds up the set of devices that have
// mappings defined, which can later be cross referenced with the
// details tree.  A separate block_counter is fed with the data
// blocks referenced by the leaves, so the data space map can be
// verified afterwards.
class mapping_validator : public btree_checker<2, block_traits> {
public:
	typedef boost::shared_ptr<mapping_validator> ptr;

	mapping_validator(block_counter &metadata_counter, block_counter &data_counter)
		: btree_checker<2, block_traits>(metadata_counter),
		  data_counter_(data_counter) {
	}

	// Sharing can only occur in level 1 nodes, so the base checker
	// returning false at level 0 is reported as an error.
	// FIXME: not true once we start having held roots.
	//
	// Every key held by the node is recorded as a device id.
	bool visit_internal_leaf(unsigned level,
				 bool sub_root,
				 optional<uint64_t> key,
				 btree_detail::node_ref<uint64_traits> const &n) {
		bool const proceed = parent_checker::visit_internal_leaf(level, sub_root, key, n);

		if (level == 0 && !proceed)
			throw runtime_error("unexpected sharing in level 0 of mapping tree.");

		for (unsigned i = 0; i < n.get_nr_entries(); ++i)
			devices_.insert(n.key_at(i));

		return proceed;
	}

	// When the base checker returns true for the leaf, the data
	// block of every value in it is counted.
	bool visit_leaf(unsigned level,
			bool sub_root,
			optional<uint64_t> key,
			btree_detail::node_ref<block_traits> const &n) {
		bool const proceed = parent_checker::visit_leaf(level, sub_root, key, n);
		if (!proceed)
			return proceed;

		for (unsigned i = 0; i < n.get_nr_entries(); ++i)
			data_counter_.inc(n.value_at(i).block_);

		return proceed;
	}

	// Device ids collected from the internal leaves visited so far.
	set<uint64_t> const &get_devices() const {
		return devices_;
	}

private:
	typedef btree_checker<2, block_traits> parent_checker;

	block_counter &data_counter_;
	set<uint64_t> devices_;
};
// Checker for the single-level device details tree.  On top of the
// standard btree checks it records the key of every entry in the
// leaves it accepts, giving the set of devices that have details.
class details_validator : public btree_checker<1, device_details_traits> {
public:
	typedef boost::shared_ptr<details_validator> ptr;

	details_validator(block_counter &counter)
		: btree_checker<1, device_details_traits>(counter) {
	}

	// Keys are only collected when the base checker returns true
	// for the leaf.
	bool visit_leaf(unsigned level,
			bool sub_root,
			optional<uint64_t> key,
			btree_detail::node_ref<device_details_traits> const &n) {
		bool const proceed = parent_checker::visit_leaf(level, sub_root, key, n);
		if (!proceed)
			return proceed;

		for (unsigned i = 0; i < n.get_nr_entries(); ++i)
			devices_.insert(n.key_at(i));

		return proceed;
	}

	// Device ids collected from the leaves visited so far.
	set<uint64_t> const &get_devices() const {
		return devices_;
	}

private:
	typedef btree_checker<1, device_details_traits> parent_checker;

	set<uint64_t> devices_;
};
}
//----------------------------------------------------------------
// A thin is a handle on device |dev|; it keeps a non-owning pointer
// to the metadata object it operates on.
thin::thin(thin_dev_t dev, metadata *metadata)
	: dev_(dev), metadata_(metadata)
{
}
// Returns the id of the device this handle refers to.
thin_dev_t thin::get_dev_t() const
{
	return dev_;
}
// Looks up |thin_block| of this device in the mapping tree, which is
// keyed by (device id, thin block).  The result of the tree lookup
// is returned as is.
thin::maybe_address thin::lookup(block_address thin_block)
{
	uint64_t key[2] = {dev_, thin_block};

	return metadata_->mappings_.lookup(key);
}
// Maps |thin_block| of this device onto |data_block|.
void thin::insert(block_address thin_block, block_address data_block)
{
	block_time bt;
	bt.block_ = data_block;
	bt.time_ = 0;		// FIXME: use current time.

	uint64_t key[2] = {dev_, thin_block};
	metadata_->mappings_.insert(key, bt);
}
// Removes the mapping for |thin_block| of this device from the
// mapping tree.
void thin::remove(block_address thin_block)
{
	uint64_t key[2] = {dev_, thin_block};

	metadata_->mappings_.remove(key);
}
void
thin::set_snapshot_time(uint32_t time)
{
uint64_t key[1] = { dev_ };
optional<device_details> mdetail = metadata_->details_.lookup(key);
if (!mdetail)
throw runtime_error("no such device");
mdetail->snapshotted_time_ = time;
metadata_->details_.insert(key, *mdetail);
}
// Returns mapped_blocks_ from this device's details entry.
// Throws std::runtime_error if the device has no details entry.
block_address thin::get_mapped_blocks() const
{
	uint64_t key[1] = { dev_ };

	optional<device_details> details = metadata_->details_.lookup(key);
	if (!details)
		throw runtime_error("no such device");

	return details->mapped_blocks_;
}
// Updates mapped_blocks_ in this device's details entry: the entry
// is looked up, modified and written back.
// Throws std::runtime_error if the device has no details entry.
void thin::set_mapped_blocks(block_address count)
{
	uint64_t key[1] = { dev_ };

	optional<device_details> details = metadata_->details_.lookup(key);
	if (!details)
		throw runtime_error("no such device");

	details->mapped_blocks_ = count;
	metadata_->details_.insert(key, *details);
}
//--------------------------------
// Opens existing metadata on the device at |dev_path|.
//
// The initialisers depend on each other: the superblock is read
// through the transaction manager's block manager, and the space
// maps and btrees are then opened from roots held in the superblock.
// NOTE(review): members are initialised in class declaration order,
// not the order written here - confirm the declaration matches.
//
// mappings_top_level_ and mappings_ are both opened on
// sb_.data_mapping_root_, i.e. they are two views of the same tree.
metadata::metadata(std::string const &dev_path)
: tm_(open_tm(dev_path)),
sb_(read_superblock(tm_->get_bm())),
metadata_sm_(open_metadata_sm(tm_, static_cast<void *>(&sb_.metadata_space_map_root_))),
data_sm_(open_disk_sm(tm_, static_cast<void *>(&sb_.data_space_map_root_))),
details_(tm_, sb_.device_details_root_, device_details_traits::ref_counter()),
mappings_top_level_(tm_, sb_.data_mapping_root_, mtree_ref_counter(tm_)),
mappings_(tm_, sb_.data_mapping_root_, block_time_ref_counter(data_sm_))
{
// Disabled code that would reinitialise the in-core superblock.
#if 0
::memset(&sb_, 0, sizeof(sb_));
sb_.data_mapping_root_ = mappings_.get_root();
sb_.device_details_root_ = details_.get_root();
sb_.metadata_block_size_ = MD_BLOCK_SIZE;
sb_.metadata_nr_blocks_ = tm_->get_bm()->get_nr_blocks();
#endif
}
// Nothing to do explicitly here; note that the destructor does not
// call commit().
metadata::~metadata()
{
}
void
metadata::commit()
{
sb_.data_mapping_root_ = mappings_.get_root();
sb_.device_details_root_ = details_.get_root();
write_ref superblock = tm_->get_bm()->superblock(SUPERBLOCK_LOCATION);
superblock_disk *disk = reinterpret_cast<superblock_disk *>(superblock.data());
superblock_traits::pack(sb_, *disk);
}
// Creates a new, empty mapping tree for device |dev| and hooks it
// into the top level of the mapping tree.
// Throws std::runtime_error if device_exists(dev) is already true.
void metadata::create_thin(thin_dev_t dev)
{
	if (device_exists(dev))
		throw std::runtime_error("Device already exists");

	single_mapping_tree::ptr empty_tree(
		new single_mapping_tree(tm_, block_time_ref_counter(data_sm_)));

	uint64_t key[1] = {dev};
	mappings_top_level_.insert(key, empty_tree->get_root());

	// FIXME: ugly.  mappings_ is a second view of the same tree, so
	// it has to be pointed at the top level's current root.
	mappings_.set_root(mappings_top_level_.get_root());
}
// Creates device |dev| as a snapshot of |origin|: the origin's
// mapping tree is cloned and inserted into the top level under
// |dev|, the superblock time is bumped, and both devices have their
// snapshot time set to it.  The snapshot also takes over the
// origin's mapped block count.
//
// Throws std::runtime_error if |origin| has no mapping tree, or
// (via open_thin) if either device has no details entry.
void metadata::create_snap(thin_dev_t dev, thin_dev_t origin)
{
	uint64_t origin_key[1] = {origin};
	optional<uint64_t> origin_root = mappings_top_level_.lookup(origin_key);
	if (!origin_root)
		throw std::runtime_error("unknown origin");

	single_mapping_tree origin_tree(tm_, *origin_root,
					block_time_ref_counter(data_sm_));
	single_mapping_tree::ptr snap_tree(origin_tree.clone());

	uint64_t snap_key[1] = {dev};
	mappings_top_level_.insert(snap_key, snap_tree->get_root());

	// FIXME: ugly.  Keep the second view of the tree on the
	// current root.
	mappings_.set_root(mappings_top_level_.get_root());

	sb_.time_++;

	thin::ptr origin_dev = open_thin(origin);
	thin::ptr snap_dev = open_thin(dev);

	origin_dev->set_snapshot_time(sb_.time_);
	snap_dev->set_snapshot_time(sb_.time_);
	snap_dev->set_mapped_blocks(origin_dev->get_mapped_blocks());
}
// Removes device |dev| from the top level of the mapping tree.
//
// mappings_ is a second view of the same tree as
// mappings_top_level_, and commit() writes mappings_.get_root() to
// the superblock.  It therefore has to be pointed at the top level's
// current root after the removal, exactly as create_thin() and
// create_snap() do after their inserts; otherwise the removal would
// not be reflected in the committed root.
//
// NOTE(review): the device's entry in the details tree is left in
// place - confirm whether it should be removed here as well.
void
metadata::del(thin_dev_t dev)
{
	uint64_t key[1] = {dev};
	mappings_top_level_.remove(key);
	mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
}
// Stores |id| as the transaction id in the in-core superblock.
void metadata::set_transaction_id(uint64_t id)
{
	sb_.trans_id_ = id;
}
// Returns the transaction id held in the in-core superblock.
uint64_t metadata::get_transaction_id() const
{
	return sb_.trans_id_;
}
// Returns the held root recorded in the in-core superblock.
block_address metadata::get_held_root() const
{
	return sb_.held_root_;
}
// Obtains a new block from the data space map and returns it.
block_address metadata::alloc_data_block()
{
	return data_sm_->new_block();
}
// Drops one reference to data block |b| in the data space map.
void metadata::free_data_block(block_address b)
{
	data_sm_->dec(b);
}
// Returns the number of free blocks reported by the data space map.
block_address metadata::get_nr_free_data_blocks() const
{
	return data_sm_->get_nr_free();
}
// Returns the data block size recorded in the in-core superblock.
sector_t metadata::get_data_block_size() const
{
	return sb_.data_block_size_;
}
// Returns the total number of blocks tracked by the data space map.
block_address metadata::get_data_dev_size() const
{
	return data_sm_->get_nr_blocks();
}
// Returns a handle for device |dev|.
// Throws std::runtime_error if the device has no entry in the
// details tree.
thin::ptr metadata::open_thin(thin_dev_t dev)
{
	uint64_t key[1] = {dev};

	optional<device_details> details = details_.lookup(key);
	if (!details)
		throw runtime_error("no such device");

	return thin::ptr(new thin(dev, this));
}
// Returns true if |dev| has an entry in the details tree.
//
// The lookup yields an optional; it is converted to bool explicitly
// because optional implementations with an explicit operator bool do
// not allow it to be returned directly from a function returning
// bool.
bool
metadata::device_exists(thin_dev_t dev) const
{
	uint64_t key[1] = {dev};
	return !!details_.lookup(key);
}
namespace {
// Compares, for every block known to |sm|, the reference count held
// by the space map with the count accumulated in |counts|.
//
// Each mismatch is added as a child of an error_set titled |desc|.
// Returns that error_set if there was at least one mismatch, and an
// empty optional otherwise.
optional<error_set::ptr>
check_ref_counts(string const &desc, block_counter const &counts,
		 space_map::ptr sm) {
	error_set::ptr errors(new error_set(desc));
	bool mismatch_found = false;

	block_address b = 0;
	while (b < sm->get_nr_blocks()) {
		uint32_t actual = sm->get_count(b);
		uint32_t expected = counts.get_count(b);

		if (actual != expected) {
			ostringstream out;
			out << b << ": was " << actual
			    << ", expected " << expected;
			errors->add_child(out.str());
			mismatch_found = true;
		}

		b++;
	}

	if (mismatch_found)
		return optional<error_set::ptr>(errors);

	return optional<error_set::ptr>();
}
}
// Checks the metadata for consistency.
//
//  - The mapping and details trees are walked with their validators,
//    which accumulate metadata and data block reference counts.
//  - Every device seen in the mapping tree must also be present in
//    the details tree; the first one that is not causes a
//    std::runtime_error to be thrown.
//  - The superblock and both space maps are added to the metadata
//    counts, after which the counts are compared with the metadata
//    and data space maps.
//
// Returns the collected errors, or an empty optional if there were
// none.
boost::optional<error_set::ptr>
metadata::check()
{
	error_set::ptr errors(new error_set("Errors in metadata"));
	block_counter metadata_counter, data_counter;

	mapping_validator::ptr mv(new mapping_validator(metadata_counter,
							data_counter));
	mappings_.visit(mv);
	set<uint64_t> const &with_mappings = mv->get_devices();

	details_validator::ptr dv(new details_validator(metadata_counter));
	details_.visit(dv);
	set<uint64_t> const &with_details = dv->get_devices();

	typedef set<uint64_t>::const_iterator dev_iterator;
	for (dev_iterator it = with_mappings.begin(); it != with_mappings.end(); ++it) {
		if (with_details.count(*it) != 0)
			continue;

		ostringstream out;
		out << "mapping exists for device " << *it
		    << ", yet there is no entry in the details tree.";
		throw runtime_error(out.str());
	}

	// Account for the metadata blocks not reached by the tree walks.
	metadata_counter.inc(SUPERBLOCK_LOCATION);
	metadata_sm_->check(metadata_counter);
	data_sm_->check(metadata_counter);

	errors->add_child(check_ref_counts("Errors in metadata block reference counts",
					   metadata_counter, metadata_sm_));
	errors->add_child(check_ref_counts("Errors in data block reference counts",
					   data_counter, data_sm_));

	if (errors->get_children().size() > 0)
		return optional<error_set::ptr>(errors);

	return optional<error_set::ptr>();
}
//----------------------------------------------------------------