#include "metadata.h"

#include "btree_checker.h"
#include "core_map.h"
#include "math_utils.h"
#include "space_map_disk.h"

// NOTE(review): the names of the ten system headers, and every template
// argument list that began with an identifier, were missing from the copy
// of this file under review.  They have been reconstructed from the way
// the code uses them; please confirm against a pristine copy.
#include <fcntl.h>
#include <linux/fs.h>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

using namespace base;
using namespace std;
using namespace persistent_data;
using namespace thin_provisioning;

//----------------------------------------------------------------

namespace {
	uint32_t const SUPERBLOCK_MAGIC = 27022010;
	block_address const SUPERBLOCK_LOCATION = 0;
	uint32_t const VERSION = 1;
	unsigned const METADATA_CACHE_SIZE = 1024;
	unsigned const SECTOR_TO_BLOCK_SHIFT = 3;

	// Returns the size of |path| in whole blocks of MD_BLOCK_SIZE,
	// rounding down.  |path| must name a regular file or a block
	// device.
	//
	// Throws runtime_error if the path cannot be stat'ed, is of any
	// other file type, or (for a block device) cannot be opened or
	// queried.
	block_address get_nr_blocks(string const &path) {
		struct stat info;

		if (::stat(path.c_str(), &info))
			throw runtime_error("Couldn't stat dev path");

		if (S_ISREG(info.st_mode))
			return div_down<block_address>(info.st_size, MD_BLOCK_SIZE);

		if (S_ISBLK(info.st_mode)) {
			// To get the size of a block device we need to
			// open it, and then make an ioctl call.
			int fd = ::open(path.c_str(), O_RDONLY);
			if (fd < 0)
				throw runtime_error("couldn't open block device to ascertain size");

			// The descriptor is only needed for this one
			// query, so release it before looking at the
			// result.
			block_address nr_bytes;
			int r = ::ioctl(fd, BLKGETSIZE64, &nr_bytes);
			::close(fd);

			if (r)
				throw runtime_error("ioctl BLKGETSIZE64 failed");

			return div_down<block_address>(nr_bytes, MD_BLOCK_SIZE);
		}

		throw runtime_error("bad path");
	}

	// Builds a transaction manager over the device at |dev_path|,
	// backed by a block manager sized to the whole device and an
	// in-core space map covering the same number of blocks.
	//
	// NOTE(review): the literal 8 is passed straight through to the
	// block_manager constructor; its meaning is not visible from this
	// file (METADATA_CACHE_SIZE above is not used here).
	transaction_manager::ptr open_tm(string const &dev_path) {
		block_address nr_blocks = get_nr_blocks(dev_path);
		block_manager<>::ptr bm(new block_manager<>(dev_path, nr_blocks, 8));
		space_map::ptr sm(new core_map(nr_blocks));
		return transaction_manager::ptr(new transaction_manager(bm, sm));
	}

	// Read-locks the block at SUPERBLOCK_LOCATION, verifies its
	// checksum and returns the unpacked, in-core superblock.
	//
	// The checksum is a crc32c seeded with 160774, taken from the
	// flags_ field onwards over MD_BLOCK_SIZE - sizeof(uint32_t)
	// bytes, and compared with the stored csum_ field.
	//
	// Throws runtime_error, quoting both values, on a mismatch.
	superblock read_superblock(block_manager<>::ptr bm) {
		superblock sb;
		block_manager<>::read_ref r = bm->read_lock(SUPERBLOCK_LOCATION);
		superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(&r.data());

		crc32c sum(160774);
		sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t));
		if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum_)) {
			ostringstream out;
			out << "bad checksum in superblock, calculated "
			    << sum.get_sum()
			    << ", superblock contains " << to_cpu<uint32_t>(sbd->csum_);
			throw runtime_error(out.str());
		}

		superblock_traits::unpack(*sbd, sb);
		return sb;
	}

	// As well as the standard btree checks, we build up a set of what
	// devices having mappings defined, which can later be cross
	// referenced with the details tree.  A separate block_counter is
	// used to later verify the data space map.
	class mapping_validator : public btree_checker<2, block_traits> {
	public:
		typedef boost::shared_ptr<mapping_validator> ptr;

		// |metadata_counter| is handed to the base checker;
		// |data_counter| is incremented once for every data block
		// referenced from a leaf that visit_leaf() accepts.
		mapping_validator(block_counter &metadata_counter, block_counter &data_counter)
			: btree_checker<2, block_traits>(metadata_counter),
			  data_counter_(data_counter) {
		}

		// Sharing can only occur in level 1 nodes.
		// FIXME: not true once we start having held roots.
		//
		// Runs the base check, throws runtime_error if the base
		// returns false for a level 0 node, then records every key
		// in the node as a device id.
		bool visit_internal_leaf(unsigned level, bool sub_root, optional<uint64_t> key,
					 btree_detail::node_ref<uint64_traits> const &n) {
			bool r = btree_checker<2, block_traits>::visit_internal_leaf(level, sub_root, key, n);
			if (!r && level == 0) {
				throw runtime_error("unexpected sharing in level 0 of mapping tree.");
			}

			for (unsigned i = 0; i < n.get_nr_entries(); i++)
				devices_.insert(n.key_at(i));

			return r;
		}

		// Runs the base check; only when it returns true are the
		// data blocks referenced by this leaf counted.
		bool visit_leaf(unsigned level, bool sub_root, optional<uint64_t> key,
				btree_detail::node_ref<block_traits> const &n) {
			bool r = btree_checker<2, block_traits>::visit_leaf(level, sub_root, key, n);

			if (r)
				for (unsigned i = 0; i < n.get_nr_entries(); i++)
					data_counter_.inc(n.value_at(i).block_);

			return r;
		}

		// The device ids collected by visit_internal_leaf().
		set<uint64_t> const &get_devices() const {
			return devices_;
		}

	private:
		block_counter &data_counter_;
		set<uint64_t> devices_;
	};

	// Checks the device details tree with the standard btree checks
	// and collects the set of device ids (leaf keys) it contains.
	class details_validator : public btree_checker<1, device_details_traits> {
	public:
		typedef boost::shared_ptr<details_validator> ptr;

		details_validator(block_counter &counter)
			: btree_checker<1, device_details_traits>(counter) {
		}

		// Runs the base check; only when it returns true are the
		// keys of this leaf recorded.
		bool visit_leaf(unsigned level, bool sub_root, optional<uint64_t> key,
				btree_detail::node_ref<device_details_traits> const &n) {
			bool r = btree_checker<1, device_details_traits>::visit_leaf(level, sub_root, key, n);

			if (r)
				for (unsigned i = 0; i < n.get_nr_entries(); i++)
					devices_.insert(n.key_at(i));

			return r;
		}

		// The device ids collected by visit_leaf().
		set<uint64_t> const &get_devices() const {
			return devices_;
		}

	private:
		set<uint64_t> devices_;
	};
}

//----------------------------------------------------------------

// A thin is a lightweight handle: it stores the device id and a raw,
// non-owning pointer to the metadata object it forwards every call to.
thin::thin(thin_dev_t dev, metadata *metadata)
	: dev_(dev),
	  metadata_(metadata)
{
}

thin_dev_t
thin::get_dev_t() const
{
	return dev_;
}

// Looks up (device, thin_block) in the two level mapping tree and
// returns whatever that lookup yields.
thin::maybe_address
thin::lookup(block_address thin_block)
{
	uint64_t key[2] = {dev_, thin_block};
	return metadata_->mappings_.lookup(key);
}

// Records that |thin_block| of this device maps to |data_block|.
void
thin::insert(block_address thin_block, block_address data_block)
{
	uint64_t key[2] = {dev_, thin_block};
	block_time bt;
	bt.block_ = data_block;
	bt.time_ = 0;		// FIXME: use current time.
	metadata_->mappings_.insert(key, bt);
}

// Removes the mapping for |thin_block| of this device.
void
thin::remove(block_address thin_block)
{
	uint64_t key[2] = {dev_, thin_block};
	metadata_->mappings_.remove(key);
}

// Read-modify-write of this device's details record, updating
// snapshotted_time_.  Throws runtime_error if there is no record.
void
thin::set_snapshot_time(uint32_t time)
{
	uint64_t key[1] = { dev_ };
	optional<device_details> mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		throw runtime_error("no such device");

	mdetail->snapshotted_time_ = time;
	metadata_->details_.insert(key, *mdetail);
}

// Returns mapped_blocks_ from this device's details record.  Throws
// runtime_error if there is no record.
block_address
thin::get_mapped_blocks() const
{
	uint64_t key[1] = { dev_ };
	optional<device_details> mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		throw runtime_error("no such device");

	return mdetail->mapped_blocks_;
}

// Read-modify-write of this device's details record, updating
// mapped_blocks_.  Throws runtime_error if there is no record.
void
thin::set_mapped_blocks(block_address count)
{
	uint64_t key[1] = { dev_ };
	optional<device_details> mdetail = metadata_->details_.lookup(key);
	if (!mdetail)
		throw runtime_error("no such device");

	mdetail->mapped_blocks_ = count;
	metadata_->details_.insert(key, *mdetail);
}

//--------------------------------

// Opens existing metadata on |dev_path|: the superblock is read (and
// checksummed) first, and every other member is then constructed from
// the roots it holds.
//
// mappings_top_level_ and mappings_ are both opened on
// sb_.data_mapping_root_; they are two views of the same tree, which is
// why code that changes one has to copy the root across to the other.
metadata::metadata(std::string const &dev_path)
	: tm_(open_tm(dev_path)),
	  sb_(read_superblock(tm_->get_bm())),
	  metadata_sm_(open_metadata_sm(tm_, static_cast<void *>(&sb_.metadata_space_map_root_))),
	  data_sm_(open_disk_sm(tm_, static_cast<void *>(&sb_.data_space_map_root_))),
	  details_(tm_, sb_.device_details_root_, device_details_traits::ref_counter()),
	  mappings_top_level_(tm_, sb_.data_mapping_root_, mtree_ref_counter(tm_)),
	  mappings_(tm_, sb_.data_mapping_root_, block_time_ref_counter(data_sm_))
{
#if 0
	::memset(&sb_, 0, sizeof(sb_));
	sb_.data_mapping_root_ = mappings_.get_root();
	sb_.device_details_root_ = details_.get_root();
	sb_.metadata_block_size_ = MD_BLOCK_SIZE;
	sb_.metadata_nr_blocks_ = tm_->get_bm()->get_nr_blocks();
#endif
}

metadata::~metadata()
{
}

// Copies the current mapping and details roots into the in-core
// superblock and packs it into the superblock block.
void
metadata::commit()
{
	sb_.data_mapping_root_ = mappings_.get_root();
	sb_.device_details_root_ = details_.get_root();

	write_ref superblock = tm_->get_bm()->superblock(SUPERBLOCK_LOCATION);
	superblock_disk *disk = reinterpret_cast<superblock_disk *>(superblock.data());
	superblock_traits::pack(sb_, *disk);
}

// Adds an empty mapping tree for |dev| to the top level tree.  Throws
// runtime_error if device_exists(dev).
//
// NOTE(review): no record is added to details_ here, although
// device_exists() and open_thin() both consult details_ -- confirm
// where the details record for a new device is meant to be created.
void
metadata::create_thin(thin_dev_t dev)
{
	uint64_t key[1] = {dev};

	if (device_exists(dev))
		throw std::runtime_error("Device already exists");

	single_mapping_tree::ptr new_tree(new single_mapping_tree(tm_, block_time_ref_counter(data_sm_)));
	mappings_top_level_.insert(key, new_tree->get_root());
	mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
}

// Creates |dev| as a snapshot of |origin|: the origin's mapping tree is
// cloned and inserted under |dev|, the superblock time is bumped, and
// both devices are stamped with the new time.  The snapshot starts with
// the origin's mapped block count.
//
// Throws runtime_error if |origin| has no mapping tree, or if either
// device cannot be opened with open_thin().
void
metadata::create_snap(thin_dev_t dev, thin_dev_t origin)
{
	uint64_t snap_key[1] = {dev};
	uint64_t origin_key[1] = {origin};

	optional<uint64_t> mtree_root = mappings_top_level_.lookup(origin_key);
	if (!mtree_root)
		throw std::runtime_error("unknown origin");

	single_mapping_tree otree(tm_, *mtree_root,
				  block_time_ref_counter(data_sm_));

	single_mapping_tree::ptr clone(otree.clone());
	mappings_top_level_.insert(snap_key, clone->get_root());
	mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly

	sb_.time_++;

	thin::ptr o = open_thin(origin);
	thin::ptr s = open_thin(dev);
	o->set_snapshot_time(sb_.time_);
	s->set_snapshot_time(sb_.time_);
	s->set_mapped_blocks(o->get_mapped_blocks());
}

// Removes |dev|'s entry from the top level mapping tree.
//
// NOTE(review): the details_ record for |dev| is left in place, so
// device_exists(dev) is unaffected by this call -- confirm whether that
// is intended.
void
metadata::del(thin_dev_t dev)
{
	uint64_t key[1] = {dev};
	mappings_top_level_.remove(key);

	// Keep the two level view in step with the top level tree, exactly
	// as create_thin() and create_snap() do.  commit() persists
	// mappings_.get_root(), so without this it could write a root that
	// predates the removal.
	mappings_.set_root(mappings_top_level_.get_root()); // FIXME: ugly
}

void
metadata::set_transaction_id(uint64_t id)
{
	sb_.trans_id_ = id;
}

uint64_t
metadata::get_transaction_id() const
{
	return sb_.trans_id_;
}

block_address
metadata::get_held_root() const
{
	return sb_.held_root_;
}

// Allocates a fresh block from the data space map.
block_address
metadata::alloc_data_block()
{
	return data_sm_->new_block();
}

// Drops one reference on data block |b|.
void
metadata::free_data_block(block_address b)
{
	data_sm_->dec(b);
}

block_address
metadata::get_nr_free_data_blocks() const
{
	return data_sm_->get_nr_free();
}

sector_t
metadata::get_data_block_size() const
{
	return sb_.data_block_size_;
}

// Number of blocks tracked by the data space map.
block_address
metadata::get_data_dev_size() const
{
	return data_sm_->get_nr_blocks();
}

// Returns a handle for |dev|.  Throws runtime_error if |dev| has no
// record in the details tree.  The handle keeps a raw pointer to this
// object.
thin::ptr
metadata::open_thin(thin_dev_t dev)
{
	uint64_t key[1] = {dev};
	optional<device_details> mdetails = details_.lookup(key);
	if (!mdetails)
		throw runtime_error("no such device");

	return thin::ptr(new thin(dev, this));
}

// True iff |dev| has a record in the details tree.
bool
metadata::device_exists(thin_dev_t dev) const
{
	uint64_t key[1] = {dev};

	// Spell the optional -> bool conversion out rather than relying
	// on an implicit conversion in the return statement.
	return details_.lookup(key) ? true : false;
}

namespace {
	// Space map iterator that compares each block's actual reference
	// count with the count in |expected|, recording one child error
	// per mismatch.
	struct check_count : public space_map::iterator {
		check_count(string const &desc, block_counter const &expected)
			: bad_(false),
			  expected_(expected),
			  errors_(new error_set(desc)) {
		}

		virtual void operator() (block_address b, ref_t actual) {
			ref_t expected = expected_.get_count(b);

			if (actual != expected) {
				ostringstream out;
				out << b << ": was " << actual
				    << ", expected " << expected;
				errors_->add_child(out.str());
				bad_ = true;
			}
		}

		bool bad_;			// set once any mismatch is seen
		block_counter const &expected_;
		error_set::ptr errors_;		// titled with |desc|
	};

	// Iterates |sm| against |counts|.  Returns the collected errors,
	// titled |desc|, if any block's count differs; otherwise an empty
	// optional.
	optional<error_set::ptr>
	check_ref_counts(string const &desc, block_counter const &counts,
			 space_map::ptr sm) {
		check_count checker(desc, counts);
		sm->iterate(checker);
		return checker.bad_ ? optional<error_set::ptr>(checker.errors_) : optional<error_set::ptr>();
	}
}

// Validates the metadata:
//  - walks the mapping tree and the details tree with their validators;
//  - throws runtime_error if a device has mappings but no details entry;
//  - counts the superblock, then lets both space maps add to the
//    metadata block counts via their check() methods;
//  - compares the expected counts against both space maps.
//
// Returns the accumulated errors, or an empty optional if there are
// none.
boost::optional<error_set::ptr>
metadata::check()
{
	error_set::ptr errors(new error_set("Errors in metadata"));

	block_counter metadata_counter, data_counter;

	mapping_validator::ptr mv(new mapping_validator(metadata_counter, data_counter));
	mappings_.visit(mv);
	set<uint64_t> const &mapped_devs = mv->get_devices();

	details_validator::ptr dv(new details_validator(metadata_counter));
	details_.visit(dv);
	set<uint64_t> const &details_devs = dv->get_devices();

	for (set<uint64_t>::const_iterator it = mapped_devs.begin(); it != mapped_devs.end(); ++it)
		if (details_devs.count(*it) == 0) {
			ostringstream out;
			out << "mapping exists for device " << *it
			    << ", yet there is no entry in the details tree.";
			throw runtime_error(out.str());
		}

	metadata_counter.inc(SUPERBLOCK_LOCATION);
	metadata_sm_->check(metadata_counter);
	data_sm_->check(metadata_counter);

	errors->add_child(check_ref_counts("Errors in metadata block reference counts",
					   metadata_counter, metadata_sm_));
	errors->add_child(check_ref_counts("Errors in data block reference counts",
					   data_counter, data_sm_));

	return (errors->get_children().size() > 0) ? optional<error_set::ptr>(errors) : optional<error_set::ptr>();
}

//----------------------------------------------------------------