diff --git a/.gitignore b/.gitignore index 4f9a738..226f487 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ *.o *_t *.d -test.data \ No newline at end of file +test.data +thin_dump +thin_repair diff --git a/Makefile b/Makefile index df452f5..7e3736b 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,8 @@ SOURCE=\ endian_utils.cc \ + metadata.cc \ metadata_disk_structures.cc -# metadata.cc \ - TEST_SOURCE=\ unit-tests/block_t.cc \ unit-tests/btree_t.cc \ @@ -12,8 +11,6 @@ TEST_SOURCE=\ unit-tests/space_map_disk_t.cc \ unit-tests/transaction_manager_t.cc \ -# unit-tests/metadata_t.cc \ - OBJECTS=$(subst .cc,.o,$(SOURCE)) TEST_PROGRAMS=$(subst .cc,,$(TEST_SOURCE)) TOP_DIR:=$(PWD) @@ -40,6 +37,12 @@ unit-test: $(TEST_PROGRAMS) multisnap_display: $(OBJECTS) main.o g++ $(CPPFLAGS) -o $@ $+ $(LIBS) +thin_dump: $(OBJECTS) thin_dump.o + g++ $(CPPFLAGS) -o $@ $+ $(LIBS) + +thin_repair: $(OBJECTS) thin_repair.o + g++ $(CPPFLAGS) -o $@ $+ $(LIBS) + unit-tests/block_t: unit-tests/block_t.o g++ $(CPPFLAGS) -o $@ $+ $(LIBS) diff --git a/btree.h b/btree.h index 02f3b86..55c1876 100644 --- a/btree.h +++ b/btree.h @@ -52,6 +52,8 @@ namespace persistent_data { __le32 nr_entries; __le32 max_entries; + __le32 value_size; + __le32 padding; } __attribute__((packed)); struct disk_node { @@ -76,6 +78,8 @@ namespace persistent_data { return location_; } + block_address get_block_nr() const; + node_type get_type() const; void set_type(node_type t); @@ -88,6 +92,8 @@ namespace persistent_data { // FIXME: remove this, and get the constructor to do it. void set_max_entries(); // calculates the max for you. 
+ size_t get_value_size() const; + uint64_t key_at(unsigned i) const; void set_key(unsigned i, uint64_t k); diff --git a/btree.tcc b/btree.tcc index fce7eb1..ec689e0 100644 --- a/btree.tcc +++ b/btree.tcc @@ -16,6 +16,13 @@ node_ref::node_ref(block_address location, disk_node *ra { } +template +block_address +node_ref::get_block_nr() const +{ + return to_cpu(raw_->header.blocknr); +} + template btree_detail::node_type node_ref::get_type() const @@ -81,6 +88,13 @@ node_ref::set_max_entries() set_max_entries(calc_max_entries()); } +template +size_t +node_ref::get_value_size() const +{ + return to_cpu(raw_->header.value_size); +} + template uint64_t node_ref::key_at(unsigned i) const @@ -610,6 +624,7 @@ walk_tree(typename visitor::ptr visitor, unsigned level, block_address b) { using namespace btree_detail; + auto blk = tm_->read_lock(b); auto o = to_node(blk); if (o.get_type() == INTERNAL) { diff --git a/metadata.cc b/metadata.cc index 1808c1b..24da786 100644 --- a/metadata.cc +++ b/metadata.cc @@ -1,7 +1,12 @@ #include "metadata.h" -#include +#include "core_map.h" +#include +#include +#include + +using namespace std; using namespace persistent_data; using namespace thin_provisioning; @@ -13,6 +18,164 @@ namespace { uint32_t const VERSION = 1; unsigned const METADATA_CACHE_SIZE = 1024; unsigned const SECTOR_TO_BLOCK_SHIFT = 3; + + // FIXME: get the file size + unsigned const NR_BLOCKS = 1024; + + transaction_manager<4096>::ptr + open_tm(string const &dev_path) { + block_manager<4096>::ptr bm(new block_manager<4096>(dev_path, NR_BLOCKS)); + space_map::ptr sm(new core_map(NR_BLOCKS)); + transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); + return tm; + } + + superblock read_superblock(block_manager<4096>::ptr bm) { + superblock sb; + auto r = bm->read_lock(SUPERBLOCK_LOCATION); + superblock_disk const *sbd = reinterpret_cast(&r.data()); + superblock_traits::unpack(*sbd, sb); + return sb; + } + + 
//---------------------------------------------------------------- + // This class implements consistency checking for the + // btrees in general. It's worth summarising what is checked: + // + // Implemented + // ----------- + // + // - No block appears in the tree more than once. + // - block_nr matches the block's actual location + // - nr_entries <= max_entries + // - max_entries fits in block + // - max_entries is divisible by 3 + // + // Not implemented + // --------------- + // + // - checksum + // - leaf | internal flags (this can be inferred from siblings) + // - nr_entries > minimum + //---------------------------------------------------------------- + template + class btree_validator : public btree::visitor { + public: + void visit_internal(unsigned level, btree_detail::node_ref const &n) { + check_duplicate_block(n.get_location()); + check_block_nr(n); + check_max_entries(n); + check_nr_entries(n); + } + + void visit_internal_leaf(unsigned level, btree_detail::node_ref const &n) { + check_duplicate_block(n.get_location()); + check_block_nr(n); + check_max_entries(n); + check_nr_entries(n); + } + + void visit_leaf(unsigned level, btree_detail::node_ref const &n) { + check_duplicate_block(n.get_location()); + check_block_nr(n); + check_max_entries(n); + check_nr_entries(n); + } + + private: + void check_duplicate_block(block_address b) { + if (seen_.count(b)) { + ostringstream out; + out << "duplicate block in btree: " << b; + throw runtime_error(out.str()); + } + + seen_.insert(b); + } + + template + void check_block_nr(node const &n) const { + if (n.get_location() != n.get_block_nr()) { + ostringstream out; + out << "block number mismatch: actually " + << n.get_location() + << ", claims " << n.get_block_nr(); + throw runtime_error(out.str()); + } + } + + template + void check_max_entries(node const &n) const { + size_t elt_size = sizeof(uint64_t) + n.get_value_size(); + if (elt_size * n.get_max_entries() + sizeof(node_header) > BlockSize) { + ostringstream out; + out << "max entries too 
large: " << n.get_max_entries(); + throw runtime_error(out.str()); + } + + if (n.get_max_entries() % 3) { + ostringstream out; + out << "max entries is not divisible by 3: " << n.get_max_entries(); + throw runtime_error(out.str()); + } + } + + template + void check_nr_entries(node const &n) const { + if (n.get_nr_entries() > n.get_max_entries()) { + ostringstream out; + out << "bad nr_entries: " + << n.get_nr_entries() << " < " + << n.get_max_entries(); + throw runtime_error(out.str()); + } + } + + set seen_; + }; + + // As well as the standard btree checks, we build up a set of the + // devices that have mappings defined, which can later be cross + // referenced with the details tree. + class mapping_validator : public btree_validator<2, block_traits, MD_BLOCK_SIZE> { + public: + typedef boost::shared_ptr ptr; + + void visit_internal_leaf(unsigned level, + btree_detail::node_ref const &n) { + btree_validator<2, block_traits, MD_BLOCK_SIZE>::visit_internal_leaf(level, n); + + for (unsigned i = 0; i < n.get_nr_entries(); i++) + devices_.insert(n.key_at(i)); + } + + set get_devices() const { + return devices_; + } + + private: + set devices_; + }; + + class details_validator : public btree_validator<1, device_details_traits, MD_BLOCK_SIZE> { + public: + typedef boost::shared_ptr ptr; + + void visit_leaf(unsigned level, + btree_detail::node_ref const &n) { + btree_validator<1, device_details_traits, MD_BLOCK_SIZE>::visit_leaf(level, n); + + for (unsigned i = 0; i < n.get_nr_entries(); i++) + devices_.insert(n.key_at(i)); + } + + set get_devices() const { + return devices_; + } + + private: + set devices_; + }; } //---------------------------------------------------------------- @@ -87,22 +250,20 @@ thin::set_mapped_blocks(block_address count) //-------------------------------- -metadata::metadata(transaction_manager::ptr tm, - block_address superblock, - sector_t data_block_size, - block_address nr_data_blocks, - bool create) - : superblock_(superblock), - tm_(tm), - 
details_(tm, typename device_details_traits::ref_counter()), - mappings_top_level_(tm, mtree_ref_counter(tm)), - mappings_(tm, space_map_ref_counter(data_sm_)) +metadata::metadata(std::string const &dev_path) + : tm_(open_tm(dev_path)), + sb_(read_superblock(tm_->get_bm())), + details_(tm_, sb_.device_details_root_, typename device_details_traits::ref_counter()), + mappings_top_level_(tm_, sb_.data_mapping_root_, mtree_ref_counter(tm_)), + mappings_(tm_, sb_.data_mapping_root_, space_map_ref_counter(data_sm_)) { +#if 0 ::memset(&sb_, 0, sizeof(sb_)); sb_.data_mapping_root_ = mappings_.get_root(); sb_.device_details_root_ = details_.get_root(); sb_.metadata_block_size_ = MD_BLOCK_SIZE; - sb_.metadata_nr_blocks_ = tm->get_bm()->get_nr_blocks(); + sb_.metadata_nr_blocks_ = tm_->get_bm()->get_nr_blocks(); +#endif } metadata::~metadata() @@ -116,7 +277,7 @@ metadata::commit() sb_.data_mapping_root_ = mappings_.get_root(); sb_.device_details_root_ = details_.get_root(); - auto superblock = tm_->get_bm()->superblock(superblock_); + auto superblock = tm_->get_bm()->superblock(SUPERBLOCK_LOCATION); auto disk = reinterpret_cast(superblock.data()); superblock_traits::pack(sb_, *disk); } @@ -235,4 +396,24 @@ metadata::device_exists(thin_dev_t dev) const return details_.lookup(key); } +void +metadata::check() +{ + mapping_validator::ptr mv(new mapping_validator); + mappings_.visit(mv); + auto mapped_devs = mv->get_devices(); + + details_validator::ptr dv(new details_validator); + details_.visit(dv); + auto details_devs = dv->get_devices(); + + for (auto it = mapped_devs.begin(); it != mapped_devs.end(); ++it) + if (details_devs.count(*it) == 0) { + ostringstream out; + out << "mapping exists for device " << *it + << ", yet there is no entry in the details tree."; + throw runtime_error(out.str()); + } +} + //---------------------------------------------------------------- diff --git a/metadata.h b/metadata.h index c46a362..064d1a4 100644 --- a/metadata.h +++ b/metadata.h @@ 
-143,13 +143,14 @@ namespace thin_provisioning { thin::ptr open_thin(thin_dev_t); + // Validation and repair + void check(); + private: friend class thin; bool device_exists(thin_dev_t dev) const; - block_address superblock_; - typedef persistent_data::transaction_manager::ptr tm_ptr; typedef persistent_data::btree<1, device_details_traits, MD_BLOCK_SIZE> detail_tree; @@ -157,7 +158,9 @@ namespace thin_provisioning { typedef persistent_data::btree<2, block_traits, MD_BLOCK_SIZE> mapping_tree; typedef persistent_data::btree<1, block_traits, MD_BLOCK_SIZE> single_mapping_tree; + // Declaration order is important here tm_ptr tm_; + superblock sb_; // Ignoring the metadata sm for now, since we don't need it for the basic 'dump' tool // space_map::ptr metadata_sm_; @@ -165,7 +168,6 @@ namespace thin_provisioning { detail_tree details_; dev_tree mappings_top_level_; mapping_tree mappings_; - superblock sb_; }; }; diff --git a/thin_dump.cc b/thin_dump.cc new file mode 100644 index 0000000..96df939 --- /dev/null +++ b/thin_dump.cc @@ -0,0 +1,31 @@ +#include + +#include "metadata.h" + +using namespace persistent_data; +using namespace std; +using namespace thin_provisioning; + +namespace { + void dump(string const &path) { + metadata md(path); + + md.check(); + } + + void usage(string const &cmd) { + cerr << "Usage: " << cmd << " " << endl; + } +} + +int main(int argc, char **argv) +{ + if (argc != 2) { + usage(argv[0]); + exit(1); + } + + dump(argv[1]); + + return 0; +} diff --git a/thin_repair.cc b/thin_repair.cc new file mode 100644 index 0000000..c5504ee --- /dev/null +++ b/thin_repair.cc @@ -0,0 +1,31 @@ +#include + +#include "metadata.h" + +using namespace persistent_data; +using namespace std; +using namespace thin_provisioning; + +namespace { + void check(string const &path) { + metadata md(path); + + md.check(); + } + + void usage(string const &cmd) { + cerr << "Usage: " << cmd << " " << endl; + } +} + +int main(int argc, char **argv) +{ + if (argc != 2) { + 
usage(argv[0]); + exit(1); + } + + check(argv[1]); + + return 0; +} diff --git a/unit-tests/metadata_t.cc b/unit-tests/metadata_t.cc index 8ae9f25..7882194 100644 --- a/unit-tests/metadata_t.cc +++ b/unit-tests/metadata_t.cc @@ -15,14 +15,6 @@ namespace { block_address const NR_BLOCKS = 1024; block_address const SUPERBLOCK = 0; - transaction_manager<4096>::ptr - create_tm() { - block_manager<4096>::ptr bm(new block_manager<4096>("./test.data", NR_BLOCKS)); - space_map::ptr sm(new core_map(NR_BLOCKS)); - transaction_manager<4096>::ptr tm(new transaction_manager<4096>(bm, sm)); - return tm; - } - metadata::ptr create_metadata() { auto tm = create_tm();