diff --git a/CHANGES b/CHANGES index 491c24a..3282b51 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,11 @@ +v0.9.0 +====== + +- New support tools: thin_metadata_{pack,unpack}. +- thin_check now checks data block reference counts. +- thin_check can now check metadata snapshots. +- some metadata space map bug fixes. + v0.7.5 ====== diff --git a/Cargo.lock b/Cargo.lock index 0ca6d78..c1497ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "anyhow" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "arrayvec" version = "0.4.12" @@ -380,6 +385,7 @@ dependencies = [ name = "thinp" version = "0.1.0" dependencies = [ + "anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -393,6 +399,25 @@ dependencies = [ "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.30 
(registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -456,6 +481,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" @@ -501,6 +527,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +"checksum thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +"checksum thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" diff --git a/Cargo.toml b/Cargo.toml index e1e1c7c..fc98546 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" license = "GPL3" [dependencies] +anyhow = "1.0" byteorder = "1.3" clap = "2.33" crc32c = "0.4" @@ -14,9 +15,10 @@ libc = "0.2.71" nix = "0.17" nom = "5.1" num_cpus = "1.13" -rand = "0.7" -num-traits = "0.2" num-derive = "0.3" +num-traits = "0.2" +rand = "0.7" +thiserror = "1.0" [dev-dependencies] quickcheck = "0.9" diff --git a/Makefile.in b/Makefile.in index 8ba7622..97b86c4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -40,6 +40,7 @@ SOURCE=\ base/error_state.cc \ base/error_string.cc \ base/grid_layout.cc \ + base/io_generator.cc \ base/file_utils.cc \ base/progress_monitor.cc \ base/rolling_hash.cc \ @@ -128,6 +129,7 @@ DEVTOOLS_SOURCE=\ thin-provisioning/thin_ll_restore.cc \ thin-provisioning/thin_show_duplicates.cc \ thin-provisioning/thin_generate_metadata.cc \ + thin-provisioning/thin_generate_mappings.cc \ thin-provisioning/variable_chunk_stream.cc \ thin-provisioning/thin_show_metadata.cc \ thin-provisioning/thin_scan.cc \ @@ -167,7 +169,7 @@ CXXFLAGS+=@CXXDEBUG_FLAG@ CXXFLAGS+=@CXX_STRERROR_FLAG@ CXXFLAGS+=@LFS_FLAGS@ INCLUDES+=-I$(TOP_BUILDDIR) -I$(TOP_DIR) -I$(TOP_DIR)/thin-provisioning -LIBS:=-laio -lexpat -lz -lboost_iostreams -ldl +LIBS:=-laio -lexpat -lboost_iostreams -ldl ifeq ("@DEVTOOLS@", "yes") LIBS+=-lncurses @@ -318,7 +320,6 @@ install: bin/pdata_tools $(MANPAGES) $(INSTALL_DATA) 
man8/thin_repair.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_restore.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_rmap.8 $(MANPATH)/man8 - $(INSTALL_DATA) man8/thin_metadata_size.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_check.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_dump.8 $(MANPATH)/man8 @@ -333,10 +334,16 @@ ifeq ("@DEVTOOLS@", "yes") ln -s -f pdata_tools $(BINDIR)/thin_scan endif -.PHONY: install install-rust-tools +.PHONY: install install-rust-tools rust-tools -install-rust-tools: - cargo install --path . --root $(BINDIR) +rust-tools: + cargo build --release + +install-rust-tools: man8/thin_metadata_pack.8 man8/thin_metadata_unpack.8 rust-tools + $(INSTALL_PROGRAM) target/release/thin_metadata_pack $(BINDIR) + $(INSTALL_PROGRAM) target/release/thin_metadata_unpack $(BINDIR) + $(STRIP) $(BINDIR)/thin_metadata_pack + $(STRIP) $(BINDIR)/thin_metadata_unpack $(INSTALL_DATA) man8/thin_metadata_pack.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_metadata_unpack.8 $(MANPATH)/man8 diff --git a/VERSION b/VERSION index 7ada0d3..262b51c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.8.5 +0.9.0-rc1 diff --git a/base/io.h b/base/io.h new file mode 100644 index 0000000..75f2d3a --- /dev/null +++ b/base/io.h @@ -0,0 +1,25 @@ +#ifndef BASE_IO_H +#define BASE_IO_H + +#include "base/types.h" +#include + +//---------------------------------------------------------------- + +namespace base { + enum req_op { + REQ_OP_READ, + REQ_OP_WRITE, + REQ_OP_DISCARD + }; + + struct io { + unsigned op_; + sector_t sector_; + sector_t size_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/base/io_generator.cc b/base/io_generator.cc new file mode 100644 index 0000000..0543d31 --- /dev/null +++ b/base/io_generator.cc @@ -0,0 +1,240 @@ +#include "base/io_generator.h" +#include +#include +#include + +using namespace base; + +//---------------------------------------------------------------- + +namespace { + std::pair patterns[] = { + {"read", 
io_pattern::READ}, + {"write", io_pattern::WRITE}, + {"trim", io_pattern::TRIM}, + {"readwrite", io_pattern::READ_WRITE}, + {"trimwrite", io_pattern::TRIM_WRITE}, + {"randread", io_pattern::RAND_READ}, + {"randwrite", io_pattern::RAND_WRITE}, + {"randtrim", io_pattern::RAND_TRIM}, + {"randrw", io_pattern::RAND_RW}, + {"randtw", io_pattern::RAND_TW} + }; + + unsigned const nr_patterns = sizeof(patterns) / sizeof(patterns[0]); + + //-------------------------------- + + class offset_generator { + public: + typedef std::shared_ptr ptr; + + virtual base::sector_t next_offset() = 0; + }; + + class sequential_offset_generator: public offset_generator { + public: + sequential_offset_generator(base::sector_t offset, + base::sector_t size, + base::sector_t block_size) + : block_size_(block_size), + begin_(offset), + end_(offset + size), + current_(offset) { + if (size < block_size) + throw std::runtime_error("size must be greater than block_size"); + } + + base::sector_t next_offset() { + sector_t r = current_; + current_ += block_size_; + if (current_ > end_) + current_ = begin_; + return r; + } + + private: + unsigned block_size_; + base::sector_t begin_; + base::sector_t end_; + base::sector_t current_; + }; + + class random_offset_generator: public offset_generator { + public: + random_offset_generator(sector_t offset, + sector_t size, + sector_t block_size) + : block_begin_(offset / block_size), + nr_blocks_(size / block_size), + block_size_(block_size) { + } + + sector_t next_offset() { + return ((std::rand() % nr_blocks_) + block_begin_) * block_size_; + } + + private: + uint64_t block_begin_; + uint64_t nr_blocks_; + unsigned block_size_; + }; + + //-------------------------------- + + class op_generator { + public: + typedef std::shared_ptr ptr; + + op_generator(base::req_op op1) + : op1_(op1), op2_(op1), op1_pct_(100) { + } + + op_generator(base::req_op op1, + base::req_op op2, + unsigned op1_pct) + : op1_(op1), op2_(op2), op1_pct_(op1_pct) { + if (op1_pct > 100) + 
throw std::runtime_error("invalid percentage"); + } + + base::req_op next_op() { + if (static_cast(std::rand()) % 100 > op1_pct_) + return op2_; + return op1_; + } + + private: + base::req_op op1_; + base::req_op op2_; + unsigned op1_pct_; + }; + + //-------------------------------- + + class base_io_generator: public io_generator { + public: + base_io_generator(io_generator_options const &opts); + virtual bool has_next(); + virtual void next(base::io &next_io); + + private: + offset_generator::ptr + create_offset_generator(io_generator_options const &opts); + + op_generator::ptr + create_op_generator(io_generator_options const &opts); + + offset_generator::ptr offset_gen_; + op_generator::ptr op_gen_; + sector_t block_size_; + size_t io_size_finished_; + size_t io_size_total_; + }; + + base_io_generator::base_io_generator(io_generator_options const &opts) + : offset_gen_(create_offset_generator(opts)), + op_gen_(create_op_generator(opts)), + block_size_(opts.block_size_), + io_size_finished_(0), + io_size_total_(opts.io_size_) { + } + + bool base_io_generator::has_next() { + return io_size_finished_ < io_size_total_; + } + + void base_io_generator::next(base::io &next_io) { + if (io_size_finished_ >= io_size_total_) + throw std::runtime_error(""); + + next_io.op_ = op_gen_->next_op(); + next_io.sector_ = offset_gen_->next_offset(); + next_io.size_ = block_size_; + + io_size_finished_ += block_size_; + } + + offset_generator::ptr + base_io_generator::create_offset_generator(io_generator_options const &opts) { + if (opts.pattern_.is_random()) + return offset_generator::ptr( + new random_offset_generator(opts.offset_, + opts.size_, + opts.block_size_)); + + return offset_generator::ptr( + new sequential_offset_generator(opts.offset_, + opts.size_, + opts.block_size_)); + } + + op_generator::ptr + base_io_generator::create_op_generator(io_generator_options const &opts) { + // FIXME: elimiate the switch-case and hide enum values + switch (opts.pattern_.val_) { + case 
io_pattern::READ: + case io_pattern::RAND_READ: + return op_generator::ptr(new op_generator(base::REQ_OP_READ)); + case io_pattern::WRITE: + case io_pattern::RAND_WRITE: + return op_generator::ptr(new op_generator(base::REQ_OP_WRITE)); + case io_pattern::TRIM: + case io_pattern::RAND_TRIM: + return op_generator::ptr(new op_generator(base::REQ_OP_DISCARD)); + case io_pattern::READ_WRITE: + case io_pattern::RAND_RW: + return op_generator::ptr(new op_generator(base::REQ_OP_READ, + base::REQ_OP_WRITE, + 50)); + case io_pattern::TRIM_WRITE: + case io_pattern::RAND_TW: + return op_generator::ptr(new op_generator(base::REQ_OP_DISCARD, + base::REQ_OP_WRITE, + 50)); + default: + throw std::runtime_error("unknown pattern"); + } + } +} + +//---------------------------------------------------------------- + +io_pattern::io_pattern() + : val_(pattern::READ) { +} + +io_pattern::io_pattern(char const *pattern) { + parse(pattern); +} + +void +io_pattern::parse(char const *pattern) { + bool found = false; + unsigned i = 0; + for (i = 0; i < nr_patterns; i++) { + if (!strcmp(patterns[i].first, pattern)) { + found = true; + break; + } + } + + if (!found) + throw std::runtime_error("unknow pattern"); + + val_ = patterns[i].second; +} + +bool +io_pattern::is_random() const { + return val_ & pattern::RANDOM; +} + +//---------------------------------------------------------------- + +io_generator::ptr +base::create_io_generator(io_generator_options const &opts) { + return io_generator::ptr(new base_io_generator(opts)); +} + +//---------------------------------------------------------------- diff --git a/base/io_generator.h b/base/io_generator.h new file mode 100644 index 0000000..4a33550 --- /dev/null +++ b/base/io_generator.h @@ -0,0 +1,55 @@ +#ifndef BASE_IO_GENERATOR_H +#define BASE_IO_GENERATOR_H + +#include "base/io.h" +#include + +//---------------------------------------------------------------- + +namespace base { + struct io_pattern { + enum pattern { + READ = 1 << 1, + WRITE = 
1 << 2, + TRIM = 1 << 3, + RANDOM = 1 << 8, + READ_WRITE = READ | WRITE, + TRIM_WRITE = WRITE | TRIM, + RAND_READ = READ | RANDOM, + RAND_WRITE = WRITE | RANDOM, + RAND_TRIM = TRIM | RANDOM, + RAND_RW = READ_WRITE | RANDOM, + RAND_TW = TRIM_WRITE | RANDOM, + }; + + io_pattern(); + io_pattern(char const *pattern); + void parse(char const *pattern); + bool is_random() const; + + pattern val_; + }; + + struct io_generator_options { + io_pattern pattern_; + sector_t offset_; + sector_t block_size_; + sector_t size_; + sector_t io_size_; + }; + + class io_generator { + public: + typedef std::shared_ptr ptr; + + virtual bool has_next() = 0; + virtual void next(base::io &next_io) = 0; + }; + + io_generator::ptr + create_io_generator(io_generator_options const &opts); +} + +//---------------------------------------------------------------- + +#endif diff --git a/persistent-data/math_utils.h b/base/math_utils.h similarity index 90% rename from persistent-data/math_utils.h rename to base/math_utils.h index cb387f4..de00d88 100644 --- a/persistent-data/math_utils.h +++ b/base/math_utils.h @@ -16,8 +16,8 @@ // with thin-provisioning-tools. If not, see // . 
// Returns true iff v is a positive integer with exactly one bit set
// (1, 2, 4, 8, ...).
//
// The bit trick v & (v - 1) clears the lowest set bit, so the result is
// zero for powers of two -- but also for v == 0, which is not a power of
// two and must be rejected explicitly.  Testing v > 0 first also keeps
// v - 1 from being evaluated for non-positive signed values.
template <typename T>
bool is_power_of_two(T const v) {
	return v > 0 && !(v & (v - 1));
}
//------------------------------------------------ class space_map_ref_counter { diff --git a/functional-tests/scenario-string-constants.scm b/functional-tests/scenario-string-constants.scm index 88c9ab9..90b0507 100644 --- a/functional-tests/scenario-string-constants.scm +++ b/functional-tests/scenario-string-constants.scm @@ -29,6 +29,7 @@ Options: {-q|--quiet} {-h|--help} {-V|--version} + {-m|--metadata-snap} {--override-mapping-root} {--clear-needs-check-flag} {--ignore-non-fatal-errors} @@ -75,7 +76,7 @@ Options: {-V|--version}") (define thin-metadata-pack-help - "thin_metadata_pack 0.8.5 + "thin_metadata_pack 0.9.0-rc1 Produces a compressed file of thin metadata. Only packs metadata blocks that are actually used. USAGE: @@ -90,7 +91,7 @@ OPTIONS: -o Specify packed output file") (define thin-metadata-unpack-help - "thin_metadata_unpack 0.8.5 + "thin_metadata_unpack 0.9.0-rc1 Unpack a compressed file of thin metadata. USAGE: diff --git a/functional-tests/thin-functional-tests.scm b/functional-tests/thin-functional-tests.scm index b6e5a77..dbabb10 100644 --- a/functional-tests/thin-functional-tests.scm +++ b/functional-tests/thin-functional-tests.scm @@ -511,7 +511,7 @@ (define-scenario (thin-metadata-pack version) "accepts --version" (run-ok-rcv (stdout _) (thin-metadata-pack "--version") - (assert-equal "thin_metadata_pack 0.8.5" stdout))) + (assert-equal "thin_metadata_pack 0.9.0-rc1" stdout))) (define-scenario (thin-metadata-pack h) "accepts -h" @@ -553,7 +553,7 @@ (define-scenario (thin-metadata-unpack version) "accepts --version" (run-ok-rcv (stdout _) (thin-metadata-unpack "--version") - (assert-equal "thin_metadata_unpack 0.8.5" stdout))) + (assert-equal "thin_metadata_unpack 0.9.0-rc1" stdout))) (define-scenario (thin-metadata-unpack h) "accepts -h" diff --git a/man8/thin_check.txt b/man8/thin_check.txt index 1aae63e..55452e6 100644 --- a/man8/thin_check.txt +++ b/man8/thin_check.txt @@ -8,7 +8,8 @@ DESCRIPTION thin_check checks thin provisioning 
metadata created by the device-mapper thin provisioning target on a device or file. - The tool cannot be run on live metadata. + The tool cannot be run on live metadata unless the --metadata-snap + option is used. OPTIONS -q, --quiet Suppress output messages, return only exit code. @@ -33,6 +34,12 @@ OPTIONS is needed to fix any issues. After thin_repair succeeded, you may run thin_check again. + --metadata-snap, -m Check the metadata snapshot. + + This will check the devices tree and mappings in a metadata snapshot. + The snap does not contain space maps, so these will not be checked. This + may be used on live metadata. + --override-mapping-root Specify a mapping root to use. Don't use this. This overrides what's specified in the superblock. Only diff --git a/persistent-data/data-structures/array.h b/persistent-data/data-structures/array.h index f7a3ac4..d5063e7 100644 --- a/persistent-data/data-structures/array.h +++ b/persistent-data/data-structures/array.h @@ -19,7 +19,7 @@ #ifndef ARRAY_H #define ARRAY_H -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" #include "persistent-data/data-structures/btree.h" #include "persistent-data/data-structures/btree_counter.h" #include "persistent-data/data-structures/btree_damage_visitor.h" diff --git a/persistent-data/data-structures/bitset.cc b/persistent-data/data-structures/bitset.cc index a4f0b67..02b6b9e 100644 --- a/persistent-data/data-structures/bitset.cc +++ b/persistent-data/data-structures/bitset.cc @@ -1,6 +1,6 @@ #include "persistent-data/data-structures/array.h" #include "persistent-data/data-structures/bitset.h" -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" using namespace persistent_data; using namespace persistent_data::bitset_detail; diff --git a/persistent-data/data-structures/btree-remove.tcc b/persistent-data/data-structures/btree-remove.tcc new file mode 100644 index 0000000..d222273 --- /dev/null +++ 
b/persistent-data/data-structures/btree-remove.tcc @@ -0,0 +1,373 @@ +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +namespace persistent_data { + template + btree_detail::shadow_child + btree:: + create_shadow_child(internal_node &parent, + unsigned index) + { + block_address b = parent.value_at(index); + + pair p = tm_.shadow(b, validator_); + write_ref &wr = p.first; + btree_detail::node_type type; + + node_ref n = to_node(wr); + if (n.get_type() == btree_detail::INTERNAL) { + type = btree_detail::INTERNAL; + if (p.second) + n.inc_children(internal_rc_); + } else { + type = btree_detail::LEAF; + if (p.second) { + node_ref leaf = to_node(wr); + leaf.inc_children(rc_); + } + } + + parent.set_value(index, wr.get_location()); + + return btree_detail::shadow_child(wr, type); + } + + template + void + btree:: + remove(key const &key) + { + using namespace btree_detail; + + block_address block = root_; + unsigned index = 0; + shadow_spine spine(tm_, validator_); + bool need_remove = true; + + for (unsigned level = 0; level < Levels - 1; ++level) { + need_remove = remove_location(spine, block, + key[level], &index, + internal_rc_); + if (!need_remove) + break; + + internal_node n = spine.get_node(); + block = n.value_at(index); + } + + if (need_remove) { + need_remove = remove_location(spine, block, 
+ key[Levels - 1], &index, + rc_); + if (need_remove) { + leaf_node leaf = spine.get_node(); + leaf.delete_at(index); + } + } + + root_ = spine.get_root(); + } + + template + template + bool + btree:: + remove_location(btree_detail::shadow_spine &spine, + block_address block, + uint64_t key, + unsigned *index, + RC &leaf_rc) + { + using namespace btree_detail; + + unsigned i = 0; + bool r = false; + + for (;;) { + r = spine.step(block); + + // patch up the parent to point to the new shadow + if (spine.has_parent()) { + internal_node p = spine.get_parent(); + p.set_value(i, spine.get_block()); + } + + internal_node n = spine.get_node(); + if (n.get_type() == btree_detail::LEAF) { + node_ref leaf = spine.get_node(); + boost::optional idx = leaf.exact_search(key); + if (!idx) + return false; + *index = *idx; + return true; + } + + r = rebalance_children(spine, key); + if (!r) + break; + + n = spine.get_node(); + if (n.get_type() == btree_detail::LEAF) { + node_ref leaf = spine.get_node(); + boost::optional idx = leaf.exact_search(key); + if (!idx) + return false; + *index = *idx; + return true; + } + + i = n.lower_bound(key); + block = n.value_at(i); + } + + return r; + } + + template + template + bool + btree:: + rebalance_children(btree_detail::shadow_spine &spine, uint64_t key) + { + internal_node n = spine.get_node(); + + if (n.get_nr_entries() == 1) { + block_address b = n.value_at(0); + read_ref child = tm_.read_lock(b, validator_); + + // FIXME: is it safe? 
+ ::memcpy(n.raw(), child.data(), read_ref::BLOCK_SIZE); + + tm_.get_sm()->dec(child.get_location()); + return true; + } + + int i = n.lower_bound(key); + if (i < 0) + return false; + + bool has_left_sibling = i > 0; + bool has_right_sibling = static_cast(i) < (n.get_nr_entries() - 1); + + if (!has_left_sibling) + rebalance2(spine, i); + else if (!has_right_sibling) + rebalance2(spine, i - 1); + else + rebalance3(spine, i - 1); + + return true; + } + + template + template + void + btree:: + rebalance2(btree_detail::shadow_spine &spine, unsigned left_index) + { + internal_node parent = spine.get_node(); + shadow_child left = create_shadow_child(parent, left_index); + shadow_child right = create_shadow_child(parent, left_index + 1); + + // FIXME: ugly + if (left.get_type() == btree_detail::INTERNAL) { + internal_node l = left.get_node(); + internal_node r = right.get_node(); + __rebalance2(parent, l, r, left_index); + } else { + node_ref l = left.get_node(); + node_ref r = right.get_node(); + __rebalance2(parent, l, r, left_index); + } + } + + template + template + void + btree:: + __rebalance2(internal_node &parent, + node_ref &left, + node_ref &right, + unsigned left_index) + { + unsigned nr_left = left.get_nr_entries(); + unsigned nr_right = right.get_nr_entries(); + unsigned right_index = left_index + 1; + + unsigned threshold = 2 * (left.merge_threshold() + 1); + if (nr_left + nr_right < threshold) { + // Merge the right child into the left + left.copy_entries_to_left(right, nr_right); + left.set_nr_entries(nr_left + nr_right); + parent.delete_at(right_index); + tm_.get_sm()->dec(right.get_location()); + } else { + // Rebalance + unsigned target_left = (nr_left + nr_right) / 2; + left.move_entries(right, nr_left - target_left); + parent.set_key(right_index, right.key_at(0)); + } + } + + template + template + void + btree:: + rebalance3(btree_detail::shadow_spine &spine, unsigned left_index) + { + internal_node parent = spine.get_node(); + shadow_child left = 
create_shadow_child(parent, left_index); + shadow_child center = create_shadow_child(parent, left_index + 1); + shadow_child right = create_shadow_child(parent, left_index + 2); + + // FIXME: ugly + if (left.get_type() == btree_detail::INTERNAL) { + internal_node l = left.get_node(); + internal_node c = center.get_node(); + internal_node r = right.get_node(); + __rebalance3(parent, l, c, r, left_index); + } else { + node_ref l = left.get_node(); + node_ref c = center.get_node(); + node_ref r = right.get_node(); + __rebalance3(parent, l, c, r, left_index); + } + } + + template + template + void + btree:: + __rebalance3(internal_node &parent, + node_ref &left, + node_ref ¢er, + node_ref &right, + unsigned left_index) + { + unsigned nr_left = left.get_nr_entries(); + unsigned nr_center = center.get_nr_entries(); + unsigned nr_right = right.get_nr_entries(); + + unsigned threshold = left.merge_threshold() * 4 + 1; + + if ((nr_left + nr_center + nr_right) < threshold) + delete_center_node(parent, left, center, right, left_index); + else + redistribute3(parent, left, center, right, left_index); + } + + template + template + void + btree:: + delete_center_node(internal_node &parent, + node_ref &left, + node_ref ¢er, + node_ref &right, + unsigned left_index) + { + unsigned center_index = left_index + 1; + unsigned right_index = left_index + 2; + + unsigned max_entries = left.get_max_entries(); + unsigned nr_left = left.get_nr_entries(); + unsigned nr_center = center.get_nr_entries(); + unsigned nr_right = right.get_nr_entries(); + unsigned shift = std::min(max_entries - nr_left, nr_center); + + if (nr_left + shift > max_entries) + throw std::runtime_error("too many entries"); + + left.copy_entries_to_left(center, shift); + left.set_nr_entries(nr_left + shift); + + if (shift != nr_center) { + shift = nr_center - shift; + if ((nr_right + shift) > max_entries) + throw std::runtime_error("too many entries"); + right.shift_entries_right(shift); + 
center.copy_entries_to_right(right, shift); + right.set_nr_entries(nr_right + shift); + } + parent.set_key(right_index, right.key_at(0)); + + parent.delete_at(center_index); + --right_index; + + tm_.get_sm()->dec(center.get_location()); + __rebalance2(parent, left, right, left_index); + } + + template + template + void + btree:: + redistribute3(internal_node &parent, + node_ref &left, + node_ref ¢er, + node_ref &right, + unsigned left_index) + { + unsigned center_index = left_index + 1; + unsigned right_index = left_index + 2; + + unsigned nr_left = left.get_nr_entries(); + unsigned nr_center = center.get_nr_entries(); + unsigned nr_right = right.get_nr_entries(); + + unsigned max_entries = left.get_max_entries(); + unsigned total = nr_left + nr_center + nr_right; + unsigned target_right = total / 3; + unsigned remainder = (target_right * 3) != total; + unsigned target_left = target_right + remainder; + + if (target_left > max_entries || target_right > max_entries) + throw std::runtime_error("too many entries"); + + if (nr_left < nr_right) { + int s = nr_left - target_left; + + // FIXME: signed & unsigned comparison + if (s < 0 && nr_center < static_cast(-s)) { + // not enough in central node + left.move_entries(center, -nr_center); + s += nr_center; + left.move_entries(right, s); + nr_right += s; + } else + left.move_entries(center, s); + + center.move_entries(right, target_right - nr_right); + + } else { + int s = target_right - nr_right; + + if (s > 0 && nr_center < static_cast(s)) { + // not enough in central node + center.move_entries(right, nr_center); + s -= nr_center; + left.move_entries(right, s); + nr_left -= s; + } else + center.move_entries(right, s); + + left.move_entries(center, nr_left - target_left); + } + + parent.set_key(center_index, center.key_at(0)); + parent.set_key(right_index, right.key_at(0)); + } +}; diff --git a/persistent-data/data-structures/btree.h b/persistent-data/data-structures/btree.h index 3b84df7..d3423c7 100644 --- 
a/persistent-data/data-structures/btree.h +++ b/persistent-data/data-structures/btree.h @@ -110,12 +110,34 @@ namespace persistent_data { uint64_t key, typename ValueTraits::value_type const &v); + // Removes the entry at index i and decrements the nr_entries field + void delete_at(unsigned i); + // Copies entries from another node, appends them // to the back of this node. Adjusts nr_entries. void copy_entries(node_ref const &rhs, unsigned begin, unsigned end); + // Moves entries between this node and its sibling node, + // and maintains the key ordering. + // The nr_entries of both nodes are adjusted. + void move_entries(node_ref &rhs, + int count); + + // Copies entries from the beginning of rhs to the end of lhs, + // or copies entries from the end of lhs to the beginning of rhs. + // The nr_entries is not adjusted. + void copy_entries_to_left(node_ref const &rhs, unsigned count); + void copy_entries_to_right(node_ref &rhs, unsigned count) const; + + // Shifts entries to left or right. + // The nr_entries is not adjusted. + void shift_entries_left(unsigned shift); + void shift_entries_right(unsigned shift); + + unsigned merge_threshold() const; + // Various searches int bsearch(uint64_t key, int want_hi) const; boost::optional exact_search(uint64_t key) const; @@ -124,6 +146,9 @@ namespace persistent_data { template void inc_children(RefCounter &rc); + template + void dec_children(RefCounter &rc); + disk_node *raw() { return raw_; } @@ -256,6 +281,26 @@ namespace persistent_data { maybe_block root_; }; + class shadow_child { + public: + shadow_child(block_manager::write_ref &wr, node_type type) + : wr_(wr), type_(type) { + } + + node_type get_type() const { + return type_; + } + + template + node_ref get_node() { + return to_node(wr_); + } + + private: + block_manager::write_ref wr_; + node_type type_; + }; + // Used to keep a record of a nested btree's position. 
typedef std::vector btree_path; @@ -396,6 +441,14 @@ namespace persistent_data { int *index, RC &leaf_rc); + template + bool + remove_location(btree_detail::shadow_spine &spine, + block_address block, + uint64_t key, + unsigned *index, + RC &leaf_rc); + void walk_tree(visitor &visitor, btree_detail::node_location const &loc, block_address b) const; @@ -408,6 +461,53 @@ namespace persistent_data { void inc_children(btree_detail::shadow_spine &spine, RefCounter &leaf_rc); + btree_detail::shadow_child + create_shadow_child(internal_node &parent, + unsigned index); + + template + bool rebalance_children(btree_detail::shadow_spine &spine, + uint64_t key); + + template + void rebalance2(btree_detail::shadow_spine &spine, + unsigned left_index); + + template + void rebalance3(btree_detail::shadow_spine &spine, + unsigned left_index); + + template + void + __rebalance2(internal_node &parent, + btree_detail::node_ref &left, + btree_detail::node_ref &right, + unsigned left_index); + + template + void + __rebalance3(internal_node &parent, + btree_detail::node_ref &left, + btree_detail::node_ref ¢er, + btree_detail::node_ref &right, + unsigned left_index); + + template + void + delete_center_node(internal_node &parent, + btree_detail::node_ref &left, + btree_detail::node_ref ¢er, + btree_detail::node_ref &right, + unsigned left_index); + + template + void + redistribute3(internal_node &parent, + btree_detail::node_ref &left, + btree_detail::node_ref ¢er, + btree_detail::node_ref &right, + unsigned left_index); + transaction_manager &tm_; bool destroy_; block_address root_; @@ -418,6 +518,7 @@ namespace persistent_data { }; #include "btree.tcc" +#include "btree-remove.tcc" //---------------------------------------------------------------- diff --git a/persistent-data/data-structures/btree.tcc b/persistent-data/data-structures/btree.tcc index b24ec29..059ebaa 100644 --- a/persistent-data/data-structures/btree.tcc +++ b/persistent-data/data-structures/btree.tcc @@ -25,6 +25,7 @@ 
#include #include +#include //---------------------------------------------------------------- @@ -33,6 +34,56 @@ namespace { using namespace persistent_data; using namespace btree_detail; using namespace std; + + struct frame { + frame(block_address blocknr, + uint32_t level, + uint32_t nr_entries) + : blocknr_(blocknr), + level_(level), + nr_entries_(nr_entries), + current_child_(0) { + } + block_address blocknr_; + uint32_t level_; + uint32_t nr_entries_; + uint32_t current_child_; + }; + + // stack for postorder DFS traversal + // TODO: Refactor it into a spine-like class, e.g., btree_del_spine, + // "Spine" sounds better for btree operations. + struct btree_del_stack { + public: + btree_del_stack(transaction_manager &tm): tm_(tm) { + } + + void push_frame(block_address blocknr, + uint32_t level, + uint32_t nr_entries) { + if (tm_.get_sm()->get_count(blocknr) > 1) + tm_.get_sm()->dec(blocknr); + else + spine_.push(frame(blocknr, level, nr_entries)); + } + + void pop_frame() { + tm_.get_sm()->dec(spine_.top().blocknr_); + spine_.pop(); + } + + frame &top_frame() { + return spine_.top(); + } + + bool is_empty() { + return spine_.empty(); + } + + private: + transaction_manager &tm_; + std::stack spine_; + }; } //---------------------------------------------------------------- @@ -242,6 +293,23 @@ namespace persistent_data { set_value(i, v); } + template + void + node_ref::delete_at(unsigned i) + { + unsigned nr_entries = get_nr_entries(); + if (i >= nr_entries) + throw runtime_error("key index out of bounds"); + unsigned nr_to_copy = nr_entries - (i + 1); + + if (nr_to_copy) { + ::memmove(key_ptr(i), key_ptr(i + 1), sizeof(uint64_t) * nr_to_copy); + ::memmove(value_ptr(i), value_ptr(i + 1), sizeof(typename ValueTraits::disk_type) * nr_to_copy); + } + + set_nr_entries(nr_entries - 1); + } + template void node_ref::copy_entries(node_ref const &rhs, @@ -258,6 +326,90 @@ namespace persistent_data { set_nr_entries(n + count); } + template + void + 
node_ref::move_entries(node_ref &rhs, + int count) + { + if (!count) + return; + + unsigned nr_left = get_nr_entries(); + unsigned nr_right = rhs.get_nr_entries(); + unsigned max_entries = get_max_entries(); + + if (nr_left - count > max_entries || nr_right + count > max_entries) /* bound the post-move sizes: left ends at nr_left - count, right at nr_right + count; a negative count moves entries right-to-left */ + throw runtime_error("too many entries"); + + if (count > 0) { + rhs.shift_entries_right(count); + copy_entries_to_right(rhs, count); + } else { + copy_entries_to_left(rhs, -count); + rhs.shift_entries_left(-count); + } + + set_nr_entries(nr_left - count); + rhs.set_nr_entries(nr_right + count); + } + + template + void + node_ref::copy_entries_to_left(node_ref const &rhs, unsigned count) + { + unsigned n = get_nr_entries(); + if ((n + count) > get_max_entries()) + throw runtime_error("too many entries"); + + ::memcpy(key_ptr(n), rhs.key_ptr(0), sizeof(uint64_t) * count); + ::memcpy(value_ptr(n), rhs.value_ptr(0), sizeof(typename ValueTraits::disk_type) * count); + } + + template + void + node_ref::copy_entries_to_right(node_ref &rhs, unsigned count) const + { + unsigned n = rhs.get_nr_entries(); + if ((n + count) > get_max_entries()) + throw runtime_error("too many entries"); + + unsigned nr_left = get_nr_entries(); + ::memcpy(rhs.key_ptr(0), key_ptr(nr_left - count), sizeof(uint64_t) * count); + ::memcpy(rhs.value_ptr(0), value_ptr(nr_left - count), sizeof(typename ValueTraits::disk_type) * count); + } + + template + void + node_ref::shift_entries_left(unsigned shift) + { + unsigned n = get_nr_entries(); + if (shift > n) + throw runtime_error("too many entries"); + + unsigned nr_shifted = n - shift; + ::memmove(key_ptr(0), key_ptr(shift), sizeof(uint64_t) * nr_shifted); + ::memmove(value_ptr(0), value_ptr(shift), sizeof(typename ValueTraits::disk_type) * nr_shifted); + } + + template + void + node_ref::shift_entries_right(unsigned shift) + { + unsigned n = get_nr_entries(); + if (n + shift > get_max_entries()) + throw runtime_error("too many entries"); + + ::memmove(key_ptr(shift),
key_ptr(0), sizeof(uint64_t) * n); + ::memmove(value_ptr(shift), value_ptr(0), sizeof(typename ValueTraits::disk_type) * n); + } + + template + unsigned + node_ref::merge_threshold() const + { + return get_max_entries() / 3; + } + template int node_ref::bsearch(uint64_t key, int want_hi) const @@ -348,6 +500,21 @@ namespace persistent_data { } } + template + template + void + node_ref::dec_children(RefCounter &rc) + { + unsigned nr_entries = get_nr_entries(); + for (unsigned i = 0; i < nr_entries; i++) { + typename ValueTraits::value_type v; + typename ValueTraits::disk_type d; + ::memcpy(&d, value_ptr(i), sizeof(d)); + ValueTraits::unpack(d, v); + rc.dec(v); + } + } + template bool node_ref::value_sizes_match() const { @@ -535,13 +702,6 @@ namespace persistent_data { return need_insert; } - template - void - btree::remove(key const &key) - { - using namespace btree_detail; - } - template block_address btree::get_root() const @@ -565,15 +725,57 @@ namespace persistent_data { return ptr(new btree(tm_, root_, rc_)); } -#if 0 template void btree::destroy() { using namespace btree_detail; + btree_del_stack s(tm_); + + { + read_ref blk = tm_.read_lock(root_, validator_); + internal_node n = to_node(blk); + s.push_frame(root_, 0, n.get_nr_entries()); + } + + while (!s.is_empty()) { + frame &f = s.top_frame(); + + if (f.current_child_ >= f.nr_entries_) { + s.pop_frame(); + continue; + } + + // FIXME: Cache the read_ref object in the stack to avoid temporary objects? 
+ read_ref current = tm_.read_lock(f.blocknr_, validator_); + internal_node n = to_node(current); + + if (n.get_type() == INTERNAL) { + // TODO: test performance penalty of prefetching + //if (!f.current_child_) + // for (unsigned i = 0; i < n.get_nr_entries(); i++) + // tm_.prefetch(n.value_at(i)); + + block_address b = n.value_at(f.current_child_); + read_ref leaf = tm_.read_lock(b, validator_); + internal_node o = to_node(leaf); + s.push_frame(b, f.level_, o.get_nr_entries()); + ++f.current_child_; + // internal leaf + } else if (f.level_ < Levels - 1) { + block_address b = n.value_at(f.current_child_); + read_ref leaf = tm_.read_lock(b, validator_); + internal_node o = to_node(leaf); + s.push_frame(b, f.level_ + 1, o.get_nr_entries()); + ++f.current_child_; + } else { + leaf_node o = to_node(current); + o.dec_children(rc_); // FIXME: move this into pop_frame() + s.pop_frame(); + } + } } -#endif template template diff --git a/persistent-data/file_utils.cc b/persistent-data/file_utils.cc index 98b31c4..329af86 100644 --- a/persistent-data/file_utils.cc +++ b/persistent-data/file_utils.cc @@ -1,4 +1,4 @@ -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" #include "persistent-data/file_utils.h" #include "persistent-data/space-maps/core.h" diff --git a/persistent-data/space-maps/core.cc b/persistent-data/space-maps/core.cc index 463d212..4251ef6 100644 --- a/persistent-data/space-maps/core.cc +++ b/persistent-data/space-maps/core.cc @@ -17,7 +17,7 @@ // . 
#include "persistent-data/space-maps/core.h" -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" #include diff --git a/persistent-data/space-maps/disk.cc b/persistent-data/space-maps/disk.cc index 28e13d3..87c8fe5 100644 --- a/persistent-data/space-maps/disk.cc +++ b/persistent-data/space-maps/disk.cc @@ -26,7 +26,7 @@ #include "persistent-data/data-structures/btree_damage_visitor.h" #include "persistent-data/data-structures/btree_counter.h" #include "persistent-data/checksum.h" -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" #include "persistent-data/transaction_manager.h" using namespace persistent_data; diff --git a/src/file_utils.rs b/src/file_utils.rs index 37d5c41..3f5dce8 100644 --- a/src/file_utils.rs +++ b/src/file_utils.rs @@ -23,7 +23,6 @@ pub fn file_exists(path: &str) -> bool { _ => { // FIXME: assuming all errors indicate the file doesn't // exist. - eprintln!("couldn't stat '{}'", path); false } } diff --git a/src/pack/node_encode.rs b/src/pack/node_encode.rs index c5e8370..a8a54a6 100644 --- a/src/pack/node_encode.rs +++ b/src/pack/node_encode.rs @@ -1,3 +1,4 @@ +use thiserror::Error; use std::{io, io::Write}; use nom::{bytes::complete::*, number::complete::*, IResult}; @@ -6,41 +7,27 @@ use crate::pack::vm::*; //------------------------------------------- -#[derive(Debug)] +#[derive(Error, Debug)] pub enum PackError { + #[error("Couldn't parse binary data")] ParseError, - IOError, -} -impl std::error::Error for PackError {} + #[error("Write error")] + WriteError { source: std::io::Error }, +} pub type PResult = Result; fn nom_to_pr(r: IResult<&[u8], T>) -> PResult<(&[u8], T)> { - match r { - Ok(v) => Ok(v), - Err(_) => Err(PackError::ParseError), - } + r.map_err(|_source| PackError::ParseError) } fn io_to_pr(r: io::Result) -> PResult { - match r { - Ok(v) => Ok(v), - Err(_) => Err(PackError::IOError), - } + r.map_err(|source| PackError::WriteError {source}) } //------------------------------------------- 
-impl std::fmt::Display for PackError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - PackError::ParseError => write!(f, "parse error"), - PackError::IOError => write!(f, "IO error"), - } - } -} - fn run64(i: &[u8], count: usize) -> IResult<&[u8], Vec> { let (i, ns) = nom::multi::many_m_n(count, count, le_u64)(i)?; Ok((i, ns)) diff --git a/src/pack/toplevel.rs b/src/pack/toplevel.rs index 305a351..03429b4 100644 --- a/src/pack/toplevel.rs +++ b/src/pack/toplevel.rs @@ -1,3 +1,4 @@ +use anyhow::{anyhow, Context, Result}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression}; @@ -63,8 +64,8 @@ fn mk_chunk_vecs(nr_blocks: u64, nr_jobs: u64) -> Vec> { vs } -pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box> { - let nr_blocks = get_nr_blocks(&input_file)?; +pub fn pack(input_file: &str, output_file: &str) -> Result<()> { + let nr_blocks = get_nr_blocks(&input_file).context("getting nr blocks")?; let nr_jobs = std::cmp::max(1, std::cmp::min(num_cpus::get() as u64, nr_blocks / 128)); let chunk_vecs = mk_chunk_vecs(nr_blocks, nr_jobs); @@ -81,7 +82,7 @@ pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box> { .truncate(true) .open(output_file)?; - write_header(&output, nr_blocks)?; + write_header(&output, nr_blocks).context("unable to write pack file header")?; let sync_input = Arc::new(Mutex::new(input)); let sync_output = Arc::new(Mutex::new(output)); @@ -104,7 +105,7 @@ fn crunch( input: Arc>, output: Arc>, ranges: Vec<(u64, u64)>, -) -> io::Result<()> +) -> Result<()> where R: Read + Seek, W: Write, @@ -124,7 +125,7 @@ where let kind = metadata_block_type(data); if kind != BT::UNKNOWN { z.write_u64::(b)?; - pack_block(&mut z, kind, &data); + pack_block(&mut z, kind, &data)?; written += 1; if written == 1024 { @@ -205,22 +206,18 @@ where Ok(buf) } -fn check(r: &PResult) { - match r { - Ok(_) => {} - Err(PackError::ParseError) => 
panic!("parse error"), - Err(PackError::IOError) => panic!("io error"), - } -} - -fn pack_block(w: &mut W, kind: BT, buf: &[u8]) { +fn pack_block(w: &mut W, kind: BT, buf: &[u8]) -> Result<()> { match kind { - BT::SUPERBLOCK => check(&pack_superblock(w, buf)), - BT::NODE => check(&pack_btree_node(w, buf)), - BT::INDEX => check(&pack_index(w, buf)), - BT::BITMAP => check(&pack_bitmap(w, buf)), - BT::UNKNOWN => {panic!("asked to pack an unknown block type")} + BT::SUPERBLOCK => pack_superblock(w, buf).context("unable to pack superblock")?, + BT::NODE => pack_btree_node(w, buf).context("unable to pack btree node")?, + BT::INDEX => pack_index(w, buf).context("unable to pack space map index")?, + BT::BITMAP => pack_bitmap(w, buf).context("unable to pack space map bitmap")?, + BT::UNKNOWN => { + return Err(anyhow!("asked to pack an unknown block type")) + } } + + Ok(()) } fn write_zero_block(w: &mut W, b: u64) -> io::Result<()> diff --git a/src/thin/superblock.rs b/src/thin/superblock.rs index 722ea7f..6b6d701 100644 --- a/src/thin/superblock.rs +++ b/src/thin/superblock.rs @@ -1,3 +1,4 @@ +use anyhow::Result; use crate::block_manager::*; use crate::checksum::*; @@ -47,7 +48,7 @@ use SuperblockDamage::*; //------------------------------ -pub fn check_type(b: &Block) -> Result<(), Box> { +pub fn check_type(b: &Block) -> Result<()> { match metadata_block_type(&b.data[0..]) { SUPERBLOCK => Ok(()), NODE => Err(Box::new(BadBlockType("BTree Node"))), diff --git a/thin-provisioning/commands.cc b/thin-provisioning/commands.cc index 13dc76c..23f4b0e 100644 --- a/thin-provisioning/commands.cc +++ b/thin-provisioning/commands.cc @@ -23,6 +23,7 @@ thin_provisioning::register_thin_commands(base::application &app) app.add_cmd(command::ptr(new thin_ll_restore_cmd())); app.add_cmd(command::ptr(new thin_scan_cmd())); app.add_cmd(command::ptr(new thin_generate_metadata_cmd())); + app.add_cmd(command::ptr(new thin_generate_mappings_cmd())); app.add_cmd(command::ptr(new 
thin_show_duplicates_cmd())); app.add_cmd(command::ptr(new thin_show_metadata_cmd())); app.add_cmd(command::ptr(new thin_journal_cmd())); diff --git a/thin-provisioning/commands.h b/thin-provisioning/commands.h index 6b80bdb..e3a41e7 100644 --- a/thin-provisioning/commands.h +++ b/thin-provisioning/commands.h @@ -110,6 +110,13 @@ namespace thin_provisioning { virtual int run(int argc, char **argv); }; + class thin_generate_mappings_cmd : public base::command { + public: + thin_generate_mappings_cmd(); + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + class thin_show_metadata_cmd : public base::command { public: thin_show_metadata_cmd(); diff --git a/thin-provisioning/device_tree.cc b/thin-provisioning/device_tree.cc index 4837cb7..4b4e0dd 100644 --- a/thin-provisioning/device_tree.cc +++ b/thin-provisioning/device_tree.cc @@ -54,6 +54,13 @@ namespace thin_provisioning { snapshotted_time_(0) { } + device_details::device_details(uint64_t tid, uint32_t time) + : mapped_blocks_(0), + transaction_id_(tid), + creation_time_(time), + snapshotted_time_(time) { + } + void device_details_traits::unpack(device_details_disk const &disk, device_details &value) { diff --git a/thin-provisioning/device_tree.h b/thin-provisioning/device_tree.h index d284ac5..d7178cd 100644 --- a/thin-provisioning/device_tree.h +++ b/thin-provisioning/device_tree.h @@ -17,6 +17,7 @@ namespace thin_provisioning { struct device_details { device_details(); + device_details(uint64_t tid, uint32_t time); uint64_t mapped_blocks_; uint64_t transaction_id_; /* when created */ diff --git a/thin-provisioning/metadata.cc b/thin-provisioning/metadata.cc index 9f81a51..3c5446a 100644 --- a/thin-provisioning/metadata.cc +++ b/thin-provisioning/metadata.cc @@ -20,7 +20,7 @@ #include "thin-provisioning/metadata.h" #include "persistent-data/file_utils.h" -#include "persistent-data/math_utils.h" +#include "base/math_utils.h" #include "persistent-data/space-maps/core.h" 
#include "persistent-data/space-maps/disk.h" diff --git a/thin-provisioning/metadata.h b/thin-provisioning/metadata.h index e8bb5fc..d7d1afe 100644 --- a/thin-provisioning/metadata.h +++ b/thin-provisioning/metadata.h @@ -37,7 +37,6 @@ namespace thin_provisioning { using namespace base; using namespace persistent_data; - typedef uint64_t sector_t; typedef uint32_t thin_dev_t; //------------------------------------------------ diff --git a/thin-provisioning/metadata_checker.cc b/thin-provisioning/metadata_checker.cc index 02c12c6..ed3d20d 100644 --- a/thin-provisioning/metadata_checker.cc +++ b/thin-provisioning/metadata_checker.cc @@ -18,11 +18,13 @@ #include "base/nested_output.h" #include "persistent-data/file_utils.h" +#include "persistent-data/space-maps/core.h" #include "thin-provisioning/metadata.h" #include "thin-provisioning/metadata_checker.h" #include "thin-provisioning/metadata_counter.h" #include "thin-provisioning/superblock.h" +using namespace boost; using namespace persistent_data; using namespace thin_provisioning; @@ -84,6 +86,20 @@ namespace { //-------------------------------- + class data_ref_counter : public mapping_tree_detail::mapping_visitor { + public: + data_ref_counter(space_map::ptr sm) + : sm_(sm) { + } + + virtual void visit(btree_path const &path, mapping_tree_detail::block_time const &bt) { + sm_->inc(bt.block_); + } + + private: + space_map::ptr sm_; + }; + class mapping_reporter : public mapping_tree_detail::damage_visitor { public: mapping_reporter(nested_output &out) @@ -121,12 +137,13 @@ namespace { //-------------------------------- error_state examine_superblock(block_manager::ptr bm, + block_address sb_location, nested_output &out) { out << "examining superblock" << end_message(); nested_output::nest _ = out.push(); superblock_reporter sb_rep(out); - check_superblock(bm, sb_rep); + check_superblock(bm, sb_rep, sb_location); return sb_rep.get_error(); } @@ -161,14 +178,20 @@ namespace { error_state 
examine_mapping_tree_(transaction_manager::ptr tm, superblock_detail::superblock const &sb, - nested_output &out) { + nested_output &out, + optional data_sm) { out << "examining mapping tree" << end_message(); nested_output::nest _ = out.push(); mapping_reporter mapping_rep(out); mapping_tree mtree(*tm, sb.data_mapping_root_, mapping_tree_detail::block_traits::ref_counter(tm->get_sm())); - check_mapping_tree(mtree, mapping_rep); + + if (data_sm) { + data_ref_counter dcounter(*data_sm); + walk_mapping_tree(mtree, dcounter, mapping_rep); + } else + check_mapping_tree(mtree, mapping_rep); return mapping_rep.get_error(); } @@ -184,9 +207,10 @@ namespace { error_state examine_mapping_tree(transaction_manager::ptr tm, superblock_detail::superblock const &sb, - nested_output &out) { + nested_output &out, + optional data_sm) { error_state err = examine_devices_tree_(tm, sb, out); - err << examine_mapping_tree_(tm, sb, out); + err << examine_mapping_tree_(tm, sb, out, data_sm); return err; } @@ -222,6 +246,34 @@ namespace { return err; } + error_state compare_space_maps(space_map::ptr actual, space_map::ptr expected, + nested_output &out) + { + error_state err = NO_ERROR; + auto nr_blocks = actual->get_nr_blocks(); + + if (expected->get_nr_blocks() != nr_blocks) { + out << "internal error: nr blocks in space maps differ" + << end_message(); + err << FATAL; + } else { + for (block_address b = 0; b < nr_blocks; b++) { + auto a_count = actual->get_count(b); + auto e_count = actual->get_count(b); + + if (a_count != e_count) { + out << "data reference counts differ for block " << b + << ", expected " << e_count + << ", but got " << a_count + << end_message(); + err << (a_count > e_count ? 
NON_FATAL : FATAL); + } + } + } + + return err; + } + void print_info(transaction_manager::ptr tm, superblock_detail::superblock const &sb, nested_output &out) @@ -238,11 +290,11 @@ namespace { //-------------------------------- - class base_metadata_checker : public metadata_checker { + class metadata_checker { public: - base_metadata_checker(block_manager::ptr bm, - check_options check_opts, - output_options output_opts) + metadata_checker(block_manager::ptr bm, + check_options check_opts, + output_options output_opts) : bm_(bm), options_(check_opts), out_(cerr, 2), @@ -256,28 +308,44 @@ namespace { error_state check() { error_state err = NO_ERROR; + auto sb_location = superblock_detail::SUPERBLOCK_LOCATION; - err << examine_superblock(bm_, out_); - + if (options_.use_metadata_snap_) { + superblock_detail::superblock sb = read_superblock(bm_, sb_location); + sb_location = sb.metadata_snap_; + if (sb_location == superblock_detail::SUPERBLOCK_LOCATION) + throw runtime_error("No metadata snapshot found."); + } + + err << examine_superblock(bm_, sb_location, out_); if (err == FATAL) { if (check_for_xml(bm_)) out_ << "This looks like XML. thin_check only checks the binary metadata format." 
<< end_message(); return err; } - superblock_detail::superblock sb = read_superblock(bm_); - transaction_manager::ptr tm = - open_tm(bm_, superblock_detail::SUPERBLOCK_LOCATION); + superblock_detail::superblock sb = read_superblock(bm_, sb_location); + transaction_manager::ptr tm = open_tm(bm_, sb_location); sb.data_mapping_root_ = mapping_root(sb, options_); print_info(tm, sb, info_out_); - err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_); + if (options_.sm_opts_ == check_options::SPACE_MAP_FULL) { + space_map::ptr data_sm{open_disk_sm(*tm, &sb.data_space_map_root_)}; + optional core_sm{create_core_map(data_sm->get_nr_blocks())}; + err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_, core_sm); - // if we're checking everything, and there were no errors, - // then we should check the space maps too. - if (err != FATAL) - err << examine_metadata_space_map(tm, sb, options_.check_metadata_space_map_, out_); + // if we're checking everything, and there were no errors, + // then we should check the space maps too. 
+ if (err != FATAL) { + err << examine_metadata_space_map(tm, sb, options_.sm_opts_, out_); + + if (core_sm) + err << compare_space_maps(data_sm, *core_sm, out_); + } + } else + err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_, + optional()); return err; } @@ -287,7 +355,8 @@ namespace { examine_data_mappings(transaction_manager::ptr tm, superblock_detail::superblock const &sb, check_options::data_mapping_options option, - nested_output &out) { + nested_output &out, + optional data_sm) { error_state err = NO_ERROR; switch (option) { @@ -295,7 +364,7 @@ namespace { err << examine_top_level_mapping_tree(tm, sb, out); break; case check_options::DATA_MAPPING_LEVEL2: - err << examine_mapping_tree(tm, sb, out); + err << examine_mapping_tree(tm, sb, out, data_sm); break; default: break; // do nothing @@ -307,12 +376,12 @@ namespace { static error_state examine_metadata_space_map(transaction_manager::ptr tm, superblock_detail::superblock const &sb, - check_options::metadata_space_map_options option, + check_options::space_map_options option, nested_output &out) { error_state err = NO_ERROR; switch (option) { - case check_options::METADATA_SPACE_MAP_FULL: + case check_options::SPACE_MAP_FULL: err << check_space_map_counts(tm, sb, out); break; default: @@ -332,32 +401,37 @@ namespace { //---------------------------------------------------------------- check_options::check_options() - : check_data_mappings_(DATA_MAPPING_LEVEL2), - check_metadata_space_map_(METADATA_SPACE_MAP_FULL) { + : use_metadata_snap_(false), + check_data_mappings_(DATA_MAPPING_LEVEL2), + sm_opts_(SPACE_MAP_FULL) { } void check_options::set_superblock_only() { check_data_mappings_ = DATA_MAPPING_NONE; - check_metadata_space_map_ = METADATA_SPACE_MAP_NONE; + sm_opts_ = SPACE_MAP_NONE; } void check_options::set_skip_mappings() { check_data_mappings_ = DATA_MAPPING_LEVEL1; - check_metadata_space_map_ = METADATA_SPACE_MAP_NONE; + sm_opts_ = SPACE_MAP_NONE; } void 
check_options::set_override_mapping_root(block_address b) { override_mapping_root_ = b; } -metadata_checker::ptr -thin_provisioning::create_base_checker(block_manager::ptr bm, - check_options const &check_opts, - output_options output_opts) +void check_options::set_metadata_snap() { + use_metadata_snap_ = true; + sm_opts_ = SPACE_MAP_NONE; +} + +base::error_state +thin_provisioning::check_metadata(block_manager::ptr bm, + check_options const &check_opts, + output_options output_opts) { - metadata_checker::ptr checker; - checker = metadata_checker::ptr(new base_metadata_checker(bm, check_opts, output_opts)); - return checker; + metadata_checker checker(bm, check_opts, output_opts); + return checker.check(); } //---------------------------------------------------------------- diff --git a/thin-provisioning/metadata_checker.h b/thin-provisioning/metadata_checker.h index 7cf6683..1b94d7e 100644 --- a/thin-provisioning/metadata_checker.h +++ b/thin-provisioning/metadata_checker.h @@ -33,9 +33,9 @@ namespace thin_provisioning { DATA_MAPPING_LEVEL2, }; - enum metadata_space_map_options { - METADATA_SPACE_MAP_NONE, - METADATA_SPACE_MAP_FULL, + enum space_map_options { + SPACE_MAP_NONE, + SPACE_MAP_FULL, }; check_options(); @@ -43,9 +43,11 @@ namespace thin_provisioning { void set_superblock_only(); void set_skip_mappings(); void set_override_mapping_root(bcache::block_address b); + void set_metadata_snap(); + bool use_metadata_snap_; data_mapping_options check_data_mappings_; - metadata_space_map_options check_metadata_space_map_; + space_map_options sm_opts_; boost::optional override_mapping_root_; }; @@ -54,19 +56,10 @@ namespace thin_provisioning { OUTPUT_QUIET, }; - class metadata_checker { - public: - typedef std::shared_ptr ptr; - - virtual ~metadata_checker() {} - - virtual base::error_state check() = 0; - }; - - metadata_checker::ptr - create_base_checker(persistent_data::block_manager::ptr bm, - check_options const &check_opts, - output_options output_opts); + 
base::error_state + check_metadata(persistent_data::block_manager::ptr bm, + check_options const &check_opts, + output_options output_opts); } //---------------------------------------------------------------- diff --git a/thin-provisioning/superblock.cc b/thin-provisioning/superblock.cc index 7b1c493..3521f1d 100644 --- a/thin-provisioning/superblock.cc +++ b/thin-provisioning/superblock.cc @@ -194,11 +194,12 @@ namespace thin_provisioning { void check_superblock(block_manager::ptr bm, - superblock_detail::damage_visitor &visitor) { + superblock_detail::damage_visitor &visitor, + block_address sb_location) { using namespace superblock_detail; try { - bm->read_lock(SUPERBLOCK_LOCATION, superblock_validator()); + bm->read_lock(sb_location, superblock_validator()); } catch (std::exception const &e) { visitor.visit(superblock_corruption(e.what())); diff --git a/thin-provisioning/superblock.h b/thin-provisioning/superblock.h index 62bceb7..9704062 100644 --- a/thin-provisioning/superblock.h +++ b/thin-provisioning/superblock.h @@ -139,7 +139,8 @@ namespace thin_provisioning { superblock_detail::superblock const &sb); void check_superblock(persistent_data::block_manager::ptr bm, - superblock_detail::damage_visitor &visitor); + superblock_detail::damage_visitor &visitor, + persistent_data::block_address sb_location = superblock_detail::SUPERBLOCK_LOCATION); } //---------------------------------------------------------------- diff --git a/thin-provisioning/thin_check.cc b/thin-provisioning/thin_check.cc index 79ea171..05f5582 100644 --- a/thin-provisioning/thin_check.cc +++ b/thin-provisioning/thin_check.cc @@ -76,10 +76,10 @@ namespace { return 1; } - block_manager::ptr bm = open_bm(path); + block_manager::ptr bm = open_bm(path, block_manager::READ_ONLY, + !fs.check_opts.use_metadata_snap_); output_options output_opts = !fs.quiet ? 
OUTPUT_NORMAL : OUTPUT_QUIET; - metadata_checker::ptr checker = create_base_checker(bm, fs.check_opts, output_opts); - error_state err = checker->check(); + error_state err = check_metadata(bm, fs.check_opts, output_opts); if (fs.ignore_non_fatal_errors) success = (err == FATAL) ? false : true; @@ -110,15 +110,16 @@ thin_check_cmd::thin_check_cmd() void thin_check_cmd::usage(std::ostream &out) const { - out << "Usage: " << get_name() << " [options] {device|file}" << endl - << "Options:" << endl - << " {-q|--quiet}" << endl - << " {-h|--help}" << endl - << " {-V|--version}" << endl - << " {--override-mapping-root}" << endl - << " {--clear-needs-check-flag}" << endl - << " {--ignore-non-fatal-errors}" << endl - << " {--skip-mappings}" << endl + out << "Usage: " << get_name() << " [options] {device|file}\n" + << "Options:\n" + << " {-q|--quiet}\n" + << " {-h|--help}\n" + << " {-V|--version}\n" + << " {-m|--metadata-snap}\n" + << " {--override-mapping-root}\n" + << " {--clear-needs-check-flag}\n" + << " {--ignore-non-fatal-errors}\n" + << " {--skip-mappings}\n" << " {--super-block-only}" << endl; } @@ -128,11 +129,12 @@ thin_check_cmd::run(int argc, char **argv) int c; flags fs; - char const shortopts[] = "qhV"; + char const shortopts[] = "qhVm"; option const longopts[] = { { "quiet", no_argument, NULL, 'q'}, { "help", no_argument, NULL, 'h'}, { "version", no_argument, NULL, 'V'}, + { "metadata-snap", no_argument, NULL, 'm'}, { "super-block-only", no_argument, NULL, 1}, { "skip-mappings", no_argument, NULL, 2}, { "ignore-non-fatal-errors", no_argument, NULL, 3}, @@ -155,6 +157,10 @@ thin_check_cmd::run(int argc, char **argv) cout << THIN_PROVISIONING_TOOLS_VERSION << endl; return 0; + case 'm': + fs.check_opts.set_metadata_snap(); + break; + case 1: // super-block-only fs.check_opts.set_superblock_only(); @@ -186,6 +192,12 @@ thin_check_cmd::run(int argc, char **argv) } } + if (fs.clear_needs_check_flag_on_success && fs.check_opts.use_metadata_snap_) { + cerr << 
"--metadata-snap cannot be combined with --clear-needs-check-flag."; + usage(cerr); + exit(1); + } + if (argc == optind) { if (!fs.quiet) { cerr << "No input file provided." << endl; diff --git a/thin-provisioning/thin_generate_mappings.cc b/thin-provisioning/thin_generate_mappings.cc new file mode 100644 index 0000000..a366eba --- /dev/null +++ b/thin-provisioning/thin_generate_mappings.cc @@ -0,0 +1,207 @@ +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +#include "base/io_generator.h" +#include "base/output_file_requirements.h" +#include "persistent-data/file_utils.h" +#include "thin-provisioning/commands.h" +#include "thin-provisioning/thin_pool.h" +#include "version.h" + +#include +#include +#include + +using namespace boost; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + struct flags { + flags() + : pattern("write"), + offset(0) + { + } + + bool check_conformance(); + + boost::optional output; + base::io_pattern pattern; + boost::optional dev_id; + boost::optional block_size; + base::sector_t offset; + boost::optional size; + boost::optional io_size; + }; + + bool flags::check_conformance() { + if (!output) { + cerr << "No output file provided." 
<< endl; + return false; + } + + if (!dev_id) { + cerr << "No device id provided." << endl; + return false; + } + + if (!size) { + cerr << "No device size specified" << endl; + return false; + } + + check_output_file_requirements(*output); + + return true; + } + + //-------------------------------- + + thin_pool::ptr open_pool(flags const &fs) { + block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE); + return thin_pool::ptr(new thin_pool(bm)); + } + + int generate_mappings(flags const &fs) { + thin_pool::ptr pool = open_pool(fs); + + thin::ptr td = pool->open_thin(*fs.dev_id); + + io_generator_options opts; + opts.pattern_ = fs.pattern; + opts.block_size_ = !fs.block_size ? + pool->get_data_block_size() : + *fs.block_size; + opts.offset_ = fs.offset; + opts.size_ = *fs.size; + opts.io_size_ = !fs.io_size ? *fs.size : *fs.io_size; + io_generator::ptr gen = create_io_generator(opts); + + base::io io; + while (gen->has_next()) { + // TODO: support io.size_ + gen->next(io); + + switch (io.op_) { + case base::REQ_OP_READ: + process_read(td, pool, io.sector_); + break; + case base::REQ_OP_WRITE: + process_write(td, pool, io.sector_); + break; + case base::REQ_OP_DISCARD: + process_discard(td, pool, io.sector_); + break; + } + } + + pool->commit(); + + return 0; + } +} + +//---------------------------------------------------------------- + +thin_generate_mappings_cmd::thin_generate_mappings_cmd() + : command("thin_generate_mappings") +{ +} + +void +thin_generate_mappings_cmd::usage(std::ostream &out) const +{ + out << "Usage: " << get_name() << " [options]\n" + << "Options:\n" + << " {-h|--help}\n" + << " {-o|--output} \n" + << " {--dev-id} \n" + << " {--offset} \n" + << " {--io-size} \n" + << " {--rw write|trim|randwrite|randtrim|randtw}\n" + << " {--size} \n" + << " {-V|--version}" << endl; +} + +int +thin_generate_mappings_cmd::run(int argc, char **argv) +{ + int c; + struct flags fs; + const char *shortopts = "hi:o:qV"; + const struct option 
longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "output", required_argument, NULL, 'o' }, + { "dev-id", required_argument, NULL, 1 }, + { "rw", required_argument, NULL, 2 }, + { "offset", required_argument, NULL, 3 }, + { "size", required_argument, NULL, 4 }, + { "io-size", required_argument, NULL, 5 }, + { "version", no_argument, NULL, 'V' }, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'o': + fs.output = optarg; + break; + + case 1: + fs.dev_id = parse_uint64(optarg, "dev_id"); + break; + + case 2: + fs.pattern.parse(optarg); + break; + + case 3: + fs.offset = parse_uint64(optarg, "offset"); + break; + + case 4: + fs.size = parse_uint64(optarg, "size"); + break; + + case 5: + fs.io_size = parse_uint64(optarg, "io_size"); + break; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + default: + usage(cerr); + return 1; + } + } + + if (!fs.check_conformance()) { + usage(cerr); + return 1; + } + + return generate_mappings(fs); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/thin_generate_metadata.cc b/thin-provisioning/thin_generate_metadata.cc index f7127cc..e535273 100644 --- a/thin-provisioning/thin_generate_metadata.cc +++ b/thin-provisioning/thin_generate_metadata.cc @@ -19,7 +19,7 @@ #include "base/output_file_requirements.h" #include "persistent-data/file_utils.h" #include "thin-provisioning/commands.h" -#include "thin-provisioning/metadata.h" +#include "thin-provisioning/thin_pool.h" #include "version.h" #include @@ -27,8 +27,6 @@ #include using namespace boost; -using namespace persistent_data; -using namespace std; using namespace thin_provisioning; //---------------------------------------------------------------- @@ -40,6 +38,11 @@ namespace { METADATA_OP_FORMAT, METADATA_OP_OPEN, METADATA_OP_CREATE_THIN, + METADATA_OP_CREATE_SNAP, + 
METADATA_OP_DELETE_DEV, + METADATA_OP_SET_TRANSACTION_ID, + METADATA_OP_RESERVE_METADATA_SNAP, + METADATA_OP_RELEASE_METADATA_SNAP, METADATA_OP_LAST }; @@ -55,7 +58,9 @@ namespace { metadata_operations op; sector_t data_block_size; block_address nr_data_blocks; - optional dev_id; + optional dev_id; + optional origin; + optional trans_id; optional output; }; @@ -77,88 +82,63 @@ namespace { return false; } + if (op == METADATA_OP_CREATE_SNAP && (!dev_id || !origin)) { + cerr << "no device id or origin provided." << endl; + return false; + } + + if (op == METADATA_OP_DELETE_DEV && !dev_id) { + cerr << "no device id provided." << endl; + return false; + } + + if (op == METADATA_OP_SET_TRANSACTION_ID && !trans_id) { + cerr << "no transaction id provided." << endl; + return false; + } + return true; } //-------------------------------- - single_mapping_tree::ptr new_mapping_tree(metadata::ptr md) { - return single_mapping_tree::ptr( - new single_mapping_tree(*md->tm_, - mapping_tree_detail::block_time_ref_counter(md->data_sm_))); - } - - bool is_device_exists(metadata::ptr md, uint64_t dev_id) { - uint64_t key[1] = {dev_id}; - - device_tree::maybe_value v1 = md->details_->lookup(key); - if (v1) - return true; - - dev_tree::maybe_value v2 = md->mappings_top_level_->lookup(key); - if (v2) - return true; - - return false; - } - - //-------------------------------- - - metadata::ptr format_metadata(block_manager::ptr bm, - sector_t data_block_size, - block_address nr_data_blocks) { - metadata::ptr md(new metadata(bm, - metadata::CREATE, - data_block_size, - nr_data_blocks)); - md->commit(); - return md; - } - - metadata::ptr open_metadata(block_manager::ptr bm) { - metadata::ptr md(new metadata(bm, true)); - return md; - } - - void create_thin(metadata::ptr md, uint64_t dev_id) { - uint64_t key[1] = {dev_id}; - - if (is_device_exists(md, dev_id)) - throw runtime_error("device already exists"); - - device_tree_detail::device_details details; - details.transaction_id_ =
md->sb_.trans_id_; - details.creation_time_ = md->sb_.time_; - details.snapshotted_time_ = details.creation_time_; - md->details_->insert(key, details); - - single_mapping_tree::ptr subtree = new_mapping_tree(md); - md->mappings_top_level_->insert(key, subtree->get_root()); - md->mappings_->set_root(md->mappings_top_level_->get_root()); // FIXME: ugly - - md->commit(); - } - - metadata::ptr open_or_format_metadata(block_manager::ptr bm, flags const &fs) { + thin_pool::ptr open_or_create_pool(flags const &fs) { + block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE); if (fs.op == flags::METADATA_OP_FORMAT) - return format_metadata(bm, fs.data_block_size, fs.nr_data_blocks); + return thin_pool::ptr(new thin_pool(bm, fs.data_block_size, fs.nr_data_blocks)); else - return open_metadata(bm); + return thin_pool::ptr(new thin_pool(bm)); } int generate_metadata(flags const &fs) { - block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE); - metadata::ptr md = open_or_format_metadata(bm, fs); + thin_pool::ptr pool = open_or_create_pool(fs); switch (fs.op) { case flags::METADATA_OP_CREATE_THIN: - create_thin(md, *fs.dev_id); + pool->create_thin(*fs.dev_id); + break; + case flags::METADATA_OP_CREATE_SNAP: + pool->create_snap(*fs.dev_id, *fs.origin); + break; + case flags::METADATA_OP_DELETE_DEV: + pool->del(*fs.dev_id); + break; + case flags::METADATA_OP_SET_TRANSACTION_ID: + pool->set_transaction_id(*fs.trans_id); + break; + case flags::METADATA_OP_RESERVE_METADATA_SNAP: + pool->reserve_metadata_snap(); + break; + case flags::METADATA_OP_RELEASE_METADATA_SNAP: + pool->release_metadata_snap(); break; default: break; } + pool->commit(); + return 0; } } @@ -176,8 +156,16 @@ thin_generate_metadata_cmd::usage(std::ostream &out) const out << "Usage: " << get_name() << " [options]\n" << "Options:\n" << " {-h|--help}\n" - << " --data-block-size \n" - << " --nr-data-blocks \n" + << " {--format}\n" + << " {--create-thin} \n" + << " {--create-snap} \n" + 
<< " {--delete} \n" + << " {--reserve-metadata-snap}\n" + << " {--release-metadata-snap}\n" + << " {--set-transaction-id} \n" + << " {--data-block-size} \n" + << " {--nr-data-blocks} \n" + << " {--origin} \n" << " {-o|--output} \n" << " {-V|--version}" << endl; } @@ -193,10 +181,15 @@ thin_generate_metadata_cmd::run(int argc, char **argv) { "output", required_argument, NULL, 'o' }, { "format", no_argument, NULL, 1 }, { "open", no_argument, NULL, 2 }, - { "create-thin", no_argument, NULL, 3 }, + { "create-thin", required_argument, NULL, 3 }, + { "create-snap", required_argument, NULL, 4 }, + { "delete", required_argument, NULL, 5 }, + { "set-transaction-id", required_argument, NULL, 6 }, + { "reserve-metadata-snap", no_argument, NULL, 7 }, + { "release-metadata-snap", no_argument, NULL, 8 }, { "data-block-size", required_argument, NULL, 101 }, { "nr-data-blocks", required_argument, NULL, 102 }, - { "dev-id", required_argument, NULL, 301 }, + { "origin", required_argument, NULL, 401 }, { "version", no_argument, NULL, 'V' }, { NULL, no_argument, NULL, 0 } }; @@ -221,6 +214,30 @@ thin_generate_metadata_cmd::run(int argc, char **argv) case 3: fs.op = flags::METADATA_OP_CREATE_THIN; + fs.dev_id = parse_uint64(optarg, "device id"); + break; + + case 4: + fs.op = flags::METADATA_OP_CREATE_SNAP; + fs.dev_id = parse_uint64(optarg, "device id"); + break; + + case 5: + fs.op = flags::METADATA_OP_DELETE_DEV; + fs.dev_id = parse_uint64(optarg, "device id"); + break; + + case 6: + fs.op = flags::METADATA_OP_SET_TRANSACTION_ID; + fs.trans_id = parse_uint64(optarg, "transaction id"); + break; + + case 7: + fs.op = flags::METADATA_OP_RESERVE_METADATA_SNAP; + break; + + case 8: + fs.op = flags::METADATA_OP_RELEASE_METADATA_SNAP; break; case 101: @@ -231,8 +248,8 @@ thin_generate_metadata_cmd::run(int argc, char **argv) fs.nr_data_blocks = parse_uint64(optarg, "nr data blocks"); break; - case 301: - fs.dev_id = parse_uint64(optarg, "dev id"); + case 401: + fs.origin = 
parse_uint64(optarg, "origin"); break; case 'V': diff --git a/thin-provisioning/thin_pool.cc b/thin-provisioning/thin_pool.cc index d133711..3bdc9e7 100644 --- a/thin-provisioning/thin_pool.cc +++ b/thin-provisioning/thin_pool.cc @@ -16,13 +16,10 @@ // with thin-provisioning-tools. If not, see // . +#include "base/math_utils.h" #include "thin-provisioning/thin_pool.h" #include -#include -#include -#include -#include using namespace base; using namespace std; @@ -31,9 +28,22 @@ using namespace thin_provisioning; //---------------------------------------------------------------- -thin::thin(thin_dev_t dev, thin_pool *pool) +thin::thin(thin_dev_t dev, thin_pool &pool) : dev_(dev), - pool_(pool) + pool_(pool), + details_(pool.get_transaction_id(), pool.get_time()), + open_count_(1), + changed_(true) +{ +} + +thin::thin(thin_dev_t dev, thin_pool &pool, + device_tree_detail::device_details const &details) + : dev_(dev), + pool_(pool), + details_(details), + open_count_(1), + changed_(false) { } @@ -47,66 +57,76 @@ thin::maybe_address thin::lookup(block_address thin_block) { uint64_t key[2] = {dev_, thin_block}; - return pool_->md_->mappings_->lookup(key); + mapping_tree::maybe_value m = pool_.md_->mappings_->lookup(key); + if (!m) + return thin::maybe_address(); + + lookup_result r; + r.block_ = m->block_; + r.shared_ = m->time_ < details_.snapshotted_time_; + return r; } bool thin::insert(block_address thin_block, block_address data_block) { uint64_t key[2] = {dev_, thin_block}; + + ++details_.mapped_blocks_; + changed_ = true; + mapping_tree_detail::block_time bt; bt.block_ = data_block; - bt.time_ = 0; // FIXME: use current time. 
- return pool_->md_->mappings_->insert(key, bt); + bt.time_ = pool_.get_time(); + return pool_.md_->mappings_->insert(key, bt); } void thin::remove(block_address thin_block) { uint64_t key[2] = {dev_, thin_block}; - pool_->md_->mappings_->remove(key); + pool_.md_->mappings_->remove(key); + + --details_.mapped_blocks_; + changed_ = true; } void thin::set_snapshot_time(uint32_t time) { - uint64_t key[1] = { dev_ }; - boost::optional mdetail = pool_->md_->details_->lookup(key); - if (!mdetail) - throw runtime_error("no such device"); - - mdetail->snapshotted_time_ = time; - pool_->md_->details_->insert(key, *mdetail); + details_.snapshotted_time_ = time; + changed_ = true; } block_address thin::get_mapped_blocks() const { - uint64_t key[1] = { dev_ }; - boost::optional mdetail = pool_->md_->details_->lookup(key); - if (!mdetail) - throw runtime_error("no such device"); - - return mdetail->mapped_blocks_; + return details_.mapped_blocks_; } void thin::set_mapped_blocks(block_address count) { - uint64_t key[1] = { dev_ }; - boost::optional mdetail = pool_->md_->details_->lookup(key); - if (!mdetail) - throw runtime_error("no such device"); - - mdetail->mapped_blocks_ = count; - pool_->md_->details_->insert(key, *mdetail); + details_.mapped_blocks_ = count; + changed_ = true; } //-------------------------------- -thin_pool::thin_pool(metadata::ptr md) - : md_(md) +thin_pool::thin_pool(block_manager::ptr bm) { + md_ = metadata::ptr(new metadata(bm, true)); +} + +thin_pool::thin_pool(block_manager::ptr bm, + sector_t data_block_size, + block_address nr_data_blocks) +{ + md_ = metadata::ptr(new metadata(bm, + metadata::CREATE, + data_block_size, + nr_data_blocks)); + md_->commit(); } thin_pool::~thin_pool() @@ -120,14 +140,15 @@ thin_pool::create_thin(thin_dev_t dev) uint64_t key[1] = {dev}; if (device_exists(dev)) - throw std::runtime_error("Device already exists"); + throw std::runtime_error("device already exists"); single_mapping_tree::ptr new_tree(new 
single_mapping_tree(*md_->tm_, mapping_tree_detail::block_time_ref_counter(md_->data_sm_))); md_->mappings_top_level_->insert(key, new_tree->get_root()); md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly - // FIXME: doesn't set up the device details + thin::ptr r = create_device(dev); + close_device(r); } void @@ -136,31 +157,62 @@ thin_pool::create_snap(thin_dev_t dev, thin_dev_t origin) uint64_t snap_key[1] = {dev}; uint64_t origin_key[1] = {origin}; - boost::optional mtree_root = md_->mappings_top_level_->lookup(origin_key); + if (device_exists(dev)) + throw std::runtime_error("device already exists"); + + // find the mapping tree of the origin + dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(origin_key); if (!mtree_root) throw std::runtime_error("unknown origin"); - single_mapping_tree otree(*md_->tm_, *mtree_root, mapping_tree_detail::block_time_ref_counter(md_->data_sm_)); + // clone the origin single_mapping_tree::ptr clone(otree.clone()); md_->mappings_top_level_->insert(snap_key, clone->get_root()); md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly md_->sb_.time_++; - thin::ptr o = open_thin(origin); - thin::ptr s = open_thin(dev); - o->set_snapshot_time(md_->sb_.time_); - s->set_snapshot_time(md_->sb_.time_); - s->set_mapped_blocks(o->get_mapped_blocks()); + // create details for the snapshot + thin::ptr s = create_device(dev); + set_snapshot_details(s, origin); + close_device(s); } void thin_pool::del(thin_dev_t dev) { uint64_t key[1] = {dev}; + + thin::ptr td = open_device(dev); + if (td->open_count_ > 1) { + close_device(td); + throw std::runtime_error("device busy"); + } + + thin_devices_.erase(dev); + + dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(key); + if (!device_exists(dev) || !mtree_root) + throw std::runtime_error("unknown device"); + + // TODO: trigger subtree deletion from the mtree_ref_counter, + // like the kernel subtree_dec() does.
+ single_mapping_tree mtree(*md_->tm_, *mtree_root, + mapping_tree_detail::block_time_ref_counter(md_->data_sm_)); + mtree.destroy(); + + md_->details_->remove(key); md_->mappings_top_level_->remove(key); + md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly +} + +void +thin_pool::commit() +{ + write_changed_details(); + md_->commit(); } void @@ -175,6 +227,52 @@ thin_pool::get_transaction_id() const return md_->sb_.trans_id_; } +void +thin_pool::reserve_metadata_snap() +{ + if (md_->sb_.metadata_snap_) + throw std::runtime_error("pool metadata snapshot already exists."); + + commit(); + + md_->metadata_sm_->inc(superblock_detail::SUPERBLOCK_LOCATION); + transaction_manager::write_ref wr = md_->tm_->shadow( + superblock_detail::SUPERBLOCK_LOCATION, + superblock_validator()).first; + + superblock_detail::superblock sb; + superblock_detail::superblock_disk *sbd = reinterpret_cast<superblock_detail::superblock_disk *>(wr.data()); + superblock_detail::superblock_traits::unpack(*sbd, sb); + + memset(sb.data_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE); + memset(sb.metadata_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE); + md_->metadata_sm_->inc(sb.data_mapping_root_); + md_->metadata_sm_->inc(sb.device_details_root_); + + superblock_detail::superblock_traits::pack(sb, *sbd); + + md_->sb_.metadata_snap_ = wr.get_location(); +} + +void +thin_pool::release_metadata_snap() +{ + if (!md_->sb_.metadata_snap_) + throw std::runtime_error("No pool metadata snapshot found"); + + superblock_detail::superblock sb = read_superblock(md_->tm_->get_bm(), + md_->sb_.metadata_snap_); + device_tree dtree(*md_->tm_, sb.device_details_root_, + device_tree_detail::device_details_traits::ref_counter()); + dtree.destroy(); + mapping_tree mtree(*md_->tm_, sb.data_mapping_root_, + mapping_tree_detail::block_traits::ref_counter(md_->tm_->get_sm())); + mtree.destroy(); + md_->metadata_sm_->dec(md_->sb_.metadata_snap_); + + md_->sb_.metadata_snap_ = 0; +} + block_address
thin_pool::get_metadata_snap() const { @@ -184,7 +282,7 @@ thin_pool::get_metadata_snap() const block_address thin_pool::alloc_data_block() { - boost::optional mb = md_->data_sm_->new_block(); + space_map::maybe_block mb = md_->data_sm_->new_block(); if (!mb) throw runtime_error("couldn't allocate new block"); @@ -203,7 +301,7 @@ thin_pool::get_nr_free_data_blocks() const return md_->data_sm_->get_nr_free(); } -thin_provisioning::sector_t +sector_t thin_pool::get_data_block_size() const { return md_->sb_.data_block_size_; @@ -215,17 +313,22 @@ thin_pool::get_data_dev_size() const return md_->data_sm_->get_nr_blocks(); } +uint32_t +thin_pool::get_time() const +{ + return md_->sb_.time_; +} + thin::ptr thin_pool::open_thin(thin_dev_t dev) { - uint64_t key[1] = {dev}; - boost::optional mdetails = md_->details_->lookup(key); - if (!mdetails) - throw runtime_error("no such device"); + return open_device(dev); +} - thin *ptr = new thin(dev, this); - thin::ptr r(ptr); - return r; +void +thin_pool::close_thin(thin::ptr td) +{ + close_device(td); } bool @@ -235,4 +338,107 @@ thin_pool::device_exists(thin_dev_t dev) const return !!md_->details_->lookup(key); } +thin::ptr +thin_pool::create_device(thin_dev_t dev) +{ + device_map::iterator it = thin_devices_.find(dev); + if (it != thin_devices_.end()) + throw std::runtime_error("device already exists"); + + thin::ptr td(new thin(dev, *this)); + thin_devices_[dev] = td; + return td; +} + +thin::ptr +thin_pool::open_device(thin_dev_t dev) +{ + device_map::iterator it = thin_devices_.find(dev); + if (it != thin_devices_.end()) { + thin::ptr td = it->second; + td->open_count_++; + return td; + } + + uint64_t key[1] = {dev}; + device_tree::maybe_value details = md_->details_->lookup(key); + if (!details) + throw std::runtime_error("no such device"); + + thin::ptr td(new thin(dev, *this, *details)); + thin_devices_[dev] = td; + return td; +} + +void +thin_pool::close_device(thin::ptr td) +{ + td->open_count_--; +} + +void 
+thin_pool::set_snapshot_details(thin::ptr snap, thin_dev_t origin) +{ + thin::ptr o = open_device(origin); + o->set_snapshot_time(md_->sb_.time_); + snap->set_snapshot_time(md_->sb_.time_); + snap->set_mapped_blocks(o->get_mapped_blocks()); + close_device(o); +} + +void +thin_pool::write_changed_details() +{ + for (auto it = thin_devices_.cbegin(); it != thin_devices_.cend(); ) { + uint64_t key[1] = {it->first}; + thin::ptr td = it->second; + + if (td->changed_) { + md_->details_->insert(key, td->details_); + td->changed_ = false; + } + + if (!td->open_count_) + it = thin_devices_.erase(it); + else + ++it; + } +} + +//---------------------------------------------------------------- + +void +thin_provisioning::process_read(thin::ptr td, thin_pool::ptr tp, + sector_t offset) +{ + block_address blocknr = offset / tp->get_data_block_size(); + td->lookup(blocknr); +} + +void +thin_provisioning::process_write(thin::ptr td, thin_pool::ptr tp, + sector_t offset) +{ + block_address blocknr = offset / tp->get_data_block_size(); + thin::maybe_address result = td->lookup(blocknr); + if (!!result && !result->shared_) + return; + // TODO: handle out-of-space errors + block_address data_block = tp->alloc_data_block(); + td->insert(blocknr, data_block); +} + +void +thin_provisioning::process_discard(thin::ptr td, thin_pool::ptr tp, + sector_t offset) +{ + block_address blocknr = offset / tp->get_data_block_size(); + thin::maybe_address result = td->lookup(blocknr); + if (!result) + return; + td->remove(blocknr); + if (!result->shared_) + tp->free_data_block(result->block_); +} + //---------------------------------------------------------------- diff --git a/thin-provisioning/thin_pool.h b/thin-provisioning/thin_pool.h index fe4248b..62f6945 100644 --- a/thin-provisioning/thin_pool.h +++ b/thin-provisioning/thin_pool.h @@ -33,8 +33,13 @@ namespace thin_provisioning { class thin_pool; class thin { public: + struct lookup_result { +
block_address block_; + bool shared_; + }; + typedef std::shared_ptr<thin> ptr; - typedef boost::optional<mapping_tree_detail::block_time> maybe_address; + typedef boost::optional<lookup_result> maybe_address; thin_dev_t get_dev_t() const; maybe_address lookup(block_address thin_block); @@ -48,26 +53,38 @@ namespace thin_provisioning { private: friend class thin_pool; - thin(thin_dev_t dev, thin_pool *pool); // FIXME: pass a reference rather than a ptr + thin(thin_dev_t dev, thin_pool &pool); + thin(thin_dev_t dev, thin_pool &pool, + device_tree_detail::device_details const &details); thin_dev_t dev_; - thin_pool *pool_; + thin_pool &pool_; + device_tree_detail::device_details details_; + uint32_t open_count_; + bool changed_; }; class thin_pool { public: typedef std::shared_ptr<thin_pool> ptr; - thin_pool(metadata::ptr md); + thin_pool(block_manager::ptr bm); + thin_pool(block_manager::ptr bm, + sector_t data_block_size, + block_address nr_data_blocks); ~thin_pool(); void create_thin(thin_dev_t dev); void create_snap(thin_dev_t dev, thin_dev_t origin); void del(thin_dev_t); + void commit(); void set_transaction_id(uint64_t id); uint64_t get_transaction_id() const; + // handling metadata snapshot + void reserve_metadata_snap(); + void release_metadata_snap(); block_address get_metadata_snap() const; block_address alloc_data_block(); @@ -77,15 +94,29 @@ namespace thin_provisioning { block_address get_nr_free_data_blocks() const; sector_t get_data_block_size() const; block_address get_data_dev_size() const; + uint32_t get_time() const; thin::ptr open_thin(thin_dev_t); + void close_thin(thin::ptr td); private: friend class thin; + typedef std::map<thin_dev_t, thin::ptr> device_map; + bool device_exists(thin_dev_t dev) const; + thin::ptr create_device(thin_dev_t dev); + thin::ptr open_device(thin_dev_t dev); + void close_device(thin::ptr device); + void set_snapshot_details(thin::ptr snap, thin_dev_t origin); + void write_changed_details(); metadata::ptr md_; + device_map thin_devices_; }; + + void process_read(thin::ptr td, thin_pool::ptr tp, sector_t
offset); + void process_write(thin::ptr td, thin_pool::ptr tp, sector_t offset); + void process_discard(thin::ptr td, thin_pool::ptr tp, sector_t offset); }; //----------------------------------------------------------------