diff --git a/.gitignore b/.gitignore index 0db515f..4544b4f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *~ *.o +*.so *.a *.gmo *_t @@ -11,6 +12,23 @@ cachegrind.* core bin/pdata_tools +thin_check +thin_dump +thin_restore +thin_repair +thin_rmap +thin_metadata_size +thin_show_blocks + +cache_check +cache_dump +cache_restore +cache_repair +cache_metadata_size + +era_check +era_dump +era_invalidate *.metadata bad-metadata @@ -27,6 +45,8 @@ autom4te.cache/ *.xml *.bin *.patch +*.orig +*.rej version.h config.cache @@ -34,4 +54,4 @@ config.log config.status configure -callgrind.* \ No newline at end of file +callgrind.* diff --git a/Makefile.in b/Makefile.in index 4264191..bbbc877 100644 --- a/Makefile.in +++ b/Makefile.in @@ -34,13 +34,18 @@ SOURCE=\ base/error_string.cc \ base/grid_layout.cc \ base/progress_monitor.cc \ + base/rolling_hash.cc \ base/xml_utils.cc \ block-cache/block_cache.cc \ + block-cache/copier.cc \ + block-cache/io_engine.cc \ + block-cache/mem_pool.cc \ caching/cache_check.cc \ caching/cache_dump.cc \ caching/cache_metadata_size.cc \ caching/cache_repair.cc \ caching/cache_restore.cc \ + caching/cache_writeback.cc \ caching/commands.cc \ caching/hint_array.cc \ caching/mapping_array.cc \ @@ -67,6 +72,7 @@ SOURCE=\ persistent-data/data-structures/bitset.cc \ persistent-data/data-structures/bloom_filter.cc \ persistent-data/data-structures/btree.cc \ + persistent-data/data-structures/btree_node_checker.cc \ persistent-data/error_set.cc \ persistent-data/file_utils.cc \ persistent-data/hex_dump.cc \ @@ -77,12 +83,17 @@ SOURCE=\ persistent-data/transaction_manager.cc \ persistent-data/validators.cc \ thin-provisioning/commands.cc \ + thin-provisioning/cache_stream.cc \ + thin-provisioning/chunk_stream.cc \ thin-provisioning/device_tree.cc \ + thin-provisioning/fixed_chunk_stream.cc \ thin-provisioning/human_readable_format.cc \ thin-provisioning/mapping_tree.cc \ thin-provisioning/metadata.cc \ thin-provisioning/metadata_checker.cc \ + 
thin-provisioning/metadata_counter.cc \ thin-provisioning/metadata_dumper.cc \ + thin-provisioning/pool_stream.cc \ thin-provisioning/restore_emitter.cc \ thin-provisioning/rmap_visitor.cc \ thin-provisioning/superblock.cc \ @@ -98,24 +109,57 @@ SOURCE=\ thin-provisioning/thin_trim.cc \ thin-provisioning/xml_format.cc +DEVTOOLS_SOURCE=\ + thin-provisioning/thin_ll_dump.cc \ + thin-provisioning/thin_ll_restore.cc \ + thin-provisioning/thin_show_duplicates.cc \ + thin-provisioning/thin_generate_metadata.cc \ + thin-provisioning/variable_chunk_stream.cc \ + thin-provisioning/thin_show_metadata.cc \ + thin-provisioning/thin_scan.cc \ + ui/ui.cc + +ifeq ("@DEVTOOLS@", "yes") +SOURCE+=$(DEVTOOLS_SOURCE) +endif + +ifeq ("@STATIC@", "yes") +SOURCE += thin-provisioning/static_library_emitter.cc +else +SOURCE += thin-provisioning/shared_library_emitter.cc +endif + CC:=@CC@ CXX:=@CXX@ STRIP:=@STRIP@ OBJECTS:=$(subst .cc,.o,$(SOURCE)) + +# FIXME EMITTERS += $(PLUGIN_LIBS) doesn't work, probably because it's empty at +# the time of use? 
+ifeq ("@STATIC@", "yes") +EMITTERS += contrib/*.a +endif + TOP_DIR:=@top_srcdir@ TOP_BUILDDIR:=@top_builddir@ -CFLAGS?=@CFLAGS@ -CFLAGS+=-Wall +CFLAGS+=-g -Wall -O3 -fPIC CFLAGS+=@LFS_FLAGS@ -CXXFLAGS?=@CXXFLAGS@ -CXXFLAGS+=-Wall -fno-strict-aliasing -std=gnu++98 +CXXFLAGS+=-g -Wall -fPIC -fno-strict-aliasing -std=c++11 + +ifeq ("@DEVTOOLS@", "yes") +CXXFLAGS+=-DDEV_TOOLS +endif + CXXFLAGS+=@CXXOPTIMISE_FLAG@ CXXFLAGS+=@CXXDEBUG_FLAG@ CXXFLAGS+=@CXX_STRERROR_FLAG@ CXXFLAGS+=@LFS_FLAGS@ -CPPFLAGS?=@CPPFLAGS@ -CPPFLAGS+=-I$(TOP_BUILDDIR) -I$(TOP_DIR) -I$(TOP_DIR)/thin-provisioning -LIBS:=-laio -lexpat +INCLUDES+=-I$(TOP_BUILDDIR) -I$(TOP_DIR) -I$(TOP_DIR)/thin-provisioning +LIBS:=-laio -lexpat -ldl + +ifeq ("@DEVTOOLS@", "yes") +LIBS+=-lncurses +endif ifeq ("@STATIC_CXX@", "yes") CXXLIB+=-Wl,-Bstatic -lstdc++ -Wl,-Bdynamic -Wl,--as-needed @@ -141,8 +185,8 @@ INSTALL_DATA = $(INSTALL) -p -m 644 ifeq ("@TESTING@", "yes") TEST_INCLUDES=\ - -Igmock-1.6.0/include \ - -Igmock-1.6.0/gtest/include + -Igoogletest/googlemock/include \ + -Igoogletest/googletest/include else TEST_INCLUDES= endif @@ -151,19 +195,19 @@ endif %.o: %.cc @echo " [CXX] $<" - $(V) $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< + $(V) $(CXX) -c $(INCLUDES) $(CXXFLAGS) -o $@ $< @echo " [DEP] $<" - $(V) $(CXX) -MM -MT $(subst .cc,.o,$<) $(CPPFLAGS) $(TEST_INCLUDES) $(CXXFLAGS) $< > $*.$$$$; \ + $(V) $(CXX) -MM -MT $(subst .cc,.o,$<) $(INCLUDES) $(TEST_INCLUDES) $(CXXFLAGS) $< > $*.$$$$; \ sed 's,\([^ :]*\)\.o[ :]*,\1.o \1.gmo $* : Makefile ,g' < $*.$$$$ > $*.d; \ $(RM) $*.$$$$ #---------------------------------------------------------------- -lib/libpdata.a: $(OBJECTS) +lib/libpdata.a: $(OBJECTS) $(EMITTERS) @echo " [AR] $<" - $(V)ar -rv $@ $(OBJECTS) > /dev/null 2>&1 + $(V)ar -rv $@ $(OBJECTS) $(EMITTERS) > /dev/null 2>&1 -bin/pdata_tools: $(OBJECTS) +bin/pdata_tools: $(OBJECTS) $(EMITTERS) @echo " [LD] $@" $(V) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $+ $(LIBS) $(CXXLIB) @@ -195,6 +239,7 @@ install: 
bin/pdata_tools ln -s -f pdata_tools $(BINDIR)/cache_metadata_size ln -s -f pdata_tools $(BINDIR)/cache_repair ln -s -f pdata_tools $(BINDIR)/cache_restore + ln -s -f pdata_tools $(BINDIR)/cache_writeback ln -s -f pdata_tools $(BINDIR)/thin_check ln -s -f pdata_tools $(BINDIR)/thin_delta ln -s -f pdata_tools $(BINDIR)/thin_dump @@ -202,6 +247,7 @@ install: bin/pdata_tools ln -s -f pdata_tools $(BINDIR)/thin_repair ln -s -f pdata_tools $(BINDIR)/thin_restore ln -s -f pdata_tools $(BINDIR)/thin_rmap + ln -s -f pdata_tools $(BINDIR)/thin_show_duplicates ln -s -f pdata_tools $(BINDIR)/thin_trim ln -s -f pdata_tools $(BINDIR)/thin_metadata_size ln -s -f pdata_tools $(BINDIR)/era_check @@ -213,6 +259,7 @@ install: bin/pdata_tools $(INSTALL_DATA) man8/cache_dump.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/cache_repair.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/cache_restore.8 $(MANPATH)/man8 + $(INSTALL_DATA) man8/cache_writeback.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_check.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_delta.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_dump.8 $(MANPATH)/man8 @@ -225,21 +272,28 @@ install: bin/pdata_tools $(INSTALL_DATA) man8/era_check.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_dump.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_invalidate.8 $(MANPATH)/man8 +ifeq ("@DEVTOOLS@", "yes") + ln -s -f pdata_tools $(BINDIR)/thin_ll_dump + ln -s -f pdata_tools $(BINDIR)/thin_show_duplicates + ln -s -f pdata_tools $(BINDIR)/thin_generate_metadata + ln -s -f pdata_tools $(BINDIR)/thin_scan +endif # $(INSTALL_DATA) man8/era_restore.8 $(MANPATH)/man8 .PHONY: install +include contrib/Makefile + ifeq ("@TESTING@", "yes") include unit-tests/Makefile -.PHONY: features test check +.PHONEY: features -features: bin/pdata_tools +features: pdata_tools cucumber --no-color --format progress test: features unit-test -check: unit-test endif -include $(DEPEND_FILES) diff --git a/VERSION b/VERSION index 844f6a9..04a2c78 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.3 
+0.7.0-rc1 diff --git a/base/container_of.h b/base/container_of.h new file mode 100644 index 0000000..1d70f31 --- /dev/null +++ b/base/container_of.h @@ -0,0 +1,24 @@ +#ifndef BASE_CONTAINER_OF_H +#define BASE_CONTAINER_OF_H + +#include + +//---------------------------------------------------------------- + +namespace base { + template + size_t offsetof__(const M P::*member) + { + return (size_t) &( reinterpret_cast(0)->*member); + } + + template + P *container_of(M *ptr, M const P::*member) + { + return (P *)((char *)(ptr) - offsetof__(member)); + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/base/progress_monitor.cc b/base/progress_monitor.cc index 1d88302..2cd3901 100644 --- a/base/progress_monitor.cc +++ b/base/progress_monitor.cc @@ -31,17 +31,22 @@ namespace { if (nr_equals < progress_width_) cout << '>'; + else + cout << "="; for (unsigned i = 0; i < nr_spaces; i++) cout << ' '; - cout << "] " << spinner_char() << " " << p << "%\r" << flush; + cout << "] " << spinner_char(p) << " " << p << "%\r" << flush; spinner_++; } private: - char spinner_char() const { + char spinner_char(unsigned p) const { + if (p == 100) + return ' '; + char cs[] = {'|', '/', '-', '\\'}; unsigned index = spinner_ % sizeof(cs); @@ -63,16 +68,16 @@ namespace { //---------------------------------------------------------------- -std::auto_ptr +std::unique_ptr base::create_progress_bar(std::string const &title) { - return auto_ptr(new progress_bar(title)); + return unique_ptr(new progress_bar(title)); } -std::auto_ptr +std::unique_ptr base::create_quiet_progress_monitor() { - return auto_ptr(new quiet_progress()); + return unique_ptr(new quiet_progress()); } //---------------------------------------------------------------- diff --git a/base/progress_monitor.h b/base/progress_monitor.h index 5472343..b3152d1 100644 --- a/base/progress_monitor.h +++ b/base/progress_monitor.h @@ -15,8 +15,8 @@ namespace base { virtual void 
update_percent(unsigned) = 0; }; - std::auto_ptr create_progress_bar(std::string const &title); - std::auto_ptr create_quiet_progress_monitor(); + std::unique_ptr create_progress_bar(std::string const &title); + std::unique_ptr create_quiet_progress_monitor(); } //---------------------------------------------------------------- diff --git a/base/rolling_hash.cc b/base/rolling_hash.cc new file mode 100644 index 0000000..d2d273a --- /dev/null +++ b/base/rolling_hash.cc @@ -0,0 +1,57 @@ +#include "base/rolling_hash.h" + +using namespace base; +using namespace boost; +using namespace hash_detail; +using namespace std; + +//---------------------------------------------------------------- + +rolling_hash::rolling_hash(unsigned window_size) + : a_(MULTIPLIER), + a_to_k_minus_1_(a_), + window_size_(window_size), + buffer_(window_size) { + + for (unsigned i = 1; i < window_size_ - 1; i++) + a_to_k_minus_1_ *= a_; + + reset(); +} + +void +rolling_hash::reset() +{ + // prime with zeroes + buffer_.clear(); + + hash_ = 0; + for (unsigned i = 0; i < window_size_; i++) { + hash_ = (hash_ * a_) + SEED; + buffer_.push_back(0); + } +} + +//-------------------------------- + +content_based_hash::content_based_hash(unsigned window_size) + : rhash_(window_size), + + // FIXME: hard coded values + backup_div_((window_size / 4) - 1), + div_((window_size / 2) - 1), + min_len_(window_size / 4), + max_len_(window_size), + len_(0) +{ +} + +void +content_based_hash::reset() +{ + len_ = 0; + backup_break_.reset(); + rhash_.reset(); +} + +//---------------------------------------------------------------- diff --git a/base/rolling_hash.h b/base/rolling_hash.h new file mode 100644 index 0000000..dff3145 --- /dev/null +++ b/base/rolling_hash.h @@ -0,0 +1,109 @@ +#ifndef BASE_ROLLING_HASH_H +#define BASE_ROLLING_HASH_H + +#include +#include +#include + +//---------------------------------------------------------------- + +namespace base { + namespace hash_detail { + uint32_t const MULTIPLIER = 
4294967291UL; + uint32_t const SEED = 123; + } + + class rolling_hash { + public: + rolling_hash(unsigned window_size); + + void reset(); + + // Returns the current hash + uint32_t step(uint8_t byte) { + update_hash(byte); + return hash_; + } + + uint32_t get_hash() const { + return hash_; + } + + private: + void update_hash(uint8_t byte) { + hash_ -= a_to_k_minus_1_ * (buffer_.front() + hash_detail::SEED); + buffer_.push_back(byte); + hash_ = (hash_ * a_) + byte + hash_detail::SEED; + } + + uint32_t a_; + uint32_t a_to_k_minus_1_; + + uint32_t hash_; + uint32_t window_size_; + + boost::circular_buffer buffer_; + }; + + class content_based_hash { + public: + content_based_hash(unsigned window_size); + void reset(); + + // Returns a break point relative to the last reset/break. + boost::optional step(uint8_t byte) { + boost::optional r; + + rhash_.step(byte); + len_++; + + if (len_ < min_len_) + return r; + + if (hit_break(backup_div_)) + backup_break_ = len_; + + if (hit_break(div_)) { + // found a break + r = len_; + len_ = 0; + backup_break_.reset(); + + } else if (len_ >= max_len_) { + // too big, is there a backup? 
+ if (backup_break_) { + len_ -= *backup_break_; + r = backup_break_; + backup_break_.reset(); + + } else { + r = len_; + len_ = 0; + } + } + + return r; + } + + private: + bool hit_break(uint32_t mask) const { + uint32_t h = rhash_.get_hash() >> 8; + return !(h & mask); + } + + rolling_hash rhash_; + + uint32_t backup_div_; + uint32_t div_; + + unsigned min_len_; + unsigned max_len_; + + unsigned len_; + boost::optional backup_break_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/base/unique_handle.h b/base/unique_handle.h new file mode 100644 index 0000000..9c44e1b --- /dev/null +++ b/base/unique_handle.h @@ -0,0 +1,67 @@ +#ifndef BASE_UNIQUE_HANDLE_H +#define BASE_UNIQUE_HANDLE_H + +#include +#include +#include + +//---------------------------------------------------------------- + +namespace base { + template + class unique_handle + { + public: + unique_handle(std::nullptr_t = nullptr) + : id_(TNul) { + } + + unique_handle(T x) + : id_(x) { + } + + explicit operator bool() const { + return id_ != TNul; + } + + operator T&() { + return id_; + } + + operator T() const { + return id_; + } + + T *operator&() { + return &id_; + } + + const T *operator&() const { + return &id_; + } + + friend bool operator == (unique_handle a, unique_handle b) { return a.id_ == b.id_; } + friend bool operator != (unique_handle a, unique_handle b) { return a.id_ != b.id_; } + friend bool operator == (unique_handle a, std::nullptr_t) { return a.id_ == TNul; } + friend bool operator != (unique_handle a, std::nullptr_t) { return a.id_ != TNul; } + friend bool operator == (std::nullptr_t, unique_handle b) { return TNul == b.id_; } + friend bool operator != (std::nullptr_t, unique_handle b) { return TNul != b.id_; } + + private: + T id_; + }; + + //-------------------------------- + + struct fd_deleter { + typedef unique_handle pointer; + void operator()(pointer p) { + ::close(p); + } + }; + typedef std::unique_ptr unique_fd; +} + 
+//---------------------------------------------------------------- + +#endif diff --git a/base/xml_utils.cc b/base/xml_utils.cc index fb34153..8dd897c 100644 --- a/base/xml_utils.cc +++ b/base/xml_utils.cc @@ -14,18 +14,21 @@ xml_parser::parse(std::string const &backup_file, bool quiet) persistent_data::check_file_exists(backup_file); ifstream in(backup_file.c_str(), ifstream::in); - std::auto_ptr monitor = create_monitor(quiet); + std::unique_ptr monitor = create_monitor(quiet); size_t total = 0; size_t input_length = get_file_length(backup_file); - while (!in.eof()) { + XML_Error error_code = XML_ERROR_NONE; + while (!in.eof() && error_code == XML_ERROR_NONE) { char buffer[4096]; in.read(buffer, sizeof(buffer)); size_t len = in.gcount(); int done = in.eof(); - if (!XML_Parse(parser_, buffer, len, done)) { + // Do not throw while normally aborted by element handlers + if (!XML_Parse(parser_, buffer, len, done) && + (error_code = XML_GetErrorCode(parser_)) != XML_ERROR_ABORTED) { ostringstream out; out << "Parse error at line " << XML_GetCurrentLineNumber(parser_) @@ -53,7 +56,7 @@ xml_parser::get_file_length(string const &file) const return info.st_size; } -auto_ptr +unique_ptr xml_parser::create_monitor(bool quiet) { if (!quiet && isatty(fileno(stdout))) diff --git a/base/xml_utils.h b/base/xml_utils.h index f867f56..fbfdd2c 100644 --- a/base/xml_utils.h +++ b/base/xml_utils.h @@ -37,7 +37,7 @@ namespace xml_utils { private: size_t get_file_length(string const &file) const; - auto_ptr create_monitor(bool quiet); + unique_ptr create_monitor(bool quiet); XML_Parser parser_; }; diff --git a/bin/thin_generate_metadata b/bin/thin_generate_metadata new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_generate_metadata @@ -0,0 +1 @@ +pdata_tools \ No newline at end of file diff --git a/bin/thin_ll_dump b/bin/thin_ll_dump new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_ll_dump @@ -0,0 +1 @@ +pdata_tools \ No newline at end of 
file diff --git a/bin/thin_ll_restore b/bin/thin_ll_restore new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_ll_restore @@ -0,0 +1 @@ +pdata_tools \ No newline at end of file diff --git a/bin/thin_scan b/bin/thin_scan new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_scan @@ -0,0 +1 @@ +pdata_tools \ No newline at end of file diff --git a/bin/thin_show_duplicates b/bin/thin_show_duplicates new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_show_duplicates @@ -0,0 +1 @@ +pdata_tools \ No newline at end of file diff --git a/bin/thin_show_metadata b/bin/thin_show_metadata new file mode 120000 index 0000000..84c01e7 --- /dev/null +++ b/bin/thin_show_metadata @@ -0,0 +1 @@ +pdata_tools \ No newline at end of file diff --git a/block-cache/block_cache.cc b/block-cache/block_cache.cc index 3699d4b..f276625 100644 --- a/block-cache/block_cache.cc +++ b/block-cache/block_cache.cc @@ -1,5 +1,6 @@ #include "block-cache/block_cache.h" +#include #include #include #include @@ -44,34 +45,19 @@ namespace { int block_cache::init_free_list(unsigned count) { - size_t len; - block *blocks; size_t block_size = block_size_ << SECTOR_SHIFT; - void *data; - unsigned i; - - /* Allocate the block structures */ - len = sizeof(block) * count; - blocks = static_cast(malloc(len)); - if (!blocks) - return -ENOMEM; - - blocks_memory_ = blocks; + unsigned char *data = static_cast(alloc_aligned(count * block_size, PAGE_SIZE)); /* Allocate the data for each block. We page align the data. 
*/ - data = alloc_aligned(count * block_size, PAGE_SIZE); - if (!data) { - free(blocks); + if (!data) return -ENOMEM; - } blocks_data_ = data; - for (i = 0; i < count; i++) { - block *b = new (blocks + i) block(); - b->data_ = static_cast(data) + block_size * i; - - list_add(&b->list_, &free_); + for (unsigned i = 0; i < count; i++) { + block &b = (*blocks_memory_)[i]; + b.data_ = data + (block_size * i); + free_.push_front(b); } return 0; @@ -82,28 +68,18 @@ block_cache::exit_free_list() { if (blocks_data_) free(blocks_data_); - - if (blocks_memory_) { - struct block *blocks = static_cast(blocks_memory_); - for (unsigned i = 0; i < nr_cache_blocks_; i++) - (blocks + i)->~block(); - - free(blocks_memory_); - } } block_cache::block * block_cache::__alloc_block() { - block *b; - - if (list_empty(&free_)) + if (free_.empty()) return NULL; - b = list_first_entry(&free_, block, list_); - list_del(&b->list_); + block &b = free_.front(); + b.unlink(); - return b; + return &b; } /*---------------------------------------------------------------- @@ -131,15 +107,18 @@ block_cache::complete_io(block &b, int result) b.clear_flags(BF_IO_PENDING); nr_io_pending_--; - if (b.error_) - list_move_tail(&b.list_, &errored_); - else { + if (b.error_) { + b.unlink(); + errored_.push_back(b); + + } else { if (b.test_flags(BF_DIRTY)) { b.clear_flags(BF_DIRTY | BF_PREVIOUSLY_DIRTY); nr_dirty_--; } - list_move_tail(&b.list_, &clean_); + b.unlink(); + clean_.push_back(b); } } @@ -157,7 +136,8 @@ block_cache::issue_low_level(block &b, enum io_iocb_cmd opcode, const char *desc assert(!b.test_flags(BF_IO_PENDING)); b.set_flags(BF_IO_PENDING); nr_io_pending_++; - list_move_tail(&b.list_, &io_pending_); + b.unlink(); + io_pending_.push_back(b); b.control_block_.aio_lio_opcode = opcode; control_blocks[0] = &b.control_block_; @@ -208,7 +188,7 @@ block_cache::wait_io() for (i = 0; i < static_cast(r); i++) { io_event const &e = events_[i]; - block *b = container_of(e.obj, block, control_block_); + 
block *b = base::container_of(e.obj, &block::control_block_); if (e.res == block_size_ << SECTOR_SHIFT) complete_io(*b, 0); @@ -236,19 +216,20 @@ block_cache::wait_io() * We're using lru lists atm, but I think it would be worth * experimenting with a multiqueue approach. */ -list_head * +block_cache::block_list & block_cache::__categorise(block &b) { if (b.error_) - return &errored_; + return errored_; - return b.test_flags(BF_DIRTY) ? &dirty_ : &clean_; + return b.test_flags(BF_DIRTY) ? dirty_ : clean_; } void block_cache::hit(block &b) { - list_move_tail(&b.list_, __categorise(b)); + b.unlink(); + __categorise(b).push_back(b); } /*---------------------------------------------------------------- @@ -257,7 +238,7 @@ block_cache::hit(block &b) void block_cache::wait_all() { - while (!list_empty(&io_pending_)) + while (!io_pending_.empty()) wait_io(); } @@ -271,10 +252,15 @@ block_cache::wait_specific(block &b) unsigned block_cache::writeback(unsigned count) { - block *b, *tmp; unsigned actual = 0, dirty_length = 0; - list_for_each_entry_safe (b, tmp, &dirty_, list_) { + // issue_write unlinks b, which invalidates the iteration, so we + // keep track of the next element before removing. + auto it = dirty_.begin(); + auto next = it; + while (it != dirty_.end()) { + next = it; + ++next; dirty_length++; if (actual == count) @@ -282,69 +268,18 @@ block_cache::writeback(unsigned count) // The block may be on the dirty list from a prior // acquisition. - if (b->ref_count_) + if (it->ref_count_) continue; - issue_write(*b); + issue_write(*it); actual++; + + it = next; } return actual; } -/*---------------------------------------------------------------- - * Hash table - *---------------------------------------------------------------*/ - -/* - * |nr_buckets| must be a power of two. 
- */ -void -block_cache::hash_init(unsigned nr_buckets) -{ - unsigned i; - - nr_buckets_ = nr_buckets; - mask_ = nr_buckets - 1; - - for (i = 0; i < nr_buckets; i++) - INIT_LIST_HEAD(&buckets_[i]); -} - -unsigned -block_cache::hash(uint64_t index) -{ - const unsigned BIG_PRIME = 4294967291UL; - return (((unsigned) index) * BIG_PRIME) & mask_; -} - -block_cache::block * -block_cache::hash_lookup(block_address index) -{ - block *b; - unsigned bucket = hash(index); - - list_for_each_entry (b, &buckets_[bucket], hash_list_) { - if (b->index_ == index) - return b; - } - - return NULL; -} - -void -block_cache::hash_insert(block &b) -{ - unsigned bucket = hash(b.index_); - list_move_tail(&b.hash_list_, &buckets_[bucket]); -} - -void -block_cache::hash_remove(block &b) -{ - list_del_init(&b.hash_list_); -} - /*---------------------------------------------------------------- * High level allocation *--------------------------------------------------------------*/ @@ -362,18 +297,17 @@ block_cache::setup_control_block(block &b) cb->u.c.nbytes = block_size_bytes; } +// FIXME: return a reference block_cache::block * block_cache::find_unused_clean_block() { - struct block *b, *tmp; - - list_for_each_entry_safe (b, tmp, &clean_, list_) { - if (b->ref_count_) + for (block &b : clean_) { + if (b.ref_count_) continue; - hash_remove(*b); - list_del(&b->list_); - return b; + b.unlink_set(); + b.unlink(); + return &b; } return NULL; @@ -386,8 +320,8 @@ block_cache::new_block(block_address index) b = __alloc_block(); if (!b) { - if (list_empty(&clean_)) { - if (list_empty(&io_pending_)) + if (clean_.empty()) { + if (io_pending_.empty()) writeback(16); wait_io(); } @@ -396,8 +330,6 @@ block_cache::new_block(block_address index) } if (b) { - INIT_LIST_HEAD(&b->list_); - INIT_LIST_HEAD(&b->hash_list_); b->bc_ = this; b->ref_count_ = 0; @@ -408,7 +340,7 @@ block_cache::new_block(block_address index) b->index_ = index; setup_control_block(*b); - hash_insert(*b); + block_set_.insert(*b); } 
return b; @@ -455,9 +387,6 @@ block_cache::block_cache(int fd, sector_t block_size, uint64_t on_disk_blocks, s { int r; unsigned nr_cache_blocks = calc_nr_cache_blocks(mem, block_size); - unsigned nr_buckets = calc_nr_buckets(nr_cache_blocks); - - buckets_.resize(nr_buckets); fd_ = fd; block_size_ = block_size; @@ -473,12 +402,7 @@ block_cache::block_cache(int fd, sector_t block_size, uint64_t on_disk_blocks, s throw std::runtime_error("io_setup failed"); } - hash_init(nr_buckets); - INIT_LIST_HEAD(&free_); - INIT_LIST_HEAD(&errored_); - INIT_LIST_HEAD(&dirty_); - INIT_LIST_HEAD(&clean_); - INIT_LIST_HEAD(&io_pending_); + blocks_memory_.reset(new std::vector(nr_cache_blocks)); r = init_free_list(nr_cache_blocks); if (r) @@ -552,30 +476,31 @@ block_cache::block * block_cache::lookup_or_read_block(block_address index, unsigned flags, validator::ptr v) { - block *b = hash_lookup(index); + auto it = block_set_.find(index, cmp_index()); - if (b) { - if (b->test_flags(BF_IO_PENDING)) { + if (it != block_set_.end()) { + if (it->test_flags(BF_IO_PENDING)) { inc_miss_counter(flags); - wait_specific(*b); + wait_specific(*it); } else inc_hit_counter(flags); if (flags & GF_ZERO) - zero_block(*b); + zero_block(*it); else { - if (b->v_.get() != v.get()) { - if (b->test_flags(BF_DIRTY)) - b->v_->prepare(b->data_, b->index_); - v->check(b->data_, b->index_); + if (it->v_.get() != v.get()) { + if (it->test_flags(BF_DIRTY)) + it->v_->prepare(it->data_, it->index_); + v->check(it->data_, it->index_); } } - b->v_ = v; + it->v_ = v; + return &(*it); } else { inc_miss_counter(flags); - b = new_block(index); + block *b = new_block(index); if (b) { if (flags & GF_ZERO) zero_block(*b); @@ -587,9 +512,9 @@ block_cache::lookup_or_read_block(block_address index, unsigned flags, b->v_ = v; } - } - return (!b || b->error_) ? NULL : b; + return (!b || b->error_) ? 
NULL : b; + } } block_cache::block & @@ -600,8 +525,11 @@ block_cache::get(block_address index, unsigned flags, validator::ptr v) block *b = lookup_or_read_block(index, flags, v); if (b) { - if (b->ref_count_ && flags & (GF_DIRTY | GF_ZERO)) - throw std::runtime_error("attempt to write lock block concurrently"); + if (b->ref_count_ && (flags & (GF_DIRTY | GF_ZERO))) { + std::ostringstream out; + out << "attempt to write lock block " << index << " concurrently"; + throw std::runtime_error(out.str()); + } // FIXME: this gets called even for new blocks hit(*b); @@ -620,7 +548,9 @@ block_cache::get(block_address index, unsigned flags, validator::ptr v) return *b; } - throw std::runtime_error("couldn't get block"); + std::ostringstream out; + out << "couldn't get block " << index; + throw std::runtime_error(out.str()); } void @@ -644,7 +574,8 @@ block_cache::release(block_cache::block &b) if (b.test_flags(BF_DIRTY)) { if (!b.test_flags(BF_PREVIOUSLY_DIRTY)) { - list_move_tail(&b.list_, &dirty_); + b.unlink(); + dirty_.push_back(b); nr_dirty_++; b.set_flags(BF_PREVIOUSLY_DIRTY); } @@ -661,19 +592,18 @@ block_cache::release(block_cache::block &b) int block_cache::flush() { - block *b, *tmp; - - list_for_each_entry_safe (b, tmp, &dirty_, list_) { - if (b->ref_count_ || b->test_flags(BF_IO_PENDING)) + while (!dirty_.empty()) { + block &b = dirty_.front(); + if (b.ref_count_ || b.test_flags(BF_IO_PENDING)) // The superblock may well be still locked. continue; - issue_write(*b); + issue_write(b); } wait_all(); - return list_empty(&errored_) ? 0 : -EIO; + return errored_.empty() ? 
0 : -EIO; } void @@ -681,11 +611,12 @@ block_cache::prefetch(block_address index) { check_index(index); - block *b = hash_lookup(index); - if (!b) { + auto it = block_set_.find(index, cmp_index()); + + if (it == block_set_.end()) { prefetches_++; - b = new_block(index); + block *b = new_block(index); if (b) issue_read(*b); } diff --git a/block-cache/block_cache.h b/block-cache/block_cache.h index 4bc6667..d6cd1c9 100644 --- a/block-cache/block_cache.h +++ b/block-cache/block_cache.h @@ -1,17 +1,23 @@ #ifndef BLOCK_CACHE_H #define BLOCK_CACHE_H -#include "block-cache/list.h" +#include "base/container_of.h" -#include +#include +#include #include - -#include +#include +#include +#include #include #include +#include #include #include #include +#include + +namespace bi = boost::intrusive; //---------------------------------------------------------------- @@ -26,12 +32,14 @@ namespace bcache { virtual ~validator() {} virtual void check(void const *data, block_address location) const = 0; + virtual bool check_raw(void const *data) const = 0; virtual void prepare(void *data, block_address location) const = 0; }; class noop_validator : public validator { public: void check(void const *data, block_address location) const {} + bool check_raw(void const *data) const {return true;} void prepare(void *data, block_address location) const {} }; @@ -50,7 +58,14 @@ namespace bcache { public: block() : v_() { - INIT_LIST_HEAD(&list_); + } + + bool operator <(block const &rhs) const { + return index_ > rhs.index_; + } + + bool operator ==(block const &rhs) const { + return index_ == rhs.index_; } // Do not give this class a destructor, it wont get @@ -92,16 +107,25 @@ namespace bcache { bc_->release(*this); } + void unlink_set() { + set_hook_.unlink(); + } + + void unlink() { + list_hook_.unlink(); + } + private: friend class block_cache; + friend class cmp_index; block_cache *bc_; uint64_t index_; void *data_; - list_head list_; - list_head hash_list_; + bi::list_member_hook> 
list_hook_; + bi::set_member_hook> set_hook_; unsigned ref_count_; @@ -112,6 +136,54 @@ namespace bcache { validator::ptr v_; }; + struct cmp_index { + bool operator()(block_address index, block const &b) const { + return index > b.index_; + } + + bool operator()(block const &b, block_address index) const { + return b.index_ > index; + } + }; + + class auto_block { + public: + auto_block() + : b_(0) { + } + + auto_block(block &b) + : b_(&b) { + } + + ~auto_block() { + put(); + } + + auto_block &operator =(block &b) { + put(); + b_ = &b; + return *this; + } + + void *get_data() const { + if (b_) + return b_->get_data(); + + throw std::runtime_error("auto_block not set"); + } + + private: + void put() { + if (b_) { + b_->put(); + b_ = 0; + } + } + + block *b_; + }; + //-------------------------------- block_cache(int fd, sector_t block_size, @@ -137,24 +209,24 @@ namespace bcache { void prefetch(block_address index); private: + typedef bi::member_hook>, + &block::list_hook_> list_hook_option; + typedef bi::list> block_list; + int init_free_list(unsigned count); - void exit_free_list(); block *__alloc_block(); void complete_io(block &b, int result); void issue_low_level(block &b, enum io_iocb_cmd opcode, const char *desc); void issue_read(block &b); void issue_write(block &b); void wait_io(); - list_head *__categorise(block &b); + block_list &__categorise(block &b); void hit(block &b); void wait_all(); void wait_specific(block &b); unsigned writeback(unsigned count); - void hash_init(unsigned nr_buckets); - unsigned hash(uint64_t index); - block *hash_lookup(block_address index); - void hash_insert(block &b); - void hash_remove(block &b); void setup_control_block(block &b); block *find_unused_clean_block(); block *new_block(block_address index); @@ -163,6 +235,7 @@ namespace bcache { unsigned calc_nr_buckets(unsigned nr_blocks); void zero_block(block &b); block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v); + void exit_free_list(); void 
preemptive_writeback(); void release(block_cache::block &block); @@ -178,9 +251,8 @@ namespace bcache { uint64_t nr_data_blocks_; uint64_t nr_cache_blocks_; - // We can't use auto_ptr or unique_ptr because the memory is allocated with malloc - void *blocks_memory_; - void *blocks_data_; + std::unique_ptr> blocks_memory_; + unsigned char *blocks_data_; io_context_t aio_context_; std::vector events_; @@ -189,23 +261,23 @@ namespace bcache { * Blocks on the free list are not initialised, apart from the * b.data field. */ - list_head free_; - list_head errored_; - list_head dirty_; - list_head clean_; + block_list free_; + block_list errored_; + block_list dirty_; + block_list clean_; unsigned nr_locked_; unsigned nr_dirty_; unsigned nr_io_pending_; - struct list_head io_pending_; + block_list io_pending_; - /* - * Hash table fields. - */ - unsigned nr_buckets_; - unsigned mask_; - std::vector buckets_; + typedef bi::member_hook>, + &block::set_hook_> block_option; + typedef bi::set> block_set; + block_set block_set_; // Stats unsigned read_hits_; diff --git a/block-cache/copier.cc b/block-cache/copier.cc new file mode 100644 index 0000000..b5c21ef --- /dev/null +++ b/block-cache/copier.cc @@ -0,0 +1,192 @@ +#include "block-cache/copier.h" + +#include + +using namespace bcache; +using namespace boost; +using namespace std; + +//---------------------------------------------------------------- + +copier::copier(io_engine &engine, + string const &src, string const &dest, + sector_t block_size, size_t mem) + : pool_(block_size * 512, mem, PAGE_SIZE), + block_size_(block_size), + nr_blocks_(mem / block_size), + engine_(engine), + src_handle_(engine_.open_file(src, io_engine::M_READ_ONLY)), + dest_handle_(engine_.open_file(dest, io_engine::M_READ_WRITE)), + genkey_count_(0) +{ +} + +copier::~copier() +{ + engine_.close_file(src_handle_); + engine_.close_file(dest_handle_); +} + +void +copier::issue(copy_op const &op) +{ + void *data; + + while (!(data = pool_.alloc())) { + 
wait_(); + + // data may still not be present because the wait_ could + // have completed a read and issued the corresponding + // write. + } + + copy_job job(op, data); + job.op.read_complete = job.op.write_complete = false; + unsigned key = genkey(); // used as context for the io_engine + + auto r = engine_.issue_io(src_handle_, + io_engine::D_READ, + to_sector(op.src_b), + to_sector(op.src_e), + data, + key); + + if (r) + jobs_.insert(make_pair(key, job)); + + else + complete(job); +} + +unsigned +copier::nr_pending() const +{ + return jobs_.size() + complete_.size(); +} + +boost::optional +copier::wait() +{ + if (complete_.empty()) + wait_(); + + return wait_complete(); +} + +boost::optional +copier::wait(unsigned &micro) +{ + if (complete_.empty()) + wait_(micro); + return wait_complete(); +} + +bool +copier::pending() const +{ + return !jobs_.empty(); +} + +boost::optional +copier::wait_complete() +{ + if (complete_.empty()) { + return optional(); + + } else { + auto op = complete_.front(); + complete_.pop_front(); + return optional(op); + } +} + +void +copier::wait_(unsigned &micro) +{ + optional mp; + + if (!pending()) + return; + + + bool completed = false; + while (pending() && !completed) { + mp = engine_.wait(micro); + if (mp) + completed = wait_successful(*mp); + + if (!micro) + break; + } +} + +void +copier::wait_() +{ + bool completed = false; + + while (pending() && !completed) { + auto mp = engine_.wait(); + if (mp) + completed = wait_successful(*mp); + } +} + +bool +copier::wait_successful(io_engine::wait_result const &p) +{ + auto it = jobs_.find(p.second); + if (it == jobs_.end()) + throw runtime_error("Internal error. 
Lost track of copy job."); + + copy_job &j = it->second; + if (!p.first) { + // IO was unsuccessful + complete(j); + jobs_.erase(it); + return true; + } + + // IO was successful + if (!j.op.read_complete) { + j.op.read_complete = true; + if (!engine_.issue_io(dest_handle_, + io_engine::D_WRITE, + to_sector(j.op.dest_b), + to_sector(j.op.dest_b + (j.op.src_e - j.op.src_b)), + j.data, + it->first)) { + complete(j); + jobs_.erase(it); + return true; + } + return false; + + } else { + j.op.write_complete = true; + complete(j); + jobs_.erase(it); + return true; + } +} + +void +copier::complete(copy_job const &j) +{ + pool_.free(j.data); + complete_.push_back(j.op); +} + +sector_t +copier::to_sector(block_address b) const +{ + return b * block_size_; +} + +unsigned +copier::genkey() +{ + return genkey_count_++; +} + +//---------------------------------------------------------------- diff --git a/block-cache/copier.h b/block-cache/copier.h new file mode 100644 index 0000000..bd49966 --- /dev/null +++ b/block-cache/copier.h @@ -0,0 +1,106 @@ +#ifndef BLOCK_CACHE_COPIER_H +#define BLOCK_CACHE_COPIER_H + +#include "block-cache/io_engine.h" +#include "block-cache/mem_pool.h" + +#include +#include +#include + +//---------------------------------------------------------------- + +namespace bcache { + using block_address = uint64_t; + + struct copy_op { + copy_op() + : src_b(0), + src_e(0), + dest_b(0), + read_complete(false), + write_complete(false) { + } + + copy_op(block_address src_b_, + block_address src_e_, + block_address dest_b_) + : src_b(src_b_), + src_e(src_e_), + dest_b(dest_b_), + read_complete(false), + write_complete(false) { + } + + bool operator <(copy_op const &rhs) const { + return dest_b < rhs.dest_b; + } + + bool success() const { + return read_complete && write_complete; + } + + block_address src_b, src_e; + block_address dest_b; + + bool read_complete; + bool write_complete; + }; + + class copy_job { + public: + copy_job(copy_op const &op_, void *data_) + 
: op(op_), data(data_) { + } + + copy_op op; + void *data; + }; + + class copier { + public: + copier(io_engine &engine, + std::string const &src, std::string const &dest, + sector_t block_size, size_t mem); + ~copier(); + + sector_t get_block_size() const { + return block_size_; + } + + // Blocks if out of memory. + void issue(copy_op const &op); + + unsigned nr_pending() const; + boost::optional wait(); + boost::optional wait(unsigned &micro); + + private: + bool pending() const; + bool wait_successful(io_engine::wait_result const &p); + boost::optional wait_complete(); + void wait_(unsigned &micro); + void wait_(); + void complete(copy_job const &j); + + sector_t to_sector(block_address b) const; + unsigned genkey(); + + mempool pool_; + sector_t block_size_; + unsigned nr_blocks_; + io_engine &engine_; + io_engine::handle src_handle_; + io_engine::handle dest_handle_; + unsigned genkey_count_; + + using job_map = std::map; + using op_list = std::list; + job_map jobs_; + op_list complete_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/block-cache/io_engine.cc b/block-cache/io_engine.cc new file mode 100644 index 0000000..2fe4082 --- /dev/null +++ b/block-cache/io_engine.cc @@ -0,0 +1,199 @@ +#include "base/container_of.h" +#include "block-cache/io_engine.h" + +#include +#include +#include +#include +#include +#include + +using namespace bcache; +using namespace boost; +using namespace std; + +//---------------------------------------------------------------- + +control_block_set::control_block_set(unsigned nr) + : cbs_(nr) +{ + for (auto i = 0u; i < nr; i++) + free_cbs_.insert(i); +} + +iocb * +control_block_set::alloc(unsigned context) +{ + if (free_cbs_.empty()) + return nullptr; + + auto it = free_cbs_.begin(); + + cblock &cb = cbs_[*it]; + cb.context = context; + free_cbs_.erase(it); + + return &cb.cb; +} + +void +control_block_set::free(iocb *cb) +{ + cblock *b = base::container_of(cb, &cblock::cb); + unsigned 
index = b - &cbs_[0]; + free_cbs_.insert(index); +} + +unsigned +control_block_set::context(iocb *cb) const +{ + cblock *b = base::container_of(cb, &cblock::cb); + return b->context; +} + +//---------------------------------------------------------------- + +aio_engine::aio_engine(unsigned max_io) + : aio_context_(0), + cbs_(max_io) +{ + int r = io_setup(max_io, &aio_context_); + if (r < 0) + throw runtime_error("io_setup failed"); +} + +aio_engine::~aio_engine() +{ + io_destroy(aio_context_); +} + +aio_engine::handle +aio_engine::open_file(std::string const &path, mode m, sharing s) +{ + int flags = (m == M_READ_ONLY) ? O_RDONLY : O_RDWR; + if (s == EXCLUSIVE) + flags |= O_EXCL; + int fd = ::open(path.c_str(), O_DIRECT | flags); + if (fd < 0) { + ostringstream out; + out << "unable to open '" << path << "'"; + throw runtime_error(out.str()); + } + + descriptors_.push_back(base::unique_fd(fd)); + + return static_cast(fd); +} + +void +aio_engine::close_file(handle h) +{ + for (auto it = descriptors_.begin(); it != descriptors_.end(); ++it) { + unsigned it_h = it->get(); + if (it_h == h) { + descriptors_.erase(it); + return; + } + } + + ostringstream out; + out << "unknown descriptor (" << h << ")"; + throw runtime_error(out.str()); +} + +bool +aio_engine::issue_io(handle h, dir d, sector_t b, sector_t e, void *data, unsigned context) +{ + if (reinterpret_cast(data) & (PAGE_SIZE - 1)) + throw runtime_error("Data passed to issue_io must be page aligned\n"); + + iocb *cb; + + cb = cbs_.alloc(context); + if (!cb) + return false; + + memset(cb, 0, sizeof(*cb)); + + cb->aio_fildes = static_cast(h); + cb->u.c.buf = data; + cb->u.c.offset = b << SECTOR_SHIFT; + cb->u.c.nbytes = (e - b) << SECTOR_SHIFT; + cb->aio_lio_opcode = (d == D_READ) ? 
IO_CMD_PREAD : IO_CMD_PWRITE; + + int r = io_submit(aio_context_, 1, &cb); + return r == 1; +} + +optional +aio_engine::wait() +{ + return wait_(NULL); +} + +optional +aio_engine::wait(unsigned &microsec) +{ + timespec start = micro_to_ts(microsec); + timespec stop = start; + auto r = wait_(&stop); + microsec = ts_to_micro(stop) - microsec; + return r; +} + +boost::optional +aio_engine::wait_(timespec *ts) +{ + int r; + struct io_event event; + + memset(&event, 0, sizeof(event)); + r = io_getevents(aio_context_, 1, 1, &event, ts); + if (r < 0) { + std::ostringstream out; + out << "io_getevents failed: " << r; + throw std::runtime_error(out.str()); + } + + if (r == 0) { + return optional(); + } + + iocb *cb = reinterpret_cast(event.obj); + unsigned context = cbs_.context(cb); + + if (event.res == cb->u.c.nbytes) { + cbs_.free(cb); + return optional(make_pair(true, context)); + + } else if (static_cast(event.res) < 0) { + cbs_.free(cb); + return optional(make_pair(false, context)); + + } else { + cbs_.free(cb); + return optional(make_pair(false, context)); + } + + // shouldn't get here + return optional(make_pair(false, 0)); +} + +struct timespec +aio_engine::micro_to_ts(unsigned micro) +{ + timespec ts; + ts.tv_sec = micro / 1000000u; + ts.tv_nsec = (micro % 1000000) * 1000; + return ts; +} + +unsigned +aio_engine::ts_to_micro(timespec const &ts) +{ + unsigned micro = ts.tv_sec * 1000000; + micro += ts.tv_nsec / 1000; + return micro; +} + +//---------------------------------------------------------------- diff --git a/block-cache/io_engine.h b/block-cache/io_engine.h new file mode 100644 index 0000000..fd902c1 --- /dev/null +++ b/block-cache/io_engine.h @@ -0,0 +1,117 @@ +#ifndef BLOCK_CACHE_IO_ENGINE_H +#define BLOCK_CACHE_IO_ENGINE_H + +#include "base/unique_handle.h" + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------- + +namespace bcache { + using sector_t = uint64_t; + + unsigned const SECTOR_SHIFT 
= 9; + unsigned const PAGE_SIZE = 4096; + + // Virtual base class to aid unit testing + class io_engine { + public: + enum mode { + M_READ_ONLY, + M_READ_WRITE + }; + + enum dir { + D_READ, + D_WRITE + }; + + enum sharing { + EXCLUSIVE, + SHARED + }; + + io_engine() {} + virtual ~io_engine() {} + + using handle = unsigned; + + virtual handle open_file(std::string const &path, mode m, sharing s = EXCLUSIVE) = 0; + virtual void close_file(handle h) = 0; + + // returns false if there are insufficient resources to + // queue the IO + virtual bool issue_io(handle h, dir d, sector_t b, sector_t e, void *data, unsigned context) = 0; + + // returns (success, context) + using wait_result = std::pair; + virtual boost::optional wait() = 0; + virtual boost::optional wait(unsigned &microsec) = 0; + + private: + io_engine(io_engine const &) = delete; + io_engine &operator =(io_engine const &) = delete; + }; + + //-------------------------------- + + class control_block_set { + public: + control_block_set(unsigned nr); + + iocb *alloc(unsigned context); + void free(iocb *); + + unsigned context(iocb *) const; + + private: + struct cblock { + unsigned context; + struct iocb cb; + }; + + std::set free_cbs_; + std::vector cbs_; + }; + + //---------------- + + class aio_engine : public io_engine { + public: + // max_io is the maximum nr of concurrent ios expected + aio_engine(unsigned max_io); + ~aio_engine(); + + using handle = unsigned; + + virtual handle open_file(std::string const &path, mode m, sharing s = EXCLUSIVE); + virtual void close_file(handle h); + + // Returns false if queueing the io failed + virtual bool issue_io(handle h, dir d, sector_t b, sector_t e, void *data, unsigned context); + + virtual boost::optional wait(); + virtual boost::optional wait(unsigned &microsec); + + private: + static struct timespec micro_to_ts(unsigned micro); + static unsigned ts_to_micro(timespec const &ts); + boost::optional wait_(timespec *ts); + + std::list descriptors_; + + io_context_t 
aio_context_; + control_block_set cbs_; + + aio_engine(io_engine const &) = delete; + aio_engine &operator =(io_engine const &) = delete; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/block-cache/mem_pool.cc b/block-cache/mem_pool.cc new file mode 100644 index 0000000..03f9455 --- /dev/null +++ b/block-cache/mem_pool.cc @@ -0,0 +1,62 @@ +#include "block-cache/mem_pool.h" + +#include +#include +#include + +using namespace bcache; +using namespace boost; +using namespace mempool_detail; +using namespace std; + +//---------------------------------------------------------------- + +mempool::mempool(size_t block_size, size_t total_mem, size_t alignment) +{ + mem_ = alloc_aligned(total_mem, alignment); + + unsigned nr_blocks = total_mem / block_size; + for (auto i = 0u; i < nr_blocks; i++) + free(static_cast(mem_) + (block_size * i)); +} + +mempool::~mempool() +{ + free_.clear(); + ::free(mem_); +} + +void * +mempool::alloc() +{ + if (free_.empty()) + return nullptr; + + mempool_detail::alloc_block &b = free_.front(); + free_.pop_front(); + return reinterpret_cast(&b); +} + +void +mempool::free(void *data) +{ + mempool_detail::alloc_block *b = reinterpret_cast(data); + free_.push_front(*b); +} + +void * +mempool::alloc_aligned(size_t len, size_t alignment) +{ + void *result = NULL; + int r = posix_memalign(&result, alignment, len); + if (r) { + ostringstream out; + out << "posix_memalign failed: len = " << len << ", alignment = " << alignment << ", r = " << r << "\n"; + throw runtime_error(out.str()); + } + + return result; +} + +//---------------------------------------------------------------- + diff --git a/block-cache/mem_pool.h b/block-cache/mem_pool.h new file mode 100644 index 0000000..276a314 --- /dev/null +++ b/block-cache/mem_pool.h @@ -0,0 +1,46 @@ +#ifndef BLOCK_CACHE_MEM_POOL_H +#define BLOCK_CACHE_MEM_POOL_H + +#include +#include +#include + +namespace bi = boost::intrusive; + 
+//---------------------------------------------------------------- + +namespace bcache { + // FIXME: move to base? + + namespace mempool_detail { + struct alloc_block : public bi::list_base_hook> { + }; + }; + + class mempool { + public: + // alignment must be a power of 2 + mempool(size_t block_size, size_t total_mem, size_t alignment = 8); + ~mempool(); + + void *alloc(); + void free(void *data); + + private: + static void *alloc_aligned(size_t len, size_t alignment); + + using block_list = bi::list; + + void *mem_; + block_list free_; + + //---------------- + + mempool(mempool const &) = delete; + mempool &operator =(mempool const &) = delete; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/caching/cache_restore.cc b/caching/cache_restore.cc index efa8e98..6eb42b4 100644 --- a/caching/cache_restore.cc +++ b/caching/cache_restore.cc @@ -33,7 +33,7 @@ namespace { return info.st_size; } - auto_ptr create_monitor(bool quiet) { + unique_ptr create_monitor(bool quiet) { if (!quiet && isatty(fileno(stdout))) return create_progress_bar("Restoring"); else @@ -71,7 +71,7 @@ namespace { check_file_exists(*fs.input); ifstream in(fs.input->c_str(), ifstream::in); - auto_ptr monitor = create_monitor(fs.quiet); + unique_ptr monitor = create_monitor(fs.quiet); parse_xml(in, restorer, get_file_length(*fs.input), *monitor); } catch (std::exception &e) { diff --git a/caching/cache_writeback.cc b/caching/cache_writeback.cc new file mode 100644 index 0000000..be7a9a7 --- /dev/null +++ b/caching/cache_writeback.cc @@ -0,0 +1,460 @@ +#include "base/progress_monitor.h" +#include "persistent-data/file_utils.h" +#include "block-cache/copier.h" +#include "caching/commands.h" +#include "caching/mapping_array.h" +#include "caching/metadata.h" +#include "version.h" + +#include +#include +#include +#include +#include + +using namespace bcache; +using namespace caching; +using namespace boost; +using namespace std; + 
+//---------------------------------------------------------------- + +namespace { + + template T safe_div(T const n, T const d, T const def) { + return (d == T()) ? def : (n / d); + } + + //-------------------------------- + + struct flags { + flags() + : cache_size(4 * 1024 * 1024), + sort_buffers(16 * 1024), + list_failed_blocks(false), + update_metadata(true) { + } + + // The sort buffers have a dramatic effect on the + // performance. We give up 10% of the general buffer space + // for them. + void calc_sort_buffer_size() { + size_t sbs = cache_size / 10; + cache_size = cache_size - sbs; + + sort_buffers = sbs / sizeof(copy_op); + } + + using maybe_string = boost::optional; + + size_t cache_size; + unsigned sort_buffers; + maybe_string metadata_dev; + maybe_string origin_dev; + maybe_string fast_dev; + bool list_failed_blocks; + bool update_metadata; + }; + + //-------------------------------- + + class copy_batch { + public: + copy_batch(unsigned nr) + : max_(nr), + count_(0), + ops_(nr) { + } + + bool space() const { + return count_ < max_; + } + + void push_op(copy_op const &op) { + if (!space()) + throw runtime_error("copy_batch out of space"); + + ops_[count_++] = op; + } + + void reset() { + count_ = 0; + } + + vector::iterator begin() { + return ops_.begin(); + } + + vector::iterator end() { + return ops_.begin() + count_; + } + + private: + unsigned max_; + unsigned count_; + vector ops_; + }; + + class copy_visitor : public mapping_visitor { + public: + copy_visitor(copier &c, unsigned sort_buffer, bool only_dirty, + bool list_failed_blocks, + progress_monitor &monitor, unsigned cache_blocks) + : copier_(c), + block_size_(c.get_block_size()), + only_dirty_(only_dirty), + list_failed_blocks_(list_failed_blocks), + batch_(sort_buffer), + monitor_(monitor), + cache_blocks_(cache_blocks) { + } + + virtual void visit(block_address cblock, mapping const &m) { + stats_.blocks_scanned = cblock; + update_monitor(); + + if (!(m.flags_ & M_VALID)) + return; + + 
if (only_dirty_ && !(m.flags_ & M_DIRTY)) + return; + + copy_op cop; + cop.src_b = cblock; + cop.src_e = cblock + 1ull; + cop.dest_b = m.oblock_; + + // blocks + stats_.blocks_needed++; + batch_.push_op(cop); + if (!batch_.space()) + issue(); + } + + void issue() { + auto compare_dest = [](copy_op const &lhs, copy_op const &rhs) { + return lhs.dest_b < rhs.dest_b; + }; + sort(batch_.begin(), batch_.end(), compare_dest); + + auto e = batch_.end(); + for (auto it = batch_.begin(); it != e; ++it) { + copier_.issue(*it); + stats_.blocks_issued++; + update_monitor(); + + check_for_completed_copies(); + } + check_for_completed_copies(); + + batch_.reset(); + } + + void check_for_completed_copies(bool block = false) { + optional mop; + + do { + if (block) + mop = copier_.wait(); + + else { + unsigned micro = 0; + mop = copier_.wait(micro); + } + + if (mop) { + inc_completed(*mop); + if (!mop->success()) { + failed_blocks_.insert(*mop); + failed_cblocks_.insert(mop->src_b); + } + } + + } while (mop); + } + + void complete() { + issue(); + + while (copier_.nr_pending()) + check_for_completed_copies(true); + + monitor_.update_percent(100); + cerr << "\n"; + } + + void inc_completed(copy_op const &op) { + stats_.blocks_completed++; + update_monitor(); + } + + void update_monitor() { + static unsigned call_count = 0; + if (call_count++ % 128) + return; + + uint64_t scanned = stats_.blocks_scanned * 100 / cache_blocks_; + uint64_t copied = safe_div(stats_.blocks_completed * 100, + stats_.blocks_needed, 100ull); + uint64_t percent = min(scanned, copied); + monitor_.update_percent(percent); + } + + struct copy_stats { + copy_stats() + : blocks_scanned(0), + blocks_needed(0), + blocks_issued(0), + blocks_completed(0), + blocks_failed(0) { + } + + block_address blocks_scanned; + block_address blocks_needed; + block_address blocks_issued; + block_address blocks_completed; + block_address blocks_failed; + }; + + copy_stats const &get_stats() const { + return stats_; + } + + set 
failed_writebacks() const { + return failed_cblocks_; + } + + private: + copier &copier_; + unsigned block_size_; + bool only_dirty_; + bool list_failed_blocks_; + + copy_stats stats_; + copy_batch batch_; + progress_monitor &monitor_; + unsigned cache_blocks_; + + set failed_blocks_; + set failed_cblocks_; + }; + + //-------------------------------- + + using namespace mapping_array_damage; + + class ignore_damage_visitor : public damage_visitor { + public: + ignore_damage_visitor() + : corruption_(false) { + } + + void visit(missing_mappings const &d) { + cerr << "missing mappings (" << d.keys_.begin_ << ", " << d.keys_.end_ << "]\n"; + corruption_ = true; + } + + void visit(invalid_mapping const &d) { + cerr << "invalid mapping cblock = " << d.cblock_ << ", oblock = " << d.m_.oblock_ << "\n"; + corruption_ = true; + } + + bool was_corruption() const { + return corruption_; + } + + private: + bool corruption_; + }; + + bool clean_shutdown(metadata const &md) { + return md.sb_.flags.get_flag(superblock_flags::CLEAN_SHUTDOWN); + } + + void update_metadata(metadata &md, set const &failed_writebacks) { + cout << "Updating metadata ... 
"; + + cout.flush(); + + auto &mappings = md.mappings_; + for (block_address cblock = 0; cblock < mappings->get_nr_entries(); cblock++) { + auto m = mappings->get(cblock); + if (!(m.flags_ & M_VALID)) + continue; + + if (!(m.flags_ & M_DIRTY)) + continue; + + if (failed_writebacks.count(cblock)) + continue; + + m.flags_ &= ~M_DIRTY; + cerr << "clearing dirty flag for block " << cblock << "\n"; + mappings->set(cblock, m); + } + md.commit(true); + cout << "done\n"; + cout.flush(); + } + + int writeback_(flags const &f) { + block_manager<>::ptr bm = open_bm(*f.metadata_dev, block_manager<>::READ_WRITE); + metadata md(bm, metadata::OPEN); + + // FIXME: we're going to have to copy runs to get the through put with small block sizes + unsigned max_ios = f.cache_size / (md.sb_.data_block_size << SECTOR_SHIFT); + aio_engine engine(max_ios); + copier c(engine, *f.fast_dev, *f.origin_dev, + md.sb_.data_block_size, f.cache_size); + + auto bar = create_progress_bar("Copying data"); + copy_visitor cv(c, f.sort_buffers, clean_shutdown(md), f.list_failed_blocks, + *bar, md.sb_.cache_blocks); + + ignore_damage_visitor dv; + + walk_mapping_array(*md.mappings_, cv, dv); + cv.complete(); + + auto stats = cv.get_stats(); + cout << stats.blocks_issued - stats.blocks_failed << "/" + << stats.blocks_issued << " blocks successfully copied.\n"; + + if (stats.blocks_failed) + cout << stats.blocks_failed << " blocks were not copied\n"; + + if (dv.was_corruption()) { + cout << "Metadata corruption was found, some data may not have been copied.\n"; + if (f.update_metadata) + cout << "Unable to update metadata.\n"; + + } else if (f.update_metadata) + update_metadata(md, cv.failed_writebacks()); + + return (stats.blocks_failed || dv.was_corruption()) ? 
1 : 0; + } + + int writeback(flags const &f) { + int r; + + try { + r = writeback_(f); + + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + + return r; + } +} + +//---------------------------------------------------------------- + +cache_writeback_cmd::cache_writeback_cmd() + : command("cache_writeback") +{ +} + +void +cache_writeback_cmd::usage(std::ostream &out) const +{ + out << "Usage: " << get_name() << " [options]\n" + << "\t\t--metadata-device \n" + << "\t\t--origin-device \n" + << "\t\t--fast-device \n" + << "\t\t--buffer-size-meg \n" + << "\t\t--list-failed-blocks\n" + << "\t\t--no-metadata-update\n" + << "Options:\n" + << " {-h|--help}\n" + << " {-V|--version}" << endl; +} + +int +cache_writeback_cmd::run(int argc, char **argv) +{ + int c; + flags fs; + char const *short_opts = "hV"; + option const long_opts[] = { + { "metadata-device", required_argument, NULL, 0 }, + { "origin-device", required_argument, NULL, 1 }, + { "fast-device", required_argument, NULL, 2 }, + { "buffer-size-meg", required_argument, NULL, 3 }, + { "list-failed-blocks", no_argument, NULL, 4 }, + { "no-metadata-update", no_argument, NULL, 5 }, + { "help", no_argument, NULL, 'h'}, + { "version", no_argument, NULL, 'V'}, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { + switch(c) { + case 0: + fs.metadata_dev = optarg; + break; + + case 1: + fs.origin_dev = optarg; + break; + + case 2: + fs.fast_dev = optarg; + break; + + case 3: + fs.cache_size = parse_uint64(optarg, "buffer size") * 1024 * 1024; + break; + + case 4: + fs.list_failed_blocks = true; + break; + + case 5: + fs.update_metadata = false; + break; + + case 'h': + usage(cout); + return 0; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + default: + usage(cerr); + return 1; + } + } + + fs.calc_sort_buffer_size(); + + if (argc != optind) { + usage(cerr); + return 1; + } + + if (!fs.metadata_dev) { + cerr 
<< "No metadata device provided.\n\n"; + usage(cerr); + return 1; + } + + if (!fs.origin_dev) { + cerr << "No origin device provided.\n\n"; + usage(cerr); + return 1; + } + + if (!fs.fast_dev) { + cerr << "No fast device provided.\n\n"; + usage(cerr); + return 1; + } + + return writeback(fs); +} + +//---------------------------------------------------------------- diff --git a/caching/commands.cc b/caching/commands.cc index 1ab79e6..b150a92 100644 --- a/caching/commands.cc +++ b/caching/commands.cc @@ -13,6 +13,7 @@ caching::register_cache_commands(application &app) app.add_cmd(command::ptr(new cache_metadata_size_cmd)); app.add_cmd(command::ptr(new cache_restore_cmd)); app.add_cmd(command::ptr(new cache_repair_cmd)); + app.add_cmd(command::ptr(new cache_writeback_cmd)); } //---------------------------------------------------------------- diff --git a/caching/commands.h b/caching/commands.h index 022ac06..b0546ab 100644 --- a/caching/commands.h +++ b/caching/commands.h @@ -63,6 +63,13 @@ namespace caching { virtual int run(int argc, char **argv); }; + class cache_writeback_cmd : public base::command { + public: + cache_writeback_cmd(); + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + void register_cache_commands(base::application &app); } diff --git a/caching/superblock.cc b/caching/superblock.cc index 2edd6c1..3270b3f 100644 --- a/caching/superblock.cc +++ b/caching/superblock.cc @@ -292,6 +292,15 @@ namespace validator { throw checksum_error("bad checksum in superblock"); } + virtual bool check_raw(void const *raw) const { + superblock_disk const *sbd = reinterpret_cast(raw); + crc32c sum(SUPERBLOCK_CSUM_SEED); + sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(sbd->csum)) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { superblock_disk *sbd = reinterpret_cast(raw); crc32c sum(SUPERBLOCK_CSUM_SEED); diff --git a/configure.ac 
b/configure.ac index 0eef6ab..efca586 100644 --- a/configure.ac +++ b/configure.ac @@ -137,6 +137,14 @@ AC_ARG_ENABLE(testing, TESTING=$enableval, TESTING=no) AC_MSG_RESULT($TESTING) +################################################################################ +dnl -- Enable development tools +AC_MSG_CHECKING(whether to enable development tools) +AC_ARG_ENABLE(dev-tools, + AC_HELP_STRING(--enable-dev-tools, [enable development tools in the makefile]), + DEVTOOLS=$enableval, DEVTOOLS=no) +AC_MSG_RESULT($DEVTOOLS) + ################################################################################ dnl -- Enable static libstdc++ AC_MSG_CHECKING(whether to statically link libstdc++) @@ -181,6 +189,7 @@ AC_SUBST(RELEASE_DATE) AC_SUBST(TESTING) AC_SUBST(THIN_PROVISIONING_TOOLS_VERSION) AC_SUBST(STATIC_CXX) +AC_SUBST(DEVTOOLS) AC_SUBST(STATIC) ################################################################################ @@ -188,6 +197,7 @@ dnl -- First and last lines should not contain files to generate in order to dnl -- keep utility scripts running properly AC_CONFIG_FILES([ Makefile +contrib/Makefile unit-tests/Makefile version.h ]) diff --git a/contrib/Makefile.in b/contrib/Makefile.in new file mode 100644 index 0000000..c093ca7 --- /dev/null +++ b/contrib/Makefile.in @@ -0,0 +1,24 @@ +PLUGIN_LIBS= \ + contrib/thin_sexp_emitter.a \ + contrib/tmakatos_emitter.a \ + contrib/ewheeler_emitter.a + +PLUGINS=\ + contrib/thin_sexp_emitter.so \ + contrib/tmakatos_emitter.so \ + contrib/ewheeler_emitter.so + +contrib: $(PLUGINS) $(PLUGIN_LIBS) + +contrib/%.o: contrib/%.cc + $(V)echo " [CC] $@" + $(V)$(CXX) $(INCLUDES) $(CXXFLAGS) $^ -c -o $@ + +contrib/%.a: contrib/%.o + $(V)echo " [AR] $@" + $(V)ar rcs $@ $^ + +contrib/%.so: contrib/%.a + $(V)echo " [LD] $@" + $(V)$(CC) -shared -Wl,-soname,$@ -o $@ $< + diff --git a/contrib/ewheeler_emitter.cc b/contrib/ewheeler_emitter.cc new file mode 100644 index 0000000..fdc310d --- /dev/null +++ b/contrib/ewheeler_emitter.cc @@ -0,0 
+1,84 @@ +#include "thin-provisioning/emitter.h" + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + template + std::ostream &operator << (ostream &out, boost::optional const &maybe) { + if (maybe) + out << *maybe; + + return out; + } + + class old_emitter : public emitter { + public: + old_emitter(ostream &out) + : out_(out) { + } + + void begin_superblock(string const &uuid, + uint64_t time, + uint64_t trans_id, + boost::optional flags, + boost::optional version, + uint32_t data_block_size, + uint64_t nr_data_blocks, + boost::optional metadata_snap) { + data_block_size_ = data_block_size; + } + + void end_superblock() { + } + + void begin_device(uint32_t dev_id, + uint64_t mapped_blocks, + uint64_t trans_id, + uint64_t creation_time, + uint64_t snap_time) { + } + + void end_device() { + } + + void begin_named_mapping(string const &name) { + } + + void end_named_mapping() { + } + + void identifier(string const &name) { + } + + void range_map(uint64_t origin_begin, uint64_t data_begin, uint32_t time, uint64_t len) { + out_ << (data_block_size_ << 9)*origin_begin + << ":" << (data_block_size_ << 9)*len + << ":" << (data_block_size_ << 9)*data_begin + << endl; + } + + void single_map(uint64_t origin_block, uint64_t data_block, uint32_t time) { + out_ << (data_block_size_ << 9)*origin_block + << ":" << (data_block_size_ << 9) + << ":" << (data_block_size_ << 9)*data_block + << endl; + } + + private: + ostream &out_; + uint64_t data_block_size_; + }; +} + +//---------------------------------------------------------------- + +extern "C" { + emitter::ptr create_emitter(ostream &out) { + return emitter::ptr(new old_emitter(out)); + } +} + +//---------------------------------------------------------------- diff --git a/contrib/thin_sexp_emitter.cc b/contrib/thin_sexp_emitter.cc new file mode 100644 index 0000000..281f0ec --- /dev/null +++ b/contrib/thin_sexp_emitter.cc @@ -0,0 
+1,120 @@ +#include "base/indented_stream.h" +#include "thin-provisioning/emitter.h" + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + class sexp_emitter : public emitter { + public: + sexp_emitter(ostream &out) + : out_(out) { + } + + virtual void begin_superblock(std::string const &uuid, + uint64_t time, + uint64_t trans_id, + boost::optional flags, + boost::optional version, + uint32_t data_block_size, + uint64_t nr_data_blocks, + boost::optional metadata_snap) { + open("superblock"); + out_.indent(); + out_ << "((uuid \"" << uuid << "\")\n"; + kv("time", time); + kv("trans_id", trans_id); + kv("flags", flags); + kv("version", version); + kv("data_block_size", data_block_size); + kv("nr_data_blocks", nr_data_blocks); + kv("metadata_snap", metadata_snap); + out_.indent(); + out_ << ")\n"; + } + + virtual void end_superblock() { + close(); + } + + virtual void begin_device(uint32_t dev_id, + uint64_t mapped_blocks, + uint64_t trans_id, + uint64_t creation_time, + uint64_t snap_time) { + open("device"); + out_.indent(); + out_ << "((dev_id " << dev_id << ")\n"; + kv("mapped_blocks", mapped_blocks); + kv("trans_id", trans_id); + kv("creation_time", creation_time); + kv("snap_time", snap_time); + out_.indent(); + out_ << ")\n"; + } + + virtual void end_device() { + close(); + } + + virtual void begin_named_mapping(std::string const &name) { + + } + + virtual void end_named_mapping() { + + } + + virtual void identifier(std::string const &name) { + + } + + virtual void range_map(uint64_t origin_begin, uint64_t data_begin, uint32_t time, uint64_t len) { + out_.indent(); + out_ << "(range (origin_begin " << origin_begin + << ") (data_begin " << data_begin + << ") (time " << time + << ") (len " << len << "))\n"; + } + + virtual void single_map(uint64_t origin_block, uint64_t data_block, uint32_t time) { + out_.indent(); + out_ << "(single (origin_block " << origin_block + << 
") (data_block " << data_block + << ") (time " << time << "))\n"; + } + + private: + void open(char const *tag) { + out_.indent(); + out_ << "(" << tag << "\n"; + out_.inc(); + } + + void close() { + out_.dec(); + out_.indent(); + out_ << ")\n"; + } + + template + void kv(char const *k, T const &v) { + out_.indent(); + out_ << " (" << k << " " << v << ")\n"; + } + + indented_stream out_; + }; +} + +//---------------------------------------------------------------- + +extern "C" { + emitter::ptr create_emitter(ostream &out) { + return emitter::ptr(new sexp_emitter(out)); + } +} + +//---------------------------------------------------------------- diff --git a/contrib/tmakatos_emitter.cc b/contrib/tmakatos_emitter.cc new file mode 100644 index 0000000..c2b1118 --- /dev/null +++ b/contrib/tmakatos_emitter.cc @@ -0,0 +1,129 @@ +#include "thin-provisioning/emitter.h" +#include "contrib/tmakatos_emitter.h" + +#include +#include + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace tmakatos_emitter { + template + std::ostream &operator << (ostream &out, boost::optional const &maybe) { + if (maybe) + out << *maybe; + + return out; + } + + //------------------------------------------------ + // binary generator + //------------------------------------------------ + binary_emitter::binary_emitter(ostream &out): out_(out) { + } + + void binary_emitter::begin_superblock(string const &uuid, + uint64_t time, + uint64_t trans_id, + boost::optional flags, + boost::optional version, + uint32_t data_block_size, + uint64_t nr_data_blocks, + boost::optional metadata_snap) { + } + + void binary_emitter::end_superblock() { + } + + void binary_emitter::begin_device(uint32_t dev_id, + uint64_t mapped_blocks, + uint64_t trans_id, + uint64_t creation_time, + uint64_t snap_time) { + cur = 0; + bitmap = 0; + } + + void binary_emitter::end_device() { + emit_bmp(true); + } + + void 
binary_emitter::begin_named_mapping(string const &name) { } + + void binary_emitter::end_named_mapping() { } + + void binary_emitter::identifier(string const &name) { } + + void binary_emitter::range_map(uint64_t origin_begin, uint64_t, uint32_t, + uint64_t len) { + + uint64_t n = origin_begin / unit; + uint64_t i; + + assert(n >= cur); + assert(len > 0); + + /* + * Cover the gap between the last emitted unit and the current one. + */ + if (n > cur) + do { emit_bmp(); } while (cur < n); + + /* + * Emit partial unit. + */ + if (origin_begin & (unit - 1)) { + const uint64_t j = min(len, + (origin_begin & ~(unit - 1)) + unit - origin_begin); + for (i = origin_begin; i < origin_begin + j; i++) + bitmap |= 1ULL << (i & (unit - 1)); + if (j == len) + return; + + emit_bmp(); + + len -= j; + origin_begin = i; + } + + /* + * Emit full units until end. + */ + n = (origin_begin + len) / unit; + while (cur < n) { + bitmap = ~0; + emit_bmp(); + len -= unit; + } + origin_begin = cur * unit; + + /* + * Emit final unit. 
+ */ + for (i = origin_begin; i < origin_begin + len; i++) + bitmap |= 1ULL << (i & (unit - 1)); + } + + void binary_emitter::single_map(uint64_t origin_block, uint64_t, uint32_t) { + range_map(origin_block, 0, 0, 1); + } + + void binary_emitter::emit_bmp(bool omit_if_zero) { + if (bitmap || !omit_if_zero) + out_.write((const char*)&bitmap, sizeof bitmap); + bitmap = 0; + cur++; + } +} + +//---------------------------------------------------------------- + +extern "C" { + emitter::ptr create_emitter(ostream &out) { + return emitter::ptr(new tmakatos_emitter::binary_emitter(out)); + } +} + +//---------------------------------------------------------------- diff --git a/contrib/tmakatos_emitter.h b/contrib/tmakatos_emitter.h new file mode 100644 index 0000000..51919b8 --- /dev/null +++ b/contrib/tmakatos_emitter.h @@ -0,0 +1,69 @@ +#ifndef _TMAKATOS_EMITTER_H_ +#define _TMAKATOS_EMITTER_H_ + +#include "thin-provisioning/emitter.h" +#include + +using namespace std; +using namespace thin_provisioning; + +namespace tmakatos_emitter { + class binary_emitter : public emitter { + public: + binary_emitter(ostream &out); + + void begin_superblock(string const &uuid, + uint64_t time, + uint64_t trans_id, + boost::optional flags, + boost::optional version, + uint32_t data_block_size, + uint64_t nr_data_blocks, + boost::optional metadata_snap); + + void end_superblock(); + + void begin_device(uint32_t dev_id, + uint64_t mapped_blocks, + uint64_t trans_id, + uint64_t creation_time, + uint64_t snap_time); + + void end_device(); + + void begin_named_mapping(string const &name); + + void end_named_mapping(); + + void identifier(string const &name); + + void range_map(uint64_t origin_begin, uint64_t, uint32_t, + uint64_t len); + + void single_map(uint64_t origin_block, uint64_t, uint32_t); + + private: + ostream &out_; + + /** + * The entire virtual block allocation bitmap is segmented into 64-bit + * sub-bitmaps (units). 
+ */ + uint64_t bitmap; + + /* + * Pointer to the current sub-bitmap (unit) that has not yet been + * emitted. + */ + uint64_t cur; + + /** + * Unit (sub-bitmap) size. Must be a power of 2. + */ + static const size_t unit = sizeof bitmap * CHAR_BIT; + + void emit_bmp(bool omit_if_zero = false); + }; +} + +#endif /* _TMAKATOS_EMITTER_H_ */ diff --git a/era/superblock.cc b/era/superblock.cc index e013064..f61a542 100644 --- a/era/superblock.cc +++ b/era/superblock.cc @@ -219,6 +219,15 @@ namespace era_validator { throw checksum_error("bad checksum in superblock"); } + virtual bool check_raw(void const *raw) const { + superblock_disk const *sbd = reinterpret_cast(raw); + crc32c sum(SUPERBLOCK_CSUM_SEED); + sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(sbd->csum)) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { superblock_disk *sbd = reinterpret_cast(raw); crc32c sum(SUPERBLOCK_CSUM_SEED); diff --git a/get-gmock.sh b/get-gmock.sh index 5335350..2e5d2ae 100755 --- a/get-gmock.sh +++ b/get-gmock.sh @@ -1,6 +1,3 @@ #!/bin/sh -e -wget https://googlemock.googlecode.com/files/gmock-1.6.0.zip -unzip gmock-1.6.0.zip -cd gmock-1.6.0 -./configure +git clone https://github.com/google/googletest diff --git a/man8/cache_writeback.8 b/man8/cache_writeback.8 new file mode 100644 index 0000000..525b63a --- /dev/null +++ b/man8/cache_writeback.8 @@ -0,0 +1,53 @@ +.TH CACHE_WRITEBACK 8 "Thin Provisioning Tools" "Red Hat, Inc." \" -*- nroff -*- +.SH NAME +cache_writeback \- writeback dirty blocks to the origin device. + +.SH SYNOPSIS +.B cache_writeback +.RB [ options ] +.RB --metadata-device +.I {device|file} +.RB --origin-device +.I {device|file} +.RB --fast-device +.I {device|file} + +.SH DESCRIPTION +.B cache_writeback + +An offline tool that writesback dirty data to the data device +(origin). Intended for use in recovery scenarios when the SSD is +giving IO errors. 
+ +This tool cannot be run on a live cache. + +.SH OPTIONS + +.IP "\fB\\-\-metadata\-device\fP \fI{device|file}\fP" +Location of cache metadata. + +.IP "\fB\-\-origin\-device\fP \fI{device|file}\fP" +Slow device being cached. + +.IP "\fB\-\-fast\-device\fP \fI{device|file}\fP" +Fast device containing the data that needs to be written back. + +.IP "\fB\-\-skip\-metadata\-update\fP" +Do not update the metadata to clear the dirty flags for written back +data. You may not want to do this if you're decommissioning the +cache. + +.IP "\fB\-h, \-\-help\fP" +Print help and exit. + +.IP "\fB\-V, \-\-version\fP" +Output version information and exit. + +.SH SEE ALSO +.B cache_dump(8) +.B cache_check(8) +.B cache_repair(8) +.B cache_restore(8) + +.SH AUTHOR +Joe Thornber diff --git a/man8/thin_dump.8 b/man8/thin_dump.8 index 7a9f785..eb81c29 100644 --- a/man8/thin_dump.8 +++ b/man8/thin_dump.8 @@ -26,8 +26,13 @@ in order to put it back onto a metadata This tool cannot be run on live metadata unless the \fB\-\-metadata\-snap\fP option is used. -.IP "\fB\-f, \-\-format\fP \fI{xml|human_readable}\fP". -Print output in XML or human readable format. +.IP "\fB\-f, \-\-format\fP \fI{xml|human_readable|custom}\fP". + +Print output in XML or human readable format. Custom formats are +supported via shared library plugins. They should be specified as in +this example: +.sp +.B thin_dump --format custom=mylib.so /dev/sda .IP "\fB\-r, \-\-repair\fP". Repair the metadata whilst dumping it. @@ -39,6 +44,13 @@ the thin provisioning device-mapper target, else try the one at block#. See the thin provisioning target documentation on how to create or release a metadata snapshot and retrieve the block number from the kernel. +.IP "\fB\-\-dev\-id\fP ". +Dump the specified device. This option may be specified multiple +times to select more than one thin device. + +.IP "\fB\-\-skip\-mappings". +Do not dump the mappings. + .IP "\fB\-h, \-\-help\fP". Print help and exit. 
diff --git a/persistent-data/block.tcc b/persistent-data/block.tcc index 6e47a91..02a4117 100644 --- a/persistent-data/block.tcc +++ b/persistent-data/block.tcc @@ -19,6 +19,7 @@ #include "block.h" #include "base/error_string.h" +#include "block-cache/io_engine.h" #include #include @@ -38,8 +39,6 @@ namespace { using namespace std; int const DEFAULT_MODE = 0666; - unsigned const SECTOR_SHIFT = 9; - int const OPEN_FLAGS = O_DIRECT; // FIXME: introduce a new exception for this, or at least lift this @@ -223,6 +222,7 @@ namespace persistent_data { unsigned max_concurrent_blocks, mode m, bool excl) + // FIXME: * BlockSize ? : fd_(open_or_create_block_file(path, nr_blocks * BlockSize, m, excl)), bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 16), superblock_ref_count_(0) diff --git a/persistent-data/block_counter.h b/persistent-data/block_counter.h index e4232b4..ea70f93 100644 --- a/persistent-data/block_counter.h +++ b/persistent-data/block_counter.h @@ -20,6 +20,7 @@ #define BLOCK_COUNTER_H #include "block.h" +#include "run_set.h" //---------------------------------------------------------------- @@ -32,7 +33,9 @@ namespace persistent_data { public: typedef std::map count_map; - void inc(block_address b) { + virtual ~block_counter() {} + + virtual void inc(block_address b) { count_map::iterator it = counts_.find(b); if (it == counts_.end()) counts_.insert(make_pair(b, 1)); @@ -40,7 +43,7 @@ namespace persistent_data { it->second++; } - unsigned get_count(block_address b) const { + virtual unsigned get_count(block_address b) const { count_map::const_iterator it = counts_.find(b); return (it == counts_.end()) ? 0 : it->second; } @@ -52,6 +55,29 @@ namespace persistent_data { private: count_map counts_; }; + + //---------------------------------------------------------------- + // Little helper class that keeps track of which blocks + // are referenced. 
+ //---------------------------------------------------------------- + class binary_block_counter : public block_counter { + public: + virtual ~binary_block_counter() {} + + virtual void inc(block_address b) { + visited_.add(b); + } + + virtual unsigned get_count(block_address b) const { + return visited_.member(b) ? 1 : 0; + } + + base::run_set const& get_visited() const { + return visited_; + } + private: + base::run_set visited_; + }; } //---------------------------------------------------------------- diff --git a/persistent-data/data-structures/array.h b/persistent-data/data-structures/array.h index 1b87160..d7b1f70 100644 --- a/persistent-data/data-structures/array.h +++ b/persistent-data/data-structures/array.h @@ -43,6 +43,15 @@ namespace persistent_data { throw checksum_error("bad block nr in array block"); } + virtual bool check_raw(void const *raw) const { + array_block_disk const *data = reinterpret_cast(raw); + crc32c sum(ARRAY_CSUM_XOR); + sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(data->csum)) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { array_block_disk *data = reinterpret_cast(raw); data->blocknr = to_disk(location); diff --git a/persistent-data/data-structures/btree.h b/persistent-data/data-structures/btree.h index b91fb20..9e85a36 100644 --- a/persistent-data/data-structures/btree.h +++ b/persistent-data/data-structures/btree.h @@ -322,7 +322,7 @@ namespace persistent_data { maybe_pair lookup_le(key const &key) const; maybe_pair lookup_ge(key const &key) const; - void insert(key const &key, typename ValueTraits::value_type const &value); + bool insert(key const &key, typename ValueTraits::value_type const &value); void remove(key const &key); void set_root(block_address root); diff --git a/persistent-data/data-structures/btree.tcc b/persistent-data/data-structures/btree.tcc index 80f2b94..ce49a30 100644 --- 
a/persistent-data/data-structures/btree.tcc +++ b/persistent-data/data-structures/btree.tcc @@ -32,7 +32,6 @@ namespace { using namespace persistent_data; using namespace btree_detail; using namespace std; - } //---------------------------------------------------------------- @@ -90,14 +89,22 @@ namespace persistent_data { { uint32_t flags = to_cpu(raw_->header.flags); if (flags & INTERNAL_NODE) { - if (flags & LEAF_NODE) - throw runtime_error("btree node is both internal and leaf"); + if (flags & LEAF_NODE) { + ostringstream out; + out << "btree node is both internal and leaf" + << " (block " << location_ << ")"; + throw runtime_error(out.str()); + } return INTERNAL; } else if (flags & LEAF_NODE) return LEAF; - else - throw runtime_error("unknown node type"); + else { + ostringstream out; + out << "unknown node type" + << " (block " << location_ << ")"; + throw runtime_error(out.str()); + } } template @@ -352,7 +359,8 @@ namespace persistent_data { std::ostringstream out; out << "value size mismatch: expected " << sizeof(typename ValueTraits::disk_type) << ", but got " << get_value_size() - << ". This is not the btree you are looking for." << std::endl; + << ". This is not the btree you are looking for." 
+ << " (block " << location_ << ")" << std::endl; return out.str(); } @@ -371,7 +379,8 @@ namespace persistent_data { if (max < get_nr_entries()) { std::ostringstream out; out << "Bad nr of elements: max per block = " - << max << ", actual = " << get_nr_entries() << std::endl; + << max << ", actual = " << get_nr_entries() + << " (block " << location_ << ")" << std::endl; throw std::runtime_error(out.str()); } @@ -489,7 +498,7 @@ namespace persistent_data { } template - void + bool btree:: insert(key const &key, typename ValueTraits::value_type const &value) @@ -522,6 +531,8 @@ namespace persistent_data { n.set_value(index, value); root_ = spine.get_root(); + + return need_insert; } template diff --git a/persistent-data/data-structures/btree_base_visitor.h b/persistent-data/data-structures/btree_base_visitor.h new file mode 100644 index 0000000..17519e1 --- /dev/null +++ b/persistent-data/data-structures/btree_base_visitor.h @@ -0,0 +1,17 @@ +#ifndef PERSISTENT_DATA_DATA_STRUCTURES_BTREE_BASE_VISITOR_H +#define PERSISTENT_DATA_DATA_STRUCTURES_BTREE_BASE_VISITOR_H + +#include "persistent-data/data-structures/btree.h" + +namespace persistent_data { + namespace btree_detail { + template + class noop_value_visitor { + public: + virtual void visit(btree_path const &path, ValueType const &v) { + } + }; + } +} + +#endif diff --git a/persistent-data/data-structures/btree_counter.h b/persistent-data/data-structures/btree_counter.h index ed7b845..6ccf03a 100644 --- a/persistent-data/data-structures/btree_counter.h +++ b/persistent-data/data-structures/btree_counter.h @@ -2,6 +2,7 @@ #define PERSISTENT_DATA_DATA_STRUCTURES_BTREE_COUNTER_H #include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/btree_node_checker.h" #include "persistent-data/block_counter.h" //---------------------------------------------------------------- @@ -20,23 +21,23 @@ namespace persistent_data { virtual bool visit_internal(node_location const &l, typename 
tree::internal_node const &n) { - return visit_node(n); + return check_internal(l, n) ? visit_node(n) : false; } virtual bool visit_internal_leaf(node_location const &l, typename tree::internal_node const &n) { - return visit_node(n); + return check_leaf(l, n) ? visit_node(n) : false; } virtual bool visit_leaf(node_location const &l, typename tree::leaf_node const &n) { - if (visit_node(n)) { + if (check_leaf(l, n) && visit_node(n)) { unsigned nr = n.get_nr_entries(); for (unsigned i = 0; i < nr; i++) { // FIXME: confirm l2 is correct node_location l2(l); - l2.push_key(i); + l2.push_key(n.key_at(i)); vc_.visit(l2, n.value_at(i)); } @@ -46,7 +47,57 @@ namespace persistent_data { return false; } + typedef typename btree::visitor::error_outcome error_outcome; + + error_outcome error_accessing_node(node_location const &l, block_address b, + std::string const &what) { + return btree::visitor::EXCEPTION_HANDLED; + } + private: + bool check_internal(node_location const &l, + btree_detail::node_ref const &n) { + if (l.is_sub_root()) + new_root(l.level()); + + if (!checker_.check_block_nr(n) || + !checker_.check_value_size(n) || + !checker_.check_max_entries(n) || + !checker_.check_nr_entries(n, l.is_sub_root()) || + !checker_.check_ordered_keys(n) || + !checker_.check_parent_key(n, l.is_sub_root() ? boost::optional() : l.key)) + return false; + + return true; + } + + template + bool check_leaf(node_location const &l, + btree_detail::node_ref const &n) { + if (l.is_sub_root()) + new_root(l.level()); + + if (!checker_.check_block_nr(n) || + !checker_.check_value_size(n) || + !checker_.check_max_entries(n) || + !checker_.check_nr_entries(n, l.is_sub_root()) || + !checker_.check_ordered_keys(n) || + !checker_.check_parent_key(n, l.is_sub_root() ? 
boost::optional() : l.key) || + !checker_.check_leaf_key(n, last_leaf_key_[l.level()])) + return false; + + if (n.get_nr_entries() > 0) + last_leaf_key_[l.level()] = n.key_at(n.get_nr_entries() - 1); + + return true; + } + + void new_root(unsigned level) { + // we're starting a new subtree, so should + // reset the last_leaf value. + last_leaf_key_[level] = boost::optional(); + } + template bool visit_node(Node const &n) { block_address b = n.get_location(); @@ -57,6 +108,8 @@ namespace persistent_data { block_counter &bc_; ValueCounter &vc_; + btree_node_checker checker_; + boost::optional last_leaf_key_[Levels]; }; } diff --git a/persistent-data/data-structures/btree_damage_visitor.h b/persistent-data/data-structures/btree_damage_visitor.h index fa11378..1ff16a8 100644 --- a/persistent-data/data-structures/btree_damage_visitor.h +++ b/persistent-data/data-structures/btree_damage_visitor.h @@ -2,6 +2,7 @@ #define PERSISTENT_DATA_DATA_STRUCTURES_DAMAGE_VISITOR_H #include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/btree_node_checker.h" #include "persistent-data/run.h" //---------------------------------------------------------------- @@ -27,6 +28,12 @@ namespace persistent_data { return out; } + class noop_damage_visitor { + public: + virtual void visit(btree_path const &path, damage const &d) { + } + }; + // Tracks damage in a single level btree. Use multiple // trackers if you have a multilayer tree. class damage_tracker { @@ -216,44 +223,53 @@ namespace persistent_data { bool check_internal(node_location const &loc, btree_detail::node_ref const &n) { - if (!already_visited(n) && - check_block_nr(n) && - check_value_size(n) && - check_max_entries(n) && - check_nr_entries(n, loc.is_sub_root()) && - check_ordered_keys(n) && - check_parent_key(loc.is_sub_root() ? 
boost::optional() : loc.key, n)) { - if (loc.is_sub_root()) - new_root(loc.level()); + if (loc.is_sub_root()) + new_root(loc.level()); - good_internal(n.key_at(0)); - return true; + if (already_visited(n)) + return false; + else if (!checker_.check_block_nr(n) || + !checker_.check_value_size(n) || + !checker_.check_max_entries(n) || + !checker_.check_nr_entries(n, loc.is_sub_root()) || + !checker_.check_ordered_keys(n) || + !checker_.check_parent_key(n, loc.is_sub_root() ? boost::optional() : loc.key)) { + report_damage(checker_.get_last_error_string()); + + return false; } - return false; + good_internal(n.key_at(0)); + + return true; } template bool check_leaf(node_location const &loc, btree_detail::node_ref const &n) { - if (!already_visited(n) && - check_block_nr(n) && - check_value_size(n) && - check_max_entries(n) && - check_nr_entries(n, loc.is_sub_root()) && - check_ordered_keys(n) && - check_parent_key(loc.is_sub_root() ? boost::optional() : loc.key, n)) { - if (loc.is_sub_root()) - new_root(loc.level()); + if (loc.is_sub_root()) + new_root(loc.level()); - bool r = check_leaf_key(loc.level(), n); - if (r && n.get_nr_entries() > 0) - good_leaf(n.key_at(0), n.key_at(n.get_nr_entries() - 1) + 1); + if (already_visited(n)) + return false; + else if (!checker_.check_block_nr(n) || + !checker_.check_value_size(n) || + !checker_.check_max_entries(n) || + !checker_.check_nr_entries(n, loc.is_sub_root()) || + !checker_.check_ordered_keys(n) || + !checker_.check_parent_key(n, loc.is_sub_root() ? 
boost::optional() : loc.key) || + !checker_.check_leaf_key(n, last_leaf_key_[loc.level()])) { + report_damage(checker_.get_last_error_string()); - return r; + return false; } - return false; + if (n.get_nr_entries() > 0) { + last_leaf_key_[loc.level()] = n.key_at(n.get_nr_entries() - 1); + good_leaf(n.key_at(0), n.key_at(n.get_nr_entries() - 1) + 1); + } + + return true; } template @@ -270,133 +286,6 @@ namespace persistent_data { return false; } - template - bool check_block_nr(node const &n) { - if (n.get_location() != n.get_block_nr()) { - std::ostringstream out; - out << "block number mismatch: actually " - << n.get_location() - << ", claims " << n.get_block_nr(); - - report_damage(out.str()); - return false; - } - - return true; - } - - template - bool check_value_size(node const &n) { - if (!n.value_sizes_match()) { - report_damage(n.value_mismatch_string()); - return false; - } - - return true; - } - - template - bool check_max_entries(node const &n) { - size_t elt_size = sizeof(uint64_t) + n.get_value_size(); - if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) { - std::ostringstream out; - out << "max entries too large: " << n.get_max_entries(); - report_damage(out.str()); - return false; - } - - if (n.get_max_entries() % 3) { - std::ostringstream out; - out << "max entries is not divisible by 3: " << n.get_max_entries(); - report_damage(out.str()); - return false; - } - - return true; - } - - template - bool check_nr_entries(node const &n, bool is_root) { - if (n.get_nr_entries() > n.get_max_entries()) { - std::ostringstream out; - out << "bad nr_entries: " - << n.get_nr_entries() << " < " - << n.get_max_entries(); - report_damage(out.str()); - return false; - } - - block_address min = n.get_max_entries() / 3; - if (!is_root && (n.get_nr_entries() < min)) { - ostringstream out; - out << "too few entries in btree_node: " - << n.get_nr_entries() - << ", expected at least " - << min - << "(max_entries = " << n.get_max_entries() << ")"; 
- report_damage(out.str()); - return false; - } - - return true; - } - - template - bool check_ordered_keys(node const &n) { - unsigned nr_entries = n.get_nr_entries(); - - if (nr_entries == 0) - return true; // can only happen if a root node - - uint64_t last_key = n.key_at(0); - - for (unsigned i = 1; i < nr_entries; i++) { - uint64_t k = n.key_at(i); - if (k <= last_key) { - ostringstream out; - out << "keys are out of order, " << k << " <= " << last_key; - report_damage(out.str()); - return false; - } - last_key = k; - } - - return true; - } - - template - bool check_parent_key(boost::optional key, node const &n) { - if (!key) - return true; - - if (*key > n.key_at(0)) { - ostringstream out; - out << "parent key mismatch: parent was " << *key - << ", but lowest in node was " << n.key_at(0); - report_damage(out.str()); - return false; - } - - return true; - } - - template - bool check_leaf_key(unsigned level, node const &n) { - if (n.get_nr_entries() == 0) - return true; // can only happen if a root node - - if (last_leaf_key_[level] && *last_leaf_key_[level] >= n.key_at(0)) { - ostringstream out; - out << "the last key of the previous leaf was " << *last_leaf_key_[level] - << " and the first key of this leaf is " << n.key_at(0); - report_damage(out.str()); - return false; - } - - last_leaf_key_[level] = n.key_at(n.get_nr_entries() - 1); - return true; - } - void new_root(unsigned level) { // we're starting a new subtree, so should // reset the last_leaf value. 
@@ -474,6 +363,7 @@ namespace persistent_data { std::set seen_; boost::optional last_leaf_key_[Levels]; + btree_node_checker checker_; path_tracker path_tracker_; damage_tracker dt_; std::list damage_reasons_; diff --git a/persistent-data/data-structures/btree_node_checker.cc b/persistent-data/data-structures/btree_node_checker.cc new file mode 100644 index 0000000..0f50670 --- /dev/null +++ b/persistent-data/data-structures/btree_node_checker.cc @@ -0,0 +1,127 @@ +#include "btree_node_checker.h" + +#include + +using persistent_data::btree_detail::btree_node_checker; + +//---------------------------------------------------------------- + +btree_node_checker::error_type btree_node_checker::get_last_error() const { + return last_error_; +} + +std::string btree_node_checker::get_last_error_string() const { + switch (last_error_) { + case BLOCK_NR_MISMATCH: + return block_nr_mismatch_string(); + case VALUE_SIZES_MISMATCH: + return value_sizes_mismatch_string(); + case MAX_ENTRIES_TOO_LARGE: + return max_entries_too_large_string(); + case MAX_ENTRIES_NOT_DIVISIBLE: + return max_entries_not_divisible_string(); + case NR_ENTRIES_TOO_LARGE: + return nr_entries_too_large_string(); + case NR_ENTRIES_TOO_SMALL: + return nr_entries_too_small_string(); + case KEYS_OUT_OF_ORDER: + return keys_out_of_order_string(); + case PARENT_KEY_MISMATCH: + return parent_key_mismatch_string(); + case LEAF_KEY_OVERLAPPED: + return leaf_key_overlapped_string(); + default: + return std::string(); + } +} + +void btree_node_checker::reset() { + last_error_ = NO_ERROR; +} + +std::string btree_node_checker::block_nr_mismatch_string() const { + std::ostringstream out; + out << "block number mismatch: actually " + << error_location_ + << ", claims " << error_block_nr_; + + return out.str(); +} + +std::string btree_node_checker::value_sizes_mismatch_string() const { + std::ostringstream out; + out << "value size mismatch: expected " << error_value_sizes_[1] + << ", but got " << error_value_sizes_[0] + 
<< ". This is not the btree you are looking for." + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::max_entries_too_large_string() const { + std::ostringstream out; + out << "max entries too large: " << error_max_entries_ + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::max_entries_not_divisible_string() const { + std::ostringstream out; + out << "max entries is not divisible by 3: " << error_max_entries_ + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::nr_entries_too_large_string() const { + std::ostringstream out; + out << "bad nr_entries: " + << error_nr_entries_ << " < " + << error_max_entries_ + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::nr_entries_too_small_string() const { + std::ostringstream out; + out << "too few entries in btree_node: " + << error_nr_entries_ + << ", expected at least " + << (error_max_entries_ / 3) + << " (block " << error_location_ + << ", max_entries = " << error_max_entries_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::keys_out_of_order_string() const { + std::ostringstream out; + out << "keys are out of order, " + << error_keys_[0] << " <= " << error_keys_[1] + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::parent_key_mismatch_string() const { + std::ostringstream out; + out << "parent key mismatch: parent was " << error_keys_[1] + << ", but lowest in node was " << error_keys_[0] + << " (block " << error_location_ << ")"; + + return out.str(); +} + +std::string btree_node_checker::leaf_key_overlapped_string() const { + std::ostringstream out; + out << "the last key of the previous leaf was " << error_keys_[1] + << " and the first key of this leaf is " << error_keys_[0] + << " (block " << error_location_ << ")"; + + return out.str(); +} + 
+//---------------------------------------------------------------- + diff --git a/persistent-data/data-structures/btree_node_checker.h b/persistent-data/data-structures/btree_node_checker.h new file mode 100644 index 0000000..be50007 --- /dev/null +++ b/persistent-data/data-structures/btree_node_checker.h @@ -0,0 +1,208 @@ +#ifndef BTREE_NODE_CHECKER_H +#define BTREE_NODE_CHECKER_H + +#include "block-cache/block_cache.h" +#include "persistent-data/block.h" +#include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/btree_disk_structures.h" + +#include +#include + +using bcache::block_address; + +//---------------------------------------------------------------- + +namespace persistent_data { + namespace btree_detail { + class btree_node_checker { + public: + enum error_type { + NO_ERROR, + BLOCK_NR_MISMATCH, + VALUE_SIZES_MISMATCH, + MAX_ENTRIES_TOO_LARGE, + MAX_ENTRIES_NOT_DIVISIBLE, + NR_ENTRIES_TOO_LARGE, + NR_ENTRIES_TOO_SMALL, + KEYS_OUT_OF_ORDER, + VALUE_SIZE_MISMATCH, + PARENT_KEY_MISMATCH, + LEAF_KEY_OVERLAPPED, + }; + + btree_node_checker(): + last_error_(NO_ERROR), + error_location_(0), + error_block_nr_(0), + error_nr_entries_(0), + error_max_entries_(0), + error_value_sizes_{0, 0}, + error_keys_{0, 0} { + } + + virtual ~btree_node_checker() {} + + template + bool check_block_nr(btree_detail::node_ref const &n) { + if (n.get_location() != n.get_block_nr()) { + last_error_ = BLOCK_NR_MISMATCH; + error_block_nr_ = n.get_block_nr(); + error_location_ = n.get_location(); + + return false; + } + + return true; + } + + template + bool check_value_size(btree_detail::node_ref const &n) { + if (!n.value_sizes_match()) { + last_error_ = VALUE_SIZES_MISMATCH; + error_location_ = n.get_location(); + error_value_sizes_[0] = n.get_value_size(); + error_value_sizes_[1] = sizeof(typename ValueTraits::disk_type); + return false; + } + + return true; + } + + template + bool check_max_entries(btree_detail::node_ref const &n) { + size_t 
elt_size = sizeof(uint64_t) + n.get_value_size(); + if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) { + last_error_ = MAX_ENTRIES_TOO_LARGE; + error_location_ = n.get_location(); + error_max_entries_ = n.get_max_entries(); + + return false; + } + + if (n.get_max_entries() % 3) { + last_error_ = MAX_ENTRIES_NOT_DIVISIBLE; + error_location_ = n.get_location(); + error_max_entries_ = n.get_max_entries(); + + return false; + } + + return true; + } + + template + bool check_nr_entries(btree_detail::node_ref const &n, + bool is_root) { + if (n.get_nr_entries() > n.get_max_entries()) { + last_error_ = NR_ENTRIES_TOO_LARGE; + error_location_ = n.get_location(); + error_nr_entries_ = n.get_nr_entries(); + error_max_entries_ = n.get_max_entries(); + + return false; + } + + block_address min = n.get_max_entries() / 3; + if (!is_root && (n.get_nr_entries() < min)) { + last_error_ = NR_ENTRIES_TOO_SMALL; + error_location_ = n.get_location(); + error_nr_entries_ = n.get_nr_entries(); + error_max_entries_ = n.get_max_entries(); + + return false; + } + + return true; + } + + template + bool check_ordered_keys(btree_detail::node_ref const &n) { + unsigned nr_entries = n.get_nr_entries(); + + if (nr_entries == 0) + return true; // can only happen if a root node + + uint64_t last_key = n.key_at(0); + + for (unsigned i = 1; i < nr_entries; i++) { + uint64_t k = n.key_at(i); + if (k <= last_key) { + last_error_ = KEYS_OUT_OF_ORDER; + error_location_ = n.get_location(); + error_keys_[0] = k; + error_keys_[1] = last_key; + + return false; + } + last_key = k; + } + + return true; + } + + template + bool check_parent_key(btree_detail::node_ref const &n, + boost::optional key) { + if (!key) + return true; + + if (*key > n.key_at(0)) { + last_error_ = PARENT_KEY_MISMATCH; + error_location_ = n.get_location(); + error_keys_[0] = n.key_at(0); + error_keys_[1] = *key; + + return false; + } + + return true; + } + + template + bool check_leaf_key(btree_detail::node_ref 
const &n, + boost::optional key) { + if (n.get_nr_entries() == 0) + return true; // can only happen if a root node + + if (key && *key >= n.key_at(0)) { + last_error_ = LEAF_KEY_OVERLAPPED; + error_location_ = n.get_location(); + error_keys_[0] = n.key_at(0); + error_keys_[1] = *key; + + return false; + } + + return true; + } + + error_type get_last_error() const; + std::string get_last_error_string() const; + void reset(); + + private: + std::string block_nr_mismatch_string() const; + std::string value_sizes_mismatch_string() const; + std::string max_entries_too_large_string() const; + std::string max_entries_not_divisible_string() const; + std::string nr_entries_too_large_string() const; + std::string nr_entries_too_small_string() const; + std::string keys_out_of_order_string() const; + std::string parent_key_mismatch_string() const; + std::string leaf_key_overlapped_string() const; + + error_type last_error_; + block_address error_location_; + block_address error_block_nr_; + uint32_t error_nr_entries_; + uint32_t error_max_entries_; + uint32_t error_value_sizes_[2]; + uint64_t error_keys_[2]; + }; + } +} + +//---------------------------------------------------------------- + +#endif diff --git a/persistent-data/file_utils.cc b/persistent-data/file_utils.cc index 4fd513d..a57e92e 100644 --- a/persistent-data/file_utils.cc +++ b/persistent-data/file_utils.cc @@ -13,8 +13,8 @@ using namespace persistent_data; //---------------------------------------------------------------- -block_address -persistent_data::get_nr_blocks(string const &path, block_address block_size) +persistent_data::block_address +persistent_data::get_nr_blocks(string const &path, sector_t block_size) { using namespace persistent_data; @@ -24,7 +24,8 @@ persistent_data::get_nr_blocks(string const &path, block_address block_size) int r = ::stat(path.c_str(), &info); if (r) { ostringstream out; - out << "Couldn't stat dev path '" << path << "'"; + out << "Couldn't stat dev path '" << path << "': " 
+ << strerror(errno); throw runtime_error(out.str()); } diff --git a/persistent-data/file_utils.h b/persistent-data/file_utils.h index cb80bd4..1dbfce2 100644 --- a/persistent-data/file_utils.h +++ b/persistent-data/file_utils.h @@ -9,7 +9,7 @@ // FIXME: move to a different unit namespace persistent_data { - block_address get_nr_blocks(string const &path, block_address block_size); + persistent_data::block_address get_nr_blocks(string const &path, sector_t block_size = MD_BLOCK_SIZE); block_address get_nr_metadata_blocks(string const &path); block_manager<>::ptr open_bm(std::string const &dev_path, diff --git a/persistent-data/space-maps/disk.cc b/persistent-data/space-maps/disk.cc index 9b1f9cc..caeaca3 100644 --- a/persistent-data/space-maps/disk.cc +++ b/persistent-data/space-maps/disk.cc @@ -50,6 +50,15 @@ namespace { throw checksum_error("bad block nr in space map bitmap"); } + virtual bool check_raw(void const *raw) const { + bitmap_header const *data = reinterpret_cast(raw); + crc32c sum(BITMAP_CSUM_XOR); + sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(data->csum)) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { bitmap_header *data = reinterpret_cast(raw); data->blocknr = to_disk(location); @@ -62,8 +71,6 @@ namespace { //-------------------------------- - uint64_t const INDEX_CSUM_XOR = 160478; - // FIXME: factor out the common code in these validators struct index_block_validator : public bcache::validator { virtual void check(void const *raw, block_address location) const { @@ -77,6 +84,15 @@ namespace { throw checksum_error("bad block nr in metadata index block"); } + virtual bool check_raw(void const *raw) const { + metadata_index const *mi = reinterpret_cast(raw); + crc32c sum(INDEX_CSUM_XOR); + sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(mi->csum_)) + return false; + return true; + } + virtual void 
prepare(void *raw, block_address location) const { metadata_index *mi = reinterpret_cast(raw); mi->blocknr_ = to_disk(location); @@ -87,11 +103,6 @@ namespace { } }; - bcache::validator::ptr - index_validator() { - return bcache::validator::ptr(new index_block_validator()); - } - //-------------------------------- class bitmap { @@ -731,7 +742,7 @@ persistent_data::create_disk_sm(transaction_manager &tm, } checked_space_map::ptr -persistent_data::open_disk_sm(transaction_manager &tm, void *root) +persistent_data::open_disk_sm(transaction_manager &tm, void const *root) { sm_root_disk d; sm_root v; @@ -759,7 +770,7 @@ persistent_data::create_metadata_sm(transaction_manager &tm, block_address nr_bl } checked_space_map::ptr -persistent_data::open_metadata_sm(transaction_manager &tm, void *root) +persistent_data::open_metadata_sm(transaction_manager &tm, void const *root) { sm_root_disk d; sm_root v; @@ -773,6 +784,16 @@ persistent_data::open_metadata_sm(transaction_manager &tm, void *root) checked_space_map::ptr(new sm_disk(store, tm, v)))); } +bcache::validator::ptr +persistent_data::bitmap_validator() { + return bcache::validator::ptr(new bitmap_block_validator()); +} + +bcache::validator::ptr +persistent_data::index_validator() { + return bcache::validator::ptr(new index_block_validator()); +} + block_address persistent_data::get_nr_blocks_in_data_sm(transaction_manager &tm, void *root) { diff --git a/persistent-data/space-maps/disk.h b/persistent-data/space-maps/disk.h index cb747e8..906221e 100644 --- a/persistent-data/space-maps/disk.h +++ b/persistent-data/space-maps/disk.h @@ -29,13 +29,17 @@ namespace persistent_data { create_disk_sm(transaction_manager &tm, block_address nr_blocks); checked_space_map::ptr - open_disk_sm(transaction_manager &tm, void *root); + open_disk_sm(transaction_manager &tm, void const *root); checked_space_map::ptr create_metadata_sm(transaction_manager &tm, block_address nr_blocks); checked_space_map::ptr - 
open_metadata_sm(transaction_manager &tm, void *root); + open_metadata_sm(transaction_manager &tm, void const *root); + + bcache::validator::ptr bitmap_validator(); + + bcache::validator::ptr index_validator(); // Get the number of data blocks with minimal IO. Used when // repairing to avoid the bulk of the space maps. diff --git a/persistent-data/space-maps/disk_structures.h b/persistent-data/space-maps/disk_structures.h index 0a57e61..aa8fbe5 100644 --- a/persistent-data/space-maps/disk_structures.h +++ b/persistent-data/space-maps/disk_structures.h @@ -111,6 +111,9 @@ namespace persistent_data { le32 not_used; le64 blocknr; } __attribute__ ((packed)); + + uint64_t const BITMAP_CSUM_XOR = 240779; + uint64_t const INDEX_CSUM_XOR = 160478; } } diff --git a/persistent-data/validators.cc b/persistent-data/validators.cc index b9c163c..a50947d 100644 --- a/persistent-data/validators.cc +++ b/persistent-data/validators.cc @@ -31,6 +31,16 @@ namespace { } } + virtual bool check_raw(void const *raw) const { + disk_node const *data = reinterpret_cast(raw); + node_header const *n = &data->header; + crc32c sum(BTREE_CSUM_XOR); + sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(n->csum)) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { disk_node *data = reinterpret_cast(raw); node_header *n = &data->header; diff --git a/thin-provisioning/cache_stream.cc b/thin-provisioning/cache_stream.cc new file mode 100644 index 0000000..a21ed64 --- /dev/null +++ b/thin-provisioning/cache_stream.cc @@ -0,0 +1,95 @@ +#include "base/container_of.h" +#include "thin-provisioning/cache_stream.h" +#include "persistent-data/file_utils.h" + +using namespace thin_provisioning; +using namespace std; +using namespace persistent_data; + +//---------------------------------------------------------------- + +namespace { + int open_file(string const &path) { + int fd = ::open(path.c_str(), O_RDONLY | O_DIRECT | 
O_EXCL, 0666); + if (fd < 0) + syscall_failed("open", + "Note: you cannot run this tool with these options on live metadata."); + + return fd; + } +} + +//---------------------------------------------------------------- + +cache_stream::cache_stream(string const &path, + block_address block_size, + size_t cache_mem) + : block_size_(block_size), + nr_blocks_(get_nr_blocks(path, block_size)), + + // hack because cache uses LRU rather than MRU + cache_blocks_((cache_mem / block_size) / 2u), + fd_(open_file(path)), + v_(new bcache::noop_validator()), + cache_(new block_cache(fd_, block_size / 512, nr_blocks_, cache_mem)), + current_index_(0) { + + rewind(); +} + +block_address +cache_stream::size() const +{ + return nr_blocks_ * block_size_; +} + +void +cache_stream::rewind() +{ + current_index_ = 0; + + for (block_address i = 1; i < min(cache_blocks_, nr_blocks_); i++) + cache_->prefetch(i); +} + +bool +cache_stream::next(block_address count) +{ + current_index_ = min(current_index_ + count, nr_blocks_); + + if (current_index_ + cache_blocks_ < nr_blocks_) + cache_->prefetch(current_index_ + cache_blocks_); + + return !eof(); +} + +bool +cache_stream::eof() const +{ + return current_index_ >= nr_blocks_; +} + +chunk const & +cache_stream::get() +{ + chunk_wrapper *w = new chunk_wrapper(*this); + return w->c_; +} + +void +cache_stream::put(chunk const &c) +{ + chunk_wrapper *w = base::container_of(const_cast(&c), &chunk_wrapper::c_); + delete w; +} + +cache_stream::chunk_wrapper::chunk_wrapper(cache_stream &parent) + : block_(parent.cache_->get(parent.current_index_, 0, parent.v_)) +{ + c_.offset_ = parent.current_index_ * parent.block_size_; + c_.len_ = parent.block_size_; + c_.mem_.begin = static_cast(block_.get_data()); + c_.mem_.end = c_.mem_.begin + parent.block_size_; +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/cache_stream.h b/thin-provisioning/cache_stream.h new file mode 100644 index 0000000..b7af995 
--- /dev/null +++ b/thin-provisioning/cache_stream.h @@ -0,0 +1,51 @@ +#ifndef THIN_PROVISIONING_CACHE_STREAM_H +#define THIN_PROVISIONING_CACHE_STREAM_H + +#include "thin-provisioning/chunk_stream.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + using namespace bcache; + + class cache_stream : public chunk_stream { + public: + cache_stream(std::string const &path, + block_address block_size, + size_t cache_mem); + + block_address size() const; + + virtual void rewind(); + + virtual bool next(block_address count = 1ull); + virtual bool eof() const; + + virtual chunk const &get(); + virtual void put(chunk const &c); + + private: + struct chunk_wrapper { + chunk_wrapper(cache_stream &parent); + + block_cache::auto_block block_; + chunk c_; + }; + + friend class chunk_wrapper; + + block_address block_size_; + block_address nr_blocks_; + block_address cache_blocks_; + + int fd_; + validator::ptr v_; + std::auto_ptr cache_; + + block_address current_index_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/chunk_stream.cc b/thin-provisioning/chunk_stream.cc new file mode 100644 index 0000000..adc41d0 --- /dev/null +++ b/thin-provisioning/chunk_stream.cc @@ -0,0 +1,9 @@ +#include "thin-provisioning/chunk_stream.h" + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + + +//---------------------------------------------------------------- diff --git a/thin-provisioning/chunk_stream.h b/thin-provisioning/chunk_stream.h new file mode 100644 index 0000000..1831f27 --- /dev/null +++ b/thin-provisioning/chunk_stream.h @@ -0,0 +1,66 @@ +// Copyright (C) 2015 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. 
+// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +#ifndef CHUNK_STREAM_H +#define CHUNK_STREAM_H + +#include "block-cache/block_cache.h" + +#include +#include + +//---------------------------------------------------------------- + +namespace thin_provisioning { + struct mem { + mem() + : begin(0), + end(0) { + } + + mem(uint8_t *b, uint8_t *e) + : begin(b), + end(e) { + } + + uint8_t *begin, *end; + }; + + struct chunk { + uint64_t offset_, len_; + mem mem_; + }; + + class chunk_stream { + public: + virtual ~chunk_stream() {} + + virtual void rewind() = 0; + virtual bcache::block_address size() const = 0; + + virtual bool next(bcache::block_address count = 1ull) = 0; + virtual bool eof() const = 0; + + virtual chunk const &get() = 0; + virtual void put(chunk const &c) = 0; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/commands.cc b/thin-provisioning/commands.cc index c25abce..4f2af9d 100644 --- a/thin-provisioning/commands.cc +++ b/thin-provisioning/commands.cc @@ -16,7 +16,16 @@ thin_provisioning::register_thin_commands(base::application &app) app.add_cmd(command::ptr(new thin_restore_cmd())); app.add_cmd(command::ptr(new thin_repair_cmd())); app.add_cmd(command::ptr(new thin_rmap_cmd())); + +#ifdef DEV_TOOLS + app.add_cmd(command::ptr(new thin_ll_dump_cmd())); + 
app.add_cmd(command::ptr(new thin_ll_restore_cmd())); + app.add_cmd(command::ptr(new thin_scan_cmd())); app.add_cmd(command::ptr(new thin_trim_cmd())); + app.add_cmd(command::ptr(new thin_generate_metadata_cmd())); + app.add_cmd(command::ptr(new thin_show_duplicates_cmd())); + app.add_cmd(command::ptr(new thin_show_metadata_cmd())); +#endif } //---------------------------------------------------------------- diff --git a/thin-provisioning/commands.h b/thin-provisioning/commands.h index ec1f1ec..54e5a88 100644 --- a/thin-provisioning/commands.h +++ b/thin-provisioning/commands.h @@ -66,10 +66,58 @@ namespace thin_provisioning { class thin_trim_cmd : public base::command { public: thin_trim_cmd(); + virtual void usage(std::ostream &out) const; virtual int run(int argc, char **argv); }; +#ifdef DEV_TOOLS + class thin_ll_dump_cmd : public base::command { + public: + thin_ll_dump_cmd(); + + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + + class thin_ll_restore_cmd : public base::command { + public: + thin_ll_restore_cmd(); + + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + + class thin_scan_cmd : public base::command { + public: + thin_scan_cmd(); + + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + + class thin_show_duplicates_cmd : public base::command { + public: + thin_show_duplicates_cmd(); + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + + class thin_generate_metadata_cmd : public base::command { + public: + thin_generate_metadata_cmd(); + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; + + class thin_show_metadata_cmd : public base::command { + public: + thin_show_metadata_cmd(); + virtual void usage(std::ostream &out) const; + virtual int run(int argc, char **argv); + }; +#endif + void register_thin_commands(base::application &app); } 
diff --git a/thin-provisioning/device_tree.cc b/thin-provisioning/device_tree.cc index 223d816..4837cb7 100644 --- a/thin-provisioning/device_tree.cc +++ b/thin-provisioning/device_tree.cc @@ -47,6 +47,13 @@ namespace { namespace thin_provisioning { namespace device_tree_detail { + device_details::device_details() + : mapped_blocks_(0), + transaction_id_(0), + creation_time_(0), + snapshotted_time_(0) { + } + void device_details_traits::unpack(device_details_disk const &disk, device_details &value) { diff --git a/thin-provisioning/device_tree.h b/thin-provisioning/device_tree.h index 23ae924..ec0f9f2 100644 --- a/thin-provisioning/device_tree.h +++ b/thin-provisioning/device_tree.h @@ -16,6 +16,8 @@ namespace thin_provisioning { } __attribute__ ((packed)); struct device_details { + device_details(); + uint64_t mapped_blocks_; uint64_t transaction_id_; /* when created */ uint32_t creation_time_; diff --git a/thin-provisioning/emitter.h b/thin-provisioning/emitter.h index 58658a9..bb944da 100644 --- a/thin-provisioning/emitter.h +++ b/thin-provisioning/emitter.h @@ -49,6 +49,8 @@ namespace thin_provisioning { virtual void begin_superblock(std::string const &uuid, uint64_t time, uint64_t trans_id, + boost::optional flags, + boost::optional version, uint32_t data_block_size, uint64_t nr_data_blocks, boost::optional metadata_snap) = 0; diff --git a/thin-provisioning/fixed_chunk_stream.cc b/thin-provisioning/fixed_chunk_stream.cc new file mode 100644 index 0000000..ea031dd --- /dev/null +++ b/thin-provisioning/fixed_chunk_stream.cc @@ -0,0 +1,113 @@ +#include "thin-provisioning/fixed_chunk_stream.h" + +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +fixed_chunk_stream::fixed_chunk_stream(chunk_stream &stream, unsigned chunk_size) + : index_(0), + stream_(stream), + chunk_size_(chunk_size), + big_chunk_(0) { + next_big_chunk(); +} + +fixed_chunk_stream::~fixed_chunk_stream() +{ + put_big_chunk(); +} + 
+bcache::block_address +fixed_chunk_stream::size() const +{ + return stream_.size(); +} + +void +fixed_chunk_stream::rewind() +{ + // FIXME: not complete + index_ = 0; + stream_.rewind(); +} + +bool +fixed_chunk_stream::next(bcache::block_address count) +{ + while (count--) { + index_++; + advance_one(); + } + + return !eof(); +} + +bool +fixed_chunk_stream::eof() const +{ + return stream_.eof(); +} + +chunk const & +fixed_chunk_stream::get() +{ + assert(big_chunk_); + + little_chunk_.len_ = little_e_ - little_b_; + little_chunk_.offset_ = big_chunk_->offset_ + little_chunk_.len_; + + little_chunk_.mem_.begin = little_b_; + little_chunk_.mem_.end = little_e_; + + return little_chunk_; +} + +void +fixed_chunk_stream::put(chunk const &c) +{ + // noop +} + +bool +fixed_chunk_stream::next_big_chunk() +{ + put_big_chunk(); + + if (!stream_.next()) + return false; + + big_chunk_ = &stream_.get(); + little_b_ = little_e_ = last_hashed_ = big_chunk_->mem_.begin; + + return true; +} + +bool +fixed_chunk_stream::advance_one() +{ + uint8_t *big_e; + + big_e = big_chunk_->mem_.end; + little_b_ = little_e_; + + if (little_b_ >= big_e) { + if (next_big_chunk()) + big_e = big_chunk_->mem_.end; + else + return false; + } + + little_e_ += chunk_size_; + return true; +} + +void +fixed_chunk_stream::put_big_chunk() +{ + if (big_chunk_) + stream_.put(*big_chunk_); + + big_chunk_ = 0; +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/fixed_chunk_stream.h b/thin-provisioning/fixed_chunk_stream.h new file mode 100644 index 0000000..f17d15a --- /dev/null +++ b/thin-provisioning/fixed_chunk_stream.h @@ -0,0 +1,39 @@ +#ifndef THIN_PROVISIONING_FIXED_CHUNK_STREAM_H +#define THIN_PROVISIONING_FIXED_CHUNK_STREAM_H + +#include "thin-provisioning/chunk_stream.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + class fixed_chunk_stream : public chunk_stream { + public: + 
fixed_chunk_stream(chunk_stream &stream, unsigned chunk_size); + ~fixed_chunk_stream(); + + virtual bcache::block_address size() const; + virtual void rewind(); + virtual bool next(bcache::block_address count = 1ull); + virtual bool eof() const; + virtual chunk const &get(); + virtual void put(chunk const &c); + + private: + bool next_big_chunk(); + bool advance_one(); + void put_big_chunk(); + + bcache::block_address index_; + + chunk_stream &stream_; + unsigned chunk_size_; + chunk const *big_chunk_; + + uint8_t *little_b_, *little_e_, *last_hashed_; + chunk little_chunk_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/human_readable_format.cc b/thin-provisioning/human_readable_format.cc index 3cfc188..6c53b3c 100644 --- a/thin-provisioning/human_readable_format.cc +++ b/thin-provisioning/human_readable_format.cc @@ -43,12 +43,16 @@ namespace { void begin_superblock(string const &uuid, uint64_t time, uint64_t trans_id, + boost::optional flags, + boost::optional version, uint32_t data_block_size, uint64_t nr_data_blocks, boost::optional metadata_snap) { out_ << "begin superblock: \"" << uuid << "\"" << ", " << time << ", " << trans_id + << ", " << (flags ? *flags : 0) + << ", " << (version ? 
*version : 1) << ", " << data_block_size << ", " << nr_data_blocks; if (metadata_snap) diff --git a/thin-provisioning/mapping_tree.cc b/thin-provisioning/mapping_tree.cc index 454e85c..c1a9358 100644 --- a/thin-provisioning/mapping_tree.cc +++ b/thin-provisioning/mapping_tree.cc @@ -193,14 +193,16 @@ namespace { class single_mapping_tree_damage_visitor { public: - single_mapping_tree_damage_visitor(damage_visitor &v) - : v_(v) { + single_mapping_tree_damage_visitor(damage_visitor &v, + uint64_t dev_id) + : v_(v), + dev_id_(dev_id) { } virtual void visit(btree_path const &path, btree_detail::damage const &d) { switch (path.size()) { case 0: - v_.visit(missing_devices(d.desc_, d.lost_keys_)); + v_.visit(missing_mappings(d.desc_, dev_id_, d.lost_keys_)); break; default: @@ -210,6 +212,7 @@ namespace { private: damage_visitor &v_; + uint64_t dev_id_; }; } @@ -250,19 +253,21 @@ thin_provisioning::check_mapping_tree(mapping_tree const &tree, void thin_provisioning::walk_mapping_tree(single_mapping_tree const &tree, + uint64_t dev_id, mapping_tree_detail::mapping_visitor &mv, mapping_tree_detail::damage_visitor &dv) { - single_mapping_tree_damage_visitor ll_dv(dv); + single_mapping_tree_damage_visitor ll_dv(dv, dev_id); btree_visit_values(tree, mv, ll_dv); } void thin_provisioning::check_mapping_tree(single_mapping_tree const &tree, + uint64_t dev_id, mapping_tree_detail::damage_visitor &visitor) { noop_block_time_visitor mv; - walk_mapping_tree(tree, mv, visitor); + walk_mapping_tree(tree, dev_id, mv, visitor); } //---------------------------------------------------------------- diff --git a/thin-provisioning/mapping_tree.h b/thin-provisioning/mapping_tree.h index d417b47..4e6adda 100644 --- a/thin-provisioning/mapping_tree.h +++ b/thin-provisioning/mapping_tree.h @@ -139,9 +139,11 @@ namespace thin_provisioning { mapping_tree_detail::damage_visitor &visitor); void walk_mapping_tree(single_mapping_tree const &tree, + uint64_t dev_id, mapping_tree_detail::mapping_visitor 
&mv, mapping_tree_detail::damage_visitor &dv); void check_mapping_tree(single_mapping_tree const &tree, + uint64_t dev_id, mapping_tree_detail::damage_visitor &visitor); } diff --git a/thin-provisioning/metadata_counter.cc b/thin-provisioning/metadata_counter.cc new file mode 100644 index 0000000..95487d2 --- /dev/null +++ b/thin-provisioning/metadata_counter.cc @@ -0,0 +1,74 @@ +#include "thin-provisioning/metadata_counter.h" +#include "persistent-data/space-maps/core.h" +#include "persistent-data/space-maps/disk_structures.h" + +using namespace persistent_data; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + void count_trees(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + block_counter &bc) { + + // Count the device tree + { + noop_value_counter vc; + device_tree dtree(*tm, sb.device_details_root_, + device_tree_detail::device_details_traits::ref_counter()); + count_btree_blocks(dtree, bc, vc); + } + + // Count the mapping tree + { + noop_value_counter vc; + mapping_tree mtree(*tm, sb.data_mapping_root_, + mapping_tree_detail::block_traits::ref_counter(space_map::ptr())); + count_btree_blocks(mtree, bc, vc); + } + } + + void count_space_maps(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + block_counter &bc) { + // Count the metadata space map (no-throw) + try { + persistent_space_map::ptr metadata_sm = + open_metadata_sm(*tm, static_cast(&sb.metadata_space_map_root_)); + metadata_sm->count_metadata(bc); + } catch (std::exception &e) { + cerr << e.what() << endl; + } + + // Count the data space map (no-throw) + { + persistent_space_map::ptr data_sm = + open_disk_sm(*tm, static_cast(&sb.data_space_map_root_)); + data_sm->count_metadata(bc); + } + } +} + +//---------------------------------------------------------------- + +void thin_provisioning::count_metadata(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + 
block_counter &bc, + bool skip_metadata_snap) { + // Count the superblock + bc.inc(superblock_detail::SUPERBLOCK_LOCATION); + count_trees(tm, sb, bc); + + // Count the metadata snap, if present + if (!skip_metadata_snap && sb.metadata_snap_ != superblock_detail::SUPERBLOCK_LOCATION) { + bc.inc(sb.metadata_snap_); + + superblock_detail::superblock snap = read_superblock(tm->get_bm(), sb.metadata_snap_); + count_trees(tm, snap, bc); + } + + count_space_maps(tm, sb, bc); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/metadata_counter.h b/thin-provisioning/metadata_counter.h new file mode 100644 index 0000000..bc65ab9 --- /dev/null +++ b/thin-provisioning/metadata_counter.h @@ -0,0 +1,18 @@ +#ifndef METADATA_COUNTER_H +#define METADATA_COUNTER_H + +#include "thin-provisioning/metadata.h" +#include "persistent-data/data-structures/btree_counter.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + void count_metadata(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + block_counter &bc, + bool skip_metadata_snap = false); +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/metadata_dumper.cc b/thin-provisioning/metadata_dumper.cc index 7bb9361..bbb5be8 100644 --- a/thin-provisioning/metadata_dumper.cc +++ b/thin-provisioning/metadata_dumper.cc @@ -88,8 +88,13 @@ namespace { class details_extractor : public device_tree_detail::device_visitor { public: + details_extractor(dump_options const &opts) + : opts_(opts) { + } + void visit(block_address dev_id, device_tree_detail::device_details const &dd) { - dd_.insert(make_pair(dev_id, dd)); + if (opts_.selected_dev(dev_id)) + dd_.insert(make_pair(dev_id, dd)); } dd_map const &get_details() const { @@ -97,6 +102,7 @@ namespace { } private: + dump_options const &opts_; dd_map dd_; }; @@ -161,21 +167,24 @@ namespace { class 
mapping_tree_emitter : public mapping_tree_detail::device_visitor { public: - mapping_tree_emitter(metadata::ptr md, + mapping_tree_emitter(dump_options const &opts, + metadata::ptr md, emitter::ptr e, dd_map const &dd, - bool repair, mapping_tree_detail::damage_visitor::ptr damage_policy) - : md_(md), + : opts_(opts), + md_(md), e_(e), dd_(dd), - repair_(repair), damage_policy_(damage_policy) { } void visit(btree_path const &path, block_address tree_root) { block_address dev_id = path[0]; + if (!opts_.selected_dev(dev_id)) + return; + dd_map::const_iterator it = dd_.find(path[0]); if (it != dd_.end()) { device_tree_detail::device_details const &d = it->second; @@ -186,7 +195,8 @@ namespace { d.snapshotted_time_); try { - emit_mappings(tree_root); + if (!opts_.skip_mappings_) + emit_mappings(dev_id, tree_root); } catch (exception &e) { cerr << e.what(); e_->end_device(); @@ -194,7 +204,7 @@ namespace { } e_->end_device(); - } else if (!repair_) { + } else if (!opts_.repair_) { ostringstream msg; msg << "mappings present for device " << dev_id << ", but it isn't present in device tree"; @@ -203,17 +213,17 @@ namespace { } private: - void emit_mappings(block_address subtree_root) { + void emit_mappings(uint64_t dev_id, block_address subtree_root) { mapping_emitter me(e_); single_mapping_tree tree(*md_->tm_, subtree_root, mapping_tree_detail::block_time_ref_counter(md_->data_sm_)); - walk_mapping_tree(tree, static_cast(me), *damage_policy_); + walk_mapping_tree(tree, dev_id, static_cast(me), *damage_policy_); } + dump_options const &opts_; metadata::ptr md_; emitter::ptr e_; dd_map const &dd_; - bool repair_; mapping_tree_detail::damage_visitor::ptr damage_policy_; }; @@ -234,21 +244,23 @@ namespace { //---------------------------------------------------------------- void -thin_provisioning::metadata_dump(metadata::ptr md, emitter::ptr e, bool repair) +thin_provisioning::metadata_dump(metadata::ptr md, emitter::ptr e, dump_options const &opts) { - details_extractor 
de; - device_tree_detail::damage_visitor::ptr dd_policy(details_damage_policy(repair)); + details_extractor de(opts); + device_tree_detail::damage_visitor::ptr dd_policy(details_damage_policy(opts.repair_)); walk_device_tree(*md->details_, de, *dd_policy); e->begin_superblock("", md->sb_.time_, md->sb_.trans_id_, + md->sb_.flags_, + md->sb_.version_, md->sb_.data_block_size_, get_nr_blocks(md), boost::optional()); { - mapping_tree_detail::damage_visitor::ptr md_policy(mapping_damage_policy(repair)); - mapping_tree_emitter mte(md, e, de.get_details(), repair, mapping_damage_policy(repair)); + mapping_tree_detail::damage_visitor::ptr md_policy(mapping_damage_policy(opts.repair_)); + mapping_tree_emitter mte(opts, md, e, de.get_details(), mapping_damage_policy(opts.repair_)); walk_mapping_tree(*md->mappings_top_level_, mte, *md_policy); } @@ -256,3 +268,15 @@ thin_provisioning::metadata_dump(metadata::ptr md, emitter::ptr e, bool repair) } //---------------------------------------------------------------- + +void +thin_provisioning::metadata_dump_subtree(metadata::ptr md, emitter::ptr e, bool repair, uint64_t subtree_root) { + mapping_emitter me(e); + single_mapping_tree tree(*md->tm_, subtree_root, + mapping_tree_detail::block_time_ref_counter(md->data_sm_)); + // FIXME: pass the current device id instead of zero + walk_mapping_tree(tree, 0, static_cast(me), + *mapping_damage_policy(repair)); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/metadata_dumper.h b/thin-provisioning/metadata_dumper.h index c96d22e..15bf668 100644 --- a/thin-provisioning/metadata_dumper.h +++ b/thin-provisioning/metadata_dumper.h @@ -22,13 +22,43 @@ #include "emitter.h" #include "metadata.h" +#include +#include + //---------------------------------------------------------------- namespace thin_provisioning { + class dump_options { + public: + dump_options() + : repair_(false), + skip_mappings_(false) { + } + + bool 
selected_dev(uint64_t dev_id) const { + return !dev_filter_ || dev_filter_->count(dev_id); + } + + void select_dev(uint64_t dev_id) { + if (!dev_filter_) + dev_filter_ = dev_set(); + + dev_filter_->insert(dev_id); + } + + bool repair_; + bool skip_mappings_; + + using dev_set = std::set; + using maybe_dev_set = boost::optional; + maybe_dev_set dev_filter_; + }; + // Set the @repair flag if your metadata is corrupt, and you'd like // the dumper to do it's best to recover info. If not set, any // corruption encountered will cause an exception to be thrown. - void metadata_dump(metadata::ptr md, emitter::ptr e, bool repair); + void metadata_dump(metadata::ptr md, emitter::ptr e, dump_options const &opts); + void metadata_dump_subtree(metadata::ptr md, emitter::ptr e, bool repair, uint64_t subtree_root); } //---------------------------------------------------------------- diff --git a/thin-provisioning/pool_stream.cc b/thin-provisioning/pool_stream.cc new file mode 100644 index 0000000..41a0ab0 --- /dev/null +++ b/thin-provisioning/pool_stream.cc @@ -0,0 +1,151 @@ +// Copyright (C) 2015 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include "thin-provisioning/pool_stream.h" +#include "persistent-data/data-structures/btree_damage_visitor.h" + +using namespace thin_provisioning; +using namespace persistent_data; + +//---------------------------------------------------------------- + +namespace { + class damage_visitor { + public: + virtual void visit(btree_path const &path, btree_detail::damage const &d) { + throw std::runtime_error("damage in mapping tree, please run thin_check"); + } + }; + + uint32_t const UNMAPPED = -1; +} + +//---------------------------------------------------------------- + +pool_stream::pool_stream(cache_stream &stream, + transaction_manager::ptr tm, superblock_detail::superblock const &sb, + block_address nr_blocks) + : stream_(stream), + block_to_thin_(nr_blocks, UNMAPPED), + nr_mapped_(0), + index_(0), + block_size_(sb.data_block_size_ * 512) +{ + init_rmap(tm, sb, nr_blocks); +} + +block_address +pool_stream::size() const +{ + return nr_mapped_ * block_size_; +} + +void +pool_stream::rewind() +{ + stream_.rewind(); + index_ = 0; +} + +bool +pool_stream::next(block_address count) +{ + while (count--) + if (!advance_one()) + return false; + + return true; +} + +bool +pool_stream::eof() const +{ + return stream_.eof(); +} + +chunk const & +pool_stream::get() +{ + return stream_.get(); +} + +void +pool_stream::put(chunk const &c) +{ + stream_.put(c); +} + +// FIXME: too big to return by value +vector +pool_stream::read_rmap(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + block_address nr_blocks) +{ + damage_visitor dv; + rmap_visitor rv; + + mapping_tree mtree(*tm, sb.data_mapping_root_, + mapping_tree_detail::block_traits::ref_counter(tm->get_sm())); + + rv.add_data_region(rmap_visitor::region(0, nr_blocks)); + + btree_visit_values(mtree, rv, dv); + rv.complete(); + cerr << "rmap size: " << rv.get_rmap().size() << "\n"; + return rv.get_rmap(); +} + +void +pool_stream::init_rmap(transaction_manager::ptr tm, + superblock_detail::superblock 
const &sb, + block_address nr_blocks) +{ + cerr << "reading rmap..."; + vector rmap = read_rmap(tm, sb, nr_blocks); + cerr << "done\n"; + + vector::const_iterator it; + set thins; + for (it = rmap.begin(); it != rmap.end(); ++it) { + rmap_region const &r = *it; + for (block_address b = r.data_begin; b != r.data_end; b++) + if (block_to_thin_[b] == UNMAPPED) { + nr_mapped_++; + block_to_thin_[b] = r.thin_dev; + } + thins.insert(r.thin_dev); + } + + cerr << nr_mapped_ << " mapped blocks\n"; + cerr << "there are " << thins.size() << " thin devices\n"; +} + +bool +pool_stream::advance_one() +{ + block_address count = 1; + + while (((index_ + count) < block_to_thin_.size()) && + (block_to_thin_[index_ + count] == UNMAPPED)) + count++; + + index_ += count; + return stream_.next(count); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/pool_stream.h b/thin-provisioning/pool_stream.h new file mode 100644 index 0000000..e419842 --- /dev/null +++ b/thin-provisioning/pool_stream.h @@ -0,0 +1,65 @@ +// Copyright (C) 2015 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#ifndef POOL_STREAM_H +#define POOL_STREAM_H + +#include "thin-provisioning/cache_stream.h" +#include "thin-provisioning/rmap_visitor.h" +#include "thin-provisioning/superblock.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + class pool_stream : public chunk_stream { + public: + pool_stream(cache_stream &stream, + transaction_manager::ptr tm, superblock_detail::superblock const &sb, + block_address nr_blocks); + + block_address size() const; + void rewind(); + bool next(block_address count = 1ull); + bool eof() const; + + chunk const &get(); + void put(chunk const &c); + + private: + typedef rmap_visitor::region region; + typedef rmap_visitor::rmap_region rmap_region; + + // FIXME: too big to return by value + vector read_rmap(transaction_manager::ptr tm, + superblock_detail::superblock const &sb, + block_address nr_blocks); + void init_rmap(transaction_manager::ptr tm, superblock_detail::superblock const &sb, + block_address nr_blocks); + bool advance_one(); + + cache_stream &stream_; + vector block_to_thin_; + block_address nr_mapped_; + block_address index_; + block_address block_size_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/restore_emitter.cc b/thin-provisioning/restore_emitter.cc index fd83f56..46b7f92 100644 --- a/thin-provisioning/restore_emitter.cc +++ b/thin-provisioning/restore_emitter.cc @@ -44,6 +44,8 @@ namespace { virtual void begin_superblock(std::string const &uuid, uint64_t time, uint64_t trans_id, + boost::optional flags, + boost::optional version, uint32_t data_block_size, uint64_t nr_data_blocks, boost::optional metadata_snap) { @@ -54,6 +56,8 @@ namespace { memcpy(&sb.uuid_, uuid.c_str(), std::min(sizeof(sb.uuid_), uuid.length())); sb.time_ = time; sb.trans_id_ = trans_id; + sb.flags_ = flags ? *flags : 0; + sb.version_ = version ? 
*version : 1; sb.data_block_size_ = data_block_size; sb.metadata_snap_ = metadata_snap ? *metadata_snap : 0; md_->data_sm_->extend(nr_data_blocks); @@ -78,10 +82,11 @@ namespace { if (device_exists(dev)) throw std::runtime_error("Device already exists"); - // Add entry to the details tree - uint64_t key[1] = {dev}; - device_tree_detail::device_details details = {mapped_blocks, trans_id, (uint32_t)creation_time, (uint32_t)snap_time}; - md_->details_->insert(key, details); + // Store the entry of the details tree + current_device_details_.mapped_blocks_ = 0; + current_device_details_.transaction_id_ = trans_id; + current_device_details_.creation_time_ = (uint32_t)creation_time; + current_device_details_.snapshotted_time_ = (uint32_t)snap_time; current_mapping_ = empty_mapping_->clone(); current_device_ = boost::optional(dev); @@ -90,6 +95,9 @@ namespace { virtual void end_device() { uint64_t key[1] = {*current_device_}; + // Add entry to the details tree + md_->details_->insert(key, current_device_details_); + md_->mappings_top_level_->insert(key, current_mapping_->get_root()); md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly @@ -128,7 +136,8 @@ namespace { mapping_tree_detail::block_time bt; bt.block_ = data_block; bt.time_ = time; - current_mapping_->insert(key, bt); + current_device_details_.mapped_blocks_ += + static_cast(current_mapping_->insert(key, bt)); md_->data_sm_->inc(data_block); } @@ -142,13 +151,14 @@ namespace { bool device_exists(thin_dev_t dev) const { uint64_t key[1] = {dev}; device_tree::maybe_value v = md_->details_->lookup(key); - return v; + return !!v; } metadata::ptr md_; bool in_superblock_; block_address nr_data_blocks_; boost::optional current_device_; + device_tree_detail::device_details current_device_details_; single_mapping_tree::ptr current_mapping_; single_mapping_tree::ptr empty_mapping_; }; diff --git a/thin-provisioning/shared_library_emitter.cc b/thin-provisioning/shared_library_emitter.cc new file 
mode 100644 index 0000000..58f12d2 --- /dev/null +++ b/thin-provisioning/shared_library_emitter.cc @@ -0,0 +1,29 @@ +#include "thin-provisioning/shared_library_emitter.h" + +#include +#include + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +emitter::ptr +thin_provisioning::create_custom_emitter(string const &shared_lib, ostream &out) +{ + emitter::ptr (*create_fn)(ostream &out); + void *handle = dlopen(shared_lib.c_str(), RTLD_LAZY); + if (!handle) + throw runtime_error(dlerror()); + + dlerror(); // Clear any existing error + create_fn = reinterpret_cast(dlsym(handle, "create_emitter")); + + char *error = dlerror(); + if (error) + throw runtime_error(error); + + return create_fn(out); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/shared_library_emitter.h b/thin-provisioning/shared_library_emitter.h new file mode 100644 index 0000000..e0f8b95 --- /dev/null +++ b/thin-provisioning/shared_library_emitter.h @@ -0,0 +1,14 @@ +#ifndef THIN_PROVISIONING_SHARED_LIBRARY_EMITTER_H +#define THIN_PROVISIONING_SHARED_LIBRARY_EMITTER_H + +#include "thin-provisioning/emitter.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + emitter::ptr create_custom_emitter(std::string const &shared_lib, std::ostream &out); +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/static_library_emitter.cc b/thin-provisioning/static_library_emitter.cc new file mode 100644 index 0000000..973ec17 --- /dev/null +++ b/thin-provisioning/static_library_emitter.cc @@ -0,0 +1,20 @@ +#include "thin-provisioning/shared_library_emitter.h" +#include +#include "contrib/tmakatos_emitter.h" +#include + +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +emitter::ptr 
+thin_provisioning::create_custom_emitter(string const &shared_lib, ostream &out) +{ + if (shared_lib != "tmakatos_emitter.so") + throw runtime_error(shared_lib + ": no such emitter"); + + return emitter::ptr(new tmakatos_emitter::binary_emitter(out)); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/superblock.cc b/thin-provisioning/superblock.cc index 9258b53..fe6ef06 100644 --- a/thin-provisioning/superblock.cc +++ b/thin-provisioning/superblock.cc @@ -88,11 +88,23 @@ namespace { struct sb_validator : public bcache::validator { virtual void check(void const *raw, block_address location) const { + superblock_disk const *sbd = reinterpret_cast(raw); + crc32c sum(SUPERBLOCK_CSUM_SEED); + sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t)); + if (sum.get_sum() != to_cpu(sbd->csum_)) { + ostringstream out; + out << "bad checksum in superblock, wanted " << sum.get_sum(); + throw checksum_error(out.str()); + } + } + + virtual bool check_raw(void const *raw) const { superblock_disk const *sbd = reinterpret_cast(raw); crc32c sum(SUPERBLOCK_CSUM_SEED); sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t)); if (sum.get_sum() != to_cpu(sbd->csum_)) - throw checksum_error("bad checksum in superblock"); + return false; + return true; } virtual void prepare(void *raw, block_address location) const { @@ -146,22 +158,32 @@ namespace thin_provisioning { } } - superblock_detail::superblock read_superblock(block_manager<>::ptr bm, block_address location) + superblock_detail::superblock read_superblock(block_manager<> const &bm, block_address location) { using namespace superblock_detail; superblock sb; - block_manager<>::read_ref r = bm->read_lock(location, superblock_validator()); + block_manager<>::read_ref r = bm.read_lock(location, superblock_validator()); superblock_disk const *sbd = reinterpret_cast(r.data()); superblock_traits::unpack(*sbd, sb); return sb; } + superblock_detail::superblock 
read_superblock(block_manager<>::ptr bm, block_address location) + { + return read_superblock(*bm, location); + } + superblock_detail::superblock read_superblock(block_manager<>::ptr bm) { return read_superblock(bm, SUPERBLOCK_LOCATION); } + superblock_detail::superblock read_superblock(block_manager<> const &bm) + { + return read_superblock(bm, SUPERBLOCK_LOCATION); + } + void write_superblock(block_manager<>::ptr bm, superblock_detail::superblock const &sb) { block_manager<>::write_ref w = bm->write_lock(SUPERBLOCK_LOCATION, superblock_validator()); diff --git a/thin-provisioning/superblock.h b/thin-provisioning/superblock.h index f527a15..5e1ba2c 100644 --- a/thin-provisioning/superblock.h +++ b/thin-provisioning/superblock.h @@ -128,6 +128,8 @@ namespace thin_provisioning { // FIXME: should we put init_superblock in here too? + // FIXME: make the bm const, and pass by reference rather than ptr + superblock_detail::superblock read_superblock(persistent_data::block_manager<> const &bm); superblock_detail::superblock read_superblock(persistent_data::block_manager<>::ptr bm); superblock_detail::superblock read_superblock(persistent_data::block_manager<>::ptr bm, persistent_data::block_address location); diff --git a/thin-provisioning/thin_check.cc b/thin-provisioning/thin_check.cc index 4a3215c..d4fedaa 100644 --- a/thin-provisioning/thin_check.cc +++ b/thin-provisioning/thin_check.cc @@ -31,6 +31,7 @@ #include "persistent-data/file_utils.h" #include "thin-provisioning/device_tree.h" #include "thin-provisioning/mapping_tree.h" +#include "thin-provisioning/metadata_counter.h" #include "thin-provisioning/superblock.h" #include "thin-provisioning/commands.h" @@ -169,58 +170,13 @@ namespace { bool clear_needs_check_flag_on_success; }; - void count_trees(transaction_manager::ptr tm, - superblock_detail::superblock &sb, - block_counter &bc) { - - // Count the device tree - { - noop_value_counter vc; - device_tree dtree(*tm, sb.device_details_root_, - 
device_tree_detail::device_details_traits::ref_counter()); - count_btree_blocks(dtree, bc, vc); - } - - // Count the mapping tree - { - noop_value_counter vc; - mapping_tree mtree(*tm, sb.data_mapping_root_, - mapping_tree_detail::block_traits::ref_counter(tm->get_sm())); - count_btree_blocks(mtree, bc, vc); - } - } - error_state check_space_map_counts(flags const &fs, nested_output &out, superblock_detail::superblock &sb, block_manager<>::ptr bm, transaction_manager::ptr tm) { block_counter bc; - // Count the superblock - bc.inc(superblock_detail::SUPERBLOCK_LOCATION); - count_trees(tm, sb, bc); - - // Count the metadata snap, if present - if (sb.metadata_snap_ != superblock_detail::SUPERBLOCK_LOCATION) { - bc.inc(sb.metadata_snap_); - - superblock_detail::superblock snap = read_superblock(bm, sb.metadata_snap_); - count_trees(tm, snap, bc); - } - - // Count the metadata space map - { - persistent_space_map::ptr metadata_sm = - open_metadata_sm(*tm, static_cast(&sb.metadata_space_map_root_)); - metadata_sm->count_metadata(bc); - } - - // Count the data space map - { - persistent_space_map::ptr data_sm = - open_disk_sm(*tm, static_cast(&sb.data_space_map_root_)); - data_sm->count_metadata(bc); - } + count_metadata(tm, sb, bc); // Finally we need to check the metadata space map agrees // with the counts we've just calculated. diff --git a/thin-provisioning/thin_delta.cc b/thin-provisioning/thin_delta.cc index bc40f76..346d62c 100644 --- a/thin-provisioning/thin_delta.cc +++ b/thin-provisioning/thin_delta.cc @@ -464,31 +464,31 @@ namespace local { // We iterate through both sets of mappings in parallel // noting any differences. 
- mapping_stream ls(left); - mapping_stream rs(right); + mapping_stream ls{left}; + mapping_stream rs{right}; while (ls.more_mappings() && rs.more_mappings()) { - mapping const &lm = ls.get_mapping(); - mapping const &rm = rs.get_mapping(); + auto &lm = ls.get_mapping(); + auto &rm = rs.get_mapping(); if (lm.vbegin_ < rm.vbegin_) { - uint64_t delta = min(lm.len_, rm.vbegin_ - lm.vbegin_); + auto delta = min(lm.len_, rm.vbegin_ - lm.vbegin_); e.left_only(lm.vbegin_, lm.dbegin_, delta); ls.consume(delta); } else if (lm.vbegin_ > rm.vbegin_) { - uint64_t delta = min(rm.len_, lm.vbegin_ - rm.vbegin_); + auto delta = min(rm.len_, lm.vbegin_ - rm.vbegin_); e.right_only(rm.vbegin_, rm.dbegin_, delta); rs.consume(delta); } else if (lm.dbegin_ != rm.dbegin_) { - uint64_t delta = min(lm.len_, rm.len_); + auto delta = min(lm.len_, rm.len_); e.blocks_differ(lm.vbegin_, lm.dbegin_, rm.dbegin_, delta); ls.consume(delta); rs.consume(delta); } else { - uint64_t delta = min(lm.len_, rm.len_); + auto delta = min(lm.len_, rm.len_); e.blocks_same(lm.vbegin_, lm.dbegin_, delta); ls.consume(delta); rs.consume(delta); @@ -496,13 +496,13 @@ namespace local { } while (ls.more_mappings()) { - mapping const &lm = ls.get_mapping(); + auto &lm = ls.get_mapping(); e.left_only(lm.vbegin_, lm.dbegin_, lm.len_); ls.consume(lm.len_); } while (rs.more_mappings()) { - mapping const &rm = rs.get_mapping(); + auto &rm = rs.get_mapping(); e.right_only(rm.vbegin_, rm.dbegin_, rm.len_); rs.consume(rm.len_); } diff --git a/thin-provisioning/thin_dump.cc b/thin-provisioning/thin_dump.cc index 171c263..c6a964a 100644 --- a/thin-provisioning/thin_dump.cc +++ b/thin-provisioning/thin_dump.cc @@ -21,13 +21,14 @@ #include #include -#include "human_readable_format.h" -#include "metadata_dumper.h" -#include "metadata.h" -#include "xml_format.h" -#include "version.h" -#include "thin-provisioning/commands.h" #include "persistent-data/file_utils.h" +#include "thin-provisioning/commands.h" +#include 
"thin-provisioning/human_readable_format.h" +#include "thin-provisioning/metadata.h" +#include "thin-provisioning/metadata_dumper.h" +#include "thin-provisioning/shared_library_emitter.h" +#include "thin-provisioning/xml_format.h" +#include "version.h" using namespace boost; using namespace persistent_data; @@ -38,11 +39,13 @@ namespace { // FIXME: put the path into the flags struct flags { flags() - : repair(false), + : format("xml"), use_metadata_snap(false) { } - bool repair; + dump_options opts; + + string format; bool use_metadata_snap; optional snap_location; }; @@ -54,23 +57,36 @@ namespace { return md; } - int dump_(string const &path, ostream &out, string const &format, struct flags &flags) { + bool begins_with(string const &str, string const &prefix) { + return str.substr(0, prefix.length()) == prefix; + } + + emitter::ptr create_emitter(string const &format, ostream &out) { + emitter::ptr e; + + if (format == "xml") + e = create_xml_emitter(out); + + else if (format == "human_readable") + e = create_human_readable_emitter(out); + + else if (begins_with(format, "custom=")) + e = create_custom_emitter(format.substr(7), out); + + else { + ostringstream msg; + msg << "unknown format '" << format << "'"; + throw runtime_error(msg.str()); + } + + return e; + } + + int dump_(string const &path, ostream &out, struct flags &flags) { try { metadata::ptr md = open_metadata(path, flags); - emitter::ptr e; - - if (format == "xml") - e = create_xml_emitter(out); - - else if (format == "human_readable") - e = create_human_readable_emitter(out); - - else { - cerr << "unknown format '" << format << "'" << endl; - exit(1); - } - - metadata_dump(md, e, flags.repair); + emitter::ptr e = create_emitter(flags.format, out); + metadata_dump(md, e, flags.opts); } catch (std::exception &e) { cerr << e.what() << endl; @@ -80,12 +96,12 @@ namespace { return 0; } - int dump(string const &path, char const *output, string const &format, struct flags &flags) { + int dump(string const 
&path, char const *output, struct flags &flags) { if (output) { ofstream out(output); - return dump_(path, out, format, flags); + return dump_(path, out, flags); } else - return dump_(path, cout, format, flags); + return dump_(path, cout, flags); } } @@ -99,13 +115,14 @@ thin_dump_cmd::thin_dump_cmd() void thin_dump_cmd::usage(std::ostream &out) const { - out << "Usage: " << get_name() << " [options] {device|file}" << endl - << "Options:" << endl - << " {-h|--help}" << endl - << " {-f|--format} {xml|human_readable}" << endl - << " {-r|--repair}" << endl - << " {-m|--metadata-snap} [block#]" << endl - << " {-o }" << endl + out << "Usage: " << get_name() << " [options] {device|file}\n" + << "Options:\n" + << " {-h|--help}\n" + << " {-f|--format} {xml|human_readable|custom}\n" + << " {-r|--repair}\n" + << " {-m|--metadata-snap} [block#]\n" + << " {-o }\n" + << " {--dev-id} \n" << " {-V|--version}" << endl; } @@ -116,8 +133,8 @@ thin_dump_cmd::run(int argc, char **argv) char const *output = NULL; const char shortopts[] = "hm::o:f:rV"; char *end_ptr; - string format = "xml"; block_address metadata_snap = 0; + uint64_t dev_id; struct flags flags; const struct option longopts[] = { @@ -126,6 +143,8 @@ thin_dump_cmd::run(int argc, char **argv) { "output", required_argument, NULL, 'o'}, { "format", required_argument, NULL, 'f' }, { "repair", no_argument, NULL, 'r'}, + { "dev-id", required_argument, NULL, 1 }, + { "skip-mappings", no_argument, NULL, 2 }, { "version", no_argument, NULL, 'V'}, { NULL, no_argument, NULL, 0 } }; @@ -137,11 +156,11 @@ thin_dump_cmd::run(int argc, char **argv) return 0; case 'f': - format = optarg; + flags.format = optarg; break; case 'r': - flags.repair = true; + flags.opts.repair_ = true; break; case 'm': @@ -150,7 +169,7 @@ thin_dump_cmd::run(int argc, char **argv) // FIXME: deprecate this option metadata_snap = strtoull(optarg, &end_ptr, 10); if (end_ptr == optarg) { - cerr << "couldn't parse " << endl; + cerr << "couldn't parse " << endl; 
usage(cerr); return 1; } @@ -163,6 +182,20 @@ thin_dump_cmd::run(int argc, char **argv) output = optarg; break; + case 1: + dev_id = strtoull(optarg, &end_ptr, 10); + if (end_ptr == optarg) { + cerr << "couldn't parse \n"; + usage(cerr); + return 1; + } + flags.opts.select_dev(dev_id); + break; + + case 2: + flags.opts.skip_mappings_ = true; + break; + case 'V': cout << THIN_PROVISIONING_TOOLS_VERSION << endl; return 0; @@ -179,7 +212,7 @@ thin_dump_cmd::run(int argc, char **argv) return 1; } - return dump(argv[optind], output, format, flags); + return dump(argv[optind], output, flags); } //---------------------------------------------------------------- diff --git a/thin-provisioning/thin_generate_metadata.cc b/thin-provisioning/thin_generate_metadata.cc new file mode 100644 index 0000000..6eb3c62 --- /dev/null +++ b/thin-provisioning/thin_generate_metadata.cc @@ -0,0 +1,215 @@ +// Copyright (C) 2016 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include "base/output_file_requirements.h" +#include "persistent-data/file_utils.h" +#include "thin-provisioning/commands.h" +#include "thin-provisioning/emitter.h" +#include "thin-provisioning/human_readable_format.h" +#include "thin-provisioning/metadata.h" +#include "thin-provisioning/restore_emitter.h" +#include "thin-provisioning/xml_format.h" +#include "version.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace boost; +using namespace persistent_data; +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + struct flags { + flags() + : data_block_size(128), + nr_data_blocks(10240), + nr_thins(1), + blocks_per_thin(1024), + run_lengths(1024) { + } + + block_address data_block_size; + block_address nr_data_blocks; + unsigned nr_thins; + block_address blocks_per_thin; + block_address run_lengths; + optional output; + }; + + // This is how we stir in some entropy. It mixes up the data + // device. 
+ class shuffler { + public: + shuffler(block_address nr_blocks, unsigned run_lengths) + : nr_blocks_(nr_blocks / run_lengths), + run_lengths_(run_lengths) { + } + + block_address map(block_address b) const { + return reverse(b / run_lengths_) + (b % run_lengths_); + } + + private: + block_address reverse(block_address b) const { + return nr_blocks_ - b - 1ull; + } + + block_address nr_blocks_; + block_address run_lengths_; + }; + + void generate_device(emitter::ptr e, shuffler const &s, uint32_t dev_id, + block_address nr_blocks, block_address base) { + + e->begin_device(dev_id, nr_blocks, 0, 0, 0); + for (unsigned b = 0; b < nr_blocks; b++) + e->single_map(b, s.map(base + b), 0); + e->end_device(); + } + + void generate_metadata(flags const &fs, emitter::ptr e) { + e->begin_superblock("fake metadata", 0, 0, optional(), optional(), + fs.data_block_size, fs.nr_data_blocks, optional()); + + shuffler s(fs.nr_data_blocks, fs.run_lengths); + for (unsigned i = 0; i < fs.nr_thins; i++) + generate_device(e, s, i, fs.blocks_per_thin, i * fs.blocks_per_thin); + + e->end_superblock(); + } + + int create_metadata(flags const &fs) { + try { + // The block size gets updated by the restorer. 
+ block_manager<>::ptr bm(open_bm(*fs.output, block_manager<>::READ_WRITE)); + metadata::ptr md(new metadata(bm, metadata::CREATE, 128, 0)); + emitter::ptr restorer = create_restore_emitter(md); + + generate_metadata(fs, restorer); + + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + + return 0; + } +} + +//---------------------------------------------------------------- + +thin_generate_metadata_cmd::thin_generate_metadata_cmd() + : command("thin_generate_metadata") +{ +} + +void +thin_generate_metadata_cmd::usage(std::ostream &out) const +{ + out << "Usage: " << get_name() << " [options]\n" + << "Options:\n" + << " {-h|--help}\n" + << " --data-block-size \n" + << " --nr-data-blocks \n" + << " --nr-thins \n" + << " --blocks-per-thin \n" + << " --run-lengths \n" + << " {-o|--output} \n" + << " {-V|--version}" << endl; +} + +int +thin_generate_metadata_cmd::run(int argc, char **argv) +{ + int c; + struct flags fs; + const char *shortopts = "hi:o:qV"; + const struct option longopts[] = { + { "help", no_argument, NULL, 'h'}, + { "output", required_argument, NULL, 'o'}, + { "data-block-size", required_argument, NULL, 1}, + { "nr-data-blocks", required_argument, NULL, 2}, + { "nr-thins", required_argument, NULL, 3}, + { "blocks-per-thin", required_argument, NULL, 4}, + { "run-lengths", required_argument, NULL, 5}, + { "version", no_argument, NULL, 'V'}, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'o': + fs.output = optarg; + break; + + case 1: + fs.data_block_size = parse_uint64(optarg, "data block size"); + break; + + case 2: + fs.nr_data_blocks = parse_uint64(optarg, "nr data blocks"); + break; + + case 3: + fs.nr_thins = parse_uint64(optarg, "nr thins"); + break; + + case 4: + fs.blocks_per_thin = parse_uint64(optarg, "blocks per thin"); + break; + + case 5: + fs.run_lengths = parse_uint64(optarg, "run 
lengths"); + break; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + default: + usage(cerr); + return 1; + } + } + + if (!fs.output) { + cerr << "No output file provided.\n\n"; + usage(cerr); + return 1; + } else + check_output_file_requirements(*fs.output); + + return create_metadata(fs); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/thin_ll_dump.cc b/thin-provisioning/thin_ll_dump.cc new file mode 100644 index 0000000..94f6a08 --- /dev/null +++ b/thin-provisioning/thin_ll_dump.cc @@ -0,0 +1,417 @@ +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include +#include +#include +#include +#include + +#include "base/indented_stream.h" +#include "persistent-data/file_utils.h" +#include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/btree_counter.h" +#include "persistent-data/data-structures/btree_damage_visitor.h" +#include "persistent-data/data-structures/btree_node_checker.h" +#include "persistent-data/data-structures/simple_traits.h" +#include "persistent-data/space-maps/core.h" +#include "persistent-data/space-maps/disk_structures.h" +#include "thin-provisioning/metadata.h" +#include "thin-provisioning/metadata_counter.h" +#include "thin-provisioning/commands.h" +#include "version.h" + +using namespace thin_provisioning; +using namespace persistent_data; + +//---------------------------------------------------------------- + +namespace { + transaction_manager::ptr + open_tm(block_manager<>::ptr bm) { + space_map::ptr sm(new core_map(bm->get_nr_blocks())); + sm->inc(superblock_detail::SUPERBLOCK_LOCATION); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); + return tm; + } +} + +//--------------------------------------------------------------------------- + +namespace { + struct node_info { + uint64_t blocknr_; + uint32_t flags_; + uint64_t key_begin_; + uint64_t key_end_; + uint64_t nr_entries_; + uint32_t value_size_; + }; + + //------------------------------------------------------------------- + + struct btree_node_checker { + typedef boost::shared_ptr ptr; + virtual ~btree_node_checker() {} + virtual bool check(node_ref &n) = 0; + }; + + struct unvisited_btree_node_filter: public btree_node_checker { + unvisited_btree_node_filter(block_counter const &bc) + : nv_(create_btree_node_validator()), bc_(bc) { + } + + virtual bool check(node_ref &n) { + uint32_t flags = to_cpu(n.raw()->header.flags); + if ((n.get_value_size() == sizeof(mapping_tree_detail::block_traits::disk_type) || + n.get_value_size() == 
sizeof(device_tree_detail::device_details_traits::disk_type)) && + !bc_.get_count(n.get_location()) && + checker_.check_block_nr(n) && + (((flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) || + (flags & LEAF_NODE)) && + nv_->check_raw(n.raw()) && + checker_.check_max_entries(n) && + checker_.check_nr_entries(n, true) && + checker_.check_ordered_keys(n)) + return true; + return false; + } + + bcache::validator::ptr nv_; + block_counter const &bc_; + btree_detail::btree_node_checker checker_; + }; + + //------------------------------------------------------------------- + + void find_btree_nodes(block_manager<>::ptr bm, + block_address begin, + block_address end, + btree_node_checker::ptr checker, + base::run_set &found) { + using namespace persistent_data; + + for (block_address b = begin; b < end; ++b) { + block_manager<>::read_ref rr = bm->read_lock(b); + node_ref n = btree_detail::to_node(rr); + + if (checker->check(n)) + found.add(b); + } + } + + //------------------------------------------------------------------- + + bool first_key_cmp(node_info const &lhs, node_info const &rhs) { + return lhs.key_begin_ < rhs.key_begin_; + } + + template + void convert_to_node_info(node_ref const &n, node_info &ni) { + ni.blocknr_ = n.get_location(); + ni.flags_ = to_cpu(n.raw()->header.flags); + if ((ni.nr_entries_ = n.get_nr_entries()) > 0) { + ni.key_begin_ = n.key_at(0); + ni.key_end_ = n.key_at(n.get_nr_entries() - 1); + } + ni.value_size_ = n.get_value_size(); + } + + void output_node_info(indented_stream &out, node_info const &ni) { + out.indent(); + out << "" << endl; + } + + //------------------------------------------------------------------- + + class ll_mapping_tree_emitter : public mapping_tree_detail::device_visitor { + public: + ll_mapping_tree_emitter(block_manager<>::ptr bm, + indented_stream &out) + : bm_(bm), out_(out) { + } + + void visit(btree_path const &path, block_address tree_root) { + out_.indent(); + out_ << "" << endl; + out_.inc(); + + // Do not 
throw exception. Process the next entry inside the current node. + try { + block_manager<>::read_ref rr = bm_->read_lock(tree_root); + node_ref n = btree_detail::to_node(rr); + node_info ni; + convert_to_node_info(n, ni); + output_node_info(out_, ni); + } catch (std::exception &e) { + cerr << e.what() << endl; + } + + out_.dec(); + out_.indent(); + out_ << "" << endl; + } + private: + block_manager<>::ptr bm_; + indented_stream& out_; + }; + + //------------------------------------------------------------------- + + struct flags { + flags() : use_metadata_snap_(false) { + } + + bool use_metadata_snap_; + boost::optional metadata_snap_; + boost::optional data_mapping_root_; + boost::optional device_details_root_; + boost::optional scan_begin_; + boost::optional scan_end_; + }; + + int low_level_dump_(string const &input, + std::ostream &output, + flags const &f) { + block_manager<>::ptr bm = open_bm(input, block_manager<>::READ_ONLY); + + block_address scan_begin = f.scan_begin_ ? *f.scan_begin_ : 0; + block_address scan_end = f.scan_end_ ? *f.scan_end_ : bm->get_nr_blocks(); + + // Allow to read superblock at arbitrary location for low-level dump, + // without checking equality between the given metadata_snap and sb.metadata_snap_ + superblock_detail::superblock sb = read_superblock(bm, superblock_detail::SUPERBLOCK_LOCATION); + if (f.use_metadata_snap_) { + sb = f.metadata_snap_ ? 
+ read_superblock(bm, *f.metadata_snap_) : + read_superblock(bm, sb.metadata_snap_); + } + // override sb.data_mapping_root_ + if (f.data_mapping_root_) + sb.data_mapping_root_ = *f.data_mapping_root_; + // override sb.device_details_root_ + if (f.device_details_root_) + sb.device_details_root_ = *f.device_details_root_; + + transaction_manager::ptr tm = open_tm(bm); + + indented_stream out(output); + + out.indent(); + out << "" << endl; + out.inc(); + + // output the top-level data mapping tree + ll_mapping_tree_emitter ll_mte(tm->get_bm(), out); + dev_tree dtree(*tm, sb.data_mapping_root_, + mapping_tree_detail::mtree_traits::ref_counter(tm)); + noop_damage_visitor noop_dv; + btree_visit_values(dtree, ll_mte, noop_dv); + + out.dec(); + out.indent(); + out << "" << endl; + + // find orphans + binary_block_counter bc; + bc.inc(superblock_detail::SUPERBLOCK_LOCATION); + count_metadata(tm, sb, bc, true); + btree_node_checker::ptr filter = btree_node_checker::ptr( + new unvisited_btree_node_filter(bc)); + base::run_set orphans; + find_btree_nodes(bm, scan_begin, scan_end, filter, orphans); + + // sort orphans + std::vector nodes; + for (base::run_set::const_iterator it = orphans.begin(); + it != orphans.end(); + ++it) { + if (it->begin_ && it->end_) { + for (block_address b = *it->begin_; b < *it->end_; ++b) { + block_manager<>::read_ref rr = bm->read_lock(b); + node_ref n = btree_detail::to_node(rr); + nodes.push_back(node_info()); + convert_to_node_info(n, nodes.back()); + } + } + } + std::sort(nodes.begin(), nodes.end(), first_key_cmp); + + // output orphans + out.indent(); + out << "" << std::endl; + out.inc(); + + for (size_t i = 0; i < nodes.size(); ++i) + output_node_info(out, nodes[i]); + + out.dec(); + out.indent(); + out << "" << std::endl; + + return 0; + } + + int low_level_dump(string const &input, + boost::optional output, + flags const &f) { + try { + if (output) { + ofstream out(output->c_str()); + low_level_dump_(input, out, f); + } else + 
low_level_dump_(input, cout, f); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + return 0; + } +} + +//--------------------------------------------------------------------------- + +thin_ll_dump_cmd::thin_ll_dump_cmd() + : command("thin_ll_dump") +{ +} + +void +thin_ll_dump_cmd::usage(ostream &out) const { + out << "Usage: " << get_name() << " [options] {device|file}" << endl + << "Options:" << endl + << " {-h|--help}" << endl + << " {-m|--metadata-snap}[block#]" << endl + << " {-o|--output} " << endl + << " {--begin} " << endl + << " {--end} " << endl + << " {--data-mapping-root} " << endl + << " {--device-details-root} " << endl + << " {-V|--version}" << endl; +} + +int +thin_ll_dump_cmd::run(int argc, char **argv) +{ + const char shortopts[] = "hm::o:V"; + const struct option longopts[] = { + { "help", no_argument, NULL, 'h'}, + { "metadata-snap", optional_argument, NULL, 'm'}, + { "output", required_argument, NULL, 'o'}, + { "version", no_argument, NULL, 'V'}, + { "begin", required_argument, NULL, 1}, + { "end", required_argument, NULL, 2}, + { "data-mapping-root", required_argument, NULL, 3}, + { "device-details-root", required_argument, NULL, 4}, + { NULL, no_argument, NULL, 0 } + }; + boost::optional output; + flags f; + + int c; + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'm': + f.use_metadata_snap_ = true; + if (optarg) { + try { + f.metadata_snap_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + } + break; + + case 'o': + output = optarg; + break; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + case 1: + try { + f.scan_begin_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + break; + + case 2: + try { + f.scan_end_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr 
<< e.what() << endl; + return 1; + } + break; + + case 3: + try { + f.data_mapping_root_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + break; + + case 4: + try { + f.device_details_root_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + break; + + default: + usage(cerr); + return 1; + } + } + + if (argc == optind) { + cerr << "No input file provided." << endl; + usage(cerr); + return 1; + } + + if (f.scan_begin_ && f.scan_end_ && (*f.scan_end_ <= *f.scan_begin_)) { + cerr << "badly formed region (end <= begin)" << endl; + usage(cerr); + return 1; + } + + return low_level_dump(argv[optind], output, f); +} + +//--------------------------------------------------------------------------- diff --git a/thin-provisioning/thin_ll_restore.cc b/thin-provisioning/thin_ll_restore.cc new file mode 100644 index 0000000..ff62298 --- /dev/null +++ b/thin-provisioning/thin_ll_restore.cc @@ -0,0 +1,287 @@ +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include "base/output_file_requirements.h" +#include "base/xml_utils.h" +#include "metadata_dumper.h" +#include "metadata.h" +#include "persistent-data/file_utils.h" +#include "persistent-data/space-maps/disk_structures.h" +#include "restore_emitter.h" +#include "xml_format.h" +#include "thin-provisioning/commands.h" +#include "version.h" + +#include +#include +#include + +using namespace persistent_data; +using namespace std; +using namespace thin_provisioning; +using namespace xml_utils; + +//---------------------------------------------------------------- + +namespace { + struct user_data { + block_manager<>::ptr input_bm_; + block_manager<>::ptr output_bm_; + + metadata::ptr md_; + XML_Parser parser_; + emitter::ptr emitter_; + }; + + void open_resources(user_data &ud, attributes const &attr) { + boost::optional val; + + // open the input metadata + // Allow to read superblock at arbitrary location for low-level restore + block_address sb_location = (val = get_opt_attr(attr, "blocknr")) ? 
+ *val : superblock_detail::SUPERBLOCK_LOCATION; + ud.md_ = metadata::ptr(new metadata(ud.input_bm_, sb_location)); + + // override superblock::device_details_root_ + if ((val = get_opt_attr(attr, "device_details_root"))) { + ud.md_->sb_.device_details_root_ = *val; + ud.md_->details_ = device_tree::ptr(new device_tree(*ud.md_->tm_, *val, + device_tree_detail::device_details_traits::ref_counter())); + } + + // open the output metadata + metadata::ptr new_md(new metadata(ud.output_bm_, metadata::CREATE, 128, 0)); + + ud.emitter_ = create_restore_emitter(new_md); + } + + void parse_superblock(metadata::ptr md, emitter::ptr e, attributes const &attr) { + sm_disk_detail::sm_root_disk const *d = + reinterpret_cast(md->sb_.data_space_map_root_); + sm_disk_detail::sm_root v; + sm_disk_detail::sm_root_traits::unpack(*d, v); + + e->begin_superblock("", md->sb_.time_, + md->sb_.trans_id_, + md->sb_.flags_, + md->sb_.version_, + md->sb_.data_block_size_, + v.nr_blocks_, + boost::optional()); + } + + void parse_device(metadata::ptr md, emitter::ptr e, attributes const &attr) { + uint32_t dev_id = get_attr(attr, "dev_id"); + device_tree_detail::device_details details; + + device_tree::ptr details_tree; + boost::optional details_root = get_opt_attr(attr, "blocknr"); + if (details_root) + details_tree = device_tree::ptr(new device_tree(*md->tm_, *details_root, + device_tree_detail::device_details_traits::ref_counter())); + else + details_tree = md->details_; + + uint64_t key[1] = {dev_id}; + device_tree::maybe_value v; + try { + v = details_tree->lookup(key); + } catch (std::exception &e) { + cerr << "missing device " << dev_id << ": " << e.what() << endl; + } + if (v) + details = *v; + + e->begin_device(dev_id, + 0, + details.transaction_id_, + details.creation_time_, + details.snapshotted_time_); + } + + void parse_node(metadata::ptr md, emitter::ptr e, attributes const &attr) { + metadata_dump_subtree(md, e, true, get_attr(attr, "blocknr")); + } + + void start_tag(void *data, 
char const *el, char const **attr) { + user_data *ud = static_cast(data); + attributes a; + + build_attributes(a, attr); + + if (!strcmp(el, "superblock")) { + open_resources(*ud, a); + parse_superblock(ud->md_, ud->emitter_, a); + + } else if (!strcmp(el, "device")) + parse_device(ud->md_, ud->emitter_, a); + + else if (!strcmp(el, "node")) + parse_node(ud->md_, ud->emitter_, a); + + else + throw runtime_error("unknown tag type"); + } + + void end_tag(void *data, const char *el) { + user_data *ud = static_cast(data); + + if (!strcmp(el, "superblock")) { + ud->emitter_->end_superblock(); + XML_StopParser(ud->parser_, XML_FALSE); // skip the rest elements + } + + else if (!strcmp(el, "device")) + ud->emitter_->end_device(); + + else if (!strcmp(el, "node")) + ; + + else + throw runtime_error("unknown tag type"); + } +} + +//--------------------------------------------------------------------------- + +namespace { + struct flags { + flags() { + } + }; + + int low_level_restore_(string const &src_metadata, string const &input, + string const &output, flags const &f) { + user_data ud; + ud.input_bm_ = open_bm(src_metadata, block_manager<>::READ_ONLY); + ud.output_bm_ = open_bm(output, block_manager<>::READ_WRITE); + + xml_parser p; + ud.parser_ = p.get_parser(); + + XML_SetUserData(p.get_parser(), &ud); + XML_SetElementHandler(p.get_parser(), start_tag, end_tag); + + bool quiet = true; + p.parse(input, quiet); + + return 0; + } + + int low_level_restore(string const &src_metadata, string const &input, + string const &output, flags const &f) { + try { + low_level_restore_(src_metadata, input, output, f); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + return 0; + } +} + +//--------------------------------------------------------------------------- + +thin_ll_restore_cmd::thin_ll_restore_cmd() + : command("thin_ll_restore") +{ +} + +void +thin_ll_restore_cmd::usage(ostream &out) const { + out << "Usage: " << get_name() << " [options]" << 
endl + << "Options:" << endl + << " {-h|--help}" << endl + << " {-E|--source-metadata} " << endl + << " {-i|--input} " << endl + << " {-o|--output} " << endl + << " {-V|--version}" << endl; +} + +int +thin_ll_restore_cmd::run(int argc, char **argv) { + string input; + string output; + string input_metadata; + flags f; + int c; + + const char shortopts[] = "hi:o:E:V"; + const struct option longopts[] = { + { "help", no_argument, NULL, 'h'}, + { "input", required_argument, NULL, 'i'}, + { "output", required_argument, NULL, 'o'}, + { "source-metadata", required_argument, NULL, 'E'}, + { "version", no_argument, NULL, 'V'}, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'i': + input = optarg; + break; + + case 'o': + output = optarg; + break; + + case 'E': + input_metadata = optarg; + break; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + default: + usage(cerr); + return 1; + } + } + + if (argc != optind) { + usage(cerr); + return 1; + } + + if (!input.length()) { + cerr << "No input file provided." << endl; + usage(cerr); + return 1; + } + + if (!input_metadata.length()) { + cerr << "No input metadata provided." << endl; + usage(cerr); + return 1; + } + + if (!output.length()) { + cerr << "No output file provided." 
<< endl; + usage(cerr); + return 1; + } else + check_output_file_requirements(output); + + return low_level_restore(input_metadata, input, output, f); +} + +//--------------------------------------------------------------------------- diff --git a/thin-provisioning/thin_ls.cc b/thin-provisioning/thin_ls.cc index 4e0b97d..03b8d63 100644 --- a/thin-provisioning/thin_ls.cc +++ b/thin-provisioning/thin_ls.cc @@ -248,7 +248,7 @@ namespace { mapping_pass1 pass1(mappings); fatal_mapping_damage dv; - walk_mapping_tree(dev_mappings, pass1, dv); + walk_mapping_tree(dev_mappings, dev_id, pass1, dv); } @@ -264,7 +264,7 @@ namespace { mapping_pass2 pass2(mappings); fatal_mapping_damage dv; - walk_mapping_tree(dev_mappings, pass2, dv); + walk_mapping_tree(dev_mappings, dev_id, pass2, dv); return pass2.get_exclusives(); } diff --git a/thin-provisioning/thin_pool.cc b/thin-provisioning/thin_pool.cc index 1596c90..d133711 100644 --- a/thin-provisioning/thin_pool.cc +++ b/thin-provisioning/thin_pool.cc @@ -50,7 +50,7 @@ thin::lookup(block_address thin_block) return pool_->md_->mappings_->lookup(key); } -void +bool thin::insert(block_address thin_block, block_address data_block) { uint64_t key[2] = {dev_, thin_block}; @@ -232,7 +232,7 @@ bool thin_pool::device_exists(thin_dev_t dev) const { uint64_t key[1] = {dev}; - return md_->details_->lookup(key); + return !!md_->details_->lookup(key); } //---------------------------------------------------------------- diff --git a/thin-provisioning/thin_pool.h b/thin-provisioning/thin_pool.h index 66ae307..0c6b156 100644 --- a/thin-provisioning/thin_pool.h +++ b/thin-provisioning/thin_pool.h @@ -39,7 +39,7 @@ namespace thin_provisioning { thin_dev_t get_dev_t() const; maybe_address lookup(block_address thin_block); - void insert(block_address thin_block, block_address data_block); + bool insert(block_address thin_block, block_address data_block); void remove(block_address thin_block); void set_snapshot_time(uint32_t time); diff --git 
a/thin-provisioning/thin_repair.cc b/thin-provisioning/thin_repair.cc index 716970e..feaaf72 100644 --- a/thin-provisioning/thin_repair.cc +++ b/thin-provisioning/thin_repair.cc @@ -26,7 +26,10 @@ namespace { block_manager<>::ptr old_bm = open_bm(old_path, block_manager<>::READ_ONLY); metadata::ptr old_md(new metadata(old_bm, false)); - metadata_dump(old_md, e, true); + + dump_options opts; + opts.repair_ = true; + metadata_dump(old_md, e, opts); } catch (std::exception &e) { cerr << e.what() << endl; diff --git a/thin-provisioning/thin_scan.cc b/thin-provisioning/thin_scan.cc new file mode 100644 index 0000000..6f63df0 --- /dev/null +++ b/thin-provisioning/thin_scan.cc @@ -0,0 +1,439 @@ +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include +#include +#include +#include +#include + +#include "persistent-data/data-structures/btree.h" +#include "persistent-data/data-structures/simple_traits.h" +#include "persistent-data/file_utils.h" +#include "persistent-data/space-maps/core.h" +#include "persistent-data/space-maps/disk_structures.h" +#include "thin-provisioning/metadata.h" +#include "thin-provisioning/superblock.h" +#include "thin-provisioning/commands.h" +#include "version.h" + +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + // extracted from btree_damage_visitor.h + template + bool check_block_nr(node const &n) { + if (n.get_location() != n.get_block_nr()) { + return false; + } + return true; + } + + // extracted from btree_damage_visitor.h + template + bool check_max_entries(node const &n) { + size_t elt_size = sizeof(uint64_t) + n.get_value_size(); + if (elt_size * n.get_max_entries() + sizeof(node_header) > MD_BLOCK_SIZE) { + return false; + } + + if (n.get_max_entries() % 3) { + return false; + } + + return true; + } + + // extracted from btree_damage_visitor.h + template + bool check_nr_entries(node const &n, bool is_root) { + if (n.get_nr_entries() > n.get_max_entries()) { + return false; + } + + block_address min = n.get_max_entries() / 3; + if (!is_root && (n.get_nr_entries() < min)) { + return false; + } + + return true; + } + + // extracted from btree_damage_visitor.h + template + bool check_ordered_keys(node const &n) { + unsigned nr_entries = n.get_nr_entries(); + + if (nr_entries == 0) + return true; // can only happen if a root node + + uint64_t last_key = n.key_at(0); + + for (unsigned i = 1; i < nr_entries; i++) { + uint64_t k = n.key_at(i); + if (k <= last_key) { + return false; + } + last_key = k; + } + + return true; + } + + transaction_manager::ptr + open_tm(block_manager<>::ptr bm) { + space_map::ptr sm(new core_map(bm->get_nr_blocks())); + sm->inc(superblock_detail::SUPERBLOCK_LOCATION); + 
transaction_manager::ptr tm(new transaction_manager(bm, sm)); + return tm; + } +} + +namespace { + // FIXME: deprecated conversion from string constant to ‘char*’ + char const* metadata_block_type_name[] = { + "unknown", + "zero", + "superblock", + "btree_internal", + "btree_leaf", + "btree_unknown", + "index_block", + "bitmap_block" + }; + + enum metadata_block_type { + UNKNOWN = 0, + ZERO, + SUPERBLOCK, + BTREE_INTERNAL, + BTREE_LEAF, + BTREE_UNKNOWN, + INDEX_BLOCK, + BITMAP_BLOCK + }; + + struct block_range { + block_range() + : begin_(0), end_(0), + type_(UNKNOWN), ref_count_(-1), + value_size_(0), is_valid_(false) + { + } + + block_range(block_range const &rhs) + : begin_(rhs.begin_), end_(rhs.end_), + blocknr_begin_(rhs.blocknr_begin_), + type_(rhs.type_), ref_count_(rhs.ref_count_), + value_size_(rhs.value_size_), is_valid_(rhs.is_valid_) + { + } + + uint64_t size() const { + return (end_ > begin_) ? (end_ - begin_) : 0; + } + + // returns true if r is left or right-adjacent + bool is_adjacent_to(block_range const &r) const { + block_range const &lhs = begin_ < r.begin_ ? *this : r; + block_range const &rhs = begin_ < r.begin_ ? 
r : *this; + + if (size() && r.size() && + rhs.begin_ == lhs.end_ && + ((!blocknr_begin_ && !r.blocknr_begin_) || + (blocknr_begin_ && r.blocknr_begin_ && + *rhs.blocknr_begin_ >= *lhs.blocknr_begin_ && + (*rhs.blocknr_begin_ - *lhs.blocknr_begin_ == rhs.begin_ - lhs.begin_))) && + type_ == r.type_ && + ref_count_ == r.ref_count_ && + value_size_ == r.value_size_ && + is_valid_ == r.is_valid_) + return true; + + return false; + } + + bool concat(block_range const &r) { + if (!is_adjacent_to(r)) + return false; + begin_ = std::min(begin_, r.begin_); + end_ = std::max(end_, r.end_); + return true; + } + + uint64_t begin_; + uint64_t end_; // one-pass-the-end + boost::optional blocknr_begin_; + metadata_block_type type_; + int64_t ref_count_; // ref_count in metadata space map + size_t value_size_; // btree node only + bool is_valid_; + }; + + void output_block_range(block_range const &r, std::ostream &out) { + if (!r.size()) + return; + + if (r.end_ - r.begin_ > 1) { + out << "" << endl; + } else + out << "\"/>" << endl; + } + + //------------------------------------------------------------------- + + struct flags { + flags() { + } + + boost::optional scan_begin_; + boost::optional scan_end_; + }; + + int scan_metadata_(string const &input, + std::ostream &out, + flags const &f) { + using namespace persistent_data; + using namespace thin_provisioning; + using namespace sm_disk_detail; + + block_manager<>::ptr bm; + bm = open_bm(input, block_manager<>::READ_ONLY); + + block_address scan_begin = f.scan_begin_ ? *f.scan_begin_ : 0; + block_address scan_end = f.scan_end_ ? 
*f.scan_end_ : bm->get_nr_blocks(); + + const std::vector zeros(MD_BLOCK_SIZE, 0); + + // try to open metadata space-map (it's okay to fail) + // note: transaction_manager and space_map must be in the same scope + transaction_manager::ptr tm; + checked_space_map::ptr metadata_sm; + try { + superblock_detail::superblock sb = read_superblock(bm); + tm = open_tm(bm); + metadata_sm = open_metadata_sm(*tm, &sb.metadata_space_map_root_); + tm->set_sm(metadata_sm); + } catch (std::exception &e) { + cerr << e.what() << endl; + } + + block_range curr_range; + block_range run_range; + + bcache::validator::ptr sv = superblock_validator(); + bcache::validator::ptr nv = create_btree_node_validator(); + bcache::validator::ptr iv = index_validator(); + bcache::validator::ptr bv = bitmap_validator(); + + for (block_address b = scan_begin; b < scan_end; ++b) { + block_manager<>::read_ref rr = bm->read_lock(b); + + curr_range.begin_ = b; + curr_range.end_ = b + 1; + curr_range.blocknr_begin_ = boost::none; + curr_range.type_ = UNKNOWN; + curr_range.is_valid_ = false; + + if (!memcmp(rr.data(), zeros.data(), MD_BLOCK_SIZE)) + curr_range.type_ = ZERO; + + if (curr_range.type_ == UNKNOWN && sv->check_raw(rr.data())) { + curr_range.type_ = SUPERBLOCK; + curr_range.is_valid_ = true; + } + + if (curr_range.type_ == UNKNOWN && nv->check_raw(rr.data())) { + // note: check_raw() doesn't check node_header::blocknr_ + node_ref n = btree_detail::to_node(rr); + uint32_t flags = to_cpu(n.raw()->header.flags); + if ((flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) + curr_range.type_ = BTREE_INTERNAL; + else if (flags & LEAF_NODE) + curr_range.type_ = BTREE_LEAF; + else + curr_range.type_ = BTREE_UNKNOWN; + + if (curr_range.type_ != BTREE_UNKNOWN && + check_block_nr(n) && + check_max_entries(n) && + check_nr_entries(n, true) && + check_ordered_keys(n)) + curr_range.is_valid_ = true; + else + curr_range.is_valid_ = false; + + curr_range.blocknr_begin_ = n.get_block_nr(); + curr_range.value_size_ = 
n.get_value_size(); + } + + if (curr_range.type_ == UNKNOWN && bv->check_raw(rr.data())) { + curr_range.type_ = BITMAP_BLOCK; + bitmap_header const *data = reinterpret_cast(rr.data()); + curr_range.blocknr_begin_ = to_cpu(data->blocknr); + curr_range.is_valid_ = (to_cpu(data->blocknr) == b) ? true : false; + } + + if (curr_range.type_ == UNKNOWN && iv->check_raw(rr.data())) { + curr_range.type_ = INDEX_BLOCK; + metadata_index const *mi = reinterpret_cast(rr.data()); + curr_range.blocknr_begin_ = to_cpu(mi->blocknr_); + curr_range.is_valid_ = (to_cpu(mi->blocknr_) == b) ? true : false; + } + + try { + curr_range.ref_count_ = metadata_sm ? + static_cast(metadata_sm->get_count(b)) : -1; + } catch (std::exception &e) { + curr_range.ref_count_ = -1; + } + + // store the current block + if (!run_range.concat(curr_range)) { + output_block_range(run_range, out); + run_range = curr_range; + } + } + + // output the last run + output_block_range(run_range, out); + + return 0; + } + + int scan_metadata(string const &input, + boost::optional output, + flags const &f) { + try { + if (output) { + std::ofstream out(output->c_str()); + scan_metadata_(input, out, f); + } else + scan_metadata_(input, cout, f); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + return 0; + } +} + +//--------------------------------------------------------------------------- + +thin_scan_cmd::thin_scan_cmd() + : command("thin_scan") +{ +} + +void +thin_scan_cmd::usage(std::ostream &out) const { + out << "Usage: " << get_name() << " [options] {device|file}" << endl + << "Options:" << endl + << " {-h|--help}" << endl + << " {-o|--output} " << endl + << " {--begin} " << endl + << " {--end} " << endl + << " {-V|--version}" << endl; +} + +int +thin_scan_cmd::run(int argc, char **argv) +{ + const char shortopts[] = "ho:V"; + const struct option longopts[] = { + { "help", no_argument, NULL, 'h'}, + { "output", required_argument, NULL, 'o'}, + { "version", no_argument, NULL, 'V'}, + 
{ "begin", required_argument, NULL, 1}, + { "end", required_argument, NULL, 2}, + { NULL, no_argument, NULL, 0 } + }; + boost::optional output; + flags f; + + int c; + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'o': + output = optarg; + break; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + case 1: + try { + f.scan_begin_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + break; + + case 2: + try { + f.scan_end_ = boost::lexical_cast(optarg); + } catch (std::exception &e) { + cerr << e.what() << endl; + return 1; + } + break; + + default: + usage(cerr); + return 1; + } + } + + if (argc == optind) { + cerr << "No input file provided." << endl; + usage(cerr); + return 1; + } + + if (f.scan_begin_ && f.scan_end_ && (*f.scan_end_ <= *f.scan_begin_)) { + cerr << "badly formed region (end <= begin)" << endl; + return 1; + } + + return scan_metadata(argv[optind], output, f); +} + +//--------------------------------------------------------------------------- diff --git a/thin-provisioning/thin_show_duplicates.cc b/thin-provisioning/thin_show_duplicates.cc new file mode 100644 index 0000000..6c1dc27 --- /dev/null +++ b/thin-provisioning/thin_show_duplicates.cc @@ -0,0 +1,358 @@ +// Copyright (C) 2015 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +#include +#include +#include + +#include "version.h" + +#include "base/application.h" +#include "base/error_state.h" +#include "base/progress_monitor.h" +#include "persistent-data/data-structures/btree_damage_visitor.h" +#include "persistent-data/file_utils.h" +#include "persistent-data/space-maps/core.h" +#include "persistent-data/space-maps/disk.h" +#include "thin-provisioning/cache_stream.h" +#include "thin-provisioning/fixed_chunk_stream.h" +#include "thin-provisioning/pool_stream.h" +#include "thin-provisioning/commands.h" +#include "thin-provisioning/device_tree.h" +#include "thin-provisioning/mapping_tree.h" +#include "thin-provisioning/rmap_visitor.h" +#include "thin-provisioning/superblock.h" +#include "thin-provisioning/variable_chunk_stream.h" + +#include +#include +#include +#include +#include + +using namespace base; +using namespace boost; +using namespace persistent_data; +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +namespace { + bool factor_of(block_address f, block_address n) { + return (n % f) == 0; + } + + block_manager<>::ptr + open_bm(string const &path) { + block_address nr_blocks = get_nr_blocks(path); + block_manager<>::mode m = block_manager<>::READ_ONLY; + return block_manager<>::ptr(new block_manager<>(path, nr_blocks, 1, m)); + } + + transaction_manager::ptr + open_tm(block_manager<>::ptr bm) { + space_map::ptr sm(new core_map(bm->get_nr_blocks())); + sm->inc(superblock_detail::SUPERBLOCK_LOCATION); + transaction_manager::ptr tm(new transaction_manager(bm, sm)); + return tm; + } + + uint64_t parse_int(string const &str, string const &desc) { + try { + return boost::lexical_cast(str); + + } catch (...) 
{ + ostringstream out; + out << "Couldn't parse " << desc << ": '" << str << "'"; + exit(1); + } + + return 0; // never get here + } + + //-------------------------------- + + struct flags { + flags() + : cache_mem(64 * 1024 * 1024), + content_based_chunks(false) { + } + + string data_dev; + optional metadata_dev; + optional block_size; + unsigned cache_mem; + bool content_based_chunks; + }; + + using namespace mapping_tree_detail; + + class duplicate_counter { + public: + duplicate_counter() + : non_zero_dups_(0), + zero_dups_(0) { + } + + void add_duplicate(block_address len) { + non_zero_dups_ += len; + } + + void add_zero_duplicate(block_address len) { + zero_dups_ += len; + } + + block_address get_total() const { + return non_zero_dups_ + zero_dups_; + } + + block_address get_non_zeroes() const { + return non_zero_dups_; + } + + block_address get_zeroes() const { + return zero_dups_; + } + + void display_results(chunk_stream const &stream) const { + block_address meg = 1024 * 1024; + cout << "\n\n" + << stream.size() / meg << "m examined, " + << get_non_zeroes() / meg << "m duplicates, " + << get_zeroes() / meg << "m zeroes\n"; + } + + private: + block_address non_zero_dups_; + block_address zero_dups_; + }; + + class duplicate_detector { + public: + void scan_with_variable_sized_chunks(chunk_stream &stream) { + variable_chunk_stream vstream(stream, 4096); + scan(vstream); + } + + void scan_with_fixed_sized_chunks(chunk_stream &stream, block_address chunk_size) { + fixed_chunk_stream fstream(stream, chunk_size); + scan(fstream); + } + + duplicate_counter const &get_results() const { + return results_; + } + + private: + void scan(chunk_stream &stream) { + block_address total_seen(0); + unique_ptr pbar = create_progress_bar("Examining data"); + + do { + // FIXME: use a wrapper class to automate the put() + chunk const &c = stream.get(); + examine(c); + stream.put(c); + + total_seen += c.len_; + pbar->update_percent((total_seen * 100) / stream.size()); + + } 
while (stream.next()); + + pbar->update_percent(100); + results_.display_results(stream); + } + + void examine(chunk const &c) { + if (all_zeroes(c)) + results_.add_zero_duplicate(c.len_); + + else { + digestor_.reset(); + digestor_.process_bytes(c.mem_.begin, c.mem_.end - c.mem_.begin); + + unsigned int digest[5]; + digestor_.get_digest(digest); + + // hack + vector v(5); + for (unsigned i = 0; i < 5; i++) + v[i] = digest[i]; + + fingerprint_map::const_iterator it = fm_.find(v); + if (it != fm_.end()) { + results_.add_duplicate(c.len_); + } else + fm_.insert(make_pair(v, c.offset_)); + } + } + + bool all_zeroes(chunk const &c) const { + for (uint8_t *ptr = c.mem_.begin; ptr != c.mem_.end; ptr++) { + if (*ptr != 0) + return false; + } + + return true; + } + + typedef map, block_address> fingerprint_map; + + unsigned block_size_; + boost::uuids::detail::sha1 digestor_; + fingerprint_map fm_; + duplicate_counter results_; + }; + + int show_dups_pool(flags const &fs) { + block_manager<>::ptr bm = open_bm(*fs.metadata_dev); + transaction_manager::ptr tm = open_tm(bm); + superblock_detail::superblock sb = read_superblock(bm); + block_address block_size = sb.data_block_size_ * 512; + block_address nr_blocks = get_nr_blocks(fs.data_dev, block_size); + + cache_stream stream(fs.data_dev, block_size, fs.cache_mem); + pool_stream pstream(stream, tm, sb, nr_blocks); + + duplicate_detector detector; + + if (fs.content_based_chunks) + detector.scan_with_variable_sized_chunks(pstream); + else { + if (*fs.block_size) { + if (factor_of(*fs.block_size, block_size)) + block_size = *fs.block_size; + else + throw runtime_error("specified block size is not a factor of the pool chunk size\n"); + } + + detector.scan_with_fixed_sized_chunks(pstream, block_size); + } + + return 0; + } + + int show_dups_linear(flags const &fs) { + if (!fs.block_size) + // FIXME: this check should be moved to the switch parsing + throw runtime_error("--block-sectors or --metadata-dev must be supplied"); + + 
block_address block_size = *fs.block_size; + block_address nr_blocks = get_nr_blocks(fs.data_dev, *fs.block_size); + + cerr << "path = " << fs.data_dev << "\n"; + cerr << "nr_blocks = " << nr_blocks << "\n"; + cerr << "block size = " << block_size << "\n"; + + cache_stream stream(fs.data_dev, block_size, fs.cache_mem); + duplicate_detector dd; + + if (fs.content_based_chunks) + dd.scan_with_variable_sized_chunks(stream); + else + dd.scan_with_fixed_sized_chunks(stream, block_size); + + return 0; + } + + int show_dups(flags const &fs) { + if (fs.metadata_dev) + return show_dups_pool(fs); + else { + cerr << "No metadata device provided, so treating data device as a linear device\n"; + return show_dups_linear(fs); + } + } +} + +//---------------------------------------------------------------- + +thin_show_duplicates_cmd::thin_show_duplicates_cmd() + : command("thin_show_duplicates") +{ +} + +void +thin_show_duplicates_cmd::usage(std::ostream &out) const +{ + out << "Usage: " << get_name() << " [options] {device|file}\n" + << "Options:\n" + << " {--block-sectors} \n" + << " {--content-based-chunks}\n" + << " {--metadata-dev} \n" + << " {-h|--help}\n" + << " {-V|--version}" << endl; +} + +int +thin_show_duplicates_cmd::run(int argc, char **argv) +{ + int c; + flags fs; + + char const shortopts[] = "qhV"; + option const longopts[] = { + { "block-sectors", required_argument, NULL, 1}, + { "content-based-chunks", no_argument, NULL, 2}, + { "metadata-dev", required_argument, NULL, 3}, + { "help", no_argument, NULL, 'h'}, + { "version", no_argument, NULL, 'V'}, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + + case 1: + fs.block_size = 512 * parse_int(optarg, "block sectors"); + break; + + case 2: + fs.content_based_chunks = true; + break; + + case 3: + fs.metadata_dev = optarg; 
+ break; + + default: + usage(cerr); + return 1; + } + } + + if (argc == optind) { + cerr << "No data device/file provided." << endl; + usage(cerr); + exit(1); + } + + fs.data_dev = argv[optind]; + + return show_dups(fs); +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/thin_show_metadata.cc b/thin-provisioning/thin_show_metadata.cc new file mode 100644 index 0000000..5d181c0 --- /dev/null +++ b/thin-provisioning/thin_show_metadata.cc @@ -0,0 +1,367 @@ +#include "persistent-data/checksum.h" +#include "persistent-data/data-structures/btree.h" +#include "persistent-data/file_utils.h" +#include "persistent-data/space-maps/disk_structures.h" +#include "thin-provisioning/commands.h" +#include "thin-provisioning/metadata.h" +#include "thin-provisioning/superblock.h" +#include "ui/ui.h" +#include "version.h" + +#include +#include +#include +#include +#include +#include + +using namespace persistent_data; +using namespace sm_disk_detail; +using namespace std; +using namespace thin_provisioning; +using namespace ui; + +//---------------------------------------------------------------- + +namespace { + class examiner { + public: + examiner(string const &name, int colour_pair, char rep) + : name_(name), + colour_pair_(colour_pair), + rep_(rep) { + } + + virtual ~examiner() {} + + virtual bool recognise(block_manager<>::read_ref rr) const = 0; +// virtual void render_block(text_ui &ui, block_manager<>::read_ref rr) = 0; + + string const &get_name() const { + return name_; + } + + int get_color_pair() const { + return colour_pair_; + } + + char get_rep() const { + return rep_; + } + + private: + string name_; + int colour_pair_; + char rep_; + }; + + class raw_examiner : public examiner { + public: + raw_examiner() + : examiner("raw", 5, '?') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + return true; + } + }; + + class superblock_examiner : public examiner { + public: + superblock_examiner() + : 
examiner("superblock", 1, 'S') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + using namespace superblock_detail; + + superblock_disk const *sbd = reinterpret_cast(rr.data()); + if (to_cpu(sbd->magic_) == SUPERBLOCK_MAGIC) { + superblock sb; + superblock_traits::unpack(*sbd, sb); + cout << "metadata nr blocks: " << sb.metadata_nr_blocks_ << endl; + + return true; + } + + return false; + } + }; + + class bitmap_examiner : public examiner { + public: + bitmap_examiner() + : examiner("bitmap", 2, ':') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + bitmap_header const *data = reinterpret_cast(rr.data()); + crc32c sum(BITMAP_CSUM_XOR); + sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t)); + return sum.get_sum() == to_cpu(data->csum); + } + }; + + class index_examiner : public examiner { + public: + index_examiner() + : examiner("index", 3, 'i') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + metadata_index const *mi = reinterpret_cast(rr.data()); + crc32c sum(INDEX_CSUM_XOR); + sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t)); + return sum.get_sum() == to_cpu(mi->csum_); + } + }; + + + class btree_examiner : public examiner { + public: + btree_examiner(string const &name, int colour_pair, char c) + : examiner(name, colour_pair, c) { + } + + bool is_btree_node(block_manager<>::read_ref rr) const { + using namespace btree_detail; + + disk_node const *data = reinterpret_cast(rr.data()); + node_header const *n = &data->header; + crc32c sum(BTREE_CSUM_XOR); + sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); + return sum.get_sum() == to_cpu(n->csum); + } + }; + + class dev_detail_examiner : public btree_examiner { + public: + dev_detail_examiner() + : btree_examiner("dev_details", 4, 'd') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + if (!btree_examiner::is_btree_node(rr)) + return false; + + using namespace btree_detail; + + disk_node const 
*data = reinterpret_cast(rr.data()); + node_header const *n = &data->header; + return to_cpu(n->value_size) == sizeof(device_tree_detail::device_details_disk); + } + }; + + class ref_count_examiner : public btree_examiner { + public: + ref_count_examiner() + : btree_examiner("ref_count node", 6, 'r') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + if (!btree_examiner::is_btree_node(rr)) + return false; + + using namespace btree_detail; + + disk_node const *data = reinterpret_cast(rr.data()); + node_header const *n = &data->header; + return to_cpu(n->value_size) == sizeof(uint32_t); + } + }; + + class mapping_examiner : public btree_examiner { + public: + mapping_examiner() + : btree_examiner("mapping node", 7, 'm') { + } + + virtual bool recognise(block_manager<>::read_ref rr) const { + if (!btree_examiner::is_btree_node(rr)) + return false; + + using namespace btree_detail; + + disk_node const *data = reinterpret_cast(rr.data()); + node_header const *n = &data->header; + return to_cpu(n->value_size) == sizeof(uint64_t); + } + }; + + class main_dialog { + public: + main_dialog(text_ui &ui, + block_manager<> const &bm) + : ui_(ui), + bm_(bm), + raw_examiner_(new raw_examiner()) { + + examiners_.push_back(shared_ptr(new superblock_examiner())); + examiners_.push_back(shared_ptr(new bitmap_examiner())); + examiners_.push_back(shared_ptr(new index_examiner())); + examiners_.push_back(shared_ptr(new dev_detail_examiner())); + examiners_.push_back(shared_ptr(new ref_count_examiner())); + examiners_.push_back(shared_ptr(new mapping_examiner())); + } + + void run() { + auto line_length = 80; + for (block_address b = 0; b < 2000; b++) { + block_manager<>::read_ref rr = bm_.read_lock(b); + + if (!(b % line_length)) { + if (b > 0) + printw("\n"); + + printw("%8llu: ", b); + } + + auto e = find_examiner(rr); + attron(COLOR_PAIR(e->get_color_pair())); + printw("%c", e->get_rep()); + attroff(COLOR_PAIR(e->get_color_pair())); + } + + printw("\n"); + 
show_superblock(); + } + + private: + void show_superblock() { + auto sb = read_superblock(bm_); + + printw("\n\nSuperblock at 0\n"); + printw("data mapping root: %llu\n", sb.data_mapping_root_); + printw("device details root: %llu\n", sb.device_details_root_); + printw("data block size: %u\n", sb.data_block_size_); + printw("metadata nr blocks: %llu\n", sb.metadata_nr_blocks_); + } + + shared_ptr &find_examiner(block_manager<>::read_ref const &rr) { + for (shared_ptr &e : examiners_) { + if (e->recognise(rr)) + return e; + } + + return raw_examiner_; + } + + text_ui &ui_; + block_manager<> const &bm_; + list> examiners_; + shared_ptr raw_examiner_; + + +#if 0 + void show_superblock(text_ui &ui, superblock_detail::superblock const &sb) { + } + + void show_blocks(text_ui &ui, string const &dev) { + metadata md(bm); + + show_superblock(ui, md.sb_); + +#if 0 + cout << "Metadata space map: nr_blocks = " << md.metadata_sm_->get_nr_blocks() + << ", nr_free_blocks = " << md.metadata_sm_->get_nr_free() + << endl; + cout << "Data space map: nr_blocks = " << md.data_sm_->get_nr_blocks() + << ", nr_free_blocks = " << md.data_sm_->get_nr_free() + << endl; + + block_address nr_blocks = bm->get_nr_blocks(); + for (block_address b = 0; b < nr_blocks; b++) { + block_manager<>::read_ref rr = bm->read_lock(b); + + if (is_superblock(rr)) + cout << b << ": superblock" << endl; + + else if (is_bitmap_block(rr)) + cout << b << ": bitmap block" << endl; + + else if (is_btree_node(rr)) + cout << b << ": btree_node" << endl; + + else + cout << b << ": unknown" << endl; + } +#endif + } +#endif + }; +} + +//---------------------------------------------------------------- + +thin_show_metadata_cmd::thin_show_metadata_cmd() + : command("thin_show_metadata") +{ +} + +void +thin_show_metadata_cmd::usage(ostream &out) const +{ + out << "Usage: " << get_name() << " {device|file}" << endl + << "Options:" << endl + << " {-h|--help}" << endl + << " {-V|--version}" << endl; +} + +int 
+thin_show_metadata_cmd::run(int argc, char **argv) +{ + int c; + const char shortopts[] = "hV"; + const struct option longopts[] = { + { "help", no_argument, NULL, 'h'}, + { "version", no_argument, NULL, 'V'}, + { NULL, no_argument, NULL, 0 } + }; + + while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { + switch(c) { + case 'h': + usage(cout); + return 0; + + case 'V': + cout << THIN_PROVISIONING_TOOLS_VERSION << endl; + return 0; + } + } + + if (argc == optind) { + usage(cerr); + exit(1); + } + + try { + ui::text_ui ui; + + block_manager<>::ptr bm = open_bm(argv[optind], block_manager<>::READ_ONLY, true); + main_dialog dialog(ui, *bm); + dialog.run(); +#if 0 +// show_blocks(ui, argv[optind]); +#endif + + +#if 0 + attron(COLOR_PAIR(1)); + printw("Hello, "); + attron(A_BOLD); + printw("world!\n"); + attroff(A_BOLD); + attroff(COLOR_PAIR(1)); +#endif + getch(); + + } catch (std::exception const &e) { + cerr << e.what() << endl; + return 1; + } + + return 0; +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/thin_trim.cc b/thin-provisioning/thin_trim.cc index 2cee686..0a8a3b0 100644 --- a/thin-provisioning/thin_trim.cc +++ b/thin-provisioning/thin_trim.cc @@ -39,6 +39,8 @@ namespace { range[0] = block_to_byte(b); range[1] = block_to_byte(e) - range[0]; + cerr << "emitting discard for blocks (" << b << ", " << e << "]\n"; + if (ioctl(fd_, BLKDISCARD, &range)) throw runtime_error("discard ioctl failed"); } @@ -97,13 +99,8 @@ namespace { highest_ = b; if (count) { - if (last_referenced_) { - if (b > *last_referenced_ + 1) - emitter_.emit(*last_referenced_ + 1, b); - - } else if (b > 0) { - emitter_.emit(0, b); - } + if (last_referenced_ && (b > *last_referenced_ + 1)) + emitter_.emit(*last_referenced_ + 1, b); last_referenced_ = b; } @@ -112,7 +109,7 @@ namespace { void complete() { if (last_referenced_) { if (*last_referenced_ != *highest_) - emitter_.emit(*last_referenced_ + 1ull, *highest_ + 
1ull); + emitter_.emit(*last_referenced_ + 1ull, *highest_ + 1ull); } else if (highest_) emitter_.emit(0ull, *highest_ + 1); @@ -125,13 +122,17 @@ namespace { }; int trim(string const &metadata_dev, string const &data_dev) { + cerr << "in trim\n"; + // We can trim any block that has zero count in the data // space map. block_manager<>::ptr bm = open_bm(metadata_dev, block_manager<>::READ_ONLY); metadata md(bm); - if (!md.data_sm_->get_nr_free()) + if (!md.data_sm_->get_nr_free()) { + cerr << "All data blocks allocated, nothing to discard\n"; return 0; + } discard_emitter de(data_dev, md.sb_.data_block_size_, md.data_sm_->get_nr_blocks()); diff --git a/thin-provisioning/variable_chunk_stream.cc b/thin-provisioning/variable_chunk_stream.cc new file mode 100644 index 0000000..f572db7 --- /dev/null +++ b/thin-provisioning/variable_chunk_stream.cc @@ -0,0 +1,133 @@ +#include "thin-provisioning/variable_chunk_stream.h" + +using namespace boost; +using namespace std; +using namespace thin_provisioning; + +//---------------------------------------------------------------- + +variable_chunk_stream::variable_chunk_stream(chunk_stream &stream, unsigned window_size) + : index_(0), + h_(window_size), + stream_(stream), + big_chunk_(0) { + next_big_chunk(); +} + +variable_chunk_stream::~variable_chunk_stream() +{ + put_big_chunk(); +} + +bcache::block_address +variable_chunk_stream::size() const +{ + return stream_.size(); +} + +void +variable_chunk_stream::rewind() +{ + // FIXME: not complete + index_ = 0; + stream_.rewind(); + h_.reset(); +} + +bool +variable_chunk_stream::next(bcache::block_address count) +{ + while (count--) { + index_++; + advance_one(); + } + + return !eof(); +} + +bool +variable_chunk_stream::eof() const +{ + return stream_.eof(); +} + +chunk const & +variable_chunk_stream::get() +{ + assert(big_chunk_); + + little_chunk_.len_ = little_e_ - little_b_; + little_chunk_.offset_ = big_chunk_->offset_ + little_chunk_.len_; + + little_chunk_.mem_.begin = little_b_; + 
little_chunk_.mem_.end = little_e_; + + return little_chunk_; +} + +void +variable_chunk_stream::put(chunk const &c) +{ + // noop +} + +bool +variable_chunk_stream::next_big_chunk() +{ + put_big_chunk(); + + if (!stream_.next()) + return false; + + big_chunk_ = &stream_.get(); + little_b_ = little_e_ = last_hashed_ = big_chunk_->mem_.begin; + h_.reset(); + + return true; +} + +bool +variable_chunk_stream::advance_one() +{ + uint8_t *big_e; + + big_e = big_chunk_->mem_.end; + little_b_ = little_e_; + little_e_ = last_hashed_; + + if (little_b_ == big_e) { + if (next_big_chunk()) + big_e = big_chunk_->mem_.end; + else + return false; + } + + while (little_e_ != big_e) { + optional maybe_break = h_.step(*little_e_); + little_e_++; + + if (maybe_break) { + // The break is not necessarily at the current + // byte. + last_hashed_ = little_e_; + little_e_ = little_b_ + *maybe_break; + break; + } + } + + if (little_e_ == big_e) + last_hashed_ = little_e_; + + return true; +} + +void +variable_chunk_stream::put_big_chunk() +{ + if (big_chunk_) + stream_.put(*big_chunk_); + + big_chunk_ = 0; +} + +//---------------------------------------------------------------- diff --git a/thin-provisioning/variable_chunk_stream.h b/thin-provisioning/variable_chunk_stream.h new file mode 100644 index 0000000..cc62945 --- /dev/null +++ b/thin-provisioning/variable_chunk_stream.h @@ -0,0 +1,41 @@ +#ifndef THIN_PROVISIONING_VARIABLE_CHUNK_STREAM_H +#define THIN_PROVISIONING_VARIABLE_CHUNK_STREAM_H + +#include "base/rolling_hash.h" +#include "thin-provisioning/chunk_stream.h" + +//---------------------------------------------------------------- + +namespace thin_provisioning { + class variable_chunk_stream : public chunk_stream { + public: + // window_size must be a power of 2 + variable_chunk_stream(chunk_stream &stream, unsigned window_size); + ~variable_chunk_stream(); + + virtual bcache::block_address size() const; + virtual void rewind(); + virtual bool next(bcache::block_address count 
= 1ull); + virtual bool eof() const; + virtual chunk const &get(); + virtual void put(chunk const &c); + + private: + bool next_big_chunk(); + bool advance_one(); + void put_big_chunk(); + + bcache::block_address index_; + base::content_based_hash h_; + + chunk_stream &stream_; + chunk const *big_chunk_; + + uint8_t *little_b_, *little_e_, *last_hashed_; + chunk little_chunk_; + }; +} + +//---------------------------------------------------------------- + +#endif diff --git a/thin-provisioning/xml_format.cc b/thin-provisioning/xml_format.cc index 333204f..efbfd3f 100644 --- a/thin-provisioning/xml_format.cc +++ b/thin-provisioning/xml_format.cc @@ -50,6 +50,8 @@ namespace { void begin_superblock(string const &uuid, uint64_t time, uint64_t trans_id, + boost::optional flags, + boost::optional version, uint32_t data_block_size, uint64_t nr_data_blocks, boost::optional metadata_snap) { @@ -57,6 +59,8 @@ namespace { out_ << "begin_superblock(get_attr(attr, "uuid"), get_attr(attr, "time"), get_attr(attr, "transaction"), + get_opt_attr(attr, "flags"), + get_opt_attr(attr, "version"), get_attr(attr, "data_block_size"), get_attr(attr, "nr_data_blocks"), get_opt_attr(attr, "metadata_snap")); diff --git a/ui/ui.cc b/ui/ui.cc new file mode 100644 index 0000000..2df3d6e --- /dev/null +++ b/ui/ui.cc @@ -0,0 +1,36 @@ +#include "ui/ui.h" + +#include + +using namespace ui; + +//---------------------------------------------------------------- + +text_ui::text_ui() +{ + initscr(); + noecho(); + + start_color(); + init_pair(1, COLOR_RED, COLOR_BLACK); + init_pair(2, COLOR_YELLOW, COLOR_BLACK); + init_pair(3, COLOR_BLUE, COLOR_BLACK); + init_pair(4, COLOR_GREEN, COLOR_BLACK); + init_pair(5, COLOR_YELLOW, COLOR_BLACK); + init_pair(6, COLOR_BLACK, COLOR_RED); + init_pair(7, COLOR_WHITE, COLOR_BLACK); + +} + +text_ui::~text_ui() +{ + endwin(); +} + +void +text_ui::refresh() +{ + refresh(); +} + +//---------------------------------------------------------------- diff --git a/ui/ui.h 
b/ui/ui.h new file mode 100644 index 0000000..75a2808 --- /dev/null +++ b/ui/ui.h @@ -0,0 +1,21 @@ +#ifndef UI_UI_H +#define UI_UI_H + +#include + +//---------------------------------------------------------------- + +namespace ui { + class text_ui { + public: + text_ui(); + ~text_ui(); + + void refresh(); + }; +}; + + +//---------------------------------------------------------------- + +#endif diff --git a/unit-tests/Makefile.in b/unit-tests/Makefile.in index 0ff70dd..971b485 100644 --- a/unit-tests/Makefile.in +++ b/unit-tests/Makefile.in @@ -16,10 +16,10 @@ # with thin-provisioning-tools. If not, see # . -GMOCK_DIR=gmock-1.6.0/ +GMOCK_DIR=googletest GMOCK_INCLUDES=\ - -Igmock-1.6.0/include \ - -Igmock-1.6.0/gtest/include + -I$(GMOCK_DIR)/googlemock/include \ + -I$(GMOCK_DIR)/googletest/include GMOCK_FLAGS=\ -Wno-unused-local-typedefs @@ -28,16 +28,16 @@ GMOCK_LIBS=\ -Llib -lpdata -lgmock -lpthread -laio GMOCK_DEPS=\ - $(wildcard $(GMOCK_DIR)/include/*.h) \ - $(wildcard $(GMOCK_DIR)/src/*.cc) \ - $(wildcard $(GMOCK_DIR)/gtest/include/*.h) \ - $(wildcard $(GMOCK_DIR)/gtest/src/*.cc) + $(wildcard $(GMOCK_DIR)/googlemock/include/*.h) \ + $(wildcard $(GMOCK_DIR)/googlemock/src/*.cc) \ + $(wildcard $(GMOCK_DIR)/googletest/include/*.h) \ + $(wildcard $(GMOCK_DIR)/googletest/src/*.cc) lib/libgmock.a: $(GMOCK_DEPS) @echo " [CXX] gtest" - $(V)g++ $(GMOCK_INCLUDES) -I$(GMOCK_DIR)/gtest -c $(GMOCK_DIR)/gtest/src/gtest-all.cc + $(V)g++ $(GMOCK_INCLUDES) -I$(GMOCK_DIR)/googletest -c $(GMOCK_DIR)/googletest/src/gtest-all.cc @echo " [CXX] gmock" - $(V)g++ $(GMOCK_INCLUDES) -I$(GMOCK_DIR) -c $(GMOCK_DIR)/src/gmock-all.cc + $(V)g++ $(GMOCK_INCLUDES) -I$(GMOCK_DIR)/googlemock -c $(GMOCK_DIR)/googlemock/src/gmock-all.cc @echo " [AR] $<" $(V)ar -rv lib/libgmock.a gtest-all.o gmock-all.o > /dev/null 2>&1 @@ -55,10 +55,14 @@ TEST_SOURCE=\ unit-tests/btree_counter_t.cc \ unit-tests/btree_damage_visitor_t.cc \ unit-tests/cache_superblock_t.cc \ + unit-tests/copier_t.cc \ unit-tests/damage_tracker_t.cc 
\ unit-tests/endian_t.cc \ unit-tests/error_state_t.cc \ + unit-tests/io_engine_t.cc \ + unit-tests/mem_pool_t.cc \ unit-tests/rmap_visitor_t.cc \ + unit-tests/rolling_hash_t.cc \ unit-tests/run_set_t.cc \ unit-tests/space_map_t.cc \ unit-tests/span_iterator_t.cc \ diff --git a/unit-tests/block_t.cc b/unit-tests/block_t.cc index c2a6d58..c1a3172 100644 --- a/unit-tests/block_t.cc +++ b/unit-tests/block_t.cc @@ -44,6 +44,14 @@ namespace { throw runtime_error("validator check zero"); } + virtual bool check_raw(void const *raw) const { + unsigned char const *data = reinterpret_cast(raw); + for (unsigned b = 0; b < BlockSize; b++) + if (data[b] != 0) + return false; + return true; + } + virtual void prepare(void *raw, block_address location) const { unsigned char *data = reinterpret_cast(raw); for (unsigned b = 0; b < BlockSize; b++) @@ -56,6 +64,7 @@ namespace { typedef boost::shared_ptr ptr; MOCK_CONST_METHOD2(check, void(void const *, block_address)); + MOCK_CONST_METHOD1(check_raw, bool(void const *data)); MOCK_CONST_METHOD2(prepare, void(void *, block_address)); }; diff --git a/unit-tests/btree_t.cc b/unit-tests/btree_t.cc index 13a525e..46d5cd3 100644 --- a/unit-tests/btree_t.cc +++ b/unit-tests/btree_t.cc @@ -129,7 +129,7 @@ TEST_F(BtreeTests, insert_works) tree->insert(key, value); btree<1, uint64_traits>::maybe_value l = tree->lookup(key); - ASSERT_TRUE(l); + ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(i)); } @@ -153,7 +153,7 @@ TEST_F(BtreeTests, insert_does_not_insert_imaginary_values) tree->insert(key, value); l = tree->lookup(key); - ASSERT_TRUE(l); + ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(100u)); key[0] = 1; @@ -183,7 +183,7 @@ TEST_F(BtreeTests, clone) uint64_t value = i * 7; l = tree->lookup(key); - ASSERT_TRUE(l); + ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(value)); } @@ -200,11 +200,11 @@ TEST_F(BtreeTests, clone) uint64_t value = i * 7; l = tree->lookup(key); - ASSERT_TRUE(l); + ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(value)); l = copy->lookup(key); - ASSERT_TRUE(l); 
+ ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(value)); } @@ -216,7 +216,7 @@ TEST_F(BtreeTests, clone) ASSERT_FALSE(l); l = copy->lookup(key); - ASSERT_TRUE(l); + ASSERT_TRUE(!!l); ASSERT_THAT(*l, Eq(value)); } diff --git a/unit-tests/copier_t.cc b/unit-tests/copier_t.cc new file mode 100644 index 0000000..1621c4c --- /dev/null +++ b/unit-tests/copier_t.cc @@ -0,0 +1,273 @@ +// Copyright (C) 2016 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include "gmock/gmock.h" +#include "block-cache/copier.h" +#include "test_utils.h" + +#include + +using namespace boost; +using namespace std; +using namespace test; +using namespace testing; + +//---------------------------------------------------------------- + +namespace { + unsigned const BLOCK_SIZE = 64u; + using wait_result = io_engine::wait_result; + + ostream &operator <<(ostream &out, wait_result const &wr) { + out << "wait_result[" << wr.first << ", " << wr.second << "]"; + return out; + } + + ostream &operator <<(ostream &out, optional const &mwr) { + if (mwr) { + out << "Just[wait_result[" << mwr->first << ", " << mwr->second << "]]"; + } else + out << "Nothing"; + return out; + } + + class io_engine_mock : public io_engine { + public: + MOCK_METHOD3(open_file, handle(string const &, mode, sharing)); + MOCK_METHOD1(close_file, void(handle)); + MOCK_METHOD6(issue_io, bool(handle, dir, sector_t, sector_t, void *, unsigned)); + + MOCK_METHOD0(wait, optional()); + MOCK_METHOD1(wait, optional(unsigned &)); + }; + + class CopierTests : public Test { + public: + CopierTests() + : src_file_("copy_src"), + dest_file_("copy_dest") { + } + + unique_ptr make_copier() { + EXPECT_CALL(engine_, open_file(src_file_, io_engine::M_READ_ONLY, io_engine::EXCLUSIVE)). + WillOnce(Return(SRC_HANDLE)); + EXPECT_CALL(engine_, open_file(dest_file_, io_engine::M_READ_WRITE, io_engine::EXCLUSIVE)). 
+ WillOnce(Return(DEST_HANDLE)); + + EXPECT_CALL(engine_, close_file(SRC_HANDLE)).Times(1); + EXPECT_CALL(engine_, close_file(DEST_HANDLE)).Times(1); + + return unique_ptr(new copier(engine_, src_file_, + dest_file_, + BLOCK_SIZE, 1 * 1024 * 1024)); + } + + static optional make_wr(bool success, unsigned context) { + return optional(wait_result(success, context)); + } + + void issue_successful_op(copier &c, copy_op &op, unsigned context) { + InSequence dummy; + + unsigned nr_pending = c.nr_pending(); + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, + op.src_b * BLOCK_SIZE, + op.src_e * BLOCK_SIZE, _, context)). + WillOnce(Return(true)); + c.issue(op); + + ASSERT_TRUE(c.nr_pending() == nr_pending + 1); + + EXPECT_CALL(engine_, wait()). + WillOnce(Return(make_wr(true, context))); + + EXPECT_CALL(engine_, issue_io(DEST_HANDLE, io_engine::D_WRITE, + op.dest_b * BLOCK_SIZE, + (op.dest_b + (op.src_e - op.src_b)) * BLOCK_SIZE, _, context)). + WillOnce(Return(true)); + + EXPECT_CALL(engine_, wait()). + WillOnce(Return(make_wr(true, context))); + + auto mop = c.wait(); + ASSERT_EQ(c.nr_pending(), nr_pending); + + ASSERT_TRUE(mop->success()); + } + + unsigned const SRC_HANDLE = 10; + unsigned const DEST_HANDLE = 11; + + string src_file_; + string dest_file_; + StrictMock engine_; + }; +} + +//---------------------------------------------------------------- + +TEST_F(CopierTests, empty_test) +{ + auto c = make_copier(); +} + +TEST_F(CopierTests, successful_copy) +{ + // Copy first block + copy_op op1(0, 1, 0); + + auto c = make_copier(); + issue_successful_op(*c, op1, 0); +} + +TEST_F(CopierTests, unsuccessful_issue_read) +{ + copy_op op1(0, 1, 0); + auto c = make_copier(); + + InSequence dummy; + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, 0, BLOCK_SIZE, _, 0)). 
+ WillOnce(Return(false)); + c->issue(op1); + + ASSERT_EQ(c->nr_pending(), 1u); + + auto mop = c->wait(); + ASSERT_EQ(c->nr_pending(), 0u); + ASSERT_FALSE(mop->success()); +} + +TEST_F(CopierTests, unsuccessful_read) +{ + copy_op op1(0, 1, 0); + auto c = make_copier(); + + InSequence dummy; + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + c->issue(op1); + + ASSERT_EQ(c->nr_pending(), 1u); + EXPECT_CALL(engine_, wait()). + WillOnce(Return(make_wr(false, 0u))); + ASSERT_EQ(c->nr_pending(), 1u); + + auto mop = c->wait(); + ASSERT_EQ(c->nr_pending(), 0u); + ASSERT_FALSE(mop->success()); +} + +TEST_F(CopierTests, unsuccessful_issue_write) +{ + copy_op op1(0, 1, 0); + auto c = make_copier(); + + InSequence dummy; + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + c->issue(op1); + + ASSERT_EQ(c->nr_pending(), 1u); + + EXPECT_CALL(engine_, wait()). + WillOnce(Return(make_wr(true, 0u))); + ASSERT_EQ(c->nr_pending(), 1u); + + EXPECT_CALL(engine_, issue_io(DEST_HANDLE, io_engine::D_WRITE, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(false)); + + auto mop = c->wait(); + ASSERT_EQ(c->nr_pending(), 0u); + ASSERT_FALSE(mop->success()); +} + +TEST_F(CopierTests, unsuccessful_write) +{ + // Copy first block + copy_op op1(0, 1, 0); + + auto c = make_copier(); + + InSequence dummy; + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + c->issue(op1); + ASSERT_EQ(c->nr_pending(), 1u); + + EXPECT_CALL(engine_, wait()). + WillOnce(Return(make_wr(true, 0u))); + + EXPECT_CALL(engine_, issue_io(DEST_HANDLE, io_engine::D_WRITE, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + + EXPECT_CALL(engine_, wait()). 
+ WillOnce(Return(make_wr(false, 0u))); + + auto mop = c->wait(); + ASSERT_EQ(c->nr_pending(), 0u); + + ASSERT_FALSE(mop->success()); +} + +TEST_F(CopierTests, copy_the_same_block_many_times) +{ + auto c = make_copier(); + copy_op op1(0, 1, 0); + + for (unsigned i = 0; i < 50000; i++) + issue_successful_op(*c, op1, i); +} + +TEST_F(CopierTests, copy_different_blocks) +{ + auto c = make_copier(); + for (unsigned i = 0; i < 5000; i++) { + copy_op op(i, i + 1, i); + issue_successful_op(*c, op, i); + } +} + +TEST_F(CopierTests, wait_can_timeout) +{ + copy_op op1(0, 1, 0); + auto c = make_copier(); + + InSequence dummy; + EXPECT_CALL(engine_, issue_io(SRC_HANDLE, io_engine::D_READ, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + c->issue(op1); + + ASSERT_EQ(c->nr_pending(), 1u); + + unsigned micro = 10000; + EXPECT_CALL(engine_, wait(micro)). + WillOnce(Return(make_wr(true, 0u))); + ASSERT_EQ(c->nr_pending(), 1u); + + EXPECT_CALL(engine_, issue_io(DEST_HANDLE, io_engine::D_WRITE, 0, BLOCK_SIZE, _, 0)). + WillOnce(Return(true)); + + EXPECT_CALL(engine_, wait(micro)). + WillOnce(DoAll(SetArgReferee<0>(0u), Return(optional()))); + + auto mop = c->wait(micro); + ASSERT_FALSE(mop); + ASSERT_EQ(c->nr_pending(), 1u); +} + +//---------------------------------------------------------------- diff --git a/unit-tests/io_engine_t.cc b/unit-tests/io_engine_t.cc new file mode 100644 index 0000000..2790eb6 --- /dev/null +++ b/unit-tests/io_engine_t.cc @@ -0,0 +1,206 @@ +// Copyright (C) 2016 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . + +#include "gmock/gmock.h" +#include "block-cache/mem_pool.h" +#include "block-cache/io_engine.h" +#include "test_utils.h" + + +#include + +using namespace boost; +using namespace std; +using namespace test; +using namespace testing; + +//---------------------------------------------------------------- + +namespace { + unsigned const MAX_IO = 64; + unsigned const PAGE_SIZE = 4096; + + class IOEngineTests : public Test { + public: + IOEngineTests() + : pool_(64 * 512, 128 * 512, PAGE_SIZE), + src_file_("copy_src", 32), + dest_file_("copy_dest", 32), + engine_(new aio_engine(MAX_IO)) { + } + + // in sectors + static uint64_t meg(unsigned n) { + return 2 * 1024 * n; + } + + mempool pool_; + temp_file src_file_; + temp_file dest_file_; + unique_ptr engine_; + }; +} + +//---------------------------------------------------------------- + +TEST_F(IOEngineTests, empty_test) +{ +} + +TEST_F(IOEngineTests, open_and_close) +{ + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + auto dest_handle = engine_->open_file(dest_file_.get_path(), io_engine::M_READ_WRITE); + ASSERT_TRUE(src_handle != dest_handle); + engine_->close_file(src_handle); + engine_->close_file(dest_handle); +} + +TEST_F(IOEngineTests, you_can_read_a_read_only_handle) +{ + unsigned nr_sectors = 8; + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + void *data = pool_.alloc(); + bool r = engine_->issue_io(src_handle, + io_engine::D_READ, + 0, nr_sectors, + data, + 123); + ASSERT_TRUE(r); + auto wr = engine_->wait(); + ASSERT_TRUE(wr->first); + 
ASSERT_TRUE(wr->second == 123); + + engine_->close_file(src_handle); + pool_.free(data); +} + + +TEST_F(IOEngineTests, you_cannot_write_to_a_read_only_handle) +{ + unsigned nr_sectors = 8; + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + void *data = pool_.alloc(); + bool r = engine_->issue_io(src_handle, + io_engine::D_WRITE, + 0, nr_sectors, + data, + 0); + ASSERT_FALSE(r); + engine_->close_file(src_handle); + pool_.free(data); +} + +TEST_F(IOEngineTests, you_can_write_to_a_read_write_handle) +{ + unsigned nr_sectors = 8; + auto handle = engine_->open_file(dest_file_.get_path(), io_engine::M_READ_WRITE); + void *data = pool_.alloc(); + bool r = engine_->issue_io(handle, + io_engine::D_WRITE, + 0, nr_sectors, + data, + 123); + ASSERT_TRUE(r); + auto wr = engine_->wait(); + ASSERT_TRUE(wr->first); + ASSERT_TRUE(wr->second == 123); + + engine_->close_file(handle); + pool_.free(data); +} + +TEST_F(IOEngineTests, final_block_read_succeeds) +{ + unsigned nr_sectors = 8; + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + void *data = pool_.alloc(); + bool r = engine_->issue_io(src_handle, + io_engine::D_READ, + meg(32) - nr_sectors, meg(32), + data, + 123); + ASSERT_TRUE(r); + auto wr = engine_->wait(); + ASSERT_TRUE(wr->first); + + engine_->close_file(src_handle); + pool_.free(data); + +} + +TEST_F(IOEngineTests, out_of_bounds_read_fails) +{ + unsigned nr_sectors = 8; + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + void *data = pool_.alloc(); + bool r = engine_->issue_io(src_handle, + io_engine::D_READ, + meg(32), meg(32) + nr_sectors, + data, + 123); + ASSERT_TRUE(r); + auto wr = engine_->wait(); + ASSERT_FALSE(wr->first); + + engine_->close_file(src_handle); + pool_.free(data); + +} + +TEST_F(IOEngineTests, out_of_bounds_write_succeeds) +{ + unsigned nr_sectors = 8; + auto handle = engine_->open_file(dest_file_.get_path(), 
io_engine::M_READ_WRITE); + void *data = pool_.alloc(); + bool r = engine_->issue_io(handle, + io_engine::D_WRITE, + meg(32), meg(32) + nr_sectors, + data, + 123); + ASSERT_TRUE(r); + auto wr = engine_->wait(); + ASSERT_TRUE(wr->first); + + engine_->close_file(handle); + pool_.free(data); + +} + +TEST_F(IOEngineTests, succeed_with_timeout) +{ + unsigned nr_sectors = 8; + auto src_handle = engine_->open_file(src_file_.get_path(), io_engine::M_READ_ONLY); + void *data = pool_.alloc(); + bool r = engine_->issue_io(src_handle, + io_engine::D_READ, + 0, nr_sectors, + data, + 123); + ASSERT_TRUE(r); + unsigned micro = 10; + auto wr = engine_->wait(micro); + ASSERT_TRUE(wr->first); + ASSERT_TRUE(wr->second == 123); + + engine_->close_file(src_handle); + pool_.free(data); +} + + +//---------------------------------------------------------------- diff --git a/unit-tests/mem_pool_t.cc b/unit-tests/mem_pool_t.cc new file mode 100644 index 0000000..43e05ec --- /dev/null +++ b/unit-tests/mem_pool_t.cc @@ -0,0 +1,112 @@ +// Copyright (C) 2016 Red Hat, Inc. All rights reserved. +// +// This file is part of the thin-provisioning-tools source. +// +// thin-provisioning-tools is free software: you can redistribute it +// and/or modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// thin-provisioning-tools is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with thin-provisioning-tools. If not, see +// . 
+ +#include "gmock/gmock.h" +#include "block-cache/mem_pool.h" +#include "test_utils.h" + +#include + +using namespace boost; +using namespace std; +using namespace test; +using namespace testing; + +//---------------------------------------------------------------- + +namespace { + class MempoolTests : public Test { + public: + bool aligned(void *data, size_t alignment) { + return (reinterpret_cast(data) % alignment) == 0; + } + + private: + + }; +} + +//---------------------------------------------------------------- + +TEST_F(MempoolTests, empty_test) +{ +} + +TEST_F(MempoolTests, create_destroy_cycle) +{ + for (size_t bs = 64; bs <= 512; bs *= 2) { + mempool mp(bs, 4 * 1024 * 1024, bs); + } +} + +TEST_F(MempoolTests, alignments_observed) +{ + for (size_t bs = 64; bs <= 512; bs *= 2) { + mempool mp(bs, 512 * 1024, bs); + + for (unsigned i = 0; i < 100; i++) { + auto md = mp.alloc(); + + if (!md) + throw runtime_error("couldn't alloc"); + + ASSERT_TRUE(aligned(md, bs)); + } + } +} + +TEST_F(MempoolTests, alloc_free_cycle) +{ + mempool mp(512, 512 * 1024, 512); + + for (unsigned i = 0; i < 10000; i++) { + auto md = mp.alloc(); + mp.free(md); + } +} + +TEST_F(MempoolTests, exhaust_pool) +{ + mempool mp(512, 100 * 512, 512); + + for (unsigned i = 0; i < 100; i++) { + auto md = mp.alloc(); + ASSERT_NE(md, nullptr); + } + + auto md = mp.alloc(); + ASSERT_EQ(md, nullptr); +} + +// Use valgrind +TEST_F(MempoolTests, data_can_be_written) +{ + mempool mp(512, 100 * 512, 512); + + for (unsigned i = 0; i < 100; i++) { + auto md = mp.alloc(); + ASSERT_NE(md, nullptr); + + memset(md, 0, 512); + } + + auto md = mp.alloc(); + ASSERT_EQ(md, nullptr); +} + +//---------------------------------------------------------------- diff --git a/unit-tests/rolling_hash_t.cc b/unit-tests/rolling_hash_t.cc new file mode 100644 index 0000000..c25b650 --- /dev/null +++ b/unit-tests/rolling_hash_t.cc @@ -0,0 +1,153 @@ +#include "gmock/gmock.h" + +#include "base/rolling_hash.h" + +using 
namespace base;
+using namespace boost;
+using namespace std;
+using namespace testing;
+
+//----------------------------------------------------------------
+
+namespace {
+	class RollingHashTests : public Test {	// fixture: rolling_hash built with a window_size_ of 4096
+	public:
+		RollingHashTests()
+			: window_size_(4096),
+			  rhash_(window_size_) {
+		}
+
+		typedef vector<uint8_t> bytes;
+		bytes random_bytes(unsigned count) {	// count bytes drawn from random_byte()
+			bytes v(count, 0);
+
+			for (unsigned i = 0; i < count; i++)
+				v[i] = random_byte();
+
+			return v;
+		}
+
+		uint8_t random_byte() const {
+			return random() % 256;
+		}
+
+		void apply_bytes(bytes const &bs) {	// feeds every byte of bs to rhash_ in order
+			for (unsigned i = 0; i < bs.size(); i++)
+				rhash_.step(bs[i]);
+		}
+
+		unsigned window_size_;
+		rolling_hash rhash_;
+	};
+
+	class ContentBasedHashTests : public Test {	// fixture: content_based_hash built with a window_size_ of 8192
+	public:
+		ContentBasedHashTests()
+			: window_size_(8192),
+			  h_(window_size_) {
+		}
+
+		typedef vector<uint8_t> bytes;
+		bytes random_bytes(unsigned count) {	// count bytes drawn from random_byte()
+			bytes v(count, 0);
+
+			for (unsigned i = 0; i < count; i++)
+				v[i] = random_byte();
+
+			return v;
+		}
+
+		uint8_t random_byte() const {
+			return random() % 256;
+		}
+
+		unsigned window_size_;
+		content_based_hash h_;
+	};
+}
+
+//----------------------------------------------------------------
+
+TEST_F(RollingHashTests, ctr)
+{
+}
+
+//--------------------------------
+
+TEST_F(RollingHashTests, hash_changes)
+{
+	bytes bs = random_bytes(window_size_ * 100);
+
+	uint32_t prev = rhash_.get_hash();
+	for (unsigned i = 0; i < bs.size(); i++) {
+		rhash_.step(bs[i]);
+		ASSERT_NE(rhash_.get_hash(), prev);
+		prev = rhash_.get_hash();
+	}
+}
+
+TEST_F(RollingHashTests, hash_repeats)
+{
+	bytes bs = random_bytes(window_size_);
+
+	apply_bytes(bs);
+	uint32_t h1 = rhash_.get_hash();
+	apply_bytes(bs);
+
+	ASSERT_EQ(rhash_.get_hash(), h1);
+}
+
+TEST_F(RollingHashTests, reset_is_deterministic)
+{
+	uint8_t bytes[] = "lksdfuwerh,sdg";
+
+	for (unsigned i = 0; i < sizeof(bytes) - 1; i++)
+		rhash_.step(bytes[i]);
+
+	uint32_t h1 = rhash_.get_hash();
+
+	rhash_.reset();
+
+	for (unsigned i = 0; i < sizeof(bytes) - 1; i++)
+		rhash_.step(bytes[i]);
+
+	uint32_t h2 = rhash_.get_hash();
+
+	ASSERT_EQ(h1, h2);
+}
+
+//----------------------------------------------------------------
+
+TEST_F(ContentBasedHashTests, ctr)
+{
+}
+
+TEST_F(ContentBasedHashTests, chunk_limits_respected)
+{
+	unsigned min = 100000, max = 0;
+
+	bytes bs = random_bytes(1024 * 1024 * 100);
+	vector<unsigned> counts(window_size_, 0);
+
+	for (unsigned i = 0; i < bs.size(); i++) {
+		auto b = h_.step(bs[i]);
+		if (b) {
+			// counts starts with window_size_ slots; grow it rather than index past the end.
+			if (*b >= counts.size())
+				counts.resize(*b + 1, 0);
+			counts[*b]++;
+			if (*b < min)
+				min = *b;
+			if (*b > max)
+				max = *b;
+		}
+	}
+
+#if 1	// diagnostic dump only; NOTE(review): nothing is asserted about min/max yet
+	for (unsigned i = 0; i < counts.size(); i++)
+		cerr << i << ": " << counts[i] << "\n";
+	cerr << "min: " << min << ", max: " << max << "\n";
+#endif
+}
+
+//----------------------------------------------------------------
diff --git a/unit-tests/run_set_t.cc b/unit-tests/run_set_t.cc
index abe201c..53d1bc1 100644
--- a/unit-tests/run_set_t.cc
+++ b/unit-tests/run_set_t.cc
@@ -36,7 +36,7 @@ namespace {
 
 TEST_F(RunSetTests, create)
 {
-	auto_ptr > rs(new run_set());
+	unique_ptr > rs(new run_set());
 }
 
 TEST_F(RunSetTests, add_single_blocks)
diff --git a/unit-tests/space_map_t.cc b/unit-tests/space_map_t.cc
index 0848909..d11de47 100644
--- a/unit-tests/space_map_t.cc
+++ b/unit-tests/space_map_t.cc
@@ -101,7 +101,7 @@ namespace {
 
 			for (unsigned i = 0; i < NR_BLOCKS; i++) {
 				boost::optional mb = sm->new_block();
-				ASSERT_TRUE(mb);
+				ASSERT_TRUE(!!mb);
 				ASSERT_THAT(sm->get_nr_free(), Eq(NR_BLOCKS - i - 1));
 			}
 
@@ -137,7 +137,7 @@ namespace {
 		void test_not_allocated_twice(space_map::ptr sm)
 		{
 			boost::optional mb = sm->new_block();
-			ASSERT_TRUE(mb);
+			ASSERT_TRUE(!!mb);
 
 			for (;;) {
 				boost::optional b = sm->new_block();
diff --git a/unit-tests/test_utils.cc b/unit-tests/test_utils.cc
index 5a9cc96..85876bd 100644
--- a/unit-tests/test_utils.cc
+++ b/unit-tests/test_utils.cc
@@ -3,6 +3,8 @@
 #include "persistent-data/space-maps/core.h"
 
 using namespace persistent_data;
+using namespace std;
+using namespace
test;
 
 //----------------------------------------------------------------
 
@@ -21,3 +23,47 @@ test::open_temporary_tm(block_manager<>::ptr bm)
 }
 
 //----------------------------------------------------------------
+
+// Creates ./<name_base>.tmp and preallocates meg_size MiB for it.
+// Throws runtime_error if the file can't be opened or the space reserved.
+temp_file::temp_file(string const &name_base, unsigned meg_size)
+	: path_(gen_path(name_base))
+{
+	int fd = ::open(path_.c_str(), O_CREAT | O_RDWR, 0666);
+	if (fd < 0)
+		throw runtime_error("couldn't open file");
+
+	// Widen before multiplying: done in 32-bit unsigned arithmetic the
+	// product wraps once meg_size reaches 4096.
+	off_t const len = static_cast<off_t>(meg_size) * 1024 * 1024;
+	int const r = ::fallocate(fd, 0, 0, len);
+
+	// Close before inspecting r so the descriptor isn't leaked on failure.
+	::close(fd);
+
+	if (r)
+		throw runtime_error("couldn't fallocate");
+}
+
+// NOTE(review): the unlink is commented out, so the file stays on disk
+// after destruction -- confirm that this is intended.
+temp_file::~temp_file()
+{
+//	::unlink(path_.c_str());
+}
+
+// Path of the file created by the constructor.
+string const &
+temp_file::get_path() const
+{
+	return path_;
+}
+
+// Maps a base name to "./<base>.tmp".
+string
+temp_file::gen_path(string const &base)
+{
+	return string("./") + base + string(".tmp");
+}
+
+//----------------------------------------------------------------
diff --git a/unit-tests/test_utils.h b/unit-tests/test_utils.h
index b7d32c2..e979d07 100644
--- a/unit-tests/test_utils.h
+++ b/unit-tests/test_utils.h
@@ -111,7 +111,20 @@ namespace test {
 				throw std::runtime_error("system failed");
 		}
 
-		std::auto_ptr dir_;
+		std::unique_ptr dir_;
+	};
+
+	// A file at ./<name_base>.tmp, preallocated to meg_size MiB on construction.
+	class temp_file {
+	public:
+		temp_file(std::string const &name_base, unsigned meg_size);
+		~temp_file();
+		std::string const &get_path() const;
+
+	private:
+		static std::string gen_path(std::string const &base);
+
+		std::string path_;
 	};
 
 	//--------------------------------