From 251762e6d94e903f2a82f3415b997cca249d57d2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 4 Sep 2015 15:16:49 +0100 Subject: [PATCH] [thin_show_dups] tidy up reporting --- thin-provisioning/cache_stream.cc | 10 +--- thin-provisioning/cache_stream.h | 3 +- thin-provisioning/chunk_stream.h | 2 +- thin-provisioning/pool_stream.cc | 31 +++++------ thin-provisioning/pool_stream.h | 5 +- thin-provisioning/thin_show_duplicates.cc | 64 +++++++++++----------- thin-provisioning/variable_chunk_stream.cc | 12 ++-- thin-provisioning/variable_chunk_stream.h | 3 +- 8 files changed, 58 insertions(+), 72 deletions(-) diff --git a/thin-provisioning/cache_stream.cc b/thin-provisioning/cache_stream.cc index b21f435..379c191 100644 --- a/thin-provisioning/cache_stream.cc +++ b/thin-provisioning/cache_stream.cc @@ -37,9 +37,9 @@ cache_stream::cache_stream(string const &path, } block_address -cache_stream::nr_chunks() const +cache_stream::size() const { - return nr_blocks_; + return nr_blocks_ * block_size_; } void @@ -68,12 +68,6 @@ cache_stream::eof() const return current_index_ >= nr_blocks_; } -block_address -cache_stream::index() const -{ - return current_index_; -} - chunk const & cache_stream::get() { diff --git a/thin-provisioning/cache_stream.h b/thin-provisioning/cache_stream.h index 65c81b1..b7af995 100644 --- a/thin-provisioning/cache_stream.h +++ b/thin-provisioning/cache_stream.h @@ -14,10 +14,9 @@ namespace thin_provisioning { block_address block_size, size_t cache_mem); - block_address nr_chunks() const; + block_address size() const; virtual void rewind(); - virtual block_address index() const; virtual bool next(block_address count = 1ull); virtual bool eof() const; diff --git a/thin-provisioning/chunk_stream.h b/thin-provisioning/chunk_stream.h index 0886c9f..1831f27 100644 --- a/thin-provisioning/chunk_stream.h +++ b/thin-provisioning/chunk_stream.h @@ -51,7 +51,7 @@ namespace thin_provisioning { virtual ~chunk_stream() {} virtual void rewind() = 0; - virtual bcache::block_address index() const = 0; + virtual bcache::block_address size() const = 0; virtual bool next(bcache::block_address count = 1ull) = 0; virtual bool eof() const = 0; diff --git a/thin-provisioning/pool_stream.cc b/thin-provisioning/pool_stream.cc index 21964f9..41a0ab0 100644 --- a/thin-provisioning/pool_stream.cc +++ b/thin-provisioning/pool_stream.cc @@ -41,22 +41,25 @@ pool_stream::pool_stream(cache_stream &stream, transaction_manager::ptr tm, superblock_detail::superblock const &sb, block_address nr_blocks) : stream_(stream), - block_to_thin_(stream.nr_chunks(), UNMAPPED), - nr_mapped_(0) + block_to_thin_(nr_blocks, UNMAPPED), + nr_mapped_(0), + index_(0), + block_size_(sb.data_block_size_ * 512) { init_rmap(tm, sb, nr_blocks); } block_address -pool_stream::nr_chunks() const +pool_stream::size() const { - return nr_mapped_; + return nr_mapped_ * block_size_; } void pool_stream::rewind() { stream_.rewind(); + index_ = 0; } bool @@ -75,12 +78,6 @@ pool_stream::eof() const return stream_.eof(); } -block_address -pool_stream::index() const -{ - return stream_.index(); -} - chunk const & pool_stream::get() { @@ -141,16 +138,14 @@ pool_stream::init_rmap(transaction_manager::ptr tm, bool pool_stream::advance_one() { - block_address new_index = index() + 1; + block_address count = 1; - while (block_to_thin_[new_index] == UNMAPPED && - new_index < nr_chunks()) - new_index++; + while (((index_ + count) < block_to_thin_.size()) && + (block_to_thin_[index_ + count] == UNMAPPED)) + count++; - if (new_index >= nr_chunks()) - return false; - - return stream_.next(new_index - index()); + index_ += count; + return stream_.next(count); } //---------------------------------------------------------------- diff --git a/thin-provisioning/pool_stream.h b/thin-provisioning/pool_stream.h index 71576ed..e419842 100644 --- a/thin-provisioning/pool_stream.h +++ b/thin-provisioning/pool_stream.h @@ -32,11 +32,10 @@ namespace thin_provisioning { transaction_manager::ptr tm, superblock_detail::superblock const &sb, block_address nr_blocks); - block_address nr_chunks() const; + block_address size() const; void rewind(); bool next(block_address count = 1ull); bool eof() const; - block_address index() const; chunk const &get(); void put(chunk const &c); @@ -56,6 +55,8 @@ namespace thin_provisioning { cache_stream &stream_; vector block_to_thin_; block_address nr_mapped_; + block_address index_; + block_address block_size_; }; } diff --git a/thin-provisioning/thin_show_duplicates.cc b/thin-provisioning/thin_show_duplicates.cc index 5a75e26..5c09af2 100644 --- a/thin-provisioning/thin_show_duplicates.cc +++ b/thin-provisioning/thin_show_duplicates.cc @@ -182,6 +182,34 @@ namespace { duplicate_counter results_; }; + void display_results(chunk_stream const &stream, duplicate_counter const &r) { + block_address meg = 1024 * 1024; + cout << "\n\n" + << stream.size() / meg << "m examined, " + << r.get_non_zeroes() / meg << "m duplicates, " + << r.get_zeroes() / meg << "m zeroes\n"; + } + + void scan(chunk_stream &stream, block_address stream_size) { + duplicate_detector detector; + block_address total_seen(0); + auto_ptr pbar = create_progress_bar("Examining data"); + + do { + // FIXME: use a wrapper class to automate the put() + chunk const &c = stream.get(); + detector.examine(c); + stream.put(c); + + total_seen += c.len_; + pbar->update_percent((total_seen * 100) / stream.size()); + + } while (stream.next()); + + pbar->update_percent(100); + display_results(stream, detector.get_results()); + } + int show_dups_pool(flags const &fs) { block_manager<>::ptr bm = open_bm(*fs.metadata_dev); transaction_manager::ptr tm = open_tm(bm); @@ -195,21 +223,9 @@ namespace { cache_stream stream(fs.data_dev, block_size, fs.cache_mem); pool_stream pstream(stream, tm, sb, nr_blocks); + variable_chunk_stream vstream(pstream, 4096); - duplicate_detector detector; - auto_ptr pbar = create_progress_bar("Examining data"); - - do { - chunk const &c = pstream.get(); - detector.examine(c); - pstream.put(c); - pbar->update_percent((pstream.index() * 100) / pstream.nr_chunks()); - - } while (pstream.next()); - pbar->update_percent(100); - - cout << "\n\ntotal dups: " << detector.get_results().get_total() << endl; - cout << (detector.get_results().get_total() * 100) / pstream.nr_chunks() << "% duplicates\n"; + scan(vstream, nr_blocks * block_size); return 0; } @@ -229,26 +245,8 @@ namespace { cache_stream low_level_stream(fs.data_dev, block_size, fs.cache_mem); variable_chunk_stream stream(low_level_stream, 4096); - duplicate_detector detector; - auto_ptr pbar = create_progress_bar("Examining data"); - do { - // FIXME: use a wrapper class to automate the put() - chunk const &c = stream.get(); - detector.examine(c); - stream.put(c); - - pbar->update_percent((c.offset_ * 100) / dev_size); - - } while (stream.next()); - pbar->update_percent(100); - - duplicate_counter r = detector.get_results(); - block_address meg = 1024 * 1024; - cout << "\n\n" - << (nr_blocks * block_size) / meg << "m examined, " - << r.get_non_zeroes() / meg << "m duplicates, " - << r.get_zeroes() / meg << "m zeroes\n"; + scan(stream, dev_size); return 0; } diff --git a/thin-provisioning/variable_chunk_stream.cc b/thin-provisioning/variable_chunk_stream.cc index 99ddc61..f572db7 100644 --- a/thin-provisioning/variable_chunk_stream.cc +++ b/thin-provisioning/variable_chunk_stream.cc @@ -19,6 +19,12 @@ variable_chunk_stream::~variable_chunk_stream() put_big_chunk(); } +bcache::block_address +variable_chunk_stream::size() const +{ + return stream_.size(); +} + void variable_chunk_stream::rewind() { @@ -45,12 +51,6 @@ variable_chunk_stream::eof() const return stream_.eof(); } -bcache::block_address -variable_chunk_stream::index() const -{ - return index_; -} - chunk const & variable_chunk_stream::get() { diff --git a/thin-provisioning/variable_chunk_stream.h b/thin-provisioning/variable_chunk_stream.h index f9c5ec7..cc62945 100644 --- a/thin-provisioning/variable_chunk_stream.h +++ b/thin-provisioning/variable_chunk_stream.h @@ -13,11 +13,10 @@ namespace thin_provisioning { variable_chunk_stream(chunk_stream &stream, unsigned window_size); ~variable_chunk_stream(); - // FIXME: we don't know in advance how many chunks we will have + virtual bcache::block_address size() const; virtual void rewind(); virtual bool next(bcache::block_address count = 1ull); virtual bool eof() const; - virtual bcache::block_address index() const; virtual chunk const &get(); virtual void put(chunk const &c);