// Copyright (C) 2015 Red Hat, Inc. All rights reserved. // // This file is part of the thin-provisioning-tools source. // // thin-provisioning-tools is free software: you can redistribute it // and/or modify it under the terms of the GNU General Public License // as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. // // thin-provisioning-tools is distributed in the hope that it will be // useful, but WITHOUT ANY WARRANTY; without even the implied warranty // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License along // with thin-provisioning-tools. If not, see // . #include #include #include #include "version.h" #include "base/application.h" #include "base/error_state.h" #include "base/progress_monitor.h" #include "persistent-data/data-structures/btree_damage_visitor.h" #include "persistent-data/file_utils.h" #include "persistent-data/space-maps/core.h" #include "persistent-data/space-maps/disk.h" #include "thin-provisioning/cache_stream.h" #include "thin-provisioning/fixed_chunk_stream.h" #include "thin-provisioning/pool_stream.h" #include "thin-provisioning/commands.h" #include "thin-provisioning/device_tree.h" #include "thin-provisioning/mapping_tree.h" #include "thin-provisioning/metadata.h" #include "thin-provisioning/rmap_visitor.h" #include "thin-provisioning/superblock.h" #include "thin-provisioning/variable_chunk_stream.h" #include #include #include #include #include using namespace base; using namespace boost; using namespace persistent_data; using namespace std; using namespace thin_provisioning; //---------------------------------------------------------------- namespace { bool factor_of(block_address f, block_address n) { return (n % f) == 0; } uint64_t parse_int(string const &str, string const &desc) { try { return boost::lexical_cast(str); } catch (...) { ostringstream out; out << "Couldn't parse " << desc << ": '" << str << "'"; exit(1); } return 0; // never get here } //-------------------------------- struct flags { flags() : cache_mem(64 * 1024 * 1024), content_based_chunks(false) { } string data_dev; optional metadata_dev; optional block_size; unsigned cache_mem; bool content_based_chunks; }; using namespace mapping_tree_detail; class duplicate_counter { public: duplicate_counter() : non_zero_dups_(0), zero_dups_(0) { } void add_duplicate(block_address len) { non_zero_dups_ += len; } void add_zero_duplicate(block_address len) { zero_dups_ += len; } block_address get_total() const { return non_zero_dups_ + zero_dups_; } block_address get_non_zeroes() const { return non_zero_dups_; } block_address get_zeroes() const { return zero_dups_; } void display_results(chunk_stream const &stream) const { block_address meg = 1024 * 1024; cout << "\n\n" << stream.size() / meg << "m examined, " << get_non_zeroes() / meg << "m duplicates, " << get_zeroes() / meg << "m zeroes\n"; } private: block_address non_zero_dups_; block_address zero_dups_; }; class duplicate_detector { public: void scan_with_variable_sized_chunks(chunk_stream &stream) { variable_chunk_stream vstream(stream, 4096); scan(vstream); } void scan_with_fixed_sized_chunks(chunk_stream &stream, block_address chunk_size) { fixed_chunk_stream fstream(stream, chunk_size); scan(fstream); } duplicate_counter const &get_results() const { return results_; } private: void scan(chunk_stream &stream) { block_address total_seen(0); unique_ptr pbar = create_progress_bar("Examining data"); do { // FIXME: use a wrapper class to automate the put() chunk const &c = stream.get(); examine(c); stream.put(c); total_seen += c.len_; pbar->update_percent((total_seen * 100) / stream.size()); } while (stream.next()); pbar->update_percent(100); results_.display_results(stream); } void examine(chunk const &c) { if (all_zeroes(c)) results_.add_zero_duplicate(c.len_); else { digestor_.reset(); digestor_.process_bytes(c.mem_.begin, c.mem_.end - c.mem_.begin); unsigned int digest[5]; digestor_.get_digest(digest); // hack vector v(5); for (unsigned i = 0; i < 5; i++) v[i] = digest[i]; fingerprint_map::const_iterator it = fm_.find(v); if (it != fm_.end()) { results_.add_duplicate(c.len_); } else fm_.insert(make_pair(v, c.offset_)); } } bool all_zeroes(chunk const &c) const { for (uint8_t *ptr = c.mem_.begin; ptr != c.mem_.end; ptr++) { if (*ptr != 0) return false; } return true; } typedef map, block_address> fingerprint_map; unsigned block_size_; boost::uuids::detail::sha1 digestor_; fingerprint_map fm_; duplicate_counter results_; }; int show_dups_pool(flags const &fs) { block_manager::ptr bm = open_bm(*fs.metadata_dev); transaction_manager::ptr tm = open_tm(bm, superblock_detail::SUPERBLOCK_LOCATION); superblock_detail::superblock sb = read_superblock(bm); block_address block_size = sb.data_block_size_ * 512; block_address nr_blocks = get_nr_blocks(fs.data_dev, block_size); cache_stream stream(fs.data_dev, block_size, fs.cache_mem); pool_stream pstream(stream, tm, sb, nr_blocks); duplicate_detector detector; if (fs.content_based_chunks) detector.scan_with_variable_sized_chunks(pstream); else { if (*fs.block_size) { if (factor_of(*fs.block_size, block_size)) block_size = *fs.block_size; else throw runtime_error("specified block size is not a factor of the pool chunk size\n"); } detector.scan_with_fixed_sized_chunks(pstream, block_size); } return 0; } int show_dups_linear(flags const &fs) { if (!fs.block_size) // FIXME: this check should be moved to the switch parsing throw runtime_error("--block-sectors or --metadata-dev must be supplied"); block_address block_size = *fs.block_size; block_address nr_blocks = get_nr_blocks(fs.data_dev, *fs.block_size); cerr << "path = " << fs.data_dev << "\n"; cerr << "nr_blocks = " << nr_blocks << "\n"; cerr << "block size = " << block_size << "\n"; cache_stream stream(fs.data_dev, block_size, fs.cache_mem); duplicate_detector dd; if (fs.content_based_chunks) dd.scan_with_variable_sized_chunks(stream); else dd.scan_with_fixed_sized_chunks(stream, block_size); return 0; } int show_dups(flags const &fs) { if (fs.metadata_dev) return show_dups_pool(fs); else { cerr << "No metadata device provided, so treating data device as a linear device\n"; return show_dups_linear(fs); } } } //---------------------------------------------------------------- thin_show_duplicates_cmd::thin_show_duplicates_cmd() : command("thin_show_duplicates") { } void thin_show_duplicates_cmd::usage(std::ostream &out) const { out << "Usage: " << get_name() << " [options] {device|file}\n" << "Options:\n" << " {--block-sectors} \n" << " {--content-based-chunks}\n" << " {--metadata-dev} \n" << " {-h|--help}\n" << " {-V|--version}" << endl; } int thin_show_duplicates_cmd::run(int argc, char **argv) { int c; flags fs; char const shortopts[] = "qhV"; option const longopts[] = { { "block-sectors", required_argument, NULL, 1}, { "content-based-chunks", no_argument, NULL, 2}, { "metadata-dev", required_argument, NULL, 3}, { "help", no_argument, NULL, 'h'}, { "version", no_argument, NULL, 'V'}, { NULL, no_argument, NULL, 0 } }; while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { switch(c) { case 'h': usage(cout); return 0; case 'V': cout << THIN_PROVISIONING_TOOLS_VERSION << endl; return 0; case 1: fs.block_size = 512 * parse_int(optarg, "block sectors"); break; case 2: fs.content_based_chunks = true; break; case 3: fs.metadata_dev = optarg; break; default: usage(cerr); return 1; } } if (argc == optind) { cerr << "No data device/file provided." << endl; usage(cerr); exit(1); } fs.data_dev = argv[optind]; return show_dups(fs); } //----------------------------------------------------------------