diff --git a/persistent-data/block.tcc b/persistent-data/block.tcc index 6e47a91..0824bf3 100644 --- a/persistent-data/block.tcc +++ b/persistent-data/block.tcc @@ -223,6 +223,7 @@ namespace persistent_data { unsigned max_concurrent_blocks, mode m, bool excl) + // FIXME: * BlockSize ? : fd_(open_or_create_block_file(path, nr_blocks * BlockSize, m, excl)), bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 16), superblock_ref_count_(0) diff --git a/persistent-data/file_utils.cc b/persistent-data/file_utils.cc index 2467079..b00acf0 100644 --- a/persistent-data/file_utils.cc +++ b/persistent-data/file_utils.cc @@ -12,7 +12,7 @@ using namespace base; //---------------------------------------------------------------- persistent_data::block_address -persistent_data::get_nr_blocks(string const &path) +persistent_data::get_nr_blocks(string const &path, sector_t block_size) { using namespace persistent_data; @@ -39,7 +39,7 @@ persistent_data::get_nr_blocks(string const &path) throw runtime_error("ioctl BLKGETSIZE64 failed"); } ::close(fd); - nr_blocks = div_down(nr_blocks, MD_BLOCK_SIZE); + nr_blocks = div_down(nr_blocks, block_size); } else // FIXME: needs a better message throw runtime_error("bad path"); diff --git a/persistent-data/file_utils.h b/persistent-data/file_utils.h index fcf203d..e641b7a 100644 --- a/persistent-data/file_utils.h +++ b/persistent-data/file_utils.h @@ -9,7 +9,7 @@ // FIXME: move to a different unit namespace persistent_data { - persistent_data::block_address get_nr_blocks(string const &path); + persistent_data::block_address get_nr_blocks(string const &path, sector_t block_size = MD_BLOCK_SIZE); block_manager<>::ptr open_bm(std::string const &dev_path, block_manager<>::mode m, bool excl = true); diff --git a/thin-provisioning/thin_show_duplicates.cc b/thin-provisioning/thin_show_duplicates.cc index 06066b1..7666c0e 100644 --- a/thin-provisioning/thin_show_duplicates.cc +++ b/thin-provisioning/thin_show_duplicates.cc @@ -32,6 +32,9 @@ #include "thin-provisioning/mapping_tree.h" #include "thin-provisioning/superblock.h" +#include +#include + using namespace base; using namespace std; using namespace thin_provisioning; @@ -57,14 +60,126 @@ namespace { //-------------------------------- struct flags { - flags() { + flags() + : block_size(512 * 1024), + cache_mem(64 * 1024 * 1024) { } + + unsigned block_size; + unsigned cache_mem; }; + int open_file(string const &path) { + int fd = ::open(path.c_str(), O_RDONLY | O_DIRECT | O_EXCL, 0666); + if (fd < 0) + syscall_failed("open", + "Note: you cannot run this tool with these options on live metadata."); + + return fd; + } + + class duplicate_counter { + public: + duplicate_counter(block_address nr_blocks) + : counts_(nr_blocks), + total_dups_(0) { + } + + void add_duplicate(block_address b1, block_address b2) { + // cout << "block " << b2 << " is a duplicate of " << b1 << "\n"; + total_dups_++; + counts_[b1]++; + } + + block_address get_total() const { + return total_dups_; + } + + private: + vector counts_; + block_address total_dups_; + }; + + class duplicate_detector { + public: + duplicate_detector(unsigned block_size, block_address nr_blocks) + : block_size_(block_size), + results_(nr_blocks) { + } + + void examine(block_cache::block const &b) { + digestor_.reset(); + digestor_.process_bytes(b.get_data(), block_size_); + unsigned int digest[5]; + digestor_.get_digest(digest); + + // hack + vector v(5); + for (unsigned i = 0; i < 5; i++) + v[i] = digest[i]; + + fingerprint_map::const_iterator it = fm_.find(v); + if (it != fm_.end()) { + results_.add_duplicate(it->second, b.get_index()); + } else + fm_.insert(make_pair(v, b.get_index())); + } + + block_address get_total_duplicates() const { + return results_.get_total(); + } + + private: + typedef map, block_address> fingerprint_map; + + unsigned block_size_; + boost::uuids::detail::sha1 digestor_; + fingerprint_map fm_; + duplicate_counter results_; + }; + + int show_dups(string const &path, flags const &fs) { + cerr << "path = " << path << "\n"; + block_address nr_blocks = get_nr_blocks(path, fs.block_size); + cerr << "nr_blocks = " << nr_blocks << "\n"; + + // The cache uses a LRU eviction policy, which plays badly + // with a sequential read. So we can't prefetch all the + // blocks. + + // FIXME: add MRU policy to cache + unsigned cache_blocks = (fs.cache_mem / fs.block_size) / 2; + int fd = open_file(path); + sector_t block_sectors = fs.block_size / 512; + block_cache cache(fd, block_sectors, nr_blocks, fs.cache_mem); + validator::ptr v(new bcache::noop_validator()); + + duplicate_detector detector(fs.block_size, nr_blocks); + + // warm up the cache + for (block_address i = 0; i < cache_blocks; i++) + cache.prefetch(i); + + for (block_address i = 0; i < nr_blocks; i++) { + block_cache::block &b = cache.get(i, 0, v); + block_address prefetch = i + cache_blocks; + if (prefetch < nr_blocks) + cache.prefetch(prefetch); + + detector.examine(b); + b.put(); + } + + cout << "total dups: " << detector.get_total_duplicates() << endl; + + return 0; + } + void usage(ostream &out, string const &cmd) { - out << "Usage: " << cmd << " [options] {device|file}" << endl - << "Options:" << endl - << " {-h|--help}" << endl + out << "Usage: " << cmd << " [options] {device|file}\n" + << "Options:\n" + << " {--block-sectors} \n" + << " {-h|--help}\n" << " {-V|--version}" << endl; } } @@ -103,7 +218,7 @@ int thin_show_dups_main(int argc, char **argv) exit(1); } - return 0; + return show_dups(argv[optind], fs); } base::command thin_provisioning::thin_show_dups_cmd("thin_show_duplicates", thin_show_dups_main);