Merge branch '2015-08-19-thin-show-duplicates' into merge-thin-ls

Conflicts:
	Makefile.in
	block-cache/block_cache.h
	main.cc
	thin-provisioning/commands.h
This commit is contained in:
Joe Thornber
2016-02-24 14:31:51 +00:00
25 changed files with 1477 additions and 25 deletions

View File

@ -0,0 +1,95 @@
#include "base/container_of.h"
#include "thin-provisioning/cache_stream.h"
#include "persistent-data/file_utils.h"
using namespace thin_provisioning;
using namespace std;
using namespace persistent_data;
//----------------------------------------------------------------
namespace {
int open_file(string const &path) {
int fd = ::open(path.c_str(), O_RDONLY | O_DIRECT | O_EXCL, 0666);
if (fd < 0)
syscall_failed("open",
"Note: you cannot run this tool with these options on live metadata.");
return fd;
}
}
//----------------------------------------------------------------
cache_stream::cache_stream(string const &path,
block_address block_size,
size_t cache_mem)
: block_size_(block_size),
nr_blocks_(get_nr_blocks(path, block_size)),
// hack because cache uses LRU rather than MRU
cache_blocks_((cache_mem / block_size) / 2u),
fd_(open_file(path)),
v_(new bcache::noop_validator()),
cache_(new block_cache(fd_, block_size / 512, nr_blocks_, cache_mem)),
current_index_(0) {
rewind();
}
block_address
cache_stream::size() const
{
return nr_blocks_ * block_size_;
}
void
cache_stream::rewind()
{
current_index_ = 0;
for (block_address i = 1; i < min(cache_blocks_, nr_blocks_); i++)
cache_->prefetch(i);
}
bool
cache_stream::next(block_address count)
{
current_index_ = min(current_index_ + count, nr_blocks_);
if (current_index_ + cache_blocks_ < nr_blocks_)
cache_->prefetch(current_index_ + cache_blocks_);
return !eof();
}
bool
cache_stream::eof() const
{
return current_index_ >= nr_blocks_;
}
chunk const &
cache_stream::get()
{
chunk_wrapper *w = new chunk_wrapper(*this);
return w->c_;
}
void
cache_stream::put(chunk const &c)
{
chunk_wrapper *w = base::container_of(const_cast<chunk *>(&c), &chunk_wrapper::c_);
delete w;
}
cache_stream::chunk_wrapper::chunk_wrapper(cache_stream &parent)
: block_(parent.cache_->get(parent.current_index_, 0, parent.v_))
{
c_.offset_ = parent.current_index_ * parent.block_size_;
c_.len_ = parent.block_size_;
c_.mem_.begin = static_cast<uint8_t *>(block_.get_data());
c_.mem_.end = c_.mem_.begin + parent.block_size_;
}
//----------------------------------------------------------------

View File

@ -0,0 +1,51 @@
#ifndef THIN_PROVISIONING_CACHE_STREAM_H
#define THIN_PROVISIONING_CACHE_STREAM_H
#include "thin-provisioning/chunk_stream.h"
//----------------------------------------------------------------
namespace thin_provisioning {
using namespace bcache;
class cache_stream : public chunk_stream {
public:
cache_stream(std::string const &path,
block_address block_size,
size_t cache_mem);
block_address size() const;
virtual void rewind();
virtual bool next(block_address count = 1ull);
virtual bool eof() const;
virtual chunk const &get();
virtual void put(chunk const &c);
private:
struct chunk_wrapper {
chunk_wrapper(cache_stream &parent);
block_cache::auto_block block_;
chunk c_;
};
friend class chunk_wrapper;
block_address block_size_;
block_address nr_blocks_;
block_address cache_blocks_;
int fd_;
validator::ptr v_;
std::auto_ptr<block_cache> cache_;
block_address current_index_;
};
}
//----------------------------------------------------------------
#endif

View File

@ -0,0 +1,9 @@
#include "thin-provisioning/chunk_stream.h"
using namespace std;
using namespace thin_provisioning;
//----------------------------------------------------------------
//----------------------------------------------------------------

View File

@ -0,0 +1,66 @@
// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef CHUNK_STREAM_H
#define CHUNK_STREAM_H
#include "block-cache/block_cache.h"
#include <deque>
#include <stdint.h>
//----------------------------------------------------------------
namespace thin_provisioning {
struct mem {
mem()
: begin(0),
end(0) {
}
mem(uint8_t *b, uint8_t *e)
: begin(b),
end(e) {
}
uint8_t *begin, *end;
};
struct chunk {
uint64_t offset_, len_;
mem mem_;
};
class chunk_stream {
public:
virtual ~chunk_stream() {}
virtual void rewind() = 0;
virtual bcache::block_address size() const = 0;
virtual bool next(bcache::block_address count = 1ull) = 0;
virtual bool eof() const = 0;
virtual chunk const &get() = 0;
virtual void put(chunk const &c) = 0;
};
}
//----------------------------------------------------------------
#endif

View File

@ -18,8 +18,9 @@ thin_provisioning::register_thin_commands(base::application &app)
app.add_cmd(command::ptr(new thin_rmap_cmd()));
app.add_cmd(command::ptr(new thin_trim_cmd()));
#if DEV_COMMANDS
#ifdef DEV_TOOLS
app.add_cmd(command::ptr(new thin_generate_metadata_cmd()));
app.add_cmd(command::ptr(new thin_show_duplicates_cmd()));
#endif
}

View File

@ -70,6 +70,13 @@ namespace thin_provisioning {
virtual int run(int argc, char **argv);
};
class thin_show_duplicates_cmd : public base::command {
public:
thin_show_duplicates_cmd();
virtual void usage(std::ostream &out) const;
virtual int run(int argc, char **argv);
};
class thin_generate_metadata_cmd : public base::command {
public:
thin_generate_metadata_cmd();

View File

@ -0,0 +1,113 @@
#include "thin-provisioning/fixed_chunk_stream.h"
using namespace thin_provisioning;
//----------------------------------------------------------------
fixed_chunk_stream::fixed_chunk_stream(chunk_stream &stream, unsigned chunk_size)
: index_(0),
stream_(stream),
chunk_size_(chunk_size),
big_chunk_(0) {
next_big_chunk();
}
fixed_chunk_stream::~fixed_chunk_stream()
{
put_big_chunk();
}
bcache::block_address
fixed_chunk_stream::size() const
{
return stream_.size();
}
void
fixed_chunk_stream::rewind()
{
// FIXME: not complete
index_ = 0;
stream_.rewind();
}
bool
fixed_chunk_stream::next(bcache::block_address count)
{
while (count--) {
index_++;
advance_one();
}
return !eof();
}
bool
fixed_chunk_stream::eof() const
{
return stream_.eof();
}
chunk const &
fixed_chunk_stream::get()
{
assert(big_chunk_);
little_chunk_.len_ = little_e_ - little_b_;
little_chunk_.offset_ = big_chunk_->offset_ + little_chunk_.len_;
little_chunk_.mem_.begin = little_b_;
little_chunk_.mem_.end = little_e_;
return little_chunk_;
}
void
fixed_chunk_stream::put(chunk const &c)
{
// noop
}
bool
fixed_chunk_stream::next_big_chunk()
{
put_big_chunk();
if (!stream_.next())
return false;
big_chunk_ = &stream_.get();
little_b_ = little_e_ = last_hashed_ = big_chunk_->mem_.begin;
return true;
}
bool
fixed_chunk_stream::advance_one()
{
uint8_t *big_e;
big_e = big_chunk_->mem_.end;
little_b_ = little_e_;
if (little_b_ >= big_e) {
if (next_big_chunk())
big_e = big_chunk_->mem_.end;
else
return false;
}
little_e_ += chunk_size_;
return true;
}
void
fixed_chunk_stream::put_big_chunk()
{
if (big_chunk_)
stream_.put(*big_chunk_);
big_chunk_ = 0;
}
//----------------------------------------------------------------

View File

@ -0,0 +1,39 @@
#ifndef THIN_PROVISIONING_FIXED_CHUNK_STREAM_H
#define THIN_PROVISIONING_FIXED_CHUNK_STREAM_H
#include "thin-provisioning/chunk_stream.h"
//----------------------------------------------------------------
namespace thin_provisioning {
class fixed_chunk_stream : public chunk_stream {
public:
fixed_chunk_stream(chunk_stream &stream, unsigned chunk_size);
~fixed_chunk_stream();
virtual bcache::block_address size() const;
virtual void rewind();
virtual bool next(bcache::block_address count = 1ull);
virtual bool eof() const;
virtual chunk const &get();
virtual void put(chunk const &c);
private:
bool next_big_chunk();
bool advance_one();
void put_big_chunk();
bcache::block_address index_;
chunk_stream &stream_;
unsigned chunk_size_;
chunk const *big_chunk_;
uint8_t *little_b_, *little_e_, *last_hashed_;
chunk little_chunk_;
};
}
//----------------------------------------------------------------
#endif

View File

@ -0,0 +1,151 @@
// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "thin-provisioning/pool_stream.h"
#include "persistent-data/data-structures/btree_damage_visitor.h"
using namespace thin_provisioning;
using namespace persistent_data;
//----------------------------------------------------------------
namespace {
class damage_visitor {
public:
virtual void visit(btree_path const &path, btree_detail::damage const &d) {
throw std::runtime_error("damage in mapping tree, please run thin_check");
}
};
uint32_t const UNMAPPED = -1;
}
//----------------------------------------------------------------
pool_stream::pool_stream(cache_stream &stream,
transaction_manager::ptr tm, superblock_detail::superblock const &sb,
block_address nr_blocks)
: stream_(stream),
block_to_thin_(nr_blocks, UNMAPPED),
nr_mapped_(0),
index_(0),
block_size_(sb.data_block_size_ * 512)
{
init_rmap(tm, sb, nr_blocks);
}
block_address
pool_stream::size() const
{
return nr_mapped_ * block_size_;
}
void
pool_stream::rewind()
{
stream_.rewind();
index_ = 0;
}
bool
pool_stream::next(block_address count)
{
while (count--)
if (!advance_one())
return false;
return true;
}
bool
pool_stream::eof() const
{
return stream_.eof();
}
chunk const &
pool_stream::get()
{
return stream_.get();
}
void
pool_stream::put(chunk const &c)
{
stream_.put(c);
}
// FIXME: too big to return by value
vector<pool_stream::rmap_region>
pool_stream::read_rmap(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
block_address nr_blocks)
{
damage_visitor dv;
rmap_visitor rv;
mapping_tree mtree(*tm, sb.data_mapping_root_,
mapping_tree_detail::block_traits::ref_counter(tm->get_sm()));
rv.add_data_region(rmap_visitor::region(0, nr_blocks));
btree_visit_values(mtree, rv, dv);
rv.complete();
cerr << "rmap size: " << rv.get_rmap().size() << "\n";
return rv.get_rmap();
}
void
pool_stream::init_rmap(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
block_address nr_blocks)
{
cerr << "reading rmap...";
vector<rmap_region> rmap = read_rmap(tm, sb, nr_blocks);
cerr << "done\n";
vector<rmap_region>::const_iterator it;
set<uint32_t> thins;
for (it = rmap.begin(); it != rmap.end(); ++it) {
rmap_region const &r = *it;
for (block_address b = r.data_begin; b != r.data_end; b++)
if (block_to_thin_[b] == UNMAPPED) {
nr_mapped_++;
block_to_thin_[b] = r.thin_dev;
}
thins.insert(r.thin_dev);
}
cerr << nr_mapped_ << " mapped blocks\n";
cerr << "there are " << thins.size() << " thin devices\n";
}
bool
pool_stream::advance_one()
{
block_address count = 1;
while (((index_ + count) < block_to_thin_.size()) &&
(block_to_thin_[index_ + count] == UNMAPPED))
count++;
index_ += count;
return stream_.next(count);
}
//----------------------------------------------------------------

View File

@ -0,0 +1,65 @@
// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef POOL_STREAM_H
#define POOL_STREAM_H
#include "thin-provisioning/cache_stream.h"
#include "thin-provisioning/rmap_visitor.h"
#include "thin-provisioning/superblock.h"
//----------------------------------------------------------------
namespace thin_provisioning {
class pool_stream : public chunk_stream {
public:
pool_stream(cache_stream &stream,
transaction_manager::ptr tm, superblock_detail::superblock const &sb,
block_address nr_blocks);
block_address size() const;
void rewind();
bool next(block_address count = 1ull);
bool eof() const;
chunk const &get();
void put(chunk const &c);
private:
typedef rmap_visitor::region region;
typedef rmap_visitor::rmap_region rmap_region;
// FIXME: too big to return by value
vector<rmap_region> read_rmap(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
block_address nr_blocks);
void init_rmap(transaction_manager::ptr tm, superblock_detail::superblock const &sb,
block_address nr_blocks);
bool advance_one();
cache_stream &stream_;
vector<uint32_t> block_to_thin_;
block_address nr_mapped_;
block_address index_;
block_address block_size_;
};
}
//----------------------------------------------------------------
#endif

View File

@ -0,0 +1,358 @@
// Copyright (C) 2015 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include <iostream>
#include <getopt.h>
#include <libgen.h>
#include "version.h"
#include "base/application.h"
#include "base/error_state.h"
#include "base/progress_monitor.h"
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/file_utils.h"
#include "persistent-data/space-maps/core.h"
#include "persistent-data/space-maps/disk.h"
#include "thin-provisioning/cache_stream.h"
#include "thin-provisioning/fixed_chunk_stream.h"
#include "thin-provisioning/pool_stream.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/device_tree.h"
#include "thin-provisioning/mapping_tree.h"
#include "thin-provisioning/rmap_visitor.h"
#include "thin-provisioning/superblock.h"
#include "thin-provisioning/variable_chunk_stream.h"
#include <boost/uuid/sha1.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/optional.hpp>
#include <deque>
#include <vector>
using namespace base;
using namespace boost;
using namespace persistent_data;
using namespace std;
using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
bool factor_of(block_address f, block_address n) {
return (n % f) == 0;
}
block_manager<>::ptr
open_bm(string const &path) {
block_address nr_blocks = get_nr_blocks(path);
block_manager<>::mode m = block_manager<>::READ_ONLY;
return block_manager<>::ptr(new block_manager<>(path, nr_blocks, 1, m));
}
transaction_manager::ptr
open_tm(block_manager<>::ptr bm) {
space_map::ptr sm(new core_map(bm->get_nr_blocks()));
sm->inc(superblock_detail::SUPERBLOCK_LOCATION);
transaction_manager::ptr tm(new transaction_manager(bm, sm));
return tm;
}
uint64_t parse_int(string const &str, string const &desc) {
try {
return boost::lexical_cast<uint64_t>(str);
} catch (...) {
ostringstream out;
out << "Couldn't parse " << desc << ": '" << str << "'";
exit(1);
}
return 0; // never get here
}
//--------------------------------
struct flags {
flags()
: cache_mem(64 * 1024 * 1024),
content_based_chunks(false) {
}
string data_dev;
optional<string> metadata_dev;
optional<unsigned> block_size;
unsigned cache_mem;
bool content_based_chunks;
};
using namespace mapping_tree_detail;
class duplicate_counter {
public:
duplicate_counter()
: non_zero_dups_(0),
zero_dups_(0) {
}
void add_duplicate(block_address len) {
non_zero_dups_ += len;
}
void add_zero_duplicate(block_address len) {
zero_dups_ += len;
}
block_address get_total() const {
return non_zero_dups_ + zero_dups_;
}
block_address get_non_zeroes() const {
return non_zero_dups_;
}
block_address get_zeroes() const {
return zero_dups_;
}
void display_results(chunk_stream const &stream) const {
block_address meg = 1024 * 1024;
cout << "\n\n"
<< stream.size() / meg << "m examined, "
<< get_non_zeroes() / meg << "m duplicates, "
<< get_zeroes() / meg << "m zeroes\n";
}
private:
block_address non_zero_dups_;
block_address zero_dups_;
};
class duplicate_detector {
public:
void scan_with_variable_sized_chunks(chunk_stream &stream) {
variable_chunk_stream vstream(stream, 4096);
scan(vstream);
}
void scan_with_fixed_sized_chunks(chunk_stream &stream, block_address chunk_size) {
fixed_chunk_stream fstream(stream, chunk_size);
scan(fstream);
}
duplicate_counter const &get_results() const {
return results_;
}
private:
void scan(chunk_stream &stream) {
block_address total_seen(0);
unique_ptr<progress_monitor> pbar = create_progress_bar("Examining data");
do {
// FIXME: use a wrapper class to automate the put()
chunk const &c = stream.get();
examine(c);
stream.put(c);
total_seen += c.len_;
pbar->update_percent((total_seen * 100) / stream.size());
} while (stream.next());
pbar->update_percent(100);
results_.display_results(stream);
}
void examine(chunk const &c) {
if (all_zeroes(c))
results_.add_zero_duplicate(c.len_);
else {
digestor_.reset();
digestor_.process_bytes(c.mem_.begin, c.mem_.end - c.mem_.begin);
unsigned int digest[5];
digestor_.get_digest(digest);
// hack
vector<unsigned int> v(5);
for (unsigned i = 0; i < 5; i++)
v[i] = digest[i];
fingerprint_map::const_iterator it = fm_.find(v);
if (it != fm_.end()) {
results_.add_duplicate(c.len_);
} else
fm_.insert(make_pair(v, c.offset_));
}
}
bool all_zeroes(chunk const &c) const {
for (uint8_t *ptr = c.mem_.begin; ptr != c.mem_.end; ptr++) {
if (*ptr != 0)
return false;
}
return true;
}
typedef map<vector<unsigned int>, block_address> fingerprint_map;
unsigned block_size_;
boost::uuids::detail::sha1 digestor_;
fingerprint_map fm_;
duplicate_counter results_;
};
int show_dups_pool(flags const &fs) {
block_manager<>::ptr bm = open_bm(*fs.metadata_dev);
transaction_manager::ptr tm = open_tm(bm);
superblock_detail::superblock sb = read_superblock(bm);
block_address block_size = sb.data_block_size_ * 512;
block_address nr_blocks = get_nr_blocks(fs.data_dev, block_size);
cache_stream stream(fs.data_dev, block_size, fs.cache_mem);
pool_stream pstream(stream, tm, sb, nr_blocks);
duplicate_detector detector;
if (fs.content_based_chunks)
detector.scan_with_variable_sized_chunks(pstream);
else {
if (*fs.block_size) {
if (factor_of(*fs.block_size, block_size))
block_size = *fs.block_size;
else
throw runtime_error("specified block size is not a factor of the pool chunk size\n");
}
detector.scan_with_fixed_sized_chunks(pstream, block_size);
}
return 0;
}
int show_dups_linear(flags const &fs) {
if (!fs.block_size)
// FIXME: this check should be moved to the switch parsing
throw runtime_error("--block-sectors or --metadata-dev must be supplied");
block_address block_size = *fs.block_size;
block_address nr_blocks = get_nr_blocks(fs.data_dev, *fs.block_size);
cerr << "path = " << fs.data_dev << "\n";
cerr << "nr_blocks = " << nr_blocks << "\n";
cerr << "block size = " << block_size << "\n";
cache_stream stream(fs.data_dev, block_size, fs.cache_mem);
duplicate_detector dd;
if (fs.content_based_chunks)
dd.scan_with_variable_sized_chunks(stream);
else
dd.scan_with_fixed_sized_chunks(stream, block_size);
return 0;
}
int show_dups(flags const &fs) {
if (fs.metadata_dev)
return show_dups_pool(fs);
else {
cerr << "No metadata device provided, so treating data device as a linear device\n";
return show_dups_linear(fs);
}
}
}
//----------------------------------------------------------------
thin_show_duplicates_cmd::thin_show_duplicates_cmd()
: command("thin_show_duplicates")
{
}
void
thin_show_duplicates_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options] {device|file}\n"
<< "Options:\n"
<< " {--block-sectors} <integer>\n"
<< " {--content-based-chunks}\n"
<< " {--metadata-dev} <path>\n"
<< " {-h|--help}\n"
<< " {-V|--version}" << endl;
}
int
thin_show_duplicates_cmd::run(int argc, char **argv)
{
int c;
flags fs;
char const shortopts[] = "qhV";
option const longopts[] = {
{ "block-sectors", required_argument, NULL, 1},
{ "content-based-chunks", no_argument, NULL, 2},
{ "metadata-dev", required_argument, NULL, 3},
{ "help", no_argument, NULL, 'h'},
{ "version", no_argument, NULL, 'V'},
{ NULL, no_argument, NULL, 0 }
};
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
switch(c) {
case 'h':
usage(cout);
return 0;
case 'V':
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
case 1:
fs.block_size = 512 * parse_int(optarg, "block sectors");
break;
case 2:
fs.content_based_chunks = true;
break;
case 3:
fs.metadata_dev = optarg;
break;
default:
usage(cerr);
return 1;
}
}
if (argc == optind) {
cerr << "No data device/file provided." << endl;
usage(cerr);
exit(1);
}
fs.data_dev = argv[optind];
return show_dups(fs);
}
//----------------------------------------------------------------

View File

@ -0,0 +1,133 @@
#include "thin-provisioning/variable_chunk_stream.h"
using namespace boost;
using namespace std;
using namespace thin_provisioning;
//----------------------------------------------------------------
variable_chunk_stream::variable_chunk_stream(chunk_stream &stream, unsigned window_size)
: index_(0),
h_(window_size),
stream_(stream),
big_chunk_(0) {
next_big_chunk();
}
variable_chunk_stream::~variable_chunk_stream()
{
put_big_chunk();
}
bcache::block_address
variable_chunk_stream::size() const
{
return stream_.size();
}
void
variable_chunk_stream::rewind()
{
// FIXME: not complete
index_ = 0;
stream_.rewind();
h_.reset();
}
bool
variable_chunk_stream::next(bcache::block_address count)
{
while (count--) {
index_++;
advance_one();
}
return !eof();
}
bool
variable_chunk_stream::eof() const
{
return stream_.eof();
}
chunk const &
variable_chunk_stream::get()
{
assert(big_chunk_);
little_chunk_.len_ = little_e_ - little_b_;
little_chunk_.offset_ = big_chunk_->offset_ + little_chunk_.len_;
little_chunk_.mem_.begin = little_b_;
little_chunk_.mem_.end = little_e_;
return little_chunk_;
}
void
variable_chunk_stream::put(chunk const &c)
{
// noop
}
bool
variable_chunk_stream::next_big_chunk()
{
put_big_chunk();
if (!stream_.next())
return false;
big_chunk_ = &stream_.get();
little_b_ = little_e_ = last_hashed_ = big_chunk_->mem_.begin;
h_.reset();
return true;
}
bool
variable_chunk_stream::advance_one()
{
uint8_t *big_e;
big_e = big_chunk_->mem_.end;
little_b_ = little_e_;
little_e_ = last_hashed_;
if (little_b_ == big_e) {
if (next_big_chunk())
big_e = big_chunk_->mem_.end;
else
return false;
}
while (little_e_ != big_e) {
optional<unsigned> maybe_break = h_.step(*little_e_);
little_e_++;
if (maybe_break) {
// The break is not neccessarily at the current
// byte.
last_hashed_ = little_e_;
little_e_ = little_b_ + *maybe_break;
break;
}
}
if (little_e_ == big_e)
last_hashed_ = little_e_;
return true;
}
void
variable_chunk_stream::put_big_chunk()
{
if (big_chunk_)
stream_.put(*big_chunk_);
big_chunk_ = 0;
}
//----------------------------------------------------------------

View File

@ -0,0 +1,41 @@
#ifndef THIN_PROVISIONING_VARIABLE_CHUNK_STREAM_H
#define THIN_PROVISIONING_VARIABLE_CHUNK_STREAM_H
#include "base/rolling_hash.h"
#include "thin-provisioning/chunk_stream.h"
//----------------------------------------------------------------
namespace thin_provisioning {
class variable_chunk_stream : public chunk_stream {
public:
// window_size must be a power of 2
variable_chunk_stream(chunk_stream &stream, unsigned window_size);
~variable_chunk_stream();
virtual bcache::block_address size() const;
virtual void rewind();
virtual bool next(bcache::block_address count = 1ull);
virtual bool eof() const;
virtual chunk const &get();
virtual void put(chunk const &c);
private:
bool next_big_chunk();
bool advance_one();
void put_big_chunk();
bcache::block_address index_;
base::content_based_hash h_;
chunk_stream &stream_;
chunk const *big_chunk_;
uint8_t *little_b_, *little_e_, *last_hashed_;
chunk little_chunk_;
};
}
//----------------------------------------------------------------
#endif