Merge branch '2015-08-19-thin-show-duplicates' into merge-thin-ls

Conflicts:
	Makefile.in
	block-cache/block_cache.h
	main.cc
	thin-provisioning/commands.h
This commit is contained in:
Joe Thornber
2016-02-24 14:31:51 +00:00
25 changed files with 1477 additions and 25 deletions

View File

@@ -31,17 +31,22 @@ namespace {
if (nr_equals < progress_width_)
cout << '>';
else
cout << "=";
for (unsigned i = 0; i < nr_spaces; i++)
cout << ' ';
cout << "] " << spinner_char() << " " << p << "%\r" << flush;
cout << "] " << spinner_char(p) << " " << p << "%\r" << flush;
spinner_++;
}
private:
char spinner_char() const {
char spinner_char(unsigned p) const {
if (p == 100)
return ' ';
char cs[] = {'|', '/', '-', '\\'};
unsigned index = spinner_ % sizeof(cs);

57
base/rolling_hash.cc Normal file
View File

@@ -0,0 +1,57 @@
#include "base/rolling_hash.h"
using namespace base;
using namespace boost;
using namespace hash_detail;
using namespace std;
//----------------------------------------------------------------
// Builds a rolling hash over a sliding window of |window_size| bytes.
//
// a_to_k_minus_1_ caches a_^(window_size - 1) (mod 2^32): the weight carried
// by the oldest byte in the window, which update_hash() uses to remove that
// byte's contribution before the new byte is shifted in.
//
// The factor is built up from 1 so that it is also correct for a window of a
// single byte (a_^0 == 1), and so that a zero window size does not make the
// loop bound wrap around.  A window size of at least 1 is expected; the
// per-byte update reads the front of the window buffer.
rolling_hash::rolling_hash(unsigned window_size)
	: a_(MULTIPLIER),
	  a_to_k_minus_1_(1),
	  window_size_(window_size),
	  buffer_(window_size) {

	// Multiply (window_size - 1) times; unsigned overflow wraps, which is
	// the intended modulo 2^32 arithmetic.
	for (unsigned i = 1; i < window_size_; i++)
		a_to_k_minus_1_ *= a_;

	reset();
}
// Puts the hash back into its initial state: the window is refilled with
// window_size_ zero bytes and hash_ is recomputed as if those zero bytes had
// been fed in one at a time.
void
rolling_hash::reset()
{
	hash_ = 0;
	buffer_.clear();

	// Prime the window with zeroes; a zero byte contributes only SEED.
	for (unsigned remaining = window_size_; remaining > 0; --remaining) {
		buffer_.push_back(0);
		hash_ = (hash_ * a_) + SEED;
	}
}
//--------------------------------
// Sets up a content based chunker on top of a rolling hash with the given
// window size.  All chunking parameters are derived from |window_size|.
//
// FIXME: hard coded values
//
// NOTE: the two divisor fields are applied as bit masks by hit_break().  The
// subtractions are unsigned, so a window_size below 4 (backup) or below 2
// (primary) wraps around to an all-ones mask.
content_based_hash::content_based_hash(unsigned window_size)
	: rhash_(window_size),
	  backup_div_(window_size / 4 - 1),	// mask for the fallback break test
	  div_(window_size / 2 - 1),		// mask for the primary break test
	  min_len_(window_size / 4),		// no break is reported before this length
	  max_len_(window_size),		// a break is forced at this length
	  len_(0)
{
}
// Forgets all chunking state: the underlying rolling hash is re-primed, any
// remembered backup break is dropped and the current length restarts at 0.
void
content_based_hash::reset()
{
	rhash_.reset();
	backup_break_.reset();
	len_ = 0;
}
//----------------------------------------------------------------

109
base/rolling_hash.h Normal file
View File

@@ -0,0 +1,109 @@
#ifndef BASE_ROLLING_HASH_H
#define BASE_ROLLING_HASH_H
#include <boost/circular_buffer.hpp>
#include <stdint.h>
#include <boost/optional.hpp>
//----------------------------------------------------------------
namespace base {
// Constants shared by the rolling hash implementation.
namespace hash_detail {
	// Multiplier of the polynomial hash: 2^32 - 5 (0xFFFFFFFB).
	const uint32_t MULTIPLIER = 4294967291UL;

	// Offset added to every byte before it is mixed into the hash.
	const uint32_t SEED = 123;
}
// Polynomial rolling hash over a fixed-size window of bytes.
//
// Each byte b in the window contributes (b + SEED) * a_^p, where p is the
// byte's distance from the newest end of the window.  All arithmetic is
// unsigned 32 bit, so it wraps modulo 2^32.
class rolling_hash {
public:
	rolling_hash(unsigned window_size);

	// Re-primes the window and the hash (defined in rolling_hash.cc).
	void reset();

	// Slides the window forward by one byte and returns the current hash.
	uint32_t step(uint8_t byte) {
		update_hash(byte);
		return get_hash();
	}

	// Returns the hash of the bytes currently in the window.
	uint32_t get_hash() const {
		return hash_;
	}

private:
	// Replaces the oldest byte of the window with |byte| and adjusts hash_
	// to match.
	void update_hash(uint8_t byte) {
		// Contribution terms of the byte leaving and the byte entering.
		uint32_t const outgoing = buffer_.front() + hash_detail::SEED;
		uint32_t const incoming = byte + hash_detail::SEED;

		// Remove the oldest byte at its weight a_^(k - 1) ...
		hash_ -= a_to_k_minus_1_ * outgoing;

		// ... push the new byte into the circular buffer ...
		buffer_.push_back(byte);

		// ... then shift every remaining term up one power and add the
		// new byte.
		hash_ = (hash_ * a_) + incoming;
	}

	// Member order matters: the constructor's initialiser list relies on it.
	uint32_t a_;			// multiplier
	uint32_t a_to_k_minus_1_;	// cached power of a_ used to drop the oldest byte
	uint32_t hash_;			// current hash value
	uint32_t window_size_;		// number of bytes in the window

	boost::circular_buffer<uint8_t> buffer_;	// the bytes currently in the window
};
// Splits a byte stream into chunks whose boundaries are picked from the
// value of a rolling hash, subject to a minimum and a maximum chunk length.
class content_based_hash {
public:
	content_based_hash(unsigned window_size);

	// Drops all state and starts a fresh chunk (defined in rolling_hash.cc).
	void reset();

	// Feeds one byte.  Returns a break point relative to the last
	// reset/break, or an empty optional if this byte does not end a chunk.
	boost::optional<unsigned> step(uint8_t byte) {
		boost::optional<unsigned> result;

		rhash_.step(byte);
		++len_;

		// Too short to break yet.
		if (len_ < min_len_)
			return result;

		// Remember the most recent position that passes the backup test,
		// in case the chunk has to be cut at max_len_.
		if (hit_break(backup_div_))
			backup_break_ = len_;

		// Primary test passed: the chunk ends here.
		if (hit_break(div_)) {
			result = len_;
			len_ = 0;
			backup_break_.reset();
			return result;
		}

		// Still room to grow.
		if (len_ < max_len_)
			return result;

		// Too big: cut at the backup position if there is one, keeping
		// the bytes after it as the start of the next chunk; otherwise
		// cut right here.
		if (backup_break_) {
			result = backup_break_;
			len_ -= *result;
			backup_break_.reset();
		} else {
			result = len_;
			len_ = 0;
		}

		return result;
	}

private:
	// True when none of the bits selected by |mask| are set in the current
	// hash once its low 8 bits have been shifted out.
	bool hit_break(uint32_t mask) const {
		return ((rhash_.get_hash() >> 8) & mask) == 0;
	}

	// Member order matters: the constructor's initialiser list relies on it.
	rolling_hash rhash_;

	uint32_t backup_div_;	// mask for the backup break test
	uint32_t div_;		// mask for the primary break test

	unsigned min_len_;	// no break is reported below this length
	unsigned max_len_;	// a break is forced once this length is reached
	unsigned len_;		// bytes seen since the last reset/break

	boost::optional<unsigned> backup_break_;	// last backup position, if any
};
}
//----------------------------------------------------------------
#endif