[thin_show_dups] inline some hash functions

This commit is contained in:
Joe Thornber 2015-09-04 11:10:19 +01:00
parent 7633c5d7ae
commit 506b0a8a08
2 changed files with 56 additions and 75 deletions

View File

@ -2,15 +2,11 @@
using namespace base;
using namespace boost;
using namespace hash_detail;
using namespace std;
//----------------------------------------------------------------
namespace {
	// File-local parameters of the rolling hash.  They are declared const
	// so nothing in this translation unit can modify them at run time.

	// Multiplier of the polynomial hash (2^32 - 5); used to initialise
	// rolling_hash::a_.
	uint32_t const MULTIPLIER = 4294967291UL;

	// Offset added to every byte before it is folded into the hash.
	uint32_t const SEED = 123;
}
rolling_hash::rolling_hash(unsigned window_size)
: a_(MULTIPLIER),
a_to_k_minus_1_(a_),
@ -35,28 +31,6 @@ rolling_hash::reset()
}
}
uint32_t
rolling_hash::step(uint8_t byte)
{
update_hash(byte);
return hash_;
}
// Returns the current hash value; does not consume any input or modify
// the object.
uint32_t
rolling_hash::get_hash() const
{
return hash_;
}
// Slides the hash along by one byte: the oldest stored byte leaves,
// 'byte' enters.
void
rolling_hash::update_hash(uint8_t byte)
{
	// Take out the contribution of the byte at the front of chars_.
	// (a_to_k_minus_1_ is presumably a_ raised to the window size minus
	// one; it is computed in the constructor.)
	hash_ = hash_ - a_to_k_minus_1_ * (SEED + chars_.front());

	// The front must be read before it is popped, and popped before the
	// new byte is pushed.
	chars_.pop_front();
	chars_.push_back(byte);

	// Shift the remaining terms up by one power of a_ and add the new byte.
	hash_ *= a_;
	hash_ += SEED + byte;
}
//--------------------------------
content_based_hash::content_based_hash(unsigned window_size)
@ -79,47 +53,4 @@ content_based_hash::reset()
rhash_.reset();
}
// Consumes one byte.  Returns the length of the chunk that has just
// ended (relative to the last reset/break), or an empty optional if no
// break occurs at this byte.
optional<unsigned>
content_based_hash::step(uint8_t byte)
{
	rhash_.step(byte);
	++len_;

	// Never break before the minimum chunk length.
	if (len_ < min_len_)
		return optional<unsigned>();

	// Remember the most recent position that satisfies the backup mask.
	if (hit_break(backup_div_))
		backup_break_ = len_;

	optional<unsigned> result;

	if (hit_break(div_)) {
		// found a break
		result = len_;
		len_ = 0;
		backup_break_.reset();
		return result;
	}

	// Still below the maximum chunk length: keep accumulating.
	if (len_ < max_len_)
		return result;

	// too big, is there a backup?
	if (backup_break_) {
		// Break at the backup position; the bytes seen since then
		// count towards the next chunk.
		result = backup_break_;
		len_ -= *backup_break_;
		backup_break_.reset();
		return result;
	}

	// No backup available: force a break right here.
	result = len_;
	len_ = 0;
	return result;
}
// True when, after discarding the low 8 bits of the current rolling
// hash, none of the bits selected by 'mask' are set.
bool
content_based_hash::hit_break(uint32_t mask) const
{
	return ((rhash_.get_hash() >> 8) & mask) == 0;
}
//----------------------------------------------------------------

View File

@ -8,6 +8,11 @@
//----------------------------------------------------------------
namespace base {
// Parameters shared by the inline rolling hash implementation below.
namespace hash_detail {
	// Offset added to every byte before it is folded into the hash.
	const uint32_t SEED = 123;

	// Multiplier of the polynomial hash (2^32 - 5).
	const uint32_t MULTIPLIER = 4294967291U;
}
class rolling_hash {
public:
rolling_hash(unsigned window_size);
@ -15,12 +20,22 @@ namespace base {
void reset();
// Returns the current hash
uint32_t step(uint8_t byte);
uint32_t step(uint8_t byte) {
update_hash(byte);
return hash_;
}
uint32_t get_hash() const;
// Returns the current hash value; does not consume any input or modify
// the object.
uint32_t get_hash() const {
return hash_;
}
private:
void update_hash(uint8_t byte);
// Slides the hash along by one byte: the oldest stored byte leaves,
// 'byte' enters.
void update_hash(uint8_t byte) {
	// Take out the contribution of the byte at the front of chars_.
	// (a_to_k_minus_1_ is presumably a_ raised to the window size minus
	// one; it is computed in the constructor.)
	hash_ = hash_ - a_to_k_minus_1_ * (hash_detail::SEED + chars_.front());

	// The front must be read before it is popped, and popped before the
	// new byte is pushed.
	chars_.pop_front();
	chars_.push_back(byte);

	// Shift the remaining terms up by one power of a_ and add the new byte.
	hash_ *= a_;
	hash_ += hash_detail::SEED + byte;
}
uint32_t a_;
uint32_t a_to_k_minus_1_;
@ -38,10 +53,45 @@ namespace base {
void reset();
// Returns a break point relative to the last reset/break.
boost::optional<unsigned> step(uint8_t byte);
// Consumes one byte.  Yields the length of the chunk that has just ended,
// or an empty optional if no break occurs at this byte.
boost::optional<unsigned> step(uint8_t byte) {
	rhash_.step(byte);
	++len_;

	// Never break before the minimum chunk length.
	if (len_ < min_len_)
		return boost::optional<unsigned>();

	// Remember the most recent position that satisfies the backup mask.
	if (hit_break(backup_div_))
		backup_break_ = len_;

	boost::optional<unsigned> result;

	if (hit_break(div_)) {
		// found a break
		result = len_;
		len_ = 0;
		backup_break_.reset();
		return result;
	}

	// Still below the maximum chunk length: keep accumulating.
	if (len_ < max_len_)
		return result;

	// too big, is there a backup?
	if (backup_break_) {
		// Break at the backup position; the bytes seen since then
		// count towards the next chunk.
		result = backup_break_;
		len_ -= *backup_break_;
		backup_break_.reset();
		return result;
	}

	// No backup available: force a break right here.
	result = len_;
	len_ = 0;
	return result;
}
private:
bool hit_break(uint32_t div) const;
// True when, after discarding the low 8 bits of the current rolling
// hash, none of the bits selected by 'mask' are set.
bool hit_break(uint32_t mask) const {
	return ((rhash_.get_hash() >> 8) & mask) == 0;
}
rolling_hash rhash_;