From 5d383c029334bb8d85dee5c81c40741c2975cf01 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 4 Sep 2015 10:10:41 +0100 Subject: [PATCH] [thin_show_dups] get the backup break working in the rolling hash --- base/rolling_hash.cc | 23 ---------------------- thin-provisioning/variable_chunk_stream.cc | 20 +++++++------------ thin-provisioning/variable_chunk_stream.h | 2 +- 3 files changed, 8 insertions(+), 37 deletions(-) diff --git a/base/rolling_hash.cc b/base/rolling_hash.cc index 1ea362f..9c6e1bf 100644 --- a/base/rolling_hash.cc +++ b/base/rolling_hash.cc @@ -82,7 +82,6 @@ content_based_hash::reset() optional content_based_hash::step(uint8_t byte) { -#if 0 optional r; rhash_.step(byte); @@ -114,28 +113,6 @@ content_based_hash::step(uint8_t byte) } return r; -#else - optional r; - - rhash_.step(byte); - len_++; - - if (len_ < min_len_) - return r; - - if (hit_break(div_)) { - // found a break - r = len_; - len_ = 0; - backup_break_.reset(); - - } else if (len_ >= max_len_) { - r = len_; - len_ = 0; - } - - return r; -#endif } bool diff --git a/thin-provisioning/variable_chunk_stream.cc b/thin-provisioning/variable_chunk_stream.cc index 9a9d11e..41c6c96 100644 --- a/thin-provisioning/variable_chunk_stream.cc +++ b/thin-provisioning/variable_chunk_stream.cc @@ -80,7 +80,7 @@ variable_chunk_stream::next_big_chunk() return false; big_chunk_ = &stream_.get(); - little_b_ = little_e_ = big_chunk_->mem_.front().begin; + little_b_ = little_e_ = last_hashed_ = big_chunk_->mem_.front().begin; h_.reset(); return true; @@ -95,6 +95,7 @@ variable_chunk_stream::advance_one() big_e = big_chunk_->mem_.front().end; little_b_ = little_e_; + little_e_ = last_hashed_; if (little_b_ == big_e) { if (next_big_chunk()) @@ -105,35 +106,28 @@ variable_chunk_stream::advance_one() assert(little_e_ >= big_chunk_->mem_.front().begin); assert(little_b_ >= big_chunk_->mem_.front().begin); -#if 1 - if (little_e_ > big_e) { - cerr << "before -- little_e_: " << (void *) little_e_ << ", big_e: " << (void *) big_e << "\n"; - } -#endif assert(little_e_ <= big_e); assert(little_b_ <= big_e); while (little_e_ != big_e) { optional maybe_break = h_.step(*little_e_); + little_e_++; if (maybe_break) { // The break is not neccessarily at the current // byte. + last_hashed_ = little_e_; little_e_ = little_b_ + *maybe_break; break; } - - little_e_++; } + if (little_e_ == big_e) + last_hashed_ = little_e_; + assert(little_e_ >= big_chunk_->mem_.front().begin); assert(little_b_ >= big_chunk_->mem_.front().begin); -#if 1 - if (little_e_ > big_e) { - cerr << "after -- little_e_: " << (void *) little_e_ << ", big_e: " << (void *) big_e << "\n"; - } -#endif assert(little_e_ <= big_e); assert(little_b_ <= big_e); diff --git a/thin-provisioning/variable_chunk_stream.h b/thin-provisioning/variable_chunk_stream.h index 0327f1d..f9c5ec7 100644 --- a/thin-provisioning/variable_chunk_stream.h +++ b/thin-provisioning/variable_chunk_stream.h @@ -32,7 +32,7 @@ namespace thin_provisioning { chunk_stream &stream_; chunk const *big_chunk_; - uint8_t *little_b_, *little_e_; + uint8_t *little_b_, *little_e_, *last_hashed_; chunk little_chunk_; }; }