// 154 lines, 2.8 KiB, C++
#include "gmock/gmock.h"
|
|
|
|
#include "base/rolling_hash.h"
|
|
|
|
using namespace base;
|
|
using namespace boost;
|
|
using namespace std;
|
|
using namespace testing;
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
namespace {
|
|
class RollingHashTests : public Test {
|
|
public:
|
|
RollingHashTests()
|
|
: window_size_(4096),
|
|
rhash_(window_size_) {
|
|
}
|
|
|
|
typedef vector<uint8_t> bytes;
|
|
bytes random_bytes(unsigned count) {
|
|
bytes v(count, 0);
|
|
|
|
for (unsigned i = 0; i < count; i++)
|
|
v[i] = random_byte();
|
|
|
|
return v;
|
|
}
|
|
|
|
uint8_t random_byte() const {
|
|
return random() % 256;
|
|
}
|
|
|
|
void apply_bytes(bytes const &bs) {
|
|
for (unsigned i = 0; i < bs.size(); i++)
|
|
rhash_.step(bs[i]);
|
|
}
|
|
|
|
unsigned window_size_;
|
|
rolling_hash rhash_;
|
|
};
|
|
|
|
class ContentBasedHashTests : public Test {
|
|
public:
|
|
ContentBasedHashTests()
|
|
: window_size_(8192),
|
|
h_(window_size_) {
|
|
}
|
|
|
|
typedef vector<uint8_t> bytes;
|
|
bytes random_bytes(unsigned count) {
|
|
bytes v(count, 0);
|
|
|
|
for (unsigned i = 0; i < count; i++)
|
|
v[i] = random_byte();
|
|
|
|
return v;
|
|
}
|
|
|
|
uint8_t random_byte() const {
|
|
return random() % 256;
|
|
}
|
|
|
|
unsigned window_size_;
|
|
content_based_hash h_;
|
|
};
|
|
}
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
// Intentionally empty: the only thing exercised is construction (and
// destruction) of the RollingHashTests fixture itself.
TEST_F(RollingHashTests, ctr)
{
}
|
|
|
|
//--------------------------------
|
|
|
|
// Pushes window_size_ * 100 random bytes through the hash one at a time and
// asserts that the hash after each step differs from the hash before it.
TEST_F(RollingHashTests, hash_changes)
{
	bytes const input = random_bytes(window_size_ * 100);

	// Hash of the freshly constructed object, before any data is seen.
	uint32_t prev = rhash_.get_hash();

	for (bytes::const_iterator it = input.begin(); it != input.end(); ++it) {
		rhash_.step(*it);
		ASSERT_NE(rhash_.get_hash(), prev);
		prev = rhash_.get_hash();
	}
}
|
|
|
|
// Applies the same window_size_ random bytes twice in a row and asserts that
// the hash after the second pass equals the hash after the first.
TEST_F(RollingHashTests, hash_repeats)
{
	bytes const window = random_bytes(window_size_);

	// First pass: remember the resulting hash.
	apply_bytes(window);
	uint32_t const h1 = rhash_.get_hash();

	// Second pass over identical data.
	apply_bytes(window);

	ASSERT_EQ(rhash_.get_hash(), h1);
}
|
|
|
|
// Hashes a fixed string, calls reset(), hashes the same string again and
// asserts that both runs produce the same hash.
TEST_F(RollingHashTests, reset_is_deterministic)
{
	uint8_t input[] = "lksdfuwerh,sdg";

	// sizeof includes the terminating NUL of the literal; it is not fed
	// to the hash.
	uint8_t const *const first = input;
	uint8_t const *const last = input + (sizeof(input) - 1);

	for (uint8_t const *p = first; p != last; ++p)
		rhash_.step(*p);
	uint32_t const h1 = rhash_.get_hash();

	rhash_.reset();

	for (uint8_t const *p = first; p != last; ++p)
		rhash_.step(*p);
	uint32_t const h2 = rhash_.get_hash();

	ASSERT_EQ(h1, h2);
}
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
// Intentionally empty: the only thing exercised is construction (and
// destruction) of the ContentBasedHashTests fixture itself.
TEST_F(ContentBasedHashTests, ctr)
{
}
|
|
|
|
// Streams 100 MiB of random bytes through the content based hash and, every
// time step() yields a value, records that value in a histogram and tracks
// the smallest and largest value seen.  The histogram and the min/max are
// then written to stderr.
//
// NOTE(review): the value yielded by step() is presumably the length of the
// chunk that just ended -- confirm against base/rolling_hash.h.  This test
// only reports the observed range; it makes no assertions about it.
TEST_F(ContentBasedHashTests, chunk_limits_respected)
{
	// min starts above any value we expect to see, max below, so the first
	// reported value replaces both.
	unsigned min = 100000, max = 0;

	bytes bs = random_bytes(1024 * 1024 * 100);
	vector<unsigned> counts(window_size_, 0);

	for (unsigned i = 0; i < bs.size(); i++) {
		// Explicitly qualified: with both 'using namespace boost' and
		// 'using namespace std' in effect, a bare 'optional' is ambiguous
		// as soon as std::optional is visible.
		boost::optional<unsigned> b = h_.step(bs[i]);
		if (b) {
			// The histogram starts with window_size_ slots, but nothing
			// here guarantees the reported value stays below that.  Grow
			// on demand rather than writing past the end of the vector.
			if (*b >= counts.size())
				counts.resize(static_cast<size_t>(*b) + 1, 0);

			counts[*b]++;

			if (*b < min)
				min = *b;

			if (*b > max)
				max = *b;
		}
	}

#if 1
	// Diagnostic dump: one line per histogram slot, then the range.
	for (unsigned i = 0; i < counts.size(); i++)
		cerr << i << ": " << counts[i] << "\n";

	cerr << "min: " << min << ", max: " << max << "\n";
#endif
}
|
|
|
|
//----------------------------------------------------------------
|