[base] base64 encoder

Really slow implementation.  Speed up on a rainy day.
This commit is contained in:
Joe Thornber 2013-10-10 10:18:46 +01:00
parent 83f1e4bdd9
commit a29b5c8d07
5 changed files with 330 additions and 0 deletions

View File

@ -35,6 +35,7 @@ PROGRAMS=\
all: $(PROGRAMS)
SOURCE=\
base/base64.cc \
base/error_state.cc \
\
caching/hint_array.cc \
@ -228,6 +229,7 @@ thin_metadata_size: thin-provisioning/thin_metadata_size.o
# Cache tools
CACHE_CHECK_SOURCE=\
base/base64.cc \
base/error_state.cc \
persistent-data/checksum.cc \
persistent-data/endian_utils.cc \

186
base/base64.cc Normal file
View File

@ -0,0 +1,186 @@
#include "base/base64.h"
#include <boost/optional.hpp>
#include <sstream>
#include <stdexcept>
using namespace base;
using namespace boost;
using namespace std;
//----------------------------------------------------------------
namespace {
// The 64-character base64 alphabet.  A 6-bit value is used directly as an
// index into this string to obtain its encoded character.
char const *table_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// The alphabet indexes produced from one group of up to three input bytes.
struct index_set {
// Number of leading entries of index_ that have been filled in (2, 3 or 4
// as set by split1/split2/split3).
unsigned nr_valid_;
// Indexes into table_; entries at or beyond nr_valid_ are left unset.
unsigned index_[4];
};
// Breaks a single byte into two 6-bit indexes.  The top six bits form the
// first index; the remaining two bits become the high bits of the second.
index_set split1(unsigned char c) {
	unsigned const byte = c;
	index_set out;

	out.index_[0] = byte >> 2;
	out.index_[1] = (byte & 3) << 4;
	out.nr_valid_ = 2;

	return out;
}
// Breaks two bytes into three 6-bit indexes.  The bytes are packed into a
// 16-bit word and consumed six bits at a time from the top; the last index
// only carries four real bits, left-aligned within its six.
index_set split2(unsigned char c1, unsigned char c2) {
	unsigned const word = (static_cast<unsigned>(c1) << 8) | c2;
	index_set out;

	out.index_[0] = word >> 10;
	out.index_[1] = (word >> 4) & 63;
	out.index_[2] = (word << 2) & 63;
	out.nr_valid_ = 3;

	return out;
}
// Breaks three bytes into four 6-bit indexes.
//
// All three parameters are bytes.  The third used to be declared as a plain
// 'unsigned', which would have let a value above 255 leak extra high bits
// into index_[2] and so index beyond the 64-entry alphabet.
index_set split3(unsigned char c1, unsigned char c2, unsigned char c3) {
	// Pack the bytes into a 24-bit word, most significant byte first.
	unsigned const word = (static_cast<unsigned>(c1) << 16) |
			      (static_cast<unsigned>(c2) << 8) |
			      static_cast<unsigned>(c3);
	index_set r;

	r.nr_valid_ = 4;
	// Peel off six bits at a time, starting from the top of the word.
	for (unsigned k = 0; k < 4; k++)
		r.index_[k] = (word >> (18 - 6 * k)) & 63;

	return r;
}
// Produces the alphabet indexes for the group of up to three bytes that
// starts at 'index' within 'raw'.
//
// Throws std::runtime_error if no bytes remain at 'index'.  The element
// accesses go through vector::at, so an index past the end is reported by
// that call.
index_set split(vector<unsigned char> const &raw, unsigned index) {
	// Narrowed to unsigned on purpose: the count is compared as an unsigned.
	unsigned const left = static_cast<unsigned>(raw.size() - index);

	if (left == 0)
		throw std::runtime_error("internal error, in split");

	if (left == 1)
		return split1(raw.at(index));

	if (left == 2)
		return split2(raw.at(index), raw.at(index + 1));

	// Three or more bytes remain: take a full group.
	return split3(raw.at(index), raw.at(index + 1), raw.at(index + 2));
}
// Maps an encoded character back to its position in the base64 alphabet
// ("A-Z", "a-z", "0-9", '+', '/'), or returns an empty optional if the
// character is not part of the alphabet.
//
// Uses range checks rather than scanning the alphabet, so the cost is
// constant per character.  This relies on the letters being contiguous in
// the execution character set, which holds for ASCII.
optional<unsigned> char_to_index(char c) {
	if (c >= 'A' && c <= 'Z')
		return optional<unsigned>(static_cast<unsigned>(c - 'A'));

	if (c >= 'a' && c <= 'z')
		return optional<unsigned>(26u + static_cast<unsigned>(c - 'a'));

	if (c >= '0' && c <= '9')
		return optional<unsigned>(52u + static_cast<unsigned>(c - '0'));

	if (c == '+')
		return optional<unsigned>(62u);

	if (c == '/')
		return optional<unsigned>(63u);

	return optional<unsigned>();
}
// Wraps decoded bytes as the 'data' alternative of decoded_or_error.
decoded_or_error success(vector<unsigned char> const &decoded) {
	decoded_or_error result(decoded);
	return result;
}
// Wraps an error message as the 'error' alternative of decoded_or_error.
decoded_or_error fail(string msg) {
	decoded_or_error result(msg);
	return result;
}
// Builds the error result reported when 'c' is not acceptable in the input.
decoded_or_error fail_char(char c) {
	string text("bad input character: '");
	text += c;
	text += "'";
	return fail(text);
}
// Decodes one group of four encoded characters into one, two or three bytes.
//
// c1 and c2 must be alphabet characters.  c3 and c4 may be '=' padding:
//   "xx=="  -> one byte
//   "xxx="  -> two bytes
//   "xxxx"  -> three bytes
// Padding followed by a data character (e.g. "xx=x") is rejected, as is any
// character outside the alphabet; in both cases the result holds an error
// message naming the offending character.
decoded_or_error decode_quad(char c1, char c2, char c3, char c4) {
	typedef optional<unsigned> oi;
	vector<unsigned char> decoded;

	oi const i1 = char_to_index(c1);
	if (!i1)
		return fail_char(c1);

	oi const i2 = char_to_index(c2);
	if (!i2)
		return fail_char(c2);

	decoded.push_back(static_cast<unsigned char>((*i1 << 2) | (*i2 >> 4)));

	if (c3 == '=') {
		// Once padding starts it must run to the end of the quad.
		if (c4 != '=')
			return fail_char(c3);

		// The low four bits of i2 are only filler for the padding; they
		// do not make up a byte, so nothing more is emitted.
		return success(decoded);
	}

	oi const i3 = char_to_index(c3);
	if (!i3)
		return fail_char(c3);

	decoded.push_back(static_cast<unsigned char>(((*i2 & 15) << 4) | (*i3 >> 2)));

	if (c4 == '=') {
		// Likewise the low two bits of i3 are filler, not data.
		return success(decoded);
	}

	oi const i4 = char_to_index(c4);
	if (!i4)
		return fail_char(c4);

	decoded.push_back(static_cast<unsigned char>(((*i3 & 3) << 6) | *i4));
	return success(decoded);
}
}
//----------------------------------------------------------------
// Encodes 'raw' as base64 text.  Every group of up to three input bytes
// yields exactly four output characters; a short final group is padded
// with '='.  An empty input yields an empty string.
string
base::base64_encode(vector<unsigned char> const &raw)
{
	string r;

	// The output length is known up front, so allocate it once rather than
	// growing the string one character at a time.
	r.reserve(((raw.size() + 2) / 3) * 4);

	for (unsigned i = 0; i < raw.size(); i += 3) {
		index_set const is = split(raw, i);

		for (unsigned j = 0; j < 4; j++)
			r.push_back(j < is.nr_valid_ ? table_[is.index_[j]] : '=');
	}

	return r;
}
// Decodes base64 text.
//
// Returns the decoded bytes, or an error message when:
//   - the input length is not a multiple of four,
//   - a group of four characters fails to decode (the error from
//     decode_quad is passed through unchanged), or
//   - '=' padding appears anywhere other than the final group.
base::decoded_or_error
base::base64_decode(string const &encoded)
{
	if (encoded.length() % 4)
		return decoded_or_error("bad input length");

	vector<unsigned char> decoded;
	decoded.reserve(encoded.length() / 4 * 3);

	// Index with the string's own size type so the loop cannot wrap before
	// reaching the end of a very long input.
	for (string::size_type i = 0; i < encoded.length(); i += 4) {
		decoded_or_error doe = decode_quad(encoded[i], encoded[i + 1], encoded[i + 2], encoded[i + 3]);
		vector<unsigned char> *v = get<vector<unsigned char> >(&doe);
		if (!v)
			return doe;

		// A group only decodes to fewer than three bytes when it was
		// padded, and padding is only meaningful at the very end.
		if (v->size() < 3 && i + 4 < encoded.length())
			return decoded_or_error(string("unexpected padding before end of input"));

		decoded.insert(decoded.end(), v->begin(), v->end());
	}

	return decoded_or_error(decoded);
}
//----------------------------------------------------------------

20
base/base64.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef BASE_BASE64_H
#define BASE_BASE64_H
#include <boost/variant.hpp>
#include <string>
#include <vector>
//----------------------------------------------------------------
namespace base {
// Encodes raw bytes as text using the alphabet A-Z, a-z, 0-9, '+', '/'.
// The output is padded with '=' so its length is a multiple of four.
std::string base64_encode(std::vector<unsigned char> const &raw);
// Returns either the decoded data or an error string
typedef boost::variant<std::vector<unsigned char>, std::string> decoded_or_error;
// Decodes base64 text.  On malformed input (for example a length that is
// not a multiple of four, or a character outside the alphabet) the result
// holds an error message instead of the decoded bytes.
decoded_or_error base64_decode(std::string const &encoded);
}
//----------------------------------------------------------------
#endif

View File

@ -47,6 +47,7 @@ TEST_SOURCE=\
\
unit-tests/array_block_t.cc \
unit-tests/array_t.cc \
unit-tests/base64_t.cc \
unit-tests/bitset_t.cc \
unit-tests/block_t.cc \
unit-tests/btree_t.cc \

121
unit-tests/base64_t.cc Normal file
View File

@ -0,0 +1,121 @@
#include "gmock/gmock.h"
#include "base/base64.h"
#include <stdexcept>
#include <stdlib.h>
using namespace base;
using namespace boost;
using namespace std;
using namespace testing;
//----------------------------------------------------------------
namespace {
// Shorthand for the byte buffer type accepted by base64_encode.
typedef vector<unsigned char> bytes;
// Known-good encodings, stored as a flat list of pairs: each even entry is
// a plain-text input and the odd entry that follows is its expected base64
// encoding.
char const *wikipedia_examples[] = {
"any carnal pleasure.", "YW55IGNhcm5hbCBwbGVhc3VyZS4=",
"any carnal pleasure", "YW55IGNhcm5hbCBwbGVhc3VyZQ==",
"any carnal pleasur", "YW55IGNhcm5hbCBwbGVhc3Vy",
"any carnal pleasu", "YW55IGNhcm5hbCBwbGVhc3U=",
"any carnal pleas", "YW55IGNhcm5hbCBwbGVhcw==",
"pleasure.", "cGxlYXN1cmUu",
"leasure.", "bGVhc3VyZS4=",
"easure.", "ZWFzdXJlLg==",
"asure.", "YXN1cmUu",
"sure.", "c3VyZS4="
};
// Asserts that 'eoe' holds an error and that the error text equals 'msg'.
void assert_fails(decoded_or_error const &eoe, string const &msg) {
	string const &actual = get<string>(eoe);
	ASSERT_THAT(actual, Eq(msg));
}
};
//----------------------------------------------------------------
// Encoding no bytes must produce no characters.
TEST(Base64Tests, encoding_an_empty_string)
{
	bytes nothing;
	string const encoded = base64_encode(nothing);
	ASSERT_THAT(encoded, Eq(string()));
}
// Decoding the empty string must succeed and yield no bytes.
TEST(Base64Tests, decoding_an_empty_string)
{
	bytes expected;
	decoded_or_error const result = base64_decode("");
	ASSERT_THAT(get<vector<unsigned char> >(result), Eq(expected));
}
// One zero byte encodes to two data characters plus two pad characters.
TEST(Base64Tests, encode_single_byte)
{
	bytes zeroes;
	zeroes.push_back(0);

	string const expected("AA==");
	ASSERT_THAT(base64_encode(zeroes), Eq(expected));
}
// Two zero bytes encode to three data characters plus one pad character.
TEST(Base64Tests, encode_double_byte)
{
	bytes zeroes;
	zeroes.resize(2);

	string const expected("AAA=");
	ASSERT_THAT(base64_encode(zeroes), Eq(expected));
}
// Three zero bytes fill a whole group, so no padding is emitted.
TEST(Base64Tests, encode_triple_byte)
{
	bytes zeroes;
	zeroes.resize(3);

	string const expected("AAAA");
	ASSERT_THAT(base64_encode(zeroes), Eq(expected));
}
// Checks the encoder against every (input, expected) pair in
// wikipedia_examples.  The pair count is derived from the table itself so
// that no entry is silently skipped.
TEST(Base64Tests, longer_encodings)
{
	unsigned const nr_examples =
		sizeof(wikipedia_examples) / sizeof(wikipedia_examples[0]) / 2;

	for (unsigned example = 0; example < nr_examples; example++) {
		string const in(wikipedia_examples[example * 2]);
		string const out(wikipedia_examples[example * 2 + 1]);

		bytes const bs(in.begin(), in.end());
		ASSERT_THAT(base64_encode(bs), Eq(out));
	}
}
// Inputs whose length is not a multiple of four must be rejected.
TEST(Base64Tests, decoding_fails_with_bad_size_input)
{
	string const expected("bad input length");
	char const *inputs[] = {"AAA", "AA", "A"};

	for (unsigned i = 0; i < 3; i++)
		assert_fails(base64_decode(inputs[i]), expected);
}
// Encoding and then decoding must give back the original bytes for every
// input in wikipedia_examples.  The pair count is derived from the table
// itself so that no entry is silently skipped.
TEST(Base64Tests, encode_decode_cycle)
{
	unsigned const nr_examples =
		sizeof(wikipedia_examples) / sizeof(wikipedia_examples[0]) / 2;

	for (unsigned example = 0; example < nr_examples; example++) {
		string const in(wikipedia_examples[example * 2]);
		bytes const bs(in.begin(), in.end());

		decoded_or_error doe = base64_decode(base64_encode(bs));
		ASSERT_THAT(get<vector<unsigned char> >(doe), Eq(bs));
	}
}
// Round-trips pseudo-random buffers of every length from 1 to 16 bytes,
// 10000 buffers per length, and checks each decodes back to itself.
TEST(Base64Tests, random_data)
{
	unsigned const max_len = 16;
	unsigned const rounds = 10000;

	for (unsigned len = 1; len <= max_len; len++) {
		for (unsigned round = 0; round < rounds; round++) {
			bytes raw;
			raw.reserve(len);
			while (raw.size() < len)
				raw.push_back(::rand() % 256);

			decoded_or_error const result = base64_decode(base64_encode(raw));
			ASSERT_THAT(get<vector<unsigned char> >(result), Eq(raw));
		}
	}
}
//----------------------------------------------------------------