From a29b5c8d07df1885694f899d0fc013fba6d03b19 Mon Sep 17 00:00:00 2001
From: Joe Thornber
Date: Thu, 10 Oct 2013 10:18:46 +0100
Subject: [PATCH] [base] base64 encoder

Really slow implementation. Speed up on a rainy day.
---
 Makefile.in            |   2 +
 base/base64.cc         | 186 +++++++++++++++++++++++++++++++++++++++++
 base/base64.h          |  20 +++++
 unit-tests/Makefile.in |   1 +
 unit-tests/base64_t.cc | 121 +++++++++++++++++++++++++++
 5 files changed, 330 insertions(+)
 create mode 100644 base/base64.cc
 create mode 100644 base/base64.h
 create mode 100644 unit-tests/base64_t.cc

diff --git a/Makefile.in b/Makefile.in
index ac3ce75..0835d87 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -35,6 +35,7 @@ PROGRAMS=\
 all: $(PROGRAMS)
 
 SOURCE=\
+	base/base64.cc \
 	base/error_state.cc \
 	\
 	caching/hint_array.cc \
@@ -228,6 +229,7 @@ thin_metadata_size: thin-provisioning/thin_metadata_size.o
 # Cache tools
 
 CACHE_CHECK_SOURCE=\
+	base/base64.cc \
 	base/error_state.cc \
 	persistent-data/checksum.cc \
 	persistent-data/endian_utils.cc \
diff --git a/base/base64.cc b/base/base64.cc
new file mode 100644
index 0000000..311d74f
--- /dev/null
+++ b/base/base64.cc
@@ -0,0 +1,186 @@
+#include "base/base64.h"
+
+#include <boost/optional.hpp>
+#include <sstream>
+#include <stdexcept>
+
+using namespace base;
+using namespace boost;
+using namespace std;
+
+//----------------------------------------------------------------
+
+namespace {
+	char const *table_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // alphabet, indexed by 6-bit value
+
+	struct index_set { // the table_ indices produced by one group of 1..3 input bytes
+		unsigned nr_valid_; // entries of index_ in use; the encoder pads the quad with '=' up to 4
+		unsigned index_[4];
+	};
+
+	index_set split1(unsigned char c) { // 1 byte -> 2 indices (low 4 bits of the 2nd are zero)
+		index_set r;
+
+		r.nr_valid_ = 2;
+		r.index_[0] = c >> 2;
+		r.index_[1] = (c & 3) << 4;
+
+		return r;
+	}
+
+	index_set split2(unsigned char c1, unsigned char c2) { // 2 bytes -> 3 indices (low 2 bits of the 3rd are zero)
+		index_set r;
+
+		r.nr_valid_ = 3;
+		r.index_[0] = c1 >> 2;
+		r.index_[1] = ((c1 & 3) << 4) | (c2 >> 4);
+		r.index_[2] = (c2 & 15) << 2;
+
+		return r;
+	}
+
+	index_set split3(unsigned char c1, unsigned char c2, unsigned char c3) { // 3 bytes -> 4 indices, no padding
+		index_set r;
+
+		r.nr_valid_ = 4;
+		r.index_[0] = c1 >> 2;
+		r.index_[1] = ((c1 & 3) << 4) | (c2 >> 4);
+		r.index_[2] = ((c2 & 15) << 2) | (c3 >> 6);
+		r.index_[3] = c3 & 63;
+
+		return r;
+	}
+
+	index_set split(vector<unsigned char> const &raw, unsigned index) { // indices for the (up to 3) bytes of raw starting at index
+		unsigned remaining = std::min<unsigned>(raw.size() - index, 3);
+
+		switch (remaining) {
+		case 1:
+			return split1(raw.at(index));
+
+		case 2:
+			return split2(raw.at(index), raw.at(index + 1));
+
+		case 3:
+			return split3(raw.at(index), raw.at(index + 1), raw.at(index + 2));
+		}
+
+		throw std::runtime_error("internal error, in split"); // remaining == 0, i.e. index == raw.size()
+	}
+
+	optional<unsigned> char_to_index(char c) { // position of c in table_, or none if c is not in the alphabet
+		// FIXME: very slow
+		for (unsigned i = 0; i < 64; i++)
+			if (table_[i] == c)
+				return optional<unsigned>(i);
+
+		return optional<unsigned>();
+	}
+
+	decoded_or_error success(vector<unsigned char> const &decoded) {
+		return decoded_or_error(decoded);
+	}
+
+	decoded_or_error fail(string msg) {
+		return decoded_or_error(msg);
+	}
+
+	decoded_or_error fail_char(char c) { // error result naming the offending input character
+		ostringstream msg;
+		msg << "bad input character: '" << c << "'";
+		return fail(msg.str());
+	}
+
+	decoded_or_error decode_quad(char c1, char c2, char c3, char c4) { // one 4-char quad -> 1..3 bytes, or an error
+		typedef optional<unsigned> oi;
+		unsigned char d1, d2, d3;
+		vector<unsigned char> decoded;
+
+		oi i1 = char_to_index(c1);
+		if (!i1)
+			return fail_char(c1);
+
+		oi i2 = char_to_index(c2);
+		if (!i2)
+			return fail_char(c2);
+
+		d1 = (*i1 << 2) | (*i2 >> 4);
+		decoded.push_back(d1);
+
+		d2 = (*i2 & 15) << 4;
+
+		if (c3 == '=') {
+			// Only "xx==" is valid here.  d2 holds just 4 spare bits, not a
+			// whole byte, so nothing more is pushed.
+			return (c4 == '=') ? success(decoded) : fail_char(c4);
+		}
+
+		oi i3 = char_to_index(c3);
+		if (!i3)
+			return fail_char(c3);
+
+		d2 = d2 | (*i3 >> 2);
+		decoded.push_back(d2);
+
+		d3 = (*i3 & 3) << 6;
+
+		if (c4 == '=') {
+			// d3 holds just 2 spare bits, not a whole byte, so nothing more
+			// is pushed.
+			return success(decoded);
+		}
+
+		oi i4 = char_to_index(c4);
+		if (!i4)
+			return fail_char(c4);
+
+		d3 = d3 | *i4;
+		decoded.push_back(d3);
+
+		return success(decoded);
+	}
+}
+
+//----------------------------------------------------------------
+
+string
+base::base64_encode(vector<unsigned char> const &raw) // 4 output chars per 3 input bytes, last quad '=' padded
+{
+	string r;
+
+	for (unsigned i = 0; i < raw.size(); i += 3) {
+		unsigned j;
+		index_set is = split(raw, i);
+
+		for (j = 0; j < is.nr_valid_; j++)
+			r.push_back(table_[is.index_[j]]);
+
+		for (; j < 4; j++)
+			r.push_back('=');
+	}
+
+	return r;
+}
+
+base::decoded_or_error
+base::base64_decode(string const &encoded) // decoded bytes, or an error string for malformed input
+{
+	if (encoded.length() % 4)
+		return decoded_or_error("bad input length");
+
+	vector<unsigned char> decoded;
+
+	for (unsigned i = 0; i < encoded.length(); i += 4) {
+		decoded_or_error doe = decode_quad(encoded[i], encoded[i + 1], encoded[i + 2], encoded[i + 3]);
+		vector<unsigned char> *v = get<vector<unsigned char> >(&doe);
+		if (!v)
+			return doe;
+		if (v->size() < 3 && i + 4 < encoded.length()) // a short quad means '=' padding; only legal in the last quad
+			return decoded_or_error("bad input padding");
+		decoded.insert(decoded.end(), v->begin(), v->end());
+	}
+
+	return decoded_or_error(decoded);
+}
+
+//----------------------------------------------------------------
diff --git a/base/base64.h b/base/base64.h
new file mode 100644
index 0000000..340ae6c
--- /dev/null
+++ b/base/base64.h
@@ -0,0 +1,20 @@
+#ifndef BASE_BASE64_H
+#define BASE_BASE64_H
+
+#include <boost/variant.hpp>
+#include <string>
+#include <vector>
+
+//----------------------------------------------------------------
+
+namespace base {
+	std::string base64_encode(std::vector<unsigned char> const &raw); // output is '=' padded to a multiple of 4 chars
+
+	// Returns either the decoded data or an error string
+	typedef boost::variant<std::vector<unsigned char>, std::string> decoded_or_error;
+	decoded_or_error base64_decode(std::string const &encoded);
+}
+
+//----------------------------------------------------------------
+
+#endif
diff --git a/unit-tests/Makefile.in b/unit-tests/Makefile.in
index ffc1afc..db790e6 100644
--- a/unit-tests/Makefile.in
+++ b/unit-tests/Makefile.in
@@ -47,6 +47,7 @@ TEST_SOURCE=\
 	\
 	unit-tests/array_block_t.cc \
 	unit-tests/array_t.cc \
+	unit-tests/base64_t.cc \
 	unit-tests/bitset_t.cc \
 	unit-tests/block_t.cc \
 	unit-tests/btree_t.cc \
diff --git a/unit-tests/base64_t.cc b/unit-tests/base64_t.cc
new file mode 100644
index 0000000..c98dbe4
--- /dev/null
+++ b/unit-tests/base64_t.cc
@@ -0,0 +1,121 @@
+#include "gmock/gmock.h"
+#include "base/base64.h"
+
+#include <stdlib.h> // ::rand()
+#include <string.h> // strlen()
+
+using namespace base;
+using namespace boost;
+using namespace std;
+using namespace testing;
+
+//----------------------------------------------------------------
+
+namespace {
+	typedef vector<unsigned char> bytes;
+
+	char const *wikipedia_examples[] = { // flat list of (plain text, expected base64) pairs
+		"any carnal pleasure.", "YW55IGNhcm5hbCBwbGVhc3VyZS4=",
+		"any carnal pleasure", "YW55IGNhcm5hbCBwbGVhc3VyZQ==",
+		"any carnal pleasur", "YW55IGNhcm5hbCBwbGVhc3Vy",
+		"any carnal pleasu", "YW55IGNhcm5hbCBwbGVhc3U=",
+		"any carnal pleas", "YW55IGNhcm5hbCBwbGVhcw==",
+		"pleasure.", "cGxlYXN1cmUu",
+		"leasure.", "bGVhc3VyZS4=",
+		"easure.", "ZWFzdXJlLg==",
+		"asure.", "YXN1cmUu",
+		"sure.", "c3VyZS4="
+	};
+
+	void assert_fails(decoded_or_error const &eoe, string const &msg) { // eoe must hold the error string msg
+		ASSERT_THAT(get<string>(eoe), Eq(msg));
+	}
+}
+
+//----------------------------------------------------------------
+
+TEST(Base64Tests, encoding_an_empty_string)
+{
+	bytes bs;
+	ASSERT_THAT(base64_encode(bs), Eq(string()));
+}
+
+TEST(Base64Tests, decoding_an_empty_string)
+{
+	bytes bs;
+	ASSERT_THAT(get<vector<unsigned char> >(base64_decode("")), Eq(bs));
+}
+
+TEST(Base64Tests, encode_single_byte)
+{
+	bytes bs(1);
+	bs[0] = 0;
+
+	ASSERT_THAT(base64_encode(bs), Eq(string("AA==")));
+}
+
+TEST(Base64Tests, encode_double_byte)
+{
+	bytes bs(2, 0);
+	ASSERT_THAT(base64_encode(bs), Eq(string("AAA=")));
+}
+
+TEST(Base64Tests, encode_triple_byte)
+{
+	bytes bs(3, 0);
+	ASSERT_THAT(base64_encode(bs), Eq(string("AAAA")));
+}
+
+TEST(Base64Tests, longer_encodings)
+{
+	for (unsigned example = 0; example < sizeof(wikipedia_examples) / sizeof(*wikipedia_examples) / 2; example++) { // every pair in the table
+		char const *in = wikipedia_examples[example * 2];
+		char const *out = wikipedia_examples[example * 2 + 1];
+		unsigned len = strlen(in);
+		bytes bs(len);
+		for (unsigned b = 0; b < len; b++)
+			bs.at(b) = in[b];
+
+		ASSERT_THAT(base64_encode(bs), Eq(string(out)));
+	}
+}
+
+TEST(Base64Tests, decoding_fails_with_bad_size_input)
+{
+	char const *err = "bad input length";
+
+	assert_fails(base64_decode("AAA"), err);
+	assert_fails(base64_decode("AA"), err);
+	assert_fails(base64_decode("A"), err);
+}
+
+TEST(Base64Tests, encode_decode_cycle)
+{
+	for (unsigned example = 0; example < sizeof(wikipedia_examples) / sizeof(*wikipedia_examples) / 2; example++) { // every pair in the table
+		char const *in = wikipedia_examples[example * 2];
+		unsigned len = strlen(in);
+		bytes bs(len);
+		for (unsigned b = 0; b < len; b++)
+			bs.at(b) = in[b];
+
+		decoded_or_error doe = base64_decode(base64_encode(bs));
+		ASSERT_THAT(get<vector<unsigned char> >(doe), Eq(bs));
+	}
+}
+
+TEST(Base64Tests, random_data)
+{
+	for (unsigned len = 1; len < 17; len++) { // lengths 1..16 cover every padding case several times
+		for (unsigned example = 0; example < 10000; example++) {
+			vector<unsigned char> raw(len);
+
+			for (unsigned i = 0; i < len; i++)
+				raw.at(i) = ::rand() % 256;
+
+			decoded_or_error doe = base64_decode(base64_encode(raw));
+			ASSERT_THAT(get<vector<unsigned char> >(doe), Eq(raw));
+		}
+	}
+}
+
+//----------------------------------------------------------------