[thin_metadata_pack/unpack] Replace C++ implementation with a Rust one.

The Rust implementation is multithreaded, performs better in general and
does custom compression of btree nodes to achieve much better compression
ratios.  unpack also checksums expanded metadata to validate it.

Format version has jumped to 3, no backwards compatibility, but I think
that's ok since we never made a release that contained the C++ version
of these tools.

Benchmarks
==========

On an 8 core, 16 hyperthread machine.

metadata 1G, full:

      Pack size    pack time     unpack time
------------------------------------------------------
C++      193M        50.3s          6.9s (no verify)
Rust      70M         1.4s          1.8s (verify)

metadata 16G, sparse:

       Pack size    pack time     unpack time
------------------------------------------------------
C++      21M          68s           1s   (no verify)
Rust      4M           8.6s         0.5s (verify)
This commit is contained in:
Joe Thornber 2020-06-09 09:15:00 +01:00
parent c48851e747
commit 61de3f9287
21 changed files with 1818 additions and 376 deletions

2
.gitignore vendored
View File

@ -14,6 +14,8 @@ test.data
cachegrind.* cachegrind.*
\#*\# \#*\#
core core
/target
**/*.rs.bk
googletest/ googletest/

487
Cargo.lock generated Normal file
View File

@ -0,0 +1,487 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "adler32"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "aho-corasick"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "arrayvec"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "clap"
version = "2.33.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc32c"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "crc32fast"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "env_logger"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "flate2"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
"miniz_oxide 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "getrandom"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
"wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hermit-abi"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lexical-core"
version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)",
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "log"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "miniz_oxide"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nodrop"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "nom"
version = "5.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lexical-core 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-derive"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ppv-lite86"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quickcheck"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quickcheck_macros"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ppv-lite86 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.18 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "static_assertions"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thinp"
version = "0.1.0"
dependencies = [
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-xid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "version_check"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
"checksum crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "77ba37ef26c12988c1cee882d522d65e1d5d2ad8c3864665b88ee92767ed84c5"
"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
"checksum env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
"checksum flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2cfff41391129e0a856d6d822600b8d71179d46879e310417eb9c762eb178b42"
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum lexical-core 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f"
"checksum libc 0.2.71 (registry+https://github.com/rust-lang/crates.io-index)" = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
"checksum miniz_oxide 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "aa679ff6578b1cddee93d7e82e263b94a575e0bfced07284eb0c037c1d2416a5"
"checksum nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
"checksum nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6"
"checksum num-derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0c8b15b261814f992e33760b1fca9fe8b693d8a65299f20c9901688636cfb746"
"checksum num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
"checksum num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
"checksum ppv-lite86 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
"checksum proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)" = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
"checksum quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f"
"checksum quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "608c156fd8e97febc07dc9c2e2c80bf74cfc6ef26893eae3daf8bc2bc94a4b7f"
"checksum quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
"checksum rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
"checksum rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
"checksum regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
"checksum regex-syntax 0.6.18 (registry+https://github.com/rust-lang/crates.io-index)" = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
"checksum vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
"checksum version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
"checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

22
Cargo.toml Normal file
View File

@ -0,0 +1,22 @@
[package]
name = "thinp"
version = "0.1.0"
authors = ["Joe Thornber <ejt@redhat.com>"]
edition = "2018"
license = "GPL3"
[dependencies]
byteorder = "1.3"
clap = "2.33"
crc32c = "0.4"
flate2 = "1.0"
libc = "0.2.71"
nom = "5.1"
num_cpus = "1.13"
rand = "0.7"
num-traits = "0.2"
num-derive = "0.3"
[dev-dependencies]
quickcheck = "0.9"
quickcheck_macros = "0.9"

View File

@ -114,7 +114,6 @@ SOURCE=\
thin-provisioning/thin_dump.cc \ thin-provisioning/thin_dump.cc \
thin-provisioning/thin_ls.cc \ thin-provisioning/thin_ls.cc \
thin-provisioning/thin_metadata_size.cc \ thin-provisioning/thin_metadata_size.cc \
thin-provisioning/thin_metadata_pack.cc \
thin-provisioning/thin_pool.cc \ thin-provisioning/thin_pool.cc \
thin-provisioning/thin_repair.cc \ thin-provisioning/thin_repair.cc \
thin-provisioning/thin_restore.cc \ thin-provisioning/thin_restore.cc \
@ -273,8 +272,6 @@ TOOLS:=\
thin_repair \ thin_repair \
thin_restore \ thin_restore \
thin_rmap \ thin_rmap \
thin_metadata_pack \
thin_metadata_unpack \
thin_metadata_size \ thin_metadata_size \
thin_trim \ thin_trim \
era_check \ era_check \
@ -301,8 +298,6 @@ install: bin/pdata_tools $(MANPAGES)
ln -s -f pdata_tools $(BINDIR)/thin_repair ln -s -f pdata_tools $(BINDIR)/thin_repair
ln -s -f pdata_tools $(BINDIR)/thin_restore ln -s -f pdata_tools $(BINDIR)/thin_restore
ln -s -f pdata_tools $(BINDIR)/thin_rmap ln -s -f pdata_tools $(BINDIR)/thin_rmap
ln -s -f pdata_tools $(BINDIR)/thin_metadata_pack
ln -s -f pdata_tools $(BINDIR)/thin_metadata_unpack
ln -s -f pdata_tools $(BINDIR)/thin_metadata_size ln -s -f pdata_tools $(BINDIR)/thin_metadata_size
ln -s -f pdata_tools $(BINDIR)/thin_trim ln -s -f pdata_tools $(BINDIR)/thin_trim
ln -s -f pdata_tools $(BINDIR)/era_check ln -s -f pdata_tools $(BINDIR)/era_check
@ -323,8 +318,7 @@ install: bin/pdata_tools $(MANPAGES)
$(INSTALL_DATA) man8/thin_repair.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_repair.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_restore.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_restore.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_rmap.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_rmap.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_pack.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_unpack.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_size.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/thin_metadata_size.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/era_check.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_check.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/era_dump.8 $(MANPATH)/man8 $(INSTALL_DATA) man8/era_dump.8 $(MANPATH)/man8
@ -339,9 +333,12 @@ ifeq ("@DEVTOOLS@", "yes")
ln -s -f pdata_tools $(BINDIR)/thin_scan ln -s -f pdata_tools $(BINDIR)/thin_scan
endif endif
# $(INSTALL_DATA) man8/era_restore.8 $(MANPATH)/man8 .PHONY: install install-rust-tools
.PHONY: install install-rust-tools:
cargo install --path . --root $(BINDIR)
$(INSTALL_DATA) man8/thin_metadata_pack.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_unpack.8 $(MANPATH)/man8
ifeq ("@TESTING@", "yes") ifeq ("@TESTING@", "yes")
include unit-tests/Makefile include unit-tests/Makefile

View File

@ -13,6 +13,12 @@ The [expat](http://expat.sourceforge.net/) xml parser library (version 1).
The libaio library (note this is not the same as the aio library that you get by linking -lrt) The libaio library (note this is not the same as the aio library that you get by linking -lrt)
make, autoconf etc. make, autoconf etc.
A couple of non-essential tools are written in Rust, and will
require cargo and rustc to be installed:
thin_metadata_pack
thin_metadata_unpack
There are more requirements for testing, detailed below. There are more requirements for testing, detailed below.
Building Building
@ -23,6 +29,11 @@ Building
make make
sudo make install sudo make install
Building Rust tools
===================
sudo make install-rust-tools
Quick examples Quick examples
============== ==============

View File

@ -1 +1 @@
pdata_tools ../target/release/thin_metadata_pack

View File

@ -1 +1 @@
pdata_tools ../target/release/thin_metadata_unpack

1
rustfmt.toml Normal file
View File

@ -0,0 +1 @@
reorder_imports = true

View File

@ -0,0 +1,32 @@
extern crate clap;
extern crate thinp;
use clap::{App, Arg};
use std::process;
/// Entry point for `thin_metadata_pack`.
///
/// Parses the required `-i <DEV>` and `-o <FILE>` arguments and hands them to
/// `thinp::pack::pack::pack`.  On failure the reason is reported on stderr and
/// the process exits with status 1.
fn main() {
    let parser = App::new("thin_metadata_pack")
        .version("0.8.5") // FIXME: use actual version
        .about("Produces a compressed file of thin metadata. Only packs metadata blocks that are actually used.")
        .arg(Arg::with_name("INPUT")
            .help("Specify thinp metadata binary device/file")
            .required(true)
            .short("i")
            .value_name("DEV")
            .takes_value(true))
        .arg(Arg::with_name("OUTPUT")
            .help("Specify packed output file")
            .required(true)
            .short("o")
            .value_name("FILE")
            .takes_value(true));

    let matches = parser.get_matches();

    // Both arguments are declared `required`, so clap has already rejected
    // the command line if either is missing; the unwraps cannot fail here.
    let input_file = matches.value_of("INPUT").unwrap();
    let output_file = matches.value_of("OUTPUT").unwrap();

    if let Err(reason) = thinp::pack::pack::pack(&input_file, &output_file) {
        // Diagnostics go to stderr so they are never mixed into stdout.
        eprintln!("Application error: {}\n", reason);
        process::exit(1);
    }
}

View File

@ -0,0 +1,33 @@
extern crate clap;
extern crate thinp;
use clap::{App, Arg};
use std::process;
/// Entry point for `thin_metadata_unpack`.
///
/// Parses the required `-i <FILE>` (packed input) and `-o <DEV>` (metadata
/// output) arguments and hands them to `thinp::pack::pack::unpack`.  On
/// failure the reason is reported on stderr and the process exits with
/// status 1.
fn main() {
    let parser = App::new("thin_metadata_unpack")
        .version("0.8.5") // FIXME: use actual version
        .about("Unpack a compressed file of thin metadata.")
        // For unpack the *input* is the packed file and the *output* is the
        // metadata device/file, i.e. the reverse of thin_metadata_pack.
        .arg(Arg::with_name("INPUT")
            .help("Specify packed input file")
            .required(true)
            .short("i")
            .value_name("FILE")
            .takes_value(true))
        .arg(Arg::with_name("OUTPUT")
            .help("Specify thinp metadata binary device/file")
            .required(true)
            .short("o")
            .value_name("DEV")
            .takes_value(true));

    let matches = parser.get_matches();

    // Both arguments are declared `required`, so clap has already rejected
    // the command line if either is missing; the unwraps cannot fail here.
    let input_file = matches.value_of("INPUT").unwrap();
    let output_file = matches.value_of("OUTPUT").unwrap();

    if let Err(reason) = thinp::pack::pack::unpack(&input_file, &output_file) {
        // Diagnostics go to stderr so they are never mixed into stdout.
        eprintln!("Application error: {}", reason);
        process::exit(1);
    }
}

51
src/block_manager.rs Normal file
View File

@ -0,0 +1,51 @@
use std::io;
use std::io::{Read, Seek};
use std::fs::OpenOptions;
use std::os::unix::fs::OpenOptionsExt;
use std::fs::File;
/// Size in bytes of one block; `BlockManager` reads the input in units of
/// this size.
pub const BLOCK_SIZE: usize = 4096;
/// A buffer holding the contents of one block.
///
/// The type is aligned to 4096 bytes.  NOTE(review): presumably this is so
/// the buffer can be the target of reads on a file opened with `O_DIRECT`
/// (see `BlockManager::new`) - confirm.
#[repr(align(4096))]
pub struct Block {
/// Raw bytes of the block.
pub data: [u8; BLOCK_SIZE as usize],
}
/// Reads fixed-size blocks from a file or device.
pub struct BlockManager {
/// Number of whole `BLOCK_SIZE` blocks in the input, as computed by
/// `get_nr_blocks` when the manager is created.
pub nr_blocks: u64,
// Handle to the input, opened read-only by `BlockManager::new`.
input: File,
}
fn get_nr_blocks(path: &str) -> io::Result<u64> {
let metadata = std::fs::metadata(path)?;
Ok(metadata.len() / (BLOCK_SIZE as u64))
}
impl BlockManager {
    /// Opens `path` read-only with `O_DIRECT` and records how many blocks
    /// it contains.
    ///
    /// `_cache_size` is accepted but currently unused.
    pub fn new(path: &str, _cache_size: usize) -> io::Result<BlockManager> {
        let mut options = OpenOptions::new();
        options
            .read(true)
            .write(false)
            .custom_flags(libc::O_DIRECT);

        let input = options.open(path)?;
        let nr_blocks = get_nr_blocks(path)?;

        Ok(BlockManager { nr_blocks, input })
    }

    /// Returns the contents of block `b`, read directly from the input.
    pub fn get(&mut self, b: u64) -> io::Result<Block> {
        self.read_block(b)
    }

    /// Seeks to the byte offset of block `b` and reads exactly one block.
    fn read_block(&mut self, b: u64) -> io::Result<Block> {
        let offset = b * (BLOCK_SIZE as u64);
        let mut block = Block {
            data: [0; BLOCK_SIZE],
        };

        self.input.seek(io::SeekFrom::Start(offset))?;
        self.input.read_exact(&mut block.data[..])?;
        Ok(block)
    }
}

13
src/check.rs Normal file
View File

@ -0,0 +1,13 @@
use std::error::Error;
use crate::block_manager::BlockManager;
/// Opens `dev` through a `BlockManager` and reads blocks 0 through 99,
/// discarding their contents.
///
/// Returns the first error encountered while opening the device or reading
/// a block; otherwise `Ok(())`.
pub fn check(dev: &str) -> Result<(), Box<dyn Error>> {
    let mut manager = BlockManager::new(dev, 1024)?;

    // Stop at the first failed read and propagate its error.
    (0..100).try_for_each(|index| manager.get(index).map(drop))?;

    Ok(())
}

16
src/lib.rs Normal file
View File

@ -0,0 +1,16 @@
extern crate byteorder;
extern crate crc32c;
extern crate flate2;
extern crate nom;
extern crate num_cpus;
#[cfg(test)]
extern crate quickcheck;
#[cfg(test)]
#[macro_use(quickcheck)]
#[cfg(test)]
extern crate quickcheck_macros;
pub mod block_manager;
pub mod check;
pub mod pack;

169
src/pack/delta_list.rs Normal file
View File

@ -0,0 +1,169 @@
//-------------------------------------------------
/// One run in the delta encoding of a sequence of `u64` values.
#[derive(PartialEq, Debug, Clone)]
pub enum Delta {
    /// The first value of the sequence.
    Base { n: u64 },
    /// The current value repeated `count` more times.
    Const { count: u64 },
    /// `count` further values, each `delta` greater than the one before it.
    Pos { delta: u64, count: u64 },
    /// `count` further values, each `delta` less than the one before it.
    Neg { delta: u64, count: u64 },
}

use Delta::*;

/// Counts how many leading elements of `rest` belong to the current run.
///
/// `matches(k, value)` is called with the 1-based position `k` of `value`
/// within the candidate run; counting stops at the first element for which
/// it returns `false`.
fn run_length(rest: &[u64], matches: impl Fn(u64, u64) -> bool) -> u64 {
    let mut count = 0u64;
    for &value in rest {
        if !matches(count + 1, value) {
            break;
        }
        count += 1;
    }
    count
}

/// Encodes `ns` as a list of runs.
///
/// An empty slice yields an empty list.  Otherwise the result starts with
/// `Base { n: ns[0] }` and is followed by `Pos`, `Neg` and `Const` runs,
/// each describing as many consecutive values as continue the same step
/// from the previous value.
///
/// Run detection uses checked arithmetic: a candidate value that could only
/// be reached by overflowing `u64` simply ends the run instead of panicking
/// (debug builds) or matching a wrapped-around value (release builds).
pub fn to_delta(ns: &[u64]) -> Vec<Delta> {
    let mut ds = Vec::new();

    if ns.is_empty() {
        return ds;
    }

    let mut base = ns[0];
    ds.push(Base { n: base });

    let mut i = 1;
    while i < ns.len() {
        let n = ns[i];

        if n > base {
            let delta = n - base;
            // The k-th value of the run must equal base + k * delta.
            let count = run_length(&ns[i..], |k, value| {
                k.checked_mul(delta)
                    .and_then(|step| base.checked_add(step))
                    == Some(value)
            });
            ds.push(Pos { delta, count });
            i += count as usize;
            // Cannot overflow: base + count * delta is a value seen in `ns`.
            base += delta * count;
        } else if n < base {
            let delta = base - n;
            // The k-th value of the run must satisfy value + k * delta == base.
            let count = run_length(&ns[i..], |k, value| {
                k.checked_mul(delta)
                    .and_then(|step| value.checked_add(step))
                    == Some(base)
            });
            ds.push(Neg { delta, count });
            i += count as usize;
            // Cannot underflow: base - count * delta is a value seen in `ns`.
            base -= delta * count;
        } else {
            let count = run_length(&ns[i..], |_, value| value == base);
            ds.push(Const { count });
            i += count as usize;
        }
    }

    ds
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expands a list of runs back into the values it encodes; the inverse
    /// of `to_delta`.
    fn from_delta(ds: &[Delta]) -> Vec<u64> {
        let mut current = 0u64;
        let mut values: Vec<u64> = Vec::new();

        for run in ds.iter() {
            match *run {
                Base { n } => {
                    current = n;
                    values.push(current);
                }
                Const { count } => {
                    values.extend((0..count).map(|_| current));
                }
                Pos { delta, count } => {
                    for _ in 0..count {
                        current += delta;
                        values.push(current);
                    }
                }
                Neg { delta, count } => {
                    for _ in 0..count {
                        assert!(current >= delta);
                        current -= delta;
                        values.push(current);
                    }
                }
            }
        }

        values
    }

    /// Checks hand-written sequences against their expected encoding, and
    /// that each encoding expands back to the original sequence.
    #[test]
    fn test_to_delta() {
        // Each entry pairs an input sequence with its expected run list.
        let cases: Vec<(Vec<u64>, Vec<Delta>)> = vec![
            (vec![], vec![]),
            (vec![1], vec![Base { n: 1 }]),
            (vec![1, 2], vec![Base { n: 1 }, Pos { delta: 1, count: 1 }]),
            (
                vec![1, 2, 3, 4],
                vec![Base { n: 1 }, Pos { delta: 1, count: 3 }],
            ),
            (
                vec![2, 4, 6, 8],
                vec![Base { n: 2 }, Pos { delta: 2, count: 3 }],
            ),
            (
                vec![7, 14, 21, 28],
                vec![Base { n: 7 }, Pos { delta: 7, count: 3 }],
            ),
            (
                vec![10, 9],
                vec![Base { n: 10 }, Neg { delta: 1, count: 1 }],
            ),
            (
                vec![10, 9, 8, 7],
                vec![Base { n: 10 }, Neg { delta: 1, count: 3 }],
            ),
            (
                vec![10, 8, 6, 4],
                vec![Base { n: 10 }, Neg { delta: 2, count: 3 }],
            ),
            (
                vec![28, 21, 14, 7],
                vec![Base { n: 28 }, Neg { delta: 7, count: 3 }],
            ),
            (
                vec![42, 42, 42, 42],
                vec![Base { n: 42 }, Const { count: 3 }],
            ),
            (
                vec![1, 2, 3, 10, 20, 30, 40, 38, 36, 34, 0, 0, 0, 0],
                vec![
                    Base { n: 1 },
                    Pos { delta: 1, count: 2 },
                    Pos { delta: 7, count: 1 },
                    Pos {
                        delta: 10,
                        count: 3,
                    },
                    Neg { delta: 2, count: 3 },
                    Neg {
                        delta: 34,
                        count: 1,
                    },
                    Const { count: 3 },
                ],
            ),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(to_delta(input), *expected);
            assert_eq!(from_delta(expected), *input);
        }
    }
}
//-------------------------------------------------

6
src/pack/mod.rs Normal file
View File

@ -0,0 +1,6 @@
pub mod pack;
mod node_encode;
mod delta_list;
mod vm;

127
src/pack/node_encode.rs Normal file
View File

@ -0,0 +1,127 @@
use std::{io, io::Write};
use nom::{bytes::complete::*, number::complete::*, IResult};
use crate::pack::vm::*;
//-------------------------------------------
// Errors reported by the node packing routines in this module.
#[derive(Debug)]
pub enum PackError {
    // A nom parser rejected the input (see nom_to_pr).
    ParseError,
    // Writing the packed output failed (see io_to_pr).
    IOError,
}

impl std::error::Error for PackError {}

// Result alias used by every packing function in this module.
pub type PResult<T> = Result<T, PackError>;
// Converts a nom parse result into a PResult; any nom error collapses to
// PackError::ParseError.
fn nom_to_pr<T>(r: IResult<&[u8], T>) -> PResult<(&[u8], T)> {
    r.map_err(|_| PackError::ParseError)
}
// Converts an io result into a PResult; any io error collapses to
// PackError::IOError.
fn io_to_pr<T>(r: io::Result<T>) -> PResult<T> {
    r.map_err(|_| PackError::IOError)
}
//-------------------------------------------
impl std::fmt::Display for PackError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
PackError::ParseError => write!(f, "parse error"),
PackError::IOError => write!(f, "IO error"),
}
}
}
// Parses exactly `count` little-endian u64s from the front of `i`,
// returning the remaining input and the parsed values.
fn run64(i: &[u8], count: usize) -> IResult<&[u8], Vec<u64>> {
    nom::multi::many_m_n(count, count, le_u64)(i)
}
// The header fields of a btree node that pack_btree_node needs in order to
// choose an encoding.  Filled in by summarise_node.
struct NodeSummary {
    // True when the header's flags field equals 2.
    is_leaf: bool,
    // The header's max_entries field.
    max_entries: usize,
    // The header's value_size field.
    value_size: usize
}
// Parses the node header at the start of `data` (csum, flags, blocknr,
// nr_entries, max_entries, value_size, padding - all little endian) and
// returns the remaining input together with the fields we care about.
fn summarise_node(data: &[u8]) -> IResult<&[u8], NodeSummary> {
    let (rest, _csum) = le_u32(data)?;
    let (rest, flags) = le_u32(rest)?;
    let (rest, _blocknr) = le_u64(rest)?;
    let (rest, _nr_entries) = le_u32(rest)?;
    let (rest, max_entries) = le_u32(rest)?;
    let (rest, value_size) = le_u32(rest)?;
    let (rest, _padding) = le_u32(rest)?;

    let summary = NodeSummary {
        is_leaf: flags == 2,
        max_entries: max_entries as usize,
        value_size: value_size as usize,
    };
    Ok((rest, summary))
}
pub fn pack_btree_node<W: Write>(w: &mut W, data: &[u8]) -> PResult<()> {
let (_, info) = nom_to_pr(summarise_node(data))?;
if info.is_leaf {
if info.value_size == std::mem::size_of::<u64>() {
let (i, hdr) = nom_to_pr(take(32usize)(data))?;
let (i, keys) = nom_to_pr(run64(i, info.max_entries))?;
let (tail, values) = nom_to_pr(run64(i, info.max_entries))?;
io_to_pr(pack_literal(w, hdr))?;
io_to_pr(pack_u64s(w, &keys))?;
io_to_pr(pack_shifted_u64s(w, &values))?;
if tail.len() > 0 {
io_to_pr(pack_literal(w, tail))?;
}
return Ok(());
} else {
// We don't bother packing the values if they aren't u64
let (i, hdr) = nom_to_pr(take(32usize)(data))?;
let (tail, keys) = nom_to_pr(run64(i, info.max_entries))?;
io_to_pr(pack_literal(w, hdr))?;
io_to_pr(pack_u64s(w, &keys))?;
io_to_pr(pack_literal(w, tail))?;
return Ok(());
}
} else {
// Internal node, values are also u64s
let (i, hdr) = nom_to_pr(take(32usize)(data))?;
let (i, keys) = nom_to_pr(run64(i, info.max_entries))?;
let (tail, values) = nom_to_pr(run64(i, info.max_entries))?;
io_to_pr(pack_literal(w, hdr))?;
io_to_pr(pack_u64s(w, &keys))?;
io_to_pr(pack_u64s(w, &values))?;
if tail.len() > 0 {
io_to_pr(pack_literal(w, tail))?;
}
return Ok(());
}
}
// Packs a superblock.  The bytes are stored verbatim as a single literal;
// a write failure is reported as PackError::IOError.
pub fn pack_superblock<W: Write>(w: &mut W, bytes: &[u8]) -> PResult<()> {
    io_to_pr(pack_literal(w, bytes))
}
// Packs a bitmap block.  The bytes are stored verbatim as a single
// literal; a write failure is reported as PackError::IOError.
pub fn pack_bitmap<W: Write>(w: &mut W, bytes: &[u8]) -> PResult<()> {
    io_to_pr(pack_literal(w, bytes))
}
// Packs an index block.  The bytes are stored verbatim as a single
// literal; a write failure is reported as PackError::IOError.
pub fn pack_index<W: Write>(w: &mut W, bytes: &[u8]) -> PResult<()> {
    io_to_pr(pack_literal(w, bytes))
}
//-------------------------------------

350
src/pack/pack.rs Normal file
View File

@ -0,0 +1,350 @@
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression};
use std::os::unix::fs::OpenOptionsExt;
use std::{
error::Error,
fs::OpenOptions,
io,
io::prelude::*,
io::Cursor,
io::Write,
ops::DerefMut,
sync::{Arc, Mutex},
thread::spawn,
};
use rand::prelude::*;
use std::sync::mpsc::{sync_channel, Receiver};
use crate::pack::node_encode::*;
// Size in bytes of one metadata block.
const BLOCK_SIZE: u64 = 4096;

// First u64 of a pack file (see write_header / read_header).
const MAGIC: u64 = 0xa537a0aa6309ef77;

// Second u64 of a pack file; read_header only accepts this exact value.
const PACK_VERSION: u64 = 3;

// Per block type constants.  metadata_block_type xors the checksum it
// computes with the checksum stored in the block and compares the result
// against these to classify the block.
const SUPERBLOCK_CSUM_XOR: u32 = 160774;
const BITMAP_CSUM_XOR: u32 = 240779;
const INDEX_CSUM_XOR: u32 = 160478;
const BTREE_CSUM_XOR: u32 = 121107;
// Randomly permutes `v` in place using the thread local rng.
fn shuffle<T>(v: &mut Vec<T>) {
    v.shuffle(&mut rand::thread_rng());
}
// FIXME: move to a utils module
// Integer division of `n` by `d`, rounding up.
//
// Computed from the quotient and remainder rather than `(n + d - 1) / d`
// so that values of `n` close to u64::MAX cannot overflow.
// Panics if `d` is zero.
fn div_up(n: u64, d: u64) -> u64 {
    let quotient = n / d;
    if n % d == 0 {
        quotient
    } else {
        quotient + 1
    }
}
// Each thread processes multiple contiguous runs of blocks, called
// chunks.  Chunks are shuffled so each thread gets chunks spread
// across the dev in case there are large regions that don't contain
// metadata.
//
// Returns one vector of half-open (begin, end) block ranges per job.  The
// final chunk is clamped to `nr_blocks`, so no range ever extends past the
// end of the device (reading such a range would fail with an unexpected
// EOF whenever nr_blocks is not a multiple of the chunk size).
//
// `nr_jobs` must be non-zero.
fn mk_chunk_vecs(nr_blocks: u64, nr_jobs: u64) -> Vec<Vec<(u64, u64)>> {
    use std::cmp::{max, min};

    let chunk_size = min(4 * 1024u64, max(128u64, nr_blocks / (nr_jobs * 64)));
    let nr_chunks = div_up(nr_blocks, chunk_size);

    let mut chunks = Vec::with_capacity(nr_chunks as usize);
    for i in 0..nr_chunks {
        let begin = i * chunk_size;
        let end = min(begin + chunk_size, nr_blocks);
        chunks.push((begin, end));
    }

    shuffle(&mut chunks);

    // Deal the shuffled chunks out round-robin.
    let mut vs = Vec::with_capacity(nr_jobs as usize);
    for _ in 0..nr_jobs {
        vs.push(Vec::new());
    }

    for (c, chunk) in chunks.into_iter().enumerate() {
        vs[c % (nr_jobs as usize)].push(chunk);
    }

    vs
}
// Packs the metadata in `input_file` into `output_file`.
//
// Writes the header, then spawns one crunch thread per job, each working
// through its own list of block ranges and sharing the input and output
// files behind mutexes.  Returns the first error reported by a worker;
// panics if a worker thread panicked.
pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box<dyn Error>> {
    let nr_blocks = get_nr_blocks(input_file)?;
    let cpus = num_cpus::get() as u64;
    let nr_jobs = std::cmp::max(1, std::cmp::min(cpus, nr_blocks / 128));
    let chunk_vecs = mk_chunk_vecs(nr_blocks, nr_jobs);

    let input = OpenOptions::new()
        .read(true)
        .write(false)
        .custom_flags(libc::O_EXCL)
        .open(input_file)?;

    let output = OpenOptions::new()
        .read(false)
        .write(true)
        .create(true)
        .truncate(true)
        .open(output_file)?;

    write_header(&output, nr_blocks)?;

    let sync_input = Arc::new(Mutex::new(input));
    let sync_output = Arc::new(Mutex::new(output));

    // mk_chunk_vecs returns exactly one chunk list per job.
    let workers: Vec<_> = chunk_vecs
        .into_iter()
        .map(|chunks| {
            let src = Arc::clone(&sync_input);
            let dst = Arc::clone(&sync_output);
            spawn(move || crunch(src, dst, chunks))
        })
        .collect();

    for worker in workers {
        worker.join().unwrap()?;
    }

    Ok(())
}
fn crunch<R, W>(
input: Arc<Mutex<R>>,
output: Arc<Mutex<W>>,
ranges: Vec<(u64, u64)>,
) -> io::Result<()>
where
R: Read + Seek,
W: Write,
{
let mut written = 0u64;
let mut z = ZlibEncoder::new(Vec::new(), Compression::default());
for (lo, hi) in ranges {
// We read multiple blocks at once to reduce contention
// on input.
let mut input = input.lock().unwrap();
let big_data = read_blocks(input.deref_mut(), lo, hi - lo)?;
drop(input);
for b in lo..hi {
let block_start = ((b - lo) * BLOCK_SIZE) as usize;
let data = &big_data[block_start..(block_start + BLOCK_SIZE as usize)];
let kind = metadata_block_type(data);
if kind != BT::UNKNOWN {
z.write_u64::<LittleEndian>(b)?;
pack_block(&mut z, kind, &data);
written += 1;
if written == 1024 {
let compressed = z.reset(Vec::new())?;
let mut output = output.lock().unwrap();
output.write_u64::<LittleEndian>(compressed.len() as u64)?;
output.write_all(&compressed)?;
written = 0;
}
}
}
}
if written > 0 {
let compressed = z.finish()?;
let mut output = output.lock().unwrap();
output.write_u64::<LittleEndian>(compressed.len() as u64)?;
output.write_all(&compressed)?;
}
Ok(())
}
// Writes the pack file header: magic, format version, block size (4096)
// and the number of blocks, each as a little-endian u64.
fn write_header<W>(mut w: W, nr_blocks: u64) -> io::Result<()>
where
    W: byteorder::WriteBytesExt,
{
    let fields: [u64; 4] = [MAGIC, PACK_VERSION, 4096, nr_blocks];
    for field in fields.iter() {
        w.write_u64::<LittleEndian>(*field)?;
    }
    Ok(())
}
// Reads and validates the pack file header, returning the number of
// blocks recorded in it.
//
// The header is four little-endian u64s: magic, format version, block
// size and block count.  A mismatch in any of the first three is reported
// as an io::ErrorKind::InvalidData error rather than a panic, since the
// input is an arbitrary user supplied file.
fn read_header<R>(mut r: R) -> io::Result<u64>
where
    R: byteorder::ReadBytesExt,
{
    fn invalid(msg: String) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    let magic = r.read_u64::<LittleEndian>()?;
    if magic != MAGIC {
        return Err(invalid(format!("not a pack file (bad magic {:#x})", magic)));
    }

    let version = r.read_u64::<LittleEndian>()?;
    if version != PACK_VERSION {
        return Err(invalid(format!(
            "unsupported pack file version {} (expected {})",
            version, PACK_VERSION
        )));
    }

    let block_size = r.read_u64::<LittleEndian>()?;
    if block_size != 4096 {
        return Err(invalid(format!(
            "unsupported block size {} (expected 4096)",
            block_size
        )));
    }

    r.read_u64::<LittleEndian>()
}
fn get_nr_blocks(path: &str) -> io::Result<u64> {
let metadata = std::fs::metadata(path)?;
Ok(metadata.len() / (BLOCK_SIZE as u64))
}
// Reads `count` consecutive blocks starting at block `b` and returns them
// as one contiguous buffer.  Fails if the whole range cannot be read.
fn read_blocks<R>(rdr: &mut R, b: u64, count: u64) -> io::Result<Vec<u8>>
where
    R: io::Read + io::Seek,
{
    let nr_bytes = (BLOCK_SIZE * count) as usize;
    let start = io::SeekFrom::Start(b * BLOCK_SIZE);

    let mut data: Vec<u8> = vec![0; nr_bytes];
    rdr.seek(start)?;
    rdr.read_exact(&mut data)?;
    Ok(data)
}
// Bitwise-inverted crc32c of everything in `buf` after the first four
// bytes.  Panics if `buf` is shorter than four bytes.
fn checksum(buf: &[u8]) -> u32 {
    !crc32c::crc32c(&buf[4..])
}
// Block type, as determined by metadata_block_type.
#[derive(PartialEq)]
enum BT {
    SUPERBLOCK,
    BTREE,
    INDEX,
    BITMAP,
    // The checksum did not match any known block type; such blocks are
    // not packed.
    UNKNOWN,
}
// Classifies a block by its checksum.
//
// The xor of the checksum stored in the block and the one computed over
// the rest of the block identifies the block type.  Anything that is not
// exactly one block long, or whose xor matches no known type, is UNKNOWN.
fn metadata_block_type(buf: &[u8]) -> BT {
    if buf.len() != BLOCK_SIZE as usize {
        return BT::UNKNOWN;
    }

    // The checksum is always stored in the first u32 of the buffer.
    let sum_on_disk = Cursor::new(buf).read_u32::<LittleEndian>().unwrap();

    match checksum(buf) ^ sum_on_disk {
        SUPERBLOCK_CSUM_XOR => BT::SUPERBLOCK,
        BTREE_CSUM_XOR => BT::BTREE,
        BITMAP_CSUM_XOR => BT::BITMAP,
        INDEX_CSUM_XOR => BT::INDEX,
        _ => BT::UNKNOWN,
    }
}
fn check<T>(r: &PResult<T>) {
match r {
Ok(_) => {
return;
}
Err(PackError::ParseError) => panic!("parse error"),
Err(PackError::IOError) => panic!("io error"),
}
}
// Packs `buf` into `w` with the encoder that matches `kind`.
// Panics if packing fails or if `kind` is UNKNOWN.
fn pack_block<W: Write>(w: &mut W, kind: BT, buf: &[u8]) {
    let result = match kind {
        BT::SUPERBLOCK => pack_superblock(w, buf),
        BT::BTREE => pack_btree_node(w, buf),
        BT::INDEX => pack_index(w, buf),
        BT::BITMAP => pack_bitmap(w, buf),
        BT::UNKNOWN => {
            assert!(false);
            return;
        }
    };

    check(&result);
}
// Writes one block of zeroes at block position `b`.
fn write_zero_block<W>(w: &mut W, b: u64) -> io::Result<()>
where
    W: Write + Seek,
{
    let blank = vec![0u8; BLOCK_SIZE as usize];
    let position = io::SeekFrom::Start(b * BLOCK_SIZE);

    w.seek(position)?;
    w.write_all(&blank)
}
// Drains `blocks` (last entry first), writing each block at its own
// position while holding the output lock for the whole batch.
fn write_blocks<W>(w: &Arc<Mutex<W>>, blocks: &mut Vec<(u64, Vec<u8>)>) -> io::Result<()>
where
    W: Write + Seek,
{
    let mut out = w.lock().unwrap();

    loop {
        let (b, block) = match blocks.pop() {
            Some(entry) => entry,
            None => return Ok(()),
        };

        out.seek(io::SeekFrom::Start(b * BLOCK_SIZE))?;
        out.write_all(&block)?;
    }
}
// Worker body for unpack.
//
// Receives compressed sections until the channel is closed.  Each section
// is a zlib stream of (block number, packed block) pairs; every block is
// expanded, validated by checksum, and queued.  Queued blocks are written
// out in batches of 32 to limit contention on the output lock.
//
// A block that fails to expand, or whose expanded contents do not carry a
// recognised checksum, is reported as an io error (InvalidData for the
// checksum case) instead of panicking the thread, so that unpack can
// surface a proper error for a corrupt pack file.
fn decode_worker<W>(rx: Receiver<Vec<u8>>, w: Arc<Mutex<W>>) -> io::Result<()>
where
    W: Write + Seek,
{
    let mut blocks = Vec::new();

    while let Ok(bytes) = rx.recv() {
        let mut z = ZlibDecoder::new(&bytes[0..]);

        // A failed read of the block number marks the end of the section.
        while let Ok(b) = z.read_u64::<LittleEndian>() {
            let block = crate::pack::vm::unpack(&mut z, BLOCK_SIZE as usize)?;

            if metadata_block_type(&block[0..]) == BT::UNKNOWN {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("block {} failed checksum validation after unpacking", b),
                ));
            }

            blocks.push((b, block));

            if blocks.len() >= 32 {
                write_blocks(&w, &mut blocks)?;
            }
        }
    }

    write_blocks(&w, &mut blocks)?;
    Ok(())
}
pub fn unpack(input_file: &str, output_file: &str) -> Result<(), Box<dyn Error>> {
let mut input = OpenOptions::new()
.read(true)
.write(false)
.open(input_file)?;
let mut output = OpenOptions::new()
.read(false)
.write(true)
.create(true)
.truncate(true)
.open(output_file)?;
let nr_blocks = read_header(&input)?;
// zero the last block to size the file
write_zero_block(&mut output, nr_blocks - 1)?;
// Run until we hit the end
let output = Arc::new(Mutex::new(output));
// kick off the workers
let nr_jobs = num_cpus::get();
let mut senders = Vec::new();
let mut threads = Vec::new();
for _ in 0..nr_jobs {
let (tx, rx) = sync_channel(1);
let output = Arc::clone(&output);
senders.push(tx);
threads.push(spawn(move || decode_worker(rx, output)));
}
// Read z compressed chunk, and hand to worker thread.
let mut next_worker = 0;
while let Ok(len) = input.read_u64::<LittleEndian>() {
let mut bytes = vec![0; len as usize];
input.read_exact(&mut bytes)?;
senders[next_worker].send(bytes).unwrap();
next_worker = (next_worker + 1) % nr_jobs;
}
for s in senders {
drop(s);
}
for t in threads {
t.join().unwrap()?;
}
Ok(())
}

490
src/pack/vm.rs Normal file
View File

@ -0,0 +1,490 @@
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io;
use std::io::{Cursor, Read, Write};
use num_derive::FromPrimitive;
use num_traits::FromPrimitive;
use crate::pack::delta_list::*;
//-------------------------------------------------
// Deltas are converted to instructions. A delta may not fit
// into a single instruction.
// Instruction opcodes.  Each instruction starts with one byte: the tag in
// the high nibble (see pack_tag, which asserts the tag fits in 4 bits) and
// an instruction specific value in the low nibble.
#[derive(Debug, FromPrimitive)]
enum Tag {
    Set, // Operand width given in nibble
    Pos, // Delta in nibble
    PosW, // Delta in operand, whose width is in nibble
    Neg, // Delta in nibble
    NegW, // Delta in operand, whose width is in nibble
    Const, // Count in nibble
    Const8, // count = (nibble << 8) | byte
    // Controls how many times the next instruction is applied.
    // Not applicable to Const instructions which hold their own count.
    Count, // count stored in nibble
    Count8, // count = (nibble << 8) | byte
    Lit, // len in nibble
    LitW, // len in operand, whose width is in nibble
    ShiftedRun, // len = (nibble << 8) | byte, followed by two packed u64 runs (high parts, then low 24 bits)
}
// Writes the single instruction byte: tag in the high nibble, `nibble` in
// the low one.  Panics if either does not fit in four bits.
fn pack_tag<W: Write>(w: &mut W, t: Tag, nibble: u8) -> io::Result<()> {
    assert!(nibble < 16);

    let code = t as u8;
    assert!(code < 16);

    w.write_u8((code << 4) | nibble)
}
// Emits the repeat prefix for the instruction that follows.
//
// A count of one needs no prefix.  Counts below 16 use Count, larger ones
// use Count8 with the low eight bits in a trailing byte.  Panics if
// `count` is 4096 or more.
fn pack_count<W>(w: &mut W, count: u64) -> io::Result<()>
where
    W: Write,
{
    if count == 1u64 {
        return Ok(());
    }

    if count < 16 {
        return pack_tag(w, Tag::Count, count as u8);
    }

    assert!(count < 4096);
    let high = count >> 8;
    assert!(high < 16);
    let low = count & 0xff;

    pack_tag(w, Tag::Count8, high as u8)?;
    w.write_u8(low as u8)
}
fn pack_delta<W: Write>(w: &mut W, d: &Delta) -> io::Result<()> {
use Tag::*;
match d {
Delta::Base { n } => {
if *n <= std::u8::MAX as u64 {
pack_tag(w, Set, 1)?;
return w.write_u8(*n as u8);
} else if *n <= std::u16::MAX as u64 {
pack_tag(w, Set, 2)?;
return w.write_u16::<LittleEndian>(*n as u16);
} else if *n <= u32::MAX as u64 {
pack_tag(w, Set, 4)?;
return w.write_u32::<LittleEndian>(*n as u32);
} else {
pack_tag(w, Set, 8)?;
return w.write_u64::<LittleEndian>(*n);
}
}
Delta::Pos { delta, count } => {
pack_count(w, *count)?;
if *delta < 16 {
return pack_tag(w, Tag::Pos, *delta as u8);
} else if *delta <= u8::MAX as u64 {
pack_tag(w, PosW, 1)?;
return w.write_u8(*delta as u8);
} else if *delta <= u16::MAX as u64 {
pack_tag(w, PosW, 2)?;
return w.write_u16::<LittleEndian>(*delta as u16);
} else if *delta <= u32::MAX as u64 {
pack_tag(w, PosW, 4)?;
return w.write_u32::<LittleEndian>(*delta as u32);
} else {
pack_tag(w, PosW, 8)?;
return w.write_u64::<LittleEndian>(*delta as u64);
}
}
Delta::Neg { delta, count } => {
pack_count(w, *count)?;
if *delta < 16 {
return pack_tag(w, Neg, *delta as u8);
} else if *delta <= u8::MAX as u64 {
pack_tag(w, NegW, 1)?;
return w.write_u8(*delta as u8);
} else if *delta <= u16::MAX as u64 {
pack_tag(w, NegW, 2)?;
return w.write_u16::<LittleEndian>(*delta as u16);
} else if *delta <= u32::MAX as u64 {
pack_tag(w, NegW, 4)?;
return w.write_u32::<LittleEndian>(*delta as u32);
} else {
pack_tag(w, NegW, 8)?;
return w.write_u64::<LittleEndian>(*delta as u64);
}
}
Delta::Const { count } => {
if *count < 16 {
return pack_tag(w, Tag::Const, *count as u8);
} else {
assert!(*count < 4096);
let nibble = *count >> 8;
assert!(nibble < 16);
pack_tag(w, Tag::Const8, nibble as u8)?;
return w.write_u8((*count & 0xff) as u8);
}
}
}
}
// Encodes every delta in order, stopping at the first write error.
fn pack_deltas<W: Write>(w: &mut W, ds: &[Delta]) -> io::Result<()> {
    ds.iter().try_for_each(|d| pack_delta(w, d))
}
//-------------------------------------------------
// Packs a run of u64s by converting it to deltas and encoding those.
pub fn pack_u64s<W: Write>(w: &mut W, ns: &[u64]) -> io::Result<()> {
    let deltas = to_delta(ns);
    pack_deltas(w, &deltas)
}
// Splits every number into (n >> shift, n & low-mask) and returns the two
// halves as parallel vectors: all the high parts, then all the low parts.
fn unshift_nrs(shift: usize, ns: &[u64]) -> (Vec<u64>, Vec<u64>) {
    let low_mask: u64 = (1 << shift) - 1;

    ns.iter()
        .map(|n| (n >> shift, n & low_mask))
        .unzip()
}
// Packs a run of u64s as a ShiftedRun: the entry count, then the values
// shifted right by 24 as one packed run, then their low 24 bits as a
// second packed run.  Panics if there are 4096 or more entries.
pub fn pack_shifted_u64s<W: Write>(w: &mut W, ns: &[u64]) -> io::Result<()> {
    let nr_entries = ns.len();
    let high_bits = nr_entries >> 8;
    assert!(high_bits < 16);

    pack_tag(w, Tag::ShiftedRun, high_bits as u8)?;
    w.write_u8((nr_entries & 0xff) as u8)?;

    let (high, low) = unshift_nrs(24, ns);
    pack_u64s(w, &high)?;
    pack_u64s(w, &low)
}
// Stores `bs` verbatim.  Lengths below 16 go in the Lit nibble; longer
// ones use LitW with the length in the narrowest of 1/2/4/8 bytes.
pub fn pack_literal<W: Write>(w: &mut W, bs: &[u8]) -> io::Result<()> {
    let len = bs.len() as u64;

    match len {
        0..=15 => {
            pack_tag(w, Tag::Lit, len as u8)?;
        }
        16..=0xff => {
            pack_tag(w, Tag::LitW, 1)?;
            w.write_u8(len as u8)?;
        }
        0x100..=0xffff => {
            pack_tag(w, Tag::LitW, 2)?;
            w.write_u16::<LittleEndian>(len as u16)?;
        }
        0x1_0000..=0xffff_ffff => {
            pack_tag(w, Tag::LitW, 4)?;
            w.write_u32::<LittleEndian>(len as u32)?;
        }
        _ => {
            pack_tag(w, Tag::LitW, 8)?;
            w.write_u64::<LittleEndian>(len)?;
        }
    }

    w.write_all(bs)
}
//-------------------------------------------------
// Reads a little-endian operand whose width in bytes (1, 2, 4 or 8) is
// given by `nibble`, widening it to u64.
//
// Used for the operands of Set, PosW, NegW and LitW.  Any other width can
// only come from a corrupt stream and is reported as an InvalidData error
// rather than a panic.
fn unpack_with_width<R: Read>(r: &mut R, nibble: u8) -> io::Result<u64> {
    match nibble {
        1 => r.read_u8().map(u64::from),
        2 => r.read_u16::<LittleEndian>().map(u64::from),
        4 => r.read_u32::<LittleEndian>().map(u64::from),
        8 => r.read_u64::<LittleEndian>(),
        _ => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("bad operand width {} in packed stream", nibble),
        )),
    }
}
// Reads `count` little-endian u64s, failing on the first read error.
fn unpack_u64s<R: Read>(r: &mut R, count: usize) -> io::Result<Vec<u64>> {
    (0..count)
        .map(|_| r.read_u64::<LittleEndian>())
        .collect()
}
// State of the unpacking interpreter.
struct VM {
    // Current value; set by Set and adjusted by the Pos/Neg instructions.
    base: u64,
    // Running total of bytes emitted, used by exec to know when to stop.
    bytes_written: usize,
}
impl VM {
    // A fresh interpreter: base of zero, nothing emitted yet.
    fn new() -> VM {
        VM {
            base: 0,
            bytes_written: 0,
        }
    }

    // Emits `n` as a little-endian u64 and accounts for its eight bytes.
    fn emit_u64<W: Write>(&mut self, w: &mut W, n: u64) -> io::Result<()> {
        w.write_u64::<LittleEndian>(n)?;
        self.bytes_written += 8;
        Ok(())
    }

    // Emits the current base value.
    fn emit_base<W: Write>(&mut self, w: &mut W) -> io::Result<()> {
        self.emit_u64(w, self.base)
    }

    // Emits raw bytes and accounts for them.
    fn emit_bytes<W: Write>(&mut self, w: &mut W, bytes: &[u8]) -> io::Result<()> {
        let len = bytes.len();
        w.write_all(bytes)?;
        self.bytes_written += len;
        Ok(())
    }

    // Decodes and executes one instruction from `r`, emitting into `w`.
    //
    // `count` is the repeat count supplied by a preceding Count/Count8
    // prefix (1 when there was none).  Instructions that carry their own
    // length (Const*, Lit*) assert that no prefix was applied.
    //
    // An unknown tag can only come from a corrupt stream and is reported
    // as an InvalidData error rather than a panic.
    fn unpack_instr<R: Read, W: Write>(
        &mut self,
        r: &mut R,
        w: &mut W,
        count: usize,
    ) -> io::Result<()> {
        use Tag::*;

        let b = r.read_u8()?;
        let kind: Tag = match Tag::from_u8(b >> 4) {
            Some(k) => k,
            None => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("bad tag {} in packed stream", b >> 4),
                ));
            }
        };
        let nibble = b & 0xf;

        match kind {
            Set => {
                self.base = unpack_with_width(r, nibble)?;
                for _ in 0..count {
                    self.emit_base(w)?;
                }
            }
            Pos => {
                for _ in 0..count {
                    self.base += nibble as u64;
                    self.emit_base(w)?;
                }
            }
            PosW => {
                let delta = unpack_with_width(r, nibble)?;
                for _ in 0..count {
                    self.base += delta;
                    self.emit_base(w)?;
                }
            }
            Neg => {
                for _ in 0..count {
                    self.base -= nibble as u64;
                    self.emit_base(w)?;
                }
            }
            NegW => {
                let delta = unpack_with_width(r, nibble)?;
                for _ in 0..count {
                    self.base -= delta;
                    self.emit_base(w)?;
                }
            }
            Const => {
                assert_eq!(count, 1);
                for _ in 0..nibble as usize {
                    self.emit_base(w)?;
                }
            }
            Const8 => {
                assert_eq!(count, 1);
                let count = ((nibble as usize) << 8) | (r.read_u8()? as usize);
                for _ in 0..count {
                    self.emit_base(w)?;
                }
            }
            Count => {
                self.unpack_instr(r, w, nibble as usize)?;
            }
            Count8 => {
                let count = ((nibble as usize) << 8) | (r.read_u8()? as usize);
                self.unpack_instr(r, w, count as usize)?;
            }
            Lit => {
                assert_eq!(count, 1);
                let len = nibble as usize;
                let mut bytes = vec![0; len];
                r.read_exact(&mut bytes[0..])?;
                self.emit_bytes(w, &bytes)?;
            }
            LitW => {
                assert_eq!(count, 1);
                let len = unpack_with_width(r, nibble)? as usize;
                let mut bytes = vec![0; len];
                r.read_exact(&mut bytes[0..])?;
                self.emit_bytes(w, &bytes)?;
            }
            ShiftedRun => {
                // FIXME: repeated unpack, pack, unpack
                let len = ((nibble as usize) << 8) | (r.read_u8()? as usize);
                let nr_bytes = (len as usize) * std::mem::size_of::<u64>() as usize;

                // The two runs are expanded into scratch buffers, so the
                // bytes they "emit" must not count towards our own total.
                let mut high_bytes: Vec<u8> = Vec::with_capacity(nr_bytes);
                let written = self.exec(r, &mut high_bytes, nr_bytes)?;
                self.bytes_written -= written; // hack
                let mut high_r = Cursor::new(high_bytes);
                let high = unpack_u64s(&mut high_r, len)?;

                let mut low_bytes: Vec<u8> = Vec::with_capacity(nr_bytes);
                let written = self.exec(r, &mut low_bytes, nr_bytes)?;
                self.bytes_written -= written; // hack
                let mut low_r = Cursor::new(low_bytes);
                let low = unpack_u64s(&mut low_r, len)?;

                let mask = (1 << 24) - 1;
                for i in 0..len {
                    self.emit_u64(w, (high[i] << 24) | (low[i] & mask))?;
                }
            }
        }
        Ok(())
    }

    // Runs until at least a number of bytes have been emitted.  Returns nr emitted.
    fn exec<R: Read, W: Write>(
        &mut self,
        r: &mut R,
        w: &mut W,
        emit_bytes: usize,
    ) -> io::Result<usize> {
        let begin = self.bytes_written;
        while (self.bytes_written - begin) < emit_bytes {
            self.unpack_instr(r, w, 1)?;
        }
        Ok(self.bytes_written - begin)
    }
}
// Expands packed data from `r` until `count` bytes have been produced and
// returns them.
//
// The interpreter only stops on instruction boundaries, so a corrupt
// stream can produce more than `count` bytes.  That is reported as an
// InvalidData error rather than a panic, in keeping with the io::Result
// return type.
pub fn unpack<R: Read>(r: &mut R, count: usize) -> io::Result<Vec<u8>> {
    let mut w = Vec::with_capacity(4096);
    let mut cursor = Cursor::new(&mut w);

    let mut vm = VM::new();
    let written = vm.exec(r, &mut cursor, count)?;

    if written != count || w.len() != count {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("unpacked {} bytes, expected {}", written, count),
        ));
    }

    Ok(w)
}
//-------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trips literals of several lengths through pack_literal/unpack.
    #[test]
    fn test_pack_literals() {
        struct TestCase(Vec<u8>);

        let cases = [
            // This is a bad test case, because unpack will not exec
            // any instructions.
            TestCase(b"".to_vec()),
            TestCase(b"foo".to_vec()),
            TestCase(vec![42; 15]),
            TestCase(vec![42; 256]),
            TestCase(vec![42; 4096]),
        ];

        for t in &cases {
            let mut bs = Vec::with_capacity(4096);
            let mut w = Cursor::new(&mut bs);
            pack_literal(&mut w, &t.0[0..]).unwrap();

            let mut r = Cursor::new(&mut bs);
            let unpacked = unpack(&mut r, t.0.len()).unwrap();
            assert_eq!(&t.0[0..], &unpacked[0..]);
        }
    }

    // True when `bytes` is exactly the little-endian serialisation of `ns`.
    fn check_u64s_match(ns: &Vec<u64>, bytes: &[u8]) -> bool {
        let mut packed = Vec::with_capacity(ns.len() * 8);
        let mut w = Cursor::new(&mut packed);
        for n in ns {
            w.write_u64::<LittleEndian>(*n).unwrap();
        }
        packed == bytes
    }

    // Packs `ns` with pack_u64s, unpacks it again and compares.
    fn check_pack_u64s(ns: &Vec<u64>) -> bool {
        println!("packing {:?}", &ns);
        let mut bs = Vec::with_capacity(4096);
        let mut w = Cursor::new(&mut bs);
        pack_u64s(&mut w, &ns[0..]).unwrap();
        println!("unpacked len = {}, packed len = {}", ns.len() * 8, bs.len());

        let mut r = Cursor::new(&mut bs);
        let unpacked = unpack(&mut r, ns.len() * 8).unwrap();
        check_u64s_match(&ns, &unpacked[0..])
    }

    #[test]
    fn test_pack_u64s() {
        let cases = [
            vec![0],
            vec![1, 5, 9, 10],
            b"the quick brown fox jumps over the lazy dog"
                .iter()
                .map(|b| *b as u64)
                .collect(),
        ];

        for t in &cases {
            assert!(check_pack_u64s(&t));
        }
    }

    // Property: any non-empty vector of u64s survives a pack/unpack cycle.
    #[quickcheck]
    fn prop_pack_u64s(mut ns: Vec<u64>) -> bool {
        ns.push(42); // We don't handle empty vecs
        check_pack_u64s(&ns)
    }

    // Builds values from (high, low) pairs - high shifted left by 24, low
    // masked to 24 bits - then round-trips them through pack_shifted_u64s.
    fn check_pack_shifted_u64s(ns: &Vec<(u64, u64)>) -> bool {
        let shifted: Vec<u64> = ns
            .iter()
            .map(|(h, l)| (h << 24) | (l & ((1 << 24) - 1)))
            .collect();

        println!("packing {:?}", &ns);
        let mut bs = Vec::with_capacity(4096);
        let mut w = Cursor::new(&mut bs);
        pack_shifted_u64s(&mut w, &shifted[0..]).unwrap();
        println!("unpacked len = {}, packed len = {}", ns.len() * 8, bs.len());

        let mut r = Cursor::new(&mut bs);
        let unpacked = unpack(&mut r, ns.len() * 8).unwrap();
        check_u64s_match(&shifted, &unpacked[0..])
    }

    // Property: any non-empty vector of pairs survives the shifted cycle.
    #[quickcheck]
    fn prop_pack_shifted_u64s(mut ns: Vec<(u64, u64)>) -> bool {
        ns.push((42, 42));
        check_pack_shifted_u64s(&ns)
    }
}
//-------------------------------------------------

View File

@ -12,8 +12,6 @@ thin_provisioning::register_thin_commands(base::application &app)
app.add_cmd(command::ptr(new thin_delta_cmd())); app.add_cmd(command::ptr(new thin_delta_cmd()));
app.add_cmd(command::ptr(new thin_dump_cmd())); app.add_cmd(command::ptr(new thin_dump_cmd()));
app.add_cmd(command::ptr(new thin_ls_cmd())); app.add_cmd(command::ptr(new thin_ls_cmd()));
app.add_cmd(command::ptr(new thin_metadata_pack_cmd()));
app.add_cmd(command::ptr(new thin_metadata_unpack_cmd()));
app.add_cmd(command::ptr(new thin_metadata_size_cmd())); app.add_cmd(command::ptr(new thin_metadata_size_cmd()));
app.add_cmd(command::ptr(new thin_restore_cmd())); app.add_cmd(command::ptr(new thin_restore_cmd()));
app.add_cmd(command::ptr(new thin_repair_cmd())); app.add_cmd(command::ptr(new thin_repair_cmd()));

View File

@ -71,22 +71,6 @@ namespace thin_provisioning {
virtual int run(int argc, char **argv); virtual int run(int argc, char **argv);
}; };
// Command object for the thin_metadata_pack tool.
class thin_metadata_pack_cmd : public base::command {
public:
	thin_metadata_pack_cmd();
	// Prints the option summary to `out`.
	virtual void usage(std::ostream &out) const override;
	// Parses the command line and runs the tool; returns the exit code.
	virtual int run(int argc, char **argv) override;
};
// Command object for the thin_metadata_unpack tool.
class thin_metadata_unpack_cmd : public base::command {
public:
	thin_metadata_unpack_cmd();
	// Prints the option summary to `out`.
	virtual void usage(std::ostream &out) const override;
	// Parses the command line and runs the tool; returns the exit code.
	virtual int run(int argc, char **argv) override;
};
#ifdef DEV_TOOLS #ifdef DEV_TOOLS
class thin_ll_dump_cmd : public base::command { class thin_ll_dump_cmd : public base::command {
public: public:

View File

@ -1,347 +0,0 @@
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/optional.hpp>
#include <fcntl.h>
#include <fstream>
#include <getopt.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
#include "persistent-data/file_utils.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/checksum.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/superblock.h"
#include "version.h"
using namespace thin_provisioning;
using namespace persistent_data;
using boost::optional;
//---------------------------------------------------------------------------
namespace {
using namespace std;
constexpr uint64_t MAGIC = 0xa537a0aa6309ef77;
constexpr uint64_t PACK_VERSION = 1;
constexpr uint64_t PREFETCH_COUNT = 1024;
uint32_t const SUPERBLOCK_CSUM_SEED = 160774;
uint32_t const BITMAP_CSUM_XOR = 240779;
uint32_t const INDEX_CSUM_XOR = 160478;
uint32_t const BTREE_CSUM_XOR = 121107;
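// Pack file format
// ----------------
//
// file := <file-header> <entry>*
// file-header := MAGIC PACK_VERSION BLOCK_SIZE NR_BLOCKS
// entry := BLOCK_NR BYTES
//
// MAGIC and PACK_VERSION are written uncompressed; everything after them
// (BLOCK_SIZE, NR_BLOCKS and the entries) goes through the zlib filter.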
// Command line options shared by the pack and unpack commands.
struct flags {
	// Set from -i/--input; run() refuses to continue without it.
	optional<string> input_file_;
	// Set from -o/--output; run() refuses to continue without it.
	optional<string> output_file_;
};
// Predicate that decides whether a raw block looks like metadata.
//
// A crc32c seeded with the block's first 32 bit word is run over the rest
// of the block; the block is accepted when the result equals one of the
// known per-block-type constants.
class is_metadata_functor {
public:
	is_metadata_functor() {
	}

	bool operator() (void const *raw) const {
		uint32_t const *words = reinterpret_cast<uint32_t const*>(raw);

		base::crc32c sum(*words);
		sum.append(words + 1, MD_BLOCK_SIZE - sizeof(uint32_t));

		auto const result = sum.get_sum();
		return result == SUPERBLOCK_CSUM_SEED ||
		       result == INDEX_CSUM_XOR ||
		       result == BITMAP_CSUM_XOR ||
		       result == BTREE_CSUM_XOR;
	}
};
// Creates (or truncates) `file` and preallocates `len` bytes for it.
//
// Throws runtime_error if the file cannot be opened or the allocation
// fails.  The descriptor is closed before the allocation result is
// examined, so it is not leaked on the failure path.
void prealloc_file(string const &file, off_t len) {
	int fd = ::open(file.c_str(), O_TRUNC | O_CREAT | O_RDWR, 0666);
	if (fd < 0)
		throw runtime_error("couldn't open output file");

	int const r = ::fallocate(fd, 0, 0, len);
	::close(fd);

	if (r)
		throw runtime_error("couldn't fallocate");
}
// Reads one little-endian 64 bit value from `in` and returns it in cpu
// byte order.  Throws runtime_error if the read fails.
uint64_t read_u64(istream &in) {
	base::le64 raw;
	in.read(reinterpret_cast<char *>(&raw), sizeof(raw));

	if (in)
		return base::to_cpu<uint64_t>(raw);

	throw runtime_error("couldn't read u64");
}
// Writes `n` to `out` as a little-endian 64 bit value.
// Throws runtime_error if the write fails.
void write_u64(ostream &out, uint64_t n) {
	base::le64 disk_value = base::to_disk<base::le64>(n);
	out.write(reinterpret_cast<char *>(&disk_value), sizeof(disk_value));

	if (out)
		return;

	throw runtime_error("couldn't write u64");
}
// Packs the metadata device named in `f` into the output file.
//
// MAGIC and PACK_VERSION are written straight to the file; everything
// after that (block size, block count, then a block number and the raw
// block for every block accepted by is_metadata_functor) goes through a
// zlib compressor.  Returns 0.
int pack(flags const &f) {
	using namespace boost::iostreams;
	std::ofstream out_file(*f.output_file_, ios_base::binary);
	// The first two fields are written before the compressor is attached.
	write_u64(out_file, MAGIC);
	write_u64(out_file, PACK_VERSION);
	boost::iostreams::filtering_ostreambuf out_buf;
	out_buf.push(zlib_compressor());
	out_buf.push(out_file);
	std::ostream out(&out_buf);
	block_manager::ptr bm = open_bm(*f.input_file_, block_manager::READ_ONLY, true);
	uint64_t block_size = 4096;
	auto nr_blocks = bm->get_nr_blocks();
	cerr << "nr_blocks = " << nr_blocks << "\n";
	write_u64(out, block_size);
	write_u64(out, nr_blocks);
	// prefetch
	for (block_address b = 0; b < PREFETCH_COUNT; b++)
		bm->prefetch(b);
	is_metadata_functor is_metadata;
	for (block_address b = 0; b < nr_blocks; b++) {
		auto rr = bm->read_lock(b);
		if (is_metadata(rr.data())) {
			write_u64(out, b);
			out.write(reinterpret_cast<const char *>(rr.data()), block_size);
		}
		// Keep the prefetch window PREFETCH_COUNT blocks ahead of the read.
		auto prefetch_b = b + PREFETCH_COUNT;
		if (prefetch_b < nr_blocks)
			bm->prefetch(prefetch_b);
	}
	return 0;
}
// Expands the pack file named in `f` into the output file.
//
// Checks MAGIC and PACK_VERSION, then reads the compressed stream: block
// size, block count, and a sequence of (block number, block bytes)
// entries which are copied to their position in the preallocated output.
// Throws runtime_error on a malformed file.  Returns 0.
int unpack(flags const &f)
{
	using namespace boost::iostreams;

	ifstream in_file(*f.input_file_, ios_base::binary);
	if (!in_file)
		throw runtime_error("Couldn't open pack file");

	if (read_u64(in_file) != MAGIC)
		throw runtime_error("Not a pack file.");

	if (read_u64(in_file) != PACK_VERSION)
		throw runtime_error("unknown pack file format version");

	filtering_istreambuf in_buf;
	in_buf.push(zlib_decompressor());
	in_buf.push(in_file);
	std::istream in(&in_buf);

	auto block_size = read_u64(in);
	auto nr_blocks = read_u64(in);

	prealloc_file(*f.output_file_, nr_blocks * block_size);
	block_manager bm(*f.output_file_, nr_blocks, 6, block_manager::READ_WRITE, true);

	// block_size comes from the file, so the buffer lives on the heap: a
	// variable length array is not standard C++ and would put an
	// attacker-chosen amount of data on the stack.
	std::vector<uint8_t> bytes(block_size);

	while (true) {
		uint64_t block_nr;
		try {
			block_nr = read_u64(in);
		} catch (...) {
			// Failing to read another block number marks the end of the entries.
			break;
		}

		if (block_nr >= nr_blocks)
			throw runtime_error("block nr out of bounds");

		in.read(reinterpret_cast<char *>(bytes.data()), block_size);
		if (!in)
			throw runtime_error("couldn't read data");

		auto wr = bm.write_lock(block_nr);
		memcpy(wr.data(), bytes.data(), block_size);
	}

	return 0;
}
}
//---------------------------------------------------------------------------
// Registers the command under the name "thin_metadata_pack".
thin_metadata_pack_cmd::thin_metadata_pack_cmd()
	: command("thin_metadata_pack")
{
}
// Prints the option summary for thin_metadata_pack to `out`.
void
thin_metadata_pack_cmd::usage(ostream &out) const {
	out << "Usage: " << get_name() << " [options]\n";
	out << "Options:\n";
	out << " {-i|--input} <input metadata (binary format)>\n";
	out << " {-o|--output} <output packed metadata>\n";
	out << " {-h|--help}\n";
	out << " {-V|--version}" << endl;
}
int
thin_metadata_pack_cmd::run(int argc, char **argv)
{
const char shortopts[] = "hi:o:V";
const struct option longopts[] = {
{ "help", no_argument, NULL, 'h'},
{ "input", required_argument, NULL, 'i'},
{ "output", required_argument, NULL, 'o'},
{ "version", no_argument, NULL, 'V'},
{ NULL, no_argument, NULL, 0 }
};
flags f;
int c;
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
switch(c) {
case 'h':
usage(cout);
return 0;
case 'i':
f.input_file_ = optarg;
break;
case 'o':
f.output_file_ = optarg;
break;
case 'V':
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
default:
usage(cerr);
return 1;
}
}
if (!f.input_file_) {
cerr << "No input file provided." << endl;
usage(cerr);
return 1;
}
if (!f.output_file_) {
cerr << "No output file provided." << endl;
usage(cerr);
return 1;
}
return pack(f);
}
//---------------------------------------------------------------------------
// Registers the command under the name "thin_metadata_unpack".
thin_metadata_unpack_cmd::thin_metadata_unpack_cmd()
	: command("thin_metadata_unpack")
{
}
// Prints the option summary for thin_metadata_unpack to `out`.
void
thin_metadata_unpack_cmd::usage(ostream &out) const {
	out << "Usage: " << get_name() << " [options]\n";
	out << "Options:\n";
	out << " {-i|--input} <input packed metadata>\n";
	out << " {-o|--output} <output metadata (binary format)>\n";
	out << " {-h|--help}\n";
	out << " {-V|--version}" << endl;
}
int
thin_metadata_unpack_cmd::run(int argc, char **argv)
{
const char shortopts[] = "hi:o:V";
const struct option longopts[] = {
{ "help", no_argument, NULL, 'h'},
{ "input", required_argument, NULL, 'i'},
{ "output", required_argument, NULL, 'o'},
{ "version", no_argument, NULL, 'V'},
{ NULL, no_argument, NULL, 0 }
};
flags f;
int c;
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
switch(c) {
case 'h':
usage(cout);
return 0;
case 'i':
f.input_file_ = optarg;
break;
case 'o':
f.output_file_ = optarg;
break;
case 'V':
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
default:
usage(cerr);
return 1;
}
}
if (!f.input_file_) {
cerr << "No input file provided." << endl;
usage(cerr);
return 1;
}
if (!f.output_file_) {
cerr << "No output file provided." << endl;
usage(cerr);
return 1;
}
return unpack(f);
}
//---------------------------------------------------------------------------