[thin_metadata_pack] Fix bug where pack would try to read too much.

This commit is contained in:
Joe Thornber 2020-07-09 13:52:53 +01:00
parent 23f3033f61
commit b7e02d0ae4
3 changed files with 17 additions and 12 deletions

7
Cargo.lock generated
View File

@ -21,6 +21,11 @@ dependencies = [
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.4.12" version = "0.4.12"
@ -380,6 +385,7 @@ dependencies = [
name = "thinp" name = "thinp"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -456,6 +462,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"

View File

@ -6,6 +6,7 @@ edition = "2018"
license = "GPL3" license = "GPL3"
[dependencies] [dependencies]
anyhow = "1.0"
byteorder = "1.3" byteorder = "1.3"
clap = "2.33" clap = "2.33"
crc32c = "0.4" crc32c = "0.4"

View File

@ -1,3 +1,4 @@
use anyhow::Result;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression}; use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression};
@ -34,11 +35,6 @@ fn shuffle<T>(v: &mut Vec<T>) {
v.shuffle(&mut rng); v.shuffle(&mut rng);
} }
// FIXME: move to a utils module
/// Divides `n` by `d`, rounding the result up to the next whole number.
///
/// Computed as the truncated quotient plus one when there is a remainder,
/// rather than `(n + d - 1) / d`, so the intermediate value can never
/// overflow even when `n` is close to `u64::MAX`.
///
/// # Panics
///
/// Panics if `d` is zero (integer division by zero).
fn div_up(n: u64, d: u64) -> u64 {
    let quotient = n / d;
    if n % d == 0 {
        quotient
    } else {
        // A non-zero remainder implies d >= 2, so quotient <= u64::MAX / 2
        // and the increment cannot overflow.
        quotient + 1
    }
}
// Each thread processes multiple contiguous runs of blocks, called // Each thread processes multiple contiguous runs of blocks, called
// chunks. Chunks are shuffled so each thread gets chunks spread // chunks. Chunks are shuffled so each thread gets chunks spread
// across the dev in case there are large regions that don't contain // across the dev in case there are large regions that don't contain
@ -47,12 +43,17 @@ fn mk_chunk_vecs(nr_blocks: u64, nr_jobs: u64) -> Vec<Vec<(u64, u64)>> {
use std::cmp::{max, min}; use std::cmp::{max, min};
let chunk_size = min(4 * 1024u64, max(128u64, nr_blocks / (nr_jobs * 64))); let chunk_size = min(4 * 1024u64, max(128u64, nr_blocks / (nr_jobs * 64)));
let nr_chunks = div_up(nr_blocks, chunk_size); let nr_chunks = nr_blocks / chunk_size;
let mut chunks = Vec::with_capacity(nr_chunks as usize); let mut chunks = Vec::with_capacity(nr_chunks as usize);
for i in 0..nr_chunks { for i in 0..nr_chunks {
chunks.push((i * chunk_size, (i + 1) * chunk_size)); chunks.push((i * chunk_size, (i + 1) * chunk_size));
} }
// there may be a smaller chunk at the back of the file.
if nr_chunks * chunk_size < nr_blocks {
chunks.push((nr_chunks * chunk_size, nr_blocks));
}
shuffle(&mut chunks); shuffle(&mut chunks);
let mut vs = Vec::with_capacity(nr_jobs as usize); let mut vs = Vec::with_capacity(nr_jobs as usize);
@ -104,11 +105,7 @@ pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box<dyn Error>> {
Ok(()) Ok(())
} }
fn crunch<R, W>( fn crunch<R, W>(input: Arc<Mutex<R>>, output: Arc<Mutex<W>>, ranges: Vec<(u64, u64)>) -> Result<()>
input: Arc<Mutex<R>>,
output: Arc<Mutex<W>>,
ranges: Vec<(u64, u64)>,
) -> io::Result<()>
where where
R: Read + Seek, R: Read + Seek,
W: Write, W: Write,
@ -256,7 +253,7 @@ fn pack_block<W: Write>(w: &mut W, kind: BT, buf: &[u8]) {
BT::NODE => check(&pack_btree_node(w, buf)), BT::NODE => check(&pack_btree_node(w, buf)),
BT::INDEX => check(&pack_index(w, buf)), BT::INDEX => check(&pack_index(w, buf)),
BT::BITMAP => check(&pack_bitmap(w, buf)), BT::BITMAP => check(&pack_bitmap(w, buf)),
BT::UNKNOWN => {panic!("asked to pack an unknown block type")} BT::UNKNOWN => panic!("asked to pack an unknown block type"),
} }
} }