From ec8f7b7fa8bb9d89f19374849638901b06f59f8d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Sat, 8 Aug 2020 09:54:16 +0100 Subject: [PATCH] [thin_check (rust)] Keep track of data block ref counts as we walk the mapping tree. --- Cargo.lock | 16 ++--- src/pdata/space_map.rs | 132 ++++++++++++++++++++++++++++++++++++----- src/thin/check.rs | 86 +++++++++++++++++++++++++-- 3 files changed, 206 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2678fd..fdd3687 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,9 +85,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "clap" -version = "2.33.1" +version = "2.33.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" +checksum = "10040cdf04294b565d9e0319955430099ec3813a64c952b86a41200ad714ae48" dependencies = [ "ansi_term", "atty", @@ -305,9 +305,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7d4bd64732af4bf3a67f367c27df8520ad7e230c5817b8ff485864d80242b9" +checksum = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10" [[package]] name = "log" @@ -359,9 +359,9 @@ dependencies = [ [[package]] name = "num-derive" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8b15b261814f992e33760b1fca9fe8b693d8a65299f20c9901688636cfb746" +checksum = "e0396233fb2d5b0ae3f05ff6aba9a09185f7f6e70f87fb01147d545f85364665" dependencies = [ "proc-macro2", "quote", @@ -607,9 +607,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.36" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdb98bcb1f9d81d07b536179c269ea15999b5d14ea958196413869445bb5250" +checksum = "e69abc24912995b3038597a7a593be5053eb0fb44f3cc5beec0deb421790c1f4" dependencies = [ "proc-macro2", "quote", diff --git a/src/pdata/space_map.rs b/src/pdata/space_map.rs index 7242cbe..a05fb2b 100644 --- a/src/pdata/space_map.rs +++ b/src/pdata/space_map.rs @@ -1,5 +1,7 @@ use anyhow::{anyhow, Result}; +use fixedbitset::FixedBitSet; use nom::{number::complete::*, IResult}; +use std::collections::HashMap; use crate::block_manager::*; use crate::pdata::btree::Unpack; @@ -16,9 +18,7 @@ pub struct SMRoot { pub fn unpack_root(data: &[u8]) -> Result { match SMRoot::unpack(data) { - Err(_e) => { - Err(anyhow!("couldn't parse SMRoot")) - }, + Err(_e) => Err(anyhow!("couldn't parse SMRoot")), Ok((_i, v)) => Ok(v), } } @@ -27,19 +27,22 @@ impl Unpack for SMRoot { fn disk_size() -> u32 { 32 } - + fn unpack(data: &[u8]) -> IResult<&[u8], SMRoot> { let (i, nr_blocks) = le_u64(data)?; let (i, nr_allocated) = le_u64(i)?; let (i, bitmap_root) = le_u64(i)?; let (i, ref_count_root) = le_u64(i)?; - Ok ((i, SMRoot { - nr_blocks, - nr_allocated, - bitmap_root, - ref_count_root, - })) + Ok(( + i, + SMRoot { + nr_blocks, + nr_allocated, + bitmap_root, + ref_count_root, + }, + )) } } @@ -62,7 +65,14 @@ impl Unpack for IndexEntry { let (i, nr_free) = le_u32(i)?; let (i, none_free_before) = le_u32(i)?; - Ok((i, IndexEntry {blocknr, nr_free, none_free_before})) + Ok(( + i, + IndexEntry { + blocknr, + nr_free, + none_free_before, + }, + )) } } @@ -85,11 +95,18 @@ impl Unpack for BitmapHeader { let (i, not_used) = le_u32(i)?; let (i, blocknr) = le_u64(i)?; - Ok((i, BitmapHeader {csum, not_used, blocknr} )) + Ok(( + i, + BitmapHeader { + csum, + not_used, + blocknr, + }, + )) } } -#[derive(Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum BitmapEntry { Small(u8), Overflow, @@ -109,7 +126,7 @@ impl Unpack for Bitmap { fn unpack(data: &[u8]) -> IResult<&[u8], Self> { let (mut i, header) = BitmapHeader::unpack(data)?; - let mut entries = Vec::new(); + let mut entries = Vec::new(); let nr_words = (BLOCK_SIZE - BitmapHeader::disk_size() as usize) / 8; for _w in 0..nr_words { let (tmp, mut word) = le_u64(i)?; @@ -128,7 +145,92 @@ impl Unpack for Bitmap { i = tmp; } - Ok((i, Bitmap {header, entries})) + Ok((i, Bitmap { header, entries })) + } +} + +//------------------------------------------ + +pub struct CoreSpaceMap { + nr_entries: u64, + bits: FixedBitSet, + overflow: HashMap, +} + +impl CoreSpaceMap { + pub fn new(nr_entries: u64) -> CoreSpaceMap { + let bits = FixedBitSet::with_capacity(nr_entries as usize * 2); + CoreSpaceMap { + nr_entries, + bits, + overflow: HashMap::new(), + } + } + + fn get_bits(&self, b: u64) -> Result { + if b >= self.nr_entries { + return Err(anyhow!("space map index out of bounds")); + } + + let low_bit = self.bits.contains((b * 2) as usize); + let high_bit = self.bits.contains((b * 2 + 1) as usize); + let mut result = 0u32; + + if high_bit { + result += 2; + } + if low_bit { + result += 1; + } + + Ok(result) + } + + fn set_bits(&mut self, b: u64, count: u32) -> Result<()> { + if count > 3 { + return Err(anyhow!("internal error: bits entry should be <= 3")); + } + + self.bits.set((b * 2) as usize, (count & 0x1) != 0); + self.bits.set((b * 2 + 1) as usize, (count & 0x2) != 0); + Ok(()) + } + + pub fn get(&self, b: u64) -> Result { + let result = self.get_bits(b)?; + if result < 3 { + Ok(result) + } else { + match self.overflow.get(&b) { + None => Err(anyhow!( + "internal error: missing overflow entry in space map" + )), + Some(result) => Ok(*result), + } + } + } + + pub fn inc(&mut self, b: u64) -> Result<()> { + let old = self.get(b)?; + + if old < 3 { + // bump up the bits + self.set_bits(b, old + 1)?; + + if old == 2 { + // insert overflow entry + self.overflow.insert(b, 1); + } + } else if let Some(count) = self.overflow.get_mut(&b) { + // increment the overflow + *count += 1; + } else { + return Err(anyhow!( + "internal error: missing overflow entry in space map" + )); + } + + Ok(()) } } diff --git a/src/thin/check.rs b/src/thin/check.rs index 4330ecb..84e7a7f 100644 --- a/src/thin/check.rs +++ b/src/thin/check.rs @@ -1,7 +1,7 @@ use anyhow::{anyhow, Result}; use fixedbitset::FixedBitSet; use nom::{number::complete::*, IResult}; -use std::collections::HashMap; +use std::collections::{HashMap, BTreeMap}; use std::path::Path; use std::sync::{Arc, Mutex}; use std::time::Instant; @@ -66,10 +66,21 @@ impl Unpack for BlockTime { } } -struct BottomLevelVisitor {} +struct BottomLevelVisitor { + data_sm: Arc>, +} impl NodeVisitor for BottomLevelVisitor { - fn visit(&mut self, _w: &BTreeWalker, _b: &Block, _node: &Node) -> Result<()> { + fn visit(&mut self, _w: &BTreeWalker, _b: &Block, node: &Node) -> Result<()> { + // FIXME: do other checks + + if let Node::Leaf {header: _h, keys: _k, values} = node { + let mut data_sm = self.data_sm.lock().unwrap(); + for bt in values { + data_sm.inc(bt.block)?; + } + } + Ok(()) } } @@ -197,6 +208,46 @@ impl NodeVisitor for ValueCollector { //------------------------------------------ +struct RangeBuilder { + run: Option<(u64, BitmapEntry)>, + runs: Vec<(u64, BitmapEntry)> +} + +impl RangeBuilder { + fn new() -> RangeBuilder { + RangeBuilder { + run: None, + runs: Vec::new(), + } + } + + fn push(&mut self, e: &BitmapEntry) { + match &self.run { + Some((len, e2)) if *e == *e2 => { + self.run = Some((*len + 1, e2.clone())); + }, + Some((len, e2)) => { + self.runs.push((*len, e2.clone())); + self.run = Some((1, e.clone())); + }, + None => { + self.run = Some((1, e.clone())); + } + } + } + + fn complete(&mut self) { + match &self.run { + Some((len, e)) => { + self.runs.push((*len, e.clone())); + }, + None => {} + } + } +} + +//------------------------------------------ + pub fn check(dev: &Path) -> Result<()> { let engine = Arc::new(AsyncIoEngine::new(dev, 256)?); @@ -230,10 +281,14 @@ pub fn check(dev: &Path) -> Result<()> { engine.get_nr_blocks() as usize, ))); + let root = unpack::(&sb.data_sm_root[0..])?; + let data_sm = Arc::new(Mutex::new(CoreSpaceMap::new(root.nr_blocks))); + for (thin_id, root) in roots { let mut w = BTreeWalker::new_with_seen(engine.clone(), seen.clone(), false); + let data_sm = data_sm.clone(); pool.execute(move || { - let mut v = BottomLevelVisitor {}; + let mut v = BottomLevelVisitor {data_sm}; let result = w.walk(&mut v, root).expect("walk failed"); // FIXME: return error eprintln!("checked thin_dev {} -> {:?}", thin_id, result); }); @@ -266,6 +321,8 @@ pub fn check(dev: &Path) -> Result<()> { let _result = w.walk(&mut v, root.bitmap_root); eprintln!("{} index entries", v.entries.len()); + let mut builder = RangeBuilder::new(); + for i in v.entries { let mut b = Block::new(i.blocknr); engine.read(&mut b)?; @@ -274,7 +331,26 @@ pub fn check(dev: &Path) -> Result<()> { return Err(anyhow!("Index entry points to block ({}) that isn't a bitmap", b.loc)); } - let _bitmap = unpack::(b.get_data())?; + let bitmap = unpack::(b.get_data())?; + for e in bitmap.entries { + builder.push(&e); + } + } + builder.complete(); + eprintln!("{} ranges", builder.runs.len()); + + let mut counts = BTreeMap::new(); + for (len, _v) in builder.runs { + if let Some(c) = counts.get(&len) { + let new_c = *c + 1; + counts.insert(len, new_c); + } else { + counts.insert(len, 1); + } + } + + for (len, c) in counts { + eprintln!("{}: {}", len, c); } }