From 37ea0280dfb853917fc4d4b0c57262ea0722e029 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 26 Oct 2020 12:05:27 +0000 Subject: [PATCH] [thin_restore] first pass at btree_builder. No tests yet --- src/bin/thin_explore.rs | 4 +- src/bin/thin_restore.rs | 75 ++++++++ src/pdata/btree.rs | 20 ++ src/pdata/btree_builder.rs | 364 +++++++++++++++++++++++++++++++++++++ src/pdata/mod.rs | 1 + src/pdata/space_map.rs | 59 +++++- src/thin/device_detail.rs | 2 +- src/thin/mod.rs | 1 + src/thin/restore.rs | 31 ++++ 9 files changed, 544 insertions(+), 13 deletions(-) create mode 100644 src/bin/thin_restore.rs create mode 100644 src/pdata/btree_builder.rs create mode 100644 src/thin/restore.rs diff --git a/src/bin/thin_explore.rs b/src/bin/thin_explore.rs index 8fd6ddb..e1ba44e 100644 --- a/src/bin/thin_explore.rs +++ b/src/bin/thin_explore.rs @@ -301,7 +301,7 @@ impl Adjacent for BlockTime { } impl Adjacent for DeviceDetail { - fn adjacent(&self, rhs: &Self) -> bool { + fn adjacent(&self, _rhs: &Self) -> bool { false } } @@ -561,7 +561,7 @@ impl Panel for DeviceDetailPanel { btree::Node::Internal { values, .. } => { Some(PushDeviceDetail(values[self.state.selected().unwrap()])) } - btree::Node::Leaf { values, keys, .. } => None, + btree::Node::Leaf { .. } => None, }, Key::Char('h') | Key::Left => Some(PopPanel), _ => None, diff --git a/src/bin/thin_restore.rs b/src/bin/thin_restore.rs new file mode 100644 index 0000000..b774480 --- /dev/null +++ b/src/bin/thin_restore.rs @@ -0,0 +1,75 @@ +extern crate clap; +extern crate thinp; + +use atty::Stream; +use clap::{App, Arg}; +use std::path::Path; +use std::process; +use std::process::exit; +use std::sync::Arc; +use thinp::file_utils; +use thinp::report::*; +use thinp::thin::restore::{restore, ThinRestoreOptions}; + +fn main() { + let parser = App::new("thin_restore") + .version(thinp::version::TOOLS_VERSION) + .about("Convert XML format metadata to binary.") + .arg( + Arg::with_name("OVERRIDE_MAPPING_ROOT") + .help("Specify a mapping root to use") + .long("override-mapping-root") + .value_name("OVERRIDE_MAPPING_ROOT") + .takes_value(true), + ) + .arg( + Arg::with_name("INPUT") + .help("Specify the input xml") + .short("i") + .long("input") + .required(true), + ) + .arg( + Arg::with_name("OUTPUT") + .help("Specify the output device to check") + .short("o") + .long("output") + .required(true), + ) + .arg( + Arg::with_name("SYNC_IO") + .help("Force use of synchronous io") + .long("sync-io"), + ); + + let matches = parser.get_matches(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + let output_file = Path::new(matches.value_of("OUTPUT").unwrap()); + + if !file_utils::file_exists(input_file) { + eprintln!("Couldn't find input file '{:?}'.", &input_file); + exit(1); + } + + let report; + + if matches.is_present("QUIET") { + report = std::sync::Arc::new(mk_quiet_report()); + } else if atty::is(Stream::Stdout) { + report = std::sync::Arc::new(mk_progress_bar_report()); + } else { + report = Arc::new(mk_simple_report()); + } + + let opts = ThinRestoreOptions { + input: &input_file, + output: &output_file, + async_io: !matches.is_present("SYNC_IO"), + report, + }; + + if let Err(reason) = restore(opts) { + println!("{}", reason); + process::exit(1); + } +} diff --git a/src/pdata/btree.rs b/src/pdata/btree.rs index f1cfb17..bee9f91 100644 --- a/src/pdata/btree.rs +++ b/src/pdata/btree.rs @@ -461,6 +461,26 @@ impl Node { Leaf { header, .. } => header, } } + + fn get_mut_header(&mut self) -> &mut NodeHeader { + use Node::*; + match self { + Internal { header, .. } => header, + Leaf { header, .. } => header, + } + } + + pub fn get_keys(&self) -> &[u64] { + use Node::*; + match self { + Internal { keys, .. } => &keys[0..], + Leaf { keys, .. } => &keys[0..], + } + } + + pub fn set_block(&mut self, b: u64) { + self.get_mut_header().block = b; + } } pub fn convert_result<'a, V>(path: &Vec, r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> { diff --git a/src/pdata/btree_builder.rs b/src/pdata/btree_builder.rs new file mode 100644 index 0000000..389b685 --- /dev/null +++ b/src/pdata/btree_builder.rs @@ -0,0 +1,364 @@ +use anyhow::{anyhow, Result}; +use byteorder::{LittleEndian, WriteBytesExt}; +use std::collections::VecDeque; +use std::io::Cursor; +use std::sync::{Arc, Mutex}; + +use crate::checksum; +use crate::io_engine::*; +use crate::pdata::btree::*; +use crate::pdata::space_map::*; +use crate::pdata::unpack::*; + +//------------------------------------------ + +fn pack_node(node: &Node, w: &mut W) -> Result<()> { + match node { + Node::Internal { + header, + keys, + values, + } => { + header.pack(w)?; + for k in keys { + w.write_u64::(*k)?; + } + + // pad with zeroes + for _i in keys.len()..header.max_entries as usize { + w.write_u64::(0)?; + } + + for v in values { + v.pack(w)?; + } + } + Node::Leaf { + header, + keys, + values, + } => { + header.pack(w)?; + for k in keys { + w.write_u64::(*k)?; + } + + // pad with zeroes + for _i in keys.len()..header.max_entries as usize { + w.write_u64::(0)?; + } + + for v in values { + v.pack(w)?; + } + } + } + + Ok(()) +} + +//------------------------------------------ + +fn calc_max_entries() -> usize { + let elt_size = 8 + V::disk_size() as usize; + ((BLOCK_SIZE - NodeHeader::disk_size() as usize) / elt_size) as usize +} + +//------------------------------------------ + +struct Entries { + max_entries: usize, + entries: VecDeque<(u64, V)>, +} + +enum Action { + Noop, + WriteSingle { + keys: Vec, + values: Vec, + }, + WritePair { + keys1: Vec, + values1: Vec, + keys2: Vec, + values2: Vec, + }, +} + +impl Entries { + pub fn new(max_entries: usize) -> Entries { + Entries { + max_entries, + entries: VecDeque::new(), + } + } + + pub fn add_entry(&mut self, k: u64, v: V) -> Action { + let result = if self.full() { + let (keys, values) = self.pop(self.max_entries); + Action::WriteSingle { keys, values } + } else { + Action::Noop + }; + + self.entries.push_back((k, v)); + + result + } + + pub fn complete(&mut self) -> Action { + let n = self.entries.len(); + + if n >= self.max_entries { + let n1 = n / 2; + let n2 = n - n1; + let (keys1, values1) = self.pop(n1); + let (keys2, values2) = self.pop(n2); + + Action::WritePair { + keys1, + values1, + keys2, + values2, + } + } else if n > 0 { + let (keys, values) = self.pop(n); + Action::WriteSingle { keys, values } + } else { + Action::Noop + } + } + + fn full(&self) -> bool { + self.entries.len() >= 2 * self.max_entries + } + + fn pop(&mut self, count: usize) -> (Vec, Vec) { + let mut keys = Vec::new(); + let mut values = Vec::new(); + + for _i in 0..count { + let (k, v) = self.entries.pop_front().unwrap(); + keys.push(k); + values.push(v); + } + + (keys, values) + } +} + +//------------------------------------------ + +struct WriteBatcher { + engine: Arc>, + sm: Arc>, + + batch_size: usize, + queue: Vec, +} + +impl WriteBatcher { + fn new( + engine: Arc>, + sm: Arc>, + batch_size: usize, + ) -> WriteBatcher { + WriteBatcher { + engine, + sm, + batch_size, + queue: Vec::with_capacity(batch_size), + } + } + + fn alloc(&mut self) -> Result { + let mut sm = self.sm.lock().unwrap(); + let b = sm.alloc()?; + + if b.is_none() { + return Err(anyhow!("out of metadata space")); + } + + Ok(b.unwrap()) + } + + fn write(&mut self, b: Block) -> Result<()> { + checksum::write_checksum(&mut b.get_data(), checksum::BT::NODE)?; + + if self.queue.len() == self.batch_size { + self.flush()?; + } + + self.queue.push(b); + Ok(()) + } + + fn flush(&mut self) -> Result<()> { + self.engine.write_many(&self.queue)?; + self.queue.clear(); + Ok(()) + } +} + +//------------------------------------------ + +fn write_node_(w: &mut WriteBatcher, mut node: Node) -> Result<(u64, u64)> { + let keys = node.get_keys(); + let first_key = keys.first().unwrap_or(&0u64).clone(); + + let loc = w.alloc()?; + node.set_block(loc); + + let b = Block::new(loc); + let mut cursor = Cursor::new(b.get_data()); + pack_node(&node, &mut cursor)?; + w.write(b)?; + + Ok((first_key, loc)) +} + +fn write_leaf( + w: &mut WriteBatcher, + keys: Vec, + values: Vec, +) -> Result<(u64, u64)> { + let header = NodeHeader { + block: 0, + is_leaf: true, + nr_entries: keys.len() as u32, + max_entries: calc_max_entries::() as u32, + value_size: V::disk_size(), + }; + + let node = Node::Leaf { + header, + keys, + values, + }; + + write_node_(w, node) +} + +fn write_internal(w: &mut WriteBatcher, keys: Vec, values: Vec) -> Result<(u64, u64)> { + let header = NodeHeader { + block: 0, + is_leaf: false, + nr_entries: keys.len() as u32, + max_entries: calc_max_entries::() as u32, + value_size: u64::disk_size(), + }; + + let node: Node = Node::Internal { + header, + keys, + values, + }; + + write_node_(w, node) +} + +pub struct Builder { + w: WriteBatcher, + entries: Entries, + + max_internal_entries: usize, + internal_entries: Vec>, + + root: u64, +} + +impl Builder { + pub fn new(engine: Arc>, sm: Arc>) -> Builder { + let max_entries = calc_max_entries::(); + let max_internal_entries = calc_max_entries::(); + + Builder { + w: WriteBatcher::new(engine, sm, 256), + entries: Entries::new(max_entries), + max_internal_entries, + internal_entries: Vec::new(), + root: 0, + } + } + + pub fn add_entry(&mut self, k: u64, v: V) -> Result<()> { + let action = self.entries.add_entry(k, v); + self.perform_action(action) + } + + pub fn complete(mut self) -> Result { + let action = self.entries.complete(); + self.perform_action(action)?; + self.w.flush()?; + Ok(self.root) + } + + //-------------------- + + fn add_internal_entry(&mut self, level: usize, k: u64, v: u64) -> Result<()> { + if self.internal_entries.len() == level { + self.internal_entries.push(Entries::new(self.max_internal_entries)); + } + + let action = self.internal_entries[level].add_entry(k, v); + self.perform_internal_action(level, action) + } + + fn perform_internal_action(&mut self, level: usize, action: Action) -> Result<()> { + match action { + Action::Noop => {} + Action::WriteSingle { keys, values } => { + let (k, loc) = write_internal(&mut self.w, keys, values)?; + self.add_internal_entry(level + 1, k, loc)?; + self.root = loc; + } + Action::WritePair { + keys1, + values1, + keys2, + values2, + } => { + let (k, loc) = write_leaf(&mut self.w, keys1, values1)?; + self.add_internal_entry(level + 1, k, loc)?; + + let (k, loc) = write_leaf(&mut self.w, keys2, values2)?; + self.add_internal_entry(level + 1, k, loc)?; + } + } + + Ok(()) + } + + fn perform_action(&mut self, action: Action) -> Result<()> { + match action { + Action::Noop => {} + Action::WriteSingle { keys, values } => { + let (k, loc) = write_leaf(&mut self.w, keys, values)?; + self.add_internal_entry(0, k, loc)?; + } + Action::WritePair { + keys1, + values1, + keys2, + values2, + } => { + let (k, loc) = write_leaf(&mut self.w, keys1, values1)?; + self.add_internal_entry(0, k, loc)?; + + let (k, loc) = write_leaf(&mut self.w, keys2, values2)?; + self.add_internal_entry(0, k, loc)?; + } + } + + Ok(()) + } +} + +//------------------------------------------ + +#[test] +fn fail() { + assert!(false); +} + +//------------------------------------------ diff --git a/src/pdata/mod.rs b/src/pdata/mod.rs index 6e473e2..e691b21 100644 --- a/src/pdata/mod.rs +++ b/src/pdata/mod.rs @@ -1,4 +1,5 @@ pub mod btree; +pub mod btree_builder; pub mod space_map; pub mod unpack; diff --git a/src/pdata/space_map.rs b/src/pdata/space_map.rs index fa0af19..34daa15 100644 --- a/src/pdata/space_map.rs +++ b/src/pdata/space_map.rs @@ -1,8 +1,8 @@ use anyhow::{anyhow, Result}; +use byteorder::{LittleEndian, WriteBytesExt}; use fixedbitset::FixedBitSet; use nom::{multi::count, number::complete::*, IResult}; use std::sync::{Arc, Mutex}; -use byteorder::{LittleEndian, WriteBytesExt}; use crate::io_engine::*; use crate::pdata::unpack::{Pack, Unpack}; @@ -96,7 +96,7 @@ impl Unpack for MetadataIndex { let (i, _blocknr) = le_u64(i)?; let (i, indexes) = count(IndexEntry::unpack, MAX_METADATA_BITMAPS)(i)?; - Ok((i, MetadataIndex {indexes})) + Ok((i, MetadataIndex { indexes })) } } @@ -195,17 +195,16 @@ impl Pack for Bitmap { for e in chunk { w >>= 2; match e { - Small(0) => { - }, + Small(0) => {} Small(1) => { w |= 0x2 << 62; - }, + } Small(2) => { w |= 0x1 << 62; - }, + } Small(_) => { return Err(anyhow!("Bad small value in bitmap entry")); - }, + } Overflow => { w |= 0x3 << 62; } @@ -228,8 +227,12 @@ pub trait SpaceMap { // Returns the old ref count fn set(&mut self, b: u64, v: u32) -> Result; - + fn inc(&mut self, begin: u64, len: u64) -> Result<()>; + + // Finds a block with a zero reference count. Increments the + // count. + fn alloc(&mut self) -> Result>; } pub type ASpaceMap = Arc>; @@ -238,6 +241,7 @@ pub type ASpaceMap = Arc>; pub struct CoreSpaceMap { nr_allocated: u64, + first_free: u64, counts: Vec, } @@ -248,6 +252,7 @@ where pub fn new(nr_entries: u64) -> CoreSpaceMap { CoreSpaceMap { nr_allocated: 0, + first_free: 0, counts: vec![V::default(); nr_entries as usize], } } @@ -271,13 +276,16 @@ where fn set(&mut self, b: u64, v: u32) -> Result { let old = self.counts[b as usize]; - assert!(v < 0xff); // FIXME: we can't assume this + assert!(v < 0xff); // FIXME: we can't assume this self.counts[b as usize] = V::from(v as u8); if old == V::from(0u8) && v != 0 { self.nr_allocated += 1; } else if old != V::from(0u8) && v == 0 { self.nr_allocated -= 1; + if b < self.first_free { + self.first_free = b; + } } Ok(old.into()) @@ -295,6 +303,19 @@ where } Ok(()) } + + fn alloc(&mut self) -> Result> { + for b in self.first_free..(self.counts.len() as u64) { + if self.counts[b as usize] == V::from(0u8) { + self.counts[b as usize] = V::from(1u8); + self.first_free = b + 1; + return Ok(Some(b)); + } + } + + self.first_free = self.counts.len() as u64; + Ok(None) + } } pub fn core_sm(nr_entries: u64, max_count: u32) -> Arc> { @@ -314,6 +335,7 @@ pub fn core_sm(nr_entries: u64, max_count: u32) -> Arc Result<()> { @@ -370,6 +396,19 @@ impl SpaceMap for RestrictedSpaceMap { } Ok(()) } + + fn alloc(&mut self) -> Result> { + for b in self.first_free..self.counts.len() { + if !self.counts.contains(b) { + self.counts.insert(b); + self.first_free = b + 1; + return Ok(Some(b as u64)); + } + } + + self.first_free = self.counts.len(); + Ok(None) + } } //------------------------------------------ diff --git a/src/thin/device_detail.rs b/src/thin/device_detail.rs index 68fc21a..b375d88 100644 --- a/src/thin/device_detail.rs +++ b/src/thin/device_detail.rs @@ -19,7 +19,7 @@ impl fmt::Display for DeviceDetail { self.mapped_blocks, self.transaction_id, self.creation_time, - self.snapshotted_time); + self.snapshotted_time)?; Ok(()) } } diff --git a/src/thin/mod.rs b/src/thin/mod.rs index 3f2653a..b333e81 100644 --- a/src/thin/mod.rs +++ b/src/thin/mod.rs @@ -3,4 +3,5 @@ pub mod device_detail; pub mod superblock; pub mod check; pub mod dump; +pub mod restore; pub mod xml; diff --git a/src/thin/restore.rs b/src/thin/restore.rs new file mode 100644 index 0000000..3761407 --- /dev/null +++ b/src/thin/restore.rs @@ -0,0 +1,31 @@ +use anyhow::Result; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use crate::io_engine::{AsyncIoEngine, IoEngine, SyncIoEngine}; +use crate::pdata::btree::{self, *}; +use crate::pdata::space_map::*; +use crate::pdata::unpack::*; +use crate::report::*; +use crate::thin::block_time::*; +use crate::thin::device_detail::*; +use crate::thin::superblock::*; +use crate::thin::xml::{self, MetadataVisitor}; + +//------------------------------------------ + +pub struct ThinRestoreOptions<'a> { + pub input: &'a Path, + pub output: &'a Path, + pub async_io: bool, + pub report: Arc, +} + +//------------------------------------------ + +pub fn restore(opts: ThinRestoreOptions) -> Result<()> { + todo!(); +} + +//------------------------------------------