diff --git a/Cargo.lock b/Cargo.lock index 610c998..4349d0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,9 +49,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "base64" @@ -72,10 +72,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] -name = "cc" -version = "1.0.58" +name = "cassowary" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a06fb2e53271d7c279ec1efea6ab691c35a2ae67ec0d91d7acec0caf13b518" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + +[[package]] +name = "cc" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c" [[package]] name = "cfg-if" @@ -85,9 +91,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "clap" -version = "2.33.2" +version = "2.33.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10040cdf04294b565d9e0319955430099ec3813a64c952b86a41200ad714ae48" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" dependencies = [ "ansi_term", "atty", @@ -130,6 +136,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "data-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d0e2d24e5ee3b23a01de38eefdcd978907890701f08ffffd4cb457ca4ee8d6" + [[package]] name = "duct" version = "0.13.4" @@ -160,15 +172,15 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.3.0" +version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc4fcacf5cd3681968f6524ea159383132937739c6c40dabab9e37ed515911b" +checksum = "4e08c8bc7575d7e091fe0706963bd22e2a4be6a64da995f03b2a5a57d66ad015" [[package]] name = "flate2" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68c90b0fc46cf89d227cc78b40e494ff81287a92dd07631e5af0d06fe3cf885e" +checksum = "766d0e77a2c1502169d4a93ff3b8c15a71fd946cd0126309752104e5f3c46d94" dependencies = [ "cfg-if", "crc32fast", @@ -273,9 +285,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" dependencies = [ "cfg-if", "libc", @@ -340,9 +352,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.74" +version = "0.2.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10" +checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" [[package]] name = "log" @@ -361,11 +373,12 @@ checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "miniz_oxide" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0f75932c1f6cfae3c04000e40114adf955636e19040f9c0a2c380702aa1c7f" +checksum = "c60c0dfe32c10b43a144bad8fc83538c52f58302c92300ea7ec7bf7b38d5a7b9" dependencies = [ "adler", + "autocfg", ] [[package]] @@ -383,9 +396,7 @@ dependencies = [ [[package]] name = "nom" -version = "5.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +version = "6.0.0-alpha1" dependencies = [ "lexical-core", 
"memchr", @@ -394,9 +405,9 @@ dependencies = [ [[package]] name = "num-derive" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0396233fb2d5b0ae3f05ff6aba9a09185f7f6e70f87fb01147d545f85364665" +checksum = "6f09b9841adb6b5e1f89ef7087ea636e0fd94b2851f887c1e3eb5d5f8228fab3" dependencies = [ "proc-macro2", "quote", @@ -429,10 +440,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] -name = "once_cell" -version = "1.4.0" +name = "numtoa" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" +checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" + +[[package]] +name = "once_cell" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad" [[package]] name = "os_pipe" @@ -472,9 +489,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "ppv-lite86" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" +checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20" [[package]] name = "proc-macro-hack" @@ -490,9 +507,9 @@ checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a" [[package]] name = "proc-macro2" -version = "1.0.19" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04f5f085b5d71e2188cb8271e5da0161ad52c3f227a661a3c135fdf28e258b12" +checksum = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c" dependencies = [ "unicode-xid", ] @@ -585,6 +602,15 @@ version = "0.1.57" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +dependencies = [ + "redox_syscall", +] + [[package]] name = "regex" version = "1.3.9" @@ -648,9 +674,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.38" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e69abc24912995b3038597a7a593be5053eb0fb44f3cc5beec0deb421790c1f4" +checksum = "6690e3e9f692504b941dc6c3b188fd28df054f7fb8469ab40680df52fdcc842b" dependencies = [ "proc-macro2", "quote", @@ -681,6 +707,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "termion" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c22cec9d8978d906be5ac94bceb5a010d885c626c4c8855721a4dbd20e3ac905" +dependencies = [ + "libc", + "numtoa", + "redox_syscall", + "redox_termios", +] + [[package]] name = "termios" version = "0.3.2" @@ -709,6 +747,7 @@ dependencies = [ "byteorder", "clap", "crc32c", + "data-encoding", "duct", "fixedbitset", "flate2", @@ -727,8 +766,10 @@ dependencies = [ "quickcheck_macros", "rand", "tempfile", + "termion", "thiserror", "threadpool", + "tui", ] [[package]] @@ -769,6 +810,25 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "tui" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a977b0bb2e2033a6fef950f218f13622c3c34e59754b704ce3492dedab1dfe95" +dependencies = [ + "bitflags", + "cassowary", + "termion", + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "unicode-segmentation" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" + [[package]] name = "unicode-width" version = "0.1.8" diff --git a/Cargo.toml b/Cargo.toml index d58ec14..74836d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ base64 = "0.12" byteorder = "1.3" clap = "2.33" crc32c = "0.4" +data-encoding = "2.3" duct = "0.13" fixedbitset = "0.3" futures = "0.3" @@ -20,7 +21,7 @@ io-uring = "0.3" indicatif = "0.15" libc = "0.2.71" nix = "0.17" -nom = "5.1" +nom = { path = "/home/ejt/builds/nom/" } num_cpus = "1.13" num-derive = "0.3" num-traits = "0.2" @@ -29,8 +30,13 @@ rand = "0.7" tempfile = "3.1" threadpool = "1.8" thiserror = "1.0" +tui = "0.10" +termion = "1.5" [dev-dependencies] json = "0.12" quickcheck = "0.9" quickcheck_macros = "0.9" + +[profile.release] +debug = true diff --git a/src/bin/thin_check.rs b/src/bin/thin_check.rs index 59e9696..09759dc 100644 --- a/src/bin/thin_check.rs +++ b/src/bin/thin_check.rs @@ -26,8 +26,15 @@ fn main() { .help("Only check the superblock.") .long("super-block-only") .value_name("SB_ONLY"), - ) .arg( - Arg::with_name("AUTO_REPAIR") + ) + .arg( + Arg::with_name("SKIP_MAPPINGS") + .help("Don't check the mapping tree") + .long("skip-mappings") + .value_name("SKIP_MAPPINGS"), + ) + .arg( + Arg::with_name("AUTO_REPAIR") .help("Auto repair trivial issues.") .long("auto-repair"), ) @@ -88,6 +95,8 @@ fn main() { let opts = ThinCheckOptions { dev: &input_file, async_io: !matches.is_present("SYNC_IO"), + sb_only: matches.is_present("SB_ONLY"), + skip_mappings: matches.is_present("SKIP_MAPPINGS"), ignore_non_fatal: matches.is_present("IGNORE_NON_FATAL"), auto_repair: matches.is_present("AUTO_REPAIR"), report, diff --git a/src/bin/thin_dump.rs b/src/bin/thin_dump.rs new file mode 100644 index 0000000..3954249 --- /dev/null +++ b/src/bin/thin_dump.rs @@ -0,0 +1,105 @@ +extern crate clap; +extern crate thinp; + +use atty::Stream; +use clap::{App, Arg}; +use std::path::Path; +use std::process; +use std::process::exit; +use 
std::sync::Arc; +use thinp::file_utils; +use thinp::report::*; +use thinp::thin::dump::{dump, ThinDumpOptions}; + +fn main() { + let parser = App::new("thin_check") + .version(thinp::version::TOOLS_VERSION) + .about("Validates thin provisioning metadata on a device or file.") + .arg( + Arg::with_name("QUIET") + .help("Suppress output messages, return only exit code.") + .short("q") + .long("quiet"), + ) + .arg( + Arg::with_name("SB_ONLY") + .help("Only check the superblock.") + .long("super-block-only") + .value_name("SB_ONLY"), + ) + .arg( + Arg::with_name("SKIP_MAPPINGS") + .help("Don't check the mapping tree") + .long("skip-mappings") + .value_name("SKIP_MAPPINGS"), + ) + .arg( + Arg::with_name("AUTO_REPAIR") + .help("Auto repair trivial issues.") + .long("auto-repair"), + ) + .arg( + Arg::with_name("IGNORE_NON_FATAL") + .help("Only return a non-zero exit code if a fatal error is found.") + .long("ignore-non-fatal-errors"), + ) + .arg( + Arg::with_name("CLEAR_NEEDS_CHECK") + .help("Clears the 'needs_check' flag in the superblock") + .long("clear-needs-check"), + ) + .arg( + Arg::with_name("OVERRIDE_MAPPING_ROOT") + .help("Specify a mapping root to use") + .long("override-mapping-root") + .value_name("OVERRIDE_MAPPING_ROOT") + .takes_value(true), + ) + .arg( + Arg::with_name("METADATA_SNAPSHOT") + .help("Check the metadata snapshot on a live pool") + .short("m") + .long("metadata-snapshot") + .value_name("METADATA_SNAPSHOT"), + ) + .arg( + Arg::with_name("INPUT") + .help("Specify the input device to check") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("SYNC_IO") + .help("Force use of synchronous io") + .long("sync-io"), + ); + + let matches = parser.get_matches(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + + if !file_utils::file_exists(input_file) { + eprintln!("Couldn't find input file '{:?}'.", &input_file); + exit(1); + } + + let report; + + if matches.is_present("QUIET") { + report = 
std::sync::Arc::new(mk_quiet_report()); + } else if atty::is(Stream::Stdout) { + report = std::sync::Arc::new(mk_progress_bar_report()); + } else { + report = Arc::new(mk_simple_report()); + } + + let opts = ThinDumpOptions { + dev: &input_file, + async_io: !matches.is_present("SYNC_IO"), + report, + }; + + if let Err(reason) = dump(opts) { + println!("{}", reason); + process::exit(1); + } +} diff --git a/src/bin/thin_explore.rs b/src/bin/thin_explore.rs new file mode 100644 index 0000000..efcb1e6 --- /dev/null +++ b/src/bin/thin_explore.rs @@ -0,0 +1,746 @@ +extern crate clap; + +use anyhow::{anyhow, Result}; +use clap::{App, Arg}; +use std::fmt; +use std::io::{self, Write}; +use std::path::Path; +use std::sync::mpsc; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; +use std::thread; +use std::time::Duration; + +use termion::event::Key; +use termion::input::TermRead; +use termion::raw::IntoRawMode; + +use tui::{ + backend::{TermionBackend}, + buffer::Buffer, + layout::{Constraint, Direction, Layout, Rect}, + style::{Color, Modifier, Style}, + terminal::Frame, + text::{Span}, + widgets::{Block, Borders, List, ListItem, ListState, Row, StatefulWidget, Table, Widget}, + Terminal, +}; + +use thinp::io_engine::*; +use thinp::pdata::btree; +use thinp::pdata::unpack::*; +use thinp::thin::block_time::*; +use thinp::thin::superblock::*; + +//------------------------------------ + +pub enum Event { + Input(I), + Tick, +} + +pub struct Events { + rx: mpsc::Receiver>, + input_handle: thread::JoinHandle<()>, + ignore_exit_key: Arc, +} + +#[derive(Debug, Clone, Copy)] +pub struct Config { + pub exit_key: Key, + pub tick_rate: Duration, +} + +impl Default for Config { + fn default() -> Config { + Config { + exit_key: Key::Char('q'), + tick_rate: Duration::from_millis(250), + } + } +} + +impl Events { + pub fn new() -> Events { + Events::with_config(Config::default()) + } + + pub fn with_config(config: Config) -> Events { + let (tx, rx) = mpsc::channel(); + let 
ignore_exit_key = Arc::new(AtomicBool::new(false)); + let input_handle = { + let tx = tx.clone(); + let ignore_exit_key = ignore_exit_key.clone(); + thread::spawn(move || { + let stdin = io::stdin(); + for evt in stdin.keys() { + if let Ok(key) = evt { + if let Err(err) = tx.send(Event::Input(key)) { + eprintln!("{}", err); + return; + } + if !ignore_exit_key.load(Ordering::Relaxed) && key == config.exit_key { + return; + } + } + } + }) + }; + + Events { + rx, + ignore_exit_key, + input_handle, + } + } + + pub fn next(&self) -> Result, mpsc::RecvError> { + self.rx.recv() + } + + pub fn disable_exit_key(&mut self) { + self.ignore_exit_key.store(true, Ordering::Relaxed); + } + + pub fn enable_exit_key(&mut self) { + self.ignore_exit_key.store(false, Ordering::Relaxed); + } +} + +//------------------------------------ + +fn ls_next(ls: &mut ListState, max: usize) { + let i = match ls.selected() { + Some(i) => { + if i >= max - 1 { + max - 1 + } else { + i + 1 + } + } + None => 0, + }; + ls.select(Some(i)); +} + +fn ls_previous(ls: &mut ListState) { + let i = match ls.selected() { + Some(i) => { + if i == 0 { + 0 + } else { + i - 1 + } + } + None => 0, + }; + ls.select(Some(i)); +} + +//------------------------------------ + +struct SBWidget { + sb: Superblock, +} + +impl Widget for SBWidget { + fn render(self, area: Rect, buf: &mut Buffer) { + let sb = &self.sb; + let flags = ["flags".to_string(), format!("{}", sb.flags)]; + let block = ["block".to_string(), format!("{}", sb.block)]; + let uuid = ["uuid".to_string(), format!("-")]; + let version = ["version".to_string(), format!("{}", sb.version)]; + let time = ["time".to_string(), format!("{}", sb.time)]; + let transaction_id = [ + "transaction_id".to_string(), + format!("{}", sb.transaction_id), + ]; + let metadata_snap = [ + "metadata_snap".to_string(), + if sb.metadata_snap == 0 { + "-".to_string() + } else { + format!("{}", sb.metadata_snap) + }, + ]; + let mapping_root = ["mapping root".to_string(), 
format!("{}", sb.mapping_root)]; + let details_root = ["details root".to_string(), format!("{}", sb.details_root)]; + let data_block_size = [ + "data block size".to_string(), + format!("{}k", sb.data_block_size * 2), + ]; + + let table = Table::new( + ["Field", "Value"].iter(), + vec![ + Row::Data(flags.iter()), + Row::Data(block.iter()), + Row::Data(uuid.iter()), + Row::Data(version.iter()), + Row::Data(time.iter()), + Row::Data(transaction_id.iter()), + Row::Data(metadata_snap.iter()), + Row::Data(mapping_root.iter()), + Row::Data(details_root.iter()), + Row::Data(data_block_size.iter()), + ] + .into_iter(), + ) + .block( + Block::default() + .borders(Borders::ALL) + .title("Superblock".to_string()), + ) + .header_style(Style::default().fg(Color::Yellow)) + .widths(&[Constraint::Length(20), Constraint::Length(60)]) + .style(Style::default().fg(Color::White)) + .column_spacing(1); + + Widget::render(table, area, buf); + } +} + +//------------------------------------ + +struct HeaderWidget<'a> { + title: String, + hdr: &'a btree::NodeHeader, +} + +impl<'a> Widget for HeaderWidget<'a> { + fn render(self, area: Rect, buf: &mut Buffer) { + let hdr = &self.hdr; + let block = ["block".to_string(), format!("{}", hdr.block)]; + let kind = [ + "type".to_string(), + match hdr.is_leaf { + true => "LEAF".to_string(), + false => "INTERNAL".to_string(), + }, + ]; + let nr_entries = ["nr_entries".to_string(), format!("{}", hdr.nr_entries)]; + let max_entries = ["max_entries".to_string(), format!("{}", hdr.max_entries)]; + let value_size = ["value size".to_string(), format!("{}", hdr.value_size)]; + + let table = Table::new( + ["Field", "Value"].iter(), + vec![ + Row::Data(block.iter()), + Row::Data(kind.iter()), + Row::Data(nr_entries.iter()), + Row::Data(max_entries.iter()), + Row::Data(value_size.iter()), + ] + .into_iter(), + ) + .block(Block::default().borders(Borders::ALL).title(self.title)) + .header_style(Style::default().fg(Color::Yellow)) + 
.widths(&[Constraint::Length(20), Constraint::Length(60)]) + .style(Style::default().fg(Color::White)) + .column_spacing(1); + + Widget::render(table, area, buf); + } +} + +/* +fn read_node_header(engine: &dyn IoEngine, loc: u64) -> Result { + let b = engine.read(loc)?; + unpack(&b.get_data()).map_err(|_| anyhow!("couldn't unpack btree header")) +} +*/ + +fn read_node(engine: &dyn IoEngine, loc: u64) -> Result> { + let b = engine.read(loc)?; + let path = Vec::new(); + btree::unpack_node(&path, &b.get_data(), true, false) + .map_err(|_| anyhow!("couldn't unpack btree node")) +} + +//------------------------------------ + +// For types that have the concept of adjacency, but not of a distance +// between values. For instance with a BlockTime there is no delta that +// will get between two values with different times. +trait Adjacent { + fn adjacent(&self, rhs: &Self) -> bool; +} + +impl Adjacent for u64 { + fn adjacent(&self, rhs: &Self) -> bool { + (*self + 1) == *rhs + } +} + +impl Adjacent for BlockTime { + fn adjacent(&self, rhs: &Self) -> bool { + if self.time != rhs.time { + return false; + } + + self.block + 1 == rhs.block + } +} + +impl Adjacent for (X, Y) { + fn adjacent(&self, rhs: &Self) -> bool { + self.0.adjacent(&rhs.0) && self.1.adjacent(&rhs.1) + } +} + +fn adjacent_runs(mut ns: Vec) -> Vec<(V, usize)> { + let mut result = Vec::new(); + + if ns.len() == 0 { + return result; + } + + // Reverse so we can pop without cloning the value. 
+ ns.reverse(); + + let mut base = ns.pop().unwrap(); + let mut current = base; + let mut len = 1; + while let Some(v) = ns.pop() { + if current.adjacent(&v) { + current = v; + len += 1; + } else { + result.push((base.clone(), len)); + base = v.clone(); + current = v.clone(); + len = 1; + } + } + result.push((base.clone(), len)); + + result +} + +fn mk_runs(keys: &[u64], values: &[V]) -> Vec<((u64, V), usize)> { + let mut pairs = Vec::new(); + for (k, v) in keys.iter().zip(values.iter()) { + pairs.push((k.clone(), v.clone())); + } + + adjacent_runs(pairs) +} + +struct NodeWidget<'a, V: Unpack + Adjacent + Clone> { + title: String, + node: &'a btree::Node, +} + +fn mk_item<'a, V: fmt::Display>(k: u64, v: &V, len: usize) -> ListItem<'a> { + if len > 1 { + ListItem::new(Span::raw(format!("{} x {} -> {}", k, len as u64, v))) + } else { + ListItem::new(Span::raw(format!("{} -> {}", k, v))) + } +} + +fn mk_items<'a, V>(keys: &[u64], values: &[V], selected: usize) -> (Vec>, usize) +where + V: Adjacent + Copy + fmt::Display, +{ + let mut items = Vec::new(); + let bkeys = &keys[0..selected]; + let key = keys[selected]; + let akeys = &keys[(selected + 1)..]; + + let bvalues = &values[0..selected]; + let value = values[selected]; + let avalues = &values[(selected + 1)..]; + + let bruns = mk_runs(bkeys, bvalues); + let aruns = mk_runs(akeys, avalues); + let i = bruns.len(); + + for ((k, v), len) in bruns { + items.push(mk_item(k, &v, len)); + } + + items.push(ListItem::new(Span::raw(format!("{} -> {}", key, value)))); + + for ((k, v), len) in aruns { + items.push(mk_item(k, &v, len)); + } + + (items, i) +} + +impl<'a, V: Unpack + fmt::Display + Adjacent + Copy> StatefulWidget for NodeWidget<'a, V> { + type State = ListState; + + fn render(self, area: Rect, buf: &mut Buffer, state: &mut ListState) { + let chunks = Layout::default() + .direction(Direction::Vertical) + .constraints([Constraint::Min(10), Constraint::Percentage(80)].as_ref()) + .split(area); + + let hdr = 
HeaderWidget { + title: self.title, + hdr: self.node.get_header(), + }; + hdr.render(chunks[0], buf); + + let items: Vec; + let i: usize; + let selected = state.selected().unwrap(); + let mut state = ListState::default(); + + match self.node { + btree::Node::Internal { keys, values, .. } => { + let (items_, i_) = mk_items(keys, values, selected); + items = items_; + i = i_; + } + btree::Node::Leaf { keys, values, .. } => { + let (items_, i_) = mk_items(keys, values, selected); + items = items_; + i = i_; + } + } + state.select(Some(i)); + + let items = List::new(items) + .block(Block::default().borders(Borders::ALL).title("Entries")) + .highlight_style( + Style::default() + .bg(Color::LightGreen) + .add_modifier(Modifier::BOLD), + ); + + StatefulWidget::render(items, chunks[1], buf, &mut state); + } +} + +//------------------------------------ + +enum Action { + PushTopLevel(u64), + PushBottomLevel(u32, u64), + PopPanel, +} + +use Action::*; + +type Frame_<'a, 'b> = Frame<'a, TermionBackend>>>; + +trait Panel { + fn render(&mut self, area: Rect, f: &mut Frame_); + fn input(&mut self, k: Key) -> Option; + fn path_action(&mut self, child: u64) -> Option; +} + +struct SBPanel { + sb: Superblock, +} + +impl Panel for SBPanel { + fn render(&mut self, area: Rect, f: &mut Frame_) { + // FIXME: get rid of clone + let w = SBWidget { + sb: self.sb.clone(), + }; + f.render_widget(w, area); + } + + fn input(&mut self, _k: Key) -> Option { + None + } + + fn path_action(&mut self, child: u64) -> Option { + if child == self.sb.mapping_root { + Some(PushTopLevel(child)) + } else { + None + } + } +} + +struct TopLevelPanel { + node: btree::Node, + nr_entries: usize, + state: ListState, +} + +impl TopLevelPanel { + fn new(node: btree::Node) -> TopLevelPanel { + let nr_entries = node.get_header().nr_entries as usize; + let mut state = ListState::default(); + state.select(Some(0)); + + TopLevelPanel { + node, + nr_entries, + state, + } + } +} + +impl Panel for TopLevelPanel { + fn 
render(&mut self, area: Rect, f: &mut Frame_) { + let w = NodeWidget { + title: "Top Level".to_string(), + node: &self.node, // FIXME: get rid of clone + }; + + f.render_stateful_widget(w, area, &mut self.state); + } + + fn input(&mut self, k: Key) -> Option { + match k { + Key::Char('j') | Key::Down => { + ls_next(&mut self.state, self.nr_entries); + None + } + Key::Char('k') | Key::Up => { + ls_previous(&mut self.state); + None + } + Key::Char('l') | Key::Right => match &self.node { + btree::Node::Internal { values, .. } => { + Some(PushTopLevel(values[self.state.selected().unwrap()])) + } + btree::Node::Leaf { values, keys, .. } => { + let index = self.state.selected().unwrap(); + + Some(PushBottomLevel(keys[index] as u32, values[index])) + } + }, + Key::Char('h') | Key::Left => Some(PopPanel), + _ => None, + } + } + + fn path_action(&mut self, child: u64) -> Option { + match &self.node { + btree::Node::Internal { values, .. } => { + for i in 0..values.len() { + if values[i] == child { + self.state.select(Some(i)); + return Some(PushTopLevel(child)); + } + } + + return None; + } + btree::Node::Leaf { keys, values, .. 
} => { + for i in 0..values.len() { + if values[i] == child { + self.state.select(Some(i)); + return Some(PushBottomLevel(keys[i] as u32, child)); + } + } + + return None; + } + } + } +} + +struct BottomLevelPanel { + thin_id: u32, + node: btree::Node, + nr_entries: usize, + state: ListState, +} + +impl BottomLevelPanel { + fn new(thin_id: u32, node: btree::Node) -> BottomLevelPanel { + let nr_entries = node.get_header().nr_entries as usize; + let mut state = ListState::default(); + state.select(Some(0)); + + BottomLevelPanel { + thin_id, + node, + nr_entries, + state, + } + } +} + +impl Panel for BottomLevelPanel { + fn render(&mut self, area: Rect, f: &mut Frame_) { + let w = NodeWidget { + title: format!("Thin dev #{}", self.thin_id), + node: &self.node, + }; + + f.render_stateful_widget(w, area, &mut self.state); + } + + fn input(&mut self, k: Key) -> Option { + match k { + Key::Char('j') | Key::Down => { + ls_next(&mut self.state, self.nr_entries); + None + } + Key::Char('k') | Key::Up => { + ls_previous(&mut self.state); + None + } + Key::Char('l') | Key::Right => match &self.node { + btree::Node::Internal { values, .. } => Some(PushBottomLevel( + self.thin_id, + values[self.state.selected().unwrap()], + )), + _ => None, + }, + + Key::Char('h') | Key::Left => Some(PopPanel), + _ => None, + } + } + + fn path_action(&mut self, child: u64) -> Option { + match &self.node { + btree::Node::Internal { values, .. } => { + for i in 0..values.len() { + if values[i] == child { + self.state.select(Some(i)); + return Some(PushBottomLevel(self.thin_id, child)); + } + } + + return None; + } + btree::Node::Leaf { .. 
} => None, + } + } +} + +//------------------------------------ + +fn perform_action( + panels: &mut Vec>, + engine: &dyn IoEngine, + action: Action, +) -> Result<()> { + match action { + PushTopLevel(b) => { + let node = read_node::(engine, b)?; + panels.push(Box::new(TopLevelPanel::new(node))); + } + PushBottomLevel(thin_id, b) => { + let node = read_node::(engine, b)?; + panels.push(Box::new(BottomLevelPanel::new(thin_id, node))); + } + PopPanel => { + if panels.len() > 2 { + panels.pop(); + } + } + }; + Ok(()) +} + +fn explore(path: &Path, node_path: Option>) -> Result<()> { + let stdout = io::stdout(); + let mut stdout = stdout.lock().into_raw_mode()?; + write!(stdout, "{}", termion::clear::All)?; + + let backend = TermionBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let engine = SyncIoEngine::new(&path, 1, false)?; + + let mut panels: Vec> = Vec::new(); + + if let Some(path) = node_path { + assert_eq!(path[0], 0); + let sb = read_superblock(&engine, path[0])?; + panels.push(Box::new(SBPanel { sb })); + for b in &path[1..] 
{ + let action = panels.last_mut().unwrap().path_action(*b); + if let Some(action) = action { + perform_action(&mut panels, &engine, action)?; + } else { + return Err(anyhow!("bad node path: couldn't find child node {}", b)); + } + } + } else { + let sb = read_superblock(&engine, 0)?; + panels.push(Box::new(SBPanel { sb: sb.clone() })); + + let node = read_node::(&engine, sb.mapping_root)?; + panels.push(Box::new(TopLevelPanel::new(node))); + } + + let events = Events::new(); + + 'main: loop { + let render_panels = |f: &mut Frame_| { + let chunks = Layout::default() + .direction(Direction::Horizontal) + .constraints([Constraint::Percentage(50), Constraint::Percentage(50)].as_ref()) + .split(f.size()); + + let mut base = panels.len(); + if base >= 2 { + base -= 2; + } else { + base = 0; + } + + for i in base..panels.len() { + panels[i].render(chunks[i - base], f); + } + }; + + terminal.draw(render_panels)?; + + let last = panels.len() - 1; + let active_panel = &mut panels[last]; + if let Event::Input(key) = events.next()? 
{ + match key { + Key::Char('q') => break 'main, + _ => match active_panel.input(key) { + Some(action) => { + perform_action(&mut panels, &engine, action)?; + } + _ => {} + }, + } + } + } + + events.input_handle.join().unwrap(); + + Ok(()) +} + +//------------------------------------ + +fn main() -> Result<()> { + let parser = App::new("thin_explore") + .version(thinp::version::TOOLS_VERSION) + .about("A text user interface for examining thin metadata.") + .arg( + Arg::with_name("NODE_PATH") + .help("Pass in a node path as output by thin_check") + .short("p") + .long("node-path") + .value_name("NODE_PATH"), + ) + .arg( + Arg::with_name("INPUT") + .help("Specify the input device to check") + .required(true) + .index(1), + ); + + let matches = parser.get_matches(); + let node_path = matches + .value_of("NODE_PATH") + .map(|text| btree::decode_node_path(text).unwrap()); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + + explore(&input_file, node_path) +} + +//------------------------------------ diff --git a/src/io_engine.rs b/src/io_engine.rs index 61a65ec..c36c46c 100644 --- a/src/io_engine.rs +++ b/src/io_engine.rs @@ -1,10 +1,11 @@ -use anyhow::Result; use io_uring::opcode::{self, types}; use io_uring::IoUring; use std::alloc::{alloc, dealloc, Layout}; use std::fs::File; use std::fs::OpenOptions; +use std::io::Result; use std::io::{self, Read, Seek, Write}; +use std::ops::{Deref, DerefMut}; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::{AsRawFd, RawFd}; use std::path::Path; @@ -15,10 +16,10 @@ use std::sync::{Arc, Condvar, Mutex}; pub const BLOCK_SIZE: usize = 4096; const ALIGN: usize = 4096; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Block { pub loc: u64, - pub data: *mut u8, + data: *mut u8, } impl Block { @@ -49,10 +50,14 @@ unsafe impl Send for Block {} pub trait IoEngine { fn get_nr_blocks(&self) -> u64; - fn read(&self, block: &mut Block) -> Result<()>; - fn read_many(&self, blocks: &mut [Block]) -> Result<()>; + 
+ fn read(&self, b: u64) -> Result; + // The whole io could fail, or individual blocks + fn read_many(&self, blocks: &[u64]) -> Result>>; + fn write(&self, block: &Block) -> Result<()>; - fn write_many(&self, blocks: &[Block]) -> Result<()>; + // The whole io could fail, or individual blocks + fn write_many(&self, blocks: &[Block]) -> Result>>; } fn get_nr_blocks(path: &Path) -> io::Result { @@ -68,19 +73,54 @@ pub struct SyncIoEngine { cvar: Condvar, } +struct FileGuard<'a> { + engine: &'a SyncIoEngine, + file: Option, +} + +impl<'a> FileGuard<'a> { + fn new(engine: &'a SyncIoEngine, file: File) -> FileGuard<'a> { + FileGuard { + engine, + file: Some(file), + } + } +} + +impl<'a> Deref for FileGuard<'a> { + type Target = File; + + fn deref(&self) -> &File { + &self.file.as_ref().expect("empty file guard") + } +} + +impl<'a> DerefMut for FileGuard<'a> { + fn deref_mut(&mut self) -> &mut File { + match &mut self.file { + None => { + todo!(); + } + Some(f) => f, + } + } +} + +impl<'a> Drop for FileGuard<'a> { + fn drop(&mut self) { + self.engine.put(self.file.take().expect("empty file guard")); + } +} + impl SyncIoEngine { fn open_file(path: &Path, writeable: bool) -> Result { - let file = OpenOptions::new() - .read(true) - .write(writeable) - .custom_flags(libc::O_DIRECT) - .open(path)?; + let file = OpenOptions::new().read(true).write(writeable).open(path)?; Ok(file) } pub fn new(path: &Path, nr_files: usize, writeable: bool) -> Result { - let mut files = Vec::new(); + let mut files = Vec::with_capacity(nr_files); for _n in 0..nr_files { files.push(SyncIoEngine::open_file(path, writeable)?); } @@ -92,13 +132,14 @@ impl SyncIoEngine { }) } - fn get(&self) -> File { + fn get(&self) -> FileGuard { let mut files = self.files.lock().unwrap(); while files.len() == 0 { files = self.cvar.wait(files).unwrap(); } - files.pop().unwrap() + + FileGuard::new(self, files.pop().unwrap()) } fn put(&self, f: File) { @@ -106,6 +147,19 @@ impl SyncIoEngine { files.push(f); 
self.cvar.notify_one(); } + + fn read_(input: &mut File, loc: u64) -> Result { + let b = Block::new(loc); + input.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; + input.read_exact(b.get_data())?; + Ok(b) + } + + fn write_(output: &mut File, b: &Block) -> Result<()> { + output.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; + output.write_all(&b.get_data())?; + Ok(()) + } } impl IoEngine for SyncIoEngine { @@ -113,44 +167,30 @@ impl IoEngine for SyncIoEngine { self.nr_blocks } - fn read(&self, b: &mut Block) -> Result<()> { - let mut input = self.get(); - input.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; - input.read_exact(&mut b.get_data())?; - self.put(input); - - Ok(()) + fn read(&self, loc: u64) -> Result { + SyncIoEngine::read_(&mut self.get(), loc) } - fn read_many(&self, blocks: &mut [Block]) -> Result<()> { + fn read_many(&self, blocks: &[u64]) -> Result>> { let mut input = self.get(); + let mut bs = Vec::new(); for b in blocks { - input.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; - input.read_exact(&mut b.get_data())?; + bs.push(SyncIoEngine::read_(&mut input, *b)); } - self.put(input); - - Ok(()) + Ok(bs) } fn write(&self, b: &Block) -> Result<()> { - let mut input = self.get(); - input.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; - input.write_all(&b.get_data())?; - self.put(input); - - Ok(()) + SyncIoEngine::write_(&mut self.get(), b) } - fn write_many(&self, blocks: &[Block]) -> Result<()> { - let mut input = self.get(); + fn write_many(&self, blocks: &[Block]) -> Result>> { + let mut output = self.get(); + let mut bs = Vec::new(); for b in blocks { - input.seek(io::SeekFrom::Start(b.loc * BLOCK_SIZE as u64))?; - input.write_all(&b.get_data())?; + bs.push(SyncIoEngine::write_(&mut output, b)); } - self.put(input); - - Ok(()) + Ok(bs) } } @@ -188,19 +228,21 @@ impl AsyncIoEngine { } // FIXME: refactor next two fns - fn read_many_(&self, blocks: &mut [Block]) -> Result<()> { + fn read_many_(&self, blocks: 
Vec) -> Result>> { + use std::io::*; + let mut inner = self.inner.lock().unwrap(); let count = blocks.len(); let fd = types::Target::Fd(inner.input.as_raw_fd()); - for b in blocks.iter_mut() { + for (i, b) in blocks.iter().enumerate() { let read_e = opcode::Read::new(fd, b.data, BLOCK_SIZE as u32) .offset(b.loc as i64 * BLOCK_SIZE as i64); unsafe { let mut queue = inner.ring.submission().available(); queue - .push(read_e.build().user_data(1)) + .push(read_e.build().user_data(i as u64)) .ok() .expect("queue is full"); } @@ -208,30 +250,52 @@ impl AsyncIoEngine { inner.ring.submit_and_wait(count)?; - let cqes = inner.ring.completion().available().collect::>(); + let mut cqes = inner.ring.completion().available().collect::>(); - // FIXME: return proper errors - assert_eq!(cqes.len(), count); - for c in &cqes { - assert_eq!(c.result(), BLOCK_SIZE as i32); + if cqes.len() != count { + return Err(Error::new( + ErrorKind::Other, + "insufficient io_uring completions", + )); } - Ok(()) + // reorder cqes + cqes.sort_by(|a, b| a.user_data().partial_cmp(&b.user_data()).unwrap()); + + let mut rs = Vec::new(); + let mut i = 0; + for b in blocks { + let c = &cqes[i]; + i += 1; + + let r = c.result(); + if r < 0 { + let error = Error::from_raw_os_error(-r); + rs.push(Err(error)); + } else if c.result() != BLOCK_SIZE as i32 { + rs.push(Err(Error::new(ErrorKind::UnexpectedEof, "short read"))); + } else { + rs.push(Ok(b)); + } + } + Ok(rs) } - fn write_many_(&self, blocks: &[Block]) -> Result<()> { + fn write_many_(&self, blocks: &[Block]) -> Result>> { + use std::io::*; + let mut inner = self.inner.lock().unwrap(); let count = blocks.len(); let fd = types::Target::Fd(inner.input.as_raw_fd()); - for b in blocks.iter() { + for (i, b) in blocks.iter().enumerate() { let write_e = opcode::Write::new(fd, b.data, BLOCK_SIZE as u32) .offset(b.loc as i64 * BLOCK_SIZE as i64); unsafe { let mut queue = inner.ring.submission().available(); queue - .push(write_e.build().user_data(1)) + 
.push(write_e.build().user_data(i as u64)) .ok() .expect("queue is full"); } @@ -239,15 +303,24 @@ impl AsyncIoEngine { inner.ring.submit_and_wait(count)?; - let cqes = inner.ring.completion().available().collect::>(); + let mut cqes = inner.ring.completion().available().collect::>(); - // FIXME: return proper errors - assert_eq!(cqes.len(), count); - for c in &cqes { - assert_eq!(c.result(), BLOCK_SIZE as i32); + // reorder cqes + cqes.sort_by(|a, b| a.user_data().partial_cmp(&b.user_data()).unwrap()); + + let mut rs = Vec::new(); + for c in cqes { + let r = c.result(); + if r < 0 { + let error = Error::from_raw_os_error(-r); + rs.push(Err(error)); + } else if r != BLOCK_SIZE as i32 { + rs.push(Err(Error::new(ErrorKind::UnexpectedEof, "short write"))); + } else { + rs.push(Ok(())); + } } - - Ok(()) + Ok(rs) } } @@ -273,16 +346,17 @@ impl IoEngine for AsyncIoEngine { inner.nr_blocks } - fn read(&self, b: &mut Block) -> Result<()> { + fn read(&self, b: u64) -> Result { let mut inner = self.inner.lock().unwrap(); let fd = types::Target::Fd(inner.input.as_raw_fd()); + let b = Block::new(b); let read_e = opcode::Read::new(fd, b.data, BLOCK_SIZE as u32) .offset(b.loc as i64 * BLOCK_SIZE as i64); unsafe { let mut queue = inner.ring.submission().available(); queue - .push(read_e.build().user_data(1)) + .push(read_e.build().user_data(0)) .ok() .expect("queue is full"); } @@ -291,26 +365,34 @@ impl IoEngine for AsyncIoEngine { let cqes = inner.ring.completion().available().collect::>(); - // FIXME: return proper errors - assert_eq!(cqes.len(), 1); - assert_eq!(cqes[0].user_data(), 1); - assert_eq!(cqes[0].result(), BLOCK_SIZE as i32); - - Ok(()) + let r = cqes[0].result(); + use std::io::*; + if r < 0 { + let error = Error::from_raw_os_error(-r); + Err(error) + } else if r != BLOCK_SIZE as i32 { + Err(Error::new(ErrorKind::UnexpectedEof, "short write")) + } else { + Ok(b) + } } - fn read_many(&self, blocks: &mut [Block]) -> Result<()> { + fn read_many(&self, blocks: &[u64]) 
-> Result>> { let inner = self.inner.lock().unwrap(); let queue_len = inner.queue_len as usize; drop(inner); - let mut done = 0; - while done != blocks.len() { - let len = usize::min(blocks.len() - done, queue_len); - self.read_many_(&mut blocks[done..(done + len)])?; - done += len; + let mut results = Vec::new(); + for cs in blocks.chunks(queue_len) { + let mut bs = Vec::new(); + for b in cs { + bs.push(Block::new(*b)); + } + + results.append(&mut self.read_many_(bs)?); } - Ok(()) + + Ok(results) } fn write(&self, b: &Block) -> Result<()> { @@ -322,7 +404,7 @@ impl IoEngine for AsyncIoEngine { unsafe { let mut queue = inner.ring.submission().available(); queue - .push(write_e.build().user_data(1)) + .push(write_e.build().user_data(0)) .ok() .expect("queue is full"); } @@ -331,26 +413,32 @@ impl IoEngine for AsyncIoEngine { let cqes = inner.ring.completion().available().collect::>(); - // FIXME: return proper errors - assert_eq!(cqes.len(), 1); - assert_eq!(cqes[0].user_data(), 1); - assert_eq!(cqes[0].result(), BLOCK_SIZE as i32); - - Ok(()) + let r = cqes[0].result(); + use std::io::*; + if r < 0 { + let error = Error::from_raw_os_error(-r); + Err(error) + } else if r != BLOCK_SIZE as i32 { + Err(Error::new(ErrorKind::UnexpectedEof, "short write")) + } else { + Ok(()) + } } - fn write_many(&self, blocks: &[Block]) -> Result<()> { + fn write_many(&self, blocks: &[Block]) -> Result>> { let inner = self.inner.lock().unwrap(); let queue_len = inner.queue_len as usize; drop(inner); + let mut results = Vec::new(); let mut done = 0; while done != blocks.len() { let len = usize::min(blocks.len() - done, queue_len); - self.write_many_(&blocks[done..(done + len)])?; + results.append(&mut self.write_many_(&blocks[done..(done + len)])?); done += len; } - Ok(()) + + Ok(results) } } diff --git a/src/pack/delta_list.rs b/src/pack/delta_list.rs index 6a8a7e1..bd14330 100644 --- a/src/pack/delta_list.rs +++ b/src/pack/delta_list.rs @@ -13,7 +13,7 @@ use Delta::*; pub fn 
to_delta(ns: &[u64]) -> Vec { use std::cmp::Ordering::*; - let mut ds = Vec::new(); + let mut ds = Vec::with_capacity(ns.len()); if !ns.is_empty() { let mut base = ns[0]; diff --git a/src/pack/mod.rs b/src/pack/mod.rs index e8ed451..27df5f2 100644 --- a/src/pack/mod.rs +++ b/src/pack/mod.rs @@ -1,5 +1,5 @@ +pub mod node_encode; pub mod toplevel; +pub mod vm; mod delta_list; -mod node_encode; -mod vm; diff --git a/src/pack/node_encode.rs b/src/pack/node_encode.rs index a8a54a6..466fd40 100644 --- a/src/pack/node_encode.rs +++ b/src/pack/node_encode.rs @@ -1,5 +1,5 @@ -use thiserror::Error; use std::{io, io::Write}; +use thiserror::Error; use nom::{bytes::complete::*, number::complete::*, IResult}; @@ -23,7 +23,7 @@ fn nom_to_pr(r: IResult<&[u8], T>) -> PResult<(&[u8], T)> { } fn io_to_pr(r: io::Result) -> PResult { - r.map_err(|source| PackError::WriteError {source}) + r.map_err(|source| PackError::WriteError { source }) } //------------------------------------------- @@ -36,7 +36,7 @@ fn run64(i: &[u8], count: usize) -> IResult<&[u8], Vec> { struct NodeSummary { is_leaf: bool, max_entries: usize, - value_size: usize + value_size: usize, } fn summarise_node(data: &[u8]) -> IResult<&[u8], NodeSummary> { @@ -47,11 +47,14 @@ fn summarise_node(data: &[u8]) -> IResult<&[u8], NodeSummary> { let (i, max_entries) = le_u32(i)?; let (i, value_size) = le_u32(i)?; let (i, _padding) = le_u32(i)?; - Ok((i, NodeSummary { - is_leaf: flags == 2, - max_entries: max_entries as usize, - value_size: value_size as usize, - })) + Ok(( + i, + NodeSummary { + is_leaf: flags == 2, + max_entries: max_entries as usize, + value_size: value_size as usize, + }, + )) } pub fn pack_btree_node(w: &mut W, data: &[u8]) -> PResult<()> { diff --git a/src/pack/vm.rs b/src/pack/vm.rs index 4086640..8680a43 100644 --- a/src/pack/vm.rs +++ b/src/pack/vm.rs @@ -146,8 +146,8 @@ pub fn pack_u64s(w: &mut W, ns: &[u64]) -> io::Result<()> { } fn unshift_nrs(shift: usize, ns: &[u64]) -> (Vec, Vec) { - let mut 
values = Vec::new(); - let mut shifts = Vec::new(); + let mut values = Vec::with_capacity(ns.len()); + let mut shifts = Vec::with_capacity(ns.len()); let mask = (1 << shift) - 1; for n in ns { @@ -206,8 +206,8 @@ fn unpack_with_width(r: &mut R, nibble: u8) -> io::Result { Ok(v) } -fn unpack_u64s(r: &mut R, count: usize) -> io::Result> { - let mut v = Vec::new(); +pub fn unpack_u64s(r: &mut R, count: usize) -> io::Result> { + let mut v = Vec::with_capacity(count); for _ in 0..count { let n = r.read_u64::()?; v.push(n); @@ -215,13 +215,13 @@ fn unpack_u64s(r: &mut R, count: usize) -> io::Result> { Ok(v) } -struct VM { +pub struct VM { base: u64, bytes_written: usize, } impl VM { - fn new() -> VM { + pub fn new() -> VM { VM { base: 0, bytes_written: 0, @@ -356,7 +356,7 @@ impl VM { } // Runs until at least a number of bytes have been emitted. Returns nr emitted. - fn exec( + pub fn exec( &mut self, r: &mut R, w: &mut W, diff --git a/src/pdata/btree.rs b/src/pdata/btree.rs index d22956d..36f6205 100644 --- a/src/pdata/btree.rs +++ b/src/pdata/btree.rs @@ -1,50 +1,424 @@ -use anyhow::{anyhow, Result}; +use anyhow::anyhow; +use byteorder::{ReadBytesExt, WriteBytesExt}; +use data_encoding::BASE64; use nom::{number::complete::*, IResult}; use std::collections::BTreeMap; +use std::fmt; use std::sync::{Arc, Mutex}; +use thiserror::Error; +use threadpool::ThreadPool; use crate::checksum; use crate::io_engine::*; +use crate::pack::vm; use crate::pdata::space_map::*; use crate::pdata::unpack::*; -// FIXME: check that keys are in ascending order between nodes. 
+//------------------------------------------ + +#[derive(Clone, Debug, PartialEq)] +pub struct KeyRange { + start: Option, + end: Option, // This is the one-past-the-end value +} + +impl fmt::Display for KeyRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match (self.start, self.end) { + (None, None) => write!(f, "[..]"), + (None, Some(e)) => write!(f, "[..{}]", e), + (Some(s), None) => write!(f, "[{}..]", s), + (Some(s), Some(e)) => write!(f, "[{}..{}]", s, e), + } + } +} + +impl KeyRange { + // None will be returned if either range would be zero length + fn split(&self, n: u64) -> Option<(KeyRange, KeyRange)> { + match (self.start, self.end) { + (None, None) => Some(( + KeyRange { + start: None, + end: Some(n), + }, + KeyRange { + start: Some(n), + end: None, + }, + )), + (None, Some(e)) => { + if n < e { + Some(( + KeyRange { + start: None, + end: Some(n), + }, + KeyRange { + start: Some(n), + end: Some(e), + }, + )) + } else { + None + } + } + (Some(s), None) => { + if s < n { + Some(( + KeyRange { + start: Some(s), + end: Some(n), + }, + KeyRange { + start: Some(n), + end: None, + }, + )) + } else { + None + } + } + (Some(s), Some(e)) => { + if s < n && n < e { + Some(( + KeyRange { + start: Some(s), + end: Some(n), + }, + KeyRange { + start: Some(n), + end: Some(e), + }, + )) + } else { + None + } + } + } + } +} + +#[test] +fn test_split_range() { + struct Test(Option, Option, u64, Option<(KeyRange, KeyRange)>); + + let tests = vec![ + Test( + None, + None, + 100, + Some(( + KeyRange { + start: None, + end: Some(100), + }, + KeyRange { + start: Some(100), + end: None, + }, + )), + ), + Test(None, Some(100), 1000, None), + Test( + None, + Some(100), + 50, + Some(( + KeyRange { + start: None, + end: Some(50), + }, + KeyRange { + start: Some(50), + end: Some(100), + }, + )), + ), + Test(None, Some(100), 100, None), + Test(Some(100), None, 50, None), + Test( + Some(100), + None, + 150, + Some(( + KeyRange { + start: Some(100), + end: 
Some(150), + }, + KeyRange { + start: Some(150), + end: None, + }, + )), + ), + Test(Some(100), Some(200), 50, None), + Test(Some(100), Some(200), 250, None), + Test( + Some(100), + Some(200), + 150, + Some(( + KeyRange { + start: Some(100), + end: Some(150), + }, + KeyRange { + start: Some(150), + end: Some(200), + }, + )), + ), + ]; + + for Test(start, end, n, expected) in tests { + let kr = KeyRange { start, end }; + let actual = kr.split(n); + assert_eq!(actual, expected); + } +} + +fn split_one(path: &Vec, kr: &KeyRange, k: u64) -> Result<(KeyRange, KeyRange)> { + match kr.split(k) { + None => { + return Err(node_err( + path, + &format!("couldn't split key range {} at {}", kr, k), + )); + } + Some(pair) => Ok(pair), + } +} + +fn split_key_ranges(path: &Vec, kr: &KeyRange, keys: &[u64]) -> Result> { + let mut krs = Vec::with_capacity(keys.len()); + + if keys.len() == 0 { + return Err(node_err(path, "split_key_ranges: no keys present")); + } + + // The first key gives the lower bound + let mut kr = KeyRange { + start: Some(keys[0]), + end: kr.end, + }; + + for i in 1..keys.len() { + let (first, rest) = split_one(path, &kr, keys[i])?; + krs.push(first); + kr = rest; + } + + krs.push(kr); + + Ok(krs) +} + +//------------------------------------------ + +// We compress and base64 encode paths to make them easy to +// cut and paste between programs (eg, thin_explore -p ) +pub fn encode_node_path(path: &[u64]) -> String { + let mut buffer: Vec = Vec::with_capacity(128); + let mut cursor = std::io::Cursor::new(&mut buffer); + assert!(path.len() < 256); + + // The first entry is normally the superblock (0), so we + // special case this. 
+ if path.len() > 0 && path[0] == 0 { + let count = ((path.len() as u8) - 1) << 1; + cursor.write_u8(count as u8).unwrap(); + vm::pack_u64s(&mut cursor, &path[1..]).unwrap(); + } else { + let count = ((path.len() as u8) << 1) | 1; + cursor.write_u8(count as u8).unwrap(); + vm::pack_u64s(&mut cursor, path).unwrap(); + } + + BASE64.encode(&buffer) +} + +pub fn decode_node_path(text: &str) -> anyhow::Result> { + let mut buffer = vec![0; 128]; + let bytes = &mut buffer[0..BASE64.decode_len(text.len()).unwrap()]; + BASE64 + .decode_mut(text.as_bytes(), &mut bytes[0..]) + .map_err(|_| anyhow!("bad node path. Unable to base64 decode."))?; + + let mut input = std::io::Cursor::new(bytes); + + let mut count = input.read_u8()?; + let mut prepend_zero = false; + if (count & 0x1) == 0 { + // Implicit 0 as first entry + prepend_zero = true; + } + count >>= 1; + + let count = count as usize; + let mut path; + if count == 0 { + path = vec![]; + } else { + let mut output = Vec::with_capacity(count * 8); + let mut cursor = std::io::Cursor::new(&mut output); + + let mut vm = vm::VM::new(); + let written = vm.exec(&mut input, &mut cursor, count * 8)?; + assert_eq!(written, count * 8); + + let mut cursor = std::io::Cursor::new(&mut output); + path = vm::unpack_u64s(&mut cursor, count)?; + } + + if prepend_zero { + let mut full_path = vec![0u64]; + full_path.append(&mut path); + Ok(full_path) + } else { + Ok(path) + } +} + +#[test] +fn test_encode_path() { + struct Test(Vec); + + let tests = vec![ + Test(vec![]), + Test(vec![1]), + Test(vec![1, 2]), + Test(vec![1, 2, 3, 4]), + Test(vec![0]), + Test(vec![0, 0]), + Test(vec![0, 1]), + Test(vec![0, 1, 2]), + Test(vec![0, 123, 201231, 3102983012]), + ]; + + for t in tests { + let encoded = encode_node_path(&t.0[0..]); + let decoded = decode_node_path(&encoded).unwrap(); + assert_eq!(decoded, &t.0[0..]); + } +} //------------------------------------------ const NODE_HEADER_SIZE: usize = 32; +#[derive(Error, Clone, Debug)] +pub enum 
BTreeError { + // #[error("io error")] + IoError, // (std::io::Error), // FIXME: we can't clone an io_error + + // #[error("node error: {0}")] + NodeError(String), + + // #[error("value error: {0}")] + ValueError(String), + + // #[error("keys: {0:?}")] + KeyContext(KeyRange, Box), + + // #[error("aggregate: {0:?}")] + Aggregate(Vec), + + // #[error("{0:?}, {1}")] + Path(Vec, Box), +} + +impl fmt::Display for BTreeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BTreeError::IoError => write!(f, "io error"), + BTreeError::NodeError(msg) => write!(f, "node error: {}", msg), + BTreeError::ValueError(msg) => write!(f, "value error: {}", msg), + BTreeError::KeyContext(kr, be) => write!(f, "{}, effecting keys {}", be, kr), + BTreeError::Aggregate(errs) => { + for e in errs { + write!(f, "{}", e)? + } + Ok(()) + } + BTreeError::Path(path, e) => write!(f, "{} {}", e, encode_node_path(path)), + } + } +} +pub fn node_err(path: &Vec, msg: &str) -> BTreeError { + BTreeError::Path( + path.clone(), + Box::new(BTreeError::NodeError(msg.to_string())), + ) +} + +fn node_err_s(path: &Vec, msg: String) -> BTreeError { + BTreeError::Path(path.clone(), Box::new(BTreeError::NodeError(msg))) +} + +pub fn io_err(path: &Vec) -> BTreeError { + BTreeError::Path(path.clone(), Box::new(BTreeError::IoError)) +} + +pub fn value_err(msg: String) -> BTreeError { + BTreeError::ValueError(msg) +} + +pub fn aggregate_error(rs: Vec) -> BTreeError { + BTreeError::Aggregate(rs) +} + +impl BTreeError { + pub fn keys_context(self, keys: &KeyRange) -> BTreeError { + BTreeError::KeyContext(keys.clone(), Box::new(self)) + } +} + +pub type Result = std::result::Result; + +//------------------------------------------ + +#[derive(Debug, Clone, Copy)] pub struct NodeHeader { - is_leaf: bool, - nr_entries: u32, - max_entries: u32, - value_size: u32, + pub block: u64, + pub is_leaf: bool, + pub nr_entries: u32, + pub max_entries: u32, + pub value_size: u32, } #[allow(dead_code)] 
const INTERNAL_NODE: u32 = 1; const LEAF_NODE: u32 = 2; -pub fn unpack_node_header(data: &[u8]) -> IResult<&[u8], NodeHeader> { - let (i, _csum) = le_u32(data)?; - let (i, flags) = le_u32(i)?; - let (i, _block) = le_u64(i)?; - let (i, nr_entries) = le_u32(i)?; - let (i, max_entries) = le_u32(i)?; - let (i, value_size) = le_u32(i)?; - let (i, _padding) = le_u32(i)?; +impl Unpack for NodeHeader { + fn disk_size() -> u32 { + 32 + } - Ok(( - i, - NodeHeader { - is_leaf: flags == LEAF_NODE, - nr_entries, - max_entries, - value_size, - }, - )) + fn unpack(data: &[u8]) -> IResult<&[u8], NodeHeader> { + let (i, _csum) = le_u32(data)?; + let (i, flags) = le_u32(i)?; + let (i, block) = le_u64(i)?; + let (i, nr_entries) = le_u32(i)?; + let (i, max_entries) = le_u32(i)?; + let (i, value_size) = le_u32(i)?; + let (i, _padding) = le_u32(i)?; + + Ok(( + i, + NodeHeader { + block, + is_leaf: flags == LEAF_NODE, + nr_entries, + max_entries, + value_size, + }, + )) + } } +#[derive(Clone)] pub enum Node { Internal { header: NodeHeader, @@ -58,65 +432,84 @@ pub enum Node { }, } -pub fn node_err(msg: String) -> Result { - let msg = format!("btree node error: {}", msg); - Err(anyhow!(msg)) -} - -pub fn to_any<'a, V>(r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> { - if let Ok((i, v)) = r { - Ok((i, v)) - } else { - Err(anyhow!("btree node error: parse error")) +impl Node { + pub fn get_header(&self) -> &NodeHeader { + use Node::*; + match self { + Internal { header, .. } => header, + Leaf { header, .. 
} => header, + } } } +pub fn convert_result<'a, V>(path: &Vec, r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> { + r.map_err(|_e| node_err(path, "parse error")) +} + +pub fn convert_io_err(path: &Vec, r: std::io::Result) -> Result { + r.map_err(|_| io_err(path)) +} + pub fn unpack_node( + path: &Vec, data: &[u8], ignore_non_fatal: bool, is_root: bool, ) -> Result> { use nom::multi::count; - let (i, header) = to_any(unpack_node_header(data))?; + let (i, header) = + NodeHeader::unpack(data).map_err(|_e| node_err(path, "couldn't parse node header"))?; if header.is_leaf && header.value_size != V::disk_size() { - return node_err(format!( - "value_size mismatch: expected {}, was {}", - V::disk_size(), - header.value_size + return Err(node_err_s( + path, + format!( + "value_size mismatch: expected {}, was {}", + V::disk_size(), + header.value_size + ), )); } let elt_size = header.value_size + 8; if elt_size as usize * header.max_entries as usize + NODE_HEADER_SIZE > BLOCK_SIZE { - return node_err(format!("max_entries is too large ({})", header.max_entries)); + return Err(node_err_s( + path, + format!("max_entries is too large ({})", header.max_entries), + )); } if header.nr_entries > header.max_entries { - return node_err("nr_entries > max_entries".to_string()); + return Err(node_err(path, "nr_entries > max_entries")); } if !ignore_non_fatal { if header.max_entries % 3 != 0 { - return node_err("max_entries is not divisible by 3".to_string()); + return Err(node_err(path, "max_entries is not divisible by 3")); } if !is_root { let min = header.max_entries / 3; if header.nr_entries < min { - return node_err("too few entries".to_string()); + return Err(node_err_s( + path, + format!( + "too few entries {}, expected at least {}", + header.nr_entries, min + ), + )); } } } - let (i, keys) = to_any(count(le_u64, header.nr_entries as usize)(i))?; + let (i, keys) = convert_result(path, count(le_u64, header.nr_entries as usize)(i))?; let mut last = None; for k in &keys { if let 
Some(l) = last { if k <= l { - return node_err("keys out of order".to_string()); + return Err(node_err(path, "keys out of order")); } } @@ -124,10 +517,10 @@ pub fn unpack_node( } let nr_free = header.max_entries - header.nr_entries; - let (i, _padding) = to_any(count(le_u64, nr_free as usize)(i))?; + let (i, _padding) = convert_result(path, count(le_u64, nr_free as usize)(i))?; if header.is_leaf { - let (_i, values) = to_any(count(V::unpack, header.nr_entries as usize)(i))?; + let (_i, values) = convert_result(path, count(V::unpack, header.nr_entries as usize)(i))?; Ok(Node::Leaf { header, @@ -135,7 +528,7 @@ pub fn unpack_node( values, }) } else { - let (_i, values) = to_any(count(le_u64, header.nr_entries as usize)(i))?; + let (_i, values) = convert_result(path, count(le_u64, header.nr_entries as usize)(i))?; Ok(Node::Internal { header, keys, @@ -147,13 +540,29 @@ pub fn unpack_node( //------------------------------------------ pub trait NodeVisitor { - fn visit(&mut self, w: &BTreeWalker, b: &Block, node: &Node) -> Result<()>; + // &self is deliberately non mut to allow the walker to use multiple threads. + fn visit( + &self, + path: &Vec, + keys: &KeyRange, + header: &NodeHeader, + keys: &[u64], + values: &[V], + ) -> Result<()>; + + // Nodes may be shared and thus visited multiple times. The walker avoids + // doing repeated IO, but it does call this method to keep the visitor up to + // date. 
+ fn visit_again(&self, path: &Vec, b: u64) -> Result<()>; + + fn end_walk(&self) -> Result<()>; } #[derive(Clone)] pub struct BTreeWalker { - pub engine: Arc, - pub sm: Arc>, + engine: Arc, + sm: Arc>, + fails: Arc>>, ignore_non_fatal: bool, } @@ -163,6 +572,7 @@ impl BTreeWalker { let r: BTreeWalker = BTreeWalker { engine, sm: Arc::new(Mutex::new(RestrictedSpaceMap::new(nr_blocks as u64))), + fails: Arc::new(Mutex::new(BTreeMap::new())), ignore_non_fatal, }; r @@ -175,153 +585,519 @@ impl BTreeWalker { ) -> Result { { let sm = sm.lock().unwrap(); - assert_eq!(sm.get_nr_blocks()?, engine.get_nr_blocks()); + assert_eq!(sm.get_nr_blocks().unwrap(), engine.get_nr_blocks()); } Ok(BTreeWalker { engine, sm, + fails: Arc::new(Mutex::new(BTreeMap::new())), ignore_non_fatal, }) } - // Atomically increments the ref count, and returns the _old_ count. - fn sm_inc(&self, b: u64) -> Result { - let mut sm = self.sm.lock().unwrap(); - let count = sm.get(b)?; - sm.inc(b, 1)?; - Ok(count) + fn failed(&self, b: u64) -> Option { + let fails = self.fails.lock().unwrap(); + match fails.get(&b) { + None => None, + Some(e) => Some(e.clone()), + } } - fn walk_nodes(&mut self, visitor: &mut NV, bs: &[u64]) -> Result<()> + fn set_fail(&self, b: u64, err: BTreeError) { + // FIXME: should we monitor the size of fails, and abort if too many errors? + let mut fails = self.fails.lock().unwrap(); + fails.insert(b, err); + } + + // Atomically increments the ref count, and returns the _old_ count. 
+ fn sm_inc(&self, b: u64) -> u32 { + let mut sm = self.sm.lock().unwrap(); + let count = sm.get(b).unwrap(); + sm.inc(b, 1).unwrap(); + count + } + + fn build_aggregate(&self, b: u64, errs: Vec) -> Result<()> { + match errs.len() { + 0 => Ok(()), + 1 => { + let e = errs[0].clone(); + self.set_fail(b, e.clone()); + Err(e) + } + _ => { + let e = aggregate_error(errs); + self.set_fail(b, e.clone()); + Err(e) + } + } + } + + fn walk_nodes( + &self, + path: &mut Vec, + visitor: &NV, + krs: &[KeyRange], + bs: &[u64], + ) -> Vec where NV: NodeVisitor, V: Unpack, { - let mut blocks = Vec::new(); - for b in bs { - if self.sm_inc(*b)? == 0 { - blocks.push(Block::new(*b)); + assert_eq!(krs.len(), bs.len()); + let mut errs: Vec = Vec::new(); + + let mut blocks = Vec::with_capacity(bs.len()); + let mut filtered_krs = Vec::with_capacity(krs.len()); + for i in 0..bs.len() { + if self.sm_inc(bs[i]) == 0 { + // Node not yet seen + blocks.push(bs[i]); + filtered_krs.push(krs[i].clone()); + } else { + // This node has already been checked ... + match self.failed(bs[i]) { + None => { + // ... it was clean. + if let Err(e) = visitor.visit_again(path, bs[i]) { + // ... but the visitor isn't happy + errs.push(e.clone()); + } + } + Some(e) => { + // ... 
there was an error + errs.push(e.clone()); + } + } } } - self.engine.read_many(&mut blocks)?; + match self.engine.read_many(&blocks[0..]) { + Err(_) => { + // IO completely failed, error every block + for (i, b) in blocks.iter().enumerate() { + let e = io_err(path).keys_context(&filtered_krs[i]); + errs.push(e.clone()); + self.set_fail(*b, e); + } + } + Ok(rblocks) => { + let mut i = 0; + for rb in rblocks { + match rb { + Err(_) => { + let e = io_err(path).keys_context(&filtered_krs[i]); + errs.push(e.clone()); + self.set_fail(blocks[i], e); + } + Ok(b) => match self.walk_node(path, visitor, &filtered_krs[i], &b, false) { + Err(e) => { + errs.push(e); + } + Ok(()) => {} + }, + } - for b in blocks { - self.walk_node(visitor, &b, false)?; + i += 1; + } + } } - Ok(()) + errs } - fn walk_node(&mut self, visitor: &mut NV, b: &Block, is_root: bool) -> Result<()> + fn walk_node_( + &self, + path: &mut Vec, + visitor: &NV, + kr: &KeyRange, + b: &Block, + is_root: bool, + ) -> Result<()> where NV: NodeVisitor, V: Unpack, { + use Node::*; + let bt = checksum::metadata_block_type(b.get_data()); if bt != checksum::BT::NODE { - return Err(anyhow!("checksum failed for node {}, {:?}", b.loc, bt)); + return Err(node_err_s( + path, + format!("checksum failed for node {}, {:?}", b.loc, bt), + ) + .keys_context(kr)); } - let node = unpack_node::(&b.get_data(), self.ignore_non_fatal, is_root)?; - visitor.visit(self, &b, &node)?; + let node = unpack_node::(path, &b.get_data(), self.ignore_non_fatal, is_root)?; - if let Node::Internal { - header: _h, - keys: _k, - values, - } = node - { - self.walk_nodes(visitor, &values)?; + match node { + Internal { keys, values, .. 
} => { + let krs = split_key_ranges(path, &kr, &keys)?; + let errs = self.walk_nodes(path, visitor, &krs, &values); + return self.build_aggregate(b.loc, errs); + } + Leaf { + header, + keys, + values, + } => { + if let Err(e) = visitor.visit(path, &kr, &header, &keys, &values) { + let e = BTreeError::Path(path.clone(), Box::new(e.clone())); + self.set_fail(b.loc, e.clone()); + return Err(e); + } + } } Ok(()) } - pub fn walk_b(&mut self, visitor: &mut NV, root: &Block) -> Result<()> + fn walk_node( + &self, + path: &mut Vec, + visitor: &NV, + kr: &KeyRange, + b: &Block, + is_root: bool, + ) -> Result<()> where NV: NodeVisitor, V: Unpack, { - if self.sm_inc(root.loc)? > 0 { - Ok(()) + path.push(b.loc); + let r = self.walk_node_(path, visitor, kr, b, is_root); + path.pop(); + visitor.end_walk()?; + r + } + + pub fn walk(&self, path: &mut Vec, visitor: &NV, root: u64) -> Result<()> + where + NV: NodeVisitor, + V: Unpack, + { + if self.sm_inc(root) > 0 { + if let Some(e) = self.failed(root) { + Err(e.clone()) + } else { + visitor.visit_again(path, root) + } } else { - self.walk_node(visitor, &root, true) + let root = self.engine.read(root).map_err(|_| io_err(path))?; + let kr = KeyRange { + start: None, + end: None, + }; + self.walk_node(path, visitor, &kr, &root, true) + } + } +} + +//-------------------------------- + +fn walk_node_threaded_( + w: Arc, + path: &mut Vec, + pool: &ThreadPool, + visitor: Arc, + kr: &KeyRange, + b: &Block, + is_root: bool, +) -> Result<()> +where + NV: NodeVisitor + Send + Sync + 'static, + V: Unpack, +{ + use Node::*; + + let bt = checksum::metadata_block_type(b.get_data()); + if bt != checksum::BT::NODE { + return Err(node_err_s( + path, + format!("checksum failed for node {}, {:?}", b.loc, bt), + ) + .keys_context(kr)); + } + + let node = unpack_node::(path, &b.get_data(), w.ignore_non_fatal, is_root)?; + + match node { + Internal { keys, values, .. 
} => { + let krs = split_key_ranges(path, &kr, &keys)?; + let errs = walk_nodes_threaded(w.clone(), path, pool, visitor, &krs, &values); + return w.build_aggregate(b.loc, errs); + } + Leaf { + header, + keys, + values, + } => { + visitor.visit(path, kr, &header, &keys, &values)?; } } - pub fn walk(&mut self, visitor: &mut NV, root: u64) -> Result<()> - where - NV: NodeVisitor, - V: Unpack, - { - if self.sm_inc(root)? > 0 { - Ok(()) + Ok(()) +} + +fn walk_node_threaded( + w: Arc, + path: &mut Vec, + pool: &ThreadPool, + visitor: Arc, + kr: &KeyRange, + b: &Block, + is_root: bool, +) -> Result<()> +where + NV: NodeVisitor + Send + Sync + 'static, + V: Unpack, +{ + path.push(b.loc); + let r = walk_node_threaded_(w, path, pool, visitor.clone(), kr, b, is_root); + path.pop(); + visitor.end_walk()?; + r +} + +fn walk_nodes_threaded( + w: Arc, + path: &mut Vec, + pool: &ThreadPool, + visitor: Arc, + krs: &[KeyRange], + bs: &[u64], +) -> Vec +where + NV: NodeVisitor + Send + Sync + 'static, + V: Unpack, +{ + assert_eq!(krs.len(), bs.len()); + let mut errs: Vec = Vec::new(); + + let mut blocks = Vec::with_capacity(bs.len()); + let mut filtered_krs = Vec::with_capacity(krs.len()); + for i in 0..bs.len() { + if w.sm_inc(bs[i]) == 0 { + // Node not yet seen + blocks.push(bs[i]); + filtered_krs.push(krs[i].clone()); } else { - let mut root = Block::new(root); - self.engine.read(&mut root)?; - self.walk_node(visitor, &root, true) + // This node has already been checked ... + match w.failed(bs[i]) { + None => { + // ... it was clean. + if let Err(e) = visitor.visit_again(path, bs[i]) { + // ... but the visitor isn't happy + errs.push(e.clone()); + } + } + Some(e) => { + // ... 
there was an error + errs.push(e.clone()); + } + } } } + + match w.engine.read_many(&blocks[0..]) { + Err(_) => { + // IO completely failed error every block + for (i, b) in blocks.iter().enumerate() { + let e = io_err(path).keys_context(&filtered_krs[i]); + errs.push(e.clone()); + w.set_fail(*b, e); + } + } + Ok(rblocks) => { + let mut i = 0; + let errs = Arc::new(Mutex::new(Vec::new())); + + for rb in rblocks { + match rb { + Err(_) => { + let e = io_err(path).keys_context(&filtered_krs[i]); + let mut errs = errs.lock().unwrap(); + errs.push(e.clone()); + w.set_fail(blocks[i], e); + } + Ok(b) => { + let w = w.clone(); + let visitor = visitor.clone(); + let kr = filtered_krs[i].clone(); + let errs = errs.clone(); + let mut path = path.clone(); + + pool.execute(move || { + match w.walk_node(&mut path, visitor.as_ref(), &kr, &b, false) { + Err(e) => { + let mut errs = errs.lock().unwrap(); + errs.push(e); + } + Ok(()) => {} + } + }); + } + } + + i += 1; + } + + pool.join(); + } + } + + errs +} + +pub fn walk_threaded( + path: &mut Vec, + w: Arc, + pool: &ThreadPool, + visitor: Arc, + root: u64, +) -> Result<()> +where + NV: NodeVisitor + Send + Sync + 'static, + V: Unpack, +{ + if w.sm_inc(root) > 0 { + if let Some(e) = w.failed(root) { + Err(e.clone()) + } else { + visitor.visit_again(path, root) + } + } else { + let root = w.engine.read(root).map_err(|_| io_err(path))?; + let kr = KeyRange { + start: None, + end: None, + }; + walk_node_threaded(w, path, pool, visitor, &kr, &root, true) + } } //------------------------------------------ struct ValueCollector { - values: BTreeMap, + values: Mutex>, } impl ValueCollector { fn new() -> ValueCollector { ValueCollector { - values: BTreeMap::new(), + values: Mutex::new(BTreeMap::new()), } } } -impl NodeVisitor for ValueCollector { - fn visit(&mut self, _w: &BTreeWalker, _b: &Block, node: &Node) -> Result<()> { - if let Node::Leaf { - header: _h, - keys, - values, - } = node - { - for n in 0..keys.len() { - let k = 
keys[n]; - let v = values[n].clone(); - self.values.insert(k, v); - } +// FIXME: should we be using Copy rather than clone? (Yes) +impl NodeVisitor for ValueCollector { + fn visit( + &self, + _path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + keys: &[u64], + values: &[V], + ) -> Result<()> { + let mut vals = self.values.lock().unwrap(); + for n in 0..keys.len() { + vals.insert(keys[n], values[n].clone()); } Ok(()) } + + fn visit_again(&self, _path: &Vec, _b: u64) -> Result<()> { + Ok(()) + } + + fn end_walk(&self) -> Result<()> { + Ok(()) + } } -pub fn btree_to_map( +pub fn btree_to_map( + _path: &mut Vec, engine: Arc, ignore_non_fatal: bool, root: u64, ) -> Result> { - let mut walker = BTreeWalker::new(engine, ignore_non_fatal); - let mut visitor = ValueCollector::::new(); - - walker.walk(&mut visitor, root)?; - Ok(visitor.values) + let walker = BTreeWalker::new(engine, ignore_non_fatal); + let visitor = ValueCollector::::new(); + let mut path = Vec::new(); + walker.walk(&mut path, &visitor, root)?; + Ok(visitor.values.into_inner().unwrap()) } -pub fn btree_to_map_with_sm( +pub fn btree_to_map_with_sm( + path: &mut Vec, engine: Arc, sm: Arc>, ignore_non_fatal: bool, root: u64, ) -> Result> { - let mut walker = BTreeWalker::new_with_sm(engine, sm, ignore_non_fatal)?; - let mut visitor = ValueCollector::::new(); + let walker = BTreeWalker::new_with_sm(engine, sm, ignore_non_fatal)?; + let visitor = ValueCollector::::new(); - walker.walk(&mut visitor, root)?; - Ok(visitor.values) + walker.walk(path, &visitor, root)?; + Ok(visitor.values.into_inner().unwrap()) +} + +//------------------------------------------ + +struct ValuePathCollector { + values: Mutex, V)>>, +} + +impl ValuePathCollector { + fn new() -> ValuePathCollector { + ValuePathCollector { + values: Mutex::new(BTreeMap::new()), + } + } +} + +impl NodeVisitor for ValuePathCollector { + fn visit( + &self, + path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + keys: &[u64], + values: &[V], + ) -> Result<()> { 
+ let mut vals = self.values.lock().unwrap(); + for n in 0..keys.len() { + vals.insert(keys[n], (path.clone(), values[n].clone())); + } + + Ok(()) + } + + fn visit_again(&self, _path: &Vec, _b: u64) -> Result<()> { + Ok(()) + } + + fn end_walk(&self) -> Result<()> { + Ok(()) + } +} + +pub fn btree_to_map_with_path( + path: &mut Vec, + engine: Arc, + sm: Arc>, + ignore_non_fatal: bool, + root: u64, +) -> Result, V)>> { + let walker = BTreeWalker::new_with_sm(engine, sm, ignore_non_fatal)?; + let visitor = ValuePathCollector::::new(); + + walker.walk(path, &visitor, root)?; + Ok(visitor.values.into_inner().unwrap()) } //------------------------------------------ diff --git a/src/pdata/space_map.rs b/src/pdata/space_map.rs index a2978ad..fa0af19 100644 --- a/src/pdata/space_map.rs +++ b/src/pdata/space_map.rs @@ -49,7 +49,7 @@ impl Unpack for SMRoot { //------------------------------------------ -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] pub struct IndexEntry { pub blocknr: u64, pub nr_free: u32, @@ -159,8 +159,8 @@ impl Unpack for Bitmap { fn unpack(data: &[u8]) -> IResult<&[u8], Self> { let (mut i, header) = BitmapHeader::unpack(data)?; - let mut entries = Vec::new(); let nr_words = (BLOCK_SIZE - BitmapHeader::disk_size() as usize) / 8; + let mut entries = Vec::with_capacity(nr_words * 32); for _w in 0..nr_words { let (tmp, mut word) = le_u64(i)?; @@ -225,6 +225,10 @@ pub trait SpaceMap { fn get_nr_blocks(&self) -> Result; fn get_nr_allocated(&self) -> Result; fn get(&self, b: u64) -> Result; + + // Returns the old ref count + fn set(&mut self, b: u64, v: u32) -> Result; + fn inc(&mut self, begin: u64, len: u64) -> Result<()>; } @@ -265,6 +269,20 @@ where Ok(self.counts[b as usize].into()) } + fn set(&mut self, b: u64, v: u32) -> Result { + let old = self.counts[b as usize]; + assert!(v < 0xff); // FIXME: we can't assume this + self.counts[b as usize] = V::from(v as u8); + + if old == V::from(0u8) && v != 0 { + self.nr_allocated += 1; + } else if old != 
V::from(0u8) && v == 0 { + self.nr_allocated -= 1; + } + + Ok(old.into()) + } + fn inc(&mut self, begin: u64, len: u64) -> Result<()> { for b in begin..(begin + len) { if self.counts[b as usize] == V::from(0u8) { @@ -325,6 +343,24 @@ impl SpaceMap for RestrictedSpaceMap { } } + fn set(&mut self, b: u64, v: u32) -> Result { + let old = self.counts.contains(b as usize); + + if v > 0 { + if !old { + self.nr_allocated += 1; + } + self.counts.insert(b as usize); + } else { + if old { + self.nr_allocated -= 1; + } + self.counts.set(b as usize, false); + } + + Ok(if old {1} else {0}) + } + fn inc(&mut self, begin: u64, len: u64) -> Result<()> { for b in begin..(begin + len) { if !self.counts.contains(b as usize) { diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs index 1a3d8a9..e7cde54 100644 --- a/src/shrink/toplevel.rs +++ b/src/shrink/toplevel.rs @@ -45,6 +45,14 @@ impl xml::MetadataVisitor for Pass1 { Ok(Visit::Continue) } + fn def_shared_b(&mut self, _name: &str) -> Result { + todo!(); + } + + fn def_shared_e(&mut self) -> Result { + todo!(); + } + fn device_b(&mut self, _d: &xml::Device) -> Result { Ok(Visit::Continue) } @@ -63,6 +71,10 @@ impl xml::MetadataVisitor for Pass1 { Ok(Visit::Continue) } + fn ref_shared(&mut self, _name: &str) -> Result { + todo!(); + } + fn eof(&mut self) -> Result { Ok(Visit::Continue) } @@ -96,6 +108,14 @@ impl xml::MetadataVisitor for Pass2 { self.writer.superblock_e() } + fn def_shared_b(&mut self, _name: &str) -> Result { + todo!(); + } + + fn def_shared_e(&mut self) -> Result { + todo!(); + } + fn device_b(&mut self, d: &xml::Device) -> Result { self.writer.device_b(d) } @@ -127,6 +147,10 @@ impl xml::MetadataVisitor for Pass2 { Ok(Visit::Continue) } + fn ref_shared(&mut self, _name: &str) -> Result { + todo!(); + } + fn eof(&mut self) -> Result { self.writer.eof() } diff --git a/src/thin/block_time.rs b/src/thin/block_time.rs new file mode 100644 index 0000000..3afe28c --- /dev/null +++ b/src/thin/block_time.rs @@ -0,0 
+1,40 @@ +use nom::{number::complete::*, IResult}; +use std::fmt; + +use crate::pdata::unpack::*; + +//------------------------------------------ + +#[derive(Clone, Copy)] +pub struct BlockTime { + pub block: u64, + pub time: u32, +} + +impl Unpack for BlockTime { + fn disk_size() -> u32 { + 8 + } + + fn unpack(i: &[u8]) -> IResult<&[u8], BlockTime> { + let (i, n) = le_u64(i)?; + let block = n >> 24; + let time = n & ((1 << 24) - 1); + + Ok(( + i, + BlockTime { + block, + time: time as u32, + }, + )) + } +} + +impl fmt::Display for BlockTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} @ {}", self.block, self.time) + } +} + +//------------------------------------------ diff --git a/src/thin/check.rs b/src/thin/check.rs index afe0fb3..e95b81b 100644 --- a/src/thin/check.rs +++ b/src/thin/check.rs @@ -1,5 +1,4 @@ use anyhow::{anyhow, Result}; -use nom::{number::complete::*, IResult}; use std::collections::BTreeMap; use std::io::Cursor; use std::path::Path; @@ -8,41 +7,17 @@ use std::thread::{self, JoinHandle}; use threadpool::ThreadPool; use crate::checksum; -use crate::io_engine::{AsyncIoEngine, Block, IoEngine, SyncIoEngine}; -use crate::pdata::btree::{btree_to_map, btree_to_map_with_sm, BTreeWalker, Node, NodeVisitor}; +use crate::io_engine::{AsyncIoEngine, IoEngine, SyncIoEngine}; +use crate::pdata::btree::{self, *}; use crate::pdata::space_map::*; use crate::pdata::unpack::*; use crate::report::*; +use crate::thin::block_time::*; +use crate::thin::device_detail::*; use crate::thin::superblock::*; //------------------------------------------ -#[allow(dead_code)] -struct BlockTime { - block: u64, - time: u32, -} - -impl Unpack for BlockTime { - fn disk_size() -> u32 { - 8 - } - - fn unpack(i: &[u8]) -> IResult<&[u8], BlockTime> { - let (i, n) = le_u64(i)?; - let block = n >> 24; - let time = n & ((1 << 24) - 1); - - Ok(( - i, - BlockTime { - block, - time: time as u32, - }, - )) - } -} - struct BottomLevelVisitor { data_sm: 
ASpaceMap, } @@ -50,72 +25,46 @@ struct BottomLevelVisitor { //------------------------------------------ impl NodeVisitor for BottomLevelVisitor { - fn visit(&mut self, _w: &BTreeWalker, _b: &Block, node: &Node) -> Result<()> { + fn visit( + &self, + _path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + _k: &[u64], + values: &[BlockTime], + ) -> btree::Result<()> { // FIXME: do other checks - if let Node::Leaf { - header: _h, - keys: _k, - values, - } = node - { - if values.len() == 0 { - return Ok(()); - } - - let mut data_sm = self.data_sm.lock().unwrap(); - - let mut start = values[0].block; - let mut len = 1; - - for n in 1..values.len() { - let block = values[n].block; - if block == start + len { - len += 1; - } else { - data_sm.inc(start, len)?; - start = block; - len = 1; - } - } - - data_sm.inc(start, len)?; + if values.len() == 0 { + return Ok(()); } + let mut data_sm = self.data_sm.lock().unwrap(); + + let mut start = values[0].block; + let mut len = 1; + + for n in 1..values.len() { + let block = values[n].block; + if block == start + len { + len += 1; + } else { + data_sm.inc(start, len).unwrap(); + start = block; + len = 1; + } + } + + data_sm.inc(start, len).unwrap(); Ok(()) } -} -//------------------------------------------ - -#[derive(Clone)] -struct DeviceDetail { - mapped_blocks: u64, - transaction_id: u64, - creation_time: u32, - snapshotted_time: u32, -} - -impl Unpack for DeviceDetail { - fn disk_size() -> u32 { - 24 + fn visit_again(&self, _path: &Vec, _b: u64) -> btree::Result<()> { + Ok(()) } - fn unpack(i: &[u8]) -> IResult<&[u8], DeviceDetail> { - let (i, mapped_blocks) = le_u64(i)?; - let (i, transaction_id) = le_u64(i)?; - let (i, creation_time) = le_u32(i)?; - let (i, snapshotted_time) = le_u32(i)?; - - Ok(( - i, - DeviceDetail { - mapped_blocks, - transaction_id, - creation_time, - snapshotted_time, - }, - )) + fn end_walk(&self) -> btree::Result<()> { + Ok(()) } } @@ -132,26 +81,34 @@ impl<'a> OverflowChecker<'a> { } impl<'a> 
NodeVisitor for OverflowChecker<'a> { - fn visit(&mut self, _w: &BTreeWalker, _b: &Block, node: &Node) -> Result<()> { - if let Node::Leaf { - header: _h, - keys, - values, - } = node - { - for n in 0..keys.len() { - let k = keys[n]; - let v = values[n]; - let expected = self.data_sm.get(k)?; - if expected != v { - return Err(anyhow!("Bad reference count for data block {}. Expected {}, but space map contains {}.", - k, expected, v)); - } + fn visit( + &self, + _path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + keys: &[u64], + values: &[u32], + ) -> btree::Result<()> { + for n in 0..keys.len() { + let k = keys[n]; + let v = values[n]; + let expected = self.data_sm.get(k).unwrap(); + if expected != v { + return Err(value_err(format!("Bad reference count for data block {}. Expected {}, but space map contains {}.", + k, expected, v))); } } Ok(()) } + + fn visit_again(&self, _path: &Vec, _b: u64) -> btree::Result<()> { + Ok(()) + } + + fn end_walk(&self) -> btree::Result<()> { + Ok(()) + } } //------------------------------------------ @@ -163,6 +120,7 @@ struct BitmapLeak { // This checks the space map and returns any leak blocks for auto-repair to process. 
fn check_space_map( + path: &mut Vec, ctx: &Context, kind: &str, entries: Vec, @@ -177,70 +135,77 @@ fn check_space_map( // overflow btree { - let mut v = OverflowChecker::new(&*sm); - let mut w; + let v = OverflowChecker::new(&*sm); + let w; if metadata_sm.is_none() { w = BTreeWalker::new(engine.clone(), false); } else { w = BTreeWalker::new_with_sm(engine.clone(), metadata_sm.unwrap().clone(), false)?; } - w.walk(&mut v, root.ref_count_root)?; + w.walk(path, &v, root.ref_count_root)?; } - let mut blocks = Vec::new(); + let mut blocks = Vec::with_capacity(entries.len()); for i in &entries { - blocks.push(Block::new(i.blocknr)); + blocks.push(i.blocknr); } // FIXME: we should do this in batches - engine.read_many(&mut blocks)?; + let blocks = engine.read_many(&mut blocks)?; let mut leaks = 0; let mut blocknr = 0; let mut bitmap_leaks = Vec::new(); for n in 0..entries.len() { let b = &blocks[n]; - if checksum::metadata_block_type(&b.get_data()) != checksum::BT::BITMAP { - report.fatal(&format!( - "Index entry points to block ({}) that isn't a bitmap", - b.loc - )); - } - - let bitmap = unpack::(b.get_data())?; - let first_blocknr = blocknr; - let mut contains_leak = false; - for e in bitmap.entries.iter() { - if blocknr >= root.nr_blocks { - break; + match b { + Err(_e) => { + return Err(anyhow!("Unable to read bitmap block")); } - - match e { - BitmapEntry::Small(actual) => { - let expected = sm.get(blocknr)?; - if *actual == 1 && expected == 0 { - leaks += 1; - contains_leak = true; - } else if *actual != expected as u8 { - report.fatal(&format!("Bad reference count for {} block {}. 
Expected {}, but space map contains {}.", - kind, blocknr, expected, actual)); - } + Ok(b) => { + if checksum::metadata_block_type(&b.get_data()) != checksum::BT::BITMAP { + report.fatal(&format!( + "Index entry points to block ({}) that isn't a bitmap", + b.loc + )); } - BitmapEntry::Overflow => { - let expected = sm.get(blocknr)?; - if expected < 3 { - report.fatal(&format!("Bad reference count for {} block {}. Expected {}, but space map says it's >= 3.", - kind, blocknr, expected)); + + let bitmap = unpack::(b.get_data())?; + let first_blocknr = blocknr; + let mut contains_leak = false; + for e in bitmap.entries.iter() { + if blocknr >= root.nr_blocks { + break; } + + match e { + BitmapEntry::Small(actual) => { + let expected = sm.get(blocknr)?; + if *actual == 1 && expected == 0 { + leaks += 1; + contains_leak = true; + } else if *actual != expected as u8 { + report.fatal(&format!("Bad reference count for {} block {}. Expected {}, but space map contains {}.", + kind, blocknr, expected, actual)); + } + } + BitmapEntry::Overflow => { + let expected = sm.get(blocknr)?; + if expected < 3 { + report.fatal(&format!("Bad reference count for {} block {}. 
Expected {}, but space map says it's >= 3.", + kind, blocknr, expected)); + } + } + } + blocknr += 1; + } + if contains_leak { + bitmap_leaks.push(BitmapLeak { + blocknr: first_blocknr, + loc: b.loc, + }); } } - blocknr += 1; - } - if contains_leak { - bitmap_leaks.push(BitmapLeak { - blocknr: first_blocknr, - loc: b.loc, - }); } } @@ -258,38 +223,50 @@ fn repair_space_map(ctx: &Context, entries: Vec, sm: ASpaceMap) -> R let sm = sm.lock().unwrap(); - let mut blocks = Vec::new(); + let mut blocks = Vec::with_capacity(entries.len()); for i in &entries { - blocks.push(Block::new(i.loc)); + blocks.push(i.loc); } // FIXME: we should do this in batches - engine.read_many(&mut blocks)?; + let rblocks = engine.read_many(&blocks[0..])?; + let mut write_blocks = Vec::new(); - for (be, b) in entries.iter().zip(blocks.iter()) { - let mut blocknr = be.blocknr; - let mut bitmap = unpack::(b.get_data())?; - for e in bitmap.entries.iter_mut() { - if blocknr >= sm.get_nr_blocks()? { - break; - } - - if let BitmapEntry::Small(actual) = e { - let expected = sm.get(blocknr)?; - if *actual == 1 && expected == 0 { - *e = BitmapEntry::Small(0); + let mut i = 0; + for rb in rblocks { + if rb.is_err() { + return Err(anyhow!("Unable to reread bitmap blocks for repair")); + } else { + let b = rb.unwrap(); + let be = &entries[i]; + let mut blocknr = be.blocknr; + let mut bitmap = unpack::(b.get_data())?; + for e in bitmap.entries.iter_mut() { + if blocknr >= sm.get_nr_blocks()? 
{ + break; } + + if let BitmapEntry::Small(actual) = e { + let expected = sm.get(blocknr)?; + if *actual == 1 && expected == 0 { + *e = BitmapEntry::Small(0); + } + } + + blocknr += 1; } - - blocknr += 1; + + let mut out = Cursor::new(b.get_data()); + bitmap.pack(&mut out)?; + checksum::write_checksum(b.get_data(), checksum::BT::BITMAP)?; + + write_blocks.push(b); } - let mut out = Cursor::new(b.get_data()); - bitmap.pack(&mut out)?; - checksum::write_checksum(b.get_data(), checksum::BT::BITMAP)?; + i += 1; } - engine.write_many(&blocks)?; + engine.write_many(&write_blocks[0..])?; Ok(()) } @@ -316,6 +293,8 @@ const MAX_CONCURRENT_IO: u32 = 1024; pub struct ThinCheckOptions<'a> { pub dev: &'a Path, pub async_io: bool, + pub sb_only: bool, + pub skip_mappings: bool, pub ignore_non_fatal: bool, pub auto_repair: bool, pub report: Arc, @@ -368,30 +347,63 @@ fn check_mapping_bottom_level( ctx: &Context, metadata_sm: &Arc>, data_sm: &Arc>, - roots: &BTreeMap, + roots: &BTreeMap, u64)>, ) -> Result<()> { ctx.report.set_sub_title("mapping tree"); - for (_thin_id, root) in roots { - let mut w = BTreeWalker::new_with_sm(ctx.engine.clone(), metadata_sm.clone(), false)?; - let data_sm = data_sm.clone(); - let root = *root; - ctx.pool.execute(move || { - let mut v = BottomLevelVisitor { data_sm }; + let w = Arc::new(BTreeWalker::new_with_sm( + ctx.engine.clone(), + metadata_sm.clone(), + false, + )?); - // FIXME: return error - match w.walk(&mut v, root) { - Err(e) => { - eprintln!("walk failed {:?}", e); - std::process::abort(); + // We want to print out errors as we progress, so we aggregate for each thin and print + // at that point. 
+ let mut failed = false; + + if roots.len() > 64 { + let errs = Arc::new(Mutex::new(Vec::new())); + for (_thin_id, (path, root)) in roots { + let data_sm = data_sm.clone(); + let root = *root; + let v = BottomLevelVisitor { data_sm }; + let w = w.clone(); + let mut path = path.clone(); + let errs = errs.clone(); + + ctx.pool.execute(move || { + if let Err(e) = w.walk(&mut path, &v, root) { + let mut errs = errs.lock().unwrap(); + errs.push(e); } - Ok(_result) => {} - } - }); - } - ctx.pool.join(); + }); + } + ctx.pool.join(); + let errs = Arc::try_unwrap(errs).unwrap().into_inner().unwrap(); + if errs.len() > 0 { + ctx.report.fatal(&format!("{}", aggregate_error(errs))); + failed = true; + } + } else { + for (_thin_id, (path, root)) in roots { + let w = w.clone(); + let data_sm = data_sm.clone(); + let root = *root; + let v = Arc::new(BottomLevelVisitor { data_sm }); + let mut path = path.clone(); - Ok(()) + if let Err(e) = walk_threaded(&mut path, w, &ctx.pool, v, root) { + failed = true; + ctx.report.fatal(&format!("{}", e)); + } + } + } + + if failed { + Err(anyhow!("Check of mappings failed")) + } else { + Ok(()) + } } fn mk_context(opts: &ThinCheckOptions) -> Result { @@ -400,9 +412,13 @@ fn mk_context(opts: &ThinCheckOptions) -> Result { if opts.async_io { nr_threads = std::cmp::min(4, num_cpus::get()); - engine = Arc::new(AsyncIoEngine::new(opts.dev, MAX_CONCURRENT_IO, opts.auto_repair)?); + engine = Arc::new(AsyncIoEngine::new( + opts.dev, + MAX_CONCURRENT_IO, + opts.auto_repair, + )?); } else { - nr_threads = num_cpus::get() * 2; + nr_threads = std::cmp::max(8, num_cpus::get() * 2); engine = Arc::new(SyncIoEngine::new(opts.dev, nr_threads, opts.auto_repair)?); } let pool = ThreadPool::new(nr_threads); @@ -437,18 +453,28 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { // superblock let sb = read_superblock(engine.as_ref(), SUPERBLOCK_LOCATION)?; + + report.info(&format!("TRANSACTION_ID={}", sb.transaction_id)); + + if opts.sb_only { + return 
Ok(()); + } + let metadata_root = unpack::(&sb.metadata_sm_root[0..])?; + let mut path = Vec::new(); + path.push(0); // Device details. We read this once to get the number of thin devices, and hence the // maximum metadata ref count. Then create metadata space map, and reread to increment // the ref counts for that metadata. - let devs = btree_to_map::(engine.clone(), false, sb.details_root)?; + let devs = btree_to_map::(&mut path, engine.clone(), false, sb.details_root)?; let nr_devs = devs.len(); let metadata_sm = core_sm(engine.get_nr_blocks(), nr_devs as u32); inc_superblock(&metadata_sm)?; report.set_sub_title("device details tree"); let _devs = btree_to_map_with_sm::( + &mut path, engine.clone(), metadata_sm.clone(), false, @@ -462,11 +488,20 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { )?; // mapping top level - let roots = - btree_to_map_with_sm::(engine.clone(), metadata_sm.clone(), false, sb.mapping_root)?; + report.set_sub_title("mapping tree"); + let roots = btree_to_map_with_path::( + &mut path, + engine.clone(), + metadata_sm.clone(), + false, + sb.mapping_root, + )?; + + if opts.skip_mappings { + return Ok(()); + } // mapping bottom level - report.set_sub_title("mapping tree"); let root = unpack::(&sb.data_sm_root[0..])?; let data_sm = core_sm(root.nr_blocks, nr_devs as u32); check_mapping_bottom_level(&ctx, &metadata_sm, &data_sm, &roots)?; @@ -476,6 +511,7 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { let root = unpack::(&sb.data_sm_root[0..])?; let entries = btree_to_map_with_sm::( + &mut path, engine.clone(), metadata_sm.clone(), false, @@ -485,6 +521,7 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { inc_entries(&metadata_sm, &entries[0..])?; let data_leaks = check_space_map( + &mut path, &ctx, "data", entries, @@ -496,8 +533,12 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { report.set_sub_title("metadata space map"); let root = unpack::(&sb.metadata_sm_root[0..])?; - let mut b = Block::new(root.bitmap_root); 
- engine.read(&mut b)?; + report.info(&format!( + "METADATA_FREE_BLOCKS={}", + root.nr_blocks - root.nr_allocated + )); + + let b = engine.read(root.bitmap_root)?; metadata_sm.lock().unwrap().inc(root.bitmap_root, 1)?; let entries = unpack::(b.get_data())?.indexes; @@ -512,6 +553,7 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { // We call this for the side effect of incrementing the ref counts // for the metadata that holds the tree. let _counts = btree_to_map_with_sm::( + &mut path, engine.clone(), metadata_sm.clone(), false, @@ -519,28 +561,34 @@ pub fn check(opts: ThinCheckOptions) -> Result<()> { )?; // Now the counts should be correct and we can check it. - let metadata_leaks = check_space_map(&ctx, "metadata", entries, None, metadata_sm.clone(), root)?; + let metadata_leaks = check_space_map( + &mut path, + &ctx, + "metadata", + entries, + None, + metadata_sm.clone(), + root, + )?; + bail_out(&ctx, "metadata space map")?; if opts.auto_repair { if data_leaks.len() > 0 { ctx.report.info("Repairing data leaks."); - repair_space_map(&ctx, data_leaks, data_sm.clone()); + repair_space_map(&ctx, data_leaks, data_sm.clone())?; } if metadata_leaks.len() > 0 { ctx.report.info("Repairing metadata leaks."); - repair_space_map(&ctx, metadata_leaks, metadata_sm.clone()); + repair_space_map(&ctx, metadata_leaks, metadata_sm.clone())?; } } - // Completing consumes the report. 
{ let mut stop_progress = stop_progress.lock().unwrap(); *stop_progress = true; } - - tid.join(); - bail_out(&ctx, "metadata space map")?; + tid.join().unwrap(); Ok(()) } diff --git a/src/thin/device_detail.rs b/src/thin/device_detail.rs new file mode 100644 index 0000000..332fbf7 --- /dev/null +++ b/src/thin/device_detail.rs @@ -0,0 +1,37 @@ +use crate::pdata::unpack::*; +use nom::{number::complete::*, IResult}; + +//------------------------------------------ + +#[derive(Clone, Copy)] +pub struct DeviceDetail { + pub mapped_blocks: u64, + pub transaction_id: u64, + pub creation_time: u32, + pub snapshotted_time: u32, +} + +impl Unpack for DeviceDetail { + fn disk_size() -> u32 { + 24 + } + + fn unpack(i: &[u8]) -> IResult<&[u8], DeviceDetail> { + let (i, mapped_blocks) = le_u64(i)?; + let (i, transaction_id) = le_u64(i)?; + let (i, creation_time) = le_u32(i)?; + let (i, snapshotted_time) = le_u32(i)?; + + Ok(( + i, + DeviceDetail { + mapped_blocks, + transaction_id, + creation_time, + snapshotted_time, + }, + )) + } +} + +//------------------------------------------ diff --git a/src/thin/dump.rs b/src/thin/dump.rs new file mode 100644 index 0000000..2de6cd4 --- /dev/null +++ b/src/thin/dump.rs @@ -0,0 +1,323 @@ +use anyhow::Result; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use crate::io_engine::{AsyncIoEngine, IoEngine, SyncIoEngine}; +use crate::pdata::btree::{self, *}; +use crate::pdata::space_map::*; +use crate::pdata::unpack::*; +use crate::report::*; +use crate::thin::block_time::*; +use crate::thin::device_detail::*; +use crate::thin::superblock::*; +use crate::thin::xml::{self, MetadataVisitor}; + +//------------------------------------------ + +struct RunBuilder { + run: Option, +} + +impl RunBuilder { + fn new() -> RunBuilder { + RunBuilder { run: None } + } + + fn next(&mut self, thin_block: u64, data_block: u64, time: u32) -> Option { + use xml::Map; + + match self.run { + None => { + self.run = 
Some(xml::Map { + thin_begin: thin_block, + data_begin: data_block, + time: time, + len: 1, + }); + None + } + Some(xml::Map { + thin_begin, + data_begin, + time: mtime, + len, + }) => { + if thin_block == (thin_begin + len) + && data_block == (data_begin + len) + && mtime == time + { + self.run.as_mut().unwrap().len += 1; + None + } else { + self.run.replace(Map { + thin_begin: thin_block, + data_begin: data_block, + time: time, + len: 1, + }) + } + } + } + } + + fn complete(&mut self) -> Option { + self.run.take() + } +} + +//------------------------------------------ + +struct MVInner<'a> { + md_out: &'a mut dyn xml::MetadataVisitor, + builder: RunBuilder, +} + +struct MappingVisitor<'a> { + inner: Mutex>, +} + +//------------------------------------------ + +impl<'a> MappingVisitor<'a> { + fn new(md_out: &'a mut dyn xml::MetadataVisitor) -> MappingVisitor<'a> { + MappingVisitor { + inner: Mutex::new(MVInner { + md_out, + builder: RunBuilder::new(), + }), + } + } +} + +impl<'a> NodeVisitor for MappingVisitor<'a> { + fn visit( + &self, + _path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + keys: &[u64], + values: &[BlockTime], + ) -> btree::Result<()> { + let mut inner = self.inner.lock().unwrap(); + for (k, v) in keys.iter().zip(values.iter()) { + if let Some(run) = inner.builder.next(*k, v.block, v.time) { + inner + .md_out + .map(&run) + .map_err(|e| btree::value_err(format!("{}", e)))?; + } + } + + Ok(()) + } + + fn visit_again(&self, _path: &Vec, b: u64) -> btree::Result<()> { + let mut inner = self.inner.lock().unwrap(); + inner + .md_out + .ref_shared(&format!("{}", b)) + .map_err(|e| btree::value_err(format!("{}", e)))?; + Ok(()) + } + + fn end_walk(&self) -> btree::Result<()> { + let mut inner = self.inner.lock().unwrap(); + if let Some(run) = inner.builder.complete() { + inner + .md_out + .map(&run) + .map_err(|e| btree::value_err(format!("{}", e)))?; + } + Ok(()) + } +} + +//------------------------------------------ + +const MAX_CONCURRENT_IO: u32 = 
1024; + +pub struct ThinDumpOptions<'a> { + pub dev: &'a Path, + pub async_io: bool, + pub report: Arc, +} + +struct Context { + report: Arc, + engine: Arc, +} + +fn mk_context(opts: &ThinDumpOptions) -> Result { + let engine: Arc; + + if opts.async_io { + engine = Arc::new(AsyncIoEngine::new(opts.dev, MAX_CONCURRENT_IO, false)?); + } else { + let nr_threads = std::cmp::max(8, num_cpus::get() * 2); + engine = Arc::new(SyncIoEngine::new(opts.dev, nr_threads, false)?); + } + + Ok(Context { + report: opts.report.clone(), + engine, + }) +} + +//------------------------------------------ + +struct NoopVisitor {} + +impl btree::NodeVisitor for NoopVisitor { + fn visit( + &self, + _path: &Vec, + _kr: &btree::KeyRange, + _h: &btree::NodeHeader, + _k: &[u64], + _values: &[V], + ) -> btree::Result<()> { + Ok(()) + } + + fn visit_again(&self, _path: &Vec, _b: u64) -> btree::Result<()> { + Ok(()) + } + + fn end_walk(&self) -> btree::Result<()> { + Ok(()) + } +} + +fn find_shared_nodes( + ctx: &Context, + nr_metadata_blocks: u64, + roots: &BTreeMap, +) -> Result<(BTreeSet, Arc>)> { + // By default the walker uses a restricted space map that can only count to 1. So + // we explicitly create a full sm. 
+    let sm = core_sm(nr_metadata_blocks, roots.len() as u32);
+    let w = BTreeWalker::new_with_sm(ctx.engine.clone(), sm.clone(), false)?;
+
+    let mut path = Vec::new();
+    path.push(0);
+
+    for (thin_id, root) in roots {
+        ctx.report.info(&format!("scanning {}", thin_id));
+        let v = NoopVisitor {};
+        w.walk::<BlockTime>(&mut path, &v, *root)?;
+    }
+
+    let mut shared = BTreeSet::new();
+    {
+        let sm = sm.lock().unwrap();
+        for i in 0..sm.get_nr_blocks().unwrap() {
+            if sm.get(i).expect("couldn't get count from space map.") > 1 {
+                shared.insert(i);
+            }
+        }
+    }
+
+    return Ok((shared, sm));
+}
+
+//------------------------------------------
+
+fn dump_node(
+    ctx: &Context,
+    out: &mut dyn xml::MetadataVisitor,
+    root: u64,
+    sm: &Arc<Mutex<dyn SpaceMap + Send + Sync>>,
+    force: bool, // sets the ref count for the root to zero to force output.
+) -> Result<()> {
+    let w = BTreeWalker::new_with_sm(ctx.engine.clone(), sm.clone(), false)?;
+    let mut path = Vec::new();
+    path.push(0);
+
+    let v = MappingVisitor::new(out);
+
+    // Temporarily set the ref count for the root to zero.
+    let mut old_count = 0;
+    if force {
+        let mut sm = sm.lock().unwrap();
+        old_count = sm.get(root).unwrap();
+        sm.set(root, 0)?;
+    }
+
+    w.walk::<BlockTime>(&mut path, &v, root)?;
+
+    // Reset the ref count for root.
+    if force {
+        let mut sm = sm.lock().unwrap();
+        sm.set(root, old_count)?;
+    }
+
+    Ok(())
+}
+
+//------------------------------------------
+
+pub fn dump(opts: ThinDumpOptions) -> Result<()> {
+    let ctx = mk_context(&opts)?;
+
+    let report = &ctx.report;
+    let engine = &ctx.engine;
+
+    // superblock
+    report.set_title("Reading superblock");
+    let sb = read_superblock(engine.as_ref(), SUPERBLOCK_LOCATION)?;
+    let metadata_root = unpack::<SMRoot>(&sb.metadata_sm_root[0..])?;
+    let data_root = unpack::<SMRoot>(&sb.data_sm_root[0..])?;
+    let mut path = Vec::new();
+    path.push(0);
+
+    report.set_title("Reading device details");
+    let devs = btree_to_map::<DeviceDetail>(&mut path, engine.clone(), true, sb.details_root)?;
+
+    report.set_title("Reading mappings roots");
+    let roots = btree_to_map::<u64>(&mut path, engine.clone(), true, sb.mapping_root)?;
+
+    report.set_title("Finding shared mappings");
+    let (shared, sm) = find_shared_nodes(&ctx, metadata_root.nr_blocks, &roots)?;
+    report.info(&format!("{} shared nodes found", shared.len()));
+
+    let mut out = xml::XmlWriter::new(std::io::stdout());
+    let xml_sb = xml::Superblock {
+        uuid: "".to_string(),
+        time: sb.time as u64,
+        transaction: sb.transaction_id,
+        flags: None,
+        version: Some(2),
+        data_block_size: sb.data_block_size,
+        nr_data_blocks: data_root.nr_blocks,
+        metadata_snap: None,
+    };
+    out.superblock_b(&xml_sb)?;
+
+    report.set_title("Dumping shared regions");
+    for b in shared {
+        out.def_shared_b(&format!("{}", b))?;
+        dump_node(&ctx, &mut out, b, &sm, true)?;
+        out.def_shared_e()?;
+    }
+
+    report.set_title("Dumping mappings");
+    for (thin_id, detail) in devs {
+        let d = xml::Device {
+            dev_id: thin_id as u32,
+            mapped_blocks: detail.mapped_blocks,
+            transaction: detail.transaction_id,
+            creation_time: detail.creation_time as u64,
+            snap_time: detail.snapshotted_time as u64,
+        };
+        out.device_b(&d)?;
+        let root = roots.get(&thin_id).unwrap();
+        dump_node(&ctx, &mut out, *root, &sm, false)?;
+        out.device_e()?;
+    }
+
+    out.superblock_e()?;
+
+    Ok(())
+}
+
+//------------------------------------------
diff --git a/src/thin/mod.rs b/src/thin/mod.rs
index b88bba8..3f2653a 100644
--- a/src/thin/mod.rs
+++ b/src/thin/mod.rs
@@ -1,3 +1,6 @@
+pub mod block_time;
+pub mod device_detail;
 pub mod superblock;
 pub mod check;
+pub mod dump;
 pub mod xml;
diff --git a/src/thin/superblock.rs b/src/thin/superblock.rs
index df69ff2..73e5b82 100644
--- a/src/thin/superblock.rs
+++ b/src/thin/superblock.rs
@@ -1,17 +1,27 @@
 use crate::io_engine::*;
 use anyhow::{anyhow, Result};
 use nom::{bytes::complete::*, number::complete::*, IResult};
-
+use std::fmt;
 pub const SUPERBLOCK_LOCATION: u64 = 0;
 //const UUID_SIZE: usize = 16;
 const SPACE_MAP_ROOT_SIZE: usize = 128;
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct SuperblockFlags {
     pub needs_check: bool,
 }
 
-#[derive(Debug)]
+impl fmt::Display for SuperblockFlags {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.needs_check {
+            write!(f, "NEEDS_CHECK")
+        } else {
+            write!(f, "-")
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
 pub struct Superblock {
     pub flags: SuperblockFlags,
     pub block: u64,
@@ -76,7 +86,9 @@ fn unpack(data: &[u8]) -> IResult<&[u8], Superblock> {
     Ok((
         i,
         Superblock {
-            flags: SuperblockFlags {needs_check: (flags & 0x1) != 0},
+            flags: SuperblockFlags {
+                needs_check: (flags & 0x1) != 0,
+            },
             block,
             //uuid: uuid[0..UUID_SIZE],
             version,
@@ -93,8 +105,7 @@ fn unpack(data: &[u8]) -> IResult<&[u8], Superblock> {
 }
 
 pub fn read_superblock(engine: &dyn IoEngine, loc: u64) -> Result<Superblock> {
-    let mut b = Block::new(loc);
-    engine.read(&mut b)?;
+    let b = engine.read(loc)?;
 
     if let Ok((_, sb)) = unpack(&b.get_data()) {
         Ok(sb)
diff --git a/src/thin/xml.rs b/src/thin/xml.rs
index 73155d3..752dd8e 100644
--- a/src/thin/xml.rs
+++ b/src/thin/xml.rs
@@ -46,10 +46,14 @@ pub trait MetadataVisitor {
     fn superblock_b(&mut self, sb: &Superblock) -> Result<Visit>;
     fn superblock_e(&mut self) -> Result<Visit>;
 
+    fn def_shared_b(&mut self, name: &str) -> Result<Visit>;
+    fn def_shared_e(&mut self) -> Result<Visit>;
+
     fn device_b(&mut self, d: &Device) -> Result<Visit>;
     fn device_e(&mut self) -> Result<Visit>;
 
     fn map(&mut self, m: &Map) -> Result<Visit>;
+    fn ref_shared(&mut self, name: &str) -> Result<Visit>;
 
     fn eof(&mut self) -> Result<Visit>;
 }
@@ -110,6 +114,19 @@ impl<W: Write> MetadataVisitor for XmlWriter<W> {
         Ok(Visit::Continue)
     }
 
+    fn def_shared_b(&mut self, name: &str) -> Result<Visit> {
+        let tag = b"def";
+        let mut elem = BytesStart::owned(tag.to_vec(), tag.len());
+        elem.push_attribute(mk_attr(b"name", name));
+        self.w.write_event(Event::Start(elem))?;
+        Ok(Visit::Continue)
+    }
+
+    fn def_shared_e(&mut self) -> Result<Visit> {
+        self.w.write_event(Event::End(BytesEnd::borrowed(b"def")))?;
+        Ok(Visit::Continue)
+    }
+
     fn device_b(&mut self, d: &Device) -> Result<Visit> {
         let tag = b"device";
         let mut elem = BytesStart::owned(tag.to_vec(), tag.len());
@@ -151,6 +168,14 @@ impl<W: Write> MetadataVisitor for XmlWriter<W> {
         Ok(Visit::Continue)
     }
 
+    fn ref_shared(&mut self, name: &str) -> Result<Visit> {
+        let tag = b"ref";
+        let mut elem = BytesStart::owned(tag.to_vec(), tag.len());
+        elem.push_attribute(mk_attr(b"name", name));
+        self.w.write_event(Event::Empty(elem))?;
+        Ok(Visit::Continue)
+    }
+
     fn eof(&mut self) -> Result<Visit> {
         let w = self.w.inner();
         w.flush()?;
@@ -379,6 +404,14 @@ impl MetadataVisitor for SBVisitor {
         Ok(Visit::Continue)
     }
 
+    fn def_shared_b(&mut self, _name: &str) -> Result<Visit> {
+        Ok(Visit::Continue)
+    }
+
+    fn def_shared_e(&mut self) -> Result<Visit> {
+        Ok(Visit::Continue)
+    }
+
     fn device_b(&mut self, _d: &Device) -> Result<Visit> {
         Ok(Visit::Continue)
     }
@@ -390,6 +423,10 @@ impl MetadataVisitor for SBVisitor {
         Ok(Visit::Continue)
     }
 
+    fn ref_shared(&mut self, _name: &str) -> Result<Visit> {
+        Ok(Visit::Continue)
+    }
+
     fn eof(&mut self) -> Result<Visit> {
         Ok(Visit::Stop)
     }
diff --git a/tests/thin_shrink.rs b/tests/thin_shrink.rs
index 0856859..6bed874 100644
--- a/tests/thin_shrink.rs
+++ b/tests/thin_shrink.rs
@@ -103,6 +103,14 @@ impl<'a, V: ThinVisitor> xml::MetadataVisitor for ThinXmlVisitor<'a, V> {
         Ok(Visit::Continue)
     }
 
+    fn def_shared_b(&mut self, _name: &str) -> Result<Visit> {
+        todo!();
+    }
+
+    fn def_shared_e(&mut self) -> Result<Visit> {
+        todo!();
+    }
+
     fn device_b(&mut self, d: &xml::Device) -> Result<Visit> {
         self.thin_id = Some(d.dev_id);
         Ok(Visit::Continue)
     }
@@ -125,6 +133,10 @@ impl<'a, V: ThinVisitor> xml::MetadataVisitor for ThinXmlVisitor<'a, V> {
         Ok(Visit::Continue)
     }
 
+    fn ref_shared(&mut self, _name: &str) -> Result<Visit> {
+        todo!();
+    }
+
     fn eof(&mut self) -> Result<Visit> {
         Ok(Visit::Stop)
     }