use anyhow::{anyhow, Result};
use std::collections::BTreeMap;
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::thread::{self, JoinHandle};
use threadpool::ThreadPool;

use crate::io_engine::{AsyncIoEngine, IoEngine, SyncIoEngine};
use crate::pdata::btree::{self, *};
use crate::pdata::btree_walker::*;
use crate::pdata::space_map::*;
use crate::pdata::space_map_checker::*;
use crate::pdata::space_map_disk::*;
use crate::pdata::unpack::*;
use crate::report::*;
use crate::thin::block_time::*;
use crate::thin::device_detail::*;
use crate::thin::superblock::*;

//------------------------------------------

struct BottomLevelVisitor {
    data_sm: ASpaceMap,
}

//------------------------------------------

impl NodeVisitor<BlockTime> for BottomLevelVisitor {
    fn visit(
        &self,
        _path: &[u64],
        _kr: &KeyRange,
        _h: &NodeHeader,
        _k: &[u64],
        values: &[BlockTime],
    ) -> btree::Result<()> {
        // FIXME: do other checks

        if values.is_empty() {
            return Ok(());
        }

        let mut data_sm = self.data_sm.lock().unwrap();

        // Batch runs of contiguous data blocks into a single inc() call.
        let mut start = values[0].block;
        let mut len = 1;

        for b in values.iter().skip(1) {
            let block = b.block;
            if block == start + len {
                len += 1;
            } else {
                data_sm.inc(start, len).unwrap();
                start = block;
                len = 1;
            }
        }

        data_sm.inc(start, len).unwrap();
        Ok(())
    }

    fn visit_again(&self, _path: &[u64], _b: u64) -> btree::Result<()> {
        Ok(())
    }

    fn end_walk(&self) -> btree::Result<()> {
        Ok(())
    }
}

//------------------------------------------

fn inc_superblock(sm: &ASpaceMap) -> Result<()> {
    let mut sm = sm.lock().unwrap();
    sm.inc(SUPERBLOCK_LOCATION, 1)?;
    Ok(())
}

//------------------------------------------

pub const MAX_CONCURRENT_IO: u32 = 1024;

pub struct ThinCheckOptions {
    pub engine: Arc<dyn IoEngine + Send + Sync>,
    pub sb_only: bool,
    pub skip_mappings: bool,
    pub ignore_non_fatal: bool,
    pub auto_repair: bool,
    pub report: Arc<Report>,
}

fn spawn_progress_thread(
    sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
    nr_allocated_metadata: u64,
    report: Arc<Report>,
) -> Result<(JoinHandle<()>, Arc<AtomicBool>)> {
    let tid;
    let stop_progress = Arc::new(AtomicBool::new(false));

    {
        let stop_progress = stop_progress.clone();
        tid = thread::spawn(move || {
            let interval = std::time::Duration::from_millis(250);
            loop {
                if stop_progress.load(Ordering::Relaxed) {
                    break;
                }

                let sm = sm.lock().unwrap();
                let mut n = sm.get_nr_allocated().unwrap();
                drop(sm);

                n *= 100;
                n /= nr_allocated_metadata;

                let _r = report.progress(n as u8);
                thread::sleep(interval);
            }
        });
    }

    Ok((tid, stop_progress))
}

struct Context {
    report: Arc<Report>,
    engine: Arc<dyn IoEngine + Send + Sync>,
    pool: ThreadPool,
}

// Check the mappings, filling in the data_sm as we go.
fn check_mapping_bottom_level(
    ctx: &Context,
    metadata_sm: &Arc<Mutex<dyn SpaceMap + Send + Sync>>,
    data_sm: &Arc<Mutex<dyn SpaceMap + Send + Sync>>,
    roots: &BTreeMap<u64, (Vec<u64>, u64)>,
) -> Result<()> {
    ctx.report.set_sub_title("mapping tree");

    let w = Arc::new(BTreeWalker::new_with_sm(
        ctx.engine.clone(),
        metadata_sm.clone(),
        false,
    )?);

    // We want to print out errors as we progress, so we aggregate the errors
    // for each thin and print them at that point.
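    // With many thins we get enough parallelism by giving each device's
    // subtree its own pool task; with only a few thins we use walk_threaded()
    // instead, so the nodes of a single subtree can be spread across the pool.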
    let mut failed = false;

    if roots.len() > 64 {
        let errs = Arc::new(Mutex::new(Vec::new()));
        for (path, root) in roots.values() {
            let data_sm = data_sm.clone();
            let root = *root;
            let v = BottomLevelVisitor { data_sm };
            let w = w.clone();
            let mut path = path.clone();
            let errs = errs.clone();

            ctx.pool.execute(move || {
                if let Err(e) = w.walk(&mut path, &v, root) {
                    let mut errs = errs.lock().unwrap();
                    errs.push(e);
                }
            });
        }
        ctx.pool.join();

        let errs = Arc::try_unwrap(errs).unwrap().into_inner().unwrap();
        if !errs.is_empty() {
            ctx.report.fatal(&format!("{}", aggregate_error(errs)));
            failed = true;
        }
    } else {
        for (path, root) in roots.values() {
            let w = w.clone();
            let data_sm = data_sm.clone();
            let root = *root;
            let v = Arc::new(BottomLevelVisitor { data_sm });
            let mut path = path.clone();

            if let Err(e) = walk_threaded(&mut path, w, &ctx.pool, v, root) {
                failed = true;
                ctx.report.fatal(&format!("{}", e));
            }
        }
    }

    if failed {
        Err(anyhow!("Check of mappings failed"))
    } else {
        Ok(())
    }
}

fn mk_context(engine: Arc<dyn IoEngine + Send + Sync>, report: Arc<Report>) -> Result<Context> {
    let nr_threads = std::cmp::max(8, num_cpus::get() * 2);
    let pool = ThreadPool::new(nr_threads);

    Ok(Context {
        report,
        engine,
        pool,
    })
}

fn bail_out(ctx: &Context, task: &str) -> Result<()> {
    use ReportOutcome::*;

    match ctx.report.get_outcome() {
        Fatal => Err(anyhow!(format!(
            "Check of {} failed, ending check early.",
            task
        ))),
        _ => Ok(()),
    }
}

pub fn check(opts: ThinCheckOptions) -> Result<()> {
    let ctx = mk_context(opts.engine.clone(), opts.report.clone())?;

    // FIXME: temporarily get these out
    let report = &ctx.report;
    let engine = &ctx.engine;

    report.set_title("Checking thin metadata");

    // superblock
    let sb = read_superblock(engine.as_ref(), SUPERBLOCK_LOCATION)?;

    report.info(&format!("TRANSACTION_ID={}", sb.transaction_id));

    if opts.sb_only {
        return Ok(());
    }

    let metadata_root = unpack::<SMRoot>(&sb.metadata_sm_root[0..])?;
    let mut path = Vec::new();
    path.push(0);

    // Device details.  We read this once to get the number of thin devices, and hence
    // the maximum metadata ref count.  Then we create the metadata space map, and reread
    // to increment the ref counts for that metadata.
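    // A metadata block may be referenced once per thin device (btree nodes can
    // be shared between device trees), so nr_devs bounds the reference counts
    // the in-core metadata space map has to represent.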
    let devs = btree_to_map::<DeviceDetail>(
        &mut path,
        engine.clone(),
        opts.ignore_non_fatal,
        sb.details_root,
    )?;
    let nr_devs = devs.len();
    let metadata_sm = core_sm(engine.get_nr_blocks(), nr_devs as u32);
    inc_superblock(&metadata_sm)?;

    report.set_sub_title("device details tree");
    let _devs = btree_to_map_with_sm::<DeviceDetail>(
        &mut path,
        engine.clone(),
        metadata_sm.clone(),
        opts.ignore_non_fatal,
        sb.details_root,
    )?;

    let (tid, stop_progress) = spawn_progress_thread(
        metadata_sm.clone(),
        metadata_root.nr_allocated,
        report.clone(),
    )?;

    // mapping top level
    report.set_sub_title("mapping tree");
    let roots = btree_to_map_with_path::<u64>(
        &mut path,
        engine.clone(),
        metadata_sm.clone(),
        opts.ignore_non_fatal,
        sb.mapping_root,
    )?;

    if opts.skip_mappings {
        // Stop the progress thread before the early return so it doesn't
        // outlive the check.
        stop_progress.store(true, Ordering::Relaxed);
        tid.join().unwrap();
        return Ok(());
    }

    // mapping bottom level
    let root = unpack::<SMRoot>(&sb.data_sm_root[0..])?;
    let data_sm = core_sm(root.nr_blocks, nr_devs as u32);
    check_mapping_bottom_level(&ctx, &metadata_sm, &data_sm, &roots)?;
    bail_out(&ctx, "mapping tree")?;

    //-----------------------------------------

    report.set_sub_title("data space map");
    let root = unpack::<SMRoot>(&sb.data_sm_root[0..])?;
    let data_leaks = check_disk_space_map(
        engine.clone(),
        report.clone(),
        root,
        data_sm.clone(),
        metadata_sm.clone(),
        opts.ignore_non_fatal,
    )?;
    bail_out(&ctx, "data space map")?;

    //-----------------------------------------

    report.set_sub_title("metadata space map");
    let root = unpack::<SMRoot>(&sb.metadata_sm_root[0..])?;
    report.info(&format!(
        "METADATA_FREE_BLOCKS={}",
        root.nr_blocks - root.nr_allocated
    ));

    // Now the counts should be correct, and we can check them.
    let metadata_leaks = check_metadata_space_map(
        engine.clone(),
        report.clone(),
        root,
        metadata_sm.clone(),
        opts.ignore_non_fatal,
    )?;
    bail_out(&ctx, "metadata space map")?;

    //-----------------------------------------

    if opts.auto_repair {
        if !data_leaks.is_empty() {
            ctx.report.info("Repairing data leaks.");
            repair_space_map(ctx.engine.clone(), data_leaks, data_sm.clone())?;
        }

        if !metadata_leaks.is_empty() {
            ctx.report.info("Repairing metadata leaks.");
            repair_space_map(ctx.engine.clone(), metadata_leaks, metadata_sm.clone())?;
        }
    }

    stop_progress.store(true, Ordering::Relaxed);
    tid.join().unwrap();

    Ok(())
}

//------------------------------------------

// Some callers wish to know which blocks are allocated.
pub struct CheckMaps {
    pub metadata_sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
    pub data_sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
}

pub fn check_with_maps(
    engine: Arc<dyn IoEngine + Send + Sync>,
    report: Arc<Report>,
) -> Result<CheckMaps> {
    let ctx = mk_context(engine.clone(), report.clone())?;
    report.set_title("Checking thin metadata");

    // superblock
    let sb = read_superblock(engine.as_ref(), SUPERBLOCK_LOCATION)?;

    report.info(&format!("TRANSACTION_ID={}", sb.transaction_id));

    let metadata_root = unpack::<SMRoot>(&sb.metadata_sm_root[0..])?;
    let mut path = Vec::new();
    path.push(0);

    // Device details.  We read this once to get the number of thin devices, and hence
    // the maximum metadata ref count.  Then we create the metadata space map, and reread
    // to increment the ref counts for that metadata.
    let devs = btree_to_map::<DeviceDetail>(&mut path, engine.clone(), false, sb.details_root)?;
    let nr_devs = devs.len();
    let metadata_sm = core_sm(engine.get_nr_blocks(), nr_devs as u32);
    inc_superblock(&metadata_sm)?;

    report.set_sub_title("device details tree");
    let _devs = btree_to_map_with_sm::<DeviceDetail>(
        &mut path,
        engine.clone(),
        metadata_sm.clone(),
        false,
        sb.details_root,
    )?;

    let (tid, stop_progress) = spawn_progress_thread(
        metadata_sm.clone(),
        metadata_root.nr_allocated,
        report.clone(),
    )?;

    // mapping top level
    report.set_sub_title("mapping tree");
    let roots = btree_to_map_with_path::<u64>(
        &mut path,
        engine.clone(),
        metadata_sm.clone(),
        false,
        sb.mapping_root,
    )?;

    // mapping bottom level
    let root = unpack::<SMRoot>(&sb.data_sm_root[0..])?;
    let data_sm = core_sm(root.nr_blocks, nr_devs as u32);
    check_mapping_bottom_level(&ctx, &metadata_sm, &data_sm, &roots)?;
    bail_out(&ctx, "mapping tree")?;

    //-----------------------------------------

    report.set_sub_title("data space map");
    let root = unpack::<SMRoot>(&sb.data_sm_root[0..])?;
    let _data_leaks = check_disk_space_map(
        engine.clone(),
        report.clone(),
        root,
        data_sm.clone(),
        metadata_sm.clone(),
        false,
    )?;
    bail_out(&ctx, "data space map")?;

    //-----------------------------------------

    report.set_sub_title("metadata space map");
    let root = unpack::<SMRoot>(&sb.metadata_sm_root[0..])?;
    report.info(&format!(
        "METADATA_FREE_BLOCKS={}",
        root.nr_blocks - root.nr_allocated
    ));

    // Now the counts should be correct, and we can check them.
    let _metadata_leaks =
        check_metadata_space_map(engine.clone(), report, root, metadata_sm.clone(), false)?;
    bail_out(&ctx, "metadata space map")?;

    //-----------------------------------------

    stop_progress.store(true, Ordering::Relaxed);
    tid.join().unwrap();

    Ok(CheckMaps {
        metadata_sm: metadata_sm.clone(),
        data_sm: data_sm.clone(),
    })
}

//------------------------------------------
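// A minimal usage sketch, not part of this module's API.  It assumes a
// `mk_simple_report()` constructor in crate::report and the
// `SyncIoEngine::new(path, nr_io_threads, writable)` signature; both are
// assumptions and may differ in your tree:
//
//     let engine = Arc::new(SyncIoEngine::new(Path::new("/dev/vg/tmeta"), 8, false)?);
//     let report = Arc::new(mk_simple_report());
//     check(ThinCheckOptions {
//         engine,
//         sb_only: false,
//         skip_mappings: false,
//         ignore_non_fatal: false,
//         auto_repair: false,
//         report,
//     })?;

//------------------------------------------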