From 087a4711a59c62bec090f8feae5bc90e1f5d64d9 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Wed, 9 Dec 2020 18:40:06 +0800 Subject: [PATCH 1/3] [array (rust)] Implement array basis --- src/pdata/array.rs | 39 ++++++++++++++ src/pdata/array_block.rs | 70 ++++++++++++++++++++++++ src/pdata/array_walker.rs | 108 ++++++++++++++++++++++++++++++++++++++ src/pdata/mod.rs | 3 ++ 4 files changed, 220 insertions(+) create mode 100644 src/pdata/array.rs create mode 100644 src/pdata/array_block.rs create mode 100644 src/pdata/array_walker.rs diff --git a/src/pdata/array.rs b/src/pdata/array.rs new file mode 100644 index 0000000..d8299c2 --- /dev/null +++ b/src/pdata/array.rs @@ -0,0 +1,39 @@ +use nom::{number::complete::*, IResult}; +use std::fmt; + +use crate::pdata::unpack::*; + +//------------------------------------------ + +// TODO: build a data structure representating an array? + +// FIXME: rename this struct +pub struct ArrayBlockEntry { + pub block: u64, +} + +impl Unpack for ArrayBlockEntry { + fn disk_size() -> u32 { + 8 + } + + fn unpack(i: &[u8]) -> IResult<&[u8], ArrayBlockEntry> { + let (i, n) = le_u64(i)?; + let block = n; + + Ok(( + i, + ArrayBlockEntry { + block, + } + )) + } +} + +impl fmt::Display for ArrayBlockEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.block) + } +} + +//------------------------------------------ diff --git a/src/pdata/array_block.rs b/src/pdata/array_block.rs new file mode 100644 index 0000000..0cf7069 --- /dev/null +++ b/src/pdata/array_block.rs @@ -0,0 +1,70 @@ +use anyhow::anyhow; +use anyhow::Result; +use nom::{multi::count, number::complete::*, IResult}; + +use crate::pdata::unpack::Unpack; + +//------------------------------------------ + +pub struct ArrayBlockHeader { + pub csum: u32, + pub max_entries: u32, + pub nr_entries: u32, + pub value_size: u32, + pub blocknr: u64, +} + +impl Unpack for ArrayBlockHeader { + fn disk_size() -> u32 { + 24 + } + + fn unpack(data: &[u8]) -> IResult<&[u8], ArrayBlockHeader> { + let (i, csum) = le_u32(data)?; + let (i, max_entries) = le_u32(i)?; + let (i, nr_entries) = le_u32(i)?; + let (i, value_size) = le_u32(i)?; + let (i, blocknr) = le_u64(i)?; + + Ok(( + i, + ArrayBlockHeader { + csum, + max_entries, + nr_entries, + value_size, + blocknr + }, + )) + } +} + +pub struct ArrayBlock { + pub header: ArrayBlockHeader, + pub values: Vec, +} + +impl ArrayBlock { + pub fn get_header(&self) -> &ArrayBlockHeader { + &self.header + } +} + +fn convert_result<'a, V>(r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> { + r.map_err(|_| anyhow!("parse error")) +} + +pub fn unpack_array_block( + data: &[u8], +) -> Result> { + // TODO: collect errors + let (i, header) = ArrayBlockHeader::unpack(data).map_err(|_e| anyhow!("Couldn't parse header"))?; + let (_i, values) = convert_result(count(V::unpack, header.nr_entries as usize)(i))?; + + Ok(ArrayBlock { + header, + values, + }) +} + +//------------------------------------------ diff --git a/src/pdata/array_walker.rs b/src/pdata/array_walker.rs new file mode 100644 index 0000000..6322c50 --- /dev/null +++ b/src/pdata/array_walker.rs @@ -0,0 +1,108 @@ +use std::sync::Arc; + +use crate::io_engine::*; +use crate::pdata::array::*; +use crate::pdata::array_block::*; +use crate::pdata::btree::*; +use crate::pdata::btree_walker::*; +use crate::pdata::unpack::*; + +//------------------------------------------ + +pub struct ArrayWalker { + engine: Arc, + ignore_non_fatal: bool +} + +// FIXME: define another Result type for array visiting? +pub trait ArrayBlockVisitor { + fn visit( + &self, + index: u64, + v: V, + ) -> anyhow::Result<()>; +} + +struct BlockValueVisitor { + engine: Arc, + array_block_visitor: Box>, +} + +impl BlockValueVisitor { + pub fn new>( + e: Arc, + v: Box, + ) -> BlockValueVisitor { + BlockValueVisitor { + engine: e, + array_block_visitor: v, + } + } + + pub fn visit_array_block( + &self, + index: u64, + array_block: ArrayBlock, + ) { + let begin = index * u64::from(array_block.header.nr_entries); + for i in 0..array_block.header.nr_entries { + self.array_block_visitor.visit(begin + u64::from(i), array_block.values[i as usize]).unwrap(); + } + } +} + +impl NodeVisitor for BlockValueVisitor { + // FIXME: return errors + fn visit( + &self, + path: &Vec, + _kr: &KeyRange, + _h: &NodeHeader, + keys: &[u64], + values: &[ArrayBlockEntry], + ) -> Result<()> { + for n in 0..keys.len() { + let index = keys[n]; + let b = self.engine.read(values[n].block).map_err(|_| io_err(path))?; + let array_block = unpack_array_block::(b.get_data()).map_err(|_| io_err(path))?; + self.visit_array_block(index, array_block); + } + Ok(()) + } + + // FIXME: stub + fn visit_again(&self, _path: &Vec, _b: u64) -> Result<()> { + Ok(()) + } + + // FIXME: stub + fn end_walk(&self) -> Result<()> { + Ok(()) + } +} + +impl ArrayWalker { + pub fn new(engine: Arc, ignore_non_fatal: bool) -> ArrayWalker { + let r: ArrayWalker = ArrayWalker { + engine, + ignore_non_fatal, + }; + r + } + + // FIXME: pass the visitor by reference? + // FIXME: redefine the Result type for array visiting? + pub fn walk(&self, visitor: Box, root: u64) -> Result<()> + where + BV: 'static + ArrayBlockVisitor, + V: Unpack, + { + let w = BTreeWalker::new(self.engine.clone(), self.ignore_non_fatal); + let mut path = Vec::new(); // FIXME: eliminate this line? + path.push(0); + let v = BlockValueVisitor::::new(self.engine.clone(), visitor); + w.walk(&mut path, &v, root) + } +} + +//------------------------------------------ diff --git a/src/pdata/mod.rs b/src/pdata/mod.rs index 26a7318..370d244 100644 --- a/src/pdata/mod.rs +++ b/src/pdata/mod.rs @@ -1,3 +1,6 @@ +pub mod array; +pub mod array_block; +pub mod array_walker; pub mod btree; pub mod btree_builder; pub mod btree_merge; From faa371c20863d909dc85844063e346ba8c3b67cf Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 15 Dec 2020 20:03:25 +0800 Subject: [PATCH 2/3] [cache (rust)] Implement cache primitives --- Cargo.lock | 7 ++ Cargo.toml | 1 + src/cache/hint.rs | 33 ++++++++++ src/cache/mapping.rs | 50 +++++++++++++++ src/cache/mod.rs | 3 + src/cache/superblock.rs | 139 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 233 insertions(+) create mode 100644 src/cache/hint.rs create mode 100644 src/cache/mapping.rs create mode 100644 src/cache/superblock.rs diff --git a/Cargo.lock b/Cargo.lock index 761163b..219c98a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -803,6 +803,7 @@ dependencies = [ "thiserror", "threadpool", "tui", + "typenum", ] [[package]] @@ -856,6 +857,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" + [[package]] name = "unicode-segmentation" version = "1.6.0" diff --git a/Cargo.toml b/Cargo.toml index a98b956..9221f2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ threadpool = "1.8" thiserror = "1.0" tui = "0.10" termion = "1.5" +typenum = "1.12.0" [dev-dependencies] json = "0.12" diff --git a/src/cache/hint.rs b/src/cache/hint.rs new file mode 100644 index 0000000..ef0d83f --- /dev/null +++ b/src/cache/hint.rs @@ -0,0 +1,33 @@ +use nom::IResult; +use std::marker::PhantomData; +use std::convert::TryInto; + +use crate::pdata::unpack::*; + +//------------------------------------------ + +#[derive(Clone, Copy)] +pub struct Hint { + pub hint: [u8; 4], // FIXME: support various hint sizes + _not_used: PhantomData, +} + +impl Unpack for Hint { + fn disk_size() -> u32 { + Width::to_u32() + } + + // FIXME: support different width + fn unpack(i: &[u8]) -> IResult<&[u8], Hint> { + let size = Width::to_usize(); + Ok(( + &i[size..], + Hint { + hint: i[0..size].try_into().unwrap(), + _not_used: PhantomData, + }, + )) + } +} + +//------------------------------------------ diff --git a/src/cache/mapping.rs b/src/cache/mapping.rs new file mode 100644 index 0000000..c18325a --- /dev/null +++ b/src/cache/mapping.rs @@ -0,0 +1,50 @@ +use nom::IResult; +use nom::number::complete::*; + +use crate::pdata::unpack::*; + +//------------------------------------------ + +static FLAGS_MASK: u64 = (1 << 16) - 1; + +//------------------------------------------ + +pub enum MappingFlags { + Valid = 1, + Dirty = 2, +} + +#[derive(Clone, Copy)] +pub struct Mapping { + pub oblock: u64, + pub flags: u32, +} + +impl Mapping { + pub fn is_valid(&self) -> bool { + return (self.flags & MappingFlags::Valid as u32) != 0; + } +} + + +impl Unpack for Mapping { + fn disk_size() -> u32 { + 8 + } + + fn unpack(i: &[u8]) -> IResult<&[u8], Mapping> { + let (i, n) = le_u64(i)?; + let oblock = n >> 16; + let flags = n & FLAGS_MASK; + + Ok(( + i, + Mapping { + oblock, + flags: flags as u32, + }, + )) + } +} + +//------------------------------------------ diff --git a/src/cache/mod.rs b/src/cache/mod.rs index 2910ec6..f1cf219 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -1 +1,4 @@ +pub mod hint; +pub mod mapping; +pub mod superblock; pub mod xml; diff --git a/src/cache/superblock.rs b/src/cache/superblock.rs new file mode 100644 index 0000000..e98a3e9 --- /dev/null +++ b/src/cache/superblock.rs @@ -0,0 +1,139 @@ +use anyhow::{anyhow, Result}; + +use crate::io_engine::*; +use nom::{bytes::complete::*, number::complete::*, IResult}; + +//------------------------------------------ + +pub const SUPERBLOCK_LOCATION: u64 = 0; + +const POLICY_NAME_SIZE: usize = 16; +const SPACE_MAP_ROOT_SIZE: usize = 128; + +//------------------------------------------ + +#[derive(Debug, Clone)] +pub struct SuperblockFlags { + pub needs_check: bool, +} + +#[derive(Debug, Clone)] +pub struct Superblock { + pub flags: SuperblockFlags, + pub block: u64, + pub version: u32, + + pub policy_name: Vec, + pub policy_version: Vec, + pub policy_hint_size: u32, + + pub metadata_sm_root: Vec, + pub mapping_root: u64, + pub dirty_root: Option, // format 2 only + pub hint_root: u64, + + pub discard_root: u64, + pub discard_block_size: u64, + pub discard_nr_blocks: u64, + + pub data_block_size: u32, + pub metadata_block_size: u32, + pub cache_blocks: u32, + + pub compat_flags: u32, + pub compat_ro_flags: u32, + pub incompat_flags: u32, + + pub read_hits: u32, + pub read_misses: u32, + pub write_hits: u32, + pub write_misses: u32, +} + +fn unpack(data: &[u8]) -> IResult<&[u8], Superblock> { + let (i, _csum) = le_u32(data)?; + let (i, flags) = le_u32(i)?; + let (i, block) = le_u64(i)?; + let (i, _uuid) = take(16usize)(i)?; + let (i, _magic) = le_u64(i)?; + let (i, version) = le_u32(i)?; + + let (i, policy_name) = take(POLICY_NAME_SIZE)(i)?; + let (i, policy_hint_size) = le_u32(i)?; + + let (i, metadata_sm_root) = take(SPACE_MAP_ROOT_SIZE)(i)?; + let (i, mapping_root) = le_u64(i)?; + let (i, hint_root) = le_u64(i)?; + + let (i, discard_root) = le_u64(i)?; + let (i, discard_block_size) = le_u64(i)?; + let (i, discard_nr_blocks) = le_u64(i)?; + + let (i, data_block_size) = le_u32(i)?; + let (i, metadata_block_size) = le_u32(i)?; + let (i, cache_blocks) = le_u32(i)?; + + let (i, compat_flags) = le_u32(i)?; + let (i, compat_ro_flags) = le_u32(i)?; + let (i, incompat_flags) = le_u32(i)?; + + let (i, read_hits) = le_u32(i)?; + let (i, read_misses) = le_u32(i)?; + let (i, write_hits) = le_u32(i)?; + let (i, write_misses) = le_u32(i)?; + + let (i, vsn_major) = le_u32(i)?; + let (i, vsn_minor) = le_u32(i)?; + let (i, vsn_patch) = le_u32(i)?; + + let mut i = i; + let mut dirty_root = None; + if version >= 2 { + let (m, root) = le_u64(i)?; + dirty_root = Some(root); + i = &m; + } + + Ok(( + i, + Superblock { + flags: SuperblockFlags { + needs_check: (flags | 0x1) != 0, + }, + block, + version, + policy_name: policy_name.to_vec(), + policy_version: vec![vsn_major, vsn_minor, vsn_patch], + policy_hint_size, + metadata_sm_root: metadata_sm_root.to_vec(), + mapping_root, + dirty_root, + hint_root, + discard_root, + discard_block_size, + discard_nr_blocks, + data_block_size, + metadata_block_size, + cache_blocks, + compat_flags, + compat_ro_flags, + incompat_flags, + read_hits, + read_misses, + write_hits, + write_misses, + }, + )) +} + +pub fn read_superblock(engine: &dyn IoEngine, loc: u64) -> Result { + let b = engine.read(loc)?; + + if let Ok((_, sb)) = unpack(&b.get_data()) { + Ok(sb) + } else { + Err(anyhow!("couldn't unpack superblock")) + } +} + +//------------------------------------------ From 2bb3bf65b7e3ce56dbcaa8e498229c0263717235 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Sun, 20 Dec 2020 20:23:30 +0800 Subject: [PATCH 3/3] [cache_check (rust)] Implement basic functions --- src/bin/cache_check.rs | 62 +++++++++++++++ src/cache/check.rs | 166 +++++++++++++++++++++++++++++++++++++++++ src/cache/mod.rs | 1 + 3 files changed, 229 insertions(+) create mode 100644 src/bin/cache_check.rs create mode 100644 src/cache/check.rs diff --git a/src/bin/cache_check.rs b/src/bin/cache_check.rs new file mode 100644 index 0000000..4fed20c --- /dev/null +++ b/src/bin/cache_check.rs @@ -0,0 +1,62 @@ +extern crate clap; +extern crate thinp; + +use clap::{App, Arg}; +use std::path::Path; +use thinp::cache::check::{check, CacheCheckOptions}; + +//------------------------------------------ + +fn main() { + let parser = App::new("cache_check") + .version(thinp::version::TOOLS_VERSION) + .arg( + Arg::with_name("INPUT") + .help("Specify the input device to check") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("SB_ONLY") + .help("Only check the superblock.") + .long("super-block-only") + .value_name("SB_ONLY"), + ) + .arg( + Arg::with_name("SKIP_MAPPINGS") + .help("Don't check the mapping array") + .long("skip-mappings") + .value_name("SKIP_MAPPINGS"), + ) + .arg( + Arg::with_name("SKIP_HINTS") + .help("Don't check the hint array") + .long("skip-hints") + .value_name("SKIP_HINTS"), + ) + .arg( + Arg::with_name("SKIP_DISCARDS") + .help("Don't check the discard bitset") + .long("skip-discards") + .value_name("SKIP_DISCARDS"), + ); + + let matches = parser.get_matches(); + let input_file = Path::new(matches.value_of("INPUT").unwrap()); + + let opts = CacheCheckOptions { + dev: &input_file, + async_io: false, + sb_only: matches.is_present("SB_ONLY"), + skip_mappings: matches.is_present("SKIP_MAPPINGS"), + skip_hints: matches.is_present("SKIP_HINTS"), + skip_discards: matches.is_present("SKIP_DISCARDS"), + }; + + if let Err(reason) = check(opts) { + eprintln!("{}", reason); + std::process::exit(1); + } +} + +//------------------------------------------ diff --git a/src/cache/check.rs b/src/cache/check.rs new file mode 100644 index 0000000..92895e1 --- /dev/null +++ b/src/cache/check.rs @@ -0,0 +1,166 @@ +use anyhow::anyhow; +use std::marker::PhantomData; +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::collections::*; + +use crate::io_engine::{AsyncIoEngine, IoEngine, SyncIoEngine}; +use crate::cache::hint::*; +use crate::cache::mapping::*; +use crate::cache::superblock::*; +use crate::pdata::array_walker::*; + +//------------------------------------------ + +const MAX_CONCURRENT_IO: u32 = 1024; + +//------------------------------------------ + +struct CheckMappingVisitor { + allowed_flags: u32, + seen_oblocks: Arc>>, +} + +impl CheckMappingVisitor { + fn new(metadata_version: u32) -> CheckMappingVisitor { + let mut flags: u32 = MappingFlags::Valid as u32; + if metadata_version == 1 { + flags |= MappingFlags::Dirty as u32; + } + CheckMappingVisitor { + allowed_flags: flags, + seen_oblocks: Arc::new(Mutex::new(BTreeSet::new())), + } + } + + fn seen_oblock(&self, b: u64) -> bool { + let seen_oblocks = self.seen_oblocks.lock().unwrap(); + return seen_oblocks.contains(&b); + } + + fn record_oblock(&self, b: u64) { + let mut seen_oblocks = self.seen_oblocks.lock().unwrap(); + seen_oblocks.insert(b); + } + + // FIXME: is it possible to validate flags at an early phase? + // e.g., move to ctor of Mapping? + fn has_unknown_flags(&self, m: &Mapping) -> bool { + return (m.flags & self.allowed_flags) != 0; + } +} + +impl ArrayBlockVisitor for CheckMappingVisitor { + fn visit(&self, _index: u64, m: Mapping) -> anyhow::Result<()> { + if !m.is_valid() { + return Ok(()); + } + + if self.seen_oblock(m.oblock) { + return Err(anyhow!("origin block already mapped")); + } + + self.record_oblock(m.oblock); + + if !self.has_unknown_flags(&m) { + return Err(anyhow!("unknown flags in mapping")); + } + + Ok(()) + } +} + +//------------------------------------------ + +struct CheckHintVisitor { + _not_used: PhantomData, +} + +impl CheckHintVisitor { + fn new() -> CheckHintVisitor { + CheckHintVisitor { + _not_used: PhantomData, + } + } +} + +impl ArrayBlockVisitor> for CheckHintVisitor { + fn visit(&self, _index: u64, _hint: Hint) -> anyhow::Result<()> { + // TODO: check hints + Ok(()) + } +} + +//------------------------------------------ + +// TODO: ignore_non_fatal, clear_needs_check, auto_repair +pub struct CacheCheckOptions<'a> { + pub dev: &'a Path, + pub async_io: bool, + pub sb_only: bool, + pub skip_mappings: bool, + pub skip_hints: bool, + pub skip_discards: bool, +} + +// TODO: thread pool, report +struct Context { + engine: Arc, +} + +fn mk_context(opts: &CacheCheckOptions) -> anyhow::Result { + let engine: Arc; + + if opts.async_io { + engine = Arc::new(AsyncIoEngine::new( + opts.dev, + MAX_CONCURRENT_IO, + false, + )?); + } else { + let nr_threads = std::cmp::max(8, num_cpus::get() * 2); + engine = Arc::new(SyncIoEngine::new(opts.dev, nr_threads, false)?); + } + + Ok(Context { + engine, + }) +} + +pub fn check(opts: CacheCheckOptions) -> anyhow::Result<()> { + let ctx = mk_context(&opts)?; + + let engine = &ctx.engine; + + let sb = read_superblock(engine.as_ref(), SUPERBLOCK_LOCATION)?; + + if opts.sb_only { + return Ok(()); + } + + // TODO: factor out into check_mappings() + if !opts.skip_mappings { + let w = ArrayWalker::new(engine.clone(), false); + let c = Box::new(CheckMappingVisitor::new(sb.version)); + w.walk(c, sb.mapping_root)?; + + if sb.version >= 2 { + // TODO: check dirty bitset + } + } + + if !opts.skip_hints && sb.hint_root != 0 { + let w = ArrayWalker::new(engine.clone(), false); + type Width = typenum::U4; // FIXME: check sb.policy_hint_size + let c = Box::new(CheckHintVisitor::::new()); + w.walk(c, sb.hint_root)?; + } + + if !opts.skip_discards { + // TODO: check discard bitset + } + + Ok(()) +} + +//------------------------------------------ diff --git a/src/cache/mod.rs b/src/cache/mod.rs index f1cf219..a120ce6 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -1,3 +1,4 @@ +pub mod check; pub mod hint; pub mod mapping; pub mod superblock;