[thin_dump (rust)] Use common leaf sequences to pack metadata

This commit is contained in:
Ming-Hung Tsai 2021-05-10 18:21:28 +08:00
parent 4c47fcabbf
commit 30cfcd9a88
2 changed files with 17 additions and 46 deletions

View File

@ -15,6 +15,7 @@ pub trait LeafVisitor<V: Unpack> {
// Nodes may be shared and thus visited multiple times. The walker avoids
// doing repeated IO, but it does call this method to keep the visitor up to
// date. Note that b may be an internal node.
// FIXME: remove this method?
fn visit_again(&mut self, b: u64) -> Result<()>;
fn end_walk(&mut self) -> Result<()>;
}
@ -79,22 +80,13 @@ impl<'a> LeafWalker<'a> {
V: Unpack,
{
assert_eq!(krs.len(), bs.len());
let mut errs: Vec<BTreeError> = Vec::new();
let mut blocks = Vec::with_capacity(bs.len());
let mut filtered_krs = Vec::with_capacity(krs.len());
for i in 0..bs.len() {
if self.sm_inc(bs[i]) == 0 {
// Node not yet seen
blocks.push(bs[i]);
filtered_krs.push(krs[i].clone());
} else {
// This node has already been checked ...
if let Err(e) = visitor.visit_again(bs[i]) {
// ... but the visitor isn't happy
errs.push(e.clone());
}
}
self.sm_inc(bs[i]);
blocks.push(bs[i]);
filtered_krs.push(krs[i].clone());
}
let rblocks = self
@ -145,6 +137,7 @@ impl<'a> LeafWalker<'a> {
if let Internal { keys, values, .. } = node {
let krs = split_key_ranges(path, &kr, &keys)?;
if depth == 0 {
// This is the lowest level of internal nodes.
for i in 0..krs.len() {
self.sm.inc(values[i], 1).expect("sm.inc() failed");
for v in &values {
@ -218,13 +211,13 @@ impl<'a> LeafWalker<'a> {
let depth = self.get_depth::<V>(path, root, true)?;
if depth == 0 {
// root is a leaf
self.sm_inc(root);
self.leaves.insert(root as usize);
visitor.visit(&kr, root)?;
Ok(())
} else if self.sm_inc(root) > 0 {
visitor.visit_again(root)
} else {
self.sm_inc(root);
let root = self.engine.read(root).map_err(|_| io_err(path))?;
self.walk_node(depth - 1, path, visitor, &kr, &root, true)

View File

@ -237,18 +237,16 @@ impl LeafVisitor<BlockTime> for CollectLeaves {
fn collect_leaves(
ctx: &Context,
shared: &mut BTreeSet<u64>,
roots: &BTreeSet<u64>,
mut sm: Box<dyn SpaceMap>,
) -> Result<BTreeMap<u64, Vec<Entry>>> {
let mut map: BTreeMap<u64, Vec<Entry>> = BTreeMap::new();
ctx.report.set_title(&format!(
"Collecting leaves for {} shared nodes",
shared.len()
));
ctx.report
.set_title(&format!("Collecting leaves for {} roots", roots.len()));
// FIXME: we don't want any leaves in shared.
for r in shared.iter() {
for r in roots.iter() {
let old_count = sm.get(*r).expect("couldn't get count from space map.");
sm.set(*r, 0).expect("couldn't set count in space map.");
@ -270,6 +268,7 @@ fn collect_leaves(
//------------------------------------------
#[allow(dead_code)]
fn find_shared_nodes(
ctx: &Context,
roots: &BTreeMap<u64, (Vec<u64>, u64)>,
@ -335,16 +334,10 @@ fn build_metadata(ctx: &Context, sb: &Superblock) -> Result<Metadata> {
btree_to_map_with_path::<u64>(&mut path, engine.clone(), sm, true, sb.mapping_root)?;
}
report.set_title("Finding shared mappings");
let (mut shared, sm) = find_shared_nodes(ctx, &roots)?;
// Add in the roots, because they may not be shared.
for (_path, root) in roots.values() {
shared.insert(*root);
}
let entry_map = collect_leaves(&ctx, &mut shared, sm)?;
let mut defs = Vec::new();
let sm = Box::new(RestrictedSpaceMap::new(engine.get_nr_blocks()));
let mapping_roots = roots.values().map(|(_, root)| *root).collect();
let entry_map = collect_leaves(&ctx, &mapping_roots, sm)?;
let defs = Vec::new();
let mut devs = Vec::new();
let mut seen = BTreeSet::new();
@ -364,20 +357,6 @@ fn build_metadata(ctx: &Context, sb: &Superblock) -> Result<Metadata> {
});
}
for b in shared {
if !seen.contains(&b) {
let es = entry_map.get(&b).unwrap();
let kr = KeyRange::new(); // FIXME: finish
defs.push(Def {
def_id: b,
map: Mapping {
kr,
entries: es.to_vec(),
},
});
}
}
Ok(Metadata { defs, devs })
}
@ -622,11 +601,10 @@ pub fn dump(opts: ThinDumpOptions) -> Result<()> {
let sb = read_superblock(ctx.engine.as_ref(), SUPERBLOCK_LOCATION)?;
let md = build_metadata(&ctx, &sb)?;
/*
ctx.report
.set_title("Optimising metadata to improve leaf packing");
let md = optimise_metadata(md)?;
*/
dump_metadata(&ctx, &sb, &md)
}