615 lines
17 KiB
Rust
615 lines
17 KiB
Rust
use std::collections::BTreeMap;
|
|
use std::sync::{Arc, Mutex};
|
|
use threadpool::ThreadPool;
|
|
|
|
use crate::checksum;
|
|
use crate::io_engine::*;
|
|
use crate::pdata::btree::*;
|
|
use crate::pdata::space_map::*;
|
|
use crate::pdata::unpack::*;
|
|
|
|
//------------------------------------------
|
|
|
|
/// Callbacks invoked by the btree walkers in this file while they traverse a tree.
pub trait NodeVisitor<V: Unpack> {
    /// Called with the unpacked contents of a leaf node.
    ///
    /// `path` holds the block locations leading to the leaf (the walkers push
    /// each node's location before processing it), `kr` is the key range the
    /// walker computed for this node, `header` is the node's header and
    /// `keys`/`values` are the leaf's entries.
    ///
    /// `&self` is deliberately non mut to allow the walker to use multiple threads.
    fn visit(
        &self,
        path: &[u64],
        kr: &KeyRange,
        header: &NodeHeader,
        keys: &[u64],
        values: &[V],
    ) -> Result<()>;

    /// Called when the walker reaches block `b` again.
    ///
    /// Nodes may be shared and thus visited multiple times. The walker avoids
    /// doing repeated IO, but it does call this method to keep the visitor up to
    /// date.
    fn visit_again(&self, path: &[u64], b: u64) -> Result<()>;

    /// Called by the walkers in this file each time they finish processing a
    /// node, whether or not that node was walked successfully.
    fn end_walk(&self) -> Result<()>;
}
|
|
|
|
/// Walks btrees read through an `IoEngine`, remembering which blocks have
/// already been reached and which ones failed.
///
/// Cloning copies the `Arc` handles, so clones share the same space map and
/// failure table.
#[derive(Clone)]
pub struct BTreeWalker {
    // Source of metadata blocks (`read` / `read_many`).
    engine: Arc<dyn IoEngine + Send + Sync>,
    // Per-block reference counts; a non-zero count means the block has
    // already been reached by a walk.
    sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
    // Errors recorded against block locations, reported again when the block
    // is reached a second time.
    fails: Arc<Mutex<BTreeMap<u64, BTreeError>>>,
    // Forwarded to `unpack_node` when decoding nodes.
    ignore_non_fatal: bool,
}
|
|
|
|
impl BTreeWalker {
|
|
pub fn new(engine: Arc<dyn IoEngine + Send + Sync>, ignore_non_fatal: bool) -> BTreeWalker {
|
|
let nr_blocks = engine.get_nr_blocks() as usize;
|
|
let r: BTreeWalker = BTreeWalker {
|
|
engine,
|
|
sm: Arc::new(Mutex::new(RestrictedSpaceMap::new(nr_blocks as u64))),
|
|
fails: Arc::new(Mutex::new(BTreeMap::new())),
|
|
ignore_non_fatal,
|
|
};
|
|
r
|
|
}
|
|
|
|
pub fn new_with_sm(
|
|
engine: Arc<dyn IoEngine + Send + Sync>,
|
|
sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
|
|
ignore_non_fatal: bool,
|
|
) -> Result<BTreeWalker> {
|
|
{
|
|
let sm = sm.lock().unwrap();
|
|
assert_eq!(sm.get_nr_blocks().unwrap(), engine.get_nr_blocks());
|
|
}
|
|
|
|
Ok(BTreeWalker {
|
|
engine,
|
|
sm,
|
|
fails: Arc::new(Mutex::new(BTreeMap::new())),
|
|
ignore_non_fatal,
|
|
})
|
|
}
|
|
|
|
fn failed(&self, b: u64) -> Option<BTreeError> {
|
|
let fails = self.fails.lock().unwrap();
|
|
fails.get(&b).cloned()
|
|
}
|
|
|
|
fn set_fail(&self, b: u64, err: BTreeError) {
|
|
// FIXME: should we monitor the size of fails, and abort if too many errors?
|
|
let mut fails = self.fails.lock().unwrap();
|
|
fails.insert(b, err);
|
|
}
|
|
|
|
// Atomically increments the ref count, and returns the _old_ count.
|
|
fn sm_inc(&self, b: u64) -> u32 {
|
|
let mut sm = self.sm.lock().unwrap();
|
|
let count = sm.get(b).unwrap();
|
|
sm.inc(b, 1).unwrap();
|
|
count
|
|
}
|
|
|
|
fn build_aggregate(&self, b: u64, errs: Vec<BTreeError>) -> Result<()> {
|
|
match errs.len() {
|
|
0 => Ok(()),
|
|
1 => {
|
|
let e = errs[0].clone();
|
|
self.set_fail(b, e.clone());
|
|
Err(e)
|
|
}
|
|
_ => {
|
|
let e = aggregate_error(errs);
|
|
self.set_fail(b, e.clone());
|
|
Err(e)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn walk_nodes<NV, V>(
|
|
&self,
|
|
path: &mut Vec<u64>,
|
|
visitor: &NV,
|
|
krs: &[KeyRange],
|
|
bs: &[u64],
|
|
) -> Vec<BTreeError>
|
|
where
|
|
NV: NodeVisitor<V>,
|
|
V: Unpack,
|
|
{
|
|
assert_eq!(krs.len(), bs.len());
|
|
let mut errs: Vec<BTreeError> = Vec::new();
|
|
|
|
let mut blocks = Vec::with_capacity(bs.len());
|
|
let mut filtered_krs = Vec::with_capacity(krs.len());
|
|
for i in 0..bs.len() {
|
|
if self.sm_inc(bs[i]) == 0 {
|
|
// Node not yet seen
|
|
blocks.push(bs[i]);
|
|
filtered_krs.push(krs[i].clone());
|
|
} else {
|
|
// This node has already been checked ...
|
|
match self.failed(bs[i]) {
|
|
None => {
|
|
// ... it was clean.
|
|
if let Err(e) = visitor.visit_again(path, bs[i]) {
|
|
// ... but the visitor isn't happy
|
|
errs.push(e.clone());
|
|
}
|
|
}
|
|
Some(e) => {
|
|
// ... there was an error
|
|
errs.push(e.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
match self.engine.read_many(&blocks[0..]) {
|
|
Err(_) => {
|
|
// IO completely failed, error every block
|
|
for (i, b) in blocks.iter().enumerate() {
|
|
let e = io_err(path).keys_context(&filtered_krs[i]);
|
|
errs.push(e.clone());
|
|
self.set_fail(*b, e);
|
|
}
|
|
}
|
|
Ok(rblocks) => {
|
|
for (i, rb) in rblocks.into_iter().enumerate() {
|
|
match rb {
|
|
Err(_) => {
|
|
let e = io_err(path).keys_context(&filtered_krs[i]);
|
|
errs.push(e.clone());
|
|
self.set_fail(blocks[i], e);
|
|
}
|
|
Ok(b) => match self.walk_node(path, visitor, &filtered_krs[i], &b, false) {
|
|
Err(e) => {
|
|
errs.push(e);
|
|
}
|
|
Ok(()) => {}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
errs
|
|
}
|
|
|
|
fn walk_node_<NV, V>(
|
|
&self,
|
|
path: &mut Vec<u64>,
|
|
visitor: &NV,
|
|
kr: &KeyRange,
|
|
b: &Block,
|
|
is_root: bool,
|
|
) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V>,
|
|
V: Unpack,
|
|
{
|
|
use Node::*;
|
|
|
|
let bt = checksum::metadata_block_type(b.get_data());
|
|
if bt != checksum::BT::NODE {
|
|
return Err(node_err_s(
|
|
path,
|
|
format!("checksum failed for node {}, {:?}", b.loc, bt),
|
|
)
|
|
.keys_context(kr));
|
|
}
|
|
|
|
let node = unpack_node::<V>(path, &b.get_data(), self.ignore_non_fatal, is_root)?;
|
|
|
|
match node {
|
|
Internal { keys, values, .. } => {
|
|
let krs = split_key_ranges(path, &kr, &keys)?;
|
|
let errs = self.walk_nodes(path, visitor, &krs, &values);
|
|
return self.build_aggregate(b.loc, errs);
|
|
}
|
|
Leaf {
|
|
header,
|
|
keys,
|
|
values,
|
|
} => {
|
|
if let Err(e) = visitor.visit(path, &kr, &header, &keys, &values) {
|
|
let e = BTreeError::Path(path.clone(), Box::new(e));
|
|
self.set_fail(b.loc, e.clone());
|
|
return Err(e);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn walk_node<NV, V>(
|
|
&self,
|
|
path: &mut Vec<u64>,
|
|
visitor: &NV,
|
|
kr: &KeyRange,
|
|
b: &Block,
|
|
is_root: bool,
|
|
) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V>,
|
|
V: Unpack,
|
|
{
|
|
path.push(b.loc);
|
|
let r = self.walk_node_(path, visitor, kr, b, is_root);
|
|
path.pop();
|
|
visitor.end_walk()?;
|
|
r
|
|
}
|
|
|
|
pub fn walk<NV, V>(&self, path: &mut Vec<u64>, visitor: &NV, root: u64) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V>,
|
|
V: Unpack,
|
|
{
|
|
if self.sm_inc(root) > 0 {
|
|
if let Some(e) = self.failed(root) {
|
|
Err(e)
|
|
} else {
|
|
visitor.visit_again(path, root)
|
|
}
|
|
} else {
|
|
let root = self.engine.read(root).map_err(|_| io_err(path))?;
|
|
let kr = KeyRange {
|
|
start: None,
|
|
end: None,
|
|
};
|
|
self.walk_node(path, visitor, &kr, &root, true)
|
|
}
|
|
}
|
|
}
|
|
|
|
//--------------------------------
|
|
|
|
fn walk_node_threaded_<NV, V>(
|
|
w: Arc<BTreeWalker>,
|
|
path: &mut Vec<u64>,
|
|
pool: &ThreadPool,
|
|
visitor: Arc<NV>,
|
|
kr: &KeyRange,
|
|
b: &Block,
|
|
is_root: bool,
|
|
) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V> + Send + Sync + 'static,
|
|
V: Unpack,
|
|
{
|
|
use Node::*;
|
|
|
|
let bt = checksum::metadata_block_type(b.get_data());
|
|
if bt != checksum::BT::NODE {
|
|
return Err(node_err_s(
|
|
path,
|
|
format!("checksum failed for node {}, {:?}", b.loc, bt),
|
|
)
|
|
.keys_context(kr));
|
|
}
|
|
|
|
let node = unpack_node::<V>(path, &b.get_data(), w.ignore_non_fatal, is_root)?;
|
|
|
|
match node {
|
|
Internal { keys, values, .. } => {
|
|
let krs = split_key_ranges(path, &kr, &keys)?;
|
|
let errs = walk_nodes_threaded(w.clone(), path, pool, visitor, &krs, &values);
|
|
return w.build_aggregate(b.loc, errs);
|
|
}
|
|
Leaf {
|
|
header,
|
|
keys,
|
|
values,
|
|
} => {
|
|
visitor.visit(path, kr, &header, &keys, &values)?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn walk_node_threaded<NV, V>(
|
|
w: Arc<BTreeWalker>,
|
|
path: &mut Vec<u64>,
|
|
pool: &ThreadPool,
|
|
visitor: Arc<NV>,
|
|
kr: &KeyRange,
|
|
b: &Block,
|
|
is_root: bool,
|
|
) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V> + Send + Sync + 'static,
|
|
V: Unpack,
|
|
{
|
|
path.push(b.loc);
|
|
let r = walk_node_threaded_(w, path, pool, visitor.clone(), kr, b, is_root);
|
|
path.pop();
|
|
visitor.end_walk()?;
|
|
r
|
|
}
|
|
|
|
fn walk_nodes_threaded<NV, V>(
|
|
w: Arc<BTreeWalker>,
|
|
path: &mut Vec<u64>,
|
|
pool: &ThreadPool,
|
|
visitor: Arc<NV>,
|
|
krs: &[KeyRange],
|
|
bs: &[u64],
|
|
) -> Vec<BTreeError>
|
|
where
|
|
NV: NodeVisitor<V> + Send + Sync + 'static,
|
|
V: Unpack,
|
|
{
|
|
assert_eq!(krs.len(), bs.len());
|
|
let mut errs: Vec<BTreeError> = Vec::new();
|
|
|
|
let mut blocks = Vec::with_capacity(bs.len());
|
|
let mut filtered_krs = Vec::with_capacity(krs.len());
|
|
for i in 0..bs.len() {
|
|
if w.sm_inc(bs[i]) == 0 {
|
|
// Node not yet seen
|
|
blocks.push(bs[i]);
|
|
filtered_krs.push(krs[i].clone());
|
|
} else {
|
|
// This node has already been checked ...
|
|
match w.failed(bs[i]) {
|
|
None => {
|
|
// ... it was clean.
|
|
if let Err(e) = visitor.visit_again(path, bs[i]) {
|
|
// ... but the visitor isn't happy
|
|
errs.push(e.clone());
|
|
}
|
|
}
|
|
Some(e) => {
|
|
// ... there was an error
|
|
errs.push(e.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
match w.engine.read_many(&blocks[0..]) {
|
|
Err(_) => {
|
|
// IO completely failed error every block
|
|
for (i, b) in blocks.iter().enumerate() {
|
|
let e = io_err(path).keys_context(&filtered_krs[i]);
|
|
errs.push(e.clone());
|
|
w.set_fail(*b, e);
|
|
}
|
|
}
|
|
Ok(rblocks) => {
|
|
let child_errs = Arc::new(Mutex::new(Vec::new()));
|
|
|
|
for (i, rb) in rblocks.into_iter().enumerate() {
|
|
match rb {
|
|
Err(_) => {
|
|
let e = io_err(path).keys_context(&filtered_krs[i]);
|
|
let mut errs = child_errs.lock().unwrap();
|
|
errs.push(e.clone());
|
|
w.set_fail(blocks[i], e);
|
|
}
|
|
Ok(b) => {
|
|
let w = w.clone();
|
|
let visitor = visitor.clone();
|
|
let kr = filtered_krs[i].clone();
|
|
let errs = child_errs.clone();
|
|
let mut path = path.clone();
|
|
|
|
pool.execute(move || {
|
|
match w.walk_node(&mut path, visitor.as_ref(), &kr, &b, false) {
|
|
Err(e) => {
|
|
let mut errs = errs.lock().unwrap();
|
|
errs.push(e);
|
|
}
|
|
Ok(()) => {}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
pool.join();
|
|
let mut child_errs = Arc::try_unwrap(child_errs).unwrap().into_inner().unwrap();
|
|
errs.append(&mut child_errs);
|
|
}
|
|
}
|
|
|
|
errs
|
|
}
|
|
|
|
pub fn walk_threaded<NV, V>(
|
|
path: &mut Vec<u64>,
|
|
w: Arc<BTreeWalker>,
|
|
pool: &ThreadPool,
|
|
visitor: Arc<NV>,
|
|
root: u64,
|
|
) -> Result<()>
|
|
where
|
|
NV: NodeVisitor<V> + Send + Sync + 'static,
|
|
V: Unpack,
|
|
{
|
|
if w.sm_inc(root) > 0 {
|
|
if let Some(e) = w.failed(root) {
|
|
Err(e)
|
|
} else {
|
|
visitor.visit_again(path, root)
|
|
}
|
|
} else {
|
|
let root = w.engine.read(root).map_err(|_| io_err(path))?;
|
|
let kr = KeyRange {
|
|
start: None,
|
|
end: None,
|
|
};
|
|
walk_node_threaded(w, path, pool, visitor, &kr, &root, true)
|
|
}
|
|
}
|
|
|
|
//------------------------------------------
|
|
|
|
/// Visitor state that accumulates key/value pairs into an ordered map.
///
/// The map sits behind a `Mutex` so it can be filled through `&self`.
struct ValueCollector<V> {
    values: Mutex<BTreeMap<u64, V>>,
}

impl<V> ValueCollector<V> {
    /// Creates a collector with an empty map.
    fn new() -> Self {
        let values = Mutex::new(BTreeMap::new());
        Self { values }
    }
}
|
|
|
|
// FIXME: should we be using Copy rather than clone? (Yes)
|
|
impl<V: Unpack + Copy> NodeVisitor<V> for ValueCollector<V> {
|
|
fn visit(
|
|
&self,
|
|
_path: &[u64],
|
|
_kr: &KeyRange,
|
|
_h: &NodeHeader,
|
|
keys: &[u64],
|
|
values: &[V],
|
|
) -> Result<()> {
|
|
let mut vals = self.values.lock().unwrap();
|
|
for n in 0..keys.len() {
|
|
vals.insert(keys[n], values[n]);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_again(&self, _path: &[u64], _b: u64) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn end_walk(&self) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub fn btree_to_map<V: Unpack + Copy>(
|
|
path: &mut Vec<u64>,
|
|
engine: Arc<dyn IoEngine + Send + Sync>,
|
|
ignore_non_fatal: bool,
|
|
root: u64,
|
|
) -> Result<BTreeMap<u64, V>> {
|
|
let walker = BTreeWalker::new(engine, ignore_non_fatal);
|
|
let visitor = ValueCollector::<V>::new();
|
|
walker.walk(path, &visitor, root)?;
|
|
Ok(visitor.values.into_inner().unwrap())
|
|
}
|
|
|
|
pub fn btree_to_map_with_sm<V: Unpack + Copy>(
|
|
path: &mut Vec<u64>,
|
|
engine: Arc<dyn IoEngine + Send + Sync>,
|
|
sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
|
|
ignore_non_fatal: bool,
|
|
root: u64,
|
|
) -> Result<BTreeMap<u64, V>> {
|
|
let walker = BTreeWalker::new_with_sm(engine, sm, ignore_non_fatal)?;
|
|
let visitor = ValueCollector::<V>::new();
|
|
|
|
walker.walk(path, &visitor, root)?;
|
|
Ok(visitor.values.into_inner().unwrap())
|
|
}
|
|
|
|
//------------------------------------------
|
|
|
|
/// Visitor state that accumulates, for every key, the value together with the
/// node path under which it was found.
///
/// The map sits behind a `Mutex` so it can be filled through `&self`.
struct ValuePathCollector<V> {
    values: Mutex<BTreeMap<u64, (Vec<u64>, V)>>,
}

impl<V> ValuePathCollector<V> {
    /// Creates a collector with an empty map.
    fn new() -> Self {
        let values = Mutex::new(BTreeMap::new());
        Self { values }
    }
}
|
|
|
|
impl<V: Unpack + Clone> NodeVisitor<V> for ValuePathCollector<V> {
|
|
fn visit(
|
|
&self,
|
|
path: &[u64],
|
|
_kr: &KeyRange,
|
|
_h: &NodeHeader,
|
|
keys: &[u64],
|
|
values: &[V],
|
|
) -> Result<()> {
|
|
let mut vals = self.values.lock().unwrap();
|
|
for n in 0..keys.len() {
|
|
vals.insert(keys[n], (path.to_vec(), values[n].clone()));
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_again(&self, _path: &[u64], _b: u64) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn end_walk(&self) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub fn btree_to_map_with_path<V: Unpack + Copy>(
|
|
path: &mut Vec<u64>,
|
|
engine: Arc<dyn IoEngine + Send + Sync>,
|
|
sm: Arc<Mutex<dyn SpaceMap + Send + Sync>>,
|
|
ignore_non_fatal: bool,
|
|
root: u64,
|
|
) -> Result<BTreeMap<u64, (Vec<u64>, V)>> {
|
|
let walker = BTreeWalker::new_with_sm(engine, sm, ignore_non_fatal)?;
|
|
let visitor = ValuePathCollector::<V>::new();
|
|
|
|
walker.walk(path, &visitor, root)?;
|
|
Ok(visitor.values.into_inner().unwrap())
|
|
}
|
|
|
|
//------------------------------------------
|
|
|
|
/// Visitor that ignores everything it is shown.
///
/// `V` only appears in the marker field so the type can implement
/// `NodeVisitor<V>`.
struct NoopVisitor<V> {
    dummy: std::marker::PhantomData<V>,
}

impl<V> NoopVisitor<V> {
    /// Creates the visitor.
    pub fn new() -> Self {
        let dummy = std::marker::PhantomData;
        Self { dummy }
    }
}
|
|
|
|
impl<V: Unpack> NodeVisitor<V> for NoopVisitor<V> {
|
|
fn visit(
|
|
&self,
|
|
_path: &[u64],
|
|
_kr: &KeyRange,
|
|
_header: &NodeHeader,
|
|
_keys: &[u64],
|
|
_values: &[V],
|
|
) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
//fn visit_again(&self, _path: &[u64], _b: u64) -> Result<()> {
|
|
fn visit_again(&self, _path: &[u64], _b: u64) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn end_walk(&self) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub fn count_btree_blocks<V: Unpack>(
|
|
engine: Arc<dyn IoEngine + Send + Sync>,
|
|
path: &mut Vec<u64>,
|
|
root: u64,
|
|
metadata_sm: ASpaceMap,
|
|
ignore_non_fatal: bool,
|
|
) -> Result<()> {
|
|
let w = BTreeWalker::new_with_sm(engine, metadata_sm, ignore_non_fatal)?;
|
|
let v = NoopVisitor::<V>::new();
|
|
w.walk(path, &v, root)
|
|
}
|
|
|
|
//------------------------------------------
|