2020-08-03 17:07:32 +05:30
|
|
|
use anyhow::{anyhow, Result};
|
|
|
|
use fixedbitset::FixedBitSet;
|
|
|
|
use nom::{number::complete::*, IResult};
|
|
|
|
use std::sync::{Arc, Mutex};
|
|
|
|
|
|
|
|
use crate::block_manager::*;
|
|
|
|
use crate::checksum;
|
|
|
|
|
2020-08-04 16:41:36 +05:30
|
|
|
// FIXME: check that keys are in ascending order between nodes.
|
|
|
|
|
2020-08-03 17:07:32 +05:30
|
|
|
//------------------------------------------
|
|
|
|
|
2020-08-05 12:31:02 +05:30
|
|
|
pub trait Unpack {
|
2020-08-03 19:34:59 +05:30
|
|
|
// The size of the value when on disk.
|
|
|
|
fn disk_size() -> u32;
|
2020-08-03 20:52:08 +05:30
|
|
|
fn unpack(data: &[u8]) -> IResult<&[u8], Self>
|
|
|
|
where
|
|
|
|
Self: std::marker::Sized;
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
|
2020-08-06 12:21:48 +05:30
|
|
|
pub fn unpack<U: Unpack>(data: &[u8]) -> Result<U> {
|
|
|
|
match U::unpack(data) {
|
|
|
|
Err(_e) => {
|
|
|
|
Err(anyhow!("couldn't parse SMRoot"))
|
|
|
|
},
|
|
|
|
Ok((_i, v)) => Ok(v),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
// Size in bytes of the on-disk node header read by `unpack_node_header`
// (csum, flags, block, nr_entries, max_entries, value_size, padding:
// 4 + 4 + 8 + 4 + 4 + 4 + 4).
const NODE_HEADER_SIZE: usize = 32;
|
|
|
|
|
2020-08-03 17:07:32 +05:30
|
|
|
/// The decoded fields of a btree node header that the rest of this module
/// uses.
pub struct NodeHeader {
    /// `true` when the header's flags field equals `LEAF_NODE`.
    is_leaf: bool,

    /// Number of key/value entries currently stored in the node.
    nr_entries: u32,

    /// Maximum number of entries the node has room for.
    max_entries: u32,

    /// Size in bytes of each value, as recorded in the header.
    value_size: u32,
}
|
|
|
|
|
|
|
|
#[allow(dead_code)]
|
|
|
|
// Presumably the header `flags` value that marks an internal node; it is
// not referenced by the code visible here (hence the dead_code allowance
// above) -- TODO confirm against the on-disk format.
const INTERNAL_NODE: u32 = 1;
|
|
|
|
// Header `flags` value that `unpack_node_header` treats as marking a leaf
// node.
const LEAF_NODE: u32 = 2;
|
|
|
|
|
|
|
|
pub fn unpack_node_header(data: &[u8]) -> IResult<&[u8], NodeHeader> {
|
|
|
|
let (i, _csum) = le_u32(data)?;
|
|
|
|
let (i, flags) = le_u32(i)?;
|
2020-08-07 20:11:21 +05:30
|
|
|
let (i, _block) = le_u64(i)?;
|
2020-08-03 17:07:32 +05:30
|
|
|
let (i, nr_entries) = le_u32(i)?;
|
|
|
|
let (i, max_entries) = le_u32(i)?;
|
|
|
|
let (i, value_size) = le_u32(i)?;
|
|
|
|
let (i, _padding) = le_u32(i)?;
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
i,
|
|
|
|
NodeHeader {
|
|
|
|
is_leaf: flags == LEAF_NODE,
|
|
|
|
nr_entries,
|
|
|
|
max_entries,
|
|
|
|
value_size,
|
|
|
|
},
|
|
|
|
))
|
|
|
|
}
|
|
|
|
|
2020-08-05 12:31:02 +05:30
|
|
|
pub enum Node<V: Unpack> {
|
2020-08-03 17:07:32 +05:30
|
|
|
Internal {
|
|
|
|
header: NodeHeader,
|
|
|
|
keys: Vec<u64>,
|
|
|
|
values: Vec<u64>,
|
|
|
|
},
|
|
|
|
Leaf {
|
|
|
|
header: NodeHeader,
|
|
|
|
keys: Vec<u64>,
|
2020-08-03 20:52:08 +05:30
|
|
|
values: Vec<V>,
|
2020-08-03 17:07:32 +05:30
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
pub fn node_err<V>(msg: String) -> Result<V> {
|
|
|
|
let msg = format!("btree node error: {}", msg);
|
|
|
|
Err(anyhow!(msg))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn to_any<'a, V>(r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> {
|
|
|
|
if let Ok((i, v)) = r {
|
|
|
|
Ok((i, v))
|
|
|
|
} else {
|
|
|
|
Err(anyhow!("btree node error: parse error"))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-05 12:31:02 +05:30
|
|
|
pub fn unpack_node<V: Unpack>(
|
2020-08-03 19:34:59 +05:30
|
|
|
data: &[u8],
|
|
|
|
ignore_non_fatal: bool,
|
|
|
|
is_root: bool,
|
|
|
|
) -> Result<Node<V>> {
|
2020-08-03 17:07:32 +05:30
|
|
|
use nom::multi::count;
|
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
let (i, header) = to_any(unpack_node_header(data))?;
|
|
|
|
|
2020-08-03 20:52:08 +05:30
|
|
|
if header.is_leaf && header.value_size != V::disk_size() {
|
2020-08-03 19:34:59 +05:30
|
|
|
return node_err(format!(
|
|
|
|
"value_size mismatch: expected {}, was {}",
|
|
|
|
V::disk_size(),
|
|
|
|
header.value_size
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
2020-08-03 20:52:08 +05:30
|
|
|
let elt_size = header.value_size + 8;
|
2020-08-03 19:34:59 +05:30
|
|
|
if elt_size as usize * header.max_entries as usize + NODE_HEADER_SIZE > BLOCK_SIZE {
|
|
|
|
return node_err(format!("max_entries is too large ({})", header.max_entries));
|
|
|
|
}
|
|
|
|
|
|
|
|
if header.nr_entries > header.max_entries {
|
2020-08-07 20:11:21 +05:30
|
|
|
return node_err("nr_entries > max_entries".to_string());
|
2020-08-03 19:34:59 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
if !ignore_non_fatal {
|
|
|
|
if header.max_entries % 3 != 0 {
|
2020-08-07 20:11:21 +05:30
|
|
|
return node_err("max_entries is not divisible by 3".to_string());
|
2020-08-03 19:34:59 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
if !is_root {
|
|
|
|
let min = header.max_entries / 3;
|
|
|
|
if header.nr_entries < min {
|
2020-08-07 20:11:21 +05:30
|
|
|
return node_err("too few entries".to_string());
|
2020-08-03 19:34:59 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let (i, keys) = to_any(count(le_u64, header.nr_entries as usize)(i))?;
|
|
|
|
|
|
|
|
let mut last = None;
|
|
|
|
for k in &keys {
|
|
|
|
if let Some(l) = last {
|
|
|
|
if k <= l {
|
2020-08-07 20:11:21 +05:30
|
|
|
return node_err("keys out of order".to_string());
|
2020-08-03 19:34:59 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
last = Some(k);
|
|
|
|
}
|
|
|
|
|
2020-08-03 17:07:32 +05:30
|
|
|
let nr_free = header.max_entries - header.nr_entries;
|
2020-08-03 19:34:59 +05:30
|
|
|
let (i, _padding) = to_any(count(le_u64, nr_free as usize)(i))?;
|
2020-08-03 17:07:32 +05:30
|
|
|
|
|
|
|
if header.is_leaf {
|
2020-08-03 19:34:59 +05:30
|
|
|
let (_i, values) = to_any(count(V::unpack, header.nr_entries as usize)(i))?;
|
2020-08-03 17:07:32 +05:30
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
Ok(Node::Leaf {
|
|
|
|
header,
|
|
|
|
keys,
|
|
|
|
values,
|
|
|
|
})
|
2020-08-03 17:07:32 +05:30
|
|
|
} else {
|
2020-08-03 19:34:59 +05:30
|
|
|
let (_i, values) = to_any(count(le_u64, header.nr_entries as usize)(i))?;
|
|
|
|
Ok(Node::Internal {
|
|
|
|
header,
|
|
|
|
keys,
|
|
|
|
values,
|
|
|
|
})
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------
|
|
|
|
|
2020-08-05 12:31:02 +05:30
|
|
|
impl Unpack for u64 {
|
2020-08-03 19:34:59 +05:30
|
|
|
fn disk_size() -> u32 {
|
|
|
|
8
|
|
|
|
}
|
|
|
|
|
2020-08-03 17:07:32 +05:30
|
|
|
fn unpack(i: &[u8]) -> IResult<&[u8], u64> {
|
|
|
|
le_u64(i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-06 12:21:48 +05:30
|
|
|
impl Unpack for u32 {
|
|
|
|
fn disk_size() -> u32 {
|
|
|
|
4
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unpack(i: &[u8]) -> IResult<&[u8], u32> {
|
|
|
|
le_u32(i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-03 17:07:32 +05:30
|
|
|
//------------------------------------------
|
|
|
|
|
2020-08-05 12:31:02 +05:30
|
|
|
pub trait NodeVisitor<V: Unpack> {
|
2020-08-07 20:11:21 +05:30
|
|
|
fn visit(&mut self, w: &BTreeWalker, b: &Block, node: &Node<V>) -> Result<()>;
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone)]
|
|
|
|
pub struct BTreeWalker {
|
|
|
|
pub engine: Arc<AsyncIoEngine>,
|
|
|
|
pub seen: Arc<Mutex<FixedBitSet>>,
|
2020-08-03 19:34:59 +05:30
|
|
|
ignore_non_fatal: bool,
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
impl BTreeWalker {
|
2020-08-03 20:52:08 +05:30
|
|
|
pub fn new(engine: Arc<AsyncIoEngine>, ignore_non_fatal: bool) -> BTreeWalker {
|
2020-08-03 17:07:32 +05:30
|
|
|
let nr_blocks = engine.get_nr_blocks() as usize;
|
|
|
|
let r: BTreeWalker = BTreeWalker {
|
2020-08-07 20:11:21 +05:30
|
|
|
engine,
|
2020-08-03 17:07:32 +05:30
|
|
|
seen: Arc::new(Mutex::new(FixedBitSet::with_capacity(nr_blocks))),
|
2020-08-03 19:34:59 +05:30
|
|
|
ignore_non_fatal,
|
2020-08-03 17:07:32 +05:30
|
|
|
};
|
|
|
|
r
|
|
|
|
}
|
|
|
|
|
2020-08-04 16:41:36 +05:30
|
|
|
pub fn new_with_seen(
|
|
|
|
engine: Arc<AsyncIoEngine>,
|
|
|
|
seen: Arc<Mutex<FixedBitSet>>,
|
|
|
|
ignore_non_fatal: bool,
|
|
|
|
) -> BTreeWalker {
|
|
|
|
{
|
|
|
|
let seen = seen.lock().unwrap();
|
|
|
|
assert_eq!(seen.len(), engine.get_nr_blocks() as usize);
|
|
|
|
}
|
|
|
|
|
|
|
|
BTreeWalker {
|
|
|
|
engine,
|
|
|
|
seen,
|
|
|
|
ignore_non_fatal,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-07 20:11:21 +05:30
|
|
|
fn walk_nodes<NV, V>(&mut self, visitor: &mut NV, bs: &[u64]) -> Result<()>
|
2020-08-03 17:07:32 +05:30
|
|
|
where
|
|
|
|
NV: NodeVisitor<V>,
|
2020-08-05 12:31:02 +05:30
|
|
|
V: Unpack,
|
2020-08-03 17:07:32 +05:30
|
|
|
{
|
|
|
|
let mut blocks = Vec::new();
|
|
|
|
let seen = self.seen.lock().unwrap();
|
|
|
|
for b in bs {
|
|
|
|
if !seen[*b as usize] {
|
|
|
|
blocks.push(Block::new(*b));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
drop(seen);
|
|
|
|
|
|
|
|
self.engine.read_many(&mut blocks)?;
|
|
|
|
|
|
|
|
for b in blocks {
|
2020-08-03 19:34:59 +05:30
|
|
|
self.walk_node(visitor, &b, false)?;
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
fn walk_node<NV, V>(&mut self, visitor: &mut NV, b: &Block, is_root: bool) -> Result<()>
|
2020-08-03 17:07:32 +05:30
|
|
|
where
|
|
|
|
NV: NodeVisitor<V>,
|
2020-08-05 12:31:02 +05:30
|
|
|
V: Unpack,
|
2020-08-03 17:07:32 +05:30
|
|
|
{
|
|
|
|
let mut seen = self.seen.lock().unwrap();
|
|
|
|
seen.insert(b.loc as usize);
|
|
|
|
drop(seen);
|
|
|
|
|
|
|
|
let bt = checksum::metadata_block_type(b.get_data());
|
|
|
|
if bt != checksum::BT::NODE {
|
|
|
|
return Err(anyhow!("checksum failed for node {}, {:?}", b.loc, bt));
|
|
|
|
}
|
|
|
|
|
2020-08-03 19:34:59 +05:30
|
|
|
let node = unpack_node::<V>(&b.get_data(), self.ignore_non_fatal, is_root)?;
|
2020-08-03 17:07:32 +05:30
|
|
|
visitor.visit(self, &b, &node)?;
|
|
|
|
|
|
|
|
if let Node::Internal {
|
|
|
|
header: _h,
|
|
|
|
keys: _k,
|
|
|
|
values,
|
|
|
|
} = node
|
|
|
|
{
|
|
|
|
self.walk_nodes(visitor, &values)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2020-08-03 19:34:59 +05:30
|
|
|
|
2020-08-03 20:52:08 +05:30
|
|
|
pub fn walk_b<NV, V>(&mut self, visitor: &mut NV, root: &Block) -> Result<()>
|
|
|
|
where
|
|
|
|
NV: NodeVisitor<V>,
|
2020-08-05 12:31:02 +05:30
|
|
|
V: Unpack,
|
2020-08-03 20:52:08 +05:30
|
|
|
{
|
|
|
|
self.walk_node(visitor, &root, true)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn walk<NV, V>(&mut self, visitor: &mut NV, root: u64) -> Result<()>
|
2020-08-03 19:34:59 +05:30
|
|
|
where
|
|
|
|
NV: NodeVisitor<V>,
|
2020-08-05 12:31:02 +05:30
|
|
|
V: Unpack,
|
2020-08-03 19:34:59 +05:30
|
|
|
{
|
2020-08-03 20:52:08 +05:30
|
|
|
let mut root = Block::new(root);
|
|
|
|
self.engine.read(&mut root)?;
|
|
|
|
self.walk_node(visitor, &root, true)
|
2020-08-03 19:34:59 +05:30
|
|
|
}
|
2020-08-03 17:07:32 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------
|