590 lines
15 KiB
Rust
590 lines
15 KiB
Rust
use anyhow::anyhow;
|
|
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
|
use data_encoding::BASE64;
|
|
use nom::{number::complete::*, IResult};
|
|
use std::fmt;
|
|
use thiserror::Error;
|
|
|
|
use crate::io_engine::*;
|
|
use crate::pack::vm;
|
|
use crate::pdata::unpack::*;
|
|
|
|
//------------------------------------------
|
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
pub struct KeyRange {
|
|
pub start: Option<u64>,
|
|
pub end: Option<u64>, // This is the one-past-the-end value
|
|
}
|
|
|
|
impl KeyRange {
|
|
pub fn new() -> KeyRange {
|
|
KeyRange {
|
|
start: None,
|
|
end: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Default for KeyRange {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for KeyRange {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match (self.start, self.end) {
|
|
(None, None) => write!(f, "[..]"),
|
|
(None, Some(e)) => write!(f, "[..{}]", e),
|
|
(Some(s), None) => write!(f, "[{}..]", s),
|
|
(Some(s), Some(e)) => write!(f, "[{}..{}]", s, e),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl KeyRange {
|
|
// None will be returned if either range would be zero length
|
|
fn split(&self, n: u64) -> Option<(KeyRange, KeyRange)> {
|
|
match (self.start, self.end) {
|
|
(None, None) => Some((
|
|
KeyRange {
|
|
start: None,
|
|
end: Some(n),
|
|
},
|
|
KeyRange {
|
|
start: Some(n),
|
|
end: None,
|
|
},
|
|
)),
|
|
(None, Some(e)) => {
|
|
if n < e {
|
|
Some((
|
|
KeyRange {
|
|
start: None,
|
|
end: Some(n),
|
|
},
|
|
KeyRange {
|
|
start: Some(n),
|
|
end: Some(e),
|
|
},
|
|
))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
(Some(s), None) => {
|
|
if s < n {
|
|
Some((
|
|
KeyRange {
|
|
start: Some(s),
|
|
end: Some(n),
|
|
},
|
|
KeyRange {
|
|
start: Some(n),
|
|
end: None,
|
|
},
|
|
))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
(Some(s), Some(e)) => {
|
|
if s < n && n < e {
|
|
Some((
|
|
KeyRange {
|
|
start: Some(s),
|
|
end: Some(n),
|
|
},
|
|
KeyRange {
|
|
start: Some(n),
|
|
end: Some(e),
|
|
},
|
|
))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_split_range() {
|
|
struct Test(Option<u64>, Option<u64>, u64, Option<(KeyRange, KeyRange)>);
|
|
|
|
let tests = vec![
|
|
Test(
|
|
None,
|
|
None,
|
|
100,
|
|
Some((
|
|
KeyRange {
|
|
start: None,
|
|
end: Some(100),
|
|
},
|
|
KeyRange {
|
|
start: Some(100),
|
|
end: None,
|
|
},
|
|
)),
|
|
),
|
|
Test(None, Some(100), 1000, None),
|
|
Test(
|
|
None,
|
|
Some(100),
|
|
50,
|
|
Some((
|
|
KeyRange {
|
|
start: None,
|
|
end: Some(50),
|
|
},
|
|
KeyRange {
|
|
start: Some(50),
|
|
end: Some(100),
|
|
},
|
|
)),
|
|
),
|
|
Test(None, Some(100), 100, None),
|
|
Test(Some(100), None, 50, None),
|
|
Test(
|
|
Some(100),
|
|
None,
|
|
150,
|
|
Some((
|
|
KeyRange {
|
|
start: Some(100),
|
|
end: Some(150),
|
|
},
|
|
KeyRange {
|
|
start: Some(150),
|
|
end: None,
|
|
},
|
|
)),
|
|
),
|
|
Test(Some(100), Some(200), 50, None),
|
|
Test(Some(100), Some(200), 250, None),
|
|
Test(
|
|
Some(100),
|
|
Some(200),
|
|
150,
|
|
Some((
|
|
KeyRange {
|
|
start: Some(100),
|
|
end: Some(150),
|
|
},
|
|
KeyRange {
|
|
start: Some(150),
|
|
end: Some(200),
|
|
},
|
|
)),
|
|
),
|
|
];
|
|
|
|
for Test(start, end, n, expected) in tests {
|
|
let kr = KeyRange { start, end };
|
|
let actual = kr.split(n);
|
|
assert_eq!(actual, expected);
|
|
}
|
|
}
|
|
|
|
fn split_one(path: &[u64], kr: &KeyRange, k: u64) -> Result<(KeyRange, KeyRange)> {
|
|
match kr.split(k) {
|
|
None => Err(node_err(
|
|
path,
|
|
&format!("couldn't split key range {} at {}", kr, k),
|
|
)),
|
|
Some(pair) => Ok(pair),
|
|
}
|
|
}
|
|
|
|
pub fn split_key_ranges(path: &[u64], kr: &KeyRange, keys: &[u64]) -> Result<Vec<KeyRange>> {
|
|
let mut krs = Vec::with_capacity(keys.len());
|
|
|
|
if keys.is_empty() {
|
|
return Err(node_err(path, "split_key_ranges: no keys present"));
|
|
}
|
|
|
|
// The first key gives the lower bound
|
|
let mut kr = KeyRange {
|
|
start: Some(keys[0]),
|
|
end: kr.end,
|
|
};
|
|
|
|
for k in keys.iter().skip(1) {
|
|
let (first, rest) = split_one(path, &kr, *k)?;
|
|
krs.push(first);
|
|
kr = rest;
|
|
}
|
|
|
|
krs.push(kr);
|
|
|
|
Ok(krs)
|
|
}
|
|
|
|
//------------------------------------------
|
|
|
|
// We compress and base64 encode paths to make them easy to
|
|
// cut and paste between programs (eg, thin_explore -p <path>)
|
|
pub fn encode_node_path(path: &[u64]) -> String {
|
|
let mut buffer: Vec<u8> = Vec::with_capacity(128);
|
|
let mut cursor = std::io::Cursor::new(&mut buffer);
|
|
assert!(path.len() < 256);
|
|
|
|
// The first entry is normally the superblock (0), so we
|
|
// special case this.
|
|
if !path.is_empty() && path[0] == 0 {
|
|
let count = ((path.len() as u8) - 1) << 1;
|
|
cursor.write_u8(count as u8).unwrap();
|
|
vm::pack_u64s(&mut cursor, &path[1..]).unwrap();
|
|
} else {
|
|
let count = ((path.len() as u8) << 1) | 1;
|
|
cursor.write_u8(count as u8).unwrap();
|
|
vm::pack_u64s(&mut cursor, path).unwrap();
|
|
}
|
|
|
|
BASE64.encode(&buffer)
|
|
}
|
|
|
|
pub fn decode_node_path(text: &str) -> anyhow::Result<Vec<u64>> {
|
|
let mut buffer = vec![0; 128];
|
|
let bytes = &mut buffer[0..BASE64.decode_len(text.len()).unwrap()];
|
|
BASE64
|
|
.decode_mut(text.as_bytes(), &mut bytes[0..])
|
|
.map_err(|_| anyhow!("bad node path. Unable to base64 decode."))?;
|
|
|
|
let mut input = std::io::Cursor::new(bytes);
|
|
|
|
let mut count = input.read_u8()?;
|
|
let mut prepend_zero = false;
|
|
if (count & 0x1) == 0 {
|
|
// Implicit 0 as first entry
|
|
prepend_zero = true;
|
|
}
|
|
count >>= 1;
|
|
|
|
let count = count as usize;
|
|
let mut path;
|
|
if count == 0 {
|
|
path = vec![];
|
|
} else {
|
|
let mut output = Vec::with_capacity(count * 8);
|
|
let mut cursor = std::io::Cursor::new(&mut output);
|
|
|
|
let mut vm = vm::VM::new();
|
|
let written = vm.exec(&mut input, &mut cursor, count * 8)?;
|
|
assert_eq!(written, count * 8);
|
|
|
|
let mut cursor = std::io::Cursor::new(&mut output);
|
|
path = vm::unpack_u64s(&mut cursor, count)?;
|
|
}
|
|
|
|
if prepend_zero {
|
|
let mut full_path = vec![0u64];
|
|
full_path.append(&mut path);
|
|
Ok(full_path)
|
|
} else {
|
|
Ok(path)
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_encode_path() {
|
|
struct Test(Vec<u64>);
|
|
|
|
let tests = vec![
|
|
Test(vec![]),
|
|
Test(vec![1]),
|
|
Test(vec![1, 2]),
|
|
Test(vec![1, 2, 3, 4]),
|
|
Test(vec![0]),
|
|
Test(vec![0, 0]),
|
|
Test(vec![0, 1]),
|
|
Test(vec![0, 1, 2]),
|
|
Test(vec![0, 123, 201231, 3102983012]),
|
|
];
|
|
|
|
for t in tests {
|
|
let encoded = encode_node_path(&t.0[0..]);
|
|
let decoded = decode_node_path(&encoded).unwrap();
|
|
assert_eq!(decoded, &t.0[0..]);
|
|
}
|
|
}
|
|
|
|
//------------------------------------------
|
|
|
|
const NODE_HEADER_SIZE: usize = 32;
|
|
|
|
#[derive(Error, Clone, Debug)]
|
|
pub enum BTreeError {
|
|
// #[error("io error")]
|
|
IoError, // (std::io::Error), // FIXME: we can't clone an io_error
|
|
|
|
// #[error("node error: {0}")]
|
|
NodeError(String),
|
|
|
|
// #[error("value error: {0}")]
|
|
ValueError(String),
|
|
|
|
// #[error("keys: {0:?}")]
|
|
KeyContext(KeyRange, Box<BTreeError>),
|
|
|
|
// #[error("aggregate: {0:?}")]
|
|
Aggregate(Vec<BTreeError>),
|
|
|
|
// #[error("{0:?}, {1}")]
|
|
Path(Vec<u64>, Box<BTreeError>),
|
|
}
|
|
|
|
impl fmt::Display for BTreeError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
BTreeError::IoError => write!(f, "io error"),
|
|
BTreeError::NodeError(msg) => write!(f, "node error: {}", msg),
|
|
BTreeError::ValueError(msg) => write!(f, "value error: {}", msg),
|
|
BTreeError::KeyContext(kr, be) => write!(f, "{}, effecting keys {}", be, kr),
|
|
BTreeError::Aggregate(errs) => {
|
|
for e in errs {
|
|
write!(f, "{}", e)?
|
|
}
|
|
Ok(())
|
|
}
|
|
BTreeError::Path(path, e) => write!(f, "{} {}", e, encode_node_path(path)),
|
|
}
|
|
}
|
|
}
|
|
pub fn node_err(path: &[u64], msg: &str) -> BTreeError {
|
|
BTreeError::Path(
|
|
path.to_vec(),
|
|
Box::new(BTreeError::NodeError(msg.to_string())),
|
|
)
|
|
}
|
|
|
|
pub fn node_err_s(path: &[u64], msg: String) -> BTreeError {
|
|
BTreeError::Path(path.to_vec(), Box::new(BTreeError::NodeError(msg)))
|
|
}
|
|
|
|
pub fn io_err(path: &[u64]) -> BTreeError {
|
|
BTreeError::Path(path.to_vec(), Box::new(BTreeError::IoError))
|
|
}
|
|
|
|
pub fn value_err(msg: String) -> BTreeError {
|
|
BTreeError::ValueError(msg)
|
|
}
|
|
|
|
pub fn aggregate_error(rs: Vec<BTreeError>) -> BTreeError {
|
|
BTreeError::Aggregate(rs)
|
|
}
|
|
|
|
impl BTreeError {
|
|
pub fn keys_context(self, keys: &KeyRange) -> BTreeError {
|
|
BTreeError::KeyContext(keys.clone(), Box::new(self))
|
|
}
|
|
}
|
|
|
|
pub type Result<T> = std::result::Result<T, BTreeError>;
|
|
|
|
//------------------------------------------
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub struct NodeHeader {
|
|
pub block: u64,
|
|
pub is_leaf: bool,
|
|
pub nr_entries: u32,
|
|
pub max_entries: u32,
|
|
pub value_size: u32,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
const INTERNAL_NODE: u32 = 1;
|
|
const LEAF_NODE: u32 = 2;
|
|
|
|
impl Unpack for NodeHeader {
|
|
fn disk_size() -> u32 {
|
|
32
|
|
}
|
|
|
|
fn unpack(data: &[u8]) -> IResult<&[u8], NodeHeader> {
|
|
let (i, _csum) = le_u32(data)?;
|
|
let (i, flags) = le_u32(i)?;
|
|
let (i, block) = le_u64(i)?;
|
|
let (i, nr_entries) = le_u32(i)?;
|
|
let (i, max_entries) = le_u32(i)?;
|
|
let (i, value_size) = le_u32(i)?;
|
|
let (i, _padding) = le_u32(i)?;
|
|
|
|
Ok((
|
|
i,
|
|
NodeHeader {
|
|
block,
|
|
is_leaf: flags == LEAF_NODE,
|
|
nr_entries,
|
|
max_entries,
|
|
value_size,
|
|
},
|
|
))
|
|
}
|
|
}
|
|
|
|
impl Pack for NodeHeader {
|
|
fn pack<W: WriteBytesExt>(&self, w: &mut W) -> anyhow::Result<()> {
|
|
// csum needs to be calculated right for the whole metadata block.
|
|
w.write_u32::<LittleEndian>(0)?;
|
|
|
|
let flags;
|
|
if self.is_leaf {
|
|
flags = LEAF_NODE;
|
|
} else {
|
|
flags = INTERNAL_NODE;
|
|
}
|
|
w.write_u32::<LittleEndian>(flags)?;
|
|
w.write_u64::<LittleEndian>(self.block)?;
|
|
w.write_u32::<LittleEndian>(self.nr_entries)?;
|
|
w.write_u32::<LittleEndian>(self.max_entries)?;
|
|
w.write_u32::<LittleEndian>(self.value_size)?;
|
|
w.write_u32::<LittleEndian>(0)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub enum Node<V: Unpack> {
|
|
Internal {
|
|
header: NodeHeader,
|
|
keys: Vec<u64>,
|
|
values: Vec<u64>,
|
|
},
|
|
Leaf {
|
|
header: NodeHeader,
|
|
keys: Vec<u64>,
|
|
values: Vec<V>,
|
|
},
|
|
}
|
|
|
|
impl<V: Unpack> Node<V> {
|
|
pub fn get_header(&self) -> &NodeHeader {
|
|
use Node::*;
|
|
match self {
|
|
Internal { header, .. } => header,
|
|
Leaf { header, .. } => header,
|
|
}
|
|
}
|
|
|
|
fn get_mut_header(&mut self) -> &mut NodeHeader {
|
|
use Node::*;
|
|
match self {
|
|
Internal { header, .. } => header,
|
|
Leaf { header, .. } => header,
|
|
}
|
|
}
|
|
|
|
pub fn get_keys(&self) -> &[u64] {
|
|
use Node::*;
|
|
match self {
|
|
Internal { keys, .. } => &keys[0..],
|
|
Leaf { keys, .. } => &keys[0..],
|
|
}
|
|
}
|
|
|
|
pub fn set_block(&mut self, b: u64) {
|
|
self.get_mut_header().block = b;
|
|
}
|
|
}
|
|
|
|
pub fn convert_result<'a, V>(path: &[u64], r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> {
|
|
r.map_err(|_e| node_err(path, "parse error"))
|
|
}
|
|
|
|
pub fn convert_io_err<V>(path: &[u64], r: std::io::Result<V>) -> Result<V> {
|
|
r.map_err(|_| io_err(path))
|
|
}
|
|
|
|
pub fn unpack_node<V: Unpack>(
|
|
path: &[u64],
|
|
data: &[u8],
|
|
ignore_non_fatal: bool,
|
|
is_root: bool,
|
|
) -> Result<Node<V>> {
|
|
use nom::multi::count;
|
|
|
|
let (i, header) =
|
|
NodeHeader::unpack(data).map_err(|_e| node_err(path, "couldn't parse node header"))?;
|
|
|
|
if header.is_leaf && header.value_size != V::disk_size() {
|
|
return Err(node_err_s(
|
|
path,
|
|
format!(
|
|
"value_size mismatch: expected {}, was {}",
|
|
V::disk_size(),
|
|
header.value_size
|
|
),
|
|
));
|
|
}
|
|
|
|
let elt_size = header.value_size + 8;
|
|
if elt_size as usize * header.max_entries as usize + NODE_HEADER_SIZE > BLOCK_SIZE {
|
|
return Err(node_err_s(
|
|
path,
|
|
format!("max_entries is too large ({})", header.max_entries),
|
|
));
|
|
}
|
|
|
|
if header.nr_entries > header.max_entries {
|
|
return Err(node_err(path, "nr_entries > max_entries"));
|
|
}
|
|
|
|
if !ignore_non_fatal {
|
|
if header.max_entries % 3 != 0 {
|
|
return Err(node_err(path, "max_entries is not divisible by 3"));
|
|
}
|
|
|
|
if !is_root {
|
|
let min = header.max_entries / 3;
|
|
if header.nr_entries < min {
|
|
return Err(node_err_s(
|
|
path,
|
|
format!(
|
|
"too few entries {}, expected at least {}",
|
|
header.nr_entries, min
|
|
),
|
|
));
|
|
}
|
|
}
|
|
}
|
|
|
|
let (i, keys) = convert_result(path, count(le_u64, header.nr_entries as usize)(i))?;
|
|
|
|
let mut last = None;
|
|
for k in &keys {
|
|
if let Some(l) = last {
|
|
if k <= l {
|
|
return Err(node_err(&path, "keys out of order"));
|
|
}
|
|
}
|
|
|
|
last = Some(k);
|
|
}
|
|
|
|
let nr_free = header.max_entries - header.nr_entries;
|
|
let (i, _padding) = convert_result(path, count(le_u64, nr_free as usize)(i))?;
|
|
|
|
if header.is_leaf {
|
|
let (_i, values) = convert_result(path, count(V::unpack, header.nr_entries as usize)(i))?;
|
|
|
|
Ok(Node::Leaf {
|
|
header,
|
|
keys,
|
|
values,
|
|
})
|
|
} else {
|
|
let (_i, values) = convert_result(&path, count(le_u64, header.nr_entries as usize)(i))?;
|
|
Ok(Node::Internal {
|
|
header,
|
|
keys,
|
|
values,
|
|
})
|
|
}
|
|
}
|
|
|
|
//------------------------------------------
|