use anyhow::anyhow; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use data_encoding::BASE64; use nom::{number::complete::*, IResult}; use std::fmt; use thiserror::Error; use crate::io_engine::*; use crate::pack::vm; use crate::pdata::unpack::*; //------------------------------------------ #[derive(Clone, Debug, PartialEq)] pub struct KeyRange { pub start: Option, pub end: Option, // This is the one-past-the-end value } impl KeyRange { pub fn new() -> KeyRange { KeyRange { start: None, end: None, } } } impl Default for KeyRange { fn default() -> Self { Self::new() } } impl fmt::Display for KeyRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match (self.start, self.end) { (None, None) => write!(f, "[..]"), (None, Some(e)) => write!(f, "[..{}]", e), (Some(s), None) => write!(f, "[{}..]", s), (Some(s), Some(e)) => write!(f, "[{}..{}]", s, e), } } } impl KeyRange { // None will be returned if either range would be zero length fn split(&self, n: u64) -> Option<(KeyRange, KeyRange)> { match (self.start, self.end) { (None, None) => Some(( KeyRange { start: None, end: Some(n), }, KeyRange { start: Some(n), end: None, }, )), (None, Some(e)) => { if n < e { Some(( KeyRange { start: None, end: Some(n), }, KeyRange { start: Some(n), end: Some(e), }, )) } else { None } } (Some(s), None) => { if s < n { Some(( KeyRange { start: Some(s), end: Some(n), }, KeyRange { start: Some(n), end: None, }, )) } else { None } } (Some(s), Some(e)) => { if s < n && n < e { Some(( KeyRange { start: Some(s), end: Some(n), }, KeyRange { start: Some(n), end: Some(e), }, )) } else { None } } } } } #[test] fn test_split_range() { struct Test(Option, Option, u64, Option<(KeyRange, KeyRange)>); let tests = vec![ Test( None, None, 100, Some(( KeyRange { start: None, end: Some(100), }, KeyRange { start: Some(100), end: None, }, )), ), Test(None, Some(100), 1000, None), Test( None, Some(100), 50, Some(( KeyRange { start: None, end: Some(50), }, KeyRange { start: Some(50), end: Some(100), }, )), ), Test(None, Some(100), 100, None), Test(Some(100), None, 50, None), Test( Some(100), None, 150, Some(( KeyRange { start: Some(100), end: Some(150), }, KeyRange { start: Some(150), end: None, }, )), ), Test(Some(100), Some(200), 50, None), Test(Some(100), Some(200), 250, None), Test( Some(100), Some(200), 150, Some(( KeyRange { start: Some(100), end: Some(150), }, KeyRange { start: Some(150), end: Some(200), }, )), ), ]; for Test(start, end, n, expected) in tests { let kr = KeyRange { start, end }; let actual = kr.split(n); assert_eq!(actual, expected); } } fn split_one(path: &[u64], kr: &KeyRange, k: u64) -> Result<(KeyRange, KeyRange)> { match kr.split(k) { None => Err(node_err( path, &format!("couldn't split key range {} at {}", kr, k), )), Some(pair) => Ok(pair), } } pub fn split_key_ranges(path: &[u64], kr: &KeyRange, keys: &[u64]) -> Result> { let mut krs = Vec::with_capacity(keys.len()); if keys.is_empty() { return Err(node_err(path, "split_key_ranges: no keys present")); } // The first key gives the lower bound let mut kr = KeyRange { start: Some(keys[0]), end: kr.end, }; for k in keys.iter().skip(1) { let (first, rest) = split_one(path, &kr, *k)?; krs.push(first); kr = rest; } krs.push(kr); Ok(krs) } //------------------------------------------ // We compress and base64 encode paths to make them easy to // cut and paste between programs (eg, thin_explore -p ) pub fn encode_node_path(path: &[u64]) -> String { let mut buffer: Vec = Vec::with_capacity(128); let mut cursor = std::io::Cursor::new(&mut buffer); assert!(path.len() < 256); // The first entry is normally the superblock (0), so we // special case this. if !path.is_empty() && path[0] == 0 { let count = ((path.len() as u8) - 1) << 1; cursor.write_u8(count as u8).unwrap(); vm::pack_u64s(&mut cursor, &path[1..]).unwrap(); } else { let count = ((path.len() as u8) << 1) | 1; cursor.write_u8(count as u8).unwrap(); vm::pack_u64s(&mut cursor, path).unwrap(); } BASE64.encode(&buffer) } pub fn decode_node_path(text: &str) -> anyhow::Result> { let mut buffer = vec![0; 128]; let bytes = &mut buffer[0..BASE64.decode_len(text.len()).unwrap()]; BASE64 .decode_mut(text.as_bytes(), &mut bytes[0..]) .map_err(|_| anyhow!("bad node path. Unable to base64 decode."))?; let mut input = std::io::Cursor::new(bytes); let mut count = input.read_u8()?; let mut prepend_zero = false; if (count & 0x1) == 0 { // Implicit 0 as first entry prepend_zero = true; } count >>= 1; let count = count as usize; let mut path; if count == 0 { path = vec![]; } else { let mut output = Vec::with_capacity(count * 8); let mut cursor = std::io::Cursor::new(&mut output); let mut vm = vm::VM::new(); let written = vm.exec(&mut input, &mut cursor, count * 8)?; assert_eq!(written, count * 8); let mut cursor = std::io::Cursor::new(&mut output); path = vm::unpack_u64s(&mut cursor, count)?; } if prepend_zero { let mut full_path = vec![0u64]; full_path.append(&mut path); Ok(full_path) } else { Ok(path) } } #[test] fn test_encode_path() { struct Test(Vec); let tests = vec![ Test(vec![]), Test(vec![1]), Test(vec![1, 2]), Test(vec![1, 2, 3, 4]), Test(vec![0]), Test(vec![0, 0]), Test(vec![0, 1]), Test(vec![0, 1, 2]), Test(vec![0, 123, 201231, 3102983012]), ]; for t in tests { let encoded = encode_node_path(&t.0[0..]); let decoded = decode_node_path(&encoded).unwrap(); assert_eq!(decoded, &t.0[0..]); } } //------------------------------------------ const NODE_HEADER_SIZE: usize = 32; #[derive(Error, Clone, Debug)] pub enum BTreeError { // #[error("io error")] IoError, // (std::io::Error), // FIXME: we can't clone an io_error // #[error("node error: {0}")] NodeError(String), // #[error("value error: {0}")] ValueError(String), // #[error("keys: {0:?}")] KeyContext(KeyRange, Box), // #[error("aggregate: {0:?}")] Aggregate(Vec), // #[error("{0:?}, {1}")] Path(Vec, Box), } impl fmt::Display for BTreeError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { BTreeError::IoError => write!(f, "io error"), BTreeError::NodeError(msg) => write!(f, "node error: {}", msg), BTreeError::ValueError(msg) => write!(f, "value error: {}", msg), BTreeError::KeyContext(kr, be) => write!(f, "{}, effecting keys {}", be, kr), BTreeError::Aggregate(errs) => { for e in errs { write!(f, "{}", e)? } Ok(()) } BTreeError::Path(path, e) => write!(f, "{} {}", e, encode_node_path(path)), } } } pub fn node_err(path: &[u64], msg: &str) -> BTreeError { BTreeError::Path( path.to_vec(), Box::new(BTreeError::NodeError(msg.to_string())), ) } pub fn node_err_s(path: &[u64], msg: String) -> BTreeError { BTreeError::Path(path.to_vec(), Box::new(BTreeError::NodeError(msg))) } pub fn io_err(path: &[u64]) -> BTreeError { BTreeError::Path(path.to_vec(), Box::new(BTreeError::IoError)) } pub fn value_err(msg: String) -> BTreeError { BTreeError::ValueError(msg) } pub fn aggregate_error(rs: Vec) -> BTreeError { BTreeError::Aggregate(rs) } impl BTreeError { pub fn keys_context(self, keys: &KeyRange) -> BTreeError { BTreeError::KeyContext(keys.clone(), Box::new(self)) } } pub type Result = std::result::Result; //------------------------------------------ #[derive(Debug, Clone, Copy)] pub struct NodeHeader { pub block: u64, pub is_leaf: bool, pub nr_entries: u32, pub max_entries: u32, pub value_size: u32, } #[allow(dead_code)] const INTERNAL_NODE: u32 = 1; const LEAF_NODE: u32 = 2; impl Unpack for NodeHeader { fn disk_size() -> u32 { 32 } fn unpack(data: &[u8]) -> IResult<&[u8], NodeHeader> { let (i, _csum) = le_u32(data)?; let (i, flags) = le_u32(i)?; let (i, block) = le_u64(i)?; let (i, nr_entries) = le_u32(i)?; let (i, max_entries) = le_u32(i)?; let (i, value_size) = le_u32(i)?; let (i, _padding) = le_u32(i)?; Ok(( i, NodeHeader { block, is_leaf: flags == LEAF_NODE, nr_entries, max_entries, value_size, }, )) } } impl Pack for NodeHeader { fn pack(&self, w: &mut W) -> anyhow::Result<()> { // csum needs to be calculated right for the whole metadata block. w.write_u32::(0)?; let flags; if self.is_leaf { flags = LEAF_NODE; } else { flags = INTERNAL_NODE; } w.write_u32::(flags)?; w.write_u64::(self.block)?; w.write_u32::(self.nr_entries)?; w.write_u32::(self.max_entries)?; w.write_u32::(self.value_size)?; w.write_u32::(0)?; Ok(()) } } #[derive(Clone)] pub enum Node { Internal { header: NodeHeader, keys: Vec, values: Vec, }, Leaf { header: NodeHeader, keys: Vec, values: Vec, }, } impl Node { pub fn get_header(&self) -> &NodeHeader { use Node::*; match self { Internal { header, .. } => header, Leaf { header, .. } => header, } } fn get_mut_header(&mut self) -> &mut NodeHeader { use Node::*; match self { Internal { header, .. } => header, Leaf { header, .. } => header, } } pub fn get_keys(&self) -> &[u64] { use Node::*; match self { Internal { keys, .. } => &keys[0..], Leaf { keys, .. } => &keys[0..], } } pub fn set_block(&mut self, b: u64) { self.get_mut_header().block = b; } } pub fn convert_result<'a, V>(path: &[u64], r: IResult<&'a [u8], V>) -> Result<(&'a [u8], V)> { r.map_err(|_e| node_err(path, "parse error")) } pub fn convert_io_err(path: &[u64], r: std::io::Result) -> Result { r.map_err(|_| io_err(path)) } pub fn unpack_node( path: &[u64], data: &[u8], ignore_non_fatal: bool, is_root: bool, ) -> Result> { use nom::multi::count; let (i, header) = NodeHeader::unpack(data).map_err(|_e| node_err(path, "couldn't parse node header"))?; if header.is_leaf && header.value_size != V::disk_size() { return Err(node_err_s( path, format!( "value_size mismatch: expected {}, was {}", V::disk_size(), header.value_size ), )); } let elt_size = header.value_size + 8; if elt_size as usize * header.max_entries as usize + NODE_HEADER_SIZE > BLOCK_SIZE { return Err(node_err_s( path, format!("max_entries is too large ({})", header.max_entries), )); } if header.nr_entries > header.max_entries { return Err(node_err(path, "nr_entries > max_entries")); } if !ignore_non_fatal { if header.max_entries % 3 != 0 { return Err(node_err(path, "max_entries is not divisible by 3")); } if !is_root { /* let min = header.max_entries / 3; if header.nr_entries < min { return Err(node_err_s( path, format!( "too few entries {}, expected at least {}", header.nr_entries, min ), )); } */ } } let (i, keys) = convert_result(path, count(le_u64, header.nr_entries as usize)(i))?; let mut last = None; for k in &keys { if let Some(l) = last { if k <= l { return Err(node_err(&path, "keys out of order")); } } last = Some(k); } let nr_free = header.max_entries - header.nr_entries; let (i, _padding) = convert_result(path, count(le_u64, nr_free as usize)(i))?; if header.is_leaf { let (_i, values) = convert_result(path, count(V::unpack, header.nr_entries as usize)(i))?; Ok(Node::Leaf { header, keys, values, }) } else { let (_i, values) = convert_result(&path, count(le_u64, header.nr_entries as usize)(i))?; Ok(Node::Internal { header, keys, values, }) } } //------------------------------------------