From 861b2f21ff66413826e7ef46a0198f7c45269bb4 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 24 Jun 2020 13:32:47 +0100 Subject: [PATCH] [thin_shrink] Write thinp xml format reader and writer. Still need to tidy up the error handling. --- Cargo.lock | 17 ++ Cargo.toml | 2 + src/bin/thin_shrink.rs | 41 +++++ src/lib.rs | 2 + src/shrink/mod.rs | 3 + src/shrink/toplevel.rs | 22 +++ src/shrink/xml.rs | 353 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 440 insertions(+) create mode 100644 src/bin/thin_shrink.rs create mode 100644 src/shrink/mod.rs create mode 100644 src/shrink/toplevel.rs create mode 100644 src/shrink/xml.rs diff --git a/Cargo.lock b/Cargo.lock index 0ca6d78..508bd52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "anyhow" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "arrayvec" version = "0.4.12" @@ -240,6 +245,14 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "quick-xml" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quickcheck" version = "0.9.2" @@ -380,6 +393,7 @@ dependencies = [ name = "thinp" version = "0.1.0" dependencies = [ + "anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -390,6 +404,7 @@ dependencies = [ "num-derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -456,6 +471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" "checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" "checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" @@ -484,6 +500,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num_cpus 1.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" "checksum ppv-lite86 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" "checksum proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)" = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +"checksum quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82" "checksum quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f" "checksum quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "608c156fd8e97febc07dc9c2e2c80bf74cfc6ef26893eae3daf8bc2bc94a4b7f" "checksum quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" diff --git a/Cargo.toml b/Cargo.toml index e1e1c7c..cf59ab7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,13 @@ edition = "2018" license = "GPL3" [dependencies] +anyhow = "1.0" byteorder = "1.3" clap = "2.33" crc32c = "0.4" flate2 = "1.0" libc = "0.2.71" +quick-xml = "0.18" nix = "0.17" nom = "5.1" num_cpus = "1.13" diff --git a/src/bin/thin_shrink.rs b/src/bin/thin_shrink.rs new file mode 100644 index 0000000..55632c5 --- /dev/null +++ b/src/bin/thin_shrink.rs @@ -0,0 +1,41 @@ +extern crate clap; +extern crate thinp; + +use clap::{App, Arg}; +use std::process::exit; +use thinp::file_utils; + +fn main() { + let parser = App::new("thin_shrink") + .version(thinp::version::TOOLS_VERSION) + .about("Rewrite xml metadata and move data in an inactive pool.") + .arg(Arg::with_name("INPUT") + .help("Specify thinp metadata xml file") + .required(true) + .long("input") + .value_name("INPUT") + .takes_value(true)) + .arg(Arg::with_name("OUTPUT") + .help("Specify output xml file") + .required(true) + .long("output") + .value_name("OUTPUT") + .takes_value(true)); + + let matches = parser.get_matches(); + + // FIXME: check these look like xml + let input_file = matches.value_of("INPUT").unwrap(); + let map_file = matches.value_of("MAP").unwrap(); + let output_file = matches.value_of("OUTPUT").unwrap(); + + if !file_utils::file_exists(input_file) { + eprintln!("Couldn't find input file '{}'.", &input_file); + exit(1); + } + + if let Err(reason) = thinp::shrink::toplevel::shrink(&input_file, &output_file) { + println!("Application error: {}\n", reason); + exit(1); + } +} diff --git a/src/lib.rs b/src/lib.rs index cf410c5..412f468 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +extern crate anyhow; extern crate byteorder; extern crate crc32c; extern crate flate2; @@ -18,4 +19,5 @@ pub mod block_manager; pub mod check; pub mod file_utils; pub mod pack; +pub mod shrink; pub mod version; diff --git a/src/shrink/mod.rs b/src/shrink/mod.rs new file mode 100644 index 0000000..a2c8345 --- /dev/null +++ b/src/shrink/mod.rs @@ -0,0 +1,3 @@ +pub mod toplevel; + +mod xml; diff --git a/src/shrink/toplevel.rs b/src/shrink/toplevel.rs new file mode 100644 index 0000000..0a647c2 --- /dev/null +++ b/src/shrink/toplevel.rs @@ -0,0 +1,22 @@ +use anyhow::Result; +use std::fs::OpenOptions; +use std::os::unix::fs::OpenOptionsExt; + +use crate::shrink::xml; + +//--------------------------------------- + +pub fn shrink(input_file: &str, _output_file: &str, _map_file: &str) -> Result<()> { + let input = OpenOptions::new() + .read(true) + .write(false) + .custom_flags(libc::O_EXCL) + .open(input_file)?; + + let mut visitor = xml::XmlWriter::new(std::io::stdout()); + xml::read(input, &mut visitor)?; + + Ok(()) +} + +//--------------------------------------- diff --git a/src/shrink/xml.rs b/src/shrink/xml.rs new file mode 100644 index 0000000..60cd3c4 --- /dev/null +++ b/src/shrink/xml.rs @@ -0,0 +1,353 @@ +use anyhow::Result; +use std::{ + borrow::{Cow}, + fmt::Display, + io::prelude::*, + io::BufReader, + io::Write, +}; + +use quick_xml::events::attributes::Attribute; +use quick_xml::events::{BytesEnd, BytesStart, Event}; +use quick_xml::{Reader, Writer}; + +//--------------------------------------- + +pub struct Superblock { + uuid: String, + time: u64, + transaction: u64, + flags: Option, + version: Option, + data_block_size: u32, + nr_data_blocks: u64, + metadata_snap: Option, +} + +pub struct Device { + dev_id: u32, + mapped_blocks: u64, + transaction: u64, + creation_time: u64, + snap_time: u64, +} + +pub struct Map { + thin_begin: u64, + data_begin: u64, + time: u32, + len: u64, +} + +pub trait MetadataVisitor { + fn superblock_b(&mut self, sb: &Superblock) -> Result<()>; + fn superblock_e(&mut self) -> Result<()>; + + fn device_b(&mut self, d: &Device) -> Result<()>; + fn device_e(&mut self) -> Result<()>; + + fn map(&mut self, m: Map) -> Result<()>; + + fn eof(&mut self) -> Result<()>; +} + +pub struct XmlWriter { + w: Writer, +} + +impl XmlWriter { + pub fn new(w: W) -> XmlWriter { + XmlWriter { w: Writer::new_with_indent(w, 0x20, 2) } + } +} + +fn mk_attr_<'a, T: Display>(n: T) -> Cow<'a, [u8]> { + let str = format!("{}", n); + Cow::Owned(str.into_bytes()) +} + +fn mk_attr<'a, T: Display>(key: &[u8], value: T) -> Attribute { + Attribute { + key, + value: mk_attr_(value), + } +} + +const XML_VERSION: u32 = 2; + +impl MetadataVisitor for XmlWriter { + fn superblock_b(&mut self, sb: &Superblock) -> Result<()> { + let tag = b"superblock"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"uuid", sb.uuid.clone())); + elem.push_attribute(mk_attr(b"time", sb.time)); + elem.push_attribute(mk_attr(b"transaction", sb.transaction)); + if let Some(flags) = sb.flags { + // FIXME: is this really a nr? + elem.push_attribute(mk_attr(b"flags", flags)); + } + + elem.push_attribute(mk_attr(b"version", XML_VERSION)); + elem.push_attribute(mk_attr(b"data_block_size", sb.data_block_size)); + elem.push_attribute(mk_attr(b"nr_data_blocks", sb.nr_data_blocks)); + + if let Some(snap) = sb.metadata_snap { + elem.push_attribute(mk_attr(b"metadata_snap", snap)); + } + + self.w.write_event(Event::Start(elem))?; + Ok(()) + } + + fn superblock_e(&mut self) -> Result<()> { + self.w + .write_event(Event::End(BytesEnd::borrowed(b"superblock")))?; + Ok(()) + } + + fn device_b(&mut self, d: &Device) -> Result<()> { + let tag = b"device"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"dev_id", d.dev_id)); + elem.push_attribute(mk_attr(b"mapped_blocks", d.mapped_blocks)); + elem.push_attribute(mk_attr(b"transaction", d.transaction)); + elem.push_attribute(mk_attr(b"creation_time", d.creation_time)); + elem.push_attribute(mk_attr(b"snap_time", d.snap_time)); + self.w.write_event(Event::Start(elem))?; + Ok(()) + } + + fn device_e(&mut self) -> Result<()> { + self.w + .write_event(Event::End(BytesEnd::borrowed(b"device")))?; + Ok(()) + } + + fn map(&mut self, m: Map) -> Result<()> { + match m.len { + 1 => { + let tag = b"single_mapping"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"origin_block", m.thin_begin)); + elem.push_attribute(mk_attr(b"data_block", m.data_begin)); + elem.push_attribute(mk_attr(b"time", m.time)); + self.w.write_event(Event::Empty(elem))?; + } + _ => { + let tag = b"range_mapping"; + let mut elem = BytesStart::owned(tag.to_vec(), tag.len()); + elem.push_attribute(mk_attr(b"origin_begin", m.thin_begin)); + elem.push_attribute(mk_attr(b"data_begin", m.data_begin)); + elem.push_attribute(mk_attr(b"length", m.len)); + elem.push_attribute(mk_attr(b"time", m.time)); + self.w.write_event(Event::Empty(elem))?; + } + } + Ok(()) + } + + fn eof(&mut self) -> Result<()> { + let w = self.w.inner(); + w.flush()?; + Ok(()) + } +} + +//--------------------------------------- + +// FIXME: nasty unwraps +fn string_val(kv: &Attribute) -> String { + let v = kv.unescaped_value().unwrap(); + let bytes = v.to_vec(); + String::from_utf8(bytes).unwrap() +} + +// FIXME: there's got to be a way of doing this without copying the string +fn u64_val(kv: &Attribute) -> Result { + let n = string_val(kv).parse::()?; + Ok(n) +} + +fn u32_val(kv: &Attribute) -> Result { + let n = string_val(kv).parse::()?; + Ok(n) +} + +fn bad_attr(_tag: &str, _attr: &[u8]) -> Result { + todo!(); +} + +fn missing_attr(_tag: &str, _attr: &str) -> Result { + todo!(); +} + +fn check_attr(tag: &str, name: &str, maybe_v: Option) -> Result { + match maybe_v { + None => missing_attr(tag, name), + Some(v) => Ok(v) + } +} + +fn parse_superblock(e: &BytesStart) -> Result { + let mut uuid: Option = None; + let mut time: Option = None; + let mut transaction: Option = None; + let mut flags: Option = None; + let mut version: Option = None; + let mut data_block_size: Option = None; + let mut nr_data_blocks: Option = None; + let mut metadata_snap: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"uuid" => uuid = Some(string_val(&kv)), + b"time" => time = Some(u64_val(&kv)?), + b"transaction" => transaction = Some(u64_val(&kv)?), + b"flags" => flags = Some(u32_val(&kv)?), + b"version" => version = Some(u32_val(&kv)?), + b"data_block_size" => data_block_size = Some(u32_val(&kv)?), + b"nr_data_blocks" => nr_data_blocks = Some(u64_val(&kv)?), + b"metadata_snap" => metadata_snap = Some(u64_val(&kv)?), + _ => return bad_attr("superblock", kv.key), + } + } + + let tag = "superblock"; + + Ok(Superblock { + uuid: check_attr(tag, "uuid", uuid)?, + time: check_attr(tag, "time", time)?, + transaction: check_attr(tag, "transaction", transaction)?, + flags: flags, + version: version, + data_block_size: check_attr(tag, "data_block_size", data_block_size)?, + nr_data_blocks: check_attr(tag, "nr_data_blocks", nr_data_blocks)?, + metadata_snap: metadata_snap, + }) +} + +fn parse_device(e: &BytesStart) -> Result { + let mut dev_id: Option = None; + let mut mapped_blocks: Option = None; + let mut transaction: Option = None; + let mut creation_time: Option = None; + let mut snap_time: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"dev_id" => dev_id = Some(u32_val(&kv)?), + b"mapped_blocks" => mapped_blocks = Some(u64_val(&kv)?), + b"transaction" => transaction = Some(u64_val(&kv)?), + b"creation_time" => creation_time = Some(u64_val(&kv)?), + b"snap_time" => snap_time = Some(u64_val(&kv)?), + _ => return bad_attr("device", kv.key), + } + } + + let tag = "device"; + + Ok(Device { + dev_id: check_attr(tag, "dev_id", dev_id)?, + mapped_blocks: check_attr(tag, "mapped_blocks", mapped_blocks)?, + transaction: check_attr(tag, "transaction", transaction)?, + creation_time: check_attr(tag, "creation_time", creation_time)?, + snap_time: check_attr(tag, "snap_time", snap_time)?, + }) +} + +fn parse_single_map(e: &BytesStart) -> Result { + let mut thin_begin: Option = None; + let mut data_begin: Option = None; + let mut time: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"origin_block" => thin_begin = Some(u64_val(&kv)?), + b"data_block" => data_begin = Some(u64_val(&kv)?), + b"time" => time = Some(u32_val(&kv)?), + _ => return bad_attr("single_mapping", kv.key), + } + } + + let tag = "single_mapping"; + + Ok(Map { + thin_begin: check_attr(tag, "origin_block", thin_begin)?, + data_begin: check_attr(tag, "data_block", data_begin)?, + time: check_attr(tag, "time", time)?, + len: 1 + }) +} + +fn parse_range_map(e: &BytesStart) -> Result { + let mut thin_begin: Option = None; + let mut data_begin: Option = None; + let mut time: Option = None; + let mut length: Option = None; + + for a in e.attributes() { + let kv = a.unwrap(); + match kv.key { + b"origin_begin" => thin_begin = Some(u64_val(&kv)?), + b"data_begin" => data_begin = Some(u64_val(&kv)?), + b"time" => time = Some(u32_val(&kv)?), + b"length" => length = Some(u64_val(&kv)?), + _ => return bad_attr("range_mapping", kv.key), + } + } + + let tag = "range_mapping"; + + Ok(Map { + thin_begin: check_attr(tag, "origin_begin", thin_begin)?, + data_begin: check_attr(tag, "data_begin", data_begin)?, + time: check_attr(tag, "time", time)?, + len: check_attr(tag, "length", length)?, + }) +} + +pub fn read(input: R, visitor: &mut M) -> Result<()> +where + R: Read, + M: MetadataVisitor, +{ + let input = BufReader::new(input); + let mut reader = Reader::from_reader(input); + + reader.trim_text(true); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf) { + Ok(Event::Start(ref e)) => match e.name() { + b"superblock" => visitor.superblock_b(&parse_superblock(e)?)?, + b"device" => visitor.device_b(&parse_device(e)?)?, + _ => todo!(), + }, + Ok(Event::End(ref e)) => match e.name() { + b"superblock" => visitor.superblock_e()?, + b"device" => visitor.device_e()?, + _ => todo!(), + }, + Ok(Event::Empty(ref e)) => match e.name() { + b"single_mapping" => visitor.map(parse_single_map(e)?)?, + b"range_mapping" => visitor.map(parse_range_map(e)?)?, + _ => todo!(), + }, + Ok(Event::Text(_)) => {} + Ok(Event::Comment(_)) => {} + Ok(Event::Eof) => break, + Ok(_) => todo!(), + + // FIXME: don't panic! + Err(e) => panic!("error parsing xml {:?}", e), + } + } + + Ok(()) +} + +//---------------------------------------