Merge branch '2020-06-19-use-anyhow' into 2020-06-13-thin-check-rewrite

This commit is contained in:
Joe Thornber 2020-06-22 10:16:27 +01:00
commit 5e19029e65
43 changed files with 1880 additions and 273 deletions

View File

@ -1,3 +1,11 @@
v0.9.0
======
- New support tools: thin_metadata_{pack,unpack}.
- thin_check now checks data block reference counts.
- thin_check can now check metadata snapshots.
- some metadata space map bug fixes.
v0.7.5
======

28
Cargo.lock generated
View File

@ -21,6 +21,11 @@ dependencies = [
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "arrayvec"
version = "0.4.12"
@ -380,6 +385,7 @@ dependencies = [
name = "thinp"
version = "0.1.0"
dependencies = [
"anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32c 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -393,6 +399,25 @@ dependencies = [
"quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"quickcheck_macros 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thiserror"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thiserror-impl"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -456,6 +481,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum anyhow 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
@ -501,6 +527,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08"
"checksum thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"

View File

@ -6,6 +6,7 @@ edition = "2018"
license = "GPL3"
[dependencies]
anyhow = "1.0"
byteorder = "1.3"
clap = "2.33"
crc32c = "0.4"
@ -14,9 +15,10 @@ libc = "0.2.71"
nix = "0.17"
nom = "5.1"
num_cpus = "1.13"
rand = "0.7"
num-traits = "0.2"
num-derive = "0.3"
num-traits = "0.2"
rand = "0.7"
thiserror = "1.0"
[dev-dependencies]
quickcheck = "0.9"

View File

@ -40,6 +40,7 @@ SOURCE=\
base/error_state.cc \
base/error_string.cc \
base/grid_layout.cc \
base/io_generator.cc \
base/file_utils.cc \
base/progress_monitor.cc \
base/rolling_hash.cc \
@ -128,6 +129,7 @@ DEVTOOLS_SOURCE=\
thin-provisioning/thin_ll_restore.cc \
thin-provisioning/thin_show_duplicates.cc \
thin-provisioning/thin_generate_metadata.cc \
thin-provisioning/thin_generate_mappings.cc \
thin-provisioning/variable_chunk_stream.cc \
thin-provisioning/thin_show_metadata.cc \
thin-provisioning/thin_scan.cc \
@ -167,7 +169,7 @@ CXXFLAGS+=@CXXDEBUG_FLAG@
CXXFLAGS+=@CXX_STRERROR_FLAG@
CXXFLAGS+=@LFS_FLAGS@
INCLUDES+=-I$(TOP_BUILDDIR) -I$(TOP_DIR) -I$(TOP_DIR)/thin-provisioning
LIBS:=-laio -lexpat -lz -lboost_iostreams -ldl
LIBS:=-laio -lexpat -lboost_iostreams -ldl
ifeq ("@DEVTOOLS@", "yes")
LIBS+=-lncurses
@ -318,7 +320,6 @@ install: bin/pdata_tools $(MANPAGES)
$(INSTALL_DATA) man8/thin_repair.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_restore.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_rmap.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_size.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/era_check.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/era_dump.8 $(MANPATH)/man8
@ -333,10 +334,16 @@ ifeq ("@DEVTOOLS@", "yes")
ln -s -f pdata_tools $(BINDIR)/thin_scan
endif
.PHONY: install install-rust-tools
.PHONY: install install-rust-tools rust-tools
install-rust-tools:
cargo install --path . --root $(BINDIR)
rust-tools:
cargo build --release
install-rust-tools: man8/thin_metadata_pack.8 man8/thin_metadata_unpack.8 rust-tools
$(INSTALL_PROGRAM) target/release/thin_metadata_pack $(BINDIR)
$(INSTALL_PROGRAM) target/release/thin_metadata_unpack $(BINDIR)
$(STRIP) $(BINDIR)/thin_metadata_pack
$(STRIP) $(BINDIR)/thin_metadata_unpack
$(INSTALL_DATA) man8/thin_metadata_pack.8 $(MANPATH)/man8
$(INSTALL_DATA) man8/thin_metadata_unpack.8 $(MANPATH)/man8

View File

@ -1 +1 @@
0.8.5
0.9.0-rc1

25
base/io.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef BASE_IO_H
#define BASE_IO_H
#include "base/types.h"
#include <stdint.h>
//----------------------------------------------------------------
namespace base {
// Operation carried by an io request. The numeric values are spelled
// out because the value is stored in io::op_ as a plain unsigned.
enum req_op {
	REQ_OP_READ = 0,
	REQ_OP_WRITE = 1,
	REQ_OP_DISCARD = 2
};
// Describes a single io request: what to do, where, and how much.
struct io {
// Operation to perform; one of the req_op values declared above.
unsigned op_;
// Start position of the request (a sector_t; presumably counted in
// sectors -- confirm against the consumers of this struct).
sector_t sector_;
// Length of the request, in the same unit as sector_.
sector_t size_;
};
}
//----------------------------------------------------------------
#endif

240
base/io_generator.cc Normal file
View File

@ -0,0 +1,240 @@
#include "base/io_generator.h"
#include <stdexcept>
#include <cstdlib>
#include <cstring>
using namespace base;
//----------------------------------------------------------------
namespace {
// Lookup table mapping the textual name of an io pattern to its
// io_pattern::pattern value.
std::pair<char const*, io_pattern::pattern> patterns[] = {
{"read", io_pattern::READ},
{"write", io_pattern::WRITE},
{"trim", io_pattern::TRIM},
{"readwrite", io_pattern::READ_WRITE},
{"trimwrite", io_pattern::TRIM_WRITE},
{"randread", io_pattern::RAND_READ},
{"randwrite", io_pattern::RAND_WRITE},
{"randtrim", io_pattern::RAND_TRIM},
{"randrw", io_pattern::RAND_RW},
{"randtw", io_pattern::RAND_TW}
};
// Number of entries in the table above.
unsigned const nr_patterns = sizeof(patterns) / sizeof(patterns[0]);
//--------------------------------
// Interface for objects that produce the start offset of each
// successive io.
class offset_generator {
public:
	typedef std::shared_ptr<offset_generator> ptr;

	// Implementations are held through base-class pointers, so the
	// destructor has to be virtual.
	virtual ~offset_generator() {}

	// Returns the offset to use for the next io.
	virtual base::sector_t next_offset() = 0;
};
// Generates ascending offsets that step through [offset, offset + size)
// one block at a time, wrapping back to the start of the range.
class sequential_offset_generator: public offset_generator {
public:
	// offset:     first offset of the range
	// size:       length of the range
	// block_size: distance between successive offsets
	// Throws std::runtime_error if the range cannot hold one block.
	sequential_offset_generator(base::sector_t offset,
				    base::sector_t size,
				    base::sector_t block_size)
		: block_size_(block_size),
		  begin_(offset),
		  end_(offset + size),
		  current_(offset) {
		if (size < block_size)
			throw std::runtime_error("size must be greater than block_size");
	}

	// Returns the current offset and advances by one block.
	base::sector_t next_offset() {
		sector_t r = current_;
		current_ += block_size_;

		// Wrap as soon as a whole block no longer fits before end_,
		// so a returned offset never makes an io run past the range.
		if (current_ + block_size_ > end_)
			current_ = begin_;

		return r;
	}

private:
	// Kept as sector_t so large block sizes are not truncated.
	base::sector_t block_size_;
	base::sector_t begin_;
	base::sector_t end_;
	base::sector_t current_;
};
// Generates block-aligned offsets picked with std::rand() from the
// blocks covering [offset, offset + size).
class random_offset_generator: public offset_generator {
public:
	// offset:     start of the range; rounded down to a block boundary
	// size:       length of the range; only whole blocks are used
	// block_size: size of each block
	// Throws std::runtime_error if block_size is zero or the range
	// does not contain at least one whole block. Both cases would
	// otherwise end in a division by zero.
	random_offset_generator(sector_t offset,
				sector_t size,
				sector_t block_size)
		: block_begin_(block_size ? offset / block_size : 0),
		  nr_blocks_(block_size ? size / block_size : 0),
		  block_size_(block_size) {
		if (!block_size)
			throw std::runtime_error("block_size must be non-zero");

		if (!nr_blocks_)
			throw std::runtime_error("size must be greater than block_size");
	}

	// Returns the offset of a randomly chosen block in the range.
	sector_t next_offset() {
		return ((std::rand() % nr_blocks_) + block_begin_) * block_size_;
	}

private:
	uint64_t block_begin_;
	uint64_t nr_blocks_;
	// Kept as sector_t so large block sizes are not truncated.
	sector_t block_size_;
};
//--------------------------------
// Chooses the operation for each io: either a single fixed op, or a
// random mix of two ops with a given percentage for the first one.
class op_generator {
public:
	typedef std::shared_ptr<op_generator> ptr;

	// Always yields op1.
	op_generator(base::req_op op1)
		: op1_(op1), op2_(op1), op1_pct_(100) {
	}

	// Yields op1 for roughly op1_pct percent of the calls and op2
	// otherwise. Throws std::runtime_error if op1_pct exceeds 100.
	op_generator(base::req_op op1,
		     base::req_op op2,
		     unsigned op1_pct)
		: op1_(op1), op2_(op2), op1_pct_(op1_pct) {
		if (op1_pct > 100)
			throw std::runtime_error("invalid percentage");
	}

	// Returns the operation for the next io.
	base::req_op next_op() {
		// The roll lies in [0, 99]. Exactly op1_pct_ of those values
		// are below the threshold, so ">=" is needed: with ">" a
		// percentage of 0 would still select op1 when the roll is 0.
		if (static_cast<unsigned>(std::rand()) % 100 >= op1_pct_)
			return op2_;

		return op1_;
	}

private:
	base::req_op op1_;
	base::req_op op2_;
	unsigned op1_pct_;
};
//--------------------------------
// io_generator implementation that combines an offset generator and an
// op generator, and stops once the requested amount of io was produced.
class base_io_generator: public io_generator {
public:
	base_io_generator(io_generator_options const &opts);

	// True while fewer than io_size_total_ units have been generated.
	virtual bool has_next();

	// Fills in next_io with the next request.
	virtual void next(base::io &next_io);

private:
	// Factory helpers used by the constructor's initializer list.
	offset_generator::ptr
	create_offset_generator(io_generator_options const &opts);

	op_generator::ptr
	create_op_generator(io_generator_options const &opts);

	offset_generator::ptr offset_gen_;
	op_generator::ptr op_gen_;
	sector_t block_size_;

	// The counters use sector_t, like the option they are loaded from,
	// so totals above 2^32 are not truncated where size_t is 32 bits.
	sector_t io_size_finished_;
	sector_t io_size_total_;
};
// Builds the offset and op generators selected by opts and records the
// block size and the total amount of io to generate (opts.io_size_).
// The progress counter starts at zero.
base_io_generator::base_io_generator(io_generator_options const &opts)
: offset_gen_(create_offset_generator(opts)),
op_gen_(create_op_generator(opts)),
block_size_(opts.block_size_),
io_size_finished_(0),
io_size_total_(opts.io_size_) {
}
// Reports whether the generator still has io to hand out.
bool base_io_generator::has_next() {
	bool const exhausted = io_size_finished_ >= io_size_total_;
	return !exhausted;
}
// Fills next_io with the next request: an op from the op generator, an
// offset from the offset generator, and a length of one block. Each
// call accounts for one block of progress.
// Throws std::runtime_error if the requested total was already generated.
void base_io_generator::next(base::io &next_io) {
	if (io_size_finished_ >= io_size_total_)
		throw std::runtime_error("io generator exhausted");

	next_io.op_ = op_gen_->next_op();
	next_io.sector_ = offset_gen_->next_offset();
	next_io.size_ = block_size_;

	io_size_finished_ += block_size_;
}
// Picks the offset generator matching the pattern: random patterns get
// a random_offset_generator, everything else a sequential one.
offset_generator::ptr
base_io_generator::create_offset_generator(io_generator_options const &opts) {
	offset_generator::ptr gen;

	if (opts.pattern_.is_random())
		gen.reset(new random_offset_generator(opts.offset_,
						      opts.size_,
						      opts.block_size_));
	else
		gen.reset(new sequential_offset_generator(opts.offset_,
							  opts.size_,
							  opts.block_size_));

	return gen;
}
// Builds the op generator for the requested pattern. Mixed patterns
// use a 50/50 split between their two operations.
// Throws std::runtime_error for a pattern value that is not recognised.
op_generator::ptr
base_io_generator::create_op_generator(io_generator_options const &opts) {
	// FIXME: eliminate the switch-case and hide enum values

	// A pattern and its random variant differ only in the RANDOM bit
	// and map to the same operations, so the bit is dropped up front.
	int const ops = opts.pattern_.val_ & ~io_pattern::RANDOM;
	op_generator::ptr gen;

	switch (ops) {
	case io_pattern::READ:
		gen.reset(new op_generator(base::REQ_OP_READ));
		break;

	case io_pattern::WRITE:
		gen.reset(new op_generator(base::REQ_OP_WRITE));
		break;

	case io_pattern::TRIM:
		gen.reset(new op_generator(base::REQ_OP_DISCARD));
		break;

	case io_pattern::READ_WRITE:
		gen.reset(new op_generator(base::REQ_OP_READ,
					   base::REQ_OP_WRITE,
					   50));
		break;

	case io_pattern::TRIM_WRITE:
		gen.reset(new op_generator(base::REQ_OP_DISCARD,
					   base::REQ_OP_WRITE,
					   50));
		break;

	default:
		throw std::runtime_error("unknown pattern");
	}

	return gen;
}
}
//----------------------------------------------------------------
// A default-constructed pattern is a sequential read.
io_pattern::io_pattern() {
	val_ = READ;
}
// Constructs a pattern from its textual name; see parse() for the
// accepted names and the failure behaviour.
io_pattern::io_pattern(char const *pattern) {
	this->parse(pattern);
}
// Sets val_ from a textual pattern name such as "read" or "randrw".
// The name must match an entry of the patterns table exactly.
// Throws std::runtime_error if the name is null or not in the table;
// val_ is left untouched in that case.
void
io_pattern::parse(char const *pattern) {
	// strcmp() must not be handed a null pointer.
	if (!pattern)
		throw std::runtime_error("unknown pattern");

	for (unsigned i = 0; i < nr_patterns; i++) {
		if (!strcmp(patterns[i].first, pattern)) {
			val_ = patterns[i].second;
			return;
		}
	}

	throw std::runtime_error("unknown pattern");
}
bool
io_pattern::is_random() const {
return val_ & pattern::RANDOM;
}
//----------------------------------------------------------------
// Factory for the io generator described by opts.
io_generator::ptr
base::create_io_generator(io_generator_options const &opts) {
	io_generator::ptr gen(new base_io_generator(opts));
	return gen;
}
//----------------------------------------------------------------

55
base/io_generator.h Normal file
View File

@ -0,0 +1,55 @@
#ifndef BASE_IO_GENERATOR_H
#define BASE_IO_GENERATOR_H
#include "base/io.h"
#include <memory>
//----------------------------------------------------------------
namespace base {
// Names the kind of io stream to generate. The value is a bit set:
// one or two operation bits, optionally combined with RANDOM.
struct io_pattern {
	enum pattern {
		// Operation bits
		READ = 0x002,
		WRITE = 0x004,
		TRIM = 0x008,

		// Modifier bit, tested by is_random()
		RANDOM = 0x100,

		// Sequential mixes of two operations
		READ_WRITE = READ | WRITE,
		TRIM_WRITE = WRITE | TRIM,

		// Random variants of all of the above
		RAND_READ = READ | RANDOM,
		RAND_WRITE = WRITE | RANDOM,
		RAND_TRIM = TRIM | RANDOM,
		RAND_RW = READ_WRITE | RANDOM,
		RAND_TW = TRIM_WRITE | RANDOM
	};

	io_pattern();
	io_pattern(char const *pattern);

	// Sets val_ from a textual pattern name.
	void parse(char const *pattern);

	// True when the RANDOM bit is set.
	bool is_random() const;

	pattern val_;
};
// Parameters handed to create_io_generator().
struct io_generator_options {
// Which operations to issue and whether offsets are random.
io_pattern pattern_;
// Start of the range that offsets are generated in.
sector_t offset_;
// Size of each generated io; also the step between offsets.
sector_t block_size_;
// Length of the range that offsets are generated in.
sector_t size_;
// Total amount of io to generate before the generator is exhausted.
sector_t io_size_;
};
// Abstract source of io requests.
class io_generator {
public:
	typedef std::shared_ptr<io_generator> ptr;

	// Implementations are owned through pointers to this interface,
	// so the destructor has to be virtual.
	virtual ~io_generator() {}

	// True while another request can be obtained with next().
	virtual bool has_next() = 0;

	// Fills in next_io with the next request.
	virtual void next(base::io &next_io) = 0;
};
// Creates an io generator configured by opts.
io_generator::ptr
create_io_generator(io_generator_options const &opts);
}
//----------------------------------------------------------------
#endif

View File

@ -16,8 +16,8 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef THINP_MATH_H
#define THINP_MATH_H
#ifndef BASE_MATH_H
#define BASE_MATH_H
//----------------------------------------------------------------
@ -34,6 +34,11 @@ namespace base {
T div_down(T const &v, T const &divisor) {
return v / divisor;
}
// Returns true if v is a power of two, i.e. has exactly one bit set.
// Zero has no bits set and is therefore rejected explicitly; the
// bit trick on its own would report it as a power of two.
template <typename T>
bool is_power_of_two(T const v) {
	return v && !(v & (v - 1));
}
}
//----------------------------------------------------------------

15
base/types.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef BASE_TYPES_H
#define BASE_TYPES_H
#include <stdint.h>
//----------------------------------------------------------------
namespace base {
	// Type used for sector addresses and sector counts.
	using sector_t = uint64_t;

	// Shift that converts between sectors and bytes (1 << 9 == 512).
	constexpr unsigned SECTOR_SHIFT = 9;
}
//----------------------------------------------------------------
#endif

View File

@ -24,7 +24,6 @@ namespace bi = boost::intrusive;
namespace bcache {
typedef uint64_t block_address;
typedef uint64_t sector_t;
class validator {
public:

View File

@ -1,6 +1,7 @@
#ifndef BLOCK_CACHE_IO_ENGINE_H
#define BLOCK_CACHE_IO_ENGINE_H
#include "base/types.h"
#include "base/unique_handle.h"
#include <boost/optional.hpp>
@ -18,9 +19,8 @@
//----------------------------------------------------------------
namespace bcache {
using sector_t = uint64_t;
unsigned const SECTOR_SHIFT = 9;
using base::sector_t;
using base::SECTOR_SHIFT;
// Virtual base class to aid unit testing
class io_engine {

View File

@ -34,8 +34,6 @@ namespace cache {
block_address const SUPERBLOCK_LOCATION = 0;
typedef uint64_t sector_t;
//------------------------------------------------
class space_map_ref_counter {

View File

@ -29,6 +29,7 @@ Options:
{-q|--quiet}
{-h|--help}
{-V|--version}
{-m|--metadata-snap}
{--override-mapping-root}
{--clear-needs-check-flag}
{--ignore-non-fatal-errors}
@ -75,7 +76,7 @@ Options:
{-V|--version}")
(define thin-metadata-pack-help
"thin_metadata_pack 0.8.5
"thin_metadata_pack 0.9.0-rc1
Produces a compressed file of thin metadata. Only packs metadata blocks that are actually used.
USAGE:
@ -90,7 +91,7 @@ OPTIONS:
-o <FILE> Specify packed output file")
(define thin-metadata-unpack-help
"thin_metadata_unpack 0.8.5
"thin_metadata_unpack 0.9.0-rc1
Unpack a compressed file of thin metadata.
USAGE:

View File

@ -511,7 +511,7 @@
(define-scenario (thin-metadata-pack version)
"accepts --version"
(run-ok-rcv (stdout _) (thin-metadata-pack "--version")
(assert-equal "thin_metadata_pack 0.8.5" stdout)))
(assert-equal "thin_metadata_pack 0.9.0-rc1" stdout)))
(define-scenario (thin-metadata-pack h)
"accepts -h"
@ -553,7 +553,7 @@
(define-scenario (thin-metadata-unpack version)
"accepts --version"
(run-ok-rcv (stdout _) (thin-metadata-unpack "--version")
(assert-equal "thin_metadata_unpack 0.8.5" stdout)))
(assert-equal "thin_metadata_unpack 0.9.0-rc1" stdout)))
(define-scenario (thin-metadata-unpack h)
"accepts -h"

View File

@ -8,7 +8,8 @@ DESCRIPTION
thin_check checks thin provisioning metadata created by the device-mapper
thin provisioning target on a device or file.
The tool cannot be run on live metadata.
The tool cannot be run on live metadata unless the --metadata-snapshot
option is used.
OPTIONS
-q, --quiet Suppress output messages, return only exit code.
@ -33,6 +34,12 @@ OPTIONS
is needed to fix any issues. After thin_repair succeeded, you may run
thin_check again.
--metadata-snapshot, -m Check the metadata snapshot.
This will check the devices tree and mappings in a metadata snapshot.
The snap does not contain space maps, so these will not be checked. This
may be used on live metadata.
--override-mapping-root <block> Specify a mapping root to use.
Don't use this. This overrides what's specified in the superblock. Only

View File

@ -19,7 +19,7 @@
#ifndef ARRAY_H
#define ARRAY_H
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
#include "persistent-data/data-structures/btree.h"
#include "persistent-data/data-structures/btree_counter.h"
#include "persistent-data/data-structures/btree_damage_visitor.h"

View File

@ -1,6 +1,6 @@
#include "persistent-data/data-structures/array.h"
#include "persistent-data/data-structures/bitset.h"
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
using namespace persistent_data;
using namespace persistent_data::bitset_detail;

View File

@ -0,0 +1,373 @@
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
namespace persistent_data {
// Shadows the child that entry 'index' of 'parent' points at, and
// rewrites that entry to point at the shadow.
//
// tm_.shadow() returns the write reference together with a flag. When
// the flag is set, the reference counts of everything the shadowed node
// points to are incremented: internal_rc_ is used for internal nodes,
// rc_ for leaves.
//
// Returns a shadow_child holding the write reference and the node type.
template <unsigned Levels, typename ValueTraits>
btree_detail::shadow_child
btree<Levels, ValueTraits>::
create_shadow_child(internal_node &parent,
unsigned index)
{
block_address b = parent.value_at(index);
pair<write_ref, bool> p = tm_.shadow(b, validator_);
write_ref &wr = p.first;
btree_detail::node_type type;
// Read the node as an internal node first, only to find out its type.
node_ref<block_traits> n = to_node<block_traits>(wr);
if (n.get_type() == btree_detail::INTERNAL) {
type = btree_detail::INTERNAL;
if (p.second)
n.inc_children(internal_rc_);
} else {
type = btree_detail::LEAF;
if (p.second) {
// Leaves hold values, so they need the value ref counter.
node_ref<ValueTraits> leaf = to_node<ValueTraits>(wr);
leaf.inc_children(rc_);
}
}
// Patch the parent so it references the shadow's location.
parent.set_value(index, wr.get_location());
return btree_detail::shadow_child(wr, type);
}
// Removes the entry identified by 'key' (one sub-key per level) from
// the tree, if it is present, and updates root_ to the spine's root.
//
// The upper Levels - 1 levels are walked with remove_location() on the
// block-valued trees; the walk stops early when a sub-key is not found.
// On the last level the matching leaf entry is deleted.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::
remove(key const &key)
{
using namespace btree_detail;
block_address block = root_;
unsigned index = 0;
shadow_spine spine(tm_, validator_);
bool need_remove = true;
for (unsigned level = 0; level < Levels - 1; ++level) {
need_remove = remove_location<block_traits>(spine, block,
key[level], &index,
internal_rc_);
if (!need_remove)
break;
// The value found at this level is the root of the next level's tree.
internal_node n = spine.get_node<block_traits>();
block = n.value_at(index);
}
if (need_remove) {
need_remove = remove_location<ValueTraits>(spine, block,
key[Levels - 1], &index,
rc_);
if (need_remove) {
leaf_node leaf = spine.get_node<ValueTraits>();
leaf.delete_at(index);
}
}
root_ = spine.get_root();
}
// Walks down a single-level tree from 'block' towards 'key', stepping
// the shadow spine and rebalancing the children of each internal node
// on the way.
//
// Returns true and stores the entry's position in *index when the walk
// reaches a leaf that contains 'key' exactly. Returns false when the
// leaf does not contain the key or rebalance_children() reports failure.
// NOTE(review): leaf_rc is not referenced in this function body.
template <unsigned Levels, typename _>
template <typename ValueTraits, typename RC>
bool
btree<Levels, _>::
remove_location(btree_detail::shadow_spine &spine,
block_address block,
uint64_t key,
unsigned *index,
RC &leaf_rc)
{
using namespace btree_detail;
unsigned i = 0;
bool r = false;
for (;;) {
r = spine.step(block);
// patch up the parent to point to the new shadow
if (spine.has_parent()) {
internal_node p = spine.get_parent();
p.set_value(i, spine.get_block());
}
internal_node n = spine.get_node<block_traits>();
if (n.get_type() == btree_detail::LEAF) {
node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
boost::optional<unsigned> idx = leaf.exact_search(key);
if (!idx)
return false;
*index = *idx;
return true;
}
r = rebalance_children<ValueTraits>(spine, key);
if (!r)
break;
// Re-read the node: rebalance_children() may have copied a leaf child
// over it when it had a single entry, so the type is checked again.
n = spine.get_node<block_traits>();
if (n.get_type() == btree_detail::LEAF) {
node_ref<ValueTraits> leaf = spine.get_node<ValueTraits>();
boost::optional<unsigned> idx = leaf.exact_search(key);
if (!idx)
return false;
*index = *idx;
return true;
}
// Descend into the child selected by the key.
i = n.lower_bound(key);
block = n.value_at(i);
}
return r;
}
// Rebalances the children of the spine's current (internal) node around
// the child that 'key' falls into.
//
// A node with a single entry is collapsed: the child's block contents
// are copied over the node and the child's reference is dropped.
// Otherwise the selected child is rebalanced with one neighbour when it
// is the first or last child, or with both neighbours when it is in the
// middle.
//
// Returns false when lower_bound() yields a negative index, true
// otherwise.
template <unsigned Levels, typename _>
template <typename ValueTraits>
bool
btree<Levels, _>::
rebalance_children(btree_detail::shadow_spine &spine, uint64_t key)
{
internal_node n = spine.get_node<block_traits>();
if (n.get_nr_entries() == 1) {
block_address b = n.value_at(0);
read_ref child = tm_.read_lock(b, validator_);
// FIXME: is it safe?
::memcpy(n.raw(), child.data(), read_ref::BLOCK_SIZE);
tm_.get_sm()->dec(child.get_location());
return true;
}
int i = n.lower_bound(key);
if (i < 0)
return false;
bool has_left_sibling = i > 0;
bool has_right_sibling = static_cast<unsigned>(i) < (n.get_nr_entries() - 1);
if (!has_left_sibling)
rebalance2<ValueTraits>(spine, i);
else if (!has_right_sibling)
rebalance2<ValueTraits>(spine, i - 1);
else
rebalance3<ValueTraits>(spine, i - 1);
return true;
}
// Shadows the two adjacent children at left_index and left_index + 1 of
// the spine's current node and rebalances them with __rebalance2().
// The left child's type decides whether both are treated as internal
// nodes or as leaves.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
rebalance2(btree_detail::shadow_spine &spine, unsigned left_index)
{
internal_node parent = spine.get_node<block_traits>();
shadow_child left = create_shadow_child(parent, left_index);
shadow_child right = create_shadow_child(parent, left_index + 1);
// FIXME: ugly
if (left.get_type() == btree_detail::INTERNAL) {
internal_node l = left.get_node<block_traits>();
internal_node r = right.get_node<block_traits>();
__rebalance2(parent, l, r, left_index);
} else {
node_ref<ValueTraits> l = left.get_node<ValueTraits>();
node_ref<ValueTraits> r = right.get_node<ValueTraits>();
__rebalance2(parent, l, r, left_index);
}
}
// Merges or rebalances two sibling nodes whose parent entries are at
// left_index and left_index + 1.
//
// When the combined entry count is below 2 * (merge_threshold + 1), the
// right node is merged into the left one, its parent entry is deleted
// and its block reference is dropped. Otherwise entries are moved so
// that the left node ends up with half of the total, and the parent's
// key for the right node is refreshed.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
__rebalance2(internal_node &parent,
node_ref<ValueTraits> &left,
node_ref<ValueTraits> &right,
unsigned left_index)
{
unsigned nr_left = left.get_nr_entries();
unsigned nr_right = right.get_nr_entries();
unsigned right_index = left_index + 1;
unsigned threshold = 2 * (left.merge_threshold() + 1);
if (nr_left + nr_right < threshold) {
// Merge the right child into the left
left.copy_entries_to_left(right, nr_right);
left.set_nr_entries(nr_left + nr_right);
parent.delete_at(right_index);
tm_.get_sm()->dec(right.get_location());
} else {
// Rebalance
unsigned target_left = (nr_left + nr_right) / 2;
// The count is computed unsigned and passed as int, so it becomes
// negative when the left node has fewer entries than its target.
// NOTE(review): presumably the sign selects the direction of the
// move -- confirm against node_ref::move_entries.
left.move_entries(right, nr_left - target_left);
parent.set_key(right_index, right.key_at(0));
}
}
// Shadows the three adjacent children starting at left_index of the
// spine's current node and rebalances them with __rebalance3().
// The left child's type decides whether all three are treated as
// internal nodes or as leaves.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
rebalance3(btree_detail::shadow_spine &spine, unsigned left_index)
{
internal_node parent = spine.get_node<block_traits>();
shadow_child left = create_shadow_child(parent, left_index);
shadow_child center = create_shadow_child(parent, left_index + 1);
shadow_child right = create_shadow_child(parent, left_index + 2);
// FIXME: ugly
if (left.get_type() == btree_detail::INTERNAL) {
internal_node l = left.get_node<block_traits>();
internal_node c = center.get_node<block_traits>();
internal_node r = right.get_node<block_traits>();
__rebalance3(parent, l, c, r, left_index);
} else {
node_ref<ValueTraits> l = left.get_node<ValueTraits>();
node_ref<ValueTraits> c = center.get_node<ValueTraits>();
node_ref<ValueTraits> r = right.get_node<ValueTraits>();
__rebalance3(parent, l, c, r, left_index);
}
}
// Decides how three sibling nodes are rebalanced: when their combined
// entry count is below merge_threshold * 4 + 1 the center node is
// removed, otherwise the entries are redistributed over all three.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
__rebalance3(internal_node &parent,
	     node_ref<ValueTraits> &left,
	     node_ref<ValueTraits> &center,
	     node_ref<ValueTraits> &right,
	     unsigned left_index)
{
	unsigned const in_left = left.get_nr_entries();
	unsigned const in_center = center.get_nr_entries();
	unsigned const in_right = right.get_nr_entries();
	unsigned const total = in_left + in_center + in_right;

	unsigned const limit = left.merge_threshold() * 4 + 1;

	if (total >= limit) {
		redistribute3(parent, left, center, right, left_index);
		return;
	}

	delete_center_node(parent, left, center, right, left_index);
}
// Empties the center one of three sibling nodes into its neighbours and
// removes it from the parent.
//
// As many center entries as fit are appended to the left node; any
// remaining ones are placed at the front of the right node. The
// parent's key for the right node is refreshed, the center's parent
// entry is deleted and its block reference dropped. Finally the two
// remaining nodes are rebalanced with __rebalance2().
//
// Throws std::runtime_error("too many entries") if a node would exceed
// max_entries.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
delete_center_node(internal_node &parent,
node_ref<ValueTraits> &left,
node_ref<ValueTraits> &center,
node_ref<ValueTraits> &right,
unsigned left_index)
{
unsigned center_index = left_index + 1;
unsigned right_index = left_index + 2;
unsigned max_entries = left.get_max_entries();
unsigned nr_left = left.get_nr_entries();
unsigned nr_center = center.get_nr_entries();
unsigned nr_right = right.get_nr_entries();
// Number of center entries that go to the left node.
unsigned shift = std::min(max_entries - nr_left, nr_center);
if (nr_left + shift > max_entries)
throw std::runtime_error("too many entries");
left.copy_entries_to_left(center, shift);
left.set_nr_entries(nr_left + shift);
if (shift != nr_center) {
// The rest of the center goes to the front of the right node.
shift = nr_center - shift;
if ((nr_right + shift) > max_entries)
throw std::runtime_error("too many entries");
right.shift_entries_right(shift);
center.copy_entries_to_right(right, shift);
right.set_nr_entries(nr_right + shift);
}
parent.set_key(right_index, right.key_at(0));
parent.delete_at(center_index);
// NOTE(review): right_index is not read again after this decrement.
--right_index;
tm_.get_sm()->dec(center.get_location());
__rebalance2(parent, left, right, left_index);
}
// Redistributes the entries of three sibling nodes and refreshes the
// parent's keys for the center and right nodes.
//
// The right node's target is a third of the total (rounded down). The
// left node's target is the same, plus one when the total is not
// divisible by three.
//
// Throws std::runtime_error("too many entries") if a target exceeds
// max_entries.
//
// NOTE(review): the counts handed to move_entries() are signed and are
// computed from unsigned values. Presumably the sign selects the
// direction of the move -- confirm against node_ref::move_entries.
template <unsigned Levels, typename _>
template <typename ValueTraits>
void
btree<Levels, _>::
redistribute3(internal_node &parent,
node_ref<ValueTraits> &left,
node_ref<ValueTraits> &center,
node_ref<ValueTraits> &right,
unsigned left_index)
{
unsigned center_index = left_index + 1;
unsigned right_index = left_index + 2;
unsigned nr_left = left.get_nr_entries();
unsigned nr_center = center.get_nr_entries();
unsigned nr_right = right.get_nr_entries();
unsigned max_entries = left.get_max_entries();
unsigned total = nr_left + nr_center + nr_right;
unsigned target_right = total / 3;
// 1 when total is not a multiple of three, 0 otherwise.
unsigned remainder = (target_right * 3) != total;
unsigned target_left = target_right + remainder;
if (target_left > max_entries || target_right > max_entries)
throw std::runtime_error("too many entries");
if (nr_left < nr_right) {
// Difference between the left node's size and its target.
int s = nr_left - target_left;
// FIXME: signed & unsigned comparison
if (s < 0 && nr_center < static_cast<unsigned>(-s)) {
// not enough in central node
left.move_entries(center, -nr_center);
s += nr_center;
left.move_entries(right, s);
nr_right += s;
} else
left.move_entries(center, s);
center.move_entries(right, target_right - nr_right);
} else {
// Difference between the right node's target and its size.
int s = target_right - nr_right;
if (s > 0 && nr_center < static_cast<unsigned>(s)) {
// not enough in central node
center.move_entries(right, nr_center);
s -= nr_center;
left.move_entries(right, s);
nr_left -= s;
} else
center.move_entries(right, s);
left.move_entries(center, nr_left - target_left);
}
parent.set_key(center_index, center.key_at(0));
parent.set_key(right_index, right.key_at(0));
}
};

View File

@ -110,12 +110,34 @@ namespace persistent_data {
uint64_t key,
typename ValueTraits::value_type const &v);
// Decrements the nr_entries field
void delete_at(unsigned i);
// Copies entries from another node, appends them
// to the back of this node. Adjusts nr_entries.
void copy_entries(node_ref const &rhs,
unsigned begin,
unsigned end);
// Moves entries between the sibling node,
// and maintains the key ordering.
// The nr_entries of both nodes are adjusted.
void move_entries(node_ref &rhs,
int count);
// Copies entries from the beginning of rhs to the end of lhs,
// or copies entries from the end of lhs to the beginning of rhs.
// The nr_entries is not adjusted.
void copy_entries_to_left(node_ref const &rhs, unsigned count);
void copy_entries_to_right(node_ref &rhs, unsigned count) const;
// Shifts entries to left or right.
// The nr_entries is not adjusted.
void shift_entries_left(unsigned shift);
void shift_entries_right(unsigned shift);
unsigned merge_threshold() const;
// Various searches
int bsearch(uint64_t key, int want_hi) const;
boost::optional<unsigned> exact_search(uint64_t key) const;
@ -124,6 +146,9 @@ namespace persistent_data {
template <typename RefCounter>
void inc_children(RefCounter &rc);
template <typename RefCounter>
void dec_children(RefCounter &rc);
disk_node *raw() {
return raw_;
}
@ -256,6 +281,26 @@ namespace persistent_data {
maybe_block root_;
};
// Pairs the write reference of a shadowed child node with the node's
// type, so callers can reinterpret the block with the matching traits.
class shadow_child {
public:
// Stores a copy of the write reference and the node type.
shadow_child(block_manager::write_ref &wr, node_type type)
: wr_(wr), type_(type) {
}
// Type recorded at construction.
node_type get_type() const {
return type_;
}
// Views the held block as a node with the given value traits. The
// caller picks the traits; they are not checked against get_type().
template <typename ValueTraits>
node_ref<ValueTraits> get_node() {
return to_node<ValueTraits>(wr_);
}
private:
block_manager::write_ref wr_;
node_type type_;
};
// Used to keep a record of a nested btree's position.
typedef std::vector<uint64_t> btree_path;
@ -396,6 +441,14 @@ namespace persistent_data {
int *index,
RC &leaf_rc);
template <typename ValueTraits2, typename RC>
bool
remove_location(btree_detail::shadow_spine &spine,
block_address block,
uint64_t key,
unsigned *index,
RC &leaf_rc);
void walk_tree(visitor &visitor,
btree_detail::node_location const &loc,
block_address b) const;
@ -408,6 +461,53 @@ namespace persistent_data {
void inc_children(btree_detail::shadow_spine &spine,
RefCounter &leaf_rc);
btree_detail::shadow_child
create_shadow_child(internal_node &parent,
unsigned index);
template <typename ValueTraits2>
bool rebalance_children(btree_detail::shadow_spine &spine,
uint64_t key);
template <typename ValueTraits2>
void rebalance2(btree_detail::shadow_spine &spine,
unsigned left_index);
template <typename ValueTraits2>
void rebalance3(btree_detail::shadow_spine &spine,
unsigned left_index);
template <typename ValueTraits2>
void
__rebalance2(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
template <typename ValueTraits2>
void
__rebalance3(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
template <typename ValueTraits2>
void
delete_center_node(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
template <typename ValueTraits2>
void
redistribute3(internal_node &parent,
btree_detail::node_ref<ValueTraits2> &left,
btree_detail::node_ref<ValueTraits2> &center,
btree_detail::node_ref<ValueTraits2> &right,
unsigned left_index);
transaction_manager &tm_;
bool destroy_;
block_address root_;
@ -418,6 +518,7 @@ namespace persistent_data {
};
#include "btree.tcc"
#include "btree-remove.tcc"
//----------------------------------------------------------------

View File

@ -25,6 +25,7 @@
#include <iostream>
#include <sstream>
#include <stack>
//----------------------------------------------------------------
@ -33,6 +34,56 @@ namespace {
using namespace persistent_data;
using namespace btree_detail;
using namespace std;
// One entry of the deletion stack: a node that is being torn down
// together with a cursor over its children.
struct frame {
	block_address blocknr_;		// block that holds the node
	uint32_t level_;		// btree level the node belongs to
	uint32_t nr_entries_;		// entry count supplied when the frame was made
	uint32_t current_child_;	// index of the next child to look at

	// The cursor always starts at the first child.
	frame(block_address blocknr, uint32_t level, uint32_t nr_entries)
		: blocknr_(blocknr),
		  level_(level),
		  nr_entries_(nr_entries),
		  current_child_(0) {
	}
};
// stack for postorder DFS traversal
// TODO: Refactor it into a spine-like class, e.g., btree_del_spine,
// "Spine" sounds better for btree operations.
struct btree_del_stack {
public:
btree_del_stack(transaction_manager &tm): tm_(tm) {
}
void push_frame(block_address blocknr,
uint32_t level,
uint32_t nr_entries) {
if (tm_.get_sm()->get_count(blocknr) > 1)
tm_.get_sm()->dec(blocknr);
else
spine_.push(frame(blocknr, level, nr_entries));
}
void pop_frame() {
tm_.get_sm()->dec(spine_.top().blocknr_);
spine_.pop();
}
frame &top_frame() {
return spine_.top();
}
bool is_empty() {
return spine_.empty();
}
private:
transaction_manager &tm_;
std::stack<frame> spine_;
};
}
//----------------------------------------------------------------
@ -242,6 +293,23 @@ namespace persistent_data {
set_value(i, v);
}
// Removes the entry at index i and closes the gap by sliding every later
// key/value pair down by one slot.
// Throws runtime_error if i does not name an existing entry.
template <typename ValueTraits>
void
node_ref<ValueTraits>::delete_at(unsigned i)
{
	unsigned const count = get_nr_entries();
	if (i >= count)
		throw runtime_error("key index out of bounds");

	// Number of entries stored after the one being removed.
	unsigned const tail = count - i - 1;
	if (tail > 0) {
		::memmove(key_ptr(i), key_ptr(i + 1), tail * sizeof(uint64_t));
		::memmove(value_ptr(i), value_ptr(i + 1), tail * sizeof(typename ValueTraits::disk_type));
	}

	set_nr_entries(count - 1);
}
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries(node_ref const &rhs,
@ -258,6 +326,90 @@ namespace persistent_data {
set_nr_entries(n + count);
}
// Moves entries between this node (the left sibling) and rhs (the right
// sibling), then updates both entry counts.
//
//   count > 0  : the last 'count' entries of this node are moved to the
//                front of rhs.
//   count < 0  : the first '-count' entries of rhs are appended to this
//                node.
//   count == 0 : nothing happens.
//
// Throws runtime_error("too many entries") if either node would end up
// with an invalid number of entries.  The check is made before either
// node is modified.
template <typename ValueTraits>
void
node_ref<ValueTraits>::move_entries(node_ref<ValueTraits> &rhs,
				    int count)
{
	if (!count)
		return;

	unsigned nr_left = get_nr_entries();
	unsigned nr_right = rhs.get_nr_entries();
	unsigned max_entries = get_max_entries();

	// After the move the left node holds nr_left - count entries and
	// the right node nr_right + count.  The arithmetic is unsigned, so
	// taking more entries than a node holds wraps round to a huge value
	// and is rejected by the same comparison.
	if (nr_left - count > max_entries || nr_right + count > max_entries)
		throw runtime_error("too many entries");

	if (count > 0) {
		// make room at the front of rhs, then fill it from our tail
		rhs.shift_entries_right(count);
		copy_entries_to_right(rhs, count);
	} else {
		// take from the front of rhs, then close the gap there
		copy_entries_to_left(rhs, -count);
		rhs.shift_entries_left(-count);
	}

	set_nr_entries(nr_left - count);
	rhs.set_nr_entries(nr_right + count);
}
// Copies the first 'count' entries of rhs into the slots that follow this
// node's current entries.  Neither node's entry count is changed here.
// Throws runtime_error if the copied entries would not fit.
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries_to_left(node_ref const &rhs, unsigned count)
{
	unsigned const dest = get_nr_entries();
	if (dest + count > get_max_entries())
		throw runtime_error("too many entries");

	::memcpy(key_ptr(dest), rhs.key_ptr(0), count * sizeof(uint64_t));
	::memcpy(value_ptr(dest), rhs.value_ptr(0), count * sizeof(typename ValueTraits::disk_type));
}
// Copies the last 'count' entries of this node into the first 'count'
// slots of rhs.  Whatever was stored in those slots is overwritten and
// neither node's entry count is changed here.
// Throws runtime_error if rhs's entry count plus 'count' exceeds this
// node's maximum.
template <typename ValueTraits>
void
node_ref<ValueTraits>::copy_entries_to_right(node_ref &rhs, unsigned count) const
{
	unsigned const nr_right = rhs.get_nr_entries();
	if (nr_right + count > get_max_entries())
		throw runtime_error("too many entries");

	// Index of the first entry that gets copied.
	unsigned const src = get_nr_entries() - count;
	::memcpy(rhs.key_ptr(0), key_ptr(src), count * sizeof(uint64_t));
	::memcpy(rhs.value_ptr(0), value_ptr(src), count * sizeof(typename ValueTraits::disk_type));
}
// Slides the entries from index 'shift' onwards down to index 0,
// overwriting the first 'shift' entries.  The entry count is left for
// the caller to adjust.
// Throws runtime_error if 'shift' exceeds the number of entries.
template <typename ValueTraits>
void
node_ref<ValueTraits>::shift_entries_left(unsigned shift)
{
	unsigned const total = get_nr_entries();
	if (total < shift)
		throw runtime_error("too many entries");

	unsigned const remaining = total - shift;
	::memmove(key_ptr(0), key_ptr(shift), remaining * sizeof(uint64_t));
	::memmove(value_ptr(0), value_ptr(shift), remaining * sizeof(typename ValueTraits::disk_type));
}
// Slides every entry up by 'shift' slots, leaving the first 'shift' slots
// free to be overwritten.  The entry count is left for the caller to
// adjust.
// Throws runtime_error if the shifted entries would not fit in the node.
template <typename ValueTraits>
void
node_ref<ValueTraits>::shift_entries_right(unsigned shift)
{
	unsigned const total = get_nr_entries();
	if (total + shift > get_max_entries())
		throw runtime_error("too many entries");

	::memmove(key_ptr(shift), key_ptr(0), total * sizeof(uint64_t));
	::memmove(value_ptr(shift), value_ptr(0), total * sizeof(typename ValueTraits::disk_type));
}
// Returns one third of the node's maximum entry count (integer division).
// NOTE(review): presumably the fill level the removal code uses to decide
// when siblings should be merged or rebalanced -- the users are not
// visible here, confirm.
template <typename ValueTraits>
unsigned
node_ref<ValueTraits>::merge_threshold() const
{
return get_max_entries() / 3;
}
template <typename ValueTraits>
int
node_ref<ValueTraits>::bsearch(uint64_t key, int want_hi) const
@ -348,6 +500,21 @@ namespace persistent_data {
}
}
// Hands every value stored in this node to rc.dec().
// Each value is copied out of the node in its on-disk form and unpacked
// before it is passed on.
template <typename ValueTraits>
template <typename RefCounter>
void
node_ref<ValueTraits>::dec_children(RefCounter &rc)
{
	for (unsigned i = 0, n = get_nr_entries(); i < n; ++i) {
		typename ValueTraits::value_type unpacked;
		typename ValueTraits::disk_type raw;

		::memcpy(&raw, value_ptr(i), sizeof(raw));
		ValueTraits::unpack(raw, unpacked);
		rc.dec(unpacked);
	}
}
template <typename ValueTraits>
bool
node_ref<ValueTraits>::value_sizes_match() const {
@ -535,13 +702,6 @@ namespace persistent_data {
return need_insert;
}
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::remove(key const &key)
{
using namespace btree_detail;
}
template <unsigned Levels, typename ValueTraits>
block_address
btree<Levels, ValueTraits>::get_root() const
@ -565,15 +725,57 @@ namespace persistent_data {
return ptr(new btree<Levels, ValueTraits>(tm_, root_, rc_));
}
#if 0
// Tears the whole tree down, starting at root_.
//
// The walk is iterative: btree_del_stack holds the nodes whose children
// are still being processed.  A node's own reference count is decremented
// (through the transaction manager's space map) when its frame is popped,
// i.e. after its children, or straight away by push_frame() when the node
// has a reference count above one.  Values held in the leaves of the last
// level are handed to rc_.
template <unsigned Levels, typename ValueTraits>
void
btree<Levels, ValueTraits>::destroy()
{
using namespace btree_detail;
btree_del_stack s(tm_);
// The root's read_ref is confined to this scope; only its entry count
// is needed to seed the stack.
{
read_ref blk = tm_.read_lock(root_, validator_);
internal_node n = to_node<block_traits>(blk);
s.push_frame(root_, 0, n.get_nr_entries());
}
while (!s.is_empty()) {
frame &f = s.top_frame();
// Every child of this node has been dealt with: release the node itself.
if (f.current_child_ >= f.nr_entries_) {
s.pop_frame();
continue;
}
// FIXME: Cache the read_ref object in the stack to avoid temporary objects?
read_ref current = tm_.read_lock(f.blocknr_, validator_);
internal_node n = to_node<block_traits>(current);
if (n.get_type() == INTERNAL) {
// TODO: test performance penalty of prefetching
//if (!f.current_child_)
//	for (unsigned i = 0; i < n.get_nr_entries(); i++)
//		tm_.prefetch(n.value_at(i));
// Internal node: offer the next child to the stack, at the same level.
block_address b = n.value_at(f.current_child_);
read_ref leaf = tm_.read_lock(b, validator_);
internal_node o = to_node<block_traits>(leaf);
s.push_frame(b, f.level_, o.get_nr_entries());
// 'f' still refers to the parent's frame: the stack's default
// std::deque storage keeps references valid across a push.
++f.current_child_;
// Leaf of a level above the last one: each value is a block address,
// read as a node and offered to the stack one level further down.
} else if (f.level_ < Levels - 1) {
block_address b = n.value_at(f.current_child_);
read_ref leaf = tm_.read_lock(b, validator_);
internal_node o = to_node<block_traits>(leaf);
s.push_frame(b, f.level_ + 1, o.get_nr_entries());
++f.current_child_;
} else {
// Leaf of the last level: hand all of its values to rc_ in one go,
// then release the node without stepping through its entries.
leaf_node o = to_node<ValueTraits>(current);
o.dec_children(rc_); // FIXME: move this into pop_frame()
s.pop_frame();
}
}
}
#endif
template <unsigned Levels, typename _>
template <typename ValueTraits, typename Search>

View File

@ -1,4 +1,4 @@
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
#include "persistent-data/file_utils.h"
#include "persistent-data/space-maps/core.h"

View File

@ -17,7 +17,7 @@
// <http://www.gnu.org/licenses/>.
#include "persistent-data/space-maps/core.h"
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
#include <stdexcept>

View File

@ -26,7 +26,7 @@
#include "persistent-data/data-structures/btree_damage_visitor.h"
#include "persistent-data/data-structures/btree_counter.h"
#include "persistent-data/checksum.h"
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
#include "persistent-data/transaction_manager.h"
using namespace persistent_data;

View File

@ -23,7 +23,6 @@ pub fn file_exists(path: &str) -> bool {
_ => {
// FIXME: assuming all errors indicate the file doesn't
// exist.
eprintln!("couldn't stat '{}'", path);
false
}
}

View File

@ -1,3 +1,4 @@
use thiserror::Error;
use std::{io, io::Write};
use nom::{bytes::complete::*, number::complete::*, IResult};
@ -6,41 +7,27 @@ use crate::pack::vm::*;
//-------------------------------------------
#[derive(Debug)]
#[derive(Error, Debug)]
pub enum PackError {
#[error("Couldn't parse binary data")]
ParseError,
IOError,
}
impl std::error::Error for PackError {}
#[error("Write error")]
WriteError { source: std::io::Error },
}
pub type PResult<T> = Result<T, PackError>;
fn nom_to_pr<T>(r: IResult<&[u8], T>) -> PResult<(&[u8], T)> {
match r {
Ok(v) => Ok(v),
Err(_) => Err(PackError::ParseError),
}
r.map_err(|_source| PackError::ParseError)
}
fn io_to_pr<T>(r: io::Result<T>) -> PResult<T> {
match r {
Ok(v) => Ok(v),
Err(_) => Err(PackError::IOError),
}
r.map_err(|source| PackError::WriteError {source})
}
//-------------------------------------------
impl std::fmt::Display for PackError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
PackError::ParseError => write!(f, "parse error"),
PackError::IOError => write!(f, "IO error"),
}
}
}
fn run64(i: &[u8], count: usize) -> IResult<&[u8], Vec<u64>> {
let (i, ns) = nom::multi::many_m_n(count, count, le_u64)(i)?;
Ok((i, ns))

View File

@ -1,3 +1,4 @@
use anyhow::{anyhow, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression};
@ -63,8 +64,8 @@ fn mk_chunk_vecs(nr_blocks: u64, nr_jobs: u64) -> Vec<Vec<(u64, u64)>> {
vs
}
pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box<dyn Error>> {
let nr_blocks = get_nr_blocks(&input_file)?;
pub fn pack(input_file: &str, output_file: &str) -> Result<()> {
let nr_blocks = get_nr_blocks(&input_file).context("getting nr blocks")?;
let nr_jobs = std::cmp::max(1, std::cmp::min(num_cpus::get() as u64, nr_blocks / 128));
let chunk_vecs = mk_chunk_vecs(nr_blocks, nr_jobs);
@ -81,7 +82,7 @@ pub fn pack(input_file: &str, output_file: &str) -> Result<(), Box<dyn Error>> {
.truncate(true)
.open(output_file)?;
write_header(&output, nr_blocks)?;
write_header(&output, nr_blocks).context("unable to write pack file header")?;
let sync_input = Arc::new(Mutex::new(input));
let sync_output = Arc::new(Mutex::new(output));
@ -104,7 +105,7 @@ fn crunch<R, W>(
input: Arc<Mutex<R>>,
output: Arc<Mutex<W>>,
ranges: Vec<(u64, u64)>,
) -> io::Result<()>
) -> Result<()>
where
R: Read + Seek,
W: Write,
@ -124,7 +125,7 @@ where
let kind = metadata_block_type(data);
if kind != BT::UNKNOWN {
z.write_u64::<LittleEndian>(b)?;
pack_block(&mut z, kind, &data);
pack_block(&mut z, kind, &data)?;
written += 1;
if written == 1024 {
@ -205,22 +206,18 @@ where
Ok(buf)
}
fn check<T>(r: &PResult<T>) {
match r {
Ok(_) => {}
Err(PackError::ParseError) => panic!("parse error"),
Err(PackError::IOError) => panic!("io error"),
}
}
fn pack_block<W: Write>(w: &mut W, kind: BT, buf: &[u8]) {
fn pack_block<W: Write>(w: &mut W, kind: BT, buf: &[u8]) -> Result<()> {
match kind {
BT::SUPERBLOCK => check(&pack_superblock(w, buf)),
BT::NODE => check(&pack_btree_node(w, buf)),
BT::INDEX => check(&pack_index(w, buf)),
BT::BITMAP => check(&pack_bitmap(w, buf)),
BT::UNKNOWN => {panic!("asked to pack an unknown block type")}
BT::SUPERBLOCK => pack_superblock(w, buf).context("unable to pack superblock")?,
BT::NODE => pack_btree_node(w, buf).context("unable to pack btree node")?,
BT::INDEX => pack_index(w, buf).context("unable to pack space map index")?,
BT::BITMAP => pack_bitmap(w, buf).context("unable to pack space map bitmap")?,
BT::UNKNOWN => {
return Err(anyhow!("asked to pack an unknown block type"))
}
}
Ok(())
}
fn write_zero_block<W>(w: &mut W, b: u64) -> io::Result<()>

View File

@ -1,3 +1,4 @@
use anyhow::Result;
use crate::block_manager::*;
use crate::checksum::*;
@ -47,7 +48,7 @@ use SuperblockDamage::*;
//------------------------------
pub fn check_type(b: &Block) -> Result<(), Box<dyn CheckError>> {
pub fn check_type(b: &Block) -> Result<()> {
match metadata_block_type(&b.data[0..]) {
SUPERBLOCK => Ok(()),
NODE => Err(Box::new(BadBlockType("BTree Node"))),

View File

@ -23,6 +23,7 @@ thin_provisioning::register_thin_commands(base::application &app)
app.add_cmd(command::ptr(new thin_ll_restore_cmd()));
app.add_cmd(command::ptr(new thin_scan_cmd()));
app.add_cmd(command::ptr(new thin_generate_metadata_cmd()));
app.add_cmd(command::ptr(new thin_generate_mappings_cmd()));
app.add_cmd(command::ptr(new thin_show_duplicates_cmd()));
app.add_cmd(command::ptr(new thin_show_metadata_cmd()));
app.add_cmd(command::ptr(new thin_journal_cmd()));

View File

@ -110,6 +110,13 @@ namespace thin_provisioning {
virtual int run(int argc, char **argv);
};
// Command object for the thin_generate_mappings tool.
class thin_generate_mappings_cmd : public base::command {
public:
thin_generate_mappings_cmd();
// Writes the command's usage text to 'out'.
virtual void usage(std::ostream &out) const;
// Runs the command with the given argument vector and returns an int
// status.
virtual int run(int argc, char **argv);
};
class thin_show_metadata_cmd : public base::command {
public:
thin_show_metadata_cmd();

View File

@ -54,6 +54,13 @@ namespace thin_provisioning {
snapshotted_time_(0) {
}
// Builds the details for a device that has no mapped blocks yet.
// tid:  stored as the device's transaction id.
// time: stored as both the creation time and the snapshotted time.
device_details::device_details(uint64_t tid, uint32_t time)
: mapped_blocks_(0),
transaction_id_(tid),
creation_time_(time),
snapshotted_time_(time) {
}
void
device_details_traits::unpack(device_details_disk const &disk, device_details &value)
{

View File

@ -17,6 +17,7 @@ namespace thin_provisioning {
struct device_details {
device_details();
device_details(uint64_t tid, uint32_t time);
uint64_t mapped_blocks_;
uint64_t transaction_id_; /* when created */

View File

@ -20,7 +20,7 @@
#include "thin-provisioning/metadata.h"
#include "persistent-data/file_utils.h"
#include "persistent-data/math_utils.h"
#include "base/math_utils.h"
#include "persistent-data/space-maps/core.h"
#include "persistent-data/space-maps/disk.h"

View File

@ -37,7 +37,6 @@ namespace thin_provisioning {
using namespace base;
using namespace persistent_data;
typedef uint64_t sector_t;
typedef uint32_t thin_dev_t;
//------------------------------------------------

View File

@ -18,11 +18,13 @@
#include "base/nested_output.h"
#include "persistent-data/file_utils.h"
#include "persistent-data/space-maps/core.h"
#include "thin-provisioning/metadata.h"
#include "thin-provisioning/metadata_checker.h"
#include "thin-provisioning/metadata_counter.h"
#include "thin-provisioning/superblock.h"
using namespace boost;
using namespace persistent_data;
using namespace thin_provisioning;
@ -84,6 +86,20 @@ namespace {
//--------------------------------
// Mapping visitor that counts references to data blocks: for every
// mapping it is shown, it increments the count of the mapped block in
// the space map supplied at construction.
class data_ref_counter : public mapping_tree_detail::mapping_visitor {
public:
data_ref_counter(space_map::ptr sm)
: sm_(sm) {
}
// 'path' is not used; only the block held in 'bt' matters.
virtual void visit(btree_path const &path, mapping_tree_detail::block_time const &bt) {
sm_->inc(bt.block_);
}
private:
// space map that accumulates the counts
space_map::ptr sm_;
};
class mapping_reporter : public mapping_tree_detail::damage_visitor {
public:
mapping_reporter(nested_output &out)
@ -121,12 +137,13 @@ namespace {
//--------------------------------
error_state examine_superblock(block_manager::ptr bm,
block_address sb_location,
nested_output &out) {
out << "examining superblock" << end_message();
nested_output::nest _ = out.push();
superblock_reporter sb_rep(out);
check_superblock(bm, sb_rep);
check_superblock(bm, sb_rep, sb_location);
return sb_rep.get_error();
}
@ -161,14 +178,20 @@ namespace {
error_state examine_mapping_tree_(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
nested_output &out) {
nested_output &out,
optional<space_map::ptr> data_sm) {
out << "examining mapping tree" << end_message();
nested_output::nest _ = out.push();
mapping_reporter mapping_rep(out);
mapping_tree mtree(*tm, sb.data_mapping_root_,
mapping_tree_detail::block_traits::ref_counter(tm->get_sm()));
check_mapping_tree(mtree, mapping_rep);
if (data_sm) {
data_ref_counter dcounter(*data_sm);
walk_mapping_tree(mtree, dcounter, mapping_rep);
} else
check_mapping_tree(mtree, mapping_rep);
return mapping_rep.get_error();
}
@ -184,9 +207,10 @@ namespace {
error_state examine_mapping_tree(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
nested_output &out) {
nested_output &out,
optional<space_map::ptr> data_sm) {
error_state err = examine_devices_tree_(tm, sb, out);
err << examine_mapping_tree_(tm, sb, out);
err << examine_mapping_tree_(tm, sb, out, data_sm);
return err;
}
@ -222,6 +246,34 @@ namespace {
return err;
}
// Compares the reference counts held by two space maps, block by block.
//
// actual:   the space map whose counts are being verified.
// expected: the space map holding the counts they ought to have.
// out:      receives one message per discrepancy.
//
// Returns NO_ERROR when the maps agree.  Maps of different sizes are
// FATAL and are not compared any further.  A block whose actual count is
// higher than expected is NON_FATAL; one whose actual count is lower
// than expected is FATAL.
error_state compare_space_maps(space_map::ptr actual, space_map::ptr expected,
			       nested_output &out)
{
	error_state err = NO_ERROR;
	auto nr_blocks = actual->get_nr_blocks();

	if (expected->get_nr_blocks() != nr_blocks) {
		out << "internal error: nr blocks in space maps differ"
		    << end_message();
		err << FATAL;
		return err;
	}

	for (block_address b = 0; b < nr_blocks; b++) {
		auto a_count = actual->get_count(b);
		// Must come from 'expected': reading both counts from
		// 'actual' would make every block compare equal.
		auto e_count = expected->get_count(b);

		if (a_count != e_count) {
			out << "data reference counts differ for block " << b
			    << ", expected " << e_count
			    << ", but got " << a_count
			    << end_message();

			err << (a_count > e_count ? NON_FATAL : FATAL);
		}
	}

	return err;
}
void print_info(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
nested_output &out)
@ -238,11 +290,11 @@ namespace {
//--------------------------------
class base_metadata_checker : public metadata_checker {
class metadata_checker {
public:
base_metadata_checker(block_manager::ptr bm,
check_options check_opts,
output_options output_opts)
metadata_checker(block_manager::ptr bm,
check_options check_opts,
output_options output_opts)
: bm_(bm),
options_(check_opts),
out_(cerr, 2),
@ -256,28 +308,44 @@ namespace {
error_state check() {
error_state err = NO_ERROR;
auto sb_location = superblock_detail::SUPERBLOCK_LOCATION;
err << examine_superblock(bm_, out_);
if (options_.use_metadata_snap_) {
superblock_detail::superblock sb = read_superblock(bm_, sb_location);
sb_location = sb.metadata_snap_;
if (sb_location == superblock_detail::SUPERBLOCK_LOCATION)
throw runtime_error("No metadata snapshot found.");
}
err << examine_superblock(bm_, sb_location, out_);
if (err == FATAL) {
if (check_for_xml(bm_))
out_ << "This looks like XML. thin_check only checks the binary metadata format." << end_message();
return err;
}
superblock_detail::superblock sb = read_superblock(bm_);
transaction_manager::ptr tm =
open_tm(bm_, superblock_detail::SUPERBLOCK_LOCATION);
superblock_detail::superblock sb = read_superblock(bm_, sb_location);
transaction_manager::ptr tm = open_tm(bm_, sb_location);
sb.data_mapping_root_ = mapping_root(sb, options_);
print_info(tm, sb, info_out_);
err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_);
if (options_.sm_opts_ == check_options::SPACE_MAP_FULL) {
space_map::ptr data_sm{open_disk_sm(*tm, &sb.data_space_map_root_)};
optional<space_map::ptr> core_sm{create_core_map(data_sm->get_nr_blocks())};
err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_, core_sm);
// if we're checking everything, and there were no errors,
// then we should check the space maps too.
if (err != FATAL)
err << examine_metadata_space_map(tm, sb, options_.check_metadata_space_map_, out_);
// if we're checking everything, and there were no errors,
// then we should check the space maps too.
if (err != FATAL) {
err << examine_metadata_space_map(tm, sb, options_.sm_opts_, out_);
if (core_sm)
err << compare_space_maps(data_sm, *core_sm, out_);
}
} else
err << examine_data_mappings(tm, sb, options_.check_data_mappings_, out_,
optional<space_map::ptr>());
return err;
}
@ -287,7 +355,8 @@ namespace {
examine_data_mappings(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
check_options::data_mapping_options option,
nested_output &out) {
nested_output &out,
optional<space_map::ptr> data_sm) {
error_state err = NO_ERROR;
switch (option) {
@ -295,7 +364,7 @@ namespace {
err << examine_top_level_mapping_tree(tm, sb, out);
break;
case check_options::DATA_MAPPING_LEVEL2:
err << examine_mapping_tree(tm, sb, out);
err << examine_mapping_tree(tm, sb, out, data_sm);
break;
default:
break; // do nothing
@ -307,12 +376,12 @@ namespace {
static error_state
examine_metadata_space_map(transaction_manager::ptr tm,
superblock_detail::superblock const &sb,
check_options::metadata_space_map_options option,
check_options::space_map_options option,
nested_output &out) {
error_state err = NO_ERROR;
switch (option) {
case check_options::METADATA_SPACE_MAP_FULL:
case check_options::SPACE_MAP_FULL:
err << check_space_map_counts(tm, sb, out);
break;
default:
@ -332,32 +401,37 @@ namespace {
//----------------------------------------------------------------
check_options::check_options()
: check_data_mappings_(DATA_MAPPING_LEVEL2),
check_metadata_space_map_(METADATA_SPACE_MAP_FULL) {
: use_metadata_snap_(false),
check_data_mappings_(DATA_MAPPING_LEVEL2),
sm_opts_(SPACE_MAP_FULL) {
}
void check_options::set_superblock_only() {
check_data_mappings_ = DATA_MAPPING_NONE;
check_metadata_space_map_ = METADATA_SPACE_MAP_NONE;
sm_opts_ = SPACE_MAP_NONE;
}
void check_options::set_skip_mappings() {
check_data_mappings_ = DATA_MAPPING_LEVEL1;
check_metadata_space_map_ = METADATA_SPACE_MAP_NONE;
sm_opts_ = SPACE_MAP_NONE;
}
// Records 'b' in override_mapping_root_.
// NOTE(review): presumably used in place of the superblock's data mapping
// root when the check runs -- the consumer is not visible here, confirm.
void check_options::set_override_mapping_root(block_address b) {
override_mapping_root_ = b;
}
metadata_checker::ptr
thin_provisioning::create_base_checker(block_manager::ptr bm,
check_options const &check_opts,
output_options output_opts)
// Turns on use_metadata_snap_ and, at the same time, sets the space map
// option to SPACE_MAP_NONE.
void check_options::set_metadata_snap() {
use_metadata_snap_ = true;
sm_opts_ = SPACE_MAP_NONE;
}
base::error_state
thin_provisioning::check_metadata(block_manager::ptr bm,
check_options const &check_opts,
output_options output_opts)
{
metadata_checker::ptr checker;
checker = metadata_checker::ptr(new base_metadata_checker(bm, check_opts, output_opts));
return checker;
metadata_checker checker(bm, check_opts, output_opts);
return checker.check();
}
//----------------------------------------------------------------

View File

@ -33,9 +33,9 @@ namespace thin_provisioning {
DATA_MAPPING_LEVEL2,
};
enum metadata_space_map_options {
METADATA_SPACE_MAP_NONE,
METADATA_SPACE_MAP_FULL,
enum space_map_options {
SPACE_MAP_NONE,
SPACE_MAP_FULL,
};
check_options();
@ -43,9 +43,11 @@ namespace thin_provisioning {
void set_superblock_only();
void set_skip_mappings();
void set_override_mapping_root(bcache::block_address b);
void set_metadata_snap();
bool use_metadata_snap_;
data_mapping_options check_data_mappings_;
metadata_space_map_options check_metadata_space_map_;
space_map_options sm_opts_;
boost::optional<bcache::block_address> override_mapping_root_;
};
@ -54,19 +56,10 @@ namespace thin_provisioning {
OUTPUT_QUIET,
};
class metadata_checker {
public:
typedef std::shared_ptr<metadata_checker> ptr;
virtual ~metadata_checker() {}
virtual base::error_state check() = 0;
};
metadata_checker::ptr
create_base_checker(persistent_data::block_manager::ptr bm,
check_options const &check_opts,
output_options output_opts);
base::error_state
check_metadata(persistent_data::block_manager::ptr bm,
check_options const &check_opts,
output_options output_opts);
}
//----------------------------------------------------------------

View File

@ -194,11 +194,12 @@ namespace thin_provisioning {
void
check_superblock(block_manager::ptr bm,
superblock_detail::damage_visitor &visitor) {
superblock_detail::damage_visitor &visitor,
block_address sb_location) {
using namespace superblock_detail;
try {
bm->read_lock(SUPERBLOCK_LOCATION, superblock_validator());
bm->read_lock(sb_location, superblock_validator());
} catch (std::exception const &e) {
visitor.visit(superblock_corruption(e.what()));

View File

@ -139,7 +139,8 @@ namespace thin_provisioning {
superblock_detail::superblock const &sb);
void check_superblock(persistent_data::block_manager::ptr bm,
superblock_detail::damage_visitor &visitor);
superblock_detail::damage_visitor &visitor,
persistent_data::block_address sb_location = superblock_detail::SUPERBLOCK_LOCATION);
}
//----------------------------------------------------------------

View File

@ -76,10 +76,10 @@ namespace {
return 1;
}
block_manager::ptr bm = open_bm(path);
block_manager::ptr bm = open_bm(path, block_manager::READ_ONLY,
!fs.check_opts.use_metadata_snap_);
output_options output_opts = !fs.quiet ? OUTPUT_NORMAL : OUTPUT_QUIET;
metadata_checker::ptr checker = create_base_checker(bm, fs.check_opts, output_opts);
error_state err = checker->check();
error_state err = check_metadata(bm, fs.check_opts, output_opts);
if (fs.ignore_non_fatal_errors)
success = (err == FATAL) ? false : true;
@ -110,15 +110,16 @@ thin_check_cmd::thin_check_cmd()
void
thin_check_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options] {device|file}" << endl
<< "Options:" << endl
<< " {-q|--quiet}" << endl
<< " {-h|--help}" << endl
<< " {-V|--version}" << endl
<< " {--override-mapping-root}" << endl
<< " {--clear-needs-check-flag}" << endl
<< " {--ignore-non-fatal-errors}" << endl
<< " {--skip-mappings}" << endl
out << "Usage: " << get_name() << " [options] {device|file}\n"
<< "Options:\n"
<< " {-q|--quiet}\n"
<< " {-h|--help}\n"
<< " {-V|--version}\n"
<< " {-m|--metadata-snap}\n"
<< " {--override-mapping-root}\n"
<< " {--clear-needs-check-flag}\n"
<< " {--ignore-non-fatal-errors}\n"
<< " {--skip-mappings}\n"
<< " {--super-block-only}" << endl;
}
@ -128,11 +129,12 @@ thin_check_cmd::run(int argc, char **argv)
int c;
flags fs;
char const shortopts[] = "qhV";
char const shortopts[] = "qhVm";
option const longopts[] = {
{ "quiet", no_argument, NULL, 'q'},
{ "help", no_argument, NULL, 'h'},
{ "version", no_argument, NULL, 'V'},
{ "metadata-snap", no_argument, NULL, 'm'},
{ "super-block-only", no_argument, NULL, 1},
{ "skip-mappings", no_argument, NULL, 2},
{ "ignore-non-fatal-errors", no_argument, NULL, 3},
@ -155,6 +157,10 @@ thin_check_cmd::run(int argc, char **argv)
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
case 'm':
fs.check_opts.set_metadata_snap();
break;
case 1:
// super-block-only
fs.check_opts.set_superblock_only();
@ -186,6 +192,12 @@ thin_check_cmd::run(int argc, char **argv)
}
}
// Refuse to combine clearing the needs-check flag with checking the
// metadata snapshot.
if (fs.clear_needs_check_flag_on_success && fs.check_opts.use_metadata_snap_) {
	// End the line so the usage text that follows starts on its own line.
	cerr << "--metadata-snap cannot be combined with --clear-needs-check-flag." << endl;
	usage(cerr);
	exit(1);
}
if (argc == optind) {
if (!fs.quiet) {
cerr << "No input file provided." << endl;

View File

@ -0,0 +1,207 @@
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "base/io_generator.h"
#include "base/output_file_requirements.h"
#include "persistent-data/file_utils.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/thin_pool.h"
#include "version.h"
#include <boost/optional.hpp>
#include <getopt.h>
#include <unistd.h>
using namespace boost;
using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
// Settings collected from the thin_generate_mappings command line.
struct flags {
// Defaults: the "write" pattern and an offset of 0.  The optional
// members are left unset.
flags()
: pattern("write"),
offset(0)
{
}
// Returns false, after printing a message to cerr, if output, dev_id or
// size is missing.
bool check_conformance();
// --output: path of the metadata device or file to operate on (required)
boost::optional<string> output;
// --rw: the I/O pattern to generate
base::io_pattern pattern;
// --dev-id: id of the thin device to generate mappings for (required)
boost::optional<unsigned> dev_id;
// generate_mappings() falls back to the pool's data block size when this
// is unset.  NOTE(review): no option in run() assigns it -- confirm
// whether a --block-size option is missing.
boost::optional<base::sector_t> block_size;
// --offset
base::sector_t offset;
// --size (required)
boost::optional<base::sector_t> size;
// --io-size: generate_mappings() uses the value of size when this is unset
boost::optional<base::sector_t> io_size;
};
// Verifies that the mandatory options were supplied.  The first one
// found missing is reported on cerr and false is returned.  Otherwise the
// output file requirements are checked and true is returned.
bool flags::check_conformance() {
	char const *complaint = NULL;

	if (!output)
		complaint = "No output file provided.";
	else if (!dev_id)
		complaint = "No device id provided.";
	else if (!size)
		complaint = "No device size specified";

	if (complaint) {
		cerr << complaint << endl;
		return false;
	}

	check_output_file_requirements(*output);
	return true;
}
//--------------------------------
// Opens the output file read-write and wraps it in a thin_pool.
// fs.output must be set (check_conformance() verifies this).
thin_pool::ptr open_pool(flags const &fs) {
block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE);
return thin_pool::ptr(new thin_pool(bm));
}
// Feeds a generated sequence of I/Os to one thin device of the pool held
// in the output file, then commits the pool.  Always returns 0.
// fs.output, fs.dev_id and fs.size must be set.
int generate_mappings(flags const &fs) {
	thin_pool::ptr pool = open_pool(fs);
	thin::ptr td = pool->open_thin(*fs.dev_id);

	io_generator_options opts;
	opts.pattern_ = fs.pattern;
	// An unset block size means "use the pool's data block size".
	opts.block_size_ = fs.block_size ?
		*fs.block_size :
		pool->get_data_block_size();
	opts.offset_ = fs.offset;
	opts.size_ = *fs.size;
	// An unset io size means "use the full size".
	opts.io_size_ = fs.io_size ? *fs.io_size : *fs.size;
	io_generator::ptr gen = create_io_generator(opts);

	for (base::io io; gen->has_next(); ) {
		// TODO: support io.size_
		gen->next(io);

		switch (io.op_) {
		case base::REQ_OP_READ:
			process_read(td, pool, io.sector_);
			break;

		case base::REQ_OP_WRITE:
			process_write(td, pool, io.sector_);
			break;

		case base::REQ_OP_DISCARD:
			process_discard(td, pool, io.sector_);
			break;
		}
	}

	pool->commit();
	return 0;
}
}
//----------------------------------------------------------------
// Passes the name "thin_generate_mappings" to the command base class.
thin_generate_mappings_cmd::thin_generate_mappings_cmd()
: command("thin_generate_mappings")
{
}
// Writes the option summary for this command to 'out'.
void
thin_generate_mappings_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options]\n"
<< "Options:\n"
<< " {-h|--help}\n"
<< " {-o|--output} <output device or file>\n"
<< " {--dev-id} <dev-id>\n"
<< " {--offset} <offset>\n"
<< " {--io-size} <io_size>\n"
<< " {--rw write|trim|randwrite|randtrim|randtw}\n"
<< " {--size} <size>\n"
<< " {-V|--version}" << endl;
}
// Parses the command line into a flags object and, if the mandatory
// options are present, generates the mappings.
// Returns 0 after --help/--version, 1 for an unrecognised option or a
// missing mandatory option, and otherwise whatever generate_mappings()
// returns.
int
thin_generate_mappings_cmd::run(int argc, char **argv)
{
int c;
struct flags fs;
// NOTE(review): 'i:' and 'q' are accepted here but have no case in the
// switch below, so -i and -q end up in the default branch (usage text,
// exit status 1).
const char *shortopts = "hi:o:qV";
// Long options without a short form are identified by small integers.
const struct option longopts[] = {
{ "help", no_argument, NULL, 'h' },
{ "output", required_argument, NULL, 'o' },
{ "dev-id", required_argument, NULL, 1 },
{ "rw", required_argument, NULL, 2 },
{ "offset", required_argument, NULL, 3 },
{ "size", required_argument, NULL, 4 },
{ "io-size", required_argument, NULL, 5 },
{ "version", no_argument, NULL, 'V' },
{ NULL, no_argument, NULL, 0 }
};
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
switch(c) {
case 'h':
usage(cout);
return 0;
case 'o':
fs.output = optarg;
break;
case 1:
// --dev-id
fs.dev_id = parse_uint64(optarg, "dev_id");
break;
case 2:
// --rw
fs.pattern.parse(optarg);
break;
case 3:
// --offset
fs.offset = parse_uint64(optarg, "offset");
break;
case 4:
// --size
fs.size = parse_uint64(optarg, "size");
break;
case 5:
// --io-size
fs.io_size = parse_uint64(optarg, "io_size");
break;
case 'V':
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
default:
usage(cerr);
return 1;
}
}
// check_conformance() has already printed what is missing.
if (!fs.check_conformance()) {
usage(cerr);
return 1;
}
return generate_mappings(fs);
}
//----------------------------------------------------------------

View File

@ -19,7 +19,7 @@
#include "base/output_file_requirements.h"
#include "persistent-data/file_utils.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/metadata.h"
#include "thin-provisioning/thin_pool.h"
#include "version.h"
#include <boost/optional.hpp>
@ -27,8 +27,6 @@
#include <unistd.h>
using namespace boost;
using namespace persistent_data;
using namespace std;
using namespace thin_provisioning;
//----------------------------------------------------------------
@ -40,6 +38,11 @@ namespace {
METADATA_OP_FORMAT,
METADATA_OP_OPEN,
METADATA_OP_CREATE_THIN,
METADATA_OP_CREATE_SNAP,
METADATA_OP_DELETE_DEV,
METADATA_OP_SET_TRANSACTION_ID,
METADATA_OP_RESERVE_METADATA_SNAP,
METADATA_OP_RELEASE_METADATA_SNAP,
METADATA_OP_LAST
};
@ -55,7 +58,9 @@ namespace {
metadata_operations op;
sector_t data_block_size;
block_address nr_data_blocks;
optional<uint64_t> dev_id;
optional<thin_dev_t> dev_id;
optional<thin_dev_t> origin;
optional<uint64_t> trans_id;
optional<string> output;
};
@ -77,88 +82,63 @@ namespace {
return false;
}
if (op == METADATA_OP_CREATE_SNAP && (!dev_id || !origin)) {
cerr << "no device id provided." << endl;
return false;
}
if (op == METADATA_OP_DELETE_DEV && !dev_id) {
cerr << "no device id provided." << endl;
return false;
}
if (op == METADATA_OP_SET_TRANSACTION_ID && !trans_id) {
cerr << "no transaction id provided." << endl;
return false;
}
return true;
}
//--------------------------------
single_mapping_tree::ptr new_mapping_tree(metadata::ptr md) {
return single_mapping_tree::ptr(
new single_mapping_tree(*md->tm_,
mapping_tree_detail::block_time_ref_counter(md->data_sm_)));
}
// Reports whether dev_id is already known to the metadata, i.e. it has an
// entry in the device details tree or in the top-level mapping tree.
bool is_device_exists(metadata::ptr md, uint64_t dev_id) {
	uint64_t key[1] = {dev_id};

	// The details tree is consulted first; the mapping tree is only looked
	// up when no details entry was found.
	if (md->details_->lookup(key))
		return true;

	return !!md->mappings_top_level_->lookup(key);
}
//--------------------------------
// Constructs a metadata object on bm in metadata::CREATE mode with the given
// data block size and data block count, commits it, and returns it.
metadata::ptr format_metadata(block_manager::ptr bm,
			      sector_t data_block_size,
			      block_address nr_data_blocks) {
	metadata::ptr formatted(new metadata(bm, metadata::CREATE,
					     data_block_size, nr_data_blocks));
	formatted->commit();
	return formatted;
}
// Wraps the metadata found on bm in a metadata object and returns it.
// NOTE(review): the second constructor argument is passed as `true`; its
// meaning is defined by metadata's constructor, which is not visible here.
metadata::ptr open_metadata(block_manager::ptr bm) {
	return metadata::ptr(new metadata(bm, true));
}
// Creates thin device dev_id in md: inserts a device details entry and a new
// per-device mapping tree, then commits.
// Throws runtime_error("device already exists") if is_device_exists() reports
// the id is already present.
void create_thin(metadata::ptr md, uint64_t dev_id) {
uint64_t key[1] = {dev_id};
if (is_device_exists(md, dev_id))
throw runtime_error("device already exists");
// Stamp the new device with the superblock's current transaction id and
// time; the snapshotted time starts out equal to the creation time.
device_tree_detail::device_details details;
details.transaction_id_ = md->sb_.trans_id_;
details.creation_time_ = md->sb_.time_;
details.snapshotted_time_ = details.creation_time_;
md->details_->insert(key, details);
// Register the root of the device's new mapping tree in the top-level tree.
single_mapping_tree::ptr subtree = new_mapping_tree(md);
md->mappings_top_level_->insert(key, subtree->get_root());
// Re-point mappings_ at the top-level tree's current root after the insert.
md->mappings_->set_root(md->mappings_top_level_->get_root()); // FIXME: ugly
md->commit();
}
metadata::ptr open_or_format_metadata(block_manager::ptr bm, flags const &fs) {
thin_pool::ptr open_or_create_pool(flags const &fs) {
block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE);
if (fs.op == flags::METADATA_OP_FORMAT)
return format_metadata(bm, fs.data_block_size, fs.nr_data_blocks);
return thin_pool::ptr(new thin_pool(bm, fs.data_block_size, fs.nr_data_blocks));
else
return open_metadata(bm);
return thin_pool::ptr(new thin_pool(bm));
}
int generate_metadata(flags const &fs) {
block_manager::ptr bm = open_bm(*fs.output, block_manager::READ_WRITE);
metadata::ptr md = open_or_format_metadata(bm, fs);
thin_pool::ptr pool = open_or_create_pool(fs);
switch (fs.op) {
case flags::METADATA_OP_CREATE_THIN:
create_thin(md, *fs.dev_id);
pool->create_thin(*fs.dev_id);
break;
case flags::METADATA_OP_CREATE_SNAP:
pool->create_snap(*fs.dev_id, *fs.origin);
break;
case flags::METADATA_OP_DELETE_DEV:
pool->del(*fs.dev_id);
break;
case flags::METADATA_OP_SET_TRANSACTION_ID:
pool->set_transaction_id(*fs.trans_id);
break;
case flags::METADATA_OP_RESERVE_METADATA_SNAP:
pool->reserve_metadata_snap();
break;
case flags::METADATA_OP_RELEASE_METADATA_SNAP:
pool->release_metadata_snap();
break;
default:
break;
}
pool->commit();
return 0;
}
}
@ -176,8 +156,16 @@ thin_generate_metadata_cmd::usage(std::ostream &out) const
out << "Usage: " << get_name() << " [options]\n"
<< "Options:\n"
<< " {-h|--help}\n"
<< " --data-block-size <block size>\n"
<< " --nr-data-blocks <nr>\n"
<< " {--format}\n"
<< " {--create-thin} <dev-id>\n"
<< " {--create-snap} <dev-id>\n"
<< " {--delete} <dev-id>\n"
<< " {--reserve-metadata-snap}\n"
<< " {--release-metadata-snap}\n"
<< " {--set-transaction-id} <tid>\n"
<< " {--data-block-size} <block size>\n"
<< " {--nr-data-blocks} <nr>\n"
<< " {--origin} <origin-id>\n"
<< " {-o|--output} <output device or file>\n"
<< " {-V|--version}" << endl;
}
@ -193,10 +181,15 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
{ "output", required_argument, NULL, 'o' },
{ "format", no_argument, NULL, 1 },
{ "open", no_argument, NULL, 2 },
{ "create-thin", no_argument, NULL, 3 },
{ "create-thin", required_argument, NULL, 3 },
{ "create-snap", required_argument, NULL, 4 },
{ "delete", required_argument, NULL, 5 },
{ "set-transaction-id", required_argument, NULL, 6 },
{ "reserve-metadata-snap", no_argument, NULL, 7 },
{ "release-metadata-snap", no_argument, NULL, 8 },
{ "data-block-size", required_argument, NULL, 101 },
{ "nr-data-blocks", required_argument, NULL, 102 },
{ "dev-id", required_argument, NULL, 301 },
{ "origin", required_argument, NULL, 401 },
{ "version", no_argument, NULL, 'V' },
{ NULL, no_argument, NULL, 0 }
};
@ -221,6 +214,30 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
case 3:
fs.op = flags::METADATA_OP_CREATE_THIN;
fs.dev_id = parse_uint64(optarg, "device id");
break;
case 4:
fs.op = flags::METADATA_OP_CREATE_SNAP;
fs.dev_id = parse_uint64(optarg, "device id");
break;
case 5:
fs.op = flags::METADATA_OP_DELETE_DEV;
fs.dev_id = parse_uint64(optarg, "device id");
break;
case 6:
fs.op = flags::METADATA_OP_SET_TRANSACTION_ID;
fs.trans_id = parse_uint64(optarg, "transaction id");
break;
case 7:
fs.op = flags::METADATA_OP_RESERVE_METADATA_SNAP;
break;
case 8:
fs.op = flags::METADATA_OP_RELEASE_METADATA_SNAP;
break;
case 101:
@ -231,8 +248,8 @@ thin_generate_metadata_cmd::run(int argc, char **argv)
fs.nr_data_blocks = parse_uint64(optarg, "nr data blocks");
break;
case 301:
fs.dev_id = parse_uint64(optarg, "dev id");
case 401:
fs.origin = parse_uint64(optarg, "origin");
break;
case 'V':

View File

@ -16,13 +16,10 @@
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "base/math_utils.h"
#include "thin-provisioning/thin_pool.h"
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <set>
#include <map>
using namespace base;
using namespace std;
@ -31,9 +28,22 @@ using namespace thin_provisioning;
//----------------------------------------------------------------
thin::thin(thin_dev_t dev, thin_pool *pool)
thin::thin(thin_dev_t dev, thin_pool &pool)
: dev_(dev),
pool_(pool)
pool_(pool),
details_(pool.get_transaction_id(), pool.get_time()),
open_count_(1),
changed_(true)
{
}
// Constructs a handle for device `dev` of `pool` from caller-supplied details.
// The handle starts with an open count of 1 and changed_ == false, so
// thin_pool::write_changed_details() will not write these details back unless
// a later operation sets changed_.
thin::thin(thin_dev_t dev, thin_pool &pool,
device_tree_detail::device_details const &details)
: dev_(dev),
pool_(pool),
details_(details),
open_count_(1),
changed_(false)
{
}
@ -47,66 +57,76 @@ thin::maybe_address
thin::lookup(block_address thin_block)
{
uint64_t key[2] = {dev_, thin_block};
return pool_->md_->mappings_->lookup(key);
mapping_tree::maybe_value m = pool_.md_->mappings_->lookup(key);
if (!m)
return thin::maybe_address();
lookup_result r;
r.block_ = m->block_;
r.shared_ = m->time_ < details_.snapshotted_time_;
return r;
}
bool
thin::insert(block_address thin_block, block_address data_block)
{
uint64_t key[2] = {dev_, thin_block};
++details_.mapped_blocks_;
changed_ = true;
mapping_tree_detail::block_time bt;
bt.block_ = data_block;
bt.time_ = 0; // FIXME: use current time.
return pool_->md_->mappings_->insert(key, bt);
bt.time_ = pool_.get_time();
return pool_.md_->mappings_->insert(key, bt);
}
void
thin::remove(block_address thin_block)
{
uint64_t key[2] = {dev_, thin_block};
pool_->md_->mappings_->remove(key);
pool_.md_->mappings_->remove(key);
--details_.mapped_blocks_;
changed_ = true;
}
void
thin::set_snapshot_time(uint32_t time)
{
uint64_t key[1] = { dev_ };
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
if (!mdetail)
throw runtime_error("no such device");
mdetail->snapshotted_time_ = time;
pool_->md_->details_->insert(key, *mdetail);
details_.snapshotted_time_ = time;
changed_ = true;
}
block_address
thin::get_mapped_blocks() const
{
uint64_t key[1] = { dev_ };
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
if (!mdetail)
throw runtime_error("no such device");
return mdetail->mapped_blocks_;
return details_.mapped_blocks_;
}
void
thin::set_mapped_blocks(block_address count)
{
uint64_t key[1] = { dev_ };
boost::optional<device_tree_detail::device_details> mdetail = pool_->md_->details_->lookup(key);
if (!mdetail)
throw runtime_error("no such device");
mdetail->mapped_blocks_ = count;
pool_->md_->details_->insert(key, *mdetail);
details_.mapped_blocks_ = count;
changed_ = true;
}
//--------------------------------
thin_pool::thin_pool(metadata::ptr md)
: md_(md)
thin_pool::thin_pool(block_manager::ptr bm)
{
md_ = metadata::ptr(new metadata(bm, true));
}
// Formats a new pool on bm: constructs the metadata in metadata::CREATE mode
// with the given data block size and data block count, then commits it.
thin_pool::thin_pool(block_manager::ptr bm,
		     sector_t data_block_size,
		     block_address nr_data_blocks)
	: md_(new metadata(bm, metadata::CREATE, data_block_size, nr_data_blocks))
{
	md_->commit();
}
thin_pool::~thin_pool()
@ -120,14 +140,15 @@ thin_pool::create_thin(thin_dev_t dev)
uint64_t key[1] = {dev};
if (device_exists(dev))
throw std::runtime_error("Device already exists");
throw std::runtime_error("device already exists");
single_mapping_tree::ptr new_tree(new single_mapping_tree(*md_->tm_,
mapping_tree_detail::block_time_ref_counter(md_->data_sm_)));
md_->mappings_top_level_->insert(key, new_tree->get_root());
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
// FIXME: doesn't set up the device details
thin::ptr r = create_device(dev);
close_device(r);
}
void
@ -136,31 +157,62 @@ thin_pool::create_snap(thin_dev_t dev, thin_dev_t origin)
uint64_t snap_key[1] = {dev};
uint64_t origin_key[1] = {origin};
boost::optional<uint64_t> mtree_root = md_->mappings_top_level_->lookup(origin_key);
if (device_exists(dev))
throw std::runtime_error("device already exists");
// find the mapping tree of the origin
dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(origin_key);
if (!mtree_root)
throw std::runtime_error("unknown origin");
single_mapping_tree otree(*md_->tm_, *mtree_root,
mapping_tree_detail::block_time_ref_counter(md_->data_sm_));
// clone the origin
single_mapping_tree::ptr clone(otree.clone());
md_->mappings_top_level_->insert(snap_key, clone->get_root());
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
md_->sb_.time_++;
thin::ptr o = open_thin(origin);
thin::ptr s = open_thin(dev);
o->set_snapshot_time(md_->sb_.time_);
s->set_snapshot_time(md_->sb_.time_);
s->set_mapped_blocks(o->get_mapped_blocks());
// create details for the snapshot
thin::ptr s = create_device(dev);
set_snapshot_details(s, origin);
close_device(s);
}
// Deletes thin device `dev`: destroys its mapping tree and removes its
// entries from the details tree and the top-level mapping tree.
// Throws std::runtime_error:
//   - "no such device" (from open_device) if the device is neither cached
//     nor present in the details tree,
//   - "device busy" if the device already had an open handle,
//   - "unknown device" if the details entry or mapping tree root is missing.
void
thin_pool::del(thin_dev_t dev)
{
uint64_t key[1] = {dev};
// open_device() bumps the open count, so a count above 1 means the device
// was already open before this call.
thin::ptr td = open_device(dev);
if (td->open_count_ > 1) {
close_device(td);
throw std::runtime_error("device busy");
}
// Drop the cached handle so write_changed_details() will not write the
// details of the deleted device back.
// NOTE(review): the cache entry is erased before the checks below, so a
// throw of "unknown device" also discards any cached, unwritten details.
thin_devices_.erase(dev);
dev_tree::maybe_value mtree_root = md_->mappings_top_level_->lookup(key);
if (!device_exists(dev) || !mtree_root)
throw std::runtime_error("unknown device");
// TODO: trigger subtree deletion from the mtree_ref_counter,
// like the kernel subtree_dec() does.
single_mapping_tree mtree(*md_->tm_, *mtree_root,
mapping_tree_detail::block_time_ref_counter(md_->data_sm_));
mtree.destroy();
md_->details_->remove(key);
md_->mappings_top_level_->remove(key);
// Re-point mappings_ at the top-level tree's current root after the removal.
md_->mappings_->set_root(md_->mappings_top_level_->get_root()); // FIXME: ugly
}
// Commits the pool: cached device details flagged as changed are written
// into the details tree first, then the metadata itself is committed.
void
thin_pool::commit()
{
write_changed_details();
md_->commit();
}
void
@ -175,6 +227,52 @@ thin_pool::get_transaction_id() const
return md_->sb_.trans_id_;
}
// Reserves a metadata snapshot and records its block location in the
// in-core superblock field metadata_snap_.
// Throws std::runtime_error if metadata_snap_ is already non-zero.
// This function commits before taking the snapshot but not after; the new
// metadata_snap_ value is only held in md_->sb_ when it returns.
void
thin_pool::reserve_metadata_snap()
{
if (md_->sb_.metadata_snap_)
throw std::runtime_error("pool metadata snapshot already exists.");
// Flush pending device details and commit the metadata before snapshotting.
commit();
// Take an extra reference on the superblock block, then shadow it to obtain
// a writable block (presumably a copy, since the original now has an extra
// reference -- confirm against transaction_manager::shadow).
md_->metadata_sm_->inc(superblock_detail::SUPERBLOCK_LOCATION);
transaction_manager::write_ref wr = md_->tm_->shadow(
superblock_detail::SUPERBLOCK_LOCATION,
superblock_validator()).first;
superblock_detail::superblock sb;
superblock_detail::superblock_disk *sbd = reinterpret_cast<superblock_detail::superblock_disk *>(wr.data());
superblock_detail::superblock_traits::unpack(*sbd, sb);
// The shadowed superblock gets zeroed space map roots.
memset(sb.data_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE);
memset(sb.metadata_space_map_root_, 0, superblock_detail::SPACE_MAP_ROOT_SIZE);
// Take a reference on each tree root that the shadowed superblock points at.
md_->metadata_sm_->inc(sb.data_mapping_root_);
md_->metadata_sm_->inc(sb.device_details_root_);
superblock_detail::superblock_traits::pack(sb, *sbd);
md_->sb_.metadata_snap_ = wr.get_location();
}
// Releases the metadata snapshot recorded in the superblock field
// metadata_snap_ and resets that field to 0.
// Throws std::runtime_error if no snapshot is recorded (field is zero).
void
thin_pool::release_metadata_snap()
{
if (!md_->sb_.metadata_snap_)
throw std::runtime_error("No pool metadata snapshot found");
// Read the snapshot's superblock to find the roots it refers to.
superblock_detail::superblock sb = read_superblock(md_->tm_->get_bm(),
md_->sb_.metadata_snap_);
// Destroy the device details tree and the mapping tree rooted in the
// snapshot superblock.
device_tree dtree(*md_->tm_, sb.device_details_root_,
device_tree_detail::device_details_traits::ref_counter());
dtree.destroy();
mapping_tree mtree(*md_->tm_, sb.data_mapping_root_,
mapping_tree_detail::block_traits::ref_counter(md_->tm_->get_sm()));
mtree.destroy();
// Drop the reference on the snapshot superblock block itself.
md_->metadata_sm_->dec(md_->sb_.metadata_snap_);
md_->sb_.metadata_snap_ = 0;
}
block_address
thin_pool::get_metadata_snap() const
{
@ -184,7 +282,7 @@ thin_pool::get_metadata_snap() const
block_address
thin_pool::alloc_data_block()
{
boost::optional<block_address> mb = md_->data_sm_->new_block();
space_map::maybe_block mb = md_->data_sm_->new_block();
if (!mb)
throw runtime_error("couldn't allocate new block");
@ -203,7 +301,7 @@ thin_pool::get_nr_free_data_blocks() const
return md_->data_sm_->get_nr_free();
}
thin_provisioning::sector_t
sector_t
thin_pool::get_data_block_size() const
{
return md_->sb_.data_block_size_;
@ -215,17 +313,22 @@ thin_pool::get_data_dev_size() const
return md_->data_sm_->get_nr_blocks();
}
// Returns the time_ value currently held in the in-core superblock.
uint32_t
thin_pool::get_time() const
{
return md_->sb_.time_;
}
thin::ptr
thin_pool::open_thin(thin_dev_t dev)
{
uint64_t key[1] = {dev};
boost::optional<device_tree_detail::device_details> mdetails = md_->details_->lookup(key);
if (!mdetails)
throw runtime_error("no such device");
return open_device(dev);
}
thin *ptr = new thin(dev, this);
thin::ptr r(ptr);
return r;
// Public counterpart of close_device(): releases one open reference on td.
void
thin_pool::close_thin(thin::ptr td)
{
close_device(td);
}
bool
@ -235,4 +338,107 @@ thin_pool::device_exists(thin_dev_t dev) const
return !!md_->details_->lookup(key);
}
// Creates a new handle for `dev`, registers it in the device cache and
// returns it.
// Throws std::runtime_error("device already exists") when the cache already
// holds an entry for `dev`; only the cache is consulted here.
thin::ptr
thin_pool::create_device(thin_dev_t dev)
{
	if (thin_devices_.find(dev) != thin_devices_.end())
		throw std::runtime_error("device already exists");

	thin::ptr fresh(new thin(dev, *this));
	thin_devices_[dev] = fresh;
	return fresh;
}
// Returns a handle for `dev`. A cached handle has its open count incremented
// and is returned as is. Otherwise the device details are looked up in the
// details tree and a new handle is built from them and cached.
// Throws std::runtime_error("no such device") when the device is neither
// cached nor present in the details tree.
thin::ptr
thin_pool::open_device(thin_dev_t dev)
{
	device_map::iterator cached = thin_devices_.find(dev);

	if (cached == thin_devices_.end()) {
		// Cache miss: load the details from the metadata.
		uint64_t key[1] = {dev};
		device_tree::maybe_value stored = md_->details_->lookup(key);
		if (!stored)
			throw std::runtime_error("no such device");

		thin::ptr loaded(new thin(dev, *this, *stored));
		thin_devices_[dev] = loaded;
		return loaded;
	}

	// Cache hit: account for the additional opener.
	thin::ptr existing = cached->second;
	existing->open_count_++;
	return existing;
}
// Releases one open reference on td. The handle stays in the device cache;
// write_changed_details() evicts entries whose open count has reached zero.
// NOTE(review): open_count_ is an unsigned counter and is decremented without
// a zero check, so closing a handle more often than it was opened would wrap.
void
thin_pool::close_device(thin::ptr td)
{
td->open_count_--;
}
// Fills in the details of a newly created snapshot `snap` of device `origin`:
// both devices get the superblock's current time as their snapshot time, and
// the snapshot inherits the origin's mapped block count.
// The origin is opened for the duration of the call; open_device() throws if
// it does not exist.
void
thin_pool::set_snapshot_details(thin::ptr snap, thin_dev_t origin)
{
	thin::ptr origin_dev = open_device(origin);
	uint32_t const snap_time = md_->sb_.time_;

	origin_dev->set_snapshot_time(snap_time);
	snap->set_snapshot_time(snap_time);
	snap->set_mapped_blocks(origin_dev->get_mapped_blocks());

	close_device(origin_dev);
}
void
thin_pool::write_changed_details()
{
for (auto it = thin_devices_.cbegin(); it != thin_devices_.cend(); ) {
uint64_t key[1] = {it->first};
thin::ptr td = it->second;
if (td->changed_) {
md_->details_->insert(key, td->details_);
td->changed_ = false;
}
if (!td->open_count_)
it = thin_devices_.erase(it);
else
++it;
}
}
//----------------------------------------------------------------
// Simulates a read on thin device `td` at sector `offset`: looks up the
// mapping of the thin block containing that sector. The lookup result is
// discarded.
void
thin_provisioning::process_read(thin::ptr td, thin_pool::ptr tp,
				sector_t offset)
{
	// Floor division: a sector belongs to the block that contains it.
	// Rounding up would attribute every unaligned offset to the next block.
	block_address blocknr = offset / tp->get_data_block_size();
	td->lookup(blocknr);
}

// Simulates a write on thin device `td` at sector `offset`. Nothing happens
// if the containing thin block is already mapped and not shared; otherwise a
// new data block is allocated from `tp` and mapped at that thin block.
void
thin_provisioning::process_write(thin::ptr td, thin_pool::ptr tp,
				 sector_t offset)
{
	// Floor division: a sector belongs to the block that contains it.
	block_address blocknr = offset / tp->get_data_block_size();
	thin::maybe_address result = td->lookup(blocknr);
	if (!!result && !result->shared_)
		return;

	// TODO: handle out-of-space errors
	block_address data_block = tp->alloc_data_block();
	td->insert(blocknr, data_block);
}

// Simulates a discard on thin device `td` at sector `offset`. If the
// containing thin block is mapped, the mapping is removed, and the data
// block is returned to `tp` unless the mapping was shared.
void
thin_provisioning::process_discard(thin::ptr td, thin_pool::ptr tp,
				   sector_t offset)
{
	// Floor division: a sector belongs to the block that contains it.
	block_address blocknr = offset / tp->get_data_block_size();
	thin::maybe_address result = td->lookup(blocknr);
	if (!result)
		return;

	td->remove(blocknr);
	if (!result->shared_)
		tp->free_data_block(result->block_);
}
//----------------------------------------------------------------

View File

@ -33,8 +33,13 @@ namespace thin_provisioning {
class thin_pool;
class thin {
public:
// Result of thin::lookup() for a mapped thin block.
struct lookup_result {
// Data block the thin block is mapped to.
block_address block_;
// True when the mapping's time is earlier than the device's snapshotted time.
bool shared_;
};
typedef std::shared_ptr<thin> ptr;
typedef boost::optional<mapping_tree_detail::block_time> maybe_address;
typedef boost::optional<lookup_result> maybe_address;
thin_dev_t get_dev_t() const;
maybe_address lookup(block_address thin_block);
@ -48,26 +53,38 @@ namespace thin_provisioning {
private:
friend class thin_pool;
thin(thin_dev_t dev, thin_pool *pool); // FIXME: pass a reference rather than a ptr
thin(thin_dev_t dev, thin_pool &pool);
thin(thin_dev_t dev, thin_pool &pool,
device_tree_detail::device_details const &details);
thin_dev_t dev_;
thin_pool *pool_;
thin_pool &pool_;
device_tree_detail::device_details details_;
uint32_t open_count_;
bool changed_;
};
class thin_pool {
public:
typedef std::shared_ptr<thin_pool> ptr;
thin_pool(metadata::ptr md);
thin_pool(block_manager::ptr bm);
thin_pool(block_manager::ptr bm,
sector_t data_block_size,
block_address nr_data_blocks);
~thin_pool();
void create_thin(thin_dev_t dev);
void create_snap(thin_dev_t dev, thin_dev_t origin);
void del(thin_dev_t);
void commit();
void set_transaction_id(uint64_t id);
uint64_t get_transaction_id() const;
// handling metadata snapshot
void reserve_metadata_snap();
void release_metadata_snap();
block_address get_metadata_snap() const;
block_address alloc_data_block();
@ -77,15 +94,29 @@ namespace thin_provisioning {
block_address get_nr_free_data_blocks() const;
sector_t get_data_block_size() const;
block_address get_data_dev_size() const;
uint32_t get_time() const;
thin::ptr open_thin(thin_dev_t);
void close_thin(thin::ptr td);
private:
friend class thin;
typedef std::map<thin_dev_t, thin::ptr> device_map;
bool device_exists(thin_dev_t dev) const;
thin::ptr create_device(thin_dev_t dev);
thin::ptr open_device(thin_dev_t dev);
void close_device(thin::ptr device);
void set_snapshot_details(thin::ptr snap, thin_dev_t origin);
void write_changed_details();
metadata::ptr md_;
device_map thin_devices_;
};
void process_read(thin::ptr td, thin_pool::ptr tp, sector_t offset);
void process_write(thin::ptr td, thin_pool::ptr tp, sector_t offset);
void process_discard(thin::ptr td, thin_pool::ptr tp, sector_t offset);
};
//----------------------------------------------------------------