[thin_journal_check] first code drop

This commit is contained in:
Joe Thornber 2018-08-28 13:12:20 +01:00
parent 9837feaee5
commit 70cdfe12a2
5 changed files with 894 additions and 0 deletions

View File

@ -118,6 +118,8 @@ SOURCE=\
thin-provisioning/xml_format.cc
DEVTOOLS_SOURCE=\
thin-provisioning/thin_journal.cc \
thin-provisioning/thin_journal_check.cc \
thin-provisioning/thin_ll_dump.cc \
thin-provisioning/thin_ll_restore.cc \
thin-provisioning/thin_show_duplicates.cc \

View File

@ -116,6 +116,13 @@ namespace thin_provisioning {
virtual void usage(std::ostream &out) const;
virtual int run(int argc, char **argv);
};
class thin_journal_cmd : public base::command {
public:
thin_journal_cmd();
virtual void usage(std::ostream &out) const;
virtual int run(int argc, char **argv);
};
#endif
void register_thin_commands(base::application &app);

View File

@ -0,0 +1,329 @@
// Copyright (C) 2018 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include "thin-provisioning/thin_journal.h"
#include <algorithm>
using namespace thin_provisioning;
using namespace persistent_data;
using namespace std;
//----------------------------------------------------------------
byte_stream::byte_stream(block_manager<JOURNAL_BLOCK_SIZE>::ptr bm)
: bm_(bm),
current_block_(0),
cursor_(0)
{
}
void
byte_stream::read_bytes(uint8_t *b, uint8_t *e)
{
while (b != e)
b += read_some_(b, e);
}
void
byte_stream::next_block_()
{
current_block_++;
}
size_t
byte_stream::read_some_(uint8_t *b, uint8_t *e)
{
if (cursor_ == JOURNAL_BLOCK_SIZE)
next_block_();
size_t len = min<uint64_t>(e - b, JOURNAL_BLOCK_SIZE - cursor_);
auto rr = bm_->read_lock(current_block_);
uint8_t const *data_begin = reinterpret_cast<uint8_t const *>(rr.data()) + cursor_;
memcpy(b, data_begin, len);
cursor_ += len;
return len;
}
//---------------------------------
journal_msg::journal_msg(bool success)
: success_(success)
{
}
block_msg::block_msg(bool success, uint64_t index)
: journal_msg(success), index_(index)
{
}
read_lock_msg::read_lock_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
read_lock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
write_lock_msg::write_lock_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
write_lock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
zero_lock_msg::zero_lock_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
zero_lock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
try_read_lock_msg::try_read_lock_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
try_read_lock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
unlock_msg::unlock_msg(bool success, uint64_t index, delta_list const &deltas)
: block_msg(success, index),
deltas_(deltas)
{
}
void
unlock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
verify_msg::verify_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
verify_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
prepare_msg::prepare_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
prepare_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
flush_msg::flush_msg(bool success)
: journal_msg(success)
{
}
void
flush_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
flush_and_unlock_msg::flush_and_unlock_msg(bool success, uint64_t index, delta_list const &deltas)
: block_msg(success, index),
deltas_(deltas)
{
}
void
flush_and_unlock_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
prefetch_msg::prefetch_msg(bool success, uint64_t index)
: block_msg(success, index)
{
}
void
prefetch_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
set_read_only_msg::set_read_only_msg()
: journal_msg(true)
{
}
void
set_read_only_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
set_read_write_msg::set_read_write_msg()
: journal_msg(true)
{
}
void
set_read_write_msg::visit(journal_visitor &v) const
{
v.visit(*this);
}
//------------------------------------------
journal::journal(block_manager<JOURNAL_BLOCK_SIZE>::ptr bm)
: in_(bm)
{
}
void
journal::read_journal(struct journal_visitor &v)
{
while (read_one_(v))
;
}
bool
journal::read_one_(struct journal_visitor &v)
{
uint8_t header = read_<uint8_t>();
uint8_t t = header >> 1;
uint8_t success = header & 0x1;
uint64_t index;
switch (static_cast<msg_type>(t)) {
case MT_READ_LOCK:
index = read_<uint64_t>();
v.visit(read_lock_msg(success, index));
break;
case MT_WRITE_LOCK:
index = read_<uint64_t>();
v.visit(write_lock_msg(success, index));
break;
case MT_ZERO_LOCK:
index = read_<uint64_t>();
v.visit(zero_lock_msg(success, index));
break;
case MT_TRY_READ_LOCK:
index = read_<uint64_t>();
v.visit(try_read_lock_msg(success, index));
break;
case MT_UNLOCK: {
index = read_<uint64_t>();
auto deltas = read_deltas_();
v.visit(unlock_msg(success, index, deltas));
}
break;
case MT_VERIFY:
index = read_<uint64_t>();
v.visit(verify_msg(success, index));
break;
case MT_PREPARE:
index = read_<uint64_t>();
v.visit(prepare_msg(success, index));
break;
case MT_FLUSH:
v.visit(flush_msg(success));
break;
case MT_FLUSH_AND_UNLOCK: {
index = read_<uint64_t>();
auto deltas = read_deltas_();
v.visit(flush_and_unlock_msg(success, index, deltas));
}
break;
case MT_PREFETCH:
index = read_<uint64_t>();
v.visit(prefetch_msg(success, index));
break;
case MT_SET_READ_ONLY:
v.visit(set_read_only_msg());
break;
case MT_SET_READ_WRITE:
v.visit(set_read_write_msg());
break;
case MT_END_OF_JOURNAL:
return false;
}
return true;
}
bool
journal::read_delta_(delta_list &ds)
{
uint8_t chunk = read_<uint8_t>();
if (chunk == 0xff)
return false;
auto bytes = vector<uint8_t>(JOURNAL_CHUNK_SIZE, 0);
in_.read_bytes(bytes.data(), bytes.data() + JOURNAL_CHUNK_SIZE);
ds.push_back(delta(chunk, bytes));
return true;
}
thin_provisioning::delta_list
journal::read_deltas_()
{
delta_list ds;
while (read_delta_(ds))
;
return ds;
}
//----------------------------------------------------------------

View File

@ -0,0 +1,203 @@
// Copyright (C) 2018 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef THIN_PROVISIONING_THIN_JOURNAL_H
#define THIN_PROVISIONING_THIN_JOURNAL_H
#include "persistent-data/block.h"
#include <vector>
//----------------------------------------------------------------
namespace thin_provisioning {
uint32_t const JOURNAL_BLOCK_SIZE = 256 * 1024;
uint32_t const JOURNAL_NR_CHUNKS = 32;
uint32_t const JOURNAL_CHUNK_SIZE = 4096 / JOURNAL_NR_CHUNKS;
class byte_stream {
public:
byte_stream(persistent_data::block_manager<JOURNAL_BLOCK_SIZE>::ptr bm);
void read_bytes(uint8_t *b, uint8_t *e);
private:
void next_block_();
size_t read_some_(uint8_t *b, uint8_t *e);
persistent_data::block_manager<JOURNAL_BLOCK_SIZE>::ptr bm_;
uint64_t current_block_;
uint64_t cursor_;
};
//---------------------------------
class journal_visitor;
struct journal_msg {
journal_msg(bool success);
virtual ~journal_msg() {}
virtual void visit(journal_visitor &v) const = 0;
bool success_;
};
struct block_msg : public journal_msg {
block_msg(bool success, uint64_t index);
uint64_t index_;
};
struct read_lock_msg : public block_msg {
read_lock_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct write_lock_msg : public block_msg {
write_lock_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct zero_lock_msg : public block_msg {
zero_lock_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct try_read_lock_msg : public block_msg {
try_read_lock_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct delta {
delta(uint32_t offset, std::vector<uint8_t> &bytes)
: offset_(offset),
bytes_(bytes) {
}
uint32_t offset_;
std::vector<uint8_t> bytes_;
};
using delta_list = std::vector<delta>;
struct unlock_msg : public block_msg {
unlock_msg(bool success, uint64_t index, delta_list const &deltas);
virtual void visit(journal_visitor &v) const;
delta_list deltas_;
};
struct verify_msg : public block_msg {
verify_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct prepare_msg : public block_msg {
prepare_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct flush_msg : public journal_msg {
flush_msg(bool success);
virtual void visit(journal_visitor &v) const;
};
struct flush_and_unlock_msg : public block_msg {
flush_and_unlock_msg(bool success, uint64_t index, delta_list const &deltas);
virtual void visit(journal_visitor &v) const;
delta_list deltas_;
};
struct prefetch_msg : public block_msg {
prefetch_msg(bool success, uint64_t index);
virtual void visit(journal_visitor &v) const;
};
struct set_read_only_msg : public journal_msg {
set_read_only_msg();
virtual void visit(journal_visitor &v) const;
};
struct set_read_write_msg : public journal_msg {
set_read_write_msg();
virtual void visit(journal_visitor &v) const;
};
struct journal_visitor {
public:
virtual ~journal_visitor() {};
void visit(journal_msg const &msg) {
msg.visit(*this);
}
virtual void visit(read_lock_msg const &msg) = 0;
virtual void visit(write_lock_msg const &msg) = 0;
virtual void visit(zero_lock_msg const &msg) = 0;
virtual void visit(try_read_lock_msg const &msg) = 0;
virtual void visit(unlock_msg const &msg) = 0;
virtual void visit(verify_msg const &msg) = 0;
virtual void visit(prepare_msg const &msg) = 0;
virtual void visit(flush_msg const &msg) = 0;
virtual void visit(flush_and_unlock_msg const &msg) = 0;
virtual void visit(prefetch_msg const &msg) = 0;
virtual void visit(set_read_only_msg const &msg) = 0;
virtual void visit(set_read_write_msg const &msg) = 0;
};
enum msg_type {
MT_READ_LOCK = 0,
MT_WRITE_LOCK,
MT_ZERO_LOCK,
MT_TRY_READ_LOCK,
MT_UNLOCK,
MT_VERIFY,
MT_PREPARE,
MT_FLUSH,
MT_FLUSH_AND_UNLOCK,
MT_PREFETCH,
MT_SET_READ_ONLY,
MT_SET_READ_WRITE,
MT_END_OF_JOURNAL,
};
class journal {
public:
journal(persistent_data::block_manager<JOURNAL_BLOCK_SIZE>::ptr bm);
void read_journal(struct journal_visitor &v);
private:
bool read_delta_(delta_list &ds);
delta_list read_deltas_();
bool read_one_(struct journal_visitor &v);
template <typename T> T read_() {
T r;
in_.read_bytes(reinterpret_cast<uint8_t *>(&r), reinterpret_cast<uint8_t *>(&r + 1));
return r;
}
byte_stream in_;
};
}
//----------------------------------------------------------------
#endif

View File

@ -0,0 +1,353 @@
// Copyright (C) 2018 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#include <iostream>
#include <getopt.h>
#include <libgen.h>
#include <fcntl.h>
#include <boost/lexical_cast.hpp>
#include <boost/optional.hpp>
#include <map>
#include <set>
#include "version.h"
#include "base/application.h"
#include "base/error_state.h"
#include "base/file_utils.h"
#include "base/nested_output.h"
#include "persistent-data/data-structures/btree_counter.h"
#include "persistent-data/space-maps/core.h"
#include "persistent-data/space-maps/disk.h"
#include "persistent-data/file_utils.h"
#include "thin-provisioning/metadata.h"
#include "thin-provisioning/device_tree.h"
#include "thin-provisioning/mapping_tree.h"
#include "thin-provisioning/metadata_counter.h"
#include "thin-provisioning/superblock.h"
#include "thin-provisioning/commands.h"
#include "thin-provisioning/thin_journal.h"
using namespace base;
using namespace boost;
using namespace file_utils;
using namespace std;
using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
unsigned const MAX_HELD_LOCKS = 16;
// We use a temporary file to hold all the deltas. Assume the metadata initially starts zeroed.
// Need to introduce notion of 'time' that increments everytime a write lock is taken.
// Need to track updates to the superblock to define transactions.
class checker : public journal_visitor {
public:
checker(block_address &nr_metadata_blocks)
: bm_(new block_manager<>("metadata.tmp", nr_metadata_blocks, MAX_HELD_LOCKS, block_manager<>::CREATE)) {
}
virtual void visit(read_lock_msg const &msg) {
read_lock_(msg.index_);
}
virtual void visit(write_lock_msg const &msg) {
write_lock_(msg.index_);
}
virtual void visit(zero_lock_msg const &msg) {
write_lock_(msg.index_);
}
virtual void visit(try_read_lock_msg const &msg) {
read_lock_(msg.index_);
}
virtual void visit(unlock_msg const &msg) {
unlock_(msg.index_, msg.deltas_);
}
virtual void visit(verify_msg const &msg) {
// noop
}
virtual void visit(prepare_msg const &msg) {
// noop
}
virtual void visit(flush_msg const &msg) {
cerr << "spurious flush()\n";
}
virtual void visit(flush_and_unlock_msg const &msg) {
if (msg.index_ != superblock_detail::SUPERBLOCK_LOCATION) {
cerr << "flush_and_unlock received for block " << msg.index_
<< ", which isn't the superblock\n";
throw runtime_error("bad flush_and_unlock");
}
commit(msg.deltas_);
}
virtual void visit(prefetch_msg const &msg) {
// ignore
}
virtual void visit(set_read_only_msg const &msg) {
// ignore
}
virtual void visit(set_read_write_msg const &msg) {
// ignore
}
private:
void read_lock_(block_address b) {
if (write_locks_.count(b)) {
cerr << "read lock taken concurrently with write lock for block "
<< b << "\n";
throw runtime_error("bad read lock");
}
auto it = read_locks_.find(b);
if (it == read_locks_.end())
read_locks_.insert(make_pair(b, 1));
else
it->second++;
}
void write_lock_(block_address b) {
if (active_.count(b)) {
cerr << "write lock taken for block "
<< b
<< ", but it is still in the active transaction\n";
throw runtime_error("bad write lock");
}
if (write_locks_.count(b)) {
cerr << "write lock already held for block "
<< b
<< "\n";
throw runtime_error("bad write lock");
}
if (read_locks_.count(b)) {
cerr << "read lock requested for write locked block "
<< b << "\n";
throw runtime_error("bad write lock");
}
write_locks_.insert(b);
}
void unlock_(block_address b, delta_list const &deltas) {
if (write_locks_.count(b)) {
write_locks_.erase(b);
auto wr = bm_->write_lock(b);
for (auto &&d : deltas) {
uint8_t *data = static_cast<uint8_t *>(wr.data());
if (d.offset_ + d.bytes_.size() > 4096) {
cerr << "delta for block " << b << " is out of range ("
<< d.offset_ << ", " << d.offset_ + d.bytes_.size() << "]\n";
throw runtime_error("bad unlock");
}
memcpy(data + d.offset_, d.bytes_.data(), d.bytes_.size());
}
} else {
auto it = read_locks_.find(b);
if (it == read_locks_.end()) {
cerr << "unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (deltas.size()) {
cerr << "unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
// Decrement lock
if (!it->second) {
cerr << "read lock entry has zero count (internal error)\n";
throw runtime_error("bad unlock");
}
if (!--it->second)
read_locks_.erase(it);
}
}
void commit(delta_list const &deltas) {
// At this point the only lock held should be the superblock,
// and that should be a write lock.
if (read_locks_.size()) {
cerr << "committing when the following read locks are still held:\n";
for (auto &&p : read_locks_)
cerr << p.first << "\n";
}
unlock_(superblock_detail::SUPERBLOCK_LOCATION, deltas);
if (write_locks_.size()) {
cerr << "commit() called, but the following write locks are held:\n";
for (auto &&b : write_locks_)
cerr << b << "\n";
}
build_active_set_();
}
void build_active_set_() {
using namespace thin_provisioning::superblock_detail;
superblock sb = read_superblock(bm_);
block_counter bc;
auto tm = open_tm(bm_, SUPERBLOCK_LOCATION);
auto sm = open_metadata_sm(*tm, &sb.metadata_space_map_root_);
tm->set_sm(sm);
// FIXME: check we don't have a space leak from a cycle between the sm and tm
count_metadata(tm, sb, bc);
active_.clear();
active_.insert(SUPERBLOCK_LOCATION);
for (auto &&p : bc.get_counts()) {
if (!p.second) {
cerr << "weird zero count for block " << p.first << "\n";
throw runtime_error("build_active_set() failed");
}
active_.insert(p.first);
}
}
typedef set<block_address> block_set;
typedef map<block_address, unsigned> block_map;
block_set active_;
block_set write_locks_;
block_map read_locks_;
block_manager<>::ptr bm_;
transaction_manager::ptr tm_;
};
struct flags {
flags()
: quiet(false) {
}
bool quiet;
};
void check(string const &path, block_address nr_metadata_blocks) {
block_address journal_size = get_file_length(path) / JOURNAL_BLOCK_SIZE;
block_manager<JOURNAL_BLOCK_SIZE>::ptr bm(
new block_manager<JOURNAL_BLOCK_SIZE>(path, journal_size, 4,
block_manager<JOURNAL_BLOCK_SIZE>::READ_ONLY));
journal j(bm);
checker c(nr_metadata_blocks);
j.read_journal(c);
}
}
//----------------------------------------------------------------
thin_journal_cmd::thin_journal_cmd()
: command("thin_journal_check")
{
}
void
thin_journal_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options] {device|file} {nr blocks}" << endl
<< "Options:\n"
<< " {-q|--quiet}\n"
<< " {-h|--help}\n"
<< " {-V|--version}\n";
}
int
thin_journal_cmd::run(int argc, char **argv)
{
int c;
flags fs;
char const shortopts[] = "qhV";
option const longopts[] = {
{ "quiet", no_argument, NULL, 'q'},
{ "help", no_argument, NULL, 'h'},
{ "version", no_argument, NULL, 'V'},
{ NULL, no_argument, NULL, 0 }
};
while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
switch(c) {
case 'h':
usage(cout);
return 0;
case 'q':
fs.quiet = true;
break;
case 'V':
cout << THIN_PROVISIONING_TOOLS_VERSION << endl;
return 0;
default:
usage(cerr);
return 1;
}
}
if (argc - optind != 2) {
if (!fs.quiet)
usage(cerr);
exit(1);
}
try {
check(argv[optind], lexical_cast<block_address>(argv[optind + 1]));
} catch (std::exception &e) {
cerr << e.what() << "\n";
return 1;
}
return 0;
}
//----------------------------------------------------------------