[thin_journal_check] Checks journal of block manager activity.

You need to apply doc/bm-journal.patch to create the journal.

thin_journal_check confirms that, if the machine had crashed at any time
during the test run, no metadata corruption would have occurred.
This commit is contained in:
Joe Thornber
2018-09-24 14:51:46 +01:00
parent 70cdfe12a2
commit de7c9a5781
6 changed files with 1761 additions and 94 deletions

View File

@@ -54,6 +54,97 @@ using namespace thin_provisioning;
//----------------------------------------------------------------
namespace {
// Tracing decorator for a journal_visitor.
//
// Every visit() overload writes a one-line description of the message to
// cout and then passes the same message on to the wrapped visitor.
// Messages that carry a block index are only printed when interesting()
// accepts that index; messages without an index are always printed.
class journal_display : public journal_visitor {
public:
	// Wraps 'inner'.  Only a reference is stored, no copy is made.
	journal_display(journal_visitor &inner)
		: inner_(inner) {
	}

	virtual void visit(open_journal_msg const &msg) {
		echo_and_forward_("open_journal\n", msg);
	}

	virtual void visit(close_journal_msg const &msg) {
		echo_and_forward_("close_journal\n", msg);
	}

	virtual void visit(read_lock_msg const &msg) {
		echo_block_and_forward_("read_lock ", msg);
	}

	virtual void visit(write_lock_msg const &msg) {
		echo_block_and_forward_("write_lock ", msg);
	}

	virtual void visit(zero_lock_msg const &msg) {
		echo_block_and_forward_("zero_lock ", msg);
	}

	virtual void visit(try_read_lock_msg const &msg) {
		echo_block_and_forward_("try_read_lock ", msg);
	}

	virtual void visit(unlock_msg const &msg) {
		echo_block_and_forward_("unlock ", msg);
	}

	virtual void visit(verify_msg const &msg) {
		echo_block_and_forward_("verify ", msg);
	}

	virtual void visit(prepare_msg const &msg) {
		echo_block_and_forward_("prepare ", msg);
	}

	virtual void visit(flush_msg const &msg) {
		echo_and_forward_("flush\n", msg);
	}

	virtual void visit(flush_and_unlock_msg const &msg) {
		echo_block_and_forward_("flush_and_unlock ", msg);
	}

	virtual void visit(prefetch_msg const &msg) {
		echo_block_and_forward_("prefetch ", msg);
	}

	virtual void visit(set_read_only_msg const &msg) {
		echo_and_forward_("set_read_only\n", msg);
	}

	virtual void visit(set_read_write_msg const &msg) {
		echo_and_forward_("set_read_write\n", msg);
	}

	// Filter deciding whether a message about block 'b' gets printed.
	// Currently accepts every block.
	bool interesting(block_address b) const {
		return true;
	}

	// The visitor that receives every message after it has been shown.
	journal_visitor &inner_;

private:
	// Prints 'line' verbatim (it already ends with a newline), then
	// forwards 'msg' to the wrapped visitor.
	template <typename Msg>
	void echo_and_forward_(char const *line, Msg const &msg) {
		cout << line;
		inner_.visit(msg);
	}

	// Prints 'label' followed by the message's block index, provided the
	// index passes interesting(); the message is forwarded regardless.
	template <typename Msg>
	void echo_block_and_forward_(char const *label, Msg const &msg) {
		if (interesting(msg.index_))
			cout << label << msg.index_ << "\n";
		inner_.visit(msg);
	}
};
unsigned const MAX_HELD_LOCKS = 16;
@@ -62,28 +153,44 @@ namespace {
// Need to track updates to the superblock to define transactions.
class checker : public journal_visitor {
public:
checker(block_address &nr_metadata_blocks)
: bm_(new block_manager<>("metadata.tmp", nr_metadata_blocks, MAX_HELD_LOCKS, block_manager<>::CREATE)) {
virtual void visit(open_journal_msg const &msg) {
bm_.reset(new block_manager<>("metadata.tmp", msg.nr_metadata_blocks_,
MAX_HELD_LOCKS, block_manager<>::CREATE));
}
virtual void visit(close_journal_msg const &msg) {
// noop
}
virtual void visit(read_lock_msg const &msg) {
read_lock_(msg.index_);
if (msg.success_)
read_lock_(msg.index_);
}
virtual void visit(write_lock_msg const &msg) {
write_lock_(msg.index_);
if (msg.success_)
write_lock_(msg.index_);
}
virtual void visit(zero_lock_msg const &msg) {
write_lock_(msg.index_);
if (msg.success_) {
write_lock_(msg.index_);
zero_(msg.index_);
}
}
virtual void visit(try_read_lock_msg const &msg) {
read_lock_(msg.index_);
if (msg.success_)
read_lock_(msg.index_);
}
virtual void visit(unlock_msg const &msg) {
bool write_locked = is_write_locked_(msg.index_);
unlock_(msg.index_, msg.deltas_);
if (write_locked && msg.index_ == superblock_detail::SUPERBLOCK_LOCATION)
commit_();
}
virtual void visit(verify_msg const &msg) {
@@ -95,17 +202,17 @@ namespace {
}
virtual void visit(flush_msg const &msg) {
cerr << "spurious flush()\n";
cout << "WARN: spurious flush()\n";
}
virtual void visit(flush_and_unlock_msg const &msg) {
if (msg.index_ != superblock_detail::SUPERBLOCK_LOCATION) {
cerr << "flush_and_unlock received for block " << msg.index_
cout << "ERROR: flush_and_unlock received for block " << msg.index_
<< ", which isn't the superblock\n";
throw runtime_error("bad flush_and_unlock");
}
commit(msg.deltas_);
unlock_(msg.index_, msg.deltas_);
commit_();
}
virtual void visit(prefetch_msg const &msg) {
@@ -122,55 +229,68 @@ namespace {
private:
void read_lock_(block_address b) {
if (write_locks_.count(b)) {
cerr << "read lock taken concurrently with write lock for block "
<< b << "\n";
throw runtime_error("bad read lock");
}
auto it = locks_.find(b);
if (it == locks_.end())
locks_.insert(make_pair(b, -1));
auto it = read_locks_.find(b);
if (it == read_locks_.end())
read_locks_.insert(make_pair(b, 1));
else
it->second++;
else if (it->second > 0) {
cout << "WARN: read lock taken concurrently with write lock for block "
<< b << "\n";
} else
--it->second;
}
void write_lock_(block_address b) {
if (active_.count(b)) {
cerr << "write lock taken for block "
if (is_superblock_(b)) {
if (locks_.size())
cout << "WARN: superblock taken when locks still held\n";
} else if (active_.count(b)) {
cout << "ERROR: write lock taken for block "
<< b
<< ", but it is still in the active transaction\n";
throw runtime_error("bad write lock");
throw runtime_error("bad write_lock");
}
if (write_locks_.count(b)) {
cerr << "write lock already held for block "
<< b
<< "\n";
throw runtime_error("bad write lock");
}
auto it = locks_.find(b);
if (it == locks_.end())
locks_.insert(make_pair(b, 1));
if (read_locks_.count(b)) {
cerr << "read lock requested for write locked block "
else if (it->second < 0) {
cout << "WARN: write lock requested for read locked block "
<< b << "\n";
throw runtime_error("bad write lock");
}
write_locks_.insert(b);
} else
it->second++;
}
bool is_write_locked_(block_address b) const {
auto it = locks_.find(b);
return it != locks_.end() && it->second > 0;
}
void unlock_(block_address b, delta_list const &deltas) {
if (write_locks_.count(b)) {
write_locks_.erase(b);
auto it = locks_.find(b);
if (it == locks_.end() || !it->second) {
cout << "ERROR: unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (it->second < 0) {
it->second++;
if (deltas.size()) {
cout << "ERROR: unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
} else {
auto wr = bm_->write_lock(b);
for (auto &&d : deltas) {
uint8_t *data = static_cast<uint8_t *>(wr.data());
if (d.offset_ + d.bytes_.size() > 4096) {
cerr << "delta for block " << b << " is out of range ("
cout << "ERROR: delta for block " << b << " is out of range ("
<< d.offset_ << ", " << d.offset_ + d.bytes_.size() << "]\n";
throw runtime_error("bad unlock");
}
@@ -178,46 +298,28 @@ namespace {
memcpy(data + d.offset_, d.bytes_.data(), d.bytes_.size());
}
} else {
auto it = read_locks_.find(b);
if (it == read_locks_.end()) {
cerr << "unlock requested on block " << b << ", which isn't locked\n";
throw runtime_error("bad unlock");
}
if (deltas.size()) {
cerr << "unlocking a read lock for " << b << ", yet there are " << deltas.size() << " deltas\n";
throw runtime_error("bad unlock");
}
// Decrement lock
if (!it->second) {
cerr << "read lock entry has zero count (internal error)\n";
throw runtime_error("bad unlock");
}
if (!--it->second)
read_locks_.erase(it);
it->second--;
}
if (!it->second)
locks_.erase(it);
}
void commit(delta_list const &deltas) {
void zero_(block_address b) {
auto wr = bm_->write_lock_zero(b);
}
void commit_() {
using namespace thin_provisioning::superblock_detail;
// At this point the only lock held should be the superblock,
// and that should be a write lock.
if (read_locks_.size()) {
cerr << "committing when the following read locks are still held:\n";
for (auto &&p : read_locks_)
cerr << p.first << "\n";
}
unlock_(superblock_detail::SUPERBLOCK_LOCATION, deltas);
if (write_locks_.size()) {
cerr << "commit() called, but the following write locks are held:\n";
for (auto &&b : write_locks_)
cerr << b << "\n";
if (locks_.size() != 0) {
cout << "ERROR: committing when the following locks are still held:\n";
for (auto &&p : locks_)
if (p.first != SUPERBLOCK_LOCATION)
cerr << p.first << "\n";
throw runtime_error("bad commit");
}
build_active_set_();
@@ -226,6 +328,7 @@ namespace {
void build_active_set_() {
using namespace thin_provisioning::superblock_detail;
cerr << "build active set\n";
superblock sb = read_superblock(bm_);
block_counter bc;
@@ -242,20 +345,24 @@ namespace {
for (auto &&p : bc.get_counts()) {
if (!p.second) {
cerr << "weird zero count for block " << p.first << "\n";
throw runtime_error("build_active_set() failed");
cout << "weird zero count for block " << p.first << "\n";
}
active_.insert(p.first);
}
}
bool is_superblock_(block_address b) const {
return b == superblock_detail::SUPERBLOCK_LOCATION;
}
typedef set<block_address> block_set;
typedef map<block_address, unsigned> block_map;
// write locks positive, unlocked 0, read locks negative
typedef map<block_address, int> block_map;
block_set active_;
block_set write_locks_;
block_map read_locks_;
block_map locks_;
block_manager<>::ptr bm_;
transaction_manager::ptr tm_;
@@ -269,13 +376,14 @@ namespace {
bool quiet;
};
void check(string const &path, block_address nr_metadata_blocks) {
void check(string const &path) {
block_address journal_size = get_file_length(path) / JOURNAL_BLOCK_SIZE;
block_manager<JOURNAL_BLOCK_SIZE>::ptr bm(
new block_manager<JOURNAL_BLOCK_SIZE>(path, journal_size, 4,
block_manager<JOURNAL_BLOCK_SIZE>::READ_ONLY));
journal j(bm);
checker c(nr_metadata_blocks);
checker c;
journal_display dc(c);
j.read_journal(c);
}
@@ -291,7 +399,7 @@ thin_journal_cmd::thin_journal_cmd()
void
thin_journal_cmd::usage(std::ostream &out) const
{
out << "Usage: " << get_name() << " [options] {device|file} {nr blocks}" << endl
out << "Usage: " << get_name() << " [options] {device|file}" << endl
<< "Options:\n"
<< " {-q|--quiet}\n"
<< " {-h|--help}\n"
@@ -332,7 +440,7 @@ thin_journal_cmd::run(int argc, char **argv)
}
}
if (argc - optind != 2) {
if (argc - optind != 1) {
if (!fs.quiet)
usage(cerr);
@@ -340,7 +448,7 @@ thin_journal_cmd::run(int argc, char **argv)
}
try {
check(argv[optind], lexical_cast<block_address>(argv[optind + 1]));
check(argv[optind]);
} catch (std::exception &e) {
cerr << e.what() << "\n";