This commit is contained in:
Joe Thornber 2014-07-28 14:13:28 +01:00
parent ab6d31f136
commit 840be1b6c9
4 changed files with 155 additions and 203 deletions

View File

@ -11,6 +11,7 @@
#include <iostream> #include <iostream>
#include <stdexcept> #include <stdexcept>
#include <sstream>
//---------------------------------------------------------------- //----------------------------------------------------------------
@ -126,14 +127,14 @@ namespace bcache {
block_cache::complete_io(block &b, int result) block_cache::complete_io(block &b, int result)
{ {
b.error_ = result; b.error_ = result;
clear_flags(b, IO_PENDING); b.clear_flags(BF_IO_PENDING);
nr_io_pending_--; nr_io_pending_--;
if (b.error_) if (b.error_)
list_move_tail(&b.list_, &errored_); list_move_tail(&b.list_, &errored_);
else { else {
if (test_flags(b, DIRTY)) { if (b.test_flags(BF_DIRTY)) {
clear_flags(b, DIRTY); b.clear_flags(BF_DIRTY | BF_PREVIOUSLY_DIRTY);
nr_dirty_--; nr_dirty_--;
} }
@ -152,8 +153,8 @@ namespace bcache {
iocb *control_blocks[1]; iocb *control_blocks[1];
// FIXME: put this back in // FIXME: put this back in
assert(!test_flags(b, IO_PENDING)); assert(!b.test_flags(BF_IO_PENDING));
set_flags(b, IO_PENDING); b.set_flags(BF_IO_PENDING);
nr_io_pending_++; nr_io_pending_++;
list_move_tail(&b.list_, &io_pending_); list_move_tail(&b.list_, &io_pending_);
@ -177,14 +178,14 @@ namespace bcache {
int int
block_cache::issue_read(block &b) block_cache::issue_read(block &b)
{ {
assert(!test_flags(b, IO_PENDING)); assert(!b.test_flags(BF_IO_PENDING));
return issue_low_level(b, IO_CMD_PREAD, "read"); return issue_low_level(b, IO_CMD_PREAD, "read");
} }
int int
block_cache::issue_write(block &b) block_cache::issue_write(block &b)
{ {
assert(!test_flags(b, IO_PENDING)); assert(!b.test_flags(BF_IO_PENDING));
b.v_->prepare(b.data_, b.index_); b.v_->prepare(b.data_, b.index_);
return issue_low_level(b, IO_CMD_PWRITE, "write"); return issue_low_level(b, IO_CMD_PWRITE, "write");
} }
@ -213,7 +214,8 @@ namespace bcache {
complete_io(*b, e.res); complete_io(*b, e.res);
else else
info("incomplete io, unexpected: %d\n", r); info("incomplete io for block %llu, unexpected: %d\n",
b->index_, e.res);
} }
} }
@ -231,7 +233,7 @@ namespace bcache {
if (b.error_) if (b.error_)
return &errored_; return &errored_;
return (b.flags_ & DIRTY) ? &dirty_ : &clean_; return b.test_flags(BF_DIRTY) ? &dirty_ : &clean_;
} }
void void
@ -253,7 +255,7 @@ namespace bcache {
void void
block_cache::wait_specific(block &b) block_cache::wait_specific(block &b)
{ {
while (test_flags(b, IO_PENDING)) while (b.test_flags(BF_IO_PENDING))
wait_io(); wait_io();
} }
@ -262,12 +264,16 @@ namespace bcache {
{ {
int r; int r;
block *b, *tmp; block *b, *tmp;
unsigned actual = 0; unsigned actual = 0, dirty_length = 0;
list_for_each_entry_safe (b, tmp, &dirty_, list_) { list_for_each_entry_safe (b, tmp, &dirty_, list_) {
dirty_length++;
if (actual == count) if (actual == count)
break; break;
// The block may be on the dirty list from a prior
// acquisition.
if (b->ref_count_) if (b->ref_count_)
continue; continue;
@ -276,7 +282,7 @@ namespace bcache {
actual++; actual++;
} }
info("writeback: requested %u, actual %u\n", count, actual); info("writeback: requested %u, actual %u, dirty length %u\n", count, actual, dirty_length);
return actual; return actual;
} }
@ -377,7 +383,7 @@ namespace bcache {
b->ref_count_ = 0; b->ref_count_ = 0;
b->error_ = 0; b->error_ = 0;
clear_flags(*b, IO_PENDING | DIRTY); b->flags_ = 0;
b->index_ = index; b->index_ = index;
setup_control_block(*b); setup_control_block(*b);
@ -391,16 +397,6 @@ namespace bcache {
/*---------------------------------------------------------------- /*----------------------------------------------------------------
* Block reference counting * Block reference counting
*--------------------------------------------------------------*/ *--------------------------------------------------------------*/
// Marks a block dirty.  A block that already has DIRTY set is left alone;
// otherwise the flag is set, the block is moved onto dirty_ and nr_dirty_
// is bumped, so the counter only changes on the clean -> dirty transition.
void
block_cache::mark_dirty(block &b)
{
	if (test_flags(b, DIRTY))
		return;

	set_flags(b, DIRTY);
	list_move_tail(&b.list_, &dirty_);
	nr_dirty_++;
}
unsigned unsigned
block_cache::calc_nr_cache_blocks(size_t mem, sector_t block_size) block_cache::calc_nr_cache_blocks(size_t mem, sector_t block_size)
{ {
@ -451,8 +447,11 @@ namespace bcache {
aio_context_ = 0; /* needed or io_setup will fail */ aio_context_ = 0; /* needed or io_setup will fail */
r = io_setup(nr_cache_blocks, &aio_context_); r = io_setup(nr_cache_blocks, &aio_context_);
if (r < 0) if (r < 0) {
std::cerr << "r = " << r << "\n";
perror("io_setup failed");
throw std::runtime_error("io_setup failed"); throw std::runtime_error("io_setup failed");
}
hash_init(nr_buckets); hash_init(nr_buckets);
INIT_LIST_HEAD(&free_); INIT_LIST_HEAD(&free_);
@ -485,7 +484,7 @@ namespace bcache {
block_cache::zero_block(block &b) block_cache::zero_block(block &b)
{ {
memset(b.data_, 0, block_size_ << SECTOR_SHIFT); memset(b.data_, 0, block_size_ << SECTOR_SHIFT);
mark_dirty(b); b.mark_dirty();
} }
block_cache::block * block_cache::block *
@ -495,7 +494,7 @@ namespace bcache {
block *b = hash_lookup(index); block *b = hash_lookup(index);
if (b) { if (b) {
if (test_flags(*b, IO_PENDING)) if (b->test_flags(BF_IO_PENDING))
wait_specific(*b); wait_specific(*b);
if (flags & GF_ZERO) if (flags & GF_ZERO)
@ -503,24 +502,22 @@ namespace bcache {
else { else {
if (b->v_.get() && if (b->v_.get() &&
b->v_.get() != v.get() && b->v_.get() != v.get() &&
test_flags(*b, DIRTY)) b->test_flags(BF_DIRTY))
b->v_->prepare(b->data_, b->index_); b->v_->prepare(b->data_, b->index_);
} }
b->v_ = v; b->v_ = v;
} else { } else {
if (flags & GF_CAN_BLOCK) { b = new_block(index);
b = new_block(index); if (b) {
if (b) { b->v_ = v;
b->v_ = v;
if (flags & GF_ZERO) if (flags & GF_ZERO)
zero_block(*b); zero_block(*b);
else { else {
issue_read(*b); issue_read(*b);
wait_specific(*b); wait_specific(*b);
v->check(b->data_, b->index_); v->check(b->data_, b->index_);
}
} }
} }
} }
@ -531,12 +528,23 @@ namespace bcache {
block_cache::block & block_cache::block &
block_cache::get(block_address index, unsigned flags, validator::ptr v) block_cache::get(block_address index, unsigned flags, validator::ptr v)
{ {
check_index(index);
block *b = lookup_or_read_block(index, flags, v); block *b = lookup_or_read_block(index, flags, v);
if (b) { if (b) {
if (b->ref_count_)
throw std::runtime_error("block already locked");
hit(*b); hit(*b);
b->ref_count_++; b->ref_count_++;
if (flags & GF_BARRIER)
b->set_flags(BF_FLUSH);
if (flags & GF_DIRTY)
b->set_flags(BF_DIRTY);
return *b; return *b;
} }
@ -544,20 +552,39 @@ namespace bcache {
} }
void void
block_cache::put(block_cache::block &b, unsigned flags) block_cache::preemptive_writeback()
{ {
if (b.ref_count_ == 0) unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_);
throw std::runtime_error("bad put"); if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100))
writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available);
b.ref_count_--; }
if (flags & PF_DIRTY) { void
mark_dirty(b); block_cache::release(block_cache::block &b)
{
assert(!b.ref_count_);
// FIXME: factor out #if 0
unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_); if (b.test_flags(BF_FLUSH))
if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100)) flush();
writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available); #endif
if (b.test_flags(BF_DIRTY)) {
if (!b.test_flags(BF_PREVIOUSLY_DIRTY)) {
list_move_tail(&b.list_, &dirty_);
nr_dirty_++;
b.set_flags(BF_PREVIOUSLY_DIRTY);
}
#if 0
if (b.test_flags(BF_FLUSH))
flush();
else
#endif
preemptive_writeback();
b.clear_flags(BF_FLUSH);
} }
} }
@ -567,7 +594,7 @@ namespace bcache {
block *b, *tmp; block *b, *tmp;
list_for_each_entry_safe (b, tmp, &dirty_, list_) { list_for_each_entry_safe (b, tmp, &dirty_, list_) {
if (b->ref_count_ || test_flags(*b, IO_PENDING)) if (b->ref_count_ || b->test_flags(BF_IO_PENDING))
// The superblock may well be still locked. // The superblock may well be still locked.
continue; continue;
@ -582,6 +609,8 @@ namespace bcache {
void void
block_cache::prefetch(block_address index) block_cache::prefetch(block_address index)
{ {
check_index(index);
block *b = hash_lookup(index); block *b = hash_lookup(index);
if (!b) { if (!b) {
@ -591,24 +620,15 @@ namespace bcache {
} }
} }
//--------------------------------
// Returns the bits of `flags` that are currently set in b.flags_
// (zero when none of them are set).
unsigned
block_cache::test_flags(block &b, unsigned flags)
{
	unsigned const matched = b.flags_ & flags;
	return matched;
}
void void
block_cache::clear_flags(block &b, unsigned flags) block_cache::check_index(block_address index) const
{ {
b.flags_ &= ~flags; if (index >= nr_data_blocks_) {
} std::ostringstream out;
out << "block out of bounds ("
void << index << " >= " << nr_data_blocks_ << ")\n";
block_cache::set_flags(block &b, unsigned flags) throw std::runtime_error(out.str());
{ }
b.flags_ |= flags;
} }
} }

View File

@ -6,6 +6,7 @@
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <stdexcept>
#include <libaio.h> #include <libaio.h>
#include <memory> #include <memory>
#include <stdint.h> #include <stdint.h>
@ -36,12 +37,13 @@ namespace bcache {
//---------------------------------------------------------------- //----------------------------------------------------------------
// FIXME: throw exceptions rather than returning errors
class block_cache : private boost::noncopyable { class block_cache : private boost::noncopyable {
public: public:
enum block_flags { enum block_flags {
IO_PENDING = (1 << 0), BF_IO_PENDING = (1 << 0),
DIRTY = (1 << 1) BF_DIRTY = (1 << 1),
BF_FLUSH = (1 << 2),
BF_PREVIOUSLY_DIRTY = (1 << 3)
}; };
class block : private boost::noncopyable { class block : private boost::noncopyable {
@ -58,16 +60,49 @@ namespace bcache {
return data_; return data_;
} }
// Sets BF_DIRTY in flags_.  Only the flag bit is changed here.
void mark_dirty() {
	flags_ = flags_ | BF_DIRTY;
}
// Sets BF_FLUSH in flags_.  Only the flag bit is changed here.
void mark_flush() {
	flags_ = flags_ | BF_FLUSH;
}
// ORs every bit of `flags` into flags_; bits that were already set stay set.
void set_flags(unsigned flags) {
	flags_ = flags_ | flags;
}
// Returns the bits of `flags` that are currently set in flags_
// (zero when none of them are set).  Does not modify the block.
unsigned test_flags(unsigned flags) const {
	unsigned const matched = flags_ & flags;
	return matched;
}
// Clears every bit of `flags` in flags_; all other bits are left as they were.
void clear_flags(unsigned flags) {
	flags_ = flags_ & ~flags;
}
// Takes one more reference on this block by incrementing ref_count_.
void get() {
	++ref_count_;
}
// Drops one reference on this block.
//
// Throws std::runtime_error("bad put") if ref_count_ is already zero.
// When the count reaches zero the block is handed to the owning cache
// via bc_->release(*this).
void put() {
	if (ref_count_ == 0)
		throw std::runtime_error("bad put");

	--ref_count_;
	if (ref_count_ == 0)
		bc_->release(*this);
}
private: private:
friend class block_cache; friend class block_cache;
block_cache *bc_;
uint64_t index_; uint64_t index_;
void *data_; void *data_;
list_head list_; list_head list_;
list_head hash_list_; list_head hash_list_;
block_cache *bc_;
unsigned ref_count_; unsigned ref_count_;
int error_; int error_;
@ -87,18 +122,12 @@ namespace bcache {
enum get_flags { enum get_flags {
GF_ZERO = (1 << 0), GF_ZERO = (1 << 0),
GF_CAN_BLOCK = (1 << 1) GF_DIRTY = (1 << 1),
// NOTE(review): GF_BARRIER has the same value as GF_DIRTY (both 1 << 1), so
// a caller passing either flag sets both.  (1 << 2) looks intended, but
// confirm that no caller relies on GF_BARRIER also implying GF_DIRTY before
// changing the value.
GF_BARRIER = (1 << 1)
}; };
// FIXME: what if !GF_CAN_BLOCK?
block_cache::block &get(block_address index, unsigned flags, validator::ptr v); block_cache::block &get(block_address index, unsigned flags, validator::ptr v);
// Bit flags for the `flags` argument of put().
enum put_flags { PF_DIRTY = 1 << 0 };
void put(block_cache::block &block, unsigned flags);
/* /*
* Flush can fail if an earlier write failed. You do not know which block * Flush can fail if an earlier write failed. You do not know which block
* failed. Make sure you build your recovery with this in mind. * failed. Make sure you build your recovery with this in mind.
@ -131,9 +160,10 @@ namespace bcache {
unsigned calc_nr_buckets(unsigned nr_blocks); unsigned calc_nr_buckets(unsigned nr_blocks);
void zero_block(block &b); void zero_block(block &b);
block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v); block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v);
unsigned test_flags(block &b, unsigned flags);
void clear_flags(block &b, unsigned flags); void preemptive_writeback();
void set_flags(block &b, unsigned flags); void release(block_cache::block &block);
void check_index(block_address index) const;
//-------------------------------- //--------------------------------
@ -169,60 +199,6 @@ namespace bcache {
unsigned mask_; unsigned mask_;
std::vector<list_head> buckets_; std::vector<list_head> buckets_;
}; };
// Everything between this #if 0 and the matching #endif is compiled out.
// NOTE(review): as shown, the code below would not compile if enabled; see
// the notes on operator= and on the using-declarations.
#if 0
// Holds a block obtained from block_cache::get() and calls put() on it with
// the stored put_flags_ when destroyed.
class auto_lock {
public:
// Gets the block for `index` (adding GF_ZERO when `zero` is true) and
// allocates a holder count initialised to 1.
auto_lock(block_cache &bc, block_address index, bool zero, validator::ptr v, unsigned put_flags)
: bc_(bc),
b_(bc.get(index, (zero ? block_cache::GF_ZERO : 0) | block_cache::GF_CAN_BLOCK, v)),
put_flags_(put_flags),
holders_(new unsigned) {
*holders_ = 1;
}
// NOTE(review): the destructor puts the block unconditionally; holders_ is
// neither consulted nor deleted here.
virtual ~auto_lock() {
bc_.put(b_, put_flags_);
}
// NOTE(review): operator= is unfinished as shown: neither its `if` nor its
// body is closed before data() begins, and bc_ is a reference member.
auto_lock operator =(auto_lock const &rhs) {
if (this != &rhs) {
bc_ = rhs.bc_;
// Read-only view of the block's data.
void const *data() const {
return b_.get_data();
}
private:
block_cache &bc_;
block_cache::block &b_;
unsigned put_flags_;
unsigned *holders_;
};
// auto_lock that is constructed with put flags of 0.
class auto_read_lock : public auto_lock {
public:
auto_read_lock(block_cache &bc, block_address index, bool zero, validator::ptr v)
: auto_lock(bc, index, zero, v, 0) {
}
// NOTE(review): a using-declaration names the member without parentheses,
// i.e. `using auto_lock::data;`.
using auto_lock::data();
};
// auto_lock that additionally exposes a mutable data() accessor.
class auto_write_lock : public auto_lock {
public:
// NOTE(review): passes block_cache::DIRTY (a block flag) as put_flags, while
// put() tests PF_DIRTY; confirm which constant was intended.
auto_write_lock(block_cache &bc, block_address index, bool zero, validator::ptr v)
: auto_lock(bc, index, zero, v, block_cache::DIRTY) {
}
// NOTE(review): same using-declaration syntax problem as in auto_read_lock.
using auto_lock::data();
// Mutable view of the block's data.
void *data() {
return b_.get_data();
}
};
#endif
} }
//---------------------------------------------------------------- //----------------------------------------------------------------

View File

@ -55,15 +55,11 @@ namespace persistent_data {
unsigned max_concurrent_locks, unsigned max_concurrent_locks,
mode m); mode m);
typedef void (*put_behaviour_fn)(block_cache &, block_cache::block &);
class read_ref { class read_ref {
public: public:
static uint32_t const BLOCK_SIZE = BlockSize; static uint32_t const BLOCK_SIZE = BlockSize;
read_ref(block_cache &bc, read_ref(block_cache::block &b);
block_cache::block &b,
put_behaviour_fn fn);
read_ref(read_ref const &rhs); read_ref(read_ref const &rhs);
virtual ~read_ref(); virtual ~read_ref();
@ -74,19 +70,14 @@ namespace persistent_data {
void const *data() const; void const *data() const;
protected: protected:
block_cache &bc_;
block_cache::block &b_; block_cache::block &b_;
put_behaviour_fn fn_;
unsigned *holders_;
}; };
// Inherited from read_ref, since you can read a block that's write // Inherited from read_ref, since you can read a block that's write
// locked. // locked.
class write_ref : public read_ref { class write_ref : public read_ref {
public: public:
write_ref(block_cache &bc, write_ref(block_cache::block &b);
block_cache::block &b,
put_behaviour_fn fn);
using read_ref::data; using read_ref::data;
void *data(); void *data();
@ -94,9 +85,7 @@ namespace persistent_data {
class super_ref : public write_ref { class super_ref : public write_ref {
public: public:
super_ref(block_cache &bc, super_ref(block_cache::block &b);
block_cache::block &b,
put_behaviour_fn fn);
using read_ref::data; using read_ref::data;
using write_ref::data; using write_ref::data;

View File

@ -104,50 +104,23 @@ namespace {
}; };
namespace persistent_data { namespace persistent_data {
// Put behaviour used for read references: returns the block to the cache
// with no put flags set.
inline void read_put(block_cache &bc, block_cache::block &b)
{
	unsigned const no_flags = 0;
	bc.put(b, no_flags);
}
// Put behaviour used for write references: returns the block to the cache
// with PF_DIRTY set.
inline void write_put(block_cache &bc, block_cache::block &b)
{
	unsigned const dirty = block_cache::PF_DIRTY;
	bc.put(b, dirty);
}
// Put behaviour used for superblock references: flushes the cache, returns
// the block with PF_DIRTY set, then flushes again.  The order of the three
// calls is significant and is kept exactly.
// NOTE(review): presumably the surrounding flushes order the superblock
// write after all earlier writes; confirm against flush().
inline void super_put(block_cache &bc, block_cache::block &b)
{
	unsigned const dirty = block_cache::PF_DIRTY;

	bc.flush();
	bc.put(b, dirty);
	bc.flush();
}
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::read_ref(block_cache &bc, block_manager<BlockSize>::read_ref::read_ref(block_cache::block &b)
block_cache::block &b, : b_(b)
put_behaviour_fn fn)
: bc_(bc),
b_(b),
fn_(fn),
holders_(new unsigned)
{ {
*holders_ = 1;
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::read_ref(read_ref const &rhs) block_manager<BlockSize>::read_ref::read_ref(read_ref const &rhs)
: bc_(rhs.bc_), : b_(rhs.b_)
b_(rhs.b_),
fn_(rhs.fn_),
holders_(rhs.holders_)
{ {
(*holders_)++; b_.get();
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::read_ref::~read_ref() block_manager<BlockSize>::read_ref::~read_ref()
{ {
if (!--(*holders_)) { b_.put();
fn_(bc_, b_);
delete holders_;
}
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -155,11 +128,8 @@ namespace persistent_data {
block_manager<BlockSize>::read_ref::operator =(read_ref const &rhs) block_manager<BlockSize>::read_ref::operator =(read_ref const &rhs)
{ {
if (this != &rhs) { if (this != &rhs) {
bc_ = rhs.bc_;
b_ = rhs.b_; b_ = rhs.b_;
fn_ = rhs.fn_; b_.get();
holders_ = rhs.holders_;
(*holders_)++;
} }
return *this; return *this;
@ -182,10 +152,8 @@ namespace persistent_data {
//-------------------------------- //--------------------------------
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::write_ref::write_ref(block_cache &bc, block_manager<BlockSize>::write_ref::write_ref(block_cache::block &b)
block_cache::block &b, : read_ref(b)
put_behaviour_fn fn)
: read_ref(bc, b, fn)
{ {
} }
@ -199,10 +167,9 @@ namespace persistent_data {
//-------------------------------- //--------------------------------
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::super_ref::super_ref(block_cache &bc, block_manager<BlockSize>::super_ref::super_ref(block_cache::block &b)
block_cache::block &b, : write_ref(b)
put_behaviour_fn fn) {
: write_ref(bc, b, fn) {
} }
//---------------------------------------------------------------- //----------------------------------------------------------------
@ -213,7 +180,7 @@ namespace persistent_data {
unsigned max_concurrent_blocks, unsigned max_concurrent_blocks,
mode m) mode m)
: fd_(open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE)), : fd_(open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE)),
bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 256) bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 16)
{ {
} }
@ -222,8 +189,8 @@ namespace persistent_data {
block_manager<BlockSize>::read_lock(block_address location, block_manager<BlockSize>::read_lock(block_address location,
typename bcache::validator::ptr v) const typename bcache::validator::ptr v) const
{ {
block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); block_cache::block &b = bc_.get(location, 0, v);
return read_ref(bc_, b, read_put); return read_ref(b);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -231,8 +198,8 @@ namespace persistent_data {
block_manager<BlockSize>::write_lock(block_address location, block_manager<BlockSize>::write_lock(block_address location,
typename bcache::validator::ptr v) typename bcache::validator::ptr v)
{ {
block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); block_cache::block &b = bc_.get(location, block_cache::GF_DIRTY, v);
return write_ref(bc_, b, write_put); return write_ref(b);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -240,8 +207,8 @@ namespace persistent_data {
block_manager<BlockSize>::write_lock_zero(block_address location, block_manager<BlockSize>::write_lock_zero(block_address location,
typename bcache::validator::ptr v) typename bcache::validator::ptr v)
{ {
block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v); block_cache::block &b = bc_.get(location, block_cache::GF_ZERO, v);
return write_ref(bc_, b, write_put); return write_ref(b);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -249,8 +216,8 @@ namespace persistent_data {
block_manager<BlockSize>::superblock(block_address location, block_manager<BlockSize>::superblock(block_address location,
typename bcache::validator::ptr v) typename bcache::validator::ptr v)
{ {
block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); block_cache::block &b = bc_.get(location, block_cache::GF_BARRIER, v);
return super_ref(bc_, b, super_put); return super_ref(b);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@ -258,8 +225,8 @@ namespace persistent_data {
block_manager<BlockSize>::superblock_zero(block_address location, block_manager<BlockSize>::superblock_zero(block_address location,
typename bcache::validator::ptr v) typename bcache::validator::ptr v)
{ {
block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v); block_cache::block &b = bc_.get(location, block_cache::GF_ZERO | block_cache::GF_BARRIER, v);
return super_ref(bc_, b, super_put); return super_ref(b);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>