From 840be1b6c9660b0e2cdfac57addd57c09228d401 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 28 Jul 2014 14:13:28 +0100 Subject: [PATCH] wip --- block-cache/block_cache.cc | 150 +++++++++++++++++++++---------------- block-cache/block_cache.h | 114 +++++++++++----------------- persistent-data/block.h | 17 +---- persistent-data/block.tcc | 77 ++++++------------- 4 files changed, 155 insertions(+), 203 deletions(-) diff --git a/block-cache/block_cache.cc b/block-cache/block_cache.cc index 65ec90d..7aae48b 100644 --- a/block-cache/block_cache.cc +++ b/block-cache/block_cache.cc @@ -11,6 +11,7 @@ #include #include +#include //---------------------------------------------------------------- @@ -126,14 +127,14 @@ namespace bcache { block_cache::complete_io(block &b, int result) { b.error_ = result; - clear_flags(b, IO_PENDING); + b.clear_flags(BF_IO_PENDING); nr_io_pending_--; if (b.error_) list_move_tail(&b.list_, &errored_); else { - if (test_flags(b, DIRTY)) { - clear_flags(b, DIRTY); + if (b.test_flags(BF_DIRTY)) { + b.clear_flags(BF_DIRTY | BF_PREVIOUSLY_DIRTY); nr_dirty_--; } @@ -152,8 +153,8 @@ namespace bcache { iocb *control_blocks[1]; // FIXME: put this back in - assert(!test_flags(b, IO_PENDING)); - set_flags(b, IO_PENDING); + assert(!b.test_flags(BF_IO_PENDING)); + b.set_flags(BF_IO_PENDING); nr_io_pending_++; list_move_tail(&b.list_, &io_pending_); @@ -177,14 +178,14 @@ namespace bcache { int block_cache::issue_read(block &b) { - assert(!test_flags(b, IO_PENDING)); + assert(!b.test_flags(BF_IO_PENDING)); return issue_low_level(b, IO_CMD_PREAD, "read"); } int block_cache::issue_write(block &b) { - assert(!test_flags(b, IO_PENDING)); + assert(!b.test_flags(BF_IO_PENDING)); b.v_->prepare(b.data_, b.index_); return issue_low_level(b, IO_CMD_PWRITE, "write"); } @@ -213,7 +214,8 @@ namespace bcache { complete_io(*b, e.res); else - info("incomplete io, unexpected: %d\n", r); + info("incomplete io for block %llu, unexpected: %d\n", + b->index_, e.res); } } @@ -231,7 +233,7 @@ namespace bcache { if (b.error_) return &errored_; - return (b.flags_ & DIRTY) ? &dirty_ : &clean_; + return b.test_flags(BF_DIRTY) ? &dirty_ : &clean_; } void @@ -253,7 +255,7 @@ namespace bcache { void block_cache::wait_specific(block &b) { - while (test_flags(b, IO_PENDING)) + while (b.test_flags(BF_IO_PENDING)) wait_io(); } @@ -262,12 +264,16 @@ namespace bcache { { int r; block *b, *tmp; - unsigned actual = 0; + unsigned actual = 0, dirty_length = 0; list_for_each_entry_safe (b, tmp, &dirty_, list_) { + dirty_length++; + if (actual == count) break; + // The block may be on the dirty list from a prior + // acquisition. if (b->ref_count_) continue; @@ -276,7 +282,7 @@ namespace bcache { actual++; } - info("writeback: requested %u, actual %u\n", count, actual); + info("writeback: requested %u, actual %u, dirty length %u\n", count, actual, dirty_length); return actual; } @@ -377,7 +383,7 @@ namespace bcache { b->ref_count_ = 0; b->error_ = 0; - clear_flags(*b, IO_PENDING | DIRTY); + b->flags_ = 0; b->index_ = index; setup_control_block(*b); @@ -391,16 +397,6 @@ namespace bcache { /*---------------------------------------------------------------- * Block reference counting *--------------------------------------------------------------*/ - void - block_cache::mark_dirty(block &b) - { - if (!test_flags(b, DIRTY)) { - set_flags(b, DIRTY); - list_move_tail(&b.list_, &dirty_); - nr_dirty_++; - } - } - unsigned block_cache::calc_nr_cache_blocks(size_t mem, sector_t block_size) { @@ -451,8 +447,11 @@ namespace bcache { aio_context_ = 0; /* needed or io_setup will fail */ r = io_setup(nr_cache_blocks, &aio_context_); - if (r < 0) + if (r < 0) { + std::cerr << "r = " << r << "\n"; + perror("io_setup failed"); throw std::runtime_error("io_setup failed"); + } hash_init(nr_buckets); INIT_LIST_HEAD(&free_); @@ -485,7 +484,7 @@ namespace bcache { block_cache::zero_block(block &b) { memset(b.data_, 0, block_size_ << SECTOR_SHIFT); - mark_dirty(b); + b.mark_dirty(); } block_cache::block * @@ -495,7 +494,7 @@ namespace bcache { block *b = hash_lookup(index); if (b) { - if (test_flags(*b, IO_PENDING)) + if (b->test_flags(BF_IO_PENDING)) wait_specific(*b); if (flags & GF_ZERO) @@ -503,24 +502,22 @@ namespace bcache { else { if (b->v_.get() && b->v_.get() != v.get() && - test_flags(*b, DIRTY)) + b->test_flags(BF_DIRTY)) b->v_->prepare(b->data_, b->index_); } b->v_ = v; } else { - if (flags & GF_CAN_BLOCK) { - b = new_block(index); - if (b) { - b->v_ = v; + b = new_block(index); + if (b) { + b->v_ = v; - if (flags & GF_ZERO) - zero_block(*b); - else { - issue_read(*b); - wait_specific(*b); - v->check(b->data_, b->index_); - } + if (flags & GF_ZERO) + zero_block(*b); + else { + issue_read(*b); + wait_specific(*b); + v->check(b->data_, b->index_); } } } @@ -531,12 +528,23 @@ namespace bcache { block_cache::block & block_cache::get(block_address index, unsigned flags, validator::ptr v) { + check_index(index); + block *b = lookup_or_read_block(index, flags, v); if (b) { + if (b->ref_count_) + throw std::runtime_error("block already locked"); + hit(*b); b->ref_count_++; + if (flags & GF_BARRIER) + b->set_flags(BF_FLUSH); + + if (flags & GF_DIRTY) + b->set_flags(BF_DIRTY); + return *b; } @@ -544,20 +552,39 @@ namespace bcache { } void - block_cache::put(block_cache::block &b, unsigned flags) + block_cache::preemptive_writeback() { - if (b.ref_count_ == 0) - throw std::runtime_error("bad put"); + unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_); + if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100)) + writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available); - b.ref_count_--; + } - if (flags & PF_DIRTY) { - mark_dirty(b); + void + block_cache::release(block_cache::block &b) + { + assert(!b.ref_count_); - // FIXME: factor out - unsigned nr_available = nr_cache_blocks_ - (nr_dirty_ - nr_io_pending_); - if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * nr_cache_blocks_ / 100)) - writeback((WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks_ / 100) - nr_available); +#if 0 + if (b.test_flags(BF_FLUSH)) + flush(); +#endif + + if (b.test_flags(BF_DIRTY)) { + if (!b.test_flags(BF_PREVIOUSLY_DIRTY)) { + list_move_tail(&b.list_, &dirty_); + nr_dirty_++; + b.set_flags(BF_PREVIOUSLY_DIRTY); + } + +#if 0 + if (b.test_flags(BF_FLUSH)) + flush(); + else +#endif + preemptive_writeback(); + + b.clear_flags(BF_FLUSH); } } @@ -567,7 +594,7 @@ namespace bcache { block *b, *tmp; list_for_each_entry_safe (b, tmp, &dirty_, list_) { - if (b->ref_count_ || test_flags(*b, IO_PENDING)) + if (b->ref_count_ || b->test_flags(BF_IO_PENDING)) // The superblock may well be still locked. continue; @@ -582,6 +609,8 @@ namespace bcache { void block_cache::prefetch(block_address index) { + check_index(index); + block *b = hash_lookup(index); if (!b) { @@ -591,24 +620,15 @@ namespace bcache { } } - //-------------------------------- - - unsigned - block_cache::test_flags(block &b, unsigned flags) - { - return b.flags_ & flags; - } - void - block_cache::clear_flags(block &b, unsigned flags) + block_cache::check_index(block_address index) const { - b.flags_ &= ~flags; - } - - void - block_cache::set_flags(block &b, unsigned flags) - { - b.flags_ |= flags; + if (index >= nr_data_blocks_) { + std::ostringstream out; + out << "block out of bounds (" + << index << " >= " << nr_data_blocks_ << ")\n"; + throw std::runtime_error(out.str()); + } } } diff --git a/block-cache/block_cache.h b/block-cache/block_cache.h index 9284500..cb991ab 100644 --- a/block-cache/block_cache.h +++ b/block-cache/block_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -36,12 +37,13 @@ namespace bcache { //---------------------------------------------------------------- - // FIXME: throw exceptions rather than returning errors class block_cache : private boost::noncopyable { public: enum block_flags { - IO_PENDING = (1 << 0), - DIRTY = (1 << 1) + BF_IO_PENDING = (1 << 0), + BF_DIRTY = (1 << 1), + BF_FLUSH = (1 << 2), + BF_PREVIOUSLY_DIRTY = (1 << 3) }; class block : private boost::noncopyable { @@ -58,16 +60,49 @@ namespace bcache { return data_; } + void mark_dirty() { + flags_ |= BF_DIRTY; + } + + void mark_flush() { + flags_ |= BF_FLUSH; + } + + void set_flags(unsigned flags) { + flags_ |= flags; + } + + unsigned test_flags(unsigned flags) const { + return flags_ & flags; + } + + void clear_flags(unsigned flags) { + flags_ &= ~flags; + } + + void get() { + ref_count_++; + }; + + void put() { + if (!ref_count_) + throw std::runtime_error("bad put"); + + if (!--ref_count_) + bc_->release(*this); + } + private: friend class block_cache; + block_cache *bc_; + uint64_t index_; void *data_; list_head list_; list_head hash_list_; - block_cache *bc_; unsigned ref_count_; int error_; @@ -87,18 +122,12 @@ namespace bcache { enum get_flags { GF_ZERO = (1 << 0), - GF_CAN_BLOCK = (1 << 1) + GF_DIRTY = (1 << 1), + GF_BARRIER = (1 << 1) }; - // FIXME: what if !GF_CAN_BLOCK? block_cache::block &get(block_address index, unsigned flags, validator::ptr v); - enum put_flags { - PF_DIRTY = (1 << 0), - }; - - void put(block_cache::block &block, unsigned flags); - /* * Flush can fail if an earlier write failed. You do not know which block * failed. Make sure you build your recovery with this in mind. @@ -131,9 +160,10 @@ namespace bcache { unsigned calc_nr_buckets(unsigned nr_blocks); void zero_block(block &b); block *lookup_or_read_block(block_address index, unsigned flags, validator::ptr v); - unsigned test_flags(block &b, unsigned flags); - void clear_flags(block &b, unsigned flags); - void set_flags(block &b, unsigned flags); + + void preemptive_writeback(); + void release(block_cache::block &block); + void check_index(block_address index) const; //-------------------------------- @@ -169,60 +199,6 @@ namespace bcache { unsigned mask_; std::vector buckets_; }; - -#if 0 - class auto_lock { - public: - auto_lock(block_cache &bc, block_address index, bool zero, validator::ptr v, unsigned put_flags) - : bc_(bc), - b_(bc.get(index, (zero ? block_cache::GF_ZERO : 0) | block_cache::GF_CAN_BLOCK, v)), - put_flags_(put_flags), - holders_(new unsigned) { - *holders_ = 1; - } - - virtual ~auto_lock() { - bc_.put(b_, put_flags_); - } - - auto_lock operator =(auto_lock const &rhs) { - if (this != &rhs) { - bc_ = rhs.bc_; - - - - void const *data() const { - return b_.get_data(); - } - - private: - block_cache &bc_; - block_cache::block &b_; - unsigned put_flags_; - unsigned *holders_; - }; - - class auto_read_lock : public auto_lock { - public: - auto_read_lock(block_cache &bc, block_address index, bool zero, validator::ptr v) - : auto_lock(bc, index, zero, v, 0) { - } - - using auto_lock::data(); - }; - - class auto_write_lock : public auto_lock { - public: - auto_write_lock(block_cache &bc, block_address index, bool zero, validator::ptr v) - : auto_lock(bc, index, zero, v, block_cache::DIRTY) { - } - - using auto_lock::data(); - void *data() { - return b_.get_data(); - } - }; -#endif } //---------------------------------------------------------------- diff --git a/persistent-data/block.h b/persistent-data/block.h index e3a2407..ab15687 100644 --- a/persistent-data/block.h +++ b/persistent-data/block.h @@ -55,15 +55,11 @@ namespace persistent_data { unsigned max_concurrent_locks, mode m); - typedef void (*put_behaviour_fn)(block_cache &, block_cache::block &); - class read_ref { public: static uint32_t const BLOCK_SIZE = BlockSize; - read_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn); + read_ref(block_cache::block &b); read_ref(read_ref const &rhs); virtual ~read_ref(); @@ -74,19 +70,14 @@ namespace persistent_data { void const *data() const; protected: - block_cache &bc_; block_cache::block &b_; - put_behaviour_fn fn_; - unsigned *holders_; }; // Inherited from read_ref, since you can read a block that's write // locked. class write_ref : public read_ref { public: - write_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn); + write_ref(block_cache::block &b); using read_ref::data; void *data(); @@ -94,9 +85,7 @@ namespace persistent_data { class super_ref : public write_ref { public: - super_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn); + super_ref(block_cache::block &b); using read_ref::data; using write_ref::data; diff --git a/persistent-data/block.tcc b/persistent-data/block.tcc index 926abe4..897550c 100644 --- a/persistent-data/block.tcc +++ b/persistent-data/block.tcc @@ -104,50 +104,23 @@ namespace { }; namespace persistent_data { - - inline void read_put(block_cache &bc, block_cache::block &b) { - bc.put(b, 0); - } - - inline void write_put(block_cache &bc, block_cache::block &b) { - bc.put(b, block_cache::PF_DIRTY); - } - - inline void super_put(block_cache &bc, block_cache::block &b) { - bc.flush(); - bc.put(b, block_cache::PF_DIRTY); - bc.flush(); - } - template - block_manager::read_ref::read_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn) - : bc_(bc), - b_(b), - fn_(fn), - holders_(new unsigned) + block_manager::read_ref::read_ref(block_cache::block &b) + : b_(b) { - *holders_ = 1; } template block_manager::read_ref::read_ref(read_ref const &rhs) - : bc_(rhs.bc_), - b_(rhs.b_), - fn_(rhs.fn_), - holders_(rhs.holders_) + : b_(rhs.b_) { - (*holders_)++; + b_.get(); } template block_manager::read_ref::~read_ref() { - if (!--(*holders_)) { - fn_(bc_, b_); - delete holders_; - } + b_.put(); } template @@ -155,11 +128,8 @@ namespace persistent_data { block_manager::read_ref::operator =(read_ref const &rhs) { if (this != &rhs) { - bc_ = rhs.bc_; b_ = rhs.b_; - fn_ = rhs.fn_; - holders_ = rhs.holders_; - (*holders_)++; + b_.get(); } return *this; @@ -182,10 +152,8 @@ namespace persistent_data { //-------------------------------- template - block_manager::write_ref::write_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn) - : read_ref(bc, b, fn) + block_manager::write_ref::write_ref(block_cache::block &b) + : read_ref(b) { } @@ -199,10 +167,9 @@ namespace persistent_data { //-------------------------------- template - block_manager::super_ref::super_ref(block_cache &bc, - block_cache::block &b, - put_behaviour_fn fn) - : write_ref(bc, b, fn) { + block_manager::super_ref::super_ref(block_cache::block &b) + : write_ref(b) + { } //---------------------------------------------------------------- @@ -213,7 +180,7 @@ namespace persistent_data { unsigned max_concurrent_blocks, mode m) : fd_(open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE)), - bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 256) + bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 16) { } @@ -222,8 +189,8 @@ namespace persistent_data { block_manager::read_lock(block_address location, typename bcache::validator::ptr v) const { - block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); - return read_ref(bc_, b, read_put); + block_cache::block &b = bc_.get(location, 0, v); + return read_ref(b); } template @@ -231,8 +198,8 @@ namespace persistent_data { block_manager::write_lock(block_address location, typename bcache::validator::ptr v) { - block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); - return write_ref(bc_, b, write_put); + block_cache::block &b = bc_.get(location, block_cache::GF_DIRTY, v); + return write_ref(b); } template @@ -240,8 +207,8 @@ namespace persistent_data { block_manager::write_lock_zero(block_address location, typename bcache::validator::ptr v) { - block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v); - return write_ref(bc_, b, write_put); + block_cache::block &b = bc_.get(location, block_cache::GF_ZERO, v); + return write_ref(b); } template @@ -249,8 +216,8 @@ namespace persistent_data { block_manager::superblock(block_address location, typename bcache::validator::ptr v) { - block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK, v); - return super_ref(bc_, b, super_put); + block_cache::block &b = bc_.get(location, block_cache::GF_BARRIER, v); + return super_ref(b); } template @@ -258,8 +225,8 @@ namespace persistent_data { block_manager::superblock_zero(block_address location, typename bcache::validator::ptr v) { - block_cache::block &b = bc_.get(location, block_cache::GF_CAN_BLOCK | block_cache::GF_ZERO, v); - return super_ref(bc_, b, super_put); + block_cache::block &b = bc_.get(location, block_cache::GF_ZERO | block_cache::GF_BARRIER, v); + return super_ref(b); } template