work in progress

This commit is contained in:
Joe Thornber 2014-07-25 10:35:04 +01:00
parent d9040949fc
commit b32908d5c2
11 changed files with 717 additions and 663 deletions

File diff suppressed because it is too large. Load Diff

View File

@@ -1,54 +1,173 @@
#ifndef BLOCK_CACHE_H #ifndef BLOCK_CACHE_H
#define BLOCK_CACHE_H #define BLOCK_CACHE_H
#include "block-cache/buffer.h"
#include "block-cache/list.h"
#include <boost/shared_ptr.hpp>
#include <boost/noncopyable.hpp>
#include <libaio.h>
#include <memory>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <vector>
/*----------------------------------------------------------------*/ //----------------------------------------------------------------
/* FIXME: add logging */ namespace bcache {
#if 0
class validator {
public:
typedef boost::shared_ptr<validator> ptr;
/*----------------------------------------------------------------*/ virtual ~validator() {}
/* virtual void check(buffer<BlockSize> const &b, block_address location) const = 0;
* This library is not thread-safe. virtual void prepare(buffer<BlockSize> &b, block_address location) const = 0;
*/ };
typedef uint64_t block_index;
struct block_cache; class noop_validator : public validator {
public:
void check(buffer<BlockSize> const &b, block_address location) const {}
void prepare(buffer<BlockSize> &b, block_address location) const {}
};
#endif
//----------------------------------------------------------------
struct bc_block { // FIXME: throw exceptions rather than returning errors
block_index index; class block_cache : private boost::noncopyable {
void *data; public:
}; enum block_flags {
IO_PENDING = (1 << 0),
DIRTY = (1 << 1)
};
typedef uint64_t sector_t; class block : private boost::noncopyable {
public:
uint64_t get_index() const {
return index_;
}
struct block_cache *block_cache_create(int fd, sector_t block_size, void *get_data() const {
uint64_t max_nr_blocks, size_t mem); return data_;
void block_cache_destroy(struct block_cache *bc); }
uint64_t block_cache_get_nr_blocks(struct block_cache *bc); private:
friend class block_cache;
enum get_flags { uint64_t index_;
GF_ZERO = (1 << 0), void *data_;
GF_CAN_BLOCK = (1 << 1)
};
struct bc_block *block_cache_get(struct block_cache *bc, block_index index, unsigned flags);
enum put_flags { list_head list_;
PF_DIRTY = (1 << 0), list_head hash_list_;
};
void block_cache_put(struct bc_block *b, unsigned flags);
/* block_cache *bc_;
* Flush can fail if an earlier write failed. You do not know which block unsigned ref_count_;
* failed. Make sure you build your recovery with this in mind.
*/
int block_cache_flush(struct block_cache *bc);
void block_cache_prefetch(struct block_cache *bc, block_index index); int error_;
unsigned flags_;
/*----------------------------------------------------------------*/ iocb control_block_;
};
typedef uint64_t block_index;
typedef uint64_t sector_t;
//--------------------------------
block_cache(int fd, sector_t block_size,
uint64_t max_nr_blocks, size_t mem);
~block_cache();
uint64_t get_nr_blocks() const;
enum get_flags {
GF_ZERO = (1 << 0),
GF_CAN_BLOCK = (1 << 1)
};
// FIXME: what if !GF_CAN_BLOCK?
block_cache::block &get(block_index index, unsigned flags);
enum put_flags {
PF_DIRTY = (1 << 0),
};
void put(block_cache::block &block, unsigned flags);
/*
* Flush can fail if an earlier write failed. You do not know which block
* failed. Make sure you build your recovery with this in mind.
*/
int flush();
void prefetch(block_index index);
private:
int init_free_list(unsigned count);
block *__alloc_block();
void complete_io(block &b, int result);
int issue_low_level(block &b, enum io_iocb_cmd opcode, const char *desc);
int issue_read(block &b);
int issue_write(block &b);
void wait_io();
list_head *__categorise(block &b);
void hit(block &b);
void wait_all();
void wait_specific(block &b);
unsigned writeback(unsigned count);
void hash_init(unsigned nr_buckets);
unsigned hash(uint64_t index);
block *hash_lookup(block_index index);
void hash_insert(block &b);
void hash_remove(block &b);
void setup_control_block(block &b);
block *new_block(block_index index);
void mark_dirty(block &b);
unsigned calc_nr_cache_blocks(size_t mem, sector_t block_size);
unsigned calc_nr_buckets(unsigned nr_blocks);
void zero_block(block &b);
block *lookup_or_read_block(block_index index, unsigned flags);
unsigned test_flags(block &b, unsigned flags);
void clear_flags(block &b, unsigned flags);
void set_flags(block &b, unsigned flags);
//--------------------------------
int fd_;
sector_t block_size_;
uint64_t nr_data_blocks_;
uint64_t nr_cache_blocks_;
std::auto_ptr<unsigned char> blocks_memory_; // FIXME: change to a vector
std::auto_ptr<unsigned char> blocks_data_;
io_context_t aio_context_;
std::vector<io_event> events_;
/*
* Blocks on the free list are not initialised, apart from the
* b.data field.
*/
list_head free_;
list_head errored_;
list_head dirty_;
list_head clean_;
unsigned nr_io_pending_;
struct list_head io_pending_;
unsigned nr_dirty_;
/*
* Hash table fields.
*/
unsigned nr_buckets_;
unsigned mask_;
std::vector<list_head> buckets_;
};
}
//----------------------------------------------------------------
#endif #endif

View File

@@ -277,7 +277,7 @@ namespace validator {
struct sb_validator : public block_manager<>::validator { struct sb_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(&b); superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum)) if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum))
@@ -285,7 +285,7 @@ namespace validator {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
superblock_disk *sbd = reinterpret_cast<superblock_disk *>(&b); superblock_disk *sbd = reinterpret_cast<superblock_disk *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
sbd->csum = to_disk<base::le32>(sum.get_sum()); sbd->csum = to_disk<base::le32>(sum.get_sum());

View File

@@ -212,7 +212,7 @@ namespace era_validator {
// FIXME: turn into a template, we have 3 similar classes now // FIXME: turn into a template, we have 3 similar classes now
struct sb_validator : public block_manager<>::validator { struct sb_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(&b); superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum)) if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum))
@@ -220,7 +220,7 @@ namespace era_validator {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
superblock_disk *sbd = reinterpret_cast<superblock_disk *>(&b); superblock_disk *sbd = reinterpret_cast<superblock_disk *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
sbd->csum = to_disk<base::le32>(sum.get_sum()); sbd->csum = to_disk<base::le32>(sum.get_sum());

View File

@@ -20,7 +20,7 @@
#define BLOCK_H #define BLOCK_H
#include "block-cache/block_cache.h" #include "block-cache/block_cache.h"
#include "persistent-data/buffer.h" #include "block-cache/buffer.h"
#include <stdint.h> #include <stdint.h>
#include <map> #include <map>
@@ -35,6 +35,8 @@
//---------------------------------------------------------------- //----------------------------------------------------------------
namespace persistent_data { namespace persistent_data {
using namespace bcache;
uint32_t const MD_BLOCK_SIZE = 4096; uint32_t const MD_BLOCK_SIZE = 4096;
@@ -77,10 +79,11 @@ namespace persistent_data {
BT_NORMAL BT_NORMAL
}; };
// FIXME: eventually this will disappear to be replaced with block_cache::block
struct block : private boost::noncopyable { struct block : private boost::noncopyable {
typedef boost::shared_ptr<block> ptr; typedef boost::shared_ptr<block> ptr;
block(block_cache *bc, block(block_cache &bc,
block_address location, block_address location,
block_type bt, block_type bt,
typename validator::ptr v, typename validator::ptr v,
@@ -110,7 +113,8 @@ namespace persistent_data {
private: private:
void check_not_unlocked() const; void check_not_unlocked() const;
bc_block *internal_; block_cache &bc_;
block_cache::block *internal_;
typename validator::ptr validator_; typename validator::ptr validator_;
block_type bt_; block_type bt_;
bool dirty_; bool dirty_;
@@ -196,7 +200,9 @@ namespace persistent_data {
void write_block(typename block::ptr b) const; void write_block(typename block::ptr b) const;
int fd_; int fd_;
block_cache *bc_;
// FIXME: the mutable is a fudge to allow flush() to be const, which I'm not sure is necc.
mutable block_cache bc_;
}; };
// A little utility to help build validators // A little utility to help build validators

View File

@@ -40,7 +40,7 @@ namespace {
unsigned const SECTOR_SHIFT = 9; unsigned const SECTOR_SHIFT = 9;
// FIXME: these will slow it down until we start doing async io. // FIXME: these will slow it down until we start doing async io.
int const OPEN_FLAGS = O_DIRECT | O_SYNC; int const OPEN_FLAGS = O_DIRECT;
// FIXME: introduce a new exception for this, or at least lift this // FIXME: introduce a new exception for this, or at least lift this
// to exception.h // to exception.h
@@ -106,31 +106,27 @@ namespace {
namespace persistent_data { namespace persistent_data {
template <uint32_t BlockSize> template <uint32_t BlockSize>
block_manager<BlockSize>::block::block(block_cache *bc, block_manager<BlockSize>::block::block(block_cache &bc,
block_address location, block_address location,
block_type bt, block_type bt,
typename validator::ptr v, typename validator::ptr v,
bool zero) bool zero)
: validator_(v), : bc_(bc),
bt_(bt), validator_(v),
dirty_(false), bt_(bt),
unlocked_(false), dirty_(false),
buffer_(0, true) // FIXME: we don't know if it's writeable here :( unlocked_(false),
buffer_(0, true) // FIXME: we don't know if it's writeable here :(
{ {
if (zero) { if (zero) {
internal_ = block_cache_get(bc, location, GF_ZERO | GF_CAN_BLOCK); internal_ = &bc.get(location, block_cache::GF_ZERO | block_cache::GF_CAN_BLOCK);
if (!internal_)
throw std::runtime_error("Couldn't get block");
dirty_ = true; dirty_ = true;
buffer_.set_data(internal_->get_data());
} else { } else {
internal_ = block_cache_get(bc, location, GF_CAN_BLOCK); internal_ = &bc.get(location, block_cache::GF_CAN_BLOCK);
if (!internal_) buffer_.set_data(internal_->get_data());
throw std::runtime_error("Couldn't get block"); validator_->check(buffer_, internal_->get_index());
validator_->check(buffer_, internal_->index);
} }
buffer_.set_data(internal_->data);
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@@ -144,8 +140,9 @@ namespace persistent_data {
void void
block_manager<BlockSize>::block::unlock() block_manager<BlockSize>::block::unlock()
{ {
validator_->prepare(buffer_, internal_->index); if (dirty_)
block_cache_put(internal_, dirty_ ? PF_DIRTY : 0); validator_->prepare(buffer_, internal_->get_index());
bc_.put(*internal_, dirty_ ? block_cache::PF_DIRTY : 0);
unlocked_ = true; unlocked_ = true;
} }
@@ -161,7 +158,7 @@ namespace persistent_data {
block_manager<BlockSize>::block::get_location() const block_manager<BlockSize>::block::get_location() const
{ {
check_not_unlocked(); check_not_unlocked();
return internal_->index; return internal_->get_index();
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@@ -196,12 +193,12 @@ namespace persistent_data {
if (dirty_) if (dirty_)
// It may have already happened, by calling // It may have already happened, by calling
// this we ensure we're consistent. // this we ensure we're consistent.
validator_->prepare(*internal_->data, internal_->index); validator_->prepare(*internal_->get_data(), internal_->get_index());
validator_ = v; validator_ = v;
if (check) if (check)
validator_->check(*internal_->data, internal_->index); validator_->check(*internal_->get_data(), internal_->get_index());
} }
} }
@@ -301,14 +298,9 @@ namespace persistent_data {
block_address nr_blocks, block_address nr_blocks,
unsigned max_concurrent_blocks, unsigned max_concurrent_blocks,
mode m) mode m)
: fd_(open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE)),
bc_(fd_, BlockSize >> SECTOR_SHIFT, nr_blocks, 1024u * 1024u * 4)
{ {
// Open the file descriptor
fd_ = open_block_file(path, nr_blocks * BlockSize, m == READ_WRITE);
// Create the cache
bc_ = block_cache_create(fd_, BlockSize << SECTOR_SHIFT, nr_blocks, 1024u * BlockSize * 1.2);
if (!bc_)
throw std::runtime_error("couldn't create block cache");
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@@ -360,7 +352,7 @@ namespace persistent_data {
block_address block_address
block_manager<BlockSize>::get_nr_blocks() const block_manager<BlockSize>::get_nr_blocks() const
{ {
return block_cache_get_nr_blocks(bc_); return bc_.get_nr_blocks();
} }
template <uint32_t BlockSize> template <uint32_t BlockSize>
@@ -374,7 +366,7 @@ namespace persistent_data {
void void
block_manager<BlockSize>::flush() const block_manager<BlockSize>::flush() const
{ {
block_cache_flush(bc_); bc_.flush();
} }
} }

View File

@@ -33,7 +33,7 @@ namespace persistent_data {
struct array_block_validator : public block_manager<>::validator { struct array_block_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
array_block_disk const *data = reinterpret_cast<array_block_disk const *>(&b); array_block_disk const *data = reinterpret_cast<array_block_disk const *>(b.raw());
crc32c sum(ARRAY_CSUM_XOR); crc32c sum(ARRAY_CSUM_XOR);
sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&data->max_entries, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(data->csum)) if (sum.get_sum() != to_cpu<uint32_t>(data->csum))
@@ -44,7 +44,7 @@ namespace persistent_data {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
array_block_disk *data = reinterpret_cast<array_block_disk *>(&b); array_block_disk *data = reinterpret_cast<array_block_disk *>(b.raw());
data->blocknr = to_disk<base::le64, uint64_t>(location); data->blocknr = to_disk<base::le64, uint64_t>(location);
crc32c sum(ARRAY_CSUM_XOR); crc32c sum(ARRAY_CSUM_XOR);

View File

@@ -34,7 +34,7 @@ namespace {
struct btree_node_validator : public block_manager<>::validator { struct btree_node_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
disk_node const *data = reinterpret_cast<disk_node const *>(&b); disk_node const *data = reinterpret_cast<disk_node const *>(b.raw());
node_header const *n = &data->header; node_header const *n = &data->header;
crc32c sum(BTREE_CSUM_XOR); crc32c sum(BTREE_CSUM_XOR);
sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&n->flags, MD_BLOCK_SIZE - sizeof(uint32_t));
@@ -46,7 +46,7 @@ namespace {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
disk_node *data = reinterpret_cast<disk_node *>(&b); disk_node *data = reinterpret_cast<disk_node *>(b.raw());
node_header *n = &data->header; node_header *n = &data->header;
n->blocknr = to_disk<base::le64, uint64_t>(location); n->blocknr = to_disk<base::le64, uint64_t>(location);

View File

@@ -39,7 +39,7 @@ namespace {
struct bitmap_block_validator : public block_manager<>::validator { struct bitmap_block_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
bitmap_header const *data = reinterpret_cast<bitmap_header const *>(&b); bitmap_header const *data = reinterpret_cast<bitmap_header const *>(b.raw());
crc32c sum(BITMAP_CSUM_XOR); crc32c sum(BITMAP_CSUM_XOR);
sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&data->not_used, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(data->csum)) if (sum.get_sum() != to_cpu<uint32_t>(data->csum))
@@ -50,7 +50,7 @@ namespace {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
bitmap_header *data = reinterpret_cast<bitmap_header *>(&b); bitmap_header *data = reinterpret_cast<bitmap_header *>(b.raw());
data->blocknr = to_disk<base::le64, uint64_t>(location); data->blocknr = to_disk<base::le64, uint64_t>(location);
crc32c sum(BITMAP_CSUM_XOR); crc32c sum(BITMAP_CSUM_XOR);
@@ -66,7 +66,8 @@ namespace {
// FIXME: factor out the common code in these validators // FIXME: factor out the common code in these validators
struct index_block_validator : public block_manager<>::validator { struct index_block_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
metadata_index const *mi = reinterpret_cast<metadata_index const *>(&b); metadata_index const *mi = reinterpret_cast<metadata_index const *>(b.raw());
std::cerr << "check mi = " << mi << "\n";
crc32c sum(INDEX_CSUM_XOR); crc32c sum(INDEX_CSUM_XOR);
sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&mi->padding_, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(mi->csum_)) if (sum.get_sum() != to_cpu<uint32_t>(mi->csum_))
@@ -77,7 +78,8 @@ namespace {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
metadata_index *mi = reinterpret_cast<metadata_index *>(&b); metadata_index *mi = reinterpret_cast<metadata_index *>(b.raw());
std::cerr << "prepare mi = " << mi << "\n";
mi->blocknr_ = to_disk<base::le64, uint64_t>(location); mi->blocknr_ = to_disk<base::le64, uint64_t>(location);
crc32c sum(INDEX_CSUM_XOR); crc32c sum(INDEX_CSUM_XOR);
@@ -630,7 +632,7 @@ namespace {
tm_->shadow(bitmap_root_, index_validator()); tm_->shadow(bitmap_root_, index_validator());
bitmap_root_ = p.first.get_location(); bitmap_root_ = p.first.get_location();
metadata_index *mdi = reinterpret_cast<metadata_index *>(&p.first.data()); metadata_index *mdi = reinterpret_cast<metadata_index *>(p.first.data().raw());
for (unsigned i = 0; i < entries_.size(); i++) for (unsigned i = 0; i < entries_.size(); i++)
index_entry_traits::pack(entries_[i], mdi->index[i]); index_entry_traits::pack(entries_[i], mdi->index[i]);

View File

@@ -87,7 +87,7 @@ namespace {
struct sb_validator : public block_manager<>::validator { struct sb_validator : public block_manager<>::validator {
virtual void check(buffer<> const &b, block_address location) const { virtual void check(buffer<> const &b, block_address location) const {
superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(&b); superblock_disk const *sbd = reinterpret_cast<superblock_disk const *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t));
if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum_)) if (sum.get_sum() != to_cpu<uint32_t>(sbd->csum_))
@@ -95,7 +95,7 @@ namespace {
} }
virtual void prepare(buffer<> &b, block_address location) const { virtual void prepare(buffer<> &b, block_address location) const {
superblock_disk *sbd = reinterpret_cast<superblock_disk *>(&b); superblock_disk *sbd = reinterpret_cast<superblock_disk *>(b.raw());
crc32c sum(SUPERBLOCK_CSUM_SEED); crc32c sum(SUPERBLOCK_CSUM_SEED);
sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t)); sum.append(&sbd->flags_, MD_BLOCK_SIZE - sizeof(uint32_t));
sbd->csum_ = to_disk<base::le32>(sum.get_sum()); sbd->csum_ = to_disk<base::le32>(sum.get_sum());