diff --git a/functional-tests/bcache/bcache.c b/functional-tests/bcache/bcache.c
new file mode 100644
index 0000000..d8f172b
--- /dev/null
+++ b/functional-tests/bcache/bcache.c
@@ -0,0 +1,897 @@
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libaio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "list.h"
+#include "bcache.h"
+
+//----------------------------------------------------------------
+
+static void warn(const char *msg)
+{
+	fprintf(stderr, "%s\n", msg);
+}
+
+// FIXME: raise a condition somehow?
+static void raise(const char *msg)
+{
+	warn(msg);
+	exit(1);
+}
+
+/*
+ * Assumes the list is not empty.
+ */
+static inline struct list_head *list_pop(struct list_head *head)
+{
+	struct list_head *l;
+
+	l = head->next;
+	list_del(l);
+	return l;
+}
+
+//----------------------------------------------------------------
+
+struct control_block {
+	struct list_head list;
+	void *context;
+	struct iocb cb;
+};
+
+struct cb_set {
+	struct list_head free;
+	struct list_head allocated;
+	struct control_block *vec;
+} control_block_set;
+
+static struct cb_set *cb_set_create(unsigned nr)
+{
+	unsigned i;
+	struct cb_set *cbs = malloc(sizeof(*cbs));
+
+	if (!cbs)
+		return NULL;
+
+	cbs->vec = malloc(nr * sizeof(*cbs->vec));
+	if (!cbs->vec) {
+		free(cbs);
+		return NULL;
+	}
+
+	init_list_head(&cbs->free);
+	init_list_head(&cbs->allocated);
+
+	for (i = 0; i < nr; i++)
+		list_add(&cbs->vec[i].list, &cbs->free);
+
+	return cbs;
+}
+
+static void cb_set_destroy(struct cb_set *cbs)
+{
+	if (!list_empty(&cbs->allocated))
+		raise("async io still in flight");
+
+	free(cbs->vec);
+	free(cbs);
+}
+
+static struct control_block *cb_alloc(struct cb_set *cbs, void *context)
+{
+	struct control_block *cb;
+
+	if (list_empty(&cbs->free))
+		return NULL;
+
+	cb = container_of(list_pop(&cbs->free), struct control_block, list);
+	cb->context = context;
+	list_add(&cb->list, &cbs->allocated);
+
+	return cb;
+}
+
+static void cb_free(struct cb_set *cbs, struct control_block *cb)
+{
+	list_del(&cb->list);
+	list_add(&cb->list, &cbs->free);
+}
+
+static struct control_block *iocb_to_cb(struct iocb *icb)
+{
+	return container_of(icb, struct control_block, cb);
+}
+
+//----------------------------------------------------------------
+
+// FIXME: get from linux headers
+#define SECTOR_SHIFT 9
+#define PAGE_SIZE 4096
+
+enum dir {
+	DIR_READ,
+	DIR_WRITE
+};
+
+struct io_engine {
+	io_context_t aio_context;
+	struct cb_set *cbs;
+};
+
+static struct io_engine *engine_create(unsigned max_io)
+{
+	int r;
+	struct io_engine *e = malloc(sizeof(*e));
+
+	if (!e)
+		return NULL;
+
+	e->aio_context = 0;
+	r = io_setup(max_io, &e->aio_context);
+	if (r < 0) {
+		warn("io_setup failed");
+		free(e);
+		return NULL;
+	}
+
+	e->cbs = cb_set_create(max_io);
+	if (!e->cbs) {
+		warn("couldn't create control block set");
+		io_destroy(e->aio_context);
+		free(e);
+		return NULL;
+	}
+
+	return e;
+}
+
+static void engine_destroy(struct io_engine *e)
+{
+	cb_set_destroy(e->cbs);
+	io_destroy(e->aio_context);
+	free(e);
+}
+
+static int engine_issue(struct io_engine *e, int fd, enum dir d,
+			sector_t sb, sector_t se, void *data, void *context)
+{
+	int r;
+	struct iocb *cb_array[1];
+	struct control_block *cb;
+
+	if (((uint64_t) data) & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	cb = cb_alloc(e->cbs, context);
+	if (!cb)
+		return -ENOMEM;
+
+	memset(&cb->cb, 0, sizeof(cb->cb));
+
+	cb->cb.aio_fildes = (int) fd;
+	cb->cb.u.c.buf = data;
+	cb->cb.u.c.offset = sb << SECTOR_SHIFT;
+	cb->cb.u.c.nbytes = (se - sb) << SECTOR_SHIFT;
+	cb->cb.aio_lio_opcode = (d == DIR_READ) ? IO_CMD_PREAD : IO_CMD_PWRITE;
+
+	cb_array[0] = &cb->cb;
+	r = io_submit(e->aio_context, 1, cb_array);
+	if (r < 0)
+		cb_free(e->cbs, cb);
+
+	return r;
+}
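
An aside for reviewers: the hand-rolled iocb setup in engine_issue is what libaio's io_prep_pread()/io_prep_pwrite() helpers do (they memset the iocb and fill in the opcode, fd, buffer, length and offset). A sketch of the equivalent, reusing the names from this file, not part of the patch:

	#include <libaio.h>

	static void prep_io(struct iocb *icb, int fd, enum dir d,
			    sector_t sb, sector_t se, void *data)
	{
		size_t len = (se - sb) << SECTOR_SHIFT;
		long long offset = sb << SECTOR_SHIFT;

		// io_prep_* performs the same field setup as engine_issue
		// does by hand above.
		if (d == DIR_READ)
			io_prep_pread(icb, fd, data, len, offset);
		else
			io_prep_pwrite(icb, fd, data, len, offset);
	}
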
+#define MAX_IO 64
+typedef void complete_fn(void *context, int io_error);
+
+static int engine_wait(struct io_engine *e, struct timespec *ts, complete_fn fn)
+{
+	int i, r;
+	struct io_event event[MAX_IO];
+	struct control_block *cb;
+
+	memset(&event, 0, sizeof(event));
+	r = io_getevents(e->aio_context, 1, MAX_IO, event, ts);
+	if (r < 0) {
+		warn("io_getevents failed");
+		return r;
+	}
+
+	if (r == 0)
+		return 0;
+
+	for (i = 0; i < r; i++) {
+		struct io_event *ev = event + i;
+
+		cb = iocb_to_cb((struct iocb *) ev->obj);
+
+		if (ev->res == cb->cb.u.c.nbytes)
+			fn((void *) cb->context, 0);
+
+		else if ((int) ev->res < 0)
+			fn(cb->context, (int) ev->res);
+
+		else {
+			warn("short io");
+			fn(cb->context, -ENODATA);
+		}
+
+		cb_free(e->cbs, cb);
+	}
+
+	return r;
+}
+
+//----------------------------------------------------------------
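
For reviewers unfamiliar with the engine flow, here is a minimal, hypothetical driver: submit a single read and reap its completion. Illustrative only; the engine functions are static, so something like this would have to live in the same translation unit (eg, under a test):

	static void print_complete(void *context, int error)
	{
		fprintf(stderr, "io %s (%d)\n", error ? "failed" : "complete", error);
	}

	/* Reads sectors 0..7 (the first 4k block) of fd. */
	static int read_first_block(int fd)
	{
		int r;
		void *data = NULL;
		struct io_engine *e = engine_create(1);

		if (!e)
			return -ENOMEM;

		/* engine_issue demands page aligned buffers (O_DIRECT). */
		if (posix_memalign(&data, PAGE_SIZE, 4096)) {
			engine_destroy(e);
			return -ENOMEM;
		}

		r = engine_issue(e, fd, DIR_READ, 0, 8, data, NULL);
		if (r > 0)
			r = engine_wait(e, NULL, print_complete);

		free(data);
		engine_destroy(e);
		return r;
	}
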
+#if 0
+struct timespec micro_to_ts(unsigned micro)
+{
+	struct timespec ts;
+	ts.tv_sec = micro / 1000000u;
+	ts.tv_nsec = (micro % 1000000) * 1000;
+	return ts;
+}
+
+static unsigned ts_to_micro(struct timespec const *ts)
+{
+	unsigned micro = ts->tv_sec * 1000000;
+	micro += ts->tv_nsec / 1000;
+	return micro;
+}
+#endif
+//----------------------------------------------------------------
+
+#define MIN_BLOCKS 16
+#define WRITEBACK_LOW_THRESHOLD_PERCENT 33
+#define WRITEBACK_HIGH_THRESHOLD_PERCENT 66
+
+//----------------------------------------------------------------
+
+static void *alloc_aligned(size_t len, size_t alignment)
+{
+	void *result = NULL;
+	int r = posix_memalign(&result, alignment, len);
+	if (r)
+		return NULL;
+
+	return result;
+}
+
+//----------------------------------------------------------------
+
+static bool test_flags(struct block *b, unsigned bits)
+{
+	return (b->flags & bits) != 0;
+}
+
+static void set_flags(struct block *b, unsigned bits)
+{
+	b->flags |= bits;
+}
+
+static void clear_flags(struct block *b, unsigned bits)
+{
+	b->flags &= ~bits;
+}
+
+//----------------------------------------------------------------
+
+enum block_flags {
+	BF_IO_PENDING = (1 << 0),
+	BF_DIRTY = (1 << 1),
+};
+
+struct bcache {
+	int fd;
+	sector_t block_sectors;
+	uint64_t nr_data_blocks;
+	uint64_t nr_cache_blocks;
+
+	struct io_engine *engine;
+
+	void *raw_data;
+	struct block *raw_blocks;
+
+	/*
+	 * Lists that categorise the blocks.
+	 */
+	unsigned nr_locked;
+	unsigned nr_dirty;
+	unsigned nr_io_pending;
+
+	struct list_head free;
+	struct list_head errored;
+	struct list_head dirty;
+	struct list_head clean;
+	struct list_head io_pending;
+
+	/*
+	 * Hash table.
+	 */
+	unsigned nr_buckets;
+	unsigned hash_mask;
+	struct list_head *buckets;
+
+	/*
+	 * Statistics.
+	 */
+	unsigned read_hits;
+	unsigned read_misses;
+	unsigned write_zeroes;
+	unsigned write_hits;
+	unsigned write_misses;
+	unsigned prefetches;
+};
+
+//----------------------------------------------------------------
+
+/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
+#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
+
+static unsigned hash(struct bcache *cache, uint64_t index)
+{
+	uint64_t h = index;
+	h *= GOLDEN_RATIO_PRIME_64;
+	return h & cache->hash_mask;
+}
+
+static struct block *hash_lookup(struct bcache *cache, uint64_t index)
+{
+	struct block *b;
+	unsigned h = hash(cache, index);
+
+	list_for_each_entry (b, cache->buckets + h, hash)
+		if (b->index == index)
+			return b;
+
+	return NULL;
+}
+
+static void hash_insert(struct block *b)
+{
+	unsigned h = hash(b->cache, b->index);
+	list_add(&b->hash, b->cache->buckets + h);
+}
+
+static void hash_remove(struct block *b)
+{
+	list_del(&b->hash);
+}
+
+/*
+ * Must return a power of 2.
+ */
+static unsigned calc_nr_buckets(unsigned nr_blocks)
+{
+	unsigned r = 8;
+	unsigned n = nr_blocks / 4;
+
+	if (n < 8)
+		n = 8;
+
+	while (r < n)
+		r <<= 1;
+
+	return r;
+}
+
+static int hash_table_init(struct bcache *cache, unsigned nr_entries)
+{
+	unsigned i;
+
+	cache->nr_buckets = calc_nr_buckets(nr_entries);
+	cache->hash_mask = cache->nr_buckets - 1;
+	cache->buckets = malloc(cache->nr_buckets * sizeof(*cache->buckets));
+	if (!cache->buckets)
+		return -ENOMEM;
+
+	for (i = 0; i < cache->nr_buckets; i++)
+		init_list_head(cache->buckets + i);
+
+	return 0;
+}
+
+static void hash_table_exit(struct bcache *cache)
+{
+	free(cache->buckets);
+}
+
+//----------------------------------------------------------------
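
The sizing aims at roughly one bucket per four cache blocks, rounded up to a power of two so that hash_mask works. A quick worked example, as a hypothetical check function sitting in the same file:

	#include <assert.h>

	static void check_bucket_sizing(void)
	{
		assert(calc_nr_buckets(16) == 8);	/* 16 / 4 = 4, clamped up to the floor of 8 */
		assert(calc_nr_buckets(100) == 32);	/* 100 / 4 = 25, rounded up to 32; hash_mask = 31 */
		assert(calc_nr_buckets(4096) == 1024);	/* 1024 is already a power of two */
	}
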
+
+static int init_free_list(struct bcache *cache, unsigned count)
+{
+	unsigned i;
+	size_t block_size = cache->block_sectors << SECTOR_SHIFT;
+	unsigned char *data =
+		(unsigned char *) alloc_aligned(count * block_size, PAGE_SIZE);
+
+	/* Allocate the data for each block.  We page align the data. */
+	if (!data)
+		return -ENOMEM;
+
+	cache->raw_data = data;
+	cache->raw_blocks = malloc(count * sizeof(*cache->raw_blocks));
+
+	if (!cache->raw_blocks) {
+		free(cache->raw_data);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < count; i++) {
+		struct block *b = cache->raw_blocks + i;
+		b->cache = cache;
+		b->data = data + (block_size * i);
+		list_add_tail(&b->list, &cache->free);
+	}
+
+	return 0;
+}
+
+static void exit_free_list(struct bcache *cache)
+{
+	free(cache->raw_data);
+	free(cache->raw_blocks);
+}
+
+static struct block *alloc_block(struct bcache *cache)
+{
+	if (list_empty(&cache->free))
+		return NULL;
+
+	return container_of(list_pop(&cache->free), struct block, list);
+}
+
+/*----------------------------------------------------------------
+ * Clean/dirty list management.
+ * Always use these methods to ensure nr_dirty is correct.
+ *--------------------------------------------------------------*/
+
+static void unlink_block(struct block *b)
+{
+	if (test_flags(b, BF_DIRTY))
+		b->cache->nr_dirty--;
+
+	list_del(&b->list);
+}
+
+static void link_block(struct block *b)
+{
+	struct bcache *cache = b->cache;
+
+	if (test_flags(b, BF_DIRTY)) {
+		list_add_tail(&b->list, &cache->dirty);
+		cache->nr_dirty++;
+	} else
+		list_add_tail(&b->list, &cache->clean);
+}
+
+static void relink(struct block *b)
+{
+	unlink_block(b);
+	link_block(b);
+}
+
+/*----------------------------------------------------------------
+ * Low level IO handling
+ *
+ * We cannot have two concurrent writes on the same block.
+ * eg, background writeback, put with dirty, flush?
+ *
+ * To avoid this we introduce some restrictions:
+ *
+ * i) A held block can never be written back.
+ * ii) You cannot get a block until writeback has completed.
+ *
+ *--------------------------------------------------------------*/
+
+/*
+ * |b->list| should be valid (either pointing to itself, or on one of the
+ * other lists).
+ */
+static int issue_low_level(struct block *b, enum dir d)
+{
+	struct bcache *cache = b->cache;
+	sector_t sb = b->index * cache->block_sectors;
+	sector_t se = sb + cache->block_sectors;
+
+	set_flags(b, BF_IO_PENDING);
+	cache->nr_io_pending++;
+
+	/*
+	 * Move rather than unlink_block; a dirty block keeps counting
+	 * towards nr_dirty until the write lands.
+	 */
+	list_move(&b->list, &cache->io_pending);
+
+	return engine_issue(cache->engine, cache->fd, d, sb, se, b->data, b);
+}
+
+static void issue_read(struct block *b)
+{
+	assert(!test_flags(b, BF_IO_PENDING));
+	issue_low_level(b, DIR_READ);
+}
+
+static void issue_write(struct block *b)
+{
+	assert(!test_flags(b, BF_IO_PENDING));
+	//b.v_->prepare(b.data_, b.index_);
+	issue_low_level(b, DIR_WRITE);
+}
+
+static void complete_io(void *context, int err)
+{
+	struct block *b = context;
+	struct bcache *cache = b->cache;
+
+	b->error = err;
+	clear_flags(b, BF_IO_PENDING);
+	cache->nr_io_pending--;
+
+	/*
+	 * b is on the io_pending list, so we don't want to use unlink_block,
+	 * which assumes b is on the dirty or clean list.
+	 */
+	list_del(&b->list);
+
+	if (b->error)
+		list_add_tail(&b->list, &cache->errored);
+
+	else {
+		if (test_flags(b, BF_DIRTY)) {
+			/* the write has landed, so the block ceases to be dirty */
+			cache->nr_dirty--;
+			clear_flags(b, BF_DIRTY);
+		}
+
+		link_block(b);
+	}
+}
+
+static int wait_io(struct bcache *cache)
+{
+	return engine_wait(cache->engine, NULL, complete_io);
+}
+
+/*----------------------------------------------------------------
+ * High level IO handling
+ *--------------------------------------------------------------*/
+
+static void wait_all(struct bcache *cache)
+{
+	while (!list_empty(&cache->io_pending))
+		wait_io(cache);
+}
+
+static void wait_specific(struct block *b)
+{
+	while (test_flags(b, BF_IO_PENDING))
+		wait_io(b->cache);
+}
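
Restrictions (i) and (ii) are enforced inside writeback() and get_block() below, so a client never needs its own synchronisation against writeback. A hypothetical write path, with invented names, using the public interface from bcache.h (note get_block() currently raises rather than returning NULL on failure):

	/* Illustrative only: update one block through the cache. */
	static void update_block(struct bcache *cache, block_address index,
				 const void *new_data, size_t len)
	{
		/* Blocks in wait_specific() if the block is mid-writeback (rule ii). */
		struct block *b = get_block(cache, index, GF_DIRTY);

		memcpy(b->data, new_data, len);	/* safe: no IO pending on b */
		release_block(b);		/* may trigger preemptive writeback */
	}
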
+
+static unsigned writeback(struct bcache *cache, unsigned count)
+{
+	unsigned actual = 0;
+	struct block *b, *tmp;
+
+	list_for_each_entry_safe (b, tmp, &cache->dirty, list) {
+		if (actual == count)
+			break;
+
+		// We can't writeback anything that's still in use (rule i).
+		if (!b->ref_count) {
+			issue_write(b);
+			actual++;
+		}
+	}
+
+	return actual;
+}
+
+/*----------------------------------------------------------------
+ * High level allocation
+ *--------------------------------------------------------------*/
+
+static struct block *find_unused_clean_block(struct bcache *cache)
+{
+	struct block *b;
+
+	list_for_each_entry (b, &cache->clean, list) {
+		if (!b->ref_count) {
+			unlink_block(b);
+			hash_remove(b);
+			return b;
+		}
+	}
+
+	return NULL;
+}
+
+static struct block *new_block(struct bcache *cache, block_address index)
+{
+	struct block *b;
+
+	b = alloc_block(cache);
+	while (!b && cache->nr_locked < cache->nr_cache_blocks) {
+		b = find_unused_clean_block(cache);
+		if (!b) {
+			if (list_empty(&cache->io_pending))
+				writeback(cache, 16);
+			wait_io(cache);
+		}
+	}
+
+	if (b) {
+		init_list_head(&b->list);
+		init_list_head(&b->hash);
+		b->flags = 0;
+		b->index = index;
+		b->ref_count = 0;
+		b->error = 0;
+
+		hash_insert(b);
+	}
+
+	return b;
+}
+
+/*----------------------------------------------------------------
+ * Block reference counting
+ *--------------------------------------------------------------*/
+struct bcache *bcache_create(int fd, sector_t block_sectors, uint64_t on_disk_blocks,
+			     unsigned nr_cache_blocks)
+{
+	int r;
+	struct bcache *cache;
+
+	cache = malloc(sizeof(*cache));
+	if (!cache)
+		return NULL;
+
+	cache->fd = fd;
+	cache->block_sectors = block_sectors;
+	cache->nr_data_blocks = on_disk_blocks;
+	cache->nr_cache_blocks = nr_cache_blocks;
+
+	cache->engine = engine_create(nr_cache_blocks < 1024u ? nr_cache_blocks : 1024u);
+	if (!cache->engine) {
+		free(cache);
+		return NULL;
+	}
+
+	cache->nr_locked = 0;
+	cache->nr_dirty = 0;
+	cache->nr_io_pending = 0;
+
+	init_list_head(&cache->free);
+	init_list_head(&cache->errored);
+	init_list_head(&cache->dirty);
+	init_list_head(&cache->clean);
+	init_list_head(&cache->io_pending);
+
+	if (hash_table_init(cache, nr_cache_blocks)) {
+		engine_destroy(cache->engine);
+		free(cache);
+		return NULL;
+	}
+
+	cache->read_hits = 0;
+	cache->read_misses = 0;
+	cache->write_zeroes = 0;
+	cache->write_hits = 0;
+	cache->write_misses = 0;
+	cache->prefetches = 0;
+
+	r = init_free_list(cache, nr_cache_blocks);
+	if (r) {
+		engine_destroy(cache->engine);
+		hash_table_exit(cache);
+		free(cache);
+		return NULL;
+	}
+
+	return cache;
+}
+
+#define MD_BLOCK_SIZE 4096ull
+
+struct bcache *bcache_simple(const char *path, unsigned nr_cache_blocks)
+{
+	int r;
+	struct stat info;
+	struct bcache *cache;
+	int fd = open(path, O_DIRECT | O_EXCL | O_RDONLY);
+	uint64_t s;
+
+	if (fd < 0) {
+		raise("couldn't open cache file");
+		return NULL;
+	}
+
+	r = fstat(fd, &info);
+	if (r < 0) {
+		close(fd);
+		raise("couldn't stat cache file");
+		return NULL;
+	}
+
+	s = info.st_size;
+	cache = bcache_create(fd, MD_BLOCK_SIZE >> SECTOR_SHIFT, s / MD_BLOCK_SIZE, nr_cache_blocks);
+	if (!cache)
+		close(fd);
+
+	return cache;
+}
+
+void bcache_destroy(struct bcache *cache)
+{
+	if (cache->nr_locked)
+		warn("some blocks are still locked");
+
+	flush_cache(cache);
+	wait_all(cache);
+	exit_free_list(cache);
+	hash_table_exit(cache);
+	engine_destroy(cache->engine);
+	close(cache->fd);
+	free(cache);
+}
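
bcache_simple() opens the file read-only; until the FIXME in bcache.h is resolved, a writable cache has to be built by hand with bcache_create(). A sketch with a hypothetical helper name: a 4k block is 4096 >> SECTOR_SHIFT = 8 sectors, and, as in bcache_simple(), the caller owns the fd on failure:

	static struct bcache *rw_cache(const char *path, uint64_t file_size)
	{
		struct bcache *cache;
		int fd = open(path, O_DIRECT | O_RDWR);

		if (fd < 0)
			return NULL;

		cache = bcache_create(fd, 4096 >> SECTOR_SHIFT,
				      file_size / 4096, 256);
		if (!cache)
			close(fd);

		return cache;
	}
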
+
+// FIXME: we have to return an error code that can be turned into a Scheme
+// condition.
+static void check_index(struct bcache *cache, block_address index)
+{
+	if (index >= cache->nr_data_blocks)
+		raise("block out of bounds");
+}
+
+uint64_t get_nr_blocks(struct bcache *cache)
+{
+	return cache->nr_data_blocks;
+}
+
+uint64_t get_nr_locked(struct bcache *cache)
+{
+	return cache->nr_locked;
+}
+
+static void zero_block(struct block *b)
+{
+	b->cache->write_zeroes++;
+	memset(b->data, 0, b->cache->block_sectors << SECTOR_SHIFT);
+	set_flags(b, BF_DIRTY);
+}
+
+static void hit(struct block *b, unsigned flags)
+{
+	struct bcache *cache = b->cache;
+
+	if (flags & (GF_ZERO | GF_DIRTY))
+		cache->write_hits++;
+	else
+		cache->read_hits++;
+
+	relink(b);
+}
+
+static void miss(struct bcache *cache, unsigned flags)
+{
+	if (flags & (GF_ZERO | GF_DIRTY))
+		cache->write_misses++;
+	else
+		cache->read_misses++;
+}
+
+static struct block *lookup_or_read_block(struct bcache *cache,
+					  block_address index, unsigned flags)
+{
+	struct block *b = hash_lookup(cache, index);
+
+	if (b) {
+		// FIXME: this is insufficient.  We need to also catch a read
+		// lock of a write locked block.  Ref count needs to distinguish.
+		if (b->ref_count && (flags & (GF_DIRTY | GF_ZERO)))
+			raise("concurrent write lock attempt");
+
+		if (test_flags(b, BF_IO_PENDING)) {
+			miss(cache, flags);
+			wait_specific(b);
+
+		} else
+			hit(b, flags);
+
+		unlink_block(b);
+
+		if (flags & GF_ZERO)
+			zero_block(b);
+
+	} else {
+		miss(cache, flags);
+
+		b = new_block(cache, index);
+		if (b) {
+			if (flags & GF_ZERO)
+				zero_block(b);
+
+			else {
+				issue_read(b);
+				wait_specific(b);
+
+				// we know the block is clean and unerrored.
+				unlink_block(b);
+			}
+		}
+	}
+
+	if (b && !b->error) {
+		if (flags & (GF_DIRTY | GF_ZERO))
+			set_flags(b, BF_DIRTY);
+
+		link_block(b);
+		return b;
+	}
+
+	return NULL;
+}
+
+struct block *get_block(struct bcache *cache, block_address index, unsigned flags)
+{
+	check_index(cache, index);
+
+	struct block *b = lookup_or_read_block(cache, index, flags);
+	if (b) {
+		if (!b->ref_count)
+			cache->nr_locked++;
+		b->ref_count++;
+
+		return b;
+	}
+
+	raise("couldn't get block");
+	return NULL;
+}
+
+static void preemptive_writeback(struct bcache *cache)
+{
+	// FIXME: this ignores those blocks that are in the error state.  Track
+	// nr_clean instead?
+	unsigned nr_available = cache->nr_cache_blocks - (cache->nr_dirty - cache->nr_io_pending);
+	if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * cache->nr_cache_blocks / 100))
+		writeback(cache, (WRITEBACK_HIGH_THRESHOLD_PERCENT * cache->nr_cache_blocks / 100) - nr_available);
+}
+
+void release_block(struct block *b)
+{
+	assert(b->ref_count);
+
+	b->ref_count--;
+	if (!b->ref_count)
+		b->cache->nr_locked--;
+
+	if (test_flags(b, BF_DIRTY))
+		preemptive_writeback(b->cache);
+}
+
+int flush_cache(struct bcache *cache)
+{
+	struct block *b, *tmp;
+
+	list_for_each_entry_safe (b, tmp, &cache->dirty, list) {
+		// The superblock may well be still locked.
+		if (b->ref_count || test_flags(b, BF_IO_PENDING))
+			continue;
+
+		issue_write(b);
+	}
+
+	wait_all(cache);
+
+	return list_empty(&cache->errored) ? 0 : -EIO;
+}
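
To make the writeback thresholds concrete, assume a hypothetical cache of 128 blocks: writeback kicks in once fewer than a third of the blocks are available, and queues enough writes to get back to two thirds:

	unsigned nr_cache_blocks = 128;
	unsigned low  = WRITEBACK_LOW_THRESHOLD_PERCENT  * nr_cache_blocks / 100;	/* 42 */
	unsigned high = WRITEBACK_HIGH_THRESHOLD_PERCENT * nr_cache_blocks / 100;	/* 84 */
	/* preemptive_writeback() fires when nr_available < 42 and
	 * requests (84 - nr_available) writebacks. */
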
+
+void prefetch_block(struct bcache *cache, block_address index)
+{
+	check_index(cache, index);
+	struct block *b = hash_lookup(cache, index);
+
+	if (!b) {
+		cache->prefetches++;
+
+		b = new_block(cache, index);
+		if (b)
+			issue_read(b);
+	}
+}
+
+//----------------------------------------------------------------
diff --git a/functional-tests/bcache/bcache.h b/functional-tests/bcache/bcache.h
new file mode 100644
index 0000000..b5ec2fd
--- /dev/null
+++ b/functional-tests/bcache/bcache.h
@@ -0,0 +1,69 @@
+#ifndef BCACHE_H
+#define BCACHE_H
+
+#include <stdint.h>
+
+#include "list.h"
+
+/*----------------------------------------------------------------*/
+
+typedef uint64_t block_address;
+typedef uint64_t sector_t;
+
+struct block;
+struct bcache;
+
+// FIXME: allow the cache to be opened read only.
+struct bcache *bcache_create(int fd, sector_t block_sectors,
+			     uint64_t on_disk_blocks,
+			     unsigned nr_cache_blocks);
+
+/*
+ * A simpler way of creating a bcache that assumes 4k blocks, and stats the
+ * path to get the file size.
+ */
+struct bcache *bcache_simple(const char *path, unsigned nr_cache_blocks);
+
+void bcache_destroy(struct bcache *cache);
+uint64_t get_nr_blocks(struct bcache *cache);
+uint64_t get_nr_locked(struct bcache *cache);
+
+int flush_cache(struct bcache *cache);
+
+struct block {
+	/* clients may only access these two fields */
+	void *data;
+	uint64_t index;
+
+	struct bcache *cache;
+	struct list_head list;
+	struct list_head hash;
+
+	unsigned flags;
+	unsigned ref_count;
+	int error;
+};
+
+enum get_flags {
+	/*
+	 * The block will be zeroed before get_block returns it.  This
+	 * potentially avoids a read if the block is not already in the cache.
+	 * GF_DIRTY is implicit.
+	 */
+	GF_ZERO = (1 << 0),
+
+	/*
+	 * Indicates the caller intends to change the data in the block, so a
+	 * writeback will occur after the block is released.
+	 */
+	GF_DIRTY = (1 << 1),
+};
+
+struct block *get_block(struct bcache *cache, block_address index, unsigned flags);
+void prefetch_block(struct bcache *cache, block_address index);
+
+void release_block(struct block *b);
+
+/*----------------------------------------------------------------*/
+
+#endif
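
Putting the public interface together, a hypothetical client that streams through the first nr blocks of a file: prefetch to get the reads in flight, then get/release in order. nr should stay below the cache size here, since prefetching more than the cache holds may force blocks to be recycled before they are used:

	#include "bcache.h"

	static int dump_blocks(const char *path, block_address nr)
	{
		block_address i;
		struct bcache *cache = bcache_simple(path, 128);

		if (!cache)
			return -1;

		for (i = 0; i < nr; i++)
			prefetch_block(cache, i);

		for (i = 0; i < nr; i++) {
			struct block *b = get_block(cache, i, 0);
			/* ... examine b->data ... */
			release_block(b);
		}

		bcache_destroy(cache);
		return 0;
	}
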
diff --git a/functional-tests/bcache/list.h b/functional-tests/bcache/list.h
new file mode 100644
index 0000000..3cab249
--- /dev/null
+++ b/functional-tests/bcache/list.h
@@ -0,0 +1,216 @@
+#ifndef LIB_BLOCK_CACHE_LIST_H
+#define LIB_BLOCK_CACHE_LIST_H
+
+#include <stddef.h>
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Simple intrusive linked list code.  Lifted from the Linux kernel.
+ */
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ */
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+static inline void init_list_head(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+static inline void __list_add(struct list_head *new_,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new_;
+	new_->next = next;
+	new_->prev = prev;
+	prev->next = new_;
+}
+
+/**
+ * list_add - add a new entry
+ * @new_: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new_, struct list_head *head)
+{
+	__list_add(new_, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new_: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new_, struct list_head *head)
+{
+	__list_add(new_, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void __list_del_entry(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+}
+
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = NULL;
+	entry->prev = NULL;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del_entry(entry);
+	init_list_head(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	__list_del_entry(list);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	__list_del_entry(list);
+	list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_next_entry - get the next element in list
+ * @pos: the type * to cursor
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_next_entry(pos, member) \
+	list_entry((pos)->member.next, typeof(*(pos)), member)
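
To illustrate the intrusive style: nodes embed a list_head and container_of() recovers the containing struct, so links cost no separate allocation. A small sketch with hypothetical types:

	struct child {
		unsigned value;
		struct list_head list;	/* embedded link */
	};

	static unsigned sum_children(struct list_head *head)
	{
		unsigned total = 0;
		struct list_head *p;

		/* raw traversal; the entry iterators below wrap this pattern */
		for (p = head->next; p != head; p = p->next)
			total += container_of(p, struct child, list)->value;

		return total;
	}
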
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+	     pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
+	     &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_first_entry(head, typeof(*pos), member),	\
+	     n = list_next_entry(pos, member);				\
+	     &pos->member != (head);					\
+	     pos = n, n = list_next_entry(n, member))
+
+/*----------------------------------------------------------------*/
+
+#endif
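
One last sketch, reusing the hypothetical struct child from above and assuming the nodes were malloc'd: the _safe variant caches the next pointer before the loop body runs, so entries can be unlinked and freed mid-walk. This is the same pattern writeback() and flush_cache() rely on while issuing writes from the dirty list:

	#include <stdlib.h>
	#include "list.h"

	static void free_children(struct list_head *head)
	{
		struct child *c, *tmp;

		list_for_each_entry_safe (c, tmp, head, list) {
			list_del(&c->list);	/* safe: tmp already holds the next entry */
			free(c);
		}
	}
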