Buffered IO interface.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/Makefile      |    2 
 drivers/md/dm-bufio.c    |  759 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dm-bufio.h |   20 +
 3 files changed, 780 insertions(+), 1 deletion(-)

Index: linux-2.6.31-rc7-fast/drivers/md/dm-bufio.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.31-rc7-fast/drivers/md/dm-bufio.c	2009-08-25 14:49:46.000000000 +0200
@@ -0,0 +1,759 @@
+/*
+ * Copyright (C) 2008 Red Hat Czech, s.r.o.
+ *
+ * Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/dm-io.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dm-bufio.h>
+
+/*
+ * dm_bufio_client_create --- create a buffered IO cache on a given device
+ * dm_bufio_client_destroy --- release a buffered IO cache
+ *
+ * dm_bufio_read --- read a given block from disk. Returns a pointer to the
+ *	data and a pointer to a dm_buffer that can be used to release the
+ *	buffer or to make it dirty.
+ * dm_bufio_new --- like dm_bufio_read, but don't read anything from the
+ *	disk. It is expected that the caller initializes the buffer and
+ *	marks it dirty.
+ * dm_bufio_release --- release a reference obtained with dm_bufio_read or
+ *	dm_bufio_new. The data pointer and the dm_buffer pointer are no
+ *	longer valid after this call.
+ *
+ * WARNING: to avoid deadlocks, a thread may hold at most one buffer at a
+ * time. Multiple threads may each hold their own buffer simultaneously.
+ *
+ * dm_bufio_mark_buffer_dirty --- mark a buffer dirty. It should be called
+ *	after the buffer is modified.
+ * dm_bufio_write_dirty_buffers --- write all dirty buffers. Guarantees that
+ *	all dirty buffers created prior to this call are on disk when this
+ *	call exits.
+ *
+ *	Under memory pressure, a buffer may be written after
+ *	dm_bufio_mark_buffer_dirty but before dm_bufio_write_dirty_buffers.
+ *	dm_bufio_write_dirty_buffers only guarantees that the buffer is on
+ *	disk when it returns; the actual write may have happened earlier.
+ *
+ * dm_bufio_release_move --- like dm_bufio_release, but also move the buffer
+ *	to a new block. dm_bufio_write_dirty_buffers is needed to commit the
+ *	new block.
+ * dm_bufio_drop_buffers --- clear all buffers.
+ */
+
+/*
+ * When we're above the threshold, start asynchronous memory reclaim.
+ * When we're above the limit, start synchronous memory reclaim.
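+ *
+ * Both values are in bytes; dm_bufio_client_create converts them to a
+ * number of buffers according to the block size.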
+ */
+
+#define DM_BUFIO_THRESHOLD_MEMORY	(8 * 1048576)
+#define DM_BUFIO_LIMIT_MEMORY		(9 * 1048576)
+
+#define DM_BUFIO_PAGE_VECS		16
+
+#define DM_BUFIO_HASH_SIZE	(PAGE_SIZE / sizeof(struct hlist_head) / 2)
+#define DM_BUFIO_HASH(block)	((block) & (DM_BUFIO_HASH_SIZE - 1))
+
+#define DM_BUFIO_BLOCK_SIZE_KMALLOC_LIMIT	PAGE_SIZE
+
+#define B_READING	0
+#define B_WRITING	1
+#define B_DIRTY		2
+
+struct dm_bufio_client {
+	struct list_head lru;
+	struct list_head dirty_lru;
+	struct mutex lock;
+	struct block_device *bdev;
+	unsigned block_size;
+	unsigned char sectors_per_block_bits;
+	unsigned char pages_per_block_bits;
+
+	unsigned long n_buffers;
+	unsigned threshold_buffers;
+	unsigned limit_buffers;
+
+	struct dm_io_client *dm_io;
+
+	struct dm_buffer *reserved_buffer;
+	struct hlist_head cache_hash[DM_BUFIO_HASH_SIZE];
+	wait_queue_head_t free_buffer_wait;
+
+	int async_write_error;
+};
+
+#define DATA_MODE_KMALLOC		1
+#define DATA_MODE_GET_FREE_PAGES	2
+#define DATA_MODE_VMALLOC		3
+
+struct dm_buffer {
+	struct hlist_node hash_list;
+	struct list_head lru_list;
+	sector_t block;
+	void *data;
+	char data_mode;
+	unsigned hold_count;
+	int read_error;
+	int write_error;
+	unsigned long state;
+	struct dm_bufio_client *c;
+	struct bio bio;
+	struct bio_vec bio_vec[DM_BUFIO_PAGE_VECS];
+};
+
+/*
+ * Allocating buffers.
+ *
+ * Small buffers are allocated with kmalloc, to use space optimally.
+ *
+ * Large buffers:
+ * We use get_free_pages or vmalloc, both have their advantages and
+ * disadvantages.
+ * __get_free_pages can randomly fail if the memory is fragmented.
+ * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
+ * as low as 128M) --- so using it for caching is not appropriate.
+ * If the allocation may fail, we use __get_free_pages; memory fragmentation
+ * won't have a fatal effect here, it just causes flushes of some other
+ * buffers and more I/O will be performed.
+ * If the allocation shouldn't fail, we use __vmalloc. This is only for
+ * the initial reserve allocation, so there's no risk of wasting
+ * all vmalloc space.
+ */
+
+static void *dm_bufio_alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, char *data_mode)
+{
+	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_KMALLOC_LIMIT) {
+		*data_mode = DATA_MODE_KMALLOC;
+		return kmalloc(c->block_size, gfp_mask);
+	} else if (gfp_mask & __GFP_NORETRY) {
+		*data_mode = DATA_MODE_GET_FREE_PAGES;
+		return (void *)__get_free_pages(gfp_mask, c->pages_per_block_bits);
+	} else {
+		*data_mode = DATA_MODE_VMALLOC;
+		return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+	}
+}
+
+static void dm_bufio_free_buffer_data(struct dm_bufio_client *c, void *data, char data_mode)
+{
+	switch (data_mode) {
+
+	case DATA_MODE_KMALLOC:
+		kfree(data);
+		break;
+	case DATA_MODE_GET_FREE_PAGES:
+		free_pages((unsigned long)data, c->pages_per_block_bits);
+		break;
+	case DATA_MODE_VMALLOC:
+		vfree(data);
+		break;
+	default:
+		printk(KERN_CRIT "dm_bufio_free_buffer_data: bad data mode: %d", data_mode);
+		BUG();
+
+	}
+}
+
+
+/*
+ * Allocate a buffer and its data.
+ */
+
+static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
+{
+	struct dm_buffer *b;
+	b = kmalloc(sizeof(struct dm_buffer), gfp_mask);
+	if (!b)
+		return NULL;
+	b->c = c;
+	b->data = dm_bufio_alloc_buffer_data(c, gfp_mask, &b->data_mode);
+	if (!b->data) {
+		kfree(b);
+		return NULL;
+	}
+	return b;
+}
+
+/*
+ * Free a buffer and its data.
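+ * (The data_mode recorded by dm_bufio_alloc_buffer_data selects between
+ * kfree, free_pages and vfree.)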
+ */
+
+static void free_buffer(struct dm_buffer *b)
+{
+	dm_bufio_free_buffer_data(b->c, b->data, b->data_mode);
+	kfree(b);
+}
+
+
+/*
+ * Link the buffer to the hash list and the clean or dirty queue.
+ */
+
+static void link_buffer(struct dm_buffer *b, sector_t block, int dirty)
+{
+	struct dm_bufio_client *c = b->c;
+	c->n_buffers++;
+	b->block = block;
+	list_add(&b->lru_list, dirty ? &c->dirty_lru : &c->lru);
+	hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
+}
+
+/*
+ * Unlink the buffer from the hash list and the dirty or clean queue.
+ */
+
+static void unlink_buffer(struct dm_buffer *b)
+{
+	BUG_ON(!b->c->n_buffers);
+	b->c->n_buffers--;
+	hlist_del(&b->hash_list);
+	list_del(&b->lru_list);
+}
+
+/*
+ * Place the buffer at the head of the dirty or clean LRU queue.
+ */
+
+static void relink_lru(struct dm_buffer *b, int dirty)
+{
+	struct dm_bufio_client *c = b->c;
+	list_del(&b->lru_list);
+	list_add(&b->lru_list, dirty ? &c->dirty_lru : &c->lru);
+}
+
+
+static int just_io_schedule(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
+static void write_dirty_buffer(struct dm_buffer *b);
+
+/*
+ * Wait until any activity on the buffer finishes.
+ * Possibly write the buffer if it is dirty.
+ */
+
+void clean_buffer(struct dm_buffer *b)
+{
+	BUG_ON(b->hold_count);
+	if (likely(!b->state))	/* fast case */
+		return;
+	wait_on_bit(&b->state, B_READING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+	write_dirty_buffer(b);
+	wait_on_bit(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Find some buffer that is not held by anybody, clean it, unlink it and
+ * return it.
+ * If "wait" is zero, don't try as hard and don't block.
+ */
+
+static struct dm_buffer *get_unclaimed_buffer(struct dm_bufio_client *c, int wait)
+{
+	struct dm_buffer *b;
+	list_for_each_entry_reverse(b, &c->lru, lru_list) {
+		cond_resched();
+		BUG_ON(test_bit(B_WRITING, &b->state));
+		BUG_ON(test_bit(B_DIRTY, &b->state));
+		if (!b->hold_count) {
+			if (!wait && unlikely(test_bit(B_READING, &b->state)))
+				continue;
+			clean_buffer(b);
+			unlink_buffer(b);
+			return b;
+		}
+	}
+	list_for_each_entry_reverse(b, &c->dirty_lru, lru_list) {
+		cond_resched();
+		BUG_ON(test_bit(B_READING, &b->state));
+		if (!b->hold_count) {
+			if (!wait && (unlikely(test_bit(B_DIRTY, &b->state)) ||
+				      unlikely(test_bit(B_WRITING, &b->state)))) {
+				if (!test_bit(B_WRITING, &b->state))
+					write_dirty_buffer(b);
+				continue;
+			}
+			clean_buffer(b);
+			unlink_buffer(b);
+			return b;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Wait until some other thread releases its hold count on a buffer.
+ */
+
+static void drop_lock_and_wait_for_free_buffer(struct dm_bufio_client *c)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(&c->free_buffer_wait, &wait);
+	set_task_state(current, TASK_UNINTERRUPTIBLE);
+	mutex_unlock(&c->lock);
+	io_schedule();
+	set_task_state(current, TASK_RUNNING);
+	remove_wait_queue(&c->free_buffer_wait, &wait);
+}
+
+/*
+ * Allocate a new buffer. If allocation is not possible, wait until some other
+ * thread frees a buffer.
+ *
+ * May drop the lock and regain it.
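+ * Because of that, callers must revalidate anything they looked up under
+ * the lock before the call (see the retry_search loop in dm_bufio_new_read).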
+ */
+
+static struct dm_buffer *alloc_buffer_wait(struct dm_bufio_client *c)
+{
+	struct dm_buffer *b;
+
+retry:
+	b = alloc_buffer(c, GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN);
+	if (b)
+		return b;
+
+	if (c->reserved_buffer) {
+		b = c->reserved_buffer;
+		c->reserved_buffer = NULL;
+		return b;
+	}
+
+	b = get_unclaimed_buffer(c, 1);
+	if (b)
+		return b;
+
+	drop_lock_and_wait_for_free_buffer(c);
+	mutex_lock(&c->lock);
+	goto retry;
+}
+
+/*
+ * Free a buffer and wake other threads waiting for free buffers.
+ */
+
+static void free_buffer_wake(struct dm_buffer *b)
+{
+	struct dm_bufio_client *c = b->c;
+
+	if (unlikely(!c->reserved_buffer))
+		c->reserved_buffer = b;
+	else
+		free_buffer(b);
+
+	wake_up(&c->free_buffer_wait);
+
+	cond_resched();
+}
+
+/*
+ * Check if we're over the watermark. If we are, start freeing buffers.
+ * If we're over "limit_buffers", wait until buffers become available.
+ */
+
+static void check_watermark(struct dm_bufio_client *c)
+{
+	while (c->n_buffers > c->threshold_buffers) {
+		struct dm_buffer *b;
+		b = get_unclaimed_buffer(c, c->n_buffers > c->limit_buffers);
+		if (!b)
+			return;
+		free_buffer_wake(b);
+	}
+}
+
+static void dm_bufio_dmio_complete(unsigned long error, void *context);
+
+static void dm_bufio_submit_io(struct dm_buffer *b, int rw, sector_t block, bio_end_io_t *end_io)
+{
+	if (b->c->block_size <= DM_BUFIO_PAGE_VECS * PAGE_SIZE && b->data_mode != DATA_MODE_VMALLOC) {
+		char *ptr;
+		int len;
+		bio_init(&b->bio);
+		b->bio.bi_io_vec = b->bio_vec;
+		b->bio.bi_max_vecs = DM_BUFIO_PAGE_VECS;
+		b->bio.bi_sector = b->block << b->c->sectors_per_block_bits;
+		b->bio.bi_bdev = b->c->bdev;
+		b->bio.bi_end_io = end_io;
+
+		/*
+		 * We assume that if len >= PAGE_SIZE, ptr is page-aligned,
+		 * and if len < PAGE_SIZE, the buffer doesn't cross a page
+		 * boundary.
+		 */
+		ptr = b->data;
+		len = b->c->block_size;
+		do {
+			if (!bio_add_page(&b->bio, virt_to_page(ptr), len < PAGE_SIZE ? len : PAGE_SIZE, virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
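+				/*
+				 * bio_add_page refused the page; fall back to
+				 * dm-io below. This can only happen for blocks
+				 * larger than one page.
+				 */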
+				BUG_ON(b->c->block_size <= PAGE_SIZE);
+				goto use_dmio;
+			}
+			len -= PAGE_SIZE;
+			ptr += PAGE_SIZE;
+		} while (len > 0);
+		submit_bio(rw, &b->bio);
+	} else
+use_dmio : {
+		int r;
+		struct dm_io_request io_req = {
+			.bi_rw = rw,
+			.notify.fn = dm_bufio_dmio_complete,
+			.notify.context = b,
+			.client = b->c->dm_io,
+		};
+		struct dm_io_region region = {
+			.bdev = b->c->bdev,
+			.sector = b->block << b->c->sectors_per_block_bits,
+			.count = b->c->block_size >> SECTOR_SHIFT,
+		};
+		if (b->data_mode != DATA_MODE_VMALLOC) {
+			io_req.mem.type = DM_IO_KMEM;
+			io_req.mem.ptr.addr = b->data;
+		} else {
+			io_req.mem.type = DM_IO_VMA;
+			io_req.mem.ptr.vma = b->data;
+		}
+		b->bio.bi_end_io = end_io;
+		r = dm_io(&io_req, 1, &region, NULL);
+		if (unlikely(r))
+			end_io(&b->bio, r);
+	}
+}
+
+static void dm_bufio_dmio_complete(unsigned long error, void *context)
+{
+	struct dm_buffer *b = context;
+	int err = 0;
+	if (unlikely(error != 0))
+		err = -EIO;
+	b->bio.bi_end_io(&b->bio, err);
+}
+
+static struct dm_buffer *dm_bufio_find(struct dm_bufio_client *c, sector_t block)
+{
+	struct dm_buffer *b;
+	struct hlist_node *hn;
+	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
+		cond_resched();
+		if (b->block == block)
+			return b;
+	}
+
+	return NULL;
+}
+
+static void read_endio(struct bio *bio, int error);
+
+static void *dm_bufio_new_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp, int read)
+{
+	struct dm_buffer *b, *new_b = NULL;
+
+	cond_resched();
+	mutex_lock(&c->lock);
+retry_search:
+	b = dm_bufio_find(c, block);
+	if (b) {
+		if (new_b)
+			free_buffer_wake(new_b);
+		b->hold_count++;
+		relink_lru(b, test_bit(B_DIRTY, &b->state) || test_bit(B_WRITING, &b->state));
+unlock_wait_ret:
+		mutex_unlock(&c->lock);
+wait_ret:
+		wait_on_bit(&b->state, B_READING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+		if (b->read_error) {
+			int error = b->read_error;
+			dm_bufio_release(b);
+			return ERR_PTR(error);
+		}
+		*bp = b;
+		return b->data;
+	}
+	if (!new_b) {
+		new_b = alloc_buffer_wait(c);
+		goto retry_search;
+	}
+
+	check_watermark(c);
+
+	b = new_b;
+	b->hold_count = 1;
+	b->read_error = 0;
+	b->write_error = 0;
+	link_buffer(b, block, 0);
+
+	if (!read) {
+		b->state = 0;
+		goto unlock_wait_ret;
+	}
+
+	b->state = 1 << B_READING;
+
+	mutex_unlock(&c->lock);
+
+	dm_bufio_submit_io(b, READ, b->block, read_endio);
+
+	goto wait_ret;
+}
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp)
+{
+	return dm_bufio_new_read(c, block, bp, 1);
+}
+EXPORT_SYMBOL(dm_bufio_read);
+
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp)
+{
+	return dm_bufio_new_read(c, block, bp, 0);
+}
+EXPORT_SYMBOL(dm_bufio_new);
+
+static void read_endio(struct bio *bio, int error)
+{
+	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+	b->read_error = error;
+	BUG_ON(!test_bit(B_READING, &b->state));
+	smp_mb__before_clear_bit();
+	clear_bit(B_READING, &b->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&b->state, B_READING);
+}
+
+void dm_bufio_release(struct dm_buffer *b)
+{
+	struct dm_bufio_client *c = b->c;
+	mutex_lock(&c->lock);
+	BUG_ON(!b->hold_count);
+	BUG_ON(test_bit(B_READING, &b->state));
+	b->hold_count--;
+	if (!b->hold_count) {
+		wake_up(&c->free_buffer_wait);
+		if ((b->read_error || b->write_error) && !test_bit(B_WRITING, &b->state) && !test_bit(B_DIRTY, &b->state)) {
+			unlink_buffer(b);
+			free_buffer_wake(b);
+		}
+	}
+	mutex_unlock(&c->lock);
+}
+EXPORT_SYMBOL(dm_bufio_release);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+	struct dm_bufio_client *c = b->c;
+
+	mutex_lock(&c->lock);
+
+	if (!test_and_set_bit(B_DIRTY, &b->state))
+		relink_lru(b, 1);
+
+	mutex_unlock(&c->lock);
+}
+EXPORT_SYMBOL(dm_bufio_mark_buffer_dirty);
+
+static void write_endio(struct bio *bio, int error);
+
+static void write_dirty_buffer(struct dm_buffer *b)
+{
+	if (!test_bit(B_DIRTY, &b->state))
+		return;
+	clear_bit(B_DIRTY, &b->state);
+	wait_on_bit_lock(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+	dm_bufio_submit_io(b, WRITE, b->block, write_endio);
+}
+
+static void write_endio(struct bio *bio, int error)
+{
+	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+	b->write_error = error;
+	if (unlikely(error)) {
+		struct dm_bufio_client *c = b->c;
+		cmpxchg(&c->async_write_error, 0, error);
+	}
+	BUG_ON(!test_bit(B_WRITING, &b->state));
+	smp_mb__before_clear_bit();
+	clear_bit(B_WRITING, &b->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&b->state, B_WRITING);
+}
+
+static void write_dirty_buffers_async(struct dm_bufio_client *c)
+{
+	struct dm_buffer *b;
+	list_for_each_entry_reverse(b, &c->dirty_lru, lru_list) {
+		cond_resched();
+		BUG_ON(test_bit(B_READING, &b->state));
+		write_dirty_buffer(b);
+	}
+}
+
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
+{
+	struct dm_buffer *b, *tmp;
+	mutex_lock(&c->lock);
+	write_dirty_buffers_async(c);
+	mutex_unlock(&c->lock);
+	mutex_lock(&c->lock);
+	list_for_each_entry_safe_reverse(b, tmp, &c->dirty_lru, lru_list) {
+		cond_resched();
+		BUG_ON(test_bit(B_READING, &b->state));
+		if (test_bit(B_WRITING, &b->state)) {
+			b->hold_count++;
+			mutex_unlock(&c->lock);
+			wait_on_bit(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+			mutex_lock(&c->lock);
+			b->hold_count--;
+		}
+		if (!test_bit(B_DIRTY, &b->state) && !test_bit(B_WRITING, &b->state))
+			relink_lru(b, 0);
+	}
+	wake_up(&c->free_buffer_wait);
+	mutex_unlock(&c->lock);
+	return xchg(&c->async_write_error, 0);
+}
+EXPORT_SYMBOL(dm_bufio_write_dirty_buffers);
+
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
+{
+	struct dm_bufio_client *c = b->c;
+	struct dm_buffer *underlying;
+
+retry:
+	mutex_lock(&c->lock);
+
+	underlying = dm_bufio_find(c, new_block);
+	if (unlikely(underlying != NULL)) {
+		if (underlying->hold_count) {
+			drop_lock_and_wait_for_free_buffer(c);
+			goto retry;
+		}
+		clean_buffer(underlying);
+		unlink_buffer(underlying);
+		free_buffer_wake(underlying);
+	}
+
+	BUG_ON(!b->hold_count);
+	BUG_ON(test_bit(B_READING, &b->state));
+	write_dirty_buffer(b);
+	if (b->hold_count == 1) {
+		wait_on_bit(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+		set_bit(B_DIRTY, &b->state);
+		unlink_buffer(b);
+		link_buffer(b, new_block, 1);
+	} else {
+		wait_on_bit_lock(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+		dm_bufio_submit_io(b, WRITE, new_block, write_endio);
+		wait_on_bit(&b->state, B_WRITING, just_io_schedule, TASK_UNINTERRUPTIBLE);
+	}
+	mutex_unlock(&c->lock);
+	dm_bufio_release(b);
+}
+EXPORT_SYMBOL(dm_bufio_release_move);
+
+void dm_bufio_drop_buffers(struct dm_bufio_client *c)
+{
+	struct dm_buffer *b;
+	mutex_lock(&c->lock);
+	write_dirty_buffers_async(c);
+	while ((b = get_unclaimed_buffer(c, 1)))
+		free_buffer_wake(b);
+	BUG_ON(!list_empty(&c->lru));
+	BUG_ON(!list_empty(&c->dirty_lru));
+	mutex_unlock(&c->lock);
+}
+EXPORT_SYMBOL(dm_bufio_drop_buffers);
+
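+/*
+ * Create a client for the given block device.
+ *
+ * block_size must be a power of two and at least 512 bytes.
+ * cache_threshold and cache_limit are in bytes; zero selects the defaults
+ * defined above. The flags argument is not used in this version.
+ */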
+struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size, unsigned flags, __u64 cache_threshold, __u64 cache_limit)
+{
+	int r;
+	struct dm_bufio_client *c;
+	unsigned i;
+
+	BUG_ON(block_size < 1 << SECTOR_SHIFT || (block_size & (block_size - 1)));
+
+	c = kmalloc(sizeof(*c), GFP_KERNEL);
+	if (!c) {
+		r = -ENOMEM;
+		goto bad_client;
+	}
+
+	c->bdev = bdev;
+	c->block_size = block_size;
+	c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT;
+	c->pages_per_block_bits = ffs(block_size) - 1 >= PAGE_SHIFT ? ffs(block_size) - 1 - PAGE_SHIFT : 0;
+	INIT_LIST_HEAD(&c->lru);
+	INIT_LIST_HEAD(&c->dirty_lru);
+	for (i = 0; i < DM_BUFIO_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&c->cache_hash[i]);
+	mutex_init(&c->lock);
+	c->n_buffers = 0;
+
+	if (!cache_limit)
+		cache_limit = DM_BUFIO_LIMIT_MEMORY;
+	c->limit_buffers = cache_limit >> (c->sectors_per_block_bits + SECTOR_SHIFT);
+	if (!c->limit_buffers)
+		c->limit_buffers = 1;
+
+	if (!cache_threshold)
+		cache_threshold = DM_BUFIO_THRESHOLD_MEMORY;
+	if (cache_threshold > cache_limit)
+		cache_threshold = cache_limit;
+	c->threshold_buffers = cache_threshold >> (c->sectors_per_block_bits + SECTOR_SHIFT);
+	if (!c->threshold_buffers)
+		c->threshold_buffers = 1;
+
+	/*printk("%d %d\n", c->limit_buffers, c->threshold_buffers);*/
+
+	init_waitqueue_head(&c->free_buffer_wait);
+	c->async_write_error = 0;
+
+	c->dm_io = NULL;
+	if (block_size > PAGE_SIZE) {
+		/* This is not really a hard limit, just a mempool size */
+		unsigned num_pages = (block_size + PAGE_SIZE - 1) / PAGE_SIZE;
+		c->dm_io = dm_io_client_create(num_pages);
+		if (IS_ERR(c->dm_io)) {
+			r = PTR_ERR(c->dm_io);
+			goto bad_dm_io;
+		}
+	}
+
+	c->reserved_buffer = alloc_buffer(c, GFP_KERNEL);
+	if (!c->reserved_buffer) {
+		r = -ENOMEM;
+		goto bad_buffer;
+	}
+
+	return c;
+
+bad_buffer:
+	if (c->dm_io)
+		dm_io_client_destroy(c->dm_io);
+bad_dm_io:
+	kfree(c);
+bad_client:
+	return ERR_PTR(r);
+}
+EXPORT_SYMBOL(dm_bufio_client_create);
+
+void dm_bufio_client_destroy(struct dm_bufio_client *c)
+{
+	unsigned i;
+	dm_bufio_drop_buffers(c);
+	for (i = 0; i < DM_BUFIO_HASH_SIZE; i++)
+		BUG_ON(!hlist_empty(&c->cache_hash[i]));
+	BUG_ON(!c->reserved_buffer);
+	free_buffer(c->reserved_buffer);
+	BUG_ON(c->n_buffers != 0);
+	if (c->dm_io)
+		dm_io_client_destroy(c->dm_io);
+	kfree(c);
+}
+EXPORT_SYMBOL(dm_bufio_client_destroy);
Index: linux-2.6.31-rc7-fast/drivers/md/Makefile
===================================================================
--- linux-2.6.31-rc7-fast.orig/drivers/md/Makefile	2009-08-25 14:42:21.000000000 +0200
+++ linux-2.6.31-rc7-fast/drivers/md/Makefile	2009-08-25 14:42:25.000000000 +0200
@@ -3,7 +3,7 @@
 #
 
 dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-bufio.o dm-sysfs.o
 dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		   dm-snap-persistent.o
Index: linux-2.6.31-rc7-fast/include/linux/dm-bufio.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.31-rc7-fast/include/linux/dm-bufio.h	2009-08-25 14:42:25.000000000 +0200
@@ -0,0 +1,20 @@
+#ifndef _LINUX_DM_BUFIO_H
+#define _LINUX_DM_BUFIO_H
+
+struct dm_bufio_client;
+struct dm_buffer;
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp);
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp);
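+/* Releasing a buffer invalidates its data pointer (see dm-bufio.c). */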
+void dm_bufio_release(struct dm_buffer *b);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
+
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
+
+struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size, unsigned flags, __u64 cache_threshold, __u64 cache_limit);
+void dm_bufio_client_destroy(struct dm_bufio_client *c);
+void dm_bufio_drop_buffers(struct dm_bufio_client *c);
+
+#endif
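
For reviewers, a minimal usage sketch of the interface documented at the top of dm-bufio.c. The helper name, the block number and the 16-byte update are hypothetical and only illustrate the calling sequence; they are not part of this patch. The client is assumed to have been created earlier with dm_bufio_client_create(bdev, block_size, 0, 0, 0) and to be torn down with dm_bufio_client_destroy() when the caller shuts down.

#include <linux/err.h>
#include <linux/string.h>
#include <linux/dm-bufio.h>

/* Hypothetical helper: zero the first 16 bytes of one block through the cache. */
static int example_update_block(struct dm_bufio_client *c, sector_t block)
{
	struct dm_buffer *bp;
	void *data;

	/* Read the block (or find it in the cache) and pin it. */
	data = dm_bufio_read(c, block, &bp);
	if (IS_ERR(data))
		return PTR_ERR(data);

	memset(data, 0, 16);			/* modify the cached data */
	dm_bufio_mark_buffer_dirty(bp);		/* queue it for writeback */
	dm_bufio_release(bp);			/* drop the single hold this thread owns */

	/* Make sure the modification is on disk before returning. */
	return dm_bufio_write_dirty_buffers(c);
}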