New snapshot implementation. This implementation has shared storage and an unlimited number of snapshots. The work is split into two modules: dm-multisnapshot.ko - the general module dm-store-mikulas.ko - the snapshot store The modularity allows other snapshot stores to be loaded. Usage: Create two logical volumes, one for origin and one for snapshots. (assume /dev/mapper/vg1-lv1 for origin and /dev/mapper/vg1-lv2 for snapshot in these examples) Clear the first sector of the snapshot volume: dd if=/dev/zero of=/dev/mapper/vg1-lv2 bs=4096 count=1 Load the shared snapshot driver: echo 0 `blockdev --getsize /dev/mapper/vg1-lv1` multisnapshot mikulas /dev/mapper/vg1-lv1 /dev/mapper/vg1-lv2 4096|dmsetup create ms (4096 is the chunk size. You can use a different number there) This creates the origin store on /dev/mapper/ms. If the store was zeroed, it creates a new structure, otherwise it loads the existing structure. Once this is done, you should no longer access /dev/mapper/vg1-lv1 and /dev/mapper/vg1-lv2 and only use /dev/mapper/ms. Create a new snapshot: dmsetup message /dev/mapper/ms 0 create (snapshots have IDs assigned from 0 upwards --- it prints the newly created ID to syslog ... in the final version it will use status to publish the ID) Attach the snapshot: echo 0 `blockdev --getsize /dev/mapper/vg1-lv1` multisnap-snap /dev/mapper/vg1-lv1 0|dmsetup create ms0 (that '0' is the snapshot id ... you can use a different number) This attaches the snapshot '0' on /dev/mapper/ms0 See status: dmsetup status prints this information about the multisnapshot device: - 0 on active storage, error number on error (ENOSPC, EIO, etc.) - the last created snapshot number - total number of chunks on the device - total number of allocated chunks - the number of chunks allocated for metadata - the number of snapshots - existing snapshot IDs Unload it: dmsetup remove ms dmsetup remove ms0 ... etc.
(note, once you unload the origin, the snapshots become inaccessible - the devices exist but they return -EIO on everything) Signed-off-by: Mikulas Patocka --- drivers/md/Kconfig | 21 drivers/md/Makefile | 7 drivers/md/dm-multisnap-alloc.c | 384 +++++++++++ drivers/md/dm-multisnap-blocks.c | 121 +++ drivers/md/dm-multisnap-btree.c | 454 +++++++++++++ drivers/md/dm-multisnap-commit.c | 184 +++++ drivers/md/dm-multisnap-io.c | 186 +++++ drivers/md/dm-multisnap-mikulas-struct.h | 118 +++ drivers/md/dm-multisnap-mikulas.c | 418 ++++++++++++ drivers/md/dm-multisnap-mikulas.h | 149 ++++ drivers/md/dm-multisnap-snaps.c | 190 +++++ drivers/md/dm-multisnap.c | 1034 +++++++++++++++++++++++++++++++ drivers/md/dm-multisnap.h | 178 +++++ 13 files changed, 3444 insertions(+) Index: linux-2.6.29-rc3-devel/drivers/md/Kconfig =================================================================== --- linux-2.6.29-rc3-devel.orig/drivers/md/Kconfig 2009-02-03 07:03:52.000000000 +0100 +++ linux-2.6.29-rc3-devel/drivers/md/Kconfig 2009-02-03 09:08:56.000000000 +0100 @@ -258,6 +258,27 @@ config DM_SNAPSHOT ---help--- Allow volume managers to take writable snapshots of a device. +config DM_MULTISNAPSHOT + tristate "Multisnapshot target" + depends on BLK_DEV_DM + ---help--- + A new implementation of snapshots allowing sharing storage + between several snapshots. + + A submenu allows to select a specific shared snapshot store + driver. + +config DM_MULTISNAPSHOT_MIKULAS + tristate "Mikulas' snapshot store" + depends on DM_MULTISNAPSHOT + ---help--- + Mikulas Patocka's snapshot store. + + A log-structured storage allowing unlimited number of snapshots. + + This is work under development. So far it doesn't support snapshot + deletion and reclaiming of allocated space. 
+ config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM Index: linux-2.6.29-rc3-devel/drivers/md/Makefile =================================================================== --- linux-2.6.29-rc3-devel.orig/drivers/md/Makefile 2009-02-03 07:03:59.000000000 +0100 +++ linux-2.6.29-rc3-devel/drivers/md/Makefile 2009-02-03 09:08:56.000000000 +0100 @@ -7,6 +7,11 @@ dm-mod-objs := dm.o dm-table.o dm-target dm-multipath-objs := dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o +dm-multisnapshot-objs := dm-multisnap.o +dm-store-mikulas-objs := dm-multisnap-mikulas.o dm-multisnap-alloc.o \ + dm-multisnap-blocks.o dm-multisnap-btree.o \ + dm-multisnap-commit.o dm-multisnap-io.o \ + dm-multisnap-snaps.o dm-multisnap-blocks.o dm-mirror-objs := dm-raid1.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ @@ -36,6 +41,8 @@ obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_LOOP) += dm-loop.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o +obj-$(CONFIG_DM_MULTISNAPSHOT) += dm-multisnapshot.o +obj-$(CONFIG_DM_MULTISNAPSHOT_MIKULAS) += dm-store-mikulas.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_ZERO) += dm-zero.o Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap.c 2009-02-03 09:08:25.000000000 +0100 @@ -0,0 +1,1034 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#include "dm-multisnap.h" + +#define MESG_STR(x) x, sizeof(x) + +static void dm_multisnap_process_bios(struct dm_multisnap *s); + +void dm_multisnap_set_error(struct dm_multisnap *s, int error) +{ + if (!s->error) + s->error = error; +} +EXPORT_SYMBOL(dm_multisnap_set_error); + +int dm_multisnap_has_error(struct dm_multisnap *s) +{ + return s->error; +} +EXPORT_SYMBOL(dm_multisnap_has_error); + +static DEFINE_MUTEX(all_multisnapshots_lock); +static LIST_HEAD(all_multisnapshots); + +static chunk_t sector_to_chunk(struct dm_multisnap *s, sector_t sector) +{ + return sector >> (s->chunk_shift - SECTOR_SHIFT); +} + +static sector_t chunk_to_sector(struct dm_multisnap *s, chunk_t chunk) +{ + return chunk << (s->chunk_shift - SECTOR_SHIFT); +} + +/* --- bio list --- */ + +static DEFINE_SPINLOCK(dm_multisnap_bio_list_lock); + +static void wakeup_kmultisnapd(struct dm_multisnap *s) +{ + queue_work(s->wq, &s->work); +} + +static void dm_multisnap_enqueue_bio_unlocked(struct dm_multisnap *s, struct bio *bio) +{ + bio_list_add(&s->bios, bio); +} + +static void dm_multisnap_enqueue_bio(struct dm_multisnap *s, struct bio *bio) +{ + spin_lock(&dm_multisnap_bio_list_lock); + dm_multisnap_enqueue_bio_unlocked(s, bio); + spin_unlock(&dm_multisnap_bio_list_lock); +} + +static void dm_multisnap_enqueue_bio_list(struct dm_multisnap *s, struct bio_list *bl) +{ + struct bio *bio; + while ((bio = bio_list_pop(bl))) + dm_multisnap_enqueue_bio(s, bio); +} + +/* --- pending_exception_cache --- */ + +static struct kmem_cache *dm_multisnap_pending_exception_cache; + +#define GFP_PENDING_EXCEPTION GFP_NOIO + +static void dm_multisnap_pending_exception_ctor(void *pe_) +{ + struct dm_multisnap_pending_exception *pe = pe_; + bio_list_init(&pe->bios); +} + +static struct dm_multisnap_pending_exception *dm_multisnap_alloc_pending_exception(struct dm_multisnap *s, chunk_t chunk) +{ + struct dm_multisnap_pending_exception *pe; + /* + * Warning, we don't want to wait. 
Because we are holding master_lock + * and taking this lock is needed to complete the exception. + * + * If an allocation failure happens, we must go up, drop the lock, + * try dummy mempool allocation and go here again. + */ + pe = mempool_alloc(s->pending_pool, GFP_PENDING_EXCEPTION & ~__GFP_WAIT); + if (unlikely(!pe)) + return NULL; + + pe->s = s; + pe->chunk = chunk; + hlist_add_head(&pe->hash_list, &s->pending_hash[PENDING_HASH(chunk)]); + return pe; +} + +static void dm_multisnap_free_pending_exception(struct dm_multisnap_pending_exception *pe) +{ + hlist_del(&pe->hash_list); + mempool_free(pe, pe->s->pending_pool); +} + +static void dm_multisnap_wait_for_pending_exception(struct dm_multisnap *s) +{ + /* + * Wait until there is something in the mempool. Free it immediatelly. + */ + struct dm_multisnap_pending_exception *pe; + + pe = mempool_alloc(s->pending_pool, GFP_PENDING_EXCEPTION | __GFP_WAIT); + mempool_free(pe, s->pending_pool); +} + +static int check_pending_io(struct dm_multisnap *s, struct bio *bio, chunk_t chunk, snapid_t snapid) +{ + struct dm_multisnap_pending_exception *pe; + struct hlist_node *hn; + hlist_for_each_entry(pe, hn, &s->pending_hash[PENDING_HASH(chunk)], hash_list) { + if (pe->chunk == chunk) { + int i; + if (snapid == SNAPID_T_ORIGIN) + goto conflict; + for (i = 0; i < pe->n_descs; i++) { + if (s->store->check_conflict(s, &pe->desc[i], snapid)) + goto conflict; + } + } + cond_resched(); + } + return 0; + +conflict: + bio_list_add(&pe->bios, bio); + return 1; +} + +/* --- kcopyd callback --- */ + +static void remap_callback(int read_err, unsigned long write_err, void *pe_) +{ + struct dm_multisnap_pending_exception *pe = pe_; + struct dm_multisnap *s = pe->s; + + if (unlikely((read_err | write_err) != 0)) { + DMERR("remap_callback: kcopyd I/O error: %d, %lx", read_err, write_err); + /* !!! FIXME: drop the snapshot ? 
*/ + } + + list_add_tail(&pe->list, &s->pes_waiting_for_commit); + + if (atomic_dec_and_test(&s->n_kcopyd_jobs)) { + + /* We need to commit stuff */ + mutex_lock(&s->master_lock); + if (unlikely(atomic_read(&s->n_kcopyd_jobs))) { + /* Not yet ... kmultisnapd has just added something */ + mutex_unlock(&s->master_lock); + return; + } + + s->store->commit(s); + + do { + pe = container_of(s->pes_waiting_for_commit.next, struct dm_multisnap_pending_exception, list); + list_del(&pe->list); + dm_multisnap_enqueue_bio_list(s, &pe->bios); + dm_multisnap_free_pending_exception(pe); + } while (!list_empty(&s->pes_waiting_for_commit)); + + /* + * Process the bios that we have just added to the queue. + * It's faster to process them now than to hand them over to + * kmultisnapd. + */ + dm_multisnap_process_bios(s); + + mutex_unlock(&s->master_lock); + + blk_unplug(bdev_get_queue(s->origin->bdev)); + blk_unplug(bdev_get_queue(s->snapshot->bdev)); + } +} + +static void drain_kcopyd_jobs(struct dm_multisnap *s) +{ + while (atomic_read(&s->n_kcopyd_jobs)) + msleep(1); + smp_mb(); +} + +static void dispatch_kcopyd(struct dm_multisnap *s, struct dm_multisnap_pending_exception *pe, int from_snapshot, chunk_t chunk, struct bio *bio, struct dm_io_region *dests, unsigned n_dests) +{ + unsigned i; + struct dm_io_region src; + sector_t origin_sectors = i_size_read(s->origin->bdev->bd_inode) >> SECTOR_SHIFT; + + pe->n_descs = n_dests; + + bio_list_add(&pe->bios, bio); + + src.bdev = likely(!from_snapshot) ? 
s->origin->bdev : s->snapshot->bdev; + src.sector = chunk_to_sector(s, chunk); + src.count = s->chunk_size >> SECTOR_SHIFT; + + if (likely(!from_snapshot) && unlikely(src.sector + src.count > origin_sectors)) { + BUG_ON(src.sector >= origin_sectors); + src.count = origin_sectors - src.sector; + for (i = 0; i < pe->n_descs; i++) + dests[i].count = src.count; + } + + atomic_inc(&s->n_kcopyd_jobs); + + dm_kcopyd_copy(s->kcopyd, &src, n_dests, dests, 0, remap_callback, pe); +} + +/* --- bio processing --- */ + +static void do_origin_write(struct dm_multisnap *s, struct bio *bio) +{ + int r; + unsigned i; + chunk_t chunk, new_chunk; + struct dm_multisnap_pending_exception *pe; + struct dm_io_region dests[MAX_CHUNKS_TO_REMAP]; + + /* reads are processed directly in multisnap_origin_map */ + BUG_ON(bio_rw(bio) != WRITE); + + if (unlikely(dm_multisnap_has_error(s))) + goto err_endio; + + s->store->reset_query(s); + + chunk = sector_to_chunk(s, bio->bi_sector); + + r = s->store->query_next_remap(s, chunk); + if (unlikely(r < 0)) + goto err_endio; + + if (likely(!r)) { + /* There is nothing to remap */ + + if (unlikely(check_pending_io(s, bio, chunk, SNAPID_T_ORIGIN))) + return; + bio->bi_bdev = s->origin->bdev; + generic_make_request(bio); + return; + } + + pe = dm_multisnap_alloc_pending_exception(s, chunk); + if (unlikely(!pe)) { + s->pending_mempool_allocation_failed = 1; + dm_multisnap_enqueue_bio(s, bio); + return; + } + + i = 0; + goto midcycle; + for (; i < MAX_CHUNKS_TO_REMAP; i++) { + r = s->store->query_next_remap(s, chunk); + if (unlikely(r < 0)) + goto free_err_endio; + if (likely(!r)) + break; + +midcycle: + s->store->add_next_remap(s, &pe->desc[i], &new_chunk); + if (unlikely(dm_multisnap_has_error(s))) + goto free_err_endio; + + dests[i].bdev = s->snapshot->bdev; + dests[i].sector = chunk_to_sector(s, new_chunk); + dests[i].count = s->chunk_size >> SECTOR_SHIFT; + } + + dispatch_kcopyd(s, pe, 0, chunk, bio, dests, i); + return; + +free_err_endio: + 
dm_multisnap_free_pending_exception(pe); +err_endio: + r = -EIO; /* !!! FIXME: maybe allow it, if we drop snapshot store */ + bio_endio(bio, r); + return; +} + +static void do_snapshot_io(struct dm_multisnap *s, struct bio *bio, snapid_t id) +{ + chunk_t chunk, result, copy_from; + int r; + struct dm_multisnap_pending_exception *pe; + struct dm_io_region dest; + + if (unlikely(bio_rw(bio) == WRITE) && unlikely(!s->store->make_chunk_writeable)) + goto err_endio; + + if (unlikely(dm_multisnap_has_error(s))) + goto err_endio; + + chunk = sector_to_chunk(s, bio->bi_sector); + r = s->store->find_snapshot_chunk(s, id, chunk, &result); + if (unlikely(r < 0)) + goto err_endio; + + if (!r) { + if (unlikely(bio_rw(bio) == WRITE)) { + pe = dm_multisnap_alloc_pending_exception(s, chunk); + if (unlikely(!pe)) + goto failed_pe_allocation; + + s->store->add_next_remap(s, &pe->desc[0], &result); + if (unlikely(dm_multisnap_has_error(s))) + goto free_err_endio; + + dest.bdev = s->snapshot->bdev; + dest.sector = chunk_to_sector(s, result); + dest.count = s->chunk_size >> SECTOR_SHIFT; + + dispatch_kcopyd(s, pe, 0, chunk, bio, &dest, 1); + return; + } + + /* not found in the snapshot */ + /* !!! 
FIXME: track i/o in-progress */ + bio->bi_bdev = s->origin->bdev; + } else { + if (unlikely(check_pending_io(s, bio, chunk, id))) + return; + + if (unlikely(bio_rw(bio) == WRITE) && r == 1) { + copy_from = result; + + pe = dm_multisnap_alloc_pending_exception(s, chunk); + if (unlikely(!pe)) + goto failed_pe_allocation; + + s->store->make_chunk_writeable(s, &pe->desc[0], &result); + if (unlikely(dm_multisnap_has_error(s))) + goto free_err_endio; + + dest.bdev = s->snapshot->bdev; + dest.sector = chunk_to_sector(s, result); + dest.count = s->chunk_size >> SECTOR_SHIFT; + + dispatch_kcopyd(s, pe, 1, copy_from, bio, &dest, 1); + return; + } + + bio->bi_bdev = s->snapshot->bdev; + bio->bi_sector &= (s->chunk_size >> SECTOR_SHIFT) - 1; + bio->bi_sector |= chunk_to_sector(s, result); + } + generic_make_request(bio); + return; + +free_err_endio: + dm_multisnap_free_pending_exception(pe); +err_endio: + r = -EIO; + bio_endio(bio, r); + return; + +failed_pe_allocation: + s->pending_mempool_allocation_failed = 1; + dm_multisnap_enqueue_bio(s, bio); + return; +} + +static void dm_multisnap_process_bios(struct dm_multisnap *s) +{ + struct bio *bio; + sector_t origin_sectors; + +again: + cond_resched(); + + spin_lock(&dm_multisnap_bio_list_lock); + bio = bio_list_pop(&s->bios); + spin_unlock(&dm_multisnap_bio_list_lock); + + if (unlikely(!bio)) + return; + + origin_sectors = i_size_read(s->origin->bdev->bd_inode) >> SECTOR_SHIFT; + if (bio->bi_sector + (bio->bi_size >> SECTOR_SHIFT) > origin_sectors) { + DMERR("dm_multisnap_process_bios: access out of device, flags %lx, sector %Lx, size %x, origin sectors %Lx", bio->bi_flags, (unsigned long long)bio->bi_sector, bio->bi_size, (unsigned long long)origin_sectors); + bio_endio(bio, -EIO); + goto next_bio; + } + + if (likely(bio->bi_phys_segments == SNAPID_T_ORIGIN)) + do_origin_write(s, bio); + else + do_snapshot_io(s, bio, bio->bi_phys_segments); + +next_bio: + if (!bio_list_empty(&s->bios)) { + if 
(likely(!s->pending_mempool_allocation_failed) && + likely(list_empty(&s->master_lock.wait_list))) + goto again; + wakeup_kmultisnapd(s); + } +} + +static void dm_multisnap_work(struct work_struct *work) +{ + struct dm_multisnap *s = container_of(work, struct dm_multisnap, work); + + mutex_lock(&s->master_lock); + dm_multisnap_process_bios(s); + mutex_unlock(&s->master_lock); + + if (unlikely(s->pending_mempool_allocation_failed)) { + s->pending_mempool_allocation_failed = 0; + dm_multisnap_wait_for_pending_exception(s); + } + + blk_unplug(bdev_get_queue(s->origin->bdev)); + blk_unplug(bdev_get_queue(s->snapshot->bdev)); +} + +static struct dm_multisnap *find_multisnapshot(struct block_device *origin) +{ + struct dm_multisnap *s; + list_for_each_entry(s, &all_multisnapshots, list_all) + if (s->origin->bdev == origin) + return s; + return NULL; +} + +/* --- exception stores --- */ + +static DEFINE_MUTEX(exception_stores_lock); +static LIST_HEAD(all_exception_stores); + +static struct dm_multisnap_exception_store *dm_multisnap_find_exception_store(const char *name) +{ + struct dm_multisnap_exception_store *store; + + list_for_each_entry(store, &all_exception_stores, list) + if (!strcmp(store->name, name)) + return store; + + return NULL; +} + +static int dm_multisnap_exception_store_active(struct dm_multisnap_exception_store *find) +{ + struct dm_multisnap_exception_store *store; + + list_for_each_entry(store, &all_exception_stores, list) + if (store == find) + return 1; + + return 0; +} + +int dm_multisnap_register_exception_store(struct dm_multisnap_exception_store *store) +{ + mutex_lock(&exception_stores_lock); + + BUG_ON(dm_multisnap_exception_store_active(store)); + + if (dm_multisnap_find_exception_store(store->name)) { + mutex_unlock(&exception_stores_lock); + return -EEXIST; + } + list_add(&store->list, &all_exception_stores); + + mutex_unlock(&exception_stores_lock); + + return 0; +} +EXPORT_SYMBOL(dm_multisnap_register_exception_store); + +void 
dm_multisnap_unregister_exception_store(struct dm_multisnap_exception_store *store) +{ + mutex_lock(&exception_stores_lock); + + BUG_ON(!dm_multisnap_exception_store_active(store)); + list_del(&store->list); + + mutex_unlock(&exception_stores_lock); +} +EXPORT_SYMBOL(dm_multisnap_unregister_exception_store); + +static struct dm_multisnap_exception_store *dm_multisnap_get_exception_store(const char *name) +{ + struct dm_multisnap_exception_store *store; + + mutex_lock(&exception_stores_lock); + + store = dm_multisnap_find_exception_store(name); + if (store) { + if (!try_module_get(store->module)) + store = NULL; + } + + mutex_unlock(&exception_stores_lock); + + return store; +} + +static void dm_multisnap_put_exception_store(struct dm_multisnap_exception_store *store) +{ + mutex_lock(&exception_stores_lock); + + BUG_ON(!dm_multisnap_exception_store_active(store)); + module_put(store->module); + + mutex_unlock(&exception_stores_lock); +} + +/* --- target methods --- */ + +static int multisnap_origin_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + int r; + int i; + const char *store_name; + const char *origin_path; + const char *snapshot_path; + + struct dm_multisnap *s; + + if (argc < 3) { + ti->error = "Requires at least 3 arguments"; + r = -EINVAL; + goto bad_arguments; + } + + store_name = argv[0]; + origin_path = argv[1]; + snapshot_path = argv[2]; + argv += 3; + argc -= 3; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) { + ti->error = "Can't allocate multisnapshot structure"; + r = -ENOMEM; + goto bad_s; + } + + s->store = dm_multisnap_get_exception_store(store_name); + if (!s->store) { + request_module("dm-store-%s", store_name); + s->store = dm_multisnap_get_exception_store(store_name); + if (!s->store) { + ti->error = "Can't get exception store type"; + r = -ENOENT; + goto bad_store; + } + } + + s->error = 0; + mutex_init(&s->master_lock); + INIT_WORK(&s->work, dm_multisnap_work); + bio_list_init(&s->bios); + atomic_set(&s->n_kcopyd_jobs, 0); + 
INIT_LIST_HEAD(&s->pes_waiting_for_commit); + for (i = 0; i < PENDING_HASH_SIZE; i++) + INIT_HLIST_HEAD(&s->pending_hash[i]); + s->pending_mempool_allocation_failed = 0; + s->last_snapid = 0; + INIT_LIST_HEAD(&s->all_snaps); + + r = dm_get_device(ti, origin_path, 0, 0, FMODE_READ | FMODE_WRITE, &s->origin); + if (r) { + ti->error = "Could not get origin device"; + goto bad_origin; + } + + r = dm_get_device(ti, snapshot_path, 0, 0, FMODE_READ | FMODE_WRITE, &s->snapshot); + if (r) { + ti->error = "Could not get snapshot device"; + goto bad_snapshot; + } + + s->wq = create_singlethread_workqueue("kmultisnapd"); + if (!s->wq) { + ti->error = "Could not create kernel thread"; + r = -ENOMEM; + goto bad_thread; + } + + s->pending_pool = mempool_create_slab_pool(PENDING_MEMPOOL_SIZE, dm_multisnap_pending_exception_cache); + if (!s->pending_pool) { + ti->error = "Could not allocate mempool for pending exceptions"; + r = -ENOMEM; + goto bad_pending_pool; + } + + r = dm_kcopyd_client_create(MULTISNAP_KCOPYD_PAGES, &s->kcopyd); + if (r) { + ti->error = "Could not create kcopyd client"; + goto bad_kcopyd; + } + + r = s->store->init_exception_store(s, argc, argv, &ti->error); + if (r) + goto exception_store_error; + + ti->private = s; + ti->split_io = s->chunk_size >> SECTOR_SHIFT; + + mutex_lock(&all_multisnapshots_lock); + list_add(&s->list_all, &all_multisnapshots); + mutex_unlock(&all_multisnapshots_lock); + + return 0; + +exception_store_error: + dm_kcopyd_client_destroy(s->kcopyd); +bad_kcopyd: + mempool_destroy(s->pending_pool); +bad_pending_pool: + flush_workqueue(s->wq); + destroy_workqueue(s->wq); +bad_thread: + dm_put_device(ti, s->snapshot); +bad_snapshot: + dm_put_device(ti, s->origin); +bad_origin: + dm_multisnap_put_exception_store(s->store); +bad_store: + kfree(s); +bad_s: +bad_arguments: + return r; +} + +static void multisnap_origin_dtr(struct dm_target *ti) +{ + struct dm_multisnap *s = ti->private; + struct dm_multisnap_snap *sn; + unsigned i; + + 
mutex_lock(&all_multisnapshots_lock); + + /* Make sure that any more IOs won't be submitted by snapshot targets */ + list_for_each_entry(sn, &s->all_snaps, list_snaps) { + spin_lock(&dm_multisnap_bio_list_lock); + sn->s = NULL; + spin_unlock(&dm_multisnap_bio_list_lock); + } + list_del(&s->all_snaps); + + /* Wait for IOs on snapshots for this origin to finish */ +poll_for_ios: + spin_lock(&dm_multisnap_bio_list_lock); + if (!bio_list_empty(&s->bios)) { + spin_unlock(&dm_multisnap_bio_list_lock); + flush_workqueue(s->wq); + msleep(1); + goto poll_for_ios; + } + spin_unlock(&dm_multisnap_bio_list_lock); + + mutex_lock(&s->master_lock); + for (i = 0; i < PENDING_HASH_SIZE; i++) + if (!hlist_empty(&s->pending_hash[i])) { + mutex_unlock(&s->master_lock); + msleep(1); + goto poll_for_ios; + } + mutex_unlock(&s->master_lock); + + flush_workqueue(s->wq); + + mutex_lock(&s->master_lock); + s->store->commit(s); + s->store->exit_exception_store(s); + list_del(&s->list_all); + mutex_unlock(&s->master_lock); + + mutex_unlock(&all_multisnapshots_lock); + + dm_kcopyd_client_destroy(s->kcopyd); + s->kcopyd = NULL; + mempool_destroy(s->pending_pool); + s->pending_pool = NULL; + destroy_workqueue(s->wq); + s->wq = NULL; + dm_put_device(ti, s->snapshot); + s->snapshot = NULL; + dm_put_device(ti, s->origin); + s->origin = NULL; + dm_multisnap_put_exception_store(s->store); + kfree(s); +} + +static int multisnap_origin_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +{ + struct dm_multisnap *s = ti->private; + + /* do the most common case quickly */ + if (likely(bio_rw(bio) != WRITE)) { + bio->bi_bdev = s->origin->bdev; + return DM_MAPIO_REMAPPED; + } + + /* abuse bi_phys_segments field */ + bio->bi_flags &= ~(1 << BIO_SEG_VALID); + bio->bi_phys_segments = SNAPID_T_ORIGIN; + + dm_multisnap_enqueue_bio(s, bio); + wakeup_kmultisnapd(s); + + return DM_MAPIO_SUBMITTED; +} + +static int multisnap_origin_message(struct dm_target *ti, unsigned argc, char **argv) +{ + 
struct dm_multisnap *s = ti->private; + int r; + + mutex_lock(&all_multisnapshots_lock); + mutex_lock(&s->master_lock); + + if (argc == 1 && !strnicmp(argv[0], MESG_STR("create"))) { + drain_kcopyd_jobs(s); + + if ((r = dm_multisnap_has_error(s))) + goto unlock_ret; + + r = s->store->create_snapshot(s, &s->last_snapid); + if (r) + goto unlock_ret; + + r = dm_multisnap_has_error(s); + goto unlock_ret; + } + if (argc == 2 && !strnicmp(argv[0], MESG_STR("delete"))) { + char *snapid_end; + unsigned long snapid; + struct dm_multisnap_snap *sn; + struct bio *bio, *next; + + drain_kcopyd_jobs(s); + + snapid = simple_strtoul(argv[1], &snapid_end, 10); + if (!*argv[1] || *snapid_end || snapid == SNAPID_T_ORIGIN) { + DMWARN("invalid snapshot id."); + r = -EINVAL; + goto unlock_ret; + } + + if (!s->store->delete_snapshot) { + DMERR("snapshot store doesn't support delete"); + r = -EOPNOTSUPP; + goto unlock_ret; + } + + if ((r = dm_multisnap_has_error(s))) + goto unlock_ret; + + /* Kick off possibly attached snapshot */ + list_for_each_entry(sn, &s->all_snaps, list_snaps) { + if (sn->snapid == snapid) { + spin_lock(&dm_multisnap_bio_list_lock); + sn->s = NULL; + spin_unlock(&dm_multisnap_bio_list_lock); + } + } + + /* Terminate bios queues for this snapshot so far */ + spin_lock(&dm_multisnap_bio_list_lock); + bio = bio_list_get(&s->bios); + spin_unlock(&dm_multisnap_bio_list_lock); + for (; bio; bio = next) { + next = bio->bi_next; + bio->bi_next = NULL; + if (bio->bi_phys_segments == snapid) + bio_endio(bio, -EIO); + else + dm_multisnap_enqueue_bio(s, bio); + } + + r = s->store->delete_snapshot(s, snapid); + if (r) + goto unlock_ret; + + r = dm_multisnap_has_error(s); + goto unlock_ret; + } + + DMWARN("unrecognised message received."); + r = -EINVAL; + +unlock_ret: + mutex_unlock(&s->master_lock); + mutex_unlock(&all_multisnapshots_lock); + + return r; +} + +static int multisnap_origin_status(struct dm_target *ti, status_type_t type, char *result, unsigned maxlen) +{ + struct 
dm_multisnap *s = ti->private; + + mutex_lock(&s->master_lock); + + switch (type) { + case STATUSTYPE_INFO: + /* metadata/data/total */ + snprintf(result, maxlen, "%d %u", -dm_multisnap_has_error(s), s->last_snapid); + dm_multisnap_adjust_string(&result, &maxlen); + s->store->status_info(s, result, maxlen); + dm_multisnap_adjust_string(&result, &maxlen); + break; + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %s %s", s->store->name, s->origin->name, s->snapshot->name); + dm_multisnap_adjust_string(&result, &maxlen); + if (s->store->status_table) + s->store->status_table(s, result, maxlen); + dm_multisnap_adjust_string(&result, &maxlen); + break; + } + + mutex_unlock(&s->master_lock); + + /* If there's no space left in the buffer, ask for larger size */ + return maxlen <= 1; +} + +static int multisnap_snap_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + int r; + const char *origin_path; + char *snapid_str; + unsigned long snapid; + + struct dm_dev *origin; + + struct dm_multisnap *s; + struct dm_multisnap_snap *sn; + + if (argc != 2) { + ti->error = "Requires exactly 2 arguments"; + r = -EINVAL; + goto bad_arguments; + } + + origin_path = argv[0]; + snapid_str = argv[1]; + snapid = simple_strtoul(snapid_str, &snapid_str, 10); + if (*snapid_str) { + ti->error = "Invalid snapshot id"; + r = -EINVAL; + goto bad_arguments; + } + + r = dm_get_device(ti, origin_path, 0, 0, FMODE_READ | FMODE_WRITE, &origin); + if (r) { + ti->error = "Could not get origin device"; + goto bad_origin; + } + mutex_lock(&all_multisnapshots_lock); + s = find_multisnapshot(origin->bdev); + if (!s) { + r = -ENXIO; + ti->error = "Origin target not loaded"; + goto origin_not_loaded; + } + + mutex_lock(&s->master_lock); + if (!s->store->snapshot_exists(s, snapid)) { + mutex_unlock(&s->master_lock); + r = -ENOENT; + ti->error = "Snapshot with this id doesn't exist"; + goto snapid_doesnt_exist; + } + mutex_unlock(&s->master_lock); + + sn = kmalloc(sizeof(*sn), GFP_KERNEL); + if 
(!sn) { + ti->error = "Could not allocate multisnapshot_snap structure"; + r = -ENOMEM; + goto cant_allocate; + } + sn->s = s; + sn->snapid = snapid; + list_add(&sn->list_snaps, &s->all_snaps); + strlcpy(sn->origin_name, origin->name, sizeof sn->origin_name); + + mutex_unlock(&all_multisnapshots_lock); + + dm_put_device(ti, origin); + + ti->private = sn; + ti->split_io = s->chunk_size >> SECTOR_SHIFT; + + return 0; + +cant_allocate: +snapid_doesnt_exist: + dm_put_device(ti, origin); +origin_not_loaded: + mutex_unlock(&all_multisnapshots_lock); +bad_origin: +bad_arguments: + return r; +} + +static void multisnap_snap_dtr(struct dm_target *ti) +{ + struct dm_multisnap_snap *sn = ti->private; + + mutex_lock(&all_multisnapshots_lock); + + list_del(&sn->list_snaps); + kfree(sn); + + mutex_unlock(&all_multisnapshots_lock); +} + +static int multisnap_snap_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +{ + struct dm_multisnap_snap *sn = ti->private; + struct dm_multisnap *s; + + /* abuse bi_phys_segments field */ + bio->bi_flags &= ~(1 << BIO_SEG_VALID); + bio->bi_phys_segments = sn->snapid; + + spin_lock(&dm_multisnap_bio_list_lock); + s = sn->s; + if (!s) { + spin_unlock(&dm_multisnap_bio_list_lock); + return -EIO; + } + dm_multisnap_enqueue_bio_unlocked(s, bio); + spin_unlock(&dm_multisnap_bio_list_lock); + + wakeup_kmultisnapd(s); + + return DM_MAPIO_SUBMITTED; +} + +static int multisnap_snap_status(struct dm_target *ti, status_type_t type, char *result, unsigned maxlen) +{ + struct dm_multisnap_snap *sn = ti->private; + switch (type) { + case STATUSTYPE_INFO: + /* metadata/data/total */ + result[0] = 0; + dm_multisnap_adjust_string(&result, &maxlen); + break; + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %u", sn->origin_name, sn->snapid); + dm_multisnap_adjust_string(&result, &maxlen); + break; + } + /* If there's no space left in the buffer, ask for larger size */ + return maxlen <= 1; +} + +static struct target_type 
multisnap_origin_target = { + .name = "multisnapshot", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = multisnap_origin_ctr, + .dtr = multisnap_origin_dtr, + .map = multisnap_origin_map, + .message = multisnap_origin_message, + .status = multisnap_origin_status, +}; + +static struct target_type multisnap_snap_target = { + .name = "multisnap-snap", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = multisnap_snap_ctr, + .dtr = multisnap_snap_dtr, + .map = multisnap_snap_map, + .status = multisnap_snap_status, +}; + +static int __init dm_multisnapshot_init(void) +{ + int r; + + dm_multisnap_pending_exception_cache = kmem_cache_create( + "pending_cache", sizeof(struct dm_multisnap_pending_exception), + __alignof__(struct dm_multisnap_pending_exception), + 0, dm_multisnap_pending_exception_ctor); + if (!dm_multisnap_pending_exception_cache) { + DMERR("Couldn't create exception cache."); + r = -ENOMEM; + goto bad_exception_cache; + } + + r = dm_register_target(&multisnap_origin_target); + if (r < 0) { + DMERR("multisnap_origin_target target register failed %d", r); + goto bad_multisnap_origin_target; + } + + r = dm_register_target(&multisnap_snap_target); + if (r < 0) { + DMERR("multisnap_snap_target target register failed %d", r); + goto bad_multisnap_snap_target; + } + + return 0; + +bad_multisnap_snap_target: + dm_unregister_target(&multisnap_origin_target); +bad_multisnap_origin_target: + kmem_cache_destroy(dm_multisnap_pending_exception_cache); +bad_exception_cache: + return r; +} + +static void __exit dm_multisnapshot_exit(void) +{ + dm_unregister_target(&multisnap_origin_target); + dm_unregister_target(&multisnap_snap_target); + kmem_cache_destroy(dm_multisnap_pending_exception_cache); +} + +/* Module hooks */ +module_init(dm_multisnapshot_init); +module_exit(dm_multisnapshot_exit); + +MODULE_DESCRIPTION(DM_NAME " multisnapshot target"); +MODULE_AUTHOR("Mikulas Patocka"); +MODULE_LICENSE("GPL"); Index: 
linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. + */ + +#include "dm-multisnap-mikulas.h" + +static void find_load_commit_block(struct dm_multisnap *s) +{ + struct dm_buffer *bp; + struct multisnap_commit_block *cb; + chunk_t cb_addr = s->p->sb_commit_block; + __u64 sequence; + __u64 dev_size; + int bitmap_depth; + unsigned i; + s->p->valid_commit_block = 0; + s->p->commit_sequence = 0; + +try_next: + cb = dm_bufio_read(s->p->bufio, cb_addr, &bp); + if (IS_ERR(cb)) { + DMERR("load_commit_block: can't read commit block %Lx", (unsigned long long)cb_addr); + dm_multisnap_set_error(s, PTR_ERR(cb)); + return; + } + if (cb->signature != CB_SIGNATURE) { + dm_bufio_release(bp); + DMERR("load_commit_block: bad signature on commit block %Lx", (unsigned long long)cb_addr); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + sequence = le64_to_cpu(cb->sequence); + dev_size = read_48(cb, dev_size); + + dm_bufio_release(bp); + + if (sequence > s->p->commit_sequence) { + s->p->commit_sequence = sequence; + s->p->valid_commit_block = cb_addr; + if ((__u64)cb_addr + CB_STRIDE < dev_size) { + cb_addr += CB_STRIDE; + goto try_next; + } + } + if (!s->p->valid_commit_block) { + DMERR("load_commit_block: no valid commit block"); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + cb = dm_bufio_read(s->p->bufio, s->p->valid_commit_block, &bp); + if (IS_ERR(cb)) { + DMERR("load_commit_block: can't re-read commit block %Lx", (unsigned long long)s->p->valid_commit_block); + dm_multisnap_set_error(s, PTR_ERR(cb)); + return; + } + if (cb->signature != CB_SIGNATURE) { + dm_bufio_release(bp); + DMERR("load_commit_block: bad 
signature when re-reading commit block %Lx", (unsigned long long)s->p->valid_commit_block); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + dev_size = read_48(cb, dev_size); + s->p->total_allocated = read_48(cb, total_allocated); + s->p->data_allocated = read_48(cb, data_allocated); + s->p->bitmap_root = read_48(cb, bitmap_root); + s->p->alloc_rover = read_48(cb, alloc_rover); + s->p->bt_root = read_48(cb, bt_root); + s->p->snapshot_num = le32_to_cpu(cb->snapshot_num); + s->p->bt_depth = cb->bt_depth; + + if (s->p->bt_depth > MAX_BT_DEPTH || !s->p->bt_depth) { + dm_bufio_release(bp); + DMERR("load_commit_block: invalid b+-tree depth in commit block %Lx", (unsigned long long)s->p->valid_commit_block); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + for (i = 0; i < TMP_REMAP_HASH_SIZE; i++) + INIT_HLIST_HEAD(&s->p->tmp_remap[i]); + s->p->n_used_tmp_remaps = 0; + INIT_LIST_HEAD(&s->p->used_bitmap_tmp_remaps); + INIT_LIST_HEAD(&s->p->used_bt_tmp_remaps); + INIT_LIST_HEAD(&s->p->free_tmp_remaps); + + for (i = 0; i < N_REMAPS; i++) { + struct tmp_remap *t = &s->p->tmp_remap_store[i]; + if (read_48(&cb->tmp_remap[i], old)) { + t->old = read_48(&cb->tmp_remap[i], old); + t->new = read_48(&cb->tmp_remap[i], new); + t->uncommitted = 0; + t->bitmap_idx = le32_to_cpu(cb->tmp_remap[i].bitmap_idx); + hlist_add_head(&t->hash_list, &s->p->tmp_remap[TMP_REMAP_HASH(t->old)]); + if (t->bitmap_idx == CB_BITMAP_IDX_NONE) + list_add(&t->list, &s->p->used_bt_tmp_remaps); + else + list_add(&t->list, &s->p->used_bitmap_tmp_remaps); + s->p->n_used_tmp_remaps++; + } else { + list_add(&t->list, &s->p->free_tmp_remaps); + } + } + + dm_bufio_release(bp); + + if ((dev_size + CB_STRIDE) != (chunk_t)(dev_size + CB_STRIDE)) { + DMERR("load_commit_block: device is too large. 
Compile kernel with 64-bit sector numbers"); + dm_multisnap_set_error(s, -ERANGE); + return; + } + bitmap_depth = dm_multisnap_bitmap_depth(s->chunk_size, dev_size); + if (bitmap_depth < 0) { + DMERR("load_commit_block: device is too large"); + dm_multisnap_set_error(s, bitmap_depth); + return; + } + s->p->dev_size = dev_size; + s->p->bitmap_depth = bitmap_depth; + + return; +} + +static void initialize_device(struct dm_multisnap *s) +{ + int r; + struct dm_buffer *bp; + struct multisnap_superblock *sb; + struct multisnap_commit_block *cb; + chunk_t cb_block; + chunk_t block_to_write; + __u64 dev_size; + + dev_size = i_size_read(s->snapshot->bdev->bd_inode) >> s->chunk_shift; + if ((dev_size + CB_STRIDE) != (chunk_t)(dev_size + CB_STRIDE)) { + DMERR("initialize_device: device is too large. Compile kernel with 64-bit sector numbers"); + dm_multisnap_set_error(s, -ERANGE); + return; + } + s->p->dev_size = dev_size; + s->p->total_allocated = 0; + s->p->data_allocated = 0; + + block_to_write = SB_BLOCK + 1; + +/* Write btree */ + dm_multisnap_create_btree(s, &block_to_write); + if (dm_multisnap_has_error(s)) + return; + +/* Write bitmaps */ + dm_multisnap_create_bitmaps(s, block_to_write); + if (dm_multisnap_has_error(s)) + return; + +/* Write commit blocks */ + if (FIRST_CB_BLOCK >= dev_size) { + DMERR("initialize_device: device is too small"); + dm_multisnap_set_error(s, -ENOSPC); + return; + } + for (cb_block = FIRST_CB_BLOCK; cb_block < s->p->dev_size; cb_block += CB_STRIDE) { + cb = dm_bufio_new(s->p->bufio, cb_block, &bp); + if (IS_ERR(cb)) { + DMERR("initialize_device: can't allocate commit block at %Lx", (unsigned long long)cb_block); + dm_multisnap_set_error(s, PTR_ERR(cb)); + return; + } + memset(cb, 0, s->chunk_size); + cb->signature = CB_SIGNATURE; + cb->sequence = cpu_to_le64(cb_block == FIRST_CB_BLOCK); + if (cb_block == FIRST_CB_BLOCK) { + cb->snapshot_num = cpu_to_le32(0); + write_48(cb, dev_size, s->p->dev_size); + write_48(cb, total_allocated, 
s->p->total_allocated); + write_48(cb, data_allocated, s->p->data_allocated); + write_48(cb, bitmap_root, s->p->bitmap_root); + write_48(cb, bt_root, s->p->bt_root); + cb->bt_depth = s->p->bt_depth; + } + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + } + r = dm_bufio_write_dirty_buffers(s->p->bufio); + if (r) { + DMERR("initialize_device: write error when initializing device"); + dm_multisnap_set_error(s, r); + return; + } + +/* Write super block */ + sb = dm_bufio_new(s->p->bufio, SB_BLOCK, &bp); + if (IS_ERR(sb)) { + DMERR("initialize_device: can't allocate super block"); + dm_multisnap_set_error(s, PTR_ERR(sb)); + return; + } + memset(sb, 0, s->chunk_size); + sb->signature = SB_SIGNATURE; + sb->chunk_size = cpu_to_le32(s->chunk_size); + sb->commit_block = cpu_to_le64(FIRST_CB_BLOCK); + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + r = dm_bufio_write_dirty_buffers(s->p->bufio); + if (r) { + DMERR("initialize_device: can't write super block"); + dm_multisnap_set_error(s, r); + return; + } +} + +static int read_super(struct dm_multisnap *s, char **error) +{ + struct dm_buffer *bp; + struct multisnap_superblock *sb; + int initialized; + + initialized = 0; +re_read: + sb = dm_bufio_read(s->p->bufio, SB_BLOCK, &bp); + if (IS_ERR(sb)) { + *error = "Could not read superblock"; + return PTR_ERR(sb); + } + + if (sb->signature != SB_SIGNATURE) { + int i; + if (initialized) { + *error = "Invalid signature after initialization"; + return -EIO; + } + for (i = 0; i < 1 << SECTOR_SHIFT; i++) { + if (((char *)sb)[i]) { + dm_bufio_release(bp); + *error = "Uninitialized device"; + return -ENXIO; + } + } + dm_bufio_release(bp); + initialize_device(s); + if (dm_multisnap_has_error(s)) { + *error = "Can't initialize device"; + return dm_multisnap_has_error(s); + } + initialized = 1; + goto re_read; + } + if (le32_to_cpu(sb->chunk_size) != s->chunk_size) { + dm_bufio_release(bp); + *error = "Bad chunk size"; + return -EINVAL; + } + s->p->sb_commit_block = 
le64_to_cpu(sb->commit_block); + dm_bufio_release(bp); + + find_load_commit_block(s); + + if (dm_multisnap_has_error(s)) { + *error = "Unable to load commit block"; + return dm_multisnap_has_error(s); + } + + dm_multisnap_read_snapshots(s); + if (dm_multisnap_has_error(s)) { + *error = "Could not read snapshot list"; + return dm_multisnap_has_error(s); + } + + return 0; +} + +static void dm_multisnap_status_table(struct dm_multisnap *s, char *result, unsigned maxlen) +{ + snprintf(result, maxlen, " %u", s->chunk_size); +} + +static int dm_multisnap_mikulas_init(struct dm_multisnap *s, unsigned argc, char **argv, char **error) +{ + int r; + unsigned long chunk_size; + char *chunk_size_str; + + if (argc != 1) { + *error = "Bad number of arguments"; + r = -EINVAL; + goto bad_arguments; + } + + chunk_size_str = argv[0]; + chunk_size = simple_strtoul(chunk_size_str, &chunk_size_str, 10); + if (*chunk_size_str || chunk_size < 512 || chunk_size > PAGE_SIZE || chunk_size & (chunk_size - 1)) { + *error = "Invalid chunk size"; + r = -EINVAL; + goto bad_arguments; + } + s->chunk_size = chunk_size; + s->chunk_shift = ffs(chunk_size) - 1; + + s->p = kmalloc(sizeof(struct exception_store_private), GFP_KERNEL); + if (!s->p) { + *error = "Can't allocate private exception store structure"; + r = -ENOMEM; + goto bad_private; + } + + s->p->active_snapshots = RB_ROOT; + s->p->n_preallocated_blocks = 0; + s->p->query_active = 0; + + s->p->tmp_chunk = kmalloc(s->chunk_size + sizeof(struct dm_multisnap_bt_entry), GFP_KERNEL); + if (!s->p->tmp_chunk) { + *error = "Can't allocate temporary chunk"; + r = -ENOMEM; + goto bad_tmp_chunk; + } + + s->p->bufio = dm_bufio_client_create(s->snapshot->bdev, s->chunk_size); + if (IS_ERR(s->p->bufio)) { + *error = "Can't create bufio client"; + r = PTR_ERR(s->p->bufio); + goto bad_bufio; + } + + r = read_super(s, error); + if (r) + goto bad_super; + + return 0; + +bad_super: + dm_bufio_client_destroy(s->p->bufio); +bad_bufio: + kfree(s->p->tmp_chunk); 
+bad_tmp_chunk: + kfree(s->p); +bad_private: +bad_arguments: + return r; +} + +static void dm_multisnap_mikulas_exit(struct dm_multisnap *s) +{ + int i; + + i = 0; + while (!list_empty(&s->p->used_bitmap_tmp_remaps)) { + struct tmp_remap *t = list_first_entry(&s->p->used_bitmap_tmp_remaps, struct tmp_remap, list); + list_del(&t->list); + hlist_del(&t->hash_list); + i++; + } + + while (!list_empty(&s->p->used_bt_tmp_remaps)) { + struct tmp_remap *t = list_first_entry(&s->p->used_bt_tmp_remaps, struct tmp_remap, list); + list_del(&t->list); + hlist_del(&t->hash_list); + i++; + } + + BUG_ON(i != s->p->n_used_tmp_remaps); + while (!list_empty(&s->p->free_tmp_remaps)) { + struct tmp_remap *t = list_first_entry(&s->p->free_tmp_remaps, struct tmp_remap, list); + list_del(&t->list); + i++; + } + BUG_ON(i != N_REMAPS); + + for (i = 0; i < TMP_REMAP_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&s->p->tmp_remap[i])); + + dm_bufio_client_destroy(s->p->bufio); + s->p->bufio = NULL; + kfree(s->p->tmp_chunk); + s->p->tmp_chunk = NULL; + kfree(s->p); + s->p = NULL; +} + +struct dm_multisnap_exception_store dm_multisnap_mikulas_store = { + .name = "mikulas", + .module = THIS_MODULE, + .init_exception_store = dm_multisnap_mikulas_init, + .exit_exception_store = dm_multisnap_mikulas_exit, + .status_info = dm_multisnap_status_info, + .status_table = dm_multisnap_status_table, + .create_snapshot = dm_multisnap_create_snapshot, + .snapshot_exists = dm_multisnap_snapshot_exists, + .find_snapshot_chunk = dm_multisnap_find_snapshot_chunk, + .reset_query = dm_multisnap_reset_query, + .query_next_remap = dm_multisnap_query_next_remap, + .add_next_remap = dm_multisnap_add_next_remap, + .check_conflict = dm_multisnap_check_conflict, + .make_chunk_writeable = dm_multisnap_make_chunk_writeable, + .commit = dm_multisnap_commit, +}; + +static int __init dm_multisnapshot_mikulas_module_init(void) +{ + return dm_multisnap_register_exception_store(&dm_multisnap_mikulas_store); +} + +static void __exit 
dm_multisnapshot_mikulas_module_exit(void) +{ + dm_multisnap_unregister_exception_store(&dm_multisnap_mikulas_store); +} + +module_init(dm_multisnapshot_mikulas_module_init); +module_exit(dm_multisnapshot_mikulas_module_exit); + +MODULE_DESCRIPTION(DM_NAME " multisnapshot Mikulas' exceptions store"); +MODULE_AUTHOR("Mikulas Patocka"); +MODULE_LICENSE("GPL"); + Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap.h 2009-02-03 09:08:42.000000000 +0100 @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. + */ + +#ifndef DM_MULTISNAP_H +#define DM_MULTISNAP_H + +#include +#include +#include +#include +#include +#include + +#include "dm-bio-list.h" + +#define EFSERROR EPERM + +#define DM_MSG_PREFIX "multisnapshot" + +#define PENDING_HASH_SIZE 256 +#define PENDING_HASH(c) ((c) & (PENDING_HASH_SIZE - 1)) + +#define MULTISNAP_KCOPYD_PAGES (((1UL << 20) >> PAGE_SHIFT) ? 
: 1) + +#define MAX_CHUNKS_TO_REMAP DM_KCOPYD_MAX_REGIONS + +#define PENDING_MEMPOOL_SIZE 256 +/* Reserved snapshot ids: SNAPID_T_ORIGIN and SNAPID_T_SENTINEL share the value 0xffffffff; SNAPID_T_MAX is one below it. */ +#define SNAPID_T_ORIGIN 0xffffffff +#define SNAPID_T_SENTINEL 0xffffffff +#define SNAPID_T_MAX 0xfffffffe + +typedef sector_t chunk_t; +typedef __u32 snapid_t; +/* State of one multisnapshot instance: the origin and snapshot-store devices, the attached exception store with its private data (p), the sticky error code, chunk geometry, and the generic code's lock, work queue, bio list, pools and lists. */ +struct dm_multisnap { + struct dm_dev *origin; + struct dm_dev *snapshot; + + struct exception_store_private *p; + struct dm_multisnap_exception_store *store; + + int error; + + unsigned chunk_size; + unsigned chunk_shift; + + struct mutex master_lock; + struct workqueue_struct *wq; + struct work_struct work; + struct bio_list bios; + + mempool_t *pending_pool; + + struct dm_kcopyd_client *kcopyd; + atomic_t n_kcopyd_jobs; + + /* This may only be accessed from kcopyd callback, it has no locking */ + struct list_head pes_waiting_for_commit; + + /* List head for struct dm_multisnap_pending_exception->hash_list */ + struct hlist_head pending_hash[PENDING_HASH_SIZE]; + + int pending_mempool_allocation_failed; + + /* The last created snapshot id */ + snapid_t last_snapid; + + /* List head for struct dm_multisnap_snap->list_snaps */ + struct list_head all_snaps; + + /* List entry for all_multisnapshots */ + struct list_head list_all; + +}; +/* One attached snapshot target: the dm_multisnap it belongs to, its snapshot id, and its linkage on that dm_multisnap's all_snaps list. */ +struct dm_multisnap_snap { + struct dm_multisnap *s; + snapid_t snapid; + /* List entry for struct dm_multisnap->all_snaps */ + struct list_head list_snaps; + char origin_name[16]; +}; +/* Set of snapshots a chunk operation applies to, either as a 64-bit bitmask or as a from/to snapshot-id range. NOTE(review): which member is valid is presumably decided by the exception store in use; confirm. */ +union chunk_descriptor { + __u64 bitmask; + struct { + snapid_t from; + snapid_t to; + } range; +}; +/* Pending exception for one chunk: the bios queued on it and up to MAX_CHUNKS_TO_REMAP chunk descriptors (n_descs of them in use). */ +struct dm_multisnap_pending_exception { + /* List entry for struct dm_multisnap->pending_hash */ + struct hlist_node hash_list; + + struct dm_multisnap *s; + struct bio_list bios; + + chunk_t chunk; + + int n_descs; + union chunk_descriptor desc[MAX_CHUNKS_TO_REMAP]; + + /* List entry for struct dm_multisnap->pes_waiting_for_commit */ + struct list_head list; +}; + +struct dm_multisnap_exception_store { + struct list_head list; + struct module *module; + const char *name; + + /* < 0 - error */ 
+ int (*init_exception_store)(struct dm_multisnap *s, unsigned argc, char **argv, char **error); + + void (*exit_exception_store)(struct dm_multisnap *s); + void (*status_info)(struct dm_multisnap *s, char *result, unsigned maxlen); + void (*status_table)(struct dm_multisnap *s, char *result, unsigned maxlen); + /* < 0 - error */ + int (*create_snapshot)(struct dm_multisnap *s, snapid_t *snapid); + + /* < 0 - error */ + int (*delete_snapshot)(struct dm_multisnap *s, snapid_t snapid); + + /* 0 - doesn't exist, 1 - exists */ + int (*snapshot_exists)(struct dm_multisnap *s, snapid_t snapid); + + /* 0 - not found, 1 - found (read-only), 2 - found (writeable), < 0 - error */ + int (*find_snapshot_chunk)(struct dm_multisnap *s, snapid_t id, chunk_t chunk, chunk_t *result); + + /* + * Chunk interface between exception store and generic code. + * Allowed sequences: + * + * - first call reset_query + * then repeatedly query next exception to make with query_next_remap + * and add it to btree with add_next_remap. This can be repeated until + * query_next_remap indicates that it has nothing more or until all 8 + * kcopyd slots are filled. + * + * - call find_snapshot_chunk, if it returns 0, you can call + * add_next_remap to add the chunk to the btree. + * + * - call find_snapshot_chunk, if it returns 1 (shared chunk), call + * make_chunk_writeable to relocate that chunk. 
+ */ + + void (*reset_query)(struct dm_multisnap *s); + int (*query_next_remap)(struct dm_multisnap *s, chunk_t chunk); + void (*add_next_remap)(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk); + void (*make_chunk_writeable)(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk); + int (*check_conflict)(struct dm_multisnap *s, union chunk_descriptor *cd, snapid_t snapid); + + void (*commit)(struct dm_multisnap *s); +}; + +static inline void dm_multisnap_adjust_string(char **result, unsigned *maxlen) +{ + unsigned len = strlen(*result); + *result += len; + *maxlen -= len; +} + +/* dm-multisnap.c */ + +void dm_multisnap_set_error(struct dm_multisnap *s, int error); +int dm_multisnap_has_error(struct dm_multisnap *s); + +int dm_multisnap_register_exception_store(struct dm_multisnap_exception_store *store); +void dm_multisnap_unregister_exception_store(struct dm_multisnap_exception_store *store); + +#endif Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-blocks.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-blocks.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#include "dm-multisnap-mikulas.h" +/* Search hash bucket TMP_REMAP_HASH(block) of s->p->tmp_remap for the temporary remap whose ->old equals block. Returns the entry, or NULL when the block has no remap. */ +static struct tmp_remap *find_tmp_remap(struct dm_multisnap *s, chunk_t block) +{ + struct tmp_remap *t; + struct hlist_node *hn; + unsigned hash = TMP_REMAP_HASH(block); + hlist_for_each_entry(t, hn, &s->p->tmp_remap[hash], hash_list) + if (t->old == block) + return t; + return NULL; +} +/* Read a block through bufio, following its temporary remap (t->new) when one exists. Calls cond_resched() first. On success returns the block data and stores the buffer handle in *bp; on a read error logs it, records the error on s with dm_multisnap_set_error() and returns NULL. */ +void *dm_multisnap_read_block(struct dm_multisnap *s, chunk_t block, struct dm_buffer **bp) +{ + void *buf; + struct tmp_remap *t; + cond_resched(); + t = find_tmp_remap(s, block); + if (t) + block = t->new; + buf = dm_bufio_read(s->p->bufio, block, bp); + if (unlikely(IS_ERR(buf))) { + DMERR("dm_multisnap_read_block: error read chunk %Lx", (unsigned long long)block); + dm_multisnap_set_error(s, PTR_ERR(buf)); + return NULL; + } + return buf; +} +/* Returns nonzero when chunk has a temporary remap whose uncommitted flag is set, zero otherwise (including when it has no remap at all). */ +int dm_multisnap_block_is_uncommitted(struct dm_multisnap *s, chunk_t chunk) +{ + struct tmp_remap *t = find_tmp_remap(s, chunk); + return t && t->uncommitted; +} +/* Redirect old_chunk to new_chunk through a temporary remap and move the held buffer *bp to new_chunk. If old_chunk already has a remap, its bitmap_idx must equal bitmap_idx; the previous target block is freed and replaced. Otherwise an entry is taken from free_tmp_remaps, filled in, hashed and counted in n_used_tmp_remaps. Either way the entry is moved to the tail of used_bt_tmp_remaps (bitmap_idx == CB_BITMAP_IDX_NONE) or used_bitmap_tmp_remaps and marked uncommitted. Returns the data of new_chunk with *bp updated, or NULL after recording an error on s. NOTE(review): the two early error returns leave *bp untouched, so releasing it is presumably up to the caller; confirm. */ +void *dm_multisnap_duplicate_block(struct dm_multisnap *s, chunk_t old_chunk, chunk_t new_chunk, bitmap_t bitmap_idx, struct dm_buffer **bp) +{ + void *buf; + struct tmp_remap *t = find_tmp_remap(s, old_chunk); + if (t) { + if (unlikely(t->bitmap_idx != bitmap_idx)) { + DMERR("dm_multisnap_duplicate_block: bitmap_idx doesn't match, %X != %X", t->bitmap_idx, bitmap_idx); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + dm_multisnap_free_block(s, t->new); + t->new = new_chunk; + } else { + if (unlikely(list_empty(&s->p->free_tmp_remaps))) { + DMERR("dm_multisnap_duplicate_block: all remap blocks used"); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + t = list_first_entry(&s->p->free_tmp_remaps, struct tmp_remap, list); + t->new = new_chunk; + t->old = old_chunk; + t->bitmap_idx = bitmap_idx; + hlist_add_head(&t->hash_list, &s->p->tmp_remap[TMP_REMAP_HASH(old_chunk)]); + s->p->n_used_tmp_remaps++; + } + list_del(&t->list); + if (bitmap_idx == CB_BITMAP_IDX_NONE) + list_add_tail(&t->list, 
&s->p->used_bt_tmp_remaps); + else + list_add_tail(&t->list, &s->p->used_bitmap_tmp_remaps); + t->uncommitted = 1; + dm_bufio_release_move(*bp, new_chunk); + buf = dm_bufio_read(s->p->bufio, new_chunk, bp); + if (IS_ERR(buf)) { + DMERR("dm_multisnap_duplicate_block: error reading chunk %Lx", (unsigned long long)new_chunk); + dm_multisnap_set_error(s, PTR_ERR(buf)); + return NULL; + } + return buf; +} +/* Drop the temporary remap of chunk, if it has one; the likely case is that it has none and nothing is done. */ +void dm_multisnap_delete_remapped_block(struct dm_multisnap *s, chunk_t chunk) +{ + struct tmp_remap *t = find_tmp_remap(s, chunk); + if (likely(!t)) + return; + dm_multisnap_free_tmp_remap(s, t); +} +/* Return a used remap entry to the free pool: unlink it from its used list and from the hash, decrement n_used_tmp_remaps and add it to free_tmp_remaps. */ +void dm_multisnap_free_tmp_remap(struct dm_multisnap *s, struct tmp_remap *t) +{ + list_del(&t->list); + hlist_del(&t->hash_list); + s->p->n_used_tmp_remaps--; + list_add(&t->list, &s->p->free_tmp_remaps); +} +/* Obtain a buffer for new_chunk with dm_bufio_new(). Returns the buffer data with the handle stored in *bp, or NULL after logging and recording the error on s. */ +void *dm_multisnap_make_block(struct dm_multisnap *s, chunk_t new_chunk, struct dm_buffer **bp) +{ + void *buf; + /* !!! FIXME: add it to the list of recently allocated blocks */ + + buf = dm_bufio_new(s->p->bufio, new_chunk, bp); + if (unlikely(IS_ERR(buf))) { + DMERR("dm_multisnap_make_block: error creating new block at chunk %Lx", (unsigned long long)new_chunk); + dm_multisnap_set_error(s, PTR_ERR(buf)); + return NULL; + } + return buf; +} +/* Returns nonzero when block is a commit-block position: not below FIRST_CB_BLOCK and congruent to FIRST_CB_BLOCK modulo CB_STRIDE. sector_div() divides its by-value copy of block in place and yields the remainder. The s argument is not used. */ +int dm_multisnap_is_cb_block(struct dm_multisnap *s, chunk_t block) +{ + if (block < FIRST_CB_BLOCK) return 0; + return sector_div(block, CB_STRIDE) == FIRST_CB_BLOCK % CB_STRIDE; +} + Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-btree.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-btree.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,454 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#include "dm-multisnap-mikulas.h" + +static void add_at_idx(struct dm_multisnap_bt_node *node, unsigned index, struct bt_key *key, chunk_t new_chunk); + +static struct dm_multisnap_bt_node *dm_multisnap_read_btnode(struct dm_multisnap *s, chunk_t block, struct dm_buffer **bp) +{ + struct dm_multisnap_bt_node *node; + + node = dm_multisnap_read_block(s, block, bp); + if (unlikely(!node)) + return NULL; + + if (unlikely(node->signature != BT_SIGNATURE)) { + dm_bufio_release(*bp); + DMERR("dm_multisnap_read_btnode: bad signature on btree node %Lx", (unsigned long long)block); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + + if (unlikely((unsigned)(le16_to_cpu(node->n_entries) - 1) >= dm_multisnap_btree_entries(s->chunk_size))) { + dm_bufio_release(*bp); + DMERR("dm_multisnap_read_btnode: bad number of entries in btree node %Lx", (unsigned long long)block); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + + return node; +} + +void dm_multisnap_create_btree(struct dm_multisnap *s, chunk_t *writing_block) +{ + struct dm_buffer *bp; + struct dm_multisnap_bt_node *node; + struct bt_key new_key; + + while (dm_multisnap_is_cb_block(s, *writing_block)) + (*writing_block)++; + + if (*writing_block >= s->p->dev_size) { + DMERR("dm_multisnap_create_btree: device is too small"); + dm_multisnap_set_error(s, -ENOSPC); + return; + } + + node = dm_bufio_new(s->p->bufio, *writing_block, &bp); + if (IS_ERR(node)) { + DMERR("dm_multisnap_create_btree: 't create direct bitmap block at %Lx", (unsigned long long)*writing_block); + dm_multisnap_set_error(s, PTR_ERR(node)); + return; + } + memset(node, 0, s->chunk_size); + node->signature = BT_SIGNATURE; + node->n_entries = cpu_to_le16(0); + + /* + * A btree node must have at least one entry --- so create this empty + * one + */ + new_key.snap_from = new_key.snap_to = SNAPID_T_SENTINEL; + new_key.chunk = CHUNK_T_SENTINEL; + add_at_idx(node, 0, &new_key, 0); + + dm_bufio_mark_buffer_dirty(bp); + 
dm_bufio_release(bp); + s->p->bt_root = *writing_block; + s->p->bt_depth = 1; + (*writing_block)++; +} + +static int compare_key(struct dm_multisnap_bt_entry *e, struct bt_key *key) +{ + chunk_t orig_chunk = read_48(e, orig_chunk); + if (orig_chunk < key->chunk) + return -1; + if (orig_chunk > key->chunk) + return 1; + + if (le32_to_cpu(e->snap_to) < key->snap_from) + return -1; + if (le32_to_cpu(e->snap_from) > key->snap_to) + return 1; + + return 0; +} + +/* + * Returns: 1 - found, 0 - not found + * *result - if found, then the first entry in the requested range + * - if not found, then the first entry after the requested range + */ + +static int binary_search(struct dm_multisnap_bt_node *node, struct bt_key *key, unsigned *result) +{ + int c; + int first = 0; + int last = le16_to_cpu(node->n_entries) - 1; + + while (1) { + int middle = (first + last) >> 1; + struct dm_multisnap_bt_entry *e = &node->entries[middle]; + + c = compare_key(e, key); + + if (first == last) + break; + + if (c < 0) + first = middle + 1; + else + last = middle; + } + + *result = first; + return !c; +} + +/* + * Returns: 1 - found, 0 - not found, -1 - error + * In case of not error (0 or 1 is returned), the node and held buffer for + * this node is returned. 
+ */ + +static int walk_btree(struct dm_multisnap *s, struct bt_key *key, struct dm_multisnap_bt_node **nodep, struct dm_buffer **bp, struct path_element path[MAX_BT_DEPTH]) +{ +#define node (*nodep) + int r; + chunk_t block = s->p->bt_root; + unsigned d = 0; + while (1) { + path[d].block = block; + node = dm_multisnap_read_btnode(s, block, bp); + if (!node) + return -1; + if (d != s->p->bt_depth - 1) { + struct dm_multisnap_bt_entry *be = &node->entries[le16_to_cpu(node->n_entries) - 1]; + if (unlikely(read_48(be, orig_chunk) != CHUNK_T_SENTINEL) || + unlikely(le32_to_cpu(be->snap_from) != SNAPID_T_SENTINEL) || + unlikely(le32_to_cpu(be->snap_to) != SNAPID_T_SENTINEL)) { + dm_bufio_release(*bp); + DMERR("walk_btree: node at %Lx in depth %d doesn't have sentinel record, search for %Lx, %x-%x", (unsigned long long)block, d, (unsigned long long)key->chunk, key->snap_from, key->snap_to); + dm_multisnap_set_error(s, -EFSERROR); + return -1; + } + } + r = binary_search(node, key, &path[d].idx); + block = read_48(&node->entries[path[d].idx], new_chunk); + if (++d == s->p->bt_depth) + break; + dm_bufio_release(*bp); + } + if (unlikely(compare_key(&node->entries[path[s->p->bt_depth - 1].idx], key) < 0)) + path[s->p->bt_depth - 1].idx++; + return r; +#undef node +} + +/* + * Returns: 1 - found, 0 - not found, -1 - error + * In case the node is found, key contains updated key and result contains + * the resulting chunk. 
+ */ + +int dm_multisnap_find_in_btree(struct dm_multisnap *s, struct bt_key *key, chunk_t *result) +{ + struct dm_multisnap_bt_node *node; + struct path_element path[MAX_BT_DEPTH]; + struct dm_buffer *bp; + + int r = walk_btree(s, key, &node, &bp, path); + if (unlikely(r < 0)) + return r; + + if (r) { + struct dm_multisnap_bt_entry *entry = &node->entries[path[s->p->bt_depth - 1].idx]; + *result = read_48(entry, new_chunk); + key->chunk = read_48(entry, orig_chunk); + key->snap_from = le32_to_cpu(entry->snap_from); + key->snap_to = le32_to_cpu(entry->snap_to); + } + dm_bufio_release(bp); + + return r; +} + +static void add_at_idx(struct dm_multisnap_bt_node *node, unsigned index, struct bt_key *key, chunk_t new_chunk) +{ + memmove(&node->entries[index + 1], &node->entries[index], (le16_to_cpu(node->n_entries) - index) * sizeof(struct dm_multisnap_bt_entry)); + write_48(&node->entries[index], orig_chunk, key->chunk); + if (sizeof(chunk_t) == 4 && unlikely(key->chunk > CHUNK_T_MAX)) + node->entries[index].orig_chunk2 = cpu_to_le16(0xffff); + write_48(&node->entries[index], new_chunk, new_chunk); + node->entries[index].snap_from = cpu_to_le32(key->snap_from); + node->entries[index].snap_to = cpu_to_le32(key->snap_to); + node->n_entries = cpu_to_le16(le16_to_cpu(node->n_entries) + 1); +} + +static void middle_key(struct dm_multisnap_bt_entry *e1, struct dm_multisnap_bt_entry *e2, struct bt_key *result) +{ + __u32 snap1, snap2; + chunk_t chunk1 = read_48(e1, orig_chunk); + chunk_t chunk2 = read_48(e2, orig_chunk); + result->chunk = (chunk1 + chunk2) >> 1; + if (chunk1 != chunk2) { + result->snap_from = result->snap_to = (snapid_t)-1; + } else { + snap1 = le32_to_cpu(e1->snap_to); + snap2 = le32_to_cpu(e2->snap_from); + result->snap_from = result->snap_to = (snap1 + snap2) >> 1; + } +} + +void dm_multisnap_add_to_btree(struct dm_multisnap *s, struct bt_key *key, chunk_t new_chunk) +{ + struct dm_multisnap_bt_node *node; + struct dm_buffer *bp; + struct path_element 
path[MAX_BT_DEPTH]; + unsigned depth; + + unsigned split_entries, split_index, split_offset, split_size; + struct bt_key new_key; + chunk_t new_root; + + int r = walk_btree(s, key, &node, &bp, path); + + if (unlikely(r)) { + if (r > 0) { + dm_bufio_release(bp); + DMERR("dm_multisnap_add_to_btree: adding key that already exists: %Lx, %x-%x", (unsigned long long)key->chunk, key->snap_from, key->snap_to); + dm_multisnap_set_error(s, -EFSERROR); + } + return; + } + + depth = s->p->bt_depth - 1; + +go_up: + node = dm_multisnap_alloc_duplicate_block(s, path[depth].block, &bp, node); + if (unlikely(!node)) + return; + + if (likely(le16_to_cpu(node->n_entries) < dm_multisnap_btree_entries(s->chunk_size))) { + add_at_idx(node, path[depth].idx, key, new_chunk); + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + return; + } + memcpy(s->p->tmp_chunk, node, s->chunk_size); + add_at_idx(s->p->tmp_chunk, path[depth].idx, key, new_chunk); + + split_entries = le16_to_cpu(((struct dm_multisnap_bt_node *)s->p->tmp_chunk)->n_entries); + split_index = split_entries / 2; + split_offset = sizeof(struct dm_multisnap_bt_node) + split_index * sizeof(struct dm_multisnap_bt_entry); + split_size = sizeof(struct dm_multisnap_bt_node) + split_entries * sizeof(struct dm_multisnap_bt_entry); + memcpy(node, s->p->tmp_chunk, sizeof(struct dm_multisnap_bt_node)); + memcpy((char *)node + sizeof(struct dm_multisnap_bt_node), (char *)s->p->tmp_chunk + split_offset, split_size - split_offset); + memset((char *)node + sizeof(struct dm_multisnap_bt_node) + split_size - split_offset, 0, s->chunk_size - (sizeof(struct dm_multisnap_bt_node) + split_size - split_offset)); + node->n_entries = cpu_to_le16(split_entries - split_index); + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + node = dm_multisnap_alloc_make_block(s, &new_chunk, &bp); + if (unlikely(!node)) + return; + + memcpy(node, s->p->tmp_chunk, split_offset); + memset((char *)node + split_offset, 0, s->chunk_size - 
split_offset); + node->n_entries = cpu_to_le16(split_index); + + if (likely(depth == s->p->bt_depth - 1)) { + middle_key(&((struct dm_multisnap_bt_node *)s->p->tmp_chunk)->entries[split_index - 1], &((struct dm_multisnap_bt_node *)s->p->tmp_chunk)->entries[split_index], &new_key); + } else { + /* + * Warning: when we'll delete btree entries, pay very much + * attention to not create entry that spans this border. + */ + struct dm_multisnap_bt_entry *last_one = &node->entries[split_index - 1]; + new_key.chunk = read_48(last_one, orig_chunk); + new_key.snap_from = le32_to_cpu(last_one->snap_from); + new_key.snap_to = le32_to_cpu(last_one->snap_to); + write_48(last_one, orig_chunk, CHUNK_T_SENTINEL); + last_one->snap_from = last_one->snap_to = cpu_to_le32(SNAPID_T_SENTINEL); + } + key = &new_key; + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + if (depth--) { + node = dm_multisnap_read_btnode(s, path[depth].block, &bp); + if (unlikely(!node)) + return; + goto go_up; + } + + if (s->p->bt_depth >= MAX_BT_DEPTH) { + DMERR("dm_multisnap_add_to_btree: max b+-tree depth reached"); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + node = dm_multisnap_alloc_make_block(s, &new_root, &bp); + if (unlikely(!node)) + return; + + memset(node, 0, s->chunk_size); + node->signature = BT_SIGNATURE; + node->n_entries = cpu_to_le16(0); + add_at_idx(node, 0, &new_key, new_chunk); + new_key.snap_from = new_key.snap_to = SNAPID_T_SENTINEL; + new_key.chunk = CHUNK_T_SENTINEL; + add_at_idx(node, 1, &new_key, path[0].block); + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + s->p->bt_root = new_root; + s->p->bt_depth++; +} + +void dm_multisnap_exclude_from_btree(struct dm_multisnap *s, struct bt_key *key) +{ + struct dm_multisnap_bt_node *node; + struct path_element path[MAX_BT_DEPTH]; + struct dm_buffer *bp; + struct dm_multisnap_bt_entry *entry; + snapid_t from, to; + + int r = walk_btree(s, key, &node, &bp, path); + if (unlikely(r < 0)) + return; + + if 
(!r) { + dm_bufio_release(bp); + DMERR("dm_multisnap_restrict_btree: unknown key: %Lx, %x-%x", (unsigned long long)key->chunk, key->snap_from, key->snap_to); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + node = dm_multisnap_alloc_duplicate_block(s, path[s->p->bt_depth - 1].block, &bp, node); + if (unlikely(!node)) + return; + + entry = &node->entries[path[s->p->bt_depth - 1].idx]; + from = le32_to_cpu(entry->snap_from); + to = le32_to_cpu(entry->snap_to); + if (key->snap_from == from && key->snap_to < to) + entry->snap_from = cpu_to_le32(key->snap_to + 1); + else if (key->snap_from > from && key->snap_to == to) + entry->snap_to = cpu_to_le32(key->snap_from - 1); + else { + DMERR("dm_multisnap_restrict_btree: invali range to restruct: %Lx, %x-%x %x-%x", (unsigned long long)key->chunk, from, to, key->snap_from, key->snap_to); + dm_multisnap_set_error(s, -EFSERROR); + } + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); +} + +void dm_multisnap_bt_finalize_tmp_remap(struct dm_multisnap *s, struct tmp_remap *tmp_remap) +{ + struct dm_buffer *bp; + struct dm_multisnap_bt_node *node; + struct bt_key key; + struct path_element path[MAX_BT_DEPTH]; + int results_ptr; + + chunk_t new_blockn; + int r; + int i; + + if (s->p->n_preallocated_blocks < s->p->bt_depth) { + if (dm_multisnap_alloc_blocks(s, s->p->preallocated_blocks + s->p->n_preallocated_blocks, s->p->bt_depth - s->p->n_preallocated_blocks) < 0) + return; + s->p->n_preallocated_blocks = s->p->bt_depth; + } + results_ptr = 0; + + /* + * Read the key from this node --- we'll walk the btree according + * to this key to find a path from the root. 
+ */ + node = dm_multisnap_read_btnode(s, tmp_remap->new, &bp); + if (!node) + return; + key.chunk = read_48(&node->entries[0], orig_chunk); + key.snap_from = key.snap_to = le32_to_cpu(node->entries[0].snap_from); + dm_bufio_release(bp); + + r = walk_btree(s, &key, &node, &bp, path); + if (r < 0) + return; + + dm_bufio_release(bp); + + for (i = s->p->bt_depth - 1; i >= 0; i--) + if (path[i].block == tmp_remap->old) + goto found; + + DMERR("block %Lx/%Lx was not found in btree when searching for %Lx/%x", (unsigned long long)tmp_remap->old, (unsigned long long)tmp_remap->new, (unsigned long long)key.chunk, key.snap_from); + for (i = 0; i < s->p->bt_depth; i++) + DMERR("path[%d]: %Lx/%x", i, (unsigned long long)path[i].block, path[i].idx); + dm_multisnap_set_error(s, -EFSERROR); + return; + + found: + + new_blockn = tmp_remap->new; + for (i--; i >= 0; i--) { + chunk_t block_to_free; + int remapped = 0; + node = dm_multisnap_read_btnode(s, path[i].block, &bp); + if (!node) + return; + if (!dm_multisnap_block_is_uncommitted(s, path[i].block)) { + remapped = 1; + dm_bufio_release_move(bp, s->p->preallocated_blocks[results_ptr]); + node = dm_multisnap_read_btnode(s, s->p->preallocated_blocks[results_ptr], &bp); + if (!node) + return; + /* !!! 
FIXME: add to a list of newly allocated blocks */ + } + block_to_free = read_48(&node->entries[path[i].idx], new_chunk); + write_48(&node->entries[path[i].idx], new_chunk, new_blockn); + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + if (block_to_free != tmp_remap->old) + dm_multisnap_delete_remapped_block(s, block_to_free); + dm_multisnap_free_block(s, block_to_free); + + if (!remapped) + goto skip_it; + new_blockn = s->p->preallocated_blocks[results_ptr]; + results_ptr++; + } + + if (s->p->bt_root != tmp_remap->old) + dm_multisnap_delete_remapped_block(s, s->p->bt_root); + dm_multisnap_free_block(s, s->p->bt_root); + s->p->bt_root = new_blockn; + +skip_it: + memmove(s->p->preallocated_blocks, s->p->preallocated_blocks + results_ptr, (s->p->n_preallocated_blocks -= results_ptr) * sizeof(chunk_t)); +} + Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-commit.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-commit.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. + */ + +#include "dm-multisnap-mikulas.h" + +static void dm_multisnap_finalize_tmp_remaps(struct dm_multisnap *s) +{ + struct tmp_remap *t; + int i; + + while (s->p->n_used_tmp_remaps) { + if (dm_multisnap_has_error(s)) + return; + if (s->p->n_used_tmp_remaps < N_REMAPS - 1) { +/* prefer btree remaps ... 
if there are none, do bitmap remaps */ + if (!list_empty(&s->p->used_bt_tmp_remaps)) { + t = container_of(s->p->used_bt_tmp_remaps.next, struct tmp_remap, list); + dm_multisnap_bt_finalize_tmp_remap(s, t); + dm_multisnap_free_tmp_remap(s, t); + continue; + } + } + +/* else: 0 or 1 free remaps : finalize bitmaps */ + if (!list_empty(&s->p->used_bitmap_tmp_remaps)) { + t = container_of(s->p->used_bitmap_tmp_remaps.next, struct tmp_remap, list); + dm_multisnap_bitmap_finalize_tmp_remap(s, t); + dm_multisnap_free_tmp_remap(s, t); + continue; + } else { + DMERR("dm_multisnap_finalize_tmp_remaps: no bitmap tmp remaps, n_used_tmp_remaps %u", s->p->n_used_tmp_remaps); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + } + + if (dm_multisnap_has_error(s)) + return; + + for (i = s->p->n_preallocated_blocks - 1; i >= 0; i--) + dm_multisnap_free_block_immediate(s, s->p->preallocated_blocks[i]); + s->p->n_preallocated_blocks = 0; +} + +void dm_multisnap_transaction_mark(struct dm_multisnap *s) +{ + /* + * Accounting: + * max number of modified/allocated blocks during btree add: + * s->p->bt_depth * 2 + 1 + * one additional entry for newly allocated data chunk + * one additional entry for bitmap finalization + */ + if (unlikely(N_REMAPS - s->p->n_used_tmp_remaps < s->p->bt_depth * 2 + 3)) + dm_multisnap_finalize_tmp_remaps(s); +} + +void dm_multisnap_commit(struct dm_multisnap *s) +{ + struct tmp_remap *t; + chunk_t cb_addr; + chunk_t cb_div, cb_offset; + struct multisnap_commit_block *cb; + struct multisnap_superblock *sb; + unsigned idx; + struct dm_buffer *bp; + int r; + + if (dm_multisnap_has_error(s)) { + /* !!! 
FIXME: write error to superblock */ + return; + } + + dm_multisnap_transaction_mark(s); + + list_for_each_entry(t, &s->p->used_bitmap_tmp_remaps, list) + t->uncommitted = 0; + + list_for_each_entry(t, &s->p->used_bt_tmp_remaps, list) + t->uncommitted = 0; + + if (unlikely((r = dm_bufio_write_dirty_buffers(s->p->bufio)) < 0)) { + DMERR("dm_multisnap_commit: error writing data"); + dm_multisnap_set_error(s, r); + return; + } + + cb_addr = s->p->alloc_rover; + + if (cb_addr < FIRST_CB_BLOCK) + cb_addr = FIRST_CB_BLOCK; + cb_div = cb_addr - FIRST_CB_BLOCK; + cb_offset = sector_div(cb_div, CB_STRIDE); + cb_addr += CB_STRIDE - cb_offset; + if (cb_offset < CB_STRIDE / 2 || cb_addr >= s->p->dev_size) + cb_addr -= CB_STRIDE; + + cb = dm_bufio_new(s->p->bufio, cb_addr, &bp); + if (IS_ERR(cb)) { + DMERR("dm_multisnap_commit: can't allocate new commit block at %Lx", (unsigned long long)cb_addr); + dm_multisnap_set_error(s, PTR_ERR(cb)); + return; + } + + s->p->commit_sequence++; + + cb->signature = CB_SIGNATURE; + cb->snapshot_num = cpu_to_le32(s->p->snapshot_num); + cb->sequence = cpu_to_le64(s->p->commit_sequence); + write_48(cb, dev_size, s->p->dev_size); + write_48(cb, total_allocated, s->p->total_allocated); + write_48(cb, data_allocated, s->p->data_allocated); + write_48(cb, bitmap_root, s->p->bitmap_root); + write_48(cb, alloc_rover, s->p->alloc_rover); + write_48(cb, bt_root, s->p->bt_root); + cb->bt_depth = s->p->bt_depth; + memset(cb->pad, 0, sizeof cb->pad); + idx = 0; + list_for_each_entry(t, &s->p->used_bitmap_tmp_remaps, list) { + BUG_ON(idx >= N_REMAPS); + write_48(&cb->tmp_remap[idx], old, t->old); + write_48(&cb->tmp_remap[idx], new, t->new); + cb->tmp_remap[idx].bitmap_idx = cpu_to_le32(t->bitmap_idx); + idx++; + } + list_for_each_entry(t, &s->p->used_bt_tmp_remaps, list) { + BUG_ON(idx >= N_REMAPS); + write_48(&cb->tmp_remap[idx], old, t->old); + write_48(&cb->tmp_remap[idx], new, t->new); + cb->tmp_remap[idx].bitmap_idx = cpu_to_le32(t->bitmap_idx); + 
idx++; + } + for (; idx < N_REMAPS; idx++) { + write_48(&cb->tmp_remap[idx], old, 0); + write_48(&cb->tmp_remap[idx], new, 0); + cb->tmp_remap[idx].bitmap_idx = cpu_to_le32(0); + } + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + r = dm_bufio_write_dirty_buffers(s->p->bufio); + if (unlikely(r < 0)) { + DMERR("dm_multisnap_commit: can't write commit block at %Lx", (unsigned long long)cb_addr); + dm_multisnap_set_error(s, r); + return; + } + + if (likely(cb_addr == s->p->valid_commit_block) || + likely(cb_addr == s->p->valid_commit_block + CB_STRIDE)) + goto return_success; + + sb = dm_bufio_read(s->p->bufio, SB_BLOCK, &bp); + if (IS_ERR(sb)) { + DMERR("dm_multisnap_commit: can't read super block"); + dm_multisnap_set_error(s, PTR_ERR(sb)); + return; + } + + if (unlikely(sb->signature != SB_SIGNATURE)) { + dm_bufio_release(bp); + DMERR("dm_multisnap_commit: invalid super block signature when committing"); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + sb->commit_block = cpu_to_le64(cb_addr); + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + r = dm_bufio_write_dirty_buffers(s->p->bufio); + if (unlikely(r < 0)) { + DMERR("dm_multisnap_commit: can't write super block"); + dm_multisnap_set_error(s, r); + return; + } + +return_success: + s->p->valid_commit_block = cb_addr; + return; +} Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas-struct.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas-struct.h 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#ifndef DM_MULTISNAP_MIKULAS_STRUCT_H +#define DM_MULTISNAP_MIKULAS_STRUCT_H + +#include +#include + +#define SB_BLOCK 0 + +#define SB_SIGNATURE cpu_to_be32(0xF6015342) + +struct multisnap_superblock { + __u32 signature; + __u32 chunk_size; + __u64 commit_block; +}; + +#define FIRST_CB_BLOCK 1 +#define CB_STRIDE 1024 + +#define CB_SIGNATURE cpu_to_be32(0xF6014342) + +struct commit_block_tmp_remap { + __u32 old1; + __u16 old2; + __u16 new2; + __u32 new1; + __u32 bitmap_idx; +}; + +#define CB_BITMAP_IDX_RESERVED 0xffffffff +#define CB_BITMAP_IDX_NONE 0xffffffff + +#define N_REMAPS 28 + +struct multisnap_commit_block { + __u32 signature; + __u32 snapshot_num; + + __u64 sequence; + + __u32 dev_size1; + __u16 dev_size2; + __u16 total_allocated2; + + __u32 total_allocated1; + __u32 data_allocated1; + + __u16 data_allocated2; + __u16 bitmap_root2; + __u32 bitmap_root1; + + __u32 alloc_rover1; + __u16 alloc_rover2; + __u16 bt_root2; + + __u32 bt_root1; + __u8 bt_depth; + + __u8 pad[11]; + + struct commit_block_tmp_remap tmp_remap[N_REMAPS]; +}; + +#define MAX_BITMAP_DEPTH 6 + +static inline int dm_multisnap_bitmap_depth(unsigned chunk_size, __u64 device_size) +{ + unsigned depth = 0; + __u64 entries = chunk_size * 8; + while (entries < device_size) { + depth++; + entries *= chunk_size / 8; + if (!entries) + return -ERANGE; + } + + if (depth > MAX_BITMAP_DEPTH) + return -ERANGE; + + return depth; +} + +/* B+-tree entry. 
Sorted by orig_chunk and snap_from/to */ + +#define MAX_BT_DEPTH 12 + +struct dm_multisnap_bt_entry { + __u32 orig_chunk1; + __u16 orig_chunk2; + __u16 new_chunk2; + __u32 new_chunk1; + __u32 snap_from; + __u32 snap_to; +}; + +#define BT_SIGNATURE cpu_to_be32(0xF6014254) + +struct dm_multisnap_bt_node { + __u32 signature; + __u16 n_entries; + __u16 pad1; + struct dm_multisnap_bt_entry entries[0]; +}; + +static inline unsigned dm_multisnap_btree_entries(unsigned chunk_size) +{ + return (chunk_size - sizeof(struct dm_multisnap_bt_node)) / sizeof(struct dm_multisnap_bt_entry); +} + +#endif Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-mikulas.h 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#ifndef DM_MULTISNAP_MIKULAS_H +#define DM_MULTISNAP_MIKULAS_H + +#include "dm-multisnap.h" +#include "dm-multisnap-mikulas-struct.h" + +#include + +typedef __u32 bitmap_t; + +#define read_48(struc, entry) (le32_to_cpu((struc)->entry##1) | ((chunk_t)le16_to_cpu((struc)->entry##2) << 31 << 1)) +#define write_48(struc, entry, val) do { (struc)->entry##1 = cpu_to_le32(val); (struc)->entry##2 = cpu_to_le16((chunk_t)(val) >> 31 >> 1); } while (0) + +#define TMP_REMAP_HASH_SIZE 256 +#define TMP_REMAP_HASH(c) ((c) & (TMP_REMAP_HASH_SIZE - 1)) + +#define CHUNK_BITS 48 +#define CHUNK_T_SENTINEL ((chunk_t)(1LL << CHUNK_BITS) - 1) +#define CHUNK_T_SNAP_PRESENT ((chunk_t)(1LL << CHUNK_BITS) - 1) +#define CHUNK_T_MAX ((chunk_t)(1LL << CHUNK_BITS) - 2) + +struct tmp_remap { + /* List entry for tmp_remap */ + struct hlist_node hash_list; + /* List entry for used_tmp_remaps/free_tmp_remaps */ + struct list_head list; + chunk_t old; + chunk_t new; + bitmap_t bitmap_idx; + int uncommitted; +}; + +struct bt_key { + chunk_t chunk; + snapid_t snap_from; + snapid_t snap_to; +}; + +struct path_element { + chunk_t block; + unsigned idx; +}; + +struct exception_store_private { + struct dm_bufio_client *bufio; + + chunk_t dev_size; + unsigned bitmap_depth; + snapid_t snapshot_num; + unsigned char bt_depth; + + chunk_t bitmap_root; + chunk_t alloc_rover; + chunk_t bt_root; + chunk_t sb_commit_block; + chunk_t valid_commit_block; + + chunk_t total_allocated; + chunk_t data_allocated; + + __u64 commit_sequence; + + void *tmp_chunk; + + struct rb_root active_snapshots; + + /* Used during query/add remap */ + chunk_t query_snapid; + struct bt_key query_new_key; + unsigned char query_active; + chunk_t query_block_from; + chunk_t query_block_to; + + /* List heads for struct tmp_remap->list */ + unsigned n_used_tmp_remaps; + struct list_head used_bitmap_tmp_remaps; + struct list_head used_bt_tmp_remaps; + struct list_head free_tmp_remaps; + /* List head for struct tmp_remap->hash_list */ + 
struct hlist_head tmp_remap[TMP_REMAP_HASH_SIZE]; + struct tmp_remap tmp_remap_store[N_REMAPS]; + + unsigned n_preallocated_blocks; + chunk_t preallocated_blocks[MAX_BITMAP_DEPTH * 2]; +}; + +/* dm-multisnap-alloc.c */ + +void dm_multisnap_create_bitmaps(struct dm_multisnap *s, chunk_t start); +int dm_multisnap_alloc_blocks(struct dm_multisnap *s, chunk_t *results, unsigned n_blocks); +void *dm_multisnap_alloc_duplicate_block(struct dm_multisnap *s, chunk_t block, struct dm_buffer **bp, void *ptr); +void *dm_multisnap_alloc_make_block(struct dm_multisnap *s, chunk_t *result, struct dm_buffer **bp); +void dm_multisnap_free_block_immediate(struct dm_multisnap *s, chunk_t block); +void dm_multisnap_free_block(struct dm_multisnap *s, chunk_t block); +void dm_multisnap_bitmap_finalize_tmp_remap(struct dm_multisnap *s, struct tmp_remap *tmp_remap); + +/* dm-multisnap-blocks.c */ + +void *dm_multisnap_read_block(struct dm_multisnap *s, chunk_t block, struct dm_buffer **bp); +void *dm_multisnap_duplicate_block(struct dm_multisnap *s, chunk_t old_chunk, chunk_t new_chunk, bitmap_t bitmap_idx, struct dm_buffer **bp); +void dm_multisnap_delete_remapped_block(struct dm_multisnap *s, chunk_t chunk); +void dm_multisnap_free_tmp_remap(struct dm_multisnap *s, struct tmp_remap *t); +void *dm_multisnap_make_block(struct dm_multisnap *s, chunk_t new_chunk, struct dm_buffer **bp); +int dm_multisnap_block_is_uncommitted(struct dm_multisnap *s, chunk_t block); + +int dm_multisnap_is_cb_block(struct dm_multisnap *s, chunk_t block); + +/* dm-multisnap-btree.c */ + +void dm_multisnap_create_btree(struct dm_multisnap *s, chunk_t *start); +int dm_multisnap_find_in_btree(struct dm_multisnap *s, struct bt_key *key, chunk_t *result); +void dm_multisnap_add_to_btree(struct dm_multisnap *s, struct bt_key *key, chunk_t new_chunk); +void dm_multisnap_exclude_from_btree(struct dm_multisnap *s, struct bt_key *key); +void dm_multisnap_bt_finalize_tmp_remap(struct dm_multisnap *s, struct tmp_remap 
*tmp_remap); + +/* dm-multisnap-commit.c */ + +void dm_multisnap_transaction_mark(struct dm_multisnap *s); +void dm_multisnap_commit(struct dm_multisnap *s); + +/* dm-multisnap-io.c */ + +int dm_multisnap_find_snapshot_chunk(struct dm_multisnap *s, snapid_t id, chunk_t chunk, chunk_t *result); +void dm_multisnap_reset_query(struct dm_multisnap *s); +int dm_multisnap_query_next_remap(struct dm_multisnap *s, chunk_t chunk); +void dm_multisnap_add_next_remap(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk); +void dm_multisnap_make_chunk_writeable(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk); +int dm_multisnap_check_conflict(struct dm_multisnap *s, union chunk_descriptor *cd, snapid_t snapid); + +/* dm-multisnap-snaps.c */ + +int dm_multisnap_snapshot_exists(struct dm_multisnap *s, snapid_t id); +int dm_multisnap_find_next_snapid_range(struct dm_multisnap *s, snapid_t id, snapid_t *from, snapid_t *to); + +void dm_multisnap_destroy_snapshot_tree(struct dm_multisnap *s); +void dm_multisnap_read_snapshots(struct dm_multisnap *s); +int dm_multisnap_create_snapshot(struct dm_multisnap *s, snapid_t *snapid); + +void dm_multisnap_status_info(struct dm_multisnap *s, char *result, unsigned maxlen); + +#endif Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-snaps.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-snaps.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#include "dm-multisnap-mikulas.h" + +struct snapshot_range { + struct rb_node node; + snapid_t from; + snapid_t to; +}; + +static struct snapshot_range *rb_find_insert_snapshot(struct dm_multisnap *s, snapid_t from, snapid_t to, int add) +{ + struct snapshot_range *new; + struct rb_node **p = &s->p->active_snapshots.rb_node; + struct rb_node *parent = NULL; + while (*p) { + parent = *p; +#define rn rb_entry(parent, struct snapshot_range, node) + if (to < rn->from) { + if (to == rn->from - 1 && add) { + rn->from = from; + return rn; + } + p = &rn->node.rb_left; + } else if (from > rn->to) { + if (from == rn->to + 1 && add) { + rn->to = to; + return rn; + } + p = &rn->node.rb_right; + } else { + if (!add) + return rn; + else { + DMERR("rb_insert_snapshot: inserting overlapping entry: (%u,%u) overlaps (%u,%u)", from, to, rn->from, rn->to); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + } +#undef rn + } + if (!add) + return NULL; + + new = kmalloc(sizeof(struct snapshot_range), GFP_KERNEL); + if (!new) { + DMERR("rb_insert_snapshot: can't allocate memory for snapshot descriptor"); + dm_multisnap_set_error(s, -ENOMEM); + return NULL; + } + + new->from = from; + new->to = to; + + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &s->p->active_snapshots); + + return new; +} + +static struct snapshot_range *rb_find_snapshot(struct dm_multisnap *s, snapid_t from, snapid_t to) +{ + return rb_find_insert_snapshot(s, from, to, 0); +} + +static int rb_insert_snapshot(struct dm_multisnap *s, snapid_t from, snapid_t to) +{ + struct snapshot_range *rn; + rn = rb_find_insert_snapshot(s, from, to, 1); + if (!rn) + return -1; + return 0; +} + +int dm_multisnap_snapshot_exists(struct dm_multisnap *s, snapid_t id) +{ + return !!rb_find_snapshot(s, id, id); +} + +int dm_multisnap_find_next_snapid_range(struct dm_multisnap *s, snapid_t id, snapid_t *from, snapid_t *to) +{ + struct snapshot_range *rn; + rn = rb_find_snapshot(s, id, SNAPID_T_MAX); + 
if (!rn) + return 0; + *from = rn->from; + *to = rn->to; + return 1; +} + +void dm_multisnap_destroy_snapshot_tree(struct dm_multisnap *s) +{ + struct rb_node *root; + while ((root = s->p->active_snapshots.rb_node)) { +#define rn rb_entry(root, struct snapshot_range, node) + rb_erase(root, &s->p->active_snapshots); + kfree(rn); +#undef rn + } +} + +void dm_multisnap_read_snapshots(struct dm_multisnap *s) +{ + struct bt_key snap_key; + chunk_t ignore; + int r; + + dm_multisnap_destroy_snapshot_tree(s); + + snap_key.snap_from = 0; +find_next: + snap_key.snap_to = SNAPID_T_MAX; + snap_key.chunk = CHUNK_T_SNAP_PRESENT; + + r = dm_multisnap_find_in_btree(s, &snap_key, &ignore); + + if (unlikely(r < 0)) + return; + + if (r) { + printk("inserting snapid %d-%d\n", snap_key.snap_from, snap_key.snap_to); + if (unlikely(snap_key.snap_to > SNAPID_T_MAX)) { + DMERR("dm_multisnap_read_snapshots: invalid snapshot id"); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + r = rb_insert_snapshot(s, snap_key.snap_from, snap_key.snap_to); + if (unlikely(r < 0)) + return; + snap_key.snap_from = snap_key.snap_to + 1; + goto find_next; + } +} + +int dm_multisnap_create_snapshot(struct dm_multisnap *s, snapid_t *snapid) +{ + int r; + struct bt_key snap_key; + + if (s->p->snapshot_num > SNAPID_T_MAX) { + DMERR("dm_multisnap_create_snapshot: 2^32 snapshot limit reached"); + return -ENOSPC; + } + + r = rb_insert_snapshot(s, s->p->snapshot_num, s->p->snapshot_num); + if (r < 0) + return dm_multisnap_has_error(s); + + snap_key.chunk = CHUNK_T_SNAP_PRESENT; + snap_key.snap_from = s->p->snapshot_num; + snap_key.snap_to = s->p->snapshot_num; + dm_multisnap_add_to_btree(s, &snap_key, 0); + if (dm_multisnap_has_error(s)) + return dm_multisnap_has_error(s); + + printk("multisnapshot: created snapshot with ID %u\n", s->p->snapshot_num); + + *snapid = s->p->snapshot_num++; + + dm_multisnap_transaction_mark(s); + dm_multisnap_commit(s); + + return 0; +} + +void dm_multisnap_status_info(struct 
dm_multisnap *s, char *result, unsigned maxlen) +{ + snapid_t n_snaps = 0; + + snapid_t from, to; + snapid_t snapid = 0; + for (snapid = 0; dm_multisnap_find_next_snapid_range(s, snapid, &from, &to); snapid = to + 1) + n_snaps += to - from + 1; + + snprintf(result, maxlen, " %Lu %Lu %Lu %u", (unsigned long long)s->p->dev_size, (unsigned long long)s->p->total_allocated, (unsigned long long)(s->p->total_allocated - s->p->data_allocated), n_snaps); + dm_multisnap_adjust_string(&result, &maxlen); + + for (snapid = 0; dm_multisnap_find_next_snapid_range(s, snapid, &from, &to); snapid = to + 1) + for (; from <= to; from++) { + snprintf(result, maxlen, " %u", from); + dm_multisnap_adjust_string(&result, &maxlen); + } +} + Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-alloc.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-alloc.c 2009-02-03 09:04:20.000000000 +0100 @@ -0,0 +1,384 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */ + +#include "dm-multisnap-mikulas.h" + +#define rshift_roundup(val, bits) (((val) + ((chunk_t)1 << (bits)) - 1) >> (bits)) + +#define BITS_PER_BYTE_SHIFT 3 +#define BYTES_PER_POINTER_SHIFT 3 + +void dm_multisnap_create_bitmaps(struct dm_multisnap *s, chunk_t writing_block) +{ + int r; + struct dm_buffer *bp; + chunk_t direct_bitmap_blocks, total_bitmap_blocks, total_preallocated_blocks; + chunk_t lower_depth_block; + unsigned i, d; + chunk_t ii; + + r = dm_multisnap_bitmap_depth(s->chunk_size, s->p->dev_size); + if (r < 0) { + DMERR("dm_multisnap_create_bitmaps: device is too large"); + dm_multisnap_set_error(s, r); + return; + } + s->p->bitmap_depth = r; + + direct_bitmap_blocks = rshift_roundup(s->p->dev_size, s->chunk_shift + BITS_PER_BYTE_SHIFT); + + if (direct_bitmap_blocks >= CB_BITMAP_IDX_RESERVED) { + DMERR("dm_multisnap_create_bitmaps: device is too large"); + dm_multisnap_set_error(s, -ERANGE); + return; + } + + total_bitmap_blocks = 0; + for (i = 0; i <= s->p->bitmap_depth; i++) { + unsigned shift = (s->chunk_shift - BYTES_PER_POINTER_SHIFT) * i; + total_bitmap_blocks += rshift_roundup(direct_bitmap_blocks, shift); + } + total_preallocated_blocks = writing_block + total_bitmap_blocks; + for (ii = 0; ii < total_preallocated_blocks; ii++) { + if (dm_multisnap_is_cb_block(s, ii)) + total_preallocated_blocks++; + } + + if (total_preallocated_blocks >= s->p->dev_size) { + DMERR("dm_multisnap_create_bitmaps: device is too small"); + dm_multisnap_set_error(s, -ENOSPC); + return; + } + +/* Write direct bitmap blocks */ + + lower_depth_block = writing_block; + for (ii = 0; ii < direct_bitmap_blocks; ii++, writing_block++) { + void *bmp; + while (dm_multisnap_is_cb_block(s, writing_block)) + writing_block++; + bmp = dm_bufio_new(s->p->bufio, writing_block, &bp); + if (IS_ERR(bmp)) { + DMERR("dm_multisnap_create_bitmaps: can't create direct bitmap block at %Lx", (unsigned long long)writing_block); + dm_multisnap_set_error(s, PTR_ERR(bmp)); + return; + } + 
memset(bmp, 0, s->chunk_size); + for (i = 0; i < s->chunk_size << BITS_PER_BYTE_SHIFT; i++) { + chunk_t block_to_test = (ii << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) | i; + if (block_to_test < total_preallocated_blocks || block_to_test >= s->p->dev_size || dm_multisnap_is_cb_block(s, block_to_test)) { + generic___set_le_bit(i, bmp); + s->p->total_allocated++; + } + } + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + } + +/* Write indirect bitmap blocks */ + + for (d = 1; d <= s->p->bitmap_depth; d++) { + chunk_t this_depth_block = writing_block; + for (ii = 0; ii < rshift_roundup(direct_bitmap_blocks, d * (s->chunk_shift - BYTES_PER_POINTER_SHIFT)); ii++, writing_block++) { + __u64 *bmp; + while (dm_multisnap_is_cb_block(s, writing_block)) + writing_block++; + bmp = dm_bufio_new(s->p->bufio, writing_block, &bp); + if (IS_ERR(bmp)) { + DMERR("dm_multisnap_create_bitmaps: can't create indirect bitmap block at %Lx", (unsigned long long)writing_block); + dm_multisnap_set_error(s, PTR_ERR(bmp)); + return; + } + for (i = 0; i < s->chunk_size >> BYTES_PER_POINTER_SHIFT; i++) { + if (((ii << d * (s->chunk_shift - BYTES_PER_POINTER_SHIFT)) | (i << (d - 1) * (s->chunk_shift - BYTES_PER_POINTER_SHIFT))) >= direct_bitmap_blocks) { + bmp[i] = cpu_to_le64(0); + continue; + } + while (dm_multisnap_is_cb_block(s, lower_depth_block)) + lower_depth_block++; + bmp[i] = cpu_to_le64(lower_depth_block); + lower_depth_block++; + } + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + } + lower_depth_block = this_depth_block; + } + + s->p->bitmap_root = writing_block - 1; + + return; +} + +static void *map_bitmap(struct dm_multisnap *s, bitmap_t bitmap, struct dm_buffer **bp, chunk_t *block, struct path_element *path) +{ + __u64 *bmp; + unsigned idx; + unsigned d = s->p->bitmap_depth; + *block = s->p->bitmap_root; + while (1) { + chunk_t new_block; + bmp = dm_multisnap_read_block(s, *block, bp); + if (unlikely(IS_ERR(bmp))) { + DMERR("map_bitmap: can't read bitmap at %Lx, 
depth %d/%d, index %Lx", (unsigned long long)*block, s->p->bitmap_depth - d, s->p->bitmap_depth, (unsigned long long)bitmap); + dm_multisnap_set_error(s, PTR_ERR(bmp)); + return NULL; + } + if (!d) + return bmp; + + idx = (bitmap >> ((d - 1) * (s->chunk_shift - BYTES_PER_POINTER_SHIFT))) & ((s->chunk_size - 1) >> BYTES_PER_POINTER_SHIFT); + + if (unlikely(path != NULL)) { + path[s->p->bitmap_depth - d].block = *block; + path[s->p->bitmap_depth - d].idx = idx; + } + + new_block = le64_to_cpu(bmp[idx]); + + dm_bufio_release(*bp); + if (!new_block) { + DMERR("map_bitmap: accessing bitmap out of range, bitmap %x", bitmap); + dm_multisnap_set_error(s, -EFSERROR); + return NULL; + } + *block = new_block; + + d--; + } +} + +static int find_bit(const void *bmp, unsigned start, unsigned end, int wide_search) +{ + const void *p; + unsigned bit; + if (unlikely(start >= end)) + return -ENOSPC; + if (likely(!generic_test_le_bit(start, bmp))) + return start; + if (likely(wide_search)) { + p = memchr(bmp + (start >> 3), 0, (end >> 3) - (start >> 3)); + if (p) { + bit = ((const __u8 *)p - (const __u8 *)bmp) << 3; + while (bit > start && !generic_test_le_bit(bit - 1, bmp)) + bit--; + goto ret_bit; + } + } + bit = generic_find_next_zero_le_bit(bmp, end, start); + ret_bit: + if (bit >= end) + return -ENOSPC; + return bit; +} + +int dm_multisnap_alloc_blocks(struct dm_multisnap *s, chunk_t *results, unsigned n_blocks) +{ + void *bmp; + struct dm_buffer *bp; + chunk_t block; + int wrap_around = 0; + int start_bit; + int wide_search; + int i; + bitmap_t bitmap_no; + + bitmap_no = s->p->alloc_rover >> (s->chunk_shift + BITS_PER_BYTE_SHIFT); +next_bitmap: + bmp = map_bitmap(s, bitmap_no, &bp, &block, NULL); + if (unlikely(!bmp)) + return -1; + + wide_search = 1; +find_again: + start_bit = s->p->alloc_rover & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1); + + for (i = 0; i < n_blocks; i++) { + int bit = find_bit(bmp, start_bit, s->chunk_size << BITS_PER_BYTE_SHIFT, wide_search); + if 
(unlikely(bit < 0)) { +bit_find_failed: + if (i && wide_search) { + wide_search = 0; + goto find_again; + } + dm_bufio_release(bp); + s->p->alloc_rover = (chunk_t)++bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT); + if (unlikely(s->p->alloc_rover >= s->p->dev_size)) { + s->p->alloc_rover = 0; + bitmap_no = 0; + wrap_around++; + if (wrap_around >= 2) { + DMERR("snapshot overflow"); + dm_multisnap_set_error(s, -ENOSPC); + return -1; + } + } + goto next_bitmap; + } + results[i] = ((chunk_t)bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) | bit; + start_bit = bit + 1; + } + + if (!dm_multisnap_block_is_uncommitted(s, block)) { + int bit = find_bit(bmp, start_bit, s->chunk_size << BITS_PER_BYTE_SHIFT, wide_search); + if (unlikely(bit < 0)) + goto bit_find_failed; + + bmp = dm_multisnap_duplicate_block(s, block, ((chunk_t)bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) | bit, bitmap_no, &bp); + if (unlikely(!bmp)) + return -1; + + generic___set_le_bit(bit, bmp); + s->p->total_allocated++; + start_bit = bit + 1; + } + + for (i = 0; i < n_blocks; i++) + generic___set_le_bit(results[i] & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1), bmp); + s->p->total_allocated += n_blocks; + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + s->p->alloc_rover = (s->p->alloc_rover & ~(chunk_t)((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1)) + start_bit; + if (unlikely(s->p->alloc_rover >= s->p->dev_size)) + s->p->alloc_rover = 0; + + return 0; +} + +void *dm_multisnap_alloc_duplicate_block(struct dm_multisnap *s, chunk_t block, struct dm_buffer **bp, void *ptr) +{ + int r; + chunk_t new_chunk; + void *data; + + if (dm_multisnap_block_is_uncommitted(s, block)) + return ptr; + + dm_bufio_release(*bp); + + r = dm_multisnap_alloc_blocks(s, &new_chunk, 1); + if (r) + return NULL; + + data = dm_multisnap_read_block(s, block, bp); + if (!data) + return NULL; + + return dm_multisnap_duplicate_block(s, block, new_chunk, CB_BITMAP_IDX_NONE, bp); +} + +void 
*dm_multisnap_alloc_make_block(struct dm_multisnap *s, chunk_t *result, struct dm_buffer **bp) +{ + int r = dm_multisnap_alloc_blocks(s, result, 1); + if (unlikely(r < 0)) + return NULL; + + return dm_multisnap_make_block(s, *result, bp); +} + +void dm_multisnap_free_block_immediate(struct dm_multisnap *s, chunk_t block) +{ + void *bmp; + struct dm_buffer *bp; + bitmap_t bitmap_no; + + if (unlikely(block >= s->p->dev_size)) { + DMERR("dm_multisnap_free_block_immediate: freeing invalid block %Lx", (unsigned long long)block); + dm_multisnap_set_error(s, -EFSERROR); + return; + } + + if (block + 1 == s->p->alloc_rover) + s->p->alloc_rover = block; + + bitmap_no = block >> (s->chunk_shift + BITS_PER_BYTE_SHIFT); + + bmp = map_bitmap(s, bitmap_no, &bp, &block, NULL); + if (!bmp) + return; + + generic___clear_le_bit(block & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1), bmp); + s->p->total_allocated--; + + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); +} + + +void dm_multisnap_free_block(struct dm_multisnap *s, chunk_t block) +{ + /* !!! FIXME: reclaim space */ +} + +void dm_multisnap_bitmap_finalize_tmp_remap(struct dm_multisnap *s, struct tmp_remap *tmp_remap) +{ + chunk_t block; + struct dm_buffer *bp; + __u64 *new_block; + struct path_element path[MAX_BITMAP_DEPTH]; + int results_ptr; + + chunk_t new_blockn; + int i; + + /* + * Preallocate twice the required amount of blocks, so that resolving + * the next tmp_remap (created here, in dm_multisnap_alloc_blocks) + * doesn't have to allocate anything. 
+ */ + if (s->p->n_preallocated_blocks < s->p->bitmap_depth) { + if (unlikely(dm_multisnap_alloc_blocks(s, s->p->preallocated_blocks + s->p->n_preallocated_blocks, s->p->bitmap_depth * 2 - s->p->n_preallocated_blocks) < 0)) + return; + s->p->n_preallocated_blocks = s->p->bitmap_depth * 2; + } + results_ptr = 0; + + new_block = map_bitmap(s, tmp_remap->bitmap_idx, &bp, &block, path); + if (unlikely(!new_block)) + return; + + dm_bufio_release(bp); + + new_blockn = tmp_remap->new; + for (i = s->p->bitmap_depth - 1; i >= 0; i--) { + chunk_t block_to_free; + int remapped = 0; + __u64 *bmp = dm_multisnap_read_block(s, path[i].block, &bp); + if (unlikely(IS_ERR(bmp))) + return; + + if (!dm_multisnap_block_is_uncommitted(s, path[i].block)) { + remapped = 1; + dm_bufio_release_move(bp, s->p->preallocated_blocks[results_ptr]); + bmp = dm_multisnap_read_block(s, s->p->preallocated_blocks[results_ptr], &bp); + if (unlikely(IS_ERR(bmp))) + return; + /* !!! FIXME: add to a list of newly allocated blocks */ + } + + block_to_free = le32_to_cpu(bmp[path[i].idx]); + bmp[path[i].idx] = cpu_to_le64(new_blockn); + dm_bufio_mark_buffer_dirty(bp); + dm_bufio_release(bp); + + dm_multisnap_free_block(s, block_to_free); + + if (!remapped) + goto skip_it; + new_blockn = s->p->preallocated_blocks[results_ptr]; + results_ptr++; + } + + dm_multisnap_free_block(s, s->p->bitmap_root); + s->p->bitmap_root = new_blockn; + +skip_it: + memmove(s->p->preallocated_blocks, s->p->preallocated_blocks + results_ptr, (s->p->n_preallocated_blocks -= results_ptr) * sizeof(chunk_t)); +} Index: linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-io.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.29-rc3-devel/drivers/md/dm-multisnap-io.c 2009-02-03 09:08:31.000000000 +0100 @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2008 Red Hat Czech, s.r.o. + * + * Mikulas Patocka + * + * This file is released under the GPL. 
+ */
+
+#include "dm-multisnap-mikulas.h"
+
+/*
+ * This function will check if there is remapping for a given snapid/chunk.
+ * It returns 1 if remapping exists and is read-only (shared by other snapshots)
+ * and 2 if it exists and is read-write (not shared by anyone).
+ *
+ * If dm_multisnap_find_in_btree returns 0 or a negative value, that value is
+ * returned unchanged. On 0 the key <chunk, id, id> is stored in
+ * s->p->query_new_key and s->p->query_active is set to 1. When 1 is returned,
+ * the key to be split off, the range of the existing entry
+ * (query_block_from/to) and query_active == 2 are stored.
+ *
+ * NOTE(review): the code assumes that dm_multisnap_find_in_btree updates
+ * key.snap_from/snap_to to the range of the entry it found -- confirm in the
+ * btree code.
+ */
+
+int dm_multisnap_find_snapshot_chunk(struct dm_multisnap *s, snapid_t id, chunk_t chunk, chunk_t *result)
+{
+	int r;
+	struct bt_key key;
+	key.chunk = chunk;
+	key.snap_from = id;
+	key.snap_to = id;
+	r = dm_multisnap_find_in_btree(s, &key, result);
+	if (!r) {
+		s->p->query_new_key.chunk = chunk;
+		s->p->query_new_key.snap_from = id;
+		s->p->query_new_key.snap_to = id;
+		s->p->query_active = 1;
+	}
+	if (r > 0) {
+		snapid_t find_from, find_to;
+		/*
+		 * !!! TODO: this branch could be done conditionally
+		 * only for write requests
+		 */
+		/* Is the entry shared with an existing snapshot with a lower ID? */
+		if (key.snap_from < id) {
+			if (likely(dm_multisnap_find_next_snapid_range(s, key.snap_from, &find_from, &find_to))) {
+				if (find_from < id) {
+					s->p->query_new_key.chunk = chunk;
+					s->p->query_new_key.snap_from = id;
+					s->p->query_new_key.snap_to = key.snap_to;
+					s->p->query_block_from = key.snap_from;
+					s->p->query_block_to = key.snap_to;
+					s->p->query_active = 2;
+					return 1;
+				}
+				if (unlikely(find_from > id))
+					BUG(); /* SNAPID not in our tree */
+				if (find_to > id && key.snap_to > id) {
+					s->p->query_new_key.chunk = chunk;
+					s->p->query_new_key.snap_from = key.snap_from;
+					s->p->query_new_key.snap_to = id;
+					s->p->query_block_from = key.snap_from;
+					s->p->query_block_to = key.snap_to;
+					s->p->query_active = 2;
+					return 1;
+				}
+			} else {
+				/* we're asking for a SNAPID not in our tree */
+				BUG();
+			}
+		}
+		/* Is the entry shared with an existing snapshot with a higher ID? */
+		if (key.snap_to > id) {
+			if (likely(dm_multisnap_find_next_snapid_range(s, id + 1, &find_from, &find_to))) {
+				if (find_from <= key.snap_to) {
+					s->p->query_new_key.chunk = chunk;
+					s->p->query_new_key.snap_from = key.snap_from;
+					s->p->query_new_key.snap_to = id;
+					s->p->query_block_from = key.snap_from;
+					s->p->query_block_to = key.snap_to;
+					s->p->query_active = 2;
+					return 1;
+				}
+			}
+		}
+		return 2;
+	}
+	return r;
+}
+
+/*
+ * Reset the query state: no pending query, and the scan done by
+ * dm_multisnap_query_next_remap starts again from snapshot ID 0.
+ */
+void dm_multisnap_reset_query(struct dm_multisnap *s)
+{
+	s->p->query_active = 0;
+
+	s->p->query_snapid = 0;
+}
+
+/*
+ * Find the next range of existing snapshots that has no entry for "chunk" in
+ * the B-tree, continuing from s->p->query_snapid.
+ * Returns 1 if such a range was found (it is stored in s->p->query_new_key
+ * and s->p->query_active is set to 1), 0 if there is none left, and -1 on
+ * error.
+ */
+int dm_multisnap_query_next_remap(struct dm_multisnap *s, chunk_t chunk)
+{
+	int r;
+	chunk_t sink;
+	snapid_t from, to;
+
+	s->p->query_active = 0;
+
+	while (dm_multisnap_find_next_snapid_range(s, s->p->query_snapid, &from, &to)) {
+		struct bt_key key;
+next_btree_search:
+		if (dm_multisnap_has_error(s))
+			return -1;
+		key.chunk = chunk;
+		key.snap_from = from;
+		key.snap_to = to;
+		r = dm_multisnap_find_in_btree(s, &key, &sink);
+		if (unlikely(r < 0))
+			return -1;
+
+		/* No entry overlaps <from, to>: the whole range is missing */
+		if (!r) {
+			s->p->query_new_key.chunk = chunk;
+			s->p->query_new_key.snap_from = from;
+			s->p->query_new_key.snap_to = to;
+			s->p->query_active = 1;
+			return 1;
+		}
+
+		/* The entry found starts above "from": the IDs below it are missing */
+		if (key.snap_from > from) {
+			s->p->query_new_key.chunk = chunk;
+			s->p->query_new_key.snap_from = from;
+			s->p->query_new_key.snap_to = key.snap_from - 1;
+			s->p->query_active = 1;
+			return 1;
+		}
+
+		/* The entry found ends below "to": search the rest of the range */
+		if (key.snap_to < to) {
+			from = key.snap_to + 1;
+			goto next_btree_search;
+		}
+
+		s->p->query_snapid = to + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a new chunk for the range stored in s->p->query_new_key and add
+ * the mapping to the B-tree. It is a BUG if s->p->query_active is not 1.
+ * The range is copied to cd->range and the allocated chunk number is stored
+ * in *new_chunk. Returns silently if the allocation fails.
+ */
+void dm_multisnap_add_next_remap(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk)
+{
+	int r;
+
+	BUG_ON(s->p->query_active != 1);
+	s->p->query_active = 0;
+
+	cd->range.from = s->p->query_new_key.snap_from;
+	cd->range.to = s->p->query_new_key.snap_to;
+
+	r = dm_multisnap_alloc_blocks(s, new_chunk, 1);
+	if (unlikely(r < 0))
+		return;
+
+	s->p->data_allocated++;
+
+	dm_multisnap_add_to_btree(s, &s->p->query_new_key, *new_chunk);
+	dm_multisnap_transaction_mark(s);
+}
+
+/*
+ * Give the range stored in s->p->query_new_key a chunk of its own: the range
+ * is excluded from the B-tree and added again, pointing to a newly allocated
+ * chunk. It is a BUG if s->p->query_active is not 2.
+ * cd->range is set to s->p->query_block_from/to and the allocated chunk
+ * number is stored in *new_chunk. Returns silently if the allocation fails
+ * or if an error is set after the exclusion.
+ */
+void dm_multisnap_make_chunk_writeable(struct dm_multisnap *s, union chunk_descriptor *cd, chunk_t *new_chunk)
+{
+	int r;
+
+	BUG_ON(s->p->query_active != 2);
+	s->p->query_active = 0;
+
+	cd->range.from = s->p->query_block_from;
+	cd->range.to = s->p->query_block_to;
+
+	r = dm_multisnap_alloc_blocks(s, new_chunk, 1);
+	if (unlikely(r < 0))
+		return;
+
+	s->p->data_allocated++;
+
+	dm_multisnap_exclude_from_btree(s, &s->p->query_new_key);
+	dm_multisnap_transaction_mark(s);
+
+	if (unlikely(dm_multisnap_has_error(s)))
+		return;
+
+	dm_multisnap_add_to_btree(s, &s->p->query_new_key, *new_chunk);
+	dm_multisnap_transaction_mark(s);
+}
+
+/* Return nonzero if snapid lies within the inclusive range cd->range. */
+int dm_multisnap_check_conflict(struct dm_multisnap *s, union chunk_descriptor *cd, snapid_t snapid)
+{
+	return snapid >= cd->range.from && snapid <= cd->range.to;
+}
+