dm-crypt: use encryption threads

Use encryption threads, one per CPU, to improve dm-crypt parallelization.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-crypt.c |  226 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 176 insertions(+), 50 deletions(-)

Index: linux-3.2-fast/drivers/md/dm-crypt.c
===================================================================
--- linux-3.2-fast.orig/drivers/md/dm-crypt.c	2012-01-27 03:54:33.000000000 +0100
+++ linux-3.2-fast/drivers/md/dm-crypt.c	2012-01-27 03:57:16.000000000 +0100
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/backing-dev.h>
 #include <linux/percpu.h>
 #include <linux/atomic.h>
@@ -30,6 +31,9 @@
 
 #define DM_MSG_PREFIX "crypt"
 
+#define DMREQ_PULL_BATCH 16
+#define DMREQ_PUSH_BATCH 16
+
 /*
  * context holding the current state of a multi-part conversion
  */
@@ -42,7 +46,6 @@ struct convert_context {
 	unsigned int idx_out;
 	sector_t sector;
 	atomic_t cc_pending;
-	struct ablkcipher_request *req;
 };
 
 /*
@@ -62,10 +65,12 @@ struct dm_crypt_io {
 };
 
 struct dm_crypt_request {
+	struct list_head list;
 	struct convert_context *ctx;
 	struct scatterlist sg_in;
 	struct scatterlist sg_out;
 	sector_t iv_sector;
+	struct completion *busy_wait;
 };
 
 struct crypt_config;
@@ -121,6 +126,12 @@ struct crypt_config {
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
 
+	unsigned crypt_threads_size;
+	struct task_struct **crypt_threads;
+
+	wait_queue_head_t crypt_thread_wait;
+	spinlock_t crypt_thread_spinlock;
+	struct list_head crypt_thread_list;
 
 	char *cipher;
 	char *cipher_string;
@@ -657,9 +668,80 @@ static u8 *iv_of_dmreq(struct crypt_conf
 	       crypto_ablkcipher_alignmask(any_tfm(cc)) + 1);
 }
 
+static void kcryptd_async_done(struct crypto_async_request *async_req,
+			       int error);
+
+static int dmcrypt_thread(void *data)
+{
+	struct crypt_config *cc = data;
+	while (1) {
+		struct dm_crypt_request *dmreqs[DMREQ_PULL_BATCH];
+		unsigned n_dmreqs;
+		unsigned i;
+
+		DECLARE_WAITQUEUE(wait, current);
+
+		spin_lock(&cc->crypt_thread_spinlock);
+
+		if (!list_empty(&cc->crypt_thread_list))
+			goto pop_from_list;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&cc->crypt_thread_wait, &wait);
+
+		spin_unlock(&cc->crypt_thread_spinlock);
+
+		if (unlikely(kthread_should_stop())) {
+			set_task_state(current, TASK_RUNNING);
+			remove_wait_queue(&cc->crypt_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		set_task_state(current, TASK_RUNNING);
+		remove_wait_queue(&cc->crypt_thread_wait, &wait);
+		continue;
+
+pop_from_list:
+		n_dmreqs = 0;
+		do {
+			struct dm_crypt_request *dmreq = container_of(
+						cc->crypt_thread_list.next,
+						struct dm_crypt_request, list);
+			list_del(&dmreq->list);
+			dmreqs[n_dmreqs++] = dmreq;
+		} while (n_dmreqs < DMREQ_PULL_BATCH &&
+			 !list_empty(&cc->crypt_thread_list));
+		spin_unlock(&cc->crypt_thread_spinlock);
+
+		i = 0;
+		do {
+			struct dm_crypt_request *dmreq = dmreqs[i];
+			struct ablkcipher_request *req = req_of_dmreq(cc, dmreq);
+			int r;
+			DECLARE_COMPLETION(busy_wait);
+			dmreq->busy_wait = &busy_wait;
+			if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
+				r = crypto_ablkcipher_encrypt(req);
+			else
+				r = crypto_ablkcipher_decrypt(req);
+			if (unlikely(r == -EBUSY)) {
+				wait_for_completion(&busy_wait);
+			} else if (likely(r != -EINPROGRESS)) {
+				struct crypto_async_request as_rq;
+				as_rq.data = dmreq;
+				kcryptd_async_done(&as_rq, r);
+			}
+		} while (++i < n_dmreqs);
+	}
+	return 0;
+}
+
 static int crypt_convert_block(struct crypt_config *cc,
 			       struct convert_context *ctx,
-			       struct ablkcipher_request *req)
+			       struct ablkcipher_request *req,
+			       struct list_head *batch)
 {
 	struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
 	struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
@@ -701,32 +783,35 @@ static int crypt_convert_block(struct cr
 	ablkcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
 				     1 << SECTOR_SHIFT, iv);
 
-	if (bio_data_dir(ctx->bio_in) == WRITE)
-		r = crypto_ablkcipher_encrypt(req);
-	else
-		r = crypto_ablkcipher_decrypt(req);
-
-	if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
-		r = cc->iv_gen_ops->post(cc, iv, dmreq);
+	list_add_tail(&dmreq->list, batch);
 
-	return r;
+	return 0;
 }
 
-static void kcryptd_async_done(struct crypto_async_request *async_req,
-			       int error);
-
-static void crypt_alloc_req(struct crypt_config *cc,
-			    struct convert_context *ctx)
+static struct ablkcipher_request *crypt_alloc_req(struct crypt_config *cc,
+			struct convert_context *ctx, gfp_t gfp_mask)
 {
 	unsigned key_index = ctx->sector & (cc->tfms_count - 1);
+	struct ablkcipher_request *req = mempool_alloc(cc->req_pool, gfp_mask);
+	if (!req)
+		return NULL;
 
-	if (!ctx->req)
-		ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
-
-	ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
-	ablkcipher_request_set_callback(ctx->req,
+	ablkcipher_request_set_tfm(req, cc->tfms[key_index]);
+	ablkcipher_request_set_callback(req,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
-	    kcryptd_async_done, dmreq_of_req(cc, ctx->req));
+	    kcryptd_async_done, dmreq_of_req(cc, req));
+
+	return req;
+}
+
+static void crypt_flush_batch(struct crypt_config *cc, struct list_head *batch)
+{
+	spin_lock(&cc->crypt_thread_spinlock);
+	list_splice_tail(batch, &cc->crypt_thread_list);
+	spin_unlock(&cc->crypt_thread_spinlock);
+	wake_up_all(&cc->crypt_thread_wait);
+	INIT_LIST_HEAD(batch);
+}
 
 /*
@@ -736,42 +821,46 @@ static int crypt_convert(struct crypt_co
 			 struct convert_context *ctx)
 {
 	int r;
+	LIST_HEAD(batch);
+	unsigned batch_count = 0;
 
 	atomic_set(&ctx->cc_pending, 1);
 
 	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
 	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
 
-		crypt_alloc_req(cc, ctx);
+		struct ablkcipher_request *req = crypt_alloc_req(cc, ctx, GFP_NOWAIT);
+		if (!req) {
+			/*
+			 * We must flush our request queue before we attempt
+			 * non-failing GFP_NOIO allocation.
+			 */
+			batch_count = 0;
+			crypt_flush_batch(cc, &batch);
+			req = crypt_alloc_req(cc, ctx, GFP_NOIO);
+		}
 
 		atomic_inc(&ctx->cc_pending);
 
-		r = crypt_convert_block(cc, ctx, ctx->req);
-
-		switch (r) {
-		/* async */
-		case -EBUSY:
-			/* fall through*/
-		case -EINPROGRESS:
-			ctx->req = NULL;
-			ctx->sector++;
-			continue;
-
-		/* sync */
-		case 0:
+		r = crypt_convert_block(cc, ctx, req, &batch);
+		if (unlikely(r < 0)) {
 			atomic_dec(&ctx->cc_pending);
-			ctx->sector++;
-			cond_resched();
-			continue;
+			goto flush_ret;
+		}
 
-		/* error */
-		default:
-			atomic_dec(&ctx->cc_pending);
-			return r;
+		ctx->sector++;
+
+		if (unlikely(++batch_count >= DMREQ_PUSH_BATCH)) {
+			batch_count = 0;
+			crypt_flush_batch(cc, &batch);
 		}
 	}
+	r = 0;
 
-	return 0;
+flush_ret:
+	crypt_flush_batch(cc, &batch);
+
+	return r;
 }
 
 static void dm_crypt_bio_destructor(struct bio *bio)
@@ -862,7 +951,6 @@ static struct dm_crypt_io *crypt_io_allo
 	io->sector = sector;
 	io->error = 0;
 	io->base_io = NULL;
-	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 
 	return io;
@@ -888,8 +976,6 @@ static void crypt_dec_pending(struct dm_
 	if (!atomic_dec_and_test(&io->io_pending))
 		return;
 
-	if (io->ctx.req)
-		mempool_free(io->ctx.req, cc->req_pool);
 	mempool_free(io, cc->io_pool);
 
 	if (likely(!base_io))
@@ -1164,6 +1250,7 @@ static void kcryptd_async_done(struct cr
 	struct crypt_config *cc = io->target->private;
 
 	if (error == -EINPROGRESS) {
+		complete(dmreq->busy_wait);
 		return;
 	}
 
@@ -1342,6 +1429,15 @@ static void crypt_dtr(struct dm_target *
 	if (!cc)
 		return;
 
+	if (cc->crypt_threads) {
+		int i;
+		for (i = 0; i < cc->crypt_threads_size; i++) {
+			if (cc->crypt_threads[i])
+				kthread_stop(cc->crypt_threads[i]);
+		}
+		kfree(cc->crypt_threads);
+	}
+
 	if (cc->io_queue)
 		destroy_workqueue(cc->io_queue);
 	if (cc->crypt_queue)
 		destroy_workqueue(cc->crypt_queue);
@@ -1535,6 +1631,7 @@ static int crypt_ctr(struct dm_target *t
 	int ret;
 	struct dm_arg_set as;
 	const char *opt_string;
+	int i;
 
 	static struct dm_arg _args[] = {
 		{0, 1, "Invalid number of feature args"},
 	};
@@ -1645,15 +1742,44 @@ static int crypt_ctr(struct dm_target *t
 	cc->crypt_queue = alloc_workqueue("kcryptd",
 					  WQ_NON_REENTRANT|
-					  WQ_CPU_INTENSIVE|
-					  WQ_MEM_RECLAIM|
-					  WQ_UNBOUND,
-					  num_online_cpus());
+					  WQ_MEM_RECLAIM,
+					  1);
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
 		goto bad;
 	}
 
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_online(i))
+			cc->crypt_threads_size = i + 1;
+
+	init_waitqueue_head(&cc->crypt_thread_wait);
+	spin_lock_init(&cc->crypt_thread_spinlock);
+	INIT_LIST_HEAD(&cc->crypt_thread_list);
+
+	cc->crypt_threads = kzalloc(cc->crypt_threads_size *
+				    sizeof(struct task_struct *), GFP_KERNEL);
+	if (!cc->crypt_threads) {
+		ti->error = "Couldn't allocate crypt threads";
+		goto bad;
+	}
+
+	for (i = 0; i < cc->crypt_threads_size; i++) {
+		if (cpu_online(i)) {
+			cc->crypt_threads[i] = kthread_create_on_node(
+				dmcrypt_thread, cc, cpu_to_node(i),
+				"dmcryptd/%d", i);
+			if (IS_ERR(cc->crypt_threads[i])) {
+				ret = PTR_ERR(cc->crypt_threads[i]);
+				cc->crypt_threads[i] = NULL;
+				ti->error = "Couldn't spawn thread";
+				goto bad;
+			}
+			kthread_bind(cc->crypt_threads[i], i);
+			wake_up_process(cc->crypt_threads[i]);
+		}
+	}
+
 	ti->num_flush_requests = 1;
 	ti->discard_zeroes_data_unsupported = 1;
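
Note for reviewers: the queueing scheme in the patch is a plain producer/consumer pattern, and the sketch below is only a minimal userspace analog of it (pthreads instead of kthreads; the names submit(), worker(), PULL_BATCH and NR_WORKERS are made up for the illustration and are not part of the patch). Submitters append items to a locked list and wake the workers; each worker pulls a bounded batch per wakeup, which is what DMREQ_PUSH_BATCH/DMREQ_PULL_BATCH do for dm_crypt_request above. It is not dm-crypt code.

/*
 * Userspace analog of the batched producer/consumer scheme:
 * a locked singly-linked queue, a condition variable to wait on,
 * and workers that pull up to PULL_BATCH items per wakeup.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define PULL_BATCH 16
#define NR_WORKERS 4

struct work {
	struct work *next;
	int sector;			/* stand-in for the real payload */
};

static struct work *queue_head, *queue_tail;
static int done;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_wait = PTHREAD_COND_INITIALIZER;

static void *worker(void *arg)
{
	long id = (long)arg;

	for (;;) {
		struct work *batch[PULL_BATCH];
		unsigned n = 0, i;

		/* sleep until there is work, then grab a bounded batch */
		pthread_mutex_lock(&queue_lock);
		while (!queue_head && !done)
			pthread_cond_wait(&queue_wait, &queue_lock);
		while (queue_head && n < PULL_BATCH) {
			batch[n++] = queue_head;
			queue_head = queue_head->next;
			if (!queue_head)
				queue_tail = NULL;
		}
		pthread_mutex_unlock(&queue_lock);

		if (!n)
			return NULL;	/* queue drained and shutdown requested */

		for (i = 0; i < n; i++) {
			/* a real consumer would run the cipher here */
			printf("worker %ld: sector %d\n", id, batch[i]->sector);
			free(batch[i]);
		}
	}
}

static void submit(int sector)
{
	struct work *w = malloc(sizeof(*w));

	if (!w)
		return;
	w->next = NULL;
	w->sector = sector;

	/* append under the lock, then wake a worker */
	pthread_mutex_lock(&queue_lock);
	if (queue_tail)
		queue_tail->next = w;
	else
		queue_head = w;
	queue_tail = w;
	pthread_mutex_unlock(&queue_lock);
	pthread_cond_signal(&queue_wait);
}

int main(void)
{
	pthread_t workers[NR_WORKERS];
	long i;

	for (i = 0; i < NR_WORKERS; i++)
		pthread_create(&workers[i], NULL, worker, (void *)i);

	for (i = 0; i < 256; i++)
		submit(i);

	/* request shutdown; workers drain what is left and exit */
	pthread_mutex_lock(&queue_lock);
	done = 1;
	pthread_mutex_unlock(&queue_lock);
	pthread_cond_broadcast(&queue_wait);

	for (i = 0; i < NR_WORKERS; i++)
		pthread_join(workers[i], NULL);
	return 0;
}

Pulling a bounded batch per wakeup keeps the lock hold time short while amortizing the wakeup cost over several requests, which is the purpose of the two batch constants in the patch.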