From deed78c65eeb21461948c50e7c0ea2447cb313a4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 9 May 2014 17:03:15 -0400 Subject: [RHEL6.6 PATCH 02/11] dm crypt: scale to multiple cpus BZ: 1076147 RHEL6 doesn't have upstream's alloc_workqueue or any of the WQ_* flags (e.g. WQ_CPU_INTENSIVE) so the code was ported to use create_workqueue(). It was also necessary to hand-patch the changes to crypt_set_key, crypt_wipe_key, and crypt_dtr due to rhel6.git commit afb3521f ("[dm] wipe dm-ioctl buffers") having already applied changes that went upstream after this upstream commit. Upstream commit c029772125594e31eb1a5ad9e0913724ed9891f2 Author: Andi Kleen Date: Thu Jan 13 19:59:53 2011 +0000 dm crypt: scale to multiple cpus Currently dm-crypt does all the encryption work for a single dm-crypt mapping in a single workqueue. This does not scale well when multiple CPUs are submitting IO at a high rate. The single CPU running the single thread cannot keep up with the encryption, and encrypted IO performance tanks. This patch changes the crypto workqueue to be per CPU. This means that as long as the IO submitters (or the interrupt target CPUs for reads) run on different CPUs, the encryption work will also be done in parallel. To avoid a bottleneck on the IO worker I also changed those to be per-CPU threads. There is still some shared data, so I suspect some bouncing cache lines. But I haven't done a detailed study on that yet. 
Signed-off-by: Andi Kleen Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 252 +++++++++++++++++++++++++++++++++++++------------ 1 files changed, 192 insertions(+), 60 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 17c1eb3..821538b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,6 @@ struct crypt_iv_operations { }; struct iv_essiv_private { - struct crypto_cipher *tfm; struct crypto_hash *hash_tfm; u8 *salt; }; @@ -90,6 +90,22 @@ struct iv_benbi_private { * and encrypts / decrypts at the same time. */ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; + +/* + * Duplicated per-CPU state for cipher. + */ +struct crypt_cpu { + struct ablkcipher_request *req; + struct crypto_ablkcipher *tfm; + + /* ESSIV: struct crypto_cipher *essiv_tfm */ + void *iv_private; +}; + +/* + * The fields in here must be read only after initialization, + * changing state should be in crypt_cpu. + */ struct crypt_config { struct dm_dev *dev; sector_t start; @@ -118,6 +134,12 @@ struct crypt_config { unsigned int iv_size; /* + * Duplicated per cpu state. Access through + * per_cpu_ptr() only. + */ + struct crypt_cpu __percpu *cpu; + + /* * Layout of each crypto request: * * struct ablkcipher_request @@ -131,9 +153,7 @@ struct crypt_config { * correctly aligned. */ unsigned int dmreq_start; - struct ablkcipher_request *req; - struct crypto_ablkcipher *tfm; unsigned long flags; unsigned int key_size; u8 key[0]; @@ -147,6 +167,19 @@ static struct kmem_cache *_crypt_io_pool; static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); +static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) +{ + return this_cpu_ptr(cc->cpu); +} + +/* + * Use this to access cipher attributes that are the same for each CPU. 
+ */ +static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) +{ + return __this_cpu_ptr(cc->cpu)->tfm; +} + /* * Different IV generation algorithms: * @@ -193,7 +226,8 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; struct hash_desc desc; struct scatterlist sg; - int err; + struct crypto_cipher *essiv_tfm; + int err, cpu; sg_init_one(&sg, cc->key, cc->key_size); desc.tfm = essiv->hash_tfm; @@ -203,8 +237,16 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) if (err) return err; - return crypto_cipher_setkey(essiv->tfm, essiv->salt, + for_each_possible_cpu(cpu) { + essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, + + err = crypto_cipher_setkey(essiv_tfm, essiv->salt, crypto_hash_digestsize(essiv->hash_tfm)); + if (err) + return err; + } + + return 0; } /* Wipe salt and reset key derived from volume key */ @@ -212,24 +254,76 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); + struct crypto_cipher *essiv_tfm; + int cpu, r, err = 0; memset(essiv->salt, 0, salt_size); - return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size); + for_each_possible_cpu(cpu) { + essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; + r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); + if (r) + err = r; + } + + return err; +} + +/* Set up per cpu cipher state */ +static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, + struct dm_target *ti, + u8 *salt, unsigned saltsize) +{ + struct crypto_cipher *essiv_tfm; + int err; + + /* Setup the essiv_tfm with the given salt */ + essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(essiv_tfm)) { + ti->error = "Error allocating crypto tfm for ESSIV"; + return essiv_tfm; + } + + if (crypto_cipher_blocksize(essiv_tfm) != + crypto_ablkcipher_ivsize(any_tfm(cc))) { + ti->error = 
"Block size of ESSIV cipher does " + "not match IV size of block cipher"; + crypto_free_cipher(essiv_tfm); + return ERR_PTR(-EINVAL); + } + + err = crypto_cipher_setkey(essiv_tfm, salt, saltsize); + if (err) { + ti->error = "Failed to set key for ESSIV cipher"; + crypto_free_cipher(essiv_tfm); + return ERR_PTR(err); + } + + return essiv_tfm; } static void crypt_iv_essiv_dtr(struct crypt_config *cc) { + int cpu; + struct crypt_cpu *cpu_cc; + struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - crypto_free_cipher(essiv->tfm); - essiv->tfm = NULL; - crypto_free_hash(essiv->hash_tfm); essiv->hash_tfm = NULL; kzfree(essiv->salt); essiv->salt = NULL; + + for_each_possible_cpu(cpu) { + cpu_cc = per_cpu_ptr(cc->cpu, cpu); + essiv_tfm = cpu_cc->iv_private; + + if (essiv_tfm) + crypto_free_cipher(essiv_tfm); + + cpu_cc->iv_private = NULL; + } } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, @@ -238,7 +332,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, struct crypto_cipher *essiv_tfm = NULL; struct crypto_hash *hash_tfm = NULL; u8 *salt = NULL; - int err; + int err, cpu; if (!opts) { ti->error = "Digest algorithm missing for ESSIV mode"; @@ -260,30 +354,22 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, goto bad; } - /* Allocate essiv_tfm */ - essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(essiv_tfm)) { - ti->error = "Error allocating crypto tfm for ESSIV"; - err = PTR_ERR(essiv_tfm); - goto bad; - } - if (crypto_cipher_blocksize(essiv_tfm) != - crypto_ablkcipher_ivsize(cc->tfm)) { - ti->error = "Block size of ESSIV cipher does " - "not match IV size of block cipher"; - err = -EINVAL; - goto bad; - } - cc->iv_gen_private.essiv.salt = salt; - cc->iv_gen_private.essiv.tfm = essiv_tfm; cc->iv_gen_private.essiv.hash_tfm = hash_tfm; + for_each_possible_cpu(cpu) { + essiv_tfm = setup_essiv_cpu(cc, ti, salt, + 
crypto_hash_digestsize(hash_tfm)); + if (IS_ERR(essiv_tfm)) { + crypt_iv_essiv_dtr(cc); + return PTR_ERR(essiv_tfm); + } + per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; + } + return 0; bad: - if (essiv_tfm && !IS_ERR(essiv_tfm)) - crypto_free_cipher(essiv_tfm); if (hash_tfm && !IS_ERR(hash_tfm)) crypto_free_hash(hash_tfm); kfree(salt); @@ -292,16 +378,19 @@ bad: static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { + struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; + memset(iv, 0, cc->iv_size); *(u64 *)iv = cpu_to_le64(sector); - crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv); + crypto_cipher_encrypt_one(essiv_tfm, iv, iv); + return 0; } static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_ablkcipher_blocksize(cc->tfm); + unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc)); int log = ilog2(bs); /* we need to calculate how far we must shift the sector count @@ -410,7 +499,7 @@ static int crypt_convert_block(struct crypt_config *cc, dmreq = dmreq_of_req(cc, req); iv = (u8 *)ALIGN((unsigned long)(dmreq + 1), - crypto_ablkcipher_alignmask(cc->tfm) + 1); + crypto_ablkcipher_alignmask(any_tfm(cc)) + 1); dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); @@ -452,16 +541,19 @@ static int crypt_convert_block(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); + static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { - if (!cc->req) - cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(cc->req, cc->tfm); - ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, - kcryptd_async_done, - dmreq_of_req(cc, cc->req)); + struct crypt_cpu *this_cc = this_crypt_config(cc); + + if (!this_cc->req) + this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); + + ablkcipher_request_set_tfm(this_cc->req, 
this_cc->tfm); + ablkcipher_request_set_callback(this_cc->req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); } /* @@ -470,6 +562,7 @@ static void crypt_alloc_req(struct crypt_config *cc, static int crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { + struct crypt_cpu *this_cc = this_crypt_config(cc); int r; atomic_set(&ctx->pending, 1); @@ -481,7 +574,7 @@ static int crypt_convert(struct crypt_config *cc, atomic_inc(&ctx->pending); - r = crypt_convert_block(cc, ctx, cc->req); + r = crypt_convert_block(cc, ctx, this_cc->req); switch (r) { /* async */ @@ -490,7 +583,7 @@ static int crypt_convert(struct crypt_config *cc, INIT_COMPLETION(ctx->restart); /* fall through*/ case -EINPROGRESS: - cc->req = NULL; + this_cc->req = NULL; ctx->sector++; continue; @@ -648,6 +741,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * They must be separated as otherwise the final stages could be * starved by new requests which can block in the first stages due * to memory allocation. + * + * The work is done per CPU global for all dm-crypt instances. + * They should not depend on each other and do not block. 
*/ static void crypt_endio(struct bio *clone, int error) { @@ -968,6 +1064,20 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) } } +static int crypt_setkey_allcpus(struct crypt_config *cc) +{ + int cpu, err = 0, r; + + for_each_possible_cpu(cpu) { + r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfm, + cc->key, cc->key_size); + if (r) + err = r; + } + + return err; +} + static int crypt_set_key(struct crypt_config *cc, char *key) { int r = -EINVAL; @@ -984,9 +1094,10 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - r = crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size); - if (!r) - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + + r = crypt_setkey_allcpus(cc); + out: /* Hex key string not needed after here, so wipe it. */ memset(key, '0', key_string_len); @@ -998,12 +1109,15 @@ static int crypt_wipe_key(struct crypt_config *cc) { clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); memset(&cc->key, 0, cc->key_size * sizeof(u8)); - return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size); + + return crypt_setkey_allcpus(cc); } static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = ti->private; + struct crypt_cpu *cpu_cc; + int cpu; ti->private = NULL; @@ -1015,6 +1129,15 @@ static void crypt_dtr(struct dm_target *ti) if (cc->crypt_queue) destroy_workqueue(cc->crypt_queue); + if (cc->cpu) + for_each_possible_cpu(cpu) { + cpu_cc = per_cpu_ptr(cc->cpu, cpu); + if (cpu_cc->req) + mempool_free(cpu_cc->req, cc->req_pool); + if (cpu_cc->tfm) + crypto_free_ablkcipher(cpu_cc->tfm); + } + if (cc->bs) bioset_free(cc->bs); @@ -1028,12 +1151,12 @@ static void crypt_dtr(struct dm_target *ti) if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); - if (cc->tfm && !IS_ERR(cc->tfm)) - crypto_free_ablkcipher(cc->tfm); - if (cc->dev) dm_put_device(ti, cc->dev); + if (cc->cpu) + 
free_percpu(cc->cpu); + kzfree(cc->cipher); kzfree(cc->cipher_string); @@ -1045,9 +1168,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key) { struct crypt_config *cc = ti->private; + struct crypto_ablkcipher *tfm; char *tmp, *cipher, *chainmode, *ivmode, *ivopts; char *cipher_api = NULL; - int ret = -EINVAL; + int cpu, ret = -EINVAL; /* Convert to crypto api definition? */ if (strchr(cipher_in, '(')) { @@ -1077,6 +1201,12 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); + cc->cpu = alloc_percpu(struct crypt_cpu); + if (!cc->cpu) { + ti->error = "Cannot allocate per cpu state"; + goto bad_mem; + } + /* * For compatibility with the original dm-crypt mapping format, if * only the cipher name is supplied, use cbc-plain. @@ -1103,11 +1233,14 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Allocate cipher */ - cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0); - if (IS_ERR(cc->tfm)) { - ret = PTR_ERR(cc->tfm); - ti->error = "Error allocating crypto tfm"; - goto bad; + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + ti->error = "Error allocating crypto tfm"; + goto bad; + } + per_cpu_ptr(cc->cpu, cpu)->tfm = tfm; } /* Initialize and set key */ @@ -1118,7 +1251,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Initialize IV */ - cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm); + cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, @@ -1213,9 +1346,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->dmreq_start = sizeof(struct ablkcipher_request); - cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm); + cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); - 
cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) & + cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) & ~(crypto_tfm_ctx_alignment() - 1); cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + @@ -1224,7 +1357,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Cannot allocate crypt request mempool"; goto bad; } - cc->req = NULL; cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); if (!cc->page_pool) { @@ -1257,13 +1389,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->start = tmpll; ret = -ENOMEM; - cc->io_queue = create_singlethread_workqueue("kcryptd_io"); + cc->io_queue = create_workqueue("kcryptd_io"); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } - cc->crypt_queue = create_singlethread_workqueue("kcryptd"); + cc->crypt_queue = create_workqueue("kcryptd"); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; goto bad; @@ -1423,7 +1555,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 8, 0}, + .version = {1, 9, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- 1.7.4.4