block copy: initial XCOPY offload support This is Martin Petersen's xcopy patch (https://git.kernel.org/cgit/linux/kernel/git/mkp/linux.git/commit/?h=xcopy&id=0bdeed274e16b3038a851552188512071974eea8) with some bug fixes, ported to the current kernel. This patch makes it possible to use the SCSI XCOPY command. We create a bio that has REQ_COPY flag in bi_rw and a bi_copy structure that defines the source device. The target device is defined in the bi_bdev and bi_iter.bi_sector. There is a new BLKCOPY ioctl that makes it possible to use XCOPY from userspace. The ioctl argument is a pointer to an array of four uint64_t values. The first value is a source byte offset, the second value is a destination byte offset, the third value is byte length. The fourth value is written by the kernel and it represents the number of bytes that the kernel actually copied. Signed-off-by: Martin K. Petersen Signed-off-by: Mikulas Patocka --- Documentation/ABI/testing/sysfs-block | 9 + block/bio.c | 2 block/blk-core.c | 4 block/blk-lib.c | 113 ++++++++++++++ block/blk-merge.c | 3 block/blk-settings.c | 13 + block/blk-sysfs.c | 12 + block/compat_ioctl.c | 1 block/ioctl.c | 61 +++++++ drivers/scsi/scsi.c | 57 +++++++ drivers/scsi/sd.c | 269 ++++++++++++++++++++++++++++++++++ drivers/scsi/sd.h | 4 include/linux/bio.h | 9 - include/linux/blk_types.h | 8 + include/linux/blkdev.h | 16 ++ include/scsi/scsi_device.h | 3 include/uapi/linux/fs.h | 1 17 files changed, 583 insertions(+), 2 deletions(-) Index: linux-4.11-rc2/Documentation/ABI/testing/sysfs-block =================================================================== --- linux-4.11-rc2.orig/Documentation/ABI/testing/sysfs-block +++ linux-4.11-rc2/Documentation/ABI/testing/sysfs-block @@ -248,6 +248,15 @@ Description: write_zeroes_max_bytes is 0, write zeroes is not supported by the device. +What: /sys/block//queue/copy_max_bytes +Date: January 2014 +Contact: Martin K. 
Petersen +Description: + Devices that support copy offloading will set this value + to indicate the maximum buffer size in bytes that can be + copied in one operation. If the copy_max_bytes is 0 the + device does not support copy offload. + What: /sys/block//queue/zoned Date: September 2016 Contact: Damien Le Moal Index: linux-4.11-rc2/block/blk-core.c =================================================================== --- linux-4.11-rc2.orig/block/blk-core.c +++ linux-4.11-rc2/block/blk-core.c @@ -1921,6 +1921,10 @@ generic_make_request_checks(struct bio * if (!bdev_write_zeroes_sectors(bio->bi_bdev)) goto not_supported; break; + case REQ_OP_COPY: + if (!bdev_copy_offload(bio->bi_bdev)) + goto not_supported; + break; default: break; } Index: linux-4.11-rc2/block/blk-lib.c =================================================================== --- linux-4.11-rc2.orig/block/blk-lib.c +++ linux-4.11-rc2/block/blk-lib.c @@ -9,6 +9,23 @@ #include "blk.h" +struct bio_batch { + atomic_t done; + int error; + struct completion *wait; +}; + +static void bio_batch_end_io(struct bio *bio) +{ + struct bio_batch *bb = bio->bi_private; + + if (bio->bi_error && bio->bi_error != -EOPNOTSUPP) + bb->error = bio->bi_error; + if (atomic_dec_and_test(&bb->done)) + complete(bb->wait); + bio_put(bio); +} + static struct bio *next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp) { @@ -380,3 +397,99 @@ int blkdev_issue_zeroout(struct block_de return ret; } EXPORT_SYMBOL(blkdev_issue_zeroout); + +/** + * blkdev_issue_copy - queue a copy same operation + * @src_bdev: source blockdev + * @src_sector: source sector + * @dst_bdev: destination blockdev + * @dst_sector: destination sector + * @nr_sects: number of sectors to copy + * @gfp_mask: memory allocation flags (for bio_alloc) + * + * Description: + * Copy a block range from source device to target device. 
+ */ +int blkdev_issue_copy(struct block_device *src_bdev, sector_t src_sector, + struct block_device *dst_bdev, sector_t dst_sector, + sector_t nr_sects, gfp_t gfp_mask) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *sq = bdev_get_queue(src_bdev); + struct request_queue *dq = bdev_get_queue(dst_bdev); + sector_t max_copy_sectors; + struct bio_batch bb; + int ret = 0; + + if (!sq || !dq) + return -ENXIO; + + max_copy_sectors = min(sq->limits.max_copy_sectors, + dq->limits.max_copy_sectors); + + if (unlikely(!max_copy_sectors)) + return -EOPNOTSUPP; + + if (unlikely(src_sector + nr_sects < src_sector) || + unlikely(dst_sector + nr_sects < dst_sector)) + return -EINVAL; + + /* Do not support overlapping copies */ + if (src_bdev == dst_bdev && + unlikely(abs((u64)dst_sector - (u64)src_sector) < nr_sects)) + return -EOPNOTSUPP; + + atomic_set(&bb.done, 1); + bb.error = 0; + bb.wait = &wait; + + while (nr_sects) { + struct bio *bio; + struct bio_copy *bc; + unsigned int chunk; + + bc = kmalloc(sizeof(struct bio_copy), gfp_mask); + if (!bc) { + ret = -ENOMEM; + break; + } + + bio = bio_alloc(gfp_mask, 0); + if (!bio) { + kfree(bc); + ret = -ENOMEM; + break; + } + + chunk = min(nr_sects, max_copy_sectors); + + bio_set_op_attrs(bio, REQ_OP_COPY, 0); + bio->bi_iter.bi_sector = dst_sector; + bio->bi_iter.bi_size = chunk << 9; + bio->bi_end_io = bio_batch_end_io; + bio->bi_bdev = dst_bdev; + bio->bi_private = &bb; + bio->bi_copy = bc; + + bc->bic_bdev = src_bdev; + bc->bic_sector = src_sector; + + atomic_inc(&bb.done); + submit_bio(bio); + + src_sector += chunk; + dst_sector += chunk; + nr_sects -= chunk; + } + + /* Wait for bios in-flight */ + if (!atomic_dec_and_test(&bb.done)) + wait_for_completion_io(&wait); + + if (likely(!ret)) + ret = bb.error; + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_copy); + Index: linux-4.11-rc2/block/blk-merge.c =================================================================== --- linux-4.11-rc2.orig/block/blk-merge.c +++ 
linux-4.11-rc2/block/blk-merge.c @@ -206,6 +206,8 @@ void blk_queue_split(struct request_queu case REQ_OP_WRITE_SAME: split = blk_bio_write_same_split(q, *bio, bs, &nsegs); break; + case REQ_OP_COPY: + return; default: split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); break; @@ -245,6 +247,7 @@ static unsigned int __blk_recalc_rq_segm case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: + case REQ_OP_COPY: return 0; case REQ_OP_WRITE_SAME: return 1; Index: linux-4.11-rc2/block/blk-settings.c =================================================================== --- linux-4.11-rc2.orig/block/blk-settings.c +++ linux-4.11-rc2/block/blk-settings.c @@ -98,6 +98,7 @@ void blk_set_default_limits(struct queue lim->chunk_sectors = 0; lim->max_write_same_sectors = 0; lim->max_write_zeroes_sectors = 0; + lim->max_copy_sectors = 0; lim->max_discard_sectors = 0; lim->max_hw_discard_sectors = 0; lim->discard_granularity = 0; @@ -318,6 +319,18 @@ void blk_queue_max_write_zeroes_sectors( EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors); /** + * blk_queue_max_copy_sectors - set max sectors for a single copy operation + * @q: the request queue for the device + * @max_copy_sectors: maximum number of sectors per copy operation + **/ +void blk_queue_max_copy_sectors(struct request_queue *q, + unsigned int max_copy_sectors) +{ + q->limits.max_copy_sectors = max_copy_sectors; +} +EXPORT_SYMBOL(blk_queue_max_copy_sectors); + +/** * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments Index: linux-4.11-rc2/block/blk-sysfs.c =================================================================== --- linux-4.11-rc2.orig/block/blk-sysfs.c +++ linux-4.11-rc2/block/blk-sysfs.c @@ -223,6 +223,12 @@ static ssize_t queue_write_zeroes_max_sh (unsigned long long)q->limits.max_write_zeroes_sectors << 9); } +static ssize_t queue_copy_max_show(struct request_queue *q, char 
*page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_copy_sectors << 9); +} + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -633,6 +639,11 @@ static struct queue_sysfs_entry queue_wr .show = queue_write_zeroes_max_show, }; +static struct queue_sysfs_entry queue_copy_max_entry = { + .attr = {.name = "copy_max_bytes", .mode = S_IRUGO }, + .show = queue_copy_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_show_nonrot, @@ -724,6 +735,7 @@ static struct attribute *default_attrs[] &queue_discard_zeroes_data_entry.attr, &queue_write_same_max_entry.attr, &queue_write_zeroes_max_entry.attr, + &queue_copy_max_entry.attr, &queue_nonrot_entry.attr, &queue_zoned_entry.attr, &queue_nomerges_entry.attr, Index: linux-4.11-rc2/block/ioctl.c =================================================================== --- linux-4.11-rc2.orig/block/ioctl.c +++ linux-4.11-rc2/block/ioctl.c @@ -258,6 +258,65 @@ static int blk_ioctl_zeroout(struct bloc false); } +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode, + unsigned long arg) +{ + uint64_t range[4]; + sector_t src_offset, dst_offset, len; + int ret; + + range[3] = 0; + + if (unlikely(copy_to_user((void __user *)(arg + 24), &range[3], 8))) + return -EFAULT; + + if (unlikely(!(mode & FMODE_WRITE))) + return -EBADF; + + if (unlikely(copy_from_user(range, (void __user *)arg, 24))) + return -EFAULT; + + if (unlikely((range[0] & 511) != 0)) + return -EINVAL; + if (unlikely((range[1] & 511) != 0)) + return -EINVAL; + if (unlikely((range[2] & 511) != 0)) + return -EINVAL; + range[0] >>= 9; + range[1] >>= 9; + range[2] >>= 9; + + src_offset = range[0]; + dst_offset = range[1]; + len = range[2]; + + if (sizeof(sector_t) < sizeof(uint64_t)) { + if (unlikely(src_offset != range[0]) || + unlikely(dst_offset != range[1]) || + unlikely(len != range[2])) + return 
-EINVAL; + } + + if (unlikely(src_offset + len < src_offset) || + unlikely(src_offset + len > (i_size_read(bdev->bd_inode) >> 9))) + return -EINVAL; + + if (unlikely(dst_offset + len < dst_offset) || + unlikely(dst_offset + len > (i_size_read(bdev->bd_inode) >> 9))) + return -EINVAL; + + ret = blkdev_issue_copy(bdev, src_offset, bdev, dst_offset, len, + GFP_KERNEL); + + if (!ret) { + range[3] = range[2] << 9; + if (unlikely(copy_to_user((void __user *)(arg + 24), &range[3], 8))) + return -EFAULT; + } + + return ret; +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -521,6 +580,8 @@ int blkdev_ioctl(struct block_device *bd BLKDEV_DISCARD_SECURE); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); + case BLKCOPY: + return blk_ioctl_copy(bdev, mode, arg); case BLKREPORTZONE: return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); case BLKRESETZONE: Index: linux-4.11-rc2/drivers/scsi/scsi.c =================================================================== --- linux-4.11-rc2.orig/drivers/scsi/scsi.c +++ linux-4.11-rc2/drivers/scsi/scsi.c @@ -419,6 +419,62 @@ int scsi_get_vpd_page(struct scsi_device EXPORT_SYMBOL_GPL(scsi_get_vpd_page); /** + * scsi_lookup_naa - Lookup NAA descriptor in VPD page 0x83 + * @sdev: The device to ask + * + * Copy offloading requires us to know the NAA descriptor for both + * source and target device. This descriptor is mandatory in the Device + * Identification VPD page. Locate this descriptor in the returned VPD + * data so we don't have to do lookups for every copy command. 
+ */ +static void scsi_lookup_naa(struct scsi_device *sdev) +{ + unsigned char *buf = sdev->vpd_pg83; + unsigned int len = sdev->vpd_pg83_len; + + if (buf[1] != 0x83 || get_unaligned_be16(&buf[2]) == 0) { + sdev_printk(KERN_ERR, sdev, + "%s: VPD page 0x83 contains no descriptors\n", + __func__); + return; + } + + buf += 4; + len -= 4; + + do { + unsigned int desig_len = buf[3] + 4; + + /* Binary code set */ + if ((buf[0] & 0xf) != 1) + goto skip; + + /* Target association */ + if ((buf[1] >> 4) & 0x3) + goto skip; + + /* NAA designator */ + if ((buf[1] & 0xf) != 0x3) + goto skip; + + sdev->naa = buf; + sdev->naa_len = desig_len; + + return; + + skip: + buf += desig_len; + len -= desig_len; + + } while (len > 0); + + sdev_printk(KERN_ERR, sdev, + "%s: VPD page 0x83 NAA descriptor not found\n", __func__); + + return; +} + +/** * scsi_attach_vpd - Attach Vital Product Data to a SCSI device structure * @sdev: The device to ask * @@ -513,6 +569,7 @@ retry_pg83: orig_vpd_buf = sdev->vpd_pg83; sdev->vpd_pg83_len = result; rcu_assign_pointer(sdev->vpd_pg83, vpd_buf); + scsi_lookup_naa(sdev); mutex_unlock(&sdev->inquiry_mutex); synchronize_rcu(); if (orig_vpd_buf) Index: linux-4.11-rc2/drivers/scsi/sd.c =================================================================== --- linux-4.11-rc2.orig/drivers/scsi/sd.c +++ linux-4.11-rc2/drivers/scsi/sd.c @@ -103,6 +103,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); static void sd_config_discard(struct scsi_disk *, unsigned int); static void sd_config_write_same(struct scsi_disk *); +static void sd_config_copy(struct scsi_disk *); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static int sd_probe(struct device *); @@ -486,6 +487,48 @@ max_write_same_blocks_store(struct devic } static DEVICE_ATTR_RW(max_write_same_blocks); +static ssize_t +max_copy_blocks_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + 
return snprintf(buf, 20, "%u\n", sdkp->max_copy_blocks); +} + +static ssize_t +max_copy_blocks_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + unsigned long max; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (sdp->type != TYPE_DISK) + return -EINVAL; + + err = kstrtoul(buf, 10, &max); + + if (err) + return err; + + if (max == 0) + sdp->no_copy = 1; + else if (max <= SD_MAX_COPY_BLOCKS) { + sdp->no_copy = 0; + sdkp->max_copy_blocks = max; + } + + sd_config_copy(sdkp); + + return count; +} +static DEVICE_ATTR_RW(max_copy_blocks); + static struct attribute *sd_disk_attrs[] = { &dev_attr_cache_type.attr, &dev_attr_FUA.attr, @@ -497,6 +540,7 @@ static struct attribute *sd_disk_attrs[] &dev_attr_thin_provisioning.attr, &dev_attr_provisioning_mode.attr, &dev_attr_max_write_same_blocks.attr, + &dev_attr_max_copy_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, NULL, }; @@ -886,6 +930,118 @@ static int sd_setup_write_same_cmnd(stru return ret; } +static void sd_config_copy(struct scsi_disk *sdkp) +{ + struct request_queue *q = sdkp->disk->queue; + unsigned int logical_block_size = sdkp->device->sector_size; + + if (sdkp->device->no_copy) + sdkp->max_copy_blocks = 0; + + /* Segment descriptor 0x02 has a 64k block limit */ + sdkp->max_copy_blocks = min(sdkp->max_copy_blocks, + (u32)SD_MAX_CSD2_BLOCKS); + + blk_queue_max_copy_sectors(q, sdkp->max_copy_blocks * + (logical_block_size >> 9)); +} + +static int sd_setup_copy_cmnd(struct scsi_cmnd *cmd) +{ + struct request *rq = cmd->request; + struct scsi_device *src_sdp, *dst_sdp; + struct gendisk *src_disk; + struct request_queue *src_queue, *dst_queue; + sector_t src_lba, dst_lba; + unsigned int nr_blocks, len; + int ret; + struct bio *bio = rq->bio; + struct page *page; + unsigned char *buf; + + if (!bio->bi_copy) + return BLKPREP_KILL; + + dst_sdp = 
scsi_disk(rq->rq_disk)->device; + dst_queue = rq->rq_disk->queue; + src_disk = bio->bi_copy->bic_bdev->bd_disk; + src_queue = src_disk->queue; + if (!src_queue || + src_queue->make_request_fn != dst_queue->make_request_fn || + src_queue->request_fn != dst_queue->request_fn || + *(struct scsi_driver **)rq->rq_disk->private_data != + *(struct scsi_driver **)src_disk->private_data) + return BLKPREP_KILL; + src_sdp = scsi_disk(src_disk)->device; + + if (src_sdp->no_copy || dst_sdp->no_copy) + return BLKPREP_KILL; + + if (src_sdp->sector_size != dst_sdp->sector_size) + return BLKPREP_KILL; + + dst_lba = blk_rq_pos(rq) >> (ilog2(dst_sdp->sector_size) - 9); + src_lba = bio->bi_copy->bic_sector >> (ilog2(src_sdp->sector_size) - 9); + nr_blocks = blk_rq_sectors(rq) >> (ilog2(dst_sdp->sector_size) - 9); + + page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!page) + return BLKPREP_DEFER; + + buf = page_address(page); + + /* Extended Copy (LID1) Parameter List (16 bytes) */ + buf[0] = 0; /* LID */ + buf[1] = 3 << 3; /* LID usage 11b */ + put_unaligned_be16(32 + 32, &buf[2]); /* 32 bytes per E4 desc. */ + put_unaligned_be32(28, &buf[8]); /* 28 bytes per B2B desc. */ + buf += 16; + + /* Source CSCD (32 bytes) */ + buf[0] = 0xe4; /* Identification desc. */ + memcpy(&buf[4], src_sdp->naa, src_sdp->naa_len); + buf += 32; + + /* Destination CSCD (32 bytes) */ + buf[0] = 0xe4; /* Identification desc. */ + memcpy(&buf[4], dst_sdp->naa, dst_sdp->naa_len); + buf += 32; + + /* Segment descriptor (28 bytes) */ + buf[0] = 0x02; /* Block to block desc. */ + put_unaligned_be16(0x18, &buf[2]); /* Descriptor length */ + put_unaligned_be16(0, &buf[4]); /* Source is desc. 0 */ + put_unaligned_be16(1, &buf[6]); /* Dest. is desc. 
1 */ + put_unaligned_be16(nr_blocks, &buf[10]); + put_unaligned_be64(src_lba, &buf[12]); + put_unaligned_be64(dst_lba, &buf[20]); + + /* CDB */ + cmd->cmd_len = 16; + memset(cmd->cmnd, 0, cmd->cmd_len); + cmd->cmnd[0] = EXTENDED_COPY; + cmd->cmnd[1] = 0; /* LID1 */ + len = 16 + 32 + 32 + 28; + put_unaligned_be32(len, &cmd->cmnd[10]); + + rq->timeout = SD_COPY_TIMEOUT; + cmd->transfersize = len; + cmd->allowed = 0; /* don't retry */ + + rq->special_vec.bv_page = page; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = len; + + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + scsi_req(rq)->resid_len = len; + + ret = scsi_init_io(cmd); + + if (ret != BLKPREP_OK) + __free_page(page); + return ret; +} + static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; @@ -1163,6 +1319,8 @@ static int sd_init_command(struct scsi_c case REQ_OP_READ: case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); + case REQ_OP_COPY: + return sd_setup_copy_cmnd(cmd); case REQ_OP_ZONE_REPORT: return sd_zbc_setup_report_cmnd(cmd); case REQ_OP_ZONE_RESET: @@ -1794,6 +1952,7 @@ static int sd_done(struct scsi_cmnd *SCp switch (req_op(req)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_SAME: + case REQ_OP_COPY: case REQ_OP_ZONE_RESET: if (!result) { good_bytes = blk_rq_bytes(req); @@ -1852,6 +2011,16 @@ static int sd_done(struct scsi_cmnd *SCp /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { switch (op) { + case EXTENDED_COPY: + if ((SCpnt->cmnd[1] & 0x1f) == 0) { + sdkp->device->no_copy = 1; + sd_config_copy(sdkp); + + good_bytes = 0; + req->__data_len = blk_rq_bytes(req); + req->rq_flags |= RQF_QUIET; + } + break; case UNMAP: sd_config_discard(sdkp, SD_LBP_DISABLE); break; @@ -2857,6 +3026,105 @@ static void sd_read_write_same(struct sc sdkp->ws10 = 1; } +static void sd_read_copy_operations(struct scsi_disk *sdkp, + unsigned char *buffer) +{ + struct scsi_device *sdev = sdkp->device; + struct scsi_sense_hdr sshdr; + 
unsigned char cdb[16]; + unsigned int result, len, i; + bool b2b_desc = false, id_desc = false; + + if (sdev->naa_len == 0) + return; + + /* Verify that the device has 3PC set in INQUIRY response */ + if (sdev->inquiry_len < 6 || (sdev->inquiry[5] & (1 << 3)) == 0) + return; + + /* Receive Copy Operation Parameters */ + memset(cdb, 0, 16); + cdb[0] = RECEIVE_COPY_RESULTS; + cdb[1] = 0x3; + put_unaligned_be32(SD_BUF_SIZE, &cdb[10]); + + memset(buffer, 0, SD_BUF_SIZE); + result = scsi_execute_req(sdev, cdb, DMA_FROM_DEVICE, + buffer, SD_BUF_SIZE, &sshdr, + SD_TIMEOUT, SD_MAX_RETRIES, NULL); + + if (!scsi_status_is_good(result)) { + sd_printk(KERN_ERR, sdkp, + "%s: Receive Copy Operating Parameters failed\n", + __func__); + return; + } + + /* The RCOP response is a minimum of 44 bytes long. First 4 + * bytes contain the length of the remaining buffer, i.e. 40+ + * bytes. Trailing the defined fields is a list of supported + * descriptors. We need at least 2 descriptors to drive the + * target, hence 42. + */ + len = get_unaligned_be32(&buffer[0]); + if (len < 42) { + sd_printk(KERN_ERR, sdkp, "%s: result too short (%u)\n", + __func__, len); + return; + } + + if ((buffer[4] & 1) == 0) { + sd_printk(KERN_ERR, sdkp, "%s: does not support SNLID\n", + __func__); + return; + } + + if (get_unaligned_be16(&buffer[8]) < 2) { + sd_printk(KERN_ERR, sdkp, + "%s: Need 2 or more CSCD descriptors\n", __func__); + return; + } + + if (get_unaligned_be16(&buffer[10]) < 1) { + sd_printk(KERN_ERR, sdkp, + "%s: Need 1 or more segment descriptor\n", __func__); + return; + } + + if (len - 40 != buffer[43]) { + sd_printk(KERN_ERR, sdkp, + "%s: Buffer len and descriptor count mismatch " \ + "(%u vs. 
%u)\n", __func__, len - 40, buffer[43]); + return; + } + + for (i = 44 ; i < len + 4 ; i++) { + if (buffer[i] == 0x02) + b2b_desc = true; + + if (buffer[i] == 0xe4) + id_desc = true; + } + + if (!b2b_desc) { + sd_printk(KERN_ERR, sdkp, + "%s: No block 2 block descriptor (0x02)\n", + __func__); + return; + } + + if (!id_desc) { + sd_printk(KERN_ERR, sdkp, + "%s: No identification descriptor (0xE4)\n", + __func__); + return; + } + + sdkp->max_copy_blocks = get_unaligned_be32(&buffer[16]) + >> ilog2(sdev->sector_size); + sd_config_copy(sdkp); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -2910,6 +3178,7 @@ static int sd_revalidate_disk(struct gen sd_read_cache_type(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); + sd_read_copy_operations(sdkp, buffer); } sdkp->first_scan = 0; Index: linux-4.11-rc2/drivers/scsi/sd.h =================================================================== --- linux-4.11-rc2.orig/drivers/scsi/sd.h +++ linux-4.11-rc2/drivers/scsi/sd.h @@ -19,6 +19,7 @@ */ #define SD_FLUSH_TIMEOUT_MULTIPLIER 2 #define SD_WRITE_SAME_TIMEOUT (120 * HZ) +#define SD_COPY_TIMEOUT (120 * HZ) /* * Number of allowed retries @@ -48,6 +49,8 @@ enum { SD_MAX_XFER_BLOCKS = 0xffffffff, SD_MAX_WS10_BLOCKS = 0xffff, SD_MAX_WS16_BLOCKS = 0x7fffff, + SD_MAX_CSD2_BLOCKS = 0xffff, + SD_MAX_COPY_BLOCKS = 0xffffffff, }; enum { @@ -79,6 +82,7 @@ struct scsi_disk { u32 opt_xfer_blocks; u32 max_ws_blocks; u32 max_unmap_blocks; + u32 max_copy_blocks; u32 unmap_granularity; u32 unmap_alignment; u32 index; Index: linux-4.11-rc2/include/linux/bio.h =================================================================== --- linux-4.11-rc2.orig/include/linux/bio.h +++ linux-4.11-rc2/include/linux/bio.h @@ -77,7 +77,8 @@ static inline bool bio_has_data(struct b bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && bio_op(bio) != REQ_OP_SECURE_ERASE && - bio_op(bio) != 
REQ_OP_WRITE_ZEROES) + bio_op(bio) != REQ_OP_WRITE_ZEROES && + bio_op(bio) != REQ_OP_COPY) return true; return false; @@ -88,11 +89,14 @@ static inline bool bio_no_advance_iter(s return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || bio_op(bio) == REQ_OP_WRITE_SAME || - bio_op(bio) == REQ_OP_WRITE_ZEROES; + bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_COPY; } static inline bool bio_mergeable(struct bio *bio) { + if (bio_op(bio) == REQ_OP_COPY) + return false; if (bio->bi_opf & REQ_NOMERGE_FLAGS) return false; @@ -200,6 +204,7 @@ static inline unsigned __bio_segments(st case REQ_OP_WRITE_ZEROES: return 0; case REQ_OP_WRITE_SAME: + case REQ_OP_COPY: return 1; default: break; Index: linux-4.11-rc2/include/linux/blk_types.h =================================================================== --- linux-4.11-rc2.orig/include/linux/blk_types.h +++ linux-4.11-rc2/include/linux/blk_types.h @@ -64,6 +64,7 @@ struct bio { struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif }; + struct bio_copy *bi_copy; /* TODO, use bi_integrity */ unsigned short bi_vcnt; /* how many bio_vec's */ @@ -89,6 +90,11 @@ struct bio { #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) +struct bio_copy { + struct block_device *bic_bdev; + sector_t bic_sector; +}; + /* * bio flags */ @@ -161,6 +167,8 @@ enum req_opf { REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 8, + /* copy offload */ + REQ_OP_COPY = 9, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, Index: linux-4.11-rc2/include/linux/blkdev.h =================================================================== --- linux-4.11-rc2.orig/include/linux/blkdev.h +++ linux-4.11-rc2/include/linux/blkdev.h @@ -326,6 +326,7 @@ struct queue_limits { unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_copy_sectors; unsigned int discard_granularity; 
unsigned int discard_alignment; @@ -1156,6 +1157,8 @@ extern void blk_queue_max_write_same_sec unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); +extern void blk_queue_max_copy_sectors(struct request_queue *q, + unsigned int max_copy_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, @@ -1347,6 +1350,9 @@ extern int __blkdev_issue_zeroout(struct bool discard); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard); +extern int blkdev_issue_copy(struct block_device *src_bdev, sector_t src_sector, + struct block_device *dst_bdev, sector_t dst_sector, + sector_t nr_sects, gfp_t gfp_gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { @@ -1592,6 +1598,16 @@ static inline unsigned int bdev_zone_sec return 0; } + +static inline unsigned int bdev_copy_offload(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return q->limits.max_copy_sectors; + + return 0; +} static inline int queue_dma_alignment(struct request_queue *q) { Index: linux-4.11-rc2/include/scsi/scsi_device.h =================================================================== --- linux-4.11-rc2.orig/include/scsi/scsi_device.h +++ linux-4.11-rc2/include/scsi/scsi_device.h @@ -126,6 +126,8 @@ struct scsi_device { unsigned char __rcu *vpd_pg83; int vpd_pg80_len; unsigned char __rcu *vpd_pg80; + unsigned char naa_len; + unsigned char *naa; unsigned char current_tag; /* current tag */ struct scsi_target *sdev_target; /* used only for single_lun */ @@ -156,6 +158,7 @@ struct scsi_device { unsigned use_10_for_ms:1; /* first try 10-byte 
mode sense/select */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ + unsigned no_copy:1; /* no copy offload */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ Index: linux-4.11-rc2/block/compat_ioctl.c =================================================================== --- linux-4.11-rc2.orig/block/compat_ioctl.c +++ linux-4.11-rc2/block/compat_ioctl.c @@ -696,6 +696,7 @@ long compat_blkdev_ioctl(struct file *fi * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: + case BLKCOPY: return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: Index: linux-4.11-rc2/block/bio.c =================================================================== --- linux-4.11-rc2.orig/block/bio.c +++ linux-4.11-rc2/block/bio.c @@ -243,6 +243,8 @@ static void __bio_free(struct bio *bio) { bio_disassociate_task(bio); + kfree(bio->bi_copy); + if (bio_integrity(bio)) bio_integrity_free(bio); } Index: linux-4.11-rc2/include/uapi/linux/fs.h =================================================================== --- linux-4.11-rc2.orig/include/uapi/linux/fs.h +++ linux-4.11-rc2/include/uapi/linux/fs.h @@ -226,6 +226,7 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKCOPY _IO(0x12,128) /* * A jump here: 130-131 are reserved for zoned block devices * (see uapi/linux/blkzoned.h)