diff --git a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
new file mode 100644
index 0000000..75558cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
@@ -0,0 +1,12 @@
+DaVinci Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "ti,davinci-wdt"
+- reg : Should contain WDT registers location and length
+
+Examples:
+
+wdt: wdt@2320000 {
+	compatible = "ti,davinci-wdt";
+	reg = <0x02320000 0x80>;
+};
diff --git a/Makefile b/Makefile
index 4fe0559..275b956 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
-PATCHLEVEL = 7
+PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Terrified Chipmunk
 
 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index d077ef8..e44da40 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -42,11 +42,10 @@ dtb-$(CONFIG_ARCH_DOVE) += dove-cm-a510.dtb \
 dtb-$(CONFIG_ARCH_EXYNOS) += exynos4210-origen.dtb \
 	exynos4210-smdkv310.dtb \
 	exynos4210-trats.dtb \
-	exynos5250-smdk5250.dtb \
-	exynos5440-ssdk5440.dtb \
 	exynos4412-smdk4412.dtb \
 	exynos5250-smdk5250.dtb \
-	exynos5250-snow.dtb
+	exynos5250-snow.dtb \
+	exynos5440-ssdk5440.dtb
 dtb-$(CONFIG_ARCH_HIGHBANK) += highbank.dtb \
 	ecx-2000.dtb
 dtb-$(CONFIG_ARCH_INTEGRATOR) += integratorap.dtb \
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index eae7b59..4258b08 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mman.h
+generic-y += mmu.h
 generic-y += mmu_context.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/c6x/include/asm/mmu.h b/arch/c6x/include/asm/mmu.h
deleted file mode 100644
index 4467e77..0000000
--- a/arch/c6x/include/asm/mmu.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Port on Texas Instruments TMS320C6x architecture
- *
- * Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated
- * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */ -#ifndef _ASM_C6X_MMU_H -#define _ASM_C6X_MMU_H - -typedef struct { - unsigned long end_brk; -#ifdef CONFIG_BINFMT_ELF_FDPIC - unsigned long exec_fdpic_loadmap; - unsigned long interp_fdpic_loadmap; -#endif -} mm_context_t; - -#endif /* _ASM_C6X_MMU_H */ diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index bebdc36..995eb47 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += mmu.h generic-y += module.h generic-y += trace_clock.h diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h deleted file mode 100644 index 3130996..0000000 --- a/arch/h8300/include/asm/mmu.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __MMU_H -#define __MMU_H - -/* Copyright (C) 2002, David McCullough */ - -typedef struct { - unsigned long end_brk; -} mm_context_t; - -#endif diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index 07f5299..7c69139 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -30,6 +30,7 @@ #define PIO_MASK 0 #include +#include extern void __iomem *__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot); diff --git a/arch/xtensa/include/asm/mmu.h b/arch/xtensa/include/asm/mmu.h index 04890d6..8554b2c 100644 --- a/arch/xtensa/include/asm/mmu.h +++ b/arch/xtensa/include/asm/mmu.h @@ -12,7 +12,7 @@ #define _XTENSA_MMU_H #ifndef CONFIG_MMU -#include +#include #else /* Default "unsigned long" context */ diff --git a/arch/xtensa/include/asm/nommu.h b/arch/xtensa/include/asm/nommu.h deleted file mode 100644 index dce2c43..0000000 --- a/arch/xtensa/include/asm/nommu.h +++ /dev/null @@ -1,3 +0,0 @@ -typedef struct { - unsigned long end_brk; -} mm_context_t; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5de8696..c13745c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -38,10 +38,12 @@ #include #include #include +#include #include #include #include +#include #include "iw_cxgb4.h" @@ -61,6 +63,14 @@ static char *states[] = { NULL, }; +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + static int dack_mode = 1; module_param(dack_mode, int, 0644); MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); @@ -265,6 +275,7 @@ void _c4iw_free_ep(struct kref *kref) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); } kfree(ep); } @@ -441,6 +452,50 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +#define VLAN_NONE 0xfff +#define FILTER_SEL_VLAN_NONE 0xffff +#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ +#define FILTER_SEL_WIDTH_VIN_P_FC \ + (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ +#define FILTER_SEL_WIDTH_TAG_P_FC \ + (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ +#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) + +static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, + struct l2t_entry *l2t) +{ + unsigned int ntuple = 0; + u32 viid; + + switch (dev->rdev.lldi.filt_mode) { + + /* default filter mode */ + 
case HW_TPL_FR_MT_PR_IV_P_FC: + if (l2t->vlan == VLAN_NONE) + ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; + else { + ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; + ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; + } + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + case HW_TPL_FR_MT_PR_OV_P_FC: { + viid = cxgb4_port_viid(l2t->neigh->dev); + + ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; + ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; + ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + } + default: + break; + } + return ntuple; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -463,7 +518,8 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | @@ -474,6 +530,7 @@ static int send_connect(struct c4iw_ep *ep) ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); @@ -492,8 +549,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = ep->com.local_addr.sin_addr.s_addr; req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = 0; + req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); req->opt2 = cpu_to_be32(opt2); + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -770,6 +828,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -777,7 +836,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ send_flowc(ep, NULL); @@ -803,6 +864,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -811,6 +873,7 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); close_complete_upcall(ep); state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -825,6 +888,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) PDBG("peer close delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); } } @@ -843,6 +907,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -875,6 +940,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, 
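The FILTER_SEL_* macros and select_ntuple() above pack the LE filter tuple for the default filter mode out of the VLAN tag, the VLAN-valid bit, the port and the IP protocol. The standalone sketch below (userspace, not driver code) mirrors that bit packing using the same field widths; S_PORT comes from the hardware headers in the real driver, so the value used here is only a stand-in.

#include <stdio.h>
#include <stdint.h>
#include <netinet/in.h>		/* IPPROTO_TCP */

#define VLAN_NONE			0xfff
#define FILTER_SEL_VLAN_NONE		0xffff
#define FILTER_SEL_WIDTH_P_FC		(3 + 1)	/* port 3 bits, FCoE 1 bit */
#define FILTER_SEL_WIDTH_VIN_P_FC	(6 + 7 + FILTER_SEL_WIDTH_P_FC)
#define FILTER_SEL_WIDTH_TAG_P_FC	(3 + FILTER_SEL_WIDTH_VIN_P_FC)
#define FILTER_SEL_WIDTH_VLD_TAG_P_FC	(1 + FILTER_SEL_WIDTH_TAG_P_FC)
#define S_PORT				1	/* placeholder, not the HW value */

static uint32_t pack_default_ntuple(uint16_t vlan, uint8_t lport)
{
	uint32_t ntuple = 0;

	if (vlan == VLAN_NONE) {
		ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
	} else {
		ntuple |= vlan << FILTER_SEL_WIDTH_P_FC;	/* VLAN tag  */
		ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;	/* tag valid */
	}
	ntuple |= lport << S_PORT | IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
	return ntuple;
}

int main(void)
{
	printf("untagged: 0x%08x\n", (unsigned)pack_default_ntuple(VLAN_NONE, 0));
	printf("vlan 100: 0x%08x\n", (unsigned)pack_default_ntuple(100, 1));
	return 0;
}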
&ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); if (status < 0) { @@ -915,6 +981,7 @@ static void connect_request_upcall(struct c4iw_ep *ep) ep->parent_ep->com.cm_id, &event); } + set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -931,6 +998,7 @@ static void established_upcall(struct c4iw_ep *ep) if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); } } @@ -1316,6 +1384,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); @@ -1338,9 +1407,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) case MPA_REP_SENT: break; default: - printk(KERN_ERR MOD "%s Unexpected streaming data." - " ep %p state %d tid %u\n", - __func__, ep, state_read(&ep->com), ep->hwtid); + pr_err("%s Unexpected streaming data." \ + " ep %p state %d tid %u status %d\n", + __func__, ep, state_read(&ep->com), ep->hwtid, status); /* * The ep will timeout and inform the ULP of the failure. @@ -1383,6 +1452,63 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + ep->l2t)); + req->le.lport = ep->com.local_addr.sin_port; + req->le.pport = ep->com.remote_addr.sin_port; + req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; + req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + req->tcb.tx_max = jiffies; + req->tcb.rcv_adv = htons(1); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + req->tcb.opt0 = TCAM_BYPASS(1) | + (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win >> 10); + req->tcb.opt2 = PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + req->tcb.opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + /* * Return whether a failed active open has allocated a TID */ @@ -1392,6 +1518,111 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +#define ACT_OPEN_RETRY_COUNT 2 + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct port_info *pi; + struct net_device *pdev; + int step; + struct neighbour *neigh; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_neigh_lookup(ep->dst, + &ep->com.cm_id->remote_addr.sin_addr.s_addr); + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & + 0x7F) << 1; + } + + step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = pi->port_id * step; + ep->ctrlq_idx = pi->port_id; + step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; + + if (!ep->l2t) { + pr_err("%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + 
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; @@ -1412,6 +1643,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + set_bit(ACT_OPEN_RPL, &ep->com.history); + /* * Log interesting failures. */ @@ -1419,6 +1652,29 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_RESET: case CPL_ERR_CONN_TIMEDOUT: break; + case CPL_ERR_TCAM_FULL: + if (dev->rdev.lldi.enable_fw_ofld_conn) { + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; default: printk(KERN_INFO MOD "Active open failure - " "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", @@ -1436,6 +1692,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -1452,13 +1709,14 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); if (!ep) { - printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); - return 0; + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); +out: return 0; } @@ -1510,14 +1768,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, skb_get(skb); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | TX_CHAN(ep->tx_chan) | SMAC_SEL(ep->smac_idx) | - DSCP(ep->tos) | + DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | @@ -1529,6 +1788,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -1645,22 +1913,30 @@ out: static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *child_ep, *parent_ep; + struct c4iw_ep *child_ep = NULL, *parent_ep; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; struct rtable *rt; - __be32 local_ip, peer_ip; + __be32 local_ip, peer_ip = 0; __be16 local_port, peer_port; int err; + u16 peer_mss = ntohs(req->tcpopt.mss); parent_ep = lookup_stid(t, stid); - PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ + "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, + ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), + ntohs(peer_port), peer_mss); + if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", __func__); @@ -1694,6 +1970,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (peer_mss && child_ep->mtu > (peer_mss + 40)) + child_ep->mtu = peer_mss + 40; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1715,6 +1994,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); accept_cr(child_ep, peer_ip, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: reject_cr(dev, hwtid, peer_ip, skb); @@ -1734,12 +2014,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + set_emss(ep, ntohs(req->tcp_opt)); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); start_ep_timer(ep); send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); return 0; } @@ -1759,6 +2044,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); + set_bit(PEER_CLOSE, &ep->com.history); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -1838,74 +2124,6 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } -static int c4iw_reconnect(struct c4iw_ep *ep) -{ - struct rtable *rt; - int err = 0; - - PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); - - /* - * 
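pass_accept_req() above now caps the child endpoint's MTU at the peer's advertised MSS plus 40 bytes, i.e. a 20-byte IPv4 header plus a 20-byte TCP header with no options. A minimal sketch of that arithmetic, with invented names:

#include <stdio.h>

#define TCPIP_HDR_LEN 40	/* 20-byte IPv4 header + 20-byte TCP header */

static unsigned int clamp_mtu(unsigned int path_mtu, unsigned int peer_mss)
{
	if (peer_mss && path_mtu > peer_mss + TCPIP_HDR_LEN)
		return peer_mss + TCPIP_HDR_LEN;
	return path_mtu;
}

int main(void)
{
	printf("%u\n", clamp_mtu(9000, 1460));	/* -> 1500 */
	printf("%u\n", clamp_mtu(1500, 0));	/* no MSS option -> 1500 */
	return 0;
}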
Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); - if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - - err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, false); - if (err) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; - } - - PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, - ep->l2t->idx); - - state_set(&ep->com, CONNECTING); - ep->tos = 0; - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: - /* - * remember to send notification to upper layer. - * We are in here so the upper layer is not aware that this is - * re-connect attempt and so, upper layer is still waiting for - * response of 1st connect request. - */ - connect_reply_upcall(ep, -ECONNRESET); - c4iw_put_ep(&ep->com); -out: - return err; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1926,6 +2144,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); /* * Wake up any threads in rdma_init() or rdma_fini(). @@ -2140,6 +2359,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) c4iw_put_ep(&ep->com); return -ECONNRESET; } + set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); @@ -2169,6 +2389,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); + set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { abort_connection(ep, NULL, GFP_KERNEL); @@ -2292,6 +2513,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, ntohl(cm_id->local_addr.sin_addr.s_addr), @@ -2337,6 +2559,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: cm_id->rem_ref(cm_id); @@ -2351,7 +2574,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - might_sleep(); ep = alloc_ep(sizeof(*ep), GFP_KERNEL); @@ -2370,30 +2592,54 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. 
*/ - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); err = -ENOMEM; goto fail2; } - + insert_handle(dev, &dev->stid_idr, ep, ep->stid); state_set(&ep->com, LISTEN); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0], + 0, + 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } if (!err) { cm_id->provider_data = ep; goto out; } -fail3: + pr_err("%s cxgb4_create_server/filter failed err %d " \ + "stid %d laddr %08x lport %d\n", \ + __func__, err, ep->stid, + ntohl(ep->com.local_addr.sin_addr.s_addr), + ntohs(ep->com.local_addr.sin_port)); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); fail2: cm_id->rem_ref(cm_id); @@ -2412,12 +2658,19 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); - if (err) - goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2481,10 +2734,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep); ret = send_abort(ep, NULL, gfp); - } else + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); + } if (ret) fatal = 1; } @@ -2494,10 +2750,323 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } -static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + 
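When firmware offloaded connections are enabled, c4iw_create_listen() above installs a server filter and simply retries while cxgb4_create_server_filter() reports -EBUSY, sleeping roughly 100 microseconds between attempts. The userspace analogue below shows that retry shape; create_filter_stub() is invented purely so the sketch runs on its own.

#include <stdio.h>
#include <errno.h>
#include <time.h>

/* Stand-in for cxgb4_create_server_filter(): busy for the first few calls. */
static int create_filter_stub(void)
{
	static int calls;
	return ++calls < 3 ? -EBUSY : 0;
}

int main(void)
{
	struct timespec delay = { .tv_sec = 0, .tv_nsec = 100 * 1000 }; /* ~100us */
	int err;

	do {
		err = create_filter_stub();
		if (err == -EBUSY)
			nanosleep(&delay, NULL);  /* mirrors the schedule_timeout() backoff */
	} while (err == -EBUSY);

	printf("filter created, err=%d\n", err);
	return 0;
}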
set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + remove_handle(dev, &dev->atid_idr, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + BUG_ON(!rpl_skb); + if (req->retval) { + PDBG("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + htonl(req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_fw6_msg *rpl = cplhdr(skb); - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + u32 l2info; + u16 vlantag, len, hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + + /* Store values from cpl_rx_pkt in temporary location. */ + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from SYN packet. + * to generate cpl_pass_accept_req. 
+ */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + + req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(V_SYN_INTF(intf) | + V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + F_SYN_XACT_MATCH); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); + req->vlan = vlantag; + req->len = len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | + PASS_OPEN_TOS(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->le.filter = filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | + V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | + V_FW_OFLD_CONNECTION_WR_ASTID( + GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + + /* + * We initialize the MSS index in TCB to 0xF. + * So that when driver sends cpl_pass_accept_rpl + * TCB picks up the correct value. If this was 0 + * TP will ignore any value > 0 for MSS index. + */ + req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->cookie = cpu_to_be64((u64)skb); + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); +} + +/* + * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt + * messages when a filter is being used instead of server to + * redirect a syn packet. When packets hit filter they are redirected + * to the offload queue and driver tries to establish the connection + * using firmware work request. 
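As the comment above notes, the driver has to recover the TCP options from the raw SYN in order to synthesize a cpl_pass_accept_req; in the patch this is delegated to the kernel's tcp_parse_options(). Below is a minimal, self-contained sketch of the same idea, walking the option bytes for the fields the request needs (MSS, window scale, SACK-permitted, timestamps); it is an illustration, not the kernel parser.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct syn_opts {
	uint16_t mss;
	uint8_t  wscale;
	int      sack_ok;
	int      tstamp_ok;
};

/* Walk the TCP option bytes of a SYN (RFC 793/1323/2018 option kinds). */
static void parse_syn_options(const uint8_t *opt, size_t len, struct syn_opts *out)
{
	size_t i = 0;

	while (i < len) {
		uint8_t kind = opt[i];

		if (kind == 0)			/* end of option list */
			break;
		if (kind == 1) {		/* NOP */
			i++;
			continue;
		}
		if (i + 1 >= len || opt[i + 1] < 2 || i + opt[i + 1] > len)
			break;			/* malformed length */
		switch (kind) {
		case 2:				/* MSS */
			if (opt[i + 1] == 4)
				out->mss = (uint16_t)(opt[i + 2] << 8 | opt[i + 3]);
			break;
		case 3:				/* window scale */
			if (opt[i + 1] == 3)
				out->wscale = opt[i + 2];
			break;
		case 4:				/* SACK permitted */
			out->sack_ok = 1;
			break;
		case 8:				/* timestamps */
			out->tstamp_ok = 1;
			break;
		}
		i += opt[i + 1];
	}
}

int main(void)
{
	/* MSS 1460, NOP, wscale 7, NOP, NOP, SACK permitted */
	const uint8_t opts[] = { 2, 4, 0x05, 0xb4, 1, 3, 3, 7, 1, 1, 4, 2 };
	struct syn_opts so = { 0 };

	parse_syn_options(opts, sizeof(opts), &so);
	printf("mss=%u wscale=%u sack=%d ts=%d\n",
	       so.mss, so.wscale, so.sack_ok, so.tstamp_ok);
	return 0;
}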
+ */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct rtable *rt; + struct c4iw_ep *lep; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid; + int step; + u32 tx_chan; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server tid from filter hit index from cpl_rx_pkt. + */ + stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base + + dev->rdev.lldi.tids->nstids; + + lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + if (!lep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + skb->vlan_tci = ntohs(cpl->vlan); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!rt) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + neigh = dst_neigh_lookup_skb(dst, skb); + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + dev_put(pdev); + } else { + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + tx_chan = cxgb4_port_chan(neigh->dev); + } + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = htons(tcph->window); + + /* Calcuate filter portion for LE region. */ + filter = cpu_to_be32(select_ntuple(dev, dst, e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once firmware sends a reply with TID we update the TID field + * in cpl and pass it through the regular cpl_pass_accept_req path. 
+ */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: return 0; } @@ -2520,7 +3089,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, - [CPL_FW6_MSG] = async_event + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt }; static void process_timeout(struct c4iw_ep *ep) @@ -2531,6 +3101,7 @@ static void process_timeout(struct c4iw_ep *ep) mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); @@ -2651,7 +3222,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s type %u\n", __func__, rpl->type); switch (rpl->type) { - case 1: + case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); @@ -2659,7 +3230,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); break; - case 2: + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: sched(dev, skb); break; default: @@ -2722,7 +3294,8 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, - [CPL_FW6_MSG] = fw6_msg + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched }; int __init c4iw_cm_init(void) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index cb4ecd7..ba11c76 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -279,6 +279,11 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB State: %s Transitions %llu\n", db_state_str[dev->db_state], dev->rdev.stats.db_state_transitions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); return 0; } @@ -309,6 +314,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.db_empty = 0; dev->rdev.stats.db_drop = 0; dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; mutex_unlock(&dev->rdev.stats.lock); return count; } @@ -322,6 +330,113 @@ static const struct file_operations stats_debugfs_fops = { .write = stats_clear, }; +static int dump_ep(int id, void *p, void *data) +{ + struct c4iw_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx " + "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n", + ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, + ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + if (cc < space) + epd->pos 
+= cc; + return 0; +} + +static int dump_listen_ep(int id, void *p, void *data) +{ + struct c4iw_listen_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d " + "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port)); + if (cc < space) + epd->pos += cc; + return 0; +} + +static int ep_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd = file->private_data; + if (!epd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(epd->buf); + kfree(epd); + return 0; +} + +static int ep_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd; + int ret = 0; + int count = 1; + + epd = kmalloc(sizeof(*epd), GFP_KERNEL); + if (!epd) { + ret = -ENOMEM; + goto out; + } + epd->devp = inode->i_private; + epd->pos = 0; + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); + idr_for_each(&epd->devp->atid_idr, count_idrs, &count); + idr_for_each(&epd->devp->stid_idr, count_idrs, &count); + spin_unlock_irq(&epd->devp->lock); + + epd->bufsize = count * 160; + epd->buf = vmalloc(epd->bufsize); + if (!epd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); + idr_for_each(&epd->devp->atid_idr, dump_ep, epd); + idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); + spin_unlock_irq(&epd->devp->lock); + + file->private_data = epd; + goto out; +err1: + kfree(epd); +out: + return ret; +} + +static const struct file_operations ep_debugfs_fops = { + .owner = THIS_MODULE, + .open = ep_open, + .release = ep_release, + .read = debugfs_read, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -344,6 +459,11 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root, + (void *)devp, &ep_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -475,6 +595,9 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); idr_destroy(&ctx->dev->mmidr); + idr_destroy(&ctx->dev->hwtid_idr); + idr_destroy(&ctx->dev->stid_idr); + idr_destroy(&ctx->dev->atid_idr); iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; @@ -532,6 +655,9 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->cqidr); idr_init(&devp->qpidr); idr_init(&devp->mmidr); + idr_init(&devp->hwtid_idr); + idr_init(&devp->stid_idr); + idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); @@ -577,14 +703,76 @@ out: return ctx; } +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. 
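dump_ep() and dump_listen_ep() above accumulate one formatted record per endpoint into a buffer sized from the idr counts, advancing the write position only when snprintf() reports that the record fit completely. A small userspace sketch of that accumulation pattern (the record format and names are invented):

#include <stdio.h>
#include <stdlib.h>

struct dump_buf {
	char   *buf;
	size_t  bufsize;
	size_t  pos;
};

/* Append one record; returns 1 (stop) when the buffer is exhausted. */
static int dump_record(struct dump_buf *d, int id)
{
	size_t space = d->bufsize - d->pos - 1;
	int cc;

	if (space == 0)
		return 1;

	cc = snprintf(d->buf + d->pos, space, "ep %d state %s\n", id, "LISTEN");
	if (cc > 0 && (size_t)cc < space)	/* only keep records that fit whole */
		d->pos += cc;
	return 0;
}

int main(void)
{
	struct dump_buf d = { .bufsize = 160 };
	int i;

	d.buf = calloc(1, d.bufsize);
	if (!d.buf)
		return 1;
	for (i = 0; i < 10 && !dump_record(&d, i); i++)
		;
	fputs(d.buf, stdout);
	free(d.buf);
	return 0;
}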
+ */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); + + /* + * This skb will contain: + * rss_header from the rspq descriptor (1 flit) + * cpl_rx_pkt struct from the rspq descriptor (2 flits) + * space for the difference between the size of an + * rx_pkt and pass_accept_req cpl (1 flit) + * the packet data from the gl + */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)); + skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) + + sizeof(struct cpl_pass_accept_req), + gl->va + pktshift, + gl->tot_len - pktshift); + return skb; +} + +static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl, + const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + + if (opcode != CPL_RX_PKT) + goto out; + + skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift); + if (skb == NULL) + goto out; + + if (c4iw_handlers[opcode] == NULL) { + pr_info("%s no handler opcode 0x%x...\n", __func__, + opcode); + kfree_skb(skb); + goto out; + } + c4iw_handlers[opcode](dev, skb); + return 1; +out: + return 0; +} + static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { struct uld_ctx *ctx = handle; struct c4iw_dev *dev = ctx->dev; struct sk_buff *skb; - const struct cpl_act_establish *rpl; - unsigned int opcode; + u8 opcode; if (gl == NULL) { /* omit RSS and rsp_ctrl at end of descriptor */ @@ -601,19 +789,29 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, u32 qid = be32_to_cpu(rc->pldbuflen_qid); c4iw_ev_handler(dev, qid); return 0; + } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) { + if (recv_rx_pkt(dev, gl, rsp)) + return 0; + + pr_info("%s: unexpected FL contents at %p, " \ + "RSS %#llx, FL %#llx, len %u\n", + pci_name(ctx->lldi.pdev), gl->va, + (unsigned long long)be64_to_cpu(*rsp), + (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + gl->tot_len); + + return 0; } else { skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } - rpl = cplhdr(skb); - opcode = rpl->ot.opcode; - + opcode = *(u8 *)rsp; if (c4iw_handlers[opcode]) c4iw_handlers[opcode](dev, skb); else - printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__, + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9beb3a9..9c1644f 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -130,6 +130,9 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; }; struct c4iw_rdev { @@ -223,6 +226,9 @@ struct c4iw_dev { struct dentry *debugfs_root; enum db_state db_state; int qpcnt; + struct idr hwtid_idr; + struct idr atid_idr; + struct idr stid_idr; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -712,6 +718,31 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, }; +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + ABORT_CONN = 
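copy_gl_to_skb_pkt() above builds a linear buffer laid out as rss_header, then room for the (larger) cpl_pass_accept_req that will be synthesized in place of the received cpl_rx_pkt, then the packet data past pktshift. The sketch below reproduces only the size arithmetic and the two copies with flat buffers; the struct sizes are invented placeholders, since the real ones come from the T4 headers.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define RSS_HDR_LEN		8	/* illustrative: one flit            */
#define PASS_ACCEPT_REQ_LEN	24	/* illustrative: >= sizeof cpl_rx_pkt */

static uint8_t *synth_pass_accept(const uint8_t *rsp, const uint8_t *pkt,
				  size_t pkt_len, size_t pktshift, size_t *out_len)
{
	size_t hdr = RSS_HDR_LEN + PASS_ACCEPT_REQ_LEN;
	size_t len = pkt_len + hdr - pktshift;
	uint8_t *buf = calloc(1, len);

	if (!buf)
		return NULL;
	memcpy(buf, rsp, hdr);			/* rss_header + cpl area  */
	memcpy(buf + hdr, pkt + pktshift,	/* payload after pktshift */
	       pkt_len - pktshift);
	*out_len = len;
	return buf;
}

int main(void)
{
	uint8_t rsp[RSS_HDR_LEN + PASS_ACCEPT_REQ_LEN] = { 0 };
	uint8_t pkt[64] = { 0 };
	size_t len = 0;
	uint8_t *buf = synth_pass_accept(rsp, pkt, sizeof(pkt), 2, &len);

	printf("synthesized buffer: %zu bytes\n", buf ? len : 0);
	free(buf);
	return 0;
}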
15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21 +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; @@ -723,6 +754,7 @@ struct c4iw_ep_common { struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; + unsigned long history; }; struct c4iw_listen_ep { @@ -760,6 +792,7 @@ struct c4iw_ep { u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; + unsigned int retry_count; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 72ae63f..03103d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -752,6 +752,9 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ dev->trans_start = jiffies; ++tx->tx_head; + skb_orphan(skb); + skb_dst_drop(skb); + if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f10221f..a1bca70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -615,8 +615,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; - skb_orphan(skb); + skb_orphan(skb); + skb_dst_drop(skb); } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index e4e8415..aefb78e 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) EXPORT_SYMBOL_GPL(dm_cell_release); /* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_singleton); - -/* * Sometimes we don't want the holder, just the additional bios. 
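The history word added to c4iw_ep_common is a per-endpoint bitmask: every set_bit(EVENT, &ep->com.history) call sprinkled through cm.c records that the endpoint passed through that state, and the new "eps" debugfs file prints the raw mask. A minimal sketch of recording and decoding such a mask, using only a few of the enum values above:

#include <stdio.h>

/* A few of the event bits, numbered as in enum c4iw_ep_history. */
enum ep_event { ACT_OPEN_REQ = 0, ACT_OPEN_RPL = 2, ACT_ESTAB = 3, TIMEDOUT = 11 };

static const char *const ev_name[] = {
	[ACT_OPEN_REQ] = "ACT_OPEN_REQ",
	[ACT_OPEN_RPL] = "ACT_OPEN_RPL",
	[ACT_ESTAB]    = "ACT_ESTAB",
	[TIMEDOUT]     = "TIMEDOUT",
};

int main(void)
{
	unsigned long history = 0;
	unsigned int bit;

	/* Equivalent of the set_bit(..., &ep->com.history) calls in the patch. */
	history |= 1UL << ACT_OPEN_REQ;
	history |= 1UL << ACT_OPEN_RPL;
	history |= 1UL << ACT_ESTAB;

	printf("history 0x%lx:", history);
	for (bit = 0; bit < sizeof(ev_name) / sizeof(ev_name[0]); bit++)
		if ((history & (1UL << bit)) && ev_name[bit])
			printf(" %s", ev_name[bit]);
	printf("\n");
	return 0;
}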
*/ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 4e0ac37..53d1a7a 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell **ref); void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison_cell *cell); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bbf459b..f7369f9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1689,8 +1689,7 @@ bad: return ret; } -static int crypt_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; @@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f53846f..cc1bd04 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti) atomic_set(&dc->may_delay, 1); } -static int delay_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int delay_map(struct dm_target *ti, struct bio *bio) { struct delay_c *dc = ti->private; @@ -338,7 +337,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index cc15543..9721f2f 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -39,6 +39,10 @@ enum feature_flag_bits { DROP_WRITES }; +struct per_bio_data { + bool bio_submitted; +}; + static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { @@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) } } -static int flakey_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int flakey_map(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; unsigned elapsed; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + pb->bio_submitted = false; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; @@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* * Flag this bio as submitted while down. */ - map_context->ll = 1; + pb->bio_submitted = true; /* * Map reads as normal. 
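dm-flakey (like the other targets touched by this series) stops smuggling state through union map_info: it declares ti->per_bio_data_size in the constructor and fetches a per-bio scratch area with dm_per_bio_data() in map and end_io. The toy program below shows the underlying idea, state co-allocated with the bio and found by a fixed offset; it is a standalone analogue, not the device-mapper implementation.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Toy stand-ins: the real struct bio and dm_per_bio_data() live in the kernel. */
struct toy_bio {
	int sector;
	/* per-target data follows the bio in this toy allocation */
};

struct per_bio_data {
	bool bio_submitted;
};

static struct toy_bio *toy_bio_alloc(size_t per_bio_data_size)
{
	return calloc(1, sizeof(struct toy_bio) + per_bio_data_size);
}

static void *toy_per_bio_data(struct toy_bio *bio)
{
	return bio + 1;		/* data area starts right after the bio */
}

int main(void)
{
	struct toy_bio *bio = toy_bio_alloc(sizeof(struct per_bio_data));
	struct per_bio_data *pb;

	if (!bio)
		return 1;
	pb = toy_per_bio_data(bio);
	pb->bio_submitted = true;	/* what flakey_map() records        */
	printf("submitted while down: %d\n", pb->bio_submitted); /* read in end_io */
	free(bio);
	return 0;
}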
@@ -314,17 +320,16 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) { struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); /* * Corrupt successful READs while in down state. * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && + if (fc->corrupt_bio_byte && !error && pb->bio_submitted && (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) corrupt_bio_data(bio, fc); @@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1c46f97..ea49834 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to @@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, /* * Allocate a suitably sized-bio. */ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. + */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); } else while (remaining) { /* * Try and add as many pages as possible. 
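For WRITE SAME, do_region() above issues single-page bios whose size is capped by q->limits.max_write_same_sectors, subtracting each chunk from the remaining count until the region is consumed, the same min-and-subtract pattern already used for discards. A standalone sketch of that splitting arithmetic:

#include <stdio.h>

#define SECTOR_SHIFT 9

/* Split a region into per-bio chunks no larger than the queue limit. */
static void split_region(unsigned long long start, unsigned long long count,
			 unsigned long long max_sectors)
{
	while (count) {
		unsigned long long num = count < max_sectors ? count : max_sectors;

		printf("bio: sector %llu, %llu bytes\n",
		       start, num << SECTOR_SHIFT);
		start += num;
		count -= num;
	}
}

int main(void)
{
	/* e.g. 10000 sectors against a 4096-sector WRITE SAME limit */
	split_region(2048, 10000, 4096);
	return 0;
}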
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afd9598..0666b5d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) +#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ + +static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) +{ + if (param_flags & DM_WIPE_BUFFER) + memset(param, 0, param_size); + + if (param_flags & DM_PARAMS_VMALLOC) + vfree(param); + else + kfree(param); +} + +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl tmp, *dmi; int secure_data; @@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + + /* + * Try to avoid low memory issues when a device is suspended. + * Use kmalloc() rather than vmalloc() when we can. + */ + dmi = NULL; + if (tmp.data_size <= KMALLOC_MAX_SIZE) + dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!dmi) { + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + *param_flags |= DM_PARAMS_VMALLOC; + } + if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (copy_from_user(dmi, user, tmp.data_size)) goto bad; + /* + * Abort if something changed the ioctl data while it was being copied. + */ + if (dmi->data_size != tmp.data_size) { + DMERR("rejecting ioctl: data size modified while processing parameters"); + goto bad; + } + /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, tmp.data_size)) goto bad; @@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) return 0; bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); + free_params(dmi, tmp.data_size, *param_flags); + return -EFAULT; } @@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; - int wipe_buffer; + int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; @@ -1649,24 +1684,14 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) } /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* * Copy the parameters into kernel space. 
*/ - r = copy_params(user, &param); - - current->flags &= ~PF_MEMALLOC; + r = copy_params(user, &param, &param_flags); if (r) return r; input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - r = validate_params(cmd, param); if (r) goto out; @@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) r = -EFAULT; out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); + free_params(param, input_param_size, param_flags); return r; } diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c..68c0267 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context) struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. + */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1bf19a9..328cad5 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->num_write_same_requests = 1; ti->private = lc; return 0; @@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); @@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 45d94a7..3d8984e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * Choose a reasonable default. All figures in sectors. 
*/ if (min_region_size > (1 << 13)) { + /* If not a power of 2, make it the next power of 2 */ + if (min_region_size & (min_region_size - 1)) + region_size = 1 << fls(region_size); DMINFO("Choosing default region size of %lu sectors", region_size); - region_size = min_region_size; } else { DMINFO("Choosing default region size of 4MiB"); region_size = 1 << 13; /* sectors */ @@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti) context_free(rs); } -static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +static int raid_map(struct dm_target *ti, struct bio *bio) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; @@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fd61f98..fa51918 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -61,7 +61,6 @@ struct mirror_set { struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; - mempool_t *read_record_pool; /* recovery */ region_t nr_regions; @@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -#define MIN_READ_RECORDS 20 -struct dm_raid1_read_record { +struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; + region_t write_region; }; -static struct kmem_cache *_dm_raid1_read_record_cache; - /* * Every mirror should look like this one. */ @@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_io_client_destroy(ms->io_client); dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti) /* * Mirror mapping function */ -static int mirror_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int mirror_map(struct dm_target *ti, struct bio *bio) { int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + struct dm_raid1_bio_record *bio_record = + 
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + + bio_record->details.bi_bdev = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - map_context->ll = dm_rh_bio_to_region(ms->rh, bio); + bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } @@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } + dm_bio_record(&bio_record->details, bio); + bio_record->m = m; map_bio(m, bio); return DM_MAPIO_REMAPPED; } -static int mirror_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { int rw = bio_rw(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); /* * We need to dec pending if this was a write. */ if (rw == WRITE) { if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) - dm_rh_dec(ms->rh, map_context->ll); + dm_rh_dec(ms->rh, bio_record->write_region); return error; } @@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!read_record) { + if (!bio_record->details.bi_bdev) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return -EIO; } - m = read_record->m; + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); @@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * mirror. 
*/ if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; + bd = &bio_record->details; dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; queue_bio(ms, bio, rw); - return 1; + return DM_ENDIO_INCOMPLETE; } DMERR("All replicated volumes dead, failing I/O"); } out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } + bio_record->details.bi_bdev = NULL; return error; } @@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 12, 1}, + .version = {1, 13, 1}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, @@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void) { int r; - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - r = dm_register_target(&mirror_target); if (r < 0) { DMERR("Failed to register mirror target"); @@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void) return 0; bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a143921..59fc18a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -79,7 +79,6 @@ struct dm_snapshot { /* Chunks with outstanding reads */ spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* The on disk metadata handler */ @@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct kmem_cache *tracked_chunk_cache; +static void init_tracked_chunk(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); +} -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - chunk_t chunk) +static bool is_bio_tracked(struct bio *bio) { - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); - unsigned long flags; + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); +} + +static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); c->chunk = chunk; - spin_lock_irqsave(&s->tracked_chunk_lock, flags); + spin_lock_irq(&s->tracked_chunk_lock); hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - return c; + spin_unlock_irq(&s->tracked_chunk_lock); } -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) +static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) { + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); unsigned long flags; spin_lock_irqsave(&s->tracked_chunk_lock, flags); hlist_del(&c->node); spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); } static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) @@ -1120,14 
+1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_pending_pool; } - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); @@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_requests = num_flush_requests; + ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -1183,9 +1178,6 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); bad_pending_pool: @@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); #endif - mempool_destroy(s->tracked_chunk_pool); - __free_exceptions(s); mempool_destroy(s->pending_pool); @@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, s->store->chunk_mask); } -static int snapshot_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; @@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; @@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); } out_unlock: @@ -1691,20 +1682,20 @@ out: * If merging is currently taking place on the chunk in question, the * I/O is deferred by adding it to s->bios_queued_during_merge. 
*/ -static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; chunk_t chunk; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) + if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; return DM_MAPIO_REMAPPED; } @@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); goto out_unlock; } @@ -1751,14 +1742,12 @@ out_unlock: return r; } -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) { struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - if (c) - stop_tracking_chunk(s, c); + if (is_bio_tracked(bio)) + stop_tracking_chunk(s, bio); return 0; } @@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti) dm_put_device(ti, dev); } -static int origin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int origin_map(struct dm_target *ti, struct bio *bio) { struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; @@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2206,7 +2194,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - return 0; -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); bad_pending_cache: kmem_cache_destroy(exception_cache); bad_exception_cache: @@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void) exit_origin_hash(); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); dm_exception_store_exit(); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2f87653..c89cde8 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; + ti->num_write_same_requests = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, *result += sc->chunk_size; /* next chunk */ } -static int 
stripe_map_discard(struct stripe_c *sc, struct bio *bio, - uint32_t target_stripe) +static int stripe_map_range(struct stripe_c *sc, struct bio *bio, + uint32_t target_stripe) { sector_t begin, end; @@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, } } -static int stripe_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; unsigned target_request_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; return DM_MAPIO_REMAPPED; } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; + if (unlikely(bio->bi_rw & REQ_DISCARD) || + unlikely(bio->bi_rw & REQ_WRITE_SAME)) { + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_discard(sc, bio, target_request_nr); + return stripe_map_range(sc, bio, target_request_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type, return 0; } -static int stripe_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) { unsigned i; char major_minor[16]; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 100368e..daf25d0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t) int dm_table_alloc_md_mempools(struct dm_table *t) { unsigned type = dm_table_get_type(t); + unsigned per_bio_data_size = 0; + struct dm_target *tgt; + unsigned i; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; @@ -1414,6 +1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return 1; } +static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_same_sectors; +} + +static bool dm_table_supports_write_same(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_same_requests) + return false; + + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; + dm_table_set_integrity(t); /* diff --git 
a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 8da366c..617d21a 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt) /* empty */ } -static int io_err_map(struct dm_target *tt, struct bio *bio, - union map_info *map_context) +static int io_err_map(struct dm_target *tt, struct bio *bio) { return -EIO; } static struct target_type error_target = { .name = "error", - .version = {1, 0, 1}, + .version = {1, 1, 0}, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 693e149..4d6e853 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 058acf3..675ae52 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -186,7 +186,6 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; - mempool_t *endio_hook_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); @@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) dm_thin_changed_this_transaction(tc->td); } +static void inc_all_io_entry(struct pool *pool, struct bio *bio) +{ + struct dm_thin_endio_hook *h; + + if (bio->bi_rw & REQ_DISCARD) + return; + + h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; @@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err) /* * This sends the bios in the cell back to the deferred_bios list. */ -static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, - dm_block_t data_block) +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct pool *pool = tc->pool; unsigned long flags; @@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, } /* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. + * Same as cell_defer except it omits the original holder of the cell. 
*/ -static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { - struct bio_list bios; struct pool *pool = tc->pool; unsigned long flags; - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR("dm_thin_insert_block() failed"); + DMERR_LIMIT("dm_thin_insert_block() failed"); dm_cell_error(m->cell); goto out; } @@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * the bios in the cell. */ if (bio) { - cell_defer_except(tc, m->cell); + cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else - cell_defer(tc, m->cell, m->data_block); + cell_defer(tc, m->cell); out: list_del(&m->list); @@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; bio_io_error(m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + inc_all_io_entry(tc->pool, m->bio); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); + if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else bio_endio(m->bio, 0); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) r = dm_thin_remove_block(tc->td, m->virt_block); if (r) - DMERR("dm_thin_remove_block() failed"); + DMERR_LIMIT("dm_thin_remove_block() failed"); process_prepared_discard_passdown(m); } @@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, * bio immediately. Otherwise we use kcopyd to clone the data first. 
*/ if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_dest); } else { struct dm_io_region from, to; @@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); + DMERR_LIMIT("dm_kcopyd_copy() failed"); dm_cell_error(cell); } } @@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); } else { int r; @@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); + DMERR_LIMIT("dm_kcopyd_zero() failed"); dm_cell_error(cell); } } @@ -804,7 +813,7 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) - DMERR("commit failed, error = %d", r); + DMERR_LIMIT("commit failed: error = %d", r); return r; } @@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) */ static void retry_on_resume(struct bio *bio) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; struct pool *pool = tc->pool; unsigned long flags; @@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); break; } @@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio) wake_worker(pool); } } else { + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell2); + /* * The DM core makes sure that the discard doesn't span * a block boundary. So we submit the discard of a * partial block appropriately. */ - dm_cell_release_singleton(cell, bio); - dm_cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. 
*/ - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); break; default: - DMERR("discard: find block unexpectedly returned %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); dm_cell_error(cell); break; } @@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); - dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, 0); return; } @@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); return; } @@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); dm_cell_error(cell); break; @@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio) r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - /* - * We can release this cell now. This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. 
- */ - dm_cell_release_singleton(cell, bio); - - if (lookup_result.shared) + if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - else + cell_defer_no_holder(tc, cell); + } else { + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); - else + else { + inc_all_io_entry(tc->pool, bio); remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: @@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } if (tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); remap_to_origin_and_issue(tc, bio); break; } @@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); bio_io_error(bio); break; } @@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool) spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; /* @@ -1340,32 +1358,30 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) wake_worker(pool); } -static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static void thin_hook_bio(struct thin_c *tc, struct bio *bio) { - struct pool *pool = tc->pool; - struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); + h->all_io_entry = NULL; h->overwrite_mapping = NULL; - - return h; } /* * Non-blocking function called from the thin target's map function. */ -static int thin_bio_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_bio_map(struct dm_target *ti, struct bio *bio) { int r; struct thin_c *tc = ti->private; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; + struct dm_bio_prison_cell *cell1, *cell2; + struct dm_cell_key key; - map_context->ptr = thin_hook_bio(tc, bio); + thin_hook_bio(tc, bio); if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); @@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * shared flag will be set in their case. 
*/ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_SUBMITTED; } - break; + + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + return DM_MAPIO_SUBMITTED; + + build_data_key(tc->td, result.block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { + cell_defer_no_holder(tc, cell1); + return DM_MAPIO_SUBMITTED; + } + + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell2); + cell_defer_no_holder(tc, cell1); + + remap(tc, bio, result.block); + return DM_MAPIO_REMAPPED; case -ENODATA: if (get_pool_mode(tc->pool) == PM_READ_ONLY) { @@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * of doing so. Just error it. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; default: /* @@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * pool is switched to fail-io mode. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } - - return r; } static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) @@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } static struct kmem_cache *_new_mapping_cache; -static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, @@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } - pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, - _endio_hook_cache); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -1966,8 +1979,7 @@ out_unlock: return r; } -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int pool_map(struct dm_target *ti, struct bio *bio) { int r; struct pool_c *pt = ti->private; @@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (pool->pf.discard_enabled && pool->pf.discard_passdown) + if (!pool->pf.discard_enabled) + DMEMIT("ignore_discard"); + else if (pool->pf.discard_passdown) DMEMIT("discard_passdown"); else DMEMIT("no_discard_passdown"); @@ -2454,7 +2468,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2576,6 +2590,7 @@ static int thin_ctr(struct 
dm_target *ti, unsigned argc, char **argv) ti->num_flush_requests = 1; ti->flush_supported = true; + ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { @@ -2609,20 +2624,17 @@ out_unlock: return r; } -static int thin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_map(struct dm_target *ti, struct bio *bio) { bio->bi_sector = dm_target_offset(ti, bio->bi_sector); - return thin_bio_map(ti, bio, map_context); + return thin_bio_map(ti, bio); } -static int thin_endio(struct dm_target *ti, - struct bio *bio, int err, - union map_info *map_context) +static int thin_endio(struct dm_target *ti, struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct list_head work; struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; @@ -2643,14 +2655,15 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); dm_deferred_entry_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); + if (!list_empty(&work)) { + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); + } } - mempool_free(h, pool->endio_hook_pool); - return 0; } @@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void) if (!_new_mapping_cache) goto bad_new_mapping_cache; - _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); - if (!_endio_hook_cache) - goto bad_endio_hook_cache; - return 0; -bad_endio_hook_cache: - kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: dm_unregister_target(&pool_target); bad_pool_target: @@ -2801,7 +2808,6 @@ static void dm_thin_exit(void) dm_unregister_target(&pool_target); kmem_cache_destroy(_new_mapping_cache); - kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e7328b..52cde98 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -55,7 +55,6 @@ struct dm_verity { unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ mempool_t *vec_mempool; /* mempool of bio vector */ struct workqueue_struct *verify_wq; @@ -66,7 +65,6 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - struct bio *bio; /* original values of bio->bi_end_io and bio->bi_private */ bio_end_io_t *orig_bi_end_io; @@ -389,8 +387,8 @@ test_block_hash: */ static void verity_finish_io(struct dm_verity_io *io, int error) { - struct bio *bio = io->bio; struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; @@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) if 
(io->io_vec != io->io_vec_inline) mempool_free(io->io_vec, v->vec_mempool); - mempool_free(io, v->io_mempool); - bio_endio(bio, error); } @@ -462,8 +458,7 @@ no_prefetch_cluster: * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio, if (bio_data_dir(bio) == WRITE) return -EIO; - io = mempool_alloc(v->io_mempool, GFP_NOIO); + io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; - io->bio = bio; io->orig_bi_end_io = bio->bi_end_io; io->orig_bi_private = bio->bi_private; io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti) if (v->vec_mempool) mempool_destroy(v->vec_mempool); - if (v->io_mempool) - mempool_destroy(v->io_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, BIO_MAX_PAGES * sizeof(struct bio_vec)); @@ -875,7 +860,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cc2b3cb..69a5c3b 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Return zeros only on reads */ -static int zero_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int zero_map(struct dm_target *ti, struct bio *bio) { switch(bio_rw(bio)) { case READ: @@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio, static struct target_type zero_target = { .name = "zero", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77e6eff..c72e4d5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,18 +63,6 @@ struct dm_io { }; /* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; - struct bio clone; -}; - -/* * For request-based dm. * One of these is allocated per request. 
*/ @@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error) error = -EIO; if (endio) { - r = endio(tio->ti, bio, error, &tio->info); + r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) /* * error and requeue request are handled @@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_sector; - r = ti->type->map(ti, clone, &tio->info); + r = ti->type->map(ti, clone); if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ @@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); + tio->target_request_nr = 0; return tio; } @@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); struct bio *clone = &tio->clone; - tio->info.target_request_nr = request_nr; + tio->target_request_nr = request_nr; /* * Discard requests require the bio's inline iovecs be initialized. @@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -static int __clone_and_map_discard(struct clone_info *ci) +typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); + +static unsigned get_num_discard_requests(struct dm_target *ti) +{ + return ti->num_discard_requests; +} + +static unsigned get_num_write_same_requests(struct dm_target *ti) +{ + return ti->num_write_same_requests; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); + +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_requests; +} + +static int __clone_and_map_changing_extent_only(struct clone_info *ci, + get_num_requests_fn get_num_requests, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; @@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci) return -EIO; /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and * reconfiguration might also have changed that since the * check was performed. 
*/ - if (!ti->num_discard_requests) + if (!get_num_requests || !get_num_requests(ti)) return -EOPNOTSUPP; - if (!ti->split_discard_requests) + if (is_split_required && !is_split_required(ti)) len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min(ci->sector_count, max_io_len(ci->sector, ti)); @@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci) return 0; } +static int __clone_and_map_discard(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + is_split_required_for_discard); +} + +static int __clone_and_map_write_same(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); +} + static int __clone_and_map(struct clone_info *ci) { struct bio *bio = ci->bio; @@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci) if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __clone_and_map_write_same(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) @@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md) static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { - struct dm_md_mempools *p; + struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) - /* the md already has necessary mempools */ + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { + /* + * The md already has necessary mempools. Reload just the + * bioset because front_pad may have changed because + * a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; goto out; + } - p = dm_table_get_md_mempools(t); BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); md->io_pool = p->io_pool; @@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; @@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools) return NULL; + per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + pools->io_pool = (type == DM_TYPE_BIO_BASED) ? mempool_create_slab_pool(MIN_IOS, _io_cache) : mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); @@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) pools->bs = (type == DM_TYPE_BIO_BASED) ? 
bioset_create(pool_size, - offsetof(struct dm_target_io, clone)) : + per_bio_data_size + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6a99fef..45b97da 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -159,7 +159,7 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); #endif diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a3ae091..28c3ed0 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfe..accbb05 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. 
*/ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e23..c4f2813 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. */ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. */ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); unsigned threshold = 2 * merge_threshold(left) + 1; @@ -239,7 +239,7 
@@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node *n; + struct 
btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912..f199a0c 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } @@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc..4caf669 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? 
hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); @@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; - struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); @@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8..3e7a88d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ 
b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e..906cf3d 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c index 636409f..9299a8c 100644 --- a/drivers/misc/mei/wd.c +++ b/drivers/misc/mei/wd.c @@ -370,7 +370,7 @@ void mei_watchdog_register(struct mei_device *dev) void mei_watchdog_unregister(struct mei_device *dev) { - if (test_bit(WDOG_UNREGISTERED, &amt_wd_dev.status)) + if (watchdog_get_drvdata(&amt_wd_dev) == NULL) return; watchdog_set_drvdata(&amt_wd_dev, NULL); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 378988b..6db997c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -35,6 +35,8 @@ #ifndef __CXGB4_H__ #define __CXGB4_H__ +#include "t4_hw.h" + #include #include #include @@ -212,6 +214,8 @@ struct tp_err_stats { struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ + unsigned short tx_modq_map; /* TX modulation scheduler queue to */ + /* channel map */ uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ @@ -526,6 +530,7 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; struct l2t_data *l2t; @@ -545,6 +550,129 @@ struct adapter { spinlock_t stats_lock; }; +/* Defined 
bit width of user definable filter tuples + */ +#define ETHTYPE_BITWIDTH 16 +#define FRAG_BITWIDTH 1 +#define MACIDX_BITWIDTH 9 +#define FCOE_BITWIDTH 1 +#define IPORT_BITWIDTH 3 +#define MATCHTYPE_BITWIDTH 3 +#define PROTO_BITWIDTH 8 +#define TOS_BITWIDTH 8 +#define PF_BITWIDTH 8 +#define VF_BITWIDTH 8 +#define IVLAN_BITWIDTH 16 +#define OVLAN_BITWIDTH 16 + +/* Filter matching rules. These consist of a set of ingress packet field + * (value, mask) tuples. The associated ingress packet field matches the + * tuple when ((field & mask) == value). (Thus a wildcard "don't care" field + * rule can be constructed by specifying a tuple of (0, 0).) A filter rule + * matches an ingress packet when all of the individual field + * matching rules are true. + * + * Partial field masks are always valid, however, while it may be easy to + * understand their meanings for some fields (e.g. IP address to match a + * subnet), for others making sensible partial masks is less intuitive (e.g. + * MPS match type) ... + * + * Most of the following data structures are modeled on T4 capabilities. + * Drivers for earlier chips use the subsets which make sense for those chips. + * We really need to come up with a hardware-independent mechanism to + * represent hardware filter capabilities ... + */ +struct ch_filter_tuple { + /* Compressed header matching field rules. The TP_VLAN_PRI_MAP + * register selects which of these fields will participate in the + * filter match rules -- up to a maximum of 36 bits. Because + * TP_VLAN_PRI_MAP is a global register, all filters must use the same + * set of fields. + */ + uint32_t ethtype:ETHTYPE_BITWIDTH; /* Ethernet type */ + uint32_t frag:FRAG_BITWIDTH; /* IP fragmentation header */ + uint32_t ivlan_vld:1; /* inner VLAN valid */ + uint32_t ovlan_vld:1; /* outer VLAN valid */ + uint32_t pfvf_vld:1; /* PF/VF valid */ + uint32_t macidx:MACIDX_BITWIDTH; /* exact match MAC index */ + uint32_t fcoe:FCOE_BITWIDTH; /* FCoE packet */ + uint32_t iport:IPORT_BITWIDTH; /* ingress port */ + uint32_t matchtype:MATCHTYPE_BITWIDTH; /* MPS match type */ + uint32_t proto:PROTO_BITWIDTH; /* protocol type */ + uint32_t tos:TOS_BITWIDTH; /* TOS/Traffic Type */ + uint32_t pf:PF_BITWIDTH; /* PCI-E PF ID */ + uint32_t vf:VF_BITWIDTH; /* PCI-E VF ID */ + uint32_t ivlan:IVLAN_BITWIDTH; /* inner VLAN */ + uint32_t ovlan:OVLAN_BITWIDTH; /* outer VLAN */ + + /* Uncompressed header matching field rules. These are always + * available for field rules. + */ + uint8_t lip[16]; /* local IP address (IPv4 in [3:0]) */ + uint8_t fip[16]; /* foreign IP address (IPv4 in [3:0]) */ + uint16_t lport; /* local port */ + uint16_t fport; /* foreign port */ +}; + +/* A filter ioctl command. + */ +struct ch_filter_specification { + /* Administrative fields for filter. + */ + uint32_t hitcnts:1; /* count filter hits in TCB */ + uint32_t prio:1; /* filter has priority over active/server */ + + /* Fundamental filter typing. This is the one element of filter + * matching that doesn't exist as a (value, mask) tuple. + */ + uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + + /* Packet dispatch information. Ingress packets which match the + * filter rules will be dropped, passed to the host or switched back + * out as egress packets. 
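 *
 * Illustrative sketch (not in the patch itself; the port, protocol and
 * queue values are hypothetical): a specification that steers local TCP
 * port 80 traffic to ingress queue 5 would be filled in roughly as
 *
 *	struct ch_filter_specification fs = { 0 };
 *
 *	fs.val.lport = 80;		fs.mask.lport = ~0;
 *	fs.val.proto = IPPROTO_TCP;	fs.mask.proto = ~0;
 *	fs.dirsteer = 1;		fs.iq = 5;
 *
 * Each (value, mask) pair matches when ((field & mask) == value), so a
 * pair left at (0, 0) keeps that field as a wildcard.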
+ */ + uint32_t action:2; /* drop, pass, switch */ + + uint32_t rpttid:1; /* report TID in RSS hash field */ + + uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ + uint32_t iq:10; /* ingress queue */ + + uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ + uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ + /* 1 => TCB contains IQ ID */ + + /* Switch proxy/rewrite fields. An ingress packet which matches a + * filter with "switch" set will be looped back out as an egress + * packet -- potentially with some Ethernet header rewriting. + */ + uint32_t eport:2; /* egress port to switch packet out */ + uint32_t newdmac:1; /* rewrite destination MAC address */ + uint32_t newsmac:1; /* rewrite source MAC address */ + uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ + uint8_t smac[ETH_ALEN]; /* new source MAC address */ + uint16_t vlan; /* VLAN Tag to insert */ + + /* Filter rule value/mask pairs. + */ + struct ch_filter_tuple val; + struct ch_filter_tuple mask; +}; + +enum { + FILTER_PASS = 0, /* default */ + FILTER_DROP, + FILTER_SWITCH +}; + +enum { + VLAN_NOCHANGE = 0, /* default */ + VLAN_REMOVE, + VLAN_INSERT, + VLAN_REWRITE +}; + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -701,6 +829,12 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, u32 *vals, unsigned int nregs, + unsigned int start_idx); + +struct fw_filter_wr; + void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); @@ -737,6 +871,8 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); + void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a27b4ae..f0718e1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -175,6 +175,30 @@ enum { MIN_FL_ENTRIES = 16 }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware or the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. + */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). 
Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -325,6 +349,9 @@ enum { static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; +module_param(tp_vlan_pri_map, uint, 0644); +MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration"); + static struct dentry *cxgb4_debugfs_root; static LIST_HEAD(adapter_list); @@ -506,8 +533,67 @@ static int link_start(struct net_device *dev) return ret; } -/* - * Response queue handler for the FW event queue. +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +static void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter has loopback rewriting rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. + */ +static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + unsigned int ret; + struct filter_entry *f; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = GET_TCB_COOKIE(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} + +/* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) @@ -542,6 +628,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SET_TCB_RPL) { + const struct cpl_set_tcb_rpl *p = (void *)rsp; + + filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); @@ -983,6 +1073,148 @@ static void t4_free_mem(void *addr) kfree(addr); } +/* Send a Work Request to write the filter at a specified index. 
We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +static int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int ftid; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter->l2t); + if (f->l2t == NULL) + return -EAGAIN; + if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan, + f->fs.eport, f->fs.dmac)) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*fwr)/16)); + fwr->tid_to_iq = + htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_RQTYPE(f->fs.type) | + V_FW_FILTER_WR_NOREPLY(0) | + V_FW_FILTER_WR_IQ(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | + V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | + V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | + V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | + V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | + V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | + V_FW_FILTER_WR_DMAC(f->fs.newdmac) | + V_FW_FILTER_WR_SMAC(f->fs.newsmac) | + V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | + V_FW_FILTER_WR_TXCHAN(f->fs.eport) | + V_FW_FILTER_WR_PRIO(f->fs.prio) | + V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | + V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | + V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | + V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(V_FW_FILTER_WR_RX_CHAN(0) | + V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | + V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | + V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | + V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | + V_FW_FILTER_WR_PORT(f->fs.val.iport) | + V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | + V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | + V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Delete the filter at a specified index. + */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; @@ -2195,7 +2427,7 @@ int cxgb4_alloc_atid(struct tid_info *t, void *data) if (t->afree) { union aopen_entry *p = t->afree; - atid = p - t->atid_tab; + atid = (p - t->atid_tab) + t->atid_base; t->afree = p->next; p->data = data; t->atids_in_use++; @@ -2210,7 +2442,7 @@ EXPORT_SYMBOL(cxgb4_alloc_atid); */ void cxgb4_free_atid(struct tid_info *t, unsigned int atid) { - union aopen_entry *p = &t->atid_tab[atid]; + union aopen_entry *p = &t->atid_tab[atid - t->atid_base]; spin_lock_bh(&t->atid_lock); p->next = t->afree; @@ -2249,8 +2481,34 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) } EXPORT_SYMBOL(cxgb4_alloc_stid); -/* - * Release a server TID. +/* Allocate a server filter TID and set it to the supplied value. 
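 *
 * Usage sketch (not in the patch itself; dev, data, sip, sport and queue
 * are caller-supplied placeholders): an upper-layer driver would normally
 * pair this with the new server filter entry points, e.g.
 *
 *	stid = cxgb4_alloc_sftid(&adap->tids, PF_INET, data);
 *	if (stid >= 0)
 *		ret = cxgb4_create_server_filter(dev, stid, sip, sport,
 *						 0, queue, 0, 0);
 *
 * and later tear the filter down again with
 *
 *	cxgb4_remove_server_filter(dev, stid, queue, false);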
+ */ +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) +{ + int stid; + + spin_lock_bh(&t->stid_lock); + if (family == PF_INET) { + stid = find_next_zero_bit(t->stid_bmap, + t->nstids + t->nsftids, t->nstids); + if (stid < (t->nstids + t->nsftids)) + __set_bit(stid, t->stid_bmap); + else + stid = -1; + } else { + stid = -1; + } + if (stid >= 0) { + t->stid_tab[stid].data = data; + stid += t->stid_base; + t->stids_in_use++; + } + spin_unlock_bh(&t->stid_lock); + return stid; +} +EXPORT_SYMBOL(cxgb4_alloc_sftid); + +/* Release a server TID. */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { @@ -2362,18 +2620,26 @@ EXPORT_SYMBOL(cxgb4_remove_tid); static int tid_init(struct tid_info *t) { size_t size; + unsigned int stid_bmap_size; unsigned int natids = t->natids; - size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + + stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + size = t->ntids * sizeof(*t->tid_tab) + + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + - BITS_TO_LONGS(t->nstids) * sizeof(long); + t->nsftids * sizeof(*t->stid_tab) + + stid_bmap_size * sizeof(long) + + t->nftids * sizeof(*t->ftid_tab) + + t->nsftids * sizeof(*t->ftid_tab); + t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) return -ENOMEM; t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; - t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids]; + t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; + t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); @@ -2388,7 +2654,7 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids); + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); return 0; } @@ -2404,7 +2670,8 @@ static int tid_init(struct tid_info *t) * Returns <0 on error and one of the %NET_XMIT_* values on success. */ int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue) + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue) { unsigned int chan; struct sk_buff *skb; @@ -2750,6 +3017,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) { void *handle; struct cxgb4_lld_info lli; + unsigned short i; lli.pdev = adap->pdev; lli.l2t = adap->l2t; @@ -2776,10 +3044,16 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); + lli.filt_mode = adap->filter_mode; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lli.tx_modq[i] = i; lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS); lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL); lli.fw_vers = adap->params.fw_vers; lli.dbfifo_int_thresh = dbfifo_int_thresh; + lli.sge_pktshift = adap->sge.pktshift; + lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -2999,6 +3273,126 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false); } +/* Return an error number if the indicated filter isn't writable ... 
+ */ +static int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). This checks for all + * the common problems with doing this, like the filter being locked or + * currently pending in another operation. + */ +static int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, unsigned char port, unsigned char mask) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + int i; + u8 *val; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + /* Check to make sure the filter requested is writable ... + */ + f = &adap->tids.ftid_tab[stid]; + ret = writable_filter(f); + if (ret) + return ret; + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adap, f); + + /* Clear out filter specifications */ + memset(&f->fs, 0, sizeof(struct ch_filter_specification)); + f->fs.val.lport = cpu_to_be16(sport); + f->fs.mask.lport = ~0; + val = (u8 *)&sip; + if ((val[0] | val[1] | val[2] | val[3]) != 0) { + for (i = 0; i < 4; i++) { + f->fs.val.lip[i] = val[i]; + f->fs.mask.lip[i] = ~0; + } + if (adap->filter_mode & F_PORT) { + f->fs.val.iport = port; + f->fs.mask.iport = mask; + } + } + + f->fs.dirsteer = 1; + f->fs.iq = queue; + /* Mark filter as locked */ + f->locked = 1; + f->fs.rpttid = 1; + + ret = set_filter_wr(adap, stid); + if (ret) { + clear_filter(adap, f); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(cxgb4_create_server_filter); + +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + f = &adap->tids.ftid_tab[stid]; + /* Unlock the filter */ + f->locked = 0; + + ret = delete_filter(adap, stid); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(cxgb4_remove_server_filter); + static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, struct rtnl_link_stats64 *ns) { @@ -3245,6 +3639,34 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) v = t4_read_reg(adap, TP_PIO_DATA); t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR); + /* first 4 Tx modulation queues point to consecutive Tx channels */ + adap->params.tp.tx_modq_map = 0xE4; + t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP, + V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map)); + + /* associate each Tx modulation queue with consecutive Tx channels */ + v = 0x84218421; + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_HDR); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_FIFO); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_PCMD); + +#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */ + if (is_offload(adap)) { + 
t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + } + /* get basic stuff going */ return t4_early_init(adap, adap->fn); } @@ -4035,6 +4457,10 @@ static int adap_init0(struct adapter *adap) for (j = 0; j < NCHAN; j++) adap->params.tp.tx_modq[j] = j; + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->filter_mode, 1, + TP_VLAN_PRI_MAP); + adap->flags |= FW_OK; return 0; @@ -4661,6 +5087,17 @@ static void remove_one(struct pci_dev *pdev) if (adapter->debugfs_root) debugfs_remove_recursive(adapter->debugfs_root); + /* If we allocated filters, free up state associated with any + * valid filters ... + */ + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + for (i = 0; i < (adapter->tids.nftids + + adapter->tids.nsftids); i++, f++) + if (f->valid) + clear_filter(adapter, f); + } + if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 39bec73..e2bbc7f3e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -38,6 +38,7 @@ #include #include #include +#include #include /* CPL message priority levels */ @@ -97,7 +98,9 @@ struct tid_info { union aopen_entry *atid_tab; unsigned int natids; + unsigned int atid_base; + struct filter_entry *ftid_tab; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -129,7 +132,7 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { stid -= t->stid_base; - return stid < t->nstids ? t->stid_tab[stid].data : NULL; + return stid < (t->nstids + t->nsftids) ? 
t->stid_tab[stid].data : NULL; } static inline void cxgb4_insert_tid(struct tid_info *t, void *data, @@ -141,6 +144,7 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data, int cxgb4_alloc_atid(struct tid_info *t, void *data); int cxgb4_alloc_stid(struct tid_info *t, int family, void *data); +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data); void cxgb4_free_atid(struct tid_info *t, unsigned int atid); void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family); void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); @@ -148,8 +152,14 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); struct in6_addr; int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue); - + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue); +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, + unsigned char port, unsigned char mask); +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); @@ -221,9 +231,16 @@ struct cxgb4_lld_info { unsigned int iscsi_iolen; /* iSCSI max I/O length */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ + unsigned short filt_mode; /* filter optional components */ + unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */ + /* scheduler queue */ void __iomem *gts_reg; /* address of GTS register */ void __iomem *db_reg; /* address of kernel doorbell */ int dbfifo_int_thresh; /* doorbell fifo int threshold */ + unsigned int sge_pktshift; /* Padding between CPL and */ + /* packet data */ + bool enable_fw_ofld_conn; /* Enable connection through fw */ + /* WR */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 6ac77a6..2987809 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -484,6 +484,38 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) handle_failed_resolution(adap, arpq); } +/* Allocate an L2T entry for use by a switching rule. Such need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. + */ +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + write_lock_bh(&d->lock); + e = alloc_l2e(d); + if (e) { + spin_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_set(&e->refcnt, 1); + spin_unlock(&e->lock); + } + write_unlock_bh(&d->lock); + return e; +} + +/* Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching. 
+ */ +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr) +{ + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETH_ALEN); + return write_l2e(adap, e, 0); +} + struct l2t_data *t4_init_l2t(void) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 02b31d0..108c0f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -100,6 +100,9 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, unsigned int priority); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr); struct l2t_data *t4_init_l2t(void); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d9c754..22f3af5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -109,7 +109,7 @@ void t4_set_reg_field(struct adapter *adapter, unsigned int addr, u32 mask, * Reads registers that are accessed indirectly through an address/data * register pair. */ -static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx) { @@ -2268,6 +2268,26 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, return 0; } +/* t4_mk_filtdelwr - create a delete filter WR + * @ftid: the filter ID + * @wr: the filter work request to populate + * @qid: ingress queue to receive the delete notification + * + * Creates a filter work request to delete the supplied filter. If @qid is + * negative the delete notification is suppressed. 
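 *
 * For reference (not part of the patch text): the driver's del_filter_wr()
 * calls this as
 *
 *	t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
 *
 * so the CPL_SET_TCB_RPL completion arrives on the FW event queue and is
 * handled by filter_rpl(); passing a negative @qid instead sets NOREPLY
 * and suppresses that completion.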
+ */ +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid) +{ + memset(wr, 0, sizeof(*wr)); + wr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + wr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*wr) / 16)); + wr->tid_to_iq = htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_NOREPLY(qid < 0)); + wr->del_filter_to_l2tix = htonl(F_FW_FILTER_WR_DEL_FILTER); + if (qid >= 0) + wr->rx_chan_rx_rpl_iq = htons(V_FW_FILTER_WR_RX_RPL_IQ(qid)); +} + #define INIT_CMD(var, cmd, rd_wr) do { \ (var).op_to_write = htonl(FW_CMD_OP(FW_##cmd##_CMD) | \ FW_CMD_REQUEST | FW_CMD_##rd_wr); \ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b760808..261d177 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -193,8 +193,24 @@ struct work_request_hdr { __be64 wr_lo; }; +/* wr_hi fields */ +#define S_WR_OP 24 +#define V_WR_OP(x) ((__u64)(x) << S_WR_OP) + #define WR_HDR struct work_request_hdr wr +/* option 0 fields */ +#define S_MSS_IDX 60 +#define M_MSS_IDX 0xF +#define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) +#define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) + +/* option 2 fields */ +#define S_RSS_QUEUE 0 +#define M_RSS_QUEUE 0x3FF +#define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) +#define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) + struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; @@ -204,12 +220,14 @@ struct cpl_pass_open_req { __be32 peer_ip; __be64 opt0; #define TX_CHAN(x) ((x) << 2) +#define NO_CONG(x) ((x) << 4) #define DELACK(x) ((x) << 5) #define ULP_MODE(x) ((x) << 8) #define RCV_BUFSIZ(x) ((x) << 12) #define DSCP(x) ((x) << 22) #define SMAC_SEL(x) ((u64)(x) << 28) #define L2T_IDX(x) ((u64)(x) << 36) +#define TCAM_BYPASS(x) ((u64)(x) << 48) #define NAGLE(x) ((u64)(x) << 49) #define WND_SCALE(x) ((u64)(x) << 50) #define KEEP_ALIVE(x) ((u64)(x) << 54) @@ -247,8 +265,10 @@ struct cpl_pass_accept_rpl { #define RSS_QUEUE_VALID (1 << 10) #define RX_COALESCE_VALID(x) ((x) << 11) #define RX_COALESCE(x) ((x) << 12) +#define PACE(x) ((x) << 16) #define TX_QUEUE(x) ((x) << 23) #define RX_CHANNEL(x) ((x) << 26) +#define CCTRL_ECN(x) ((x) << 27) #define WND_SCALE_EN(x) ((x) << 28) #define TSTAMPS_EN(x) ((x) << 29) #define SACK_EN(x) ((x) << 30) @@ -292,6 +312,9 @@ struct cpl_pass_establish { union opcode_tid ot; __be32 rsvd; __be32 tos_stid; +#define PASS_OPEN_TID(x) ((x) << 0) +#define PASS_OPEN_TOS(x) ((x) << 24) +#define GET_PASS_OPEN_TID(x) (((x) >> 0) & 0xFFFFFF) #define GET_POPEN_TID(x) ((x) & 0xffffff) #define GET_POPEN_TOS(x) (((x) >> 24) & 0xff) __be16 mac_idx; @@ -332,6 +355,7 @@ struct cpl_set_tcb_field { __be16 word_cookie; #define TCB_WORD(x) ((x) << 0) #define TCB_COOKIE(x) ((x) << 5) +#define GET_TCB_COOKIE(x) (((x) >> 5) & 7) __be64 mask; __be64 val; }; @@ -536,6 +560,37 @@ struct cpl_rx_pkt { __be16 err_vec; }; +/* rx_pkt.l2info fields */ +#define S_RX_ETHHDR_LEN 0 +#define M_RX_ETHHDR_LEN 0x1F +#define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) +#define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) + +#define S_RX_MACIDX 8 +#define M_RX_MACIDX 0x1FF +#define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) +#define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) + +#define S_RXF_SYN 21 +#define V_RXF_SYN(x) ((x) << S_RXF_SYN) +#define F_RXF_SYN V_RXF_SYN(1U) + +#define S_RX_CHAN 28 +#define M_RX_CHAN 0xF +#define V_RX_CHAN(x) ((x) << S_RX_CHAN) +#define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) + +/* rx_pkt.hdr_len fields */ +#define S_RX_TCPHDR_LEN 
0 +#define M_RX_TCPHDR_LEN 0x3F +#define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) +#define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) + +#define S_RX_IPHDR_LEN 6 +#define M_RX_IPHDR_LEN 0x3FF +#define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) +#define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) + struct cpl_trace_pkt { u8 opcode; u8 intf; @@ -634,6 +689,17 @@ struct cpl_fw6_msg { /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = 0, + FW6_TYPE_WR_RPL = 1, + FW6_TYPE_CQE = 2, + FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3, +}; + +struct cpl_fw6_msg_ofld_connection_wr_rpl { + __u64 cookie; + __be32 tid; /* or atid in case of active failure */ + __u8 t_state; + __u8 retval; + __u8 rsvd[2]; }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 75393f5..83ec5f7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1064,4 +1064,41 @@ #define ADDRESS(x) ((x) << ADDRESS_SHIFT) #define XGMAC_PORT_INT_CAUSE 0x10dc + +#define A_TP_TX_MOD_QUEUE_REQ_MAP 0x7e28 + +#define A_TP_TX_MOD_CHANNEL_WEIGHT 0x7e34 + +#define S_TX_MOD_QUEUE_REQ_MAP 0 +#define M_TX_MOD_QUEUE_REQ_MAP 0xffffU +#define V_TX_MOD_QUEUE_REQ_MAP(x) ((x) << S_TX_MOD_QUEUE_REQ_MAP) + +#define A_TP_TX_MOD_QUEUE_WEIGHT0 0x7e30 + +#define S_TX_MODQ_WEIGHT3 24 +#define M_TX_MODQ_WEIGHT3 0xffU +#define V_TX_MODQ_WEIGHT3(x) ((x) << S_TX_MODQ_WEIGHT3) + +#define S_TX_MODQ_WEIGHT2 16 +#define M_TX_MODQ_WEIGHT2 0xffU +#define V_TX_MODQ_WEIGHT2(x) ((x) << S_TX_MODQ_WEIGHT2) + +#define S_TX_MODQ_WEIGHT1 8 +#define M_TX_MODQ_WEIGHT1 0xffU +#define V_TX_MODQ_WEIGHT1(x) ((x) << S_TX_MODQ_WEIGHT1) + +#define S_TX_MODQ_WEIGHT0 0 +#define M_TX_MODQ_WEIGHT0 0xffU +#define V_TX_MODQ_WEIGHT0(x) ((x) << S_TX_MODQ_WEIGHT0) + +#define A_TP_TX_SCHED_HDR 0x23 + +#define A_TP_TX_SCHED_FIFO 0x24 + +#define A_TP_TX_SCHED_PCMD 0x25 + +#define S_PORT 1 +#define V_PORT(x) ((x) << S_PORT) +#define F_PORT V_PORT(1U) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0abc864..a0dcccd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -35,6 +35,45 @@ #ifndef _T4FW_INTERFACE_H_ #define _T4FW_INTERFACE_H_ +enum fw_retval { + FW_SUCCESS = 0, /* completed successfully */ + FW_EPERM = 1, /* operation not permitted */ + FW_ENOENT = 2, /* no such file or directory */ + FW_EIO = 5, /* input/output error; hw bad */ + FW_ENOEXEC = 8, /* exec format error; inv microcode */ + FW_EAGAIN = 11, /* try again */ + FW_ENOMEM = 12, /* out of memory */ + FW_EFAULT = 14, /* bad address; fw bad */ + FW_EBUSY = 16, /* resource busy */ + FW_EEXIST = 17, /* file exists */ + FW_EINVAL = 22, /* invalid argument */ + FW_ENOSPC = 28, /* no space left on device */ + FW_ENOSYS = 38, /* functionality not implemented */ + FW_EPROTO = 71, /* protocol error */ + FW_EADDRINUSE = 98, /* address already in use */ + FW_EADDRNOTAVAIL = 99, /* cannot assign requested address */ + FW_ENETDOWN = 100, /* network is down */ + FW_ENETUNREACH = 101, /* network is unreachable */ + FW_ENOBUFS = 105, /* no buffer space available */ + FW_ETIMEDOUT = 110, /* timeout */ + FW_EINPROGRESS = 115, /* fw internal */ + FW_SCSI_ABORT_REQUESTED = 128, /* */ + FW_SCSI_ABORT_TIMEDOUT = 129, /* */ + FW_SCSI_ABORTED = 130, /* */ + FW_SCSI_CLOSE_REQUESTED = 131, /* */ + FW_ERR_LINK_DOWN = 132, /* */ + FW_RDEV_NOT_READY = 133, /* 
*/ + FW_ERR_RDEV_LOST = 134, /* */ + FW_ERR_RDEV_LOGO = 135, /* */ + FW_FCOE_NO_XCHG = 136, /* */ + FW_SCSI_RSP_ERR = 137, /* */ + FW_ERR_RDEV_IMPL_LOGO = 138, /* */ + FW_SCSI_UNDER_FLOW_ERR = 139, /* */ + FW_SCSI_OVER_FLOW_ERR = 140, /* */ + FW_SCSI_DDP_ERR = 141, /* DDP error*/ + FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ +}; + #define FW_T4VF_SGE_BASE_ADDR 0x0000 #define FW_T4VF_MPS_BASE_ADDR 0x0100 #define FW_T4VF_PL_BASE_ADDR 0x0200 @@ -46,6 +85,7 @@ enum fw_wr_opcodes { FW_ULPTX_WR = 0x04, FW_TP_WR = 0x05, FW_ETH_TX_PKT_WR = 0x08, + FW_OFLD_CONNECTION_WR = 0x2f, FW_FLOWC_WR = 0x0a, FW_OFLD_TX_DATA_WR = 0x0b, FW_CMD_WR = 0x10, @@ -81,6 +121,282 @@ struct fw_wr_hdr { #define FW_WR_LEN16(x) ((x) << 0) #define HW_TPL_FR_MT_PR_IV_P_FC 0X32B +#define HW_TPL_FR_MT_PR_OV_P_FC 0X327 + +/* filter wr reply code in cookie in CPL_SET_TCB_RPL */ +enum fw_filter_wr_cookie { + FW_FILTER_WR_SUCCESS, + FW_FILTER_WR_FLT_ADDED, + FW_FILTER_WR_FLT_DELETED, + FW_FILTER_WR_SMT_TBL_FULL, + FW_FILTER_WR_EINVAL, +}; + +struct fw_filter_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; +}; + +#define S_FW_FILTER_WR_TID 12 +#define M_FW_FILTER_WR_TID 0xfffff +#define V_FW_FILTER_WR_TID(x) ((x) << S_FW_FILTER_WR_TID) +#define G_FW_FILTER_WR_TID(x) \ + (((x) >> S_FW_FILTER_WR_TID) & M_FW_FILTER_WR_TID) + +#define S_FW_FILTER_WR_RQTYPE 11 +#define M_FW_FILTER_WR_RQTYPE 0x1 +#define V_FW_FILTER_WR_RQTYPE(x) ((x) << S_FW_FILTER_WR_RQTYPE) +#define G_FW_FILTER_WR_RQTYPE(x) \ + (((x) >> S_FW_FILTER_WR_RQTYPE) & M_FW_FILTER_WR_RQTYPE) +#define F_FW_FILTER_WR_RQTYPE V_FW_FILTER_WR_RQTYPE(1U) + +#define S_FW_FILTER_WR_NOREPLY 10 +#define M_FW_FILTER_WR_NOREPLY 0x1 +#define V_FW_FILTER_WR_NOREPLY(x) ((x) << S_FW_FILTER_WR_NOREPLY) +#define G_FW_FILTER_WR_NOREPLY(x) \ + (((x) >> S_FW_FILTER_WR_NOREPLY) & M_FW_FILTER_WR_NOREPLY) +#define F_FW_FILTER_WR_NOREPLY V_FW_FILTER_WR_NOREPLY(1U) + +#define S_FW_FILTER_WR_IQ 0 +#define M_FW_FILTER_WR_IQ 0x3ff +#define V_FW_FILTER_WR_IQ(x) ((x) << S_FW_FILTER_WR_IQ) +#define G_FW_FILTER_WR_IQ(x) \ + (((x) >> S_FW_FILTER_WR_IQ) & M_FW_FILTER_WR_IQ) + +#define S_FW_FILTER_WR_DEL_FILTER 31 +#define M_FW_FILTER_WR_DEL_FILTER 0x1 +#define V_FW_FILTER_WR_DEL_FILTER(x) ((x) << S_FW_FILTER_WR_DEL_FILTER) +#define G_FW_FILTER_WR_DEL_FILTER(x) \ + (((x) >> S_FW_FILTER_WR_DEL_FILTER) & M_FW_FILTER_WR_DEL_FILTER) +#define F_FW_FILTER_WR_DEL_FILTER V_FW_FILTER_WR_DEL_FILTER(1U) + +#define S_FW_FILTER_WR_RPTTID 25 +#define M_FW_FILTER_WR_RPTTID 0x1 +#define V_FW_FILTER_WR_RPTTID(x) ((x) << S_FW_FILTER_WR_RPTTID) +#define G_FW_FILTER_WR_RPTTID(x) \ + (((x) >> S_FW_FILTER_WR_RPTTID) & M_FW_FILTER_WR_RPTTID) +#define F_FW_FILTER_WR_RPTTID V_FW_FILTER_WR_RPTTID(1U) + +#define S_FW_FILTER_WR_DROP 24 +#define M_FW_FILTER_WR_DROP 0x1 +#define V_FW_FILTER_WR_DROP(x) ((x) << S_FW_FILTER_WR_DROP) +#define G_FW_FILTER_WR_DROP(x) \ + (((x) >> S_FW_FILTER_WR_DROP) & M_FW_FILTER_WR_DROP) +#define F_FW_FILTER_WR_DROP V_FW_FILTER_WR_DROP(1U) + +#define S_FW_FILTER_WR_DIRSTEER 23 +#define M_FW_FILTER_WR_DIRSTEER 0x1 +#define V_FW_FILTER_WR_DIRSTEER(x) 
((x) << S_FW_FILTER_WR_DIRSTEER) +#define G_FW_FILTER_WR_DIRSTEER(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEER) & M_FW_FILTER_WR_DIRSTEER) +#define F_FW_FILTER_WR_DIRSTEER V_FW_FILTER_WR_DIRSTEER(1U) + +#define S_FW_FILTER_WR_MASKHASH 22 +#define M_FW_FILTER_WR_MASKHASH 0x1 +#define V_FW_FILTER_WR_MASKHASH(x) ((x) << S_FW_FILTER_WR_MASKHASH) +#define G_FW_FILTER_WR_MASKHASH(x) \ + (((x) >> S_FW_FILTER_WR_MASKHASH) & M_FW_FILTER_WR_MASKHASH) +#define F_FW_FILTER_WR_MASKHASH V_FW_FILTER_WR_MASKHASH(1U) + +#define S_FW_FILTER_WR_DIRSTEERHASH 21 +#define M_FW_FILTER_WR_DIRSTEERHASH 0x1 +#define V_FW_FILTER_WR_DIRSTEERHASH(x) ((x) << S_FW_FILTER_WR_DIRSTEERHASH) +#define G_FW_FILTER_WR_DIRSTEERHASH(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEERHASH) & M_FW_FILTER_WR_DIRSTEERHASH) +#define F_FW_FILTER_WR_DIRSTEERHASH V_FW_FILTER_WR_DIRSTEERHASH(1U) + +#define S_FW_FILTER_WR_LPBK 20 +#define M_FW_FILTER_WR_LPBK 0x1 +#define V_FW_FILTER_WR_LPBK(x) ((x) << S_FW_FILTER_WR_LPBK) +#define G_FW_FILTER_WR_LPBK(x) \ + (((x) >> S_FW_FILTER_WR_LPBK) & M_FW_FILTER_WR_LPBK) +#define F_FW_FILTER_WR_LPBK V_FW_FILTER_WR_LPBK(1U) + +#define S_FW_FILTER_WR_DMAC 19 +#define M_FW_FILTER_WR_DMAC 0x1 +#define V_FW_FILTER_WR_DMAC(x) ((x) << S_FW_FILTER_WR_DMAC) +#define G_FW_FILTER_WR_DMAC(x) \ + (((x) >> S_FW_FILTER_WR_DMAC) & M_FW_FILTER_WR_DMAC) +#define F_FW_FILTER_WR_DMAC V_FW_FILTER_WR_DMAC(1U) + +#define S_FW_FILTER_WR_SMAC 18 +#define M_FW_FILTER_WR_SMAC 0x1 +#define V_FW_FILTER_WR_SMAC(x) ((x) << S_FW_FILTER_WR_SMAC) +#define G_FW_FILTER_WR_SMAC(x) \ + (((x) >> S_FW_FILTER_WR_SMAC) & M_FW_FILTER_WR_SMAC) +#define F_FW_FILTER_WR_SMAC V_FW_FILTER_WR_SMAC(1U) + +#define S_FW_FILTER_WR_INSVLAN 17 +#define M_FW_FILTER_WR_INSVLAN 0x1 +#define V_FW_FILTER_WR_INSVLAN(x) ((x) << S_FW_FILTER_WR_INSVLAN) +#define G_FW_FILTER_WR_INSVLAN(x) \ + (((x) >> S_FW_FILTER_WR_INSVLAN) & M_FW_FILTER_WR_INSVLAN) +#define F_FW_FILTER_WR_INSVLAN V_FW_FILTER_WR_INSVLAN(1U) + +#define S_FW_FILTER_WR_RMVLAN 16 +#define M_FW_FILTER_WR_RMVLAN 0x1 +#define V_FW_FILTER_WR_RMVLAN(x) ((x) << S_FW_FILTER_WR_RMVLAN) +#define G_FW_FILTER_WR_RMVLAN(x) \ + (((x) >> S_FW_FILTER_WR_RMVLAN) & M_FW_FILTER_WR_RMVLAN) +#define F_FW_FILTER_WR_RMVLAN V_FW_FILTER_WR_RMVLAN(1U) + +#define S_FW_FILTER_WR_HITCNTS 15 +#define M_FW_FILTER_WR_HITCNTS 0x1 +#define V_FW_FILTER_WR_HITCNTS(x) ((x) << S_FW_FILTER_WR_HITCNTS) +#define G_FW_FILTER_WR_HITCNTS(x) \ + (((x) >> S_FW_FILTER_WR_HITCNTS) & M_FW_FILTER_WR_HITCNTS) +#define F_FW_FILTER_WR_HITCNTS V_FW_FILTER_WR_HITCNTS(1U) + +#define S_FW_FILTER_WR_TXCHAN 13 +#define M_FW_FILTER_WR_TXCHAN 0x3 +#define V_FW_FILTER_WR_TXCHAN(x) ((x) << S_FW_FILTER_WR_TXCHAN) +#define G_FW_FILTER_WR_TXCHAN(x) \ + (((x) >> S_FW_FILTER_WR_TXCHAN) & M_FW_FILTER_WR_TXCHAN) + +#define S_FW_FILTER_WR_PRIO 12 +#define M_FW_FILTER_WR_PRIO 0x1 +#define V_FW_FILTER_WR_PRIO(x) ((x) << S_FW_FILTER_WR_PRIO) +#define G_FW_FILTER_WR_PRIO(x) \ + (((x) >> S_FW_FILTER_WR_PRIO) & M_FW_FILTER_WR_PRIO) +#define F_FW_FILTER_WR_PRIO V_FW_FILTER_WR_PRIO(1U) + +#define S_FW_FILTER_WR_L2TIX 0 +#define M_FW_FILTER_WR_L2TIX 0xfff +#define V_FW_FILTER_WR_L2TIX(x) ((x) << S_FW_FILTER_WR_L2TIX) +#define G_FW_FILTER_WR_L2TIX(x) \ + (((x) >> S_FW_FILTER_WR_L2TIX) & M_FW_FILTER_WR_L2TIX) + +#define S_FW_FILTER_WR_FRAG 7 +#define M_FW_FILTER_WR_FRAG 0x1 +#define V_FW_FILTER_WR_FRAG(x) ((x) << S_FW_FILTER_WR_FRAG) +#define G_FW_FILTER_WR_FRAG(x) \ + (((x) >> S_FW_FILTER_WR_FRAG) & M_FW_FILTER_WR_FRAG) +#define F_FW_FILTER_WR_FRAG V_FW_FILTER_WR_FRAG(1U) + +#define 
S_FW_FILTER_WR_FRAGM 6 +#define M_FW_FILTER_WR_FRAGM 0x1 +#define V_FW_FILTER_WR_FRAGM(x) ((x) << S_FW_FILTER_WR_FRAGM) +#define G_FW_FILTER_WR_FRAGM(x) \ + (((x) >> S_FW_FILTER_WR_FRAGM) & M_FW_FILTER_WR_FRAGM) +#define F_FW_FILTER_WR_FRAGM V_FW_FILTER_WR_FRAGM(1U) + +#define S_FW_FILTER_WR_IVLAN_VLD 5 +#define M_FW_FILTER_WR_IVLAN_VLD 0x1 +#define V_FW_FILTER_WR_IVLAN_VLD(x) ((x) << S_FW_FILTER_WR_IVLAN_VLD) +#define G_FW_FILTER_WR_IVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLD) & M_FW_FILTER_WR_IVLAN_VLD) +#define F_FW_FILTER_WR_IVLAN_VLD V_FW_FILTER_WR_IVLAN_VLD(1U) + +#define S_FW_FILTER_WR_OVLAN_VLD 4 +#define M_FW_FILTER_WR_OVLAN_VLD 0x1 +#define V_FW_FILTER_WR_OVLAN_VLD(x) ((x) << S_FW_FILTER_WR_OVLAN_VLD) +#define G_FW_FILTER_WR_OVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLD) & M_FW_FILTER_WR_OVLAN_VLD) +#define F_FW_FILTER_WR_OVLAN_VLD V_FW_FILTER_WR_OVLAN_VLD(1U) + +#define S_FW_FILTER_WR_IVLAN_VLDM 3 +#define M_FW_FILTER_WR_IVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_IVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_IVLAN_VLDM) +#define G_FW_FILTER_WR_IVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLDM) & M_FW_FILTER_WR_IVLAN_VLDM) +#define F_FW_FILTER_WR_IVLAN_VLDM V_FW_FILTER_WR_IVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_OVLAN_VLDM 2 +#define M_FW_FILTER_WR_OVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_OVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_OVLAN_VLDM) +#define G_FW_FILTER_WR_OVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLDM) & M_FW_FILTER_WR_OVLAN_VLDM) +#define F_FW_FILTER_WR_OVLAN_VLDM V_FW_FILTER_WR_OVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_RX_CHAN 15 +#define M_FW_FILTER_WR_RX_CHAN 0x1 +#define V_FW_FILTER_WR_RX_CHAN(x) ((x) << S_FW_FILTER_WR_RX_CHAN) +#define G_FW_FILTER_WR_RX_CHAN(x) \ + (((x) >> S_FW_FILTER_WR_RX_CHAN) & M_FW_FILTER_WR_RX_CHAN) +#define F_FW_FILTER_WR_RX_CHAN V_FW_FILTER_WR_RX_CHAN(1U) + +#define S_FW_FILTER_WR_RX_RPL_IQ 0 +#define M_FW_FILTER_WR_RX_RPL_IQ 0x3ff +#define V_FW_FILTER_WR_RX_RPL_IQ(x) ((x) << S_FW_FILTER_WR_RX_RPL_IQ) +#define G_FW_FILTER_WR_RX_RPL_IQ(x) \ + (((x) >> S_FW_FILTER_WR_RX_RPL_IQ) & M_FW_FILTER_WR_RX_RPL_IQ) + +#define S_FW_FILTER_WR_MACI 23 +#define M_FW_FILTER_WR_MACI 0x1ff +#define V_FW_FILTER_WR_MACI(x) ((x) << S_FW_FILTER_WR_MACI) +#define G_FW_FILTER_WR_MACI(x) \ + (((x) >> S_FW_FILTER_WR_MACI) & M_FW_FILTER_WR_MACI) + +#define S_FW_FILTER_WR_MACIM 14 +#define M_FW_FILTER_WR_MACIM 0x1ff +#define V_FW_FILTER_WR_MACIM(x) ((x) << S_FW_FILTER_WR_MACIM) +#define G_FW_FILTER_WR_MACIM(x) \ + (((x) >> S_FW_FILTER_WR_MACIM) & M_FW_FILTER_WR_MACIM) + +#define S_FW_FILTER_WR_FCOE 13 +#define M_FW_FILTER_WR_FCOE 0x1 +#define V_FW_FILTER_WR_FCOE(x) ((x) << S_FW_FILTER_WR_FCOE) +#define G_FW_FILTER_WR_FCOE(x) \ + (((x) >> S_FW_FILTER_WR_FCOE) & M_FW_FILTER_WR_FCOE) +#define F_FW_FILTER_WR_FCOE V_FW_FILTER_WR_FCOE(1U) + +#define S_FW_FILTER_WR_FCOEM 12 +#define M_FW_FILTER_WR_FCOEM 0x1 +#define V_FW_FILTER_WR_FCOEM(x) ((x) << S_FW_FILTER_WR_FCOEM) +#define G_FW_FILTER_WR_FCOEM(x) \ + (((x) >> S_FW_FILTER_WR_FCOEM) & M_FW_FILTER_WR_FCOEM) +#define F_FW_FILTER_WR_FCOEM V_FW_FILTER_WR_FCOEM(1U) + +#define S_FW_FILTER_WR_PORT 9 +#define M_FW_FILTER_WR_PORT 0x7 +#define V_FW_FILTER_WR_PORT(x) ((x) << S_FW_FILTER_WR_PORT) +#define G_FW_FILTER_WR_PORT(x) \ + (((x) >> S_FW_FILTER_WR_PORT) & M_FW_FILTER_WR_PORT) + +#define S_FW_FILTER_WR_PORTM 6 +#define M_FW_FILTER_WR_PORTM 0x7 +#define V_FW_FILTER_WR_PORTM(x) ((x) << S_FW_FILTER_WR_PORTM) +#define G_FW_FILTER_WR_PORTM(x) \ + (((x) >> S_FW_FILTER_WR_PORTM) & M_FW_FILTER_WR_PORTM) + +#define 
S_FW_FILTER_WR_MATCHTYPE 3 +#define M_FW_FILTER_WR_MATCHTYPE 0x7 +#define V_FW_FILTER_WR_MATCHTYPE(x) ((x) << S_FW_FILTER_WR_MATCHTYPE) +#define G_FW_FILTER_WR_MATCHTYPE(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPE) & M_FW_FILTER_WR_MATCHTYPE) + +#define S_FW_FILTER_WR_MATCHTYPEM 0 +#define M_FW_FILTER_WR_MATCHTYPEM 0x7 +#define V_FW_FILTER_WR_MATCHTYPEM(x) ((x) << S_FW_FILTER_WR_MATCHTYPEM) +#define G_FW_FILTER_WR_MATCHTYPEM(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPEM) & M_FW_FILTER_WR_MATCHTYPEM) struct fw_ulptx_wr { __be32 op_to_compl; @@ -100,6 +416,108 @@ struct fw_eth_tx_pkt_wr { __be64 r3; }; +struct fw_ofld_connection_wr { + __be32 op_compl; + __be32 len16_pkd; + __u64 cookie; + __be64 r2; + __be64 r3; + struct fw_ofld_connection_le { + __be32 version_cpl; + __be32 filter; + __be32 r1; + __be16 lport; + __be16 pport; + union fw_ofld_connection_leip { + struct fw_ofld_connection_le_ipv4 { + __be32 pip; + __be32 lip; + __be64 r0; + __be64 r1; + __be64 r2; + } ipv4; + struct fw_ofld_connection_le_ipv6 { + __be64 pip_hi; + __be64 pip_lo; + __be64 lip_hi; + __be64 lip_lo; + } ipv6; + } u; + } le; + struct fw_ofld_connection_tcb { + __be32 t_state_to_astid; + __be16 cplrxdataack_cplpassacceptrpl; + __be16 rcv_adv; + __be32 rcv_nxt; + __be32 tx_max; + __be64 opt0; + __be32 opt2; + __be32 r1; + __be64 r2; + __be64 r3; + } tcb; +}; + +#define S_FW_OFLD_CONNECTION_WR_VERSION 31 +#define M_FW_OFLD_CONNECTION_WR_VERSION 0x1 +#define V_FW_OFLD_CONNECTION_WR_VERSION(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_VERSION) +#define G_FW_OFLD_CONNECTION_WR_VERSION(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_VERSION) & \ + M_FW_OFLD_CONNECTION_WR_VERSION) +#define F_FW_OFLD_CONNECTION_WR_VERSION \ + V_FW_OFLD_CONNECTION_WR_VERSION(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPL 30 +#define M_FW_OFLD_CONNECTION_WR_CPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPL(x) ((x) << S_FW_OFLD_CONNECTION_WR_CPL) +#define G_FW_OFLD_CONNECTION_WR_CPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPL) & M_FW_OFLD_CONNECTION_WR_CPL) +#define F_FW_OFLD_CONNECTION_WR_CPL V_FW_OFLD_CONNECTION_WR_CPL(1U) + +#define S_FW_OFLD_CONNECTION_WR_T_STATE 28 +#define M_FW_OFLD_CONNECTION_WR_T_STATE 0xf +#define V_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_T_STATE) +#define G_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_T_STATE) & \ + M_FW_OFLD_CONNECTION_WR_T_STATE) + +#define S_FW_OFLD_CONNECTION_WR_RCV_SCALE 24 +#define M_FW_OFLD_CONNECTION_WR_RCV_SCALE 0xf +#define V_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_RCV_SCALE) +#define G_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_RCV_SCALE) & \ + M_FW_OFLD_CONNECTION_WR_RCV_SCALE) + +#define S_FW_OFLD_CONNECTION_WR_ASTID 0 +#define M_FW_OFLD_CONNECTION_WR_ASTID 0xffffff +#define V_FW_OFLD_CONNECTION_WR_ASTID(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_ASTID) +#define G_FW_OFLD_CONNECTION_WR_ASTID(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_ASTID) & M_FW_OFLD_CONNECTION_WR_ASTID) + +#define S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 15 +#define M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define G_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) & \ + M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK \ + V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 14 +#define M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 0x1 
+#define V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define G_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) & \ + M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define F_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL \ + V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(1U) + enum fw_flowc_mnem { FW_FLOWC_MNEM_PFNVFN, /* PFN [15:8] VFN [7:0] */ FW_FLOWC_MNEM_CH, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9a9de51..8b3d051 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1338,6 +1338,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u32 dword_field; int err; u8 byte_field; @@ -1372,10 +1373,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { + param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + } else { + MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); + if (byte_field & 0x8) + param->steering_mode = MLX4_STEERING_MODE_B0; + else + param->steering_mode = MLX4_STEERING_MODE_A0; + } /* steering attributes */ - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 2c2e7ad..dbf2f69 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2acbe7..e1bafff 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -85,15 +85,15 @@ static int probe_vf; module_param(probe_vf, int, 0644); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); -int mlx4_log_num_mgm_entry_size = 10; +int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" - " 10 gives 248.range: 9<=" + " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." 
- " Not in use with device managed" - " flow steering"); + " To activate device managed" + " flow steering when available, set to -1"); static bool enable_64b_cqe_eqe; module_param(enable_64b_cqe_eqe, bool, 0444); @@ -281,28 +281,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; - if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { - dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; - dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; - dev->caps.fs_log_max_ucast_qp_range_size = - dev_cap->fs_log_max_ucast_qp_range_size; - } else { - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { - dev->caps.steering_mode = MLX4_STEERING_MODE_B0; - } else { - dev->caps.steering_mode = MLX4_STEERING_MODE_A0; - - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " - "set to use B0 steering. Falling back to A0 steering mode.\n"); - } - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); - } - mlx4_dbg(dev, "Steering mode is: %s\n", - mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -493,6 +471,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +static void slave_adjust_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *hca_param) +{ + dev->caps.steering_mode = hca_param->steering_mode; + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else + dev->caps.num_qp_per_mgm = + 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); + + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; @@ -635,6 +630,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.cqe_size = 32; } + slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + return 0; err_mem: @@ -1321,6 +1318,59 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) } } +static int choose_log_fs_mgm_entry_size(int qp_per_entry) +{ + int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; + + for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; + i++) { + if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) + break; + } + + return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? 
i : -1; +} + +static void choose_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (mlx4_log_num_mgm_entry_size == -1 && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && + (!mlx4_is_mfunc(dev) || + (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= + MLX4_MIN_MGM_LOG_ENTRY_SIZE) { + dev->oper_log_mgm_entry_size = + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->oper_log_mgm_entry_size = + mlx4_log_num_mgm_entry_size > 0 ? + mlx4_log_num_mgm_entry_size : + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " + "modparam log_num_mgm_entry_size = %d\n", + mlx4_steering_mode_str(dev->caps.steering_mode), + dev->oper_log_mgm_entry_size, + mlx4_log_num_mgm_entry_size); +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1360,6 +1410,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + choose_steering_mode(dev, &dev_cap); + if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); @@ -2452,6 +2504,17 @@ static int __init mlx4_verify_params(void) port_type_array[0] = true; } + if (mlx4_log_num_mgm_entry_size != -1 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { + pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " + "in legal range (-1 or %d..%d)\n", + mlx4_log_num_mgm_entry_size, + MLX4_MIN_MGM_LOG_ENTRY_SIZE, + MLX4_MAX_MGM_LOG_ENTRY_SIZE); + return -1; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index e151c21..1ee4db3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -54,12 +54,7 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) - return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; - else - return min((1 << mlx4_log_num_mgm_entry_size), - MLX4_MAX_MGM_ENTRY_SIZE); + return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1cf4203..116c5c2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -94,8 +94,10 @@ enum { }; enum { - MLX4_MAX_MGM_ENTRY_SIZE = 0x1000, - MLX4_MAX_QP_PER_MGM = 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2), + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, + MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, + MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, + MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2), MLX4_MTT_ENTRY_PER_SEG = 8, }; diff 
--git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b05705f..561ed2a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3071,6 +3071,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; + int qpn; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3080,13 +3081,21 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, NULL); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); + return err; + } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: - if (validate_eth_header_mac(slave, rule_header, rlist)) - return -EINVAL; + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put; + } break; case MLX4_NET_TRANS_RULE_ID_IB: break; @@ -3094,14 +3103,17 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); - if (add_eth_header(dev, slave, inbox, rlist, header_id)) - return -EINVAL; + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put; + } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); - return -EINVAL; + err = -EINVAL; + goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, @@ -3109,16 +3121,18 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - return err; + goto err_put; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, - MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +err_put: + put_res(dev, slave, qpn, RES_QP); return err; } diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h index 1223e0d..097e52c 100644 --- a/drivers/scsi/csiostor/t4fw_api_stor.h +++ b/drivers/scsi/csiostor/t4fw_api_stor.h @@ -40,45 +40,6 @@ * R E T U R N V A L U E S ********************************/ -enum fw_retval { - FW_SUCCESS = 0, /* completed sucessfully */ - FW_EPERM = 1, /* operation not permitted */ - FW_ENOENT = 2, /* no such file or directory */ - FW_EIO = 5, /* input/output error; hw bad */ - FW_ENOEXEC = 8, /* exec format error; inv microcode */ - FW_EAGAIN = 11, /* try again */ - FW_ENOMEM = 12, /* out of memory */ - FW_EFAULT = 14, /* bad address; fw bad */ - FW_EBUSY = 16, /* resource busy */ - FW_EEXIST = 17, /* file exists */ - FW_EINVAL = 22, /* invalid argument */ - FW_ENOSPC = 28, /* no space left on device */ - FW_ENOSYS = 38, /* functionality not implemented */ - FW_EPROTO = 71, /* protocol error */ - FW_EADDRINUSE = 98, /* address already in use */ - 
FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ - FW_ENETDOWN = 100, /* network is down */ - FW_ENETUNREACH = 101, /* network is unreachable */ - FW_ENOBUFS = 105, /* no buffer space available */ - FW_ETIMEDOUT = 110, /* timeout */ - FW_EINPROGRESS = 115, /* fw internal */ - FW_SCSI_ABORT_REQUESTED = 128, /* */ - FW_SCSI_ABORT_TIMEDOUT = 129, /* */ - FW_SCSI_ABORTED = 130, /* */ - FW_SCSI_CLOSE_REQUESTED = 131, /* */ - FW_ERR_LINK_DOWN = 132, /* */ - FW_RDEV_NOT_READY = 133, /* */ - FW_ERR_RDEV_LOST = 134, /* */ - FW_ERR_RDEV_LOGO = 135, /* */ - FW_FCOE_NO_XCHG = 136, /* */ - FW_SCSI_RSP_ERR = 137, /* */ - FW_ERR_RDEV_IMPL_LOGO = 138, /* */ - FW_SCSI_UNDER_FLOW_ERR = 139, /* */ - FW_SCSI_OVER_FLOW_ERR = 140, /* */ - FW_SCSI_DDP_ERR = 141, /* DDP error*/ - FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ -}; - enum fw_fcoe_link_sub_op { FCOE_LINK_DOWN = 0x0, FCOE_LINK_UP = 0x1, diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index ad1bb93..7f809fd 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -76,6 +76,16 @@ config DA9052_WATCHDOG Alternatively say M to compile the driver as a module, which will be called da9052_wdt. +config DA9055_WATCHDOG + tristate "Dialog Semiconductor DA9055 Watchdog" + depends on MFD_DA9055 + help + If you say yes here you get support for watchdog on the Dialog + Semiconductor DA9055 PMIC. + + This driver can also be built as a module. If so, the module + will be called da9055_wdt. + config WM831X_WATCHDOG tristate "WM831x watchdog" depends on MFD_WM831X @@ -232,6 +242,7 @@ config EP93XX_WATCHDOG config OMAP_WATCHDOG tristate "OMAP Watchdog" depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS + select WATCHDOG_CORE help Support for TI OMAP1610/OMAP1710/OMAP2420/OMAP3430/OMAP4430 watchdog. Say 'Y' here to enable the OMAP1610/OMAP1710/OMAP2420/OMAP3430/OMAP4430 watchdog timer. @@ -300,6 +311,7 @@ config COH901327_WATCHDOG config TWL4030_WATCHDOG tristate "TWL4030 Watchdog" depends on TWL4030_CORE + select WATCHDOG_CORE help Support for TI TWL4030 watchdog. Say 'Y' here to enable the watchdog timer support for TWL4030 chips. @@ -342,7 +354,7 @@ config MAX63XX_WATCHDOG config IMX2_WDT tristate "IMX2+ Watchdog" - depends on IMX_HAVE_PLATFORM_IMX2_WDT + depends on ARCH_MXC help This is the driver for the hardware watchdog on the Freescale IMX2 and later processors. @@ -431,7 +443,7 @@ config ALIM7101_WDT config F71808E_WDT tristate "Fintek F71808E, F71862FG, F71869, F71882FG and F71889FG Watchdog" - depends on X86 && EXPERIMENTAL + depends on X86 help This is the driver for the hardware watchdog on the Fintek F71808E, F71862FG, F71869, F71882FG and F71889FG Super I/O controllers. 
@@ -622,7 +634,7 @@ config IT8712F_WDT config IT87_WDT tristate "IT87 Watchdog Timer" - depends on X86 && EXPERIMENTAL + depends on X86 ---help--- This is the driver for the hardware watchdog on the ITE IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726 and IT8728 diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 572b39b..97bbdb3a 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -164,6 +164,7 @@ obj-$(CONFIG_XEN_WDT) += xen_wdt.o # Architecture Independent obj-$(CONFIG_DA9052_WATCHDOG) += da9052_wdt.o +obj-$(CONFIG_DA9055_WATCHDOG) += da9055_wdt.o obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o obj-$(CONFIG_MAX63XX_WATCHDOG) += max63xx_wdt.o diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 7c8ede7..38a999e 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -284,6 +284,7 @@ static void ath97_wdt_shutdown(struct platform_device *pdev) } static struct platform_driver ath79_wdt_driver = { + .probe = ath79_wdt_probe, .remove = ath79_wdt_remove, .shutdown = ath97_wdt_shutdown, .driver = { @@ -292,17 +293,7 @@ static struct platform_driver ath79_wdt_driver = { }, }; -static int __init ath79_wdt_init(void) -{ - return platform_driver_probe(&ath79_wdt_driver, ath79_wdt_probe); -} -module_init(ath79_wdt_init); - -static void __exit ath79_wdt_exit(void) -{ - platform_driver_unregister(&ath79_wdt_driver); -} -module_exit(ath79_wdt_exit); +module_platform_driver(ath79_wdt_driver); MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X hardware watchdog driver"); MODULE_AUTHOR("Gabor Juhos + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define DA9055_DEF_TIMEOUT 4 +#define DA9055_TWDMIN 256 + +struct da9055_wdt_data { + struct watchdog_device wdt; + struct da9055 *da9055; + struct kref kref; +}; + +static const struct { + u8 reg_val; + int user_time; /* In seconds */ +} da9055_wdt_maps[] = { + { 0, 0 }, + { 1, 2 }, + { 2, 4 }, + { 3, 8 }, + { 4, 16 }, + { 5, 32 }, + { 5, 33 }, /* Actual time 32.768s so included both 32s and 33s */ + { 6, 65 }, + { 6, 66 }, /* Actual time 65.536s so include both, 65s and 66s */ + { 7, 131 }, +}; + +static int da9055_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + struct da9055_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + struct da9055 *da9055 = driver_data->da9055; + int ret, i; + + for (i = 0; i < ARRAY_SIZE(da9055_wdt_maps); i++) + if (da9055_wdt_maps[i].user_time == timeout) + break; + + if (i == ARRAY_SIZE(da9055_wdt_maps)) + ret = -EINVAL; + else + ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_B, + DA9055_TWDSCALE_MASK, + da9055_wdt_maps[i].reg_val << + DA9055_TWDSCALE_SHIFT); + if (ret < 0) + dev_err(da9055->dev, + "Failed to update timescale bit, %d\n", ret); + + wdt_dev->timeout = timeout; + + return ret; +} + +static int da9055_wdt_ping(struct watchdog_device *wdt_dev) +{ + struct da9055_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + struct da9055 *da9055 = driver_data->da9055; + int ret; + + /* + * We have a minimum time for watchdog window called TWDMIN. A write + * to the watchdog before this elapsed time will cause an error. 
+ */ + mdelay(DA9055_TWDMIN); + + /* Reset the watchdog timer */ + ret = da9055_reg_update(da9055, DA9055_REG_CONTROL_E, + DA9055_WATCHDOG_MASK, 1); + + return ret; +} + +static void da9055_wdt_release_resources(struct kref *r) +{ + struct da9055_wdt_data *driver_data = + container_of(r, struct da9055_wdt_data, kref); + + kfree(driver_data); +} + +static void da9055_wdt_ref(struct watchdog_device *wdt_dev) +{ + struct da9055_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + + kref_get(&driver_data->kref); +} + +static void da9055_wdt_unref(struct watchdog_device *wdt_dev) +{ + struct da9055_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + + kref_put(&driver_data->kref, da9055_wdt_release_resources); +} + +static int da9055_wdt_start(struct watchdog_device *wdt_dev) +{ + return da9055_wdt_set_timeout(wdt_dev, wdt_dev->timeout); +} + +static int da9055_wdt_stop(struct watchdog_device *wdt_dev) +{ + return da9055_wdt_set_timeout(wdt_dev, 0); +} + +static struct watchdog_info da9055_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "DA9055 Watchdog", +}; + +static const struct watchdog_ops da9055_wdt_ops = { + .owner = THIS_MODULE, + .start = da9055_wdt_start, + .stop = da9055_wdt_stop, + .ping = da9055_wdt_ping, + .set_timeout = da9055_wdt_set_timeout, + .ref = da9055_wdt_ref, + .unref = da9055_wdt_unref, +}; + +static int da9055_wdt_probe(struct platform_device *pdev) +{ + struct da9055 *da9055 = dev_get_drvdata(pdev->dev.parent); + struct da9055_wdt_data *driver_data; + struct watchdog_device *da9055_wdt; + int ret; + + driver_data = devm_kzalloc(&pdev->dev, sizeof(*driver_data), + GFP_KERNEL); + if (!driver_data) { + dev_err(da9055->dev, "Failed to allocate watchdog device\n"); + return -ENOMEM; + } + + driver_data->da9055 = da9055; + + da9055_wdt = &driver_data->wdt; + + da9055_wdt->timeout = DA9055_DEF_TIMEOUT; + da9055_wdt->info = &da9055_wdt_info; + da9055_wdt->ops = &da9055_wdt_ops; + watchdog_set_nowayout(da9055_wdt, nowayout); + watchdog_set_drvdata(da9055_wdt, driver_data); + + kref_init(&driver_data->kref); + + ret = da9055_wdt_stop(da9055_wdt); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to stop watchdog, %d\n", ret); + goto err; + } + + dev_set_drvdata(&pdev->dev, driver_data); + + ret = watchdog_register_device(&driver_data->wdt); + if (ret != 0) + dev_err(da9055->dev, "watchdog_register_device() failed: %d\n", + ret); + +err: + return ret; +} + +static int da9055_wdt_remove(struct platform_device *pdev) +{ + struct da9055_wdt_data *driver_data = dev_get_drvdata(&pdev->dev); + + watchdog_unregister_device(&driver_data->wdt); + kref_put(&driver_data->kref, da9055_wdt_release_resources); + + return 0; +} + +static struct platform_driver da9055_wdt_driver = { + .probe = da9055_wdt_probe, + .remove = da9055_wdt_remove, + .driver = { + .name = "da9055-watchdog", + }, +}; + +module_platform_driver(da9055_wdt_driver); + +MODULE_AUTHOR("David Dajun Chen "); +MODULE_DESCRIPTION("DA9055 watchdog"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:da9055-watchdog"); diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c index 8791879..e8e8724 100644 --- a/drivers/watchdog/davinci_wdt.c +++ b/drivers/watchdog/davinci_wdt.c @@ -208,7 +208,7 @@ static int davinci_wdt_probe(struct platform_device *pdev) if (WARN_ON(IS_ERR(wdt_clk))) return PTR_ERR(wdt_clk); - clk_enable(wdt_clk); + clk_prepare_enable(wdt_clk); if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT) heartbeat = DEFAULT_HEARTBEAT; @@ -256,16 +256,23 @@ static int 
davinci_wdt_remove(struct platform_device *pdev) wdt_mem = NULL; } - clk_disable(wdt_clk); + clk_disable_unprepare(wdt_clk); clk_put(wdt_clk); return 0; } +static const struct of_device_id davinci_wdt_of_match[] = { + { .compatible = "ti,davinci-wdt", }, + {}, +}; +MODULE_DEVICE_TABLE(of, davinci_wdt_of_match); + static struct platform_driver platform_wdt_driver = { .driver = { .name = "watchdog", .owner = THIS_MODULE, + .of_match_table = davinci_wdt_of_match, }, .probe = davinci_wdt_probe, .remove = davinci_wdt_remove, diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 8717255..11796b9 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -39,7 +39,7 @@ #endif /* CONFIG_HPWDT_NMI_DECODING */ #include -#define HPWDT_VERSION "1.3.0" +#define HPWDT_VERSION "1.3.1" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TIMER TICKS_TO_SECS(65535) diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c index a84eb55..233cfad 100644 --- a/drivers/watchdog/mpcore_wdt.c +++ b/drivers/watchdog/mpcore_wdt.c @@ -80,8 +80,7 @@ static irqreturn_t mpcore_wdt_fire(int irq, void *arg) /* Check it really was our interrupt */ if (readl(wdt->base + TWD_WDOG_INTSTAT)) { - dev_printk(KERN_CRIT, wdt->dev, - "Triggered - Reboot ignored.\n"); + dev_crit(wdt->dev, "Triggered - Reboot ignored\n"); /* Clear the interrupt on the watchdog */ writel(1, wdt->base + TWD_WDOG_INTSTAT); return IRQ_HANDLED; @@ -123,7 +122,7 @@ static void mpcore_wdt_stop(struct mpcore_wdt *wdt) static void mpcore_wdt_start(struct mpcore_wdt *wdt) { - dev_printk(KERN_INFO, wdt->dev, "enabling watchdog.\n"); + dev_info(wdt->dev, "enabling watchdog\n"); /* This loads the count register but does NOT start the count yet */ mpcore_wdt_keepalive(wdt); @@ -180,8 +179,8 @@ static int mpcore_wdt_release(struct inode *inode, struct file *file) if (wdt->expect_close == 42) mpcore_wdt_stop(wdt); else { - dev_printk(KERN_CRIT, wdt->dev, - "unexpected close, not stopping watchdog!\n"); + dev_crit(wdt->dev, + "unexpected close, not stopping watchdog!\n"); mpcore_wdt_keepalive(wdt); } clear_bit(0, &wdt->timer_alive); @@ -351,9 +350,9 @@ static int mpcore_wdt_probe(struct platform_device *pdev) ret = devm_request_irq(wdt->dev, wdt->irq, mpcore_wdt_fire, 0, "mpcore_wdt", wdt); if (ret) { - dev_printk(KERN_ERR, wdt->dev, - "cannot register IRQ%d for watchdog\n", - wdt->irq); + dev_err(wdt->dev, + "cannot register IRQ%d for watchdog\n", + wdt->irq); return ret; } } @@ -365,9 +364,9 @@ static int mpcore_wdt_probe(struct platform_device *pdev) mpcore_wdt_miscdev.parent = &pdev->dev; ret = misc_register(&mpcore_wdt_miscdev); if (ret) { - dev_printk(KERN_ERR, wdt->dev, + dev_err(wdt->dev, "cannot register miscdev on minor=%d (err=%d)\n", - WATCHDOG_MINOR, ret); + WATCHDOG_MINOR, ret); return ret; } diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 3e3ebbc..34ed61e 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -31,42 +31,34 @@ #include #include #include -#include #include -#include #include #include #include #include #include #include -#include #include -#include #include #include #include #include "omap_wdt.h" -static struct platform_device *omap_wdt_dev; - static unsigned timer_margin; module_param(timer_margin, uint, 0); MODULE_PARM_DESC(timer_margin, "initial watchdog timeout (in seconds)"); -static unsigned int wdt_trgr_pattern = 0x1234; -static DEFINE_SPINLOCK(wdt_lock); - struct 
omap_wdt_dev { void __iomem *base; /* physical */ struct device *dev; - int omap_wdt_users; + bool omap_wdt_users; struct resource *mem; - struct miscdevice omap_wdt_miscdev; + int wdt_trgr_pattern; + struct mutex lock; /* to avoid races with PM */ }; -static void omap_wdt_ping(struct omap_wdt_dev *wdev) +static void omap_wdt_reload(struct omap_wdt_dev *wdev) { void __iomem *base = wdev->base; @@ -74,8 +66,8 @@ static void omap_wdt_ping(struct omap_wdt_dev *wdev) while ((__raw_readl(base + OMAP_WATCHDOG_WPS)) & 0x08) cpu_relax(); - wdt_trgr_pattern = ~wdt_trgr_pattern; - __raw_writel(wdt_trgr_pattern, (base + OMAP_WATCHDOG_TGR)); + wdev->wdt_trgr_pattern = ~wdev->wdt_trgr_pattern; + __raw_writel(wdev->wdt_trgr_pattern, (base + OMAP_WATCHDOG_TGR)); /* wait for posted write to complete */ while ((__raw_readl(base + OMAP_WATCHDOG_WPS)) & 0x08) @@ -111,18 +103,10 @@ static void omap_wdt_disable(struct omap_wdt_dev *wdev) cpu_relax(); } -static void omap_wdt_adjust_timeout(unsigned new_timeout) -{ - if (new_timeout < TIMER_MARGIN_MIN) - new_timeout = TIMER_MARGIN_DEFAULT; - if (new_timeout > TIMER_MARGIN_MAX) - new_timeout = TIMER_MARGIN_MAX; - timer_margin = new_timeout; -} - -static void omap_wdt_set_timeout(struct omap_wdt_dev *wdev) +static void omap_wdt_set_timer(struct omap_wdt_dev *wdev, + unsigned int timeout) { - u32 pre_margin = GET_WLDR_VAL(timer_margin); + u32 pre_margin = GET_WLDR_VAL(timeout); void __iomem *base = wdev->base; /* just count up at 32 KHz */ @@ -134,16 +118,14 @@ static void omap_wdt_set_timeout(struct omap_wdt_dev *wdev) cpu_relax(); } -/* - * Allow only one task to hold it open - */ -static int omap_wdt_open(struct inode *inode, struct file *file) +static int omap_wdt_start(struct watchdog_device *wdog) { - struct omap_wdt_dev *wdev = platform_get_drvdata(omap_wdt_dev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); void __iomem *base = wdev->base; - if (test_and_set_bit(1, (unsigned long *)&(wdev->omap_wdt_users))) - return -EBUSY; + mutex_lock(&wdev->lock); + + wdev->omap_wdt_users = true; pm_runtime_get_sync(wdev->dev); @@ -155,223 +137,169 @@ static int omap_wdt_open(struct inode *inode, struct file *file) while (__raw_readl(base + OMAP_WATCHDOG_WPS) & 0x01) cpu_relax(); - file->private_data = (void *) wdev; - - omap_wdt_set_timeout(wdev); - omap_wdt_ping(wdev); /* trigger loading of new timeout value */ + omap_wdt_set_timer(wdev, wdog->timeout); + omap_wdt_reload(wdev); /* trigger loading of new timeout value */ omap_wdt_enable(wdev); - return nonseekable_open(inode, file); + mutex_unlock(&wdev->lock); + + return 0; } -static int omap_wdt_release(struct inode *inode, struct file *file) +static int omap_wdt_stop(struct watchdog_device *wdog) { - struct omap_wdt_dev *wdev = file->private_data; + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); - /* - * Shut off the timer unless NOWAYOUT is defined. - */ -#ifndef CONFIG_WATCHDOG_NOWAYOUT + mutex_lock(&wdev->lock); omap_wdt_disable(wdev); - pm_runtime_put_sync(wdev->dev); -#else - pr_crit("Unexpected close, not stopping!\n"); -#endif - wdev->omap_wdt_users = 0; - + wdev->omap_wdt_users = false; + mutex_unlock(&wdev->lock); return 0; } -static ssize_t omap_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) +static int omap_wdt_ping(struct watchdog_device *wdog) { - struct omap_wdt_dev *wdev = file->private_data; + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); - /* Refresh LOAD_TIME. 
*/ - if (len) { - spin_lock(&wdt_lock); - omap_wdt_ping(wdev); - spin_unlock(&wdt_lock); - } - return len; + mutex_lock(&wdev->lock); + omap_wdt_reload(wdev); + mutex_unlock(&wdev->lock); + + return 0; } -static long omap_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static int omap_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int timeout) { - struct omap_wd_timer_platform_data *pdata; - struct omap_wdt_dev *wdev; - u32 rs; - int new_margin, bs; - static const struct watchdog_info ident = { - .identity = "OMAP Watchdog", - .options = WDIOF_SETTIMEOUT, - .firmware_version = 0, - }; - - wdev = file->private_data; - pdata = wdev->dev->platform_data; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user((struct watchdog_info __user *)arg, &ident, - sizeof(ident)); - case WDIOC_GETSTATUS: - return put_user(0, (int __user *)arg); - case WDIOC_GETBOOTSTATUS: - if (!pdata || !pdata->read_reset_sources) - return put_user(0, (int __user *)arg); - rs = pdata->read_reset_sources(); - bs = (rs & (1 << OMAP_MPU_WD_RST_SRC_ID_SHIFT)) ? - WDIOF_CARDRESET : 0; - return put_user(bs, (int __user *)arg); - case WDIOC_KEEPALIVE: - spin_lock(&wdt_lock); - omap_wdt_ping(wdev); - spin_unlock(&wdt_lock); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int __user *)arg)) - return -EFAULT; - omap_wdt_adjust_timeout(new_margin); - - spin_lock(&wdt_lock); - omap_wdt_disable(wdev); - omap_wdt_set_timeout(wdev); - omap_wdt_enable(wdev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); - omap_wdt_ping(wdev); - spin_unlock(&wdt_lock); - /* Fall */ - case WDIOC_GETTIMEOUT: - return put_user(timer_margin, (int __user *)arg); - default: - return -ENOTTY; - } + mutex_lock(&wdev->lock); + omap_wdt_disable(wdev); + omap_wdt_set_timer(wdev, timeout); + omap_wdt_enable(wdev); + omap_wdt_reload(wdev); + wdog->timeout = timeout; + mutex_unlock(&wdev->lock); + + return 0; } -static const struct file_operations omap_wdt_fops = { - .owner = THIS_MODULE, - .write = omap_wdt_write, - .unlocked_ioctl = omap_wdt_ioctl, - .open = omap_wdt_open, - .release = omap_wdt_release, - .llseek = no_llseek, +static const struct watchdog_info omap_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "OMAP Watchdog", +}; + +static const struct watchdog_ops omap_wdt_ops = { + .owner = THIS_MODULE, + .start = omap_wdt_start, + .stop = omap_wdt_stop, + .ping = omap_wdt_ping, + .set_timeout = omap_wdt_set_timeout, }; static int omap_wdt_probe(struct platform_device *pdev) { + struct omap_wd_timer_platform_data *pdata = pdev->dev.platform_data; + bool nowayout = WATCHDOG_NOWAYOUT; + struct watchdog_device *omap_wdt; struct resource *res, *mem; struct omap_wdt_dev *wdev; + u32 rs; int ret; + omap_wdt = devm_kzalloc(&pdev->dev, sizeof(*omap_wdt), GFP_KERNEL); + if (!omap_wdt) + return -ENOMEM; + /* reserve static register mappings */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - ret = -ENOENT; - goto err_get_resource; - } + if (!res) + return -ENOENT; - if (omap_wdt_dev) { - ret = -EBUSY; - goto err_busy; - } + mem = devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), pdev->name); + if (!mem) + return -EBUSY; - mem = request_mem_region(res->start, resource_size(res), pdev->name); - if (!mem) { - ret = -EBUSY; - goto err_busy; - } + wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL); + if (!wdev) + return -ENOMEM; - wdev = kzalloc(sizeof(struct omap_wdt_dev), GFP_KERNEL); - if (!wdev) { - ret = -ENOMEM; - goto 
err_kzalloc; - } + wdev->omap_wdt_users = false; + wdev->mem = mem; + wdev->dev = &pdev->dev; + wdev->wdt_trgr_pattern = 0x1234; + mutex_init(&wdev->lock); - wdev->omap_wdt_users = 0; - wdev->mem = mem; - wdev->dev = &pdev->dev; + wdev->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!wdev->base) + return -ENOMEM; - wdev->base = ioremap(res->start, resource_size(res)); - if (!wdev->base) { - ret = -ENOMEM; - goto err_ioremap; - } + omap_wdt->info = &omap_wdt_info; + omap_wdt->ops = &omap_wdt_ops; + omap_wdt->min_timeout = TIMER_MARGIN_MIN; + omap_wdt->max_timeout = TIMER_MARGIN_MAX; + + if (timer_margin >= TIMER_MARGIN_MIN && + timer_margin <= TIMER_MARGIN_MAX) + omap_wdt->timeout = timer_margin; + else + omap_wdt->timeout = TIMER_MARGIN_DEFAULT; - platform_set_drvdata(pdev, wdev); + watchdog_set_drvdata(omap_wdt, wdev); + watchdog_set_nowayout(omap_wdt, nowayout); + + platform_set_drvdata(pdev, omap_wdt); pm_runtime_enable(wdev->dev); pm_runtime_get_sync(wdev->dev); - omap_wdt_disable(wdev); - omap_wdt_adjust_timeout(timer_margin); + if (pdata && pdata->read_reset_sources) + rs = pdata->read_reset_sources(); + else + rs = 0; + omap_wdt->bootstatus = (rs & (1 << OMAP_MPU_WD_RST_SRC_ID_SHIFT)) ? + WDIOF_CARDRESET : 0; - wdev->omap_wdt_miscdev.parent = &pdev->dev; - wdev->omap_wdt_miscdev.minor = WATCHDOG_MINOR; - wdev->omap_wdt_miscdev.name = "watchdog"; - wdev->omap_wdt_miscdev.fops = &omap_wdt_fops; + omap_wdt_disable(wdev); - ret = misc_register(&(wdev->omap_wdt_miscdev)); - if (ret) - goto err_misc; + ret = watchdog_register_device(omap_wdt); + if (ret) { + pm_runtime_disable(wdev->dev); + return ret; + } pr_info("OMAP Watchdog Timer Rev 0x%02x: initial timeout %d sec\n", __raw_readl(wdev->base + OMAP_WATCHDOG_REV) & 0xFF, - timer_margin); + omap_wdt->timeout); pm_runtime_put_sync(wdev->dev); - omap_wdt_dev = pdev; - return 0; - -err_misc: - pm_runtime_disable(wdev->dev); - platform_set_drvdata(pdev, NULL); - iounmap(wdev->base); - -err_ioremap: - wdev->base = NULL; - kfree(wdev); - -err_kzalloc: - release_mem_region(res->start, resource_size(res)); - -err_busy: -err_get_resource: - - return ret; } static void omap_wdt_shutdown(struct platform_device *pdev) { - struct omap_wdt_dev *wdev = platform_get_drvdata(pdev); + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); + mutex_lock(&wdev->lock); if (wdev->omap_wdt_users) { omap_wdt_disable(wdev); pm_runtime_put_sync(wdev->dev); } + mutex_unlock(&wdev->lock); } static int omap_wdt_remove(struct platform_device *pdev) { - struct omap_wdt_dev *wdev = platform_get_drvdata(pdev); + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); pm_runtime_disable(wdev->dev); - if (!res) - return -ENOENT; - - misc_deregister(&(wdev->omap_wdt_miscdev)); - release_mem_region(res->start, resource_size(res)); - platform_set_drvdata(pdev, NULL); - - iounmap(wdev->base); - - kfree(wdev); - omap_wdt_dev = NULL; + watchdog_unregister_device(wdog); return 0; } @@ -386,25 +314,31 @@ static int omap_wdt_remove(struct platform_device *pdev) static int omap_wdt_suspend(struct platform_device *pdev, pm_message_t state) { - struct omap_wdt_dev *wdev = platform_get_drvdata(pdev); + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); + mutex_lock(&wdev->lock); if (wdev->omap_wdt_users) { 
omap_wdt_disable(wdev); pm_runtime_put_sync(wdev->dev); } + mutex_unlock(&wdev->lock); return 0; } static int omap_wdt_resume(struct platform_device *pdev) { - struct omap_wdt_dev *wdev = platform_get_drvdata(pdev); + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog); + mutex_lock(&wdev->lock); if (wdev->omap_wdt_users) { pm_runtime_get_sync(wdev->dev); omap_wdt_enable(wdev); - omap_wdt_ping(wdev); + omap_wdt_reload(wdev); } + mutex_unlock(&wdev->lock); return 0; } @@ -437,5 +371,4 @@ module_platform_driver(omap_wdt_driver); MODULE_AUTHOR("George G. Davis"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); MODULE_ALIAS("platform:omap_wdt"); diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 0478b00..7c18b3b 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -156,6 +156,8 @@ static int orion_wdt_probe(struct platform_device *pdev) wdt_tclk = clk_get_rate(clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; wdt_reg = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!wdt_reg) return -ENOMEM; diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index b0dab10..27bcd4e 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -354,7 +354,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) goto err_map; } - clk_enable(wdt_clock); + clk_prepare_enable(wdt_clock); ret = s3c2410wdt_cpufreq_register(); if (ret < 0) { @@ -421,7 +421,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) s3c2410wdt_cpufreq_deregister(); err_clk: - clk_disable(wdt_clock); + clk_disable_unprepare(wdt_clock); clk_put(wdt_clock); wdt_clock = NULL; @@ -445,7 +445,7 @@ static int s3c2410wdt_remove(struct platform_device *dev) s3c2410wdt_cpufreq_deregister(); - clk_disable(wdt_clock); + clk_disable_unprepare(wdt_clock); clk_put(wdt_clock); wdt_clock = NULL; diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index b387681..2b0e000 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -13,7 +13,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * See AMD Publication 43009 "AMD SB700/710/750 Register Reference Guide" + * See AMD Publication 43009 "AMD SB700/710/750 Register Reference Guide", + * AMD Publication 45482 "AMD SB800-Series Southbridges Register + * Reference Guide" */ /* @@ -38,18 +40,24 @@ #include "sp5100_tco.h" /* Module and version information */ -#define TCO_VERSION "0.01" +#define TCO_VERSION "0.03" #define TCO_MODULE_NAME "SP5100 TCO timer" #define TCO_DRIVER_NAME TCO_MODULE_NAME ", v" TCO_VERSION /* internal variables */ static u32 tcobase_phys; +static u32 resbase_phys; +static u32 tco_wdt_fired; static void __iomem *tcobase; static unsigned int pm_iobase; static DEFINE_SPINLOCK(tco_lock); /* Guards the hardware */ static unsigned long timer_alive; static char tco_expect_close; static struct pci_dev *sp5100_tco_pci; +static struct resource wdt_res = { + .name = "Watchdog Timer", + .flags = IORESOURCE_MEM, +}; /* the watchdog platform device */ static struct platform_device *sp5100_tco_platform_device; @@ -64,9 +72,15 @@ MODULE_PARM_DESC(heartbeat, "Watchdog heartbeat in seconds. 
(default=" static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started" +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started." " (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +static unsigned int force_addr; +module_param(force_addr, uint, 0); +MODULE_PARM_DESC(force_addr, "Force the use of specified MMIO address." + " ONLY USE THIS PARAMETER IF YOU REALLY KNOW" + " WHAT YOU ARE DOING (default=none)"); + /* * Some TCO specific functions */ @@ -122,6 +136,79 @@ static int tco_timer_set_heartbeat(int t) return 0; } +static void tco_timer_enable(void) +{ + int val; + + if (sp5100_tco_pci->revision >= 0x40) { + /* For SB800 or later */ + /* Set the Watchdog timer resolution to 1 sec */ + outb(SB800_PM_WATCHDOG_CONFIG, SB800_IO_PM_INDEX_REG); + val = inb(SB800_IO_PM_DATA_REG); + val |= SB800_PM_WATCHDOG_SECOND_RES; + outb(val, SB800_IO_PM_DATA_REG); + + /* Enable watchdog decode bit and watchdog timer */ + outb(SB800_PM_WATCHDOG_CONTROL, SB800_IO_PM_INDEX_REG); + val = inb(SB800_IO_PM_DATA_REG); + val |= SB800_PCI_WATCHDOG_DECODE_EN; + val &= ~SB800_PM_WATCHDOG_DISABLE; + outb(val, SB800_IO_PM_DATA_REG); + } else { + /* For SP5100 or SB7x0 */ + /* Enable watchdog decode bit */ + pci_read_config_dword(sp5100_tco_pci, + SP5100_PCI_WATCHDOG_MISC_REG, + &val); + + val |= SP5100_PCI_WATCHDOG_DECODE_EN; + + pci_write_config_dword(sp5100_tco_pci, + SP5100_PCI_WATCHDOG_MISC_REG, + val); + + /* Enable Watchdog timer and set the resolution to 1 sec */ + outb(SP5100_PM_WATCHDOG_CONTROL, SP5100_IO_PM_INDEX_REG); + val = inb(SP5100_IO_PM_DATA_REG); + val |= SP5100_PM_WATCHDOG_SECOND_RES; + val &= ~SP5100_PM_WATCHDOG_DISABLE; + outb(val, SP5100_IO_PM_DATA_REG); + } +} + +static void tco_timer_disable(void) +{ + int val; + + if (sp5100_tco_pci->revision >= 0x40) { + /* For SB800 or later */ + /* Enable watchdog decode bit and Disable watchdog timer */ + outb(SB800_PM_WATCHDOG_CONTROL, SB800_IO_PM_INDEX_REG); + val = inb(SB800_IO_PM_DATA_REG); + val |= SB800_PCI_WATCHDOG_DECODE_EN; + val |= SB800_PM_WATCHDOG_DISABLE; + outb(val, SB800_IO_PM_DATA_REG); + } else { + /* For SP5100 or SB7x0 */ + /* Enable watchdog decode bit */ + pci_read_config_dword(sp5100_tco_pci, + SP5100_PCI_WATCHDOG_MISC_REG, + &val); + + val |= SP5100_PCI_WATCHDOG_DECODE_EN; + + pci_write_config_dword(sp5100_tco_pci, + SP5100_PCI_WATCHDOG_MISC_REG, + val); + + /* Disable Watchdog timer */ + outb(SP5100_PM_WATCHDOG_CONTROL, SP5100_IO_PM_INDEX_REG); + val = inb(SP5100_IO_PM_DATA_REG); + val |= SP5100_PM_WATCHDOG_DISABLE; + outb(val, SP5100_IO_PM_DATA_REG); + } +} + /* * /dev/watchdog handling */ @@ -270,11 +357,12 @@ MODULE_DEVICE_TABLE(pci, sp5100_tco_pci_tbl); /* * Init & exit routines */ - static unsigned char sp5100_tco_setupdevice(void) { struct pci_dev *dev = NULL; + const char *dev_name = NULL; u32 val; + u32 index_reg, data_reg, base_addr; /* Match the PCI device */ for_each_pci_dev(dev) { @@ -287,29 +375,160 @@ static unsigned char sp5100_tco_setupdevice(void) if (!sp5100_tco_pci) return 0; + pr_info("PCI Revision ID: 0x%x\n", sp5100_tco_pci->revision); + + /* + * Determine type of southbridge chipset. 
+ */ + if (sp5100_tco_pci->revision >= 0x40) { + dev_name = SB800_DEVNAME; + index_reg = SB800_IO_PM_INDEX_REG; + data_reg = SB800_IO_PM_DATA_REG; + base_addr = SB800_PM_WATCHDOG_BASE; + } else { + dev_name = SP5100_DEVNAME; + index_reg = SP5100_IO_PM_INDEX_REG; + data_reg = SP5100_IO_PM_DATA_REG; + base_addr = SP5100_PM_WATCHDOG_BASE; + } + /* Request the IO ports used by this driver */ pm_iobase = SP5100_IO_PM_INDEX_REG; - if (!request_region(pm_iobase, SP5100_PM_IOPORTS_SIZE, "SP5100 TCO")) { + if (!request_region(pm_iobase, SP5100_PM_IOPORTS_SIZE, dev_name)) { pr_err("I/O address 0x%04x already in use\n", pm_iobase); goto exit; } - /* Find the watchdog base address. */ - outb(SP5100_PM_WATCHDOG_BASE3, SP5100_IO_PM_INDEX_REG); - val = inb(SP5100_IO_PM_DATA_REG); - outb(SP5100_PM_WATCHDOG_BASE2, SP5100_IO_PM_INDEX_REG); - val = val << 8 | inb(SP5100_IO_PM_DATA_REG); - outb(SP5100_PM_WATCHDOG_BASE1, SP5100_IO_PM_INDEX_REG); - val = val << 8 | inb(SP5100_IO_PM_DATA_REG); - outb(SP5100_PM_WATCHDOG_BASE0, SP5100_IO_PM_INDEX_REG); - /* Low three bits of BASE0 are reserved. */ - val = val << 8 | (inb(SP5100_IO_PM_DATA_REG) & 0xf8); + /* + * First, find the watchdog timer MMIO address from indirect I/O. + */ + outb(base_addr+3, index_reg); + val = inb(data_reg); + outb(base_addr+2, index_reg); + val = val << 8 | inb(data_reg); + outb(base_addr+1, index_reg); + val = val << 8 | inb(data_reg); + outb(base_addr+0, index_reg); + /* Low three bits of BASE are reserved */ + val = val << 8 | (inb(data_reg) & 0xf8); + + pr_debug("Got 0x%04x from indirect I/O\n", val); + + /* Check for an MMIO address conflict */ + if (request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, + dev_name)) + goto setup_wdt; + else + pr_debug("MMIO address 0x%04x already in use\n", val); + + /* + * Second, find the watchdog timer MMIO address + * from the SBResource_MMIO register. + */ + if (sp5100_tco_pci->revision >= 0x40) { + /* Read SBResource_MMIO from AcpiMmioEn(PM_Reg: 24h) */ + outb(SB800_PM_ACPI_MMIO_EN+3, SB800_IO_PM_INDEX_REG); + val = inb(SB800_IO_PM_DATA_REG); + outb(SB800_PM_ACPI_MMIO_EN+2, SB800_IO_PM_INDEX_REG); + val = val << 8 | inb(SB800_IO_PM_DATA_REG); + outb(SB800_PM_ACPI_MMIO_EN+1, SB800_IO_PM_INDEX_REG); + val = val << 8 | inb(SB800_IO_PM_DATA_REG); + outb(SB800_PM_ACPI_MMIO_EN+0, SB800_IO_PM_INDEX_REG); + val = val << 8 | inb(SB800_IO_PM_DATA_REG); + } else { + /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */ + pci_read_config_dword(sp5100_tco_pci, + SP5100_SB_RESOURCE_MMIO_BASE, &val); + } + + /* Is SBResource_MMIO enabled and mapped to memory space? */ + if ((val & (SB800_ACPI_MMIO_DECODE_EN | SB800_ACPI_MMIO_SEL)) == + SB800_ACPI_MMIO_DECODE_EN) { + /* Clear the low twelve bits */ + val &= ~0xFFF; + /* Add the Watchdog Timer offset to the base address. */ + val += SB800_PM_WDT_MMIO_OFFSET; + /* Check for an MMIO address conflict */ + if (request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, + dev_name)) { + pr_debug("Got 0x%04x from SBResource_MMIO register\n", + val); + goto setup_wdt; + } else + pr_debug("MMIO address 0x%04x already in use\n", val); + } else + pr_debug("SBResource_MMIO is disabled(0x%04x)\n", val); + + /* + * Lastly, re-program the watchdog timer MMIO address. + * This method is a last resort. + * + * Before re-programming, disable the watchdog timer to make sure + * it is not running. + */ + tco_timer_disable(); + + if (force_addr) { + /* + * Force the use of the specified watchdog timer MMIO address, + * aligned to an 8-byte boundary.
+ */ + force_addr &= ~0x7; + val = force_addr; + + pr_info("Force the use of 0x%04x as MMIO address\n", val); + } else { + /* + * Get empty slot into the resource tree for watchdog timer. + */ + if (allocate_resource(&iomem_resource, + &wdt_res, + SP5100_WDT_MEM_MAP_SIZE, + 0xf0000000, + 0xfffffff8, + 0x8, + NULL, + NULL)) { + pr_err("MMIO allocation failed\n"); + goto unreg_region; + } + + val = resbase_phys = wdt_res.start; + pr_debug("Got 0x%04x from resource tree\n", val); + } + + /* Restore to the low three bits, if chipset is SB8x0(or later) */ + if (sp5100_tco_pci->revision >= 0x40) { + u8 reserved_bit; + reserved_bit = inb(base_addr) & 0x7; + val |= (u32)reserved_bit; + } + + /* Re-programming the watchdog timer base address */ + outb(base_addr+0, index_reg); + /* Low three bits of BASE are reserved */ + outb((val >> 0) & 0xf8, data_reg); + outb(base_addr+1, index_reg); + outb((val >> 8) & 0xff, data_reg); + outb(base_addr+2, index_reg); + outb((val >> 16) & 0xff, data_reg); + outb(base_addr+3, index_reg); + outb((val >> 24) & 0xff, data_reg); + + /* + * Clear unnecessary the low three bits, + * if chipset is SB8x0(or later) + */ + if (sp5100_tco_pci->revision >= 0x40) + val &= ~0x7; if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, - "SP5100 TCO")) { - pr_err("mmio address 0x%04x already in use\n", val); - goto unreg_region; + dev_name)) { + pr_err("MMIO address 0x%04x already in use\n", val); + goto unreg_resource; } + +setup_wdt: tcobase_phys = val; tcobase = ioremap(val, SP5100_WDT_MEM_MAP_SIZE); @@ -318,26 +537,18 @@ static unsigned char sp5100_tco_setupdevice(void) goto unreg_mem_region; } - /* Enable watchdog decode bit */ - pci_read_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - &val); - - val |= SP5100_PCI_WATCHDOG_DECODE_EN; + pr_info("Using 0x%04x for watchdog MMIO address\n", val); - pci_write_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - val); + /* Setup the watchdog timer */ + tco_timer_enable(); - /* Enable Watchdog timer and set the resolution to 1 sec. */ - outb(SP5100_PM_WATCHDOG_CONTROL, SP5100_IO_PM_INDEX_REG); - val = inb(SP5100_IO_PM_DATA_REG); - val |= SP5100_PM_WATCHDOG_SECOND_RES; - val &= ~SP5100_PM_WATCHDOG_DISABLE; - outb(val, SP5100_IO_PM_DATA_REG); - - /* Check that the watchdog action is set to reset the system. */ + /* Check that the watchdog action is set to reset the system */ val = readl(SP5100_WDT_CONTROL(tcobase)); + /* + * Save WatchDogFired status, because WatchDogFired flag is + * cleared here. + */ + tco_wdt_fired = val & SP5100_PM_WATCHDOG_FIRED; val &= ~SP5100_PM_WATCHDOG_ACTION_RESET; writel(val, SP5100_WDT_CONTROL(tcobase)); @@ -355,6 +566,9 @@ static unsigned char sp5100_tco_setupdevice(void) unreg_mem_region: release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); +unreg_resource: + if (resbase_phys) + release_resource(&wdt_res); unreg_region: release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); exit: @@ -364,23 +578,18 @@ exit: static int sp5100_tco_init(struct platform_device *dev) { int ret; - u32 val; + char addr_str[16]; - /* Check whether or not the hardware watchdog is there. If found, then + /* + * Check whether or not the hardware watchdog is there. If found, then * set it up. */ if (!sp5100_tco_setupdevice()) return -ENODEV; /* Check to see if last reboot was due to watchdog timeout */ - pr_info("Watchdog reboot %sdetected\n", - readl(SP5100_WDT_CONTROL(tcobase)) & SP5100_PM_WATCHDOG_FIRED ? 
- "" : "not "); - - /* Clear out the old status */ - val = readl(SP5100_WDT_CONTROL(tcobase)); - val &= ~SP5100_PM_WATCHDOG_FIRED; - writel(val, SP5100_WDT_CONTROL(tcobase)); + pr_info("Last reboot was %striggered by watchdog.\n", + tco_wdt_fired ? "" : "not "); /* * Check that the heartbeat value is within it's range. @@ -400,14 +609,24 @@ static int sp5100_tco_init(struct platform_device *dev) clear_bit(0, &timer_alive); - pr_info("initialized (0x%p). heartbeat=%d sec (nowayout=%d)\n", - tcobase, heartbeat, nowayout); + /* Show module parameters */ + if (force_addr == tcobase_phys) + /* The force_addr is vaild */ + sprintf(addr_str, "0x%04x", force_addr); + else + strcpy(addr_str, "none"); + + pr_info("initialized (0x%p). heartbeat=%d sec (nowayout=%d, " + "force_addr=%s)\n", + tcobase, heartbeat, nowayout, addr_str); return 0; exit: iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); + if (resbase_phys) + release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); return ret; } @@ -422,6 +641,8 @@ static void sp5100_tco_cleanup(void) misc_deregister(&sp5100_tco_miscdev); iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); + if (resbase_phys) + release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); } @@ -451,7 +672,7 @@ static int __init sp5100_tco_init_module(void) { int err; - pr_info("SP5100 TCO WatchDog Timer Driver v%s\n", TCO_VERSION); + pr_info("SP5100/SB800 TCO WatchDog Timer Driver v%s\n", TCO_VERSION); err = platform_driver_register(&sp5100_tco_driver); if (err) @@ -475,13 +696,13 @@ static void __exit sp5100_tco_cleanup_module(void) { platform_device_unregister(sp5100_tco_platform_device); platform_driver_unregister(&sp5100_tco_driver); - pr_info("SP5100 TCO Watchdog Module Unloaded\n"); + pr_info("SP5100/SB800 TCO Watchdog Module Unloaded\n"); } module_init(sp5100_tco_init_module); module_exit(sp5100_tco_cleanup_module); MODULE_AUTHOR("Priyanka Gupta"); -MODULE_DESCRIPTION("TCO timer driver for SP5100 chipset"); +MODULE_DESCRIPTION("TCO timer driver for SP5100/SB800 chipset"); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index a5a16cc..71594a0 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -9,33 +9,57 @@ /* * Some address definitions for the Watchdog */ - #define SP5100_WDT_MEM_MAP_SIZE 0x08 #define SP5100_WDT_CONTROL(base) ((base) + 0x00) /* Watchdog Control */ #define SP5100_WDT_COUNT(base) ((base) + 0x04) /* Watchdog Count */ -#define SP5100_WDT_START_STOP_BIT 1 +#define SP5100_WDT_START_STOP_BIT (1 << 0) #define SP5100_WDT_TRIGGER_BIT (1 << 7) -#define SP5100_PCI_WATCHDOG_MISC_REG 0x41 -#define SP5100_PCI_WATCHDOG_DECODE_EN (1 << 3) - #define SP5100_PM_IOPORTS_SIZE 0x02 -/* These two IO registers are hardcoded and there doesn't seem to be a way to +/* + * These two IO registers are hardcoded and there doesn't seem to be a way to * read them from a register. 
*/ + +/* For SP5100/SB7x0 chipset */ #define SP5100_IO_PM_INDEX_REG 0xCD6 #define SP5100_IO_PM_DATA_REG 0xCD7 +#define SP5100_SB_RESOURCE_MMIO_BASE 0x9C + #define SP5100_PM_WATCHDOG_CONTROL 0x69 -#define SP5100_PM_WATCHDOG_BASE0 0x6C -#define SP5100_PM_WATCHDOG_BASE1 0x6D -#define SP5100_PM_WATCHDOG_BASE2 0x6E -#define SP5100_PM_WATCHDOG_BASE3 0x6F +#define SP5100_PM_WATCHDOG_BASE 0x6C #define SP5100_PM_WATCHDOG_FIRED (1 << 1) #define SP5100_PM_WATCHDOG_ACTION_RESET (1 << 2) -#define SP5100_PM_WATCHDOG_DISABLE 1 +#define SP5100_PCI_WATCHDOG_MISC_REG 0x41 +#define SP5100_PCI_WATCHDOG_DECODE_EN (1 << 3) + +#define SP5100_PM_WATCHDOG_DISABLE (1 << 0) #define SP5100_PM_WATCHDOG_SECOND_RES (3 << 1) + +#define SP5100_DEVNAME "SP5100 TCO" + + +/* For SB8x0(or later) chipset */ +#define SB800_IO_PM_INDEX_REG 0xCD6 +#define SB800_IO_PM_DATA_REG 0xCD7 + +#define SB800_PM_ACPI_MMIO_EN 0x24 +#define SB800_PM_WATCHDOG_CONTROL 0x48 +#define SB800_PM_WATCHDOG_BASE 0x48 +#define SB800_PM_WATCHDOG_CONFIG 0x4C + +#define SB800_PCI_WATCHDOG_DECODE_EN (1 << 0) +#define SB800_PM_WATCHDOG_DISABLE (1 << 2) +#define SB800_PM_WATCHDOG_SECOND_RES (3 << 0) +#define SB800_ACPI_MMIO_DECODE_EN (1 << 0) +#define SB800_ACPI_MMIO_SEL (1 << 2) + + +#define SB800_PM_WDT_MMIO_OFFSET 0xB00 + +#define SB800_DEVNAME "SB800 TCO" diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 76c73cb..8872642 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -130,16 +130,10 @@ static int wdt_config(struct watchdog_device *wdd, bool ping) int ret; if (!ping) { - ret = clk_prepare(wdt->clk); - if (ret) { - dev_err(&wdt->adev->dev, "clock prepare fail"); - return ret; - } - ret = clk_enable(wdt->clk); + ret = clk_prepare_enable(wdt->clk); if (ret) { dev_err(&wdt->adev->dev, "clock enable fail"); - clk_unprepare(wdt->clk); return ret; } } @@ -190,8 +184,7 @@ static int wdt_disable(struct watchdog_device *wdd) readl_relaxed(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); - clk_disable(wdt->clk); - clk_unprepare(wdt->clk); + clk_disable_unprepare(wdt->clk); return 0; } diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c index 9f54b1d..81918cf 100644 --- a/drivers/watchdog/twl4030_wdt.c +++ b/drivers/watchdog/twl4030_wdt.c @@ -22,26 +22,12 @@ #include #include #include -#include #include #include -#include -#include #include #define TWL4030_WATCHDOG_CFG_REG_OFFS 0x3 -#define TWL4030_WDT_STATE_OPEN 0x1 -#define TWL4030_WDT_STATE_ACTIVE 0x8 - -static struct platform_device *twl4030_wdt_dev; - -struct twl4030_wdt { - struct miscdevice miscdev; - int timer_margin; - unsigned long state; -}; - static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " @@ -49,175 +35,75 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " static int twl4030_wdt_write(unsigned char val) { - return twl_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER, val, + return twl_i2c_write_u8(TWL_MODULE_PM_RECEIVER, val, TWL4030_WATCHDOG_CFG_REG_OFFS); } -static int twl4030_wdt_enable(struct twl4030_wdt *wdt) +static int twl4030_wdt_start(struct watchdog_device *wdt) { - return twl4030_wdt_write(wdt->timer_margin + 1); + return twl4030_wdt_write(wdt->timeout + 1); } -static int twl4030_wdt_disable(struct twl4030_wdt *wdt) +static int twl4030_wdt_stop(struct watchdog_device *wdt) { return twl4030_wdt_write(0); } -static int twl4030_wdt_set_timeout(struct twl4030_wdt *wdt, int timeout) -{ - if (timeout < 0 || 
timeout > 30) { - dev_warn(wdt->miscdev.parent, - "Timeout can only be in the range [0-30] seconds"); - return -EINVAL; - } - wdt->timer_margin = timeout; - return twl4030_wdt_enable(wdt); -} - -static ssize_t twl4030_wdt_write_fop(struct file *file, - const char __user *data, size_t len, loff_t *ppos) +static int twl4030_wdt_set_timeout(struct watchdog_device *wdt, + unsigned int timeout) { - struct twl4030_wdt *wdt = file->private_data; - - if (len) - twl4030_wdt_enable(wdt); - - return len; -} - -static long twl4030_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_margin; - struct twl4030_wdt *wdt = file->private_data; - - static const struct watchdog_info twl4030_wd_ident = { - .identity = "TWL4030 Watchdog", - .options = WDIOF_SETTIMEOUT, - .firmware_version = 0, - }; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &twl4030_wd_ident, - sizeof(twl4030_wd_ident)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_KEEPALIVE: - twl4030_wdt_enable(wdt); - break; - - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, p)) - return -EFAULT; - if (twl4030_wdt_set_timeout(wdt, new_margin)) - return -EINVAL; - return put_user(wdt->timer_margin, p); - - case WDIOC_GETTIMEOUT: - return put_user(wdt->timer_margin, p); - - default: - return -ENOTTY; - } - + wdt->timeout = timeout; return 0; } -static int twl4030_wdt_open(struct inode *inode, struct file *file) -{ - struct twl4030_wdt *wdt = platform_get_drvdata(twl4030_wdt_dev); - - /* /dev/watchdog can only be opened once */ - if (test_and_set_bit(0, &wdt->state)) - return -EBUSY; - - wdt->state |= TWL4030_WDT_STATE_ACTIVE; - file->private_data = (void *) wdt; - - twl4030_wdt_enable(wdt); - return nonseekable_open(inode, file); -} - -static int twl4030_wdt_release(struct inode *inode, struct file *file) -{ - struct twl4030_wdt *wdt = file->private_data; - if (nowayout) { - dev_alert(wdt->miscdev.parent, - "Unexpected close, watchdog still running!\n"); - twl4030_wdt_enable(wdt); - } else { - if (twl4030_wdt_disable(wdt)) - return -EFAULT; - wdt->state &= ~TWL4030_WDT_STATE_ACTIVE; - } - - clear_bit(0, &wdt->state); - return 0; -} +static const struct watchdog_info twl4030_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "TWL4030 Watchdog", +}; -static const struct file_operations twl4030_wdt_fops = { +static const struct watchdog_ops twl4030_wdt_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, - .open = twl4030_wdt_open, - .release = twl4030_wdt_release, - .unlocked_ioctl = twl4030_wdt_ioctl, - .write = twl4030_wdt_write_fop, + .start = twl4030_wdt_start, + .stop = twl4030_wdt_stop, + .set_timeout = twl4030_wdt_set_timeout, }; static int twl4030_wdt_probe(struct platform_device *pdev) { int ret = 0; - struct twl4030_wdt *wdt; + struct watchdog_device *wdt; - wdt = kzalloc(sizeof(struct twl4030_wdt), GFP_KERNEL); + wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); if (!wdt) return -ENOMEM; - wdt->state = 0; - wdt->timer_margin = 30; - wdt->miscdev.parent = &pdev->dev; - wdt->miscdev.fops = &twl4030_wdt_fops; - wdt->miscdev.minor = WATCHDOG_MINOR; - wdt->miscdev.name = "watchdog"; + wdt->info = &twl4030_wdt_info; + wdt->ops = &twl4030_wdt_ops; + wdt->status = 0; + wdt->timeout = 30; + wdt->min_timeout = 1; + wdt->max_timeout = 30; + watchdog_set_nowayout(wdt, nowayout); platform_set_drvdata(pdev, wdt); - twl4030_wdt_dev = pdev; + 
twl4030_wdt_stop(wdt); - twl4030_wdt_disable(wdt); - - ret = misc_register(&wdt->miscdev); + ret = watchdog_register_device(wdt); if (ret) { - dev_err(wdt->miscdev.parent, - "Failed to register misc device\n"); platform_set_drvdata(pdev, NULL); - kfree(wdt); - twl4030_wdt_dev = NULL; return ret; } + return 0; } static int twl4030_wdt_remove(struct platform_device *pdev) { - struct twl4030_wdt *wdt = platform_get_drvdata(pdev); - - if (wdt->state & TWL4030_WDT_STATE_ACTIVE) - if (twl4030_wdt_disable(wdt)) - return -EFAULT; - - wdt->state &= ~TWL4030_WDT_STATE_ACTIVE; - misc_deregister(&wdt->miscdev); + struct watchdog_device *wdt = platform_get_drvdata(pdev); + watchdog_unregister_device(wdt); platform_set_drvdata(pdev, NULL); - kfree(wdt); - twl4030_wdt_dev = NULL; return 0; } @@ -225,18 +111,18 @@ static int twl4030_wdt_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int twl4030_wdt_suspend(struct platform_device *pdev, pm_message_t state) { - struct twl4030_wdt *wdt = platform_get_drvdata(pdev); - if (wdt->state & TWL4030_WDT_STATE_ACTIVE) - return twl4030_wdt_disable(wdt); + struct watchdog_device *wdt = platform_get_drvdata(pdev); + if (watchdog_active(wdt)) + return twl4030_wdt_stop(wdt); return 0; } static int twl4030_wdt_resume(struct platform_device *pdev) { - struct twl4030_wdt *wdt = platform_get_drvdata(pdev); - if (wdt->state & TWL4030_WDT_STATE_ACTIVE) - return twl4030_wdt_enable(wdt); + struct watchdog_device *wdt = platform_get_drvdata(pdev); + if (watchdog_active(wdt)) + return twl4030_wdt_start(wdt); return 0; } @@ -260,6 +146,5 @@ module_platform_driver(twl4030_wdt_driver); MODULE_AUTHOR("Nokia Corporation"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); MODULE_ALIAS("platform:twl4030_wdt"); diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index 86e92ef..69ae3d3 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -37,7 +37,6 @@ void dump_smb(void *, int); #define CIFS_TIMER 0x04 extern int cifsFYI; -extern int cifsERROR; /* * debug ON @@ -64,10 +63,7 @@ do { \ /* error event message: e.g., i/o error */ #define cifserror(fmt, ...) \ -do { \ - if (cifsERROR) \ - printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ -} while (0) + printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ #define cERROR(set, fmt, ...) 
\ do { \ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ce9f3c5..f653835 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -54,7 +54,6 @@ #endif int cifsFYI = 0; -int cifsERROR = 1; int traceSMB = 0; bool enable_oplocks = true; unsigned int linuxExtEnabled = 1; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7635b5d..17c3643 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1624,14 +1624,11 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_unc: string = vol->UNC; vol->UNC = match_strdup(args); - if (vol->UNC == NULL) { - kfree(string); + if (vol->UNC == NULL) goto out_nomem; - } convert_delimiter(vol->UNC, '\\'); if (vol->UNC[0] != '\\' || vol->UNC[1] != '\\') { - kfree(string); printk(KERN_ERR "CIFS: UNC Path does not " "begin with // or \\\\\n"); goto cifs_parse_mount_err; @@ -1687,10 +1684,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, string = vol->prepath; vol->prepath = match_strdup(args); - if (vol->prepath == NULL) { - kfree(string); + if (vol->prepath == NULL) goto out_nomem; - } /* Compare old prefixpath= option to new one */ if (!string || strcmp(string, vol->prepath)) printk(KERN_WARNING "CIFS: the value of the " diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 6002fdc..cdd6ff4 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -78,6 +78,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct dentry *dentry, *alias; struct inode *inode; struct super_block *sb = parent->d_inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); cFYI(1, "%s: for %s", __func__, name->name); @@ -91,10 +92,20 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, int err; inode = dentry->d_inode; - /* update inode in place if i_ino didn't change */ - if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { - cifs_fattr_to_inode(inode, fattr); - goto out; + if (inode) { + /* + * If we're generating inode numbers, then we don't + * want to clobber the existing one with the one that + * the readdir code created. 
+ */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) + fattr->cf_uniqueid = CIFS_I(inode)->uniqueid; + + /* update inode in place if i_ino didn't change */ + if (CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); + goto out; + } } err = d_invalidate(dentry); dput(dentry); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 277b027..4ecb766 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -222,6 +222,7 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, static inline void nfs_fscache_invalidate(struct inode *inode) {} +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {} static inline const char *nfs_server_fscache_state(struct nfs_server *server) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aa5315b..c25cadf8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2375,19 +2375,30 @@ static void nfs_get_cache_cookie(struct super_block *sb, struct nfs_parsed_mount_data *parsed, struct nfs_clone_mount *cloned) { + struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; int ulen = 0; - if (parsed && parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + + if (parsed) { + if (!(parsed->options & NFS_OPTION_FSCACHE)) + return; + if (parsed->fscache_uniq) { + uniq = parsed->fscache_uniq; + ulen = strlen(parsed->fscache_uniq); + } } else if (cloned) { struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!(mnt_s->options & NFS_OPTION_FSCACHE)) + return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; }; - } + } else + return; nfs_fscache_get_super_cookie(sb, uniq, ulen); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f0a6d88..d586117 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -934,7 +934,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, .u.data = rqstp, }; - WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1); rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); } else { diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d1e9328..33bbbae 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -12,7 +12,6 @@ #define __ASM_GENERIC_IO_H #include /* I/O is all done through memory accesses */ -#include #include #ifdef CONFIG_GENERIC_IOMAP @@ -223,36 +222,6 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) } #endif -static inline void readsl(const void __iomem *addr, void *buf, int len) -{ - insl(addr - PCI_IOBASE, buf, len); -} - -static inline void readsw(const void __iomem *addr, void *buf, int len) -{ - insw(addr - PCI_IOBASE, buf, len); -} - -static inline void readsb(const void __iomem *addr, void *buf, int len) -{ - insb(addr - PCI_IOBASE, buf, len); -} - -static inline void writesl(const void __iomem *addr, const void *buf, int len) -{ - outsl(addr - PCI_IOBASE, buf, len); -} - -static inline void writesw(const void __iomem *addr, const void *buf, int len) -{ - outsw(addr - PCI_IOBASE, buf, len); -} - -static inline void writesb(const void __iomem *addr, const void *buf, int len) -{ - outsb(addr - PCI_IOBASE, buf, len); -} - #ifndef CONFIG_GENERIC_IOMAP #define ioread8(addr) readb(addr) #define ioread16(addr) readw(addr) diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h index 4f4aa56..0ed3f1c 100644 --- a/include/asm-generic/mmu.h +++ 
b/include/asm-generic/mmu.h @@ -7,8 +7,12 @@ */ #ifndef __ASSEMBLY__ typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; + +#ifdef CONFIG_BINFMT_ELF_FDPIC + unsigned long exec_fdpic_loadmap; + unsigned long interp_fdpic_loadmap; +#endif } mm_context_t; #endif diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 38d27a1..bf6afa2 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; union map_info { void *ptr; unsigned long long ll; - unsigned target_request_nr; }; /* @@ -46,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); * = 1: simple remap complete * = 2: The target wants to push back the io */ -typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, - union map_info *map_context); +typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, union map_info *map_context); @@ -60,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int error, - union map_info *map_context); + struct bio *bio, int error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); @@ -193,18 +190,30 @@ struct dm_target { * A number of zero-length barrier requests that will be submitted * to the target for the purpose of flushing cache. * - * The request number will be placed in union map_info->target_request_nr. + * The request number can be accessed with dm_bio_get_target_request_nr. * It is a responsibility of the target driver to remap these requests * to the real underlying devices. */ unsigned num_flush_requests; /* - * The number of discard requests that will be submitted to the - * target. map_info->request_nr is used just like num_flush_requests. + * The number of discard requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_discard_requests; + /* + * The number of WRITE SAME requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. + */ + unsigned num_write_same_requests; + + /* + * The minimum number of extra bytes allocated in each bio for the + * target to use. dm_per_bio_data returns the data location. + */ + unsigned per_bio_data_size; + /* target specific data */ void *private; @@ -241,6 +250,36 @@ struct dm_target_callbacks { int (*congested_fn) (struct dm_target_callbacks *, int); }; +/* + * For bio-based dm. + * One of these is allocated for each bio. + * This structure shouldn't be touched directly by target drivers. 
+ * It is here so that we can inline dm_per_bio_data and + * dm_bio_from_per_bio_data + */ +struct dm_target_io { + struct dm_io *io; + struct dm_target *ti; + union map_info info; + unsigned target_request_nr; + struct bio clone; +}; + +static inline void *dm_per_bio_data(struct bio *bio, size_t data_size) +{ + return (char *)bio - offsetof(struct dm_target_io, clone) - data_size; +} + +static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) +{ + return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); +} + +static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +{ + return container_of(bio, struct dm_target_io, clone)->target_request_nr; +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 21821da..20ea939 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -625,6 +625,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 87490ac..3a9df2f 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -129,7 +129,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) return wdd->driver_data; } -/* drivers/watchdog/core/watchdog_core.c */ +/* drivers/watchdog/watchdog_core.c */ extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 91e3a36..539b179 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */