diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index da272c8..cd556b9 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -94,11 +94,13 @@ Throttling/Upper Limit policy Hierarchical Cgroups ==================== -- Currently only CFQ supports hierarchical groups. For throttling, - cgroup interface does allow creation of hierarchical cgroups and - internally it treats them as flat hierarchy. - If somebody created a hierarchy like as follows. +Both CFQ and throttling implement hierarchy support; however, +throttling's hierarchy support is enabled iff "sane_behavior" is +enabled from cgroup side, which currently is a development option and +not publicly available. + +If somebody created a hierarchy like as follows. root / \ @@ -106,21 +108,20 @@ Hierarchical Cgroups | test3 - CFQ will handle the hierarchy correctly but and throttling will - practically treat all groups at same level. For details on CFQ - hierarchy support, refer to Documentation/block/cfq-iosched.txt. - Throttling will treat the hierarchy as if it looks like the - following. +CFQ by default and throttling with "sane_behavior" will handle the +hierarchy correctly. For details on CFQ hierarchy support, refer to +Documentation/block/cfq-iosched.txt. For throttling, all limits apply +to the whole subtree while all statistics are local to the IOs +directly generated by tasks in that cgroup. + +Throttling without "sane_behavior" enabled from cgroup side will +practically treat all groups at same level as if it looks like the +following. pivot / / \ \ root test1 test2 test3 - Nesting cgroups, while allowed, isn't officially supported and blkio - genereates warning when cgroups nest. Once throttling implements - hierarchy support, hierarchy will be supported and the warning will - be removed. - Various user visible config options =================================== CONFIG_BLK_CGROUP diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt new file mode 100644 index 0000000..2fa7493 --- /dev/null +++ b/Documentation/device-mapper/switch.txt @@ -0,0 +1,126 @@ +dm-switch +========= + +The device-mapper switch target creates a device that supports an +arbitrary mapping of fixed-size regions of I/O across a fixed set of +paths. The path used for any specific region can be switched +dynamically by sending the target a message. + +It maps I/O to underlying block devices efficiently when there is a large +number of fixed-sized address regions but there is no simple pattern +that would allow for a compact representation of the mapping such as +dm-stripe. + +Background +---------- + +Dell EqualLogic and some other iSCSI storage arrays use a distributed +frameless architecture. In this architecture, the storage group +consists of a number of distinct storage arrays ("members") each having +independent controllers, disk storage and network adapters. When a LUN +is created it is spread across multiple members. The details of the +spreading are hidden from initiators connected to this storage system. +The storage group exposes a single target discovery portal, no matter +how many members are being used. When iSCSI sessions are created, each +session is connected to an eth port on a single member. Data to a LUN +can be sent on any iSCSI session, and if the blocks being accessed are +stored on another member the I/O will be forwarded as required. This +forwarding is invisible to the initiator. The storage layout is also +dynamic, and the blocks stored on disk may be moved from member to +member as needed to balance the load. + +This architecture simplifies the management and configuration of both +the storage group and initiators. In a multipathing configuration, it +is possible to set up multiple iSCSI sessions to use multiple network +interfaces on both the host and target to take advantage of the +increased network bandwidth. An initiator could use a simple round +robin algorithm to send I/O across all paths and let the storage array +members forward it as necessary, but there is a performance advantage to +sending data directly to the correct member. + +A device-mapper table already lets you map different regions of a +device onto different targets. However in this architecture the LUN is +spread with an address region size on the order of 10s of MBs, which +means the resulting table could have more than a million entries and +consume far too much memory. + +Using this device-mapper switch target we can now build a two-layer +device hierarchy: + + Upper Tier – Determine which array member the I/O should be sent to. + Lower Tier – Load balance amongst paths to a particular member. + +The lower tier consists of a single dm multipath device for each member. +Each of these multipath devices contains the set of paths directly to +the array member in one priority group, and leverages existing path +selectors to load balance amongst these paths. We also build a +non-preferred priority group containing paths to other array members for +failover reasons. + +The upper tier consists of a single dm-switch device. This device uses +a bitmap to look up the location of the I/O and choose the appropriate +lower tier device to route the I/O. By using a bitmap we are able to +use 4 bits for each address range in a 16 member group (which is very +large for us). This is a much denser representation than the dm table +b-tree can achieve. + +Construction Parameters +======================= + + [...] + [ ]+ + + + The number of paths across which to distribute the I/O. + + + The number of 512-byte sectors in a region. Each region can be redirected + to any of the available paths. + + + The number of optional arguments. Currently, no optional arguments + are supported and so this must be zero. + + + The block device that represents a specific path to the device. + + + The offset of the start of data on the specific (in units + of 512-byte sectors). This number is added to the sector number when + forwarding the request to the specific path. Typically it is zero. + +Messages +======== + +set_region_mappings : []: []:... + +Modify the region table by specifying which regions are redirected to +which paths. + + + The region number (region size was specified in constructor parameters). + If index is omitted, the next region (previous index + 1) is used. + Expressed in hexadecimal (WITHOUT any prefix like 0x). + + + The path number in the range 0 ... ( - 1). + Expressed in hexadecimal (WITHOUT any prefix like 0x). + +Status +====== + +No status line is reported. + +Example +======= + +Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with +the same size. + +Create a switch device with 64kB region size: + dmsetup create switch --table "0 `blockdev --getsize /dev/vg1/switch0` + switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0" + +Set mappings for the first 7 entries to point to devices switch0, switch1, +switch2, switch0, switch1, switch2, switch1: + dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1 diff --git a/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt new file mode 100644 index 0000000..0c9222d --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt @@ -0,0 +1,74 @@ +* Texas Instrument OMAP SCM bandgap bindings + +In the System Control Module, OMAP supplies a voltage reference +and a temperature sensor feature that are gathered in the band +gap voltage and temperature sensor (VBGAPTS) module. The band +gap provides current and voltage reference for its internal +circuits and other analog IP blocks. The analog-to-digital +converter (ADC) produces an output value that is proportional +to the silicon temperature. + +Required properties: +- compatible : Should be: + - "ti,omap4430-bandgap" : for OMAP4430 bandgap + - "ti,omap4460-bandgap" : for OMAP4460 bandgap + - "ti,omap4470-bandgap" : for OMAP4470 bandgap + - "ti,omap5430-bandgap" : for OMAP5430 bandgap +- interrupts : this entry should indicate which interrupt line +the talert signal is routed to; +Specific: +- gpios : this entry should be used to inform which GPIO +line the tshut signal is routed to. The informed GPIO will +be treated as an IRQ; +- regs : this entry must also be specified and it is specific +to each bandgap version, because the mapping may change from +soc to soc, apart of depending on available features. + +Example: +OMAP4430: +bandgap { + reg = <0x4a002260 0x4 0x4a00232C 0x4>; + compatible = "ti,omap4430-bandgap"; +}; + +OMAP4460: +bandgap { + reg = <0x4a002260 0x4 + 0x4a00232C 0x4 + 0x4a002378 0x18>; + compatible = "ti,omap4460-bandgap"; + interrupts = <0 126 4>; /* talert */ + gpios = <&gpio3 22 0>; /* tshut */ +}; + +OMAP4470: +bandgap { + reg = <0x4a002260 0x4 + 0x4a00232C 0x4 + 0x4a002378 0x18>; + compatible = "ti,omap4470-bandgap"; + interrupts = <0 126 4>; /* talert */ + gpios = <&gpio3 22 0>; /* tshut */ +}; + +OMAP5430: +bandgap { + reg = <0x4a0021e0 0xc + 0x4a00232c 0xc + 0x4a002380 0x2c + 0x4a0023C0 0x3c>; + compatible = "ti,omap5430-bandgap"; + interrupts = <0 126 4>; /* talert */ +}; + +DRA752: +bandgap { + reg = <0x4a0021e0 0xc + 0x4a00232c 0xc + 0x4a002380 0x2c + 0x4a0023C0 0x3c + 0x4a002564 0x8 + 0x4a002574 0x50>; + compatible = "ti,dra752-bandgap"; + interrupts = <0 126 4>; /* talert */ +}; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 75236f1..15356ac 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3081,6 +3081,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/trace/ftrace.txt "trace options" section. + traceoff_on_warning + [FTRACE] enable this option to disable tracing when a + warning is hit. This turns off "tracing_on". Tracing can + be enabled again by echoing '1' into the "tracing_on" + file located in /sys/kernel/debug/tracing/ + + This option is useful, as it disables the trace before + the WARNING dump is called, which prevents the trace to + be filled with content caused by the warning output. + + This option can also be set at run time via the sysctl + option: kernel/traceoff_on_warning + transparent_hugepage= [KNL] Format: [always|madvise|never] diff --git a/Documentation/thermal/x86_pkg_temperature_thermal b/Documentation/thermal/x86_pkg_temperature_thermal new file mode 100644 index 0000000..17a3a4c --- /dev/null +++ b/Documentation/thermal/x86_pkg_temperature_thermal @@ -0,0 +1,47 @@ +Kernel driver: x86_pkg_temp_thermal +=================== + +Supported chips: +* x86: with package level thermal management +(Verify using: CPUID.06H:EAX[bit 6] =1) + +Authors: Srinivas Pandruvada + +Reference +--- +Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013): +Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT + +Description +--------- + +This driver register CPU digital temperature package level sensor as a thermal +zone with maximum two user mode configurable trip points. Number of trip points +depends on the capability of the package. Once the trip point is violated, +user mode can receive notification via thermal notification mechanism and can +take any action to control temperature. + + +Threshold management +-------------------- +Each package will register as a thermal zone under /sys/class/thermal. +Example: +/sys/class/thermal/thermal_zone1 + +This contains two trip points: +- trip_point_0_temp +- trip_point_1_temp + +User can set any temperature between 0 to TJ-Max temperature. Temperature units +are in milli-degree Celsius. Refer to "Documentation/thermal/sysfs-api.txt" for +thermal sys-fs details. + +Any value other than 0 in these trip points, can trigger thermal notifications. +Setting 0, stops sending thermal notifications. + +Thermal notifications: To get kobject-uevent notifications, set the thermal zone +policy to "user_space". For example: echo -n "user_space" > policy + + + + diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index bb24c2a..37732a2 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -183,13 +183,22 @@ The relational-operators depend on the type of the field being tested: The operators available for numeric fields are: -==, !=, <, <=, >, >= +==, !=, <, <=, >, >=, & And for string fields they are: -==, != +==, !=, ~ -Currently, only exact string matches are supported. +The glob (~) only accepts a wild card character (*) at the start and or +end of the string. For example: + + prev_comm ~ "*sh" + prev_comm ~ "sh*" + prev_comm ~ "*sh*" + +But does not allow for it to be within the string: + + prev_comm ~ "ba*sh" <-- is invalid 5.2 Setting filters ------------------- diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bfe8c29..b937c6e 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2430,6 +2430,19 @@ The following commands are supported: echo '!schedule:disable_event:sched:sched_switch' > \ set_ftrace_filter +- dump + When the function is hit, it will dump the contents of the ftrace + ring buffer to the console. This is useful if you need to debug + something, and want to dump the trace when a certain function + is hit. Perhaps its a function that is called before a tripple + fault happens and does not allow you to get a regular dump. + +- cpudump + When the function is hit, it will dump the contents of the ftrace + ring buffer for the current CPU to the console. Unlike the "dump" + command, it only prints out the contents of the ring buffer for the + CPU that executed the function that triggered the dump. + trace_pipe ---------- diff --git a/MAINTAINERS b/MAINTAINERS index cbd4f66..9d771d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2574,6 +2574,7 @@ S: Maintained DEVICE-MAPPER (LVM) M: Alasdair Kergon +M: Mike Snitzer M: dm-devel@redhat.com L: dm-devel@redhat.com W: http://sources.redhat.com/dm @@ -2585,6 +2586,7 @@ F: drivers/md/dm* F: drivers/md/persistent-data/ F: include/linux/device-mapper.h F: include/linux/dm-*.h +F: include/uapi/linux/dm-*.h DIOLAN U2C-12 I2C DRIVER M: Guenter Roeck @@ -8159,6 +8161,7 @@ M: Zhang Rui M: Eduardo Valentin L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git Q: https://patchwork.kernel.org/project/linux-pm/list/ S: Supported F: drivers/thermal/ @@ -8183,8 +8186,8 @@ F: drivers/platform/x86/thinkpad_acpi.c TI BANDGAP AND THERMAL DRIVER M: Eduardo Valentin L: linux-pm@vger.kernel.org -S: Maintained -F: drivers/staging/omap-thermal/ +S: Supported +F: drivers/thermal/ti-soc-thermal/ TI FLASH MEDIA INTERFACE DRIVER M: Alex Dubov diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6b52980..29e3093 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -214,6 +214,13 @@ void mce_log_therm_throt_event(__u64 status); /* Interrupt Handler for core thermal thresholds */ extern int (*platform_thermal_notify)(__u64 msr_val); +/* Interrupt Handler for package thermal thresholds */ +extern int (*platform_thermal_package_notify)(__u64 msr_val); + +/* Callback support of rate control, return true, if + * callback has rate control */ +extern bool (*platform_thermal_package_rate_control)(void); + #ifdef CONFIG_X86_THERMAL_VECTOR extern void mcheck_intel_therm_init(void); #else diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 98f2083..41e8e00 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -55,12 +55,24 @@ struct thermal_state { struct _thermal_state package_power_limit; struct _thermal_state core_thresh0; struct _thermal_state core_thresh1; + struct _thermal_state pkg_thresh0; + struct _thermal_state pkg_thresh1; }; /* Callback to handle core threshold interrupts */ int (*platform_thermal_notify)(__u64 msr_val); EXPORT_SYMBOL(platform_thermal_notify); +/* Callback to handle core package threshold_interrupts */ +int (*platform_thermal_package_notify)(__u64 msr_val); +EXPORT_SYMBOL_GPL(platform_thermal_package_notify); + +/* Callback support of rate control, return true, if + * callback has rate control */ +bool (*platform_thermal_package_rate_control)(void); +EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control); + + static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -195,19 +207,25 @@ static int therm_throt_process(bool new_event, int event, int level) return 0; } -static int thresh_event_valid(int event) +static int thresh_event_valid(int level, int event) { struct _thermal_state *state; unsigned int this_cpu = smp_processor_id(); struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); u64 now = get_jiffies_64(); - state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; + if (level == PACKAGE_LEVEL) + state = (event == 0) ? &pstate->pkg_thresh0 : + &pstate->pkg_thresh1; + else + state = (event == 0) ? &pstate->core_thresh0 : + &pstate->core_thresh1; if (time_before64(now, state->next_check)) return 0; state->next_check = now + CHECK_INTERVAL; + return 1; } @@ -322,6 +340,39 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ +static void notify_package_thresholds(__u64 msr_val) +{ + bool notify_thres_0 = false; + bool notify_thres_1 = false; + + if (!platform_thermal_package_notify) + return; + + /* lower threshold check */ + if (msr_val & THERM_LOG_THRESHOLD0) + notify_thres_0 = true; + /* higher threshold check */ + if (msr_val & THERM_LOG_THRESHOLD1) + notify_thres_1 = true; + + if (!notify_thres_0 && !notify_thres_1) + return; + + if (platform_thermal_package_rate_control && + platform_thermal_package_rate_control()) { + /* Rate control is implemented in callback */ + platform_thermal_package_notify(msr_val); + return; + } + + /* lower threshold reached */ + if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0)) + platform_thermal_package_notify(msr_val); + /* higher threshold reached */ + if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1)) + platform_thermal_package_notify(msr_val); +} + static void notify_thresholds(__u64 msr_val) { /* check whether the interrupt handler is defined; @@ -331,10 +382,12 @@ static void notify_thresholds(__u64 msr_val) return; /* lower threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) + if ((msr_val & THERM_LOG_THRESHOLD0) && + thresh_event_valid(CORE_LEVEL, 0)) platform_thermal_notify(msr_val); /* higher threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) + if ((msr_val & THERM_LOG_THRESHOLD1) && + thresh_event_valid(CORE_LEVEL, 1)) platform_thermal_notify(msr_val); } @@ -360,6 +413,8 @@ static void intel_thermal_interrupt(void) if (this_cpu_has(X86_FEATURE_PTS)) { rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + /* check violations of package thermal thresholds */ + notify_package_thresholds(msr_val); therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, PACKAGE_LEVEL); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 0a1b95f..7ea6451 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -6,10 +6,12 @@ config XTENSA select ARCH_WANT_FRAME_POINTERS select HAVE_IDE select GENERIC_ATOMIC64 + select GENERIC_CLOCKEVENTS select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES + select GENERIC_SCHED_CLOCK select MODULES_USE_ELF_RELA select GENERIC_PCI_IOMAP select ARCH_WANT_IPC_PARSE_VERSION @@ -17,6 +19,7 @@ config XTENSA select CLONE_BACKWARDS select IRQ_DOMAIN select HAVE_OPROFILE + select HAVE_FUNCTION_TRACER help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug index a34010e..af7da74 100644 --- a/arch/xtensa/Kconfig.debug +++ b/arch/xtensa/Kconfig.debug @@ -2,6 +2,16 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config DEBUG_TLB_SANITY + bool "Debug TLB sanity" + depends on DEBUG_KERNEL + help + Enable this to turn on TLB sanity check on each entry to userspace. + This check can spot missing TLB invalidation/wrong PTE permissions/ + premature page freeing. + + If unsure, say N. + config LD_NO_RELAX bool "Disable linker relaxation" default n diff --git a/arch/xtensa/boot/.gitignore b/arch/xtensa/boot/.gitignore new file mode 100644 index 0000000..be76559 --- /dev/null +++ b/arch/xtensa/boot/.gitignore @@ -0,0 +1,3 @@ +uImage +zImage.redboot +*.dtb diff --git a/arch/xtensa/boot/boot-elf/.gitignore b/arch/xtensa/boot/boot-elf/.gitignore new file mode 100644 index 0000000..5ff8fbb --- /dev/null +++ b/arch/xtensa/boot/boot-elf/.gitignore @@ -0,0 +1 @@ +boot.lds diff --git a/arch/xtensa/boot/lib/.gitignore b/arch/xtensa/boot/lib/.gitignore new file mode 100644 index 0000000..1629a61 --- /dev/null +++ b/arch/xtensa/boot/lib/.gitignore @@ -0,0 +1,3 @@ +inffast.c +inflate.c +inftrees.c diff --git a/arch/xtensa/boot/lib/Makefile b/arch/xtensa/boot/lib/Makefile index ad8952e..6868f2c 100644 --- a/arch/xtensa/boot/lib/Makefile +++ b/arch/xtensa/boot/lib/Makefile @@ -7,6 +7,13 @@ zlib := inffast.c inflate.c inftrees.c lib-y += $(zlib:.c=.o) zmem.o ccflags-y := -Ilib/zlib_inflate +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_inflate.o = -pg +CFLAGS_REMOVE_zmem.o = -pg +CFLAGS_REMOVE_inftrees.o = -pg +CFLAGS_REMOVE_inffast.o = -pg +endif + quiet_cmd_copy_zlib = COPY $@ cmd_copy_zlib = cat $< > $@ diff --git a/arch/xtensa/include/asm/bootparam.h b/arch/xtensa/include/asm/bootparam.h index 0c25799..23392c5 100644 --- a/arch/xtensa/include/asm/bootparam.h +++ b/arch/xtensa/include/asm/bootparam.h @@ -20,7 +20,7 @@ #define BP_TAG_COMMAND_LINE 0x1001 /* command line (0-terminated string)*/ #define BP_TAG_INITRD 0x1002 /* ramdisk addr and size (bp_meminfo) */ #define BP_TAG_MEMORY 0x1003 /* memory addr and size (bp_meminfo) */ -#define BP_TAG_SERIAL_BAUSRATE 0x1004 /* baud rate of current console. */ +#define BP_TAG_SERIAL_BAUDRATE 0x1004 /* baud rate of current console. */ #define BP_TAG_SERIAL_PORT 0x1005 /* serial device of current console */ #define BP_TAG_FDT 0x1006 /* flat device tree addr */ diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h index d9ab131..370b26f 100644 --- a/arch/xtensa/include/asm/cmpxchg.h +++ b/arch/xtensa/include/asm/cmpxchg.h @@ -93,6 +93,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n)) /* * xchg_u32 diff --git a/arch/xtensa/include/asm/delay.h b/arch/xtensa/include/asm/delay.h index 61fc5fa..3899610 100644 --- a/arch/xtensa/include/asm/delay.h +++ b/arch/xtensa/include/asm/delay.h @@ -12,7 +12,7 @@ #ifndef _XTENSA_DELAY_H #define _XTENSA_DELAY_H -#include +#include #include extern unsigned long loops_per_jiffy; @@ -24,24 +24,17 @@ static inline void __delay(unsigned long loops) : "=r" (loops) : "0" (loops)); } -static __inline__ u32 xtensa_get_ccount(void) -{ - u32 ccount; - asm volatile ("rsr %0, ccount\n" : "=r" (ccount)); - return ccount; -} - /* For SMP/NUMA systems, change boot_cpu_data to something like * local_cpu_data->... where local_cpu_data points to the current * cpu. */ static __inline__ void udelay (unsigned long usecs) { - unsigned long start = xtensa_get_ccount(); + unsigned long start = get_ccount(); unsigned long cycles = usecs * (loops_per_jiffy / (1000000UL / HZ)); /* Note: all variables are unsigned (can wrap around)! */ - while (((unsigned long)xtensa_get_ccount()) - start < cycles) + while (((unsigned long)get_ccount()) - start < cycles) ; } diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h index 36dc7a6..73cc3f4 100644 --- a/arch/xtensa/include/asm/ftrace.h +++ b/arch/xtensa/include/asm/ftrace.h @@ -13,6 +13,7 @@ #include #define HAVE_ARCH_CALLER_ADDR +#ifndef __ASSEMBLY__ #define CALLER_ADDR0 ({ unsigned long a0, a1; \ __asm__ __volatile__ ( \ "mov %0, a0\n" \ @@ -24,10 +25,22 @@ extern unsigned long return_address(unsigned level); #define CALLER_ADDR1 return_address(1) #define CALLER_ADDR2 return_address(2) #define CALLER_ADDR3 return_address(3) -#else +#else /* CONFIG_FRAME_POINTER */ #define CALLER_ADDR1 (0) #define CALLER_ADDR2 (0) #define CALLER_ADDR3 (0) -#endif +#endif /* CONFIG_FRAME_POINTER */ +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_ADDR ((unsigned long)(_mcount)) +#define MCOUNT_INSN_SIZE 3 + +#ifndef __ASSEMBLY__ +extern void _mcount(void); +#define mcount _mcount +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* _XTENSA_FTRACE_H */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8f017eb..0fdf5d0 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Copyright (C) 2001 - 2007 Tensilica Inc. + * Copyright (C) 2001 - 2013 Tensilica Inc. */ #ifndef _XTENSA_PGTABLE_H @@ -64,41 +64,82 @@ * Virtual memory area. We keep a distance to other memory regions to be * on the safe side. We also use this area for cache aliasing. */ - #define VMALLOC_START 0xC0000000 #define VMALLOC_END 0xC7FEFFFF #define TLBTEMP_BASE_1 0xC7FF0000 #define TLBTEMP_BASE_2 0xC7FF8000 /* - * Xtensa Linux config PTE layout (when present): - * 31-12: PPN - * 11-6: Software - * 5-4: RING - * 3-0: CA + * For the Xtensa architecture, the PTE layout is as follows: + * + * 31------12 11 10-9 8-6 5-4 3-2 1-0 + * +-----------------------------------------+ + * | | Software | HARDWARE | + * | PPN | ADW | RI |Attribute| + * +-----------------------------------------+ + * pte_none | MBZ | 01 | 11 | 00 | + * +-----------------------------------------+ + * present | PPN | 0 | 00 | ADW | RI | CA | wx | + * +- - - - - - - - - - - - - - - - - - - - -+ + * (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 | + * +-----------------------------------------+ + * swap | index | type | 01 | 11 | 00 | + * +- - - - - - - - - - - - - - - - - - - - -+ + * file | file offset | 01 | 11 | 10 | + * +-----------------------------------------+ + * + * For T1050 hardware and earlier the layout differs for present and (PAGE_NONE) + * +-----------------------------------------+ + * present | PPN | 0 | 00 | ADW | RI | CA | w1 | + * +-----------------------------------------+ + * (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 01 | 00 | + * +-----------------------------------------+ * - * Similar to the Alpha and MIPS ports, we need to keep track of the ref - * and mod bits in software. We have a software "you can read - * from this page" bit, and a hardware one which actually lets the - * process read from the page. On the same token we have a software - * writable bit and the real hardware one which actually lets the - * process write to the page. + * Legend: + * PPN Physical Page Number + * ADW software: accessed (young) / dirty / writable + * RI ring (0=privileged, 1=user, 2 and 3 are unused) + * CA cache attribute: 00 bypass, 01 writeback, 10 writethrough + * (11 is invalid and used to mark pages that are not present) + * w page is writable (hw) + * x page is executable (hw) + * index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB) + * (note that the index is always non-zero) + * type swap type (5 bits -> 32 types) + * file offset 26-bit offset into the file, in increments of PAGE_SIZE * - * See further below for PTE layout for swapped-out pages. + * Notes: + * - (PROT_NONE) is a special case of 'present' but causes an exception for + * any access (read, write, and execute). + * - 'multihit-exception' has the highest priority of all MMU exceptions, + * so the ring must be set to 'RING_USER' even for 'non-present' pages. + * - on older hardware, the exectuable flag was not supported and + * used as a 'valid' flag, so it needs to be always set. + * - we need to keep track of certain flags in software (dirty and young) + * to do this, we use write exceptions and have a separate software w-flag. + * - attribute value 1101 (and 1111 on T1050 and earlier) is reserved */ +#define _PAGE_ATTRIB_MASK 0xf + #define _PAGE_HW_EXEC (1<<0) /* hardware: page is executable */ #define _PAGE_HW_WRITE (1<<1) /* hardware: page is writable */ -#define _PAGE_FILE (1<<1) /* non-linear mapping, if !present */ -#define _PAGE_PROTNONE (3<<0) /* special case for VM_PROT_NONE */ - -/* None of these cache modes include MP coherency: */ #define _PAGE_CA_BYPASS (0<<2) /* bypass, non-speculative */ #define _PAGE_CA_WB (1<<2) /* write-back */ #define _PAGE_CA_WT (2<<2) /* write-through */ #define _PAGE_CA_MASK (3<<2) -#define _PAGE_INVALID (3<<2) +#define _PAGE_CA_INVALID (3<<2) + +/* We use invalid attribute values to distinguish special pte entries */ +#if XCHAL_HW_VERSION_MAJOR < 2000 +#define _PAGE_HW_VALID 0x01 /* older HW needed this bit set */ +#define _PAGE_NONE 0x04 +#else +#define _PAGE_HW_VALID 0x00 +#define _PAGE_NONE 0x0f +#endif +#define _PAGE_FILE (1<<1) /* file mapped page, only if !present */ #define _PAGE_USER (1<<4) /* user access (ring=1) */ @@ -108,19 +149,12 @@ #define _PAGE_DIRTY (1<<7) /* software: page dirty */ #define _PAGE_ACCESSED (1<<8) /* software: page accessed (read) */ -/* On older HW revisions, we always have to set bit 0 */ -#if XCHAL_HW_VERSION_MAJOR < 2000 -# define _PAGE_VALID (1<<0) -#else -# define _PAGE_VALID 0 -#endif - -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_PRESENT (_PAGE_VALID | _PAGE_CA_WB | _PAGE_ACCESSED) - #ifdef CONFIG_MMU -#define PAGE_NONE __pgprot(_PAGE_INVALID | _PAGE_USER | _PAGE_PROTNONE) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_PRESENT (_PAGE_HW_VALID | _PAGE_CA_WB | _PAGE_ACCESSED) + +#define PAGE_NONE __pgprot(_PAGE_NONE | _PAGE_USER) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER) #define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_HW_EXEC) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER) @@ -132,9 +166,9 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT|_PAGE_HW_WRITE|_PAGE_HW_EXEC) #if (DCACHE_WAY_SIZE > PAGE_SIZE) -# define _PAGE_DIRECTORY (_PAGE_VALID | _PAGE_ACCESSED) +# define _PAGE_DIRECTORY (_PAGE_HW_VALID | _PAGE_ACCESSED | _PAGE_CA_BYPASS) #else -# define _PAGE_DIRECTORY (_PAGE_VALID | _PAGE_ACCESSED | _PAGE_CA_WB) +# define _PAGE_DIRECTORY (_PAGE_HW_VALID | _PAGE_ACCESSED | _PAGE_CA_WB) #endif #else /* no mmu */ @@ -202,12 +236,16 @@ static inline void pgtable_cache_init(void) { } /* * pte status. */ -#define pte_none(pte) (pte_val(pte) == _PAGE_INVALID) -#define pte_present(pte) \ - (((pte_val(pte) & _PAGE_CA_MASK) != _PAGE_INVALID) \ - || ((pte_val(pte) & _PAGE_PROTNONE) == _PAGE_PROTNONE)) +# define pte_none(pte) (pte_val(pte) == (_PAGE_CA_INVALID | _PAGE_USER)) +#if XCHAL_HW_VERSION_MAJOR < 2000 +# define pte_present(pte) ((pte_val(pte) & _PAGE_CA_MASK) != _PAGE_CA_INVALID) +#else +# define pte_present(pte) \ + (((pte_val(pte) & _PAGE_CA_MASK) != _PAGE_CA_INVALID) \ + || ((pte_val(pte) & _PAGE_ATTRIB_MASK) == _PAGE_NONE)) +#endif #define pte_clear(mm,addr,ptep) \ - do { update_pte(ptep, __pte(_PAGE_INVALID)); } while(0) + do { update_pte(ptep, __pte(_PAGE_CA_INVALID | _PAGE_USER)); } while (0) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_present(pmd) (pmd_val(pmd) & PAGE_MASK) @@ -328,35 +366,23 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* - * Encode and decode a swap entry. - * - * Format of swap pte: - * bit 0 MBZ - * bit 1 page-file (must be zero) - * bits 2 - 3 page hw access mode (must be 11: _PAGE_INVALID) - * bits 4 - 5 ring protection (must be 01: _PAGE_USER) - * bits 6 - 10 swap type (5 bits -> 32 types) - * bits 11 - 31 swap offset / PAGE_SIZE (21 bits -> 8GB) - - * Format of file pte: - * bit 0 MBZ - * bit 1 page-file (must be one: _PAGE_FILE) - * bits 2 - 3 page hw access mode (must be 11: _PAGE_INVALID) - * bits 4 - 5 ring protection (must be 01: _PAGE_USER) - * bits 6 - 31 file offset / PAGE_SIZE + * Encode and decode a swap and file entry. */ +#define SWP_TYPE_BITS 5 +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) #define __swp_type(entry) (((entry).val >> 6) & 0x1f) #define __swp_offset(entry) ((entry).val >> 11) #define __swp_entry(type,offs) \ - ((swp_entry_t) {((type) << 6) | ((offs) << 11) | _PAGE_INVALID}) + ((swp_entry_t){((type) << 6) | ((offs) << 11) | \ + _PAGE_CA_INVALID | _PAGE_USER}) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PTE_FILE_MAX_BITS 28 -#define pte_to_pgoff(pte) (pte_val(pte) >> 4) +#define PTE_FILE_MAX_BITS 26 +#define pte_to_pgoff(pte) (pte_val(pte) >> 6) #define pgoff_to_pte(off) \ - ((pte_t) { ((off) << 4) | _PAGE_INVALID | _PAGE_FILE }) + ((pte_t) { ((off) << 6) | _PAGE_CA_INVALID | _PAGE_FILE | _PAGE_USER }) #endif /* !defined (__ASSEMBLY__) */ diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h index ec098b6..32e98f2 100644 --- a/arch/xtensa/include/asm/platform.h +++ b/arch/xtensa/include/asm/platform.h @@ -30,11 +30,6 @@ extern void platform_init(bp_tag_t*); extern void platform_setup (char **); /* - * platform_init_irq is called from init_IRQ. - */ -extern void platform_init_irq (void); - -/* * platform_restart is called to restart the system. */ extern void platform_restart (void); diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 3d35e5d..69f9017 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -35,19 +35,11 @@ # error "Bad timer number for Linux configurations!" #endif -#define LINUX_TIMER_MASK (1L << LINUX_TIMER_INT) - -#define CLOCK_TICK_RATE 1193180 /* (everyone is using this value) */ -#define CLOCK_TICK_FACTOR 20 /* Factor of both 10^6 and CLOCK_TICK_RATE */ - #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT -extern unsigned long ccount_per_jiffy; -extern unsigned long nsec_per_ccount; -#define CCOUNT_PER_JIFFY ccount_per_jiffy -#define NSEC_PER_CCOUNT nsec_per_ccount +extern unsigned long ccount_freq; +#define CCOUNT_PER_JIFFY (ccount_freq / HZ) #else #define CCOUNT_PER_JIFFY (CONFIG_XTENSA_CPU_CLOCK*(1000000UL/HZ)) -#define NSEC_PER_CCOUNT (1000UL / CONFIG_XTENSA_CPU_CLOCK) #endif diff --git a/arch/xtensa/kernel/.gitignore b/arch/xtensa/kernel/.gitignore new file mode 100644 index 0000000..c5f676c --- /dev/null +++ b/arch/xtensa/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index 1e7fc87..f90265e 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -11,6 +11,7 @@ obj-y := align.o coprocessor.o entry.o irq.o pci-dma.o platform.o process.o \ obj-$(CONFIG_KGDB) += xtensa-stub.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o AFLAGS_head.o += -mtext-section-literals diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 5082507..9298742 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -458,7 +458,7 @@ common_exception_return: _bbsi.l a4, TIF_NEED_RESCHED, 3f _bbsi.l a4, TIF_NOTIFY_RESUME, 2f - _bbci.l a4, TIF_SIGPENDING, 4f + _bbci.l a4, TIF_SIGPENDING, 5f 2: l32i a4, a1, PT_DEPC bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f @@ -476,6 +476,13 @@ common_exception_return: callx4 a4 j 1b +5: +#ifdef CONFIG_DEBUG_TLB_SANITY + l32i a4, a1, PT_DEPC + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f + movi a4, check_tlb_sanity + callx4 a4 +#endif 4: /* Restore optional registers. */ load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT @@ -1792,10 +1799,15 @@ ENTRY(fast_store_prohibited) l32i a0, a0, 0 beqz a0, 2f - /* Note that we assume _PAGE_WRITABLE_BIT is only set if pte is valid.*/ + /* + * Note that we test _PAGE_WRITABLE_BIT only if PTE is present + * and is not PAGE_NONE. See pgtable.h for possible PTE layouts. + */ _PTE_OFFSET(a0, a1, a4) l32i a4, a0, 0 # read pteval + movi a1, _PAGE_CA_INVALID + ball a4, a1, 2f bbci.l a4, _PAGE_WRITABLE_BIT, 2f movi a1, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HW_WRITE diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index ef12c0e..7d740eb 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -68,6 +68,15 @@ _SetupMMU: #ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX initialize_mmu +#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY + rsr a2, excsave1 + movi a3, 0x08000000 + bgeu a2, a3, 1f + movi a3, 0xd0000000 + add a2, a2, a3 + wsr a2, excsave1 +1: +#endif #endif .end no-absolute-literals diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S new file mode 100644 index 0000000..0eeda2e --- /dev/null +++ b/arch/xtensa/kernel/mcount.S @@ -0,0 +1,50 @@ +/* + * arch/xtensa/kernel/mcount.S + * + * Xtensa specific mcount support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2013 Tensilica Inc. + */ + +#include +#include + +/* + * Entry condition: + * + * a2: a0 of the caller + */ + +ENTRY(_mcount) + + entry a1, 16 + + movi a4, ftrace_trace_function + l32i a4, a4, 0 + movi a3, ftrace_stub + bne a3, a4, 1f + retw + +1: xor a7, a2, a1 + movi a3, 0x3fffffff + and a7, a7, a3 + xor a7, a7, a1 + + xor a6, a0, a1 + and a6, a6, a3 + xor a6, a6, a1 + addi a6, a6, -MCOUNT_INSN_SIZE + callx4 a4 + + retw + +ENDPROC(_mcount) + +ENTRY(ftrace_stub) + entry a1, 16 + retw +ENDPROC(ftrace_stub) diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 126c188..5b34033 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -77,9 +77,9 @@ pcibios_align_resource(void *data, const struct resource *res, if (res->flags & IORESOURCE_IO) { if (size > 0x100) { - printk(KERN_ERR "PCI: I/O Region %s/%d too large" - " (%ld bytes)\n", pci_name(dev), - dev->resource - res, size); + pr_err("PCI: I/O Region %s/%d too large (%u bytes)\n", + pci_name(dev), dev->resource - res, + size); } if (start & 0x300) @@ -174,7 +174,7 @@ static int __init pcibios_init(void) struct pci_controller *pci_ctrl; struct list_head resources; struct pci_bus *bus; - int next_busno = 0, i; + int next_busno = 0; printk("PCI: Probing PCI hardware\n"); @@ -197,7 +197,7 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); -void __init pcibios_fixup_bus(struct pci_bus *bus) +void pcibios_fixup_bus(struct pci_bus *bus) { if (bus->parent) { /* This is a subordinate bridge */ diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index 2bd6c35..1cf0082 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -29,7 +29,6 @@ */ _F(void, setup, (char** cmd), { }); -_F(void, init_irq, (void), { }); _F(void, restart, (void), { while(1); }); _F(void, halt, (void), { while(1); }); _F(void, power_off, (void), { while(1); }); @@ -42,6 +41,6 @@ _F(void, pcibios_init, (void), { }); _F(void, calibrate_ccount, (void), { pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n"); - ccount_per_jiffy = 10 * (1000000UL/HZ); + ccount_freq = 10 * 1000000UL; }); #endif diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ec..42a8bba 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -152,8 +152,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) { meminfo_t* mi; mi = (meminfo_t*)(tag->data); - initrd_start = (void*)(mi->start); - initrd_end = (void*)(mi->end); + initrd_start = __va(mi->start); + initrd_end = __va(mi->end); return 0; } @@ -164,7 +164,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); static int __init parse_tag_fdt(const bp_tag_t *tag) { - dtb_start = (void *)(tag->data[0]); + dtb_start = __va(tag->data[0]); return 0; } @@ -256,7 +256,7 @@ void __init early_init_devtree(void *params) static void __init copy_devtree(void) { void *alloc = early_init_dt_alloc_memory_arch( - be32_to_cpu(initial_boot_params->totalsize), 0); + be32_to_cpu(initial_boot_params->totalsize), 8); if (alloc) { memcpy(alloc, initial_boot_params, be32_to_cpu(initial_boot_params->totalsize)); diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index ffb4741..bdbb173 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -23,13 +24,13 @@ #include #include #include +#include #include #include #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT -unsigned long ccount_per_jiffy; /* per 1/HZ */ -unsigned long nsec_per_ccount; /* nsec per ccount increment */ +unsigned long ccount_freq; /* ccount Hz */ #endif static cycle_t ccount_read(struct clocksource *cs) @@ -37,6 +38,11 @@ static cycle_t ccount_read(struct clocksource *cs) return (cycle_t)get_ccount(); } +static u32 notrace ccount_sched_clock_read(void) +{ + return get_ccount(); +} + static struct clocksource ccount_clocksource = { .name = "ccount", .rating = 200, @@ -44,29 +50,98 @@ static struct clocksource ccount_clocksource = { .mask = CLOCKSOURCE_MASK(32), }; +static int ccount_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev); +static void ccount_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt); +static struct ccount_timer_t { + struct clock_event_device evt; + int irq_enabled; +} ccount_timer = { + .evt = { + .name = "ccount_clockevent", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 300, + .set_next_event = ccount_timer_set_next_event, + .set_mode = ccount_timer_set_mode, + }, +}; + +static int ccount_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + unsigned long flags, next; + int ret = 0; + + local_irq_save(flags); + next = get_ccount() + delta; + set_linux_timer(next); + if (next - get_ccount() > delta) + ret = -ETIME; + local_irq_restore(flags); + + return ret; +} + +static void ccount_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct ccount_timer_t *timer = + container_of(evt, struct ccount_timer_t, evt); + + /* + * There is no way to disable the timer interrupt at the device level, + * only at the intenable register itself. Since enable_irq/disable_irq + * calls are nested, we need to make sure that these calls are + * balanced. + */ + switch (mode) { + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + if (timer->irq_enabled) { + disable_irq(evt->irq); + timer->irq_enabled = 0; + } + break; + case CLOCK_EVT_MODE_RESUME: + case CLOCK_EVT_MODE_ONESHOT: + if (!timer->irq_enabled) { + enable_irq(evt->irq); + timer->irq_enabled = 1; + } + default: + break; + } +} + static irqreturn_t timer_interrupt(int irq, void *dev_id); static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_TIMER, .name = "timer", + .dev_id = &ccount_timer, }; void __init time_init(void) { - unsigned int irq; #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT printk("Calibrating CPU frequency "); platform_calibrate_ccount(); - printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ), - (int)(ccount_per_jiffy/(10000/HZ))%100); + printk("%d.%02d MHz\n", (int)ccount_freq/1000000, + (int)(ccount_freq/10000)%100); #endif clocksource_register_hz(&ccount_clocksource, CCOUNT_PER_JIFFY * HZ); - /* Initialize the linux timer interrupt. */ + ccount_timer.evt.cpumask = cpumask_of(0); + ccount_timer.evt.irq = irq_create_mapping(NULL, LINUX_TIMER_INT); + if (WARN(!ccount_timer.evt.irq, "error: can't map timer irq")) + return; + clockevents_config_and_register(&ccount_timer.evt, ccount_freq, 0xf, + 0xffffffff); + setup_irq(ccount_timer.evt.irq, &timer_irqaction); + ccount_timer.irq_enabled = 1; - irq = irq_create_mapping(NULL, LINUX_TIMER_INT); - setup_irq(irq, &timer_irqaction); - set_linux_timer(get_ccount() + CCOUNT_PER_JIFFY); + setup_sched_clock(ccount_sched_clock_read, 32, ccount_freq); } /* @@ -75,36 +150,14 @@ void __init time_init(void) irqreturn_t timer_interrupt (int irq, void *dev_id) { + struct ccount_timer_t *timer = dev_id; + struct clock_event_device *evt = &timer->evt; - unsigned long next; - - next = get_linux_timer(); - -again: - while ((signed long)(get_ccount() - next) > 0) { - - profile_tick(CPU_PROFILING); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - - xtime_update(1); /* Linux handler in kernel/time/timekeeping */ - - /* Note that writing CCOMPARE clears the interrupt. */ - - next += CCOUNT_PER_JIFFY; - set_linux_timer(next); - } + evt->event_handler(evt); /* Allow platform to do something useful (Wdog). */ - platform_heartbeat(); - /* Make sure we didn't miss any tick... */ - - if ((signed long)(get_ccount() - next) > 0) - goto again; - return IRQ_HANDLED; } diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 42c53c87..d8507f8 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -124,3 +124,7 @@ extern long common_exception_return; extern long _spill_registers; EXPORT_SYMBOL(common_exception_return); EXPORT_SYMBOL(_spill_registers); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 5411aa6..ca9d236 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -64,7 +64,7 @@ void flush_tlb_mm(struct mm_struct *mm) { if (mm == current->active_mm) { unsigned long flags; - local_save_flags(flags); + local_irq_save(flags); __get_new_mmu_context(mm); __load_mmu_context(mm); local_irq_restore(flags); @@ -94,7 +94,7 @@ void flush_tlb_range (struct vm_area_struct *vma, printk("[tlbrange<%02lx,%08lx,%08lx>]\n", (unsigned long)mm->context, start, end); #endif - local_save_flags(flags); + local_irq_save(flags); if (end-start + (PAGE_SIZE-1) <= _TLB_ENTRIES << PAGE_SHIFT) { int oldpid = get_rasid_register(); @@ -128,9 +128,10 @@ void flush_tlb_page (struct vm_area_struct *vma, unsigned long page) if(mm->context == NO_CONTEXT) return; - local_save_flags(flags); + local_irq_save(flags); oldpid = get_rasid_register(); + set_rasid_register(ASID_INSERT(mm->context)); if (vma->vm_flags & VM_EXEC) invalidate_itlb_mapping(page); @@ -140,3 +141,116 @@ void flush_tlb_page (struct vm_area_struct *vma, unsigned long page) local_irq_restore(flags); } + +#ifdef CONFIG_DEBUG_TLB_SANITY + +static unsigned get_pte_for_vaddr(unsigned vaddr) +{ + struct task_struct *task = get_current(); + struct mm_struct *mm = task->mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + if (!mm) + mm = task->active_mm; + pgd = pgd_offset(mm, vaddr); + if (pgd_none_or_clear_bad(pgd)) + return 0; + pmd = pmd_offset(pgd, vaddr); + if (pmd_none_or_clear_bad(pmd)) + return 0; + pte = pte_offset_map(pmd, vaddr); + if (!pte) + return 0; + return pte_val(*pte); +} + +enum { + TLB_SUSPICIOUS = 1, + TLB_INSANE = 2, +}; + +static void tlb_insane(void) +{ + BUG_ON(1); +} + +static void tlb_suspicious(void) +{ + WARN_ON(1); +} + +/* + * Check that TLB entries with kernel ASID (1) have kernel VMA (>= TASK_SIZE), + * and TLB entries with user ASID (>=4) have VMA < TASK_SIZE. + * + * Check that valid TLB entries either have the same PA as the PTE, or PTE is + * marked as non-present. Non-present PTE and the page with non-zero refcount + * and zero mapcount is normal for batched TLB flush operation. Zero refcount + * means that the page was freed prematurely. Non-zero mapcount is unusual, + * but does not necessary means an error, thus marked as suspicious. + */ +static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) +{ + unsigned tlbidx = w | (e << PAGE_SHIFT); + unsigned r0 = dtlb ? + read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx); + unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT); + unsigned pte = get_pte_for_vaddr(vpn); + unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK; + unsigned tlb_asid = r0 & ASID_MASK; + bool kernel = tlb_asid == 1; + int rc = 0; + + if (tlb_asid > 0 && ((vpn < TASK_SIZE) == kernel)) { + pr_err("%cTLB: way: %u, entry: %u, VPN %08x in %s PTE\n", + dtlb ? 'D' : 'I', w, e, vpn, + kernel ? "kernel" : "user"); + rc |= TLB_INSANE; + } + + if (tlb_asid == mm_asid) { + unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) : + read_itlb_translation(tlbidx); + if ((pte ^ r1) & PAGE_MASK) { + pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n", + dtlb ? 'D' : 'I', w, e, r0, r1, pte); + if (pte == 0 || !pte_present(__pte(pte))) { + struct page *p = pfn_to_page(r1 >> PAGE_SHIFT); + pr_err("page refcount: %d, mapcount: %d\n", + page_count(p), + page_mapcount(p)); + if (!page_count(p)) + rc |= TLB_INSANE; + else if (page_mapped(p)) + rc |= TLB_SUSPICIOUS; + } else { + rc |= TLB_INSANE; + } + } + } + return rc; +} + +void check_tlb_sanity(void) +{ + unsigned long flags; + unsigned w, e; + int bug = 0; + + local_irq_save(flags); + for (w = 0; w < DTLB_ARF_WAYS; ++w) + for (e = 0; e < (1 << XCHAL_DTLB_ARF_ENTRIES_LOG2); ++e) + bug |= check_tlb_entry(w, e, true); + for (w = 0; w < ITLB_ARF_WAYS; ++w) + for (e = 0; e < (1 << XCHAL_ITLB_ARF_ENTRIES_LOG2); ++e) + bug |= check_tlb_entry(w, e, false); + if (bug & TLB_INSANE) + tlb_insane(); + if (bug & TLB_SUSPICIOUS) + tlb_suspicious(); + local_irq_restore(flags); +} + +#endif /* CONFIG_DEBUG_TLB_SANITY */ diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 7d0fea6..56f88b7 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -700,7 +700,7 @@ struct iss_net_init { #define ERR KERN_ERR "iss_net_setup: " -static int iss_net_setup(char *str) +static int __init iss_net_setup(char *str) { struct iss_net_private *device = NULL; struct iss_net_init *new; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index c0edb35..8c6e819 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -108,13 +108,13 @@ static int simdisk_xfer_bio(struct simdisk *dev, struct bio *bio) sector_t sector = bio->bi_sector; bio_for_each_segment(bvec, bio, i) { - char *buffer = __bio_kmap_atomic(bio, i, KM_USER0); + char *buffer = __bio_kmap_atomic(bio, i); unsigned len = bvec->bv_len >> SECTOR_SHIFT; simdisk_transfer(dev, sector, len, buffer, bio_data_dir(bio) == WRITE); sector += len; - __bio_kunmap_atomic(bio, KM_USER0); + __bio_kunmap_atomic(bio); } return 0; } diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 96ef8ee..74bb74f 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -163,7 +163,7 @@ void platform_heartbeat(void) #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT -void platform_calibrate_ccount(void) +void __init platform_calibrate_ccount(void) { long clk_freq = 0; #ifdef CONFIG_OF @@ -179,8 +179,7 @@ void platform_calibrate_ccount(void) if (!clk_freq) clk_freq = *(long *)XTFPGA_CLKFRQ_VADDR; - ccount_per_jiffy = clk_freq / HZ; - nsec_per_ccount = 1000000000UL / clk_freq; + ccount_freq = clk_freq; } #endif diff --git a/arch/xtensa/variants/s6000/delay.c b/arch/xtensa/variants/s6000/delay.c index 54b2b57..3915456 100644 --- a/arch/xtensa/variants/s6000/delay.c +++ b/arch/xtensa/variants/s6000/delay.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -17,11 +16,10 @@ void platform_calibrate_ccount(void) "1: l32i %0, %2, 0 ;" " beq %0, %1, 1b ;" : "=&a"(u) : "a"(t), "a"(tstamp)); - b = xtensa_get_ccount(); + b = get_ccount(); if (i == LOOPS) a = b; } while (--i >= 0); b -= a; - nsec_per_ccount = (LOOPS * 10000) / b; - ccount_per_jiffy = b * (100000UL / (LOOPS * HZ)); + ccount_freq = b * (100000UL / LOOPS); } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e8918ff..290792a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; -static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, bool update_hint); - -/** - * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_cgrp: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU - * read locked. If called under either blkcg or queue lock, the iteration - * is guaranteed to include all and only online blkgs. The caller may - * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip - * subtree. - */ -#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ - cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ - if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ - (p_blkg)->q, false))) - static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg) if (!blkg) return; - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - struct blkg_policy_data *pd = blkg->pd[i]; - - if (!pd) - continue; - - if (pol && pol->pd_exit_fn) - pol->pd_exit_fn(blkg); - - kfree(pd); - } + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkg->pd[i]); blk_exit_rl(&blkg->rl); kfree(blkg); @@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->pd[i] = pd; pd->blkg = blkg; pd->plid = i; - - /* invoke per-policy init */ - if (pol->pd_init_fn) - pol->pd_init_fn(blkg); } return blkg; @@ -158,8 +124,8 @@ err_free: * @q's bypass state. If @update_hint is %true, the caller should be * holding @q->queue_lock and lookup hint is updated on success. */ -static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, bool update_hint) +struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, + bool update_hint) { struct blkcg_gq *blkg; @@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, } blkg = new_blkg; - /* link parent and insert */ + /* link parent */ if (blkcg_parent(blkcg)) { blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); if (WARN_ON_ONCE(!blkg->parent)) { - blkg = ERR_PTR(-EINVAL); + ret = -EINVAL; goto err_put_css; } blkg_get(blkg->parent); } + /* invoke per-policy init */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_init_fn) + pol->pd_init_fn(blkg); + } + + /* insert */ spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); if (likely(!ret)) { @@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q) q->root_rl.blkg = NULL; } -static void blkg_rcu_free(struct rcu_head *rcu_head) +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. + */ +void __blkg_release_rcu(struct rcu_head *rcu_head) { - blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head)); -} + struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); + int i; + + /* tell policies that this one is being freed */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_exit_fn) + pol->pd_exit_fn(blkg); + } -void __blkg_release(struct blkcg_gq *blkg) -{ /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); - if (blkg->parent) + if (blkg->parent) { + spin_lock_irq(blkg->q->queue_lock); blkg_put(blkg->parent); + spin_unlock_irq(blkg->q->queue_lock); + } - /* - * A group is freed in rcu manner. But having an rcu lock does not - * mean that one can access all the fields of blkg and assume these - * are valid. For example, don't try to follow throtl_data and - * request queue links. - * - * Having a reference to blkg under an rcu allows acess to only - * values local to groups like group stats and group rate limits - */ - call_rcu(&blkg->rcu_head, blkg_rcu_free); + blkg_free(blkg); } -EXPORT_SYMBOL_GPL(__blkg_release); +EXPORT_SYMBOL_GPL(__blkg_release_rcu); /* * The next function used by blk_queue_for_each_rl(). It's a bit tricky @@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = { .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, .module = THIS_MODULE, - - /* - * blkio subsystem is utterly broken in terms of hierarchy support. - * It treats all cgroups equally regardless of where they're - * located in the hierarchy - all cgroups are treated as if they're - * right below the root. Fix it and remove the following. - */ - .broken_hierarchy = true, }; EXPORT_SYMBOL_GPL(blkio_subsys); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 4e595ee..8056c03 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -266,7 +266,7 @@ static inline void blkg_get(struct blkcg_gq *blkg) blkg->refcnt++; } -void __blkg_release(struct blkcg_gq *blkg); +void __blkg_release_rcu(struct rcu_head *rcu); /** * blkg_put - put a blkg reference @@ -279,9 +279,43 @@ static inline void blkg_put(struct blkcg_gq *blkg) lockdep_assert_held(blkg->q->queue_lock); WARN_ON_ONCE(blkg->refcnt <= 0); if (!--blkg->refcnt) - __blkg_release(blkg); + call_rcu(&blkg->rcu_head, __blkg_release_rcu); } +struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, + bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_cgrp: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip + * subtree. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ + cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ + if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ + (p_blkg)->q, false))) + +/** + * blkg_for_each_descendant_post - post-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_cgrp: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Similar to blkg_for_each_descendant_pre() but performs post-order + * traversal instead. Synchronization rules are the same. + */ +#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \ + cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ + if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ + (p_blkg)->q, false))) + /** * blk_get_rl - get request_list to use * @q: request_queue of interest diff --git a/block/blk-tag.c b/block/blk-tag.c index cc345e1..3f33d86 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -348,9 +348,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) */ max_depth = bqt->max_depth; if (!rq_is_sync(rq) && max_depth > 1) { - max_depth -= 2; - if (!max_depth) + switch (max_depth) { + case 2: max_depth = 1; + break; + case 3: + max_depth = 2; + break; + default: + max_depth -= 2; + } if (q->in_flight[BLK_RW_ASYNC] > max_depth) return 1; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3114622..08a32df 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -25,18 +25,61 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; -static void throtl_schedule_delayed_work(struct throtl_data *td, - unsigned long delay); - -struct throtl_rb_root { - struct rb_root rb; - struct rb_node *left; - unsigned int count; - unsigned long min_disptime; + +/* + * To implement hierarchical throttling, throtl_grps form a tree and bios + * are dispatched upwards level by level until they reach the top and get + * issued. When dispatching bios from the children and local group at each + * level, if the bios are dispatched into a single bio_list, there's a risk + * of a local or child group which can queue many bios at once filling up + * the list starving others. + * + * To avoid such starvation, dispatched bios are queued separately + * according to where they came from. When they are again dispatched to + * the parent, they're popped in round-robin order so that no single source + * hogs the dispatch window. + * + * throtl_qnode is used to keep the queued bios separated by their sources. + * Bios are queued to throtl_qnode which in turn is queued to + * throtl_service_queue and then dispatched in round-robin order. + * + * It's also used to track the reference counts on blkg's. A qnode always + * belongs to a throtl_grp and gets queued on itself or the parent, so + * incrementing the reference of the associated throtl_grp when a qnode is + * queued and decrementing when dequeued is enough to keep the whole blkg + * tree pinned while bios are in flight. + */ +struct throtl_qnode { + struct list_head node; /* service_queue->queued[] */ + struct bio_list bios; /* queued bios */ + struct throtl_grp *tg; /* tg this qnode belongs to */ }; -#define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \ - .count = 0, .min_disptime = 0} +struct throtl_service_queue { + struct throtl_service_queue *parent_sq; /* the parent service_queue */ + + /* + * Bios queued directly to this service_queue or dispatched from + * children throtl_grp's. + */ + struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ + unsigned int nr_queued[2]; /* number of queued bios */ + + /* + * RB tree of active children throtl_grp's, which are sorted by + * their ->disptime. + */ + struct rb_root pending_tree; /* RB tree of active tgs */ + struct rb_node *first_pending; /* first node in the tree */ + unsigned int nr_pending; /* # queued in the tree */ + unsigned long first_pending_disptime; /* disptime of the first tg */ + struct timer_list pending_timer; /* fires on first_pending_disptime */ +}; + +enum tg_state_flags { + THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ + THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ +}; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) @@ -52,9 +95,26 @@ struct throtl_grp { /* must be the first member */ struct blkg_policy_data pd; - /* active throtl group service_tree member */ + /* active throtl group service_queue member */ struct rb_node rb_node; + /* throtl_data this group belongs to */ + struct throtl_data *td; + + /* this group's service queue */ + struct throtl_service_queue service_queue; + + /* + * qnode_on_self is used when bios are directly queued to this + * throtl_grp so that local bios compete fairly with bios + * dispatched from children. qnode_on_parent is used when bios are + * dispatched from this throtl_grp into its parent and will compete + * with the sibling qnode_on_parents and the parent's + * qnode_on_self. + */ + struct throtl_qnode qnode_on_self[2]; + struct throtl_qnode qnode_on_parent[2]; + /* * Dispatch time in jiffies. This is the estimated time when group * will unthrottle and is ready to dispatch more bio. It is used as @@ -64,11 +124,8 @@ struct throtl_grp { unsigned int flags; - /* Two lists for READ and WRITE */ - struct bio_list bio_lists[2]; - - /* Number of queued bios on READ and WRITE lists */ - unsigned int nr_queued[2]; + /* are there any throtl rules between this group and td? */ + bool has_rules[2]; /* bytes per second rate limits */ uint64_t bps[2]; @@ -85,9 +142,6 @@ struct throtl_grp { unsigned long slice_start[2]; unsigned long slice_end[2]; - /* Some throttle limits got updated for the group */ - int limits_changed; - /* Per cpu stats pointer */ struct tg_stats_cpu __percpu *stats_cpu; @@ -98,7 +152,7 @@ struct throtl_grp { struct throtl_data { /* service tree for active throtl groups */ - struct throtl_rb_root tg_service_tree; + struct throtl_service_queue service_queue; struct request_queue *queue; @@ -111,9 +165,7 @@ struct throtl_data unsigned int nr_undestroyed_grps; /* Work for dispatching throttled bios */ - struct delayed_work throtl_work; - - int limits_changed; + struct work_struct dispatch_work; }; /* list and work item to allocate percpu group stats */ @@ -123,6 +175,8 @@ static LIST_HEAD(tg_stats_alloc_list); static void tg_stats_alloc_fn(struct work_struct *); static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); +static void throtl_pending_timer_fn(unsigned long arg); + static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct throtl_grp, pd) : NULL; @@ -143,41 +197,65 @@ static inline struct throtl_grp *td_root_tg(struct throtl_data *td) return blkg_to_tg(td->queue->root_blkg); } -enum tg_state_flags { - THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ -}; - -#define THROTL_TG_FNS(name) \ -static inline void throtl_mark_tg_##name(struct throtl_grp *tg) \ -{ \ - (tg)->flags |= (1 << THROTL_TG_FLAG_##name); \ -} \ -static inline void throtl_clear_tg_##name(struct throtl_grp *tg) \ -{ \ - (tg)->flags &= ~(1 << THROTL_TG_FLAG_##name); \ -} \ -static inline int throtl_tg_##name(const struct throtl_grp *tg) \ -{ \ - return ((tg)->flags & (1 << THROTL_TG_FLAG_##name)) != 0; \ +/** + * sq_to_tg - return the throl_grp the specified service queue belongs to + * @sq: the throtl_service_queue of interest + * + * Return the throtl_grp @sq belongs to. If @sq is the top-level one + * embedded in throtl_data, %NULL is returned. + */ +static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq) +{ + if (sq && sq->parent_sq) + return container_of(sq, struct throtl_grp, service_queue); + else + return NULL; } -THROTL_TG_FNS(on_rr); +/** + * sq_to_td - return throtl_data the specified service queue belongs to + * @sq: the throtl_service_queue of interest + * + * A service_queue can be embeded in either a throtl_grp or throtl_data. + * Determine the associated throtl_data accordingly and return it. + */ +static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) +{ + struct throtl_grp *tg = sq_to_tg(sq); -#define throtl_log_tg(td, tg, fmt, args...) do { \ - char __pbuf[128]; \ + if (tg) + return tg->td; + else + return container_of(sq, struct throtl_data, service_queue); +} + +/** + * throtl_log - log debug message via blktrace + * @sq: the service_queue being reported + * @fmt: printf format string + * @args: printf args + * + * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a + * throtl_grp; otherwise, just "throtl". + * + * TODO: this should be made a function and name formatting should happen + * after testing whether blktrace is enabled. + */ +#define throtl_log(sq, fmt, args...) do { \ + struct throtl_grp *__tg = sq_to_tg((sq)); \ + struct throtl_data *__td = sq_to_td((sq)); \ + \ + (void)__td; \ + if ((__tg)) { \ + char __pbuf[128]; \ \ - blkg_path(tg_to_blkg(tg), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((td)->queue, "throtl %s " fmt, __pbuf, ##args); \ + blkg_path(tg_to_blkg(__tg), __pbuf, sizeof(__pbuf)); \ + blk_add_trace_msg(__td->queue, "throtl %s " fmt, __pbuf, ##args); \ + } else { \ + blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \ + } \ } while (0) -#define throtl_log(td, fmt, args...) \ - blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) - -static inline unsigned int total_nr_queued(struct throtl_data *td) -{ - return td->nr_queued[0] + td->nr_queued[1]; -} - /* * Worker for allocating per cpu stat for tgs. This is scheduled on the * system_wq once there are some groups on the alloc_list waiting for @@ -215,15 +293,141 @@ alloc_stats: goto alloc_stats; } +static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) +{ + INIT_LIST_HEAD(&qn->node); + bio_list_init(&qn->bios); + qn->tg = tg; +} + +/** + * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it + * @bio: bio being added + * @qn: qnode to add bio to + * @queued: the service_queue->queued[] list @qn belongs to + * + * Add @bio to @qn and put @qn on @queued if it's not already on. + * @qn->tg's reference count is bumped when @qn is activated. See the + * comment on top of throtl_qnode definition for details. + */ +static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, + struct list_head *queued) +{ + bio_list_add(&qn->bios, bio); + if (list_empty(&qn->node)) { + list_add_tail(&qn->node, queued); + blkg_get(tg_to_blkg(qn->tg)); + } +} + +/** + * throtl_peek_queued - peek the first bio on a qnode list + * @queued: the qnode list to peek + */ +static struct bio *throtl_peek_queued(struct list_head *queued) +{ + struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); + struct bio *bio; + + if (list_empty(queued)) + return NULL; + + bio = bio_list_peek(&qn->bios); + WARN_ON_ONCE(!bio); + return bio; +} + +/** + * throtl_pop_queued - pop the first bio form a qnode list + * @queued: the qnode list to pop a bio from + * @tg_to_put: optional out argument for throtl_grp to put + * + * Pop the first bio from the qnode list @queued. After popping, the first + * qnode is removed from @queued if empty or moved to the end of @queued so + * that the popping order is round-robin. + * + * When the first qnode is removed, its associated throtl_grp should be put + * too. If @tg_to_put is NULL, this function automatically puts it; + * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is + * responsible for putting it. + */ +static struct bio *throtl_pop_queued(struct list_head *queued, + struct throtl_grp **tg_to_put) +{ + struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); + struct bio *bio; + + if (list_empty(queued)) + return NULL; + + bio = bio_list_pop(&qn->bios); + WARN_ON_ONCE(!bio); + + if (bio_list_empty(&qn->bios)) { + list_del_init(&qn->node); + if (tg_to_put) + *tg_to_put = qn->tg; + else + blkg_put(tg_to_blkg(qn->tg)); + } else { + list_move_tail(&qn->node, queued); + } + + return bio; +} + +/* init a service_queue, assumes the caller zeroed it */ +static void throtl_service_queue_init(struct throtl_service_queue *sq, + struct throtl_service_queue *parent_sq) +{ + INIT_LIST_HEAD(&sq->queued[0]); + INIT_LIST_HEAD(&sq->queued[1]); + sq->pending_tree = RB_ROOT; + sq->parent_sq = parent_sq; + setup_timer(&sq->pending_timer, throtl_pending_timer_fn, + (unsigned long)sq); +} + +static void throtl_service_queue_exit(struct throtl_service_queue *sq) +{ + del_timer_sync(&sq->pending_timer); +} + static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_data *td = blkg->q->td; + struct throtl_service_queue *parent_sq; unsigned long flags; + int rw; + + /* + * If sane_hierarchy is enabled, we switch to properly hierarchical + * behavior where limits on a given throtl_grp are applied to the + * whole subtree rather than just the group itself. e.g. If 16M + * read_bps limit is set on the root group, the whole system can't + * exceed 16M for the device. + * + * If sane_hierarchy is not enabled, the broken flat hierarchy + * behavior is retained where all throtl_grps are treated as if + * they're all separate root groups right below throtl_data. + * Limits of a group don't interact with limits of other groups + * regardless of the position of the group in the hierarchy. + */ + parent_sq = &td->service_queue; + + if (cgroup_sane_behavior(blkg->blkcg->css.cgroup) && blkg->parent) + parent_sq = &blkg_to_tg(blkg->parent)->service_queue; + + throtl_service_queue_init(&tg->service_queue, parent_sq); + + for (rw = READ; rw <= WRITE; rw++) { + throtl_qnode_init(&tg->qnode_on_self[rw], tg); + throtl_qnode_init(&tg->qnode_on_parent[rw], tg); + } RB_CLEAR_NODE(&tg->rb_node); - bio_list_init(&tg->bio_lists[0]); - bio_list_init(&tg->bio_lists[1]); - tg->limits_changed = false; + tg->td = td; tg->bps[READ] = -1; tg->bps[WRITE] = -1; @@ -241,6 +445,30 @@ static void throtl_pd_init(struct blkcg_gq *blkg) spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); } +/* + * Set has_rules[] if @tg or any of its parents have limits configured. + * This doesn't require walking up to the top of the hierarchy as the + * parent's has_rules[] is guaranteed to be correct. + */ +static void tg_update_has_rules(struct throtl_grp *tg) +{ + struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq); + int rw; + + for (rw = READ; rw <= WRITE; rw++) + tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) || + (tg->bps[rw] != -1 || tg->iops[rw] != -1); +} + +static void throtl_pd_online(struct blkcg_gq *blkg) +{ + /* + * We don't want new groups to escape the limits of its ancestors. + * Update has_rules[] after a new group is brought online. + */ + tg_update_has_rules(blkg_to_tg(blkg)); +} + static void throtl_pd_exit(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -251,6 +479,8 @@ static void throtl_pd_exit(struct blkcg_gq *blkg) spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); free_percpu(tg->stats_cpu); + + throtl_service_queue_exit(&tg->service_queue); } static void throtl_pd_reset_stats(struct blkcg_gq *blkg) @@ -309,17 +539,18 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, return tg; } -static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root) +static struct throtl_grp * +throtl_rb_first(struct throtl_service_queue *parent_sq) { /* Service tree is empty */ - if (!root->count) + if (!parent_sq->nr_pending) return NULL; - if (!root->left) - root->left = rb_first(&root->rb); + if (!parent_sq->first_pending) + parent_sq->first_pending = rb_first(&parent_sq->pending_tree); - if (root->left) - return rb_entry_tg(root->left); + if (parent_sq->first_pending) + return rb_entry_tg(parent_sq->first_pending); return NULL; } @@ -330,29 +561,30 @@ static void rb_erase_init(struct rb_node *n, struct rb_root *root) RB_CLEAR_NODE(n); } -static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root) +static void throtl_rb_erase(struct rb_node *n, + struct throtl_service_queue *parent_sq) { - if (root->left == n) - root->left = NULL; - rb_erase_init(n, &root->rb); - --root->count; + if (parent_sq->first_pending == n) + parent_sq->first_pending = NULL; + rb_erase_init(n, &parent_sq->pending_tree); + --parent_sq->nr_pending; } -static void update_min_dispatch_time(struct throtl_rb_root *st) +static void update_min_dispatch_time(struct throtl_service_queue *parent_sq) { struct throtl_grp *tg; - tg = throtl_rb_first(st); + tg = throtl_rb_first(parent_sq); if (!tg) return; - st->min_disptime = tg->disptime; + parent_sq->first_pending_disptime = tg->disptime; } -static void -tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg) +static void tg_service_queue_add(struct throtl_grp *tg) { - struct rb_node **node = &st->rb.rb_node; + struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; + struct rb_node **node = &parent_sq->pending_tree.rb_node; struct rb_node *parent = NULL; struct throtl_grp *__tg; unsigned long key = tg->disptime; @@ -371,89 +603,135 @@ tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg) } if (left) - st->left = &tg->rb_node; + parent_sq->first_pending = &tg->rb_node; rb_link_node(&tg->rb_node, parent, node); - rb_insert_color(&tg->rb_node, &st->rb); + rb_insert_color(&tg->rb_node, &parent_sq->pending_tree); } -static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) +static void __throtl_enqueue_tg(struct throtl_grp *tg) { - struct throtl_rb_root *st = &td->tg_service_tree; + tg_service_queue_add(tg); + tg->flags |= THROTL_TG_PENDING; + tg->service_queue.parent_sq->nr_pending++; +} - tg_service_tree_add(st, tg); - throtl_mark_tg_on_rr(tg); - st->count++; +static void throtl_enqueue_tg(struct throtl_grp *tg) +{ + if (!(tg->flags & THROTL_TG_PENDING)) + __throtl_enqueue_tg(tg); } -static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) +static void __throtl_dequeue_tg(struct throtl_grp *tg) { - if (!throtl_tg_on_rr(tg)) - __throtl_enqueue_tg(td, tg); + throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq); + tg->flags &= ~THROTL_TG_PENDING; } -static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) +static void throtl_dequeue_tg(struct throtl_grp *tg) { - throtl_rb_erase(&tg->rb_node, &td->tg_service_tree); - throtl_clear_tg_on_rr(tg); + if (tg->flags & THROTL_TG_PENDING) + __throtl_dequeue_tg(tg); } -static void throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) +/* Call with queue lock held */ +static void throtl_schedule_pending_timer(struct throtl_service_queue *sq, + unsigned long expires) { - if (throtl_tg_on_rr(tg)) - __throtl_dequeue_tg(td, tg); + mod_timer(&sq->pending_timer, expires); + throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu", + expires - jiffies, jiffies); } -static void throtl_schedule_next_dispatch(struct throtl_data *td) +/** + * throtl_schedule_next_dispatch - schedule the next dispatch cycle + * @sq: the service_queue to schedule dispatch for + * @force: force scheduling + * + * Arm @sq->pending_timer so that the next dispatch cycle starts on the + * dispatch time of the first pending child. Returns %true if either timer + * is armed or there's no pending child left. %false if the current + * dispatch window is still open and the caller should continue + * dispatching. + * + * If @force is %true, the dispatch timer is always scheduled and this + * function is guaranteed to return %true. This is to be used when the + * caller can't dispatch itself and needs to invoke pending_timer + * unconditionally. Note that forced scheduling is likely to induce short + * delay before dispatch starts even if @sq->first_pending_disptime is not + * in the future and thus shouldn't be used in hot paths. + */ +static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq, + bool force) { - struct throtl_rb_root *st = &td->tg_service_tree; + /* any pending children left? */ + if (!sq->nr_pending) + return true; - /* - * If there are more bios pending, schedule more work. - */ - if (!total_nr_queued(td)) - return; + update_min_dispatch_time(sq); - BUG_ON(!st->count); + /* is the next dispatch time in the future? */ + if (force || time_after(sq->first_pending_disptime, jiffies)) { + throtl_schedule_pending_timer(sq, sq->first_pending_disptime); + return true; + } - update_min_dispatch_time(st); + /* tell the caller to continue dispatching */ + return false; +} - if (time_before_eq(st->min_disptime, jiffies)) - throtl_schedule_delayed_work(td, 0); - else - throtl_schedule_delayed_work(td, (st->min_disptime - jiffies)); +static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg, + bool rw, unsigned long start) +{ + tg->bytes_disp[rw] = 0; + tg->io_disp[rw] = 0; + + /* + * Previous slice has expired. We must have trimmed it after last + * bio dispatch. That means since start of last slice, we never used + * that bandwidth. Do try to make use of that bandwidth while giving + * credit. + */ + if (time_after_eq(start, tg->slice_start[rw])) + tg->slice_start[rw] = start; + + tg->slice_end[rw] = jiffies + throtl_slice; + throtl_log(&tg->service_queue, + "[%c] new slice with credit start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', tg->slice_start[rw], + tg->slice_end[rw], jiffies); } -static inline void -throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) +static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw) { tg->bytes_disp[rw] = 0; tg->io_disp[rw] = 0; tg->slice_start[rw] = jiffies; tg->slice_end[rw] = jiffies + throtl_slice; - throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu", - rw == READ ? 'R' : 'W', tg->slice_start[rw], - tg->slice_end[rw], jiffies); + throtl_log(&tg->service_queue, + "[%c] new slice start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', tg->slice_start[rw], + tg->slice_end[rw], jiffies); } -static inline void throtl_set_slice_end(struct throtl_data *td, - struct throtl_grp *tg, bool rw, unsigned long jiffy_end) +static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw, + unsigned long jiffy_end) { tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); } -static inline void throtl_extend_slice(struct throtl_data *td, - struct throtl_grp *tg, bool rw, unsigned long jiffy_end) +static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, + unsigned long jiffy_end) { tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); - throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu", - rw == READ ? 'R' : 'W', tg->slice_start[rw], - tg->slice_end[rw], jiffies); + throtl_log(&tg->service_queue, + "[%c] extend slice start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', tg->slice_start[rw], + tg->slice_end[rw], jiffies); } /* Determine if previously allocated or extended slice is complete or not */ -static bool -throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw) +static bool throtl_slice_used(struct throtl_grp *tg, bool rw) { if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) return 0; @@ -462,8 +740,7 @@ throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw) } /* Trim the used slices and adjust slice start accordingly */ -static inline void -throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) +static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw) { unsigned long nr_slices, time_elapsed, io_trim; u64 bytes_trim, tmp; @@ -475,7 +752,7 @@ throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) * renewed. Don't try to trim the slice if slice is used. A new * slice will start when appropriate. */ - if (throtl_slice_used(td, tg, rw)) + if (throtl_slice_used(tg, rw)) return; /* @@ -486,7 +763,7 @@ throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) * is bad because it does not allow new slice to start. */ - throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice); + throtl_set_slice_end(tg, rw, jiffies + throtl_slice); time_elapsed = jiffies - tg->slice_start[rw]; @@ -515,14 +792,14 @@ throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) tg->slice_start[rw] += nr_slices * throtl_slice; - throtl_log_tg(td, tg, "[%c] trim slice nr=%lu bytes=%llu io=%lu" - " start=%lu end=%lu jiffies=%lu", - rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim, - tg->slice_start[rw], tg->slice_end[rw], jiffies); + throtl_log(&tg->service_queue, + "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim, + tg->slice_start[rw], tg->slice_end[rw], jiffies); } -static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg, - struct bio *bio, unsigned long *wait) +static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, + unsigned long *wait) { bool rw = bio_data_dir(bio); unsigned int io_allowed; @@ -571,8 +848,8 @@ static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg, return 0; } -static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg, - struct bio *bio, unsigned long *wait) +static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, + unsigned long *wait) { bool rw = bio_data_dir(bio); u64 bytes_allowed, extra_bytes, tmp; @@ -613,18 +890,12 @@ static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg, return 0; } -static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) { - if (tg->bps[rw] == -1 && tg->iops[rw] == -1) - return 1; - return 0; -} - /* * Returns whether one can dispatch a bio or not. Also returns approx number * of jiffies to wait before this bio is with-in IO rate and can be dispatched */ -static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, - struct bio *bio, unsigned long *wait) +static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, + unsigned long *wait) { bool rw = bio_data_dir(bio); unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; @@ -635,7 +906,8 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, * this function with a different bio if there are other bios * queued. */ - BUG_ON(tg->nr_queued[rw] && bio != bio_list_peek(&tg->bio_lists[rw])); + BUG_ON(tg->service_queue.nr_queued[rw] && + bio != throtl_peek_queued(&tg->service_queue.queued[rw])); /* If tg->bps = -1, then BW is unlimited */ if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { @@ -649,15 +921,15 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, * existing slice to make sure it is at least throtl_slice interval * long since now. */ - if (throtl_slice_used(td, tg, rw)) - throtl_start_new_slice(td, tg, rw); + if (throtl_slice_used(tg, rw)) + throtl_start_new_slice(tg, rw); else { if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) - throtl_extend_slice(td, tg, rw, jiffies + throtl_slice); + throtl_extend_slice(tg, rw, jiffies + throtl_slice); } - if (tg_with_in_bps_limit(td, tg, bio, &bps_wait) - && tg_with_in_iops_limit(td, tg, bio, &iops_wait)) { + if (tg_with_in_bps_limit(tg, bio, &bps_wait) && + tg_with_in_iops_limit(tg, bio, &iops_wait)) { if (wait) *wait = 0; return 1; @@ -669,7 +941,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, *wait = max_wait; if (time_before(tg->slice_end[rw], jiffies + max_wait)) - throtl_extend_slice(td, tg, rw, jiffies + max_wait); + throtl_extend_slice(tg, rw, jiffies + max_wait); return 0; } @@ -708,65 +980,136 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); + /* + * REQ_THROTTLED is used to prevent the same bio to be throttled + * more than once as a throttled bio will go through blk-throtl the + * second time when it eventually gets issued. Set it when a bio + * is being charged to a tg. + * + * Dispatch stats aren't recursive and each @bio should only be + * accounted by the @tg it was originally associated with. Let's + * update the stats when setting REQ_THROTTLED for the first time + * which is guaranteed to be for the @bio's original tg. + */ + if (!(bio->bi_rw & REQ_THROTTLED)) { + bio->bi_rw |= REQ_THROTTLED; + throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, + bio->bi_rw); + } } -static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, - struct bio *bio) +/** + * throtl_add_bio_tg - add a bio to the specified throtl_grp + * @bio: bio to add + * @qn: qnode to use + * @tg: the target throtl_grp + * + * Add @bio to @tg's service_queue using @qn. If @qn is not specified, + * tg->qnode_on_self[] is used. + */ +static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn, + struct throtl_grp *tg) { + struct throtl_service_queue *sq = &tg->service_queue; bool rw = bio_data_dir(bio); - bio_list_add(&tg->bio_lists[rw], bio); - /* Take a bio reference on tg */ - blkg_get(tg_to_blkg(tg)); - tg->nr_queued[rw]++; - td->nr_queued[rw]++; - throtl_enqueue_tg(td, tg); + if (!qn) + qn = &tg->qnode_on_self[rw]; + + /* + * If @tg doesn't currently have any bios queued in the same + * direction, queueing @bio can change when @tg should be + * dispatched. Mark that @tg was empty. This is automatically + * cleaered on the next tg_update_disptime(). + */ + if (!sq->nr_queued[rw]) + tg->flags |= THROTL_TG_WAS_EMPTY; + + throtl_qnode_add_bio(bio, qn, &sq->queued[rw]); + + sq->nr_queued[rw]++; + throtl_enqueue_tg(tg); } -static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg) +static void tg_update_disptime(struct throtl_grp *tg) { + struct throtl_service_queue *sq = &tg->service_queue; unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; struct bio *bio; - if ((bio = bio_list_peek(&tg->bio_lists[READ]))) - tg_may_dispatch(td, tg, bio, &read_wait); + if ((bio = throtl_peek_queued(&sq->queued[READ]))) + tg_may_dispatch(tg, bio, &read_wait); - if ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) - tg_may_dispatch(td, tg, bio, &write_wait); + if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) + tg_may_dispatch(tg, bio, &write_wait); min_wait = min(read_wait, write_wait); disptime = jiffies + min_wait; /* Update dispatch time */ - throtl_dequeue_tg(td, tg); + throtl_dequeue_tg(tg); tg->disptime = disptime; - throtl_enqueue_tg(td, tg); + throtl_enqueue_tg(tg); + + /* see throtl_add_bio_tg() */ + tg->flags &= ~THROTL_TG_WAS_EMPTY; } -static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, - bool rw, struct bio_list *bl) +static void start_parent_slice_with_credit(struct throtl_grp *child_tg, + struct throtl_grp *parent_tg, bool rw) { - struct bio *bio; + if (throtl_slice_used(parent_tg, rw)) { + throtl_start_new_slice_with_credit(parent_tg, rw, + child_tg->slice_start[rw]); + } + +} - bio = bio_list_pop(&tg->bio_lists[rw]); - tg->nr_queued[rw]--; - /* Drop bio reference on blkg */ - blkg_put(tg_to_blkg(tg)); +static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw) +{ + struct throtl_service_queue *sq = &tg->service_queue; + struct throtl_service_queue *parent_sq = sq->parent_sq; + struct throtl_grp *parent_tg = sq_to_tg(parent_sq); + struct throtl_grp *tg_to_put = NULL; + struct bio *bio; - BUG_ON(td->nr_queued[rw] <= 0); - td->nr_queued[rw]--; + /* + * @bio is being transferred from @tg to @parent_sq. Popping a bio + * from @tg may put its reference and @parent_sq might end up + * getting released prematurely. Remember the tg to put and put it + * after @bio is transferred to @parent_sq. + */ + bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put); + sq->nr_queued[rw]--; throtl_charge_bio(tg, bio); - bio_list_add(bl, bio); - bio->bi_rw |= REQ_THROTTLED; - throtl_trim_slice(td, tg, rw); + /* + * If our parent is another tg, we just need to transfer @bio to + * the parent using throtl_add_bio_tg(). If our parent is + * @td->service_queue, @bio is ready to be issued. Put it on its + * bio_lists[] and decrease total number queued. The caller is + * responsible for issuing these bios. + */ + if (parent_tg) { + throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg); + start_parent_slice_with_credit(tg, parent_tg, rw); + } else { + throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw], + &parent_sq->queued[rw]); + BUG_ON(tg->td->nr_queued[rw] <= 0); + tg->td->nr_queued[rw]--; + } + + throtl_trim_slice(tg, rw); + + if (tg_to_put) + blkg_put(tg_to_blkg(tg_to_put)); } -static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, - struct bio_list *bl) +static int throtl_dispatch_tg(struct throtl_grp *tg) { + struct throtl_service_queue *sq = &tg->service_queue; unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = throtl_grp_quantum*3/4; unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; @@ -774,20 +1117,20 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, /* Try to dispatch 75% READS and 25% WRITES */ - while ((bio = bio_list_peek(&tg->bio_lists[READ])) - && tg_may_dispatch(td, tg, bio, NULL)) { + while ((bio = throtl_peek_queued(&sq->queued[READ])) && + tg_may_dispatch(tg, bio, NULL)) { - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); + tg_dispatch_one_bio(tg, bio_data_dir(bio)); nr_reads++; if (nr_reads >= max_nr_reads) break; } - while ((bio = bio_list_peek(&tg->bio_lists[WRITE])) - && tg_may_dispatch(td, tg, bio, NULL)) { + while ((bio = throtl_peek_queued(&sq->queued[WRITE])) && + tg_may_dispatch(tg, bio, NULL)) { - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); + tg_dispatch_one_bio(tg, bio_data_dir(bio)); nr_writes++; if (nr_writes >= max_nr_writes) @@ -797,14 +1140,13 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, return nr_reads + nr_writes; } -static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) +static int throtl_select_dispatch(struct throtl_service_queue *parent_sq) { unsigned int nr_disp = 0; - struct throtl_grp *tg; - struct throtl_rb_root *st = &td->tg_service_tree; while (1) { - tg = throtl_rb_first(st); + struct throtl_grp *tg = throtl_rb_first(parent_sq); + struct throtl_service_queue *sq = &tg->service_queue; if (!tg) break; @@ -812,14 +1154,12 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) if (time_before(jiffies, tg->disptime)) break; - throtl_dequeue_tg(td, tg); + throtl_dequeue_tg(tg); - nr_disp += throtl_dispatch_tg(td, tg, bl); + nr_disp += throtl_dispatch_tg(tg); - if (tg->nr_queued[0] || tg->nr_queued[1]) { - tg_update_disptime(td, tg); - throtl_enqueue_tg(td, tg); - } + if (sq->nr_queued[0] || sq->nr_queued[1]) + tg_update_disptime(tg); if (nr_disp >= throtl_quantum) break; @@ -828,111 +1168,111 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) return nr_disp; } -static void throtl_process_limit_change(struct throtl_data *td) +/** + * throtl_pending_timer_fn - timer function for service_queue->pending_timer + * @arg: the throtl_service_queue being serviced + * + * This timer is armed when a child throtl_grp with active bio's become + * pending and queued on the service_queue's pending_tree and expires when + * the first child throtl_grp should be dispatched. This function + * dispatches bio's from the children throtl_grps to the parent + * service_queue. + * + * If the parent's parent is another throtl_grp, dispatching is propagated + * by either arming its pending_timer or repeating dispatch directly. If + * the top-level service_tree is reached, throtl_data->dispatch_work is + * kicked so that the ready bio's are issued. + */ +static void throtl_pending_timer_fn(unsigned long arg) { + struct throtl_service_queue *sq = (void *)arg; + struct throtl_grp *tg = sq_to_tg(sq); + struct throtl_data *td = sq_to_td(sq); struct request_queue *q = td->queue; - struct blkcg_gq *blkg, *n; - - if (!td->limits_changed) - return; - - xchg(&td->limits_changed, false); - - throtl_log(td, "limits changed"); - - list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { - struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_service_queue *parent_sq; + bool dispatched; + int ret; - if (!tg->limits_changed) - continue; + spin_lock_irq(q->queue_lock); +again: + parent_sq = sq->parent_sq; + dispatched = false; + + while (true) { + throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u", + sq->nr_queued[READ] + sq->nr_queued[WRITE], + sq->nr_queued[READ], sq->nr_queued[WRITE]); + + ret = throtl_select_dispatch(sq); + if (ret) { + throtl_log(sq, "bios disp=%u", ret); + dispatched = true; + } - if (!xchg(&tg->limits_changed, false)) - continue; + if (throtl_schedule_next_dispatch(sq, false)) + break; - throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" - " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], - tg->iops[READ], tg->iops[WRITE]); + /* this dispatch windows is still open, relax and repeat */ + spin_unlock_irq(q->queue_lock); + cpu_relax(); + spin_lock_irq(q->queue_lock); + } - /* - * Restart the slices for both READ and WRITES. It - * might happen that a group's limit are dropped - * suddenly and we don't want to account recently - * dispatched IO with new low rate - */ - throtl_start_new_slice(td, tg, 0); - throtl_start_new_slice(td, tg, 1); + if (!dispatched) + goto out_unlock; - if (throtl_tg_on_rr(tg)) - tg_update_disptime(td, tg); + if (parent_sq) { + /* @parent_sq is another throl_grp, propagate dispatch */ + if (tg->flags & THROTL_TG_WAS_EMPTY) { + tg_update_disptime(tg); + if (!throtl_schedule_next_dispatch(parent_sq, false)) { + /* window is already open, repeat dispatching */ + sq = parent_sq; + tg = sq_to_tg(sq); + goto again; + } + } + } else { + /* reached the top-level, queue issueing */ + queue_work(kthrotld_workqueue, &td->dispatch_work); } +out_unlock: + spin_unlock_irq(q->queue_lock); } -/* Dispatch throttled bios. Should be called without queue lock held. */ -static int throtl_dispatch(struct request_queue *q) +/** + * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work + * @work: work item being executed + * + * This function is queued for execution when bio's reach the bio_lists[] + * of throtl_data->service_queue. Those bio's are ready and issued by this + * function. + */ +void blk_throtl_dispatch_work_fn(struct work_struct *work) { - struct throtl_data *td = q->td; - unsigned int nr_disp = 0; + struct throtl_data *td = container_of(work, struct throtl_data, + dispatch_work); + struct throtl_service_queue *td_sq = &td->service_queue; + struct request_queue *q = td->queue; struct bio_list bio_list_on_stack; struct bio *bio; struct blk_plug plug; - - spin_lock_irq(q->queue_lock); - - throtl_process_limit_change(td); - - if (!total_nr_queued(td)) - goto out; + int rw; bio_list_init(&bio_list_on_stack); - throtl_log(td, "dispatch nr_queued=%u read=%u write=%u", - total_nr_queued(td), td->nr_queued[READ], - td->nr_queued[WRITE]); - - nr_disp = throtl_select_dispatch(td, &bio_list_on_stack); - - if (nr_disp) - throtl_log(td, "bios disp=%u", nr_disp); - - throtl_schedule_next_dispatch(td); -out: + spin_lock_irq(q->queue_lock); + for (rw = READ; rw <= WRITE; rw++) + while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) + bio_list_add(&bio_list_on_stack, bio); spin_unlock_irq(q->queue_lock); - /* - * If we dispatched some requests, unplug the queue to make sure - * immediate dispatch - */ - if (nr_disp) { + if (!bio_list_empty(&bio_list_on_stack)) { blk_start_plug(&plug); while((bio = bio_list_pop(&bio_list_on_stack))) generic_make_request(bio); blk_finish_plug(&plug); } - return nr_disp; -} - -void blk_throtl_work(struct work_struct *work) -{ - struct throtl_data *td = container_of(work, struct throtl_data, - throtl_work.work); - struct request_queue *q = td->queue; - - throtl_dispatch(q); -} - -/* Call with queue lock held */ -static void -throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) -{ - - struct delayed_work *dwork = &td->throtl_work; - - /* schedule work if limits changed even if no bio is queued */ - if (total_nr_queued(td) || td->limits_changed) { - mod_delayed_work(kthrotld_workqueue, dwork, delay); - throtl_log(td, "schedule work. delay=%lu jiffies=%lu", - delay, jiffies); - } } static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, @@ -1007,7 +1347,9 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; struct throtl_grp *tg; - struct throtl_data *td; + struct throtl_service_queue *sq; + struct blkcg_gq *blkg; + struct cgroup *pos_cgrp; int ret; ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx); @@ -1015,7 +1357,7 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, return ret; tg = blkg_to_tg(ctx.blkg); - td = ctx.blkg->q->td; + sq = &tg->service_queue; if (!ctx.v) ctx.v = -1; @@ -1025,10 +1367,37 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, else *(unsigned int *)((void *)tg + cft->private) = ctx.v; - /* XXX: we don't need the following deferred processing */ - xchg(&tg->limits_changed, true); - xchg(&td->limits_changed, true); - throtl_schedule_delayed_work(td, 0); + throtl_log(&tg->service_queue, + "limit change rbps=%llu wbps=%llu riops=%u wiops=%u", + tg->bps[READ], tg->bps[WRITE], + tg->iops[READ], tg->iops[WRITE]); + + /* + * Update has_rules[] flags for the updated tg's subtree. A tg is + * considered to have rules if either the tg itself or any of its + * ancestors has rules. This identifies groups without any + * restrictions in the whole hierarchy and allows them to bypass + * blk-throttle. + */ + tg_update_has_rules(tg); + blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg) + tg_update_has_rules(blkg_to_tg(blkg)); + + /* + * We're already holding queue_lock and know @tg is valid. Let's + * apply the new config directly. + * + * Restart the slices for both READ and WRITES. It might happen + * that a group's limit are dropped suddenly and we don't want to + * account recently dispatched IO with new low rate. + */ + throtl_start_new_slice(tg, 0); + throtl_start_new_slice(tg, 1); + + if (tg->flags & THROTL_TG_PENDING) { + tg_update_disptime(tg); + throtl_schedule_next_dispatch(sq->parent_sq, true); + } blkg_conf_finish(&ctx); return 0; @@ -1092,7 +1461,7 @@ static void throtl_shutdown_wq(struct request_queue *q) { struct throtl_data *td = q->td; - cancel_delayed_work_sync(&td->throtl_work); + cancel_work_sync(&td->dispatch_work); } static struct blkcg_policy blkcg_policy_throtl = { @@ -1100,6 +1469,7 @@ static struct blkcg_policy blkcg_policy_throtl = { .cftypes = throtl_files, .pd_init_fn = throtl_pd_init, + .pd_online_fn = throtl_pd_online, .pd_exit_fn = throtl_pd_exit, .pd_reset_stats_fn = throtl_pd_reset_stats, }; @@ -1107,15 +1477,16 @@ static struct blkcg_policy blkcg_policy_throtl = { bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { struct throtl_data *td = q->td; + struct throtl_qnode *qn = NULL; struct throtl_grp *tg; - bool rw = bio_data_dir(bio), update_disptime = true; + struct throtl_service_queue *sq; + bool rw = bio_data_dir(bio); struct blkcg *blkcg; bool throttled = false; - if (bio->bi_rw & REQ_THROTTLED) { - bio->bi_rw &= ~REQ_THROTTLED; + /* see throtl_charge_bio() */ + if (bio->bi_rw & REQ_THROTTLED) goto out; - } /* * A throtl_grp pointer retrieved under rcu can be used to access @@ -1126,7 +1497,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) blkcg = bio_blkcg(bio); tg = throtl_lookup_tg(td, blkcg); if (tg) { - if (tg_no_rule_group(tg, rw)) { + if (!tg->has_rules[rw]) { throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); goto out_unlock_rcu; @@ -1142,18 +1513,18 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) if (unlikely(!tg)) goto out_unlock; - if (tg->nr_queued[rw]) { - /* - * There is already another bio queued in same dir. No - * need to update dispatch time. - */ - update_disptime = false; - goto queue_bio; + sq = &tg->service_queue; - } + while (true) { + /* throtl is FIFO - if bios are already queued, should queue */ + if (sq->nr_queued[rw]) + break; + + /* if above limits, break to queue */ + if (!tg_may_dispatch(tg, bio, NULL)) + break; - /* Bio is with-in rate limit of group */ - if (tg_may_dispatch(td, tg, bio, NULL)) { + /* within limits, let's charge and dispatch directly */ throtl_charge_bio(tg, bio); /* @@ -1167,25 +1538,41 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * * So keep on trimming slice even if bio is not queued. */ - throtl_trim_slice(td, tg, rw); - goto out_unlock; + throtl_trim_slice(tg, rw); + + /* + * @bio passed through this layer without being throttled. + * Climb up the ladder. If we''re already at the top, it + * can be executed directly. + */ + qn = &tg->qnode_on_parent[rw]; + sq = sq->parent_sq; + tg = sq_to_tg(sq); + if (!tg) + goto out_unlock; } -queue_bio: - throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" - " iodisp=%u iops=%u queued=%d/%d", - rw == READ ? 'R' : 'W', - tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], - tg->io_disp[rw], tg->iops[rw], - tg->nr_queued[READ], tg->nr_queued[WRITE]); + /* out-of-limit, queue to @tg */ + throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d", + rw == READ ? 'R' : 'W', + tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], + tg->io_disp[rw], tg->iops[rw], + sq->nr_queued[READ], sq->nr_queued[WRITE]); bio_associate_current(bio); - throtl_add_bio_tg(q->td, tg, bio); + tg->td->nr_queued[rw]++; + throtl_add_bio_tg(bio, qn, tg); throttled = true; - if (update_disptime) { - tg_update_disptime(td, tg); - throtl_schedule_next_dispatch(td); + /* + * Update @tg's dispatch time and force schedule dispatch if @tg + * was empty before @bio. The forced scheduling isn't likely to + * cause undue delay as @bio is likely to be dispatched directly if + * its @tg's disptime is not in the future. + */ + if (tg->flags & THROTL_TG_WAS_EMPTY) { + tg_update_disptime(tg); + throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true); } out_unlock: @@ -1193,9 +1580,38 @@ out_unlock: out_unlock_rcu: rcu_read_unlock(); out: + /* + * As multiple blk-throtls may stack in the same issue path, we + * don't want bios to leave with the flag set. Clear the flag if + * being issued. + */ + if (!throttled) + bio->bi_rw &= ~REQ_THROTTLED; return throttled; } +/* + * Dispatch all bios from all children tg's queued on @parent_sq. On + * return, @parent_sq is guaranteed to not have any active children tg's + * and all bios from previously active tg's are on @parent_sq->bio_lists[]. + */ +static void tg_drain_bios(struct throtl_service_queue *parent_sq) +{ + struct throtl_grp *tg; + + while ((tg = throtl_rb_first(parent_sq))) { + struct throtl_service_queue *sq = &tg->service_queue; + struct bio *bio; + + throtl_dequeue_tg(tg); + + while ((bio = throtl_peek_queued(&sq->queued[READ]))) + tg_dispatch_one_bio(tg, bio_data_dir(bio)); + while ((bio = throtl_peek_queued(&sq->queued[WRITE]))) + tg_dispatch_one_bio(tg, bio_data_dir(bio)); + } +} + /** * blk_throtl_drain - drain throttled bios * @q: request_queue to drain throttled bios for @@ -1206,27 +1622,36 @@ void blk_throtl_drain(struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { struct throtl_data *td = q->td; - struct throtl_rb_root *st = &td->tg_service_tree; - struct throtl_grp *tg; - struct bio_list bl; + struct blkcg_gq *blkg; + struct cgroup *pos_cgrp; struct bio *bio; + int rw; queue_lockdep_assert_held(q); + rcu_read_lock(); + + /* + * Drain each tg while doing post-order walk on the blkg tree, so + * that all bios are propagated to td->service_queue. It'd be + * better to walk service_queue tree directly but blkg walk is + * easier. + */ + blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg) + tg_drain_bios(&blkg_to_tg(blkg)->service_queue); - bio_list_init(&bl); + tg_drain_bios(&td_root_tg(td)->service_queue); - while ((tg = throtl_rb_first(st))) { - throtl_dequeue_tg(td, tg); + /* finally, transfer bios from top-level tg's into the td */ + tg_drain_bios(&td->service_queue); - while ((bio = bio_list_peek(&tg->bio_lists[READ]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - } + rcu_read_unlock(); spin_unlock_irq(q->queue_lock); - while ((bio = bio_list_pop(&bl))) - generic_make_request(bio); + /* all bios now should be in td->service_queue, issue them */ + for (rw = READ; rw <= WRITE; rw++) + while ((bio = throtl_pop_queued(&td->service_queue.queued[rw], + NULL))) + generic_make_request(bio); spin_lock_irq(q->queue_lock); } @@ -1240,9 +1665,8 @@ int blk_throtl_init(struct request_queue *q) if (!td) return -ENOMEM; - td->tg_service_tree = THROTL_RB_ROOT; - td->limits_changed = false; - INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); + INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn); + throtl_service_queue_init(&td->service_queue, NULL); q->td = td; td->queue = q; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d5cd313..d5bbdcf 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4347,18 +4347,28 @@ static void cfq_exit_queue(struct elevator_queue *e) kfree(cfqd); } -static int cfq_init_queue(struct request_queue *q) +static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) { struct cfq_data *cfqd; struct blkcg_gq *blkg __maybe_unused; int i, ret; + struct elevator_queue *eq; + + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); - if (!cfqd) + if (!cfqd) { + kobject_put(&eq->kobj); return -ENOMEM; + } + eq->elevator_data = cfqd; cfqd->queue = q; - q->elevator->elevator_data = cfqd; + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); /* Init root service tree */ cfqd->grp_service_tree = CFQ_RB_ROOT; @@ -4433,6 +4443,7 @@ static int cfq_init_queue(struct request_queue *q) out_free: kfree(cfqd); + kobject_put(&eq->kobj); return ret; } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index ba19a3a..20614a3 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -337,13 +337,21 @@ static void deadline_exit_queue(struct elevator_queue *e) /* * initialize elevator private data (deadline_data). */ -static int deadline_init_queue(struct request_queue *q) +static int deadline_init_queue(struct request_queue *q, struct elevator_type *e) { struct deadline_data *dd; + struct elevator_queue *eq; + + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node); - if (!dd) + if (!dd) { + kobject_put(&eq->kobj); return -ENOMEM; + } + eq->elevator_data = dd; INIT_LIST_HEAD(&dd->fifo_list[READ]); INIT_LIST_HEAD(&dd->fifo_list[WRITE]); @@ -355,7 +363,9 @@ static int deadline_init_queue(struct request_queue *q) dd->front_merges = 1; dd->fifo_batch = fifo_batch; - q->elevator->elevator_data = dd; + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); return 0; } diff --git a/block/elevator.c b/block/elevator.c index eba5b04..668394d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -150,7 +150,7 @@ void __init load_default_elevator_module(void) static struct kobj_type elv_ktype; -static struct elevator_queue *elevator_alloc(struct request_queue *q, +struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) { struct elevator_queue *eq; @@ -170,6 +170,7 @@ err: elevator_put(e); return NULL; } +EXPORT_SYMBOL(elevator_alloc); static void elevator_release(struct kobject *kobj) { @@ -221,16 +222,7 @@ int elevator_init(struct request_queue *q, char *name) } } - q->elevator = elevator_alloc(q, e); - if (!q->elevator) - return -ENOMEM; - - err = e->ops.elevator_init_fn(q); - if (err) { - kobject_put(&q->elevator->kobj); - return err; - } - + err = e->ops.elevator_init_fn(q, e); return 0; } EXPORT_SYMBOL(elevator_init); @@ -935,16 +927,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) spin_unlock_irq(q->queue_lock); /* allocate, init and register new elevator */ - err = -ENOMEM; - q->elevator = elevator_alloc(q, new_e); - if (!q->elevator) - goto fail_init; - - err = new_e->ops.elevator_init_fn(q); - if (err) { - kobject_put(&q->elevator->kobj); + err = new_e->ops.elevator_init_fn(q, new_e); + if (err) goto fail_init; - } if (registered) { err = elv_register_queue(q); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 5d1bf70..3de89d4 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -59,16 +59,27 @@ noop_latter_request(struct request_queue *q, struct request *rq) return list_entry(rq->queuelist.next, struct request, queuelist); } -static int noop_init_queue(struct request_queue *q) +static int noop_init_queue(struct request_queue *q, struct elevator_type *e) { struct noop_data *nd; + struct elevator_queue *eq; + + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); - if (!nd) + if (!nd) { + kobject_put(&eq->kobj); return -ENOMEM; + } + eq->elevator_data = nd; INIT_LIST_HEAD(&nd->queue); - q->elevator->elevator_data = nd; + + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); return 0; } diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e9e8bb2..4ab807d 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -324,14 +324,27 @@ int acpi_bus_update_power(acpi_handle handle, int *state_p) if (result) return result; - if (state == ACPI_STATE_UNKNOWN) + if (state == ACPI_STATE_UNKNOWN) { state = ACPI_STATE_D0; - - result = acpi_device_set_power(device, state); - if (!result && state_p) + result = acpi_device_set_power(device, state); + if (result) + return result; + } else { + if (device->power.flags.power_resources) { + /* + * We don't need to really switch the state, bu we need + * to update the power resources' reference counters. + */ + result = acpi_power_transition(device, state); + if (result) + return result; + } + device->power.state = state; + } + if (state_p) *state_p = state; - return result; + return 0; } EXPORT_SYMBOL_GPL(acpi_bus_update_power); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 14de9f4..8265607 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -1064,10 +1064,10 @@ find_dock_and_bay(acpi_handle handle, u32 lvl, void *context, void **rv) return AE_OK; } -int __init acpi_dock_init(void) +void __init acpi_dock_init(void) { if (acpi_disabled) - return 0; + return; /* look for dock stations and bays */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, @@ -1075,11 +1075,10 @@ int __init acpi_dock_init(void) if (!dock_station_count) { pr_info(PREFIX "No dock devices found.\n"); - return 0; + return; } register_acpi_bus_notifier(&dock_acpi_notifier); pr_info(PREFIX "%s: %d docks/bays found\n", ACPI_DOCK_DRIVER_DESCRIPTION, dock_station_count); - return 0; } diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 8d1c010..5b02a0a 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -84,7 +84,7 @@ static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long { struct acpi_device *device = cdev->devdata; int result; - int acpi_state; + int acpi_state = ACPI_STATE_D0; if (!device) return -EINVAL; diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 288bb27..5c28c89 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -279,7 +279,7 @@ static int acpi_power_on_unlocked(struct acpi_power_resource *resource) if (resource->ref_count++) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Power resource [%s] already on", + "Power resource [%s] already on\n", resource->name)); } else { result = __acpi_power_on(resource); @@ -325,7 +325,7 @@ static int acpi_power_off_unlocked(struct acpi_power_resource *resource) if (!resource->ref_count) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Power resource [%s] already off", + "Power resource [%s] already off\n", resource->name)); return 0; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index dfe76f1..1098557 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -35,7 +35,6 @@ bool acpi_force_hot_remove; static const char *dummy_hid = "device"; -static LIST_HEAD(acpi_device_list); static LIST_HEAD(acpi_bus_id_list); static DEFINE_MUTEX(acpi_scan_lock); static LIST_HEAD(acpi_scan_handlers_list); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6a015ad..0937b8d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -312,11 +312,12 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, switch (state) { case CPUFREQ_PRECHANGE: - if (WARN(policy->transition_ongoing, + if (WARN(policy->transition_ongoing == + cpumask_weight(policy->cpus), "In middle of another frequency transition\n")) return; - policy->transition_ongoing = true; + policy->transition_ongoing++; /* detect if the driver reported a value as "old frequency" * which is not equal to what the cpufreq core thinks is @@ -341,7 +342,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, "No frequency transition in progress\n")) return; - policy->transition_ongoing = false; + policy->transition_ongoing--; adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 2693129..3f62041 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -388,6 +388,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) init_waitqueue_head(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); + mutex_init(&isert_conn->conn_mutex); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -540,15 +541,32 @@ isert_disconnect_work(struct work_struct *work) struct isert_conn, conn_logout_work); pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - + mutex_lock(&isert_conn->conn_mutex); isert_conn->state = ISER_CONN_DOWN; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); - wake_up(&isert_conn->conn_wait); + mutex_unlock(&isert_conn->conn_mutex); + goto wake_up; + } + if (!isert_conn->conn_cm_id) { + mutex_unlock(&isert_conn->conn_mutex); + isert_put_conn(isert_conn); + return; } + if (!isert_conn->logout_posted) { + pr_debug("Calling rdma_disconnect for !logout_posted from" + " isert_disconnect_work\n"); + rdma_disconnect(isert_conn->conn_cm_id); + mutex_unlock(&isert_conn->conn_mutex); + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + goto wake_up; + } + mutex_unlock(&isert_conn->conn_mutex); +wake_up: + wake_up(&isert_conn->conn_wait); isert_put_conn(isert_conn); } @@ -934,16 +952,11 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, } sequence_cmd: - rc = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); - if (rc == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); - return 0; } @@ -1001,17 +1014,71 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, } static int +isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, + struct iser_rx_desc *rx_desc, unsigned char *buf) +{ + struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_conn *conn = isert_conn->conn; + struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; + int rc; + + rc = iscsit_setup_nop_out(conn, cmd, hdr); + if (rc < 0) + return rc; + /* + * FIXME: Add support for NOPOUT payload using unsolicited RDMA payload + */ + + return iscsit_process_nop_out(conn, cmd, hdr); +} + +static int +isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, + struct iser_rx_desc *rx_desc, struct iscsi_text *hdr) +{ + struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_conn *conn = isert_conn->conn; + u32 payload_length = ntoh24(hdr->dlength); + int rc; + unsigned char *text_in; + + rc = iscsit_setup_text_cmd(conn, cmd, hdr); + if (rc < 0) + return rc; + + text_in = kzalloc(payload_length, GFP_KERNEL); + if (!text_in) { + pr_err("Unable to allocate text_in of payload_length: %u\n", + payload_length); + return -ENOMEM; + } + cmd->text_in_ptr = text_in; + + memcpy(cmd->text_in_ptr, &rx_desc->data[0], payload_length); + + return iscsit_process_text_cmd(conn, cmd, hdr); +} + +static int isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, uint32_t read_stag, uint64_t read_va, uint32_t write_stag, uint64_t write_va) { struct iscsi_hdr *hdr = &rx_desc->iscsi_header; struct iscsi_conn *conn = isert_conn->conn; + struct iscsi_session *sess = conn->sess; struct iscsi_cmd *cmd; struct isert_cmd *isert_cmd; int ret = -EINVAL; u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK); + if (sess->sess_ops->SessionType && + (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) { + pr_err("Got illegal opcode: 0x%02x in SessionType=Discovery," + " ignoring\n", opcode); + return 0; + } + switch (opcode) { case ISCSI_OP_SCSI_CMD: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); @@ -1032,7 +1099,9 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, if (!cmd) break; - ret = iscsit_handle_nop_out(conn, cmd, (unsigned char *)hdr); + isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); + ret = isert_handle_nop_out(isert_conn, isert_cmd, + rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_DATA_OUT: ret = isert_handle_iscsi_dataout(isert_conn, rx_desc, @@ -1057,6 +1126,15 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, SECONDS_FOR_LOGOUT_COMP * HZ); break; + case ISCSI_OP_TEXT: + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + break; + + isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); + ret = isert_handle_text_cmd(isert_conn, isert_cmd, + rx_desc, (struct iscsi_text *)hdr); + break; default: pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode); dump_stack(); @@ -1184,14 +1262,12 @@ isert_put_cmd(struct isert_cmd *isert_cmd) { struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; - struct iscsi_conn *conn; + struct iscsi_conn *conn = isert_conn->conn; pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - conn = isert_conn->conn; - spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) list_del(&cmd->i_conn_node); @@ -1201,16 +1277,19 @@ isert_put_cmd(struct isert_cmd *isert_cmd) iscsit_stop_dataout_timer(cmd); isert_unmap_cmd(isert_cmd, isert_conn); - /* - * Fall-through - */ + transport_generic_free_cmd(&cmd->se_cmd, 0); + break; case ISCSI_OP_SCSI_TMFUNC: + spin_lock_bh(&conn->cmd_lock); + if (!list_empty(&cmd->i_conn_node)) + list_del(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + transport_generic_free_cmd(&cmd->se_cmd, 0); break; case ISCSI_OP_REJECT: case ISCSI_OP_NOOP_OUT: - conn = isert_conn->conn; - + case ISCSI_OP_TEXT: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) list_del(&cmd->i_conn_node); @@ -1222,6 +1301,9 @@ isert_put_cmd(struct isert_cmd *isert_cmd) * associated cmd->se_cmd needs to be released. */ if (cmd->se_cmd.se_tfo != NULL) { + pr_debug("Calling transport_generic_free_cmd from" + " isert_put_cmd for 0x%02x\n", + cmd->iscsi_opcode); transport_generic_free_cmd(&cmd->se_cmd, 0); break; } @@ -1249,11 +1331,11 @@ static void isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, struct ib_device *ib_dev) { - if (isert_cmd->sense_buf_dma != 0) { - pr_debug("Calling ib_dma_unmap_single for isert_cmd->sense_buf_dma\n"); - ib_dma_unmap_single(ib_dev, isert_cmd->sense_buf_dma, - isert_cmd->sense_buf_len, DMA_TO_DEVICE); - isert_cmd->sense_buf_dma = 0; + if (isert_cmd->pdu_buf_dma != 0) { + pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n"); + ib_dma_unmap_single(ib_dev, isert_cmd->pdu_buf_dma, + isert_cmd->pdu_buf_len, DMA_TO_DEVICE); + isert_cmd->pdu_buf_dma = 0; } isert_unmap_tx_desc(tx_desc, ib_dev); @@ -1318,8 +1400,8 @@ isert_do_control_comp(struct work_struct *work) atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - complete(&cmd->reject_comp); isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + break; case ISTATE_SEND_LOGOUTRSP: pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); /* @@ -1329,6 +1411,11 @@ isert_do_control_comp(struct work_struct *work) isert_conn->logout_posted = true; iscsit_logout_post_handler(cmd, cmd->conn); break; + case ISTATE_SEND_TEXTRSP: + atomic_dec(&isert_conn->post_send_buf_count); + cmd->i_state = ISTATE_SENT_STATUS; + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + break; default: pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state); dump_stack(); @@ -1345,7 +1432,9 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || - cmd->i_state == ISTATE_SEND_LOGOUTRSP) { + cmd->i_state == ISTATE_SEND_LOGOUTRSP || + cmd->i_state == ISTATE_SEND_REJECT || + cmd->i_state == ISTATE_SEND_TEXTRSP) { isert_unmap_tx_desc(tx_desc, ib_dev); INIT_WORK(&isert_cmd->comp_work, isert_do_control_comp); @@ -1419,7 +1508,11 @@ isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); pr_debug("Calling wake_up from isert_cq_comp_err\n"); - isert_conn->state = ISER_CONN_TERMINATING; + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->state != ISER_CONN_DOWN) + isert_conn->state = ISER_CONN_TERMINATING; + mutex_unlock(&isert_conn->conn_mutex); + wake_up(&isert_conn->conn_wait_comp_err); } } @@ -1445,6 +1538,7 @@ isert_cq_tx_work(struct work_struct *work) } else { pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); pr_debug("TX wc.status: 0x%08x\n", wc.status); + pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); atomic_dec(&isert_conn->post_send_buf_count); isert_cq_comp_err(tx_desc, isert_conn); } @@ -1484,9 +1578,11 @@ isert_cq_rx_work(struct work_struct *work) isert_rx_completion(rx_desc, isert_conn, xfer_len); } else { pr_debug("RX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); - if (wc.status != IB_WC_WR_FLUSH_ERR) + if (wc.status != IB_WC_WR_FLUSH_ERR) { pr_debug("RX wc.status: 0x%08x\n", wc.status); - + pr_debug("RX wc.vendor_err: 0x%08x\n", + wc.vendor_err); + } isert_conn->post_recv_buf_count--; isert_cq_comp_err(NULL, isert_conn); } @@ -1543,7 +1639,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1]; - u32 padding, sense_len; + u32 padding, pdu_len; put_unaligned_be16(cmd->se_cmd.scsi_sense_length, cmd->sense_buffer); @@ -1551,15 +1647,15 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) padding = -(cmd->se_cmd.scsi_sense_length) & 3; hton24(hdr->dlength, (u32)cmd->se_cmd.scsi_sense_length); - sense_len = cmd->se_cmd.scsi_sense_length + padding; + pdu_len = cmd->se_cmd.scsi_sense_length + padding; - isert_cmd->sense_buf_dma = ib_dma_map_single(ib_dev, - (void *)cmd->sense_buffer, sense_len, + isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, + (void *)cmd->sense_buffer, pdu_len, DMA_TO_DEVICE); - isert_cmd->sense_buf_len = sense_len; - tx_dsg->addr = isert_cmd->sense_buf_dma; - tx_dsg->length = sense_len; + isert_cmd->pdu_buf_len = pdu_len; + tx_dsg->addr = isert_cmd->pdu_buf_dma; + tx_dsg->length = pdu_len; tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; } @@ -1637,11 +1733,25 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) struct isert_cmd, iscsi_cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1]; + struct iscsi_reject *hdr = + (struct iscsi_reject *)&isert_cmd->tx_desc.iscsi_header; isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_reject(cmd, conn, (struct iscsi_reject *) - &isert_cmd->tx_desc.iscsi_header); + iscsit_build_reject(cmd, conn, hdr); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + + hton24(hdr->dlength, ISCSI_HDR_LEN); + isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, + (void *)cmd->buf_ptr, ISCSI_HDR_LEN, + DMA_TO_DEVICE); + isert_cmd->pdu_buf_len = ISCSI_HDR_LEN; + tx_dsg->addr = isert_cmd->pdu_buf_dma; + tx_dsg->length = ISCSI_HDR_LEN; + tx_dsg->lkey = isert_conn->conn_mr->lkey; + isert_cmd->tx_desc.num_sge = 2; + isert_init_send_wr(isert_cmd, send_wr); pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1650,6 +1760,47 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) } static int +isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct isert_cmd *isert_cmd = container_of(cmd, + struct isert_cmd, iscsi_cmd); + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; + struct iscsi_text_rsp *hdr = + (struct iscsi_text_rsp *)&isert_cmd->tx_desc.iscsi_header; + u32 txt_rsp_len; + int rc; + + isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); + rc = iscsit_build_text_rsp(cmd, conn, hdr); + if (rc < 0) + return rc; + + txt_rsp_len = rc; + isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + + if (txt_rsp_len) { + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1]; + void *txt_rsp_buf = cmd->buf_ptr; + + isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev, + txt_rsp_buf, txt_rsp_len, DMA_TO_DEVICE); + + isert_cmd->pdu_buf_len = txt_rsp_len; + tx_dsg->addr = isert_cmd->pdu_buf_dma; + tx_dsg->length = txt_rsp_len; + tx_dsg->lkey = isert_conn->conn_mr->lkey; + isert_cmd->tx_desc.num_sge = 2; + } + isert_init_send_wr(isert_cmd, send_wr); + + pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + + return isert_post_response(isert_conn, isert_cmd); +} + +static int isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, struct ib_sge *ib_sge, struct ib_send_wr *send_wr, u32 data_left, u32 offset) @@ -1947,6 +2098,9 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) case ISTATE_SEND_REJECT: ret = isert_put_reject(cmd, conn); break; + case ISTATE_SEND_TEXTRSP: + ret = isert_put_text_rsp(cmd, conn); + break; case ISTATE_SEND_STATUS: /* * Special case for sending non GOOD SCSI status from TX thread @@ -2175,6 +2329,17 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } +static int isert_check_state(struct isert_conn *isert_conn, int state) +{ + int ret; + + mutex_lock(&isert_conn->conn_mutex); + ret = (isert_conn->state == state); + mutex_unlock(&isert_conn->conn_mutex); + + return ret; +} + static void isert_free_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -2184,26 +2349,43 @@ static void isert_free_conn(struct iscsi_conn *conn) * Decrement post_send_buf_count for special case when called * from isert_do_control_comp() -> iscsit_logout_post_handler() */ + mutex_lock(&isert_conn->conn_mutex); if (isert_conn->logout_posted) atomic_dec(&isert_conn->post_send_buf_count); - if (isert_conn->conn_cm_id) + if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { + pr_debug("Calling rdma_disconnect from isert_free_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); + } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. */ - if (isert_conn->state > ISER_CONN_INIT) { + if (isert_conn->state == ISER_CONN_UP) { pr_debug("isert_free_conn: Before wait_event comp_err %d\n", isert_conn->state); + mutex_unlock(&isert_conn->conn_mutex); + wait_event(isert_conn->conn_wait_comp_err, - isert_conn->state == ISER_CONN_TERMINATING); - pr_debug("isert_free_conn: After wait_event #1 >>>>>>>>>>>>\n"); + (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); + + wait_event(isert_conn->conn_wait, + (isert_check_state(isert_conn, ISER_CONN_DOWN))); + + isert_put_conn(isert_conn); + return; + } + if (isert_conn->state == ISER_CONN_INIT) { + mutex_unlock(&isert_conn->conn_mutex); + isert_put_conn(isert_conn); + return; } + pr_debug("isert_free_conn: wait_event conn_wait %d\n", + isert_conn->state); + mutex_unlock(&isert_conn->conn_mutex); - pr_debug("isert_free_conn: wait_event conn_wait %d\n", isert_conn->state); - wait_event(isert_conn->conn_wait, isert_conn->state == ISER_CONN_DOWN); - pr_debug("isert_free_conn: After wait_event #2 >>>>>>>>>>>>>>>>>>>>\n"); + wait_event(isert_conn->conn_wait, + (isert_check_state(isert_conn, ISER_CONN_DOWN))); isert_put_conn(isert_conn); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index b104f4c..191117b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -61,8 +61,8 @@ struct isert_cmd { uint32_t write_stag; uint64_t read_va; uint64_t write_va; - u64 sense_buf_dma; - u32 sense_buf_len; + u64 pdu_buf_dma; + u32 pdu_buf_len; u32 read_va_off; u32 write_va_off; u32 rdma_wr_num; @@ -102,6 +102,7 @@ struct isert_conn { struct ib_qp *conn_qp; struct isert_device *conn_device; struct work_struct conn_logout_work; + struct mutex conn_mutex; wait_queue_head_t conn_wait; wait_queue_head_t conn_wait_comp_err; struct kref conn_kref; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 3f3f041..653ac6b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3011,7 +3011,7 @@ static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) * Callback function called by the TCM core. Must not block since it can be * invoked on the context of the IB completion handler. */ -static int srpt_queue_response(struct se_cmd *cmd) +static void srpt_queue_response(struct se_cmd *cmd) { struct srpt_rdma_ch *ch; struct srpt_send_ioctx *ioctx; @@ -3022,8 +3022,6 @@ static int srpt_queue_response(struct se_cmd *cmd) int resp_len; u8 srp_tm_status; - ret = 0; - ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); ch = ioctx->ch; BUG_ON(!ch); @@ -3049,7 +3047,7 @@ static int srpt_queue_response(struct se_cmd *cmd) || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { atomic_inc(&ch->req_lim_delta); srpt_abort_cmd(ioctx); - goto out; + return; } dir = ioctx->cmd.data_direction; @@ -3061,7 +3059,7 @@ static int srpt_queue_response(struct se_cmd *cmd) if (ret) { printk(KERN_ERR "xfer_data failed for tag %llu\n", ioctx->tag); - goto out; + return; } } @@ -3082,9 +3080,17 @@ static int srpt_queue_response(struct se_cmd *cmd) srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); } +} -out: - return ret; +static int srpt_queue_data_in(struct se_cmd *cmd) +{ + srpt_queue_response(cmd); + return 0; +} + +static void srpt_queue_tm_rsp(struct se_cmd *cmd) +{ + srpt_queue_response(cmd); } static int srpt_queue_status(struct se_cmd *cmd) @@ -3097,7 +3103,8 @@ static int srpt_queue_status(struct se_cmd *cmd) (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE)) WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION); ioctx->queue_status_only = true; - return srpt_queue_response(cmd); + srpt_queue_response(cmd); + return 0; } static void srpt_refresh_port_work(struct work_struct *work) @@ -3930,9 +3937,9 @@ static struct target_core_fabric_ops srpt_template = { .set_default_node_attributes = srpt_set_default_node_attrs, .get_task_tag = srpt_get_task_tag, .get_cmd_state = srpt_get_tcm_cmd_state, - .queue_data_in = srpt_queue_response, + .queue_data_in = srpt_queue_data_in, .queue_status = srpt_queue_status, - .queue_tm_rsp = srpt_queue_response, + .queue_tm_rsp = srpt_queue_tm_rsp, /* * Setup function pointers for generic logic in * target_core_fabric_configfs.c diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3bfc8f1..30b426e 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -412,4 +412,18 @@ config DM_VERITY If unsure, say N. +config DM_SWITCH + tristate "Switch target support (EXPERIMENTAL)" + depends on BLK_DEV_DM + ---help--- + This device-mapper target creates a device that supports an arbitrary + mapping of fixed-size regions of I/O across a fixed set of paths. + The path used for any specific region can be switched dynamically + by sending the target a message. + + To compile this code as a module, choose M here: the module will + be called dm-switch. + + If unsure, say N. + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 1439fd4..5ef78ef 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o +obj-$(CONFIG_DM_SWITCH) += dm-switch.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 0387e05..5227e07 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -145,6 +145,7 @@ struct dm_buffer { unsigned long state; unsigned long last_accessed; struct dm_bufio_client *c; + struct list_head write_list; struct bio bio; struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS]; }; @@ -349,7 +350,7 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, if (gfp_mask & __GFP_NORETRY) noio_flag = memalloc_noio_save(); - ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); + ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL); if (gfp_mask & __GFP_NORETRY) memalloc_noio_restore(noio_flag); @@ -630,7 +631,8 @@ static int do_io_schedule(void *word) * - Submit our write and don't wait on it. We set B_WRITING indicating * that there is a write in progress. */ -static void __write_dirty_buffer(struct dm_buffer *b) +static void __write_dirty_buffer(struct dm_buffer *b, + struct list_head *write_list) { if (!test_bit(B_DIRTY, &b->state)) return; @@ -639,7 +641,24 @@ static void __write_dirty_buffer(struct dm_buffer *b) wait_on_bit_lock(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); - submit_io(b, WRITE, b->block, write_endio); + if (!write_list) + submit_io(b, WRITE, b->block, write_endio); + else + list_add_tail(&b->write_list, write_list); +} + +static void __flush_write_list(struct list_head *write_list) +{ + struct blk_plug plug; + blk_start_plug(&plug); + while (!list_empty(write_list)) { + struct dm_buffer *b = + list_entry(write_list->next, struct dm_buffer, write_list); + list_del(&b->write_list); + submit_io(b, WRITE, b->block, write_endio); + dm_bufio_cond_resched(); + } + blk_finish_plug(&plug); } /* @@ -655,7 +674,7 @@ static void __make_buffer_clean(struct dm_buffer *b) return; wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); - __write_dirty_buffer(b); + __write_dirty_buffer(b, NULL); wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); } @@ -802,7 +821,8 @@ static void __free_buffer_wake(struct dm_buffer *b) wake_up(&c->free_buffer_wait); } -static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait) +static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait, + struct list_head *write_list) { struct dm_buffer *b, *tmp; @@ -818,7 +838,7 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait) if (no_wait && test_bit(B_WRITING, &b->state)) return; - __write_dirty_buffer(b); + __write_dirty_buffer(b, write_list); dm_bufio_cond_resched(); } } @@ -853,7 +873,8 @@ static void __get_memory_limit(struct dm_bufio_client *c, * If we are over threshold_buffers, start freeing buffers. * If we're over "limit_buffers", block until we get under the limit. */ -static void __check_watermark(struct dm_bufio_client *c) +static void __check_watermark(struct dm_bufio_client *c, + struct list_head *write_list) { unsigned long threshold_buffers, limit_buffers; @@ -872,7 +893,7 @@ static void __check_watermark(struct dm_bufio_client *c) } if (c->n_buffers[LIST_DIRTY] > threshold_buffers) - __write_dirty_buffers_async(c, 1); + __write_dirty_buffers_async(c, 1, write_list); } /* @@ -897,7 +918,8 @@ static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) *--------------------------------------------------------------*/ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, - enum new_flag nf, int *need_submit) + enum new_flag nf, int *need_submit, + struct list_head *write_list) { struct dm_buffer *b, *new_b = NULL; @@ -924,7 +946,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, goto found_buffer; } - __check_watermark(c); + __check_watermark(c, write_list); b = new_b; b->hold_count = 1; @@ -992,10 +1014,14 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, int need_submit; struct dm_buffer *b; + LIST_HEAD(write_list); + dm_bufio_lock(c); - b = __bufio_new(c, block, nf, &need_submit); + b = __bufio_new(c, block, nf, &need_submit, &write_list); dm_bufio_unlock(c); + __flush_write_list(&write_list); + if (!b) return b; @@ -1047,6 +1073,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, { struct blk_plug plug; + LIST_HEAD(write_list); + BUG_ON(dm_bufio_in_request()); blk_start_plug(&plug); @@ -1055,7 +1083,15 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, for (; n_blocks--; block++) { int need_submit; struct dm_buffer *b; - b = __bufio_new(c, block, NF_PREFETCH, &need_submit); + b = __bufio_new(c, block, NF_PREFETCH, &need_submit, + &write_list); + if (unlikely(!list_empty(&write_list))) { + dm_bufio_unlock(c); + blk_finish_plug(&plug); + __flush_write_list(&write_list); + blk_start_plug(&plug); + dm_bufio_lock(c); + } if (unlikely(b != NULL)) { dm_bufio_unlock(c); @@ -1069,7 +1105,6 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, goto flush_plug; dm_bufio_lock(c); } - } dm_bufio_unlock(c); @@ -1126,11 +1161,14 @@ EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) { + LIST_HEAD(write_list); + BUG_ON(dm_bufio_in_request()); dm_bufio_lock(c); - __write_dirty_buffers_async(c, 0); + __write_dirty_buffers_async(c, 0, &write_list); dm_bufio_unlock(c); + __flush_write_list(&write_list); } EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async); @@ -1147,8 +1185,13 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) unsigned long buffers_processed = 0; struct dm_buffer *b, *tmp; + LIST_HEAD(write_list); + + dm_bufio_lock(c); + __write_dirty_buffers_async(c, 0, &write_list); + dm_bufio_unlock(c); + __flush_write_list(&write_list); dm_bufio_lock(c); - __write_dirty_buffers_async(c, 0); again: list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { @@ -1274,7 +1317,7 @@ retry: BUG_ON(!b->hold_count); BUG_ON(test_bit(B_READING, &b->state)); - __write_dirty_buffer(b); + __write_dirty_buffer(b, NULL); if (b->hold_count == 1) { wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index df44b60..0df3ec0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -425,6 +425,10 @@ static bool block_size_is_power_of_two(struct cache *cache) return cache->sectors_per_block_shift >= 0; } +/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */ +#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 +__always_inline +#endif static dm_block_t block_div(dm_block_t b, uint32_t n) { do_div(b, n); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 7fcf21c..c80a0ec 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -176,7 +176,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { - ti->error = "Cannot allocate linear context"; + ti->error = "Cannot allocate context"; return -ENOMEM; } fc->start_time = jiffies; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index aa04f02..f1b7586 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -36,6 +36,14 @@ struct hash_cell { struct dm_table *new_map; }; +/* + * A dummy definition to make RCU happy. + * struct dm_table should never be dereferenced in this file. + */ +struct dm_table { + int undefined__; +}; + struct vers_iter { size_t param_size; struct dm_target_versions *vers, *old_vers; @@ -242,9 +250,10 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi return -EBUSY; } -static void __hash_remove(struct hash_cell *hc) +static struct dm_table *__hash_remove(struct hash_cell *hc) { struct dm_table *table; + int srcu_idx; /* remove from the dev hash */ list_del(&hc->uuid_list); @@ -253,16 +262,18 @@ static void __hash_remove(struct hash_cell *hc) dm_set_mdptr(hc->md, NULL); mutex_unlock(&dm_hash_cells_mutex); - table = dm_get_live_table(hc->md); - if (table) { + table = dm_get_live_table(hc->md, &srcu_idx); + if (table) dm_table_event(table); - dm_table_put(table); - } + dm_put_live_table(hc->md, srcu_idx); + table = NULL; if (hc->new_map) - dm_table_destroy(hc->new_map); + table = hc->new_map; dm_put(hc->md); free_cell(hc); + + return table; } static void dm_hash_remove_all(int keep_open_devices) @@ -270,6 +281,7 @@ static void dm_hash_remove_all(int keep_open_devices) int i, dev_skipped; struct hash_cell *hc; struct mapped_device *md; + struct dm_table *t; retry: dev_skipped = 0; @@ -287,10 +299,14 @@ retry: continue; } - __hash_remove(hc); + t = __hash_remove(hc); up_write(&_hash_lock); + if (t) { + dm_sync_table(md); + dm_table_destroy(t); + } dm_put(md); if (likely(keep_open_devices)) dm_destroy(md); @@ -356,6 +372,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, struct dm_table *table; struct mapped_device *md; unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; + int srcu_idx; /* * duplicate new. @@ -418,11 +435,10 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, /* * Wake up any dm event waiters. */ - table = dm_get_live_table(hc->md); - if (table) { + table = dm_get_live_table(hc->md, &srcu_idx); + if (table) dm_table_event(table); - dm_table_put(table); - } + dm_put_live_table(hc->md, srcu_idx); if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; @@ -620,11 +636,14 @@ static int check_name(const char *name) * _hash_lock without first calling dm_table_put, because dm_table_destroy * waits for this dm_table_put and could be called under this lock. */ -static struct dm_table *dm_get_inactive_table(struct mapped_device *md) +static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx) { struct hash_cell *hc; struct dm_table *table = NULL; + /* increment rcu count, we don't care about the table pointer */ + dm_get_live_table(md, srcu_idx); + down_read(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { @@ -633,8 +652,6 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md) } table = hc->new_map; - if (table) - dm_table_get(table); out: up_read(&_hash_lock); @@ -643,10 +660,11 @@ out: } static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md, - struct dm_ioctl *param) + struct dm_ioctl *param, + int *srcu_idx) { return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ? - dm_get_inactive_table(md) : dm_get_live_table(md); + dm_get_inactive_table(md, srcu_idx) : dm_get_live_table(md, srcu_idx); } /* @@ -657,6 +675,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) { struct gendisk *disk = dm_disk(md); struct dm_table *table; + int srcu_idx; param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | DM_ACTIVE_PRESENT_FLAG); @@ -676,26 +695,27 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) param->event_nr = dm_get_event_nr(md); param->target_count = 0; - table = dm_get_live_table(md); + table = dm_get_live_table(md, &srcu_idx); if (table) { if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) { if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->target_count = dm_table_get_num_targets(table); } - dm_table_put(table); param->flags |= DM_ACTIVE_PRESENT_FLAG; } + dm_put_live_table(md, srcu_idx); if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) { - table = dm_get_inactive_table(md); + int srcu_idx; + table = dm_get_inactive_table(md, &srcu_idx); if (table) { if (!(dm_table_get_mode(table) & FMODE_WRITE)) param->flags |= DM_READONLY_FLAG; param->target_count = dm_table_get_num_targets(table); - dm_table_put(table); } + dm_put_live_table(md, srcu_idx); } } @@ -796,6 +816,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) struct hash_cell *hc; struct mapped_device *md; int r; + struct dm_table *t; down_write(&_hash_lock); hc = __find_device_hash_cell(param); @@ -819,9 +840,14 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) return r; } - __hash_remove(hc); + t = __hash_remove(hc); up_write(&_hash_lock); + if (t) { + dm_sync_table(md); + dm_table_destroy(t); + } + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; @@ -986,6 +1012,7 @@ static int do_resume(struct dm_ioctl *param) old_map = dm_swap_table(md, new_map); if (IS_ERR(old_map)) { + dm_sync_table(md); dm_table_destroy(new_map); dm_put(md); return PTR_ERR(old_map); @@ -1003,6 +1030,10 @@ static int do_resume(struct dm_ioctl *param) param->flags |= DM_UEVENT_GENERATED_FLAG; } + /* + * Since dm_swap_table synchronizes RCU, nobody should be in + * read-side critical section already. + */ if (old_map) dm_table_destroy(old_map); @@ -1125,6 +1156,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) int r = 0; struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1145,11 +1177,10 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) */ __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_status(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); out: dm_put(md); @@ -1221,7 +1252,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size) { int r; struct hash_cell *hc; - struct dm_table *t; + struct dm_table *t, *old_map = NULL; struct mapped_device *md; struct target_type *immutable_target_type; @@ -1277,14 +1308,14 @@ static int table_load(struct dm_ioctl *param, size_t param_size) hc = dm_get_mdptr(md); if (!hc || hc->md != md) { DMWARN("device has been removed from the dev hash table."); - dm_table_destroy(t); up_write(&_hash_lock); + dm_table_destroy(t); r = -ENXIO; goto out; } if (hc->new_map) - dm_table_destroy(hc->new_map); + old_map = hc->new_map; hc->new_map = t; up_write(&_hash_lock); @@ -1292,6 +1323,11 @@ static int table_load(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); out: + if (old_map) { + dm_sync_table(md); + dm_table_destroy(old_map); + } + dm_put(md); return r; @@ -1301,6 +1337,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) { struct hash_cell *hc; struct mapped_device *md; + struct dm_table *old_map = NULL; down_write(&_hash_lock); @@ -1312,7 +1349,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) } if (hc->new_map) { - dm_table_destroy(hc->new_map); + old_map = hc->new_map; hc->new_map = NULL; } @@ -1321,6 +1358,10 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) __dev_status(hc->md, param); md = hc->md; up_write(&_hash_lock); + if (old_map) { + dm_sync_table(md); + dm_table_destroy(old_map); + } dm_put(md); return 0; @@ -1370,6 +1411,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1377,11 +1419,10 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_deps(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); dm_put(md); @@ -1396,6 +1437,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size) { struct mapped_device *md; struct dm_table *table; + int srcu_idx; md = find_device(param); if (!md) @@ -1403,11 +1445,10 @@ static int table_status(struct dm_ioctl *param, size_t param_size) __dev_status(md, param); - table = dm_get_live_or_inactive_table(md, param); - if (table) { + table = dm_get_live_or_inactive_table(md, param, &srcu_idx); + if (table) retrieve_status(table, param, param_size); - dm_table_put(table); - } + dm_put_live_table(md, srcu_idx); dm_put(md); @@ -1443,6 +1484,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_target_msg *tmsg = (void *) param + param->data_start; size_t maxlen; char *result = get_result_buffer(param, param_size, &maxlen); + int srcu_idx; md = find_device(param); if (!md) @@ -1470,9 +1512,9 @@ static int target_message(struct dm_ioctl *param, size_t param_size) if (r <= 1) goto out_argv; - table = dm_get_live_table(md); + table = dm_get_live_table(md, &srcu_idx); if (!table) - goto out_argv; + goto out_table; if (dm_deleting_md(md)) { r = -ENXIO; @@ -1491,7 +1533,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) } out_table: - dm_table_put(table); + dm_put_live_table(md, srcu_idx); out_argv: kfree(argv); out: @@ -1644,7 +1686,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern } if (!dmi) { - dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + unsigned noio_flag; + noio_flag = memalloc_noio_save(); + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH | __GFP_HIGHMEM, PAGE_KERNEL); + memalloc_noio_restore(noio_flag); if (dmi) *param_flags |= DM_PARAMS_VMALLOC; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bdf26f5..5adede1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1561,7 +1561,6 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long flags; int r; -again: bdev = NULL; mode = 0; r = 0; @@ -1579,7 +1578,7 @@ again: } if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) - r = -EAGAIN; + r = -ENOTCONN; else if (!bdev) r = -EIO; @@ -1591,11 +1590,8 @@ again: if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) r = scsi_verify_blk_ioctl(NULL, cmd); - if (r == -EAGAIN && !fatal_signal_pending(current)) { + if (r == -ENOTCONN && !fatal_signal_pending(current)) queue_work(kmultipathd, &m->process_queued_ios); - msleep(10); - goto again; - } return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); } diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c new file mode 100644 index 0000000..ff9ac4b --- /dev/null +++ b/drivers/md/dm-switch.c @@ -0,0 +1,538 @@ +/* + * Copyright (C) 2010-2012 by Dell Inc. All rights reserved. + * Copyright (C) 2011-2013 Red Hat, Inc. + * + * This file is released under the GPL. + * + * dm-switch is a device-mapper target that maps IO to underlying block + * devices efficiently when there are a large number of fixed-sized + * address regions but there is no simple pattern to allow for a compact + * mapping representation such as dm-stripe. + */ + +#include + +#include +#include +#include + +#define DM_MSG_PREFIX "switch" + +/* + * One region_table_slot_t holds region table + * entries each of which is in size. + */ +typedef unsigned long region_table_slot_t; + +/* + * A device with the offset to its start sector. + */ +struct switch_path { + struct dm_dev *dmdev; + sector_t start; +}; + +/* + * Context block for a dm switch device. + */ +struct switch_ctx { + struct dm_target *ti; + + unsigned nr_paths; /* Number of paths in path_list. */ + + unsigned region_size; /* Region size in 512-byte sectors */ + unsigned long nr_regions; /* Number of regions making up the device */ + signed char region_size_bits; /* log2 of region_size or -1 */ + + unsigned char region_table_entry_bits; /* Number of bits in one region table entry */ + unsigned char region_entries_per_slot; /* Number of entries in one region table slot */ + signed char region_entries_per_slot_bits; /* log2 of region_entries_per_slot or -1 */ + + region_table_slot_t *region_table; /* Region table */ + + /* + * Array of dm devices to switch between. + */ + struct switch_path path_list[0]; +}; + +static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_paths, + unsigned region_size) +{ + struct switch_ctx *sctx; + + sctx = kzalloc(sizeof(struct switch_ctx) + nr_paths * sizeof(struct switch_path), + GFP_KERNEL); + if (!sctx) + return NULL; + + sctx->ti = ti; + sctx->region_size = region_size; + + ti->private = sctx; + + return sctx; +} + +static int alloc_region_table(struct dm_target *ti, unsigned nr_paths) +{ + struct switch_ctx *sctx = ti->private; + sector_t nr_regions = ti->len; + sector_t nr_slots; + + if (!(sctx->region_size & (sctx->region_size - 1))) + sctx->region_size_bits = __ffs(sctx->region_size); + else + sctx->region_size_bits = -1; + + sctx->region_table_entry_bits = 1; + while (sctx->region_table_entry_bits < sizeof(region_table_slot_t) * 8 && + (region_table_slot_t)1 << sctx->region_table_entry_bits < nr_paths) + sctx->region_table_entry_bits++; + + sctx->region_entries_per_slot = (sizeof(region_table_slot_t) * 8) / sctx->region_table_entry_bits; + if (!(sctx->region_entries_per_slot & (sctx->region_entries_per_slot - 1))) + sctx->region_entries_per_slot_bits = __ffs(sctx->region_entries_per_slot); + else + sctx->region_entries_per_slot_bits = -1; + + if (sector_div(nr_regions, sctx->region_size)) + nr_regions++; + + sctx->nr_regions = nr_regions; + if (sctx->nr_regions != nr_regions || sctx->nr_regions >= ULONG_MAX) { + ti->error = "Region table too large"; + return -EINVAL; + } + + nr_slots = nr_regions; + if (sector_div(nr_slots, sctx->region_entries_per_slot)) + nr_slots++; + + if (nr_slots > ULONG_MAX / sizeof(region_table_slot_t)) { + ti->error = "Region table too large"; + return -EINVAL; + } + + sctx->region_table = vmalloc(nr_slots * sizeof(region_table_slot_t)); + if (!sctx->region_table) { + ti->error = "Cannot allocate region table"; + return -ENOMEM; + } + + return 0; +} + +static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr, + unsigned long *region_index, unsigned *bit) +{ + if (sctx->region_entries_per_slot_bits >= 0) { + *region_index = region_nr >> sctx->region_entries_per_slot_bits; + *bit = region_nr & (sctx->region_entries_per_slot - 1); + } else { + *region_index = region_nr / sctx->region_entries_per_slot; + *bit = region_nr % sctx->region_entries_per_slot; + } + + *bit *= sctx->region_table_entry_bits; +} + +/* + * Find which path to use at given offset. + */ +static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) +{ + unsigned long region_index; + unsigned bit, path_nr; + sector_t p; + + p = offset; + if (sctx->region_size_bits >= 0) + p >>= sctx->region_size_bits; + else + sector_div(p, sctx->region_size); + + switch_get_position(sctx, p, ®ion_index, &bit); + path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & + ((1 << sctx->region_table_entry_bits) - 1); + + /* This can only happen if the processor uses non-atomic stores. */ + if (unlikely(path_nr >= sctx->nr_paths)) + path_nr = 0; + + return path_nr; +} + +static void switch_region_table_write(struct switch_ctx *sctx, unsigned long region_nr, + unsigned value) +{ + unsigned long region_index; + unsigned bit; + region_table_slot_t pte; + + switch_get_position(sctx, region_nr, ®ion_index, &bit); + + pte = sctx->region_table[region_index]; + pte &= ~((((region_table_slot_t)1 << sctx->region_table_entry_bits) - 1) << bit); + pte |= (region_table_slot_t)value << bit; + sctx->region_table[region_index] = pte; +} + +/* + * Fill the region table with an initial round robin pattern. + */ +static void initialise_region_table(struct switch_ctx *sctx) +{ + unsigned path_nr = 0; + unsigned long region_nr; + + for (region_nr = 0; region_nr < sctx->nr_regions; region_nr++) { + switch_region_table_write(sctx, region_nr, path_nr); + if (++path_nr >= sctx->nr_paths) + path_nr = 0; + } +} + +static int parse_path(struct dm_arg_set *as, struct dm_target *ti) +{ + struct switch_ctx *sctx = ti->private; + unsigned long long start; + int r; + + r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + &sctx->path_list[sctx->nr_paths].dmdev); + if (r) { + ti->error = "Device lookup failed"; + return r; + } + + if (kstrtoull(dm_shift_arg(as), 10, &start) || start != (sector_t)start) { + ti->error = "Invalid device starting offset"; + dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev); + return -EINVAL; + } + + sctx->path_list[sctx->nr_paths].start = start; + + sctx->nr_paths++; + + return 0; +} + +/* + * Destructor: Don't free the dm_target, just the ti->private data (if any). + */ +static void switch_dtr(struct dm_target *ti) +{ + struct switch_ctx *sctx = ti->private; + + while (sctx->nr_paths--) + dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev); + + vfree(sctx->region_table); + kfree(sctx); +} + +/* + * Constructor arguments: + * [...] + * [ ]+ + * + * Optional args are to allow for future extension: currently this + * parameter must be 0. + */ +static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + static struct dm_arg _args[] = { + {1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"}, + {1, UINT_MAX, "Invalid region size"}, + {0, 0, "Invalid number of optional args"}, + }; + + struct switch_ctx *sctx; + struct dm_arg_set as; + unsigned nr_paths, region_size, nr_optional_args; + int r; + + as.argc = argc; + as.argv = argv; + + r = dm_read_arg(_args, &as, &nr_paths, &ti->error); + if (r) + return -EINVAL; + + r = dm_read_arg(_args + 1, &as, ®ion_size, &ti->error); + if (r) + return r; + + r = dm_read_arg_group(_args + 2, &as, &nr_optional_args, &ti->error); + if (r) + return r; + /* parse optional arguments here, if we add any */ + + if (as.argc != nr_paths * 2) { + ti->error = "Incorrect number of path arguments"; + return -EINVAL; + } + + sctx = alloc_switch_ctx(ti, nr_paths, region_size); + if (!sctx) { + ti->error = "Cannot allocate redirection context"; + return -ENOMEM; + } + + r = dm_set_target_max_io_len(ti, region_size); + if (r) + goto error; + + while (as.argc) { + r = parse_path(&as, ti); + if (r) + goto error; + } + + r = alloc_region_table(ti, nr_paths); + if (r) + goto error; + + initialise_region_table(sctx); + + /* For UNMAP, sending the request down any path is sufficient */ + ti->num_discard_bios = 1; + + return 0; + +error: + switch_dtr(ti); + + return r; +} + +static int switch_map(struct dm_target *ti, struct bio *bio) +{ + struct switch_ctx *sctx = ti->private; + sector_t offset = dm_target_offset(ti, bio->bi_sector); + unsigned path_nr = switch_get_path_nr(sctx, offset); + + bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev; + bio->bi_sector = sctx->path_list[path_nr].start + offset; + + return DM_MAPIO_REMAPPED; +} + +/* + * We need to parse hex numbers in the message as quickly as possible. + * + * This table-based hex parser improves performance. + * It improves a time to load 1000000 entries compared to the condition-based + * parser. + * table-based parser condition-based parser + * PA-RISC 0.29s 0.31s + * Opteron 0.0495s 0.0498s + */ +static const unsigned char hex_table[256] = { +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, +255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 +}; + +static __always_inline unsigned long parse_hex(const char **string) +{ + unsigned char d; + unsigned long r = 0; + + while ((d = hex_table[(unsigned char)**string]) < 16) { + r = (r << 4) | d; + (*string)++; + } + + return r; +} + +static int process_set_region_mappings(struct switch_ctx *sctx, + unsigned argc, char **argv) +{ + unsigned i; + unsigned long region_index = 0; + + for (i = 1; i < argc; i++) { + unsigned long path_nr; + const char *string = argv[i]; + + if (*string == ':') + region_index++; + else { + region_index = parse_hex(&string); + if (unlikely(*string != ':')) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + } + + string++; + if (unlikely(!*string)) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + + path_nr = parse_hex(&string); + if (unlikely(*string)) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + if (unlikely(region_index >= sctx->nr_regions)) { + DMWARN("invalid set_region_mappings region number: %lu >= %lu", region_index, sctx->nr_regions); + return -EINVAL; + } + if (unlikely(path_nr >= sctx->nr_paths)) { + DMWARN("invalid set_region_mappings device: %lu >= %u", path_nr, sctx->nr_paths); + return -EINVAL; + } + + switch_region_table_write(sctx, region_index, path_nr); + } + + return 0; +} + +/* + * Messages are processed one-at-a-time. + * + * Only set_region_mappings is supported. + */ +static int switch_message(struct dm_target *ti, unsigned argc, char **argv) +{ + static DEFINE_MUTEX(message_mutex); + + struct switch_ctx *sctx = ti->private; + int r = -EINVAL; + + mutex_lock(&message_mutex); + + if (!strcasecmp(argv[0], "set_region_mappings")) + r = process_set_region_mappings(sctx, argc, argv); + else + DMWARN("Unrecognised message received."); + + mutex_unlock(&message_mutex); + + return r; +} + +static void switch_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) +{ + struct switch_ctx *sctx = ti->private; + unsigned sz = 0; + int path_nr; + + switch (type) { + case STATUSTYPE_INFO: + result[0] = '\0'; + break; + + case STATUSTYPE_TABLE: + DMEMIT("%u %u 0", sctx->nr_paths, sctx->region_size); + for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) + DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name, + (unsigned long long)sctx->path_list[path_nr].start); + break; + } +} + +/* + * Switch ioctl: + * + * Passthrough all ioctls to the path for sector 0 + */ +static int switch_ioctl(struct dm_target *ti, unsigned cmd, + unsigned long arg) +{ + struct switch_ctx *sctx = ti->private; + struct block_device *bdev; + fmode_t mode; + unsigned path_nr; + int r = 0; + + path_nr = switch_get_path_nr(sctx, 0); + + bdev = sctx->path_list[path_nr].dmdev->bdev; + mode = sctx->path_list[path_nr].dmdev->mode; + + /* + * Only pass ioctls through if the device sizes match exactly. + */ + if (ti->len + sctx->path_list[path_nr].start != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) + r = scsi_verify_blk_ioctl(NULL, cmd); + + return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); +} + +static int switch_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct switch_ctx *sctx = ti->private; + int path_nr; + int r; + + for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) { + r = fn(ti, sctx->path_list[path_nr].dmdev, + sctx->path_list[path_nr].start, ti->len, data); + if (r) + return r; + } + + return 0; +} + +static struct target_type switch_target = { + .name = "switch", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = switch_ctr, + .dtr = switch_dtr, + .map = switch_map, + .message = switch_message, + .status = switch_status, + .ioctl = switch_ioctl, + .iterate_devices = switch_iterate_devices, +}; + +static int __init dm_switch_init(void) +{ + int r; + + r = dm_register_target(&switch_target); + if (r < 0) + DMERR("dm_register_target() failed %d", r); + + return r; +} + +static void __exit dm_switch_exit(void) +{ + dm_unregister_target(&switch_target); +} + +module_init(dm_switch_init); +module_exit(dm_switch_exit); + +MODULE_DESCRIPTION(DM_NAME " dynamic path switching target"); +MODULE_AUTHOR("Kevin D. O'Kelley "); +MODULE_AUTHOR("Narendran Ganapathy "); +MODULE_AUTHOR("Jim Ramsay "); +MODULE_AUTHOR("Mikulas Patocka "); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1ff252a..f221812 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -26,22 +26,8 @@ #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) -/* - * The table has always exactly one reference from either mapped_device->map - * or hash_cell->new_map. This reference is not counted in table->holders. - * A pair of dm_create_table/dm_destroy_table functions is used for table - * creation/destruction. - * - * Temporary references from the other code increase table->holders. A pair - * of dm_table_get/dm_table_put functions is used to manipulate it. - * - * When the table is about to be destroyed, we wait for table->holders to - * drop to zero. - */ - struct dm_table { struct mapped_device *md; - atomic_t holders; unsigned type; /* btree table */ @@ -208,7 +194,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); - atomic_set(&t->holders, 0); if (!num_targets) num_targets = KEYS_PER_NODE; @@ -246,10 +231,6 @@ void dm_table_destroy(struct dm_table *t) if (!t) return; - while (atomic_read(&t->holders)) - msleep(1); - smp_mb(); - /* free the indexes */ if (t->depth >= 2) vfree(t->index[t->depth - 2]); @@ -274,22 +255,6 @@ void dm_table_destroy(struct dm_table *t) kfree(t); } -void dm_table_get(struct dm_table *t) -{ - atomic_inc(&t->holders); -} -EXPORT_SYMBOL(dm_table_get); - -void dm_table_put(struct dm_table *t) -{ - if (!t) - return; - - smp_mb__before_atomic_dec(); - atomic_dec(&t->holders); -} -EXPORT_SYMBOL(dm_table_put); - /* * Checks to see if we need to extend highs or targets. */ diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index b948fd8..4b7941d 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -451,7 +451,7 @@ static void verity_prefetch_io(struct work_struct *work) goto no_prefetch_cluster; if (unlikely(cluster & (cluster - 1))) - cluster = 1 << (fls(cluster) - 1); + cluster = 1 << __fls(cluster); hash_block_start &= ~(sector_t)(cluster - 1); hash_block_end |= cluster - 1; @@ -695,8 +695,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - if (sscanf(argv[0], "%d%c", &num, &dummy) != 1 || - num < 0 || num > 1) { + if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 || + num > 1) { ti->error = "Invalid version"; r = -EINVAL; goto bad; @@ -723,7 +723,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -EINVAL; goto bad; } - v->data_dev_block_bits = ffs(num) - 1; + v->data_dev_block_bits = __ffs(num); if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 || !num || (num & (num - 1)) || @@ -733,7 +733,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -EINVAL; goto bad; } - v->hash_dev_block_bits = ffs(num) - 1; + v->hash_dev_block_bits = __ffs(num); if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) @@ -812,7 +812,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } v->hash_per_block_bits = - fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1; + __fls((1 << v->hash_dev_block_bits) / v->digest_size); v->levels = 0; if (v->data_blocks) @@ -831,9 +831,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) for (i = v->levels - 1; i >= 0; i--) { sector_t s; v->hash_level_block[i] = hash_position; - s = verity_position_at_level(v, v->data_blocks, i); - s = (s >> v->hash_per_block_bits) + - !!(s & ((1 << v->hash_per_block_bits) - 1)); + s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1) + >> ((i + 1) * v->hash_per_block_bits); if (hash_position + s < hash_position) { ti->error = "Hash device offset overflow"; r = -E2BIG; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d5370a9..9e39d2b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -117,15 +117,29 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_MERGE_IS_OPTIONAL 6 /* + * A dummy definition to make RCU happy. + * struct dm_table should never be dereferenced in this file. + */ +struct dm_table { + int undefined__; +}; + +/* * Work processed by per-device workqueue. */ struct mapped_device { - struct rw_semaphore io_lock; + struct srcu_struct io_barrier; struct mutex suspend_lock; - rwlock_t map_lock; atomic_t holders; atomic_t open_count; + /* + * The current mapping. + * Use dm_get_live_table{_fast} or take suspend_lock for + * dereference. + */ + struct dm_table *map; + unsigned long flags; struct request_queue *queue; @@ -155,11 +169,6 @@ struct mapped_device { struct workqueue_struct *wq; /* - * The current mapping. - */ - struct dm_table *map; - - /* * io objects are allocated from here. */ mempool_t *io_pool; @@ -386,10 +395,14 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct mapped_device *md = bdev->bd_disk->private_data; - struct dm_table *map = dm_get_live_table(md); + int srcu_idx; + struct dm_table *map; struct dm_target *tgt; int r = -ENOTTY; +retry: + map = dm_get_live_table(md, &srcu_idx); + if (!map || !dm_table_get_size(map)) goto out; @@ -408,7 +421,12 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, r = tgt->type->ioctl(tgt, cmd, arg); out: - dm_table_put(map); + dm_put_live_table(md, srcu_idx); + + if (r == -ENOTCONN) { + msleep(10); + goto retry; + } return r; } @@ -502,20 +520,39 @@ static void queue_io(struct mapped_device *md, struct bio *bio) /* * Everyone (including functions in this file), should use this * function to access the md->map field, and make sure they call - * dm_table_put() when finished. + * dm_put_live_table() when finished. */ -struct dm_table *dm_get_live_table(struct mapped_device *md) +struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier) { - struct dm_table *t; - unsigned long flags; + *srcu_idx = srcu_read_lock(&md->io_barrier); + + return srcu_dereference(md->map, &md->io_barrier); +} - read_lock_irqsave(&md->map_lock, flags); - t = md->map; - if (t) - dm_table_get(t); - read_unlock_irqrestore(&md->map_lock, flags); +void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier) +{ + srcu_read_unlock(&md->io_barrier, srcu_idx); +} - return t; +void dm_sync_table(struct mapped_device *md) +{ + synchronize_srcu(&md->io_barrier); + synchronize_rcu_expedited(); +} + +/* + * A fast alternative to dm_get_live_table/dm_put_live_table. + * The caller must not block between these two functions. + */ +static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU) +{ + rcu_read_lock(); + return rcu_dereference(md->map); +} + +static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) +{ + rcu_read_unlock(); } /* @@ -1349,17 +1386,18 @@ static int __split_and_process_non_flush(struct clone_info *ci) /* * Entry point to split a bio into clones and submit them to the targets. */ -static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) +static void __split_and_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) { struct clone_info ci; int error = 0; - ci.map = dm_get_live_table(md); - if (unlikely(!ci.map)) { + if (unlikely(!map)) { bio_io_error(bio); return; } + ci.map = map; ci.md = md; ci.io = alloc_io(md); ci.io->error = 0; @@ -1386,7 +1424,6 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, error); - dm_table_put(ci.map); } /*----------------------------------------------------------------- * CRUD END @@ -1397,7 +1434,7 @@ static int dm_merge_bvec(struct request_queue *q, struct bio_vec *biovec) { struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; sector_t max_sectors; int max_size = 0; @@ -1407,7 +1444,7 @@ static int dm_merge_bvec(struct request_queue *q, ti = dm_table_find_target(map, bvm->bi_sector); if (!dm_target_is_valid(ti)) - goto out_table; + goto out; /* * Find maximum amount of I/O that won't need splitting @@ -1436,10 +1473,8 @@ static int dm_merge_bvec(struct request_queue *q, max_size = 0; -out_table: - dm_table_put(map); - out: + dm_put_live_table_fast(md); /* * Always allow an entire first page */ @@ -1458,8 +1493,10 @@ static void _dm_request(struct request_queue *q, struct bio *bio) int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; + int srcu_idx; + struct dm_table *map; - down_read(&md->io_lock); + map = dm_get_live_table(md, &srcu_idx); cpu = part_stat_lock(); part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); @@ -1468,7 +1505,7 @@ static void _dm_request(struct request_queue *q, struct bio *bio) /* if we're suspended, we have to queue this io for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { - up_read(&md->io_lock); + dm_put_live_table(md, srcu_idx); if (bio_rw(bio) != READA) queue_io(md, bio); @@ -1477,8 +1514,8 @@ static void _dm_request(struct request_queue *q, struct bio *bio) return; } - __split_and_process_bio(md, bio); - up_read(&md->io_lock); + __split_and_process_bio(md, map, bio); + dm_put_live_table(md, srcu_idx); return; } @@ -1664,7 +1701,8 @@ static struct request *dm_start_request(struct mapped_device *md, struct request static void dm_request_fn(struct request_queue *q) { struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + int srcu_idx; + struct dm_table *map = dm_get_live_table(md, &srcu_idx); struct dm_target *ti; struct request *rq, *clone; sector_t pos; @@ -1719,7 +1757,7 @@ requeued: delay_and_out: blk_delay_queue(q, HZ / 10); out: - dm_table_put(map); + dm_put_live_table(md, srcu_idx); } int dm_underlying_device_busy(struct request_queue *q) @@ -1732,14 +1770,14 @@ static int dm_lld_busy(struct request_queue *q) { int r; struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); + struct dm_table *map = dm_get_live_table_fast(md); if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) r = 1; else r = dm_table_any_busy_target(map); - dm_table_put(map); + dm_put_live_table_fast(md); return r; } @@ -1751,7 +1789,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct dm_table *map; if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { - map = dm_get_live_table(md); + map = dm_get_live_table_fast(md); if (map) { /* * Request-based dm cares about only own queue for @@ -1762,9 +1800,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) bdi_bits; else r = dm_table_any_congested(map, bdi_bits); - - dm_table_put(map); } + dm_put_live_table_fast(md); } return r; @@ -1869,12 +1906,14 @@ static struct mapped_device *alloc_dev(int minor) if (r < 0) goto bad_minor; + r = init_srcu_struct(&md->io_barrier); + if (r < 0) + goto bad_io_barrier; + md->type = DM_TYPE_NONE; - init_rwsem(&md->io_lock); mutex_init(&md->suspend_lock); mutex_init(&md->type_lock); spin_lock_init(&md->deferred_lock); - rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); @@ -1937,6 +1976,8 @@ bad_thread: bad_disk: blk_cleanup_queue(md->queue); bad_queue: + cleanup_srcu_struct(&md->io_barrier); +bad_io_barrier: free_minor(minor); bad_minor: module_put(THIS_MODULE); @@ -1960,6 +2001,7 @@ static void free_dev(struct mapped_device *md) bioset_free(md->bs); blk_integrity_unregister(md->disk); del_gendisk(md->disk); + cleanup_srcu_struct(&md->io_barrier); free_minor(minor); spin_lock(&_minor_lock); @@ -2102,7 +2144,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct dm_table *old_map; struct request_queue *q = md->queue; sector_t size; - unsigned long flags; int merge_is_optional; size = dm_table_get_size(t); @@ -2131,9 +2172,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, merge_is_optional = dm_table_merge_is_optional(t); - write_lock_irqsave(&md->map_lock, flags); old_map = md->map; - md->map = t; + rcu_assign_pointer(md->map, t); md->immutable_target_type = dm_table_get_immutable_target_type(t); dm_table_set_restrictions(t, q, limits); @@ -2141,7 +2181,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); else clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - write_unlock_irqrestore(&md->map_lock, flags); + dm_sync_table(md); return old_map; } @@ -2152,15 +2192,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, static struct dm_table *__unbind(struct mapped_device *md) { struct dm_table *map = md->map; - unsigned long flags; if (!map) return NULL; dm_table_event_callback(map, NULL, NULL); - write_lock_irqsave(&md->map_lock, flags); - md->map = NULL; - write_unlock_irqrestore(&md->map_lock, flags); + rcu_assign_pointer(md->map, NULL); + dm_sync_table(md); return map; } @@ -2312,11 +2350,12 @@ EXPORT_SYMBOL_GPL(dm_device_name); static void __dm_destroy(struct mapped_device *md, bool wait) { struct dm_table *map; + int srcu_idx; might_sleep(); spin_lock(&_minor_lock); - map = dm_get_live_table(md); + map = dm_get_live_table(md, &srcu_idx); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); @@ -2326,6 +2365,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) dm_table_postsuspend_targets(map); } + /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ + dm_put_live_table(md, srcu_idx); + /* * Rare, but there may be I/O requests still going to complete, * for example. Wait for all references to disappear. @@ -2340,7 +2382,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) dm_device_name(md), atomic_read(&md->holders)); dm_sysfs_exit(md); - dm_table_put(map); dm_table_destroy(__unbind(md)); free_dev(md); } @@ -2397,8 +2438,10 @@ static void dm_wq_work(struct work_struct *work) struct mapped_device *md = container_of(work, struct mapped_device, work); struct bio *c; + int srcu_idx; + struct dm_table *map; - down_read(&md->io_lock); + map = dm_get_live_table(md, &srcu_idx); while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { spin_lock_irq(&md->deferred_lock); @@ -2408,17 +2451,13 @@ static void dm_wq_work(struct work_struct *work) if (!c) break; - up_read(&md->io_lock); - if (dm_request_based(md)) generic_make_request(c); else - __split_and_process_bio(md, c); - - down_read(&md->io_lock); + __split_and_process_bio(md, map, c); } - up_read(&md->io_lock); + dm_put_live_table(md, srcu_idx); } static void dm_queue_flush(struct mapped_device *md) @@ -2450,10 +2489,10 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) * reappear. */ if (dm_table_has_no_data_devices(table)) { - live_map = dm_get_live_table(md); + live_map = dm_get_live_table_fast(md); if (live_map) limits = md->queue->limits; - dm_table_put(live_map); + dm_put_live_table_fast(md); } if (!live_map) { @@ -2533,7 +2572,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) goto out_unlock; } - map = dm_get_live_table(md); + map = md->map; /* * DMF_NOFLUSH_SUSPENDING must be set before presuspend. @@ -2554,7 +2593,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) if (!noflush && do_lockfs) { r = lock_fs(md); if (r) - goto out; + goto out_unlock; } /* @@ -2569,9 +2608,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call * flush_workqueue(md->wq). */ - down_write(&md->io_lock); set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - up_write(&md->io_lock); + synchronize_srcu(&md->io_barrier); /* * Stop md->queue before flushing md->wq in case request-based @@ -2589,10 +2627,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) */ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); - down_write(&md->io_lock); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - up_write(&md->io_lock); + synchronize_srcu(&md->io_barrier); /* were we interrupted ? */ if (r < 0) { @@ -2602,7 +2639,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) start_queue(md->queue); unlock_fs(md); - goto out; /* pushback list is already flushed, so skip flush */ + goto out_unlock; /* pushback list is already flushed, so skip flush */ } /* @@ -2615,9 +2652,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) dm_table_postsuspend_targets(map); -out: - dm_table_put(map); - out_unlock: mutex_unlock(&md->suspend_lock); return r; @@ -2632,7 +2666,7 @@ int dm_resume(struct mapped_device *md) if (!dm_suspended_md(md)) goto out; - map = dm_get_live_table(md); + map = md->map; if (!map || !dm_table_get_size(map)) goto out; @@ -2656,7 +2690,6 @@ int dm_resume(struct mapped_device *md) r = 0; out: - dm_table_put(map); mutex_unlock(&md->suspend_lock); return r; diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 022dc63..3cd85a6 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -762,13 +762,6 @@ static void rproc_resource_cleanup(struct rproc *rproc) kfree(entry); } - /* clean up carveout allocations */ - list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { - dma_free_coherent(dev->parent, entry->len, entry->va, entry->dma); - list_del(&entry->node); - kfree(entry); - } - /* clean up iommu mapping entries */ list_for_each_entry_safe(entry, tmp, &rproc->mappings, node) { size_t unmapped; @@ -783,6 +776,13 @@ static void rproc_resource_cleanup(struct rproc *rproc) list_del(&entry->node); kfree(entry); } + + /* clean up carveout allocations */ + list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { + dma_free_coherent(dev->parent, entry->len, entry->va, entry->dma); + list_del(&entry->node); + kfree(entry); + } } /* @@ -815,18 +815,17 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) } rproc->bootaddr = rproc_get_boot_addr(rproc, fw); + ret = -EINVAL; /* look for the resource table */ table = rproc_find_rsc_table(rproc, fw, &tablesz); if (!table) { - ret = -EINVAL; goto clean_up; } /* Verify that resource table in loaded fw is unchanged */ if (rproc->table_csum != crc32(0, table, tablesz)) { dev_err(dev, "resource checksum failed, fw changed?\n"); - ret = -EINVAL; goto clean_up; } @@ -852,8 +851,10 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) * copy this information to device memory. */ loaded_table = rproc_find_loaded_rsc_table(rproc, fw); - if (!loaded_table) + if (!loaded_table) { + ret = -EINVAL; goto clean_up; + } memcpy(loaded_table, rproc->cached_table, tablesz); @@ -913,11 +914,10 @@ static void rproc_fw_config_virtio(const struct firmware *fw, void *context) * will be stored in the cached_table. Before the device is started, * cached_table will be copied into devic memory. */ - rproc->cached_table = kmalloc(tablesz, GFP_KERNEL); + rproc->cached_table = kmemdup(table, tablesz, GFP_KERNEL); if (!rproc->cached_table) goto out; - memcpy(rproc->cached_table, table, tablesz); rproc->table_ptr = rproc->cached_table; /* count the number of notify-ids */ diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c index 157a573..9d30809 100644 --- a/drivers/remoteproc/remoteproc_debugfs.c +++ b/drivers/remoteproc/remoteproc_debugfs.c @@ -248,6 +248,5 @@ void __init rproc_init_debugfs(void) void __exit rproc_exit_debugfs(void) { - if (rproc_dbg) - debugfs_remove(rproc_dbg); + debugfs_remove(rproc_dbg); } diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index 157e762..70701a5 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -107,12 +107,12 @@ struct resource_table *rproc_find_rsc_table(struct rproc *rproc, static inline struct resource_table *rproc_find_loaded_rsc_table(struct rproc *rproc, - const struct firmware *fw) + const struct firmware *fw) { if (rproc->fw_ops->find_loaded_rsc_table) return rproc->fw_ops->find_loaded_rsc_table(rproc, fw); - return NULL; + return NULL; } extern const struct rproc_fw_ops rproc_elf_fw_ops; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fcdc223..83a8f7a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -544,102 +544,6 @@ out_free_id_list: return res; } -static bool qlt_check_fcport_exist(struct scsi_qla_host *vha, - struct qla_tgt_sess *sess) -{ - struct qla_hw_data *ha = vha->hw; - struct qla_port_24xx_data *pmap24; - bool res, found = false; - int rc, i; - uint16_t loop_id = 0xFFFF; /* to eliminate compiler's warning */ - uint16_t entries; - void *pmap; - int pmap_len; - fc_port_t *fcport; - int global_resets; - unsigned long flags; - -retry: - global_resets = atomic_read(&ha->tgt.qla_tgt->tgt_global_resets_count); - - rc = qla2x00_get_node_name_list(vha, &pmap, &pmap_len); - if (rc != QLA_SUCCESS) { - res = false; - goto out; - } - - pmap24 = pmap; - entries = pmap_len/sizeof(*pmap24); - - for (i = 0; i < entries; ++i) { - if (!memcmp(sess->port_name, pmap24[i].port_name, WWN_SIZE)) { - loop_id = le16_to_cpu(pmap24[i].loop_id); - found = true; - break; - } - } - - kfree(pmap); - - if (!found) { - res = false; - goto out; - } - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf046, - "qlt_check_fcport_exist(): loop_id %d", loop_id); - - fcport = kzalloc(sizeof(*fcport), GFP_KERNEL); - if (fcport == NULL) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf047, - "qla_target(%d): Allocation of tmp FC port failed", - vha->vp_idx); - res = false; - goto out; - } - - fcport->loop_id = loop_id; - - rc = qla2x00_get_port_database(vha, fcport, 0); - if (rc != QLA_SUCCESS) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf048, - "qla_target(%d): Failed to retrieve fcport " - "information -- get_port_database() returned %x " - "(loop_id=0x%04x)", vha->vp_idx, rc, loop_id); - res = false; - goto out_free_fcport; - } - - if (global_resets != - atomic_read(&ha->tgt.qla_tgt->tgt_global_resets_count)) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf002, - "qla_target(%d): global reset during session discovery" - " (counter was %d, new %d), retrying", - vha->vp_idx, global_resets, - atomic_read(&ha->tgt.qla_tgt->tgt_global_resets_count)); - goto retry; - } - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf003, - "Updating sess %p s_id %x:%x:%x, loop_id %d) to d_id %x:%x:%x, " - "loop_id %d", sess, sess->s_id.b.domain, sess->s_id.b.al_pa, - sess->s_id.b.area, sess->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.al_pa, fcport->d_id.b.area, fcport->loop_id); - - spin_lock_irqsave(&ha->hardware_lock, flags); - ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, - (fcport->flags & FCF_CONF_COMP_SUPPORTED)); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - - res = true; - -out_free_fcport: - kfree(fcport); - -out: - return res; -} - /* ha->hardware_lock supposed to be held on entry */ static void qlt_undelete_sess(struct qla_tgt_sess *sess) { @@ -663,43 +567,13 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) sess = list_entry(tgt->del_sess_list.next, typeof(*sess), del_list_entry); if (time_after_eq(jiffies, sess->expires)) { - bool cancel; - qlt_undelete_sess(sess); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - cancel = qlt_check_fcport_exist(vha, sess); - - if (cancel) { - if (sess->deleted) { - /* - * sess was again deleted while we were - * discovering it - */ - spin_lock_irqsave(&ha->hardware_lock, - flags); - continue; - } - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf049, - "qla_target(%d): cancel deletion of " - "session for port %02x:%02x:%02x:%02x:%02x:" - "%02x:%02x:%02x (loop ID %d), because " - " it isn't deleted by firmware", - vha->vp_idx, sess->port_name[0], - sess->port_name[1], sess->port_name[2], - sess->port_name[3], sess->port_name[4], - sess->port_name[5], sess->port_name[6], - sess->port_name[7], sess->loop_id); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, - "Timeout: sess %p about to be deleted\n", - sess); - ha->tgt.tgt_ops->shutdown_sess(sess); - ha->tgt.tgt_ops->put_sess(sess); - } - - spin_lock_irqsave(&ha->hardware_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, + "Timeout: sess %p about to be deleted\n", + sess); + ha->tgt.tgt_ops->shutdown_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); } else { schedule_delayed_work(&tgt->sess_del_work, jiffies - sess->expires); @@ -884,9 +758,8 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) sess->loop_id); sess->local = 0; } - spin_unlock_irqrestore(&ha->hardware_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) @@ -2706,7 +2579,9 @@ static void qlt_do_work(struct work_struct *work) /* * Drop extra session reference from qla_tgt_handle_cmd_for_atio*( */ + spin_lock_irqsave(&ha->hardware_lock, flags); ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return; out_term: @@ -2718,9 +2593,9 @@ out_term: spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &cmd->atio, 1); kmem_cache_free(qla_tgt_cmd_cachep, cmd); - spin_unlock_irqrestore(&ha->hardware_lock, flags); if (sess) ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } /* ha->hardware_lock supposed to be held on entry */ @@ -4169,16 +4044,16 @@ static void qlt_abort_work(struct qla_tgt *tgt, rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess); if (rc != 0) goto out_term; - spin_unlock_irqrestore(&ha->hardware_lock, flags); ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return; out_term: qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); - spin_unlock_irqrestore(&ha->hardware_lock, flags); if (sess) ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } static void qlt_tmr_work(struct qla_tgt *tgt, @@ -4226,16 +4101,16 @@ static void qlt_tmr_work(struct qla_tgt *tgt, rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); if (rc != 0) goto out_term; - spin_unlock_irqrestore(&ha->hardware_lock, flags); ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return; out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1); - spin_unlock_irqrestore(&ha->hardware_lock, flags); if (sess) ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } static void qlt_sess_work_fn(struct work_struct *work) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 66b0b26..a318092 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -703,7 +703,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) return qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status); } -static int tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) +static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) { struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; struct qla_tgt_mgmt_cmd *mcmd = container_of(se_cmd, @@ -735,8 +735,6 @@ static int tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) * CTIO response packet. */ qlt_xmit_tm_rsp(mcmd); - - return 0; } /* Local pointer to allocated TCM configfs fabric module */ @@ -799,12 +797,14 @@ static void tcm_qla2xxx_put_session(struct se_session *se_sess) static void tcm_qla2xxx_put_sess(struct qla_tgt_sess *sess) { - tcm_qla2xxx_put_session(sess->se_sess); + assert_spin_locked(&sess->vha->hw->hardware_lock); + kref_put(&sess->se_sess->sess_kref, tcm_qla2xxx_release_session); } static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) { - tcm_qla2xxx_shutdown_session(sess->se_sess); + assert_spin_locked(&sess->vha->hw->hardware_lock); + target_sess_cmd_list_set_waiting(sess->se_sess); } static struct se_node_acl *tcm_qla2xxx_make_nodeacl( diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index f64b662..3227ebe 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -120,8 +120,6 @@ source "drivers/staging/gdm72xx/Kconfig" source "drivers/staging/csr/Kconfig" -source "drivers/staging/ti-soc-thermal/Kconfig" - source "drivers/staging/silicom/Kconfig" source "drivers/staging/ced1401/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 1fb58a1..4d79ebe 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -53,7 +53,6 @@ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_USB_WPAN_HCD) += ozwpan/ obj-$(CONFIG_WIMAX_GDM72XX) += gdm72xx/ obj-$(CONFIG_CSR_WIFI) += csr/ -obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_NET_VENDOR_SILICOM) += silicom/ obj-$(CONFIG_CED1401) += ced1401/ obj-$(CONFIG_DRM_IMX) += imx-drm/ diff --git a/drivers/staging/ti-soc-thermal/Kconfig b/drivers/staging/ti-soc-thermal/Kconfig deleted file mode 100644 index e81375f..0000000 --- a/drivers/staging/ti-soc-thermal/Kconfig +++ /dev/null @@ -1,48 +0,0 @@ -config TI_SOC_THERMAL - tristate "Texas Instruments SoCs temperature sensor driver" - depends on THERMAL - depends on ARCH_HAS_BANDGAP - help - If you say yes here you get support for the Texas Instruments - OMAP4460+ on die bandgap temperature sensor support. The register - set is part of system control module. - - This includes alert interrupts generation and also the TSHUT - support. - -config TI_THERMAL - bool "Texas Instruments SoCs thermal framework support" - depends on TI_SOC_THERMAL - depends on CPU_THERMAL - help - If you say yes here you want to get support for generic thermal - framework for the Texas Instruments on die bandgap temperature sensor. - - This includes trip points definitions, extrapolation rules and - CPU cooling device bindings. - -config OMAP4_THERMAL - bool "Texas Instruments OMAP4 thermal support" - depends on TI_SOC_THERMAL - depends on ARCH_OMAP4 - help - If you say yes here you get thermal support for the Texas Instruments - OMAP4 SoC family. The current chip supported are: - - OMAP4430 - - OMAP4460 - - OMAP4470 - - This includes alert interrupts generation and also the TSHUT - support. - -config OMAP5_THERMAL - bool "Texas Instruments OMAP5 thermal support" - depends on TI_SOC_THERMAL - depends on SOC_OMAP5 - help - If you say yes here you get thermal support for the Texas Instruments - OMAP5 SoC family. The current chip supported are: - - OMAP5430 - - This includes alert interrupts generation and also the TSHUT - support. diff --git a/drivers/staging/ti-soc-thermal/Makefile b/drivers/staging/ti-soc-thermal/Makefile deleted file mode 100644 index 0ca034f..0000000 --- a/drivers/staging/ti-soc-thermal/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal.o -ti-soc-thermal-y := ti-bandgap.o -ti-soc-thermal-$(CONFIG_TI_THERMAL) += ti-thermal-common.o -ti-soc-thermal-$(CONFIG_OMAP4_THERMAL) += omap4-thermal-data.o -ti-soc-thermal-$(CONFIG_OMAP5_THERMAL) += omap5-thermal-data.o diff --git a/drivers/staging/ti-soc-thermal/TODO b/drivers/staging/ti-soc-thermal/TODO deleted file mode 100644 index 7da787d..0000000 --- a/drivers/staging/ti-soc-thermal/TODO +++ /dev/null @@ -1,12 +0,0 @@ -List of TODOs (by Eduardo Valentin) - -on ti-bandgap.c: -- Revisit PM support - -on ti-thermal-common.c/ti-thermal.h: -- Revisit need for locking - -generally: -- make sure this code works on OMAP4430, OMAP4460 and OMAP5430 - -Copy patches to Eduardo Valentin diff --git a/drivers/staging/ti-soc-thermal/omap4-thermal-data.c b/drivers/staging/ti-soc-thermal/omap4-thermal-data.c deleted file mode 100644 index d255d33..0000000 --- a/drivers/staging/ti-soc-thermal/omap4-thermal-data.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * OMAP4 thermal driver. - * - * Copyright (C) 2011-2012 Texas Instruments Inc. - * Contact: - * Eduardo Valentin - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "ti-thermal.h" -#include "ti-bandgap.h" -#include "omap4xxx-bandgap.h" - -/* - * OMAP4430 has one instance of thermal sensor for MPU - * need to describe the individual bit fields - */ -static struct temp_sensor_registers -omap4430_mpu_temp_sensor_registers = { - .temp_sensor_ctrl = OMAP4430_TEMP_SENSOR_CTRL_OFFSET, - .bgap_tempsoff_mask = OMAP4430_BGAP_TEMPSOFF_MASK, - .bgap_soc_mask = OMAP4430_BGAP_TEMP_SENSOR_SOC_MASK, - .bgap_eocz_mask = OMAP4430_BGAP_TEMP_SENSOR_EOCZ_MASK, - .bgap_dtemp_mask = OMAP4430_BGAP_TEMP_SENSOR_DTEMP_MASK, - - .bgap_mode_ctrl = OMAP4430_TEMP_SENSOR_CTRL_OFFSET, - .mode_ctrl_mask = OMAP4430_SINGLE_MODE_MASK, - - .bgap_efuse = OMAP4430_FUSE_OPP_BGAP, -}; - -/* Thresholds and limits for OMAP4430 MPU temperature sensor */ -static struct temp_sensor_data omap4430_mpu_temp_sensor_data = { - .min_freq = OMAP4430_MIN_FREQ, - .max_freq = OMAP4430_MAX_FREQ, - .max_temp = OMAP4430_MAX_TEMP, - .min_temp = OMAP4430_MIN_TEMP, - .hyst_val = OMAP4430_HYST_VAL, -}; - -/* - * Temperature values in milli degree celsius - * ADC code values from 530 to 923 - */ -static const int -omap4430_adc_to_temp[OMAP4430_ADC_END_VALUE - OMAP4430_ADC_START_VALUE + 1] = { - -38000, -35000, -34000, -32000, -30000, -28000, -26000, -24000, -22000, - -20000, -18000, -17000, -15000, -13000, -12000, -10000, -8000, -6000, - -5000, -3000, -1000, 0, 2000, 3000, 5000, 6000, 8000, 10000, 12000, - 13000, 15000, 17000, 19000, 21000, 23000, 25000, 27000, 28000, 30000, - 32000, 33000, 35000, 37000, 38000, 40000, 42000, 43000, 45000, 47000, - 48000, 50000, 52000, 53000, 55000, 57000, 58000, 60000, 62000, 64000, - 66000, 68000, 70000, 71000, 73000, 75000, 77000, 78000, 80000, 82000, - 83000, 85000, 87000, 88000, 90000, 92000, 93000, 95000, 97000, 98000, - 100000, 102000, 103000, 105000, 107000, 109000, 111000, 113000, 115000, - 117000, 118000, 120000, 122000, 123000, -}; - -/* OMAP4430 data */ -const struct ti_bandgap_data omap4430_data = { - .features = TI_BANDGAP_FEATURE_MODE_CONFIG | - TI_BANDGAP_FEATURE_CLK_CTRL | - TI_BANDGAP_FEATURE_POWER_SWITCH, - .fclock_name = "bandgap_fclk", - .div_ck_name = "bandgap_fclk", - .conv_table = omap4430_adc_to_temp, - .adc_start_val = OMAP4430_ADC_START_VALUE, - .adc_end_val = OMAP4430_ADC_END_VALUE, - .expose_sensor = ti_thermal_expose_sensor, - .remove_sensor = ti_thermal_remove_sensor, - .sensors = { - { - .registers = &omap4430_mpu_temp_sensor_registers, - .ts_data = &omap4430_mpu_temp_sensor_data, - .domain = "cpu", - .slope = OMAP_GRADIENT_SLOPE_4430, - .constant = OMAP_GRADIENT_CONST_4430, - .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4430, - .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4430, - .register_cooling = ti_thermal_register_cpu_cooling, - .unregister_cooling = ti_thermal_unregister_cpu_cooling, - }, - }, - .sensor_count = 1, -}; -/* - * OMAP4460 has one instance of thermal sensor for MPU - * need to describe the individual bit fields - */ -static struct temp_sensor_registers -omap4460_mpu_temp_sensor_registers = { - .temp_sensor_ctrl = OMAP4460_TEMP_SENSOR_CTRL_OFFSET, - .bgap_tempsoff_mask = OMAP4460_BGAP_TEMPSOFF_MASK, - .bgap_soc_mask = OMAP4460_BGAP_TEMP_SENSOR_SOC_MASK, - .bgap_eocz_mask = OMAP4460_BGAP_TEMP_SENSOR_EOCZ_MASK, - .bgap_dtemp_mask = OMAP4460_BGAP_TEMP_SENSOR_DTEMP_MASK, - - .bgap_mask_ctrl = OMAP4460_BGAP_CTRL_OFFSET, - .mask_hot_mask = OMAP4460_MASK_HOT_MASK, - .mask_cold_mask = OMAP4460_MASK_COLD_MASK, - - .bgap_mode_ctrl = OMAP4460_BGAP_CTRL_OFFSET, - .mode_ctrl_mask = OMAP4460_SINGLE_MODE_MASK, - - .bgap_counter = OMAP4460_BGAP_COUNTER_OFFSET, - .counter_mask = OMAP4460_COUNTER_MASK, - - .bgap_threshold = OMAP4460_BGAP_THRESHOLD_OFFSET, - .threshold_thot_mask = OMAP4460_T_HOT_MASK, - .threshold_tcold_mask = OMAP4460_T_COLD_MASK, - - .tshut_threshold = OMAP4460_BGAP_TSHUT_OFFSET, - .tshut_hot_mask = OMAP4460_TSHUT_HOT_MASK, - .tshut_cold_mask = OMAP4460_TSHUT_COLD_MASK, - - .bgap_status = OMAP4460_BGAP_STATUS_OFFSET, - .status_clean_stop_mask = OMAP4460_CLEAN_STOP_MASK, - .status_bgap_alert_mask = OMAP4460_BGAP_ALERT_MASK, - .status_hot_mask = OMAP4460_HOT_FLAG_MASK, - .status_cold_mask = OMAP4460_COLD_FLAG_MASK, - - .bgap_efuse = OMAP4460_FUSE_OPP_BGAP, -}; - -/* Thresholds and limits for OMAP4460 MPU temperature sensor */ -static struct temp_sensor_data omap4460_mpu_temp_sensor_data = { - .tshut_hot = OMAP4460_TSHUT_HOT, - .tshut_cold = OMAP4460_TSHUT_COLD, - .t_hot = OMAP4460_T_HOT, - .t_cold = OMAP4460_T_COLD, - .min_freq = OMAP4460_MIN_FREQ, - .max_freq = OMAP4460_MAX_FREQ, - .max_temp = OMAP4460_MAX_TEMP, - .min_temp = OMAP4460_MIN_TEMP, - .hyst_val = OMAP4460_HYST_VAL, - .update_int1 = 1000, - .update_int2 = 2000, -}; - -/* - * Temperature values in milli degree celsius - * ADC code values from 530 to 923 - */ -static const int -omap4460_adc_to_temp[OMAP4460_ADC_END_VALUE - OMAP4460_ADC_START_VALUE + 1] = { - -40000, -40000, -40000, -40000, -39800, -39400, -39000, -38600, -38200, - -37800, -37300, -36800, -36400, -36000, -35600, -35200, -34800, - -34300, -33800, -33400, -33000, -32600, -32200, -31800, -31300, - -30800, -30400, -30000, -29600, -29200, -28700, -28200, -27800, - -27400, -27000, -26600, -26200, -25700, -25200, -24800, -24400, - -24000, -23600, -23200, -22700, -22200, -21800, -21400, -21000, - -20600, -20200, -19700, -19200, -18800, -18400, -18000, -17600, - -17200, -16700, -16200, -15800, -15400, -15000, -14600, -14200, - -13700, -13200, -12800, -12400, -12000, -11600, -11200, -10700, - -10200, -9800, -9400, -9000, -8600, -8200, -7700, -7200, -6800, - -6400, -6000, -5600, -5200, -4800, -4300, -3800, -3400, -3000, - -2600, -2200, -1800, -1300, -800, -400, 0, 400, 800, 1200, 1600, - 2100, 2600, 3000, 3400, 3800, 4200, 4600, 5100, 5600, 6000, 6400, - 6800, 7200, 7600, 8000, 8500, 9000, 9400, 9800, 10200, 10600, 11000, - 11400, 11900, 12400, 12800, 13200, 13600, 14000, 14400, 14800, - 15300, 15800, 16200, 16600, 17000, 17400, 17800, 18200, 18700, - 19200, 19600, 20000, 20400, 20800, 21200, 21600, 22100, 22600, - 23000, 23400, 23800, 24200, 24600, 25000, 25400, 25900, 26400, - 26800, 27200, 27600, 28000, 28400, 28800, 29300, 29800, 30200, - 30600, 31000, 31400, 31800, 32200, 32600, 33100, 33600, 34000, - 34400, 34800, 35200, 35600, 36000, 36400, 36800, 37300, 37800, - 38200, 38600, 39000, 39400, 39800, 40200, 40600, 41100, 41600, - 42000, 42400, 42800, 43200, 43600, 44000, 44400, 44800, 45300, - 45800, 46200, 46600, 47000, 47400, 47800, 48200, 48600, 49000, - 49500, 50000, 50400, 50800, 51200, 51600, 52000, 52400, 52800, - 53200, 53700, 54200, 54600, 55000, 55400, 55800, 56200, 56600, - 57000, 57400, 57800, 58200, 58700, 59200, 59600, 60000, 60400, - 60800, 61200, 61600, 62000, 62400, 62800, 63300, 63800, 64200, - 64600, 65000, 65400, 65800, 66200, 66600, 67000, 67400, 67800, - 68200, 68700, 69200, 69600, 70000, 70400, 70800, 71200, 71600, - 72000, 72400, 72800, 73200, 73600, 74100, 74600, 75000, 75400, - 75800, 76200, 76600, 77000, 77400, 77800, 78200, 78600, 79000, - 79400, 79800, 80300, 80800, 81200, 81600, 82000, 82400, 82800, - 83200, 83600, 84000, 84400, 84800, 85200, 85600, 86000, 86400, - 86800, 87300, 87800, 88200, 88600, 89000, 89400, 89800, 90200, - 90600, 91000, 91400, 91800, 92200, 92600, 93000, 93400, 93800, - 94200, 94600, 95000, 95500, 96000, 96400, 96800, 97200, 97600, - 98000, 98400, 98800, 99200, 99600, 100000, 100400, 100800, 101200, - 101600, 102000, 102400, 102800, 103200, 103600, 104000, 104400, - 104800, 105200, 105600, 106100, 106600, 107000, 107400, 107800, - 108200, 108600, 109000, 109400, 109800, 110200, 110600, 111000, - 111400, 111800, 112200, 112600, 113000, 113400, 113800, 114200, - 114600, 115000, 115400, 115800, 116200, 116600, 117000, 117400, - 117800, 118200, 118600, 119000, 119400, 119800, 120200, 120600, - 121000, 121400, 121800, 122200, 122600, 123000, 123400, 123800, 124200, - 124600, 124900, 125000, 125000, 125000, 125000 -}; - -/* OMAP4460 data */ -const struct ti_bandgap_data omap4460_data = { - .features = TI_BANDGAP_FEATURE_TSHUT | - TI_BANDGAP_FEATURE_TSHUT_CONFIG | - TI_BANDGAP_FEATURE_TALERT | - TI_BANDGAP_FEATURE_MODE_CONFIG | - TI_BANDGAP_FEATURE_POWER_SWITCH | - TI_BANDGAP_FEATURE_CLK_CTRL | - TI_BANDGAP_FEATURE_COUNTER, - .fclock_name = "bandgap_ts_fclk", - .div_ck_name = "div_ts_ck", - .conv_table = omap4460_adc_to_temp, - .adc_start_val = OMAP4460_ADC_START_VALUE, - .adc_end_val = OMAP4460_ADC_END_VALUE, - .expose_sensor = ti_thermal_expose_sensor, - .remove_sensor = ti_thermal_remove_sensor, - .report_temperature = ti_thermal_report_sensor_temperature, - .sensors = { - { - .registers = &omap4460_mpu_temp_sensor_registers, - .ts_data = &omap4460_mpu_temp_sensor_data, - .domain = "cpu", - .slope = OMAP_GRADIENT_SLOPE_4460, - .constant = OMAP_GRADIENT_CONST_4460, - .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4460, - .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4460, - .register_cooling = ti_thermal_register_cpu_cooling, - .unregister_cooling = ti_thermal_unregister_cpu_cooling, - }, - }, - .sensor_count = 1, -}; - -/* OMAP4470 data */ -const struct ti_bandgap_data omap4470_data = { - .features = TI_BANDGAP_FEATURE_TSHUT | - TI_BANDGAP_FEATURE_TSHUT_CONFIG | - TI_BANDGAP_FEATURE_TALERT | - TI_BANDGAP_FEATURE_MODE_CONFIG | - TI_BANDGAP_FEATURE_POWER_SWITCH | - TI_BANDGAP_FEATURE_CLK_CTRL | - TI_BANDGAP_FEATURE_COUNTER, - .fclock_name = "bandgap_ts_fclk", - .div_ck_name = "div_ts_ck", - .conv_table = omap4460_adc_to_temp, - .adc_start_val = OMAP4460_ADC_START_VALUE, - .adc_end_val = OMAP4460_ADC_END_VALUE, - .expose_sensor = ti_thermal_expose_sensor, - .remove_sensor = ti_thermal_remove_sensor, - .report_temperature = ti_thermal_report_sensor_temperature, - .sensors = { - { - .registers = &omap4460_mpu_temp_sensor_registers, - .ts_data = &omap4460_mpu_temp_sensor_data, - .domain = "cpu", - .slope = OMAP_GRADIENT_SLOPE_4470, - .constant = OMAP_GRADIENT_CONST_4470, - .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4470, - .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4470, - .register_cooling = ti_thermal_register_cpu_cooling, - .unregister_cooling = ti_thermal_unregister_cpu_cooling, - }, - }, - .sensor_count = 1, -}; diff --git a/drivers/staging/ti-soc-thermal/omap4xxx-bandgap.h b/drivers/staging/ti-soc-thermal/omap4xxx-bandgap.h deleted file mode 100644 index 6f2de3a..0000000 --- a/drivers/staging/ti-soc-thermal/omap4xxx-bandgap.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * OMAP4xxx bandgap registers, bitfields and temperature definitions - * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ - * Contact: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#ifndef __OMAP4XXX_BANDGAP_H -#define __OMAP4XXX_BANDGAP_H - -/** - * *** OMAP4430 *** - * - * Below, in sequence, are the Register definitions, - * the bitfields and the temperature definitions for OMAP4430. - */ - -/** - * OMAP4430 register definitions - * - * Registers are defined as offsets. The offsets are - * relative to FUSE_OPP_BGAP on 4430. - */ - -/* OMAP4430.FUSE_OPP_BGAP */ -#define OMAP4430_FUSE_OPP_BGAP 0x0 - -/* OMAP4430.TEMP_SENSOR */ -#define OMAP4430_TEMP_SENSOR_CTRL_OFFSET 0xCC - -/** - * Register and bit definitions for OMAP4430 - * - * All the macros bellow define the required bits for - * controlling temperature on OMAP4430. Bit defines are - * grouped by register. - */ - -/* OMAP4430.TEMP_SENSOR bits */ -#define OMAP4430_BGAP_TEMPSOFF_MASK BIT(12) -#define OMAP4430_BGAP_TSHUT_MASK BIT(11) -#define OMAP4430_SINGLE_MODE_MASK BIT(10) -#define OMAP4430_BGAP_TEMP_SENSOR_SOC_MASK BIT(9) -#define OMAP4430_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(8) -#define OMAP4430_BGAP_TEMP_SENSOR_DTEMP_MASK (0xff << 0) - -/** - * Temperature limits and thresholds for OMAP4430 - * - * All the macros bellow are definitions for handling the - * ADC conversions and representation of temperature limits - * and thresholds for OMAP4430. - */ - -/* ADC conversion table limits */ -#define OMAP4430_ADC_START_VALUE 0 -#define OMAP4430_ADC_END_VALUE 127 -/* bandgap clock limits (no control on 4430) */ -#define OMAP4430_MAX_FREQ 32768 -#define OMAP4430_MIN_FREQ 32768 -/* sensor limits */ -#define OMAP4430_MIN_TEMP -40000 -#define OMAP4430_MAX_TEMP 125000 -#define OMAP4430_HYST_VAL 5000 - -/** - * *** OMAP4460 *** Applicable for OMAP4470 - * - * Below, in sequence, are the Register definitions, - * the bitfields and the temperature definitions for OMAP4460. - */ - -/** - * OMAP4460 register definitions - * - * Registers are defined as offsets. The offsets are - * relative to FUSE_OPP_BGAP on 4460. - */ - -/* OMAP4460.FUSE_OPP_BGAP */ -#define OMAP4460_FUSE_OPP_BGAP 0x0 - -/* OMAP4460.TEMP_SENSOR */ -#define OMAP4460_TEMP_SENSOR_CTRL_OFFSET 0xCC - -/* OMAP4460.BANDGAP_CTRL */ -#define OMAP4460_BGAP_CTRL_OFFSET 0x118 - -/* OMAP4460.BANDGAP_COUNTER */ -#define OMAP4460_BGAP_COUNTER_OFFSET 0x11C - -/* OMAP4460.BANDGAP_THRESHOLD */ -#define OMAP4460_BGAP_THRESHOLD_OFFSET 0x120 - -/* OMAP4460.TSHUT_THRESHOLD */ -#define OMAP4460_BGAP_TSHUT_OFFSET 0x124 - -/* OMAP4460.BANDGAP_STATUS */ -#define OMAP4460_BGAP_STATUS_OFFSET 0x128 - -/** - * Register bitfields for OMAP4460 - * - * All the macros bellow define the required bits for - * controlling temperature on OMAP4460. Bit defines are - * grouped by register. - */ -/* OMAP4460.TEMP_SENSOR bits */ -#define OMAP4460_BGAP_TEMPSOFF_MASK BIT(13) -#define OMAP4460_BGAP_TEMP_SENSOR_SOC_MASK BIT(11) -#define OMAP4460_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(10) -#define OMAP4460_BGAP_TEMP_SENSOR_DTEMP_MASK (0x3ff << 0) - -/* OMAP4460.BANDGAP_CTRL bits */ -#define OMAP4460_SINGLE_MODE_MASK BIT(31) -#define OMAP4460_MASK_HOT_MASK BIT(1) -#define OMAP4460_MASK_COLD_MASK BIT(0) - -/* OMAP4460.BANDGAP_COUNTER bits */ -#define OMAP4460_COUNTER_MASK (0xffffff << 0) - -/* OMAP4460.BANDGAP_THRESHOLD bits */ -#define OMAP4460_T_HOT_MASK (0x3ff << 16) -#define OMAP4460_T_COLD_MASK (0x3ff << 0) - -/* OMAP4460.TSHUT_THRESHOLD bits */ -#define OMAP4460_TSHUT_HOT_MASK (0x3ff << 16) -#define OMAP4460_TSHUT_COLD_MASK (0x3ff << 0) - -/* OMAP4460.BANDGAP_STATUS bits */ -#define OMAP4460_CLEAN_STOP_MASK BIT(3) -#define OMAP4460_BGAP_ALERT_MASK BIT(2) -#define OMAP4460_HOT_FLAG_MASK BIT(1) -#define OMAP4460_COLD_FLAG_MASK BIT(0) - -/** - * Temperature limits and thresholds for OMAP4460 - * - * All the macros bellow are definitions for handling the - * ADC conversions and representation of temperature limits - * and thresholds for OMAP4460. - */ - -/* ADC conversion table limits */ -#define OMAP4460_ADC_START_VALUE 530 -#define OMAP4460_ADC_END_VALUE 932 -/* bandgap clock limits */ -#define OMAP4460_MAX_FREQ 1500000 -#define OMAP4460_MIN_FREQ 1000000 -/* sensor limits */ -#define OMAP4460_MIN_TEMP -40000 -#define OMAP4460_MAX_TEMP 123000 -#define OMAP4460_HYST_VAL 5000 -/* interrupts thresholds */ -#define OMAP4460_TSHUT_HOT 900 /* 122 deg C */ -#define OMAP4460_TSHUT_COLD 895 /* 100 deg C */ -#define OMAP4460_T_HOT 800 /* 73 deg C */ -#define OMAP4460_T_COLD 795 /* 71 deg C */ - -#endif /* __OMAP4XXX_BANDGAP_H */ diff --git a/drivers/staging/ti-soc-thermal/omap5-thermal-data.c b/drivers/staging/ti-soc-thermal/omap5-thermal-data.c deleted file mode 100644 index eff0c80..0000000 --- a/drivers/staging/ti-soc-thermal/omap5-thermal-data.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * OMAP5 thermal driver. - * - * Copyright (C) 2011-2012 Texas Instruments Inc. - * Contact: - * Eduardo Valentin - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "ti-thermal.h" -#include "ti-bandgap.h" -#include "omap5xxx-bandgap.h" - -/* - * OMAP5430 has three instances of thermal sensor for MPU, GPU & CORE, - * need to describe the individual registers and bit fields. - */ - -/* - * OMAP5430 MPU thermal sensor register offset and bit-fields - */ -static struct temp_sensor_registers -omap5430_mpu_temp_sensor_registers = { - .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_MPU_OFFSET, - .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, - .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, - .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, - - .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, - .mask_hot_mask = OMAP5430_MASK_HOT_MPU_MASK, - .mask_cold_mask = OMAP5430_MASK_COLD_MPU_MASK, - .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, - .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, - .mask_freeze_mask = OMAP5430_MASK_FREEZE_MPU_MASK, - .mask_clear_mask = OMAP5430_MASK_CLEAR_MPU_MASK, - .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_MPU_MASK, - - - .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, - .counter_mask = OMAP5430_COUNTER_MASK, - - .bgap_threshold = OMAP5430_BGAP_THRESHOLD_MPU_OFFSET, - .threshold_thot_mask = OMAP5430_T_HOT_MASK, - .threshold_tcold_mask = OMAP5430_T_COLD_MASK, - - .tshut_threshold = OMAP5430_BGAP_TSHUT_MPU_OFFSET, - .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, - .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, - - .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, - .status_clean_stop_mask = 0x0, - .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, - .status_hot_mask = OMAP5430_HOT_MPU_FLAG_MASK, - .status_cold_mask = OMAP5430_COLD_MPU_FLAG_MASK, - - .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_MPU_OFFSET, - .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_MPU_0_OFFSET, - .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_MPU_1_OFFSET, - .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_MPU_2_OFFSET, - .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_MPU_3_OFFSET, - .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_MPU_4_OFFSET, - .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_MPU, -}; - -/* - * OMAP5430 GPU thermal sensor register offset and bit-fields - */ -static struct temp_sensor_registers -omap5430_gpu_temp_sensor_registers = { - .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_GPU_OFFSET, - .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, - .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, - .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, - - .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, - .mask_hot_mask = OMAP5430_MASK_HOT_GPU_MASK, - .mask_cold_mask = OMAP5430_MASK_COLD_GPU_MASK, - .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, - .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, - .mask_freeze_mask = OMAP5430_MASK_FREEZE_GPU_MASK, - .mask_clear_mask = OMAP5430_MASK_CLEAR_GPU_MASK, - .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_GPU_MASK, - - .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, - .counter_mask = OMAP5430_COUNTER_MASK, - - .bgap_threshold = OMAP5430_BGAP_THRESHOLD_GPU_OFFSET, - .threshold_thot_mask = OMAP5430_T_HOT_MASK, - .threshold_tcold_mask = OMAP5430_T_COLD_MASK, - - .tshut_threshold = OMAP5430_BGAP_TSHUT_GPU_OFFSET, - .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, - .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, - - .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, - .status_clean_stop_mask = 0x0, - .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, - .status_hot_mask = OMAP5430_HOT_GPU_FLAG_MASK, - .status_cold_mask = OMAP5430_COLD_GPU_FLAG_MASK, - - .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_GPU_OFFSET, - .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_GPU_0_OFFSET, - .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_GPU_1_OFFSET, - .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_GPU_2_OFFSET, - .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_GPU_3_OFFSET, - .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_GPU_4_OFFSET, - - .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_GPU, -}; - -/* - * OMAP5430 CORE thermal sensor register offset and bit-fields - */ -static struct temp_sensor_registers -omap5430_core_temp_sensor_registers = { - .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_CORE_OFFSET, - .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, - .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, - .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, - - .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, - .mask_hot_mask = OMAP5430_MASK_HOT_CORE_MASK, - .mask_cold_mask = OMAP5430_MASK_COLD_CORE_MASK, - .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, - .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, - .mask_freeze_mask = OMAP5430_MASK_FREEZE_CORE_MASK, - .mask_clear_mask = OMAP5430_MASK_CLEAR_CORE_MASK, - .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_CORE_MASK, - - .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, - .counter_mask = OMAP5430_COUNTER_MASK, - - .bgap_threshold = OMAP5430_BGAP_THRESHOLD_CORE_OFFSET, - .threshold_thot_mask = OMAP5430_T_HOT_MASK, - .threshold_tcold_mask = OMAP5430_T_COLD_MASK, - - .tshut_threshold = OMAP5430_BGAP_TSHUT_CORE_OFFSET, - .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, - .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, - - .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, - .status_clean_stop_mask = 0x0, - .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, - .status_hot_mask = OMAP5430_HOT_CORE_FLAG_MASK, - .status_cold_mask = OMAP5430_COLD_CORE_FLAG_MASK, - - .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_CORE_OFFSET, - .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_CORE_0_OFFSET, - .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_CORE_1_OFFSET, - .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_CORE_2_OFFSET, - .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_CORE_3_OFFSET, - .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_CORE_4_OFFSET, - - .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_CORE, -}; - -/* Thresholds and limits for OMAP5430 MPU temperature sensor */ -static struct temp_sensor_data omap5430_mpu_temp_sensor_data = { - .tshut_hot = OMAP5430_MPU_TSHUT_HOT, - .tshut_cold = OMAP5430_MPU_TSHUT_COLD, - .t_hot = OMAP5430_MPU_T_HOT, - .t_cold = OMAP5430_MPU_T_COLD, - .min_freq = OMAP5430_MPU_MIN_FREQ, - .max_freq = OMAP5430_MPU_MAX_FREQ, - .max_temp = OMAP5430_MPU_MAX_TEMP, - .min_temp = OMAP5430_MPU_MIN_TEMP, - .hyst_val = OMAP5430_MPU_HYST_VAL, - .update_int1 = 1000, - .update_int2 = 2000, -}; - -/* Thresholds and limits for OMAP5430 GPU temperature sensor */ -static struct temp_sensor_data omap5430_gpu_temp_sensor_data = { - .tshut_hot = OMAP5430_GPU_TSHUT_HOT, - .tshut_cold = OMAP5430_GPU_TSHUT_COLD, - .t_hot = OMAP5430_GPU_T_HOT, - .t_cold = OMAP5430_GPU_T_COLD, - .min_freq = OMAP5430_GPU_MIN_FREQ, - .max_freq = OMAP5430_GPU_MAX_FREQ, - .max_temp = OMAP5430_GPU_MAX_TEMP, - .min_temp = OMAP5430_GPU_MIN_TEMP, - .hyst_val = OMAP5430_GPU_HYST_VAL, - .update_int1 = 1000, - .update_int2 = 2000, -}; - -/* Thresholds and limits for OMAP5430 CORE temperature sensor */ -static struct temp_sensor_data omap5430_core_temp_sensor_data = { - .tshut_hot = OMAP5430_CORE_TSHUT_HOT, - .tshut_cold = OMAP5430_CORE_TSHUT_COLD, - .t_hot = OMAP5430_CORE_T_HOT, - .t_cold = OMAP5430_CORE_T_COLD, - .min_freq = OMAP5430_CORE_MIN_FREQ, - .max_freq = OMAP5430_CORE_MAX_FREQ, - .max_temp = OMAP5430_CORE_MAX_TEMP, - .min_temp = OMAP5430_CORE_MIN_TEMP, - .hyst_val = OMAP5430_CORE_HYST_VAL, - .update_int1 = 1000, - .update_int2 = 2000, -}; - -/* - * OMAP54xx ES2.0 : Temperature values in milli degree celsius - * ADC code values from 540 to 945 - */ -static int -omap5430_adc_to_temp[ - OMAP5430_ADC_END_VALUE - OMAP5430_ADC_START_VALUE + 1] = { - /* Index 540 - 549 */ - -40000, -40000, -40000, -40000, -39800, -39400, -39000, -38600, -38200, - -37800, - /* Index 550 - 559 */ - -37400, -37000, -36600, -36200, -35800, -35300, -34700, -34200, -33800, - -33400, - /* Index 560 - 569 */ - -33000, -32600, -32200, -31800, -31400, -31000, -30600, -30200, -29800, - -29400, - /* Index 570 - 579 */ - -29000, -28600, -28200, -27700, -27100, -26600, -26200, -25800, -25400, - -25000, - /* Index 580 - 589 */ - -24600, -24200, -23800, -23400, -23000, -22600, -22200, -21600, -21400, - -21000, - /* Index 590 - 599 */ - -20500, -19900, -19400, -19000, -18600, -18200, -17800, -17400, -17000, - -16600, - /* Index 600 - 609 */ - -16200, -15800, -15400, -15000, -14600, -14200, -13800, -13400, -13000, - -12500, - /* Index 610 - 619 */ - -11900, -11400, -11000, -10600, -10200, -9800, -9400, -9000, -8600, - -8200, - /* Index 620 - 629 */ - -7800, -7400, -7000, -6600, -6200, -5800, -5400, -5000, -4500, -3900, - /* Index 630 - 639 */ - -3400, -3000, -2600, -2200, -1800, -1400, -1000, -600, -200, 200, - /* Index 640 - 649 */ - 600, 1000, 1400, 1800, 2200, 2600, 3000, 3400, 3900, 4500, - /* Index 650 - 659 */ - 5000, 5400, 5800, 6200, 6600, 7000, 7400, 7800, 8200, 8600, - /* Index 660 - 669 */ - 9000, 9400, 9800, 10200, 10600, 11000, 11400, 11800, 12200, 12700, - /* Index 670 - 679 */ - 13300, 13800, 14200, 14600, 15000, 15400, 15800, 16200, 16600, 17000, - /* Index 680 - 689 */ - 17400, 17800, 18200, 18600, 19000, 19400, 19800, 20200, 20600, 21100, - /* Index 690 - 699 */ - 21400, 21900, 22500, 23000, 23400, 23800, 24200, 24600, 25000, 25400, - /* Index 700 - 709 */ - 25800, 26200, 26600, 27000, 27400, 27800, 28200, 28600, 29000, 29400, - /* Index 710 - 719 */ - 29800, 30200, 30600, 31000, 31400, 31900, 32500, 33000, 33400, 33800, - /* Index 720 - 729 */ - 34200, 34600, 35000, 35400, 35800, 36200, 36600, 37000, 37400, 37800, - /* Index 730 - 739 */ - 38200, 38600, 39000, 39400, 39800, 40200, 40600, 41000, 41400, 41800, - /* Index 740 - 749 */ - 42200, 42600, 43100, 43700, 44200, 44600, 45000, 45400, 45800, 46200, - /* Index 750 - 759 */ - 46600, 47000, 47400, 47800, 48200, 48600, 49000, 49400, 49800, 50200, - /* Index 760 - 769 */ - 50600, 51000, 51400, 51800, 52200, 52600, 53000, 53400, 53800, 54200, - /* Index 770 - 779 */ - 54600, 55000, 55400, 55900, 56500, 57000, 57400, 57800, 58200, 58600, - /* Index 780 - 789 */ - 59000, 59400, 59800, 60200, 60600, 61000, 61400, 61800, 62200, 62600, - /* Index 790 - 799 */ - 63000, 63400, 63800, 64200, 64600, 65000, 65400, 65800, 66200, 66600, - /* Index 800 - 809 */ - 67000, 67400, 67800, 68200, 68600, 69000, 69400, 69800, 70200, 70600, - /* Index 810 - 819 */ - 71000, 71500, 72100, 72600, 73000, 73400, 73800, 74200, 74600, 75000, - /* Index 820 - 829 */ - 75400, 75800, 76200, 76600, 77000, 77400, 77800, 78200, 78600, 79000, - /* Index 830 - 839 */ - 79400, 79800, 80200, 80600, 81000, 81400, 81800, 82200, 82600, 83000, - /* Index 840 - 849 */ - 83400, 83800, 84200, 84600, 85000, 85400, 85800, 86200, 86600, 87000, - /* Index 850 - 859 */ - 87400, 87800, 88200, 88600, 89000, 89400, 89800, 90200, 90600, 91000, - /* Index 860 - 869 */ - 91400, 91800, 92200, 92600, 93000, 93400, 93800, 94200, 94600, 95000, - /* Index 870 - 879 */ - 95400, 95800, 96200, 96600, 97000, 97500, 98100, 98600, 99000, 99400, - /* Index 880 - 889 */ - 99800, 100200, 100600, 101000, 101400, 101800, 102200, 102600, 103000, - 103400, - /* Index 890 - 899 */ - 103800, 104200, 104600, 105000, 105400, 105800, 106200, 106600, 107000, - 107400, - /* Index 900 - 909 */ - 107800, 108200, 108600, 109000, 109400, 109800, 110200, 110600, 111000, - 111400, - /* Index 910 - 919 */ - 111800, 112200, 112600, 113000, 113400, 113800, 114200, 114600, 115000, - 115400, - /* Index 920 - 929 */ - 115800, 116200, 116600, 117000, 117400, 117800, 118200, 118600, 119000, - 119400, - /* Index 930 - 939 */ - 119800, 120200, 120600, 121000, 121400, 121800, 122400, 122600, 123000, - 123400, - /* Index 940 - 945 */ - 123800, 1242000, 124600, 124900, 125000, 125000, -}; - -/* OMAP54xx ES2.0 data */ -const struct ti_bandgap_data omap5430_data = { - .features = TI_BANDGAP_FEATURE_TSHUT_CONFIG | - TI_BANDGAP_FEATURE_FREEZE_BIT | - TI_BANDGAP_FEATURE_TALERT | - TI_BANDGAP_FEATURE_COUNTER_DELAY | - TI_BANDGAP_FEATURE_HISTORY_BUFFER, - .fclock_name = "l3instr_ts_gclk_div", - .div_ck_name = "l3instr_ts_gclk_div", - .conv_table = omap5430_adc_to_temp, - .adc_start_val = OMAP5430_ADC_START_VALUE, - .adc_end_val = OMAP5430_ADC_END_VALUE, - .expose_sensor = ti_thermal_expose_sensor, - .remove_sensor = ti_thermal_remove_sensor, - .report_temperature = ti_thermal_report_sensor_temperature, - .sensors = { - { - .registers = &omap5430_mpu_temp_sensor_registers, - .ts_data = &omap5430_mpu_temp_sensor_data, - .domain = "cpu", - .register_cooling = ti_thermal_register_cpu_cooling, - .unregister_cooling = ti_thermal_unregister_cpu_cooling, - .slope = OMAP_GRADIENT_SLOPE_5430_CPU, - .constant = OMAP_GRADIENT_CONST_5430_CPU, - .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU, - .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_CPU, - }, - { - .registers = &omap5430_gpu_temp_sensor_registers, - .ts_data = &omap5430_gpu_temp_sensor_data, - .domain = "gpu", - .slope = OMAP_GRADIENT_SLOPE_5430_GPU, - .constant = OMAP_GRADIENT_CONST_5430_GPU, - .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU, - .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_GPU, - }, - { - .registers = &omap5430_core_temp_sensor_registers, - .ts_data = &omap5430_core_temp_sensor_data, - .domain = "core", - }, - }, - .sensor_count = 3, -}; diff --git a/drivers/staging/ti-soc-thermal/omap5xxx-bandgap.h b/drivers/staging/ti-soc-thermal/omap5xxx-bandgap.h deleted file mode 100644 index 400b55d..0000000 --- a/drivers/staging/ti-soc-thermal/omap5xxx-bandgap.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * OMAP5xxx bandgap registers, bitfields and temperature definitions - * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ - * Contact: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#ifndef __OMAP5XXX_BANDGAP_H -#define __OMAP5XXX_BANDGAP_H - -/** - * *** OMAP5430 *** - * - * Below, in sequence, are the Register definitions, - * the bitfields and the temperature definitions for OMAP5430. - */ - -/** - * OMAP5430 register definitions - * - * Registers are defined as offsets. The offsets are - * relative to FUSE_OPP_BGAP_GPU on 5430. - * - * Register below are grouped by domain (not necessarily in offset order) - */ - -/* OMAP5430.GPU register offsets */ -#define OMAP5430_FUSE_OPP_BGAP_GPU 0x0 -#define OMAP5430_TEMP_SENSOR_GPU_OFFSET 0x150 -#define OMAP5430_BGAP_THRESHOLD_GPU_OFFSET 0x1A8 -#define OMAP5430_BGAP_TSHUT_GPU_OFFSET 0x1B4 -#define OMAP5430_BGAP_CUMUL_DTEMP_GPU_OFFSET 0x1C0 -#define OMAP5430_BGAP_DTEMP_GPU_0_OFFSET 0x1F4 -#define OMAP5430_BGAP_DTEMP_GPU_1_OFFSET 0x1F8 -#define OMAP5430_BGAP_DTEMP_GPU_2_OFFSET 0x1FC -#define OMAP5430_BGAP_DTEMP_GPU_3_OFFSET 0x200 -#define OMAP5430_BGAP_DTEMP_GPU_4_OFFSET 0x204 - -/* OMAP5430.MPU register offsets */ -#define OMAP5430_FUSE_OPP_BGAP_MPU 0x4 -#define OMAP5430_TEMP_SENSOR_MPU_OFFSET 0x14C -#define OMAP5430_BGAP_THRESHOLD_MPU_OFFSET 0x1A4 -#define OMAP5430_BGAP_TSHUT_MPU_OFFSET 0x1B0 -#define OMAP5430_BGAP_CUMUL_DTEMP_MPU_OFFSET 0x1BC -#define OMAP5430_BGAP_DTEMP_MPU_0_OFFSET 0x1E0 -#define OMAP5430_BGAP_DTEMP_MPU_1_OFFSET 0x1E4 -#define OMAP5430_BGAP_DTEMP_MPU_2_OFFSET 0x1E8 -#define OMAP5430_BGAP_DTEMP_MPU_3_OFFSET 0x1EC -#define OMAP5430_BGAP_DTEMP_MPU_4_OFFSET 0x1F0 - -/* OMAP5430.MPU register offsets */ -#define OMAP5430_FUSE_OPP_BGAP_CORE 0x8 -#define OMAP5430_TEMP_SENSOR_CORE_OFFSET 0x154 -#define OMAP5430_BGAP_THRESHOLD_CORE_OFFSET 0x1AC -#define OMAP5430_BGAP_TSHUT_CORE_OFFSET 0x1B8 -#define OMAP5430_BGAP_CUMUL_DTEMP_CORE_OFFSET 0x1C4 -#define OMAP5430_BGAP_DTEMP_CORE_0_OFFSET 0x208 -#define OMAP5430_BGAP_DTEMP_CORE_1_OFFSET 0x20C -#define OMAP5430_BGAP_DTEMP_CORE_2_OFFSET 0x210 -#define OMAP5430_BGAP_DTEMP_CORE_3_OFFSET 0x214 -#define OMAP5430_BGAP_DTEMP_CORE_4_OFFSET 0x218 - -/* OMAP5430.common register offsets */ -#define OMAP5430_BGAP_CTRL_OFFSET 0x1A0 -#define OMAP5430_BGAP_STATUS_OFFSET 0x1C8 - -/** - * Register bitfields for OMAP5430 - * - * All the macros bellow define the required bits for - * controlling temperature on OMAP5430. Bit defines are - * grouped by register. - */ - -/* OMAP5430.TEMP_SENSOR */ -#define OMAP5430_BGAP_TEMP_SENSOR_SOC_MASK BIT(12) -#define OMAP5430_BGAP_TEMPSOFF_MASK BIT(11) -#define OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(10) -#define OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK (0x3ff << 0) - -/* OMAP5430.BANDGAP_CTRL */ -#define OMAP5430_MASK_SIDLEMODE_MASK (0x3 << 30) -#define OMAP5430_MASK_COUNTER_DELAY_MASK (0x7 << 27) -#define OMAP5430_MASK_FREEZE_CORE_MASK BIT(23) -#define OMAP5430_MASK_FREEZE_GPU_MASK BIT(22) -#define OMAP5430_MASK_FREEZE_MPU_MASK BIT(21) -#define OMAP5430_MASK_CLEAR_CORE_MASK BIT(20) -#define OMAP5430_MASK_CLEAR_GPU_MASK BIT(19) -#define OMAP5430_MASK_CLEAR_MPU_MASK BIT(18) -#define OMAP5430_MASK_CLEAR_ACCUM_CORE_MASK BIT(17) -#define OMAP5430_MASK_CLEAR_ACCUM_GPU_MASK BIT(16) -#define OMAP5430_MASK_CLEAR_ACCUM_MPU_MASK BIT(15) -#define OMAP5430_MASK_HOT_CORE_MASK BIT(5) -#define OMAP5430_MASK_COLD_CORE_MASK BIT(4) -#define OMAP5430_MASK_HOT_GPU_MASK BIT(3) -#define OMAP5430_MASK_COLD_GPU_MASK BIT(2) -#define OMAP5430_MASK_HOT_MPU_MASK BIT(1) -#define OMAP5430_MASK_COLD_MPU_MASK BIT(0) - -/* OMAP5430.BANDGAP_COUNTER */ -#define OMAP5430_COUNTER_MASK (0xffffff << 0) - -/* OMAP5430.BANDGAP_THRESHOLD */ -#define OMAP5430_T_HOT_MASK (0x3ff << 16) -#define OMAP5430_T_COLD_MASK (0x3ff << 0) - -/* OMAP5430.TSHUT_THRESHOLD */ -#define OMAP5430_TSHUT_HOT_MASK (0x3ff << 16) -#define OMAP5430_TSHUT_COLD_MASK (0x3ff << 0) - -/* OMAP5430.BANDGAP_CUMUL_DTEMP_MPU */ -#define OMAP5430_CUMUL_DTEMP_MPU_MASK (0xffffffff << 0) - -/* OMAP5430.BANDGAP_CUMUL_DTEMP_GPU */ -#define OMAP5430_CUMUL_DTEMP_GPU_MASK (0xffffffff << 0) - -/* OMAP5430.BANDGAP_CUMUL_DTEMP_CORE */ -#define OMAP5430_CUMUL_DTEMP_CORE_MASK (0xffffffff << 0) - -/* OMAP5430.BANDGAP_STATUS */ -#define OMAP5430_BGAP_ALERT_MASK BIT(31) -#define OMAP5430_HOT_CORE_FLAG_MASK BIT(5) -#define OMAP5430_COLD_CORE_FLAG_MASK BIT(4) -#define OMAP5430_HOT_GPU_FLAG_MASK BIT(3) -#define OMAP5430_COLD_GPU_FLAG_MASK BIT(2) -#define OMAP5430_HOT_MPU_FLAG_MASK BIT(1) -#define OMAP5430_COLD_MPU_FLAG_MASK BIT(0) - -/** - * Temperature limits and thresholds for OMAP5430 - * - * All the macros bellow are definitions for handling the - * ADC conversions and representation of temperature limits - * and thresholds for OMAP5430. Definitions are grouped - * by temperature domain. - */ - -/* OMAP5430.common temperature definitions */ -/* ADC conversion table limits */ -#define OMAP5430_ADC_START_VALUE 540 -#define OMAP5430_ADC_END_VALUE 945 - -/* OMAP5430.GPU temperature definitions */ -/* bandgap clock limits */ -#define OMAP5430_GPU_MAX_FREQ 1500000 -#define OMAP5430_GPU_MIN_FREQ 1000000 -/* sensor limits */ -#define OMAP5430_GPU_MIN_TEMP -40000 -#define OMAP5430_GPU_MAX_TEMP 125000 -#define OMAP5430_GPU_HYST_VAL 5000 -/* interrupts thresholds */ -#define OMAP5430_GPU_TSHUT_HOT 915 -#define OMAP5430_GPU_TSHUT_COLD 900 -#define OMAP5430_GPU_T_HOT 800 -#define OMAP5430_GPU_T_COLD 795 - -/* OMAP5430.MPU temperature definitions */ -/* bandgap clock limits */ -#define OMAP5430_MPU_MAX_FREQ 1500000 -#define OMAP5430_MPU_MIN_FREQ 1000000 -/* sensor limits */ -#define OMAP5430_MPU_MIN_TEMP -40000 -#define OMAP5430_MPU_MAX_TEMP 125000 -#define OMAP5430_MPU_HYST_VAL 5000 -/* interrupts thresholds */ -#define OMAP5430_MPU_TSHUT_HOT 915 -#define OMAP5430_MPU_TSHUT_COLD 900 -#define OMAP5430_MPU_T_HOT 800 -#define OMAP5430_MPU_T_COLD 795 - -/* OMAP5430.CORE temperature definitions */ -/* bandgap clock limits */ -#define OMAP5430_CORE_MAX_FREQ 1500000 -#define OMAP5430_CORE_MIN_FREQ 1000000 -/* sensor limits */ -#define OMAP5430_CORE_MIN_TEMP -40000 -#define OMAP5430_CORE_MAX_TEMP 125000 -#define OMAP5430_CORE_HYST_VAL 5000 -/* interrupts thresholds */ -#define OMAP5430_CORE_TSHUT_HOT 915 -#define OMAP5430_CORE_TSHUT_COLD 900 -#define OMAP5430_CORE_T_HOT 800 -#define OMAP5430_CORE_T_COLD 795 - -#endif /* __OMAP5XXX_BANDGAP_H */ diff --git a/drivers/staging/ti-soc-thermal/ti-bandgap.c b/drivers/staging/ti-soc-thermal/ti-bandgap.c deleted file mode 100644 index f20c1cf..0000000 --- a/drivers/staging/ti-soc-thermal/ti-bandgap.c +++ /dev/null @@ -1,1546 +0,0 @@ -/* - * TI Bandgap temperature sensor driver - * - * Copyright (C) 2011-2012 Texas Instruments Incorporated - http://www.ti.com/ - * Author: J Keerthy - * Author: Moiz Sonasath - * Couple of fixes, DT and MFD adaptation: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ti-bandgap.h" - -/*** Helper functions to access registers and their bitfields ***/ - -/** - * ti_bandgap_readl() - simple read helper function - * @bgp: pointer to ti_bandgap structure - * @reg: desired register (offset) to be read - * - * Helper function to read bandgap registers. It uses the io remapped area. - * Return: the register value. - */ -static u32 ti_bandgap_readl(struct ti_bandgap *bgp, u32 reg) -{ - return readl(bgp->base + reg); -} - -/** - * ti_bandgap_writel() - simple write helper function - * @bgp: pointer to ti_bandgap structure - * @val: desired register value to be written - * @reg: desired register (offset) to be written - * - * Helper function to write bandgap registers. It uses the io remapped area. - */ -static void ti_bandgap_writel(struct ti_bandgap *bgp, u32 val, u32 reg) -{ - writel(val, bgp->base + reg); -} - -/** - * DOC: macro to update bits. - * - * RMW_BITS() - used to read, modify and update bandgap bitfields. - * The value passed will be shifted. - */ -#define RMW_BITS(bgp, id, reg, mask, val) \ -do { \ - struct temp_sensor_registers *t; \ - u32 r; \ - \ - t = bgp->conf->sensors[(id)].registers; \ - r = ti_bandgap_readl(bgp, t->reg); \ - r &= ~t->mask; \ - r |= (val) << __ffs(t->mask); \ - ti_bandgap_writel(bgp, r, t->reg); \ -} while (0) - -/*** Basic helper functions ***/ - -/** - * ti_bandgap_power() - controls the power state of a bandgap device - * @bgp: pointer to ti_bandgap structure - * @on: desired power state (1 - on, 0 - off) - * - * Used to power on/off a bandgap device instance. Only used on those - * that features tempsoff bit. - * - * Return: 0 on success, -ENOTSUPP if tempsoff is not supported. - */ -static int ti_bandgap_power(struct ti_bandgap *bgp, bool on) -{ - int i, ret = 0; - - if (!TI_BANDGAP_HAS(bgp, POWER_SWITCH)) { - ret = -ENOTSUPP; - goto exit; - } - - for (i = 0; i < bgp->conf->sensor_count; i++) - /* active on 0 */ - RMW_BITS(bgp, i, temp_sensor_ctrl, bgap_tempsoff_mask, !on); - -exit: - return ret; -} - -/** - * ti_bandgap_read_temp() - helper function to read sensor temperature - * @bgp: pointer to ti_bandgap structure - * @id: bandgap sensor id - * - * Function to concentrate the steps to read sensor temperature register. - * This function is desired because, depending on bandgap device version, - * it might be needed to freeze the bandgap state machine, before fetching - * the register value. - * - * Return: temperature in ADC values. - */ -static u32 ti_bandgap_read_temp(struct ti_bandgap *bgp, int id) -{ - struct temp_sensor_registers *tsr; - u32 temp, reg; - - tsr = bgp->conf->sensors[id].registers; - reg = tsr->temp_sensor_ctrl; - - if (TI_BANDGAP_HAS(bgp, FREEZE_BIT)) { - RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 1); - /* - * In case we cannot read from cur_dtemp / dtemp_0, - * then we read from the last valid temp read - */ - reg = tsr->ctrl_dtemp_1; - } - - /* read temperature */ - temp = ti_bandgap_readl(bgp, reg); - temp &= tsr->bgap_dtemp_mask; - - if (TI_BANDGAP_HAS(bgp, FREEZE_BIT)) - RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 0); - - return temp; -} - -/*** IRQ handlers ***/ - -/** - * ti_bandgap_talert_irq_handler() - handles Temperature alert IRQs - * @irq: IRQ number - * @data: private data (struct ti_bandgap *) - * - * This is the Talert handler. Use it only if bandgap device features - * HAS(TALERT). This handler goes over all sensors and checks their - * conditions and acts accordingly. In case there are events pending, - * it will reset the event mask to wait for the opposite event (next event). - * Every time there is a new event, it will be reported to thermal layer. - * - * Return: IRQ_HANDLED - */ -static irqreturn_t ti_bandgap_talert_irq_handler(int irq, void *data) -{ - struct ti_bandgap *bgp = data; - struct temp_sensor_registers *tsr; - u32 t_hot = 0, t_cold = 0, ctrl; - int i; - - spin_lock(&bgp->lock); - for (i = 0; i < bgp->conf->sensor_count; i++) { - tsr = bgp->conf->sensors[i].registers; - ctrl = ti_bandgap_readl(bgp, tsr->bgap_status); - - /* Read the status of t_hot */ - t_hot = ctrl & tsr->status_hot_mask; - - /* Read the status of t_cold */ - t_cold = ctrl & tsr->status_cold_mask; - - if (!t_cold && !t_hot) - continue; - - ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); - /* - * One TALERT interrupt: Two sources - * If the interrupt is due to t_hot then mask t_hot and - * and unmask t_cold else mask t_cold and unmask t_hot - */ - if (t_hot) { - ctrl &= ~tsr->mask_hot_mask; - ctrl |= tsr->mask_cold_mask; - } else if (t_cold) { - ctrl &= ~tsr->mask_cold_mask; - ctrl |= tsr->mask_hot_mask; - } - - ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl); - - dev_dbg(bgp->dev, - "%s: IRQ from %s sensor: hotevent %d coldevent %d\n", - __func__, bgp->conf->sensors[i].domain, - t_hot, t_cold); - - /* report temperature to whom may concern */ - if (bgp->conf->report_temperature) - bgp->conf->report_temperature(bgp, i); - } - spin_unlock(&bgp->lock); - - return IRQ_HANDLED; -} - -/** - * ti_bandgap_tshut_irq_handler() - handles Temperature shutdown signal - * @irq: IRQ number - * @data: private data (unused) - * - * This is the Tshut handler. Use it only if bandgap device features - * HAS(TSHUT). If any sensor fires the Tshut signal, we simply shutdown - * the system. - * - * Return: IRQ_HANDLED - */ -static irqreturn_t ti_bandgap_tshut_irq_handler(int irq, void *data) -{ - pr_emerg("%s: TSHUT temperature reached. Needs shut down...\n", - __func__); - - orderly_poweroff(true); - - return IRQ_HANDLED; -} - -/*** Helper functions which manipulate conversion ADC <-> mi Celsius ***/ - -/** - * ti_bandgap_adc_to_mcelsius() - converts an ADC value to mCelsius scale - * @bgp: struct ti_bandgap pointer - * @adc_val: value in ADC representation - * @t: address where to write the resulting temperature in mCelsius - * - * Simple conversion from ADC representation to mCelsius. In case the ADC value - * is out of the ADC conv table range, it returns -ERANGE, 0 on success. - * The conversion table is indexed by the ADC values. - * - * Return: 0 if conversion was successful, else -ERANGE in case the @adc_val - * argument is out of the ADC conv table range. - */ -static -int ti_bandgap_adc_to_mcelsius(struct ti_bandgap *bgp, int adc_val, int *t) -{ - const struct ti_bandgap_data *conf = bgp->conf; - int ret = 0; - - /* look up for temperature in the table and return the temperature */ - if (adc_val < conf->adc_start_val || adc_val > conf->adc_end_val) { - ret = -ERANGE; - goto exit; - } - - *t = bgp->conf->conv_table[adc_val - conf->adc_start_val]; - -exit: - return ret; -} - -/** - * ti_bandgap_mcelsius_to_adc() - converts a mCelsius value to ADC scale - * @bgp: struct ti_bandgap pointer - * @temp: value in mCelsius - * @adc: address where to write the resulting temperature in ADC representation - * - * Simple conversion from mCelsius to ADC values. In case the temp value - * is out of the ADC conv table range, it returns -ERANGE, 0 on success. - * The conversion table is indexed by the ADC values. - * - * Return: 0 if conversion was successful, else -ERANGE in case the @temp - * argument is out of the ADC conv table range. - */ -static -int ti_bandgap_mcelsius_to_adc(struct ti_bandgap *bgp, long temp, int *adc) -{ - const struct ti_bandgap_data *conf = bgp->conf; - const int *conv_table = bgp->conf->conv_table; - int high, low, mid, ret = 0; - - low = 0; - high = conf->adc_end_val - conf->adc_start_val; - mid = (high + low) / 2; - - if (temp < conv_table[low] || temp > conv_table[high]) { - ret = -ERANGE; - goto exit; - } - - while (low < high) { - if (temp < conv_table[mid]) - high = mid - 1; - else - low = mid + 1; - mid = (low + high) / 2; - } - - *adc = conf->adc_start_val + low; - -exit: - return ret; -} - -/** - * ti_bandgap_add_hyst() - add hysteresis (in mCelsius) to an ADC value - * @bgp: struct ti_bandgap pointer - * @adc_val: temperature value in ADC representation - * @hyst_val: hysteresis value in mCelsius - * @sum: address where to write the resulting temperature (in ADC scale) - * - * Adds an hysteresis value (in mCelsius) to a ADC temperature value. - * - * Return: 0 on success, -ERANGE otherwise. - */ -static -int ti_bandgap_add_hyst(struct ti_bandgap *bgp, int adc_val, int hyst_val, - u32 *sum) -{ - int temp, ret; - - /* - * Need to add in the mcelsius domain, so we have a temperature - * the conv_table range - */ - ret = ti_bandgap_adc_to_mcelsius(bgp, adc_val, &temp); - if (ret < 0) - goto exit; - - temp += hyst_val; - - ret = ti_bandgap_mcelsius_to_adc(bgp, temp, sum); - -exit: - return ret; -} - -/*** Helper functions handling device Alert/Shutdown signals ***/ - -/** - * ti_bandgap_unmask_interrupts() - unmasks the events of thot & tcold - * @bgp: struct ti_bandgap pointer - * @id: bandgap sensor id - * @t_hot: hot temperature value to trigger alert signal - * @t_cold: cold temperature value to trigger alert signal - * - * Checks the requested t_hot and t_cold values and configures the IRQ event - * masks accordingly. Call this function only if bandgap features HAS(TALERT). - */ -static void ti_bandgap_unmask_interrupts(struct ti_bandgap *bgp, int id, - u32 t_hot, u32 t_cold) -{ - struct temp_sensor_registers *tsr; - u32 temp, reg_val; - - /* Read the current on die temperature */ - temp = ti_bandgap_read_temp(bgp, id); - - tsr = bgp->conf->sensors[id].registers; - reg_val = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); - - if (temp < t_hot) - reg_val |= tsr->mask_hot_mask; - else - reg_val &= ~tsr->mask_hot_mask; - - if (t_cold < temp) - reg_val |= tsr->mask_cold_mask; - else - reg_val &= ~tsr->mask_cold_mask; - ti_bandgap_writel(bgp, reg_val, tsr->bgap_mask_ctrl); -} - -/** - * ti_bandgap_update_alert_threshold() - sequence to update thresholds - * @bgp: struct ti_bandgap pointer - * @id: bandgap sensor id - * @val: value (ADC) of a new threshold - * @hot: desired threshold to be updated. true if threshold hot, false if - * threshold cold - * - * It will program the required thresholds (hot and cold) for TALERT signal. - * This function can be used to update t_hot or t_cold, depending on @hot value. - * It checks the resulting t_hot and t_cold values, based on the new passed @val - * and configures the thresholds so that t_hot is always greater than t_cold. - * Call this function only if bandgap features HAS(TALERT). - * - * Return: 0 if no error, else corresponding error - */ -static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id, - int val, bool hot) -{ - struct temp_sensor_data *ts_data = bgp->conf->sensors[id].ts_data; - struct temp_sensor_registers *tsr; - u32 thresh_val, reg_val, t_hot, t_cold; - int err = 0; - - tsr = bgp->conf->sensors[id].registers; - - /* obtain the current value */ - thresh_val = ti_bandgap_readl(bgp, tsr->bgap_threshold); - t_cold = (thresh_val & tsr->threshold_tcold_mask) >> - __ffs(tsr->threshold_tcold_mask); - t_hot = (thresh_val & tsr->threshold_thot_mask) >> - __ffs(tsr->threshold_thot_mask); - if (hot) - t_hot = val; - else - t_cold = val; - - if (t_cold > t_hot) { - if (hot) - err = ti_bandgap_add_hyst(bgp, t_hot, - -ts_data->hyst_val, - &t_cold); - else - err = ti_bandgap_add_hyst(bgp, t_cold, - ts_data->hyst_val, - &t_hot); - } - - /* write the new threshold values */ - reg_val = thresh_val & - ~(tsr->threshold_thot_mask | tsr->threshold_tcold_mask); - reg_val |= (t_hot << __ffs(tsr->threshold_thot_mask)) | - (t_cold << __ffs(tsr->threshold_tcold_mask)); - ti_bandgap_writel(bgp, reg_val, tsr->bgap_threshold); - - if (err) { - dev_err(bgp->dev, "failed to reprogram thot threshold\n"); - err = -EIO; - goto exit; - } - - ti_bandgap_unmask_interrupts(bgp, id, t_hot, t_cold); -exit: - return err; -} - -/** - * ti_bandgap_validate() - helper to check the sanity of a struct ti_bandgap - * @bgp: struct ti_bandgap pointer - * @id: bandgap sensor id - * - * Checks if the bandgap pointer is valid and if the sensor id is also - * applicable. - * - * Return: 0 if no errors, -EINVAL for invalid @bgp pointer or -ERANGE if - * @id cannot index @bgp sensors. - */ -static inline int ti_bandgap_validate(struct ti_bandgap *bgp, int id) -{ - int ret = 0; - - if (IS_ERR_OR_NULL(bgp)) { - pr_err("%s: invalid bandgap pointer\n", __func__); - ret = -EINVAL; - goto exit; - } - - if ((id < 0) || (id >= bgp->conf->sensor_count)) { - dev_err(bgp->dev, "%s: sensor id out of range (%d)\n", - __func__, id); - ret = -ERANGE; - } - -exit: - return ret; -} - -/** - * _ti_bandgap_write_threshold() - helper to update TALERT t_cold or t_hot - * @bgp: struct ti_bandgap pointer - * @id: bandgap sensor id - * @val: value (mCelsius) of a new threshold - * @hot: desired threshold to be updated. true if threshold hot, false if - * threshold cold - * - * It will update the required thresholds (hot and cold) for TALERT signal. - * This function can be used to update t_hot or t_cold, depending on @hot value. - * Validates the mCelsius range and update the requested threshold. - * Call this function only if bandgap features HAS(TALERT). - * - * Return: 0 if no error, else corresponding error value. - */ -static int _ti_bandgap_write_threshold(struct ti_bandgap *bgp, int id, int val, - bool hot) -{ - struct temp_sensor_data *ts_data; - struct temp_sensor_registers *tsr; - u32 adc_val; - int ret; - - ret = ti_bandgap_validate(bgp, id); - if (ret) - goto exit; - - if (!TI_BANDGAP_HAS(bgp, TALERT)) { - ret = -ENOTSUPP; - goto exit; - } - - ts_data = bgp->conf->sensors[id].ts_data; - tsr = bgp->conf->sensors[id].registers; - if (hot) { - if (val < ts_data->min_temp + ts_data->hyst_val) - ret = -EINVAL; - } else { - if (val > ts_data->max_temp + ts_data->hyst_val) - ret = -EINVAL; - } - - if (ret) - goto exit; - - ret = ti_bandgap_mcelsius_to_adc(bgp, val, &adc_val); - if (ret < 0) - goto exit; - - spin_lock(&bgp->lock); - ret = ti_bandgap_update_alert_threshold(bgp, id, adc_val, hot); - spin_unlock(&bgp->lock); - -exit: - return ret; -} - -/** - * _ti_bandgap_read_threshold() - helper to read TALERT t_cold or t_hot - * @bgp: struct ti_bandgap pointer - * @id: bandgap sensor id - * @val: value (mCelsius) of a threshold - * @hot: desired threshold to be read. true if threshold hot, false if - * threshold cold - * - * It will fetch the required thresholds (hot and cold) for TALERT signal. - * This function can be used to read t_hot or t_cold, depending on @hot value. - * Call this function only if bandgap features HAS(TALERT). - * - * Return: 0 if no error, -ENOTSUPP if it has no TALERT support, or the - * corresponding error value if some operation fails. - */ -static int _ti_bandgap_read_threshold(struct ti_bandgap *bgp, int id, - int *val, bool hot) -{ - struct temp_sensor_registers *tsr; - u32 temp, mask; - int ret = 0; - - ret = ti_bandgap_validate(bgp, id); - if (ret) - goto exit; - - if (!TI_BANDGAP_HAS(bgp, TALERT)) { - ret = -ENOTSUPP; - goto exit; - } - - tsr = bgp->conf->sensors[id].registers; - if (hot) - mask = tsr->threshold_thot_mask; - else - mask = tsr->threshold_tcold_mask; - - temp = ti_bandgap_readl(bgp, tsr->bgap_threshold); - temp = (temp & mask) >> __ffs(mask); - ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); - if (ret) { - dev_err(bgp->dev, "failed to read thot\n"); - ret = -EIO; - goto exit; - } - - *val = temp; - -exit: - return ret; -} - -/*** Exposed APIs ***/ - -/** - * ti_bandgap_read_thot() - reads sensor current thot - * @bgp: pointer to bandgap instance - * @id: sensor id - * @thot: resulting current thot value - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_read_thot(struct ti_bandgap *bgp, int id, int *thot) -{ - return _ti_bandgap_read_threshold(bgp, id, thot, true); -} - -/** - * ti_bandgap_write_thot() - sets sensor current thot - * @bgp: pointer to bandgap instance - * @id: sensor id - * @val: desired thot value - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_write_thot(struct ti_bandgap *bgp, int id, int val) -{ - return _ti_bandgap_write_threshold(bgp, id, val, true); -} - -/** - * ti_bandgap_read_tcold() - reads sensor current tcold - * @bgp: pointer to bandgap instance - * @id: sensor id - * @tcold: resulting current tcold value - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_read_tcold(struct ti_bandgap *bgp, int id, int *tcold) -{ - return _ti_bandgap_read_threshold(bgp, id, tcold, false); -} - -/** - * ti_bandgap_write_tcold() - sets the sensor tcold - * @bgp: pointer to bandgap instance - * @id: sensor id - * @val: desired tcold value - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_write_tcold(struct ti_bandgap *bgp, int id, int val) -{ - return _ti_bandgap_write_threshold(bgp, id, val, false); -} - -/** - * ti_bandgap_read_counter() - read the sensor counter - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: resulting update interval in miliseconds - */ -static void ti_bandgap_read_counter(struct ti_bandgap *bgp, int id, - int *interval) -{ - struct temp_sensor_registers *tsr; - int time; - - tsr = bgp->conf->sensors[id].registers; - time = ti_bandgap_readl(bgp, tsr->bgap_counter); - time = (time & tsr->counter_mask) >> - __ffs(tsr->counter_mask); - time = time * 1000 / bgp->clk_rate; - *interval = time; -} - -/** - * ti_bandgap_read_counter_delay() - read the sensor counter delay - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: resulting update interval in miliseconds - */ -static void ti_bandgap_read_counter_delay(struct ti_bandgap *bgp, int id, - int *interval) -{ - struct temp_sensor_registers *tsr; - int reg_val; - - tsr = bgp->conf->sensors[id].registers; - - reg_val = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); - reg_val = (reg_val & tsr->mask_counter_delay_mask) >> - __ffs(tsr->mask_counter_delay_mask); - switch (reg_val) { - case 0: - *interval = 0; - break; - case 1: - *interval = 1; - break; - case 2: - *interval = 10; - break; - case 3: - *interval = 100; - break; - case 4: - *interval = 250; - break; - case 5: - *interval = 500; - break; - default: - dev_warn(bgp->dev, "Wrong counter delay value read from register %X", - reg_val); - } -} - -/** - * ti_bandgap_read_update_interval() - read the sensor update interval - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: resulting update interval in miliseconds - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_read_update_interval(struct ti_bandgap *bgp, int id, - int *interval) -{ - int ret = 0; - - ret = ti_bandgap_validate(bgp, id); - if (ret) - goto exit; - - if (!TI_BANDGAP_HAS(bgp, COUNTER) && - !TI_BANDGAP_HAS(bgp, COUNTER_DELAY)) { - ret = -ENOTSUPP; - goto exit; - } - - if (TI_BANDGAP_HAS(bgp, COUNTER)) { - ti_bandgap_read_counter(bgp, id, interval); - goto exit; - } - - ti_bandgap_read_counter_delay(bgp, id, interval); -exit: - return ret; -} - -/** - * ti_bandgap_write_counter_delay() - set the counter_delay - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: desired update interval in miliseconds - * - * Return: 0 on success or the proper error code - */ -static int ti_bandgap_write_counter_delay(struct ti_bandgap *bgp, int id, - u32 interval) -{ - int rval; - - switch (interval) { - case 0: /* Immediate conversion */ - rval = 0x0; - break; - case 1: /* Conversion after ever 1ms */ - rval = 0x1; - break; - case 10: /* Conversion after ever 10ms */ - rval = 0x2; - break; - case 100: /* Conversion after ever 100ms */ - rval = 0x3; - break; - case 250: /* Conversion after ever 250ms */ - rval = 0x4; - break; - case 500: /* Conversion after ever 500ms */ - rval = 0x5; - break; - default: - dev_warn(bgp->dev, "Delay %d ms is not supported\n", interval); - return -EINVAL; - } - - spin_lock(&bgp->lock); - RMW_BITS(bgp, id, bgap_mask_ctrl, mask_counter_delay_mask, rval); - spin_unlock(&bgp->lock); - - return 0; -} - -/** - * ti_bandgap_write_counter() - set the bandgap sensor counter - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: desired update interval in miliseconds - */ -static void ti_bandgap_write_counter(struct ti_bandgap *bgp, int id, - u32 interval) -{ - interval = interval * bgp->clk_rate / 1000; - spin_lock(&bgp->lock); - RMW_BITS(bgp, id, bgap_counter, counter_mask, interval); - spin_unlock(&bgp->lock); -} - -/** - * ti_bandgap_write_update_interval() - set the update interval - * @bgp: pointer to bandgap instance - * @id: sensor id - * @interval: desired update interval in miliseconds - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_write_update_interval(struct ti_bandgap *bgp, - int id, u32 interval) -{ - int ret = ti_bandgap_validate(bgp, id); - if (ret) - goto exit; - - if (!TI_BANDGAP_HAS(bgp, COUNTER) && - !TI_BANDGAP_HAS(bgp, COUNTER_DELAY)) { - ret = -ENOTSUPP; - goto exit; - } - - if (TI_BANDGAP_HAS(bgp, COUNTER)) { - ti_bandgap_write_counter(bgp, id, interval); - goto exit; - } - - ret = ti_bandgap_write_counter_delay(bgp, id, interval); -exit: - return ret; -} - -/** - * ti_bandgap_read_temperature() - report current temperature - * @bgp: pointer to bandgap instance - * @id: sensor id - * @temperature: resulting temperature - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_read_temperature(struct ti_bandgap *bgp, int id, - int *temperature) -{ - u32 temp; - int ret; - - ret = ti_bandgap_validate(bgp, id); - if (ret) - return ret; - - spin_lock(&bgp->lock); - temp = ti_bandgap_read_temp(bgp, id); - spin_unlock(&bgp->lock); - - ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); - if (ret) - return -EIO; - - *temperature = temp; - - return 0; -} - -/** - * ti_bandgap_set_sensor_data() - helper function to store thermal - * framework related data. - * @bgp: pointer to bandgap instance - * @id: sensor id - * @data: thermal framework related data to be stored - * - * Return: 0 on success or the proper error code - */ -int ti_bandgap_set_sensor_data(struct ti_bandgap *bgp, int id, void *data) -{ - int ret = ti_bandgap_validate(bgp, id); - if (ret) - return ret; - - bgp->regval[id].data = data; - - return 0; -} - -/** - * ti_bandgap_get_sensor_data() - helper function to get thermal - * framework related data. - * @bgp: pointer to bandgap instance - * @id: sensor id - * - * Return: data stored by set function with sensor id on success or NULL - */ -void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id) -{ - int ret = ti_bandgap_validate(bgp, id); - if (ret) - return ERR_PTR(ret); - - return bgp->regval[id].data; -} - -/*** Helper functions used during device initialization ***/ - -/** - * ti_bandgap_force_single_read() - executes 1 single ADC conversion - * @bgp: pointer to struct ti_bandgap - * @id: sensor id which it is desired to read 1 temperature - * - * Used to initialize the conversion state machine and set it to a valid - * state. Called during device initialization and context restore events. - * - * Return: 0 - */ -static int -ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id) -{ - u32 temp = 0, counter = 1000; - - /* Select single conversion mode */ - if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) - RMW_BITS(bgp, id, bgap_mode_ctrl, mode_ctrl_mask, 0); - - /* Start of Conversion = 1 */ - RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 1); - /* Wait until DTEMP is updated */ - temp = ti_bandgap_read_temp(bgp, id); - - while ((temp == 0) && --counter) - temp = ti_bandgap_read_temp(bgp, id); - /* REVISIT: Check correct condition for end of conversion */ - - /* Start of Conversion = 0 */ - RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 0); - - return 0; -} - -/** - * ti_bandgap_set_continous_mode() - One time enabling of continuous mode - * @bgp: pointer to struct ti_bandgap - * - * Call this function only if HAS(MODE_CONFIG) is set. As this driver may - * be used for junction temperature monitoring, it is desirable that the - * sensors are operational all the time, so that alerts are generated - * properly. - * - * Return: 0 - */ -static int ti_bandgap_set_continuous_mode(struct ti_bandgap *bgp) -{ - int i; - - for (i = 0; i < bgp->conf->sensor_count; i++) { - /* Perform a single read just before enabling continuous */ - ti_bandgap_force_single_read(bgp, i); - RMW_BITS(bgp, i, bgap_mode_ctrl, mode_ctrl_mask, 1); - } - - return 0; -} - -/** - * ti_bandgap_get_trend() - To fetch the temperature trend of a sensor - * @bgp: pointer to struct ti_bandgap - * @id: id of the individual sensor - * @trend: Pointer to trend. - * - * This function needs to be called to fetch the temperature trend of a - * Particular sensor. The function computes the difference in temperature - * w.r.t time. For the bandgaps with built in history buffer the temperatures - * are read from the buffer and for those without the Buffer -ENOTSUPP is - * returned. - * - * Return: 0 if no error, else return corresponding error. If no - * error then the trend value is passed on to trend parameter - */ -int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend) -{ - struct temp_sensor_registers *tsr; - u32 temp1, temp2, reg1, reg2; - int t1, t2, interval, ret = 0; - - ret = ti_bandgap_validate(bgp, id); - if (ret) - goto exit; - - if (!TI_BANDGAP_HAS(bgp, HISTORY_BUFFER) || - !TI_BANDGAP_HAS(bgp, FREEZE_BIT)) { - ret = -ENOTSUPP; - goto exit; - } - - tsr = bgp->conf->sensors[id].registers; - - /* Freeze and read the last 2 valid readings */ - reg1 = tsr->ctrl_dtemp_1; - reg2 = tsr->ctrl_dtemp_2; - - /* read temperature from history buffer */ - temp1 = ti_bandgap_readl(bgp, reg1); - temp1 &= tsr->bgap_dtemp_mask; - - temp2 = ti_bandgap_readl(bgp, reg2); - temp2 &= tsr->bgap_dtemp_mask; - - /* Convert from adc values to mCelsius temperature */ - ret = ti_bandgap_adc_to_mcelsius(bgp, temp1, &t1); - if (ret) - goto exit; - - ret = ti_bandgap_adc_to_mcelsius(bgp, temp2, &t2); - if (ret) - goto exit; - - /* Fetch the update interval */ - ret = ti_bandgap_read_update_interval(bgp, id, &interval); - if (ret || !interval) - goto exit; - - *trend = (t1 - t2) / interval; - - dev_dbg(bgp->dev, "The temperatures are t1 = %d and t2 = %d and trend =%d\n", - t1, t2, *trend); - -exit: - return ret; -} - -/** - * ti_bandgap_tshut_init() - setup and initialize tshut handling - * @bgp: pointer to struct ti_bandgap - * @pdev: pointer to device struct platform_device - * - * Call this function only in case the bandgap features HAS(TSHUT). - * In this case, the driver needs to handle the TSHUT signal as an IRQ. - * The IRQ is wired as a GPIO, and for this purpose, it is required - * to specify which GPIO line is used. TSHUT IRQ is fired anytime - * one of the bandgap sensors violates the TSHUT high/hot threshold. - * And in that case, the system must go off. - * - * Return: 0 if no error, else error status - */ -static int ti_bandgap_tshut_init(struct ti_bandgap *bgp, - struct platform_device *pdev) -{ - int gpio_nr = bgp->tshut_gpio; - int status; - - /* Request for gpio_86 line */ - status = gpio_request(gpio_nr, "tshut"); - if (status < 0) { - dev_err(bgp->dev, "Could not request for TSHUT GPIO:%i\n", 86); - return status; - } - status = gpio_direction_input(gpio_nr); - if (status) { - dev_err(bgp->dev, "Cannot set input TSHUT GPIO %d\n", gpio_nr); - return status; - } - - status = request_irq(gpio_to_irq(gpio_nr), ti_bandgap_tshut_irq_handler, - IRQF_TRIGGER_RISING, "tshut", NULL); - if (status) { - gpio_free(gpio_nr); - dev_err(bgp->dev, "request irq failed for TSHUT"); - } - - return 0; -} - -/** - * ti_bandgap_alert_init() - setup and initialize talert handling - * @bgp: pointer to struct ti_bandgap - * @pdev: pointer to device struct platform_device - * - * Call this function only in case the bandgap features HAS(TALERT). - * In this case, the driver needs to handle the TALERT signals as an IRQs. - * TALERT is a normal IRQ and it is fired any time thresholds (hot or cold) - * are violated. In these situation, the driver must reprogram the thresholds, - * accordingly to specified policy. - * - * Return: 0 if no error, else return corresponding error. - */ -static int ti_bandgap_talert_init(struct ti_bandgap *bgp, - struct platform_device *pdev) -{ - int ret; - - bgp->irq = platform_get_irq(pdev, 0); - if (bgp->irq < 0) { - dev_err(&pdev->dev, "get_irq failed\n"); - return bgp->irq; - } - ret = request_threaded_irq(bgp->irq, NULL, - ti_bandgap_talert_irq_handler, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - "talert", bgp); - if (ret) { - dev_err(&pdev->dev, "Request threaded irq failed.\n"); - return ret; - } - - return 0; -} - -static const struct of_device_id of_ti_bandgap_match[]; -/** - * ti_bandgap_build() - parse DT and setup a struct ti_bandgap - * @pdev: pointer to device struct platform_device - * - * Used to read the device tree properties accordingly to the bandgap - * matching version. Based on bandgap version and its capabilities it - * will build a struct ti_bandgap out of the required DT entries. - * - * Return: valid bandgap structure if successful, else returns ERR_PTR - * return value must be verified with IS_ERR. - */ -static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev) -{ - struct device_node *node = pdev->dev.of_node; - const struct of_device_id *of_id; - struct ti_bandgap *bgp; - struct resource *res; - u32 prop; - int i; - - /* just for the sake */ - if (!node) { - dev_err(&pdev->dev, "no platform information available\n"); - return ERR_PTR(-EINVAL); - } - - bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL); - if (!bgp) { - dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); - return ERR_PTR(-ENOMEM); - } - - of_id = of_match_device(of_ti_bandgap_match, &pdev->dev); - if (of_id) - bgp->conf = of_id->data; - - /* register shadow for context save and restore */ - bgp->regval = devm_kzalloc(&pdev->dev, sizeof(*bgp->regval) * - bgp->conf->sensor_count, GFP_KERNEL); - if (!bgp) { - dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); - return ERR_PTR(-ENOMEM); - } - - i = 0; - do { - void __iomem *chunk; - - res = platform_get_resource(pdev, IORESOURCE_MEM, i); - if (!res) - break; - chunk = devm_ioremap_resource(&pdev->dev, res); - if (i == 0) - bgp->base = chunk; - if (IS_ERR(chunk)) - return ERR_CAST(chunk); - - i++; - } while (res); - - if (TI_BANDGAP_HAS(bgp, TSHUT)) { - if (of_property_read_u32(node, "ti,tshut-gpio", &prop) < 0) { - dev_err(&pdev->dev, "missing tshut gpio in device tree\n"); - return ERR_PTR(-EINVAL); - } - bgp->tshut_gpio = prop; - if (!gpio_is_valid(bgp->tshut_gpio)) { - dev_err(&pdev->dev, "invalid gpio for tshut (%d)\n", - bgp->tshut_gpio); - return ERR_PTR(-EINVAL); - } - } - - return bgp; -} - -/*** Device driver call backs ***/ - -static -int ti_bandgap_probe(struct platform_device *pdev) -{ - struct ti_bandgap *bgp; - int clk_rate, ret = 0, i; - - bgp = ti_bandgap_build(pdev); - if (IS_ERR_OR_NULL(bgp)) { - dev_err(&pdev->dev, "failed to fetch platform data\n"); - return PTR_ERR(bgp); - } - bgp->dev = &pdev->dev; - - if (TI_BANDGAP_HAS(bgp, TSHUT)) { - ret = ti_bandgap_tshut_init(bgp, pdev); - if (ret) { - dev_err(&pdev->dev, - "failed to initialize system tshut IRQ\n"); - return ret; - } - } - - bgp->fclock = clk_get(NULL, bgp->conf->fclock_name); - ret = IS_ERR_OR_NULL(bgp->fclock); - if (ret) { - dev_err(&pdev->dev, "failed to request fclock reference\n"); - goto free_irqs; - } - - bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name); - ret = IS_ERR_OR_NULL(bgp->div_clk); - if (ret) { - dev_err(&pdev->dev, - "failed to request div_ts_ck clock ref\n"); - goto free_irqs; - } - - for (i = 0; i < bgp->conf->sensor_count; i++) { - struct temp_sensor_registers *tsr; - u32 val; - - tsr = bgp->conf->sensors[i].registers; - /* - * check if the efuse has a non-zero value if not - * it is an untrimmed sample and the temperatures - * may not be accurate - */ - val = ti_bandgap_readl(bgp, tsr->bgap_efuse); - if (ret || !val) - dev_info(&pdev->dev, - "Non-trimmed BGAP, Temp not accurate\n"); - } - - clk_rate = clk_round_rate(bgp->div_clk, - bgp->conf->sensors[0].ts_data->max_freq); - if (clk_rate < bgp->conf->sensors[0].ts_data->min_freq || - clk_rate == 0xffffffff) { - ret = -ENODEV; - dev_err(&pdev->dev, "wrong clock rate (%d)\n", clk_rate); - goto put_clks; - } - - ret = clk_set_rate(bgp->div_clk, clk_rate); - if (ret) - dev_err(&pdev->dev, "Cannot re-set clock rate. Continuing\n"); - - bgp->clk_rate = clk_rate; - if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) - clk_prepare_enable(bgp->fclock); - - - spin_lock_init(&bgp->lock); - bgp->dev = &pdev->dev; - platform_set_drvdata(pdev, bgp); - - ti_bandgap_power(bgp, true); - - /* Set default counter to 1 for now */ - if (TI_BANDGAP_HAS(bgp, COUNTER)) - for (i = 0; i < bgp->conf->sensor_count; i++) - RMW_BITS(bgp, i, bgap_counter, counter_mask, 1); - - /* Set default thresholds for alert and shutdown */ - for (i = 0; i < bgp->conf->sensor_count; i++) { - struct temp_sensor_data *ts_data; - - ts_data = bgp->conf->sensors[i].ts_data; - - if (TI_BANDGAP_HAS(bgp, TALERT)) { - /* Set initial Talert thresholds */ - RMW_BITS(bgp, i, bgap_threshold, - threshold_tcold_mask, ts_data->t_cold); - RMW_BITS(bgp, i, bgap_threshold, - threshold_thot_mask, ts_data->t_hot); - /* Enable the alert events */ - RMW_BITS(bgp, i, bgap_mask_ctrl, mask_hot_mask, 1); - RMW_BITS(bgp, i, bgap_mask_ctrl, mask_cold_mask, 1); - } - - if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) { - /* Set initial Tshut thresholds */ - RMW_BITS(bgp, i, tshut_threshold, - tshut_hot_mask, ts_data->tshut_hot); - RMW_BITS(bgp, i, tshut_threshold, - tshut_cold_mask, ts_data->tshut_cold); - } - } - - if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) - ti_bandgap_set_continuous_mode(bgp); - - /* Set .250 seconds time as default counter */ - if (TI_BANDGAP_HAS(bgp, COUNTER)) - for (i = 0; i < bgp->conf->sensor_count; i++) - RMW_BITS(bgp, i, bgap_counter, counter_mask, - bgp->clk_rate / 4); - - /* Every thing is good? Then expose the sensors */ - for (i = 0; i < bgp->conf->sensor_count; i++) { - char *domain; - - if (bgp->conf->sensors[i].register_cooling) { - ret = bgp->conf->sensors[i].register_cooling(bgp, i); - if (ret) - goto remove_sensors; - } - - if (bgp->conf->expose_sensor) { - domain = bgp->conf->sensors[i].domain; - ret = bgp->conf->expose_sensor(bgp, i, domain); - if (ret) - goto remove_last_cooling; - } - } - - /* - * Enable the Interrupts once everything is set. Otherwise irq handler - * might be called as soon as it is enabled where as rest of framework - * is still getting initialised. - */ - if (TI_BANDGAP_HAS(bgp, TALERT)) { - ret = ti_bandgap_talert_init(bgp, pdev); - if (ret) { - dev_err(&pdev->dev, "failed to initialize Talert IRQ\n"); - i = bgp->conf->sensor_count; - goto disable_clk; - } - } - - return 0; - -remove_last_cooling: - if (bgp->conf->sensors[i].unregister_cooling) - bgp->conf->sensors[i].unregister_cooling(bgp, i); -remove_sensors: - for (i--; i >= 0; i--) { - if (bgp->conf->sensors[i].unregister_cooling) - bgp->conf->sensors[i].unregister_cooling(bgp, i); - if (bgp->conf->remove_sensor) - bgp->conf->remove_sensor(bgp, i); - } - ti_bandgap_power(bgp, false); -disable_clk: - if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) - clk_disable_unprepare(bgp->fclock); -put_clks: - clk_put(bgp->fclock); - clk_put(bgp->div_clk); -free_irqs: - if (TI_BANDGAP_HAS(bgp, TSHUT)) { - free_irq(gpio_to_irq(bgp->tshut_gpio), NULL); - gpio_free(bgp->tshut_gpio); - } - - return ret; -} - -static -int ti_bandgap_remove(struct platform_device *pdev) -{ - struct ti_bandgap *bgp = platform_get_drvdata(pdev); - int i; - - /* First thing is to remove sensor interfaces */ - for (i = 0; i < bgp->conf->sensor_count; i++) { - if (bgp->conf->sensors[i].unregister_cooling) - bgp->conf->sensors[i].unregister_cooling(bgp, i); - - if (bgp->conf->remove_sensor) - bgp->conf->remove_sensor(bgp, i); - } - - ti_bandgap_power(bgp, false); - - if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) - clk_disable_unprepare(bgp->fclock); - clk_put(bgp->fclock); - clk_put(bgp->div_clk); - - if (TI_BANDGAP_HAS(bgp, TALERT)) - free_irq(bgp->irq, bgp); - - if (TI_BANDGAP_HAS(bgp, TSHUT)) { - free_irq(gpio_to_irq(bgp->tshut_gpio), NULL); - gpio_free(bgp->tshut_gpio); - } - - return 0; -} - -#ifdef CONFIG_PM -static int ti_bandgap_save_ctxt(struct ti_bandgap *bgp) -{ - int i; - - for (i = 0; i < bgp->conf->sensor_count; i++) { - struct temp_sensor_registers *tsr; - struct temp_sensor_regval *rval; - - rval = &bgp->regval[i]; - tsr = bgp->conf->sensors[i].registers; - - if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) - rval->bg_mode_ctrl = ti_bandgap_readl(bgp, - tsr->bgap_mode_ctrl); - if (TI_BANDGAP_HAS(bgp, COUNTER)) - rval->bg_counter = ti_bandgap_readl(bgp, - tsr->bgap_counter); - if (TI_BANDGAP_HAS(bgp, TALERT)) { - rval->bg_threshold = ti_bandgap_readl(bgp, - tsr->bgap_threshold); - rval->bg_ctrl = ti_bandgap_readl(bgp, - tsr->bgap_mask_ctrl); - } - - if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) - rval->tshut_threshold = ti_bandgap_readl(bgp, - tsr->tshut_threshold); - } - - return 0; -} - -static int ti_bandgap_restore_ctxt(struct ti_bandgap *bgp) -{ - int i; - - for (i = 0; i < bgp->conf->sensor_count; i++) { - struct temp_sensor_registers *tsr; - struct temp_sensor_regval *rval; - u32 val = 0; - - rval = &bgp->regval[i]; - tsr = bgp->conf->sensors[i].registers; - - if (TI_BANDGAP_HAS(bgp, COUNTER)) - val = ti_bandgap_readl(bgp, tsr->bgap_counter); - - if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) - ti_bandgap_writel(bgp, rval->tshut_threshold, - tsr->tshut_threshold); - /* Force immediate temperature measurement and update - * of the DTEMP field - */ - ti_bandgap_force_single_read(bgp, i); - - if (TI_BANDGAP_HAS(bgp, COUNTER)) - ti_bandgap_writel(bgp, rval->bg_counter, - tsr->bgap_counter); - if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) - ti_bandgap_writel(bgp, rval->bg_mode_ctrl, - tsr->bgap_mode_ctrl); - if (TI_BANDGAP_HAS(bgp, TALERT)) { - ti_bandgap_writel(bgp, rval->bg_threshold, - tsr->bgap_threshold); - ti_bandgap_writel(bgp, rval->bg_ctrl, - tsr->bgap_mask_ctrl); - } - } - - return 0; -} - -static int ti_bandgap_suspend(struct device *dev) -{ - struct ti_bandgap *bgp = dev_get_drvdata(dev); - int err; - - err = ti_bandgap_save_ctxt(bgp); - ti_bandgap_power(bgp, false); - - if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) - clk_disable_unprepare(bgp->fclock); - - return err; -} - -static int ti_bandgap_resume(struct device *dev) -{ - struct ti_bandgap *bgp = dev_get_drvdata(dev); - - if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) - clk_prepare_enable(bgp->fclock); - - ti_bandgap_power(bgp, true); - - return ti_bandgap_restore_ctxt(bgp); -} -static const struct dev_pm_ops ti_bandgap_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ti_bandgap_suspend, - ti_bandgap_resume) -}; - -#define DEV_PM_OPS (&ti_bandgap_dev_pm_ops) -#else -#define DEV_PM_OPS NULL -#endif - -static const struct of_device_id of_ti_bandgap_match[] = { -#ifdef CONFIG_OMAP4_THERMAL - { - .compatible = "ti,omap4430-bandgap", - .data = (void *)&omap4430_data, - }, - { - .compatible = "ti,omap4460-bandgap", - .data = (void *)&omap4460_data, - }, - { - .compatible = "ti,omap4470-bandgap", - .data = (void *)&omap4470_data, - }, -#endif -#ifdef CONFIG_OMAP5_THERMAL - { - .compatible = "ti,omap5430-bandgap", - .data = (void *)&omap5430_data, - }, -#endif - /* Sentinel */ - { }, -}; -MODULE_DEVICE_TABLE(of, of_ti_bandgap_match); - -static struct platform_driver ti_bandgap_sensor_driver = { - .probe = ti_bandgap_probe, - .remove = ti_bandgap_remove, - .driver = { - .name = "ti-soc-thermal", - .pm = DEV_PM_OPS, - .of_match_table = of_ti_bandgap_match, - }, -}; - -module_platform_driver(ti_bandgap_sensor_driver); - -MODULE_DESCRIPTION("OMAP4+ bandgap temperature sensor driver"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:ti-soc-thermal"); -MODULE_AUTHOR("Texas Instrument Inc."); diff --git a/drivers/staging/ti-soc-thermal/ti-bandgap.h b/drivers/staging/ti-soc-thermal/ti-bandgap.h deleted file mode 100644 index 5f4794a..0000000 --- a/drivers/staging/ti-soc-thermal/ti-bandgap.h +++ /dev/null @@ -1,403 +0,0 @@ -/* - * OMAP4 Bandgap temperature sensor driver - * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ - * Contact: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#ifndef __TI_BANDGAP_H -#define __TI_BANDGAP_H - -#include -#include -#include - -/** - * DOC: bandgap driver data structure - * ================================== - * - * +----------+----------------+ - * | struct temp_sensor_regval | - * +---------------------------+ - * * (Array of) - * | - * | - * +-------------------+ +-----------------+ - * | struct ti_bandgap |-->| struct device * | - * +----------+--------+ +-----------------+ - * | - * | - * V - * +------------------------+ - * | struct ti_bandgap_data | - * +------------------------+ - * | - * | - * * (Array of) - * +------------+------------------------------------------------------+ - * | +----------+------------+ +-------------------------+ | - * | | struct ti_temp_sensor |-->| struct temp_sensor_data | | - * | +-----------------------+ +------------+------------+ | - * | | | - * | + | - * | V | - * | +----------+-------------------+ | - * | | struct temp_sensor_registers | | - * | +------------------------------+ | - * | | - * +-------------------------------------------------------------------+ - * - * Above is a simple diagram describing how the data structure below - * are organized. For each bandgap device there should be a ti_bandgap_data - * containing the device instance configuration, as well as, an array of - * sensors, representing every sensor instance present in this bandgap. - */ - -/** - * struct temp_sensor_registers - descriptor to access registers and bitfields - * @temp_sensor_ctrl: TEMP_SENSOR_CTRL register offset - * @bgap_tempsoff_mask: mask to temp_sensor_ctrl.tempsoff - * @bgap_soc_mask: mask to temp_sensor_ctrl.soc - * @bgap_eocz_mask: mask to temp_sensor_ctrl.eocz - * @bgap_dtemp_mask: mask to temp_sensor_ctrl.dtemp - * @bgap_mask_ctrl: BANDGAP_MASK_CTRL register offset - * @mask_hot_mask: mask to bandgap_mask_ctrl.mask_hot - * @mask_cold_mask: mask to bandgap_mask_ctrl.mask_cold - * @mask_sidlemode_mask: mask to bandgap_mask_ctrl.mask_sidlemode - * @mask_counter_delay_mask: mask to bandgap_mask_ctrl.mask_counter_delay - * @mask_freeze_mask: mask to bandgap_mask_ctrl.mask_free - * @mask_clear_mask: mask to bandgap_mask_ctrl.mask_clear - * @mask_clear_accum_mask: mask to bandgap_mask_ctrl.mask_clear_accum - * @bgap_mode_ctrl: BANDGAP_MODE_CTRL register offset - * @mode_ctrl_mask: mask to bandgap_mode_ctrl.mode_ctrl - * @bgap_counter: BANDGAP_COUNTER register offset - * @counter_mask: mask to bandgap_counter.counter - * @bgap_threshold: BANDGAP_THRESHOLD register offset (TALERT thresholds) - * @threshold_thot_mask: mask to bandgap_threhold.thot - * @threshold_tcold_mask: mask to bandgap_threhold.tcold - * @tshut_threshold: TSHUT_THRESHOLD register offset (TSHUT thresholds) - * @tshut_efuse_mask: mask to tshut_threshold.tshut_efuse - * @tshut_efuse_shift: shift to tshut_threshold.tshut_efuse - * @tshut_hot_mask: mask to tshut_threhold.thot - * @tshut_cold_mask: mask to tshut_threhold.thot - * @bgap_status: BANDGAP_STATUS register offset - * @status_clean_stop_mask: mask to bandgap_status.clean_stop - * @status_bgap_alert_mask: mask to bandgap_status.bandgap_alert - * @status_hot_mask: mask to bandgap_status.hot - * @status_cold_mask: mask to bandgap_status.cold - * @bgap_cumul_dtemp: BANDGAP_CUMUL_DTEMP register offset - * @ctrl_dtemp_0: CTRL_DTEMP0 register offset - * @ctrl_dtemp_1: CTRL_DTEMP1 register offset - * @ctrl_dtemp_2: CTRL_DTEMP2 register offset - * @ctrl_dtemp_3: CTRL_DTEMP3 register offset - * @ctrl_dtemp_4: CTRL_DTEMP4 register offset - * @bgap_efuse: BANDGAP_EFUSE register offset - * - * The register offsets and bitfields might change across - * OMAP and variants versions. Hence this struct serves as a - * descriptor map on how to access the registers and the bitfields. - * - * This descriptor contains registers of all versions of bandgap chips. - * Not all versions will use all registers, depending on the available - * features. Please read TRMs for descriptive explanation on each bitfield. - */ - -struct temp_sensor_registers { - u32 temp_sensor_ctrl; - u32 bgap_tempsoff_mask; - u32 bgap_soc_mask; - u32 bgap_eocz_mask; /* not used: but needs revisit */ - u32 bgap_dtemp_mask; - - u32 bgap_mask_ctrl; - u32 mask_hot_mask; - u32 mask_cold_mask; - u32 mask_sidlemode_mask; /* not used: but may be needed for pm */ - u32 mask_counter_delay_mask; - u32 mask_freeze_mask; - u32 mask_clear_mask; /* not used: but needed for trending */ - u32 mask_clear_accum_mask; /* not used: but needed for trending */ - - u32 bgap_mode_ctrl; - u32 mode_ctrl_mask; - - u32 bgap_counter; - u32 counter_mask; - - u32 bgap_threshold; - u32 threshold_thot_mask; - u32 threshold_tcold_mask; - - u32 tshut_threshold; - u32 tshut_efuse_mask; /* not used */ - u32 tshut_efuse_shift; /* not used */ - u32 tshut_hot_mask; - u32 tshut_cold_mask; - - u32 bgap_status; - u32 status_clean_stop_mask; /* not used: but needed for trending */ - u32 status_bgap_alert_mask; /* not used */ - u32 status_hot_mask; - u32 status_cold_mask; - - u32 bgap_cumul_dtemp; /* not used: but needed for trending */ - u32 ctrl_dtemp_0; /* not used: but needed for trending */ - u32 ctrl_dtemp_1; /* not used: but needed for trending */ - u32 ctrl_dtemp_2; /* not used: but needed for trending */ - u32 ctrl_dtemp_3; /* not used: but needed for trending */ - u32 ctrl_dtemp_4; /* not used: but needed for trending */ - u32 bgap_efuse; -}; - -/** - * struct temp_sensor_data - The thresholds and limits for temperature sensors. - * @tshut_hot: temperature to trigger a thermal reset (initial value) - * @tshut_cold: temp to get the plat out of reset due to thermal (init val) - * @t_hot: temperature to trigger a thermal alert (high initial value) - * @t_cold: temperature to trigger a thermal alert (low initial value) - * @min_freq: sensor minimum clock rate - * @max_freq: sensor maximum clock rate - * @max_temp: sensor maximum temperature - * @min_temp: sensor minimum temperature - * @hyst_val: temperature hysteresis considered while converting ADC values - * @update_int1: update interval - * @update_int2: update interval - * - * This data structure will hold the required thresholds and temperature limits - * for a specific temperature sensor, like shutdown temperature, alert - * temperature, clock / rate used, ADC conversion limits and update intervals - */ -struct temp_sensor_data { - u32 tshut_hot; - u32 tshut_cold; - u32 t_hot; - u32 t_cold; - u32 min_freq; - u32 max_freq; - int max_temp; - int min_temp; - int hyst_val; - u32 update_int1; /* not used */ - u32 update_int2; /* not used */ -}; - -struct ti_bandgap_data; - -/** - * struct temp_sensor_regval - temperature sensor register values and priv data - * @bg_mode_ctrl: temp sensor control register value - * @bg_ctrl: bandgap ctrl register value - * @bg_counter: bandgap counter value - * @bg_threshold: bandgap threshold register value - * @tshut_threshold: bandgap tshut register value - * @data: private data - * - * Data structure to save and restore bandgap register set context. Only - * required registers are shadowed, when needed. - */ -struct temp_sensor_regval { - u32 bg_mode_ctrl; - u32 bg_ctrl; - u32 bg_counter; - u32 bg_threshold; - u32 tshut_threshold; - void *data; -}; - -/** - * struct ti_bandgap - bandgap device structure - * @dev: struct device pointer - * @base: io memory base address - * @conf: struct with bandgap configuration set (# sensors, conv_table, etc) - * @regval: temperature sensor register values - * @fclock: pointer to functional clock of temperature sensor - * @div_clk: pointer to divider clock of temperature sensor fclk - * @lock: spinlock for ti_bandgap structure - * @irq: MPU IRQ number for thermal alert - * @tshut_gpio: GPIO where Tshut signal is routed - * @clk_rate: Holds current clock rate - * - * The bandgap device structure representing the bandgap device instance. - * It holds most of the dynamic stuff. Configurations and sensor specific - * entries are inside the @conf structure. - */ -struct ti_bandgap { - struct device *dev; - void __iomem *base; - const struct ti_bandgap_data *conf; - struct temp_sensor_regval *regval; - struct clk *fclock; - struct clk *div_clk; - spinlock_t lock; /* shields this struct */ - int irq; - int tshut_gpio; - u32 clk_rate; -}; - -/** - * struct ti_temp_sensor - bandgap temperature sensor configuration data - * @ts_data: pointer to struct with thresholds, limits of temperature sensor - * @registers: pointer to the list of register offsets and bitfields - * @domain: the name of the domain where the sensor is located - * @slope: sensor gradient slope info for hotspot extrapolation equation - * @constant: sensor gradient const info for hotspot extrapolation equation - * @slope_pcb: sensor gradient slope info for hotspot extrapolation equation - * with no external influence - * @constant_pcb: sensor gradient const info for hotspot extrapolation equation - * with no external influence - * @register_cooling: function to describe how this sensor is going to be cooled - * @unregister_cooling: function to release cooling data - * - * Data structure to describe a temperature sensor handled by a bandgap device. - * It should provide configuration details on this sensor, such as how to - * access the registers affecting this sensor, shadow register buffer, how to - * assess the gradient from hotspot, how to cooldown the domain when sensor - * reports too hot temperature. - */ -struct ti_temp_sensor { - struct temp_sensor_data *ts_data; - struct temp_sensor_registers *registers; - char *domain; - /* for hotspot extrapolation */ - const int slope; - const int constant; - const int slope_pcb; - const int constant_pcb; - int (*register_cooling)(struct ti_bandgap *bgp, int id); - int (*unregister_cooling)(struct ti_bandgap *bgp, int id); -}; - -/** - * DOC: ti bandgap feature types - * - * TI_BANDGAP_FEATURE_TSHUT - used when the thermal shutdown signal output - * of a bandgap device instance is routed to the processor. This means - * the system must react and perform the shutdown by itself (handle an - * IRQ, for instance). - * - * TI_BANDGAP_FEATURE_TSHUT_CONFIG - used when the bandgap device has control - * over the thermal shutdown configuration. This means that the thermal - * shutdown thresholds are programmable, for instance. - * - * TI_BANDGAP_FEATURE_TALERT - used when the bandgap device instance outputs - * a signal representing violation of programmable alert thresholds. - * - * TI_BANDGAP_FEATURE_MODE_CONFIG - used when it is possible to choose which - * mode, continuous or one shot, the bandgap device instance will operate. - * - * TI_BANDGAP_FEATURE_COUNTER - used when the bandgap device instance allows - * programming the update interval of its internal state machine. - * - * TI_BANDGAP_FEATURE_POWER_SWITCH - used when the bandgap device allows - * itself to be switched on/off. - * - * TI_BANDGAP_FEATURE_CLK_CTRL - used when the clocks feeding the bandgap - * device are gateable or not. - * - * TI_BANDGAP_FEATURE_FREEZE_BIT - used when the bandgap device features - * a history buffer that its update can be freezed/unfreezed. - * - * TI_BANDGAP_FEATURE_COUNTER_DELAY - used when the bandgap device features - * a delay programming based on distinct values. - * - * TI_BANDGAP_FEATURE_HISTORY_BUFFER - used when the bandgap device features - * a history buffer of temperatures. - * - * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a - * specific feature (above) or not. Return non-zero, if yes. - */ -#define TI_BANDGAP_FEATURE_TSHUT BIT(0) -#define TI_BANDGAP_FEATURE_TSHUT_CONFIG BIT(1) -#define TI_BANDGAP_FEATURE_TALERT BIT(2) -#define TI_BANDGAP_FEATURE_MODE_CONFIG BIT(3) -#define TI_BANDGAP_FEATURE_COUNTER BIT(4) -#define TI_BANDGAP_FEATURE_POWER_SWITCH BIT(5) -#define TI_BANDGAP_FEATURE_CLK_CTRL BIT(6) -#define TI_BANDGAP_FEATURE_FREEZE_BIT BIT(7) -#define TI_BANDGAP_FEATURE_COUNTER_DELAY BIT(8) -#define TI_BANDGAP_FEATURE_HISTORY_BUFFER BIT(9) -#define TI_BANDGAP_HAS(b, f) \ - ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f) - -/** - * struct ti_bandgap_data - ti bandgap data configuration structure - * @features: a bitwise flag set to describe the device features - * @conv_table: Pointer to ADC to temperature conversion table - * @adc_start_val: ADC conversion table starting value - * @adc_end_val: ADC conversion table ending value - * @fclock_name: clock name of the functional clock - * @div_ck_name: clock name of the clock divisor - * @sensor_count: count of temperature sensor within this bandgap device - * @report_temperature: callback to report thermal alert to thermal API - * @expose_sensor: callback to export sensor to thermal API - * @remove_sensor: callback to destroy sensor from thermal API - * @sensors: array of sensors present in this bandgap instance - * - * This is a data structure which should hold most of the static configuration - * of a bandgap device instance. It should describe which features this instance - * is capable of, the clock names to feed this device, the amount of sensors and - * their configuration representation, and how to export and unexport them to - * a thermal API. - */ -struct ti_bandgap_data { - unsigned int features; - const int *conv_table; - u32 adc_start_val; - u32 adc_end_val; - char *fclock_name; - char *div_ck_name; - int sensor_count; - int (*report_temperature)(struct ti_bandgap *bgp, int id); - int (*expose_sensor)(struct ti_bandgap *bgp, int id, char *domain); - int (*remove_sensor)(struct ti_bandgap *bgp, int id); - - /* this needs to be at the end */ - struct ti_temp_sensor sensors[]; -}; - -int ti_bandgap_read_thot(struct ti_bandgap *bgp, int id, int *thot); -int ti_bandgap_write_thot(struct ti_bandgap *bgp, int id, int val); -int ti_bandgap_read_tcold(struct ti_bandgap *bgp, int id, int *tcold); -int ti_bandgap_write_tcold(struct ti_bandgap *bgp, int id, int val); -int ti_bandgap_read_update_interval(struct ti_bandgap *bgp, int id, - int *interval); -int ti_bandgap_write_update_interval(struct ti_bandgap *bgp, int id, - u32 interval); -int ti_bandgap_read_temperature(struct ti_bandgap *bgp, int id, - int *temperature); -int ti_bandgap_set_sensor_data(struct ti_bandgap *bgp, int id, void *data); -void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id); -int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend); - -#ifdef CONFIG_OMAP4_THERMAL -extern const struct ti_bandgap_data omap4430_data; -extern const struct ti_bandgap_data omap4460_data; -extern const struct ti_bandgap_data omap4470_data; -#else -#define omap4430_data NULL -#define omap4460_data NULL -#define omap4470_data NULL -#endif - -#ifdef CONFIG_OMAP5_THERMAL -extern const struct ti_bandgap_data omap5430_data; -#else -#define omap5430_data NULL -#endif - -#endif diff --git a/drivers/staging/ti-soc-thermal/ti-thermal-common.c b/drivers/staging/ti-soc-thermal/ti-thermal-common.c deleted file mode 100644 index 8e67ebf..0000000 --- a/drivers/staging/ti-soc-thermal/ti-thermal-common.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * OMAP thermal driver interface - * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ - * Contact: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ti-thermal.h" -#include "ti-bandgap.h" - -/* common data structures */ -struct ti_thermal_data { - struct thermal_zone_device *ti_thermal; - struct thermal_zone_device *pcb_tz; - struct thermal_cooling_device *cool_dev; - struct ti_bandgap *bgp; - enum thermal_device_mode mode; - struct work_struct thermal_wq; - int sensor_id; -}; - -static void ti_thermal_work(struct work_struct *work) -{ - struct ti_thermal_data *data = container_of(work, - struct ti_thermal_data, thermal_wq); - - thermal_zone_device_update(data->ti_thermal); - - dev_dbg(&data->ti_thermal->device, "updated thermal zone %s\n", - data->ti_thermal->type); -} - -/** - * ti_thermal_hotspot_temperature - returns sensor extrapolated temperature - * @t: omap sensor temperature - * @s: omap sensor slope value - * @c: omap sensor const value - */ -static inline int ti_thermal_hotspot_temperature(int t, int s, int c) -{ - int delta = t * s / 1000 + c; - - if (delta < 0) - delta = 0; - - return t + delta; -} - -/* thermal zone ops */ -/* Get temperature callback function for thermal zone*/ -static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - struct thermal_zone_device *pcb_tz = NULL; - struct ti_thermal_data *data = thermal->devdata; - struct ti_bandgap *bgp; - const struct ti_temp_sensor *s; - int ret, tmp, slope, constant; - unsigned long pcb_temp; - - if (!data) - return 0; - - bgp = data->bgp; - s = &bgp->conf->sensors[data->sensor_id]; - - ret = ti_bandgap_read_temperature(bgp, data->sensor_id, &tmp); - if (ret) - return ret; - - /* Default constants */ - slope = s->slope; - constant = s->constant; - - pcb_tz = data->pcb_tz; - /* In case pcb zone is available, use the extrapolation rule with it */ - if (!IS_ERR_OR_NULL(pcb_tz)) { - ret = thermal_zone_get_temp(pcb_tz, &pcb_temp); - if (!ret) { - tmp -= pcb_temp; /* got a valid PCB temp */ - slope = s->slope_pcb; - constant = s->constant_pcb; - } else { - dev_err(bgp->dev, - "Failed to read PCB state. Using defaults\n"); - } - } - *temp = ti_thermal_hotspot_temperature(tmp, slope, constant); - - return ret; -} - -/* Bind callback functions for thermal zone */ -static int ti_thermal_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct ti_thermal_data *data = thermal->devdata; - int id; - - if (IS_ERR_OR_NULL(data)) - return -ENODEV; - - /* check if this is the cooling device we registered */ - if (data->cool_dev != cdev) - return 0; - - id = data->sensor_id; - - /* Simple thing, two trips, one passive another critical */ - return thermal_zone_bind_cooling_device(thermal, 0, cdev, - /* bind with min and max states defined by cpu_cooling */ - THERMAL_NO_LIMIT, - THERMAL_NO_LIMIT); -} - -/* Unbind callback functions for thermal zone */ -static int ti_thermal_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct ti_thermal_data *data = thermal->devdata; - - if (IS_ERR_OR_NULL(data)) - return -ENODEV; - - /* check if this is the cooling device we registered */ - if (data->cool_dev != cdev) - return 0; - - /* Simple thing, two trips, one passive another critical */ - return thermal_zone_unbind_cooling_device(thermal, 0, cdev); -} - -/* Get mode callback functions for thermal zone */ -static int ti_thermal_get_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode *mode) -{ - struct ti_thermal_data *data = thermal->devdata; - - if (data) - *mode = data->mode; - - return 0; -} - -/* Set mode callback functions for thermal zone */ -static int ti_thermal_set_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode mode) -{ - struct ti_thermal_data *data = thermal->devdata; - - if (!data->ti_thermal) { - dev_notice(&thermal->device, "thermal zone not registered\n"); - return 0; - } - - mutex_lock(&data->ti_thermal->lock); - - if (mode == THERMAL_DEVICE_ENABLED) - data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; - else - data->ti_thermal->polling_delay = 0; - - mutex_unlock(&data->ti_thermal->lock); - - data->mode = mode; - thermal_zone_device_update(data->ti_thermal); - dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n", - data->ti_thermal->polling_delay); - - return 0; -} - -/* Get trip type callback functions for thermal zone */ -static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - if (!ti_thermal_is_valid_trip(trip)) - return -EINVAL; - - if (trip + 1 == OMAP_TRIP_NUMBER) - *type = THERMAL_TRIP_CRITICAL; - else - *type = THERMAL_TRIP_PASSIVE; - - return 0; -} - -/* Get trip temperature callback functions for thermal zone */ -static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) -{ - if (!ti_thermal_is_valid_trip(trip)) - return -EINVAL; - - *temp = ti_thermal_get_trip_value(trip); - - return 0; -} - -/* Get the temperature trend callback functions for thermal zone */ -static int ti_thermal_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - struct ti_thermal_data *data = thermal->devdata; - struct ti_bandgap *bgp; - int id, tr, ret = 0; - - bgp = data->bgp; - id = data->sensor_id; - - ret = ti_bandgap_get_trend(bgp, id, &tr); - if (ret) - return ret; - - if (tr > 0) - *trend = THERMAL_TREND_RAISING; - else if (tr < 0) - *trend = THERMAL_TREND_DROPPING; - else - *trend = THERMAL_TREND_STABLE; - - return 0; -} - -/* Get critical temperature callback functions for thermal zone */ -static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - /* shutdown zone */ - return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); -} - -static struct thermal_zone_device_ops ti_thermal_ops = { - .get_temp = ti_thermal_get_temp, - .get_trend = ti_thermal_get_trend, - .bind = ti_thermal_bind, - .unbind = ti_thermal_unbind, - .get_mode = ti_thermal_get_mode, - .set_mode = ti_thermal_set_mode, - .get_trip_type = ti_thermal_get_trip_type, - .get_trip_temp = ti_thermal_get_trip_temp, - .get_crit_temp = ti_thermal_get_crit_temp, -}; - -static struct ti_thermal_data -*ti_thermal_build_data(struct ti_bandgap *bgp, int id) -{ - struct ti_thermal_data *data; - - data = devm_kzalloc(bgp->dev, sizeof(*data), GFP_KERNEL); - if (!data) { - dev_err(bgp->dev, "kzalloc fail\n"); - return NULL; - } - data->sensor_id = id; - data->bgp = bgp; - data->mode = THERMAL_DEVICE_ENABLED; - data->pcb_tz = thermal_zone_get_zone_by_name("pcb"); - INIT_WORK(&data->thermal_wq, ti_thermal_work); - - return data; -} - -int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, - char *domain) -{ - struct ti_thermal_data *data; - - data = ti_bandgap_get_sensor_data(bgp, id); - - if (IS_ERR_OR_NULL(data)) - data = ti_thermal_build_data(bgp, id); - - if (!data) - return -EINVAL; - - /* Create thermal zone */ - data->ti_thermal = thermal_zone_device_register(domain, - OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops, - NULL, FAST_TEMP_MONITORING_RATE, - FAST_TEMP_MONITORING_RATE); - if (IS_ERR_OR_NULL(data->ti_thermal)) { - dev_err(bgp->dev, "thermal zone device is NULL\n"); - return PTR_ERR(data->ti_thermal); - } - data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; - ti_bandgap_set_sensor_data(bgp, id, data); - - return 0; -} - -int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id) -{ - struct ti_thermal_data *data; - - data = ti_bandgap_get_sensor_data(bgp, id); - - thermal_zone_device_unregister(data->ti_thermal); - - return 0; -} - -int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id) -{ - struct ti_thermal_data *data; - - data = ti_bandgap_get_sensor_data(bgp, id); - - schedule_work(&data->thermal_wq); - - return 0; -} - -int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id) -{ - struct ti_thermal_data *data; - - data = ti_bandgap_get_sensor_data(bgp, id); - if (IS_ERR_OR_NULL(data)) - data = ti_thermal_build_data(bgp, id); - - if (!data) - return -EINVAL; - - if (!cpufreq_get_current_driver()) { - dev_dbg(bgp->dev, "no cpufreq driver yet\n"); - return -EPROBE_DEFER; - } - - /* Register cooling device */ - data->cool_dev = cpufreq_cooling_register(cpu_present_mask); - if (IS_ERR_OR_NULL(data->cool_dev)) { - dev_err(bgp->dev, - "Failed to register cpufreq cooling device\n"); - return PTR_ERR(data->cool_dev); - } - ti_bandgap_set_sensor_data(bgp, id, data); - - return 0; -} - -int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id) -{ - struct ti_thermal_data *data; - - data = ti_bandgap_get_sensor_data(bgp, id); - cpufreq_cooling_unregister(data->cool_dev); - - return 0; -} diff --git a/drivers/staging/ti-soc-thermal/ti-thermal.h b/drivers/staging/ti-soc-thermal/ti-thermal.h deleted file mode 100644 index 5055777..0000000 --- a/drivers/staging/ti-soc-thermal/ti-thermal.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * OMAP thermal definitions - * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ - * Contact: - * Eduardo Valentin - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#ifndef __TI_THERMAL_H -#define __TI_THERMAL_H - -#include "ti-bandgap.h" - -/* sensors gradient and offsets */ -#define OMAP_GRADIENT_SLOPE_4430 0 -#define OMAP_GRADIENT_CONST_4430 20000 -#define OMAP_GRADIENT_SLOPE_4460 348 -#define OMAP_GRADIENT_CONST_4460 -9301 -#define OMAP_GRADIENT_SLOPE_4470 308 -#define OMAP_GRADIENT_CONST_4470 -7896 - -#define OMAP_GRADIENT_SLOPE_5430_CPU 65 -#define OMAP_GRADIENT_CONST_5430_CPU -1791 -#define OMAP_GRADIENT_SLOPE_5430_GPU 117 -#define OMAP_GRADIENT_CONST_5430_GPU -2992 - -/* PCB sensor calculation constants */ -#define OMAP_GRADIENT_SLOPE_W_PCB_4430 0 -#define OMAP_GRADIENT_CONST_W_PCB_4430 20000 -#define OMAP_GRADIENT_SLOPE_W_PCB_4460 1142 -#define OMAP_GRADIENT_CONST_W_PCB_4460 -393 -#define OMAP_GRADIENT_SLOPE_W_PCB_4470 1063 -#define OMAP_GRADIENT_CONST_W_PCB_4470 -477 - -#define OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU 100 -#define OMAP_GRADIENT_CONST_W_PCB_5430_CPU 484 -#define OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU 464 -#define OMAP_GRADIENT_CONST_W_PCB_5430_GPU -5102 - -/* trip points of interest in milicelsius (at hotspot level) */ -#define OMAP_TRIP_COLD 100000 -#define OMAP_TRIP_HOT 110000 -#define OMAP_TRIP_SHUTDOWN 125000 -#define OMAP_TRIP_NUMBER 2 -#define OMAP_TRIP_STEP \ - ((OMAP_TRIP_SHUTDOWN - OMAP_TRIP_HOT) / (OMAP_TRIP_NUMBER - 1)) - -/* Update rates */ -#define FAST_TEMP_MONITORING_RATE 250 - -/* helper macros */ -/** - * ti_thermal_get_trip_value - returns trip temperature based on index - * @i: trip index - */ -#define ti_thermal_get_trip_value(i) \ - (OMAP_TRIP_HOT + ((i) * OMAP_TRIP_STEP)) - -/** - * ti_thermal_is_valid_trip - check for trip index - * @i: trip index - */ -#define ti_thermal_is_valid_trip(trip) \ - ((trip) >= 0 && (trip) < OMAP_TRIP_NUMBER) - -#ifdef CONFIG_TI_THERMAL -int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, char *domain); -int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id); -int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id); -int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id); -int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id); -#else -static inline -int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, char *domain) -{ - return 0; -} - -static inline -int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id) -{ - return 0; -} - -static inline -int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id) -{ - return 0; -} - -static inline -int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id) -{ - return 0; -} - -static inline -int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id) -{ - return 0; -} -#endif -#endif diff --git a/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt b/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt deleted file mode 100644 index 1629652..0000000 --- a/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt +++ /dev/null @@ -1,61 +0,0 @@ -* Texas Instrument OMAP SCM bandgap bindings - -In the System Control Module, OMAP supplies a voltage reference -and a temperature sensor feature that are gathered in the band -gap voltage and temperature sensor (VBGAPTS) module. The band -gap provides current and voltage reference for its internal -circuits and other analog IP blocks. The analog-to-digital -converter (ADC) produces an output value that is proportional -to the silicon temperature. - -Required properties: -- compatible : Should be: - - "ti,omap4430-bandgap" : for OMAP4430 bandgap - - "ti,omap4460-bandgap" : for OMAP4460 bandgap - - "ti,omap4470-bandgap" : for OMAP4470 bandgap - - "ti,omap5430-bandgap" : for OMAP5430 bandgap -- interrupts : this entry should indicate which interrupt line -the talert signal is routed to; -Specific: -- ti,tshut-gpio : this entry should be used to inform which GPIO -line the tshut signal is routed to; -- regs : this entry must also be specified and it is specific -to each bandgap version, because the mapping may change from -soc to soc, apart of depending on available features. - -Example: -OMAP4430: -bandgap { - reg = <0x4a002260 0x4 0x4a00232C 0x4>; - compatible = "ti,omap4430-bandgap"; -}; - -OMAP4460: -bandgap { - reg = <0x4a002260 0x4 - 0x4a00232C 0x4 - 0x4a002378 0x18>; - compatible = "ti,omap4460-bandgap"; - interrupts = <0 126 4>; /* talert */ - ti,tshut-gpio = <86>; -}; - -OMAP4470: -bandgap { - reg = <0x4a002260 0x4 - 0x4a00232C 0x4 - 0x4a002378 0x18>; - compatible = "ti,omap4470-bandgap"; - interrupts = <0 126 4>; /* talert */ - ti,tshut-gpio = <86>; -}; - -OMAP5430: -bandgap { - reg = <0x4a0021e0 0xc - 0x4a00232c 0xc - 0x4a002380 0x2c - 0x4a0023C0 0x3c>; - compatible = "ti,omap5430-bandgap"; - interrupts = <0 126 4>; /* talert */ -}; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d7705e5..f73da43 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -628,25 +628,18 @@ static void __exit iscsi_target_cleanup_module(void) } static int iscsit_add_reject( + struct iscsi_conn *conn, u8 reason, - int fail_conn, - unsigned char *buf, - struct iscsi_conn *conn) + unsigned char *buf) { struct iscsi_cmd *cmd; - struct iscsi_reject *hdr; - int ret; cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return -1; cmd->iscsi_opcode = ISCSI_OP_REJECT; - if (fail_conn) - cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; - - hdr = (struct iscsi_reject *) cmd->pdu; - hdr->reason = reason; + cmd->reject_reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { @@ -662,23 +655,16 @@ static int iscsit_add_reject( cmd->i_state = ISTATE_SEND_REJECT; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); - ret = wait_for_completion_interruptible(&cmd->reject_comp); - if (ret != 0) - return -1; - - return (!fail_conn) ? 0 : -1; + return -1; } -int iscsit_add_reject_from_cmd( +static int iscsit_add_reject_from_cmd( + struct iscsi_cmd *cmd, u8 reason, - int fail_conn, - int add_to_conn, - unsigned char *buf, - struct iscsi_cmd *cmd) + bool add_to_conn, + unsigned char *buf) { struct iscsi_conn *conn; - struct iscsi_reject *hdr; - int ret; if (!cmd->conn) { pr_err("cmd->conn is NULL for ITT: 0x%08x\n", @@ -688,11 +674,7 @@ int iscsit_add_reject_from_cmd( conn = cmd->conn; cmd->iscsi_opcode = ISCSI_OP_REJECT; - if (fail_conn) - cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; - - hdr = (struct iscsi_reject *) cmd->pdu; - hdr->reason = reason; + cmd->reject_reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { @@ -709,8 +691,6 @@ int iscsit_add_reject_from_cmd( cmd->i_state = ISTATE_SEND_REJECT; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); - - ret = wait_for_completion_interruptible(&cmd->reject_comp); /* * Perform the kref_put now if se_cmd has already been setup by * scsit_setup_scsi_cmd() @@ -719,12 +699,19 @@ int iscsit_add_reject_from_cmd( pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); } - if (ret != 0) - return -1; + return -1; +} - return (!fail_conn) ? 0 : -1; +static int iscsit_add_reject_cmd(struct iscsi_cmd *cmd, u8 reason, + unsigned char *buf) +{ + return iscsit_add_reject_from_cmd(cmd, reason, true, buf); +} + +int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8 reason, unsigned char *buf) +{ + return iscsit_add_reject_from_cmd(cmd, reason, false, buf); } -EXPORT_SYMBOL(iscsit_add_reject_from_cmd); /* * Map some portion of the allocated scatterlist to an iovec, suitable for @@ -844,8 +831,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) { pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL" " not set. Bad iSCSI Initiator.\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (((hdr->flags & ISCSI_FLAG_CMD_READ) || @@ -865,8 +852,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" " set when Expected Data Transfer Length is 0 for" " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } done: @@ -875,62 +862,62 @@ done: pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE" " MUST be set if Expected Data Transfer Length is not 0." " Bad iSCSI Initiator\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if ((hdr->flags & ISCSI_FLAG_CMD_READ) && (hdr->flags & ISCSI_FLAG_CMD_WRITE)) { pr_err("Bidirectional operations not supported!\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (hdr->opcode & ISCSI_OP_IMMEDIATE) { pr_err("Illegally set Immediate Bit in iSCSI Initiator" " Scsi Command PDU.\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (payload_length && !conn->sess->sess_ops->ImmediateData) { pr_err("ImmediateData=No but DataSegmentLength=%u," " protocol error.\n", payload_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } - if ((be32_to_cpu(hdr->data_length )== payload_length) && + if ((be32_to_cpu(hdr->data_length) == payload_length) && (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) { pr_err("Expected Data Transfer Length and Length of" " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL" " bit is not set protocol error\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > be32_to_cpu(hdr->data_length)) { pr_err("DataSegmentLength: %u is greater than" " EDTL: %u, protocol error.\n", payload_length, hdr->data_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" " MaxXmitDataSegmentLength: %u, protocol error.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > conn->sess->sess_ops->FirstBurstLength) { pr_err("DataSegmentLength: %u is greater than" " FirstBurstLength: %u, protocol error.\n", payload_length, conn->sess->sess_ops->FirstBurstLength); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : @@ -985,9 +972,8 @@ done: dr = iscsit_allocate_datain_req(); if (!dr) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); iscsit_attach_datain_req(cmd, dr); } @@ -1015,18 +1001,16 @@ done: cmd->sense_reason = target_setup_cmd_from_cdb(&cmd->se_cmd, hdr->cdb); if (cmd->sense_reason) { if (cmd->sense_reason == TCM_OUT_OF_RESOURCES) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } goto attach_cmd; } if (iscsit_build_pdu_and_seq_lists(cmd, payload_length) < 0) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } attach_cmd: @@ -1068,17 +1052,13 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * be acknowledged. (See below) */ if (!cmd->immediate_data) { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - if (!cmd->sense_reason) - return 0; - + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; + else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 0; - } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); } } @@ -1103,6 +1083,9 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * iscsit_check_received_cmdsn() in iscsit_get_immediate_data() below. */ if (cmd->sense_reason) { + if (cmd->reject_reason) + return 0; + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 1; } @@ -1111,10 +1094,8 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * the backend memory allocation. */ cmd->sense_reason = transport_generic_new_cmd(&cmd->se_cmd); - if (cmd->sense_reason) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + if (cmd->sense_reason) return 1; - } return 0; } @@ -1124,6 +1105,7 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { + struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. @@ -1140,20 +1122,25 @@ after_immediate_data: * DataCRC, check against ExpCmdSN/MaxCmdSN if * Immediate Bit is not set. */ - cmdsn_ret = iscsit_sequence_cmd(cmd->conn, cmd, hdr->cmdsn); + cmdsn_ret = iscsit_sequence_cmd(cmd->conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { + return -1; + } else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + return 0; + } if (cmd->sense_reason) { - if (iscsit_dump_data_payload(cmd->conn, - cmd->first_burst_len, 1) < 0) - return -1; + int rc; + + rc = iscsit_dump_data_payload(cmd->conn, + cmd->first_burst_len, 1); + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); - if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); - } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { /* * Immediate Data failed DataCRC and ERL>=1, @@ -1184,15 +1171,14 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, rc = iscsit_setup_scsi_cmd(conn, cmd, buf); if (rc < 0) - return rc; + return 0; /* * Allocation iovecs needed for struct socket operations for * traditional iSCSI block I/O. */ if (iscsit_allocate_iovecs(cmd) < 0) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 0, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } immed_data = cmd->immediate_data; @@ -1277,14 +1263,13 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, struct iscsi_data *hdr = (struct iscsi_data *)buf; struct iscsi_cmd *cmd = NULL; struct se_cmd *se_cmd; - unsigned long flags; u32 payload_length = ntoh24(hdr->dlength); int rc; if (!payload_length) { pr_err("DataOUT payload is ZERO, protocol error.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } /* iSCSI write */ @@ -1301,8 +1286,8 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, pr_err("DataSegmentLength: %u is greater than" " MaxXmitDataSegmentLength: %u\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, @@ -1325,8 +1310,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, if (cmd->data_direction != DMA_TO_DEVICE) { pr_err("Command ITT: 0x%08x received DataOUT for a" " NON-WRITE command.\n", cmd->init_task_tag); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); } se_cmd = &cmd->se_cmd; iscsit_mod_dataout_timer(cmd); @@ -1335,8 +1319,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, pr_err("DataOut Offset: %u, Length %u greater than" " iSCSI Command EDTL %u, protocol error.\n", hdr->offset, payload_length, cmd->se_cmd.data_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_INVALID, buf); } if (cmd->unsolicited_data) { @@ -1356,14 +1339,9 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, */ /* Something's amiss if we're not in WRITE_PENDING state... */ - spin_lock_irqsave(&se_cmd->t_state_lock, flags); WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING); - spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); - - spin_lock_irqsave(&se_cmd->t_state_lock, flags); if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE)) dump_unsolicited_data = 1; - spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); if (dump_unsolicited_data) { /* @@ -1528,7 +1506,7 @@ static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) rc = iscsit_check_dataout_hdr(conn, buf, &cmd); if (rc < 0) - return rc; + return 0; else if (!cmd) return 0; @@ -1541,24 +1519,16 @@ static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) return iscsit_check_dataout_payload(cmd, hdr, data_crc_failed); } -int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, - unsigned char *buf) +int iscsit_setup_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_nopout *hdr) { - unsigned char *ping_data = NULL; - int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size; - u32 checksum, data_crc, padding = 0, payload_length; - struct iscsi_cmd *cmd_p = NULL; - struct kvec *iov = NULL; - struct iscsi_nopout *hdr; - - hdr = (struct iscsi_nopout *) buf; - payload_length = ntoh24(hdr->dlength); + u32 payload_length = ntoh24(hdr->dlength); if (hdr->itt == RESERVED_ITT && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { pr_err("NOPOUT ITT is reserved, but Immediate Bit is" " not set, protocol error.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); } if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { @@ -1566,8 +1536,8 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, " greater than MaxXmitDataSegmentLength: %u, protocol" " error.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); } pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%08x," @@ -1583,11 +1553,6 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * can contain ping data. */ if (hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { - if (!cmd) - return iscsit_add_reject( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); - cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT; cmd->i_state = ISTATE_SEND_NOPIN; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? @@ -1599,8 +1564,85 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->data_direction = DMA_NONE; } + return 0; +} +EXPORT_SYMBOL(iscsit_setup_nop_out); + +int iscsit_process_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_nopout *hdr) +{ + struct iscsi_cmd *cmd_p = NULL; + int cmdsn_ret = 0; + /* + * Initiator is expecting a NopIN ping reply.. + */ + if (hdr->itt != RESERVED_ITT) { + BUG_ON(!cmd); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); + + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + iscsit_add_cmd_to_response_queue(cmd, conn, + cmd->i_state); + return 0; + } + + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) + return 0; + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; + + return 0; + } + /* + * This was a response to a unsolicited NOPIN ping. + */ + if (hdr->ttt != cpu_to_be32(0xFFFFFFFF)) { + cmd_p = iscsit_find_cmd_from_ttt(conn, be32_to_cpu(hdr->ttt)); + if (!cmd_p) + return -EINVAL; + + iscsit_stop_nopin_response_timer(conn); + + cmd_p->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd_p, conn, cmd_p->i_state); + + iscsit_start_nopin_timer(conn); + return 0; + } + /* + * Otherwise, initiator is not expecting a NOPIN is response. + * Just ignore for now. + */ + return 0; +} +EXPORT_SYMBOL(iscsit_process_nop_out); + +static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char *buf) +{ + unsigned char *ping_data = NULL; + struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; + struct kvec *iov = NULL; + u32 payload_length = ntoh24(hdr->dlength); + int ret; + + ret = iscsit_setup_nop_out(conn, cmd, hdr); + if (ret < 0) + return 0; + /* + * Handle NOP-OUT payload for traditional iSCSI sockets + */ if (payload_length && hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { - rx_size = payload_length; + u32 checksum, data_crc, padding = 0; + int niov = 0, rx_got, rx_size = payload_length; + ping_data = kzalloc(payload_length + 1, GFP_KERNEL); if (!ping_data) { pr_err("Unable to allocate memory for" @@ -1679,76 +1721,14 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_debug("Ping Data: \"%s\"\n", ping_data); } - if (hdr->itt != RESERVED_ITT) { - if (!cmd) { - pr_err("Checking CmdSN for NOPOUT," - " but cmd is NULL!\n"); - return -1; - } - /* - * Initiator is expecting a NopIN ping reply, - */ - spin_lock_bh(&conn->cmd_lock); - list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list); - spin_unlock_bh(&conn->cmd_lock); - - iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); - - if (hdr->opcode & ISCSI_OP_IMMEDIATE) { - iscsit_add_cmd_to_response_queue(cmd, conn, - cmd->i_state); - return 0; - } - - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - ret = 0; - goto ping_out; - } - if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); - - return 0; - } - - if (hdr->ttt != cpu_to_be32(0xFFFFFFFF)) { - /* - * This was a response to a unsolicited NOPIN ping. - */ - cmd_p = iscsit_find_cmd_from_ttt(conn, be32_to_cpu(hdr->ttt)); - if (!cmd_p) - return -1; - - iscsit_stop_nopin_response_timer(conn); - - cmd_p->i_state = ISTATE_REMOVE; - iscsit_add_cmd_to_immediate_queue(cmd_p, conn, cmd_p->i_state); - iscsit_start_nopin_timer(conn); - } else { - /* - * Initiator is not expecting a NOPIN is response. - * Just ignore for now. - * - * iSCSI v19-91 10.18 - * "A NOP-OUT may also be used to confirm a changed - * ExpStatSN if another PDU will not be available - * for a long time." - */ - ret = 0; - goto out; - } - - return 0; + return iscsit_process_nop_out(conn, cmd, hdr); out: if (cmd) iscsit_free_cmd(cmd, false); -ping_out: + kfree(ping_data); return ret; } -EXPORT_SYMBOL(iscsit_handle_nop_out); int iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, @@ -1757,8 +1737,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct se_tmr_req *se_tmr; struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; - int out_of_order_cmdsn = 0; - int ret; + int out_of_order_cmdsn = 0, ret; + bool sess_ref = false; u8 function; hdr = (struct iscsi_tm *) buf; @@ -1782,8 +1762,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_err("Task Management Request TASK_REASSIGN not" " issued as immediate command, bad iSCSI Initiator" "implementation\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if ((function != ISCSI_TM_FUNC_ABORT_TASK) && be32_to_cpu(hdr->refcmdsn) != ISCSI_RESERVED_TAG) @@ -1795,9 +1775,9 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->tmr_req) { pr_err("Unable to allocate memory for" " Task Management command!\n"); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + buf); } /* @@ -1814,6 +1794,9 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG, cmd->sense_buffer + 2); + target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + sess_ref = true; + switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: tcm_function = TMR_ABORT_TASK; @@ -1839,17 +1822,15 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, default: pr_err("Unknown iSCSI TMR Function:" " 0x%02x\n", function); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, tcm_function, GFP_KERNEL); if (ret < 0) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; } @@ -1908,9 +1889,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, break; if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_INVALID, 1, 1, - buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); break; default: pr_err("Unknown TMR function: 0x%02x, protocol" @@ -1928,15 +1908,13 @@ attach: spin_unlock_bh(&conn->cmd_lock); if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { - int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) out_of_order_cmdsn = 1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) return 0; else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return -1; } iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); @@ -1956,51 +1934,135 @@ attach: * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. */ + if (sess_ref) { + pr_debug("Handle TMR, using sess_ref=true check\n"); + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + } + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd); /* #warning FIXME: Support Text Command parameters besides SendTargets */ -static int iscsit_handle_text_cmd( - struct iscsi_conn *conn, - unsigned char *buf) +int +iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_text *hdr) { - char *text_ptr, *text_in; - int cmdsn_ret, niov = 0, rx_got, rx_size; - u32 checksum = 0, data_crc = 0, payload_length; - u32 padding = 0, pad_bytes = 0, text_length = 0; - struct iscsi_cmd *cmd; - struct kvec iov[3]; - struct iscsi_text *hdr; - - hdr = (struct iscsi_text *) buf; - payload_length = ntoh24(hdr->dlength); + u32 payload_length = ntoh24(hdr->dlength); if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { pr_err("Unable to accept text parameter length: %u" "greater than MaxXmitDataSegmentLength %u.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); } pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn, hdr->exp_statsn, payload_length); - rx_size = text_length = payload_length; - if (text_length) { - text_in = kzalloc(text_length, GFP_KERNEL); + cmd->iscsi_opcode = ISCSI_OP_TEXT; + cmd->i_state = ISTATE_SEND_TEXTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); + cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); + cmd->data_direction = DMA_NONE; + + return 0; +} +EXPORT_SYMBOL(iscsit_setup_text_cmd); + +int +iscsit_process_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_text *hdr) +{ + unsigned char *text_in = cmd->text_in_ptr, *text_ptr; + int cmdsn_ret; + + if (!text_in) { + pr_err("Unable to locate text_in buffer for sendtargets" + " discovery\n"); + goto reject; + } + if (strncmp("SendTargets", text_in, 11) != 0) { + pr_err("Received Text Data that is not" + " SendTargets, cannot continue.\n"); + goto reject; + } + text_ptr = strchr(text_in, '='); + if (!text_ptr) { + pr_err("No \"=\" separator found in Text Data," + " cannot continue.\n"); + goto reject; + } + if (!strncmp("=All", text_ptr, 4)) { + cmd->cmd_flags |= IFC_SENDTARGETS_ALL; + } else if (!strncmp("=iqn.", text_ptr, 5) || + !strncmp("=eui.", text_ptr, 5)) { + cmd->cmd_flags |= IFC_SENDTARGETS_SINGLE; + } else { + pr_err("Unable to locate valid SendTargets=%s value\n", text_ptr); + goto reject; + } + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); + + if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; + + return 0; + } + + return iscsit_execute_cmd(cmd, 0); + +reject: + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); +} +EXPORT_SYMBOL(iscsit_process_text_cmd); + +static int +iscsit_handle_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_text *hdr = (struct iscsi_text *)buf; + char *text_in = NULL; + u32 payload_length = ntoh24(hdr->dlength); + int rx_size, rc; + + rc = iscsit_setup_text_cmd(conn, cmd, hdr); + if (rc < 0) + return 0; + + rx_size = payload_length; + if (payload_length) { + u32 checksum = 0, data_crc = 0; + u32 padding = 0, pad_bytes = 0; + int niov = 0, rx_got; + struct kvec iov[3]; + + text_in = kzalloc(payload_length, GFP_KERNEL); if (!text_in) { pr_err("Unable to allocate memory for" " incoming text parameters\n"); - return -1; + goto reject; } + cmd->text_in_ptr = text_in; memset(iov, 0, 3 * sizeof(struct kvec)); iov[niov].iov_base = text_in; - iov[niov++].iov_len = text_length; + iov[niov++].iov_len = payload_length; padding = ((-payload_length) & 3); if (padding != 0) { @@ -2017,14 +2079,12 @@ static int iscsit_handle_text_cmd( } rx_got = rx_data(conn, &iov[0], niov, rx_size); - if (rx_got != rx_size) { - kfree(text_in); - return -1; - } + if (rx_got != rx_size) + goto reject; if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, - text_in, text_length, + text_in, payload_length, padding, (u8 *)&pad_bytes, (u8 *)&data_crc); @@ -2036,8 +2096,7 @@ static int iscsit_handle_text_cmd( pr_err("Unable to recover from" " Text Data digest failure while in" " ERL=0.\n"); - kfree(text_in); - return -1; + goto reject; } else { /* * Silently drop this PDU and let the @@ -2052,68 +2111,22 @@ static int iscsit_handle_text_cmd( } else { pr_debug("Got CRC32C DataDigest" " 0x%08x for %u bytes of text data.\n", - checksum, text_length); + checksum, payload_length); } } - text_in[text_length - 1] = '\0'; + text_in[payload_length - 1] = '\0'; pr_debug("Successfully read %d bytes of text" - " data.\n", text_length); - - if (strncmp("SendTargets", text_in, 11) != 0) { - pr_err("Received Text Data that is not" - " SendTargets, cannot continue.\n"); - kfree(text_in); - return -1; - } - text_ptr = strchr(text_in, '='); - if (!text_ptr) { - pr_err("No \"=\" separator found in Text Data," - " cannot continue.\n"); - kfree(text_in); - return -1; - } - if (strncmp("=All", text_ptr, 4) != 0) { - pr_err("Unable to locate All value for" - " SendTargets key, cannot continue.\n"); - kfree(text_in); - return -1; - } -/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */ - kfree(text_in); + " data.\n", payload_length); } - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); - if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); - - cmd->iscsi_opcode = ISCSI_OP_TEXT; - cmd->i_state = ISTATE_SEND_TEXTRSP; - cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); - conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; - cmd->targ_xfer_tag = 0xFFFFFFFF; - cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); - cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); - cmd->data_direction = DMA_NONE; - - spin_lock_bh(&conn->cmd_lock); - list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list); - spin_unlock_bh(&conn->cmd_lock); + return iscsit_process_text_cmd(conn, cmd, hdr); - iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); - - if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); - - return 0; - } - - return iscsit_execute_cmd(cmd, 0); +reject: + kfree(cmd->text_in_ptr); + cmd->text_in_ptr = NULL; + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); } +EXPORT_SYMBOL(iscsit_handle_text_cmd); int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { @@ -2292,14 +2305,11 @@ iscsit_handle_logout_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (ret < 0) return ret; } else { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) logout_remove = 0; - } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); - } + else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; } return logout_remove; @@ -2323,8 +2333,8 @@ static int iscsit_handle_snack( if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Initiator sent SNACK request while in" " ErrorRecoveryLevel=0.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } /* * SNACK_DATA and SNACK_R2T are both 0, so check which function to @@ -2348,13 +2358,13 @@ static int iscsit_handle_snack( case ISCSI_FLAG_SNACK_TYPE_RDATA: /* FIXME: Support R-Data SNACK */ pr_err("R-Data SNACK Not Supported.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); default: pr_err("Unknown SNACK type 0x%02x, protocol" " error.\n", hdr->flags & 0x0f); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } return 0; @@ -2426,14 +2436,14 @@ static int iscsit_handle_immediate_data( pr_err("Unable to recover from" " Immediate Data digest failure while" " in ERL=0.\n"); - iscsit_add_reject_from_cmd( + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, - 1, 0, (unsigned char *)hdr, cmd); + (unsigned char *)hdr); return IMMEDIATE_DATA_CANNOT_RECOVER; } else { - iscsit_add_reject_from_cmd( + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, - 0, 0, (unsigned char *)hdr, cmd); + (unsigned char *)hdr); return IMMEDIATE_DATA_ERL1_CRC_FAILURE; } } else { @@ -3276,8 +3286,6 @@ static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr) return ISCSI_TMF_RSP_NO_LUN; case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: return ISCSI_TMF_RSP_NOT_SUPPORTED; - case TMR_FUNCTION_AUTHORIZATION_FAILED: - return ISCSI_TMF_RSP_AUTH_FAILED; case TMR_FUNCTION_REJECTED: default: return ISCSI_TMF_RSP_REJECTED; @@ -3372,6 +3380,7 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_tpg_np *tpg_np; int buffer_len, end_of_buf = 0, len = 0, payload_len = 0; unsigned char buf[ISCSI_IQN_LEN+12]; /* iqn + "TargetName=" + \0 */ + unsigned char *text_in = cmd->text_in_ptr, *text_ptr = NULL; buffer_len = max(conn->conn_ops->MaxRecvDataSegmentLength, SENDTARGETS_BUF_LIMIT); @@ -3382,9 +3391,31 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) " response.\n"); return -ENOMEM; } + /* + * Locate pointer to iqn./eui. string for IFC_SENDTARGETS_SINGLE + * explicit case.. + */ + if (cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) { + text_ptr = strchr(text_in, '='); + if (!text_ptr) { + pr_err("Unable to locate '=' string in text_in:" + " %s\n", text_in); + kfree(payload); + return -EINVAL; + } + /* + * Skip over '=' character.. + */ + text_ptr += 1; + } spin_lock(&tiqn_lock); list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { + if ((cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) && + strcmp(tiqn->tiqn, text_ptr)) { + continue; + } + len = sprintf(buf, "TargetName=%s", tiqn->tiqn); len += 1; @@ -3438,6 +3469,9 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) eob: if (end_of_buf) break; + + if (cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) + break; } spin_unlock(&tiqn_lock); @@ -3446,52 +3480,62 @@ eob: return payload_len; } -/* - * FIXME: Add support for F_BIT and C_BIT when the length is longer than - * MaxRecvDataSegmentLength. - */ -static int iscsit_send_text_rsp( - struct iscsi_cmd *cmd, - struct iscsi_conn *conn) +int +iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, + struct iscsi_text_rsp *hdr) { - struct iscsi_text_rsp *hdr; - struct kvec *iov; - u32 padding = 0, tx_size = 0; - int text_length, iov_count = 0; + int text_length, padding; text_length = iscsit_build_sendtargets_response(cmd); if (text_length < 0) return text_length; + hdr->opcode = ISCSI_OP_TEXT_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; padding = ((-text_length) & 3); - if (padding != 0) { - memset(cmd->buf_ptr + text_length, 0, padding); - pr_debug("Attaching %u additional bytes for" - " padding.\n", padding); - } - - hdr = (struct iscsi_text_rsp *) cmd->pdu; - memset(hdr, 0, ISCSI_HDR_LEN); - hdr->opcode = ISCSI_OP_TEXT_RSP; - hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, text_length); - hdr->itt = cmd->init_task_tag; - hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); - cmd->stat_sn = conn->stat_sn++; - hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->itt = cmd->init_task_tag; + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); - hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); - hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); - iov = &cmd->iov_misc[0]; + pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x," + " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, + text_length, conn->cid); + + return text_length + padding; +} +EXPORT_SYMBOL(iscsit_build_text_rsp); +/* + * FIXME: Add support for F_BIT and C_BIT when the length is longer than + * MaxRecvDataSegmentLength. + */ +static int iscsit_send_text_rsp( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_text_rsp *hdr = (struct iscsi_text_rsp *)cmd->pdu; + struct kvec *iov; + u32 tx_size = 0; + int text_length, iov_count = 0, rc; + + rc = iscsit_build_text_rsp(cmd, conn, hdr); + if (rc < 0) + return rc; + + text_length = rc; + iov = &cmd->iov_misc[0]; iov[iov_count].iov_base = cmd->pdu; iov[iov_count++].iov_len = ISCSI_HDR_LEN; iov[iov_count].iov_base = cmd->buf_ptr; - iov[iov_count++].iov_len = text_length + padding; + iov[iov_count++].iov_len = text_length; - tx_size += (ISCSI_HDR_LEN + text_length + padding); + tx_size += (ISCSI_HDR_LEN + text_length); if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; @@ -3507,7 +3551,7 @@ static int iscsit_send_text_rsp( if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, - cmd->buf_ptr, (text_length + padding), + cmd->buf_ptr, text_length, 0, NULL, (u8 *)&cmd->data_crc); iov[iov_count].iov_base = &cmd->data_crc; @@ -3515,16 +3559,13 @@ static int iscsit_send_text_rsp( tx_size += ISCSI_CRC_LEN; pr_debug("Attaching DataDigest for %u bytes of text" - " data, CRC 0x%08x\n", (text_length + padding), + " data, CRC 0x%08x\n", text_length, cmd->data_crc); } cmd->iov_misc_count = iov_count; cmd->tx_size = tx_size; - pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x," - " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, - text_length, conn->cid); return 0; } @@ -3533,6 +3574,7 @@ iscsit_build_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn, struct iscsi_reject *hdr) { hdr->opcode = ISCSI_OP_REJECT; + hdr->reason = cmd->reject_reason; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, ISCSI_HDR_LEN); hdr->ffffffff = cpu_to_be32(0xffffffff); @@ -3806,18 +3848,11 @@ check_rsp_state: case ISTATE_SEND_STATUS_RECOVERY: case ISTATE_SEND_TEXTRSP: case ISTATE_SEND_TASKMGTRSP: + case ISTATE_SEND_REJECT: spin_lock_bh(&cmd->istate_lock); cmd->i_state = ISTATE_SENT_STATUS; spin_unlock_bh(&cmd->istate_lock); break; - case ISTATE_SEND_REJECT: - if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) { - cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN; - complete(&cmd->reject_comp); - goto err; - } - complete(&cmd->reject_comp); - break; default: pr_err("Unknown Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", @@ -3922,8 +3957,7 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) case ISCSI_OP_SCSI_CMD: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_scsi_cmd(conn, cmd, buf); break; @@ -3935,27 +3969,28 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) if (hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; } ret = iscsit_handle_nop_out(conn, cmd, buf); break; case ISCSI_OP_SCSI_TMFUNC: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_task_mgt_cmd(conn, cmd, buf); break; case ISCSI_OP_TEXT: - ret = iscsit_handle_text_cmd(conn, buf); + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + goto reject; + + ret = iscsit_handle_text_cmd(conn, cmd, buf); break; case ISCSI_OP_LOGOUT: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_logout_cmd(conn, cmd, buf); if (ret > 0) @@ -3987,6 +4022,8 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) } return ret; +reject: + return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } int iscsi_target_rx_thread(void *arg) @@ -4039,11 +4076,6 @@ restart: goto transport_err; } - /* - * Set conn->bad_hdr for use with REJECT PDUs. - */ - memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN); - if (conn->conn_ops->HeaderDigest) { iov.iov_base = &digest; iov.iov_len = ISCSI_CRC_LEN; @@ -4086,8 +4118,8 @@ restart: (!(opcode & ISCSI_OP_LOGOUT)))) { pr_err("Received illegal iSCSI Opcode: 0x%02x" " while in Discovery Session, rejecting.\n", opcode); - iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buffer, conn); + iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buffer); goto transport_err; } diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index a0050b2..2c437cb 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -15,7 +15,7 @@ extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *); extern int iscsit_del_np(struct iscsi_np *); -extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *); +extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *); extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 8d8b3ff..bbfd288 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -20,6 +20,7 @@ ****************************************************************************/ #include +#include #include #include #include @@ -78,11 +79,12 @@ static ssize_t lio_target_np_store_sctp( struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np); struct iscsi_tpg_np *tpg_np_sctp = NULL; - char *endptr; u32 op; int ret; - op = simple_strtoul(page, &endptr, 0); + ret = kstrtou32(page, 0, &op); + if (ret) + return ret; if ((op != 1) && (op != 0)) { pr_err("Illegal value for tpg_enable: %u\n", op); return -EINVAL; @@ -382,11 +384,12 @@ static ssize_t iscsi_nacl_attrib_store_##name( \ { \ struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ se_node_acl); \ - char *endptr; \ u32 val; \ int ret; \ \ - val = simple_strtoul(page, &endptr, 0); \ + ret = kstrtou32(page, 0, &val); \ + if (ret) \ + return ret; \ ret = iscsit_na_##name(nacl, val); \ if (ret < 0) \ return ret; \ @@ -474,7 +477,7 @@ static ssize_t __iscsi_##prefix##_store_##name( \ if (!capable(CAP_SYS_ADMIN)) \ return -EPERM; \ \ - snprintf(auth->name, PAGE_SIZE, "%s", page); \ + snprintf(auth->name, sizeof(auth->name), "%s", page); \ if (!strncmp("NULL", auth->name, 4)) \ auth->naf_flags &= ~flags; \ else \ @@ -789,11 +792,12 @@ static ssize_t lio_target_nacl_store_cmdsn_depth( struct iscsi_portal_group *tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); struct config_item *acl_ci, *tpg_ci, *wwn_ci; - char *endptr; u32 cmdsn_depth = 0; int ret; - cmdsn_depth = simple_strtoul(page, &endptr, 0); + ret = kstrtou32(page, 0, &cmdsn_depth); + if (ret) + return ret; if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) { pr_err("Passed cmdsn_depth: %u exceeds" " TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth, @@ -977,14 +981,15 @@ static ssize_t iscsi_tpg_attrib_store_##name( \ { \ struct iscsi_portal_group *tpg = container_of(se_tpg, \ struct iscsi_portal_group, tpg_se_tpg); \ - char *endptr; \ u32 val; \ int ret; \ \ if (iscsit_get_tpg(tpg) < 0) \ return -EINVAL; \ \ - val = simple_strtoul(page, &endptr, 0); \ + ret = kstrtou32(page, 0, &val); \ + if (ret) \ + goto out; \ ret = iscsit_ta_##name(tpg, val); \ if (ret < 0) \ goto out; \ @@ -1053,6 +1058,131 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { /* End items for lio_target_tpg_attrib_cit */ +/* Start items for lio_target_tpg_auth_cit */ + +#define __DEF_TPG_AUTH_STR(prefix, name, flags) \ +static ssize_t __iscsi_##prefix##_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + struct iscsi_node_auth *auth = &tpg->tpg_demo_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + return snprintf(page, PAGE_SIZE, "%s\n", auth->name); \ +} \ + \ +static ssize_t __iscsi_##prefix##_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + struct iscsi_node_auth *auth = &tpg->tpg_demo_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + snprintf(auth->name, sizeof(auth->name), "%s", page); \ + if (!(strncmp("NULL", auth->name, 4))) \ + auth->naf_flags &= ~flags; \ + else \ + auth->naf_flags |= flags; \ + \ + if ((auth->naf_flags & NAF_USERID_IN_SET) && \ + (auth->naf_flags & NAF_PASSWORD_IN_SET)) \ + auth->authenticate_target = 1; \ + else \ + auth->authenticate_target = 0; \ + \ + return count; \ +} + +#define __DEF_TPG_AUTH_INT(prefix, name) \ +static ssize_t __iscsi_##prefix##_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + struct iscsi_node_auth *auth = &tpg->tpg_demo_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + return snprintf(page, PAGE_SIZE, "%d\n", auth->name); \ +} + +#define DEF_TPG_AUTH_STR(name, flags) \ + __DEF_TPG_AUTH_STR(tpg_auth, name, flags) \ +static ssize_t iscsi_tpg_auth_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + return __iscsi_tpg_auth_show_##name(se_tpg, page); \ +} \ + \ +static ssize_t iscsi_tpg_auth_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + return __iscsi_tpg_auth_store_##name(se_tpg, page, count); \ +} + +#define DEF_TPG_AUTH_INT(name) \ + __DEF_TPG_AUTH_INT(tpg_auth, name) \ +static ssize_t iscsi_tpg_auth_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + return __iscsi_tpg_auth_show_##name(se_tpg, page); \ +} + +#define TPG_AUTH_ATTR(_name, _mode) TF_TPG_AUTH_ATTR(iscsi, _name, _mode); +#define TPG_AUTH_ATTR_RO(_name) TF_TPG_AUTH_ATTR_RO(iscsi, _name); + +/* + * * One-way authentication userid + * */ +DEF_TPG_AUTH_STR(userid, NAF_USERID_SET); +TPG_AUTH_ATTR(userid, S_IRUGO | S_IWUSR); +/* + * * One-way authentication password + * */ +DEF_TPG_AUTH_STR(password, NAF_PASSWORD_SET); +TPG_AUTH_ATTR(password, S_IRUGO | S_IWUSR); +/* + * * Enforce mutual authentication + * */ +DEF_TPG_AUTH_INT(authenticate_target); +TPG_AUTH_ATTR_RO(authenticate_target); +/* + * * Mutual authentication userid + * */ +DEF_TPG_AUTH_STR(userid_mutual, NAF_USERID_IN_SET); +TPG_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR); +/* + * * Mutual authentication password + * */ +DEF_TPG_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET); +TPG_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_auth_attrs[] = { + &iscsi_tpg_auth_userid.attr, + &iscsi_tpg_auth_password.attr, + &iscsi_tpg_auth_authenticate_target.attr, + &iscsi_tpg_auth_userid_mutual.attr, + &iscsi_tpg_auth_password_mutual.attr, + NULL, +}; + +/* End items for lio_target_tpg_auth_cit */ + /* Start items for lio_target_tpg_param_cit */ #define DEF_TPG_PARAM(name) \ @@ -1087,13 +1217,14 @@ static ssize_t iscsi_tpg_param_store_##name( \ struct iscsi_portal_group *tpg = container_of(se_tpg, \ struct iscsi_portal_group, tpg_se_tpg); \ char *buf; \ - int ret; \ + int ret, len; \ \ buf = kzalloc(PAGE_SIZE, GFP_KERNEL); \ if (!buf) \ return -ENOMEM; \ - snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page); \ - buf[strlen(buf)-1] = '\0'; /* Kill newline */ \ + len = snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page); \ + if (isspace(buf[len-1])) \ + buf[len-1] = '\0'; /* Kill newline */ \ \ if (iscsit_get_tpg(tpg) < 0) { \ kfree(buf); \ @@ -1230,11 +1361,12 @@ static ssize_t lio_target_tpg_store_enable( { struct iscsi_portal_group *tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); - char *endptr; u32 op; - int ret = 0; + int ret; - op = simple_strtoul(page, &endptr, 0); + ret = kstrtou32(page, 0, &op); + if (ret) + return ret; if ((op != 1) && (op != 0)) { pr_err("Illegal value for tpg_enable: %u\n", op); return -EINVAL; @@ -1282,15 +1414,15 @@ static struct se_portal_group *lio_target_tiqn_addtpg( { struct iscsi_portal_group *tpg; struct iscsi_tiqn *tiqn; - char *tpgt_str, *end_ptr; - int ret = 0; - unsigned short int tpgt; + char *tpgt_str; + int ret; + u16 tpgt; tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn); /* * Only tpgt_# directory groups can be created below * target/iscsi/iqn.superturodiskarry/ - */ + */ tpgt_str = strstr(name, "tpgt_"); if (!tpgt_str) { pr_err("Unable to locate \"tpgt_#\" directory" @@ -1298,7 +1430,9 @@ static struct se_portal_group *lio_target_tiqn_addtpg( return NULL; } tpgt_str += 5; /* Skip ahead of "tpgt_" */ - tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0); + ret = kstrtou16(tpgt_str, 0, &tpgt); + if (ret) + return NULL; tpg = iscsit_alloc_portal_group(tiqn, tpgt); if (!tpg) @@ -1506,10 +1640,12 @@ static ssize_t iscsi_disc_store_enforce_discovery_auth( { struct iscsi_param *param; struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg; - char *endptr; u32 op; + int err; - op = simple_strtoul(page, &endptr, 0); + err = kstrtou32(page, 0, &op); + if (err) + return -EINVAL; if ((op != 1) && (op != 0)) { pr_err("Illegal value for enforce_discovery_auth:" " %u\n", op); @@ -1655,13 +1791,12 @@ static int lio_queue_status(struct se_cmd *se_cmd) return 0; } -static int lio_queue_tm_rsp(struct se_cmd *se_cmd) +static void lio_queue_tm_rsp(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); cmd->i_state = ISTATE_SEND_TASKMGTRSP; iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); - return 0; } static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) @@ -1866,6 +2001,7 @@ int iscsi_target_register_configfs(void) TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs; TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 60ec4b9..4f77a78 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -132,7 +132,8 @@ enum cmd_flags_table { ICF_CONTIG_MEMORY = 0x00000020, ICF_ATTACHED_TO_RQUEUE = 0x00000040, ICF_OOO_CMDSN = 0x00000080, - ICF_REJECT_FAIL_CONN = 0x00000100, + IFC_SENDTARGETS_ALL = 0x00000100, + IFC_SENDTARGETS_SINGLE = 0x00000200, }; /* struct iscsi_cmd->i_state */ @@ -366,6 +367,8 @@ struct iscsi_cmd { u8 maxcmdsn_inc; /* Immediate Unsolicited Dataout */ u8 unsolicited_data; + /* Reject reason code */ + u8 reject_reason; /* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */ u16 logout_cid; /* Command flags */ @@ -427,6 +430,8 @@ struct iscsi_cmd { u32 tx_size; /* Buffer used for various purposes */ void *buf_ptr; + /* Used by SendTargets=[iqn.,eui.] discovery */ + void *text_in_ptr; /* See include/linux/dma-mapping.h */ enum dma_data_direction data_direction; /* iSCSI PDU Header + CRC */ @@ -446,7 +451,6 @@ struct iscsi_cmd { struct list_head datain_list; /* R2T List */ struct list_head cmd_r2t_list; - struct completion reject_comp; /* Timer for DataOUT */ struct timer_list dataout_timer; /* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */ @@ -528,8 +532,6 @@ struct iscsi_conn { u32 of_marker; /* Used for calculating OFMarker offset to next PDU */ u32 of_marker_offset; - /* Complete Bad PDU for sending reject */ - unsigned char bad_hdr[ISCSI_HDR_LEN]; #define IPV6_ADDRESS_SPACE 48 unsigned char login_ip[IPV6_ADDRESS_SPACE]; unsigned char local_ip[IPV6_ADDRESS_SPACE]; @@ -809,6 +811,7 @@ struct iscsi_portal_group { struct mutex tpg_access_lock; struct mutex np_login_lock; struct iscsi_tpg_attrib tpg_attrib; + struct iscsi_node_auth tpg_demo_auth; /* Pointer to default list of iSCSI parameters for TPG */ struct iscsi_param_list *param_list; struct iscsi_tiqn *tpg_tiqn; diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index dcb199d..08bd878 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -746,13 +746,12 @@ int iscsit_check_post_dataout( if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Unable to recover from DataOUT CRC" " failure while ERL=0, closing session.\n"); - iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, - 1, 0, buf, cmd); + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, + buf); return DATAOUT_CANNOT_RECOVER; } - iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, - 0, 0, buf, cmd); + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, buf); return iscsit_dataout_post_crc_failed(cmd, buf); } } @@ -909,6 +908,7 @@ void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep) wait_for_completion(&conn->conn_wait_comp); complete(&conn->conn_post_wait_comp); } +EXPORT_SYMBOL(iscsit_cause_connection_reinstatement); void iscsit_fall_back_to_erl0(struct iscsi_session *sess) { diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 40d9dbc..586c268 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -162,9 +162,8 @@ static int iscsit_handle_r2t_snack( " protocol error.\n", cmd->init_task_tag, begrun, (begrun + runlength), cmd->acked_data_sn); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (runlength) { @@ -173,8 +172,8 @@ static int iscsit_handle_r2t_snack( " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds" " current R2TSN: 0x%08x, protocol error.\n", cmd->init_task_tag, begrun, runlength, cmd->r2t_sn); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } last_r2tsn = (begrun + runlength); } else @@ -433,8 +432,7 @@ static int iscsit_handle_recovery_datain( " protocol error.\n", cmd->init_task_tag, begrun, (begrun + runlength), cmd->acked_data_sn); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); } /* @@ -445,14 +443,14 @@ static int iscsit_handle_recovery_datain( pr_err("Initiator requesting BegRun: 0x%08x, RunLength" ": 0x%08x greater than maximum DataSN: 0x%08x.\n", begrun, runlength, (cmd->data_sn - 1)); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_INVALID, + buf); } dr = iscsit_allocate_datain_req(); if (!dr) - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, + buf); dr->data_sn = dr->begrun = begrun; dr->runlength = runlength; @@ -1090,7 +1088,7 @@ int iscsit_handle_ooo_cmdsn( ooo_cmdsn = iscsit_allocate_ooo_cmdsn(); if (!ooo_cmdsn) - return CMDSN_ERROR_CANNOT_RECOVER; + return -ENOMEM; ooo_cmdsn->cmd = cmd; ooo_cmdsn->batch_count = (batch) ? @@ -1101,10 +1099,10 @@ int iscsit_handle_ooo_cmdsn( if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) { kmem_cache_free(lio_ooo_cache, ooo_cmdsn); - return CMDSN_ERROR_CANNOT_RECOVER; + return -ENOMEM; } - return CMDSN_HIGHER_THAN_EXP; + return 0; } static int iscsit_set_dataout_timeout_values( diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index cd5018f..c4675b4 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -112,6 +112,7 @@ static u32 iscsi_handle_authentication( struct iscsi_session *sess = conn->sess; struct iscsi_node_auth *auth; struct iscsi_node_acl *iscsi_nacl; + struct iscsi_portal_group *iscsi_tpg; struct se_node_acl *se_nacl; if (!sess->sess_ops->SessionType) { @@ -132,7 +133,17 @@ static u32 iscsi_handle_authentication( return -1; } - auth = ISCSI_NODE_AUTH(iscsi_nacl); + if (se_nacl->dynamic_node_acl) { + iscsi_tpg = container_of(se_nacl->se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + + auth = &iscsi_tpg->tpg_demo_auth; + } else { + iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl, + se_node_acl); + + auth = ISCSI_NODE_AUTH(iscsi_nacl); + } } else { /* * For SessionType=Discovery diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index e382221..35fd643 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1799,9 +1799,6 @@ void iscsi_set_connection_parameters( * this key is not sent over the wire. */ if (!strcmp(param->name, MAXXMITDATASEGMENTLENGTH)) { - if (param_list->iser == true) - continue; - ops->MaxXmitDataSegmentLength = simple_strtoul(param->value, &tmpptr, 0); pr_debug("MaxXmitDataSegmentLength: %s\n", diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 08a3bac..1df06d5 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -178,7 +178,6 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) INIT_LIST_HEAD(&cmd->i_conn_node); INIT_LIST_HEAD(&cmd->datain_list); INIT_LIST_HEAD(&cmd->cmd_r2t_list); - init_completion(&cmd->reject_comp); spin_lock_init(&cmd->datain_lock); spin_lock_init(&cmd->dataout_timeout_lock); spin_lock_init(&cmd->istate_lock); @@ -284,13 +283,12 @@ static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cm * Commands may be received out of order if MC/S is in use. * Ensure they are executed in CmdSN order. */ -int iscsit_sequence_cmd( - struct iscsi_conn *conn, - struct iscsi_cmd *cmd, - __be32 cmdsn) +int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char *buf, __be32 cmdsn) { - int ret; - int cmdsn_ret; + int ret, cmdsn_ret; + bool reject = false; + u8 reason = ISCSI_REASON_BOOKMARK_NO_RESOURCES; mutex_lock(&conn->sess->cmdsn_mutex); @@ -300,9 +298,19 @@ int iscsit_sequence_cmd( ret = iscsit_execute_cmd(cmd, 0); if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list)) iscsit_execute_ooo_cmdsns(conn->sess); + else if (ret < 0) { + reject = true; + ret = CMDSN_ERROR_CANNOT_RECOVER; + } break; case CMDSN_HIGHER_THAN_EXP: ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, be32_to_cpu(cmdsn)); + if (ret < 0) { + reject = true; + ret = CMDSN_ERROR_CANNOT_RECOVER; + break; + } + ret = CMDSN_HIGHER_THAN_EXP; break; case CMDSN_LOWER_THAN_EXP: cmd->i_state = ISTATE_REMOVE; @@ -310,11 +318,16 @@ int iscsit_sequence_cmd( ret = cmdsn_ret; break; default: + reason = ISCSI_REASON_PROTOCOL_ERROR; + reject = true; ret = cmdsn_ret; break; } mutex_unlock(&conn->sess->cmdsn_mutex); + if (reject) + iscsit_reject_cmd(cmd, reason, buf); + return ret; } EXPORT_SYMBOL(iscsit_sequence_cmd); @@ -681,6 +694,7 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd) kfree(cmd->seq_list); kfree(cmd->tmr_req); kfree(cmd->iov_data); + kfree(cmd->text_in_ptr); kmem_cache_free(lio_cmd_cache, cmd); } diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index a442265..e4fc34a 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -13,7 +13,8 @@ extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32); extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *); extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32); -int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, __be32 cmdsn); +extern int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char * ,__be32 cmdsn); extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *); extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, itt_t); extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 7c90814..568ad25 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -786,7 +786,7 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd) return 0; } -static int tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd) +static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd) { struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; struct tcm_loop_tmr *tl_tmr = se_tmr->fabric_tmr_ptr; @@ -796,7 +796,6 @@ static int tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd) */ atomic_set(&tl_tmr->tmr_complete, 1); wake_up(&tl_tmr->tl_tmr_wait); - return 0; } static char *tcm_loop_dump_proto_id(struct tcm_loop_hba *tl_hba) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index d3536f5..e51b09a 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1842,9 +1842,8 @@ static int sbp_queue_status(struct se_cmd *se_cmd) return sbp_send_sense(req); } -static int sbp_queue_tm_rsp(struct se_cmd *se_cmd) +static void sbp_queue_tm_rsp(struct se_cmd *se_cmd) { - return 0; } static int sbp_check_stop_free(struct se_cmd *se_cmd) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 4a8bd36..e4d2293 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -983,7 +983,6 @@ static ssize_t target_core_dev_pr_show_spc3_res(struct se_device *dev, struct se_node_acl *se_nacl; struct t10_pr_registration *pr_reg; char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); @@ -992,12 +991,11 @@ static ssize_t target_core_dev_pr_show_spc3_res(struct se_device *dev, return sprintf(page, "No SPC-3 Reservation holder\n"); se_nacl = pr_reg->pr_reg_nacl; - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); return sprintf(page, "SPC-3 Reservation: %s Initiator: %s%s\n", se_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), - se_nacl->initiatorname, (prf_isid) ? &i_buf[0] : ""); + se_nacl->initiatorname, i_buf); } static ssize_t target_core_dev_pr_show_spc2_res(struct se_device *dev, @@ -1116,7 +1114,7 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_registered_i_pts( unsigned char buf[384]; char i_buf[PR_REG_ISID_ID_LEN]; ssize_t len = 0; - int reg_count = 0, prf_isid; + int reg_count = 0; len += sprintf(page+len, "SPC-3 PR Registrations:\n"); @@ -1127,12 +1125,11 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_registered_i_pts( memset(buf, 0, 384); memset(i_buf, 0, PR_REG_ISID_ID_LEN); tfo = pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo; - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); sprintf(buf, "%s Node: %s%s Key: 0x%016Lx PRgen: 0x%08x\n", tfo->get_fabric_name(), - pr_reg->pr_reg_nacl->initiatorname, (prf_isid) ? - &i_buf[0] : "", pr_reg->pr_res_key, + pr_reg->pr_reg_nacl->initiatorname, i_buf, pr_reg->pr_res_key, pr_reg->pr_res_generation); if (len + strlen(buf) >= PAGE_SIZE) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 4630481..8f4142f 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1410,7 +1410,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->t10_alua.tg_pt_gps_list); spin_lock_init(&dev->t10_alua.tg_pt_gps_lock); - dev->t10_pr.pr_aptpl_buf_len = PR_APTPL_BUF_LEN; dev->t10_wwn.t10_dev = dev; dev->t10_alua.t10_dev = dev; @@ -1545,7 +1544,7 @@ int core_dev_setup_virtual_lun0(void) { struct se_hba *hba; struct se_device *dev; - char buf[16]; + char buf[] = "rd_pages=8,rd_nullio=1"; int ret; hba = core_alloc_hba("rd_mcp", 0, HBA_FLAGS_INTERNAL_USE); @@ -1558,8 +1557,6 @@ int core_dev_setup_virtual_lun0(void) goto out_free_hba; } - memset(buf, 0, 16); - sprintf(buf, "rd_pages=8"); hba->transport->set_configfs_dev_params(dev, buf, sizeof(buf)); ret = target_configure_device(dev); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 04c775c..eb56eb1 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -965,6 +965,19 @@ TF_CIT_SETUP(tpg_attrib, &target_fabric_tpg_attrib_item_ops, NULL, NULL); /* End of tfc_tpg_attrib_cit */ +/* Start of tfc_tpg_auth_cit */ + +CONFIGFS_EATTR_OPS(target_fabric_tpg_auth, se_portal_group, tpg_auth_group); + +static struct configfs_item_operations target_fabric_tpg_auth_item_ops = { + .show_attribute = target_fabric_tpg_auth_attr_show, + .store_attribute = target_fabric_tpg_auth_attr_store, +}; + +TF_CIT_SETUP(tpg_auth, &target_fabric_tpg_auth_item_ops, NULL, NULL); + +/* End of tfc_tpg_attrib_cit */ + /* Start of tfc_tpg_param_cit */ CONFIGFS_EATTR_OPS(target_fabric_tpg_param, se_portal_group, tpg_param_group); @@ -1030,8 +1043,9 @@ static struct config_group *target_fabric_make_tpg( se_tpg->tpg_group.default_groups[1] = &se_tpg->tpg_np_group; se_tpg->tpg_group.default_groups[2] = &se_tpg->tpg_acl_group; se_tpg->tpg_group.default_groups[3] = &se_tpg->tpg_attrib_group; - se_tpg->tpg_group.default_groups[4] = &se_tpg->tpg_param_group; - se_tpg->tpg_group.default_groups[5] = NULL; + se_tpg->tpg_group.default_groups[4] = &se_tpg->tpg_auth_group; + se_tpg->tpg_group.default_groups[5] = &se_tpg->tpg_param_group; + se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, &TF_CIT_TMPL(tf)->tfc_tpg_base_cit); @@ -1043,6 +1057,8 @@ static struct config_group *target_fabric_make_tpg( &TF_CIT_TMPL(tf)->tfc_tpg_nacl_cit); config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", &TF_CIT_TMPL(tf)->tfc_tpg_attrib_cit); + config_group_init_type_name(&se_tpg->tpg_auth_group, "auth", + &TF_CIT_TMPL(tf)->tfc_tpg_auth_cit); config_group_init_type_name(&se_tpg->tpg_param_group, "param", &TF_CIT_TMPL(tf)->tfc_tpg_param_cit); @@ -1202,6 +1218,7 @@ int target_fabric_setup_cits(struct target_fabric_configfs *tf) target_fabric_setup_tpg_np_cit(tf); target_fabric_setup_tpg_np_base_cit(tf); target_fabric_setup_tpg_attrib_cit(tf); + target_fabric_setup_tpg_auth_cit(tf); target_fabric_setup_tpg_param_cit(tf); target_fabric_setup_tpg_nacl_cit(tf); target_fabric_setup_tpg_nacl_base_cit(tf); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 3240f2c..bd78faf 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -53,18 +53,28 @@ struct pr_transport_id_holder { struct list_head dest_list; }; -int core_pr_dump_initiator_port( +void core_pr_dump_initiator_port( struct t10_pr_registration *pr_reg, char *buf, u32 size) { if (!pr_reg->isid_present_at_reg) - return 0; + buf[0] = '\0'; - snprintf(buf, size, ",i,0x%s", &pr_reg->pr_reg_isid[0]); - return 1; + snprintf(buf, size, ",i,0x%s", pr_reg->pr_reg_isid); } +enum register_type { + REGISTER, + REGISTER_AND_IGNORE_EXISTING_KEY, + REGISTER_AND_MOVE, +}; + +enum preempt_type { + PREEMPT, + PREEMPT_AND_ABORT, +}; + static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *, struct t10_pr_registration *, int); @@ -596,14 +606,6 @@ static struct t10_pr_registration *__core_scsi3_do_alloc_registration( return NULL; } - pr_reg->pr_aptpl_buf = kzalloc(dev->t10_pr.pr_aptpl_buf_len, - GFP_ATOMIC); - if (!pr_reg->pr_aptpl_buf) { - pr_err("Unable to allocate pr_reg->pr_aptpl_buf\n"); - kmem_cache_free(t10_pr_reg_cache, pr_reg); - return NULL; - } - INIT_LIST_HEAD(&pr_reg->pr_reg_list); INIT_LIST_HEAD(&pr_reg->pr_reg_abort_list); INIT_LIST_HEAD(&pr_reg->pr_reg_aptpl_list); @@ -794,7 +796,6 @@ int core_scsi3_alloc_aptpl_registration( pr_err("Unable to allocate struct t10_pr_registration\n"); return -ENOMEM; } - pr_reg->pr_aptpl_buf = kzalloc(pr_tmpl->pr_aptpl_buf_len, GFP_KERNEL); INIT_LIST_HEAD(&pr_reg->pr_reg_list); INIT_LIST_HEAD(&pr_reg->pr_reg_abort_list); @@ -848,11 +849,9 @@ static void core_scsi3_aptpl_reserve( struct t10_pr_registration *pr_reg) { char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); spin_lock(&dev->dev_reservation_lock); dev->dev_pr_res_holder = pr_reg; @@ -865,11 +864,11 @@ static void core_scsi3_aptpl_reserve( (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n", tpg->se_tpg_tfo->get_fabric_name(), node_acl->initiatorname, - (prf_isid) ? &i_buf[0] : ""); + i_buf); } static void __core_scsi3_add_registration(struct se_device *, struct se_node_acl *, - struct t10_pr_registration *, int, int); + struct t10_pr_registration *, enum register_type, int); static int __core_scsi3_check_aptpl_registration( struct se_device *dev, @@ -962,21 +961,19 @@ static void __core_scsi3_dump_registration( struct se_device *dev, struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, - int register_type) + enum register_type register_type) { struct se_portal_group *se_tpg = nacl->se_tpg; char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(&i_buf[0], 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); pr_debug("SPC-3 PR [%s] Service Action: REGISTER%s Initiator" - " Node: %s%s\n", tfo->get_fabric_name(), (register_type == 2) ? - "_AND_MOVE" : (register_type == 1) ? + " Node: %s%s\n", tfo->get_fabric_name(), (register_type == REGISTER_AND_MOVE) ? + "_AND_MOVE" : (register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ? "_AND_IGNORE_EXISTING_KEY" : "", nacl->initiatorname, - (prf_isid) ? i_buf : ""); + i_buf); pr_debug("SPC-3 PR [%s] registration on Target Port: %s,0x%04x\n", tfo->get_fabric_name(), tfo->tpg_get_wwn(se_tpg), tfo->tpg_get_tag(se_tpg)); @@ -998,7 +995,7 @@ static void __core_scsi3_add_registration( struct se_device *dev, struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, - int register_type, + enum register_type register_type, int register_move) { struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; @@ -1064,7 +1061,7 @@ static int core_scsi3_alloc_registration( u64 sa_res_key, int all_tg_pt, int aptpl, - int register_type, + enum register_type register_type, int register_move) { struct t10_pr_registration *pr_reg; @@ -1225,11 +1222,9 @@ static void __core_scsi3_free_registration( pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo; struct t10_reservation *pr_tmpl = &dev->t10_pr; char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); pr_reg->pr_reg_deve->def_pr_registered = 0; pr_reg->pr_reg_deve->pr_res_key = 0; @@ -1257,7 +1252,7 @@ static void __core_scsi3_free_registration( pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator" " Node: %s%s\n", tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, - (prf_isid) ? &i_buf[0] : ""); + i_buf); pr_debug("SPC-3 PR [%s] for %s TCM Subsystem %s Object Target" " Port(s)\n", tfo->get_fabric_name(), (pr_reg->pr_reg_all_tg_pt) ? "ALL" : "SINGLE", @@ -1269,7 +1264,6 @@ static void __core_scsi3_free_registration( if (!preempt_and_abort_list) { pr_reg->pr_reg_deve = NULL; pr_reg->pr_reg_nacl = NULL; - kfree(pr_reg->pr_aptpl_buf); kmem_cache_free(t10_pr_reg_cache, pr_reg); return; } @@ -1338,7 +1332,6 @@ void core_scsi3_free_all_registrations( list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->aptpl_reg_list, pr_reg_aptpl_list) { list_del(&pr_reg->pr_reg_aptpl_list); - kfree(pr_reg->pr_aptpl_buf); kmem_cache_free(t10_pr_reg_cache, pr_reg); } spin_unlock(&pr_tmpl->aptpl_reg_lock); @@ -1453,7 +1446,7 @@ core_scsi3_decode_spec_i_port( char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; - int dest_local_nexus, prf_isid; + int dest_local_nexus; u32 dest_rtpi = 0; memset(dest_iport, 0, 64); @@ -1764,8 +1757,7 @@ core_scsi3_decode_spec_i_port( kfree(tidh); memset(i_buf, 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(dest_pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(dest_pr_reg, i_buf, PR_REG_ISID_ID_LEN); __core_scsi3_add_registration(cmd->se_dev, dest_node_acl, dest_pr_reg, 0, 0); @@ -1773,8 +1765,7 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully" " registered Transport ID for Node: %s%s Mapped LUN:" " %u\n", dest_tpg->se_tpg_tfo->get_fabric_name(), - dest_node_acl->initiatorname, (prf_isid) ? - &i_buf[0] : "", dest_se_deve->mapped_lun); + dest_node_acl->initiatorname, i_buf, dest_se_deve->mapped_lun); if (dest_local_nexus) continue; @@ -1813,7 +1804,6 @@ out: kmem_cache_free(t10_pr_reg_cache, pr_reg_tmp); } - kfree(dest_pr_reg->pr_aptpl_buf); kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); if (dest_local_nexus) @@ -1826,14 +1816,10 @@ out: return ret; } -/* - * Called with struct se_device->dev_reservation_lock held - */ -static int __core_scsi3_update_aptpl_buf( +static int core_scsi3_update_aptpl_buf( struct se_device *dev, unsigned char *buf, - u32 pr_aptpl_buf_len, - int clear_aptpl_metadata) + u32 pr_aptpl_buf_len) { struct se_lun *lun; struct se_portal_group *tpg; @@ -1841,20 +1827,13 @@ static int __core_scsi3_update_aptpl_buf( unsigned char tmp[512], isid_buf[32]; ssize_t len = 0; int reg_count = 0; + int ret = 0; - memset(buf, 0, pr_aptpl_buf_len); - /* - * Called to clear metadata once APTPL has been deactivated. - */ - if (clear_aptpl_metadata) { - snprintf(buf, pr_aptpl_buf_len, - "No Registrations or Reservations\n"); - return 0; - } + spin_lock(&dev->dev_reservation_lock); + spin_lock(&dev->t10_pr.registration_lock); /* * Walk the registration list.. */ - spin_lock(&dev->t10_pr.registration_lock); list_for_each_entry(pr_reg, &dev->t10_pr.registration_list, pr_reg_list) { @@ -1900,8 +1879,8 @@ static int __core_scsi3_update_aptpl_buf( if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { pr_err("Unable to update renaming" " APTPL metadata\n"); - spin_unlock(&dev->t10_pr.registration_lock); - return -EMSGSIZE; + ret = -EMSGSIZE; + goto out; } len += sprintf(buf+len, "%s", tmp); @@ -1918,48 +1897,32 @@ static int __core_scsi3_update_aptpl_buf( if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { pr_err("Unable to update renaming" " APTPL metadata\n"); - spin_unlock(&dev->t10_pr.registration_lock); - return -EMSGSIZE; + ret = -EMSGSIZE; + goto out; } len += sprintf(buf+len, "%s", tmp); reg_count++; } - spin_unlock(&dev->t10_pr.registration_lock); if (!reg_count) len += sprintf(buf+len, "No Registrations or Reservations"); - return 0; -} - -static int core_scsi3_update_aptpl_buf( - struct se_device *dev, - unsigned char *buf, - u32 pr_aptpl_buf_len, - int clear_aptpl_metadata) -{ - int ret; - - spin_lock(&dev->dev_reservation_lock); - ret = __core_scsi3_update_aptpl_buf(dev, buf, pr_aptpl_buf_len, - clear_aptpl_metadata); +out: + spin_unlock(&dev->t10_pr.registration_lock); spin_unlock(&dev->dev_reservation_lock); return ret; } -/* - * Called with struct se_device->aptpl_file_mutex held - */ static int __core_scsi3_write_aptpl_to_file( struct se_device *dev, - unsigned char *buf, - u32 pr_aptpl_buf_len) + unsigned char *buf) { struct t10_wwn *wwn = &dev->t10_wwn; struct file *file; int flags = O_RDWR | O_CREAT | O_TRUNC; char path[512]; + u32 pr_aptpl_buf_len; int ret; memset(path, 0, 512); @@ -1978,8 +1941,7 @@ static int __core_scsi3_write_aptpl_to_file( return PTR_ERR(file); } - if (!pr_aptpl_buf_len) - pr_aptpl_buf_len = (strlen(&buf[0]) + 1); /* Add extra for NULL */ + pr_aptpl_buf_len = (strlen(buf) + 1); /* Add extra for NULL */ ret = kernel_write(file, buf, pr_aptpl_buf_len, 0); @@ -1990,60 +1952,64 @@ static int __core_scsi3_write_aptpl_to_file( return ret ? -EIO : 0; } -static int -core_scsi3_update_and_write_aptpl(struct se_device *dev, unsigned char *in_buf, - u32 in_pr_aptpl_buf_len) +/* + * Clear the APTPL metadata if APTPL has been disabled, otherwise + * write out the updated metadata to struct file for this SCSI device. + */ +static sense_reason_t core_scsi3_update_and_write_aptpl(struct se_device *dev, bool aptpl) { - unsigned char null_buf[64], *buf; - u32 pr_aptpl_buf_len; - int clear_aptpl_metadata = 0; - int ret; + unsigned char *buf; + int rc; - /* - * Can be called with a NULL pointer from PROUT service action CLEAR - */ - if (!in_buf) { - memset(null_buf, 0, 64); - buf = &null_buf[0]; - /* - * This will clear the APTPL metadata to: - * "No Registrations or Reservations" status - */ - pr_aptpl_buf_len = 64; - clear_aptpl_metadata = 1; - } else { - buf = in_buf; - pr_aptpl_buf_len = in_pr_aptpl_buf_len; + if (!aptpl) { + char *null_buf = "No Registrations or Reservations\n"; + + rc = __core_scsi3_write_aptpl_to_file(dev, null_buf); + dev->t10_pr.pr_aptpl_active = 0; + pr_debug("SPC-3 PR: Set APTPL Bit Deactivated\n"); + + if (rc) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + + return 0; } - ret = core_scsi3_update_aptpl_buf(dev, buf, pr_aptpl_buf_len, - clear_aptpl_metadata); - if (ret != 0) - return ret; + buf = kzalloc(PR_APTPL_BUF_LEN, GFP_KERNEL); + if (!buf) + return TCM_OUT_OF_RESOURCES; - /* - * __core_scsi3_write_aptpl_to_file() will call strlen() - * on the passed buf to determine pr_aptpl_buf_len. - */ - return __core_scsi3_write_aptpl_to_file(dev, buf, 0); + rc = core_scsi3_update_aptpl_buf(dev, buf, PR_APTPL_BUF_LEN); + if (rc < 0) { + kfree(buf); + return TCM_OUT_OF_RESOURCES; + } + + rc = __core_scsi3_write_aptpl_to_file(dev, buf); + if (rc != 0) { + pr_err("SPC-3 PR: Could not update APTPL\n"); + kfree(buf); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + dev->t10_pr.pr_aptpl_active = 1; + kfree(buf); + pr_debug("SPC-3 PR: Set APTPL Bit Activated\n"); + return 0; } static sense_reason_t core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, - int aptpl, int all_tg_pt, int spec_i_pt, int ignore_key) + bool aptpl, bool all_tg_pt, bool spec_i_pt, enum register_type register_type) { struct se_session *se_sess = cmd->se_sess; struct se_device *dev = cmd->se_dev; struct se_dev_entry *se_deve; struct se_lun *se_lun = cmd->se_lun; struct se_portal_group *se_tpg; - struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_reg_tmp, *pr_reg_e; + struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; - /* Used for APTPL metadata w/ UNREGISTER */ - unsigned char *pr_aptpl_buf = NULL; unsigned char isid_buf[PR_REG_ISID_LEN], *isid_ptr = NULL; sense_reason_t ret = TCM_NO_SENSE; - int pr_holder = 0, type; + int pr_holder = 0; if (!se_sess || !se_lun) { pr_err("SPC-3 PR: se_sess || struct se_lun is NULL!\n"); @@ -2061,8 +2027,8 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, /* * Follow logic from spc4r17 Section 5.7.7, Register Behaviors Table 47 */ - pr_reg_e = core_scsi3_locate_pr_reg(dev, se_sess->se_node_acl, se_sess); - if (!pr_reg_e) { + pr_reg = core_scsi3_locate_pr_reg(dev, se_sess->se_node_acl, se_sess); + if (!pr_reg) { if (res_key) { pr_warn("SPC-3 PR: Reservation Key non-zero" " for SA REGISTER, returning CONFLICT\n"); @@ -2083,7 +2049,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, if (core_scsi3_alloc_registration(cmd->se_dev, se_sess->se_node_acl, se_deve, isid_ptr, sa_res_key, all_tg_pt, aptpl, - ignore_key, 0)) { + register_type, 0)) { pr_err("Unable to allocate" " struct t10_pr_registration\n"); return TCM_INVALID_PARAMETER_LIST; @@ -2102,97 +2068,68 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, if (ret != 0) return ret; } - /* - * Nothing left to do for the APTPL=0 case. - */ - if (!aptpl) { - pr_tmpl->pr_aptpl_active = 0; - core_scsi3_update_and_write_aptpl(cmd->se_dev, NULL, 0); - pr_debug("SPC-3 PR: Set APTPL Bit Deactivated for" - " REGISTER\n"); - return 0; - } - /* - * Locate the newly allocated local I_T Nexus *pr_reg, and - * update the APTPL metadata information using its - * preallocated *pr_reg->pr_aptpl_buf. - */ - pr_reg = core_scsi3_locate_pr_reg(cmd->se_dev, - se_sess->se_node_acl, se_sess); - - if (core_scsi3_update_and_write_aptpl(cmd->se_dev, - &pr_reg->pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_tmpl->pr_aptpl_active = 1; - pr_debug("SPC-3 PR: Set APTPL Bit Activated for REGISTER\n"); - } - goto out_put_pr_reg; + return core_scsi3_update_and_write_aptpl(dev, aptpl); } - /* - * Locate the existing *pr_reg via struct se_node_acl pointers - */ - pr_reg = pr_reg_e; - type = pr_reg->pr_res_type; - - if (!ignore_key) { - if (res_key != pr_reg->pr_res_key) { - pr_err("SPC-3 PR REGISTER: Received" - " res_key: 0x%016Lx does not match" - " existing SA REGISTER res_key:" - " 0x%016Lx\n", res_key, - pr_reg->pr_res_key); - ret = TCM_RESERVATION_CONFLICT; - goto out_put_pr_reg; - } + /* ok, existing registration */ + + if ((register_type == REGISTER) && (res_key != pr_reg->pr_res_key)) { + pr_err("SPC-3 PR REGISTER: Received" + " res_key: 0x%016Lx does not match" + " existing SA REGISTER res_key:" + " 0x%016Lx\n", res_key, + pr_reg->pr_res_key); + ret = TCM_RESERVATION_CONFLICT; + goto out; } if (spec_i_pt) { - pr_err("SPC-3 PR UNREGISTER: SPEC_I_PT" - " set while sa_res_key=0\n"); + pr_err("SPC-3 PR REGISTER: SPEC_I_PT" + " set on a registered nexus\n"); ret = TCM_INVALID_PARAMETER_LIST; - goto out_put_pr_reg; + goto out; } /* * An existing ALL_TG_PT=1 registration being released * must also set ALL_TG_PT=1 in the incoming PROUT. */ - if (pr_reg->pr_reg_all_tg_pt && !(all_tg_pt)) { - pr_err("SPC-3 PR UNREGISTER: ALL_TG_PT=1" + if (pr_reg->pr_reg_all_tg_pt && !all_tg_pt) { + pr_err("SPC-3 PR REGISTER: ALL_TG_PT=1" " registration exists, but ALL_TG_PT=1 bit not" " present in received PROUT\n"); ret = TCM_INVALID_CDB_FIELD; - goto out_put_pr_reg; + goto out; } /* - * Allocate APTPL metadata buffer used for UNREGISTER ops + * sa_res_key=1 Change Reservation Key for registered I_T Nexus. */ - if (aptpl) { - pr_aptpl_buf = kzalloc(pr_tmpl->pr_aptpl_buf_len, - GFP_KERNEL); - if (!pr_aptpl_buf) { - pr_err("Unable to allocate" - " pr_aptpl_buf\n"); - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto out_put_pr_reg; - } - } + if (sa_res_key) { + /* + * Increment PRgeneration counter for struct se_device" + * upon a successful REGISTER, see spc4r17 section 6.3.2 + * READ_KEYS service action. + */ + pr_reg->pr_res_generation = core_scsi3_pr_generation(cmd->se_dev); + pr_reg->pr_res_key = sa_res_key; + pr_debug("SPC-3 PR [%s] REGISTER%s: Changed Reservation" + " Key for %s to: 0x%016Lx PRgeneration:" + " 0x%08x\n", cmd->se_tfo->get_fabric_name(), + (register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ? "_AND_IGNORE_EXISTING_KEY" : "", + pr_reg->pr_reg_nacl->initiatorname, + pr_reg->pr_res_key, pr_reg->pr_res_generation); - /* - * sa_res_key=0 Unregister Reservation Key for registered I_T - * Nexus sa_res_key=1 Change Reservation Key for registered I_T - * Nexus. - */ - if (!sa_res_key) { + } else { + /* + * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus. + */ pr_holder = core_scsi3_check_implict_release( cmd->se_dev, pr_reg); if (pr_holder < 0) { - kfree(pr_aptpl_buf); ret = TCM_RESERVATION_CONFLICT; - goto out_put_pr_reg; + goto out; } spin_lock(&pr_tmpl->registration_lock); @@ -2237,8 +2174,8 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, * RESERVATIONS RELEASED. */ if (pr_holder && - (type == PR_TYPE_WRITE_EXCLUSIVE_REGONLY || - type == PR_TYPE_EXCLUSIVE_ACCESS_REGONLY)) { + (pr_reg->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_REGONLY || + pr_reg->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_REGONLY)) { list_for_each_entry(pr_reg_p, &pr_tmpl->registration_list, pr_reg_list) { @@ -2250,60 +2187,13 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, ASCQ_2AH_RESERVATIONS_RELEASED); } } - spin_unlock(&pr_tmpl->registration_lock); - - if (!aptpl) { - pr_tmpl->pr_aptpl_active = 0; - core_scsi3_update_and_write_aptpl(dev, NULL, 0); - pr_debug("SPC-3 PR: Set APTPL Bit Deactivated" - " for UNREGISTER\n"); - return 0; - } - if (!core_scsi3_update_and_write_aptpl(dev, &pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_tmpl->pr_aptpl_active = 1; - pr_debug("SPC-3 PR: Set APTPL Bit Activated" - " for UNREGISTER\n"); - } - - goto out_free_aptpl_buf; - } - - /* - * Increment PRgeneration counter for struct se_device" - * upon a successful REGISTER, see spc4r17 section 6.3.2 - * READ_KEYS service action. - */ - pr_reg->pr_res_generation = core_scsi3_pr_generation(cmd->se_dev); - pr_reg->pr_res_key = sa_res_key; - pr_debug("SPC-3 PR [%s] REGISTER%s: Changed Reservation" - " Key for %s to: 0x%016Lx PRgeneration:" - " 0x%08x\n", cmd->se_tfo->get_fabric_name(), - (ignore_key) ? "_AND_IGNORE_EXISTING_KEY" : "", - pr_reg->pr_reg_nacl->initiatorname, - pr_reg->pr_res_key, pr_reg->pr_res_generation); - - if (!aptpl) { - pr_tmpl->pr_aptpl_active = 0; - core_scsi3_update_and_write_aptpl(dev, NULL, 0); - pr_debug("SPC-3 PR: Set APTPL Bit Deactivated" - " for REGISTER\n"); - ret = 0; - goto out_put_pr_reg; + spin_unlock(&pr_tmpl->registration_lock); } - if (!core_scsi3_update_and_write_aptpl(dev, &pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_tmpl->pr_aptpl_active = 1; - pr_debug("SPC-3 PR: Set APTPL Bit Activated" - " for REGISTER\n"); - } + ret = core_scsi3_update_and_write_aptpl(dev, aptpl); -out_free_aptpl_buf: - kfree(pr_aptpl_buf); - ret = 0; -out_put_pr_reg: +out: core_scsi3_put_pr_reg(pr_reg); return ret; } @@ -2340,7 +2230,6 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) struct t10_reservation *pr_tmpl = &dev->t10_pr; char i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); @@ -2466,8 +2355,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) pr_reg->pr_res_type = type; pr_reg->pr_res_holder = 1; dev->dev_pr_res_holder = pr_reg; - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); pr_debug("SPC-3 PR [%s] Service Action: RESERVE created new" " reservation holder TYPE: %s ALL_TG_PT: %d\n", @@ -2476,17 +2364,11 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n", cmd->se_tfo->get_fabric_name(), se_sess->se_node_acl->initiatorname, - (prf_isid) ? &i_buf[0] : ""); + i_buf); spin_unlock(&dev->dev_reservation_lock); - if (pr_tmpl->pr_aptpl_active) { - if (!core_scsi3_update_and_write_aptpl(cmd->se_dev, - &pr_reg->pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_debug("SPC-3 PR: Updated APTPL metadata" - " for RESERVE\n"); - } - } + if (pr_tmpl->pr_aptpl_active) + core_scsi3_update_and_write_aptpl(cmd->se_dev, true); ret = 0; out_put_pr_reg: @@ -2524,11 +2406,9 @@ static void __core_scsi3_complete_pro_release( { struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* * Go ahead and release the current PR reservation holder. */ @@ -2541,7 +2421,7 @@ static void __core_scsi3_complete_pro_release( (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", tfo->get_fabric_name(), se_nacl->initiatorname, - (prf_isid) ? &i_buf[0] : ""); + i_buf); /* * Clear TYPE and SCOPE for the next PROUT Service Action: RESERVE */ @@ -2702,12 +2582,9 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, spin_unlock(&pr_tmpl->registration_lock); write_aptpl: - if (pr_tmpl->pr_aptpl_active) { - if (!core_scsi3_update_and_write_aptpl(cmd->se_dev, - &pr_reg->pr_aptpl_buf[0], pr_tmpl->pr_aptpl_buf_len)) { - pr_debug("SPC-3 PR: Updated APTPL metadata for RELEASE\n"); - } - } + if (pr_tmpl->pr_aptpl_active) + core_scsi3_update_and_write_aptpl(cmd->se_dev, true); + out_put_pr_reg: core_scsi3_put_pr_reg(pr_reg); return ret; @@ -2791,11 +2668,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) pr_debug("SPC-3 PR [%s] Service Action: CLEAR complete\n", cmd->se_tfo->get_fabric_name()); - if (pr_tmpl->pr_aptpl_active) { - core_scsi3_update_and_write_aptpl(cmd->se_dev, NULL, 0); - pr_debug("SPC-3 PR: Updated APTPL metadata" - " for CLEAR\n"); - } + core_scsi3_update_and_write_aptpl(cmd->se_dev, false); core_scsi3_pr_generation(dev); return 0; @@ -2810,16 +2683,14 @@ static void __core_scsi3_complete_pro_preempt( struct list_head *preempt_and_abort_list, int type, int scope, - int abort) + enum preempt_type preempt_type) { struct se_node_acl *nacl = pr_reg->pr_reg_nacl; struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; - int prf_isid; memset(i_buf, 0, PR_REG_ISID_ID_LEN); - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* * Do an implict RELEASE of the existing reservation. */ @@ -2834,12 +2705,12 @@ static void __core_scsi3_complete_pro_preempt( pr_debug("SPC-3 PR [%s] Service Action: PREEMPT%s created new" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (abort) ? "_AND_ABORT" : "", + tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", core_scsi3_pr_dump_type(type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] PREEMPT%s from Node: %s%s\n", - tfo->get_fabric_name(), (abort) ? "_AND_ABORT" : "", - nacl->initiatorname, (prf_isid) ? &i_buf[0] : ""); + tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", + nacl->initiatorname, i_buf); /* * For PREEMPT_AND_ABORT, add the preempting reservation's * struct t10_pr_registration to the list that will be compared @@ -2869,14 +2740,13 @@ static void core_scsi3_release_preempt_and_abort( pr_reg->pr_reg_deve = NULL; pr_reg->pr_reg_nacl = NULL; - kfree(pr_reg->pr_aptpl_buf); kmem_cache_free(t10_pr_reg_cache, pr_reg); } } static sense_reason_t core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, - u64 sa_res_key, int abort) + u64 sa_res_key, enum preempt_type preempt_type) { struct se_device *dev = cmd->se_dev; struct se_node_acl *pr_reg_nacl; @@ -2896,7 +2766,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if (!pr_reg_n) { pr_err("SPC-3 PR: Unable to locate" " PR_REGISTERED *pr_reg for PREEMPT%s\n", - (abort) ? "_AND_ABORT" : ""); + (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : ""); return TCM_RESERVATION_CONFLICT; } if (pr_reg_n->pr_res_key != res_key) { @@ -2965,7 +2835,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, pr_reg_nacl = pr_reg->pr_reg_nacl; pr_res_mapped_lun = pr_reg->pr_res_mapped_lun; __core_scsi3_free_registration(dev, pr_reg, - (abort) ? &preempt_and_abort_list : + (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, calling_it_nexus); released_regs++; } else { @@ -2993,7 +2863,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, pr_reg_nacl = pr_reg->pr_reg_nacl; pr_res_mapped_lun = pr_reg->pr_res_mapped_lun; __core_scsi3_free_registration(dev, pr_reg, - (abort) ? &preempt_and_abort_list : + (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, 0); released_regs++; } @@ -3022,24 +2892,17 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, */ if (pr_res_holder && all_reg && !(sa_res_key)) { __core_scsi3_complete_pro_preempt(dev, pr_reg_n, - (abort) ? &preempt_and_abort_list : NULL, - type, scope, abort); + (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, + type, scope, preempt_type); - if (abort) + if (preempt_type == PREEMPT_AND_ABORT) core_scsi3_release_preempt_and_abort( &preempt_and_abort_list, pr_reg_n); } spin_unlock(&dev->dev_reservation_lock); - if (pr_tmpl->pr_aptpl_active) { - if (!core_scsi3_update_and_write_aptpl(cmd->se_dev, - &pr_reg_n->pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_debug("SPC-3 PR: Updated APTPL" - " metadata for PREEMPT%s\n", (abort) ? - "_AND_ABORT" : ""); - } - } + if (pr_tmpl->pr_aptpl_active) + core_scsi3_update_and_write_aptpl(cmd->se_dev, true); core_scsi3_put_pr_reg(pr_reg_n); core_scsi3_pr_generation(cmd->se_dev); @@ -3103,7 +2966,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, pr_reg_nacl = pr_reg->pr_reg_nacl; pr_res_mapped_lun = pr_reg->pr_res_mapped_lun; __core_scsi3_free_registration(dev, pr_reg, - (abort) ? &preempt_and_abort_list : NULL, + (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, calling_it_nexus); /* * e) Establish a unit attention condition for the initiator @@ -3120,8 +2983,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * I_T nexus using the contents of the SCOPE and TYPE fields; */ __core_scsi3_complete_pro_preempt(dev, pr_reg_n, - (abort) ? &preempt_and_abort_list : NULL, - type, scope, abort); + (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, + type, scope, preempt_type); /* * d) Process tasks as defined in 5.7.1; * e) See above.. @@ -3161,20 +3024,14 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * been removed from the primary pr_reg list), except the * new persistent reservation holder, the calling Initiator Port. */ - if (abort) { + if (preempt_type == PREEMPT_AND_ABORT) { core_tmr_lun_reset(dev, NULL, &preempt_and_abort_list, cmd); core_scsi3_release_preempt_and_abort(&preempt_and_abort_list, pr_reg_n); } - if (pr_tmpl->pr_aptpl_active) { - if (!core_scsi3_update_and_write_aptpl(cmd->se_dev, - &pr_reg_n->pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_debug("SPC-3 PR: Updated APTPL metadata for PREEMPT" - "%s\n", abort ? "_AND_ABORT" : ""); - } - } + if (pr_tmpl->pr_aptpl_active) + core_scsi3_update_and_write_aptpl(cmd->se_dev, true); core_scsi3_put_pr_reg(pr_reg_n); core_scsi3_pr_generation(cmd->se_dev); @@ -3183,7 +3040,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, static sense_reason_t core_scsi3_emulate_pro_preempt(struct se_cmd *cmd, int type, int scope, - u64 res_key, u64 sa_res_key, int abort) + u64 res_key, u64 sa_res_key, enum preempt_type preempt_type) { switch (type) { case PR_TYPE_WRITE_EXCLUSIVE: @@ -3193,10 +3050,10 @@ core_scsi3_emulate_pro_preempt(struct se_cmd *cmd, int type, int scope, case PR_TYPE_WRITE_EXCLUSIVE_ALLREG: case PR_TYPE_EXCLUSIVE_ACCESS_ALLREG: return core_scsi3_pro_preempt(cmd, type, scope, res_key, - sa_res_key, abort); + sa_res_key, preempt_type); default: pr_err("SPC-3 PR: Unknown Service Action PREEMPT%s" - " Type: 0x%02x\n", (abort) ? "_AND_ABORT" : "", type); + " Type: 0x%02x\n", (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", type); return TCM_INVALID_CDB_FIELD; } } @@ -3220,7 +3077,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, unsigned char *initiator_str; char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN]; u32 tid_len, tmp_tid_len; - int new_reg = 0, type, scope, matching_iname, prf_isid; + int new_reg = 0, type, scope, matching_iname; sense_reason_t ret; unsigned short rtpi; unsigned char proto_ident; @@ -3564,8 +3421,7 @@ after_iport_check: dest_pr_reg->pr_res_holder = 1; dest_pr_reg->pr_res_type = type; pr_reg->pr_res_scope = scope; - prf_isid = core_pr_dump_initiator_port(pr_reg, &i_buf[0], - PR_REG_ISID_ID_LEN); + core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* * Increment PRGeneration for existing registrations.. */ @@ -3581,7 +3437,7 @@ after_iport_check: pr_debug("SPC-3 PR Successfully moved reservation from" " %s Fabric Node: %s%s -> %s Fabric Node: %s %s\n", tf_ops->get_fabric_name(), pr_reg_nacl->initiatorname, - (prf_isid) ? &i_buf[0] : "", dest_tf_ops->get_fabric_name(), + i_buf, dest_tf_ops->get_fabric_name(), dest_node_acl->initiatorname, (iport_ptr != NULL) ? iport_ptr : ""); /* @@ -3602,24 +3458,7 @@ after_iport_check: } else core_scsi3_put_pr_reg(pr_reg); - /* - * Clear the APTPL metadata if APTPL has been disabled, otherwise - * write out the updated metadata to struct file for this SCSI device. - */ - if (!aptpl) { - pr_tmpl->pr_aptpl_active = 0; - core_scsi3_update_and_write_aptpl(cmd->se_dev, NULL, 0); - pr_debug("SPC-3 PR: Set APTPL Bit Deactivated for" - " REGISTER_AND_MOVE\n"); - } else { - pr_tmpl->pr_aptpl_active = 1; - if (!core_scsi3_update_and_write_aptpl(cmd->se_dev, - &dest_pr_reg->pr_aptpl_buf[0], - pr_tmpl->pr_aptpl_buf_len)) { - pr_debug("SPC-3 PR: Set APTPL Bit Activated for" - " REGISTER_AND_MOVE\n"); - } - } + core_scsi3_update_and_write_aptpl(cmd->se_dev, aptpl); transport_kunmap_data_sg(cmd); @@ -3752,7 +3591,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd) switch (sa) { case PRO_REGISTER: ret = core_scsi3_emulate_pro_register(cmd, - res_key, sa_res_key, aptpl, all_tg_pt, spec_i_pt, 0); + res_key, sa_res_key, aptpl, all_tg_pt, spec_i_pt, REGISTER); break; case PRO_RESERVE: ret = core_scsi3_emulate_pro_reserve(cmd, type, scope, res_key); @@ -3765,15 +3604,15 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd) break; case PRO_PREEMPT: ret = core_scsi3_emulate_pro_preempt(cmd, type, scope, - res_key, sa_res_key, 0); + res_key, sa_res_key, PREEMPT); break; case PRO_PREEMPT_AND_ABORT: ret = core_scsi3_emulate_pro_preempt(cmd, type, scope, - res_key, sa_res_key, 1); + res_key, sa_res_key, PREEMPT_AND_ABORT); break; case PRO_REGISTER_AND_IGNORE_EXISTING_KEY: ret = core_scsi3_emulate_pro_register(cmd, - 0, sa_res_key, aptpl, all_tg_pt, spec_i_pt, 1); + 0, sa_res_key, aptpl, all_tg_pt, spec_i_pt, REGISTER_AND_IGNORE_EXISTING_KEY); break; case PRO_REGISTER_AND_MOVE: ret = core_scsi3_emulate_pro_register_and_move(cmd, res_key, diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index b4a0042..ed75cdd 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -45,7 +45,7 @@ extern struct kmem_cache *t10_pr_reg_cache; -extern int core_pr_dump_initiator_port(struct t10_pr_registration *, +extern void core_pr_dump_initiator_port(struct t10_pr_registration *, char *, u32); extern sense_reason_t target_scsi2_reservation_release(struct se_cmd *); extern sense_reason_t target_scsi2_reservation_reserve(struct se_cmd *); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 0921a64..51127d1 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -139,6 +139,11 @@ static int rd_build_device_space(struct rd_dev *rd_dev) rd_dev->rd_page_count); return -EINVAL; } + + /* Don't need backing pages for NULLIO */ + if (rd_dev->rd_flags & RDF_NULLIO) + return 0; + total_sg_needed = rd_dev->rd_page_count; sg_tables = (total_sg_needed / max_sg_per_table) + 1; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index bbc5b0e..8a46277 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -38,11 +38,27 @@ static sense_reason_t sbc_emulate_readcapacity(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; + unsigned char *cdb = cmd->t_task_cdb; unsigned long long blocks_long = dev->transport->get_blocks(dev); unsigned char *rbuf; unsigned char buf[8]; u32 blocks; + /* + * SBC-2 says: + * If the PMI bit is set to zero and the LOGICAL BLOCK + * ADDRESS field is not set to zero, the device server shall + * terminate the command with CHECK CONDITION status with + * the sense key set to ILLEGAL REQUEST and the additional + * sense code set to INVALID FIELD IN CDB. + * + * In SBC-3, these fields are obsolete, but some SCSI + * compliance tests actually check this, so we might as well + * follow SBC-2. + */ + if (!(cdb[8] & 1) && !!(cdb[2] | cdb[3] | cdb[4] | cdb[5])) + return TCM_INVALID_CDB_FIELD; + if (blocks_long >= 0x00000000ffffffff) blocks = 0xffffffff; else @@ -581,7 +597,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) pr_err("cmd exceeds last lba %llu " "(lba %llu, sectors %u)\n", end_lba, cmd->t_task_lba, sectors); - return TCM_INVALID_CDB_FIELD; + return TCM_ADDRESS_OUT_OF_RANGE; } size = sbc_get_size(cmd, sectors); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index d0b4dd9..0d7cacb 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -85,13 +85,8 @@ void core_tmr_release_req( static void core_tmr_handle_tas_abort( struct se_node_acl *tmr_nacl, struct se_cmd *cmd, - int tas, - int fe_count) + int tas) { - if (!fe_count) { - transport_cmd_finish_abort(cmd, 1); - return; - } /* * TASK ABORTED status (TAS) bit support */ @@ -253,7 +248,6 @@ static void core_tmr_drain_state_list( LIST_HEAD(drain_task_list); struct se_cmd *cmd, *next; unsigned long flags; - int fe_count; /* * Complete outstanding commands with TASK_ABORTED SAM status. @@ -329,12 +323,10 @@ static void core_tmr_drain_state_list( spin_lock_irqsave(&cmd->t_state_lock, flags); target_stop_cmd(cmd, &flags); - fe_count = atomic_read(&cmd->t_fe_count); - cmd->transport_state |= CMD_T_ABORTED; spin_unlock_irqrestore(&cmd->t_state_lock, flags); - core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); + core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 21e3158..7172d00 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -52,6 +52,9 @@ #include "target_core_pr.h" #include "target_core_ua.h" +#define CREATE_TRACE_POINTS +#include + static struct workqueue_struct *target_completion_wq; static struct kmem_cache *se_sess_cache; struct kmem_cache *se_ua_cache; @@ -446,11 +449,15 @@ static void target_remove_from_state_list(struct se_cmd *cmd) spin_unlock_irqrestore(&dev->execute_task_lock, flags); } -static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists) +static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, + bool write_pending) { unsigned long flags; spin_lock_irqsave(&cmd->t_state_lock, flags); + if (write_pending) + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* * Determine if IOCTL context caller in requesting the stopping of this * command for LUN shutdown purposes. @@ -515,7 +522,7 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists) static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) { - return transport_cmd_check_stop(cmd, true); + return transport_cmd_check_stop(cmd, true, false); } static void transport_lun_remove_cmd(struct se_cmd *cmd) @@ -526,13 +533,6 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) if (!lun) return; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->transport_state & CMD_T_DEV_ACTIVE) { - cmd->transport_state &= ~CMD_T_DEV_ACTIVE; - target_remove_from_state_list(cmd); - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - spin_lock_irqsave(&lun->lun_cmd_lock, flags); if (!list_empty(&cmd->se_lun_node)) list_del_init(&cmd->se_lun_node); @@ -1092,7 +1092,6 @@ sense_reason_t target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) { struct se_device *dev = cmd->se_dev; - unsigned long flags; sense_reason_t ret; /* @@ -1127,6 +1126,8 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) */ memcpy(cmd->t_task_cdb, cdb, scsi_command_size(cdb)); + trace_target_sequencer_start(cmd); + /* * Check for an existing UNIT ATTENTION condition */ @@ -1152,9 +1153,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) if (ret) return ret; - spin_lock_irqsave(&cmd->t_state_lock, flags); cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); spin_lock(&cmd->se_lun->lun_sep_lock); if (cmd->se_lun->lun_sep) @@ -1552,7 +1551,8 @@ void transport_generic_request_failure(struct se_cmd *cmd, cmd->orig_fe_lun, 0x2C, ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS); - ret = cmd->se_tfo->queue_status(cmd); + trace_target_cmd_complete(cmd); + ret = cmd->se_tfo-> queue_status(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; goto check_stop; @@ -1583,10 +1583,6 @@ static void __target_execute_cmd(struct se_cmd *cmd) { sense_reason_t ret; - spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state |= (CMD_T_BUSY|CMD_T_SENT); - spin_unlock_irq(&cmd->t_state_lock); - if (cmd->execute_cmd) { ret = cmd->execute_cmd(cmd); if (ret) { @@ -1693,11 +1689,17 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; - cmd->transport_state |= CMD_T_ACTIVE; + cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); - if (!target_handle_task_attr(cmd)) - __target_execute_cmd(cmd); + if (target_handle_task_attr(cmd)) { + spin_lock_irq(&cmd->t_state_lock); + cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT; + spin_unlock_irq(&cmd->t_state_lock); + return; + } + + __target_execute_cmd(cmd); } EXPORT_SYMBOL(target_execute_cmd); @@ -1770,6 +1772,7 @@ static void transport_complete_qf(struct se_cmd *cmd) transport_complete_task_attr(cmd); if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) { + trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); if (ret) goto out; @@ -1777,6 +1780,7 @@ static void transport_complete_qf(struct se_cmd *cmd) switch (cmd->data_direction) { case DMA_FROM_DEVICE: + trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_data_in(cmd); break; case DMA_TO_DEVICE: @@ -1787,6 +1791,7 @@ static void transport_complete_qf(struct se_cmd *cmd) } /* Fall through for DMA_TO_DEVICE */ case DMA_NONE: + trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); break; default: @@ -1865,6 +1870,7 @@ static void target_complete_ok_work(struct work_struct *work) } spin_unlock(&cmd->se_lun->lun_sep_lock); + trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_data_in(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; @@ -1893,6 +1899,7 @@ static void target_complete_ok_work(struct work_struct *work) } /* Fall through for DMA_TO_DEVICE */ case DMA_NONE: + trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; @@ -1956,11 +1963,7 @@ static int transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - if (cmd->check_release != 0) - return target_put_sess_cmd(cmd->se_sess, cmd); - - cmd->se_tfo->release_cmd(cmd); - return 1; + return target_put_sess_cmd(cmd->se_sess, cmd); } /** @@ -1971,21 +1974,6 @@ static int transport_release_cmd(struct se_cmd *cmd) */ static int transport_put_cmd(struct se_cmd *cmd) { - unsigned long flags; - - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (atomic_read(&cmd->t_fe_count) && - !atomic_dec_and_test(&cmd->t_fe_count)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return 0; - } - - if (cmd->transport_state & CMD_T_DEV_ACTIVE) { - cmd->transport_state &= ~CMD_T_DEV_ACTIVE; - target_remove_from_state_list(cmd); - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - transport_free_pages(cmd); return transport_release_cmd(cmd); } @@ -2103,9 +2091,6 @@ transport_generic_new_cmd(struct se_cmd *cmd) if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - - atomic_inc(&cmd->t_fe_count); - /* * If this command is not a write we can execute it right here, * for write buffers we need to notify the fabric driver first @@ -2116,12 +2101,7 @@ transport_generic_new_cmd(struct se_cmd *cmd) target_execute_cmd(cmd); return 0; } - - spin_lock_irq(&cmd->t_state_lock); - cmd->t_state = TRANSPORT_WRITE_PENDING; - spin_unlock_irq(&cmd->t_state_lock); - - transport_cmd_check_stop(cmd, false); + transport_cmd_check_stop(cmd, false, true); ret = cmd->se_tfo->write_pending(cmd); if (ret == -EAGAIN || ret == -ENOMEM) @@ -2202,8 +2182,6 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, goto out; } list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); - se_cmd->check_release = 1; - out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); return ret; @@ -2319,7 +2297,7 @@ static int transport_lun_wait_for_tasks(struct se_cmd *cmd, struct se_lun *lun) pr_debug("ConfigFS ITT[0x%08x] - CMD_T_STOP, skipping\n", cmd->se_tfo->get_task_tag(cmd)); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - transport_cmd_check_stop(cmd, false); + transport_cmd_check_stop(cmd, false, false); return -EPERM; } cmd->transport_state |= CMD_T_LUN_FE_STOP; @@ -2427,7 +2405,7 @@ check_cond: spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - transport_cmd_check_stop(cmd, false); + transport_cmd_check_stop(cmd, false, false); complete(&cmd->transport_lun_fe_stop_comp); spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); continue; @@ -2778,6 +2756,7 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; after_reason: + trace_target_cmd_complete(cmd); return cmd->se_tfo->queue_status(cmd); } EXPORT_SYMBOL(transport_send_check_condition_and_sense); @@ -2794,6 +2773,7 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); cmd->se_cmd_flags |= SCF_SENT_DELAYED_TAS; + trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); return 1; @@ -2831,6 +2811,7 @@ void transport_send_task_abort(struct se_cmd *cmd) " ITT: 0x%08x\n", cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); + trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); } diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index eea6935..0dd54a4 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -161,7 +161,7 @@ int ft_write_pending(struct se_cmd *); int ft_write_pending_status(struct se_cmd *); u32 ft_get_task_tag(struct se_cmd *); int ft_get_cmd_state(struct se_cmd *); -int ft_queue_tm_resp(struct se_cmd *); +void ft_queue_tm_resp(struct se_cmd *); /* * other internal functions. diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index b406f17..0e5a1cae 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -394,14 +394,14 @@ static void ft_send_tm(struct ft_cmd *cmd) /* * Send status from completed task management request. */ -int ft_queue_tm_resp(struct se_cmd *se_cmd) +void ft_queue_tm_resp(struct se_cmd *se_cmd) { struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); struct se_tmr_req *tmr = se_cmd->se_tmr_req; enum fcp_resp_rsp_codes code; if (cmd->aborted) - return 0; + return; switch (tmr->response) { case TMR_FUNCTION_COMPLETE: code = FCP_TMF_CMPL; @@ -413,10 +413,7 @@ int ft_queue_tm_resp(struct se_cmd *se_cmd) code = FCP_TMF_REJECTED; break; case TMR_TASK_DOES_NOT_EXIST: - case TMR_TASK_STILL_ALLEGIANT: - case TMR_TASK_FAILOVER_NOT_SUPPORTED: case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: - case TMR_FUNCTION_AUTHORIZATION_FAILED: default: code = FCP_TMF_FAILED; break; @@ -424,7 +421,6 @@ int ft_queue_tm_resp(struct se_cmd *se_cmd) pr_debug("tmr fn %d resp %d fcp code %d\n", tmr->function, tmr->response, code); ft_send_resp_code(cmd, code); - return 0; } static void ft_send_work(struct work_struct *work); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 5e3c025..e988c81 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -169,4 +169,19 @@ config INTEL_POWERCLAMP enforce idle time which results in more package C-state residency. The user interface is exposed via generic thermal framework. +config X86_PKG_TEMP_THERMAL + tristate "X86 package temperature thermal driver" + depends on X86_THERMAL_VECTOR + select THERMAL_GOV_USER_SPACE + default m + help + Enable this to register CPU digital sensor for package temperature as + thermal zone. Each package will have its own thermal zone. There are + two trip points which can be set by user to get notifications via thermal + notification methods. + +menu "Texas Instruments thermal drivers" +source "drivers/thermal/ti-soc-thermal/Kconfig" +endmenu + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index c054d41..67184a2 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -23,4 +23,5 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o - +obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o +obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 54ffd64..5e53212 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -200,7 +200,6 @@ static int armada_thermal_exit(struct platform_device *pdev) platform_get_drvdata(pdev); thermal_zone_device_unregister(armada_thermal); - platform_set_drvdata(pdev, NULL); return 0; } @@ -211,7 +210,7 @@ static struct platform_driver armada_thermal_driver = { .driver = { .name = "armada_thermal", .owner = THIS_MODULE, - .of_match_table = of_match_ptr(armada_thermal_id_table), + .of_match_table = armada_thermal_id_table, }, }; diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index c94bf2e..82e15db 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -167,7 +167,7 @@ static int get_property(unsigned int cpu, unsigned long input, continue; /* get the frequency order */ - if (freq != CPUFREQ_ENTRY_INVALID && descend != -1) + if (freq != CPUFREQ_ENTRY_INVALID && descend == -1) descend = !!(freq > table[i].frequency); freq = table[i].frequency; diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index a088d13..828f5e3 100644 --- a/drivers/thermal/dove_thermal.c +++ b/drivers/thermal/dove_thermal.c @@ -134,16 +134,11 @@ static int dove_thermal_probe(struct platform_device *pdev) struct resource *res; int ret; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Failed to get platform resource\n"); - return -ENODEV; - } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->sensor = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->sensor)) return PTR_ERR(priv->sensor); @@ -178,7 +173,6 @@ static int dove_thermal_exit(struct platform_device *pdev) platform_get_drvdata(pdev); thermal_zone_device_unregister(dove_thermal); - platform_set_drvdata(pdev, NULL); return 0; } @@ -191,7 +185,7 @@ static struct platform_driver dove_thermal_driver = { .driver = { .name = "dove_thermal", .owner = THIS_MODULE, - .of_match_table = of_match_ptr(dove_thermal_id_table), + .of_match_table = dove_thermal_id_table, }, }; diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index 4cbe3ee..9af4b93 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -997,7 +997,6 @@ static int exynos_tmu_probe(struct platform_device *pdev) return 0; err_clk: - platform_set_drvdata(pdev, NULL); clk_unprepare(data->clk); return ret; } @@ -1012,8 +1011,6 @@ static int exynos_tmu_remove(struct platform_device *pdev) clk_unprepare(data->clk); - platform_set_drvdata(pdev, NULL); - return 0; } diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index dfeceaf..3b034a0 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -75,16 +75,11 @@ static int kirkwood_thermal_probe(struct platform_device *pdev) struct kirkwood_thermal_priv *priv; struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Failed to get platform resource\n"); - return -ENODEV; - } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->sensor = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->sensor)) return PTR_ERR(priv->sensor); @@ -108,7 +103,6 @@ static int kirkwood_thermal_exit(struct platform_device *pdev) platform_get_drvdata(pdev); thermal_zone_device_unregister(kirkwood_thermal); - platform_set_drvdata(pdev, NULL); return 0; } @@ -121,7 +115,7 @@ static struct platform_driver kirkwood_thermal_driver = { .driver = { .name = "kirkwood_thermal", .owner = THIS_MODULE, - .of_match_table = of_match_ptr(kirkwood_thermal_id_table), + .of_match_table = kirkwood_thermal_id_table, }, }; diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 8d7edd4..88f92e1 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -389,11 +389,6 @@ static int rcar_thermal_probe(struct platform_device *pdev) * platform has IRQ support. * Then, drier use common register */ - res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); - if (!res) { - dev_err(dev, "Could not get platform resource\n"); - return -ENODEV; - } ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0, dev_name(dev), common); @@ -405,6 +400,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) /* * rcar_has_irq_support() will be enabled */ + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); common->base = devm_ioremap_resource(dev, res); if (IS_ERR(common->base)) return PTR_ERR(common->base); @@ -458,7 +454,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, common); - dev_info(dev, "%d sensor proved\n", i); + dev_info(dev, "%d sensor probed\n", i); return 0; @@ -487,8 +483,6 @@ static int rcar_thermal_remove(struct platform_device *pdev) rcar_thermal_irq_disable(priv); } - platform_set_drvdata(pdev, NULL); - pm_runtime_put_sync(dev); pm_runtime_disable(dev); diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 3c5ee56..ab79ea4 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -104,7 +104,7 @@ static int spear_thermal_probe(struct platform_device *pdev) struct thermal_zone_device *spear_thermal = NULL; struct spear_thermal_dev *stdev; struct device_node *np = pdev->dev.of_node; - struct resource *stres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct resource *res; int ret = 0, val; if (!np || !of_property_read_u32(np, "st,thermal-flags", &val)) { @@ -112,11 +112,6 @@ static int spear_thermal_probe(struct platform_device *pdev) return -EINVAL; } - if (!stres) { - dev_err(&pdev->dev, "memory resource missing\n"); - return -ENODEV; - } - stdev = devm_kzalloc(&pdev->dev, sizeof(*stdev), GFP_KERNEL); if (!stdev) { dev_err(&pdev->dev, "kzalloc fail\n"); @@ -124,12 +119,10 @@ static int spear_thermal_probe(struct platform_device *pdev) } /* Enable thermal sensor */ - stdev->thermal_base = devm_ioremap(&pdev->dev, stres->start, - resource_size(stres)); - if (!stdev->thermal_base) { - dev_err(&pdev->dev, "ioremap failed\n"); - return -ENOMEM; - } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + stdev->thermal_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(stdev->thermal_base)) + return PTR_ERR(stdev->thermal_base); stdev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(stdev->clk)) { @@ -174,7 +167,6 @@ static int spear_thermal_exit(struct platform_device *pdev) struct spear_thermal_dev *stdev = spear_thermal->devdata; thermal_zone_device_unregister(spear_thermal); - platform_set_drvdata(pdev, NULL); /* Disable SPEAr Thermal Sensor */ actual_mask = readl_relaxed(stdev->thermal_base); @@ -198,7 +190,7 @@ static struct platform_driver spear_thermal_driver = { .name = "spear_thermal", .owner = THIS_MODULE, .pm = &spear_thermal_pm_ops, - .of_match_table = of_match_ptr(spear_thermal_id_table), + .of_match_table = spear_thermal_id_table, }, }; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index d755440..1f02e8e 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -155,7 +156,8 @@ int get_tz_trend(struct thermal_zone_device *tz, int trip) { enum thermal_trend trend; - if (!tz->ops->get_trend || tz->ops->get_trend(tz, trip, &trend)) { + if (tz->emul_temperature || !tz->ops->get_trend || + tz->ops->get_trend(tz, trip, &trend)) { if (tz->temperature > tz->last_temperature) trend = THERMAL_TREND_RAISING; else if (tz->temperature < tz->last_temperature) @@ -713,10 +715,13 @@ policy_store(struct device *dev, struct device_attribute *attr, int ret = -EINVAL; struct thermal_zone_device *tz = to_thermal_zone(dev); struct thermal_governor *gov; + char name[THERMAL_NAME_LENGTH]; + + snprintf(name, sizeof(name), "%s", buf); mutex_lock(&thermal_governor_lock); - gov = __find_governor(buf); + gov = __find_governor(strim(name)); if (!gov) goto exit; @@ -1624,7 +1629,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!ops || !ops->get_temp) return ERR_PTR(-EINVAL); - if (trips > 0 && !ops->get_trip_type) + if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) return ERR_PTR(-EINVAL); tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig new file mode 100644 index 0000000..bd4c7be --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/Kconfig @@ -0,0 +1,60 @@ +config TI_SOC_THERMAL + tristate "Texas Instruments SoCs temperature sensor driver" + depends on THERMAL + depends on ARCH_HAS_BANDGAP + help + If you say yes here you get support for the Texas Instruments + OMAP4460+ on die bandgap temperature sensor support. The register + set is part of system control module. + + This includes alert interrupts generation and also the TSHUT + support. + +config TI_THERMAL + bool "Texas Instruments SoCs thermal framework support" + depends on TI_SOC_THERMAL + depends on CPU_THERMAL + help + If you say yes here you want to get support for generic thermal + framework for the Texas Instruments on die bandgap temperature sensor. + + This includes trip points definitions, extrapolation rules and + CPU cooling device bindings. + +config OMAP4_THERMAL + bool "Texas Instruments OMAP4 thermal support" + depends on TI_SOC_THERMAL + depends on ARCH_OMAP4 + help + If you say yes here you get thermal support for the Texas Instruments + OMAP4 SoC family. The current chip supported are: + - OMAP4430 + - OMAP4460 + - OMAP4470 + + This includes alert interrupts generation and also the TSHUT + support. + +config OMAP5_THERMAL + bool "Texas Instruments OMAP5 thermal support" + depends on TI_SOC_THERMAL + depends on SOC_OMAP5 + help + If you say yes here you get thermal support for the Texas Instruments + OMAP5 SoC family. The current chip supported are: + - OMAP5430 + + This includes alert interrupts generation and also the TSHUT + support. + +config DRA752_THERMAL + bool "Texas Instruments DRA752 thermal support" + depends on TI_SOC_THERMAL + depends on SOC_DRA7XX + help + If you say yes here you get thermal support for the Texas Instruments + DRA752 SoC family. The current chip supported are: + - DRA752 + + This includes alert interrupts generation and also the TSHUT + support. diff --git a/drivers/thermal/ti-soc-thermal/Makefile b/drivers/thermal/ti-soc-thermal/Makefile new file mode 100644 index 0000000..1226b24 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/Makefile @@ -0,0 +1,6 @@ +obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal.o +ti-soc-thermal-y := ti-bandgap.o +ti-soc-thermal-$(CONFIG_TI_THERMAL) += ti-thermal-common.o +ti-soc-thermal-$(CONFIG_DRA752_THERMAL) += dra752-thermal-data.o +ti-soc-thermal-$(CONFIG_OMAP4_THERMAL) += omap4-thermal-data.o +ti-soc-thermal-$(CONFIG_OMAP5_THERMAL) += omap5-thermal-data.o diff --git a/drivers/thermal/ti-soc-thermal/TODO b/drivers/thermal/ti-soc-thermal/TODO new file mode 100644 index 0000000..7da787d --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/TODO @@ -0,0 +1,12 @@ +List of TODOs (by Eduardo Valentin) + +on ti-bandgap.c: +- Revisit PM support + +on ti-thermal-common.c/ti-thermal.h: +- Revisit need for locking + +generally: +- make sure this code works on OMAP4430, OMAP4460 and OMAP5430 + +Copy patches to Eduardo Valentin diff --git a/drivers/thermal/ti-soc-thermal/dra752-bandgap.h b/drivers/thermal/ti-soc-thermal/dra752-bandgap.h new file mode 100644 index 0000000..6b0f2b1 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/dra752-bandgap.h @@ -0,0 +1,280 @@ +/* + * DRA752 bandgap registers, bitfields and temperature definitions + * + * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * Tero Kristo + * + * This is an auto generated file. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ +#ifndef __DRA752_BANDGAP_H +#define __DRA752_BANDGAP_H + +/** + * *** DRA752 *** + * + * Below, in sequence, are the Register definitions, + * the bitfields and the temperature definitions for DRA752. + */ + +/** + * DRA752 register definitions + * + * Registers are defined as offsets. The offsets are + * relative to FUSE_OPP_BGAP_GPU on DRA752. + * DRA752_BANDGAP_BASE 0x4a0021e0 + * + * Register below are grouped by domain (not necessarily in offset order) + */ + + +/* DRA752.common register offsets */ +#define DRA752_BANDGAP_CTRL_1_OFFSET 0x1a0 +#define DRA752_BANDGAP_STATUS_1_OFFSET 0x1c8 +#define DRA752_BANDGAP_CTRL_2_OFFSET 0x39c +#define DRA752_BANDGAP_STATUS_2_OFFSET 0x3b8 + +/* DRA752.core register offsets */ +#define DRA752_STD_FUSE_OPP_BGAP_CORE_OFFSET 0x8 +#define DRA752_TEMP_SENSOR_CORE_OFFSET 0x154 +#define DRA752_BANDGAP_THRESHOLD_CORE_OFFSET 0x1ac +#define DRA752_BANDGAP_TSHUT_CORE_OFFSET 0x1b8 +#define DRA752_BANDGAP_CUMUL_DTEMP_CORE_OFFSET 0x1c4 +#define DRA752_DTEMP_CORE_0_OFFSET 0x208 +#define DRA752_DTEMP_CORE_1_OFFSET 0x20c +#define DRA752_DTEMP_CORE_2_OFFSET 0x210 +#define DRA752_DTEMP_CORE_3_OFFSET 0x214 +#define DRA752_DTEMP_CORE_4_OFFSET 0x218 + +/* DRA752.iva register offsets */ +#define DRA752_STD_FUSE_OPP_BGAP_IVA_OFFSET 0x388 +#define DRA752_TEMP_SENSOR_IVA_OFFSET 0x398 +#define DRA752_BANDGAP_THRESHOLD_IVA_OFFSET 0x3a4 +#define DRA752_BANDGAP_TSHUT_IVA_OFFSET 0x3ac +#define DRA752_BANDGAP_CUMUL_DTEMP_IVA_OFFSET 0x3b4 +#define DRA752_DTEMP_IVA_0_OFFSET 0x3d0 +#define DRA752_DTEMP_IVA_1_OFFSET 0x3d4 +#define DRA752_DTEMP_IVA_2_OFFSET 0x3d8 +#define DRA752_DTEMP_IVA_3_OFFSET 0x3dc +#define DRA752_DTEMP_IVA_4_OFFSET 0x3e0 + +/* DRA752.mpu register offsets */ +#define DRA752_STD_FUSE_OPP_BGAP_MPU_OFFSET 0x4 +#define DRA752_TEMP_SENSOR_MPU_OFFSET 0x14c +#define DRA752_BANDGAP_THRESHOLD_MPU_OFFSET 0x1a4 +#define DRA752_BANDGAP_TSHUT_MPU_OFFSET 0x1b0 +#define DRA752_BANDGAP_CUMUL_DTEMP_MPU_OFFSET 0x1bc +#define DRA752_DTEMP_MPU_0_OFFSET 0x1e0 +#define DRA752_DTEMP_MPU_1_OFFSET 0x1e4 +#define DRA752_DTEMP_MPU_2_OFFSET 0x1e8 +#define DRA752_DTEMP_MPU_3_OFFSET 0x1ec +#define DRA752_DTEMP_MPU_4_OFFSET 0x1f0 + +/* DRA752.dspeve register offsets */ +#define DRA752_STD_FUSE_OPP_BGAP_DSPEVE_OFFSET 0x384 +#define DRA752_TEMP_SENSOR_DSPEVE_OFFSET 0x394 +#define DRA752_BANDGAP_THRESHOLD_DSPEVE_OFFSET 0x3a0 +#define DRA752_BANDGAP_TSHUT_DSPEVE_OFFSET 0x3a8 +#define DRA752_BANDGAP_CUMUL_DTEMP_DSPEVE_OFFSET 0x3b0 +#define DRA752_DTEMP_DSPEVE_0_OFFSET 0x3bc +#define DRA752_DTEMP_DSPEVE_1_OFFSET 0x3c0 +#define DRA752_DTEMP_DSPEVE_2_OFFSET 0x3c4 +#define DRA752_DTEMP_DSPEVE_3_OFFSET 0x3c8 +#define DRA752_DTEMP_DSPEVE_4_OFFSET 0x3cc + +/* DRA752.gpu register offsets */ +#define DRA752_STD_FUSE_OPP_BGAP_GPU_OFFSET 0x0 +#define DRA752_TEMP_SENSOR_GPU_OFFSET 0x150 +#define DRA752_BANDGAP_THRESHOLD_GPU_OFFSET 0x1a8 +#define DRA752_BANDGAP_TSHUT_GPU_OFFSET 0x1b4 +#define DRA752_BANDGAP_CUMUL_DTEMP_GPU_OFFSET 0x1c0 +#define DRA752_DTEMP_GPU_0_OFFSET 0x1f4 +#define DRA752_DTEMP_GPU_1_OFFSET 0x1f8 +#define DRA752_DTEMP_GPU_2_OFFSET 0x1fc +#define DRA752_DTEMP_GPU_3_OFFSET 0x200 +#define DRA752_DTEMP_GPU_4_OFFSET 0x204 + +/** + * Register bitfields for DRA752 + * + * All the macros bellow define the required bits for + * controlling temperature on DRA752. Bit defines are + * grouped by register. + */ + +/* DRA752.BANDGAP_STATUS_1 */ +#define DRA752_BANDGAP_STATUS_1_ALERT_MASK BIT(31) +#define DRA752_BANDGAP_STATUS_1_HOT_CORE_MASK BIT(5) +#define DRA752_BANDGAP_STATUS_1_COLD_CORE_MASK BIT(4) +#define DRA752_BANDGAP_STATUS_1_HOT_GPU_MASK BIT(3) +#define DRA752_BANDGAP_STATUS_1_COLD_GPU_MASK BIT(2) +#define DRA752_BANDGAP_STATUS_1_HOT_MPU_MASK BIT(1) +#define DRA752_BANDGAP_STATUS_1_COLD_MPU_MASK BIT(0) + +/* DRA752.BANDGAP_CTRL_2 */ +#define DRA752_BANDGAP_CTRL_2_FREEZE_IVA_MASK BIT(22) +#define DRA752_BANDGAP_CTRL_2_FREEZE_DSPEVE_MASK BIT(21) +#define DRA752_BANDGAP_CTRL_2_CLEAR_IVA_MASK BIT(19) +#define DRA752_BANDGAP_CTRL_2_CLEAR_DSPEVE_MASK BIT(18) +#define DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_IVA_MASK BIT(16) +#define DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_DSPEVE_MASK BIT(15) +#define DRA752_BANDGAP_CTRL_2_MASK_HOT_IVA_MASK BIT(3) +#define DRA752_BANDGAP_CTRL_2_MASK_COLD_IVA_MASK BIT(2) +#define DRA752_BANDGAP_CTRL_2_MASK_HOT_DSPEVE_MASK BIT(1) +#define DRA752_BANDGAP_CTRL_2_MASK_COLD_DSPEVE_MASK BIT(0) + +/* DRA752.BANDGAP_STATUS_2 */ +#define DRA752_BANDGAP_STATUS_2_HOT_IVA_MASK BIT(3) +#define DRA752_BANDGAP_STATUS_2_COLD_IVA_MASK BIT(2) +#define DRA752_BANDGAP_STATUS_2_HOT_DSPEVE_MASK BIT(1) +#define DRA752_BANDGAP_STATUS_2_COLD_DSPEVE_MASK BIT(0) + +/* DRA752.BANDGAP_CTRL_1 */ +#define DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK (0x3 << 30) +#define DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK (0x7 << 27) +#define DRA752_BANDGAP_CTRL_1_FREEZE_CORE_MASK BIT(23) +#define DRA752_BANDGAP_CTRL_1_FREEZE_GPU_MASK BIT(22) +#define DRA752_BANDGAP_CTRL_1_FREEZE_MPU_MASK BIT(21) +#define DRA752_BANDGAP_CTRL_1_CLEAR_CORE_MASK BIT(20) +#define DRA752_BANDGAP_CTRL_1_CLEAR_GPU_MASK BIT(19) +#define DRA752_BANDGAP_CTRL_1_CLEAR_MPU_MASK BIT(18) +#define DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_CORE_MASK BIT(17) +#define DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_GPU_MASK BIT(16) +#define DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_MPU_MASK BIT(15) +#define DRA752_BANDGAP_CTRL_1_MASK_HOT_CORE_MASK BIT(5) +#define DRA752_BANDGAP_CTRL_1_MASK_COLD_CORE_MASK BIT(4) +#define DRA752_BANDGAP_CTRL_1_MASK_HOT_GPU_MASK BIT(3) +#define DRA752_BANDGAP_CTRL_1_MASK_COLD_GPU_MASK BIT(2) +#define DRA752_BANDGAP_CTRL_1_MASK_HOT_MPU_MASK BIT(1) +#define DRA752_BANDGAP_CTRL_1_MASK_COLD_MPU_MASK BIT(0) + +/* DRA752.TEMP_SENSOR */ +#define DRA752_TEMP_SENSOR_TMPSOFF_MASK BIT(11) +#define DRA752_TEMP_SENSOR_EOCZ_MASK BIT(10) +#define DRA752_TEMP_SENSOR_DTEMP_MASK (0x3ff << 0) + +/* DRA752.BANDGAP_THRESHOLD */ +#define DRA752_BANDGAP_THRESHOLD_HOT_MASK (0x3ff << 16) +#define DRA752_BANDGAP_THRESHOLD_COLD_MASK (0x3ff << 0) + +/* DRA752.TSHUT_THRESHOLD */ +#define DRA752_TSHUT_THRESHOLD_MUXCTRL_MASK BIT(31) +#define DRA752_TSHUT_THRESHOLD_HOT_MASK (0x3ff << 16) +#define DRA752_TSHUT_THRESHOLD_COLD_MASK (0x3ff << 0) + +/* DRA752.BANDGAP_CUMUL_DTEMP_CORE */ +#define DRA752_BANDGAP_CUMUL_DTEMP_CORE_MASK (0xffffffff << 0) + +/* DRA752.BANDGAP_CUMUL_DTEMP_IVA */ +#define DRA752_BANDGAP_CUMUL_DTEMP_IVA_MASK (0xffffffff << 0) + +/* DRA752.BANDGAP_CUMUL_DTEMP_MPU */ +#define DRA752_BANDGAP_CUMUL_DTEMP_MPU_MASK (0xffffffff << 0) + +/* DRA752.BANDGAP_CUMUL_DTEMP_DSPEVE */ +#define DRA752_BANDGAP_CUMUL_DTEMP_DSPEVE_MASK (0xffffffff << 0) + +/* DRA752.BANDGAP_CUMUL_DTEMP_GPU */ +#define DRA752_BANDGAP_CUMUL_DTEMP_GPU_MASK (0xffffffff << 0) + +/** + * Temperature limits and thresholds for DRA752 + * + * All the macros bellow are definitions for handling the + * ADC conversions and representation of temperature limits + * and thresholds for DRA752. Definitions are grouped + * by temperature domain. + */ + +/* DRA752.common temperature definitions */ +/* ADC conversion table limits */ +#define DRA752_ADC_START_VALUE 540 +#define DRA752_ADC_END_VALUE 945 + +/* DRA752.GPU temperature definitions */ +/* bandgap clock limits */ +#define DRA752_GPU_MAX_FREQ 1500000 +#define DRA752_GPU_MIN_FREQ 1000000 +/* sensor limits */ +#define DRA752_GPU_MIN_TEMP -40000 +#define DRA752_GPU_MAX_TEMP 125000 +#define DRA752_GPU_HYST_VAL 5000 +/* interrupts thresholds */ +#define DRA752_GPU_TSHUT_HOT 915 +#define DRA752_GPU_TSHUT_COLD 900 +#define DRA752_GPU_T_HOT 800 +#define DRA752_GPU_T_COLD 795 + +/* DRA752.MPU temperature definitions */ +/* bandgap clock limits */ +#define DRA752_MPU_MAX_FREQ 1500000 +#define DRA752_MPU_MIN_FREQ 1000000 +/* sensor limits */ +#define DRA752_MPU_MIN_TEMP -40000 +#define DRA752_MPU_MAX_TEMP 125000 +#define DRA752_MPU_HYST_VAL 5000 +/* interrupts thresholds */ +#define DRA752_MPU_TSHUT_HOT 915 +#define DRA752_MPU_TSHUT_COLD 900 +#define DRA752_MPU_T_HOT 800 +#define DRA752_MPU_T_COLD 795 + +/* DRA752.CORE temperature definitions */ +/* bandgap clock limits */ +#define DRA752_CORE_MAX_FREQ 1500000 +#define DRA752_CORE_MIN_FREQ 1000000 +/* sensor limits */ +#define DRA752_CORE_MIN_TEMP -40000 +#define DRA752_CORE_MAX_TEMP 125000 +#define DRA752_CORE_HYST_VAL 5000 +/* interrupts thresholds */ +#define DRA752_CORE_TSHUT_HOT 915 +#define DRA752_CORE_TSHUT_COLD 900 +#define DRA752_CORE_T_HOT 800 +#define DRA752_CORE_T_COLD 795 + +/* DRA752.DSPEVE temperature definitions */ +/* bandgap clock limits */ +#define DRA752_DSPEVE_MAX_FREQ 1500000 +#define DRA752_DSPEVE_MIN_FREQ 1000000 +/* sensor limits */ +#define DRA752_DSPEVE_MIN_TEMP -40000 +#define DRA752_DSPEVE_MAX_TEMP 125000 +#define DRA752_DSPEVE_HYST_VAL 5000 +/* interrupts thresholds */ +#define DRA752_DSPEVE_TSHUT_HOT 915 +#define DRA752_DSPEVE_TSHUT_COLD 900 +#define DRA752_DSPEVE_T_HOT 800 +#define DRA752_DSPEVE_T_COLD 795 + +/* DRA752.IVA temperature definitions */ +/* bandgap clock limits */ +#define DRA752_IVA_MAX_FREQ 1500000 +#define DRA752_IVA_MIN_FREQ 1000000 +/* sensor limits */ +#define DRA752_IVA_MIN_TEMP -40000 +#define DRA752_IVA_MAX_TEMP 125000 +#define DRA752_IVA_HYST_VAL 5000 +/* interrupts thresholds */ +#define DRA752_IVA_TSHUT_HOT 915 +#define DRA752_IVA_TSHUT_COLD 900 +#define DRA752_IVA_T_HOT 800 +#define DRA752_IVA_T_COLD 795 + +#endif /* __DRA752_BANDGAP_H */ diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c new file mode 100644 index 0000000..e5d8326 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -0,0 +1,476 @@ +/* + * DRA752 thermal data. + * + * Copyright (C) 2013 Texas Instruments Inc. + * Contact: + * Eduardo Valentin + * Tero Kristo + * + * This file is partially autogenerated. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "ti-thermal.h" +#include "ti-bandgap.h" +#include "dra752-bandgap.h" + +/* + * DRA752 has five instances of thermal sensor: MPU, GPU, CORE, + * IVA and DSPEVE need to describe the individual registers and + * bit fields. + */ + +/* + * DRA752 CORE thermal sensor register offsets and bit-fields + */ +static struct temp_sensor_registers +dra752_core_temp_sensor_registers = { + .temp_sensor_ctrl = DRA752_TEMP_SENSOR_CORE_OFFSET, + .bgap_tempsoff_mask = DRA752_TEMP_SENSOR_TMPSOFF_MASK, + .bgap_eocz_mask = DRA752_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = DRA752_TEMP_SENSOR_DTEMP_MASK, + .bgap_mask_ctrl = DRA752_BANDGAP_CTRL_1_OFFSET, + .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_CORE_MASK, + .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_CORE_MASK, + .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_CORE_MASK, + .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_CORE_MASK, + .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_CORE_MASK, + .bgap_threshold = DRA752_BANDGAP_THRESHOLD_CORE_OFFSET, + .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, + .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, + .tshut_threshold = DRA752_BANDGAP_TSHUT_CORE_OFFSET, + .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, + .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, + .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, + .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, + .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_CORE_MASK, + .status_cold_mask = DRA752_BANDGAP_STATUS_1_COLD_CORE_MASK, + .bgap_cumul_dtemp = DRA752_BANDGAP_CUMUL_DTEMP_CORE_OFFSET, + .ctrl_dtemp_0 = DRA752_DTEMP_CORE_0_OFFSET, + .ctrl_dtemp_1 = DRA752_DTEMP_CORE_1_OFFSET, + .ctrl_dtemp_2 = DRA752_DTEMP_CORE_2_OFFSET, + .ctrl_dtemp_3 = DRA752_DTEMP_CORE_3_OFFSET, + .ctrl_dtemp_4 = DRA752_DTEMP_CORE_4_OFFSET, + .bgap_efuse = DRA752_STD_FUSE_OPP_BGAP_CORE_OFFSET, +}; + +/* + * DRA752 IVA thermal sensor register offsets and bit-fields + */ +static struct temp_sensor_registers +dra752_iva_temp_sensor_registers = { + .temp_sensor_ctrl = DRA752_TEMP_SENSOR_IVA_OFFSET, + .bgap_tempsoff_mask = DRA752_TEMP_SENSOR_TMPSOFF_MASK, + .bgap_eocz_mask = DRA752_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = DRA752_TEMP_SENSOR_DTEMP_MASK, + .bgap_mask_ctrl = DRA752_BANDGAP_CTRL_2_OFFSET, + .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_IVA_MASK, + .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_IVA_MASK, + .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_IVA_MASK, + .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_IVA_MASK, + .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_IVA_MASK, + .bgap_threshold = DRA752_BANDGAP_THRESHOLD_IVA_OFFSET, + .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, + .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, + .tshut_threshold = DRA752_BANDGAP_TSHUT_IVA_OFFSET, + .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, + .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, + .bgap_status = DRA752_BANDGAP_STATUS_2_OFFSET, + .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, + .status_hot_mask = DRA752_BANDGAP_STATUS_2_HOT_IVA_MASK, + .status_cold_mask = DRA752_BANDGAP_STATUS_2_COLD_IVA_MASK, + .bgap_cumul_dtemp = DRA752_BANDGAP_CUMUL_DTEMP_IVA_OFFSET, + .ctrl_dtemp_0 = DRA752_DTEMP_IVA_0_OFFSET, + .ctrl_dtemp_1 = DRA752_DTEMP_IVA_1_OFFSET, + .ctrl_dtemp_2 = DRA752_DTEMP_IVA_2_OFFSET, + .ctrl_dtemp_3 = DRA752_DTEMP_IVA_3_OFFSET, + .ctrl_dtemp_4 = DRA752_DTEMP_IVA_4_OFFSET, + .bgap_efuse = DRA752_STD_FUSE_OPP_BGAP_IVA_OFFSET, +}; + +/* + * DRA752 MPU thermal sensor register offsets and bit-fields + */ +static struct temp_sensor_registers +dra752_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = DRA752_TEMP_SENSOR_MPU_OFFSET, + .bgap_tempsoff_mask = DRA752_TEMP_SENSOR_TMPSOFF_MASK, + .bgap_eocz_mask = DRA752_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = DRA752_TEMP_SENSOR_DTEMP_MASK, + .bgap_mask_ctrl = DRA752_BANDGAP_CTRL_1_OFFSET, + .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_MPU_MASK, + .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_MPU_MASK, + .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_MPU_MASK, + .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_MPU_MASK, + .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_MPU_MASK, + .bgap_threshold = DRA752_BANDGAP_THRESHOLD_MPU_OFFSET, + .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, + .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, + .tshut_threshold = DRA752_BANDGAP_TSHUT_MPU_OFFSET, + .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, + .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, + .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, + .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, + .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_MPU_MASK, + .status_cold_mask = DRA752_BANDGAP_STATUS_1_COLD_MPU_MASK, + .bgap_cumul_dtemp = DRA752_BANDGAP_CUMUL_DTEMP_MPU_OFFSET, + .ctrl_dtemp_0 = DRA752_DTEMP_MPU_0_OFFSET, + .ctrl_dtemp_1 = DRA752_DTEMP_MPU_1_OFFSET, + .ctrl_dtemp_2 = DRA752_DTEMP_MPU_2_OFFSET, + .ctrl_dtemp_3 = DRA752_DTEMP_MPU_3_OFFSET, + .ctrl_dtemp_4 = DRA752_DTEMP_MPU_4_OFFSET, + .bgap_efuse = DRA752_STD_FUSE_OPP_BGAP_MPU_OFFSET, +}; + +/* + * DRA752 DSPEVE thermal sensor register offsets and bit-fields + */ +static struct temp_sensor_registers +dra752_dspeve_temp_sensor_registers = { + .temp_sensor_ctrl = DRA752_TEMP_SENSOR_DSPEVE_OFFSET, + .bgap_tempsoff_mask = DRA752_TEMP_SENSOR_TMPSOFF_MASK, + .bgap_eocz_mask = DRA752_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = DRA752_TEMP_SENSOR_DTEMP_MASK, + .bgap_mask_ctrl = DRA752_BANDGAP_CTRL_2_OFFSET, + .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_DSPEVE_MASK, + .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_DSPEVE_MASK, + .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_DSPEVE_MASK, + .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_DSPEVE_MASK, + .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_DSPEVE_MASK, + .bgap_threshold = DRA752_BANDGAP_THRESHOLD_DSPEVE_OFFSET, + .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, + .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, + .tshut_threshold = DRA752_BANDGAP_TSHUT_DSPEVE_OFFSET, + .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, + .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, + .bgap_status = DRA752_BANDGAP_STATUS_2_OFFSET, + .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, + .status_hot_mask = DRA752_BANDGAP_STATUS_2_HOT_DSPEVE_MASK, + .status_cold_mask = DRA752_BANDGAP_STATUS_2_COLD_DSPEVE_MASK, + .bgap_cumul_dtemp = DRA752_BANDGAP_CUMUL_DTEMP_DSPEVE_OFFSET, + .ctrl_dtemp_0 = DRA752_DTEMP_DSPEVE_0_OFFSET, + .ctrl_dtemp_1 = DRA752_DTEMP_DSPEVE_1_OFFSET, + .ctrl_dtemp_2 = DRA752_DTEMP_DSPEVE_2_OFFSET, + .ctrl_dtemp_3 = DRA752_DTEMP_DSPEVE_3_OFFSET, + .ctrl_dtemp_4 = DRA752_DTEMP_DSPEVE_4_OFFSET, + .bgap_efuse = DRA752_STD_FUSE_OPP_BGAP_DSPEVE_OFFSET, +}; + +/* + * DRA752 GPU thermal sensor register offsets and bit-fields + */ +static struct temp_sensor_registers +dra752_gpu_temp_sensor_registers = { + .temp_sensor_ctrl = DRA752_TEMP_SENSOR_GPU_OFFSET, + .bgap_tempsoff_mask = DRA752_TEMP_SENSOR_TMPSOFF_MASK, + .bgap_eocz_mask = DRA752_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = DRA752_TEMP_SENSOR_DTEMP_MASK, + .bgap_mask_ctrl = DRA752_BANDGAP_CTRL_1_OFFSET, + .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_GPU_MASK, + .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_GPU_MASK, + .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_GPU_MASK, + .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_GPU_MASK, + .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_GPU_MASK, + .bgap_threshold = DRA752_BANDGAP_THRESHOLD_GPU_OFFSET, + .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, + .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, + .tshut_threshold = DRA752_BANDGAP_TSHUT_GPU_OFFSET, + .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, + .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, + .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, + .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, + .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_GPU_MASK, + .status_cold_mask = DRA752_BANDGAP_STATUS_1_COLD_GPU_MASK, + .bgap_cumul_dtemp = DRA752_BANDGAP_CUMUL_DTEMP_GPU_OFFSET, + .ctrl_dtemp_0 = DRA752_DTEMP_GPU_0_OFFSET, + .ctrl_dtemp_1 = DRA752_DTEMP_GPU_1_OFFSET, + .ctrl_dtemp_2 = DRA752_DTEMP_GPU_2_OFFSET, + .ctrl_dtemp_3 = DRA752_DTEMP_GPU_3_OFFSET, + .ctrl_dtemp_4 = DRA752_DTEMP_GPU_4_OFFSET, + .bgap_efuse = DRA752_STD_FUSE_OPP_BGAP_GPU_OFFSET, +}; + +/* Thresholds and limits for DRA752 MPU temperature sensor */ +static struct temp_sensor_data dra752_mpu_temp_sensor_data = { + .tshut_hot = DRA752_MPU_TSHUT_HOT, + .tshut_cold = DRA752_MPU_TSHUT_COLD, + .t_hot = DRA752_MPU_T_HOT, + .t_cold = DRA752_MPU_T_COLD, + .min_freq = DRA752_MPU_MIN_FREQ, + .max_freq = DRA752_MPU_MAX_FREQ, + .max_temp = DRA752_MPU_MAX_TEMP, + .min_temp = DRA752_MPU_MIN_TEMP, + .hyst_val = DRA752_MPU_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for DRA752 GPU temperature sensor */ +static struct temp_sensor_data dra752_gpu_temp_sensor_data = { + .tshut_hot = DRA752_GPU_TSHUT_HOT, + .tshut_cold = DRA752_GPU_TSHUT_COLD, + .t_hot = DRA752_GPU_T_HOT, + .t_cold = DRA752_GPU_T_COLD, + .min_freq = DRA752_GPU_MIN_FREQ, + .max_freq = DRA752_GPU_MAX_FREQ, + .max_temp = DRA752_GPU_MAX_TEMP, + .min_temp = DRA752_GPU_MIN_TEMP, + .hyst_val = DRA752_GPU_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for DRA752 CORE temperature sensor */ +static struct temp_sensor_data dra752_core_temp_sensor_data = { + .tshut_hot = DRA752_CORE_TSHUT_HOT, + .tshut_cold = DRA752_CORE_TSHUT_COLD, + .t_hot = DRA752_CORE_T_HOT, + .t_cold = DRA752_CORE_T_COLD, + .min_freq = DRA752_CORE_MIN_FREQ, + .max_freq = DRA752_CORE_MAX_FREQ, + .max_temp = DRA752_CORE_MAX_TEMP, + .min_temp = DRA752_CORE_MIN_TEMP, + .hyst_val = DRA752_CORE_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for DRA752 DSPEVE temperature sensor */ +static struct temp_sensor_data dra752_dspeve_temp_sensor_data = { + .tshut_hot = DRA752_DSPEVE_TSHUT_HOT, + .tshut_cold = DRA752_DSPEVE_TSHUT_COLD, + .t_hot = DRA752_DSPEVE_T_HOT, + .t_cold = DRA752_DSPEVE_T_COLD, + .min_freq = DRA752_DSPEVE_MIN_FREQ, + .max_freq = DRA752_DSPEVE_MAX_FREQ, + .max_temp = DRA752_DSPEVE_MAX_TEMP, + .min_temp = DRA752_DSPEVE_MIN_TEMP, + .hyst_val = DRA752_DSPEVE_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for DRA752 IVA temperature sensor */ +static struct temp_sensor_data dra752_iva_temp_sensor_data = { + .tshut_hot = DRA752_IVA_TSHUT_HOT, + .tshut_cold = DRA752_IVA_TSHUT_COLD, + .t_hot = DRA752_IVA_T_HOT, + .t_cold = DRA752_IVA_T_COLD, + .min_freq = DRA752_IVA_MIN_FREQ, + .max_freq = DRA752_IVA_MAX_FREQ, + .max_temp = DRA752_IVA_MAX_TEMP, + .min_temp = DRA752_IVA_MIN_TEMP, + .hyst_val = DRA752_IVA_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* + * DRA752 : Temperature values in milli degree celsius + * ADC code values from 540 to 945 + */ +static +int dra752_adc_to_temp[DRA752_ADC_END_VALUE - DRA752_ADC_START_VALUE + 1] = { + /* Index 540 - 549 */ + -40000, -40000, -40000, -40000, -39800, -39400, -39000, -38600, -38200, + -37800, + /* Index 550 - 559 */ + -37400, -37000, -36600, -36200, -35800, -35300, -34700, -34200, -33800, + -33400, + /* Index 560 - 569 */ + -33000, -32600, -32200, -31800, -31400, -31000, -30600, -30200, -29800, + -29400, + /* Index 570 - 579 */ + -29000, -28600, -28200, -27700, -27100, -26600, -26200, -25800, -25400, + -25000, + /* Index 580 - 589 */ + -24600, -24200, -23800, -23400, -23000, -22600, -22200, -21800, -21400, + -21000, + /* Index 590 - 599 */ + -20500, -19900, -19400, -19000, -18600, -18200, -17800, -17400, -17000, + -16600, + /* Index 600 - 609 */ + -16200, -15800, -15400, -15000, -14600, -14200, -13800, -13400, -13000, + -12500, + /* Index 610 - 619 */ + -11900, -11400, -11000, -10600, -10200, -9800, -9400, -9000, -8600, + -8200, + /* Index 620 - 629 */ + -7800, -7400, -7000, -6600, -6200, -5800, -5400, -5000, -4500, + -3900, + /* Index 630 - 639 */ + -3400, -3000, -2600, -2200, -1800, -1400, -1000, -600, -200, + 200, + /* Index 640 - 649 */ + 600, 1000, 1400, 1800, 2200, 2600, 3000, 3400, 3900, + 4500, + /* Index 650 - 659 */ + 5000, 5400, 5800, 6200, 6600, 7000, 7400, 7800, 8200, + 8600, + /* Index 660 - 669 */ + 9000, 9400, 9800, 10200, 10600, 11000, 11400, 11800, 12200, + 12700, + /* Index 670 - 679 */ + 13300, 13800, 14200, 14600, 15000, 15400, 15800, 16200, 16600, + 17000, + /* Index 680 - 689 */ + 17400, 17800, 18200, 18600, 19000, 19400, 19800, 20200, 20600, + 21000, + /* Index 690 - 699 */ + 21400, 21900, 22500, 23000, 23400, 23800, 24200, 24600, 25000, + 25400, + /* Index 700 - 709 */ + 25800, 26200, 26600, 27000, 27400, 27800, 28200, 28600, 29000, + 29400, + /* Index 710 - 719 */ + 29800, 30200, 30600, 31000, 31400, 31900, 32500, 33000, 33400, + 33800, + /* Index 720 - 729 */ + 34200, 34600, 35000, 35400, 35800, 36200, 36600, 37000, 37400, + 37800, + /* Index 730 - 739 */ + 38200, 38600, 39000, 39400, 39800, 40200, 40600, 41000, 41400, + 41800, + /* Index 740 - 749 */ + 42200, 42600, 43100, 43700, 44200, 44600, 45000, 45400, 45800, + 46200, + /* Index 750 - 759 */ + 46600, 47000, 47400, 47800, 48200, 48600, 49000, 49400, 49800, + 50200, + /* Index 760 - 769 */ + 50600, 51000, 51400, 51800, 52200, 52600, 53000, 53400, 53800, + 54200, + /* Index 770 - 779 */ + 54600, 55000, 55400, 55900, 56500, 57000, 57400, 57800, 58200, + 58600, + /* Index 780 - 789 */ + 59000, 59400, 59800, 60200, 60600, 61000, 61400, 61800, 62200, + 62600, + /* Index 790 - 799 */ + 63000, 63400, 63800, 64200, 64600, 65000, 65400, 65800, 66200, + 66600, + /* Index 800 - 809 */ + 67000, 67400, 67800, 68200, 68600, 69000, 69400, 69800, 70200, + 70600, + /* Index 810 - 819 */ + 71000, 71500, 72100, 72600, 73000, 73400, 73800, 74200, 74600, + 75000, + /* Index 820 - 829 */ + 75400, 75800, 76200, 76600, 77000, 77400, 77800, 78200, 78600, + 79000, + /* Index 830 - 839 */ + 79400, 79800, 80200, 80600, 81000, 81400, 81800, 82200, 82600, + 83000, + /* Index 840 - 849 */ + 83400, 83800, 84200, 84600, 85000, 85400, 85800, 86200, 86600, + 87000, + /* Index 850 - 859 */ + 87400, 87800, 88200, 88600, 89000, 89400, 89800, 90200, 90600, + 91000, + /* Index 860 - 869 */ + 91400, 91800, 92200, 92600, 93000, 93400, 93800, 94200, 94600, + 95000, + /* Index 870 - 879 */ + 95400, 95800, 96200, 96600, 97000, 97500, 98100, 98600, 99000, + 99400, + /* Index 880 - 889 */ + 99800, 100200, 100600, 101000, 101400, 101800, 102200, 102600, 103000, + 103400, + /* Index 890 - 899 */ + 103800, 104200, 104600, 105000, 105400, 105800, 106200, 106600, 107000, + 107400, + /* Index 900 - 909 */ + 107800, 108200, 108600, 109000, 109400, 109800, 110200, 110600, 111000, + 111400, + /* Index 910 - 919 */ + 111800, 112200, 112600, 113000, 113400, 113800, 114200, 114600, 115000, + 115400, + /* Index 920 - 929 */ + 115800, 116200, 116600, 117000, 117400, 117800, 118200, 118600, 119000, + 119400, + /* Index 930 - 939 */ + 119800, 120200, 120600, 121000, 121400, 121800, 122200, 122600, 123000, + 123400, + /* Index 940 - 945 */ + 123800, 124200, 124600, 124900, 125000, 125000, +}; + +/* DRA752 data */ +const struct ti_bandgap_data dra752_data = { + .features = TI_BANDGAP_FEATURE_TSHUT_CONFIG | + TI_BANDGAP_FEATURE_FREEZE_BIT | + TI_BANDGAP_FEATURE_TALERT | + TI_BANDGAP_FEATURE_COUNTER_DELAY | + TI_BANDGAP_FEATURE_HISTORY_BUFFER, + .fclock_name = "l3instr_ts_gclk_div", + .div_ck_name = "l3instr_ts_gclk_div", + .conv_table = dra752_adc_to_temp, + .adc_start_val = DRA752_ADC_START_VALUE, + .adc_end_val = DRA752_ADC_END_VALUE, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + .sensors = { + { + .registers = &dra752_mpu_temp_sensor_registers, + .ts_data = &dra752_mpu_temp_sensor_data, + .domain = "cpu", + .register_cooling = ti_thermal_register_cpu_cooling, + .unregister_cooling = ti_thermal_unregister_cpu_cooling, + .slope = DRA752_GRADIENT_SLOPE, + .constant = DRA752_GRADIENT_CONST, + .slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB, + .constant_pcb = DRA752_GRADIENT_CONST_W_PCB, + }, + { + .registers = &dra752_gpu_temp_sensor_registers, + .ts_data = &dra752_gpu_temp_sensor_data, + .domain = "gpu", + .slope = DRA752_GRADIENT_SLOPE, + .constant = DRA752_GRADIENT_CONST, + .slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB, + .constant_pcb = DRA752_GRADIENT_CONST_W_PCB, + }, + { + .registers = &dra752_core_temp_sensor_registers, + .ts_data = &dra752_core_temp_sensor_data, + .domain = "core", + .slope = DRA752_GRADIENT_SLOPE, + .constant = DRA752_GRADIENT_CONST, + .slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB, + .constant_pcb = DRA752_GRADIENT_CONST_W_PCB, + }, + { + .registers = &dra752_dspeve_temp_sensor_registers, + .ts_data = &dra752_dspeve_temp_sensor_data, + .domain = "dspeve", + .slope = DRA752_GRADIENT_SLOPE, + .constant = DRA752_GRADIENT_CONST, + .slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB, + .constant_pcb = DRA752_GRADIENT_CONST_W_PCB, + }, + { + .registers = &dra752_iva_temp_sensor_registers, + .ts_data = &dra752_iva_temp_sensor_data, + .domain = "iva", + .slope = DRA752_GRADIENT_SLOPE, + .constant = DRA752_GRADIENT_CONST, + .slope_pcb = DRA752_GRADIENT_SLOPE_W_PCB, + .constant_pcb = DRA752_GRADIENT_CONST_W_PCB, + }, + }, + .sensor_count = 5, +}; diff --git a/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c new file mode 100644 index 0000000..d255d33 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/omap4-thermal-data.c @@ -0,0 +1,267 @@ +/* + * OMAP4 thermal driver. + * + * Copyright (C) 2011-2012 Texas Instruments Inc. + * Contact: + * Eduardo Valentin + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "ti-thermal.h" +#include "ti-bandgap.h" +#include "omap4xxx-bandgap.h" + +/* + * OMAP4430 has one instance of thermal sensor for MPU + * need to describe the individual bit fields + */ +static struct temp_sensor_registers +omap4430_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = OMAP4430_TEMP_SENSOR_CTRL_OFFSET, + .bgap_tempsoff_mask = OMAP4430_BGAP_TEMPSOFF_MASK, + .bgap_soc_mask = OMAP4430_BGAP_TEMP_SENSOR_SOC_MASK, + .bgap_eocz_mask = OMAP4430_BGAP_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = OMAP4430_BGAP_TEMP_SENSOR_DTEMP_MASK, + + .bgap_mode_ctrl = OMAP4430_TEMP_SENSOR_CTRL_OFFSET, + .mode_ctrl_mask = OMAP4430_SINGLE_MODE_MASK, + + .bgap_efuse = OMAP4430_FUSE_OPP_BGAP, +}; + +/* Thresholds and limits for OMAP4430 MPU temperature sensor */ +static struct temp_sensor_data omap4430_mpu_temp_sensor_data = { + .min_freq = OMAP4430_MIN_FREQ, + .max_freq = OMAP4430_MAX_FREQ, + .max_temp = OMAP4430_MAX_TEMP, + .min_temp = OMAP4430_MIN_TEMP, + .hyst_val = OMAP4430_HYST_VAL, +}; + +/* + * Temperature values in milli degree celsius + * ADC code values from 530 to 923 + */ +static const int +omap4430_adc_to_temp[OMAP4430_ADC_END_VALUE - OMAP4430_ADC_START_VALUE + 1] = { + -38000, -35000, -34000, -32000, -30000, -28000, -26000, -24000, -22000, + -20000, -18000, -17000, -15000, -13000, -12000, -10000, -8000, -6000, + -5000, -3000, -1000, 0, 2000, 3000, 5000, 6000, 8000, 10000, 12000, + 13000, 15000, 17000, 19000, 21000, 23000, 25000, 27000, 28000, 30000, + 32000, 33000, 35000, 37000, 38000, 40000, 42000, 43000, 45000, 47000, + 48000, 50000, 52000, 53000, 55000, 57000, 58000, 60000, 62000, 64000, + 66000, 68000, 70000, 71000, 73000, 75000, 77000, 78000, 80000, 82000, + 83000, 85000, 87000, 88000, 90000, 92000, 93000, 95000, 97000, 98000, + 100000, 102000, 103000, 105000, 107000, 109000, 111000, 113000, 115000, + 117000, 118000, 120000, 122000, 123000, +}; + +/* OMAP4430 data */ +const struct ti_bandgap_data omap4430_data = { + .features = TI_BANDGAP_FEATURE_MODE_CONFIG | + TI_BANDGAP_FEATURE_CLK_CTRL | + TI_BANDGAP_FEATURE_POWER_SWITCH, + .fclock_name = "bandgap_fclk", + .div_ck_name = "bandgap_fclk", + .conv_table = omap4430_adc_to_temp, + .adc_start_val = OMAP4430_ADC_START_VALUE, + .adc_end_val = OMAP4430_ADC_END_VALUE, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + .sensors = { + { + .registers = &omap4430_mpu_temp_sensor_registers, + .ts_data = &omap4430_mpu_temp_sensor_data, + .domain = "cpu", + .slope = OMAP_GRADIENT_SLOPE_4430, + .constant = OMAP_GRADIENT_CONST_4430, + .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4430, + .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4430, + .register_cooling = ti_thermal_register_cpu_cooling, + .unregister_cooling = ti_thermal_unregister_cpu_cooling, + }, + }, + .sensor_count = 1, +}; +/* + * OMAP4460 has one instance of thermal sensor for MPU + * need to describe the individual bit fields + */ +static struct temp_sensor_registers +omap4460_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = OMAP4460_TEMP_SENSOR_CTRL_OFFSET, + .bgap_tempsoff_mask = OMAP4460_BGAP_TEMPSOFF_MASK, + .bgap_soc_mask = OMAP4460_BGAP_TEMP_SENSOR_SOC_MASK, + .bgap_eocz_mask = OMAP4460_BGAP_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = OMAP4460_BGAP_TEMP_SENSOR_DTEMP_MASK, + + .bgap_mask_ctrl = OMAP4460_BGAP_CTRL_OFFSET, + .mask_hot_mask = OMAP4460_MASK_HOT_MASK, + .mask_cold_mask = OMAP4460_MASK_COLD_MASK, + + .bgap_mode_ctrl = OMAP4460_BGAP_CTRL_OFFSET, + .mode_ctrl_mask = OMAP4460_SINGLE_MODE_MASK, + + .bgap_counter = OMAP4460_BGAP_COUNTER_OFFSET, + .counter_mask = OMAP4460_COUNTER_MASK, + + .bgap_threshold = OMAP4460_BGAP_THRESHOLD_OFFSET, + .threshold_thot_mask = OMAP4460_T_HOT_MASK, + .threshold_tcold_mask = OMAP4460_T_COLD_MASK, + + .tshut_threshold = OMAP4460_BGAP_TSHUT_OFFSET, + .tshut_hot_mask = OMAP4460_TSHUT_HOT_MASK, + .tshut_cold_mask = OMAP4460_TSHUT_COLD_MASK, + + .bgap_status = OMAP4460_BGAP_STATUS_OFFSET, + .status_clean_stop_mask = OMAP4460_CLEAN_STOP_MASK, + .status_bgap_alert_mask = OMAP4460_BGAP_ALERT_MASK, + .status_hot_mask = OMAP4460_HOT_FLAG_MASK, + .status_cold_mask = OMAP4460_COLD_FLAG_MASK, + + .bgap_efuse = OMAP4460_FUSE_OPP_BGAP, +}; + +/* Thresholds and limits for OMAP4460 MPU temperature sensor */ +static struct temp_sensor_data omap4460_mpu_temp_sensor_data = { + .tshut_hot = OMAP4460_TSHUT_HOT, + .tshut_cold = OMAP4460_TSHUT_COLD, + .t_hot = OMAP4460_T_HOT, + .t_cold = OMAP4460_T_COLD, + .min_freq = OMAP4460_MIN_FREQ, + .max_freq = OMAP4460_MAX_FREQ, + .max_temp = OMAP4460_MAX_TEMP, + .min_temp = OMAP4460_MIN_TEMP, + .hyst_val = OMAP4460_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* + * Temperature values in milli degree celsius + * ADC code values from 530 to 923 + */ +static const int +omap4460_adc_to_temp[OMAP4460_ADC_END_VALUE - OMAP4460_ADC_START_VALUE + 1] = { + -40000, -40000, -40000, -40000, -39800, -39400, -39000, -38600, -38200, + -37800, -37300, -36800, -36400, -36000, -35600, -35200, -34800, + -34300, -33800, -33400, -33000, -32600, -32200, -31800, -31300, + -30800, -30400, -30000, -29600, -29200, -28700, -28200, -27800, + -27400, -27000, -26600, -26200, -25700, -25200, -24800, -24400, + -24000, -23600, -23200, -22700, -22200, -21800, -21400, -21000, + -20600, -20200, -19700, -19200, -18800, -18400, -18000, -17600, + -17200, -16700, -16200, -15800, -15400, -15000, -14600, -14200, + -13700, -13200, -12800, -12400, -12000, -11600, -11200, -10700, + -10200, -9800, -9400, -9000, -8600, -8200, -7700, -7200, -6800, + -6400, -6000, -5600, -5200, -4800, -4300, -3800, -3400, -3000, + -2600, -2200, -1800, -1300, -800, -400, 0, 400, 800, 1200, 1600, + 2100, 2600, 3000, 3400, 3800, 4200, 4600, 5100, 5600, 6000, 6400, + 6800, 7200, 7600, 8000, 8500, 9000, 9400, 9800, 10200, 10600, 11000, + 11400, 11900, 12400, 12800, 13200, 13600, 14000, 14400, 14800, + 15300, 15800, 16200, 16600, 17000, 17400, 17800, 18200, 18700, + 19200, 19600, 20000, 20400, 20800, 21200, 21600, 22100, 22600, + 23000, 23400, 23800, 24200, 24600, 25000, 25400, 25900, 26400, + 26800, 27200, 27600, 28000, 28400, 28800, 29300, 29800, 30200, + 30600, 31000, 31400, 31800, 32200, 32600, 33100, 33600, 34000, + 34400, 34800, 35200, 35600, 36000, 36400, 36800, 37300, 37800, + 38200, 38600, 39000, 39400, 39800, 40200, 40600, 41100, 41600, + 42000, 42400, 42800, 43200, 43600, 44000, 44400, 44800, 45300, + 45800, 46200, 46600, 47000, 47400, 47800, 48200, 48600, 49000, + 49500, 50000, 50400, 50800, 51200, 51600, 52000, 52400, 52800, + 53200, 53700, 54200, 54600, 55000, 55400, 55800, 56200, 56600, + 57000, 57400, 57800, 58200, 58700, 59200, 59600, 60000, 60400, + 60800, 61200, 61600, 62000, 62400, 62800, 63300, 63800, 64200, + 64600, 65000, 65400, 65800, 66200, 66600, 67000, 67400, 67800, + 68200, 68700, 69200, 69600, 70000, 70400, 70800, 71200, 71600, + 72000, 72400, 72800, 73200, 73600, 74100, 74600, 75000, 75400, + 75800, 76200, 76600, 77000, 77400, 77800, 78200, 78600, 79000, + 79400, 79800, 80300, 80800, 81200, 81600, 82000, 82400, 82800, + 83200, 83600, 84000, 84400, 84800, 85200, 85600, 86000, 86400, + 86800, 87300, 87800, 88200, 88600, 89000, 89400, 89800, 90200, + 90600, 91000, 91400, 91800, 92200, 92600, 93000, 93400, 93800, + 94200, 94600, 95000, 95500, 96000, 96400, 96800, 97200, 97600, + 98000, 98400, 98800, 99200, 99600, 100000, 100400, 100800, 101200, + 101600, 102000, 102400, 102800, 103200, 103600, 104000, 104400, + 104800, 105200, 105600, 106100, 106600, 107000, 107400, 107800, + 108200, 108600, 109000, 109400, 109800, 110200, 110600, 111000, + 111400, 111800, 112200, 112600, 113000, 113400, 113800, 114200, + 114600, 115000, 115400, 115800, 116200, 116600, 117000, 117400, + 117800, 118200, 118600, 119000, 119400, 119800, 120200, 120600, + 121000, 121400, 121800, 122200, 122600, 123000, 123400, 123800, 124200, + 124600, 124900, 125000, 125000, 125000, 125000 +}; + +/* OMAP4460 data */ +const struct ti_bandgap_data omap4460_data = { + .features = TI_BANDGAP_FEATURE_TSHUT | + TI_BANDGAP_FEATURE_TSHUT_CONFIG | + TI_BANDGAP_FEATURE_TALERT | + TI_BANDGAP_FEATURE_MODE_CONFIG | + TI_BANDGAP_FEATURE_POWER_SWITCH | + TI_BANDGAP_FEATURE_CLK_CTRL | + TI_BANDGAP_FEATURE_COUNTER, + .fclock_name = "bandgap_ts_fclk", + .div_ck_name = "div_ts_ck", + .conv_table = omap4460_adc_to_temp, + .adc_start_val = OMAP4460_ADC_START_VALUE, + .adc_end_val = OMAP4460_ADC_END_VALUE, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + .report_temperature = ti_thermal_report_sensor_temperature, + .sensors = { + { + .registers = &omap4460_mpu_temp_sensor_registers, + .ts_data = &omap4460_mpu_temp_sensor_data, + .domain = "cpu", + .slope = OMAP_GRADIENT_SLOPE_4460, + .constant = OMAP_GRADIENT_CONST_4460, + .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4460, + .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4460, + .register_cooling = ti_thermal_register_cpu_cooling, + .unregister_cooling = ti_thermal_unregister_cpu_cooling, + }, + }, + .sensor_count = 1, +}; + +/* OMAP4470 data */ +const struct ti_bandgap_data omap4470_data = { + .features = TI_BANDGAP_FEATURE_TSHUT | + TI_BANDGAP_FEATURE_TSHUT_CONFIG | + TI_BANDGAP_FEATURE_TALERT | + TI_BANDGAP_FEATURE_MODE_CONFIG | + TI_BANDGAP_FEATURE_POWER_SWITCH | + TI_BANDGAP_FEATURE_CLK_CTRL | + TI_BANDGAP_FEATURE_COUNTER, + .fclock_name = "bandgap_ts_fclk", + .div_ck_name = "div_ts_ck", + .conv_table = omap4460_adc_to_temp, + .adc_start_val = OMAP4460_ADC_START_VALUE, + .adc_end_val = OMAP4460_ADC_END_VALUE, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + .report_temperature = ti_thermal_report_sensor_temperature, + .sensors = { + { + .registers = &omap4460_mpu_temp_sensor_registers, + .ts_data = &omap4460_mpu_temp_sensor_data, + .domain = "cpu", + .slope = OMAP_GRADIENT_SLOPE_4470, + .constant = OMAP_GRADIENT_CONST_4470, + .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_4470, + .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_4470, + .register_cooling = ti_thermal_register_cpu_cooling, + .unregister_cooling = ti_thermal_unregister_cpu_cooling, + }, + }, + .sensor_count = 1, +}; diff --git a/drivers/thermal/ti-soc-thermal/omap4xxx-bandgap.h b/drivers/thermal/ti-soc-thermal/omap4xxx-bandgap.h new file mode 100644 index 0000000..6f2de3a --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/omap4xxx-bandgap.h @@ -0,0 +1,175 @@ +/* + * OMAP4xxx bandgap registers, bitfields and temperature definitions + * + * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ +#ifndef __OMAP4XXX_BANDGAP_H +#define __OMAP4XXX_BANDGAP_H + +/** + * *** OMAP4430 *** + * + * Below, in sequence, are the Register definitions, + * the bitfields and the temperature definitions for OMAP4430. + */ + +/** + * OMAP4430 register definitions + * + * Registers are defined as offsets. The offsets are + * relative to FUSE_OPP_BGAP on 4430. + */ + +/* OMAP4430.FUSE_OPP_BGAP */ +#define OMAP4430_FUSE_OPP_BGAP 0x0 + +/* OMAP4430.TEMP_SENSOR */ +#define OMAP4430_TEMP_SENSOR_CTRL_OFFSET 0xCC + +/** + * Register and bit definitions for OMAP4430 + * + * All the macros bellow define the required bits for + * controlling temperature on OMAP4430. Bit defines are + * grouped by register. + */ + +/* OMAP4430.TEMP_SENSOR bits */ +#define OMAP4430_BGAP_TEMPSOFF_MASK BIT(12) +#define OMAP4430_BGAP_TSHUT_MASK BIT(11) +#define OMAP4430_SINGLE_MODE_MASK BIT(10) +#define OMAP4430_BGAP_TEMP_SENSOR_SOC_MASK BIT(9) +#define OMAP4430_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(8) +#define OMAP4430_BGAP_TEMP_SENSOR_DTEMP_MASK (0xff << 0) + +/** + * Temperature limits and thresholds for OMAP4430 + * + * All the macros bellow are definitions for handling the + * ADC conversions and representation of temperature limits + * and thresholds for OMAP4430. + */ + +/* ADC conversion table limits */ +#define OMAP4430_ADC_START_VALUE 0 +#define OMAP4430_ADC_END_VALUE 127 +/* bandgap clock limits (no control on 4430) */ +#define OMAP4430_MAX_FREQ 32768 +#define OMAP4430_MIN_FREQ 32768 +/* sensor limits */ +#define OMAP4430_MIN_TEMP -40000 +#define OMAP4430_MAX_TEMP 125000 +#define OMAP4430_HYST_VAL 5000 + +/** + * *** OMAP4460 *** Applicable for OMAP4470 + * + * Below, in sequence, are the Register definitions, + * the bitfields and the temperature definitions for OMAP4460. + */ + +/** + * OMAP4460 register definitions + * + * Registers are defined as offsets. The offsets are + * relative to FUSE_OPP_BGAP on 4460. + */ + +/* OMAP4460.FUSE_OPP_BGAP */ +#define OMAP4460_FUSE_OPP_BGAP 0x0 + +/* OMAP4460.TEMP_SENSOR */ +#define OMAP4460_TEMP_SENSOR_CTRL_OFFSET 0xCC + +/* OMAP4460.BANDGAP_CTRL */ +#define OMAP4460_BGAP_CTRL_OFFSET 0x118 + +/* OMAP4460.BANDGAP_COUNTER */ +#define OMAP4460_BGAP_COUNTER_OFFSET 0x11C + +/* OMAP4460.BANDGAP_THRESHOLD */ +#define OMAP4460_BGAP_THRESHOLD_OFFSET 0x120 + +/* OMAP4460.TSHUT_THRESHOLD */ +#define OMAP4460_BGAP_TSHUT_OFFSET 0x124 + +/* OMAP4460.BANDGAP_STATUS */ +#define OMAP4460_BGAP_STATUS_OFFSET 0x128 + +/** + * Register bitfields for OMAP4460 + * + * All the macros bellow define the required bits for + * controlling temperature on OMAP4460. Bit defines are + * grouped by register. + */ +/* OMAP4460.TEMP_SENSOR bits */ +#define OMAP4460_BGAP_TEMPSOFF_MASK BIT(13) +#define OMAP4460_BGAP_TEMP_SENSOR_SOC_MASK BIT(11) +#define OMAP4460_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(10) +#define OMAP4460_BGAP_TEMP_SENSOR_DTEMP_MASK (0x3ff << 0) + +/* OMAP4460.BANDGAP_CTRL bits */ +#define OMAP4460_SINGLE_MODE_MASK BIT(31) +#define OMAP4460_MASK_HOT_MASK BIT(1) +#define OMAP4460_MASK_COLD_MASK BIT(0) + +/* OMAP4460.BANDGAP_COUNTER bits */ +#define OMAP4460_COUNTER_MASK (0xffffff << 0) + +/* OMAP4460.BANDGAP_THRESHOLD bits */ +#define OMAP4460_T_HOT_MASK (0x3ff << 16) +#define OMAP4460_T_COLD_MASK (0x3ff << 0) + +/* OMAP4460.TSHUT_THRESHOLD bits */ +#define OMAP4460_TSHUT_HOT_MASK (0x3ff << 16) +#define OMAP4460_TSHUT_COLD_MASK (0x3ff << 0) + +/* OMAP4460.BANDGAP_STATUS bits */ +#define OMAP4460_CLEAN_STOP_MASK BIT(3) +#define OMAP4460_BGAP_ALERT_MASK BIT(2) +#define OMAP4460_HOT_FLAG_MASK BIT(1) +#define OMAP4460_COLD_FLAG_MASK BIT(0) + +/** + * Temperature limits and thresholds for OMAP4460 + * + * All the macros bellow are definitions for handling the + * ADC conversions and representation of temperature limits + * and thresholds for OMAP4460. + */ + +/* ADC conversion table limits */ +#define OMAP4460_ADC_START_VALUE 530 +#define OMAP4460_ADC_END_VALUE 932 +/* bandgap clock limits */ +#define OMAP4460_MAX_FREQ 1500000 +#define OMAP4460_MIN_FREQ 1000000 +/* sensor limits */ +#define OMAP4460_MIN_TEMP -40000 +#define OMAP4460_MAX_TEMP 123000 +#define OMAP4460_HYST_VAL 5000 +/* interrupts thresholds */ +#define OMAP4460_TSHUT_HOT 900 /* 122 deg C */ +#define OMAP4460_TSHUT_COLD 895 /* 100 deg C */ +#define OMAP4460_T_HOT 800 /* 73 deg C */ +#define OMAP4460_T_COLD 795 /* 71 deg C */ + +#endif /* __OMAP4XXX_BANDGAP_H */ diff --git a/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c new file mode 100644 index 0000000..eff0c80 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/omap5-thermal-data.c @@ -0,0 +1,359 @@ +/* + * OMAP5 thermal driver. + * + * Copyright (C) 2011-2012 Texas Instruments Inc. + * Contact: + * Eduardo Valentin + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "ti-thermal.h" +#include "ti-bandgap.h" +#include "omap5xxx-bandgap.h" + +/* + * OMAP5430 has three instances of thermal sensor for MPU, GPU & CORE, + * need to describe the individual registers and bit fields. + */ + +/* + * OMAP5430 MPU thermal sensor register offset and bit-fields + */ +static struct temp_sensor_registers +omap5430_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_MPU_OFFSET, + .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, + .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, + + .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, + .mask_hot_mask = OMAP5430_MASK_HOT_MPU_MASK, + .mask_cold_mask = OMAP5430_MASK_COLD_MPU_MASK, + .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, + .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, + .mask_freeze_mask = OMAP5430_MASK_FREEZE_MPU_MASK, + .mask_clear_mask = OMAP5430_MASK_CLEAR_MPU_MASK, + .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_MPU_MASK, + + + .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, + .counter_mask = OMAP5430_COUNTER_MASK, + + .bgap_threshold = OMAP5430_BGAP_THRESHOLD_MPU_OFFSET, + .threshold_thot_mask = OMAP5430_T_HOT_MASK, + .threshold_tcold_mask = OMAP5430_T_COLD_MASK, + + .tshut_threshold = OMAP5430_BGAP_TSHUT_MPU_OFFSET, + .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, + .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, + + .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, + .status_clean_stop_mask = 0x0, + .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, + .status_hot_mask = OMAP5430_HOT_MPU_FLAG_MASK, + .status_cold_mask = OMAP5430_COLD_MPU_FLAG_MASK, + + .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_MPU_OFFSET, + .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_MPU_0_OFFSET, + .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_MPU_1_OFFSET, + .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_MPU_2_OFFSET, + .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_MPU_3_OFFSET, + .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_MPU_4_OFFSET, + .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_MPU, +}; + +/* + * OMAP5430 GPU thermal sensor register offset and bit-fields + */ +static struct temp_sensor_registers +omap5430_gpu_temp_sensor_registers = { + .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_GPU_OFFSET, + .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, + .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, + + .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, + .mask_hot_mask = OMAP5430_MASK_HOT_GPU_MASK, + .mask_cold_mask = OMAP5430_MASK_COLD_GPU_MASK, + .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, + .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, + .mask_freeze_mask = OMAP5430_MASK_FREEZE_GPU_MASK, + .mask_clear_mask = OMAP5430_MASK_CLEAR_GPU_MASK, + .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_GPU_MASK, + + .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, + .counter_mask = OMAP5430_COUNTER_MASK, + + .bgap_threshold = OMAP5430_BGAP_THRESHOLD_GPU_OFFSET, + .threshold_thot_mask = OMAP5430_T_HOT_MASK, + .threshold_tcold_mask = OMAP5430_T_COLD_MASK, + + .tshut_threshold = OMAP5430_BGAP_TSHUT_GPU_OFFSET, + .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, + .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, + + .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, + .status_clean_stop_mask = 0x0, + .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, + .status_hot_mask = OMAP5430_HOT_GPU_FLAG_MASK, + .status_cold_mask = OMAP5430_COLD_GPU_FLAG_MASK, + + .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_GPU_OFFSET, + .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_GPU_0_OFFSET, + .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_GPU_1_OFFSET, + .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_GPU_2_OFFSET, + .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_GPU_3_OFFSET, + .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_GPU_4_OFFSET, + + .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_GPU, +}; + +/* + * OMAP5430 CORE thermal sensor register offset and bit-fields + */ +static struct temp_sensor_registers +omap5430_core_temp_sensor_registers = { + .temp_sensor_ctrl = OMAP5430_TEMP_SENSOR_CORE_OFFSET, + .bgap_tempsoff_mask = OMAP5430_BGAP_TEMPSOFF_MASK, + .bgap_eocz_mask = OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK, + .bgap_dtemp_mask = OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK, + + .bgap_mask_ctrl = OMAP5430_BGAP_CTRL_OFFSET, + .mask_hot_mask = OMAP5430_MASK_HOT_CORE_MASK, + .mask_cold_mask = OMAP5430_MASK_COLD_CORE_MASK, + .mask_sidlemode_mask = OMAP5430_MASK_SIDLEMODE_MASK, + .mask_counter_delay_mask = OMAP5430_MASK_COUNTER_DELAY_MASK, + .mask_freeze_mask = OMAP5430_MASK_FREEZE_CORE_MASK, + .mask_clear_mask = OMAP5430_MASK_CLEAR_CORE_MASK, + .mask_clear_accum_mask = OMAP5430_MASK_CLEAR_ACCUM_CORE_MASK, + + .bgap_counter = OMAP5430_BGAP_CTRL_OFFSET, + .counter_mask = OMAP5430_COUNTER_MASK, + + .bgap_threshold = OMAP5430_BGAP_THRESHOLD_CORE_OFFSET, + .threshold_thot_mask = OMAP5430_T_HOT_MASK, + .threshold_tcold_mask = OMAP5430_T_COLD_MASK, + + .tshut_threshold = OMAP5430_BGAP_TSHUT_CORE_OFFSET, + .tshut_hot_mask = OMAP5430_TSHUT_HOT_MASK, + .tshut_cold_mask = OMAP5430_TSHUT_COLD_MASK, + + .bgap_status = OMAP5430_BGAP_STATUS_OFFSET, + .status_clean_stop_mask = 0x0, + .status_bgap_alert_mask = OMAP5430_BGAP_ALERT_MASK, + .status_hot_mask = OMAP5430_HOT_CORE_FLAG_MASK, + .status_cold_mask = OMAP5430_COLD_CORE_FLAG_MASK, + + .bgap_cumul_dtemp = OMAP5430_BGAP_CUMUL_DTEMP_CORE_OFFSET, + .ctrl_dtemp_0 = OMAP5430_BGAP_DTEMP_CORE_0_OFFSET, + .ctrl_dtemp_1 = OMAP5430_BGAP_DTEMP_CORE_1_OFFSET, + .ctrl_dtemp_2 = OMAP5430_BGAP_DTEMP_CORE_2_OFFSET, + .ctrl_dtemp_3 = OMAP5430_BGAP_DTEMP_CORE_3_OFFSET, + .ctrl_dtemp_4 = OMAP5430_BGAP_DTEMP_CORE_4_OFFSET, + + .bgap_efuse = OMAP5430_FUSE_OPP_BGAP_CORE, +}; + +/* Thresholds and limits for OMAP5430 MPU temperature sensor */ +static struct temp_sensor_data omap5430_mpu_temp_sensor_data = { + .tshut_hot = OMAP5430_MPU_TSHUT_HOT, + .tshut_cold = OMAP5430_MPU_TSHUT_COLD, + .t_hot = OMAP5430_MPU_T_HOT, + .t_cold = OMAP5430_MPU_T_COLD, + .min_freq = OMAP5430_MPU_MIN_FREQ, + .max_freq = OMAP5430_MPU_MAX_FREQ, + .max_temp = OMAP5430_MPU_MAX_TEMP, + .min_temp = OMAP5430_MPU_MIN_TEMP, + .hyst_val = OMAP5430_MPU_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for OMAP5430 GPU temperature sensor */ +static struct temp_sensor_data omap5430_gpu_temp_sensor_data = { + .tshut_hot = OMAP5430_GPU_TSHUT_HOT, + .tshut_cold = OMAP5430_GPU_TSHUT_COLD, + .t_hot = OMAP5430_GPU_T_HOT, + .t_cold = OMAP5430_GPU_T_COLD, + .min_freq = OMAP5430_GPU_MIN_FREQ, + .max_freq = OMAP5430_GPU_MAX_FREQ, + .max_temp = OMAP5430_GPU_MAX_TEMP, + .min_temp = OMAP5430_GPU_MIN_TEMP, + .hyst_val = OMAP5430_GPU_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* Thresholds and limits for OMAP5430 CORE temperature sensor */ +static struct temp_sensor_data omap5430_core_temp_sensor_data = { + .tshut_hot = OMAP5430_CORE_TSHUT_HOT, + .tshut_cold = OMAP5430_CORE_TSHUT_COLD, + .t_hot = OMAP5430_CORE_T_HOT, + .t_cold = OMAP5430_CORE_T_COLD, + .min_freq = OMAP5430_CORE_MIN_FREQ, + .max_freq = OMAP5430_CORE_MAX_FREQ, + .max_temp = OMAP5430_CORE_MAX_TEMP, + .min_temp = OMAP5430_CORE_MIN_TEMP, + .hyst_val = OMAP5430_CORE_HYST_VAL, + .update_int1 = 1000, + .update_int2 = 2000, +}; + +/* + * OMAP54xx ES2.0 : Temperature values in milli degree celsius + * ADC code values from 540 to 945 + */ +static int +omap5430_adc_to_temp[ + OMAP5430_ADC_END_VALUE - OMAP5430_ADC_START_VALUE + 1] = { + /* Index 540 - 549 */ + -40000, -40000, -40000, -40000, -39800, -39400, -39000, -38600, -38200, + -37800, + /* Index 550 - 559 */ + -37400, -37000, -36600, -36200, -35800, -35300, -34700, -34200, -33800, + -33400, + /* Index 560 - 569 */ + -33000, -32600, -32200, -31800, -31400, -31000, -30600, -30200, -29800, + -29400, + /* Index 570 - 579 */ + -29000, -28600, -28200, -27700, -27100, -26600, -26200, -25800, -25400, + -25000, + /* Index 580 - 589 */ + -24600, -24200, -23800, -23400, -23000, -22600, -22200, -21600, -21400, + -21000, + /* Index 590 - 599 */ + -20500, -19900, -19400, -19000, -18600, -18200, -17800, -17400, -17000, + -16600, + /* Index 600 - 609 */ + -16200, -15800, -15400, -15000, -14600, -14200, -13800, -13400, -13000, + -12500, + /* Index 610 - 619 */ + -11900, -11400, -11000, -10600, -10200, -9800, -9400, -9000, -8600, + -8200, + /* Index 620 - 629 */ + -7800, -7400, -7000, -6600, -6200, -5800, -5400, -5000, -4500, -3900, + /* Index 630 - 639 */ + -3400, -3000, -2600, -2200, -1800, -1400, -1000, -600, -200, 200, + /* Index 640 - 649 */ + 600, 1000, 1400, 1800, 2200, 2600, 3000, 3400, 3900, 4500, + /* Index 650 - 659 */ + 5000, 5400, 5800, 6200, 6600, 7000, 7400, 7800, 8200, 8600, + /* Index 660 - 669 */ + 9000, 9400, 9800, 10200, 10600, 11000, 11400, 11800, 12200, 12700, + /* Index 670 - 679 */ + 13300, 13800, 14200, 14600, 15000, 15400, 15800, 16200, 16600, 17000, + /* Index 680 - 689 */ + 17400, 17800, 18200, 18600, 19000, 19400, 19800, 20200, 20600, 21100, + /* Index 690 - 699 */ + 21400, 21900, 22500, 23000, 23400, 23800, 24200, 24600, 25000, 25400, + /* Index 700 - 709 */ + 25800, 26200, 26600, 27000, 27400, 27800, 28200, 28600, 29000, 29400, + /* Index 710 - 719 */ + 29800, 30200, 30600, 31000, 31400, 31900, 32500, 33000, 33400, 33800, + /* Index 720 - 729 */ + 34200, 34600, 35000, 35400, 35800, 36200, 36600, 37000, 37400, 37800, + /* Index 730 - 739 */ + 38200, 38600, 39000, 39400, 39800, 40200, 40600, 41000, 41400, 41800, + /* Index 740 - 749 */ + 42200, 42600, 43100, 43700, 44200, 44600, 45000, 45400, 45800, 46200, + /* Index 750 - 759 */ + 46600, 47000, 47400, 47800, 48200, 48600, 49000, 49400, 49800, 50200, + /* Index 760 - 769 */ + 50600, 51000, 51400, 51800, 52200, 52600, 53000, 53400, 53800, 54200, + /* Index 770 - 779 */ + 54600, 55000, 55400, 55900, 56500, 57000, 57400, 57800, 58200, 58600, + /* Index 780 - 789 */ + 59000, 59400, 59800, 60200, 60600, 61000, 61400, 61800, 62200, 62600, + /* Index 790 - 799 */ + 63000, 63400, 63800, 64200, 64600, 65000, 65400, 65800, 66200, 66600, + /* Index 800 - 809 */ + 67000, 67400, 67800, 68200, 68600, 69000, 69400, 69800, 70200, 70600, + /* Index 810 - 819 */ + 71000, 71500, 72100, 72600, 73000, 73400, 73800, 74200, 74600, 75000, + /* Index 820 - 829 */ + 75400, 75800, 76200, 76600, 77000, 77400, 77800, 78200, 78600, 79000, + /* Index 830 - 839 */ + 79400, 79800, 80200, 80600, 81000, 81400, 81800, 82200, 82600, 83000, + /* Index 840 - 849 */ + 83400, 83800, 84200, 84600, 85000, 85400, 85800, 86200, 86600, 87000, + /* Index 850 - 859 */ + 87400, 87800, 88200, 88600, 89000, 89400, 89800, 90200, 90600, 91000, + /* Index 860 - 869 */ + 91400, 91800, 92200, 92600, 93000, 93400, 93800, 94200, 94600, 95000, + /* Index 870 - 879 */ + 95400, 95800, 96200, 96600, 97000, 97500, 98100, 98600, 99000, 99400, + /* Index 880 - 889 */ + 99800, 100200, 100600, 101000, 101400, 101800, 102200, 102600, 103000, + 103400, + /* Index 890 - 899 */ + 103800, 104200, 104600, 105000, 105400, 105800, 106200, 106600, 107000, + 107400, + /* Index 900 - 909 */ + 107800, 108200, 108600, 109000, 109400, 109800, 110200, 110600, 111000, + 111400, + /* Index 910 - 919 */ + 111800, 112200, 112600, 113000, 113400, 113800, 114200, 114600, 115000, + 115400, + /* Index 920 - 929 */ + 115800, 116200, 116600, 117000, 117400, 117800, 118200, 118600, 119000, + 119400, + /* Index 930 - 939 */ + 119800, 120200, 120600, 121000, 121400, 121800, 122400, 122600, 123000, + 123400, + /* Index 940 - 945 */ + 123800, 1242000, 124600, 124900, 125000, 125000, +}; + +/* OMAP54xx ES2.0 data */ +const struct ti_bandgap_data omap5430_data = { + .features = TI_BANDGAP_FEATURE_TSHUT_CONFIG | + TI_BANDGAP_FEATURE_FREEZE_BIT | + TI_BANDGAP_FEATURE_TALERT | + TI_BANDGAP_FEATURE_COUNTER_DELAY | + TI_BANDGAP_FEATURE_HISTORY_BUFFER, + .fclock_name = "l3instr_ts_gclk_div", + .div_ck_name = "l3instr_ts_gclk_div", + .conv_table = omap5430_adc_to_temp, + .adc_start_val = OMAP5430_ADC_START_VALUE, + .adc_end_val = OMAP5430_ADC_END_VALUE, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + .report_temperature = ti_thermal_report_sensor_temperature, + .sensors = { + { + .registers = &omap5430_mpu_temp_sensor_registers, + .ts_data = &omap5430_mpu_temp_sensor_data, + .domain = "cpu", + .register_cooling = ti_thermal_register_cpu_cooling, + .unregister_cooling = ti_thermal_unregister_cpu_cooling, + .slope = OMAP_GRADIENT_SLOPE_5430_CPU, + .constant = OMAP_GRADIENT_CONST_5430_CPU, + .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU, + .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_CPU, + }, + { + .registers = &omap5430_gpu_temp_sensor_registers, + .ts_data = &omap5430_gpu_temp_sensor_data, + .domain = "gpu", + .slope = OMAP_GRADIENT_SLOPE_5430_GPU, + .constant = OMAP_GRADIENT_CONST_5430_GPU, + .slope_pcb = OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU, + .constant_pcb = OMAP_GRADIENT_CONST_W_PCB_5430_GPU, + }, + { + .registers = &omap5430_core_temp_sensor_registers, + .ts_data = &omap5430_core_temp_sensor_data, + .domain = "core", + }, + }, + .sensor_count = 3, +}; diff --git a/drivers/thermal/ti-soc-thermal/omap5xxx-bandgap.h b/drivers/thermal/ti-soc-thermal/omap5xxx-bandgap.h new file mode 100644 index 0000000..400b55d --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/omap5xxx-bandgap.h @@ -0,0 +1,200 @@ +/* + * OMAP5xxx bandgap registers, bitfields and temperature definitions + * + * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ +#ifndef __OMAP5XXX_BANDGAP_H +#define __OMAP5XXX_BANDGAP_H + +/** + * *** OMAP5430 *** + * + * Below, in sequence, are the Register definitions, + * the bitfields and the temperature definitions for OMAP5430. + */ + +/** + * OMAP5430 register definitions + * + * Registers are defined as offsets. The offsets are + * relative to FUSE_OPP_BGAP_GPU on 5430. + * + * Register below are grouped by domain (not necessarily in offset order) + */ + +/* OMAP5430.GPU register offsets */ +#define OMAP5430_FUSE_OPP_BGAP_GPU 0x0 +#define OMAP5430_TEMP_SENSOR_GPU_OFFSET 0x150 +#define OMAP5430_BGAP_THRESHOLD_GPU_OFFSET 0x1A8 +#define OMAP5430_BGAP_TSHUT_GPU_OFFSET 0x1B4 +#define OMAP5430_BGAP_CUMUL_DTEMP_GPU_OFFSET 0x1C0 +#define OMAP5430_BGAP_DTEMP_GPU_0_OFFSET 0x1F4 +#define OMAP5430_BGAP_DTEMP_GPU_1_OFFSET 0x1F8 +#define OMAP5430_BGAP_DTEMP_GPU_2_OFFSET 0x1FC +#define OMAP5430_BGAP_DTEMP_GPU_3_OFFSET 0x200 +#define OMAP5430_BGAP_DTEMP_GPU_4_OFFSET 0x204 + +/* OMAP5430.MPU register offsets */ +#define OMAP5430_FUSE_OPP_BGAP_MPU 0x4 +#define OMAP5430_TEMP_SENSOR_MPU_OFFSET 0x14C +#define OMAP5430_BGAP_THRESHOLD_MPU_OFFSET 0x1A4 +#define OMAP5430_BGAP_TSHUT_MPU_OFFSET 0x1B0 +#define OMAP5430_BGAP_CUMUL_DTEMP_MPU_OFFSET 0x1BC +#define OMAP5430_BGAP_DTEMP_MPU_0_OFFSET 0x1E0 +#define OMAP5430_BGAP_DTEMP_MPU_1_OFFSET 0x1E4 +#define OMAP5430_BGAP_DTEMP_MPU_2_OFFSET 0x1E8 +#define OMAP5430_BGAP_DTEMP_MPU_3_OFFSET 0x1EC +#define OMAP5430_BGAP_DTEMP_MPU_4_OFFSET 0x1F0 + +/* OMAP5430.MPU register offsets */ +#define OMAP5430_FUSE_OPP_BGAP_CORE 0x8 +#define OMAP5430_TEMP_SENSOR_CORE_OFFSET 0x154 +#define OMAP5430_BGAP_THRESHOLD_CORE_OFFSET 0x1AC +#define OMAP5430_BGAP_TSHUT_CORE_OFFSET 0x1B8 +#define OMAP5430_BGAP_CUMUL_DTEMP_CORE_OFFSET 0x1C4 +#define OMAP5430_BGAP_DTEMP_CORE_0_OFFSET 0x208 +#define OMAP5430_BGAP_DTEMP_CORE_1_OFFSET 0x20C +#define OMAP5430_BGAP_DTEMP_CORE_2_OFFSET 0x210 +#define OMAP5430_BGAP_DTEMP_CORE_3_OFFSET 0x214 +#define OMAP5430_BGAP_DTEMP_CORE_4_OFFSET 0x218 + +/* OMAP5430.common register offsets */ +#define OMAP5430_BGAP_CTRL_OFFSET 0x1A0 +#define OMAP5430_BGAP_STATUS_OFFSET 0x1C8 + +/** + * Register bitfields for OMAP5430 + * + * All the macros bellow define the required bits for + * controlling temperature on OMAP5430. Bit defines are + * grouped by register. + */ + +/* OMAP5430.TEMP_SENSOR */ +#define OMAP5430_BGAP_TEMP_SENSOR_SOC_MASK BIT(12) +#define OMAP5430_BGAP_TEMPSOFF_MASK BIT(11) +#define OMAP5430_BGAP_TEMP_SENSOR_EOCZ_MASK BIT(10) +#define OMAP5430_BGAP_TEMP_SENSOR_DTEMP_MASK (0x3ff << 0) + +/* OMAP5430.BANDGAP_CTRL */ +#define OMAP5430_MASK_SIDLEMODE_MASK (0x3 << 30) +#define OMAP5430_MASK_COUNTER_DELAY_MASK (0x7 << 27) +#define OMAP5430_MASK_FREEZE_CORE_MASK BIT(23) +#define OMAP5430_MASK_FREEZE_GPU_MASK BIT(22) +#define OMAP5430_MASK_FREEZE_MPU_MASK BIT(21) +#define OMAP5430_MASK_CLEAR_CORE_MASK BIT(20) +#define OMAP5430_MASK_CLEAR_GPU_MASK BIT(19) +#define OMAP5430_MASK_CLEAR_MPU_MASK BIT(18) +#define OMAP5430_MASK_CLEAR_ACCUM_CORE_MASK BIT(17) +#define OMAP5430_MASK_CLEAR_ACCUM_GPU_MASK BIT(16) +#define OMAP5430_MASK_CLEAR_ACCUM_MPU_MASK BIT(15) +#define OMAP5430_MASK_HOT_CORE_MASK BIT(5) +#define OMAP5430_MASK_COLD_CORE_MASK BIT(4) +#define OMAP5430_MASK_HOT_GPU_MASK BIT(3) +#define OMAP5430_MASK_COLD_GPU_MASK BIT(2) +#define OMAP5430_MASK_HOT_MPU_MASK BIT(1) +#define OMAP5430_MASK_COLD_MPU_MASK BIT(0) + +/* OMAP5430.BANDGAP_COUNTER */ +#define OMAP5430_COUNTER_MASK (0xffffff << 0) + +/* OMAP5430.BANDGAP_THRESHOLD */ +#define OMAP5430_T_HOT_MASK (0x3ff << 16) +#define OMAP5430_T_COLD_MASK (0x3ff << 0) + +/* OMAP5430.TSHUT_THRESHOLD */ +#define OMAP5430_TSHUT_HOT_MASK (0x3ff << 16) +#define OMAP5430_TSHUT_COLD_MASK (0x3ff << 0) + +/* OMAP5430.BANDGAP_CUMUL_DTEMP_MPU */ +#define OMAP5430_CUMUL_DTEMP_MPU_MASK (0xffffffff << 0) + +/* OMAP5430.BANDGAP_CUMUL_DTEMP_GPU */ +#define OMAP5430_CUMUL_DTEMP_GPU_MASK (0xffffffff << 0) + +/* OMAP5430.BANDGAP_CUMUL_DTEMP_CORE */ +#define OMAP5430_CUMUL_DTEMP_CORE_MASK (0xffffffff << 0) + +/* OMAP5430.BANDGAP_STATUS */ +#define OMAP5430_BGAP_ALERT_MASK BIT(31) +#define OMAP5430_HOT_CORE_FLAG_MASK BIT(5) +#define OMAP5430_COLD_CORE_FLAG_MASK BIT(4) +#define OMAP5430_HOT_GPU_FLAG_MASK BIT(3) +#define OMAP5430_COLD_GPU_FLAG_MASK BIT(2) +#define OMAP5430_HOT_MPU_FLAG_MASK BIT(1) +#define OMAP5430_COLD_MPU_FLAG_MASK BIT(0) + +/** + * Temperature limits and thresholds for OMAP5430 + * + * All the macros bellow are definitions for handling the + * ADC conversions and representation of temperature limits + * and thresholds for OMAP5430. Definitions are grouped + * by temperature domain. + */ + +/* OMAP5430.common temperature definitions */ +/* ADC conversion table limits */ +#define OMAP5430_ADC_START_VALUE 540 +#define OMAP5430_ADC_END_VALUE 945 + +/* OMAP5430.GPU temperature definitions */ +/* bandgap clock limits */ +#define OMAP5430_GPU_MAX_FREQ 1500000 +#define OMAP5430_GPU_MIN_FREQ 1000000 +/* sensor limits */ +#define OMAP5430_GPU_MIN_TEMP -40000 +#define OMAP5430_GPU_MAX_TEMP 125000 +#define OMAP5430_GPU_HYST_VAL 5000 +/* interrupts thresholds */ +#define OMAP5430_GPU_TSHUT_HOT 915 +#define OMAP5430_GPU_TSHUT_COLD 900 +#define OMAP5430_GPU_T_HOT 800 +#define OMAP5430_GPU_T_COLD 795 + +/* OMAP5430.MPU temperature definitions */ +/* bandgap clock limits */ +#define OMAP5430_MPU_MAX_FREQ 1500000 +#define OMAP5430_MPU_MIN_FREQ 1000000 +/* sensor limits */ +#define OMAP5430_MPU_MIN_TEMP -40000 +#define OMAP5430_MPU_MAX_TEMP 125000 +#define OMAP5430_MPU_HYST_VAL 5000 +/* interrupts thresholds */ +#define OMAP5430_MPU_TSHUT_HOT 915 +#define OMAP5430_MPU_TSHUT_COLD 900 +#define OMAP5430_MPU_T_HOT 800 +#define OMAP5430_MPU_T_COLD 795 + +/* OMAP5430.CORE temperature definitions */ +/* bandgap clock limits */ +#define OMAP5430_CORE_MAX_FREQ 1500000 +#define OMAP5430_CORE_MIN_FREQ 1000000 +/* sensor limits */ +#define OMAP5430_CORE_MIN_TEMP -40000 +#define OMAP5430_CORE_MAX_TEMP 125000 +#define OMAP5430_CORE_HYST_VAL 5000 +/* interrupts thresholds */ +#define OMAP5430_CORE_TSHUT_HOT 915 +#define OMAP5430_CORE_TSHUT_COLD 900 +#define OMAP5430_CORE_T_HOT 800 +#define OMAP5430_CORE_T_COLD 795 + +#endif /* __OMAP5XXX_BANDGAP_H */ diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c new file mode 100644 index 0000000..9dfd471 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -0,0 +1,1556 @@ +/* + * TI Bandgap temperature sensor driver + * + * Copyright (C) 2011-2012 Texas Instruments Incorporated - http://www.ti.com/ + * Author: J Keerthy + * Author: Moiz Sonasath + * Couple of fixes, DT and MFD adaptation: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ti-bandgap.h" + +/*** Helper functions to access registers and their bitfields ***/ + +/** + * ti_bandgap_readl() - simple read helper function + * @bgp: pointer to ti_bandgap structure + * @reg: desired register (offset) to be read + * + * Helper function to read bandgap registers. It uses the io remapped area. + * Return: the register value. + */ +static u32 ti_bandgap_readl(struct ti_bandgap *bgp, u32 reg) +{ + return readl(bgp->base + reg); +} + +/** + * ti_bandgap_writel() - simple write helper function + * @bgp: pointer to ti_bandgap structure + * @val: desired register value to be written + * @reg: desired register (offset) to be written + * + * Helper function to write bandgap registers. It uses the io remapped area. + */ +static void ti_bandgap_writel(struct ti_bandgap *bgp, u32 val, u32 reg) +{ + writel(val, bgp->base + reg); +} + +/** + * DOC: macro to update bits. + * + * RMW_BITS() - used to read, modify and update bandgap bitfields. + * The value passed will be shifted. + */ +#define RMW_BITS(bgp, id, reg, mask, val) \ +do { \ + struct temp_sensor_registers *t; \ + u32 r; \ + \ + t = bgp->conf->sensors[(id)].registers; \ + r = ti_bandgap_readl(bgp, t->reg); \ + r &= ~t->mask; \ + r |= (val) << __ffs(t->mask); \ + ti_bandgap_writel(bgp, r, t->reg); \ +} while (0) + +/*** Basic helper functions ***/ + +/** + * ti_bandgap_power() - controls the power state of a bandgap device + * @bgp: pointer to ti_bandgap structure + * @on: desired power state (1 - on, 0 - off) + * + * Used to power on/off a bandgap device instance. Only used on those + * that features tempsoff bit. + * + * Return: 0 on success, -ENOTSUPP if tempsoff is not supported. + */ +static int ti_bandgap_power(struct ti_bandgap *bgp, bool on) +{ + int i, ret = 0; + + if (!TI_BANDGAP_HAS(bgp, POWER_SWITCH)) { + ret = -ENOTSUPP; + goto exit; + } + + for (i = 0; i < bgp->conf->sensor_count; i++) + /* active on 0 */ + RMW_BITS(bgp, i, temp_sensor_ctrl, bgap_tempsoff_mask, !on); + +exit: + return ret; +} + +/** + * ti_bandgap_read_temp() - helper function to read sensor temperature + * @bgp: pointer to ti_bandgap structure + * @id: bandgap sensor id + * + * Function to concentrate the steps to read sensor temperature register. + * This function is desired because, depending on bandgap device version, + * it might be needed to freeze the bandgap state machine, before fetching + * the register value. + * + * Return: temperature in ADC values. + */ +static u32 ti_bandgap_read_temp(struct ti_bandgap *bgp, int id) +{ + struct temp_sensor_registers *tsr; + u32 temp, reg; + + tsr = bgp->conf->sensors[id].registers; + reg = tsr->temp_sensor_ctrl; + + if (TI_BANDGAP_HAS(bgp, FREEZE_BIT)) { + RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 1); + /* + * In case we cannot read from cur_dtemp / dtemp_0, + * then we read from the last valid temp read + */ + reg = tsr->ctrl_dtemp_1; + } + + /* read temperature */ + temp = ti_bandgap_readl(bgp, reg); + temp &= tsr->bgap_dtemp_mask; + + if (TI_BANDGAP_HAS(bgp, FREEZE_BIT)) + RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 0); + + return temp; +} + +/*** IRQ handlers ***/ + +/** + * ti_bandgap_talert_irq_handler() - handles Temperature alert IRQs + * @irq: IRQ number + * @data: private data (struct ti_bandgap *) + * + * This is the Talert handler. Use it only if bandgap device features + * HAS(TALERT). This handler goes over all sensors and checks their + * conditions and acts accordingly. In case there are events pending, + * it will reset the event mask to wait for the opposite event (next event). + * Every time there is a new event, it will be reported to thermal layer. + * + * Return: IRQ_HANDLED + */ +static irqreturn_t ti_bandgap_talert_irq_handler(int irq, void *data) +{ + struct ti_bandgap *bgp = data; + struct temp_sensor_registers *tsr; + u32 t_hot = 0, t_cold = 0, ctrl; + int i; + + spin_lock(&bgp->lock); + for (i = 0; i < bgp->conf->sensor_count; i++) { + tsr = bgp->conf->sensors[i].registers; + ctrl = ti_bandgap_readl(bgp, tsr->bgap_status); + + /* Read the status of t_hot */ + t_hot = ctrl & tsr->status_hot_mask; + + /* Read the status of t_cold */ + t_cold = ctrl & tsr->status_cold_mask; + + if (!t_cold && !t_hot) + continue; + + ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); + /* + * One TALERT interrupt: Two sources + * If the interrupt is due to t_hot then mask t_hot and + * and unmask t_cold else mask t_cold and unmask t_hot + */ + if (t_hot) { + ctrl &= ~tsr->mask_hot_mask; + ctrl |= tsr->mask_cold_mask; + } else if (t_cold) { + ctrl &= ~tsr->mask_cold_mask; + ctrl |= tsr->mask_hot_mask; + } + + ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl); + + dev_dbg(bgp->dev, + "%s: IRQ from %s sensor: hotevent %d coldevent %d\n", + __func__, bgp->conf->sensors[i].domain, + t_hot, t_cold); + + /* report temperature to whom may concern */ + if (bgp->conf->report_temperature) + bgp->conf->report_temperature(bgp, i); + } + spin_unlock(&bgp->lock); + + return IRQ_HANDLED; +} + +/** + * ti_bandgap_tshut_irq_handler() - handles Temperature shutdown signal + * @irq: IRQ number + * @data: private data (unused) + * + * This is the Tshut handler. Use it only if bandgap device features + * HAS(TSHUT). If any sensor fires the Tshut signal, we simply shutdown + * the system. + * + * Return: IRQ_HANDLED + */ +static irqreturn_t ti_bandgap_tshut_irq_handler(int irq, void *data) +{ + pr_emerg("%s: TSHUT temperature reached. Needs shut down...\n", + __func__); + + orderly_poweroff(true); + + return IRQ_HANDLED; +} + +/*** Helper functions which manipulate conversion ADC <-> mi Celsius ***/ + +/** + * ti_bandgap_adc_to_mcelsius() - converts an ADC value to mCelsius scale + * @bgp: struct ti_bandgap pointer + * @adc_val: value in ADC representation + * @t: address where to write the resulting temperature in mCelsius + * + * Simple conversion from ADC representation to mCelsius. In case the ADC value + * is out of the ADC conv table range, it returns -ERANGE, 0 on success. + * The conversion table is indexed by the ADC values. + * + * Return: 0 if conversion was successful, else -ERANGE in case the @adc_val + * argument is out of the ADC conv table range. + */ +static +int ti_bandgap_adc_to_mcelsius(struct ti_bandgap *bgp, int adc_val, int *t) +{ + const struct ti_bandgap_data *conf = bgp->conf; + int ret = 0; + + /* look up for temperature in the table and return the temperature */ + if (adc_val < conf->adc_start_val || adc_val > conf->adc_end_val) { + ret = -ERANGE; + goto exit; + } + + *t = bgp->conf->conv_table[adc_val - conf->adc_start_val]; + +exit: + return ret; +} + +/** + * ti_bandgap_mcelsius_to_adc() - converts a mCelsius value to ADC scale + * @bgp: struct ti_bandgap pointer + * @temp: value in mCelsius + * @adc: address where to write the resulting temperature in ADC representation + * + * Simple conversion from mCelsius to ADC values. In case the temp value + * is out of the ADC conv table range, it returns -ERANGE, 0 on success. + * The conversion table is indexed by the ADC values. + * + * Return: 0 if conversion was successful, else -ERANGE in case the @temp + * argument is out of the ADC conv table range. + */ +static +int ti_bandgap_mcelsius_to_adc(struct ti_bandgap *bgp, long temp, int *adc) +{ + const struct ti_bandgap_data *conf = bgp->conf; + const int *conv_table = bgp->conf->conv_table; + int high, low, mid, ret = 0; + + low = 0; + high = conf->adc_end_val - conf->adc_start_val; + mid = (high + low) / 2; + + if (temp < conv_table[low] || temp > conv_table[high]) { + ret = -ERANGE; + goto exit; + } + + while (low < high) { + if (temp < conv_table[mid]) + high = mid - 1; + else + low = mid + 1; + mid = (low + high) / 2; + } + + *adc = conf->adc_start_val + low; + +exit: + return ret; +} + +/** + * ti_bandgap_add_hyst() - add hysteresis (in mCelsius) to an ADC value + * @bgp: struct ti_bandgap pointer + * @adc_val: temperature value in ADC representation + * @hyst_val: hysteresis value in mCelsius + * @sum: address where to write the resulting temperature (in ADC scale) + * + * Adds an hysteresis value (in mCelsius) to a ADC temperature value. + * + * Return: 0 on success, -ERANGE otherwise. + */ +static +int ti_bandgap_add_hyst(struct ti_bandgap *bgp, int adc_val, int hyst_val, + u32 *sum) +{ + int temp, ret; + + /* + * Need to add in the mcelsius domain, so we have a temperature + * the conv_table range + */ + ret = ti_bandgap_adc_to_mcelsius(bgp, adc_val, &temp); + if (ret < 0) + goto exit; + + temp += hyst_val; + + ret = ti_bandgap_mcelsius_to_adc(bgp, temp, sum); + +exit: + return ret; +} + +/*** Helper functions handling device Alert/Shutdown signals ***/ + +/** + * ti_bandgap_unmask_interrupts() - unmasks the events of thot & tcold + * @bgp: struct ti_bandgap pointer + * @id: bandgap sensor id + * @t_hot: hot temperature value to trigger alert signal + * @t_cold: cold temperature value to trigger alert signal + * + * Checks the requested t_hot and t_cold values and configures the IRQ event + * masks accordingly. Call this function only if bandgap features HAS(TALERT). + */ +static void ti_bandgap_unmask_interrupts(struct ti_bandgap *bgp, int id, + u32 t_hot, u32 t_cold) +{ + struct temp_sensor_registers *tsr; + u32 temp, reg_val; + + /* Read the current on die temperature */ + temp = ti_bandgap_read_temp(bgp, id); + + tsr = bgp->conf->sensors[id].registers; + reg_val = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); + + if (temp < t_hot) + reg_val |= tsr->mask_hot_mask; + else + reg_val &= ~tsr->mask_hot_mask; + + if (t_cold < temp) + reg_val |= tsr->mask_cold_mask; + else + reg_val &= ~tsr->mask_cold_mask; + ti_bandgap_writel(bgp, reg_val, tsr->bgap_mask_ctrl); +} + +/** + * ti_bandgap_update_alert_threshold() - sequence to update thresholds + * @bgp: struct ti_bandgap pointer + * @id: bandgap sensor id + * @val: value (ADC) of a new threshold + * @hot: desired threshold to be updated. true if threshold hot, false if + * threshold cold + * + * It will program the required thresholds (hot and cold) for TALERT signal. + * This function can be used to update t_hot or t_cold, depending on @hot value. + * It checks the resulting t_hot and t_cold values, based on the new passed @val + * and configures the thresholds so that t_hot is always greater than t_cold. + * Call this function only if bandgap features HAS(TALERT). + * + * Return: 0 if no error, else corresponding error + */ +static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id, + int val, bool hot) +{ + struct temp_sensor_data *ts_data = bgp->conf->sensors[id].ts_data; + struct temp_sensor_registers *tsr; + u32 thresh_val, reg_val, t_hot, t_cold; + int err = 0; + + tsr = bgp->conf->sensors[id].registers; + + /* obtain the current value */ + thresh_val = ti_bandgap_readl(bgp, tsr->bgap_threshold); + t_cold = (thresh_val & tsr->threshold_tcold_mask) >> + __ffs(tsr->threshold_tcold_mask); + t_hot = (thresh_val & tsr->threshold_thot_mask) >> + __ffs(tsr->threshold_thot_mask); + if (hot) + t_hot = val; + else + t_cold = val; + + if (t_cold > t_hot) { + if (hot) + err = ti_bandgap_add_hyst(bgp, t_hot, + -ts_data->hyst_val, + &t_cold); + else + err = ti_bandgap_add_hyst(bgp, t_cold, + ts_data->hyst_val, + &t_hot); + } + + /* write the new threshold values */ + reg_val = thresh_val & + ~(tsr->threshold_thot_mask | tsr->threshold_tcold_mask); + reg_val |= (t_hot << __ffs(tsr->threshold_thot_mask)) | + (t_cold << __ffs(tsr->threshold_tcold_mask)); + ti_bandgap_writel(bgp, reg_val, tsr->bgap_threshold); + + if (err) { + dev_err(bgp->dev, "failed to reprogram thot threshold\n"); + err = -EIO; + goto exit; + } + + ti_bandgap_unmask_interrupts(bgp, id, t_hot, t_cold); +exit: + return err; +} + +/** + * ti_bandgap_validate() - helper to check the sanity of a struct ti_bandgap + * @bgp: struct ti_bandgap pointer + * @id: bandgap sensor id + * + * Checks if the bandgap pointer is valid and if the sensor id is also + * applicable. + * + * Return: 0 if no errors, -EINVAL for invalid @bgp pointer or -ERANGE if + * @id cannot index @bgp sensors. + */ +static inline int ti_bandgap_validate(struct ti_bandgap *bgp, int id) +{ + int ret = 0; + + if (!bgp || IS_ERR(bgp)) { + pr_err("%s: invalid bandgap pointer\n", __func__); + ret = -EINVAL; + goto exit; + } + + if ((id < 0) || (id >= bgp->conf->sensor_count)) { + dev_err(bgp->dev, "%s: sensor id out of range (%d)\n", + __func__, id); + ret = -ERANGE; + } + +exit: + return ret; +} + +/** + * _ti_bandgap_write_threshold() - helper to update TALERT t_cold or t_hot + * @bgp: struct ti_bandgap pointer + * @id: bandgap sensor id + * @val: value (mCelsius) of a new threshold + * @hot: desired threshold to be updated. true if threshold hot, false if + * threshold cold + * + * It will update the required thresholds (hot and cold) for TALERT signal. + * This function can be used to update t_hot or t_cold, depending on @hot value. + * Validates the mCelsius range and update the requested threshold. + * Call this function only if bandgap features HAS(TALERT). + * + * Return: 0 if no error, else corresponding error value. + */ +static int _ti_bandgap_write_threshold(struct ti_bandgap *bgp, int id, int val, + bool hot) +{ + struct temp_sensor_data *ts_data; + struct temp_sensor_registers *tsr; + u32 adc_val; + int ret; + + ret = ti_bandgap_validate(bgp, id); + if (ret) + goto exit; + + if (!TI_BANDGAP_HAS(bgp, TALERT)) { + ret = -ENOTSUPP; + goto exit; + } + + ts_data = bgp->conf->sensors[id].ts_data; + tsr = bgp->conf->sensors[id].registers; + if (hot) { + if (val < ts_data->min_temp + ts_data->hyst_val) + ret = -EINVAL; + } else { + if (val > ts_data->max_temp + ts_data->hyst_val) + ret = -EINVAL; + } + + if (ret) + goto exit; + + ret = ti_bandgap_mcelsius_to_adc(bgp, val, &adc_val); + if (ret < 0) + goto exit; + + spin_lock(&bgp->lock); + ret = ti_bandgap_update_alert_threshold(bgp, id, adc_val, hot); + spin_unlock(&bgp->lock); + +exit: + return ret; +} + +/** + * _ti_bandgap_read_threshold() - helper to read TALERT t_cold or t_hot + * @bgp: struct ti_bandgap pointer + * @id: bandgap sensor id + * @val: value (mCelsius) of a threshold + * @hot: desired threshold to be read. true if threshold hot, false if + * threshold cold + * + * It will fetch the required thresholds (hot and cold) for TALERT signal. + * This function can be used to read t_hot or t_cold, depending on @hot value. + * Call this function only if bandgap features HAS(TALERT). + * + * Return: 0 if no error, -ENOTSUPP if it has no TALERT support, or the + * corresponding error value if some operation fails. + */ +static int _ti_bandgap_read_threshold(struct ti_bandgap *bgp, int id, + int *val, bool hot) +{ + struct temp_sensor_registers *tsr; + u32 temp, mask; + int ret = 0; + + ret = ti_bandgap_validate(bgp, id); + if (ret) + goto exit; + + if (!TI_BANDGAP_HAS(bgp, TALERT)) { + ret = -ENOTSUPP; + goto exit; + } + + tsr = bgp->conf->sensors[id].registers; + if (hot) + mask = tsr->threshold_thot_mask; + else + mask = tsr->threshold_tcold_mask; + + temp = ti_bandgap_readl(bgp, tsr->bgap_threshold); + temp = (temp & mask) >> __ffs(mask); + ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); + if (ret) { + dev_err(bgp->dev, "failed to read thot\n"); + ret = -EIO; + goto exit; + } + + *val = temp; + +exit: + return ret; +} + +/*** Exposed APIs ***/ + +/** + * ti_bandgap_read_thot() - reads sensor current thot + * @bgp: pointer to bandgap instance + * @id: sensor id + * @thot: resulting current thot value + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_read_thot(struct ti_bandgap *bgp, int id, int *thot) +{ + return _ti_bandgap_read_threshold(bgp, id, thot, true); +} + +/** + * ti_bandgap_write_thot() - sets sensor current thot + * @bgp: pointer to bandgap instance + * @id: sensor id + * @val: desired thot value + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_write_thot(struct ti_bandgap *bgp, int id, int val) +{ + return _ti_bandgap_write_threshold(bgp, id, val, true); +} + +/** + * ti_bandgap_read_tcold() - reads sensor current tcold + * @bgp: pointer to bandgap instance + * @id: sensor id + * @tcold: resulting current tcold value + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_read_tcold(struct ti_bandgap *bgp, int id, int *tcold) +{ + return _ti_bandgap_read_threshold(bgp, id, tcold, false); +} + +/** + * ti_bandgap_write_tcold() - sets the sensor tcold + * @bgp: pointer to bandgap instance + * @id: sensor id + * @val: desired tcold value + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_write_tcold(struct ti_bandgap *bgp, int id, int val) +{ + return _ti_bandgap_write_threshold(bgp, id, val, false); +} + +/** + * ti_bandgap_read_counter() - read the sensor counter + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: resulting update interval in miliseconds + */ +static void ti_bandgap_read_counter(struct ti_bandgap *bgp, int id, + int *interval) +{ + struct temp_sensor_registers *tsr; + int time; + + tsr = bgp->conf->sensors[id].registers; + time = ti_bandgap_readl(bgp, tsr->bgap_counter); + time = (time & tsr->counter_mask) >> + __ffs(tsr->counter_mask); + time = time * 1000 / bgp->clk_rate; + *interval = time; +} + +/** + * ti_bandgap_read_counter_delay() - read the sensor counter delay + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: resulting update interval in miliseconds + */ +static void ti_bandgap_read_counter_delay(struct ti_bandgap *bgp, int id, + int *interval) +{ + struct temp_sensor_registers *tsr; + int reg_val; + + tsr = bgp->conf->sensors[id].registers; + + reg_val = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl); + reg_val = (reg_val & tsr->mask_counter_delay_mask) >> + __ffs(tsr->mask_counter_delay_mask); + switch (reg_val) { + case 0: + *interval = 0; + break; + case 1: + *interval = 1; + break; + case 2: + *interval = 10; + break; + case 3: + *interval = 100; + break; + case 4: + *interval = 250; + break; + case 5: + *interval = 500; + break; + default: + dev_warn(bgp->dev, "Wrong counter delay value read from register %X", + reg_val); + } +} + +/** + * ti_bandgap_read_update_interval() - read the sensor update interval + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: resulting update interval in miliseconds + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_read_update_interval(struct ti_bandgap *bgp, int id, + int *interval) +{ + int ret = 0; + + ret = ti_bandgap_validate(bgp, id); + if (ret) + goto exit; + + if (!TI_BANDGAP_HAS(bgp, COUNTER) && + !TI_BANDGAP_HAS(bgp, COUNTER_DELAY)) { + ret = -ENOTSUPP; + goto exit; + } + + if (TI_BANDGAP_HAS(bgp, COUNTER)) { + ti_bandgap_read_counter(bgp, id, interval); + goto exit; + } + + ti_bandgap_read_counter_delay(bgp, id, interval); +exit: + return ret; +} + +/** + * ti_bandgap_write_counter_delay() - set the counter_delay + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: desired update interval in miliseconds + * + * Return: 0 on success or the proper error code + */ +static int ti_bandgap_write_counter_delay(struct ti_bandgap *bgp, int id, + u32 interval) +{ + int rval; + + switch (interval) { + case 0: /* Immediate conversion */ + rval = 0x0; + break; + case 1: /* Conversion after ever 1ms */ + rval = 0x1; + break; + case 10: /* Conversion after ever 10ms */ + rval = 0x2; + break; + case 100: /* Conversion after ever 100ms */ + rval = 0x3; + break; + case 250: /* Conversion after ever 250ms */ + rval = 0x4; + break; + case 500: /* Conversion after ever 500ms */ + rval = 0x5; + break; + default: + dev_warn(bgp->dev, "Delay %d ms is not supported\n", interval); + return -EINVAL; + } + + spin_lock(&bgp->lock); + RMW_BITS(bgp, id, bgap_mask_ctrl, mask_counter_delay_mask, rval); + spin_unlock(&bgp->lock); + + return 0; +} + +/** + * ti_bandgap_write_counter() - set the bandgap sensor counter + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: desired update interval in miliseconds + */ +static void ti_bandgap_write_counter(struct ti_bandgap *bgp, int id, + u32 interval) +{ + interval = interval * bgp->clk_rate / 1000; + spin_lock(&bgp->lock); + RMW_BITS(bgp, id, bgap_counter, counter_mask, interval); + spin_unlock(&bgp->lock); +} + +/** + * ti_bandgap_write_update_interval() - set the update interval + * @bgp: pointer to bandgap instance + * @id: sensor id + * @interval: desired update interval in miliseconds + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_write_update_interval(struct ti_bandgap *bgp, + int id, u32 interval) +{ + int ret = ti_bandgap_validate(bgp, id); + if (ret) + goto exit; + + if (!TI_BANDGAP_HAS(bgp, COUNTER) && + !TI_BANDGAP_HAS(bgp, COUNTER_DELAY)) { + ret = -ENOTSUPP; + goto exit; + } + + if (TI_BANDGAP_HAS(bgp, COUNTER)) { + ti_bandgap_write_counter(bgp, id, interval); + goto exit; + } + + ret = ti_bandgap_write_counter_delay(bgp, id, interval); +exit: + return ret; +} + +/** + * ti_bandgap_read_temperature() - report current temperature + * @bgp: pointer to bandgap instance + * @id: sensor id + * @temperature: resulting temperature + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_read_temperature(struct ti_bandgap *bgp, int id, + int *temperature) +{ + u32 temp; + int ret; + + ret = ti_bandgap_validate(bgp, id); + if (ret) + return ret; + + spin_lock(&bgp->lock); + temp = ti_bandgap_read_temp(bgp, id); + spin_unlock(&bgp->lock); + + ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); + if (ret) + return -EIO; + + *temperature = temp; + + return 0; +} + +/** + * ti_bandgap_set_sensor_data() - helper function to store thermal + * framework related data. + * @bgp: pointer to bandgap instance + * @id: sensor id + * @data: thermal framework related data to be stored + * + * Return: 0 on success or the proper error code + */ +int ti_bandgap_set_sensor_data(struct ti_bandgap *bgp, int id, void *data) +{ + int ret = ti_bandgap_validate(bgp, id); + if (ret) + return ret; + + bgp->regval[id].data = data; + + return 0; +} + +/** + * ti_bandgap_get_sensor_data() - helper function to get thermal + * framework related data. + * @bgp: pointer to bandgap instance + * @id: sensor id + * + * Return: data stored by set function with sensor id on success or NULL + */ +void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id) +{ + int ret = ti_bandgap_validate(bgp, id); + if (ret) + return ERR_PTR(ret); + + return bgp->regval[id].data; +} + +/*** Helper functions used during device initialization ***/ + +/** + * ti_bandgap_force_single_read() - executes 1 single ADC conversion + * @bgp: pointer to struct ti_bandgap + * @id: sensor id which it is desired to read 1 temperature + * + * Used to initialize the conversion state machine and set it to a valid + * state. Called during device initialization and context restore events. + * + * Return: 0 + */ +static int +ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id) +{ + u32 temp = 0, counter = 1000; + + /* Select single conversion mode */ + if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) + RMW_BITS(bgp, id, bgap_mode_ctrl, mode_ctrl_mask, 0); + + /* Start of Conversion = 1 */ + RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 1); + /* Wait until DTEMP is updated */ + temp = ti_bandgap_read_temp(bgp, id); + + while ((temp == 0) && --counter) + temp = ti_bandgap_read_temp(bgp, id); + /* REVISIT: Check correct condition for end of conversion */ + + /* Start of Conversion = 0 */ + RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 0); + + return 0; +} + +/** + * ti_bandgap_set_continous_mode() - One time enabling of continuous mode + * @bgp: pointer to struct ti_bandgap + * + * Call this function only if HAS(MODE_CONFIG) is set. As this driver may + * be used for junction temperature monitoring, it is desirable that the + * sensors are operational all the time, so that alerts are generated + * properly. + * + * Return: 0 + */ +static int ti_bandgap_set_continuous_mode(struct ti_bandgap *bgp) +{ + int i; + + for (i = 0; i < bgp->conf->sensor_count; i++) { + /* Perform a single read just before enabling continuous */ + ti_bandgap_force_single_read(bgp, i); + RMW_BITS(bgp, i, bgap_mode_ctrl, mode_ctrl_mask, 1); + } + + return 0; +} + +/** + * ti_bandgap_get_trend() - To fetch the temperature trend of a sensor + * @bgp: pointer to struct ti_bandgap + * @id: id of the individual sensor + * @trend: Pointer to trend. + * + * This function needs to be called to fetch the temperature trend of a + * Particular sensor. The function computes the difference in temperature + * w.r.t time. For the bandgaps with built in history buffer the temperatures + * are read from the buffer and for those without the Buffer -ENOTSUPP is + * returned. + * + * Return: 0 if no error, else return corresponding error. If no + * error then the trend value is passed on to trend parameter + */ +int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend) +{ + struct temp_sensor_registers *tsr; + u32 temp1, temp2, reg1, reg2; + int t1, t2, interval, ret = 0; + + ret = ti_bandgap_validate(bgp, id); + if (ret) + goto exit; + + if (!TI_BANDGAP_HAS(bgp, HISTORY_BUFFER) || + !TI_BANDGAP_HAS(bgp, FREEZE_BIT)) { + ret = -ENOTSUPP; + goto exit; + } + + spin_lock(&bgp->lock); + + tsr = bgp->conf->sensors[id].registers; + + /* Freeze and read the last 2 valid readings */ + RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 1); + reg1 = tsr->ctrl_dtemp_1; + reg2 = tsr->ctrl_dtemp_2; + + /* read temperature from history buffer */ + temp1 = ti_bandgap_readl(bgp, reg1); + temp1 &= tsr->bgap_dtemp_mask; + + temp2 = ti_bandgap_readl(bgp, reg2); + temp2 &= tsr->bgap_dtemp_mask; + + /* Convert from adc values to mCelsius temperature */ + ret = ti_bandgap_adc_to_mcelsius(bgp, temp1, &t1); + if (ret) + goto unfreeze; + + ret = ti_bandgap_adc_to_mcelsius(bgp, temp2, &t2); + if (ret) + goto unfreeze; + + /* Fetch the update interval */ + ret = ti_bandgap_read_update_interval(bgp, id, &interval); + if (ret || !interval) + goto unfreeze; + + *trend = (t1 - t2) / interval; + + dev_dbg(bgp->dev, "The temperatures are t1 = %d and t2 = %d and trend =%d\n", + t1, t2, *trend); + +unfreeze: + RMW_BITS(bgp, id, bgap_mask_ctrl, mask_freeze_mask, 0); + spin_unlock(&bgp->lock); +exit: + return ret; +} + +/** + * ti_bandgap_tshut_init() - setup and initialize tshut handling + * @bgp: pointer to struct ti_bandgap + * @pdev: pointer to device struct platform_device + * + * Call this function only in case the bandgap features HAS(TSHUT). + * In this case, the driver needs to handle the TSHUT signal as an IRQ. + * The IRQ is wired as a GPIO, and for this purpose, it is required + * to specify which GPIO line is used. TSHUT IRQ is fired anytime + * one of the bandgap sensors violates the TSHUT high/hot threshold. + * And in that case, the system must go off. + * + * Return: 0 if no error, else error status + */ +static int ti_bandgap_tshut_init(struct ti_bandgap *bgp, + struct platform_device *pdev) +{ + int gpio_nr = bgp->tshut_gpio; + int status; + + /* Request for gpio_86 line */ + status = gpio_request(gpio_nr, "tshut"); + if (status < 0) { + dev_err(bgp->dev, "Could not request for TSHUT GPIO:%i\n", 86); + return status; + } + status = gpio_direction_input(gpio_nr); + if (status) { + dev_err(bgp->dev, "Cannot set input TSHUT GPIO %d\n", gpio_nr); + return status; + } + + status = request_irq(gpio_to_irq(gpio_nr), ti_bandgap_tshut_irq_handler, + IRQF_TRIGGER_RISING, "tshut", NULL); + if (status) { + gpio_free(gpio_nr); + dev_err(bgp->dev, "request irq failed for TSHUT"); + } + + return 0; +} + +/** + * ti_bandgap_alert_init() - setup and initialize talert handling + * @bgp: pointer to struct ti_bandgap + * @pdev: pointer to device struct platform_device + * + * Call this function only in case the bandgap features HAS(TALERT). + * In this case, the driver needs to handle the TALERT signals as an IRQs. + * TALERT is a normal IRQ and it is fired any time thresholds (hot or cold) + * are violated. In these situation, the driver must reprogram the thresholds, + * accordingly to specified policy. + * + * Return: 0 if no error, else return corresponding error. + */ +static int ti_bandgap_talert_init(struct ti_bandgap *bgp, + struct platform_device *pdev) +{ + int ret; + + bgp->irq = platform_get_irq(pdev, 0); + if (bgp->irq < 0) { + dev_err(&pdev->dev, "get_irq failed\n"); + return bgp->irq; + } + ret = request_threaded_irq(bgp->irq, NULL, + ti_bandgap_talert_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "talert", bgp); + if (ret) { + dev_err(&pdev->dev, "Request threaded irq failed.\n"); + return ret; + } + + return 0; +} + +static const struct of_device_id of_ti_bandgap_match[]; +/** + * ti_bandgap_build() - parse DT and setup a struct ti_bandgap + * @pdev: pointer to device struct platform_device + * + * Used to read the device tree properties accordingly to the bandgap + * matching version. Based on bandgap version and its capabilities it + * will build a struct ti_bandgap out of the required DT entries. + * + * Return: valid bandgap structure if successful, else returns ERR_PTR + * return value must be verified with IS_ERR. + */ +static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *of_id; + struct ti_bandgap *bgp; + struct resource *res; + int i; + + /* just for the sake */ + if (!node) { + dev_err(&pdev->dev, "no platform information available\n"); + return ERR_PTR(-EINVAL); + } + + bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL); + if (!bgp) { + dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); + return ERR_PTR(-ENOMEM); + } + + of_id = of_match_device(of_ti_bandgap_match, &pdev->dev); + if (of_id) + bgp->conf = of_id->data; + + /* register shadow for context save and restore */ + bgp->regval = devm_kzalloc(&pdev->dev, sizeof(*bgp->regval) * + bgp->conf->sensor_count, GFP_KERNEL); + if (!bgp) { + dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); + return ERR_PTR(-ENOMEM); + } + + i = 0; + do { + void __iomem *chunk; + + res = platform_get_resource(pdev, IORESOURCE_MEM, i); + if (!res) + break; + chunk = devm_ioremap_resource(&pdev->dev, res); + if (i == 0) + bgp->base = chunk; + if (IS_ERR(chunk)) + return ERR_CAST(chunk); + + i++; + } while (res); + + if (TI_BANDGAP_HAS(bgp, TSHUT)) { + bgp->tshut_gpio = of_get_gpio(node, 0); + if (!gpio_is_valid(bgp->tshut_gpio)) { + dev_err(&pdev->dev, "invalid gpio for tshut (%d)\n", + bgp->tshut_gpio); + return ERR_PTR(-EINVAL); + } + } + + return bgp; +} + +/*** Device driver call backs ***/ + +static +int ti_bandgap_probe(struct platform_device *pdev) +{ + struct ti_bandgap *bgp; + int clk_rate, ret = 0, i; + + bgp = ti_bandgap_build(pdev); + if (IS_ERR(bgp)) { + dev_err(&pdev->dev, "failed to fetch platform data\n"); + return PTR_ERR(bgp); + } + bgp->dev = &pdev->dev; + + if (TI_BANDGAP_HAS(bgp, TSHUT)) { + ret = ti_bandgap_tshut_init(bgp, pdev); + if (ret) { + dev_err(&pdev->dev, + "failed to initialize system tshut IRQ\n"); + return ret; + } + } + + bgp->fclock = clk_get(NULL, bgp->conf->fclock_name); + ret = IS_ERR(bgp->fclock); + if (ret) { + dev_err(&pdev->dev, "failed to request fclock reference\n"); + ret = PTR_ERR(bgp->fclock); + goto free_irqs; + } + + bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name); + ret = IS_ERR(bgp->div_clk); + if (ret) { + dev_err(&pdev->dev, + "failed to request div_ts_ck clock ref\n"); + ret = PTR_ERR(bgp->div_clk); + goto free_irqs; + } + + for (i = 0; i < bgp->conf->sensor_count; i++) { + struct temp_sensor_registers *tsr; + u32 val; + + tsr = bgp->conf->sensors[i].registers; + /* + * check if the efuse has a non-zero value if not + * it is an untrimmed sample and the temperatures + * may not be accurate + */ + val = ti_bandgap_readl(bgp, tsr->bgap_efuse); + if (ret || !val) + dev_info(&pdev->dev, + "Non-trimmed BGAP, Temp not accurate\n"); + } + + clk_rate = clk_round_rate(bgp->div_clk, + bgp->conf->sensors[0].ts_data->max_freq); + if (clk_rate < bgp->conf->sensors[0].ts_data->min_freq || + clk_rate == 0xffffffff) { + ret = -ENODEV; + dev_err(&pdev->dev, "wrong clock rate (%d)\n", clk_rate); + goto put_clks; + } + + ret = clk_set_rate(bgp->div_clk, clk_rate); + if (ret) + dev_err(&pdev->dev, "Cannot re-set clock rate. Continuing\n"); + + bgp->clk_rate = clk_rate; + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_prepare_enable(bgp->fclock); + + + spin_lock_init(&bgp->lock); + bgp->dev = &pdev->dev; + platform_set_drvdata(pdev, bgp); + + ti_bandgap_power(bgp, true); + + /* Set default counter to 1 for now */ + if (TI_BANDGAP_HAS(bgp, COUNTER)) + for (i = 0; i < bgp->conf->sensor_count; i++) + RMW_BITS(bgp, i, bgap_counter, counter_mask, 1); + + /* Set default thresholds for alert and shutdown */ + for (i = 0; i < bgp->conf->sensor_count; i++) { + struct temp_sensor_data *ts_data; + + ts_data = bgp->conf->sensors[i].ts_data; + + if (TI_BANDGAP_HAS(bgp, TALERT)) { + /* Set initial Talert thresholds */ + RMW_BITS(bgp, i, bgap_threshold, + threshold_tcold_mask, ts_data->t_cold); + RMW_BITS(bgp, i, bgap_threshold, + threshold_thot_mask, ts_data->t_hot); + /* Enable the alert events */ + RMW_BITS(bgp, i, bgap_mask_ctrl, mask_hot_mask, 1); + RMW_BITS(bgp, i, bgap_mask_ctrl, mask_cold_mask, 1); + } + + if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) { + /* Set initial Tshut thresholds */ + RMW_BITS(bgp, i, tshut_threshold, + tshut_hot_mask, ts_data->tshut_hot); + RMW_BITS(bgp, i, tshut_threshold, + tshut_cold_mask, ts_data->tshut_cold); + } + } + + if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) + ti_bandgap_set_continuous_mode(bgp); + + /* Set .250 seconds time as default counter */ + if (TI_BANDGAP_HAS(bgp, COUNTER)) + for (i = 0; i < bgp->conf->sensor_count; i++) + RMW_BITS(bgp, i, bgap_counter, counter_mask, + bgp->clk_rate / 4); + + /* Every thing is good? Then expose the sensors */ + for (i = 0; i < bgp->conf->sensor_count; i++) { + char *domain; + + if (bgp->conf->sensors[i].register_cooling) { + ret = bgp->conf->sensors[i].register_cooling(bgp, i); + if (ret) + goto remove_sensors; + } + + if (bgp->conf->expose_sensor) { + domain = bgp->conf->sensors[i].domain; + ret = bgp->conf->expose_sensor(bgp, i, domain); + if (ret) + goto remove_last_cooling; + } + } + + /* + * Enable the Interrupts once everything is set. Otherwise irq handler + * might be called as soon as it is enabled where as rest of framework + * is still getting initialised. + */ + if (TI_BANDGAP_HAS(bgp, TALERT)) { + ret = ti_bandgap_talert_init(bgp, pdev); + if (ret) { + dev_err(&pdev->dev, "failed to initialize Talert IRQ\n"); + i = bgp->conf->sensor_count; + goto disable_clk; + } + } + + return 0; + +remove_last_cooling: + if (bgp->conf->sensors[i].unregister_cooling) + bgp->conf->sensors[i].unregister_cooling(bgp, i); +remove_sensors: + for (i--; i >= 0; i--) { + if (bgp->conf->sensors[i].unregister_cooling) + bgp->conf->sensors[i].unregister_cooling(bgp, i); + if (bgp->conf->remove_sensor) + bgp->conf->remove_sensor(bgp, i); + } + ti_bandgap_power(bgp, false); +disable_clk: + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_disable_unprepare(bgp->fclock); +put_clks: + clk_put(bgp->fclock); + clk_put(bgp->div_clk); +free_irqs: + if (TI_BANDGAP_HAS(bgp, TSHUT)) { + free_irq(gpio_to_irq(bgp->tshut_gpio), NULL); + gpio_free(bgp->tshut_gpio); + } + + return ret; +} + +static +int ti_bandgap_remove(struct platform_device *pdev) +{ + struct ti_bandgap *bgp = platform_get_drvdata(pdev); + int i; + + /* First thing is to remove sensor interfaces */ + for (i = 0; i < bgp->conf->sensor_count; i++) { + if (bgp->conf->sensors[i].unregister_cooling) + bgp->conf->sensors[i].unregister_cooling(bgp, i); + + if (bgp->conf->remove_sensor) + bgp->conf->remove_sensor(bgp, i); + } + + ti_bandgap_power(bgp, false); + + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_disable_unprepare(bgp->fclock); + clk_put(bgp->fclock); + clk_put(bgp->div_clk); + + if (TI_BANDGAP_HAS(bgp, TALERT)) + free_irq(bgp->irq, bgp); + + if (TI_BANDGAP_HAS(bgp, TSHUT)) { + free_irq(gpio_to_irq(bgp->tshut_gpio), NULL); + gpio_free(bgp->tshut_gpio); + } + + return 0; +} + +#ifdef CONFIG_PM +static int ti_bandgap_save_ctxt(struct ti_bandgap *bgp) +{ + int i; + + for (i = 0; i < bgp->conf->sensor_count; i++) { + struct temp_sensor_registers *tsr; + struct temp_sensor_regval *rval; + + rval = &bgp->regval[i]; + tsr = bgp->conf->sensors[i].registers; + + if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) + rval->bg_mode_ctrl = ti_bandgap_readl(bgp, + tsr->bgap_mode_ctrl); + if (TI_BANDGAP_HAS(bgp, COUNTER)) + rval->bg_counter = ti_bandgap_readl(bgp, + tsr->bgap_counter); + if (TI_BANDGAP_HAS(bgp, TALERT)) { + rval->bg_threshold = ti_bandgap_readl(bgp, + tsr->bgap_threshold); + rval->bg_ctrl = ti_bandgap_readl(bgp, + tsr->bgap_mask_ctrl); + } + + if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) + rval->tshut_threshold = ti_bandgap_readl(bgp, + tsr->tshut_threshold); + } + + return 0; +} + +static int ti_bandgap_restore_ctxt(struct ti_bandgap *bgp) +{ + int i; + + for (i = 0; i < bgp->conf->sensor_count; i++) { + struct temp_sensor_registers *tsr; + struct temp_sensor_regval *rval; + u32 val = 0; + + rval = &bgp->regval[i]; + tsr = bgp->conf->sensors[i].registers; + + if (TI_BANDGAP_HAS(bgp, COUNTER)) + val = ti_bandgap_readl(bgp, tsr->bgap_counter); + + if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG)) + ti_bandgap_writel(bgp, rval->tshut_threshold, + tsr->tshut_threshold); + /* Force immediate temperature measurement and update + * of the DTEMP field + */ + ti_bandgap_force_single_read(bgp, i); + + if (TI_BANDGAP_HAS(bgp, COUNTER)) + ti_bandgap_writel(bgp, rval->bg_counter, + tsr->bgap_counter); + if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) + ti_bandgap_writel(bgp, rval->bg_mode_ctrl, + tsr->bgap_mode_ctrl); + if (TI_BANDGAP_HAS(bgp, TALERT)) { + ti_bandgap_writel(bgp, rval->bg_threshold, + tsr->bgap_threshold); + ti_bandgap_writel(bgp, rval->bg_ctrl, + tsr->bgap_mask_ctrl); + } + } + + return 0; +} + +static int ti_bandgap_suspend(struct device *dev) +{ + struct ti_bandgap *bgp = dev_get_drvdata(dev); + int err; + + err = ti_bandgap_save_ctxt(bgp); + ti_bandgap_power(bgp, false); + + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_disable_unprepare(bgp->fclock); + + return err; +} + +static int ti_bandgap_resume(struct device *dev) +{ + struct ti_bandgap *bgp = dev_get_drvdata(dev); + + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_prepare_enable(bgp->fclock); + + ti_bandgap_power(bgp, true); + + return ti_bandgap_restore_ctxt(bgp); +} +static const struct dev_pm_ops ti_bandgap_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(ti_bandgap_suspend, + ti_bandgap_resume) +}; + +#define DEV_PM_OPS (&ti_bandgap_dev_pm_ops) +#else +#define DEV_PM_OPS NULL +#endif + +static const struct of_device_id of_ti_bandgap_match[] = { +#ifdef CONFIG_OMAP4_THERMAL + { + .compatible = "ti,omap4430-bandgap", + .data = (void *)&omap4430_data, + }, + { + .compatible = "ti,omap4460-bandgap", + .data = (void *)&omap4460_data, + }, + { + .compatible = "ti,omap4470-bandgap", + .data = (void *)&omap4470_data, + }, +#endif +#ifdef CONFIG_OMAP5_THERMAL + { + .compatible = "ti,omap5430-bandgap", + .data = (void *)&omap5430_data, + }, +#endif +#ifdef CONFIG_DRA752_THERMAL + { + .compatible = "ti,dra752-bandgap", + .data = (void *)&dra752_data, + }, +#endif + /* Sentinel */ + { }, +}; +MODULE_DEVICE_TABLE(of, of_ti_bandgap_match); + +static struct platform_driver ti_bandgap_sensor_driver = { + .probe = ti_bandgap_probe, + .remove = ti_bandgap_remove, + .driver = { + .name = "ti-soc-thermal", + .pm = DEV_PM_OPS, + .of_match_table = of_ti_bandgap_match, + }, +}; + +module_platform_driver(ti_bandgap_sensor_driver); + +MODULE_DESCRIPTION("OMAP4+ bandgap temperature sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:ti-soc-thermal"); +MODULE_AUTHOR("Texas Instrument Inc."); diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h new file mode 100644 index 0000000..b3adf72 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h @@ -0,0 +1,408 @@ +/* + * OMAP4 Bandgap temperature sensor driver + * + * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ +#ifndef __TI_BANDGAP_H +#define __TI_BANDGAP_H + +#include +#include +#include + +/** + * DOC: bandgap driver data structure + * ================================== + * + * +----------+----------------+ + * | struct temp_sensor_regval | + * +---------------------------+ + * * (Array of) + * | + * | + * +-------------------+ +-----------------+ + * | struct ti_bandgap |-->| struct device * | + * +----------+--------+ +-----------------+ + * | + * | + * V + * +------------------------+ + * | struct ti_bandgap_data | + * +------------------------+ + * | + * | + * * (Array of) + * +------------+------------------------------------------------------+ + * | +----------+------------+ +-------------------------+ | + * | | struct ti_temp_sensor |-->| struct temp_sensor_data | | + * | +-----------------------+ +------------+------------+ | + * | | | + * | + | + * | V | + * | +----------+-------------------+ | + * | | struct temp_sensor_registers | | + * | +------------------------------+ | + * | | + * +-------------------------------------------------------------------+ + * + * Above is a simple diagram describing how the data structure below + * are organized. For each bandgap device there should be a ti_bandgap_data + * containing the device instance configuration, as well as, an array of + * sensors, representing every sensor instance present in this bandgap. + */ + +/** + * struct temp_sensor_registers - descriptor to access registers and bitfields + * @temp_sensor_ctrl: TEMP_SENSOR_CTRL register offset + * @bgap_tempsoff_mask: mask to temp_sensor_ctrl.tempsoff + * @bgap_soc_mask: mask to temp_sensor_ctrl.soc + * @bgap_eocz_mask: mask to temp_sensor_ctrl.eocz + * @bgap_dtemp_mask: mask to temp_sensor_ctrl.dtemp + * @bgap_mask_ctrl: BANDGAP_MASK_CTRL register offset + * @mask_hot_mask: mask to bandgap_mask_ctrl.mask_hot + * @mask_cold_mask: mask to bandgap_mask_ctrl.mask_cold + * @mask_sidlemode_mask: mask to bandgap_mask_ctrl.mask_sidlemode + * @mask_counter_delay_mask: mask to bandgap_mask_ctrl.mask_counter_delay + * @mask_freeze_mask: mask to bandgap_mask_ctrl.mask_free + * @mask_clear_mask: mask to bandgap_mask_ctrl.mask_clear + * @mask_clear_accum_mask: mask to bandgap_mask_ctrl.mask_clear_accum + * @bgap_mode_ctrl: BANDGAP_MODE_CTRL register offset + * @mode_ctrl_mask: mask to bandgap_mode_ctrl.mode_ctrl + * @bgap_counter: BANDGAP_COUNTER register offset + * @counter_mask: mask to bandgap_counter.counter + * @bgap_threshold: BANDGAP_THRESHOLD register offset (TALERT thresholds) + * @threshold_thot_mask: mask to bandgap_threhold.thot + * @threshold_tcold_mask: mask to bandgap_threhold.tcold + * @tshut_threshold: TSHUT_THRESHOLD register offset (TSHUT thresholds) + * @tshut_efuse_mask: mask to tshut_threshold.tshut_efuse + * @tshut_efuse_shift: shift to tshut_threshold.tshut_efuse + * @tshut_hot_mask: mask to tshut_threhold.thot + * @tshut_cold_mask: mask to tshut_threhold.thot + * @bgap_status: BANDGAP_STATUS register offset + * @status_clean_stop_mask: mask to bandgap_status.clean_stop + * @status_bgap_alert_mask: mask to bandgap_status.bandgap_alert + * @status_hot_mask: mask to bandgap_status.hot + * @status_cold_mask: mask to bandgap_status.cold + * @bgap_cumul_dtemp: BANDGAP_CUMUL_DTEMP register offset + * @ctrl_dtemp_0: CTRL_DTEMP0 register offset + * @ctrl_dtemp_1: CTRL_DTEMP1 register offset + * @ctrl_dtemp_2: CTRL_DTEMP2 register offset + * @ctrl_dtemp_3: CTRL_DTEMP3 register offset + * @ctrl_dtemp_4: CTRL_DTEMP4 register offset + * @bgap_efuse: BANDGAP_EFUSE register offset + * + * The register offsets and bitfields might change across + * OMAP and variants versions. Hence this struct serves as a + * descriptor map on how to access the registers and the bitfields. + * + * This descriptor contains registers of all versions of bandgap chips. + * Not all versions will use all registers, depending on the available + * features. Please read TRMs for descriptive explanation on each bitfield. + */ + +struct temp_sensor_registers { + u32 temp_sensor_ctrl; + u32 bgap_tempsoff_mask; + u32 bgap_soc_mask; + u32 bgap_eocz_mask; /* not used: but needs revisit */ + u32 bgap_dtemp_mask; + + u32 bgap_mask_ctrl; + u32 mask_hot_mask; + u32 mask_cold_mask; + u32 mask_sidlemode_mask; /* not used: but may be needed for pm */ + u32 mask_counter_delay_mask; + u32 mask_freeze_mask; + u32 mask_clear_mask; /* not used: but needed for trending */ + u32 mask_clear_accum_mask; /* not used: but needed for trending */ + + u32 bgap_mode_ctrl; + u32 mode_ctrl_mask; + + u32 bgap_counter; + u32 counter_mask; + + u32 bgap_threshold; + u32 threshold_thot_mask; + u32 threshold_tcold_mask; + + u32 tshut_threshold; + u32 tshut_efuse_mask; /* not used */ + u32 tshut_efuse_shift; /* not used */ + u32 tshut_hot_mask; + u32 tshut_cold_mask; + + u32 bgap_status; + u32 status_clean_stop_mask; /* not used: but needed for trending */ + u32 status_bgap_alert_mask; /* not used */ + u32 status_hot_mask; + u32 status_cold_mask; + + u32 bgap_cumul_dtemp; /* not used: but needed for trending */ + u32 ctrl_dtemp_0; /* not used: but needed for trending */ + u32 ctrl_dtemp_1; /* not used: but needed for trending */ + u32 ctrl_dtemp_2; /* not used: but needed for trending */ + u32 ctrl_dtemp_3; /* not used: but needed for trending */ + u32 ctrl_dtemp_4; /* not used: but needed for trending */ + u32 bgap_efuse; +}; + +/** + * struct temp_sensor_data - The thresholds and limits for temperature sensors. + * @tshut_hot: temperature to trigger a thermal reset (initial value) + * @tshut_cold: temp to get the plat out of reset due to thermal (init val) + * @t_hot: temperature to trigger a thermal alert (high initial value) + * @t_cold: temperature to trigger a thermal alert (low initial value) + * @min_freq: sensor minimum clock rate + * @max_freq: sensor maximum clock rate + * @max_temp: sensor maximum temperature + * @min_temp: sensor minimum temperature + * @hyst_val: temperature hysteresis considered while converting ADC values + * @update_int1: update interval + * @update_int2: update interval + * + * This data structure will hold the required thresholds and temperature limits + * for a specific temperature sensor, like shutdown temperature, alert + * temperature, clock / rate used, ADC conversion limits and update intervals + */ +struct temp_sensor_data { + u32 tshut_hot; + u32 tshut_cold; + u32 t_hot; + u32 t_cold; + u32 min_freq; + u32 max_freq; + int max_temp; + int min_temp; + int hyst_val; + u32 update_int1; /* not used */ + u32 update_int2; /* not used */ +}; + +struct ti_bandgap_data; + +/** + * struct temp_sensor_regval - temperature sensor register values and priv data + * @bg_mode_ctrl: temp sensor control register value + * @bg_ctrl: bandgap ctrl register value + * @bg_counter: bandgap counter value + * @bg_threshold: bandgap threshold register value + * @tshut_threshold: bandgap tshut register value + * @data: private data + * + * Data structure to save and restore bandgap register set context. Only + * required registers are shadowed, when needed. + */ +struct temp_sensor_regval { + u32 bg_mode_ctrl; + u32 bg_ctrl; + u32 bg_counter; + u32 bg_threshold; + u32 tshut_threshold; + void *data; +}; + +/** + * struct ti_bandgap - bandgap device structure + * @dev: struct device pointer + * @base: io memory base address + * @conf: struct with bandgap configuration set (# sensors, conv_table, etc) + * @regval: temperature sensor register values + * @fclock: pointer to functional clock of temperature sensor + * @div_clk: pointer to divider clock of temperature sensor fclk + * @lock: spinlock for ti_bandgap structure + * @irq: MPU IRQ number for thermal alert + * @tshut_gpio: GPIO where Tshut signal is routed + * @clk_rate: Holds current clock rate + * + * The bandgap device structure representing the bandgap device instance. + * It holds most of the dynamic stuff. Configurations and sensor specific + * entries are inside the @conf structure. + */ +struct ti_bandgap { + struct device *dev; + void __iomem *base; + const struct ti_bandgap_data *conf; + struct temp_sensor_regval *regval; + struct clk *fclock; + struct clk *div_clk; + spinlock_t lock; /* shields this struct */ + int irq; + int tshut_gpio; + u32 clk_rate; +}; + +/** + * struct ti_temp_sensor - bandgap temperature sensor configuration data + * @ts_data: pointer to struct with thresholds, limits of temperature sensor + * @registers: pointer to the list of register offsets and bitfields + * @domain: the name of the domain where the sensor is located + * @slope: sensor gradient slope info for hotspot extrapolation equation + * @constant: sensor gradient const info for hotspot extrapolation equation + * @slope_pcb: sensor gradient slope info for hotspot extrapolation equation + * with no external influence + * @constant_pcb: sensor gradient const info for hotspot extrapolation equation + * with no external influence + * @register_cooling: function to describe how this sensor is going to be cooled + * @unregister_cooling: function to release cooling data + * + * Data structure to describe a temperature sensor handled by a bandgap device. + * It should provide configuration details on this sensor, such as how to + * access the registers affecting this sensor, shadow register buffer, how to + * assess the gradient from hotspot, how to cooldown the domain when sensor + * reports too hot temperature. + */ +struct ti_temp_sensor { + struct temp_sensor_data *ts_data; + struct temp_sensor_registers *registers; + char *domain; + /* for hotspot extrapolation */ + const int slope; + const int constant; + const int slope_pcb; + const int constant_pcb; + int (*register_cooling)(struct ti_bandgap *bgp, int id); + int (*unregister_cooling)(struct ti_bandgap *bgp, int id); +}; + +/** + * DOC: ti bandgap feature types + * + * TI_BANDGAP_FEATURE_TSHUT - used when the thermal shutdown signal output + * of a bandgap device instance is routed to the processor. This means + * the system must react and perform the shutdown by itself (handle an + * IRQ, for instance). + * + * TI_BANDGAP_FEATURE_TSHUT_CONFIG - used when the bandgap device has control + * over the thermal shutdown configuration. This means that the thermal + * shutdown thresholds are programmable, for instance. + * + * TI_BANDGAP_FEATURE_TALERT - used when the bandgap device instance outputs + * a signal representing violation of programmable alert thresholds. + * + * TI_BANDGAP_FEATURE_MODE_CONFIG - used when it is possible to choose which + * mode, continuous or one shot, the bandgap device instance will operate. + * + * TI_BANDGAP_FEATURE_COUNTER - used when the bandgap device instance allows + * programming the update interval of its internal state machine. + * + * TI_BANDGAP_FEATURE_POWER_SWITCH - used when the bandgap device allows + * itself to be switched on/off. + * + * TI_BANDGAP_FEATURE_CLK_CTRL - used when the clocks feeding the bandgap + * device are gateable or not. + * + * TI_BANDGAP_FEATURE_FREEZE_BIT - used when the bandgap device features + * a history buffer that its update can be freezed/unfreezed. + * + * TI_BANDGAP_FEATURE_COUNTER_DELAY - used when the bandgap device features + * a delay programming based on distinct values. + * + * TI_BANDGAP_FEATURE_HISTORY_BUFFER - used when the bandgap device features + * a history buffer of temperatures. + * + * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a + * specific feature (above) or not. Return non-zero, if yes. + */ +#define TI_BANDGAP_FEATURE_TSHUT BIT(0) +#define TI_BANDGAP_FEATURE_TSHUT_CONFIG BIT(1) +#define TI_BANDGAP_FEATURE_TALERT BIT(2) +#define TI_BANDGAP_FEATURE_MODE_CONFIG BIT(3) +#define TI_BANDGAP_FEATURE_COUNTER BIT(4) +#define TI_BANDGAP_FEATURE_POWER_SWITCH BIT(5) +#define TI_BANDGAP_FEATURE_CLK_CTRL BIT(6) +#define TI_BANDGAP_FEATURE_FREEZE_BIT BIT(7) +#define TI_BANDGAP_FEATURE_COUNTER_DELAY BIT(8) +#define TI_BANDGAP_FEATURE_HISTORY_BUFFER BIT(9) +#define TI_BANDGAP_HAS(b, f) \ + ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f) + +/** + * struct ti_bandgap_data - ti bandgap data configuration structure + * @features: a bitwise flag set to describe the device features + * @conv_table: Pointer to ADC to temperature conversion table + * @adc_start_val: ADC conversion table starting value + * @adc_end_val: ADC conversion table ending value + * @fclock_name: clock name of the functional clock + * @div_ck_name: clock name of the clock divisor + * @sensor_count: count of temperature sensor within this bandgap device + * @report_temperature: callback to report thermal alert to thermal API + * @expose_sensor: callback to export sensor to thermal API + * @remove_sensor: callback to destroy sensor from thermal API + * @sensors: array of sensors present in this bandgap instance + * + * This is a data structure which should hold most of the static configuration + * of a bandgap device instance. It should describe which features this instance + * is capable of, the clock names to feed this device, the amount of sensors and + * their configuration representation, and how to export and unexport them to + * a thermal API. + */ +struct ti_bandgap_data { + unsigned int features; + const int *conv_table; + u32 adc_start_val; + u32 adc_end_val; + char *fclock_name; + char *div_ck_name; + int sensor_count; + int (*report_temperature)(struct ti_bandgap *bgp, int id); + int (*expose_sensor)(struct ti_bandgap *bgp, int id, char *domain); + int (*remove_sensor)(struct ti_bandgap *bgp, int id); + + /* this needs to be at the end */ + struct ti_temp_sensor sensors[]; +}; + +int ti_bandgap_read_thot(struct ti_bandgap *bgp, int id, int *thot); +int ti_bandgap_write_thot(struct ti_bandgap *bgp, int id, int val); +int ti_bandgap_read_tcold(struct ti_bandgap *bgp, int id, int *tcold); +int ti_bandgap_write_tcold(struct ti_bandgap *bgp, int id, int val); +int ti_bandgap_read_update_interval(struct ti_bandgap *bgp, int id, + int *interval); +int ti_bandgap_write_update_interval(struct ti_bandgap *bgp, int id, + u32 interval); +int ti_bandgap_read_temperature(struct ti_bandgap *bgp, int id, + int *temperature); +int ti_bandgap_set_sensor_data(struct ti_bandgap *bgp, int id, void *data); +void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id); +int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend); + +#ifdef CONFIG_OMAP4_THERMAL +extern const struct ti_bandgap_data omap4430_data; +extern const struct ti_bandgap_data omap4460_data; +extern const struct ti_bandgap_data omap4470_data; +#else +#define omap4430_data NULL +#define omap4460_data NULL +#define omap4470_data NULL +#endif + +#ifdef CONFIG_OMAP5_THERMAL +extern const struct ti_bandgap_data omap5430_data; +#else +#define omap5430_data NULL +#endif + +#ifdef CONFIG_DRA752_THERMAL +extern const struct ti_bandgap_data dra752_data; +#else +#define dra752_data NULL +#endif +#endif diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c new file mode 100644 index 0000000..4c5f55c37 --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -0,0 +1,378 @@ +/* + * OMAP thermal driver interface + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ti-thermal.h" +#include "ti-bandgap.h" + +/* common data structures */ +struct ti_thermal_data { + struct thermal_zone_device *ti_thermal; + struct thermal_zone_device *pcb_tz; + struct thermal_cooling_device *cool_dev; + struct ti_bandgap *bgp; + enum thermal_device_mode mode; + struct work_struct thermal_wq; + int sensor_id; +}; + +static void ti_thermal_work(struct work_struct *work) +{ + struct ti_thermal_data *data = container_of(work, + struct ti_thermal_data, thermal_wq); + + thermal_zone_device_update(data->ti_thermal); + + dev_dbg(&data->ti_thermal->device, "updated thermal zone %s\n", + data->ti_thermal->type); +} + +/** + * ti_thermal_hotspot_temperature - returns sensor extrapolated temperature + * @t: omap sensor temperature + * @s: omap sensor slope value + * @c: omap sensor const value + */ +static inline int ti_thermal_hotspot_temperature(int t, int s, int c) +{ + int delta = t * s / 1000 + c; + + if (delta < 0) + delta = 0; + + return t + delta; +} + +/* thermal zone ops */ +/* Get temperature callback function for thermal zone*/ +static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct thermal_zone_device *pcb_tz = NULL; + struct ti_thermal_data *data = thermal->devdata; + struct ti_bandgap *bgp; + const struct ti_temp_sensor *s; + int ret, tmp, slope, constant; + unsigned long pcb_temp; + + if (!data) + return 0; + + bgp = data->bgp; + s = &bgp->conf->sensors[data->sensor_id]; + + ret = ti_bandgap_read_temperature(bgp, data->sensor_id, &tmp); + if (ret) + return ret; + + /* Default constants */ + slope = s->slope; + constant = s->constant; + + pcb_tz = data->pcb_tz; + /* In case pcb zone is available, use the extrapolation rule with it */ + if (!IS_ERR(pcb_tz)) { + ret = thermal_zone_get_temp(pcb_tz, &pcb_temp); + if (!ret) { + tmp -= pcb_temp; /* got a valid PCB temp */ + slope = s->slope_pcb; + constant = s->constant_pcb; + } else { + dev_err(bgp->dev, + "Failed to read PCB state. Using defaults\n"); + } + } + *temp = ti_thermal_hotspot_temperature(tmp, slope, constant); + + return ret; +} + +/* Bind callback functions for thermal zone */ +static int ti_thermal_bind(struct thermal_zone_device *thermal, + struct thermal_cooling_device *cdev) +{ + struct ti_thermal_data *data = thermal->devdata; + int id; + + if (!data || IS_ERR(data)) + return -ENODEV; + + /* check if this is the cooling device we registered */ + if (data->cool_dev != cdev) + return 0; + + id = data->sensor_id; + + /* Simple thing, two trips, one passive another critical */ + return thermal_zone_bind_cooling_device(thermal, 0, cdev, + /* bind with min and max states defined by cpu_cooling */ + THERMAL_NO_LIMIT, + THERMAL_NO_LIMIT); +} + +/* Unbind callback functions for thermal zone */ +static int ti_thermal_unbind(struct thermal_zone_device *thermal, + struct thermal_cooling_device *cdev) +{ + struct ti_thermal_data *data = thermal->devdata; + + if (!data || IS_ERR(data)) + return -ENODEV; + + /* check if this is the cooling device we registered */ + if (data->cool_dev != cdev) + return 0; + + /* Simple thing, two trips, one passive another critical */ + return thermal_zone_unbind_cooling_device(thermal, 0, cdev); +} + +/* Get mode callback functions for thermal zone */ +static int ti_thermal_get_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode *mode) +{ + struct ti_thermal_data *data = thermal->devdata; + + if (data) + *mode = data->mode; + + return 0; +} + +/* Set mode callback functions for thermal zone */ +static int ti_thermal_set_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode mode) +{ + struct ti_thermal_data *data = thermal->devdata; + + if (!data->ti_thermal) { + dev_notice(&thermal->device, "thermal zone not registered\n"); + return 0; + } + + mutex_lock(&data->ti_thermal->lock); + + if (mode == THERMAL_DEVICE_ENABLED) + data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; + else + data->ti_thermal->polling_delay = 0; + + mutex_unlock(&data->ti_thermal->lock); + + data->mode = mode; + thermal_zone_device_update(data->ti_thermal); + dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n", + data->ti_thermal->polling_delay); + + return 0; +} + +/* Get trip type callback functions for thermal zone */ +static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal, + int trip, enum thermal_trip_type *type) +{ + if (!ti_thermal_is_valid_trip(trip)) + return -EINVAL; + + if (trip + 1 == OMAP_TRIP_NUMBER) + *type = THERMAL_TRIP_CRITICAL; + else + *type = THERMAL_TRIP_PASSIVE; + + return 0; +} + +/* Get trip temperature callback functions for thermal zone */ +static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal, + int trip, unsigned long *temp) +{ + if (!ti_thermal_is_valid_trip(trip)) + return -EINVAL; + + *temp = ti_thermal_get_trip_value(trip); + + return 0; +} + +/* Get the temperature trend callback functions for thermal zone */ +static int ti_thermal_get_trend(struct thermal_zone_device *thermal, + int trip, enum thermal_trend *trend) +{ + struct ti_thermal_data *data = thermal->devdata; + struct ti_bandgap *bgp; + int id, tr, ret = 0; + + bgp = data->bgp; + id = data->sensor_id; + + ret = ti_bandgap_get_trend(bgp, id, &tr); + if (ret) + return ret; + + if (tr > 0) + *trend = THERMAL_TREND_RAISING; + else if (tr < 0) + *trend = THERMAL_TREND_DROPPING; + else + *trend = THERMAL_TREND_STABLE; + + return 0; +} + +/* Get critical temperature callback functions for thermal zone */ +static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + /* shutdown zone */ + return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); +} + +static struct thermal_zone_device_ops ti_thermal_ops = { + .get_temp = ti_thermal_get_temp, + .get_trend = ti_thermal_get_trend, + .bind = ti_thermal_bind, + .unbind = ti_thermal_unbind, + .get_mode = ti_thermal_get_mode, + .set_mode = ti_thermal_set_mode, + .get_trip_type = ti_thermal_get_trip_type, + .get_trip_temp = ti_thermal_get_trip_temp, + .get_crit_temp = ti_thermal_get_crit_temp, +}; + +static struct ti_thermal_data +*ti_thermal_build_data(struct ti_bandgap *bgp, int id) +{ + struct ti_thermal_data *data; + + data = devm_kzalloc(bgp->dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(bgp->dev, "kzalloc fail\n"); + return NULL; + } + data->sensor_id = id; + data->bgp = bgp; + data->mode = THERMAL_DEVICE_ENABLED; + /* pcb_tz will be either valid or PTR_ERR() */ + data->pcb_tz = thermal_zone_get_zone_by_name("pcb"); + INIT_WORK(&data->thermal_wq, ti_thermal_work); + + return data; +} + +int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, + char *domain) +{ + struct ti_thermal_data *data; + + data = ti_bandgap_get_sensor_data(bgp, id); + + if (!data || IS_ERR(data)) + data = ti_thermal_build_data(bgp, id); + + if (!data) + return -EINVAL; + + /* Create thermal zone */ + data->ti_thermal = thermal_zone_device_register(domain, + OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops, + NULL, FAST_TEMP_MONITORING_RATE, + FAST_TEMP_MONITORING_RATE); + if (IS_ERR(data->ti_thermal)) { + dev_err(bgp->dev, "thermal zone device is NULL\n"); + return PTR_ERR(data->ti_thermal); + } + data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; + ti_bandgap_set_sensor_data(bgp, id, data); + + return 0; +} + +int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id) +{ + struct ti_thermal_data *data; + + data = ti_bandgap_get_sensor_data(bgp, id); + + thermal_zone_device_unregister(data->ti_thermal); + + return 0; +} + +int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id) +{ + struct ti_thermal_data *data; + + data = ti_bandgap_get_sensor_data(bgp, id); + + schedule_work(&data->thermal_wq); + + return 0; +} + +int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id) +{ + struct ti_thermal_data *data; + + data = ti_bandgap_get_sensor_data(bgp, id); + if (!data || IS_ERR(data)) + data = ti_thermal_build_data(bgp, id); + + if (!data) + return -EINVAL; + + if (!cpufreq_get_current_driver()) { + dev_dbg(bgp->dev, "no cpufreq driver yet\n"); + return -EPROBE_DEFER; + } + + /* Register cooling device */ + data->cool_dev = cpufreq_cooling_register(cpu_present_mask); + if (IS_ERR(data->cool_dev)) { + dev_err(bgp->dev, + "Failed to register cpufreq cooling device\n"); + return PTR_ERR(data->cool_dev); + } + ti_bandgap_set_sensor_data(bgp, id, data); + + return 0; +} + +int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id) +{ + struct ti_thermal_data *data; + + data = ti_bandgap_get_sensor_data(bgp, id); + cpufreq_cooling_unregister(data->cool_dev); + + return 0; +} diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal.h b/drivers/thermal/ti-soc-thermal/ti-thermal.h new file mode 100644 index 0000000..f8b7ffe --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/ti-thermal.h @@ -0,0 +1,123 @@ +/* + * OMAP thermal definitions + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Contact: + * Eduardo Valentin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ +#ifndef __TI_THERMAL_H +#define __TI_THERMAL_H + +#include "ti-bandgap.h" + +/* sensors gradient and offsets */ +#define OMAP_GRADIENT_SLOPE_4430 0 +#define OMAP_GRADIENT_CONST_4430 20000 +#define OMAP_GRADIENT_SLOPE_4460 348 +#define OMAP_GRADIENT_CONST_4460 -9301 +#define OMAP_GRADIENT_SLOPE_4470 308 +#define OMAP_GRADIENT_CONST_4470 -7896 + +#define OMAP_GRADIENT_SLOPE_5430_CPU 65 +#define OMAP_GRADIENT_CONST_5430_CPU -1791 +#define OMAP_GRADIENT_SLOPE_5430_GPU 117 +#define OMAP_GRADIENT_CONST_5430_GPU -2992 + +#define DRA752_GRADIENT_SLOPE 0 +#define DRA752_GRADIENT_CONST 2000 + +/* PCB sensor calculation constants */ +#define OMAP_GRADIENT_SLOPE_W_PCB_4430 0 +#define OMAP_GRADIENT_CONST_W_PCB_4430 20000 +#define OMAP_GRADIENT_SLOPE_W_PCB_4460 1142 +#define OMAP_GRADIENT_CONST_W_PCB_4460 -393 +#define OMAP_GRADIENT_SLOPE_W_PCB_4470 1063 +#define OMAP_GRADIENT_CONST_W_PCB_4470 -477 + +#define OMAP_GRADIENT_SLOPE_W_PCB_5430_CPU 100 +#define OMAP_GRADIENT_CONST_W_PCB_5430_CPU 484 +#define OMAP_GRADIENT_SLOPE_W_PCB_5430_GPU 464 +#define OMAP_GRADIENT_CONST_W_PCB_5430_GPU -5102 + +#define DRA752_GRADIENT_SLOPE_W_PCB 0 +#define DRA752_GRADIENT_CONST_W_PCB 2000 + +/* trip points of interest in milicelsius (at hotspot level) */ +#define OMAP_TRIP_COLD 100000 +#define OMAP_TRIP_HOT 110000 +#define OMAP_TRIP_SHUTDOWN 125000 +#define OMAP_TRIP_NUMBER 2 +#define OMAP_TRIP_STEP \ + ((OMAP_TRIP_SHUTDOWN - OMAP_TRIP_HOT) / (OMAP_TRIP_NUMBER - 1)) + +/* Update rates */ +#define FAST_TEMP_MONITORING_RATE 250 + +/* helper macros */ +/** + * ti_thermal_get_trip_value - returns trip temperature based on index + * @i: trip index + */ +#define ti_thermal_get_trip_value(i) \ + (OMAP_TRIP_HOT + ((i) * OMAP_TRIP_STEP)) + +/** + * ti_thermal_is_valid_trip - check for trip index + * @i: trip index + */ +#define ti_thermal_is_valid_trip(trip) \ + ((trip) >= 0 && (trip) < OMAP_TRIP_NUMBER) + +#ifdef CONFIG_TI_THERMAL +int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, char *domain); +int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id); +int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id); +int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id); +int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id); +#else +static inline +int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, char *domain) +{ + return 0; +} + +static inline +int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id) +{ + return 0; +} + +static inline +int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id) +{ + return 0; +} + +static inline +int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id) +{ + return 0; +} + +static inline +int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id) +{ + return 0; +} +#endif +#endif diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c new file mode 100644 index 0000000..5de56f6 --- /dev/null +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -0,0 +1,642 @@ +/* + * x86_pkg_temp_thermal driver + * Copyright (c) 2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +* Rate control delay: Idea is to introduce denounce effect +* This should be long enough to avoid reduce events, when +* threshold is set to a temperature, which is constantly +* violated, but at the short enough to take any action. +* The action can be remove threshold or change it to next +* interesting setting. Based on experiments, in around +* every 5 seconds under load will give us a significant +* temperature change. +*/ +#define PKG_TEMP_THERMAL_NOTIFY_DELAY 5000 +static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY; +module_param(notify_delay_ms, int, 0644); +MODULE_PARM_DESC(notify_delay_ms, + "User space notification delay in milli seconds."); + +/* Number of trip points in thermal zone. Currently it can't +* be more than 2. MSR can allow setting and getting notifications +* for only 2 thresholds. This define enforces this, if there +* is some wrong values returned by cpuid for number of thresholds. +*/ +#define MAX_NUMBER_OF_TRIPS 2 + +struct phy_dev_entry { + struct list_head list; + u16 phys_proc_id; + u16 first_cpu; + u32 tj_max; + int ref_cnt; + u32 start_pkg_therm_low; + u32 start_pkg_therm_high; + struct thermal_zone_device *tzone; +}; + +/* List maintaining number of package instances */ +static LIST_HEAD(phy_dev_list); +static DEFINE_MUTEX(phy_dev_list_mutex); + +/* Interrupt to work function schedule queue */ +static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work); + +/* To track if the work is already scheduled on a package */ +static u8 *pkg_work_scheduled; + +/* Spin lock to prevent races with pkg_work_scheduled */ +static spinlock_t pkg_work_lock; +static u16 max_phy_id; + +/* Debug counters to show using debugfs */ +static struct dentry *debugfs; +static unsigned int pkg_interrupt_cnt; +static unsigned int pkg_work_cnt; + +static int pkg_temp_debugfs_init(void) +{ + struct dentry *d; + + debugfs = debugfs_create_dir("pkg_temp_thermal", NULL); + if (!debugfs) + return -ENOENT; + + d = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs, + (u32 *)&pkg_interrupt_cnt); + if (!d) + goto err_out; + + d = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs, + (u32 *)&pkg_work_cnt); + if (!d) + goto err_out; + + return 0; + +err_out: + debugfs_remove_recursive(debugfs); + return -ENOENT; +} + +static struct phy_dev_entry + *pkg_temp_thermal_get_phy_entry(unsigned int cpu) +{ + u16 phys_proc_id = topology_physical_package_id(cpu); + struct phy_dev_entry *phy_ptr; + + mutex_lock(&phy_dev_list_mutex); + + list_for_each_entry(phy_ptr, &phy_dev_list, list) + if (phy_ptr->phys_proc_id == phys_proc_id) { + mutex_unlock(&phy_dev_list_mutex); + return phy_ptr; + } + + mutex_unlock(&phy_dev_list_mutex); + + return NULL; +} + +/* +* tj-max is is interesting because threshold is set relative to this +* temperature. +*/ +static int get_tj_max(int cpu, u32 *tj_max) +{ + u32 eax, edx; + u32 val; + int err; + + err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); + if (err) + goto err_ret; + else { + val = (eax >> 16) & 0xff; + if (val) + *tj_max = val * 1000; + else { + err = -EINVAL; + goto err_ret; + } + } + + return 0; +err_ret: + *tj_max = 0; + return err; +} + +static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) +{ + u32 eax, edx; + struct phy_dev_entry *phy_dev_entry; + + phy_dev_entry = tzd->devdata; + rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS, + &eax, &edx); + if (eax & 0x80000000) { + *temp = phy_dev_entry->tj_max - + ((eax >> 16) & 0x7f) * 1000; + pr_debug("sys_get_curr_temp %ld\n", *temp); + return 0; + } + + return -EINVAL; +} + +static int sys_get_trip_temp(struct thermal_zone_device *tzd, + int trip, unsigned long *temp) +{ + u32 eax, edx; + struct phy_dev_entry *phy_dev_entry; + u32 mask, shift; + unsigned long thres_reg_value; + int ret; + + if (trip >= MAX_NUMBER_OF_TRIPS) + return -EINVAL; + + phy_dev_entry = tzd->devdata; + + if (trip) { + mask = THERM_MASK_THRESHOLD1; + shift = THERM_SHIFT_THRESHOLD1; + } else { + mask = THERM_MASK_THRESHOLD0; + shift = THERM_SHIFT_THRESHOLD0; + } + + ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, + MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx); + if (ret < 0) + return -EINVAL; + + thres_reg_value = (eax & mask) >> shift; + if (thres_reg_value) + *temp = phy_dev_entry->tj_max - thres_reg_value * 1000; + else + *temp = 0; + pr_debug("sys_get_trip_temp %ld\n", *temp); + + return 0; +} + +int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, + unsigned long temp) +{ + u32 l, h; + struct phy_dev_entry *phy_dev_entry; + u32 mask, shift, intr; + int ret; + + phy_dev_entry = tzd->devdata; + + if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max) + return -EINVAL; + + ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, + MSR_IA32_PACKAGE_THERM_INTERRUPT, + &l, &h); + if (ret < 0) + return -EINVAL; + + if (trip) { + mask = THERM_MASK_THRESHOLD1; + shift = THERM_SHIFT_THRESHOLD1; + intr = THERM_INT_THRESHOLD1_ENABLE; + } else { + mask = THERM_MASK_THRESHOLD0; + shift = THERM_SHIFT_THRESHOLD0; + intr = THERM_INT_THRESHOLD0_ENABLE; + } + l &= ~mask; + /* + * When users space sets a trip temperature == 0, which is indication + * that, it is no longer interested in receiving notifications. + */ + if (!temp) + l &= ~intr; + else { + l |= (phy_dev_entry->tj_max - temp)/1000 << shift; + l |= intr; + } + + return wrmsr_on_cpu(phy_dev_entry->first_cpu, + MSR_IA32_PACKAGE_THERM_INTERRUPT, + l, h); +} + +static int sys_get_trip_type(struct thermal_zone_device *thermal, + int trip, enum thermal_trip_type *type) +{ + + *type = THERMAL_TRIP_PASSIVE; + + return 0; +} + +/* Thermal zone callback registry */ +static struct thermal_zone_device_ops tzone_ops = { + .get_temp = sys_get_curr_temp, + .get_trip_temp = sys_get_trip_temp, + .get_trip_type = sys_get_trip_type, + .set_trip_temp = sys_set_trip_temp, +}; + +static bool pkg_temp_thermal_platform_thermal_rate_control(void) +{ + return true; +} + +/* Enable threshold interrupt on local package/cpu */ +static inline void enable_pkg_thres_interrupt(void) +{ + u32 l, h; + u8 thres_0, thres_1; + + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + /* only enable/disable if it had valid threshold value */ + thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0; + thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1; + if (thres_0) + l |= THERM_INT_THRESHOLD0_ENABLE; + if (thres_1) + l |= THERM_INT_THRESHOLD1_ENABLE; + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +} + +/* Disable threshold interrupt on local package/cpu */ +static inline void disable_pkg_thres_interrupt(void) +{ + u32 l, h; + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l & (~THERM_INT_THRESHOLD0_ENABLE) & + (~THERM_INT_THRESHOLD1_ENABLE), h); +} + +static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) +{ + __u64 msr_val; + int cpu = smp_processor_id(); + int phy_id = topology_physical_package_id(cpu); + struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); + bool notify = false; + + if (!phdev) + return; + + spin_lock(&pkg_work_lock); + ++pkg_work_cnt; + if (unlikely(phy_id > max_phy_id)) { + spin_unlock(&pkg_work_lock); + return; + } + pkg_work_scheduled[phy_id] = 0; + spin_unlock(&pkg_work_lock); + + enable_pkg_thres_interrupt(); + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (msr_val & THERM_LOG_THRESHOLD0) { + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, + msr_val & ~THERM_LOG_THRESHOLD0); + notify = true; + } + if (msr_val & THERM_LOG_THRESHOLD1) { + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, + msr_val & ~THERM_LOG_THRESHOLD1); + notify = true; + } + if (notify) { + pr_debug("thermal_zone_device_update\n"); + thermal_zone_device_update(phdev->tzone); + } +} + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + unsigned long flags; + int cpu = smp_processor_id(); + int phy_id = topology_physical_package_id(cpu); + + /* + * When a package is in interrupted state, all CPU's in that package + * are in the same interrupt state. So scheduling on any one CPU in + * the package is enough and simply return for others. + */ + spin_lock_irqsave(&pkg_work_lock, flags); + ++pkg_interrupt_cnt; + if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) || + pkg_work_scheduled[phy_id]) { + disable_pkg_thres_interrupt(); + spin_unlock_irqrestore(&pkg_work_lock, flags); + return -EINVAL; + } + pkg_work_scheduled[phy_id] = 1; + spin_unlock_irqrestore(&pkg_work_lock, flags); + + disable_pkg_thres_interrupt(); + schedule_delayed_work_on(cpu, + &per_cpu(pkg_temp_thermal_threshold_work, cpu), + msecs_to_jiffies(notify_delay_ms)); + return 0; +} + +static int find_siblings_cpu(int cpu) +{ + int i; + int id = topology_physical_package_id(cpu); + + for_each_online_cpu(i) + if (i != cpu && topology_physical_package_id(i) == id) + return i; + + return 0; +} + +static int pkg_temp_thermal_device_add(unsigned int cpu) +{ + int err; + u32 tj_max; + struct phy_dev_entry *phy_dev_entry; + char buffer[30]; + int thres_count; + u32 eax, ebx, ecx, edx; + + cpuid(6, &eax, &ebx, &ecx, &edx); + thres_count = ebx & 0x07; + if (!thres_count) + return -ENODEV; + + thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS); + + err = get_tj_max(cpu, &tj_max); + if (err) + goto err_ret; + + mutex_lock(&phy_dev_list_mutex); + + phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL); + if (!phy_dev_entry) { + err = -ENOMEM; + goto err_ret_unlock; + } + + spin_lock(&pkg_work_lock); + if (topology_physical_package_id(cpu) > max_phy_id) + max_phy_id = topology_physical_package_id(cpu); + pkg_work_scheduled = krealloc(pkg_work_scheduled, + (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); + if (!pkg_work_scheduled) { + spin_unlock(&pkg_work_lock); + err = -ENOMEM; + goto err_ret_free; + } + pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; + spin_unlock(&pkg_work_lock); + + phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu); + phy_dev_entry->first_cpu = cpu; + phy_dev_entry->tj_max = tj_max; + phy_dev_entry->ref_cnt = 1; + snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n", + phy_dev_entry->phys_proc_id); + phy_dev_entry->tzone = thermal_zone_device_register(buffer, + thres_count, + (thres_count == MAX_NUMBER_OF_TRIPS) ? + 0x03 : 0x01, + phy_dev_entry, &tzone_ops, NULL, 0, 0); + if (IS_ERR(phy_dev_entry->tzone)) { + err = PTR_ERR(phy_dev_entry->tzone); + goto err_ret_free; + } + /* Store MSR value for package thermal interrupt, to restore at exit */ + rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, + &phy_dev_entry->start_pkg_therm_low, + &phy_dev_entry->start_pkg_therm_high); + + list_add_tail(&phy_dev_entry->list, &phy_dev_list); + pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n", + phy_dev_entry->phys_proc_id, cpu); + + mutex_unlock(&phy_dev_list_mutex); + + return 0; + +err_ret_free: + kfree(phy_dev_entry); +err_ret_unlock: + mutex_unlock(&phy_dev_list_mutex); + +err_ret: + return err; +} + +static int pkg_temp_thermal_device_remove(unsigned int cpu) +{ + struct phy_dev_entry *n; + u16 phys_proc_id = topology_physical_package_id(cpu); + struct phy_dev_entry *phdev = + pkg_temp_thermal_get_phy_entry(cpu); + + if (!phdev) + return -ENODEV; + + mutex_lock(&phy_dev_list_mutex); + /* If we are loosing the first cpu for this package, we need change */ + if (phdev->first_cpu == cpu) { + phdev->first_cpu = find_siblings_cpu(cpu); + pr_debug("thermal_device_remove: first cpu switched %d\n", + phdev->first_cpu); + } + /* + * It is possible that no siblings left as this was the last cpu + * going offline. We don't need to worry about this assignment + * as the phydev entry will be removed in this case and + * thermal zone is removed. + */ + --phdev->ref_cnt; + pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n", + phys_proc_id, cpu, phdev->ref_cnt); + if (!phdev->ref_cnt) + list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { + if (phdev->phys_proc_id == phys_proc_id) { + thermal_zone_device_unregister(phdev->tzone); + list_del(&phdev->list); + kfree(phdev); + break; + } + } + mutex_unlock(&phy_dev_list_mutex); + + return 0; +} + +static int get_core_online(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); + + /* Check if there is already an instance for this package */ + if (!phdev) { + if (!cpu_has(c, X86_FEATURE_DTHERM) && + !cpu_has(c, X86_FEATURE_PTS)) + return -ENODEV; + if (pkg_temp_thermal_device_add(cpu)) + return -ENODEV; + } else { + mutex_lock(&phy_dev_list_mutex); + ++phdev->ref_cnt; + pr_debug("get_core_online: cpu %d ref_cnt %d\n", + cpu, phdev->ref_cnt); + mutex_unlock(&phy_dev_list_mutex); + } + INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu), + pkg_temp_thermal_threshold_work_fn); + + pr_debug("get_core_online: cpu %d successful\n", cpu); + + return 0; +} + +static void put_core_offline(unsigned int cpu) +{ + if (!pkg_temp_thermal_device_remove(cpu)) + cancel_delayed_work_sync( + &per_cpu(pkg_temp_thermal_threshold_work, cpu)); + + pr_debug("put_core_offline: cpu %d\n", cpu); +} + +static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long) hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + get_core_online(cpu); + break; + case CPU_DOWN_PREPARE: + put_core_offline(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block pkg_temp_thermal_notifier __refdata = { + .notifier_call = pkg_temp_thermal_cpu_callback, +}; + +static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); + +static int __init pkg_temp_thermal_init(void) +{ + int i; + + if (!x86_match_cpu(pkg_temp_thermal_ids)) + return -ENODEV; + + spin_lock_init(&pkg_work_lock); + platform_thermal_package_notify = + pkg_temp_thermal_platform_thermal_notify; + platform_thermal_package_rate_control = + pkg_temp_thermal_platform_thermal_rate_control; + + get_online_cpus(); + for_each_online_cpu(i) + if (get_core_online(i)) + goto err_ret; + register_hotcpu_notifier(&pkg_temp_thermal_notifier); + put_online_cpus(); + + pkg_temp_debugfs_init(); /* Don't care if fails */ + + return 0; + +err_ret: + get_online_cpus(); + for_each_online_cpu(i) + put_core_offline(i); + put_online_cpus(); + kfree(pkg_work_scheduled); + platform_thermal_package_notify = NULL; + platform_thermal_package_rate_control = NULL; + + return -ENODEV; +} + +static void __exit pkg_temp_thermal_exit(void) +{ + struct phy_dev_entry *phdev, *n; + int i; + + get_online_cpus(); + unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); + mutex_lock(&phy_dev_list_mutex); + list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { + /* Retore old MSR value for package thermal interrupt */ + wrmsr_on_cpu(phdev->first_cpu, + MSR_IA32_PACKAGE_THERM_INTERRUPT, + phdev->start_pkg_therm_low, + phdev->start_pkg_therm_high); + thermal_zone_device_unregister(phdev->tzone); + list_del(&phdev->list); + kfree(phdev); + } + mutex_unlock(&phy_dev_list_mutex); + platform_thermal_package_notify = NULL; + platform_thermal_package_rate_control = NULL; + for_each_online_cpu(i) + cancel_delayed_work_sync( + &per_cpu(pkg_temp_thermal_threshold_work, i)); + put_online_cpus(); + + kfree(pkg_work_scheduled); + + debugfs_remove_recursive(debugfs); +} + +module_init(pkg_temp_thermal_init) +module_exit(pkg_temp_thermal_exit) + +MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver"); +MODULE_AUTHOR("Srinivas Pandruvada "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c index 7cacd6a..0ff3339 100644 --- a/drivers/usb/gadget/tcm_usb_gadget.c +++ b/drivers/usb/gadget/tcm_usb_gadget.c @@ -1467,9 +1467,8 @@ static int usbg_get_cmd_state(struct se_cmd *se_cmd) return 0; } -static int usbg_queue_tm_rsp(struct se_cmd *se_cmd) +static void usbg_queue_tm_rsp(struct se_cmd *se_cmd) { - return 0; } static const char *usbg_check_wwn(const char *name) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 4264840..06adf31 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -448,7 +448,19 @@ static u32 tcm_vhost_tpg_get_inst_index(struct se_portal_group *se_tpg) static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { - return; + struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, + struct tcm_vhost_cmd, tvc_se_cmd); + + if (tv_cmd->tvc_sgl_count) { + u32 i; + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) + put_page(sg_page(&tv_cmd->tvc_sgl[i])); + + kfree(tv_cmd->tvc_sgl); + } + + tcm_vhost_put_inflight(tv_cmd->inflight); + kfree(tv_cmd); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -518,9 +530,9 @@ static int tcm_vhost_queue_status(struct se_cmd *se_cmd) return 0; } -static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) +static void tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) { - return 0; + return; } static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) @@ -560,19 +572,13 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *cmd) struct se_cmd *se_cmd = &cmd->tvc_se_cmd; /* TODO locking against target/backend threads? */ - transport_generic_free_cmd(se_cmd, 1); + transport_generic_free_cmd(se_cmd, 0); - if (cmd->tvc_sgl_count) { - u32 i; - for (i = 0; i < cmd->tvc_sgl_count; i++) - put_page(sg_page(&cmd->tvc_sgl[i])); - - kfree(cmd->tvc_sgl); - } - - tcm_vhost_put_inflight(cmd->inflight); +} - kfree(cmd); +static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) +{ + return target_put_sess_cmd(se_cmd->se_sess, se_cmd); } static void @@ -856,7 +862,7 @@ static void tcm_vhost_submission_work(struct work_struct *work) cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, cmd->tvc_task_attr, cmd->tvc_data_direction, - 0, sg_ptr, cmd->tvc_sgl_count, + TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, sg_bidi_ptr, sg_no_bidi); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, @@ -2028,6 +2034,7 @@ static struct target_core_fabric_ops tcm_vhost_ops = { .tpg_release_fabric_acl = tcm_vhost_release_fabric_acl, .tpg_get_inst_index = tcm_vhost_tpg_get_inst_index, .release_cmd = tcm_vhost_release_cmd, + .check_stop_free = vhost_scsi_check_stop_free, .shutdown_session = tcm_vhost_shutdown_session, .close_session = tcm_vhost_close_session, .sess_get_index = tcm_vhost_sess_get_index, diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 55abfd6..6489e1f 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL If you don't know what Access Control Lists are, say N endif + + +config 9P_FS_SECURITY + bool "9P Security Labels" + depends on 9P_FS + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the 9P filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ab8c127..ff7be98 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o v9fs.o \ fid.o \ xattr.o \ - xattr_user.o + xattr_user.o \ + xattr_trusted.o 9p-$(CONFIG_9P_FSCACHE) += cache.o 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o +9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d86edc8..25b018e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1054,13 +1054,11 @@ static int v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - int err; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); - err = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { generic_fillattr(dentry->d_inode, stat); diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index c45e016..3c28cdf 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, + &v9fs_xattr_trusted_handler, #ifdef CONFIG_9P_FS_POSIX_ACL &v9fs_xattr_acl_access_handler, &v9fs_xattr_acl_default_handler, #endif +#ifdef CONFIG_9P_FS_SECURITY + &v9fs_xattr_security_handler, +#endif NULL }; diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index eec348a..d3e2ea3 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -20,6 +20,8 @@ extern const struct xattr_handler *v9fs_xattr_handlers[]; extern struct xattr_handler v9fs_xattr_user_handler; +extern struct xattr_handler v9fs_xattr_trusted_handler; +extern struct xattr_handler v9fs_xattr_security_handler; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c new file mode 100644 index 0000000..cb247a1 --- /dev/null +++ b/fs/9p/xattr_security.c @@ -0,0 +1,80 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + + +#include +#include +#include +#include +#include "xattr.h" + +static int v9fs_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(full_name+prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_get(dentry, full_name, buffer, size); + kfree(full_name); + return retval; +} + +static int v9fs_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(full_name + prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_set(dentry, full_name, value, size, flags); + kfree(full_name); + return retval; +} + +struct xattr_handler v9fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = v9fs_xattr_security_get, + .set = v9fs_xattr_security_set, +}; diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c new file mode 100644 index 0000000..e30d33b --- /dev/null +++ b/fs/9p/xattr_trusted.c @@ -0,0 +1,80 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + + +#include +#include +#include +#include +#include "xattr.h" + +static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(full_name+prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_get(dentry, full_name, buffer, size); + kfree(full_name); + return retval; +} + +static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(full_name + prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_set(dentry, full_name, value, size, flags); + kfree(full_name); + return retval; +} + +struct xattr_handler v9fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = v9fs_xattr_trusted_get, + .set = v9fs_xattr_trusted_set, +}; diff --git a/fs/block_dev.c b/fs/block_dev.c index bb43ce0..c7bda5c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cfa109a..d107576 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -37,16 +37,8 @@ #include #include "ecryptfs_kernel.h" -static int -ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv); -static int -ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv); +#define DECRYPT 0 +#define ENCRYPT 1 /** * ecryptfs_to_hex @@ -336,19 +328,20 @@ static void extent_crypt_complete(struct crypto_async_request *req, int rc) } /** - * encrypt_scatterlist + * crypt_scatterlist * @crypt_stat: Pointer to the crypt_stat struct to initialize. - * @dest_sg: Destination of encrypted data - * @src_sg: Data to be encrypted - * @size: Length of data to be encrypted - * @iv: iv to use during encryption + * @dst_sg: Destination of the data after performing the crypto operation + * @src_sg: Data to be encrypted or decrypted + * @size: Length of data + * @iv: IV to use + * @op: ENCRYPT or DECRYPT to indicate the desired operation * - * Returns the number of bytes encrypted; negative value on error + * Returns the number of bytes encrypted or decrypted; negative value on error */ -static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, - struct scatterlist *dest_sg, - struct scatterlist *src_sg, int size, - unsigned char *iv) +static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, + struct scatterlist *dst_sg, + struct scatterlist *src_sg, int size, + unsigned char *iv, int op) { struct ablkcipher_request *req = NULL; struct extent_crypt_result ecr; @@ -391,9 +384,9 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, crypt_stat->flags |= ECRYPTFS_KEY_SET; } mutex_unlock(&crypt_stat->cs_tfm_mutex); - ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); - ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); - rc = crypto_ablkcipher_encrypt(req); + ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); + rc = op == ENCRYPT ? crypto_ablkcipher_encrypt(req) : + crypto_ablkcipher_decrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) { struct extent_crypt_result *ecr = req->base.data; @@ -407,41 +400,43 @@ out: } /** - * ecryptfs_lower_offset_for_extent + * lower_offset_for_page * * Convert an eCryptfs page index into a lower byte offset */ -static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat) +static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, + struct page *page) { - (*offset) = ecryptfs_lower_header_size(crypt_stat) - + (crypt_stat->extent_size * extent_num); + return ecryptfs_lower_header_size(crypt_stat) + + (page->index << PAGE_CACHE_SHIFT); } /** - * ecryptfs_encrypt_extent - * @enc_extent_page: Allocated page into which to encrypt the data in - * @page + * crypt_extent * @crypt_stat: crypt_stat containing cryptographic context for the * encryption operation - * @page: Page containing plaintext data extent to encrypt + * @dst_page: The page to write the result into + * @src_page: The page to read from * @extent_offset: Page extent offset for use in generating IV + * @op: ENCRYPT or DECRYPT to indicate the desired operation * - * Encrypts one extent of data. + * Encrypts or decrypts one extent of data. * * Return zero on success; non-zero otherwise */ -static int ecryptfs_encrypt_extent(struct page *enc_extent_page, - struct ecryptfs_crypt_stat *crypt_stat, - struct page *page, - unsigned long extent_offset) +static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, + struct page *src_page, + unsigned long extent_offset, int op) { + pgoff_t page_index = op == ENCRYPT ? src_page->index : dst_page->index; loff_t extent_base; char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + struct scatterlist src_sg, dst_sg; + size_t extent_size = crypt_stat->extent_size; int rc; - extent_base = (((loff_t)page->index) - * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); + extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size)); rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { @@ -450,15 +445,21 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, (unsigned long long)(extent_base + extent_offset), rc); goto out; } - rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, - page, (extent_offset - * crypt_stat->extent_size), - crypt_stat->extent_size, extent_iv); + + sg_init_table(&src_sg, 1); + sg_init_table(&dst_sg, 1); + + sg_set_page(&src_sg, src_page, extent_size, + extent_offset * extent_size); + sg_set_page(&dst_sg, dst_page, extent_size, + extent_offset * extent_size); + + rc = crypt_scatterlist(crypt_stat, &dst_sg, &src_sg, extent_size, + extent_iv, op); if (rc < 0) { - printk(KERN_ERR "%s: Error attempting to encrypt page with " - "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __func__, page->index, extent_offset, - rc); + printk(KERN_ERR "%s: Error attempting to crypt page with " + "page_index = [%ld], extent_offset = [%ld]; " + "rc = [%d]\n", __func__, page_index, extent_offset, rc); goto out; } rc = 0; @@ -489,6 +490,7 @@ int ecryptfs_encrypt_page(struct page *page) char *enc_extent_virt; struct page *enc_extent_page = NULL; loff_t extent_offset; + loff_t lower_offset; int rc = 0; ecryptfs_inode = page->mapping->host; @@ -502,75 +504,35 @@ int ecryptfs_encrypt_page(struct page *page) "encrypted extent\n"); goto out; } - enc_extent_virt = kmap(enc_extent_page); + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - loff_t offset; - - rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page, - extent_offset); + rc = crypt_extent(crypt_stat, enc_extent_page, page, + extent_offset, ENCRYPT); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " "rc = [%d]\n", __func__, rc); goto out; } - ecryptfs_lower_offset_for_extent( - &offset, ((((loff_t)page->index) - * (PAGE_CACHE_SIZE - / crypt_stat->extent_size)) - + extent_offset), crypt_stat); - rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, - offset, crypt_stat->extent_size); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to write lower page; rc = [%d]" - "\n", rc); - goto out; - } - } - rc = 0; -out: - if (enc_extent_page) { - kunmap(enc_extent_page); - __free_page(enc_extent_page); } - return rc; -} -static int ecryptfs_decrypt_extent(struct page *page, - struct ecryptfs_crypt_stat *crypt_stat, - struct page *enc_extent_page, - unsigned long extent_offset) -{ - loff_t extent_base; - char extent_iv[ECRYPTFS_MAX_IV_BYTES]; - int rc; - - extent_base = (((loff_t)page->index) - * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); - rc = ecryptfs_derive_iv(extent_iv, crypt_stat, - (extent_base + extent_offset)); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " - "extent [0x%.16llx]; rc = [%d]\n", - (unsigned long long)(extent_base + extent_offset), rc); - goto out; - } - rc = ecryptfs_decrypt_page_offset(crypt_stat, page, - (extent_offset - * crypt_stat->extent_size), - enc_extent_page, 0, - crypt_stat->extent_size, extent_iv); + lower_offset = lower_offset_for_page(crypt_stat, page); + enc_extent_virt = kmap(enc_extent_page); + rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, + PAGE_CACHE_SIZE); + kunmap(enc_extent_page); if (rc < 0) { - printk(KERN_ERR "%s: Error attempting to decrypt to page with " - "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __func__, page->index, extent_offset, - rc); + ecryptfs_printk(KERN_ERR, + "Error attempting to write lower page; rc = [%d]\n", + rc); goto out; } rc = 0; out: + if (enc_extent_page) { + __free_page(enc_extent_page); + } return rc; } @@ -594,43 +556,33 @@ int ecryptfs_decrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt; - struct page *enc_extent_page = NULL; + char *page_virt; unsigned long extent_offset; + loff_t lower_offset; int rc = 0; ecryptfs_inode = page->mapping->host; crypt_stat = &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); - enc_extent_page = alloc_page(GFP_USER); - if (!enc_extent_page) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Error allocating memory for " - "encrypted extent\n"); + + lower_offset = lower_offset_for_page(crypt_stat, page); + page_virt = kmap(page); + rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE, + ecryptfs_inode); + kunmap(page); + if (rc < 0) { + ecryptfs_printk(KERN_ERR, + "Error attempting to read lower page; rc = [%d]\n", + rc); goto out; } - enc_extent_virt = kmap(enc_extent_page); + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - loff_t offset; - - ecryptfs_lower_offset_for_extent( - &offset, ((page->index * (PAGE_CACHE_SIZE - / crypt_stat->extent_size)) - + extent_offset), crypt_stat); - rc = ecryptfs_read_lower(enc_extent_virt, offset, - crypt_stat->extent_size, - ecryptfs_inode); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to read lower page; rc = [%d]" - "\n", rc); - goto out; - } - rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page, - extent_offset); + rc = crypt_extent(crypt_stat, page, page, + extent_offset, DECRYPT); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " "rc = [%d]\n", __func__, rc); @@ -638,140 +590,7 @@ int ecryptfs_decrypt_page(struct page *page) } } out: - if (enc_extent_page) { - kunmap(enc_extent_page); - __free_page(enc_extent_page); - } - return rc; -} - -/** - * decrypt_scatterlist - * @crypt_stat: Cryptographic context - * @dest_sg: The destination scatterlist to decrypt into - * @src_sg: The source scatterlist to decrypt from - * @size: The number of bytes to decrypt - * @iv: The initialization vector to use for the decryption - * - * Returns the number of bytes decrypted; negative value on error - */ -static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, - struct scatterlist *dest_sg, - struct scatterlist *src_sg, int size, - unsigned char *iv) -{ - struct ablkcipher_request *req = NULL; - struct extent_crypt_result ecr; - int rc = 0; - - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", - crypt_stat->key_size); - ecryptfs_dump_hex(crypt_stat->key, - crypt_stat->key_size); - } - - init_completion(&ecr.completion); - - mutex_lock(&crypt_stat->cs_tfm_mutex); - req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); - if (!req) { - mutex_unlock(&crypt_stat->cs_tfm_mutex); - rc = -ENOMEM; - goto out; - } - - ablkcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - extent_crypt_complete, &ecr); - /* Consider doing this once, when the file is opened */ - if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { - rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); - if (rc) { - ecryptfs_printk(KERN_ERR, - "Error setting key; rc = [%d]\n", - rc); - mutex_unlock(&crypt_stat->cs_tfm_mutex); - rc = -EINVAL; - goto out; - } - crypt_stat->flags |= ECRYPTFS_KEY_SET; - } - mutex_unlock(&crypt_stat->cs_tfm_mutex); - ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); - ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); - rc = crypto_ablkcipher_decrypt(req); - if (rc == -EINPROGRESS || rc == -EBUSY) { - struct extent_crypt_result *ecr = req->base.data; - - wait_for_completion(&ecr->completion); - rc = ecr->rc; - INIT_COMPLETION(ecr->completion); - } -out: - ablkcipher_request_free(req); return rc; - -} - -/** - * ecryptfs_encrypt_page_offset - * @crypt_stat: The cryptographic context - * @dst_page: The page to encrypt into - * @dst_offset: The offset in the page to encrypt into - * @src_page: The page to encrypt from - * @src_offset: The offset in the page to encrypt from - * @size: The number of bytes to encrypt - * @iv: The initialization vector to use for the encryption - * - * Returns the number of bytes encrypted - */ -static int -ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv) -{ - struct scatterlist src_sg, dst_sg; - - sg_init_table(&src_sg, 1); - sg_init_table(&dst_sg, 1); - - sg_set_page(&src_sg, src_page, size, src_offset); - sg_set_page(&dst_sg, dst_page, size, dst_offset); - return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); -} - -/** - * ecryptfs_decrypt_page_offset - * @crypt_stat: The cryptographic context - * @dst_page: The page to decrypt into - * @dst_offset: The offset in the page to decrypt into - * @src_page: The page to decrypt from - * @src_offset: The offset in the page to decrypt from - * @size: The number of bytes to decrypt - * @iv: The initialization vector to use for the decryption - * - * Returns the number of bytes decrypted - */ -static int -ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv) -{ - struct scatterlist src_sg, dst_sg; - - sg_init_table(&src_sg, 1); - sg_set_page(&src_sg, src_page, size, src_offset); - - sg_init_table(&dst_sg, 1); - sg_set_page(&dst_sg, dst_page, size, dst_offset); - - return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); } #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 24f1105..992cf95 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -49,7 +49,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, unsigned long nr_segs, loff_t pos) { ssize_t rc; - struct path lower; + struct path *path; struct file *file = iocb->ki_filp; rc = generic_file_aio_read(iocb, iov, nr_segs, pos); @@ -60,9 +60,8 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, if (-EIOCBQUEUED == rc) rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { - lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); - lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); - touch_atime(&lower); + path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); + touch_atime(path); } return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e924cf4..eb1c597 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -120,16 +120,15 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, struct file **lower_file) { const struct cred *cred = current_cred(); - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct path *path = ecryptfs_dentry_to_lower_path(dentry); int rc; - rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt, + rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt, cred); if (rc) { printk(KERN_ERR "Error opening lower file " "for lower_dentry [0x%p] and lower_mnt [0x%p]; " - "rc = [%d]\n", lower_dentry, lower_mnt, rc); + "rc = [%d]\n", path->dentry, path->mnt, rc); (*lower_file) = NULL; } return rc; diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 49ff8ea0..e57380e 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -247,14 +247,13 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon, goto unlock; } msg_size = (sizeof(*msg) + msg->data_len); - msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); + msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } - memcpy(msg_ctx->msg, msg, msg_size); msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; wake_up_process(msg_ctx->task); rc = 0; diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9a55f53..370d7b6 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -346,8 +346,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ip->i_sb, - "dbFree: block to be freed is outside the map"); + jfs_error(ip->i_sb, "block to be freed is outside the map\n"); return -EIO; } @@ -384,7 +383,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) /* free the blocks. */ if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { - jfs_error(ip->i_sb, "dbFree: error in block map\n"); + jfs_error(ip->i_sb, "error in block map\n"); release_metapage(mp); IREAD_UNLOCK(ipbmap); return (rc); @@ -441,8 +440,7 @@ dbUpdatePMap(struct inode *ipbmap, printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ipbmap->i_sb, - "dbUpdatePMap: blocks are outside the map"); + jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); return -EIO; } @@ -726,7 +724,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* the hint should be within the map */ if (hint >= mapSize) { - jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); + jfs_error(ip->i_sb, "the hint is outside the map\n"); return -EIO; } @@ -1057,8 +1055,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) bmp = sbi->bmap; if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { IREAD_UNLOCK(ipbmap); - jfs_error(ip->i_sb, - "dbExtend: the block is outside the filesystem"); + jfs_error(ip->i_sb, "the block is outside the filesystem\n"); return -EIO; } @@ -1134,8 +1131,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, u32 mask; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNext: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1265,8 +1261,7 @@ dbAllocNear(struct bmap * bmp, s8 *leaf; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNear: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1381,8 +1376,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) */ if (l2nb > bmp->db_agl2size) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: allocation request is larger than the " - "allocation group size"); + "allocation request is larger than the allocation group size\n"); return -EIO; } @@ -1417,7 +1411,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) (unsigned long long) blkno, (unsigned long long) nblocks); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: dbAllocCtl failed in free AG"); + "dbAllocCtl failed in free AG\n"); } return (rc); } @@ -1433,8 +1427,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) budmin = dcp->budmin; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -1475,7 +1468,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) } if (n == 4) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: failed descending stree"); + "failed descending stree\n"); release_metapage(mp); return -EIO; } @@ -1515,8 +1508,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) &blkno))) { if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: control page " - "inconsistent"); + "control page inconsistent\n"); return -EIO; } return (rc); @@ -1528,7 +1520,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: unable to allocate blocks"); + "unable to allocate blocks\n"); rc = -EIO; } return (rc); @@ -1587,8 +1579,7 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) */ rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAny: unable to allocate blocks"); + jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); return -EIO; } return (rc); @@ -1652,8 +1643,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); if (totrim == NULL) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbDiscardAG: no memory for trim array"); + jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); IWRITE_UNLOCK(ipbmap); return 0; } @@ -1682,8 +1672,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) nblocks = 1 << l2nb; } else { /* Trim any already allocated blocks */ - jfs_error(bmp->db_ipbmap->i_sb, - "dbDiscardAG: -EIO"); + jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); break; } @@ -1761,7 +1750,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: Corrupt dmapctl page"); + "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -1782,7 +1771,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (rc) { if (lev != level) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: dmap inconsistent"); + "dmap inconsistent\n"); return -EIO; } return -ENOSPC; @@ -1906,7 +1895,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) if (dp->tree.stree[ROOT] != L2BPERDMAP) { release_metapage(mp); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: the dmap is not all free"); + "the dmap is not all free\n"); rc = -EIO; goto backout; } @@ -1953,7 +1942,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. */ jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: I/O Error: Block Leakage."); + "I/O Error: Block Leakage\n"); continue; } dp = (struct dmap *) mp->data; @@ -1965,8 +1954,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. */ release_metapage(mp); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: Block Leakage."); + jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); continue; } @@ -2263,8 +2251,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, for (; nwords > 0; nwords -= nw) { if (leaf[word] < BUDMIN) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocBits: leaf page " - "corrupt"); + "leaf page corrupt\n"); break; } @@ -2536,8 +2523,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) dcp = (struct dmapctl *) mp->data; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -2638,8 +2624,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) assert(level == bmp->db_maxlevel); if (bmp->db_maxfreebud != oldroot) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: the maximum free buddy is " - "not the old root"); + "the maximum free buddy is not the old root\n"); } bmp->db_maxfreebud = dcp->stree[ROOT]; } @@ -3481,7 +3466,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) p = BMAPBLKNO + nbperpage; /* L2 page */ l2mp = read_metapage(ipbmap, p, PSIZE, 0); if (!l2mp) { - jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); + jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); return -EIO; } l2dcp = (struct dmapctl *) l2mp->data; @@ -3646,8 +3631,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) } } /* for each L1 in a L2 */ - jfs_error(ipbmap->i_sb, - "dbExtendFS: function has not returned as expected"); + jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); errout: if (l0mp) release_metapage(l0mp); @@ -3717,7 +3701,7 @@ void dbFinalizeBmap(struct inode *ipbmap) } if (bmp->db_agpref >= bmp->db_numag) { jfs_error(ipbmap->i_sb, - "cannot find ag with average freespace"); + "cannot find ag with average freespace\n"); } } diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9f4ed13..8743ba9 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -124,21 +124,21 @@ struct dtsplit { #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) /* get page buffer for specified block address */ -#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ - if (!(RC))\ - {\ - if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ - ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ - {\ - BT_PUTPAGE(MP);\ - jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ + if (!(RC)) { \ + if (((P)->header.nextindex > \ + (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ + ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ + BT_PUTPAGE(MP); \ + jfs_error((IP)->i_sb, \ + "DT_GETPAGE: dtree page corrupt\n"); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) @@ -776,7 +776,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, /* Something's corrupted, mark filesystem dirty so * chkdsk will fix it. */ - jfs_error(sb, "stack overrun in dtSearch!"); + jfs_error(sb, "stack overrun!\n"); BT_STACK_DUMP(btstack); rc = -EIO; goto out; @@ -3247,8 +3247,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* Sanity Check */ if (d_namleft == 0) { jfs_error(ip->i_sb, - "JFS:Dtree error: ino = " - "%ld, bn=%Ld, index = %d", + "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", (long)ip->i_ino, (long long)bn, i); @@ -3368,7 +3367,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) */ if (BT_STACK_FULL(btstack)) { DT_PUTPAGE(mp); - jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); + jfs_error(ip->i_sb, "btstack overrun\n"); BT_STACK_DUMP(btstack); return -EIO; } diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index e5fe850..2ae7d59 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -388,7 +388,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) if ((rc == 0) && xlen) { if (xlen != nbperpage) { - jfs_error(ip->i_sb, "extHint: corrupt xtree"); + jfs_error(ip->i_sb, "corrupt xtree\n"); rc = -EIO; } XADaddress(xp, xaddr); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f7e042b..f321986 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -386,7 +386,7 @@ int diRead(struct inode *ip) dp += rel_inode; if (ip->i_ino != le32_to_cpu(dp->di_number)) { - jfs_error(ip->i_sb, "diRead: i_ino != di_number"); + jfs_error(ip->i_sb, "i_ino != di_number\n"); rc = -EIO; } else if (le32_to_cpu(dp->di_nlink) == 0) rc = -ESTALE; @@ -625,7 +625,7 @@ int diWrite(tid_t tid, struct inode *ip) if (!addressPXD(&(jfs_ip->ixpxd)) || (lengthPXD(&(jfs_ip->ixpxd)) != JFS_IP(ipimap)->i_imap->im_nbperiext)) { - jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); + jfs_error(ip->i_sb, "ixpxd invalid\n"); return -EIO; } @@ -893,8 +893,7 @@ int diFree(struct inode *ip) if (iagno >= imap->im_nextiag) { print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, imap, 32, 0); - jfs_error(ip->i_sb, - "diFree: inum = %d, iagno = %d, nextiag = %d", + jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", (uint) inum, iagno, imap->im_nextiag); return -EIO; } @@ -930,15 +929,14 @@ int diFree(struct inode *ip) mask = HIGHORDER >> bitno; if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ip->i_sb, - "diFree: wmap shows inode already free"); + jfs_error(ip->i_sb, "wmap shows inode already free\n"); } if (!addressPXD(&iagp->inoext[extno])) { release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: invalid inoext"); + jfs_error(ip->i_sb, "invalid inoext\n"); return -EIO; } @@ -950,7 +948,7 @@ int diFree(struct inode *ip) release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } /* @@ -1199,7 +1197,7 @@ int diFree(struct inode *ip) * for the inode being freed. */ if (iagp->pmap[extno] != 0) { - jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); + jfs_error(ip->i_sb, "the pmap does not show inode free\n"); } iagp->wmap[extno] = 0; PXDlength(&iagp->inoext[extno], 0); @@ -1518,8 +1516,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) release_metapage(mp); AG_UNLOCK(imap, agno); jfs_error(ip->i_sb, - "diAlloc: can't find free bit " - "in wmap"); + "can't find free bit in wmap\n"); return -EIO; } @@ -1660,7 +1657,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) numinos = imap->im_agctl[agno].numinos; if (numfree > numinos) { - jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } @@ -1811,8 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (!iagp->nfreeinos) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, - "diAllocIno: nfreeinos = 0, but iag on freelist"); + jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); return -EIO; } @@ -1824,7 +1820,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, - "diAllocIno: free inode not found in summary map"); + "free inode not found in summary map\n"); return -EIO; } @@ -1839,7 +1835,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: no free extent found"); + jfs_error(ip->i_sb, "no free extent found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -1850,7 +1846,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= INOSPEREXT) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: free inode not found"); + jfs_error(ip->i_sb, "free inode not found\n"); return -EIO; } @@ -1936,7 +1932,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: error reading iag"); + jfs_error(ip->i_sb, "error reading iag\n"); return rc; } iagp = (struct iag *) mp->data; @@ -1948,8 +1944,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (sword >= SMAPSZ) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, - "diAllocExt: free ext summary map not found"); + jfs_error(ip->i_sb, "free ext summary map not found\n"); return -EIO; } if (~iagp->extsmap[sword]) @@ -1962,7 +1957,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: free extent not found"); + jfs_error(ip->i_sb, "free extent not found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -2081,8 +2076,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) if (bmp) release_metapage(bmp); - jfs_error(imap->im_ipimap->i_sb, - "diAllocBit: iag inconsistent"); + jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); return -EIO; } @@ -2189,7 +2183,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* better have free extents. */ if (!iagp->nfreeexts) { - jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); + jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); return -EIO; } @@ -2261,7 +2255,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) } if (ciagp == NULL) { jfs_error(imap->im_ipimap->i_sb, - "diNewExt: ciagp == NULL"); + "ciagp == NULL\n"); rc = -EIO; goto error_out; } @@ -2498,7 +2492,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) IWRITE_UNLOCK(ipimap); IAGFREE_UNLOCK(imap); jfs_error(imap->im_ipimap->i_sb, - "diNewIAG: ipimap->i_size is wrong"); + "ipimap->i_size is wrong\n"); return -EIO; } @@ -2758,8 +2752,7 @@ diUpdatePMap(struct inode *ipimap, iagno = INOTOIAG(inum); /* make sure that the iag is contained within the map */ if (iagno >= imap->im_nextiag) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: the iag is outside the map"); + jfs_error(ipimap->i_sb, "the iag is outside the map\n"); return -EIO; } /* read the iag */ @@ -2788,13 +2781,13 @@ diUpdatePMap(struct inode *ipimap, */ if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in wmap!", inum); + "inode %ld not marked as allocated in wmap!\n", + inum); } if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in pmap!", inum); + "inode %ld not marked as allocated in pmap!\n", + inum); } /* update the bitmap for the extent of the freed inode */ iagp->pmap[extno] &= cpu_to_le32(~mask); @@ -2809,15 +2802,13 @@ diUpdatePMap(struct inode *ipimap, if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not allocated in " - "the working map"); + "the inode is not allocated in the working map\n"); return -EIO; } if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not free in the " - "persistent map"); + "the inode is not free in the persistent map\n"); return -EIO; } /* update the bitmap for the extent of the allocated inode */ @@ -2909,8 +2900,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) iagp = (struct iag *) bp->data; if (le32_to_cpu(iagp->iagnum) != i) { release_metapage(bp); - jfs_error(ipimap->i_sb, - "diExtendFs: unexpected value of iagnum"); + jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); return -EIO; } @@ -2986,8 +2976,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) if (xnuminos != atomic_read(&imap->im_numinos) || xnumfree != atomic_read(&imap->im_numfree)) { - jfs_error(ipimap->i_sb, - "diExtendFs: numinos or numfree incorrect"); + jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); return -EIO; } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 9e3aaff..d165cde 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -647,7 +647,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, if (mp) { if (mp->logical_size != size) { jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); + "get_mp->logical_size != size\n"); jfs_err("logical_size = %d, size = %d", mp->logical_size, size); dump_stack(); @@ -658,8 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, - "__get_metapage: using a " - "discarded metapage"); + "using a discarded metapage\n"); discard_metapage(mp); goto unlock; } diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index 884fc21..04847b8 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -108,6 +108,7 @@ struct jfs_superblock { extern int readSuper(struct super_block *, struct buffer_head **); extern int updateSuper(struct super_block *, uint); +__printf(2, 3) extern void jfs_error(struct super_block *, const char *, ...); extern int jfs_mount(struct super_block *); extern int jfs_mount_rw(struct super_block *, int); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 5fcc02e..564c4f2 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2684,7 +2684,7 @@ void txAbort(tid_t tid, int dirty) * mark filesystem dirty */ if (dirty) - jfs_error(tblk->sb, "txAbort"); + jfs_error(tblk->sb, "\n"); return; } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 6c50871..5ad7748 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -64,22 +64,23 @@ /* get page buffer for specified block address */ /* ToDo: Replace this ugly macro with a function */ -#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ - if (!(RC))\ - {\ - if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ - (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ - (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ - {\ - jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ - BT_PUTPAGE(MP);\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ + if (!(RC)) { \ + if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ + (le16_to_cpu((P)->header.nextindex) > \ + le16_to_cpu((P)->header.maxentry)) || \ + (le16_to_cpu((P)->header.maxentry) > \ + (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ + jfs_error((IP)->i_sb, \ + "XT_GETPAGE: xtree page corrupt\n"); \ + BT_PUTPAGE(MP); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) @@ -499,7 +500,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, /* push (bn, index) of the parent page/entry */ if (BT_STACK_FULL(btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtSearch!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } @@ -1385,7 +1386,7 @@ int xtExtend(tid_t tid, /* transaction id */ if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); + jfs_error(ip->i_sb, "xtSearch did not find extent\n"); return -EIO; } @@ -1393,7 +1394,7 @@ int xtExtend(tid_t tid, /* transaction id */ xad = &p->xad[index]; if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); + jfs_error(ip->i_sb, "extension is not contiguous\n"); return -EIO; } @@ -1552,7 +1553,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); + jfs_error(ip->i_sb, "couldn't find extent\n"); return -EIO; } @@ -1560,8 +1561,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", nextindex = le16_to_cpu(p->header.nextindex); if (index != nextindex - 1) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTailgate: the entry found is not the last entry"); + jfs_error(ip->i_sb, "the entry found is not the last entry\n"); return -EIO; } @@ -1734,7 +1734,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); + jfs_error(ip->i_sb, "Could not find extent\n"); return -EIO; } @@ -1758,7 +1758,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) (nxoff + nxlen > xoff + xlen)) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, - "xtUpdate: nXAD in not completely contained within XAD"); + "nXAD in not completely contained within XAD\n"); return -EIO; } @@ -1907,7 +1907,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (xoff >= nxoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); + jfs_error(ip->i_sb, "xoff >= nxoff\n"); return -EIO; } /* #endif _JFS_WIP_COALESCE */ @@ -2048,14 +2048,13 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); + jfs_error(ip->i_sb, "xtSearch failed\n"); return -EIO; } if (index0 != index) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtUpdate: unexpected value of index"); + jfs_error(ip->i_sb, "unexpected value of index\n"); return -EIO; } } @@ -3650,7 +3649,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } @@ -3751,8 +3750,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTruncate_pmap: did not find extent"); + jfs_error(ip->i_sb, "did not find extent\n"); return -EIO; } } else { @@ -3851,7 +3849,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8b19027..aa8a337 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1176,7 +1176,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!S_ISDIR(old_ip->i_mode) && new_ip) IWRITE_UNLOCK(new_ip); jfs_error(new_ip->i_sb, - "jfs_rename: new_ip->i_nlink != 0"); + "new_ip->i_nlink != 0\n"); return -EIO; } tblk = tid_to_tblock(tid); diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8d0c1c7..90b3bc2 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto resume; error_out: - jfs_error(sb, "jfs_extendfs"); + jfs_error(sb, "\n"); resume: /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9..6669aa2 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -92,16 +92,20 @@ static void jfs_handle_error(struct super_block *sb) /* nothing is done for continue beyond marking the superblock dirty */ } -void jfs_error(struct super_block *sb, const char * function, ...) +void jfs_error(struct super_block *sb, const char *fmt, ...) { - static char error_buf[256]; + struct va_format vaf; va_list args; - va_start(args, function); - vsnprintf(error_buf, sizeof(error_buf), function, args); - va_end(args); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; - pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); + pr_err("ERROR: (device %s): %pf: %pV\n", + sb->s_id, __builtin_return_address(0), &vaf); + + va_end(args); jfs_handle_error(sb); } @@ -617,7 +621,7 @@ static int jfs_freeze(struct super_block *sb) txQuiesce(sb); rc = lmLogShutdown(log); if (rc) { - jfs_error(sb, "jfs_freeze: lmLogShutdown failed"); + jfs_error(sb, "lmLogShutdown failed\n"); /* let operations fail rather than hang */ txResume(sb); @@ -646,12 +650,12 @@ static int jfs_unfreeze(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { rc = updateSuper(sb, FM_MOUNT); if (rc) { - jfs_error(sb, "jfs_unfreeze: updateSuper failed"); + jfs_error(sb, "updateSuper failed\n"); goto out; } rc = lmLogInit(log); if (rc) - jfs_error(sb, "jfs_unfreeze: lmLogInit failed"); + jfs_error(sb, "lmLogInit failed\n"); out: txResume(sb); } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 42d67f9..d3472f4 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -382,7 +382,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) nbytes = sizeDXD(&ji->ea); if (!nbytes) { - jfs_error(sb, "ea_read: nbytes is 0"); + jfs_error(sb, "nbytes is 0\n"); return -EIO; } @@ -482,7 +482,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) current_blocks = 0; } else { if (!(ji->ea.flag & DXD_EXTENT)) { - jfs_error(sb, "ea_get: invalid ea.flag)"); + jfs_error(sb, "invalid ea.flag\n"); return -EIO; } current_blocks = (ea_size + sb->s_blocksize - 1) >> @@ -1089,8 +1089,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name) } #ifdef CONFIG_JFS_SECURITY -int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) +static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { const struct xattr *xattr; tid_t *tid = fs_info; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0fac2cb..e474ca2b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -450,6 +450,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) dentry = d_lookup(parent, &filename); if (dentry != NULL) { if (nfs_same_file(dentry, entry)) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(dentry->d_inode, entry->fattr); if (!status) nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); @@ -817,7 +818,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_open_dir_context *dir_ctx = file->private_data; - int res; + int res = 0; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -839,7 +840,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping(inode, file->f_mapping); + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) + res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c93639e..af6e806 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -936,7 +936,7 @@ int nfs_attribute_timeout(struct inode *inode) return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -static int nfs_attribute_cache_expired(struct inode *inode) +int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a2c7c28..f1bdb72 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -888,6 +888,28 @@ out: return PageUptodate(page) != 0; } +/* If we know the page is up to date, and we're not using byte range locks (or + * if we have the whole file locked for writing), it may be more efficient to + * extend the write to cover the entire page in order to avoid fragmentation + * inefficiencies. + * + * If the file is opened for synchronous writes or if we have a write delegation + * from the server then we can just skip the rest of the checks. + */ +static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) +{ + if (file->f_flags & O_DSYNC) + return 0; + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + return 1; + if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL || + (inode->i_flock->fl_start == 0 && + inode->i_flock->fl_end == OFFSET_MAX && + inode->i_flock->fl_type != F_RDLCK))) + return 1; + return 0; +} + /* * Update and possibly write a cached page of an NFS file. * @@ -908,14 +930,7 @@ int nfs_updatepage(struct file *file, struct page *page, file->f_path.dentry->d_name.name, count, (long long)(page_file_offset(page) + offset)); - /* If we're not using byte range locks, and we know the page - * is up to date, it may be more efficient to extend the write - * to cover the entire page in order to avoid fragmentation - * inefficiencies. - */ - if (nfs_write_pageuptodate(page, inode) && - inode->i_flock == NULL && - !(file->f_flags & O_DSYNC)) { + if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 430b687..dc8f1ef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -81,6 +81,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers." + config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a2..a7cee86 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -42,6 +42,36 @@ #include "current_stateid.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif + #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, use the returned bitmask * to indicate which attributes we used to store the @@ -263,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru nfsd4_set_open_owner_reply_cache(cstate, open, resfh); accmode = NFSD_MAY_NOP; - if (open->op_created) + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) accmode |= NFSD_MAY_OWNER_OVERRIDE; status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); @@ -637,6 +671,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -916,6 +953,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f170518..280acef 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -97,19 +97,20 @@ nfs4_lock_state(void) static void free_session(struct nfsd4_session *); -void nfsd4_put_session(struct nfsd4_session *ses) +static bool is_session_dead(struct nfsd4_session *ses) { - atomic_dec(&ses->se_ref); + return ses->se_flags & NFS4_SESSION_DEAD; } -static bool is_session_dead(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); } -static __be32 mark_session_dead_locked(struct nfsd4_session *ses) +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { - if (atomic_read(&ses->se_ref)) + if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; ses->se_flags |= NFS4_SESSION_DEAD; return nfs_ok; @@ -364,19 +365,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - /* - * Major work on the lease subsystem (for example, to support - * calbacks on stat) will be required before we can support - * write delegations properly. - */ - if (type != NFS4_OPEN_DELEGATE_READ) - return NULL; if (fp->fi_had_conflict) return NULL; if (num_delegations > max_delegations) @@ -397,7 +391,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_recall_lru); get_nfs4_file(fp); dp->dl_file = fp; - dp->dl_type = type; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); @@ -1188,6 +1182,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); return 0; } @@ -1262,6 +1259,31 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); +} + static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; @@ -1639,16 +1661,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; - /* Currently only support SP4_NONE */ switch (exid->spa_how) { + case SP4_MACH_CRED: + if (!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: return nfserr_encr_alg_unsupp; - case SP4_MACH_CRED: - return nfserr_serverfault; /* no excuse :-/ */ } /* Cases below refer to rfc 5661 section 18.35.4: */ @@ -1663,6 +1685,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_inval; goto out; } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } if (!creds_match) { /* case 9 */ status = nfserr_perm; goto out; @@ -1709,7 +1735,8 @@ out_new: status = nfserr_jukebox; goto out; } - new->cl_minorversion = 1; + new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -1839,6 +1866,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) return nfs_ok; } +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) +{ + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. + * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1854,6 +1899,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; @@ -1874,6 +1922,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, WARN_ON_ONCE(conf && unconf); if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { @@ -1890,6 +1941,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out_free_conn; } + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { @@ -1957,7 +2011,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state { struct nfsd4_session *session = cstate->session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; spin_lock(&nn->client_lock); session->se_cb_prog = bc->bc_cb_program; session->se_cb_sec = bc->bc_cb_sec; @@ -1986,6 +2044,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_badsession; if (!session) goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -2014,6 +2075,7 @@ nfsd4_destroy_session(struct svc_rqst *r, { struct nfsd4_session *ses; __be32 status; + int ref_held_by_me = 0; struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); nfs4_lock_state(); @@ -2021,6 +2083,7 @@ nfsd4_destroy_session(struct svc_rqst *r, if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; + ref_held_by_me++; } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); @@ -2028,17 +2091,22 @@ nfsd4_destroy_session(struct svc_rqst *r, status = nfserr_badsession; if (!ses) goto out_client_lock; - status = mark_session_dead_locked(ses); - if (status) + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) goto out_client_lock; + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); + if (status) + goto out_put_session; unhash_session(ses); spin_unlock(&nn->client_lock); nfsd4_probe_callback_sync(ses->se_client); spin_lock(&nn->client_lock); - free_session(ses); status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); out_client_lock: spin_unlock(&nn->client_lock); out: @@ -2058,26 +2126,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s return NULL; } -static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; + __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); - if (c) { - spin_unlock(&clp->cl_lock); - free_conn(new); - return; - } + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); - return; + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; } static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) @@ -2169,8 +2242,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out_put_session; - nfsd4_sequence_check_conn(conn, session); + status = nfsd4_sequence_check_conn(conn, session); conn = NULL; + if (status) + goto out_put_session; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; @@ -2232,7 +2307,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_stale_clientid; goto out; } - + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } expire_client(clp); out: nfs4_unlock_state(); @@ -2940,13 +3018,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } -static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, flag); + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); @@ -2964,12 +3042,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) return 0; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +static int nfs4_set_delegation(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; if (!fp->fi_lease) - return nfs4_setlease(dp, flag); + return nfs4_setlease(dp); spin_lock(&recall_lock); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); @@ -3005,6 +3083,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. */ static void nfs4_open_delegation(struct net *net, struct svc_fh *fh, @@ -3013,39 +3094,45 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status = 0, flag = 0; + int status = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ if (locks_in_grace(net)) - goto out; + goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) - goto out; + goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } - - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp, flag); + status = nfs4_set_delegation(dp); if (status) goto out_free; @@ -3053,24 +3140,23 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); -out: - open->op_delegate_type = flag; - if (flag == NFS4_OPEN_DELEGATE_NONE) { - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - - /* 4.1 client asking for a delegation? */ - if (open->op_deleg_want) - nfsd4_open_deleg_none_ext(open, status); - } + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; + } + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; } static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, @@ -3427,7 +3513,7 @@ grace_disallows_io(struct net *net, struct inode *inode) /* Returns true iff a is later than b: */ static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return (s32)a->si_generation - (s32)b->si_generation > 0; + return (s32)(a->si_generation - b->si_generation) > 0; } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) @@ -4435,7 +4521,6 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { - struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4468,10 +4553,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto out; } - lo = lockowner(stp->st_stateowner); locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lo; + file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); file_lock->fl_pid = current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -4490,11 +4574,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { - WARN_ON_ONCE(cstate->replay_owner); - release_lockowner(lo); - } - out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0..0c0f3ea9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,11 @@ #include "cache.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include +#endif + + #define NFSDDBG_FACILITY NFSDDBG_XDR /* @@ -134,6 +139,19 @@ xdr_error: \ } \ } while (0) +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -161,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. */ memcpy(p, argp->p, avail); - /* step to next page */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; @@ -242,7 +251,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl) + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; @@ -380,6 +390,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; } } + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + READ32(dummy32); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + READ32(dummy32); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) @@ -428,7 +464,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); READ32(nr_secflavs); - cbs->flavor = (u32)(-1); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); READ32(dummy); @@ -576,7 +616,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl); + &create->cr_acl, &create->cr_label); if (status) goto out; @@ -827,7 +867,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -841,7 +881,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -1063,7 +1103,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl); + &setattr->sa_acl, &setattr->sa_label); } static __be32 @@ -1567,6 +1607,7 @@ struct nfsd4_minorversion_ops { static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, + [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, }; static __be32 @@ -1953,6 +1994,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +{ + __be32 *p = *pp; + + if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) + return nfserr_resource; + + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + if ((*buflen -= 8) < 0) + return nfserr_resource; + + WRITE32(0); /* lfs */ + WRITE32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + *buflen -= (XDR_QUADLEN(len) << 2) + 4; + + *pp = p; + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +{ return 0; } +#endif + static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { /* As per referral draft: */ @@ -2012,6 +2083,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { @@ -2065,6 +2139,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; + } + } +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ + if (bmval2) { if ((buflen -= 16) < 0) goto out_resource; @@ -2093,6 +2182,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { if ((buflen -= 12) < 0) goto out_resource; @@ -2400,6 +2491,12 @@ out_acl: get_parent_attributes(exp, &stat); WRITE64(stat.ino); } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + status = nfsd4_encode_security_label(rqstp, context, + contextlen, &p, &buflen); + if (status) + goto out; + } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); @@ -2412,6 +2509,10 @@ out_acl: status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (context) + security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); if (fhp == &tempfh) fh_put(&tempfh); @@ -3176,16 +3277,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { __be32 *p; - RESERVE_SPACE(12); + RESERVE_SPACE(16); if (nfserr) { - WRITE32(2); + WRITE32(3); + WRITE32(0); WRITE32(0); WRITE32(0); } else { - WRITE32(2); + WRITE32(3); WRITE32(setattr->sa_bmval[0]); WRITE32(setattr->sa_bmval[1]); + WRITE32(setattr->sa_bmval[2]); } ADJUST_ARGS(); return nfserr; @@ -3226,6 +3329,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w return nfserr; } +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) @@ -3264,6 +3375,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* state_protect4_r. Currently only support SP4_NONE */ BUG_ON(exid->spa_how != SP4_NONE); WRITE32(exid->spa_how); + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce bitmap: */ + WRITE32(2); + WRITE32(nfs4_minimal_spo_must_enforce[0]); + WRITE32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + WRITE32(0); + break; + default: + WARN_ON_ONCE(1); + } /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ @@ -3635,13 +3760,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - put_client_renew(cs->session->se_client); + spin_lock(&nn->client_lock); nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); } return 1; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index c0d9317..2bbd94e 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,7 +24,7 @@ /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 +#define NFSD_SUPPORTED_MINOR_VERSION 2 /* * Maximum blocksizes supported by daemon under various circumstances. */ @@ -328,6 +328,13 @@ void nfsd_lockd_shutdown(void); #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 @@ -342,8 +349,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) static inline u32 nfsd_suppattrs2(u32 minorversion) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ @@ -356,7 +366,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 262df5c..6b9f48c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -116,7 +116,7 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion; +u32 nfsd_supported_minorversion = 1; int nfsd_vers(int vers, enum vers_op change) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 274e2a1..424d8f5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -246,6 +246,7 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a6bc8a7..8ff6a00 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + __be32 error; + int host_error; + struct dentry *dentry; + + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5b58941..a4be2e3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -39,7 +39,6 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); /* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -56,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *); __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, struct nfs4_acl *); int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -92,17 +93,13 @@ __be32 nfsd_remove(struct svc_rqst *, struct svc_fh *, char *, int); __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3b271d2..b3ed644 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -40,6 +40,7 @@ #include "state.h" #include "nfsd.h" +#define NFSD4_MAX_SEC_LABEL_LEN 2048 #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -118,6 +119,7 @@ struct nfsd4_create { struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -246,6 +248,7 @@ struct nfsd4_open { struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; + struct xdr_netobj op_label; }; #define op_iattr iattr @@ -330,6 +333,7 @@ struct nfsd4_setattr { u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; }; struct nfsd4_setclientid { diff --git a/include/linux/bio.h b/include/linux/bio.h index ef24466..ec48bac 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -97,11 +97,11 @@ static inline void *bio_data(struct bio *bio) * permanent PIO fall back, user is probably better off disabling highmem * I/O completely on that queue (see ide-dma for example) */ -#define __bio_kmap_atomic(bio, idx, kmtype) \ +#define __bio_kmap_atomic(bio, idx) \ (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) + \ bio_iovec_idx((bio), (idx))->bv_offset) -#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr) +#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) /* * merge helpers etc diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fd097ec..297462b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -278,6 +278,8 @@ enum { * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. + * + * - blkcg: blk-throttle becomes properly hierarchical. */ CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 282e270..a5d52ee 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -41,7 +41,7 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int); +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -54,7 +54,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) return; } static inline -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int) +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) { return THERMAL_CSTATE_INVALID; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4d7390b..90d5a15 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -119,7 +119,7 @@ struct cpufreq_policy { struct kobject kobj; struct completion kobj_unregister; - bool transition_ongoing; /* Tracks transition status */ + int transition_ongoing; /* Tracks transition status */ }; #define CPUFREQ_ADJUST (0) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3cd3247..e151d4c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -446,9 +446,9 @@ int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); /* * Table reference counting. */ -struct dm_table *dm_get_live_table(struct mapped_device *md); -void dm_table_get(struct dm_table *t); -void dm_table_put(struct dm_table *t); +struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx); +void dm_put_live_table(struct mapped_device *md, int srcu_idx); +void dm_sync_table(struct mapped_device *md); /* * Queries diff --git a/include/linux/elevator.h b/include/linux/elevator.h index acd0312..306dd8c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -7,6 +7,7 @@ #ifdef CONFIG_BLOCK struct io_cq; +struct elevator_type; typedef int (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); @@ -35,7 +36,8 @@ typedef void (elevator_put_req_fn) (struct request *); typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_init_fn) (struct request_queue *); +typedef int (elevator_init_fn) (struct request_queue *, + struct elevator_type *e); typedef void (elevator_exit_fn) (struct elevator_queue *); struct elevator_ops @@ -155,6 +157,8 @@ extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); extern bool elv_rq_merge_ok(struct request *, struct bio *); +extern struct elevator_queue *elevator_alloc(struct request_queue *, + struct elevator_type *); /* * Helper functions. diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 99d0fbc..9f15c00 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -566,10 +566,6 @@ static inline ssize_t ftrace_filter_write(struct file *file, const char __user * size_t cnt, loff_t *ppos) { return -ENODEV; } static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { return -ENODEV; } -static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence) -{ - return -ENODEV; -} static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } #endif /* CONFIG_DYNAMIC_FTRACE */ @@ -828,10 +824,15 @@ enum ftrace_dump_mode; extern enum ftrace_dump_mode ftrace_dump_on_oops; +extern void disable_trace_on_warning(void); +extern int __disable_trace_on_warning; + #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, #endif +#else /* CONFIG_TRACING */ +static inline void disable_trace_on_warning(void) { } #endif /* CONFIG_TRACING */ #ifndef INIT_TRACE_RECURSION diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0b17629..7125cef 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -348,6 +348,7 @@ extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); +extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); diff --git a/include/linux/platform_data/remoteproc-omap.h b/include/linux/platform_data/remoteproc-omap.h index 3c1c644..bfbd12b 100644 --- a/include/linux/platform_data/remoteproc-omap.h +++ b/include/linux/platform_data/remoteproc-omap.h @@ -50,7 +50,7 @@ void __init omap_rproc_reserve_cma(void); #else -void __init omap_rproc_reserve_cma(void) +static inline void __init omap_rproc_reserve_cma(void) { } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 303399b..6ce690d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -57,6 +57,7 @@ struct cache_head { #define CACHE_VALID 0 /* Entry contains valid data */ #define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ #define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ +#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ @@ -148,6 +149,24 @@ struct cache_deferred_req { int too_many); }; +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. + */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} extern const struct file_operations cache_file_operations_pipefs; extern const struct file_operations content_file_operations_pipefs; @@ -181,15 +200,10 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -static inline int cache_valid(struct cache_head *h) +static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - /* If an item has been unhashed pending removal when - * the refcount drops to 0, the expiry_time will be - * set to 0. We don't want to consider such items - * valid in this context even though CACHE_VALID is - * set. - */ - return (h->expiry_time != 0 && test_bit(CACHE_VALID, &h->flags)); + return (h->expiry_time < seconds_since_boot()) || + (detail->flush_time > h->last_refresh); } extern int cache_check(struct cache_detail *detail, @@ -250,25 +264,6 @@ static inline int get_uint(char **bpp, unsigned int *anint) return 0; } -/* - * timestamps kept in the cache are expressed in seconds - * since boot. This is the best for measuring differences in - * real time. - */ -static inline time_t seconds_since_boot(void) -{ - struct timespec boot; - getboottime(&boot); - return get_seconds() - boot.tv_sec; -} - -static inline time_t convert_to_wallclock(time_t sinceboot) -{ - struct timespec boot; - getboottime(&boot); - return boot.tv_sec + sinceboot; -} - static inline time_t get_expiry(char **bpp) { int rv; diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 161463e..1f911cc 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -151,6 +151,8 @@ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); /* Fill in an array with a list of supported pseudoflavors */ int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); +struct gss_api_mech * gss_mech_get(struct gss_api_mech *); + /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index ff374ab..8d71d65 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -23,13 +24,23 @@ struct svc_cred { struct group_info *cr_group_info; u32 cr_flavor; /* pseudoflavor */ char *cr_principal; /* for gss */ + struct gss_api_mech *cr_gss_mech; }; +static inline void init_svc_cred(struct svc_cred *cred) +{ + cred->cr_group_info = NULL; + cred->cr_principal = NULL; + cred->cr_gss_mech = NULL; +} + static inline void free_svc_cred(struct svc_cred *cred) { if (cred->cr_group_info) put_group_info(cred->cr_group_info); kfree(cred->cr_principal); + gss_mech_put(cred->cr_gss_mech); + init_svc_cred(cred); } struct svc_rqst; /* forward decl */ diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index adcbb20..d9fa68f 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -26,6 +26,9 @@ #ifndef NET_9P_TRANSPORT_H #define NET_9P_TRANSPORT_H +#define P9_DEF_MIN_RESVPORT (665U) +#define P9_DEF_MAX_RESVPORT (1023U) + /** * struct p9_trans_module - transport module interface * @list: used to maintain a list of currently available transports @@ -37,6 +40,8 @@ * @close: member function to discard a connection on this transport * @request: member function to issue a request to the transport * @cancel: member function to cancel a request (if it hasn't been sent) + * @cancelled: member function to notify that a cancelled request will not + * not receive a reply * * This is the basic API for a transport module which is registered by the * transport module with the 9P core network module and used by the client @@ -55,6 +60,7 @@ struct p9_trans_module { void (*close) (struct p9_client *); int (*request) (struct p9_client *, struct p9_req_t *req); int (*cancel) (struct p9_client *, struct p9_req_t *req); + int (*cancelled)(struct p9_client *, struct p9_req_t *req); int (*zc_request)(struct p9_client *, struct p9_req_t *, char *, char *, int , int, int, int); }; diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 23a87d0..e5d09d2 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -34,8 +34,6 @@ extern void iscsit_put_transport(struct iscsit_transport *); /* * From iscsi_target.c */ -extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, - struct iscsi_cmd *); extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); @@ -45,18 +43,26 @@ extern int iscsit_check_dataout_hdr(struct iscsi_conn *, unsigned char *, struct iscsi_cmd **); extern int iscsit_check_dataout_payload(struct iscsi_cmd *, struct iscsi_data *, bool); -extern int iscsit_handle_nop_out(struct iscsi_conn *, struct iscsi_cmd *, - unsigned char *); +extern int iscsit_setup_nop_out(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_nopout *); +extern int iscsit_process_nop_out(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_nopout *); extern int iscsit_handle_logout_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *); extern int iscsit_handle_task_mgt_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *); +extern int iscsit_setup_text_cmd(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_text *); +extern int iscsit_process_text_cmd(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_text *); extern void iscsit_build_rsp_pdu(struct iscsi_cmd *, struct iscsi_conn *, bool, struct iscsi_scsi_rsp *); extern void iscsit_build_nopin_rsp(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_nopin *, bool); extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_tm_rsp *); +extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *, + struct iscsi_text_rsp *); extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_reject *); extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *, @@ -67,6 +73,10 @@ extern int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *); */ extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *); /* + * From iscsi_target_erl0.c + */ +extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); +/* * From iscsi_target_erl1.c */ extern void iscsit_stop_dataout_timer(struct iscsi_cmd *); @@ -80,4 +90,5 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); * From iscsi_target_util.c */ extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); -extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, __be32); +extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, + unsigned char *, __be32); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 4ea4f98..e34fc90 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -218,14 +218,11 @@ enum tcm_tmreq_table { /* fabric independent task management response values */ enum tcm_tmrsp_table { - TMR_FUNCTION_COMPLETE = 0, - TMR_TASK_DOES_NOT_EXIST = 1, - TMR_LUN_DOES_NOT_EXIST = 2, - TMR_TASK_STILL_ALLEGIANT = 3, - TMR_TASK_FAILOVER_NOT_SUPPORTED = 4, - TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED = 5, - TMR_FUNCTION_AUTHORIZATION_FAILED = 6, - TMR_FUNCTION_REJECTED = 255, + TMR_FUNCTION_COMPLETE = 1, + TMR_TASK_DOES_NOT_EXIST = 2, + TMR_LUN_DOES_NOT_EXIST = 3, + TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED = 4, + TMR_FUNCTION_REJECTED = 5, }; /* @@ -339,8 +336,6 @@ struct t10_pr_registration { /* Used during APTPL metadata reading */ #define PR_APTPL_MAX_TPORT_LEN 256 unsigned char pr_tport[PR_APTPL_MAX_TPORT_LEN]; - /* For writing out live meta data */ - unsigned char *pr_aptpl_buf; u16 pr_aptpl_rpti; u16 pr_reg_tpgt; /* Reservation effects all target ports */ @@ -374,9 +369,7 @@ struct t10_reservation { /* Activate Persistence across Target Power Loss enabled * for SCSI device */ int pr_aptpl_active; - /* Used by struct t10_reservation->pr_aptpl_buf_len */ #define PR_APTPL_BUF_LEN 8192 - u32 pr_aptpl_buf_len; u32 pr_generation; spinlock_t registration_lock; spinlock_t aptpl_reg_lock; @@ -424,8 +417,6 @@ struct se_cmd { int sam_task_attr; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; - /* Used to signal cmd->se_tfo->check_release_cmd() usage per cmd */ - unsigned check_release:1; unsigned cmd_wait_set:1; unsigned unknown_data_length:1; /* See se_cmd_flags_table */ @@ -458,7 +449,6 @@ struct se_cmd { unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; unsigned long long t_task_lba; - atomic_t t_fe_count; unsigned int transport_state; #define CMD_T_ABORTED (1 << 0) #define CMD_T_ACTIVE (1 << 1) @@ -802,11 +792,12 @@ struct se_portal_group { struct target_core_fabric_ops *se_tpg_tfo; struct se_wwn *se_tpg_wwn; struct config_group tpg_group; - struct config_group *tpg_default_groups[6]; + struct config_group *tpg_default_groups[7]; struct config_group tpg_lun_group; struct config_group tpg_np_group; struct config_group tpg_acl_group; struct config_group tpg_attrib_group; + struct config_group tpg_auth_group; struct config_group tpg_param_group; }; diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 6125095..713c500 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -23,6 +23,7 @@ struct target_fabric_configfs_template { struct config_item_type tfc_tpg_np_cit; struct config_item_type tfc_tpg_np_base_cit; struct config_item_type tfc_tpg_attrib_cit; + struct config_item_type tfc_tpg_auth_cit; struct config_item_type tfc_tpg_param_cit; struct config_item_type tfc_tpg_nacl_cit; struct config_item_type tfc_tpg_nacl_base_cit; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 1dcce9c..7a16178 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -61,7 +61,7 @@ struct target_core_fabric_ops { int (*get_cmd_state)(struct se_cmd *); int (*queue_data_in)(struct se_cmd *); int (*queue_status)(struct se_cmd *); - int (*queue_tm_rsp)(struct se_cmd *); + void (*queue_tm_rsp)(struct se_cmd *); /* * fabric module calls for target_core_fabric_configfs.c */ diff --git a/include/target/target_core_fabric_configfs.h b/include/target/target_core_fabric_configfs.h index a26fb75..b32a149 100644 --- a/include/target/target_core_fabric_configfs.h +++ b/include/target/target_core_fabric_configfs.h @@ -62,6 +62,17 @@ static struct target_fabric_tpg_attrib_attribute _fabric##_tpg_attrib_##_name = _fabric##_tpg_attrib_show_##_name, \ _fabric##_tpg_attrib_store_##_name); +CONFIGFS_EATTR_STRUCT(target_fabric_tpg_auth, se_portal_group); +#define TF_TPG_AUTH_ATTR(_fabric, _name, _mode) \ +static struct target_fabric_tpg_auth_attribute _fabric##_tpg_auth_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + _fabric##_tpg_auth_show_##_name, \ + _fabric##_tpg_auth_store_##_name); + +#define TF_TPG_AUTH_ATTR_RO(_fabric, _name) \ +static struct target_fabric_tpg_auth_attribute _fabric##_tpg_auth_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + _fabric##_tpg_auth_show_##_name); CONFIGFS_EATTR_STRUCT(target_fabric_tpg_param, se_portal_group); #define TF_TPG_PARAM_ATTR(_fabric, _name, _mode) \ diff --git a/include/trace/events/target.h b/include/trace/events/target.h new file mode 100644 index 0000000..aef8fc3 --- /dev/null +++ b/include/trace/events/target.h @@ -0,0 +1,214 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM target + +#if !defined(_TRACE_TARGET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TARGET_H + +#include +#include +#include +#include +#include + +/* cribbed verbatim from */ +#define scsi_opcode_name(opcode) { opcode, #opcode } +#define show_opcode_name(val) \ + __print_symbolic(val, \ + scsi_opcode_name(TEST_UNIT_READY), \ + scsi_opcode_name(REZERO_UNIT), \ + scsi_opcode_name(REQUEST_SENSE), \ + scsi_opcode_name(FORMAT_UNIT), \ + scsi_opcode_name(READ_BLOCK_LIMITS), \ + scsi_opcode_name(REASSIGN_BLOCKS), \ + scsi_opcode_name(INITIALIZE_ELEMENT_STATUS), \ + scsi_opcode_name(READ_6), \ + scsi_opcode_name(WRITE_6), \ + scsi_opcode_name(SEEK_6), \ + scsi_opcode_name(READ_REVERSE), \ + scsi_opcode_name(WRITE_FILEMARKS), \ + scsi_opcode_name(SPACE), \ + scsi_opcode_name(INQUIRY), \ + scsi_opcode_name(RECOVER_BUFFERED_DATA), \ + scsi_opcode_name(MODE_SELECT), \ + scsi_opcode_name(RESERVE), \ + scsi_opcode_name(RELEASE), \ + scsi_opcode_name(COPY), \ + scsi_opcode_name(ERASE), \ + scsi_opcode_name(MODE_SENSE), \ + scsi_opcode_name(START_STOP), \ + scsi_opcode_name(RECEIVE_DIAGNOSTIC), \ + scsi_opcode_name(SEND_DIAGNOSTIC), \ + scsi_opcode_name(ALLOW_MEDIUM_REMOVAL), \ + scsi_opcode_name(SET_WINDOW), \ + scsi_opcode_name(READ_CAPACITY), \ + scsi_opcode_name(READ_10), \ + scsi_opcode_name(WRITE_10), \ + scsi_opcode_name(SEEK_10), \ + scsi_opcode_name(POSITION_TO_ELEMENT), \ + scsi_opcode_name(WRITE_VERIFY), \ + scsi_opcode_name(VERIFY), \ + scsi_opcode_name(SEARCH_HIGH), \ + scsi_opcode_name(SEARCH_EQUAL), \ + scsi_opcode_name(SEARCH_LOW), \ + scsi_opcode_name(SET_LIMITS), \ + scsi_opcode_name(PRE_FETCH), \ + scsi_opcode_name(READ_POSITION), \ + scsi_opcode_name(SYNCHRONIZE_CACHE), \ + scsi_opcode_name(LOCK_UNLOCK_CACHE), \ + scsi_opcode_name(READ_DEFECT_DATA), \ + scsi_opcode_name(MEDIUM_SCAN), \ + scsi_opcode_name(COMPARE), \ + scsi_opcode_name(COPY_VERIFY), \ + scsi_opcode_name(WRITE_BUFFER), \ + scsi_opcode_name(READ_BUFFER), \ + scsi_opcode_name(UPDATE_BLOCK), \ + scsi_opcode_name(READ_LONG), \ + scsi_opcode_name(WRITE_LONG), \ + scsi_opcode_name(CHANGE_DEFINITION), \ + scsi_opcode_name(WRITE_SAME), \ + scsi_opcode_name(UNMAP), \ + scsi_opcode_name(READ_TOC), \ + scsi_opcode_name(LOG_SELECT), \ + scsi_opcode_name(LOG_SENSE), \ + scsi_opcode_name(XDWRITEREAD_10), \ + scsi_opcode_name(MODE_SELECT_10), \ + scsi_opcode_name(RESERVE_10), \ + scsi_opcode_name(RELEASE_10), \ + scsi_opcode_name(MODE_SENSE_10), \ + scsi_opcode_name(PERSISTENT_RESERVE_IN), \ + scsi_opcode_name(PERSISTENT_RESERVE_OUT), \ + scsi_opcode_name(VARIABLE_LENGTH_CMD), \ + scsi_opcode_name(REPORT_LUNS), \ + scsi_opcode_name(MAINTENANCE_IN), \ + scsi_opcode_name(MAINTENANCE_OUT), \ + scsi_opcode_name(MOVE_MEDIUM), \ + scsi_opcode_name(EXCHANGE_MEDIUM), \ + scsi_opcode_name(READ_12), \ + scsi_opcode_name(WRITE_12), \ + scsi_opcode_name(WRITE_VERIFY_12), \ + scsi_opcode_name(SEARCH_HIGH_12), \ + scsi_opcode_name(SEARCH_EQUAL_12), \ + scsi_opcode_name(SEARCH_LOW_12), \ + scsi_opcode_name(READ_ELEMENT_STATUS), \ + scsi_opcode_name(SEND_VOLUME_TAG), \ + scsi_opcode_name(WRITE_LONG_2), \ + scsi_opcode_name(READ_16), \ + scsi_opcode_name(WRITE_16), \ + scsi_opcode_name(VERIFY_16), \ + scsi_opcode_name(WRITE_SAME_16), \ + scsi_opcode_name(SERVICE_ACTION_IN), \ + scsi_opcode_name(SAI_READ_CAPACITY_16), \ + scsi_opcode_name(SAI_GET_LBA_STATUS), \ + scsi_opcode_name(MI_REPORT_TARGET_PGS), \ + scsi_opcode_name(MO_SET_TARGET_PGS), \ + scsi_opcode_name(READ_32), \ + scsi_opcode_name(WRITE_32), \ + scsi_opcode_name(WRITE_SAME_32), \ + scsi_opcode_name(ATA_16), \ + scsi_opcode_name(ATA_12)) + +#define show_task_attribute_name(val) \ + __print_symbolic(val, \ + { MSG_SIMPLE_TAG, "SIMPLE" }, \ + { MSG_HEAD_TAG, "HEAD" }, \ + { MSG_ORDERED_TAG, "ORDERED" }, \ + { MSG_ACA_TAG, "ACA" } ) + +#define show_scsi_status_name(val) \ + __print_symbolic(val, \ + { SAM_STAT_GOOD, "GOOD" }, \ + { SAM_STAT_CHECK_CONDITION, "CHECK CONDITION" }, \ + { SAM_STAT_CONDITION_MET, "CONDITION MET" }, \ + { SAM_STAT_BUSY, "BUSY" }, \ + { SAM_STAT_INTERMEDIATE, "INTERMEDIATE" }, \ + { SAM_STAT_INTERMEDIATE_CONDITION_MET, "INTERMEDIATE CONDITION MET" }, \ + { SAM_STAT_RESERVATION_CONFLICT, "RESERVATION CONFLICT" }, \ + { SAM_STAT_COMMAND_TERMINATED, "COMMAND TERMINATED" }, \ + { SAM_STAT_TASK_SET_FULL, "TASK SET FULL" }, \ + { SAM_STAT_ACA_ACTIVE, "ACA ACTIVE" }, \ + { SAM_STAT_TASK_ABORTED, "TASK ABORTED" } ) + +TRACE_EVENT(target_sequencer_start, + + TP_PROTO(struct se_cmd *cmd), + + TP_ARGS(cmd), + + TP_STRUCT__entry( + __field( unsigned int, unpacked_lun ) + __field( unsigned int, opcode ) + __field( unsigned int, data_length ) + __field( unsigned int, task_attribute ) + __array( unsigned char, cdb, TCM_MAX_COMMAND_SIZE ) + __string( initiator, cmd->se_sess->se_node_acl->initiatorname ) + ), + + TP_fast_assign( + __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->opcode = cmd->t_task_cdb[0]; + __entry->data_length = cmd->data_length; + __entry->task_attribute = cmd->sam_task_attr; + memcpy(__entry->cdb, cmd->t_task_cdb, TCM_MAX_COMMAND_SIZE); + __assign_str(initiator, cmd->se_sess->se_node_acl->initiatorname); + ), + + TP_printk("%s -> LUN %03u %s data_length %6u CDB %s (TA:%s C:%02x)", + __get_str(initiator), __entry->unpacked_lun, + show_opcode_name(__entry->opcode), + __entry->data_length, __print_hex(__entry->cdb, 16), + show_task_attribute_name(__entry->task_attribute), + scsi_command_size(__entry->cdb) <= 16 ? + __entry->cdb[scsi_command_size(__entry->cdb) - 1] : + __entry->cdb[1] + ) +); + +TRACE_EVENT(target_cmd_complete, + + TP_PROTO(struct se_cmd *cmd), + + TP_ARGS(cmd), + + TP_STRUCT__entry( + __field( unsigned int, unpacked_lun ) + __field( unsigned int, opcode ) + __field( unsigned int, data_length ) + __field( unsigned int, task_attribute ) + __field( unsigned char, scsi_status ) + __field( unsigned char, sense_length ) + __array( unsigned char, cdb, TCM_MAX_COMMAND_SIZE ) + __array( unsigned char, sense_data, 18 ) + __string(initiator, cmd->se_sess->se_node_acl->initiatorname) + ), + + TP_fast_assign( + __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->opcode = cmd->t_task_cdb[0]; + __entry->data_length = cmd->data_length; + __entry->task_attribute = cmd->sam_task_attr; + __entry->scsi_status = cmd->scsi_status; + __entry->sense_length = cmd->scsi_status == SAM_STAT_CHECK_CONDITION ? + min(18, ((u8 *) cmd->sense_buffer)[SPC_ADD_SENSE_LEN_OFFSET] + 8) : 0; + memcpy(__entry->cdb, cmd->t_task_cdb, TCM_MAX_COMMAND_SIZE); + memcpy(__entry->sense_data, cmd->sense_buffer, __entry->sense_length); + __assign_str(initiator, cmd->se_sess->se_node_acl->initiatorname); + ), + + TP_printk("%s <- LUN %03u status %s (sense len %d%s%s) %s data_length %6u CDB %s (TA:%s C:%02x)", + __get_str(initiator), __entry->unpacked_lun, + show_scsi_status_name(__entry->scsi_status), + __entry->sense_length, __entry->sense_length ? " / " : "", + __print_hex(__entry->sense_data, __entry->sense_length), + show_opcode_name(__entry->opcode), + __entry->data_length, __print_hex(__entry->cdb, 16), + show_task_attribute_name(__entry->task_attribute), + scsi_command_size(__entry->cdb) <= 16 ? + __entry->cdb[scsi_command_size(__entry->cdb) - 1] : + __entry->cdb[1] + ) +); + +#endif /* _TRACE_TARGET_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 84bc419..fed853f 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -16,6 +16,7 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_fields: list of fields for syscall_enter trace event * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 7e75b6f..afd0cbd 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 24 +#define DM_VERSION_MINOR 25 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2013-01-15)" +#define DM_VERSION_EXTRA "-ioctl (2013-06-26)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/kernel/panic.c b/kernel/panic.c index 9771231..8018646 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +400,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { + disable_trace_on_warning(); + pr_warn("------------[ cut here ]------------\n"); pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", raw_smp_processor_id(), current->pid, file, line, caller); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ce13c3..e5b31af 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MODULES { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff..67708f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops) return 0; } +static void ftrace_sync(struct work_struct *work) +{ + /* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. + */ +} + static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; @@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) * so there'll be no new users. We must ensure * all current users are done before we free * the control data. + * Note synchronize_sched() is not enough, as we + * use preempt_disable() to do RCU, but the function + * tracer can be called where RCU is not active + * (before user_exit()). */ - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); control_ops_free(ops); } } else @@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); + return 0; } @@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v) if (rec->counter <= 1) stddev = 0; else { - stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Apply Welford's method: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + */ + stddev = rec->counter * rec->time_squared - + rec->time * rec->time; + /* * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, (rec->counter - 1) * 1000); + do_div(stddev, rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); @@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +/* Used by function selftest to not test if filter is set */ +bool ftrace_filter_param __initdata; + static int __init set_ftrace_notrace(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } @@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be..0cd500b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; enum ftrace_dump_mode ftrace_dump_on_oops; +/* When set, tracing will stop when a WARN*() is hit */ +int __disable_trace_on_warning; + static int tracing_set_tracer(const char *buf); #define MAX_TRACER_SIZE 100 @@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); +static int __init stop_trace_on_warning(char *str) +{ + __disable_trace_on_warning = 1; + return 1; +} +__setup("traceoff_on_warning=", stop_trace_on_warning); + static int __init boot_alloc_snapshot(char *str) { allocate_snapshot = true; @@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); + unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; @@ -193,6 +204,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu) return ts; } +/** + * tracing_is_enabled - Show if global_trace has been disabled + * + * Shows if the global trace has been enabled or not. It uses the + * mirror flag "buffer_disabled" to be used in fast paths such as for + * the irqsoff tracer. But it may be inaccurate due to races. If you + * need to know the accurate state, use tracing_is_on() which is a little + * slower, but accurate. + */ int tracing_is_enabled(void) { - return tracing_is_on(); + /* + * For quick access (irqsoff uses this in fast path), just + * return the mirror variable of the state of the ring buffer. + * It's a little racy, but we don't really care. + */ + smp_rmb(); + return !global_trace.buffer_disabled; } /* @@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer @@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +static void tracer_tracing_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_on(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 0; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_on - enable tracing buffers * @@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_on(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 0; + tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); @@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ +static void tracer_tracing_off(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_off(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 1; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_off - turn off tracing buffers * @@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); */ void tracing_off(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_off(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 1; + tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); +void disable_trace_on_warning(void) +{ + if (__disable_trace_on_warning) + tracing_off(); +} + +/** + * tracer_tracing_is_on - show real state of ring buffer enabled + * @tr : the trace array to know if ring buffer is enabled + * + * Shows real state of the ring buffer if it is enabled or not. + */ +static int tracer_tracing_is_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + return ring_buffer_record_is_on(tr->trace_buffer.buffer); + return !tr->buffer_disabled; +} + /** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { - if (global_trace.trace_buffer.buffer) - return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); - return !global_trace.buffer_disabled; + return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -void -ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) -{ - if (likely(!atomic_read(&data->disabled))) - trace_function(tr, ip, parent_ip, flags, pc); -} - #ifdef CONFIG_STACKTRACE #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) @@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct trace_array *tr, struct trace_cpu *tc, + struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; struct trace_iterator *iter; int cpu; @@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + +static int tracing_open_generic_tc(struct inode *inode, struct file *filp) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -2881,17 +2998,20 @@ static int tracing_release(struct inode *inode, struct file *file) struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + /* Writes do not use seq_file, need to grab tr from inode */ + if (!(file->f_mode & FMODE_READ)) { + struct trace_cpu *tc = inode->i_private; + + trace_array_put(tc->tr); return 0; + } iter = m->private; tr = iter->tr; + trace_array_put(tr); mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2910,20 +3030,49 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + + return 0; +} + +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); return 0; } +static int tracing_release_generic_tc(struct inode *inode, struct file *file) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) { + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - if (tc->cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else @@ -2931,12 +3080,16 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, false); + iter = __tracing_open(tr, tc, inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -3293,9 +3446,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + return single_open(file, tracing_trace_options_show, inode->i_private); } @@ -3303,7 +3461,7 @@ static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3791,6 +3949,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ @@ -3843,6 +4004,7 @@ out: fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3850,6 +4012,8 @@ fail: static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; mutex_lock(&trace_types_lock); @@ -3863,6 +4027,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -3939,7 +4105,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_enabled() && iter->pos) + if (!tracing_is_on() && iter->pos) break; } @@ -4320,6 +4486,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4328,6 +4496,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4387,7 +4556,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { @@ -4495,10 +4664,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4511,12 +4690,16 @@ struct ftrace_buffer_info { static int tracing_snapshot_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, true); + iter = __tracing_open(tr, tc, inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4529,13 +4712,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) kfree(m); return -ENOMEM; } - iter->tr = tc->tr; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4616,9 +4802,12 @@ out: static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4684,34 +4873,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4739,13 +4932,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); @@ -4763,7 +4962,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -4863,8 +5066,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); @@ -5612,15 +5814,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; - if (buffer) - r = ring_buffer_record_is_on(buffer); - else - r = 0; - + r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -5642,11 +5839,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); if (val) { - ring_buffer_record_on(buffer); + tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { - ring_buffer_record_off(buffer); + tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); } @@ -5659,9 +5856,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; @@ -5933,7 +6131,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); - trace_create_file("free_buffer", 0644, d_tracer, + trace_create_file("free_buffer", 0200, d_tracer, tr, &tracing_free_buffer_fops); trace_create_file("trace_marker", 0220, d_tracer, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed..4a4f6e1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,11 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. @@ -554,11 +559,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu); void poll_wait_pipe(struct trace_iterator *iter); -void ftrace(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long ip, - unsigned long parent_ip, - unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, @@ -774,6 +774,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER +extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct task_struct *task) { if (list_empty(&ftrace_pids)) @@ -899,12 +900,6 @@ static inline void trace_branch_disable(void) /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); -/* trace event type bit fields, not numeric */ -enum { - TRACE_EVENT_TYPE_PRINTF = 1, - TRACE_EVENT_TYPE_RAW = 2, -}; - struct ftrace_event_field { struct list_head link; const char *name; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2..7d85429 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) - goto err; + return -ENOMEM; field->name = name; field->type = type; @@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type, list_add(&field->link, head); return 0; - -err: - kmem_cache_free(field_cachep, field); - - return -ENOMEM; } int trace_define_field(struct ftrace_event_call *call, const char *type, @@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, } call->class->reg(call, TRACE_REG_UNREGISTER, file); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ + /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + else + clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* @@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -394,16 +410,45 @@ static void put_system(struct ftrace_subsystem_dir *dir) } /* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int tracing_release_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + + trace_array_put(tr); + + return 0; +} + +/* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_file *file = filp->private_data; - char *buf; + char buf[4] = "0"; - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) - buf = "0*\n"; - else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - buf = "1*\n"; - else - buf = "1\n"; - } else - buf = "0\n"; + if (file->flags & FTRACE_EVENT_FL_ENABLED && + !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + strcpy(buf, "1"); + + if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || + file->flags & FTRACE_EVENT_FL_SOFT_MODE) + strcat(buf, "*"); + + strcat(buf, "\n"); return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } @@ -992,6 +1048,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1007,6 +1064,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1014,9 +1072,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { + put_system(dir); + return -ENODEV; + } + ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); put_system(dir); + } return ret; } @@ -1027,16 +1093,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1047,6 +1120,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1174,9 +1249,10 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_file, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_generic_file, .llseek = default_llseek, }; @@ -1279,7 +1355,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1292,6 +1376,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } @@ -1591,6 +1677,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1598,11 +1685,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. */ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1614,11 +1703,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) /* Remove an event_call */ void trace_remove_event_call(struct ftrace_event_call *call) { + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); __trace_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); } #define for_each_event(event, start, end) \ @@ -1762,6 +1853,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1772,6 +1864,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } @@ -2011,10 +2104,7 @@ event_enable_func(struct ftrace_hash *hash, int ret; /* hash funcs only work with set_ftrace_filter */ - if (!enabled) - return -EINVAL; - - if (!param) + if (!enabled || !param) return -EINVAL; system = strsep(¶m, ":"); @@ -2329,11 +2419,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f..0d883dc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -44,6 +44,7 @@ enum filter_op_ids OP_LE, OP_GT, OP_GE, + OP_BAND, OP_NONE, OP_OPEN_PAREN, }; @@ -54,6 +55,7 @@ struct filter_op { int precedence; }; +/* Order must be the same as enum filter_op_ids above */ static struct filter_op filter_ops[] = { { OP_OR, "||", 1 }, { OP_AND, "&&", 2 }, @@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { { OP_LE, "<=", 5 }, { OP_GT, ">", 5 }, { OP_GE, ">=", 5 }, + { OP_BAND, "&", 6 }, { OP_NONE, "OP_NONE", 0 }, { OP_OPEN_PAREN, "(", 0 }, }; @@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ case OP_GE: \ match = (*addr >= val); \ break; \ + case OP_BAND: \ + match = (*addr & val); \ + break; \ default: \ break; \ } \ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c4d6d71..b863f93 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) trace_dump_stack(STACK_SKIP); } +static void +ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ALL); +} + +/* Only dump the current CPU buffer. */ +static void +ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ORIG); +} + static int ftrace_probe_print(const char *name, struct seq_file *m, unsigned long ip, void *data) @@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, return ftrace_probe_print("stacktrace", m, ip, data); } +static int +ftrace_dump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("dump", m, ip, data); +} + +static int +ftrace_cpudump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("cpudump", m, ip, data); +} + static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, @@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = { .print = ftrace_stacktrace_print, }; +static struct ftrace_probe_ops dump_probe_ops = { + .func = ftrace_dump_probe, + .print = ftrace_dump_print, +}; + +static struct ftrace_probe_ops cpudump_probe_ops = { + .func = ftrace_cpudump_probe, + .print = ftrace_cpudump_print, +}; + static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_traceon_print, @@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash, param, enable); } +static int +ftrace_dump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &dump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + +static int +ftrace_cpudump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &cpudump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + static struct ftrace_func_command ftrace_traceon_cmd = { .name = "traceon", .func = ftrace_trace_onoff_callback, @@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = { .func = ftrace_stacktrace_callback, }; +static struct ftrace_func_command ftrace_dump_cmd = { + .name = "dump", + .func = ftrace_dump_callback, +}; + +static struct ftrace_func_command ftrace_cpudump_cmd = { + .name = "cpudump", + .func = ftrace_cpudump_callback, +}; + static int __init init_func_cmd_traceon(void) { int ret; @@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void) ret = register_ftrace_command(&ftrace_traceon_cmd); if (ret) - unregister_ftrace_command(&ftrace_traceoff_cmd); + goto out_free_traceoff; ret = register_ftrace_command(&ftrace_stacktrace_cmd); - if (ret) { - unregister_ftrace_command(&ftrace_traceoff_cmd); - unregister_ftrace_command(&ftrace_traceon_cmd); - } + if (ret) + goto out_free_traceon; + + ret = register_ftrace_command(&ftrace_dump_cmd); + if (ret) + goto out_free_stacktrace; + + ret = register_ftrace_command(&ftrace_cpudump_cmd); + if (ret) + goto out_free_dump; + + return 0; + + out_free_dump: + unregister_ftrace_command(&ftrace_dump_cmd); + out_free_stacktrace: + unregister_ftrace_command(&ftrace_stacktrace_cmd); + out_free_traceon: + unregister_ftrace_command(&ftrace_traceon_cmd); + out_free_traceoff: + unregister_ftrace_command(&ftrace_traceoff_cmd); + return ret; } #else diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065..2aefbee 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + if (!tracer_enabled || !tracing_is_enabled()) return; cpu = raw_smp_processor_id(); @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) else return; - if (!tracer_enabled) + if (!tracer_enabled || !tracing_is_enabled()) return; data = per_cpu_ptr(tr->trace_buffer.data, cpu); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98..7ed6976 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,12 +35,17 @@ struct trace_probe { const char *symbol; /* symbol name */ struct ftrace_event_class class; struct ftrace_event_call call; - struct ftrace_event_file * __rcu *files; + struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; }; +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + #define SIZEOF_TRACE_PROBE(n) \ (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; INIT_LIST_HEAD(&tp->list); + INIT_LIST_HEAD(&tp->files); return tp; error: kfree(tp->call.name); @@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } -static int trace_probe_nr_files(struct trace_probe *tp) -{ - struct ftrace_event_file **file; - int ret = 0; - - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - file = rcu_dereference_raw(tp->files); - if (file) - while (*(file++)) - ret++; - - return ret; -} - -static DEFINE_MUTEX(probe_enable_lock); - /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. @@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - - old = rcu_dereference_raw(tp->files); - /* 1 is for new one and 1 is for stopper */ - new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { + struct event_file_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) { ret = -ENOMEM; - goto out_unlock; + goto out; } - memcpy(new, old, n * sizeof(struct ftrace_event_file *)); - new[n] = file; - /* The last one keeps a NULL */ - rcu_assign_pointer(tp->files, new); - tp->flags |= TP_FLAG_TRACE; + link->file = file; + list_add_tail_rcu(&link->list, &tp->files); - if (old) { - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); - } + tp->flags |= TP_FLAG_TRACE; } else tp->flags |= TP_FLAG_PROFILE; - if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && - !trace_probe_has_gone(tp)) { + if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { if (trace_probe_is_return(tp)) ret = enable_kretprobe(&tp->rp); else ret = enable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } -static int -trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) +static struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) { - struct ftrace_event_file **files; - int i; + struct event_file_link *link; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - files = rcu_dereference_raw(tp->files); - if (files) { - for (i = 0; files[i]; i++) - if (files[i] == file) - return i; - } + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; - return -1; + return NULL; } /* @@ -283,41 +245,24 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - int i, j; + struct event_file_link *link; - old = rcu_dereference_raw(tp->files); - if (n == 0 || trace_probe_file_index(tp, file) < 0) { + link = find_event_file_link(tp, file); + if (!link) { ret = -EINVAL; - goto out_unlock; + goto out; } - if (n == 1) { /* Remove the last file */ - tp->flags &= ~TP_FLAG_TRACE; - new = NULL; - } else { - new = kzalloc(n * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto out_unlock; - } - - /* This copy & check loop copies the NULL stopper too */ - for (i = 0, j = 0; j < n && i < n + 1; i++) - if (old[i] != file) - new[j++] = old[i]; - } + list_del_rcu(&link->list); + /* synchronize with kprobe_trace_func/kretprobe_trace_func */ + synchronize_sched(); + kfree(link); - rcu_assign_pointer(tp->files, new); + if (!list_empty(&tp->files)) + goto out; - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + tp->flags &= ~TP_FLAG_TRACE; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -327,10 +272,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else disable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -885,20 +827,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, static __kprobes void kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kprobe_trace_func(tp, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kprobe_trace_func(tp, regs, link->file); } /* Kretprobe handler */ @@ -945,20 +877,10 @@ static __kprobes void kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); + struct event_file_link *link; - if (unlikely(!file)) - return; - - while (*file) { - __kretprobe_trace_func(tp, ri, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kretprobe_trace_func(tp, ri, regs, link->file); } /* Event entry printers */ @@ -1157,6 +1079,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1172,10 +1098,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, - entry->ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } /* Kretprobe profile handler */ @@ -1189,6 +1112,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1204,13 +1131,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, - entry->ret_ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } #endif /* CONFIG_PERF_EVENTS */ +/* + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. + * + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe + * lockless, but we can't race with this __init function. + */ static __kprobes int kprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) @@ -1376,6 +1306,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) return NULL; } +/* + * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * stage, we can do this lockless. + */ static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2901e3b..a7329b7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -640,13 +640,20 @@ out: * Enable ftrace, sleep 1/10 second, and then read the trace * buffer to see if all is in order. */ -int +__init int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { int save_ftrace_enabled = ftrace_enabled; unsigned long count; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* make sure msleep has been recorded */ msleep(1); @@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) * Pretty much the same than for the function tracer from which the selftest * has been borrowed. */ -int +__init int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr) { int ret; unsigned long count; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73..322e164 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0..d5d0cd3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; diff --git a/net/9p/client.c b/net/9p/client.c index 01f1779..8b93cae 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -204,6 +204,17 @@ free_and_return: return ret; } +struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} + /** * p9_tag_alloc - lookup/allocate a request by tag * @c: client session to lookup tag within @@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) col = tag % P9_ROW_MAXTAG; req = &c->reqs[row][col]; - if (!req->tc) { + if (!req->wq) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); - if (!req->wq) { - pr_err("Couldn't grow tag array\n"); - return ERR_PTR(-ENOMEM); - } + if (!req->wq) + goto grow_failed; init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - if ((!req->tc) || (!req->rc)) { - pr_err("Couldn't grow tag array\n"); - kfree(req->tc); - kfree(req->rc); - kfree(req->wq); - req->tc = req->rc = NULL; - req->wq = NULL; - return ERR_PTR(-ENOMEM); - } - req->tc->capacity = alloc_msize; - req->rc->capacity = alloc_msize; - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; + p9pdu_reset(req->tc); p9pdu_reset(req->rc); req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; - return &c->reqs[row][col]; + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } /** @@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return PTR_ERR(req); - /* if we haven't received a response for oldreq, - remove it from the list. */ + /* + * if we haven't received a response for oldreq, + * remove it from the list, and notify the transport + * layer that the reply will never arrive. + */ spin_lock(&c->lock); - if (oldreq->status == REQ_STATUS_FLSH) + if (oldreq->status == REQ_STATUS_FLSH) { list_del(&oldreq->req_list); - spin_unlock(&c->lock); + spin_unlock(&c->lock); + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, req); + } else { + spin_unlock(&c->lock); + } p9_free_req(c, req); return 0; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 02efb25..3ffda1b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -63,6 +63,7 @@ struct p9_fd_opts { int rfd; int wfd; u16 port; + int privport; }; /** @@ -87,12 +88,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; + static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; @@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - if (token != Opt_err) { + if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } @@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { @@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) return err; } + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } + } + err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2c69ddd..928f2bb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -57,9 +57,7 @@ #define P9_RDMA_IRD 0 #define P9_RDMA_ORD 0 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ -#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can - * safely advertise a maxsize - * of 64k */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ /** * struct p9_trans_rdma - RDMA transport instance @@ -75,7 +73,9 @@ * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -100,7 +100,8 @@ struct p9_trans_rdma { int sq_depth; struct semaphore sq_sem; int rq_depth; - atomic_t rq_count; + struct semaphore rq_sem; + atomic_t excess_rc; struct sockaddr_in addr; spinlock_t req_lock; @@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, if (!req) goto err_out; + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + req->rc = c->rc; req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); @@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) switch (c->wc_op) { case IB_WC_RECV: - atomic_dec(&rdma->rq_count); handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); break; case IB_WC_SEND: @@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. */ + atomic_inc(&rdma->excess_rc); + } + } + /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; - goto err_close; - } - - /* - * If the request has a buffer, steal it, otherwise - * allocate a new one. Typically, requests should already - * have receive buffers allocated and just swap them around - */ - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_NOFS); - if (req->rc) { - req->rc->sdata = (char *) req->rc + - sizeof(struct p9_fcall); - req->rc->capacity = client->msize; - } + goto recv_error; } rpl_context->rc = req->rc; - if (!rpl_context->rc) { - err = -ENOMEM; - goto err_free2; - } /* * Post a receive buffer for this request. We need to ensure @@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { - err = post_recv(client, rpl_context); - if (err) - goto err_free1; - } else - atomic_dec(&rdma->rq_count); + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto recv_error; + } /* remove posted receive buffer from request structure */ req->rc = NULL; +dont_need_post_recv: /* Post the request */ c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free1; + goto send_error; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) - goto error; + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } sge.addr = c->busa; sge.length = c->req->tc->size; @@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) wr.sg_list = &sge; wr.num_sge = 1; - if (down_interruptible(&rdma->sq_sem)) - goto error; + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } - return ib_post_send(rdma->qp, &wr, &bad_wr); + err = ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; - error: + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: kfree(c); - kfree(rpl_context->rc); - kfree(rpl_context); - p9_debug(P9_DEBUG_ERROR, "EIO\n"); - return -EIO; - err_free1: - kfree(rpl_context->rc); - err_free2: + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: kfree(rpl_context); - err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; @@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) spin_lock_init(&rdma->req_lock); init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); - atomic_set(&rdma->rq_count, 0); + sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); return rdma; } @@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index defa9d3..27ce262 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm) } EXPORT_SYMBOL_GPL(gss_mech_unregister); -static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) +struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } +EXPORT_SYMBOL(gss_mech_get); static struct gss_api_mech * _gss_mech_get_by_name(const char *name) @@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) } return 0; } +EXPORT_SYMBOL(gss_pseudoflavor_to_service); char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) @@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm) if (gm) module_put(gm->gm_owner); } +EXPORT_SYMBOL(gss_mech_put); /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index b05ace4..d0347d1 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) new->handle.data = tmp->handle.data; tmp->handle.data = NULL; new->mechctx = NULL; - new->cred.cr_group_info = NULL; - new->cred.cr_principal = NULL; + init_svc_cred(&new->cred); } static void @@ -392,9 +391,7 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; - new->cred.cr_principal = tmp->cred.cr_principal; - tmp->cred.cr_principal = NULL; + init_svc_cred(&tmp->cred); } static struct cache_head * @@ -487,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - gm = gss_mech_get_by_name(buf); + gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf); status = -EOPNOTSUPP; if (!gm) goto out; @@ -517,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c8..49eb370 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) -{ - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); -} - struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -201,7 +195,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h); } -static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) +static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; @@ -227,16 +221,15 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h int rv; write_lock(&detail->hash_lock); - rv = cache_is_valid(detail, h); - if (rv != -EAGAIN) { - write_unlock(&detail->hash_lock); - return rv; + rv = cache_is_valid(h); + if (rv == -EAGAIN) { + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + rv = -ENOENT; } - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); write_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); - return -ENOENT; + return rv; } /* @@ -260,7 +253,7 @@ int cache_check(struct cache_detail *detail, long refresh_age, age; /* First decide return status as best we can */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); @@ -269,19 +262,17 @@ int cache_check(struct cache_detail *detail, if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; - } else if (rv == -EAGAIN || age > refresh_age/2) { + } else if (rv == -EAGAIN || + (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%ld, age=%ld\n", refresh_age, age); if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { switch (cache_make_upcall(detail, h)) { case -EINVAL: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); rv = try_to_negate_entry(detail, h); break; case -EAGAIN: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); + cache_fresh_unlocked(h, detail); break; } } @@ -293,7 +284,7 @@ int cache_check(struct cache_detail *detail, * Request was not deferred; handle it as best * we can ourselves: */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } @@ -310,7 +301,7 @@ EXPORT_SYMBOL_GPL(cache_check); * a current pointer into that list and into the table * for that entry. * - * Each time clean_cache is called it finds the next non-empty entry + * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * @@ -457,9 +448,8 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - cache_revisit_request(ch); + set_bit(CACHE_CLEANED, &ch->flags); + cache_fresh_unlocked(ch, d); cache_put(ch, d); } } else @@ -1036,23 +1026,32 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq; + struct cache_queue *cq, *tmp; + struct cache_request *cr; + struct list_head dequeued; + + INIT_LIST_HEAD(&dequeued); spin_lock(&queue_lock); - list_for_each_entry(cq, &detail->queue, list) + list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { - struct cache_request *cr = container_of(cq, struct cache_request, q); + cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; + if (test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; if (cr->readers != 0) continue; - list_del(&cr->q.list); - spin_unlock(&queue_lock); - cache_put(cr->item, detail); - kfree(cr->buf); - kfree(cr); - return; + list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); + while (!list_empty(&dequeued)) { + cr = list_entry(dequeued.next, struct cache_request, q.list); + list_del(&cr->q.list); + cache_put(cr->item, detail); + kfree(cr->buf); + kfree(cr); + } } /* @@ -1166,6 +1165,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) char *buf; struct cache_request *crq; + int ret = 0; if (!detail->cache_request) return -EINVAL; @@ -1174,6 +1174,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) warn_no_listener(detail); return -EINVAL; } + if (test_bit(CACHE_CLEANED, &h->flags)) + /* Too late to make an upcall */ + return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) @@ -1191,10 +1194,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - list_add_tail(&crq->q.list, &detail->queue); + if (test_bit(CACHE_PENDING, &h->flags)) + list_add_tail(&crq->q.list, &detail->queue); + else + /* Lost a race, no longer PENDING, so don't enqueue */ + ret = -EAGAIN; spin_unlock(&queue_lock); wake_up(&queue_wait); - return 0; + if (ret == -EAGAIN) { + kfree(buf); + kfree(crq); + } + return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f0339ae9..aa40156 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -290,7 +290,7 @@ static int rpc_client_register(const struct rpc_create_args *args, struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; - int err = 0; + int err; pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { @@ -299,6 +299,10 @@ static int rpc_client_register(const struct rpc_create_args *args, goto out; } + rpc_register_client(clnt); + if (pipefs_sb) + rpc_put_sb_net(net); + auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { dprintk("RPC: Couldn't create auth handle (flavor %u)\n", @@ -306,16 +310,14 @@ static int rpc_client_register(const struct rpc_create_args *args, err = PTR_ERR(auth); goto err_auth; } - - rpc_register_client(clnt); + return 0; +err_auth: + pipefs_sb = rpc_get_sb_net(net); + __rpc_clnt_remove_pipedir(clnt); out: if (pipefs_sb) rpc_put_sb_net(net); return err; - -err_auth: - __rpc_clnt_remove_pipedir(clnt); - goto out; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4679df5..61239a2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,6 +480,23 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; +/* + * Lookup the data. This is trivial - if the dentry didn't already + * exist, we know it is negative. + */ +static struct dentry * +rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + +static const struct inode_operations rpc_dir_inode_operations = { + .lookup = rpc_lookup, +}; + static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -492,7 +509,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; - inode->i_op = &simple_dir_inode_operations; + inode->i_op = &rpc_dir_inode_operations; inc_nlink(inode); default: break; @@ -666,11 +683,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, if (!dentry) return ERR_PTR(-ENOMEM); } - if (dentry->d_inode == NULL) { - if (!dentry->d_op) - d_set_d_op(dentry, &rpc_dentry_operations); + if (dentry->d_inode == NULL) return dentry; - } dput(dentry); return ERR_PTR(-EEXIST); } @@ -1117,6 +1131,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; + sb->s_d_op = &rpc_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a..621ca7b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); ipm = xprt->xpt_auth_cache; if (ipm != NULL) { - if (!cache_valid(&ipm->h)) { + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + if (cache_is_expired(sn->ip_map_cache, &ipm->h)) { /* * The entry has been invalidated since it was * remembered, e.g. by a second mount from the * same IP address. */ - sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); cache_put(&ipm->h, sn->ip_map_cache); @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df..305374d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svsk->sk_datalen < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 412de7c..ddf0602 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2534,7 +2534,6 @@ static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .alloc_slot = xprt_alloc_slot, - .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, diff --git a/security/capability.c b/security/capability.c index d32e16e..32b5157 100644 --- a/security/capability.c +++ b/security/capability.c @@ -858,7 +858,7 @@ static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return 0; + return -EOPNOTSUPP; } #ifdef CONFIG_KEYS static int cap_key_alloc(struct key *key, const struct cred *cred, diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 977b0d8..d97f0d6 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -2112,6 +2112,9 @@ static void ad_vmaster_eapd_hook(void *private_data, int enabled) { struct hda_codec *codec = private_data; struct ad198x_spec *spec = codec->spec; + + if (!spec->eapd_nid) + return; snd_hda_codec_update_cache(codec, spec->eapd_nid, 0, AC_VERB_SET_EAPD_BTLENABLE, enabled ? 0x02 : 0x00); @@ -3601,13 +3604,16 @@ static void ad1884_fixup_hp_eapd(struct hda_codec *codec, { struct ad198x_spec *spec = codec->spec; - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; + case HDA_FIXUP_ACT_PROBE: if (spec->gen.autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) spec->eapd_nid = spec->gen.autocfg.line_out_pins[0]; else spec->eapd_nid = spec->gen.autocfg.speaker_pins[0]; - if (spec->eapd_nid) - spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; } } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 14ac9b0..8bd2261 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -37,6 +37,9 @@ #include "hda_jack.h" #include "hda_generic.h" +/* keep halting ALC5505 DSP, for power saving */ +#define HALT_REALTEK_ALC5505 + /* unsol event tags */ #define ALC_DCVOL_EVENT 0x08 @@ -2659,15 +2662,27 @@ static void alc5505_dsp_init(struct hda_codec *codec) alc5505_coef_set(codec, 0x880c, 0x00000004); /* DRAM Function control */ alc5505_coef_set(codec, 0x880c, 0x00000003); alc5505_coef_set(codec, 0x880c, 0x00000010); + +#ifdef HALT_REALTEK_ALC5505 + alc5505_dsp_halt(codec); +#endif } +#ifdef HALT_REALTEK_ALC5505 +#define alc5505_dsp_suspend(codec) /* NOP */ +#define alc5505_dsp_resume(codec) /* NOP */ +#else +#define alc5505_dsp_suspend(codec) alc5505_dsp_halt(codec) +#define alc5505_dsp_resume(codec) alc5505_dsp_back_from_halt(codec) +#endif + #ifdef CONFIG_PM static int alc269_suspend(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; if (spec->has_alc5505_dsp) - alc5505_dsp_halt(codec); + alc5505_dsp_suspend(codec); return alc_suspend(codec); } @@ -2696,7 +2711,7 @@ static int alc269_resume(struct hda_codec *codec) alc_inv_dmic_sync(codec, true); hda_call_check_power_status(codec, 0x01); if (spec->has_alc5505_dsp) - alc5505_dsp_back_from_halt(codec); + alc5505_dsp_resume(codec); return 0; } #endif /* CONFIG_PM */ diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b1dc7d4..e2de9ec 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3377,7 +3377,7 @@ static int wm8962_probe(struct snd_soc_codec *codec) { int ret; struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); - struct wm8962_pdata *pdata = dev_get_platdata(codec->dev); + struct wm8962_pdata *pdata = &wm8962->pdata; int i, trigger, irq_pol; bool dmicclk, dmicdat; diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 7a8bc12..3f726e4 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -113,13 +113,13 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) ssi_pdev = of_find_device_by_node(ssi_np); if (!ssi_pdev) { dev_err(&pdev->dev, "failed to find SSI platform device\n"); - ret = -EINVAL; + ret = -EPROBE_DEFER; goto fail; } codec_dev = of_find_i2c_device_by_node(codec_np); if (!codec_dev) { dev_err(&pdev->dev, "failed to find codec platform device\n"); - return -EINVAL; + return -EPROBE_DEFER; } data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c index 49d8700..54511c5 100644 --- a/sound/soc/mxs/mxs-saif.c +++ b/sound/soc/mxs/mxs-saif.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -658,6 +659,33 @@ static irqreturn_t mxs_saif_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static int mxs_saif_mclk_init(struct platform_device *pdev) +{ + struct mxs_saif *saif = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + struct clk *clk; + int ret; + + clk = clk_register_divider(&pdev->dev, "mxs_saif_mclk", + __clk_get_name(saif->clk), 0, + saif->base + SAIF_CTRL, + BP_SAIF_CTRL_BITCLK_MULT_RATE, 3, + 0, NULL); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); + if (ret == -EEXIST) + return 0; + dev_err(&pdev->dev, "failed to register mclk: %d\n", ret); + return PTR_ERR(clk); + } + + ret = of_clk_add_provider(np, of_clk_src_simple_get, clk); + if (ret) + return ret; + + return 0; +} + static int mxs_saif_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -734,6 +762,13 @@ static int mxs_saif_probe(struct platform_device *pdev) platform_set_drvdata(pdev, saif); + /* We only support saif0 being tx and clock master */ + if (saif->id == 0) { + ret = mxs_saif_mclk_init(pdev); + if (ret) + dev_warn(&pdev->dev, "failed to init clocks\n"); + } + ret = snd_soc_register_component(&pdev->dev, &mxs_saif_component, &mxs_saif_dai, 1); if (ret) { diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 82ebb1a..7a17346 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1016,52 +1016,6 @@ static struct i2s_dai *i2s_alloc_dai(struct platform_device *pdev, bool sec) return i2s; } -#ifdef CONFIG_OF -static int samsung_i2s_parse_dt_gpio(struct i2s_dai *i2s) -{ - struct device *dev = &i2s->pdev->dev; - int index, gpio, ret; - - for (index = 0; index < 7; index++) { - gpio = of_get_gpio(dev->of_node, index); - if (!gpio_is_valid(gpio)) { - dev_err(dev, "invalid gpio[%d]: %d\n", index, gpio); - goto free_gpio; - } - - ret = gpio_request(gpio, dev_name(dev)); - if (ret) { - dev_err(dev, "gpio [%d] request failed\n", gpio); - goto free_gpio; - } - i2s->gpios[index] = gpio; - } - return 0; - -free_gpio: - while (--index >= 0) - gpio_free(i2s->gpios[index]); - return -EINVAL; -} - -static void samsung_i2s_dt_gpio_free(struct i2s_dai *i2s) -{ - unsigned int index; - for (index = 0; index < 7; index++) - gpio_free(i2s->gpios[index]); -} -#else -static int samsung_i2s_parse_dt_gpio(struct i2s_dai *dai) -{ - return -EINVAL; -} - -static void samsung_i2s_dt_gpio_free(struct i2s_dai *dai) -{ -} - -#endif - static const struct of_device_id exynos_i2s_match[]; static inline int samsung_i2s_get_driver_data(struct platform_device *pdev) @@ -1235,18 +1189,10 @@ static int samsung_i2s_probe(struct platform_device *pdev) pri_dai->sec_dai = sec_dai; } - if (np) { - if (samsung_i2s_parse_dt_gpio(pri_dai)) { - dev_err(&pdev->dev, "Unable to configure gpio\n"); - ret = -EINVAL; - goto err; - } - } else { - if (i2s_pdata->cfg_gpio && i2s_pdata->cfg_gpio(pdev)) { - dev_err(&pdev->dev, "Unable to configure gpio\n"); - ret = -EINVAL; - goto err; - } + if (i2s_pdata && i2s_pdata->cfg_gpio && i2s_pdata->cfg_gpio(pdev)) { + dev_err(&pdev->dev, "Unable to configure gpio\n"); + ret = -EINVAL; + goto err; } snd_soc_register_component(&pri_dai->pdev->dev, &samsung_i2s_component, @@ -1267,14 +1213,10 @@ static int samsung_i2s_remove(struct platform_device *pdev) { struct i2s_dai *i2s, *other; struct resource *res; - struct s3c_audio_pdata *i2s_pdata = pdev->dev.platform_data; i2s = dev_get_drvdata(&pdev->dev); other = i2s->pri_dai ? : i2s->sec_dai; - if (!i2s_pdata->cfg_gpio && pdev->dev.of_node) - samsung_i2s_dt_gpio_free(i2s->pri_dai); - if (other) { other->pri_dai = NULL; other->sec_dai = NULL; diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c index 20e98d1..e5e81b1 100644 --- a/sound/soc/samsung/s3c-i2s-v2.c +++ b/sound/soc/samsung/s3c-i2s-v2.c @@ -1,6 +1,4 @@ -/* sound/soc/samsung/s3c-i2c-v2.c - * - * ALSA Soc Audio Layer - I2S core for newer Samsung SoCs. +/* ALSA Soc Audio Layer - I2S core for newer Samsung SoCs. * * Copyright (c) 2006 Wolfson Microelectronics PLC. * Graeme Gregory graeme.gregory@wolfsonmicro.com diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 5b01330..1bc45e7 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -129,6 +129,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, { struct audioformat *fp; struct usb_host_interface *alts; + struct usb_interface_descriptor *altsd; int stream, err; unsigned *rate_table = NULL; @@ -166,6 +167,9 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, return -EINVAL; } alts = &iface->altsetting[fp->altset_idx]; + altsd = get_iface_desc(alts); + fp->protocol = altsd->bInterfaceProtocol; + if (fp->datainterval == 0) fp->datainterval = snd_usb_parse_datainterval(chip, alts); if (fp->maxpacksize == 0) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index d875a74..cbfec92 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -128,10 +128,12 @@ UTIL_OBJS = utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \ utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \ utils/helpers/pci.o utils/helpers/bitmask.o \ utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \ + utils/idle_monitor/hsw_ext_idle.o \ utils/idle_monitor/amd_fam14h_idle.o utils/idle_monitor/cpuidle_sysfs.o \ utils/idle_monitor/mperf_monitor.o utils/idle_monitor/cpupower-monitor.o \ utils/cpupower.o utils/cpufreq-info.o utils/cpufreq-set.o \ - utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o + utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o \ + utils/cpuidle-set.o UTIL_SRC := $(UTIL_OBJS:.o=.c) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index e01c35d..914cbb9 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -110,13 +110,21 @@ May work poorly on Linux-2.6.20 through 2.6.29, as the \fBacpi-cpufreq \fP kernel frequency driver periodically cleared aperf/mperf registers in those kernels. -.SS "Nehalem" "SandyBridge" +.SS "Nehalem" "SandyBridge" "HaswellExtended" Intel Core and Package sleep state counters. Threads (hyperthreaded cores) may not be able to enter deeper core states if its sibling is utilized. Deepest package sleep states may in reality show up as machine/platform wide sleep states and can only be entered if all cores are idle. Look up Intel manuals (some are provided in the References section) for further details. +The monitors are named after the CPU family where the sleep state capabilities +got introduced and may not match exactly the CPU name of the platform. +For example an IvyBridge processor has sleep state capabilities which got +introduced in Nehalem and SandyBridge processor families. +Thus on an IvyBridge processor one will get Nehalem and SandyBridge sleep +state monitors. +HaswellExtended extra package sleep state capabilities are available only in a +specific Haswell (family 0x45) and probably also other future processors. .SS "Fam_12h" "Fam_14h" AMD laptop and desktop processor (family 12h and 14h) sleep state counters. diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h index c10496f..2284c8e 100644 --- a/tools/power/cpupower/utils/builtin.h +++ b/tools/power/cpupower/utils/builtin.h @@ -5,6 +5,7 @@ extern int cmd_set(int argc, const char **argv); extern int cmd_info(int argc, const char **argv); extern int cmd_freq_set(int argc, const char **argv); extern int cmd_freq_info(int argc, const char **argv); +extern int cmd_idle_set(int argc, const char **argv); extern int cmd_idle_info(int argc, const char **argv); extern int cmd_monitor(int argc, const char **argv); diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 8145af5..75e66de 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -22,7 +22,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) { - int idlestates, idlestate; + unsigned int idlestates, idlestate; char *tmp; printf(_ ("Analyzing CPU %d:\n"), cpu); @@ -31,10 +31,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) if (idlestates == 0) { printf(_("CPU %u: No idle states\n"), cpu); return; - } else if (idlestates <= 0) { - printf(_("CPU %u: Can't read idle state info\n"), cpu); - return; } + printf(_("Number of idle states: %d\n"), idlestates); printf(_("Available idle states:")); for (idlestate = 0; idlestate < idlestates; idlestate++) { @@ -50,10 +48,14 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) return; for (idlestate = 0; idlestate < idlestates; idlestate++) { + int disabled = sysfs_is_idlestate_disabled(cpu, idlestate); + /* Disabled interface not supported on older kernels */ + if (disabled < 0) + disabled = 0; tmp = sysfs_get_idlestate_name(cpu, idlestate); if (!tmp) continue; - printf("%s:\n", tmp); + printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : ""); free(tmp); tmp = sysfs_get_idlestate_desc(cpu, idlestate); @@ -98,21 +100,13 @@ static void cpuidle_general_output(void) static void proc_cpuidle_cpu_output(unsigned int cpu) { long max_allowed_cstate = 2000000000; - int cstates, cstate; + unsigned int cstate, cstates; cstates = sysfs_get_idlestate_count(cpu); if (cstates == 0) { - /* - * Go on and print same useless info as you'd see with - * cat /proc/acpi/processor/../power - * printf(_("CPU %u: No C-states available\n"), cpu); - * return; - */ - } else if (cstates <= 0) { - printf(_("CPU %u: Can't read C-state info\n"), cpu); + printf(_("CPU %u: No C-states info\n"), cpu); return; } - /* printf("Cstates: %d\n", cstates); */ printf(_("active state: C0\n")); printf(_("max_cstate: C%u\n"), cstates-1); diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c new file mode 100644 index 0000000..c78141c --- /dev/null +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cpufreq.h" +#include "helpers/helpers.h" +#include "helpers/sysfs.h" + +static struct option info_opts[] = { + { .name = "disable", .has_arg = required_argument, .flag = NULL, .val = 'd'}, + { .name = "enable", .has_arg = required_argument, .flag = NULL, .val = 'e'}, + { }, +}; + + +int cmd_idle_set(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + int ret = 0, cont = 1, param = 0, idlestate = 0; + unsigned int cpu = 0; + + do { + ret = getopt_long(argc, argv, "d:e:", info_opts, NULL); + if (ret == -1) + break; + switch (ret) { + case '?': + param = '?'; + cont = 0; + break; + case 'd': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + idlestate = atoi(optarg); + break; + case 'e': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + idlestate = atoi(optarg); + break; + case -1: + cont = 0; + break; + } + } while (cont); + + switch (param) { + case -1: + printf(_("You can't specify more than one " + "output-specific argument\n")); + exit(EXIT_FAILURE); + case '?': + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); + } + + /* Default is: set all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu)) + continue; + + switch (param) { + + case 'd': + ret = sysfs_idlestate_disable(cpu, idlestate, 1); + if (ret == 0) + printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + else if (ret == -1) + printf(_("Idlestate %u not available on CPU %u\n"), + idlestate, cpu); + else if (ret == -2) + printf(_("Idlestate disabling not supported by kernel\n")); + else + printf(_("Idlestate %u not disabled on CPU %u\n"), + idlestate, cpu); + break; + case 'e': + ret = sysfs_idlestate_disable(cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + else if (ret == -1) + printf(_("Idlestate %u not available on CPU %u\n"), + idlestate, cpu); + else if (ret == -2) + printf(_("Idlestate enabling not supported by kernel\n")); + else + printf(_("Idlestate %u not enabled on CPU %u\n"), + idlestate, cpu); + break; + default: + /* Not reachable with proper args checking */ + printf(_("Invalid or unknown argument\n")); + exit(EXIT_FAILURE); + break; + } + } + return EXIT_SUCCESS; +} diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 52bee59..7efc570 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -17,12 +17,6 @@ #include "helpers/helpers.h" #include "helpers/bitmask.h" -struct cmd_struct { - const char *cmd; - int (*main)(int, const char **); - int needs_root; -}; - #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) static int cmd_help(int argc, const char **argv); @@ -43,10 +37,17 @@ int be_verbose; static void print_help(void); +struct cmd_struct { + const char *cmd; + int (*main)(int, const char **); + int needs_root; +}; + static struct cmd_struct commands[] = { { "frequency-info", cmd_freq_info, 0 }, { "frequency-set", cmd_freq_set, 1 }, { "idle-info", cmd_idle_info, 0 }, + { "idle-set", cmd_idle_set, 1 }, { "set", cmd_set, 1 }, { "info", cmd_info, 0 }, { "monitor", cmd_monitor, 0 }, diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 38ab916..5cdc600 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -89,6 +89,33 @@ int sysfs_is_cpu_online(unsigned int cpu) /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ + +/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ + +/* + * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir + * exists. + * For example the functionality to disable c-states was introduced in later + * kernel versions, this function can be used to explicitly check for this + * feature. + * + * returns 1 if the file exists, 0 otherwise. + */ +unsigned int sysfs_idlestate_file_exists(unsigned int cpu, + unsigned int idlestate, + const char *fname) +{ + char path[SYSFS_PATH_MAX]; + struct stat statbuf; + + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + if (stat(path, &statbuf) != 0) + return 0; + return 1; +} + /* * helper function to read file from /sys into given buffer * fname is a relative path under "cpuX/cpuidle/stateX/" dir @@ -121,6 +148,40 @@ unsigned int sysfs_idlestate_read_file(unsigned int cpu, unsigned int idlestate, return (unsigned int) numread; } +/* + * helper function to write a new value to a /sys file + * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir + * + * Returns the number of bytes written or 0 on error + */ +static +unsigned int sysfs_idlestate_write_file(unsigned int cpu, + unsigned int idlestate, + const char *fname, + const char *value, size_t len) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numwrite; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwrite = write(fd, value, len); + if (numwrite < 1) { + close(fd); + return 0; + } + + close(fd); + + return (unsigned int) numwrite; +} + /* read access to files which contain one numeric value */ enum idlestate_value { @@ -128,6 +189,7 @@ enum idlestate_value { IDLESTATE_POWER, IDLESTATE_LATENCY, IDLESTATE_TIME, + IDLESTATE_DISABLE, MAX_IDLESTATE_VALUE_FILES }; @@ -136,6 +198,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { [IDLESTATE_POWER] = "power", [IDLESTATE_LATENCY] = "latency", [IDLESTATE_TIME] = "time", + [IDLESTATE_DISABLE] = "disable", }; static unsigned long long sysfs_idlestate_get_one_value(unsigned int cpu, @@ -205,8 +268,59 @@ static char *sysfs_idlestate_get_one_string(unsigned int cpu, return result; } +/* + * Returns: + * 1 if disabled + * 0 if enabled + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + */ +int sysfs_is_idlestate_disabled(unsigned int cpu, + unsigned int idlestate) +{ + if (sysfs_get_idlestate_count(cpu) < idlestate) + return -1; + + if (!sysfs_idlestate_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_DISABLE); +} + +/* + * Pass 1 as last argument to disable or 0 to enable the state + * Returns: + * 0 on success + * negative values on error, for example: + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + * -3 No write access to disable/enable C-states + */ +int sysfs_idlestate_disable(unsigned int cpu, + unsigned int idlestate, + unsigned int disable) +{ + char value[SYSFS_PATH_MAX]; + int bytes_written; + + if (sysfs_get_idlestate_count(cpu) < idlestate) + return -1; + + if (!sysfs_idlestate_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + + snprintf(value, SYSFS_PATH_MAX, "%u", disable); + + bytes_written = sysfs_idlestate_write_file(cpu, idlestate, "disable", + value, sizeof(disable)); + if (bytes_written) + return 0; + return -3; +} + unsigned long sysfs_get_idlestate_latency(unsigned int cpu, - unsigned int idlestate) + unsigned int idlestate) { return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); } @@ -238,7 +352,7 @@ char *sysfs_get_idlestate_desc(unsigned int cpu, unsigned int idlestate) * Negativ in error case * Zero if cpuidle does not export any C-states */ -int sysfs_get_idlestate_count(unsigned int cpu) +unsigned int sysfs_get_idlestate_count(unsigned int cpu) { char file[SYSFS_PATH_MAX]; struct stat statbuf; diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h index 8cb797b..d28f11f 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.h +++ b/tools/power/cpupower/utils/helpers/sysfs.h @@ -7,8 +7,16 @@ extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +extern unsigned int sysfs_idlestate_file_exists(unsigned int cpu, + unsigned int idlestate, + const char *fname); + extern int sysfs_is_cpu_online(unsigned int cpu); +extern int sysfs_is_idlestate_disabled(unsigned int cpu, + unsigned int idlestate); +extern int sysfs_idlestate_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu, unsigned int idlestate); extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu, @@ -19,7 +27,7 @@ extern char *sysfs_get_idlestate_name(unsigned int cpu, unsigned int idlestate); extern char *sysfs_get_idlestate_desc(unsigned int cpu, unsigned int idlestate); -extern int sysfs_get_idlestate_count(unsigned int cpu); +extern unsigned int sysfs_get_idlestate_count(unsigned int cpu); extern char *sysfs_get_cpuidle_governor(void); extern char *sysfs_get_cpuidle_driver(void); diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c new file mode 100644 index 0000000..ebeaba6 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -0,0 +1,196 @@ +/* + * (C) 2010,2011 Thomas Renninger , Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on SandyBridge monitor. Implements the new package C-states + * (PC8, PC9, PC10) coming with a specific Haswell (family 0x45) CPU. + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include +#include +#include +#include + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +#define MSR_TSC 0x10 + +enum intel_hsw_ext_id { PC8 = 0, PC9, PC10, HSW_EXT_CSTATE_COUNT, + TSC = 0xFFFF }; + +static int hsw_ext_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = { + { + .name = "PC8", + .desc = N_("Processor Package C8"), + .id = PC8, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, + { + .name = "PC9", + .desc = N_("Processor Package C9"), + .desc = N_("Processor Package C2"), + .id = PC9, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, + { + .name = "PC10", + .desc = N_("Processor Package C10"), + .id = PC10, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[HSW_EXT_CSTATE_COUNT]; +static unsigned long long *current_count[HSW_EXT_CSTATE_COUNT]; +/* valid flag for all CPUs. If a MSR read failed it will be zero */ +static int *is_valid; + +static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case PC8: + msr = MSR_PKG_C8_RESIDENCY; + break; + case PC9: + msr = MSR_PKG_C9_RESIDENCY; + break; + case PC10: + msr = MSR_PKG_C10_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + return 0; +} + +static int hsw_ext_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + hsw_ext_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + hsw_ext_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int hsw_ext_start(void) +{ + int num, cpu; + unsigned long long val; + + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + hsw_ext_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + hsw_ext_get_count(TSC, &tsc_at_measure_start, 0); + return 0; +} + +static int hsw_ext_stop(void) +{ + unsigned long long val; + int num, cpu; + + hsw_ext_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + return 0; +} + +struct cpuidle_monitor intel_hsw_ext_monitor; + +static struct cpuidle_monitor *hsw_ext_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL + || cpupower_cpu_info.family != 6) + return NULL; + + switch (cpupower_cpu_info.model) { + case 0x45: /* HSW */ + break; + default: + return NULL; + } + + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + intel_hsw_ext_monitor.name_len = strlen(intel_hsw_ext_monitor.name); + return &intel_hsw_ext_monitor; +} + +void hsw_ext_unregister(void) +{ + int num; + free(is_valid); + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } +} + +struct cpuidle_monitor intel_hsw_ext_monitor = { + .name = "HaswellExtended", + .hw_states = hsw_ext_cstates, + .hw_states_num = HSW_EXT_CSTATE_COUNT, + .start = hsw_ext_start, + .stop = hsw_ext_stop, + .do_register = hsw_ext_register, + .unregister = hsw_ext_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def index e3f8d9b..0d6ba4d 100644 --- a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def +++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def @@ -2,6 +2,7 @@ DEF(amd_fam14h) DEF(intel_nhm) DEF(intel_snb) +DEF(intel_hsw_ext) DEF(mperf) #endif DEF(cpuidle_sysfs) diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index a99b43b..efc8a69 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -155,6 +155,10 @@ static struct cpuidle_monitor *snb_register(void) case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ case 0x3E: /* IVB Xeon */ + case 0x3C: /* HSW */ + case 0x3F: /* HSW */ + case 0x45: /* HSW */ + case 0x46: /* HSW */ break; default: return NULL;