diff --git a/Makefile b/Makefile
index 346e28f..540f7b2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Terrified Chipmunk
 
 # *DOCUMENTATION*
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index bd94946..ef99db9 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -95,7 +95,17 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     pte_t *ptep, pte_t pte,
 					     int dirty)
 {
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	int changed = !pte_same(*ptep, pte);
+
+	if (changed) {
+		set_pte_at(vma->vm_mm, addr, ptep, pte);
+		/*
+		 * There could be some standard sized pages in there,
+		 * get them all.
+		 */
+		flush_tlb_range(vma, addr, addr + HPAGE_SIZE);
+	}
+	return changed;
 }
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index b1fb7af..cce3782 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -510,7 +510,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			c->cputype = CPU_R3000A;
 			__cpu_name[cpu] = "R3000A";
 		}
-		break;
 	} else {
 		c->cputype = CPU_R3000;
 		__cpu_name[cpu] = "R3000";
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index a6c1332..9b00362 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -36,6 +36,11 @@ FEXPORT(ret_from_exception)
 FEXPORT(ret_from_irq)
 	LONG_S	s0, TI_REGS($28)
 FEXPORT(__ret_from_irq)
+/*
+ * We can be coming here from a syscall done in the kernel space,
+ * e.g. a failed kernel_execve().
+ */
+resume_userspace_check:
 	LONG_L	t0, PT_STATUS(sp)	# returning to kernel mode?
 	andi	t0, t0, KU_USER
 	beqz	t0, resume_kernel
@@ -162,7 +167,7 @@ work_notifysig:				# deal with pending signals and
 	move	a0, sp
 	li	a1, 0
 	jal	do_notify_resume	# a2 already loaded
-	j	resume_userspace
+	j	resume_userspace_check
 
 FEXPORT(syscall_exit_partial)
 	local_irq_disable		# make sure need_resched doesn't
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f6ba838..86ec03f 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -397,14 +397,14 @@ EXPORT(sysn32_call_table)
 	PTR	sys_timerfd_create
 	PTR	compat_sys_timerfd_gettime	/* 6285 */
 	PTR	compat_sys_timerfd_settime
-	PTR	sys_signalfd4
+	PTR	compat_sys_signalfd4
 	PTR	sys_eventfd2
 	PTR	sys_epoll_create1
 	PTR	sys_dup3			/* 6290 */
 	PTR	sys_pipe2
 	PTR	sys_inotify_init1
-	PTR	sys_preadv
-	PTR	sys_pwritev
+	PTR	compat_sys_preadv
+	PTR	compat_sys_pwritev
 	PTR	compat_sys_rt_tgsigqueueinfo	/* 6295 */
 	PTR	sys_perf_event_open
 	PTR	sys_accept4
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 4b9b935..88e79ad 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -120,18 +120,11 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	if (cpu_context(cpu, mm) != 0) {
 		unsigned long size, flags;
-		int huge = is_vm_hugetlb_page(vma);
 
 		ENTER_CRITICAL(flags);
-		if (huge) {
-			start = round_down(start, HPAGE_SIZE);
-			end = round_up(end, HPAGE_SIZE);
-			size = (end - start) >> HPAGE_SHIFT;
-		} else {
-			start = round_down(start, PAGE_SIZE << 1);
-			end = round_up(end, PAGE_SIZE << 1);
-			size = (end - start) >> (PAGE_SHIFT + 1);
-		}
+		start = round_down(start, PAGE_SIZE << 1);
+		end = round_up(end, PAGE_SIZE << 1);
+		size = (end - start) >> (PAGE_SHIFT + 1);
 		if (size <= current_cpu_data.tlbsize/2) {
 			int oldpid = read_c0_entryhi();
 			int newpid = cpu_asid(cpu, mm);
@@ -140,10 +133,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 				int idx;
 
 				write_c0_entryhi(start | newpid);
-				if (huge)
-					start += HPAGE_SIZE;
-				else
-					start += (PAGE_SIZE << 1);
+				start += (PAGE_SIZE << 1);
 				mtc0_tlbw_hazard();
 				tlb_probe();
 				tlb_probe_hazard();
diff --git a/arch/sparc/boot/piggyback.c b/arch/sparc/boot/piggyback.c
index c0a798f..bb7c951 100644
--- a/arch/sparc/boot/piggyback.c
+++ b/arch/sparc/boot/piggyback.c
@@ -81,18 +81,18 @@ static void usage(void)
 
 static int start_line(const char *line)
 {
-	if (strcmp(line + 8, " T _start\n") == 0)
+	if (strcmp(line + 10, " _start\n") == 0)
 		return 1;
-	else if (strcmp(line + 16, " T _start\n") == 0)
+	else if (strcmp(line + 18, " _start\n") == 0)
 		return 1;
 	return 0;
 }
 
 static int end_line(const char *line)
 {
-	if (strcmp(line + 8, " A _end\n") == 0)
+	if (strcmp(line + 10, " _end\n") == 0)
 		return 1;
-	else if (strcmp (line + 16, " A _end\n") == 0)
+	else if (strcmp (line + 18, " _end\n") == 0)
 		return 1;
 	return 0;
 }
@@ -100,8 +100,8 @@ static int end_line(const char *line)
 /*
  * Find address for start and end in System.map.
  * The file looks like this:
- * f0004000 T _start
- * f0379f79 A _end
+ * f0004000 ... _start
+ * f0379f79 ... _end
  * 1234567890123456
  * ^coloumn 1
  * There is support for 64 bit addresses too.
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 44025f4..8475a47 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -47,7 +47,7 @@ STUB:	sra	REG1, 0, REG1; \
 	sra	REG4, 0, REG4
 
 SIGN1(sys32_exit, sparc_exit, %o0)
-SIGN1(sys32_exit_group, sys_exit_group, %o0)
+SIGN1(sys32_exit_group, sparc_exit_group, %o0)
 SIGN1(sys32_wait4, compat_sys_wait4, %o2)
 SIGN1(sys32_creat, sys_creat, %o1)
 SIGN1(sys32_mknod, sys_mknod, %o1)
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index 7f5f65d..bf23477 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -118,10 +118,20 @@ ret_from_syscall:
 	ba,pt	%xcc, ret_sys_call
 	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
 
+	.globl	sparc_exit_group
+	.type	sparc_exit_group,#function
+sparc_exit_group:
+	sethi	%hi(sys_exit_group), %g7
+	ba,pt	%xcc, 1f
+	or	%g7, %lo(sys_exit_group), %g7
+	.size	sparc_exit_group,.-sparc_exit_group
+
 	.globl	sparc_exit
 	.type	sparc_exit,#function
 sparc_exit:
-	rdpr	%pstate, %g2
+	sethi	%hi(sys_exit), %g7
+	or	%g7, %lo(sys_exit), %g7
+1:	rdpr	%pstate, %g2
 	wrpr	%g2, PSTATE_IE, %pstate
 	rdpr	%otherwin, %g1
 	rdpr	%cansave, %g3
@@ -129,7 +139,7 @@ sparc_exit:
 	wrpr	%g3, 0x0, %cansave
 	wrpr	%g0, 0x0, %otherwin
 	wrpr	%g2, 0x0, %pstate
-	ba,pt	%xcc, sys_exit
+	jmpl	%g7, %g0
 	stb	%g0, [%g6 + TI_WSAVED]
 	.size	sparc_exit,.-sparc_exit
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 1c9af9f..017b74a 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -133,7 +133,7 @@ sys_call_table:
 /*170*/	.word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents
 	.word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
 /*180*/	.word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_ni_syscall
-	.word sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname
+	.word sys_setpgid, sys_fremovexattr, sys_tkill, sparc_exit_group, sys_newuname
/*190*/	.word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
 	.word sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_nis_syscall, sys_sgetmask
/*200*/	.word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 107f09b..9b285da 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1796,7 +1796,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 */
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	for_each_sg(st->sgl, sg, page_count, i) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
@@ -1809,7 +1809,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		 * our own buffer, now let the real VM do its job and
 		 * go down in flames if truly OOM.
 		 */
-		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
+		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
 		gfp |= __GFP_IO | __GFP_WAIT;
 
 		i915_gem_shrink_all(dev_priv);
@@ -1817,7 +1817,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		if (IS_ERR(page))
 			goto err_pages;
 
-		gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 		gfp &= ~(__GFP_IO | __GFP_WAIT);
 	}
diff --git a/drivers/input/matrix-keymap.c b/drivers/input/matrix-keymap.c
index 443ad64b..d88d9be 100644
--- a/drivers/input/matrix-keymap.c
+++ b/drivers/input/matrix-keymap.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 
 static bool matrix_keypad_map_key(struct input_dev *input_dev,
@@ -161,3 +162,5 @@ int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data,
 	return 0;
 }
 EXPORT_SYMBOL(matrix_keypad_build_keymap);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index a54dd5d..c9ec725 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -373,18 +373,25 @@ static struct sdhci_ops sdhci_s3c_ops = {
 static void sdhci_s3c_notify_change(struct platform_device *dev, int state)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
+	struct sdhci_s3c *sc = sdhci_priv(host);
 	unsigned long flags;
 
 	if (host) {
 		spin_lock_irqsave(&host->lock, flags);
 		if (state) {
 			dev_dbg(&dev->dev, "card inserted.\n");
+#ifdef CONFIG_PM_RUNTIME
+			clk_prepare_enable(sc->clk_io);
+#endif
 			host->flags &= ~SDHCI_DEVICE_DEAD;
 			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 		} else {
 			dev_dbg(&dev->dev, "card removed.\n");
 			host->flags |= SDHCI_DEVICE_DEAD;
 			host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+#ifdef CONFIG_PM_RUNTIME
+			clk_disable_unprepare(sc->clk_io);
+#endif
 		}
 		tasklet_schedule(&host->card_tasklet);
 		spin_unlock_irqrestore(&host->lock, flags);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index d25bc97..7eaee3e 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1104,7 +1104,6 @@ static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id)
 {
 	struct sh_mmcif_host *host = dev_id;
 	struct mmc_request *mrq = host->mrq;
-	struct mmc_data *data = mrq->data;
 
 	cancel_delayed_work_sync(&host->timeout_work);
 
@@ -1152,13 +1151,14 @@ static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id)
 	case MMCIF_WAIT_FOR_READ_END:
 	case MMCIF_WAIT_FOR_WRITE_END:
 		if (host->sd_error)
-			data->error = sh_mmcif_error_manage(host);
+			mrq->data->error = sh_mmcif_error_manage(host);
 		break;
 	default:
 		BUG();
 	}
 
 	if (host->wait_for != MMCIF_WAIT_FOR_STOP) {
+		struct mmc_data *data = mrq->data;
 		if (!mrq->cmd->error && data && !data->error)
 			data->bytes_xfered = data->blocks * data->blksz;
@@ -1231,10 +1231,6 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
 		host->sd_error = true;
 		dev_dbg(&host->pd->dev, "int err state = %08x\n", state);
 	}
-	if (host->state == STATE_IDLE) {
-		dev_info(&host->pd->dev, "Spurious IRQ status 0x%x", state);
-		return IRQ_HANDLED;
-	}
 	if (state & ~(INT_CMD12RBE | INT_CMD12CRE)) {
 		if (!host->dma_active)
 			return IRQ_WAKE_THREAD;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 374c46d..ec794a7 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1077,7 +1077,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
 * until the request succeeds or until the allocation size falls below
 * the system page size. This attempts to make sure it does not adversely
 * impact system performance, so when allocating more than one page, we
- * ask the memory allocator to avoid re-trying.
+ * ask the memory allocator to avoid re-trying, swapping, writing back
+ * or performing I/O.
 *
 * Note, this function also makes sure that the allocated buffer is aligned to
 * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value.
@@ -1091,7 +1092,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
 */
void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
{
-	gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY;
+	gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
+		       __GFP_NORETRY | __GFP_NO_KSWAPD;
	size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
	void *kbuf;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index da7b449..2144f61 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -498,7 +498,7 @@ out:
 * @ubi: UBI device description object
 *
 * This function returns a physical eraseblock in case of success and a
- * negative error code in case of failure. Might sleep.
+ * negative error code in case of failure.
 */
static int __wl_get_peb(struct ubi_device *ubi)
{
@@ -540,13 +540,6 @@ retry:
	 * ubi_wl_get_peb() after removing e from the pool.
	 */
	prot_queue_add(ubi, e);
#endif
-	err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
-				    ubi->peb_size - ubi->vid_hdr_aloffset);
-	if (err) {
-		ubi_err("new PEB %d does not contain all 0xFF bytes", e->pnum);
-		return err;
-	}
-
	return e->pnum;
}
@@ -679,17 +672,30 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
#else
static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
{
-	return find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
+	struct ubi_wl_entry *e;
+
+	e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
+	self_check_in_wl_tree(ubi, e, &ubi->free);
+	rb_erase(&e->u.rb, &ubi->free);
+
+	return e;
}

int ubi_wl_get_peb(struct ubi_device *ubi)
{
-	int peb;
+	int peb, err;

	spin_lock(&ubi->wl_lock);
	peb = __wl_get_peb(ubi);
	spin_unlock(&ubi->wl_lock);

+	err = ubi_self_check_all_ff(ubi, peb, ubi->vid_hdr_aloffset,
+				    ubi->peb_size - ubi->vid_hdr_aloffset);
+	if (err) {
+		ubi_err("new PEB %d does not contain all 0xFF bytes", peb);
+		return err;
+	}
+
	return peb;
}
#endif
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 16b7a72..3b2365c 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -1276,7 +1276,7 @@ struct megasas_evt_detail {
} __attribute__ ((packed));

struct megasas_aen_event {
-	struct work_struct hotplug_work;
+	struct delayed_work hotplug_work;
	struct megasas_instance *instance;
};
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index d2c5366..e4f2baa 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2060,9 +2060,9 @@ megasas_service_aen(struct megasas_instance *instance, struct megasas_cmd *cmd)
		} else {
			ev->instance = instance;
			instance->ev = ev;
-			INIT_WORK(&ev->hotplug_work, megasas_aen_polling);
-			schedule_delayed_work(
-				(struct delayed_work *)&ev->hotplug_work, 0);
+			INIT_DELAYED_WORK(&ev->hotplug_work,
+					  megasas_aen_polling);
+			schedule_delayed_work(&ev->hotplug_work, 0);
		}
	}
}
@@ -4352,8 +4352,7 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state)
	/* cancel the delayed work if this work still in queue */
	if (instance->ev != NULL) {
		struct megasas_aen_event *ev = instance->ev;
-		cancel_delayed_work_sync(
-			(struct delayed_work *)&ev->hotplug_work);
+		cancel_delayed_work_sync(&ev->hotplug_work);
		instance->ev = NULL;
	}

@@ -4545,8 +4544,7 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
	/* cancel the delayed work if this work still in queue*/
	if (instance->ev != NULL) {
		struct megasas_aen_event *ev = instance->ev;
-		cancel_delayed_work_sync(
-			(struct delayed_work *)&ev->hotplug_work);
+		cancel_delayed_work_sync(&ev->hotplug_work);
		instance->ev = NULL;
	}

@@ -5190,7 +5188,7 @@ static void
megasas_aen_polling(struct work_struct *work)
{
	struct megasas_aen_event *ev =
-		container_of(work, struct megasas_aen_event, hotplug_work);
+		container_of(work, struct megasas_aen_event, hotplug_work.work);
	struct megasas_instance *instance = ev->instance;
	union megasas_evt_class_locale class_locale;
	struct Scsi_Host *host;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a1e09b4..ab3a456 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1544,6 +1544,22 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

+static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *bd_inode = file->f_mapping->host;
+	loff_t size = i_size_read(bd_inode);
+
+	if (pos >= size)
+		return 0;
+
+	size -= pos;
+	if (size < INT_MAX)
+		nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
+	return generic_file_aio_read(iocb, iov, nr_segs, pos);
+}
+
/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
@@ -1574,7 +1590,7 @@ const struct file_operations def_blk_fops = {
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
+	.aio_read	= blkdev_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
diff --git a/fs/buffer.c b/fs/buffer.c
index 3586fb0..ec0aca8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2893,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
	bio_put(bio);
}

+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
int submit_bh(int rw, struct buffer_head * bh)
{
	struct bio *bio;
@@ -2929,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
	bio_get(bio);
	submit_bio(rw, bio);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76e1aa2..d0a7967 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,9 +30,10 @@ struct vm_area_struct;
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_NOTRACK		0x100000u
-#define ___GFP_OTHER_NODE	0x200000u
-#define ___GFP_WRITE		0x400000u
+#define ___GFP_NOTRACK		0x200000u
+#define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u

/*
 * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */

+#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */

@@ -94,7 +96,7 @@ struct vm_area_struct;
 */
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)

-#define __GFP_BITS_SHIFT 23	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))

/* This equals 0, but use constants in case they ever change */
@@ -114,7 +116,8 @@ struct vm_area_struct;
			 __GFP_MOVABLE)
#define GFP_IOFS	(__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+			 __GFP_NO_KSWAPD)

#ifdef CONFIG_NUMA
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a123b13..7d8dfc7 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -701,6 +701,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#define COMPACTION_BUILD 0
#endif

+/* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define SYMBOL_PREFIX ""
+#endif
+
/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e5ccb9d..dbd2127 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -82,16 +82,6 @@ static inline void mpol_cond_put(struct mempolicy *pol)
		__mpol_put(pol);
}

-extern struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
-					  struct mempolicy *frompol);
-static inline struct mempolicy *mpol_cond_copy(struct mempolicy *tompol,
-					       struct mempolicy *frompol)
-{
-	if (!frompol)
-		return frompol;
-	return __mpol_cond_copy(tompol, frompol);
-}
-
extern struct mempolicy *__mpol_dup(struct mempolicy *pol);
static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
{
@@ -215,12 +205,6 @@ static inline void mpol_cond_put(struct mempolicy *pol)
{
}

-static inline struct mempolicy *mpol_cond_copy(struct mempolicy *to,
-					       struct mempolicy *from)
-{
-	return from;
-}
-
static inline void mpol_get(struct mempolicy *pol)
{
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda02..a848ffc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1488,6 +1488,9 @@ struct napi_gro_cb {

	/* Used in ipv6_gro_receive() */
	int	proto;
+
+	/* used in skb_gro_receive() slow path */
+	struct sk_buff *last;
};

#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6feeccd..4af45e3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -525,6 +525,7 @@ static inline __u32 cookie_v6_init_sequence(struct sock *sk,
extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
				      int nonagle);
extern bool tcp_may_send_now(struct sock *sk);
+extern int __tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_retransmit_timer(struct sock *sk);
extern void tcp_xmit_retransmit_queue(struct sock *);
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 9391706..d6fd8e5 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -36,6 +36,7 @@
	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
+	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
	) : "GFP_NOWAIT"

diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 4646eb2..767e559 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -21,10 +21,10 @@ struct key *modsign_keyring;
extern __initdata const u8 modsign_certificate_list[];
extern __initdata const u8 modsign_certificate_list_end[];
asm(".section .init.data,\"aw\"\n"
-    "modsign_certificate_list:\n"
+    SYMBOL_PREFIX "modsign_certificate_list:\n"
    ".incbin \"signing_key.x509\"\n"
    ".incbin \"extra_certificates\"\n"
-    "modsign_certificate_list_end:"
+    SYMBOL_PREFIX "modsign_certificate_list_end:"
    );

/*
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index ea1b1df..f2970bd 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -27,13 +27,13 @@
 *	- Information block
 */
struct module_signature {
-	enum pkey_algo		algo : 8;	/* Public-key crypto algorithm */
-	enum pkey_hash_algo	hash : 8;	/* Digest algorithm */
-	enum pkey_id_type	id_type : 8;	/* Key identifier type */
-	u8			signer_len;	/* Length of signer's name */
-	u8			key_id_len;	/* Length of key identifier */
-	u8			__pad[3];
-	__be32			sig_len;	/* Length of signature data */
+	u8	algo;		/* Public-key crypto algorithm [enum pkey_algo] */
+	u8	hash;		/* Digest algorithm [enum pkey_hash_algo] */
+	u8	id_type;	/* Key identifier type [enum pkey_id_type] */
+	u8	signer_len;	/* Length of signer's name */
+	u8	key_id_len;	/* Length of key identifier */
+	u8	__pad[3];
+	__be32	sig_len;	/* Length of signature data */
};

/*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index dd4b80a..c8c21be 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -368,6 +368,9 @@ static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);

+	if (!watchdog_enabled)
+		return;
+
	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 084aa47..1dae900 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1361,8 +1361,8 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
		     timer->data != (unsigned long)dwork);
-	BUG_ON(timer_pending(timer));
-	BUG_ON(!list_empty(&work->entry));
+	WARN_ON_ONCE(timer_pending(timer));
+	WARN_ON_ONCE(!list_empty(&work->entry));

	/*
	 * If @delay is 0, queue @dwork->work immediately.  This is for
diff --git a/lib/Makefile b/lib/Makefile
index 821a162..a08b791 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -163,7 +163,7 @@ $(obj)/crc32table.h: $(obj)/gen_crc32table
#
obj-$(CONFIG_OID_REGISTRY) += oid_registry.o

-$(obj)/oid_registry.c: $(obj)/oid_registry_data.c
+$(obj)/oid_registry.o: $(obj)/oid_registry_data.c

$(obj)/oid_registry_data.c: $(srctree)/include/linux/oid_registry.h \
			    $(src)/build_OID_registry
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index de2c8b5..5293d24 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -91,7 +91,7 @@ next_tag:

	/* Extract the length */
	len = data[dp++];
-	if (len < 0x7f) {
+	if (len <= 0x7f) {
		dp += len;
		goto next_tag;
	}
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef558..694eaab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -713,7 +713,15 @@ static void isolate_freepages(struct zone *zone,

		/* Found a block suitable for isolating free pages from */
		isolated = 0;
-		end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
+
+		/*
+		 * As pfn may not start aligned, pfn+pageblock_nr_page
+		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
+		 * a pfn_valid check. Ensure isolate_freepages_block()
+		 * only scans within a pageblock
+		 */
+		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+		end_pfn = min(end_pfn, zone_end_pfn);
		isolated = isolate_freepages_block(cc, pfn, end_pfn,
						   freelist, false);
		nr_freepages += isolated;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a5..4ea600d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
	return new;
}

-/*
- * If *frompol needs [has] an extra ref, copy *frompol to *tompol ,
- * eliminate the * MPOL_F_* flags that require conditional ref and
- * [NOTE!!!] drop the extra ref. Not safe to reference *frompol directly
- * after return. Use the returned value.
- *
- * Allows use of a mempolicy for, e.g., multiple allocations with a single
- * policy lookup, even if the policy needs/has extra ref on lookup.
- * shmem_readahead needs this.
- */
-struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
-						struct mempolicy *frompol)
-{
-	if (!mpol_needs_cond_ref(frompol))
-		return frompol;
-
-	*tompol = *frompol;
-	tompol->flags &= ~MPOL_F_SHARED;	/* copy doesn't need unref */
-	__mpol_put(frompol);
-	return tompol;
-}
-
/* Slow path of a mempolicy comparison */
bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a8f2c87..7e208f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2378,15 +2378,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
}

-/* Returns true if the allocation is likely for THP */
-static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
-{
-	if (order == pageblock_order &&
-	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
-		return true;
-	return false;
-}
-
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
	struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2425,10 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
		goto nopage;

restart:
-	/* The decision whether to wake kswapd for THP is made later */
-	if (!is_thp_alloc(gfp_mask, order))
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
		wake_all_kswapd(order, zonelist, high_zoneidx,
-						zone_idx(preferred_zone));
+				zone_idx(preferred_zone));

	/*
	 * OK, we're below the kswapd watermark and have kicked background
@@ -2498,21 +2488,15 @@ rebalance:
		goto got_pg;
	sync_migration = true;

-	if (is_thp_alloc(gfp_mask, order)) {
-		/*
-		 * If compaction is deferred for high-order allocations, it is
-		 * because sync compaction recently failed. If this is the case
-		 * and the caller requested a movable allocation that does not
-		 * heavily disrupt the system then fail the allocation instead
-		 * of entering direct reclaim.
-		 */
-		if (deferred_compaction || contended_compaction)
-			goto nopage;
-
-		/* If process is willing to reclaim/compact then wake kswapd */
-		wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
-	}
+	/*
+	 * If compaction is deferred for high-order allocations, it is because
+	 * sync compaction recently failed. In this is the case and the caller
+	 * requested a movable allocation that does not heavily disrupt the
+	 * system then fail the allocation instead of entering direct reclaim.
+	 */
+	if ((deferred_compaction || contended_compaction) &&
+						(gfp_mask & __GFP_NO_KSWAPD))
+		goto nopage;

	/* Try direct reclaim and then allocating */
	page = __alloc_pages_direct_reclaim(gfp_mask, order,
diff --git a/mm/shmem.c b/mm/shmem.c
index 89341b6..50c5b8f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
-	struct mempolicy mpol, *spol;
	struct vm_area_struct pvma;
-
-	spol = mpol_cond_copy(&mpol,
-			mpol_shared_policy_lookup(&info->policy, index));
+	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
	pvma.vm_ops = NULL;
-	pvma.vm_policy = spol;
-	return swapin_readahead(swap, gfp, &pvma, 0);
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+
+	page = swapin_readahead(swap, gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
}

static struct page *shmem_alloc_page(gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
+	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
@@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp_t gfp,
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

-	/*
-	 * alloc_page_vma() will drop the shared policy reference
-	 */
-	return alloc_page_vma(gfp, &pvma, 0);
+	page = alloc_page_vma(gfp, &pvma, 0);
+
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(pvma.vm_policy);
+
+	return page;
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 83f4d0e..b7ed376 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2823,29 +2823,10 @@ out:
		if (!populated_zone(zone))
			continue;

-		if (zone->all_unreclaimable &&
-		    sc.priority != DEF_PRIORITY)
-			continue;
-
-		/* Would compaction fail due to lack of free memory? */
-		if (COMPACTION_BUILD &&
-		    compaction_suitable(zone, order) == COMPACT_SKIPPED)
-			goto loop_again;
-
-		/* Confirm the zone is balanced for order-0 */
-		if (!zone_watermark_ok(zone, 0,
-				high_wmark_pages(zone), 0, 0)) {
-			order = sc.order = 0;
-			goto loop_again;
-		}
-
		/* Check if the memory needs to be defragmented. */
		if (zone_watermark_ok(zone, order,
				low_wmark_pages(zone), *classzone_idx, 0))
			zones_need_compaction = 0;
-
-		/* If balanced, clear the congested flag */
-		zone_clear_flag(zone, ZONE_CONGESTED);
	}

	if (zones_need_compaction)
diff --git a/net/core/dev.c b/net/core/dev.c
index c0946cb..e5942bf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3451,6 +3451,8 @@ static int napi_gro_complete(struct sk_buff *skb)
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int err = -ENOENT;

+	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
+
	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c14..3f0636c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3004,7 +3004,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
	pinfo->gso_size = 0;
	skb_header_release(p);
-	nskb->prev = p;
+	NAPI_GRO_CB(nskb)->last = p;

	nskb->data_len += p->len;
	nskb->truesize += p->truesize;
@@ -3030,8 +3030,8 @@ merge:

	__skb_pull(skb, offset);

-	p->prev->next = skb;
-	p->prev = skb;
+	NAPI_GRO_CB(p)->last->next = skb;
+	NAPI_GRO_CB(p)->last = skb;
	skb_header_release(skb);

done:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0c34bfa..e23e16d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -44,6 +44,10 @@ struct inet_diag_entry {
	u16 dport;
	u16 family;
	u16 userlocks;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr saddr_storage;	/* for IPv4-mapped-IPv6 addresses */
+	struct in6_addr daddr_storage;	/* for IPv4-mapped-IPv6 addresses */
+#endif
};

static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -428,25 +432,31 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
				break;
			}

-			if (cond->prefix_len == 0)
-				break;
-
			if (op->code == INET_DIAG_BC_S_COND)
				addr = entry->saddr;
			else
				addr = entry->daddr;

+			if (cond->family != AF_UNSPEC &&
+			    cond->family != entry->family) {
+				if (entry->family == AF_INET6 &&
+				    cond->family == AF_INET) {
+					if (addr[0] == 0 && addr[1] == 0 &&
+					    addr[2] == htonl(0xffff) &&
+					    bitstring_match(addr + 3,
+							    cond->addr,
+							    cond->prefix_len))
+						break;
+				}
+				yes = 0;
+				break;
+			}
+
+			if (cond->prefix_len == 0)
+				break;
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
				break;
-			if (entry->family == AF_INET6 &&
-			    cond->family == AF_INET) {
-				if (addr[0] == 0 && addr[1] == 0 &&
-				    addr[2] == htonl(0xffff) &&
-				    bitstring_match(addr + 3, cond->addr,
-						    cond->prefix_len))
-					break;
-			}
			yes = 0;
			break;
		}
@@ -509,6 +519,55 @@ static int valid_cc(const void *bc, int len, int cc)
	return 0;
}

+/* Validate an inet_diag_hostcond. */
+static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
+			   int *min_len)
+{
+	int addr_len;
+	struct inet_diag_hostcond *cond;
+
+	/* Check hostcond space. */
+	*min_len += sizeof(struct inet_diag_hostcond);
+	if (len < *min_len)
+		return false;
+	cond = (struct inet_diag_hostcond *)(op + 1);
+
+	/* Check address family and address length. */
+	switch (cond->family) {
+	case AF_UNSPEC:
+		addr_len = 0;
+		break;
+	case AF_INET:
+		addr_len = sizeof(struct in_addr);
+		break;
+	case AF_INET6:
+		addr_len = sizeof(struct in6_addr);
+		break;
+	default:
+		return false;
+	}
+	*min_len += addr_len;
+	if (len < *min_len)
+		return false;
+
+	/* Check prefix length (in bits) vs address length (in bytes). */
+	if (cond->prefix_len > 8 * addr_len)
+		return false;
+
+	return true;
+}
+
+/* Validate a port comparison operator. */
+static inline bool valid_port_comparison(const struct inet_diag_bc_op *op,
+					 int len, int *min_len)
+{
+	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
+	*min_len += sizeof(struct inet_diag_bc_op);
+	if (len < *min_len)
+		return false;
+	return true;
+}
+
static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
{
	const void *bc = bytecode;
@@ -516,29 +575,39 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)

	while (len > 0) {
		const struct inet_diag_bc_op *op = bc;
+		int min_len = sizeof(struct inet_diag_bc_op);

//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
		switch (op->code) {
-		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
+			if (!valid_hostcond(bc, len, &min_len))
+				return -EINVAL;
+			break;
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
-		case INET_DIAG_BC_JMP:
-			if (op->no < 4 || op->no > len + 4 || op->no & 3)
-				return -EINVAL;
-			if (op->no < len &&
-			    !valid_cc(bytecode, bytecode_len, len - op->no))
+			if (!valid_port_comparison(bc, len, &min_len))
				return -EINVAL;
			break;
+		case INET_DIAG_BC_AUTO:
+		case INET_DIAG_BC_JMP:
		case INET_DIAG_BC_NOP:
			break;
		default:
			return -EINVAL;
		}
-		if (op->yes < 4 || op->yes > len + 4 || op->yes & 3)
+
+		if (op->code != INET_DIAG_BC_NOP) {
+			if (op->no < min_len || op->no > len + 4 || op->no & 3)
+				return -EINVAL;
+			if (op->no < len &&
+			    !valid_cc(bytecode, bytecode_len, len - op->no))
+				return -EINVAL;
+		}
+
+		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
			return -EINVAL;
		bc += op->yes;
		len -= op->yes;
@@ -596,6 +665,36 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}

+/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses
+ * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6.
+ */
+static inline void inet_diag_req_addrs(const struct sock *sk,
+				       const struct request_sock *req,
+				       struct inet_diag_entry *entry)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6) {
+		if (req->rsk_ops->family == AF_INET6) {
+			entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32;
+			entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32;
+		} else if (req->rsk_ops->family == AF_INET) {
+			ipv6_addr_set_v4mapped(ireq->loc_addr,
+					       &entry->saddr_storage);
+			ipv6_addr_set_v4mapped(ireq->rmt_addr,
+					       &entry->daddr_storage);
+			entry->saddr = entry->saddr_storage.s6_addr32;
+			entry->daddr = entry->daddr_storage.s6_addr32;
+		}
+	} else
+#endif
+	{
+		entry->saddr = &ireq->loc_addr;
+		entry->daddr = &ireq->rmt_addr;
+	}
+}
+
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
			      struct request_sock *req,
			      struct user_namespace *user_ns,
@@ -637,8 +736,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
	r->idiag_inode = 0;
#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
-		*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
-		*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
+		struct inet_diag_entry entry;
+		inet_diag_req_addrs(sk, req, &entry);
+		memcpy(r->id.idiag_src, entry.saddr, sizeof(struct in6_addr));
+		memcpy(r->id.idiag_dst, entry.daddr, sizeof(struct in6_addr));
	}
#endif
@@ -691,18 +792,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
			continue;

		if (bc) {
-			entry.saddr =
-#if IS_ENABLED(CONFIG_IPV6)
-				(entry.family == AF_INET6) ?
-				inet6_rsk(req)->loc_addr.s6_addr32 :
-#endif
-				&ireq->loc_addr;
-			entry.daddr =
-#if IS_ENABLED(CONFIG_IPV6)
-				(entry.family == AF_INET6) ?
-				inet6_rsk(req)->rmt_addr.s6_addr32 :
-#endif
-				&ireq->rmt_addr;
+			inet_diag_req_addrs(sk, req, &entry);
			entry.dport = ntohs(ireq->rmt_port);

			if (!inet_diag_bc_run(bc, &entry))
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 448e685..8d5cc75 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -707,28 +707,27 @@ EXPORT_SYMBOL(ip_defrag);

struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
{
-	const struct iphdr *iph;
+	struct iphdr iph;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP))
		return skb;

-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+	if (!skb_copy_bits(skb, 0, &iph, sizeof(iph)))
		return skb;

-	iph = ip_hdr(skb);
-	if (iph->ihl < 5 || iph->version != 4)
+	if (iph.ihl < 5 || iph.version != 4)
		return skb;
-	if (!pskb_may_pull(skb, iph->ihl*4))
-		return skb;
-	iph = ip_hdr(skb);
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < (iph->ihl * 4))
+
+	len = ntohs(iph.tot_len);
+	if (skb->len < len || len < (iph.ihl * 4))
		return skb;

-	if (ip_is_fragment(ip_hdr(skb))) {
+	if (ip_is_fragment(&iph)) {
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (skb) {
+			if (!pskb_may_pull(skb, iph.ihl*4))
+				return skb;
			if (pskb_trim_rcsum(skb, len))
				return skb;
			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 609ff98..181fc82 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5645,7 +5645,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

	if (data) { /* Retransmit unacked data in SYN */
-		tcp_retransmit_skb(sk, data);
+		tcp_for_write_queue_from(data, sk) {
+			if (data == tcp_send_head(sk) ||
+			    __tcp_retransmit_skb(sk, data))
+				break;
+		}
		tcp_rearm_rto(sk);
		return true;
	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2798706..948ac27 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2309,12 +2309,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 * state updates are done by the caller. Returns non-zero if an
 * error occurred which prevented the send.
 */
-int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int cur_mss;
-	int err;

	/* Inconslusive MTU probe */
	if (icsk->icsk_mtup.probe_size) {
@@ -2387,11 +2386,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
						   GFP_ATOMIC);
-		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			     -ENOBUFS;
+		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			      -ENOBUFS;
	} else {
-		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
	}
+}
+
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int err = __tcp_retransmit_skb(sk, skb);

	if (err == 0) {
		/* Update global TCP statistics. */