diff options
321 files changed, 4409 insertions, 1815 deletions
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index 376fa2f50e6b..956bb046e599 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -13,7 +13,6 @@ Required properties: at25df321a at25df641 at26df081a - en25s64 mr25h128 mr25h256 mr25h10 @@ -33,7 +32,6 @@ Required properties: s25fl008k s25fl064k sst25vf040b - sst25wf040b m25p40 m25p80 m25p16 diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt index 5ca5a709b6aa..3ab9dfef38d1 100644 --- a/Documentation/devicetree/bindings/sound/da7218.txt +++ b/Documentation/devicetree/bindings/sound/da7218.txt @@ -73,7 +73,7 @@ Example: compatible = "dlg,da7218"; reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; wakeup-source; VDD-supply = <®_audio>; diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt index cf61681826b6..5b54d2d045c3 100644 --- a/Documentation/devicetree/bindings/sound/da7219.txt +++ b/Documentation/devicetree/bindings/sound/da7219.txt @@ -77,7 +77,7 @@ Example: reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; VDD-supply = <®_audio>; VDDMIC-supply = <®_audio>; diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt index 5bf13960f7f4..e3c48b20b1a6 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt @@ -12,24 +12,30 @@ Required properties: - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc - reg : Offset and length of the register set for the device - interrupts : Should contain CSPI/eCSPI interrupt -- cs-gpios : Specifies the gpio pins to be used for chipselects. - clocks : Clock specifiers for both ipg and per clocks. - clock-names : Clock names should include both "ipg" and "per" See the clock consumer binding, Documentation/devicetree/bindings/clock/clock-bindings.txt -- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request names should include "tx" and "rx" if present. -Obsolete properties: -- fsl,spi-num-chipselects : Contains the number of the chipselect +Recommended properties: +- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip +select lines can be used, they appear to always generate a pulse between each +word of a transfer. Most use cases will require GPIO based chip selects to +generate a valid transaction. Optional properties: +- num-cs : Number of total chip selects, see spi-bus.txt. +- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, +Documentation/devicetree/bindings/dma/dma.txt. +- dma-names: DMA request names, if present, should include "tx" and "rx". - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register controlling the SPI_READY handling. Note that to enable the DRCTL consideration, the SPI_READY mode-flag needs to be set too. Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). +Obsolete properties: +- fsl,spi-num-chipselects : Contains the number of the chipselect + Example: ecspi@70010000 { diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 3448e675b462..51101708a03a 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -1,6 +1,4 @@ -<previous description obsolete, deleted> - Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm @@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable) +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Virtual memory map with 5 level page tables: @@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 -through to the most-significant implemented bit are set to either all ones -or all zero. This causes hole between user space and kernel addresses. +through to the most-significant implemented bit are sign extended. +This causes hole between user space and kernel addresses if you interpret them +as unsigned. The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory @@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of the processes using the page fault handler, with init_top_pgt as reference. -Current X86-64 implementations support up to 46 bits of address space (64 TB), -which is our current limit. This expands into MBZ space in the page tables. - We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available @@ -72,5 +72,3 @@ following fixmap section. Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. - --Andi Kleen, Jul 2004 @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1712f132b80d..b83fdc06286a 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -85,7 +85,11 @@ .pushsection .text.fixup,"ax" .align 4 9001: mov r4, #-EFAULT +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r5, [sp, #9*4] @ *err_ptr +#else ldr r5, [sp, #8*4] @ *err_ptr +#endif str r4, [r5] ldmia sp, {r1, r2} @ retrieve dst, len add r2, r2, r1 diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 321c9c05dd9e..f4363d40e2cd 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) { u64 reg; + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + /* SPE present on this CPU? */ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), ID_AA64DFR0_PMSVER_SHIFT)) diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 9345b44b86f0..f57118e1f6b4 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -123,8 +123,8 @@ int puts(const char *s) while ((nuline = strchr(s, '\n')) != NULL) { if (nuline != s) pdc_iodc_print(s, nuline - s); - pdc_iodc_print("\r\n", 2); - s = nuline + 1; + pdc_iodc_print("\r\n", 2); + s = nuline + 1; } if (*s != '\0') pdc_iodc_print(s, strlen(s)); diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index c980a02a52bc..598c8d60fa5e 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -35,7 +35,12 @@ struct thread_info { /* thread information allocation */ +#ifdef CONFIG_IRQSTACKS +#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#else #define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ +#endif + /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index a4fd296c958e..f3cecf5117cf 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi) STREG %r19,PT_SR7(%r16) intr_return: - /* NOTE: Need to enable interrupts incase we schedule. */ - ssm PSW_SM_I, %r0 - /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ @@ -907,6 +904,11 @@ intr_check_sig: LDREG PT_IASQ1(%r16), %r20 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ + /* NOTE: We need to enable interrupts if we have to deliver + * signals. We used to do this earlier but it caused kernel + * stack overflows. */ + ssm PSW_SM_I, %r0 + copy %r0, %r25 /* long in_syscall = 0 */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ @@ -958,6 +960,10 @@ intr_do_resched: cmpib,COND(=) 0, %r20, intr_do_preempt nop + /* NOTE: We need to enable interrupts if we schedule. We used + * to do this earlier but it caused kernel stack overflows. */ + ssm PSW_SM_I, %r0 + #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index e3a8e5e4d5de..8d072c44f300 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc) __INITRODATA + .align 4 .export os_hpmc_size os_hpmc_size: .word .os_hpmc_end-.os_hpmc diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 5a657986ebbf..143f90e2f9f3 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -15,7 +15,6 @@ #include <linux/slab.h> #include <linux/kallsyms.h> #include <linux/sort.h> -#include <linux/sched.h> #include <linux/uaccess.h> #include <asm/assembly.h> diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c index 7eab4bb8abe6..66e506520505 100644 --- a/arch/parisc/lib/delay.c +++ b/arch/parisc/lib/delay.c @@ -16,9 +16,7 @@ #include <linux/preempt.h> #include <linux/init.h> -#include <asm/processor.h> #include <asm/delay.h> - #include <asm/special_insns.h> /* for mfctl() */ #include <asm/processor.h> /* for boot_cpu_data */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8..e2a2b8400490 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) @@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. */ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d183b4801bdb 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -763,7 +763,8 @@ emit_clear: func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_pkt_data(func)) + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -772,7 +773,8 @@ emit_clear: PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_pkt_data(func)) { + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 153812966365..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) return 0; /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. */ @@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e81c16838b90..9557d8b516df 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -55,8 +55,7 @@ struct bpf_jit { #define SEEN_LITERAL 8 /* code uses literals */ #define SEEN_FUNC 16 /* calls C functions */ #define SEEN_TAIL_CALL 32 /* code uses tail calls */ -#define SEEN_SKB_CHANGE 64 /* code changes skb data */ -#define SEEN_REG_AX 128 /* code uses constant blinding */ +#define SEEN_REG_AX 64 /* code uses constant blinding */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } - if (jit->seen & SEEN_SKB) + if (jit->seen & SEEN_SKB) { emit_load_skb_data_hlen(jit); - if (jit->seen & SEEN_SKB_CHANGE) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); + } } /* @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT2(0x0d00, REG_14, REG_W1); /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); - if (bpf_helper_changes_pkt_data((void *)func)) { - jit->seen |= SEEN_SKB_CHANGE; + if ((jit->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data((void *)func)) { /* lg %b1,ST_OFF_SKBP(%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index be3136f142a9..a8103a84b4ac 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->pc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 815c03d7a765..41363f46797b 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 5765e7e711f7..ff5f9cb3039a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 *func = ((u8 *)__bpf_call_base) + imm; ctx->saw_call = true; + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); emit_call((u32 *)func, ctx); emit_nop(ctx); emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + load_skb_regs(ctx, L7); break; } diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c..fca34b2177e2 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 4e6fcb32620f..428644175956 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs) if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), (void *)UPT_IP(regs), (void *)UPT_SP(regs), diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d..5c205a9cb5a6 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8eed3f94bfc7..d4fc98c50378 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -926,7 +926,8 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK + range 2 64 if SMP && X86_32 && X86_BIGSMP + range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4838037f97f6..ace8f321a5a1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -941,9 +941,10 @@ ENTRY(debug) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? */ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Ldebug_from_sysenter_stack TRACE_IRQS_OFF @@ -984,9 +985,10 @@ ENTRY(nmi) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? */ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f81d50d7ceac..3d19c830e1b1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -140,6 +140,64 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ + .pushsection .entry_trampoline, "ax" + +/* + * The code in here gets remapped into cpu_entry_area's trampoline. This means + * that the assembler and linker have the wrong idea as to where this code + * lives (and, in fact, it's mapped more than once, so it's not even at a + * fixed address). So we can't reference any symbols outside the entry + * trampoline and expect it to work. + * + * Instead, we carefully abuse %rip-relative addressing. + * _entry_trampoline(%rip) refers to the start of the remapped) entry + * trampoline. We can thus find cpu_entry_area with this macro: + */ + +#define CPU_ENTRY_AREA \ + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ +#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ + SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA + +ENTRY(entry_SYSCALL_64_trampoline) + UNWIND_HINT_EMPTY + swapgs + + /* Stash the user RSP. */ + movq %rsp, RSP_SCRATCH + + /* Load the top of the task stack into RSP */ + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp + + /* Start building the simulated IRET frame. */ + pushq $__USER_DS /* pt_regs->ss */ + pushq RSP_SCRATCH /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + + /* + * x86 lacks a near absolute jump, and we can't jump to the real + * entry text with a relative jump. We could push the target + * address and then use retq, but this destroys the pipeline on + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, + * spill RDI and restore it in a second-stage trampoline. + */ + pushq %rdi + movq $entry_SYSCALL_64_stage2, %rdi + jmp *%rdi +END(entry_SYSCALL_64_trampoline) + + .popsection + +ENTRY(entry_SYSCALL_64_stage2) + UNWIND_HINT_EMPTY + popq %rdi + jmp entry_SYSCALL_64_after_hwframe +END(entry_SYSCALL_64_stage2) + ENTRY(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* @@ -330,8 +388,24 @@ syscall_return_via_sysret: popq %rsi /* skip rcx */ popq %rdx popq %rsi + + /* + * Now all regs are restored except RSP and RDI. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + popq %rdi - movq RSP-ORIG_RAX(%rsp), %rsp + popq %rsp USERGS_SYSRET64 END(entry_SYSCALL_64) @@ -466,12 +540,13 @@ END(irq_entries_start) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY - pushfq - testl $X86_EFLAGS_IF, (%rsp) + pushq %rax + SAVE_FLAGS(CLBR_RAX) + testl $X86_EFLAGS_IF, %eax jz .Lokay_\@ ud2 .Lokay_\@: - addq $8, %rsp + popq %rax #endif .endm @@ -563,6 +638,13 @@ END(irq_entries_start) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func cld + + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS + call switch_to_thread_stack +1: + ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS @@ -572,12 +654,8 @@ END(irq_entries_start) jz 1f /* - * IRQ from user mode. Switch to kernel gsbase and inform context - * tracking that we're in kernel mode. - */ - SWAPGS - - /* + * IRQ from user mode. + * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode * (which can take locks). Since TRACE_IRQS_OFF idempotent, @@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) ud2 1: #endif - SWAPGS POP_EXTRA_REGS - POP_C_REGS - addq $8, %rsp /* skip regs->orig_ax */ + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + + /* + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ + pushq 5*8(%rdi) /* RSP */ + pushq 4*8(%rdi) /* EFLAGS */ + pushq 3*8(%rdi) /* CS */ + pushq 2*8(%rdi) /* RIP */ + + /* Push user RDI on the trampoline stack. */ + pushq (%rdi) + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + + /* Restore RDI. */ + popq %rdi + SWAPGS INTERRUPT_RETURN @@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. */ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) + +/* + * Switch to the thread stack. This is called with the IRET frame and + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and + * space has not been allocated for them.) + */ +ENTRY(switch_to_thread_stack) + UNWIND_HINT_FUNC + + pushq %rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi + ret +END(switch_to_thread_stack) .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) @@ -848,11 +983,12 @@ ENTRY(\sym) ALLOC_PT_GPREGS_ON_STACK - .if \paranoid - .if \paranoid == 1 + .if \paranoid < 2 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ - jnz 1f + jnz .Lfrom_usermode_switch_stack_\@ .endif + + .if \paranoid call paranoid_entry .else call error_entry @@ -894,20 +1030,15 @@ ENTRY(\sym) jmp error_exit .endif - .if \paranoid == 1 + .if \paranoid < 2 /* - * Paranoid entry from userspace. Switch stacks and treat it + * Entry from userspace. Switch stacks and treat it * as a normal entry. This means that paranoid handlers * run in real process context if user_mode(regs). */ -1: +.Lfrom_usermode_switch_stack_\@: call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - call sync_regs - movq %rax, %rsp /* switch stack */ - movq %rsp, %rdi /* pt_regs pointer */ .if \has_error_code @@ -1170,6 +1301,14 @@ ENTRY(error_entry) SWAPGS .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call sync_regs + movq %rax, %rsp /* switch stack */ + ENCODE_FRAME_POINTER + pushq %r12 + /* * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 568e130d932c..95ad40eb7eff 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,7 +48,7 @@ */ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ - SWAPGS_UNSAFE_STACK + SWAPGS movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax - /* Construct struct pt_regs on stack (iret frame is already on stack) */ pushq %rax /* pt_regs->orig_ax */ + + /* switch to thread stack expects orig_ax to be pushed */ + call switch_to_thread_stack + pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index f279ba2643dc..1faf40f2dda9 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,6 +37,7 @@ #include <asm/unistd.h> #include <asm/fixmap.h> #include <asm/traps.h> +#include <asm/paravirt.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); + /* This should be unreachable in NATIVE mode. */ + if (WARN_ON(vsyscall_mode == NATIVE)) + return false; + if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr) return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +static void __init set_vsyscall_pgtable_user_bits(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000000..2fbc69a0916e --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include <linux/percpu-defs.h> +#include <asm/processor.h> + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. + */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a a read-only guard page. + */ + struct entry_stack_page entry_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + +extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE \ + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bf6a76202a77..ea9a7dde62e5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 4011cb03ef08..ec8be07c0cda 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -7,6 +7,7 @@ #include <asm/mmu.h> #include <asm/fixmap.h> #include <asm/irq_vectors.h> +#include <asm/cpu_entry_area.h> #include <linux/smp.h> #include <linux/percpu.h> @@ -60,17 +61,10 @@ static inline struct desc_struct *get_current_gdt_rw(void) return this_cpu_ptr(&gdt_page)->gdt; } -/* Get the fixmap index for a specific processor */ -static inline unsigned int get_cpu_gdt_ro_index(int cpu) -{ - return FIX_GDT_REMAP_BEGIN + cpu; -} - /* Provide the fixmap address of the remapped GDT */ static inline struct desc_struct *get_cpu_gdt_ro(int cpu) { - unsigned int idx = get_cpu_gdt_ro_index(cpu); - return (struct desc_struct *)__fix_to_virt(idx); + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; } /* Provide the current read-only GDT */ @@ -185,7 +179,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, #endif } -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) { struct desc_struct *d = get_cpu_gdt_rw(cpu); tss_desc tss; diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 0211029076ea..6777480d8a42 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ESPFIX_H #define _ASM_X86_ESPFIX_H -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_ESPFIX64 #include <asm/percpu.h> @@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); extern void init_espfix_ap(int cpu); - -#endif /* CONFIG_X86_64 */ +#else +static inline void init_espfix_ap(int cpu) { } +#endif #endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b0c505fe9a95..64c4a30e0d39 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP; PAGE_SIZE) #endif - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -84,7 +83,6 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif - FIX_RO_IDT, /* Virtual mapping for read-only IDT */ #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -100,9 +98,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif - /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_GDT_REMAP_BEGIN, - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -143,7 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 1b0a5abcd8ae..96aa6b9884dc 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,16 +20,7 @@ #ifndef _ASM_X86_HYPERVISOR_H #define _ASM_X86_HYPERVISOR_H -#ifdef CONFIG_HYPERVISOR_GUEST - -#include <asm/kvm_para.h> -#include <asm/x86_init.h> -#include <asm/xen/hypervisor.h> - -/* - * x86 hypervisor information - */ - +/* x86 hypervisor types */ enum x86_hypervisor_type { X86_HYPER_NATIVE = 0, X86_HYPER_VMWARE, @@ -39,6 +30,12 @@ enum x86_hypervisor_type { X86_HYPER_KVM, }; +#ifdef CONFIG_HYPERVISOR_GUEST + +#include <asm/kvm_para.h> +#include <asm/x86_init.h> +#include <asm/xen/hypervisor.h> + struct hypervisor_x86 { /* Hypervisor name */ const char *name; @@ -58,7 +55,15 @@ struct hypervisor_x86 { extern enum x86_hypervisor_type x86_hyper_type; extern void init_hypervisor_platform(void); +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return x86_hyper_type == type; +} #else static inline void init_hypervisor_platform(void) { } +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return type == X86_HYPER_NATIVE; +} #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 000000000000..989cfa86de85 --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c8ef23f2c28f..89f08955fff7 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif #else #define INTERRUPT_RETURN iret #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f86a8caa561e..395c9631e000 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); +extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9ea26f167497..5ff3e8af2c20 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,6 +3,7 @@ #define _ASM_X86_MMU_H #include <linux/spinlock.h> +#include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/atomic.h> @@ -27,7 +28,8 @@ typedef struct { atomic64_t tlb_gen; #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0..5ede7cae1d67 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -57,11 +57,17 @@ struct ldt_struct { /* * Used for LDT copy/destruction. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context_ldt(struct task_struct *tsk, - struct mm_struct *mm) +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) { return 0; } @@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mutex_init(&mm->context.lock); + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and always allocated */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } - #endif - return init_new_context_ldt(tsk, mm); +#endif + init_new_context_ldt(mm); + return 0; } static inline void destroy_context(struct mm_struct *mm) { @@ -176,10 +185,10 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) @@ -282,33 +291,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, } /* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID - * bits. This serves two purposes. It prevents a nasty situation in - * which PCID-unaware code saves CR3, loads some other value (with PCID - * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if - * the saved ASID was nonzero. It also means that any bugs involving - * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger - * deterministically. - */ - -static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) -{ - if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(mm->pgd); - } -} - -static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) -{ - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; -} - -/* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). * @@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 283efcaac8af..892df375b615 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -927,6 +927,15 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#endif + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index f2ca9b28fd68..ce245b0cdfca 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) +/* + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c + * to avoid include recursion hell + */ +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) + +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) + +#define PKMAP_BASE \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6d5f45dcd4a1..3d27831bc58d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #ifdef CONFIG_X86_5LEVEL -#define VMALLOC_SIZE_TB _AC(16384, UL) -#define __VMALLOC_BASE _AC(0xff92000000000000, UL) -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define VMALLOC_SIZE_TB _AC(16384, UL) +# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -#define VMALLOC_SIZE_TB _AC(32, UL) -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define VMALLOC_SIZE_TB _AC(32, UL) +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif + #ifdef CONFIG_RANDOMIZE_MEMORY -#define VMALLOC_START vmalloc_base -#define VMEMMAP_START vmemmap_base +# define VMALLOC_START vmalloc_base +# define VMEMMAP_START vmemmap_base #else -#define VMALLOC_START __VMALLOC_BASE -#define VMEMMAP_START __VMEMMAP_BASE +# define VMALLOC_START __VMALLOC_BASE +# define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define ESPFIX_PGD_ENTRY _AC(-2, UL) -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) -#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) +#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cc16fa882e3e..cad8dab266bc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -163,9 +163,9 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct tss_struct doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern struct x86_hw_tss doublefault_tss; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); @@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir) write_cr3(__sme_pa(pgdir)); } +/* + * Note that while the legacy 'TSS' name comes from 'Task State Segment', + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, + * unrelated to the task-switch mechanism: + */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { @@ -305,7 +310,13 @@ struct x86_hw_tss { struct x86_hw_tss { u32 reserved1; u64 sp0; + + /* + * We store cpu_current_top_of_stack in sp1 so it's always accessible. + * Linux does not use ring 1, so sp1 is not otherwise needed. + */ u64 sp1; + u64 sp2; u64 reserved2; u64 ist[7]; @@ -323,12 +334,22 @@ struct x86_hw_tss { #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 +struct entry_stack { + unsigned long words[64]; +}; + +struct entry_stack_page { + struct entry_stack stack; +} __aligned(PAGE_SIZE); + struct tss_struct { /* - * The hardware state: + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. */ struct x86_hw_tss x86_tss; @@ -339,18 +360,9 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +} __aligned(PAGE_SIZE); -#ifdef CONFIG_X86_32 - /* - * Space for the temporary SYSENTER stack. - */ - unsigned long SYSENTER_stack_canary; - unsigned long SYSENTER_stack[64]; -#endif - -} ____cacheline_aligned; - -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* * sizeof(unsigned long) coming from an extra "long" at the end @@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +#else +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif /* @@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask) static inline void native_load_sp0(unsigned long sp0) { - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static inline void native_swapgs(void) @@ -535,12 +550,12 @@ static inline void native_swapgs(void) static inline unsigned long current_top_of_stack(void) { -#ifdef CONFIG_X86_64 - return this_cpu_read_stable(cpu_tss.x86_tss.sp0); -#else - /* sp0 on x86_32 is special in and around vm86 mode. */ + /* + * We can't read directly from tss.sp0: sp0 on x86_32 is special in + * and around vm86 mode and sp0 on x86_64 is special because of the + * entry trampoline. + */ return this_cpu_read_stable(cpu_current_top_of_stack); -#endif } static inline bool on_thread_stack(void) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 8da111b3c342..f73706878772 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,6 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, + STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -28,6 +29,8 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); +bool in_entry_stack(unsigned long *stack, struct stack_info *info); + int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8c6bd6863db9..9b6df68d8fd1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,10 +79,10 @@ do { \ static inline void refresh_sysenter_cs(struct thread_struct *thread) { /* Only happens when SEP is enabled, no need to test "SEP"arately: */ - if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) return; - this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } #endif @@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) /* This is used when switching tasks or entering/exiting vm86 mode. */ static inline void update_sp0(struct task_struct *task) { + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 load_sp0(task->thread.sp0); #else - load_sp0(task_top_of_stack(task)); + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task_top_of_stack(task)); #endif } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 70f425947dc5..00223333821a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack, #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) #endif #endif diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 877b5c1a1b12..e1884cf35257 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -9,70 +9,66 @@ #include <asm/cpufeature.h> #include <asm/special_insns.h> #include <asm/smp.h> +#include <asm/invpcid.h> -static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { - struct { u64 d[2]; } desc = { { pcid, addr } }; - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global - * mappings, we don't want the compiler to reorder any subsequent - * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + return atomic64_inc_return(&mm->context.tlb_gen); } -#define INVPCID_TYPE_INDIV_ADDR 0 -#define INVPCID_TYPE_SINGLE_CTXT 1 -#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 -#define INVPCID_TYPE_ALL_NON_GLOBAL 3 +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#define PTI_CONSUMED_ASID_BITS 0 -/* Flush all mappings for a given pcid and addr, not including globals. */ -static inline void invpcid_flush_one(unsigned long pcid, - unsigned long addr) -{ - __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); -} +#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account + * for them being zero-based. Another -1 is because ASID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) -/* Flush all mappings for a given PCID, not including globals. */ -static inline void invpcid_flush_single_context(unsigned long pcid) +static inline u16 kern_pcid(u16 asid) { - __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + /* + * If PCID is on, ASID-aware code paths put the ASID+1 into the + * PCID bits. This serves two purposes. It prevents a nasty + * situation in which PCID-unaware code saves CR3, loads some other + * value (with PCID == 0), and then restores CR3, thus corrupting + * the TLB for ASID 0 if the saved ASID was nonzero. It also means + * that any bugs involving loading a PCID-enabled CR3 with + * CR4.PCIDE off will trigger deterministically. + */ + return asid + 1; } -/* Flush all mappings, including globals, for all PCIDs. */ -static inline void invpcid_flush_all(void) +struct pgd_t; +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { - __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); + if (static_cpu_has(X86_FEATURE_PCID)) { + return __sme_pa(pgd) | kern_pcid(asid); + } else { + VM_WARN_ON_ONCE(asid != 0); + return __sme_pa(pgd); + } } -/* Flush all mappings for all PCIDs except globals. */ -static inline void invpcid_flush_all_nonglobals(void) +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { - __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); -} - -static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -{ - u64 new_tlb_gen; - - /* - * Bump the generation count. This also serves as a full barrier - * that synchronizes with switch_mm(): callers are required to order - * their read of mm_cpumask after their writes to the paging - * structures. - */ - smp_mb__before_atomic(); - new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen); - smp_mb__after_atomic(); - - return new_tlb_gen; + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } #ifdef CONFIG_PARAVIRT @@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); +/* + * flush the entire current user mapping + */ static inline void __native_flush_tlb(void) { /* @@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void) preempt_enable(); } -static inline void __native_flush_tlb_global_irq_disabled(void) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); -} - +/* + * flush everything + */ static inline void __native_flush_tlb_global(void) { - unsigned long flags; + unsigned long cr4, flags; if (static_cpu_has(X86_FEATURE_INVPCID)) { /* @@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); raw_local_irq_restore(flags); } +/* + * flush one page in the user mapping + */ static inline void __native_flush_tlb_single(unsigned long addr) { asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } +/* + * flush everything + */ static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); - else + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. + */ __flush_tlb(); + } /* * Note: if we somehow had PCID but not PGE, then this wouldn't work -- @@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void) */ } +/* + * flush one page in the kernel mapping + */ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 1fadd310ff68..31051f35cbb7 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index e9cc6fe1fc6f..c1688c2d0a12 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -7,6 +7,9 @@ #include <asm/ptrace.h> #include <asm/stacktrace.h> +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) + struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; @@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) +/* + * WARNING: The entire pt_regs may not be safe to dereference. In some cases, + * only the iret frame registers are accessible. Use with caution! + */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) { if (unwind_done(state)) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 8ea78275480d..676b7cf4b62b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -93,4 +93,10 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); + OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); + DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index dedf428b20b6..fa1261eefa16 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -47,13 +47,8 @@ void foo(void) BLANK(); /* Offset from the sysenter stack to tss.sp0 */ - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - - offsetofend(struct tss_struct, SYSENTER_stack)); - - /* Offset from cpu_tss to SYSENTER_stack */ - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); - /* Size of SYSENTER_stack */ - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - + offsetofend(struct cpu_entry_area, entry_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 630212fa9b9d..bf51e51d808d 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -23,6 +23,9 @@ int main(void) #ifdef CONFIG_PARAVIRT OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); +#ifdef CONFIG_DEBUG_ENTRY + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); +#endif BLANK(); #endif @@ -63,6 +66,7 @@ int main(void) OFFSET(TSS_ist, tss_struct, x86_tss.ist); OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); BLANK(); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fa998ca8aa5a..c9757f07d738 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -490,28 +490,23 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -/* Setup the fixmap mapping only once per-processor */ -static inline void setup_fixmap_gdt(int cpu) -{ -#ifdef CONFIG_X86_64 - /* On 64-bit systems, we use a read-only fixmap GDT. */ - pgprot_t prot = PAGE_KERNEL_RO; -#else - /* - * On native 32-bit systems, the GDT cannot be read-only because - * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. If the GDT - * is read-only, that will triple fault. - * - * On Xen PV, the GDT must be read-only because the hypervisor requires - * it. - */ - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? - PAGE_KERNEL_RO : PAGE_KERNEL; +#ifdef CONFIG_X86_32 +/* The 32-bit entry code needs to find cpu_entry_area. */ +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); -} +#ifdef CONFIG_X86_64 +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; +#endif /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) @@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) { int i; - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } @@ -1250,7 +1245,7 @@ void enable_sep_cpu(void) return; cpu = get_cpu(); - tss = &per_cpu(cpu_tss, cpu); + tss = &per_cpu(cpu_tss_rw, cpu); /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- @@ -1259,11 +1254,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - - wrmsr(MSR_IA32_SYSENTER_ESP, - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), - 0); - + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1357,25 +1348,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { + extern char _entry_trampoline[]; + extern char entry_SYSCALL_64_trampoline[]; + + int cpu = smp_processor_id(); + unsigned long SYSCALL64_entry_trampoline = + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + + (entry_SYSCALL_64_trampoline - _entry_trampoline); + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); @@ -1386,7 +1371,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1530,7 +1515,7 @@ void cpu_init(void) if (cpu) load_ucode_ap(); - t = &per_cpu(cpu_tss, cpu); + t = &per_cpu(cpu_tss_rw, cpu); oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1569,7 +1554,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = per_cpu(exception_stacks, cpu); + char *estacks = get_cpu_entry_area(cpu)->exception_stacks; for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; @@ -1580,7 +1565,7 @@ void cpu_init(void) } } - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; /* * <= is required because the CPU will access up to @@ -1596,11 +1581,12 @@ void cpu_init(void) enter_lazy_tlb(&init_mm, me); /* - * Initialize the TSS. Don't bother initializing sp0, as the initial - * task never enters user mode. + * Initialize the TSS. sp0 points to the entry trampoline stack + * regardless of what task is running. */ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); + load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); @@ -1612,7 +1598,6 @@ void cpu_init(void) if (is_uv_system()) uv_cpu_init(); - setup_fixmap_gdt(cpu); load_fixmap_gdt(cpu); } @@ -1622,7 +1607,7 @@ void cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss, cpu); + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); wait_for_master_cpu(cpu); @@ -1657,12 +1642,12 @@ void cpu_init(void) * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. */ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; #ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ @@ -1674,7 +1659,6 @@ void cpu_init(void) fpu__init_cpu(); - setup_fixmap_gdt(cpu); load_fixmap_gdt(cpu); } #endif diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7dbcb7adf797..8ccdca6d3f9e 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci) } #else -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - static inline void print_ucode(struct ucode_cpu_info *uci) { struct microcode_intel *mc; @@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (rev != mc->hdr.rev) return -1; -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif uci->cpu_sig.rev = rev; if (early) diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0e662c55ae90..0b8cedb20d6d 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -50,25 +50,23 @@ static void doublefault_fn(void) cpu_relax(); } -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa_nodebug(swapper_pg_dir), - } +struct x86_hw_tss doublefault_tss __cacheline_aligned = { + .sp0 = STACK_START, + .ss0 = __KERNEL_DS, + .ldt = 0, + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + + .ip = (unsigned long) doublefault_fn, + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, + .es = __USER_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, + .ds = __USER_DS, + .fs = __KERNEL_PERCPU, + + .__cr3 = __pa_nodebug(swapper_pg_dir), }; /* dummy for do_double_fault() call */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f13b4c00a5de..36b17e0febe8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include <linux/nmi.h> #include <linux/sysfs.h> +#include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/unwind.h> @@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } +bool in_entry_stack(unsigned long *stack, struct stack_info *info) +{ + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); + + void *begin = ss; + void *end = ss + 1; + + if ((void *)stack < begin || (void *)stack >= end) + return false; + + info->type = STACK_TYPE_ENTRY; + info->begin = begin; + info->end = end; + info->next_sp = NULL; + + return true; +} + static void printk_stack_address(unsigned long address, int reliable, char *log_lvl) { @@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +void show_iret_regs(struct pt_regs *regs) +{ + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, + regs->sp, regs->flags); +} + +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +{ + if (on_stack(info, regs, sizeof(*regs))) + __show_regs(regs, 0); + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { + /* + * When an interrupt or exception occurs in entry code, the + * full pt_regs might not have been saved yet. In that case + * just print the iret frame. + */ + show_iret_regs(regs); + } +} + void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { @@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) + * - entry stack * - * x86-32 can have up to three stacks: + * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack + * - entry stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; - /* - * If we overflowed the task stack into a guard page, jump back - * to the bottom of the usable stack. - */ - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) - stack = task_stack_page(task); - - if (get_stack_info(stack, task, &stack_info, &visit_mask)) - break; + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) + break; + } stack_name = stack_type_name(stack_info.type); if (stack_name) printk("%s <%s>\n", log_lvl, stack_name); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); /* * Scan the stack, printing any text addresses we find. At the @@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by __show_regs() below. + * by show_regs_safe() below. */ if (regs && stack == ®s->ip) goto next; @@ -155,8 +200,8 @@ next: /* if the frame has entry regs, print them */ regs = unwind_get_entry_regs(&state); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); } if (stack_name) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index daefae83a3aa..04170f63e3a1 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; + if (type == STACK_TYPE_ENTRY) + return "ENTRY_TRAMPOLINE"; + return NULL; } @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; + if (in_entry_stack(stack, info)) + goto recursion_check; + if (in_hardirq_stack(stack, info)) goto recursion_check; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 88ce2ffdb110..563e28d14f2c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; + if (type == STACK_TYPE_ENTRY) { + /* + * On 64-bit, we have a generic entry stack that we + * use for all the kernel entry points, including + * SYSENTER. + */ + return "ENTRY_TRAMPOLINE"; + } + if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_irq_stack(stack, info)) goto recursion_check; + if (in_entry_stack(stack, info)) + goto recursion_check; + goto unknown; recursion_check: diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 3feb648781c4..2f723301eb58 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(cpu_tss, get_cpu()); + tss = &per_cpu(cpu_tss_rw, get_cpu()); if (turn_on) bitmap_clear(t->io_bitmap_ptr, from, num); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 49cfd9fe7589..68e1867cca80 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; - /* - * NB: Unlike exception entries, IRQ entries do not reliably - * handle context tracking in the low-level entry code. This is - * because syscall entries execute briefly with IRQs on before - * updating context tracking state, so we can take an IRQ from - * kernel mode with CONTEXT_USER. The low-level entry code only - * updates the context if we came from user mode, so we won't - * switch to CONTEXT_KERNEL. We'll fix that once the syscall - * code is cleaned up enough that we can cleanly defer enabling - * IRQs. - */ - entering_irq(); /* entering_irq() tells RCU that we're not quiescent. Check it. */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 020efbf5786b..d86e344f5b3d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", current->comm, curbase, regs->sp, irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom); + estack_top, estack_bottom, (void *)regs->ip); if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1c1eae961340..a6b5d62f45a7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -5,6 +5,11 @@ * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. + * + * Lock order: + * contex.ldt_usr_sem + * mmap_sem + * context.lock */ #include <linux/errno.h> @@ -42,7 +47,7 @@ static void refresh_ldt_segments(void) #endif } -/* context.lock is held for us, so we don't need any locking. */ +/* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; @@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } -/* context.lock is held */ -static void install_ldt(struct mm_struct *current_mm, - struct ldt_struct *ldt) +static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { + mutex_lock(&mm->context.lock); + /* Synchronizes with READ_ONCE in load_mm_ldt. */ - smp_store_release(¤t_mm->context.ldt, ldt); + smp_store_release(&mm->context.ldt, ldt); - /* Activate the LDT for all CPUs using current_mm. */ - on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); + /* Activate the LDT for all CPUs using currents mm. */ + on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); + + mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) @@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt) } /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. + * Called on fork from arch_dup_mmap(). Just copy the current LDT state, + * the new task is not running, so nothing can be installed. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) +int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; - struct mm_struct *old_mm; int retval = 0; - mutex_init(&mm->context.lock); - old_mm = current->mm; - if (!old_mm) { - mm->context.ldt = NULL; + if (!old_mm) return 0; - } mutex_lock(&old_mm->context.lock); - if (!old_mm->context.ldt) { - mm->context.ldt = NULL; + if (!old_mm->context.ldt) goto out_unlock; - } new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { @@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) unsigned long entries_size; int retval; - mutex_lock(&mm->context.lock); + down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; @@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) retval = bytecount; out_unlock: - mutex_unlock(&mm->context.lock); + up_read(&mm->context.ldt_usr_sem); return retval; } @@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ldt.avl = 0; } - mutex_lock(&mm->context.lock); + if (down_write_killable(&mm->context.ldt_usr_sem)) + return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; @@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) error = 0; out_unlock: - mutex_unlock(&mm->context.lock); + up_write(&mm->context.ldt_usr_sem); out: return error; } diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index ac0be8283325..9edadabf04f6 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); #if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bb988a24db92..aed9d94bd46f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower @@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { * Poison it. */ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, + +#ifdef CONFIG_X86_64 + /* + * .sp1 is cpu_current_top_of_stack. The init task never + * runs user code, but cpu_current_top_of_stack should still + * be well defined before the first context switch. + */ + .sp1 = TOP_OF_INIT_STACK, +#endif + #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, @@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif -#ifdef CONFIG_X86_32 - .SYSENTER_stack_canary = STACK_END_MAGIC, -#endif }; -EXPORT_PER_CPU_SYMBOL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); @@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk) struct fpu *fpu = &t->fpu; if (bp) { - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 45bf0c5f93e1..5224c6099184 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eeeb34f85c25..c75466232016 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, - regs->sp, regs->flags); + show_iret_regs(regs); + if (regs->orig_ax != -1) pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); else @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); + if (!all) + return; + asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%es,%0" : "=r" (es)); @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); - if (!all) - return; - cr0 = read_cr0(); cr2 = read_cr2(); cr3 = __read_cr3(); @@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch the PDA and FPU contexts. */ this_cpu_write(current_task, next_p); + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); /* Reload sp0. */ update_sp0(next_p); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 35cb20994e32..c5970efa8557 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, initial_code = (unsigned long)start_secondary; initial_stack = idle->thread.sp; - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 + /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); -#endif /* So we see what's up */ announce_cpu(cpu, apicid); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 989514c94a55..f69dbd47d733 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -51,6 +51,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/fpu/internal.h> +#include <asm/cpu_entry_area.h> #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> @@ -348,9 +349,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) /* * If IRET takes a non-IST fault on the espfix64 stack, then we - * end up promoting it to a doublefault. In that case, modify - * the stack to make it look like we just entered the #GP - * handler from user space, similar to bad_iret. + * end up promoting it to a doublefault. In that case, take + * advantage of the fact that we're not using the normal (TSS.sp0) + * stack right now. We can write a fake #GP(0) frame at TSS.sp0 + * and then modify our own IRET frame so that, when we return, + * we land directly at the #GP(0) vector with the stack already + * set up according to its expectations. + * + * The net result is that our #GP handler will think that we + * entered from usermode with the bad user context. * * No need for ist_enter here because we don't use RCU. */ @@ -358,13 +365,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { - struct pt_regs *normal_regs = task_pt_regs(current); + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; - /* Fake a #GP(0) from userspace. */ - memmove(&normal_regs->ip, (void *)regs->sp, 5*8); - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + /* + * regs->sp points to the failing IRET frame on the + * ESPFIX64 stack. Copy it to the entry stack. This fills + * in gpregs->ss through gpregs->ip. + * + */ + memmove(&gpregs->ip, (void *)regs->sp, 5*8); + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ + + /* + * Adjust our frame so that we return straight to the #GP + * vector with the expected RSP value. This is safe because + * we won't enable interupts or schedule before we invoke + * general_protection, so nothing will clobber the stack + * frame we just set up. + */ regs->ip = (unsigned long)general_protection; - regs->sp = (unsigned long)&normal_regs->orig_ax; + regs->sp = (unsigned long)&gpregs->orig_ax; return; } @@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * Processors update CR2 whenever a page fault is detected. If a * second page fault occurs while an earlier page fault is being - * deliv- ered, the faulting linear address of the second fault will + * delivered, the faulting linear address of the second fault will * overwrite the contents of CR2 (replacing the previous * address). These updates to CR2 occur even if the page fault * results in a double fault or occurs during the delivery of a @@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch off the IST stack if the - * interrupted code was in user mode. The actual stack switch is done in - * entry_64.S + * Help handler running on a per-cpu (IST or entry trampoline) stack + * to switch to the normal thread stack if the interrupted code was in + * user mode. The actual stack switch is done in entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = task_pt_regs(current); - *regs = *eregs; + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; + if (regs != eregs) + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); @@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault - * correctly, we want move our stack frame to task_pt_regs - * and we want to pretend that the exception came from the - * iret target. + * correctly, we want to move our stack frame to where it would + * be had we entered directly on the entry stack (rather than + * just below the IRET frame) and we want to pretend that the + * exception came from the IRET target. */ struct bad_iret_stack *new_stack = - container_of(task_pt_regs(current), - struct bad_iret_stack, regs); + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the new stack. */ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); @@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: -#if defined(CONFIG_X86_32) - /* - * This is the most likely code path that involves non-trivial use - * of the SYSENTER stack. Check that we haven't overrun it. - */ - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, - "Overran or corrupted SYSENTER stack\n"); -#endif ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { + /* Init cpu_entry_area before IST entries are set up */ + setup_cpu_entry_areas(); + idt_setup_traps(); /* @@ -936,8 +952,9 @@ void __init trap_init(void) * "sidt" instruction will not leak the location of the kernel, and * to defend the IDT against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); - idt_descr.address = fix_to_virt(FIX_RO_IDT); + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; /* * Should be a barrier for any external CPU state: diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index a3f973b2c97a..be86a865087a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) return NULL; } -static bool stack_access_ok(struct unwind_state *state, unsigned long addr, +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, size_t len) { struct stack_info *info = &state->stack_info; + void *addr = (void *)_addr; - /* - * If the address isn't on the current stack, switch to the next one. - * - * We may have to traverse multiple stacks to deal with the possibility - * that info->next_sp could point to an empty stack and the address - * could be on a subsequent stack. - */ - while (!on_stack(info, (void *)addr, len)) - if (get_stack_info(info->next_sp, state->task, info, - &state->stack_mask)) - return false; + if (!on_stack(info, addr, len) && + (get_stack_info(addr, state->task, info, &state->stack_mask))) + return false; return true; } @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, return true; } -#define REGS_SIZE (sizeof(struct pt_regs)) -#define SP_OFFSET (offsetof(struct pt_regs, sp)) -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) - static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, - unsigned long *ip, unsigned long *sp, bool full) + unsigned long *ip, unsigned long *sp) { - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); - - if (IS_ENABLED(CONFIG_X86_64)) { - if (!stack_access_ok(state, addr, regs_size)) - return false; + struct pt_regs *regs = (struct pt_regs *)addr; - *ip = regs->ip; - *sp = regs->sp; + /* x86-32 support will be more complicated due to the ®s->sp hack */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); - return true; - } - - if (!stack_access_ok(state, addr, sp_offset)) + if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; *ip = regs->ip; + *sp = regs->sp; + return true; +} - if (user_mode(regs)) { - if (!stack_access_ok(state, addr + sp_offset, - REGS_SIZE - SP_OFFSET)) - return false; +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, + unsigned long *ip, unsigned long *sp) +{ + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; - *sp = regs->sp; - } else - *sp = (unsigned long)®s->sp; + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) + return false; + *ip = regs->ip; + *sp = regs->sp; return true; } @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; - struct pt_regs *ptregs; bool indirect = false; if (unwind_done(state)) @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS_IRET: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference iret registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } - ptregs = container_of((void *)sp, struct pt_regs, ip); - if ((unsigned long)ptregs >= prev_sp && - on_stack(&state->stack_info, ptregs, REGS_SIZE)) { - state->regs = ptregs; - state->full_regs = false; - } else - state->regs = NULL; - + state->regs = (void *)sp - IRET_FRAME_OFFSET; + state->full_regs = false; state->signal = true; break; @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } if (get_stack_info((unsigned long *)state->sp, state->task, - &state->stack_info, &state->stack_mask)) - return; + &state->stack_info, &state->stack_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + if (get_stack_info(next_page, state->task, &state->stack_info, + &state->stack_mask)) + return; + } /* * The caller can provide the address of the first frame directly diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a4009fb9be87..d2a8b5a24a44 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -107,6 +107,15 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + _entry_trampoline = .; + *(.entry_trampoline) + . = ALIGN(PAGE_SIZE); + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index abe74f779f9d..b514b2b2845a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) } static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr4) + u64 cr0, u64 cr3, u64 cr4) { int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; /* * First enable PAE, long mode needs it before CR0.PG = 1 is set. @@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, bad = ctxt->ops->set_cr(ctxt, 4, cr4); if (bad) return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + } return X86EMUL_CONTINUE; @@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) struct desc_struct desc; struct desc_ptr dt; u16 selector; - u32 val, cr0, cr4; + u32 val, cr0, cr3, cr4; int i; cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); + cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); @@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) { struct desc_struct desc; struct desc_ptr dt; - u64 val, cr0, cr4; + u64 val, cr0, cr3, cr4; u32 base3; u16 selector; int i, r; @@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); + cr3 = GET_SMSTATE(u64, smbase, 0x7f50); cr4 = GET_SMSTATE(u64, smbase, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); val = GET_SMSTATE(u64, smbase, 0x7ed0); @@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr4); + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e5e66e5c6640..c4deb1f34faa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if(make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, 0, 0, vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); @@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); @@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, root_gfn, 0, vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); @@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8eba631c4dbd..023afa0c8887 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. See 22.2.4. */ vmcs_writel(HOST_TR_BASE, - (unsigned long)this_cpu_ptr(&cpu_tss)); + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index faf843c9b916..1cec2c62a0b0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } @@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct fpu *fpu = ¤t->thread.fpu; int r; - fpu__initialize(fpu); - kvm_sigset_activate(vcpu); + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - kvm_load_guest_fpu(vcpu); - if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out_fpu; + goto out; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); -out_fpu: - kvm_put_guest_fpu(vcpu); out: + kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif kvm_rip_write(vcpu, regs->rip); - kvm_set_rflags(vcpu, regs->rflags); + kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; @@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); +int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in + * 64-bit mode (though maybe in a 32-bit code segment). + * CR4.PAE and EFER.LMA must be set. + */ + if (!(sregs->cr4 & X86_CR4_PAE_BIT) + || !(sregs->efer & EFER_LMA)) + return -EINVAL; + } else { + /* + * Not in 64-bit mode: EFER.LMA is clear and the code + * segment cannot be 64-bit. + */ + if (sregs->efer & EFER_LMA || sregs->cs.l) + return -EINVAL; + } + + return 0; +} + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, (sregs->cr4 & X86_CR4_OSXSAVE)) return -EINVAL; + if (kvm_valid_sregs(vcpu, sregs)) + return -EINVAL; + apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 553f8fd23cc4..4846eff7e4c8 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops) delay = min_t(u64, MWAITX_MAX_LOOPS, loops); /* - * Use cpu_tss as a cacheline-aligned, seldomly + * Use cpu_tss_rw as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. */ - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 8e13b8cc6bed..52195ee3f6d5 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 000000000000..fe814fd5e014 --- /dev/null +++ b/arch/x86/mm/cpu_entry_area.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/spinlock.h> +#include <linux/percpu.h> + +#include <asm/cpu_entry_area.h> +#include <asm/pgtable.h> +#include <asm/fixmap.h> +#include <asm/desc.h> + +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +} +EXPORT_SYMBOL(get_cpu_entry_area); + +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) +{ + unsigned long va = (unsigned long) cea_vaddr; + + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); +} + +static void __init +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); +} + +/* Setup the fixmap mappings only once per-processor */ +static void __init setup_cpu_entry_area(int cpu) +{ +#ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; +#else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. + * + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; +#endif + + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), + gdt_prot); + + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); + +#ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +#endif + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); + + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif +} + +static __init void setup_cpu_entry_area_ptes(void) +{ +#ifdef CONFIG_X86_32 + unsigned long start, end; + + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; + end = start + CPU_ENTRY_AREA_MAP_SIZE; + + /* Careful here: start + PMD_SIZE might wrap around */ + for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) + populate_extra_pte(start); +#endif +} + +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + setup_cpu_entry_area_ptes(); + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e3ac6fe6c9e..43dedbfb7257 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -44,10 +44,12 @@ struct addr_marker { unsigned long max_lines; }; -/* indices for address_markers; keep sync'd w/ address_markers below */ +/* Address space markers hints */ + +#ifdef CONFIG_X86_64 + enum address_markers_idx { USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 KERNEL_SPACE_NR, LOW_KERNEL_NR, VMALLOC_START_NR, @@ -56,56 +58,74 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif -# ifdef CONFIG_X86_ESPFIX64 + CPU_ENTRY_AREA_NR, +#ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, -# endif +#endif +#ifdef CONFIG_EFI + EFI_END_NR, +#endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, -#else + FIXADDR_START_NR, + END_OF_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, + [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, +#ifdef CONFIG_X86_ESPFIX64 + [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +#endif +#ifdef CONFIG_EFI + [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, +#endif + [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, + [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, + [MODULES_END_NR] = { MODULES_END, "End Modules" }, + [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, + [END_OF_SPACE_NR] = { -1, NULL } +}; + +#else /* CONFIG_X86_64 */ + +enum address_markers_idx { + USER_SPACE_NR = 0, KERNEL_SPACE_NR, VMALLOC_START_NR, VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, #endif + CPU_ENTRY_AREA_NR, + FIXADDR_START_NR, + END_OF_SPACE_NR, }; -/* Address space markers hints */ static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/* VMEMMAP_START */, "Vmemmap" }, -#ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "KASAN shadow" }, - { KASAN_SHADOW_END, "KASAN shadow end" }, + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, +#ifdef CONFIG_HIGHMEM + [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif -# ifdef CONFIG_X86_ESPFIX64 - { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, -# endif -# ifdef CONFIG_EFI - { EFI_VA_END, "EFI Runtime Services" }, -# endif - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, + [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, + [END_OF_SPACE_NR] = { -1, NULL } }; +#endif /* !CONFIG_X86_64 */ + /* Multipliers for offsets within the PTEs */ #define PTE_LEVEL_MULT (PAGE_SIZE) #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) @@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; - if (!pgprot_val(prot)) { + if (!(pr & _PAGE_PRESENT)) { /* Not present */ pt_dump_cont_printf(m, dmsg, " "); } else { @@ -525,8 +545,8 @@ static int __init pt_dump_init(void) address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; #endif - return 0; } __initcall(pt_dump_init); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index febf6980e653..06fe3d51d385 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d..135c9a7898c7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/page_types.h> +#include <asm/cpu_entry_area.h> #include <asm/init.h> #include "mm_internal.h" @@ -766,6 +767,7 @@ void __init mem_init(void) mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif @@ -777,6 +779,10 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, + CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE, + CPU_ENTRY_AREA_MAP_SIZE >> 10, + #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 99dfed6dfef8..47388f0c0e59 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/cpu_entry_area.h> extern struct range pfn_mapped[E820_MAX_ENTRIES]; @@ -277,6 +278,7 @@ void __init kasan_early_init(void) void __init kasan_init(void) { int i; + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; #ifdef CONFIG_KASAN_INLINE register_die_notifier(&kasan_die_notifier); @@ -321,16 +323,33 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, + PAGE_SIZE); + + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, + PAGE_SIZE); + kasan_populate_zero_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); + shadow_cpu_entry_begin); + + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, + (unsigned long)shadow_cpu_entry_end, 0); + + kasan_populate_zero_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), (unsigned long)kasan_mem_to_shadow(_end), early_pfn_to_nid(__pa(_stext))); kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - (void *)KASAN_SHADOW_END); + (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6b9bf023a700..c3c5274410a9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -10,6 +10,7 @@ #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <asm/cpu_entry_area.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3118392cdf75..0a1be3adc97e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next, new_asid)); + write_cr3(build_cr3(next->pgd, new_asid)); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next, new_asid)); + write_cr3(build_cr3_noflush(next->pgd, new_asid)); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); @@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void) !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ - write_cr3(build_cr3(mm, 0)); + write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); @@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_single(addr); + __flush_tlb_one(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index f44c0bc95aa2..8538a6723171 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_one(msg->address); + __flush_tlb_single(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 36a28eddb435..a7d966964c6f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -152,17 +152,19 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(cpu_tss, cpu); #ifdef CONFIG_X86_64 struct desc_struct *desc = get_cpu_gdt_rw(cpu); tss_desc tss; #endif - set_tss_desc(cpu, t); /* - * This just modifies memory; should not be - * necessary. But... This is necessary, because - * 386 hardware has concept of busy TSS or some - * similar stupidity. - */ + + /* + * We need to reload TR, which requires that we change the + * GDT entry to indicate "available" first. + * + * XXX: This could probably all be replaced by a call to + * force_reload_TR(). + */ + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); #ifdef CONFIG_X86_64 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d669e9d89001..c9081c6671f0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,8 +1,12 @@ +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include <linux/bootmem.h> +#endif #include <linux/cpu.h> #include <linux/kexec.h> #include <xen/features.h> #include <xen/page.h> +#include <xen/interface/memory.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +void __init arch_xen_balloon_init(struct resource *hostmem_resource) +{ + struct xen_memory_map memmap; + int rc; + unsigned int i, last_guest_ram; + phys_addr_t max_addr = PFN_PHYS(max_pfn); + struct e820_table *xen_e820_table; + const struct e820_entry *entry; + struct resource *res; + + if (!xen_initial_domain()) + return; + + xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); + if (!xen_e820_table) + return; + + memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); + set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); + rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); + if (rc) { + pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); + goto out; + } + + last_guest_ram = 0; + for (i = 0; i < memmap.nr_entries; i++) { + if (xen_e820_table->entries[i].addr >= max_addr) + break; + if (xen_e820_table->entries[i].type == E820_TYPE_RAM) + last_guest_ram = i; + } + + entry = &xen_e820_table->entries[last_guest_ram]; + if (max_addr >= entry->addr + entry->size) + goto out; /* No unallocated host RAM. */ + + hostmem_resource->start = max_addr; + hostmem_resource->end = entry->addr + entry->size; + + /* + * Mark non-RAM regions between the end of dom0 RAM and end of host RAM + * as unavailable. The rest of that region can be used for hotplug-based + * ballooning. + */ + for (; i < memmap.nr_entries; i++) { + entry = &xen_e820_table->entries[i]; + + if (entry->type == E820_TYPE_RAM) + continue; + + if (entry->addr >= hostmem_resource->end) + break; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto out; + + res->name = "Unavailable host RAM"; + res->start = entry->addr; + res->end = (entry->addr + entry->size < hostmem_resource->end) ? + entry->addr + entry->size : hostmem_resource->end; + rc = insert_resource(hostmem_resource, res); + if (rc) { + pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", + __func__, res->start, res->end, rc); + kfree(res); + goto out; + } + } + + out: + kfree(xen_e820_table); +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f2414c6c5e7c..c047f42552e1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -88,6 +88,8 @@ #include "multicalls.h" #include "pmu.h" +#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ + void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); @@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } void xen_set_iopl_mask(unsigned mask) @@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); x86_configure_nx(); /* Get mfn list */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index fc048ec686e7..4d62c071b166 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); + /* + * Zap execute permission from the ident map. Due to the sharing of + * L1 entries we need to do this in the L2. + */ + if (__supported_pte_mask & _PAGE_NX) { + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(level2_ident_pgt[i])) + continue; + level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); + } + } + /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) @@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: - case FIX_RO_IDT: #ifdef CONFIG_X86_32 case FIX_WP_TEST: # ifdef CONFIG_HIGHMEM @@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: /* All local page mappings */ pte = pfn_pte(phys, prot); break; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c114ca767b3b..6e0d2086eacb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,7 +808,6 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { - bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -824,11 +823,10 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - discard = true; + type = E820_TYPE_UNUSABLE; } - if (!discard) - xen_align_and_add_e820_region(addr, chunk_size, type); + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/block/bio.c b/block/bio.c index 8bfdea58159b..9ef6cf3addb3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_disk = bio_src->bi_disk; bio->bi_partno = bio_src->bi_partno; bio_set_flag(bio, BIO_CLONED); + if (bio_flagged(bio_src, BIO_THROTTLED)) + bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; diff --git a/block/blk-map.c b/block/blk-map.c index b21f8e86f120..d3a94719f03f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -12,22 +12,29 @@ #include "blk.h" /* - * Append a bio to a passthrough request. Only works can be merged into - * the request based on the driver constraints. + * Append a bio to a passthrough request. Only works if the bio can be merged + * into the request based on the driver constraints. */ -int blk_rq_append_bio(struct request *rq, struct bio *bio) +int blk_rq_append_bio(struct request *rq, struct bio **bio) { - blk_queue_bounce(rq->q, &bio); + struct bio *orig_bio = *bio; + + blk_queue_bounce(rq->q, bio); if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq->q, rq, *bio); } else { - if (!ll_back_merge_fn(rq->q, rq, bio)) + if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (orig_bio != *bio) { + bio_put(*bio); + *bio = orig_bio; + } return -EINVAL; + } - rq->biotail->bi_next = bio; - rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; + rq->biotail->bi_next = *bio; + rq->biotail = *bio; + rq->__data_len += (*bio)->bi_iter.bi_size; } return 0; @@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq, * We link the bounce buffer in and could have to traverse it * later so we have to get a ref to prevent it from being freed */ - ret = blk_rq_append_bio(rq, bio); - bio_get(bio); + ret = blk_rq_append_bio(rq, &bio); if (ret) { - bio_endio(bio); __blk_rq_unmap_user(orig_bio); - bio_put(bio); return ret; } + bio_get(bio); return 0; } @@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; int do_copy = 0; - struct bio *bio; + struct bio *bio, *orig_bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) @@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->rq_flags |= RQF_COPY_USER; - ret = blk_rq_append_bio(rq, bio); + orig_bio = bio; + ret = blk_rq_append_bio(rq, &bio); if (unlikely(ret)) { /* request is too big */ - bio_put(bio); + bio_put(orig_bio); return ret; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e6..d19f416d6101 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2226,13 +2226,7 @@ again: out_unlock: spin_unlock_irq(q->queue_lock); out: - /* - * As multiple blk-throtls may stack in the same issue path, we - * don't want bios to leave with the flag set. Clear the flag if - * being issued. - */ - if (!throttled) - bio_clear_flag(bio, BIO_THROTTLED); + bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) diff --git a/block/bounce.c b/block/bounce.c index fceb1a96480b..1d05c422c932 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, unsigned i = 0; bool bounce = false; int sectors = 0; + bool passthrough = bio_is_passthrough(*bio_orig); bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_PAGES) @@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bounce) return; - if (sectors < bio_sectors(*bio_orig)) { + if (!passthrough && sectors < bio_sectors(*bio_orig)) { bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); bio_chain(bio, *bio_orig); generic_make_request(*bio_orig); *bio_orig = bio; } - bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); + bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL : + bounce_bio_set); bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b4df317c2916..f95c60774ce8 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -100,9 +100,13 @@ struct kyber_hctx_data { unsigned int cur_domain; unsigned int batching; wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; + struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS]; }; +static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, + void *key); + static int rq_sched_domain(const struct request *rq) { unsigned int op = rq->cmd_flags; @@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) for (i = 0; i < KYBER_NUM_DOMAINS; i++) { INIT_LIST_HEAD(&khd->rqs[i]); + init_waitqueue_func_entry(&khd->domain_wait[i], + kyber_domain_wake); + khd->domain_wait[i].private = hctx; INIT_LIST_HEAD(&khd->domain_wait[i].entry); atomic_set(&khd->wait_index[i], 0); } @@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, int nr; nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) - return nr; /* * If we failed to get a domain token, make sure the hardware queue is * run when one becomes available. Note that this is serialized on * khd->lock, but we still need to be careful about the waker. */ - if (list_empty_careful(&wait->entry)) { - init_waitqueue_func_entry(wait, kyber_domain_wake); - wait->private = hctx; + if (nr < 0 && list_empty_careful(&wait->entry)) { ws = sbq_wait_ptr(domain_tokens, &khd->wait_index[sched_domain]); + khd->domain_ws[sched_domain] = ws; add_wait_queue(&ws->wait, wait); /* * Try again in case a token was freed before we got on the wait - * queue. The waker may have already removed the entry from the - * wait queue, but list_del_init() is okay with that. + * queue. */ nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) { - unsigned long flags; + } - spin_lock_irqsave(&ws->wait.lock, flags); - list_del_init(&wait->entry); - spin_unlock_irqrestore(&ws->wait.lock, flags); - } + /* + * If we got a token while we were on the wait queue, remove ourselves + * from the wait queue to ensure that all wake ups make forward + * progress. It's possible that the waker already deleted the entry + * between the !list_empty_careful() check and us grabbing the lock, but + * list_del_init() is okay with that. + */ + if (nr >= 0 && !list_empty_careful(&wait->entry)) { + ws = khd->domain_ws[sched_domain]; + spin_lock_irq(&ws->wait.lock); + list_del_init(&wait->entry); + spin_unlock_irq(&ws->wait.lock); } + return nr; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 415a54ced4d6..444a387df219 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, if (!af_alg_readable(sk)) break; - if (!ctx->used) { - err = af_alg_wait_for_data(sk, flags); - if (err) - return err; - } - seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 48b34e9c6834..ddcc45f77edd 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t usedpages = 0; /* [in] RX bufs to be used from user */ size_t processed = 0; /* [in] TX bufs to be consumed */ + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* * Data length provided by caller via sendmsg/sendpage that has not * yet been processed. @@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = outlen; + aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); @@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, crypto_aead_decrypt(&areq->cra_u.aead_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = outlen; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 30cff827dd8f..baef9bfccdda 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, int err = 0; size_t len = 0; + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* Allocate cipher request for current operation. */ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + crypto_skcipher_reqsize(tfm)); @@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = len; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, af_alg_async_cb, areq); @@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = len; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 4e6472658852..eca04d3729b3 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 778e0ff42bfa..11af5fd6a443 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, walk->total = req->cryptlen; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->iv = req->iv; - walk->oiv = req->iv; - walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; @@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, int err; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); - walk->iv = req->iv; - walk->oiv = req->iv; - if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6742f6c68034..9bff853e85f3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1007,7 +1007,7 @@ skip: /* The record may be cleared by others, try read next record */ if (len == -ENOENT) goto skip; - else if (len < sizeof(*rcd)) { + else if (len < 0 || len < sizeof(*rcd)) { rc = -EIO; goto out; } diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 30e84cc600ae..06ea4749ebd9 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cpc_register_resource *desired_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; int ret = 0; if (!cpc_desc || pcc_ss_id < 0) { diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ff2580e7611d..abeb4df4f22e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ccb9975a97fa..ad0477ae820f 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps) struct nullb_cmd { struct list_head list; struct llist_node ll_list; - call_single_data_t csd; + struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; + blk_status_t error; struct nullb_queue *nq; struct hrtimer timer; - blk_status_t error; }; struct nullb_queue { diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 647d056df88c..8a1860a36c77 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core) best_parent_rate = core->parent->rate; } + if (clk_pm_runtime_get(core)) + return; + if (core->flags & CLK_SET_RATE_UNGATE) { unsigned long flags; @@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core) /* handle the new child who might not be in core->children yet */ if (core->new_child) clk_change_rate(core->new_child); + + clk_pm_runtime_put(core); } static int clk_core_set_rate_nolock(struct clk_core *core, diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index a1a634253d6f..f00d8758ba24 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include <linux/clk.h> #include <linux/clk-provider.h> +#include <linux/delay.h> #include <linux/init.h> #include <linux/of.h> #include <linux/of_device.h> @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 58d4f4e1ad6a..ca38229b045a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,8 @@ #include "cpufreq_governor.h" +#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC) + static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); @@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, { struct dbs_data *dbs_data = to_dbs_data(attr_set); struct policy_dbs_info *policy_dbs; + unsigned int sampling_interval; int ret; - ret = sscanf(buf, "%u", &dbs_data->sampling_rate); - if (ret != 1) + + ret = sscanf(buf, "%u", &sampling_interval); + if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL) return -EINVAL; + dbs_data->sampling_rate = sampling_interval; + /* * We are operating under dbs_data->mutex and so the list and its * entries can't be freed concurrently. @@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) if (ret) goto free_policy_dbs_info; - dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); + /* + * The sampling interval should not be less than the transition latency + * of the CPU and it also cannot be too small for dbs_update() to work + * correctly. + */ + dbs_data->sampling_rate = max_t(unsigned int, + CPUFREQ_DBS_MIN_SAMPLING_INTERVAL, + cpufreq_policy_transition_delay_us(policy)); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 628fe899cb48..d9b2c2de49c4 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev) val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; - if ((val != OCOTP_CFG3_SPEED_1P2GHZ) && - of_machine_is_compatible("fsl,imx6q")) - if (dev_pm_opp_disable(dev, 1200000000)) - dev_warn(dev, "failed to disable 1.2GHz OPP\n"); if (val < OCOTP_CFG3_SPEED_996MHZ) if (dev_pm_opp_disable(dev, 996000000)) dev_warn(dev, "failed to disable 996MHz OPP\n"); - if (of_machine_is_compatible("fsl,imx6q")) { + + if (of_machine_is_compatible("fsl,imx6q") || + of_machine_is_compatible("fsl,imx6qp")) { if (val != OCOTP_CFG3_SPEED_852MHZ) if (dev_pm_opp_disable(dev, 852000000)) dev_warn(dev, "failed to disable 852MHz OPP\n"); + if (val != OCOTP_CFG3_SPEED_1P2GHZ) + if (dev_pm_opp_disable(dev, 1200000000)) + dev_warn(dev, "failed to disable 1.2GHz OPP\n"); } iounmap(base); put_node: diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index 23e771dba4c1..e85903eddc68 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset) struct gpio_reg *r = to_gpio_reg(gc); int irq = r->irqs[offset]; - if (irq >= 0 && r->irq.domain) - irq = irq_find_mapping(r->irq.domain, irq); + if (irq >= 0 && r->irqdomain) + irq = irq_find_mapping(r->irqdomain, irq); return irq; } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index eb4528c87c0b..d6f3d9ee1350 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent)); acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 27f383bda7d9..f748aa3e77f7 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -19,30 +19,27 @@ /** * devprop_gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible + * @fwnode: Property Node containing the gpio-line-names property * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ -void devprop_gpiochip_set_names(struct gpio_chip *chip) +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode) { struct gpio_device *gdev = chip->gpiodev; const char **names; int ret, i; - if (!chip->parent) { - dev_warn(&gdev->dev, "GPIO chip parent is NULL\n"); - return; - } - - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", NULL, 0); if (ret < 0) return; if (ret != gdev->ngpio) { - dev_warn(chip->parent, + dev_warn(&gdev->dev, "names %d do not match number of GPIOs %d\n", ret, gdev->ngpio); return; @@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return; - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, gdev->ngpio); if (ret < 0) { - dev_warn(chip->parent, "failed to read GPIO line names\n"); + dev_warn(&gdev->dev, "failed to read GPIO line names\n"); kfree(names); return; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0d59e61b52f..72a0695d2ac3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip) /* If the chip defines names itself, these take precedence */ if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, + of_fwnode_handle(chip->of_node)); of_node_get(chip->of_node); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index af48322839c3..6c44d1652139 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) return desc - &desc->gdev->descs[0]; } -void devprop_gpiochip_set_names(struct gpio_chip *chip); +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode); /* With descriptor prefix */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da43813d67a4..5aeb5f8816f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f71fe6d2ddda..bb5fa895fb64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct dm_connector_state *dm_state) { struct drm_display_mode *preferred_mode = NULL; - const struct drm_connector *drm_connector; + struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!aconnector->dc_sink) { /* - * Exclude MST from creating fake_sink - * TODO: need to enable MST into fake_sink feature + * Create dc_sink when necessary to MST + * Don't apply fake_sink to MST */ - if (aconnector->mst_port) - goto stream_create_fail; + if (aconnector->mst_port) { + dm_dp_mst_dc_sink_create(drm_connector); + goto mst_dc_sink_create_done; + } if (create_fake_sink(aconnector)) goto stream_create_fail; @@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream_create_fail: dm_state_null: drm_connector_null: +mst_dc_sink_create_done: return stream; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 117521c6a6ed..0230250a1164 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -189,6 +189,8 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; + + bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index f8efb98b1fa7..638c2c2b5cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector, return ret; } +void dm_dp_mst_dc_sink_create(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct edid *edid; + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); + + if (!edid) { + drm_mode_connector_update_edid_property( + &aconnector->base, + NULL); + return; + } + + aconnector->edid = edid; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + (uint8_t *)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, + &init_params); + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + + amdgpu_dm_add_sink_to_freesync_module( + connector, aconnector->edid); + + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_set_path_property(connector, pathprop); drm_connector_list_iter_end(&conn_iter); + aconnector->mst_connected = true; return &aconnector->base; } } @@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); + aconnector->mst_connected = true; + DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_update_edid_property( &aconnector->base, NULL); + + aconnector->mst_connected = false; } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } +static void dm_dp_mst_link_status_reset(struct drm_connector *connector) +{ + mutex_lock(&connector->dev->mode_config.mutex); + drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); +} + static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); @@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) drm_connector_register(connector); + if (aconnector->mst_connected) + dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 2da851b40042..8cf51da26657 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,5 +31,6 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); +void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..b142629a1058 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -900,6 +900,15 @@ bool dcn_validate_bandwidth( v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; + /* + * Spreadsheet doesn't handle taps_c is one properly, + * need to force Chroma to always be scaled to pass + * bandwidth validation. + */ + if (v->override_hta_pschroma[input_idx] == 1) + v->override_hta_pschroma[input_idx] = 2; + if (v->override_vta_pschroma[input_idx] == 1) + v->override_vta_pschroma[input_idx] = 2; v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; } if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e27ed4a45265..42a111b9505d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } -bool dp_active_dongle_validate_timing( +static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dc_dongle_caps *dongle_caps) { @@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) required_pix_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + required_pix_clk = required_pix_clk * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 07ff8d2faf3f..d844fadcd56f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface( int num_planes, struct dc_state *context) { - int i, be_idx; + int i; if (num_planes == 0) return; - be_idx = -1; for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (stream == context->res_ctx.pipe_ctx[i].stream) { - be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; - break; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (stream == pipe_ctx->stream) { + if (!pipe_ctx->top_pipe && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); } } @@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface( context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); + + dc->hwss.update_plane_addr(dc, pipe_ctx); + program_surface_visibility(dc, pipe_ctx); } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if ((stream == pipe_ctx->stream) && + (!pipe_ctx->top_pipe) && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); + } } static void dce110_power_down_fe(struct dc *dc, int fe_idx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 74e7c82bdc76..a9d55d0dd69e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps( scl_data->taps.h_taps = 1; if (IDENTITY_RATIO(scl_data->ratios.vert)) scl_data->taps.v_taps = 1; - /* - * Spreadsheet doesn't handle taps_c is one properly, - * need to force Chroma to always be scaled to pass - * bandwidth validation. - */ + if (IDENTITY_RATIO(scl_data->ratios.horz_c)) + scl_data->taps.h_taps_c = 1; + if (IDENTITY_RATIO(scl_data->ratios.vert_c)) + scl_data->taps.v_taps_c = 1; } return true; diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 59849f02e2ad..1402c0e71b03 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr mutex_lock(&dev->mode_config.idr_mutex); - /* Insert the new lessee into the tree */ - id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); - if (id < 0) { - error = id; - goto out_lessee; - } - - lessee->lessee_id = id; - lessee->lessor = drm_master_get(lessor); - list_add_tail(&lessee->lessee_list, &lessor->lessees); - idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.crtc_idr, object)) @@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr } } + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + /* Move the leases over */ lessee->leases = *leases; DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 37a93cdffb4a..2c90519576a3 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format) } /* - * setplane_internal - setplane handler for internal callers + * __setplane_internal - setplane handler for internal callers * - * Note that we assume an extra reference has already been taken on fb. If the - * update fails, this reference will be dropped before return; if it succeeds, - * the previous framebuffer (if any) will be unreferenced instead. + * This function will take a reference on the new fb for the plane + * on success. * * src_{x,y,w,h} are provided in 16.16 fixed point format */ @@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane, if (!ret) { plane->crtc = crtc; plane->fb = fb; - fb = NULL; + drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: - if (fb) - drm_framebuffer_put(fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; @@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data, } } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ - return setplane_internal(plane, crtc, fb, - plane_req->crtc_x, plane_req->crtc_y, - plane_req->crtc_w, plane_req->crtc_h, - plane_req->src_x, plane_req->src_y, - plane_req->src_w, plane_req->src_h); + ret = setplane_internal(plane, crtc, fb, + plane_req->crtc_x, plane_req->crtc_y, + plane_req->crtc_w, plane_req->crtc_h, + plane_req->src_x, plane_req->src_y, + plane_req->src_w, plane_req->src_h); + + if (fb) + drm_framebuffer_put(fb); + + return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, @@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ ret = __setplane_internal(crtc->cursor, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - 0, 0, src_w, src_h, ctx); + crtc_x, crtc_y, crtc_w, crtc_h, + 0, 0, src_w, src_h, ctx); + + if (fb) + drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f776fc1cc543..cb4d09c70fd4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = { .release = drm_syncobj_file_release, }; -static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) -{ - struct file *file = anon_inode_getfile("syncobj_file", - &drm_syncobj_file_fops, - syncobj, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - drm_syncobj_get(syncobj); - if (cmpxchg(&syncobj->file, NULL, file)) { - /* lost the race */ - fput(file); - } - - return 0; -} - int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { - int ret; + struct file *file; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; - if (!syncobj->file) { - ret = drm_syncobj_alloc_file(syncobj); - if (ret) { - put_unused_fd(fd); - return ret; - } + file = anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } - fd_install(fd, syncobj->file); + + drm_syncobj_get(syncobj); + fd_install(fd, file); + *p_fd = fd; return 0; } @@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private, return ret; } -static struct drm_syncobj *drm_syncobj_fdget(int fd) -{ - struct file *file = fget(fd); - - if (!file) - return NULL; - if (file->f_op != &drm_syncobj_file_fops) - goto err; - - return file->private_data; -err: - fput(file); - return NULL; -}; - static int drm_syncobj_fd_to_handle(struct drm_file *file_private, int fd, u32 *handle) { - struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + struct drm_syncobj *syncobj; + struct file *file; int ret; - if (!syncobj) + file = fget(fd); + if (!file) return -EINVAL; + if (file->f_op != &drm_syncobj_file_fops) { + fput(file); + return -EINVAL; + } + /* take a reference to put in the idr */ + syncobj = file->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, spin_unlock(&file_private->syncobj_table_lock); idr_preload_end(); - if (ret < 0) { - fput(syncobj->file); - return ret; - } - *handle = ret; - return 0; + if (ret > 0) { + *handle = ret; + ret = 0; + } else + drm_syncobj_put(syncobj); + + fput(file); + return ret; } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad4050f7ab3b..18de6569d04a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..ac236b88c99c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5f8b9f1f40f1..bcbc7abe6693 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n, *first; if (!b->irq_armed) - return; + goto wakeup_signaler; /* We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request @@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. + */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e0843bb99169..58a3755544b2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e8ccf89cb17b..ff9397030092 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9944,11 +9944,10 @@ found: } ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + drm_framebuffer_put(fb); if (ret) goto fail; - drm_framebuffer_put(fb); - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2615912430cc..435ff8662cfa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, /* Determine if we can get a cache-coherent map, forcing * uncached mapping if we can't. */ - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) nvbo->force_coherent = true; } @@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) continue; - if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + if ((flags & TTM_PL_FLAG_TT) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; /* Select this page size if it's the first that supports diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 3331e82ae9e7..96f6bd8aee5d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -157,8 +157,8 @@ struct nouveau_drm { struct nvif_object copy; int mtrr; int type_vram; - int type_host; - int type_ncoh; + int type_host[2]; + int type_ncoh[2]; } ttm; /* GEM interface support */ @@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev) return dev->dev_private; } +static inline bool +nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm) +{ + struct nvif_mmu *mmu = &drm->client.mmu; + return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED); +} + int nouveau_pmops_suspend(struct device *); int nouveau_pmops_resume(struct device *); bool nouveau_pmops_runtime(void); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index c533d8e04afc..be7357bf2246 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_unregister_fbi(&fbcon->helper); drm_fb_helper_fini(&fbcon->helper); - if (nouveau_fb->nvbo) { + if (nouveau_fb && nouveau_fb->nvbo) { nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 589a9621db76..c002f8968507 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) u8 type; int ret; - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) - type = drm->ttm.type_ncoh; + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) + type = drm->ttm.type_ncoh[!!mem->kind]; else - type = drm->ttm.type_host; + type = drm->ttm.type_host[0]; if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) mem->comp = mem->kind = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 08b974b30482..dff51a0ee028 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm) drm->ttm.mem_global_ref.release = NULL; } -int -nouveau_ttm_init(struct nouveau_drm *drm) +static int +nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind) { - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_pci *pci = device->pci; struct nvif_mmu *mmu = &drm->client.mmu; - struct drm_device *dev = drm->dev; - int typei, ret; + int typei; typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | - NVIF_MEM_COHERENT); + kind | NVIF_MEM_COHERENT); if (typei < 0) return -ENOSYS; - drm->ttm.type_host = typei; + drm->ttm.type_host[!!kind] = typei; - typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind); if (typei < 0) return -ENOSYS; - drm->ttm.type_ncoh = typei; + drm->ttm.type_ncoh[!!kind] = typei; + return 0; +} + +int +nouveau_ttm_init(struct nouveau_drm *drm) +{ + struct nvkm_device *device = nvxx_device(&drm->client.device); + struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; + struct drm_device *dev = drm->dev; + int typei, ret; + + ret = nouveau_ttm_init_host(drm, 0); + if (ret) + return ret; + + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && + drm->client.device.info.chipset != 0x50) { + ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND); + if (ret) + return ret; + } if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 9e2628dd8e4d..f5371d96b003 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma) nvif_vmm_put(&vma->vmm->vmm, &tmp); } list_del(&vma->head); - *pvma = NULL; kfree(*pvma); + *pvma = NULL; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e14643615698..00eeaaffeae5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2369,7 +2369,7 @@ nv13b_chipset = { .imem = gk20a_instmem_new, .ltc = gp100_ltc_new, .mc = gp10b_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, .pmu = gm20b_pmu_new, .timer = gk20a_timer_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 972370ed36f0..7c7efa4ea0d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) if (data) { *ver = nvbios_rd08(bios, data + 0x00); switch (*ver) { + case 0x20: case 0x21: case 0x30: case 0x40: @@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx, if (data && idx < *cnt) { u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); switch (*ver * !!outp) { + case 0x20: case 0x21: case 0x30: *hdr = nvbios_rd08(bios, data + 0x04); @@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx, info->type = nvbios_rd16(bios, data + 0x00); info->mask = nvbios_rd16(bios, data + 0x02); switch (*ver) { + case 0x20: + info->mask |= 0x00c0; /* match any link */ + /* fall-through */ case 0x21: case 0x30: info->flags = nvbios_rd08(bios, data + 0x05); info->script[0] = nvbios_rd16(bios, data + 0x06); info->script[1] = nvbios_rd16(bios, data + 0x08); - info->lnkcmp = nvbios_rd16(bios, data + 0x0a); + if (*len >= 0x0c) + info->lnkcmp = nvbios_rd16(bios, data + 0x0a); if (*len >= 0x0f) { info->script[2] = nvbios_rd16(bios, data + 0x0c); info->script[3] = nvbios_rd16(bios, data + 0x0e); @@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, memset(info, 0x00, sizeof(*info)); if (data) { switch (*ver) { + case 0x20: case 0x21: info->dc = nvbios_rd08(bios, data + 0x02); info->pe = nvbios_rd08(bios, data + 0x03); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 1ba7289684aa..db48a1daca0c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; - refcount_inc(&iobj->maps); + refcount_set(&iobj->maps, 1); } mutex_unlock(&imem->subdev.mutex); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index b1b1f3626b96..deb96de54b00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev) return ret; pci->irq = pdev->irq; + + /* Ensure MSI interrupts are armed, for the case where there are + * already interrupts pending (for whatever reason) at load time. + */ + if (pci->msi) + pci->func->msi_rearm(pci); + return ret; } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index dda904ec0534..500b6fb3e028 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder, writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); } +static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); + unsigned long rate = mode->clock * 1000; + unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (rounded_rate > 0 && + max_t(unsigned long, rounded_rate, rate) - + min_t(unsigned long, rounded_rate, rate) < diff) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { .atomic_check = sun4i_hdmi_atomic_check, .disable = sun4i_hdmi_disable, .enable = sun4i_hdmi_enable, .mode_set = sun4i_hdmi_mode_set, + .mode_valid = sun4i_hdmi_mode_valid, }; static const struct drm_encoder_funcs sun4i_hdmi_funcs = { diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index e122f5b2a395..f4284b51bdca 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, if (IS_ERR(tcon->crtc)) { dev_err(dev, "Couldn't create our CRTC\n"); ret = PTR_ERR(tcon->crtc); - goto err_free_clocks; + goto err_free_dotclock; } ret = sun4i_rgb_init(drm, tcon); if (ret < 0) - goto err_free_clocks; + goto err_free_dotclock; if (tcon->quirks->needs_de_be_mux) { /* diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 44343a2bf55c..b5ba6441489f 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed += (nr_free_pool - shrink_pages) << pool->order; if (freed >= sc->nr_to_scan) break; + shrink_pages <<= pool->order; } mutex_unlock(&lock); return freed; @@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, int r = 0; unsigned i, j, cpages; unsigned npages = 1 << order; - unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); + unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC); /* allocate array for page caching change */ caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c9790e2c3440..af5123042990 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, struct hwmon_device *hwdev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) @@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, tdata->hwdev = hwdev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. + */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } @@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); + if (err) + goto free_device; + } } } } return hdev; +free_device: + device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index c9714072e224..59c82cdcf48d 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, u8 *ptr; u8 *rx_buf; u8 sum; + u8 rx_byte; int ret = 0, final_ret; len = cros_ec_prepare_tx(ec_dev, ec_msg); @@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) { /* Verify that EC can process command */ for (i = 0; i < len; i++) { - switch (rx_buf[i]) { - case EC_SPI_PAST_END: - case EC_SPI_RX_BAD_DATA: - case EC_SPI_NOT_READY: - ret = -EAGAIN; - ec_msg->result = EC_RES_IN_PROGRESS; - default: + rx_byte = rx_buf[i]; + if (rx_byte == EC_SPI_PAST_END || + rx_byte == EC_SPI_RX_BAD_DATA || + rx_byte == EC_SPI_NOT_READY) { + ret = -EREMOTEIO; break; } - if (ret) - break; } - if (!ret) - ret = cros_ec_spi_receive_packet(ec_dev, - ec_msg->insize + sizeof(*response)); - } else { - dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } + if (!ret) + ret = cros_ec_spi_receive_packet(ec_dev, + ec_msg->insize + sizeof(*response)); + else + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + final_ret = terminate_request(ec_dev); spi_bus_unlock(ec_spi->spi->master); @@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, int i, len; u8 *ptr; u8 *rx_buf; + u8 rx_byte; int sum; int ret = 0, final_ret; @@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) { /* Verify that EC can process command */ for (i = 0; i < len; i++) { - switch (rx_buf[i]) { - case EC_SPI_PAST_END: - case EC_SPI_RX_BAD_DATA: - case EC_SPI_NOT_READY: - ret = -EAGAIN; - ec_msg->result = EC_RES_IN_PROGRESS; - default: + rx_byte = rx_buf[i]; + if (rx_byte == EC_SPI_PAST_END || + rx_byte == EC_SPI_RX_BAD_DATA || + rx_byte == EC_SPI_NOT_READY) { + ret = -EREMOTEIO; break; } - if (ret) - break; } - if (!ret) - ret = cros_ec_spi_receive_response(ec_dev, - ec_msg->insize + EC_MSG_TX_PROTO_BYTES); - } else { - dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } + if (!ret) + ret = cros_ec_spi_receive_response(ec_dev, + ec_msg->insize + EC_MSG_TX_PROTO_BYTES); + else + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + final_ret = terminate_request(ec_dev); spi_bus_unlock(ec_spi->spi->master); @@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) sizeof(struct ec_response_get_protocol_info); ec_dev->dout_size = sizeof(struct ec_host_request); + ec_spi->last_transfer_ns = ktime_get_ns(); err = cros_ec_register(ec_dev); if (err) { diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index da16bf45fab4..dc94ffc6321a 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void) EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->codec) return true; - if (of_find_node_by_name(node, "codec")) + node = of_get_child_by_name(parent, "codec"); + if (node) { + of_node_put(node); return true; + } return false; } diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index d66502d36ba0..dd19f17a1b63 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = { }; -static bool twl6040_has_vibra(struct device_node *node) +static bool twl6040_has_vibra(struct device_node *parent) { -#ifdef CONFIG_OF - if (of_find_node_by_name(node, "vibra")) + struct device_node *node; + + node = of_get_child_by_name(parent, "vibra"); + if (node) { + of_node_put(node); return true; -#endif + } + return false; } diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index f80e911b8843..73b605577447 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, if (!ops->oobbuf) ops->ooblen = 0; - if (offs < 0 || offs + ops->len >= mtd->size) + if (offs < 0 || offs + ops->len > mtd->size) return -EINVAL; if (ops->ooblen) { diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e0eb51d8c012..dd56a671ea42 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1763,7 +1763,7 @@ try_dmaread: err = brcmstb_nand_verify_erased_page(mtd, chip, buf, addr); /* erased page bitflips corrected */ - if (err > 0) + if (err >= 0) return err; } diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c index 484f7fbc3f7d..a8bde6665c24 100644 --- a/drivers/mtd/nand/gpio.c +++ b/drivers/mtd/nand/gpio.c @@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev) goto out_ce; } - gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); - if (IS_ERR(gpiomtd->nwp)) { - ret = PTR_ERR(gpiomtd->nwp); + gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); + if (IS_ERR(gpiomtd->ale)) { + ret = PTR_ERR(gpiomtd->ale); goto out_ce; } diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 50f8d4a1b983..d4d824ef64e9 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, return ret; } - /* handle the block mark swapping */ - block_mark_swapping(this, payload_virt, auxiliary_virt); - /* Loop over status bytes, accumulating ECC status. */ status = auxiliary_virt + nfc_geo->auxiliary_status_offset; @@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, max_bitflips = max_t(unsigned int, max_bitflips, *status); } + /* handle the block mark swapping */ + block_mark_swapping(this, buf, auxiliary_virt); + if (oob_required) { /* * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob() diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index 3c63b16d485f..d9efbc8d783b 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -159,6 +159,8 @@ struct arc_emac_priv { unsigned int link; unsigned int duplex; unsigned int speed; + + unsigned int rx_missed_errors; }; /** diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 3241af1ce718..bd277b0dc615 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -26,6 +26,8 @@ #include "emac.h" +static void arc_emac_restart(struct net_device *ndev); + /** * arc_emac_tx_avail - Return the number of available slots in the tx ring. * @priv: Pointer to ARC EMAC private data structure. @@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget) continue; } - pktlen = info & LEN_MASK; - stats->rx_packets++; - stats->rx_bytes += pktlen; - skb = rx_buff->skb; - skb_put(skb, pktlen); - skb->dev = ndev; - skb->protocol = eth_type_trans(skb, ndev); - - dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), - dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); - - /* Prepare the BD for next cycle */ - rx_buff->skb = netdev_alloc_skb_ip_align(ndev, - EMAC_BUFFER_SIZE); - if (unlikely(!rx_buff->skb)) { + /* Prepare the BD for next cycle. netif_receive_skb() + * only if new skb was allocated and mapped to avoid holes + * in the RX fifo. + */ + skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE); + if (unlikely(!skb)) { + if (net_ratelimit()) + netdev_err(ndev, "cannot allocate skb\n"); + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); stats->rx_errors++; - /* Because receive_skb is below, increment rx_dropped */ stats->rx_dropped++; continue; } - /* receive_skb only if new skb was allocated to avoid holes */ - netif_receive_skb(skb); - - addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data, + addr = dma_map_single(&ndev->dev, (void *)skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); if (dma_mapping_error(&ndev->dev, addr)) { if (net_ratelimit()) - netdev_err(ndev, "cannot dma map\n"); - dev_kfree_skb(rx_buff->skb); + netdev_err(ndev, "cannot map dma buffer\n"); + dev_kfree_skb(skb); + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); stats->rx_errors++; + stats->rx_dropped++; continue; } + + /* unmap previosly mapped skb */ + dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), + dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); + + pktlen = info & LEN_MASK; + stats->rx_packets++; + stats->rx_bytes += pktlen; + skb_put(rx_buff->skb, pktlen); + rx_buff->skb->dev = ndev; + rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev); + + netif_receive_skb(rx_buff->skb); + + rx_buff->skb = skb; dma_unmap_addr_set(rx_buff, addr, addr); dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); @@ -259,6 +270,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget) } /** + * arc_emac_rx_miss_handle - handle R_MISS register + * @ndev: Pointer to the net_device structure. + */ +static void arc_emac_rx_miss_handle(struct net_device *ndev) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + unsigned int miss; + + miss = arc_reg_get(priv, R_MISS); + if (miss) { + stats->rx_errors += miss; + stats->rx_missed_errors += miss; + priv->rx_missed_errors += miss; + } +} + +/** + * arc_emac_rx_stall_check - check RX stall + * @ndev: Pointer to the net_device structure. + * @budget: How many BDs requested to process on 1 call. + * @work_done: How many BDs processed + * + * Under certain conditions EMAC stop reception of incoming packets and + * continuously increment R_MISS register instead of saving data into + * provided buffer. This function detect that condition and restart + * EMAC. + */ +static void arc_emac_rx_stall_check(struct net_device *ndev, + int budget, unsigned int work_done) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct arc_emac_bd *rxbd; + + if (work_done) + priv->rx_missed_errors = 0; + + if (priv->rx_missed_errors && budget) { + rxbd = &priv->rxbd[priv->last_rx_bd]; + if (le32_to_cpu(rxbd->info) & FOR_EMAC) { + arc_emac_restart(ndev); + priv->rx_missed_errors = 0; + } + } +} + +/** * arc_emac_poll - NAPI poll handler. * @napi: Pointer to napi_struct structure. * @budget: How many BDs to process on 1 call. @@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget) unsigned int work_done; arc_emac_tx_clean(ndev); + arc_emac_rx_miss_handle(ndev); work_done = arc_emac_rx(ndev, budget); if (work_done < budget) { @@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget) arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); } + arc_emac_rx_stall_check(ndev, budget, work_done); + return work_done; } @@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance) if (status & MSER_MASK) { stats->rx_missed_errors += 0x100; stats->rx_errors += 0x100; + priv->rx_missed_errors += 0x100; + napi_schedule(&priv->napi); } if (status & RXCR_MASK) { @@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } +/** + * arc_emac_restart - Restart EMAC + * @ndev: Pointer to net_device structure. + * + * This function do hardware reset of EMAC in order to restore + * network packets reception. + */ +static void arc_emac_restart(struct net_device *ndev) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + int i; + + if (net_ratelimit()) + netdev_warn(ndev, "restarting stalled EMAC\n"); + + netif_stop_queue(ndev); + + /* Disable interrupts */ + arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK); + + /* Disable EMAC */ + arc_reg_clr(priv, R_CTRL, EN_MASK); + + /* Return the sk_buff to system */ + arc_free_tx_queue(ndev); + + /* Clean Tx BD's */ + priv->txbd_curr = 0; + priv->txbd_dirty = 0; + memset(priv->txbd, 0, TX_RING_SZ); + + for (i = 0; i < RX_BD_NUM; i++) { + struct arc_emac_bd *rxbd = &priv->rxbd[i]; + unsigned int info = le32_to_cpu(rxbd->info); + + if (!(info & FOR_EMAC)) { + stats->rx_errors++; + stats->rx_dropped++; + } + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); + } + priv->last_rx_bd = 0; + + /* Make sure info is visible to EMAC before enable */ + wmb(); + + /* Enable interrupts */ + arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK); + + /* Enable EMAC */ + arc_reg_or(priv, R_CTRL, EN_MASK); + + netif_start_queue(ndev); +} + static const struct net_device_ops arc_emac_netdev_ops = { .ndo_open = arc_emac_open, .ndo_stop = arc_emac_stop, diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index de51c2177d03..d09c5a9c53b5 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) /* Reset PHY, otherwise the read DMA engine will be in a mode that * breaks all requests to 256 bytes. */ - if (tg3_asic_rev(tp) == ASIC_REV_57766) + if (tg3_asic_rev(tp) == ASIC_REV_57766 || + tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc93b69cfd1e..a539263cd79c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp) val &= ~MVNETA_GMAC0_PORT_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); + pp->link = 0; + pp->duplex = -1; + pp->speed = 0; + udelay(200); } @@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { + mvneta_rx_error(pp, rx_desc); err_drop_frame: dev->stats.rx_errors++; - mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; } @@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { int queue; - for (queue = 0; queue < txq_number; queue++) + for (queue = 0; queue < rxq_number; queue++) mvneta_rxq_deinit(pp, &pp->rxqs[queue]); } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 54adfd967858..fc67e35b253e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth) /* set GE2 TUNE */ regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); - /* GE1, Force 1000M/FD, FC ON */ - mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); - - /* GE2, Force 1000M/FD, FC ON */ - mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); + /* Set linkdown as the default for each GMAC. Its own MCR would be set + * up with the more appropriate value when mtk_phy_link_adjust call is + * being invoked. + */ + for (i = 0; i < MTK_MAC_COUNT; i++) + mtk_w32(eth, 0, MTK_MAC_MCR(i)); /* Indicates CDM to parse the MTK special tag from CPU * which also is working out for untag packets. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1fffdebbc9e8..e9a1fbcc4adf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: - case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: @@ -505,7 +505,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); - MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c0872b3284cb..543060c305a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -82,6 +82,9 @@ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) +#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ + (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ @@ -590,6 +593,7 @@ struct mlx5e_channel { struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; int ix; + int cpu; }; struct mlx5e_channels { @@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 rq_type); static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index c6d90b6dd80e..9bcf38f4123b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, struct ieee_ets *ets) { + bool have_ets_tc = false; int bw_sum = 0; int i; @@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } /* Validate Bandwidth Sum */ - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + have_ets_tc = true; bw_sum += ets->tc_tx_bw[i]; + } + } - if (bw_sum != 0 && bw_sum != 100) { + if (have_ets_tc && bw_sum != 100) { netdev_err(netdev, "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 23425f028405..8f05efa5c829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, - new_channels.params.rq_wq_type); + new_channels.params.mpwqe_log_stride_sz = + MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val); + new_channels.params.mpwqe_log_num_strides = + MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return err; mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d2b057a3e512..d9d8227f195f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,11 +71,6 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) -{ - return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); -} - static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && @@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type) +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type) { params->rq_wq_type = rq_type; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - params->mpwqe_log_stride_sz = - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); + params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev, + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - params->mpwqe_log_stride_sz; break; @@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_set_rq_type_params(mdev, params, rq_type); + mlx5e_init_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int wq_sz = mlx5_wq_ll_get_size(&rq->wq); int mtt_sz = mlx5e_get_wqe_mtt_sz(); int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; - int node = mlx5e_get_node(c->priv, c->ix); int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), - GFP_KERNEL, node); + GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, - GFP_KERNEL, node); + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; @@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, int err; int i; - rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + rqp->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); @@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, default: /* MLX5_WQ_TYPE_LINKED_LIST */ rq->wqe.frag_info = kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), - GFP_KERNEL, - mlx5e_get_node(c->priv, c->ix)); + GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; @@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->priv->mdev; int err; - param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); param->eq_ix = c->ix; err = mlx5e_alloc_cq_common(mdev, param, cq); @@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); +} + static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; int eqn; - c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mdev = priv->mdev; c->tstamp = &priv->tstamp; c->ix = ix; + c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); @@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, - mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) { + unsigned int offset = 0; struct udphdr *udph; u8 proto; u16 port; @@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): - proto = ipv6_hdr(skb)->nexthdr; + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); break; default: goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 60771865c99c..e7e7cef2bde4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CQ_ERROR: cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", + mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", cqn, eqe->data.cq_err.syndrome); mlx5_cq_event(dev, cqn, eqe->type); break; @@ -775,7 +775,7 @@ err1: return err; } -int mlx5_stop_eqs(struct mlx5_core_dev *dev) +void mlx5_stop_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; int err; @@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) { err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); if (err) - return err; + mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", + err); } #endif err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); if (err) - return err; + mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", + err); - mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + if (err) + mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", + err); mlx5_cmd_use_polling(dev); err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); if (err) - mlx5_cmd_use_events(dev); - - return err; + mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", + err); } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c index 3c11d6e2160a..14962969c5ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, u8 actual_size; int err; + if (!size) + return -EINVAL; + if (!fdev->mdev) return -ENOTCONN; @@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, u8 actual_size; int err; + if (!size) + return -EINVAL; + if (!fdev->mdev) return -ENOTCONN; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c70fd663a633..dfaad9ecb2b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node); static void del_sw_flow_table(struct fs_node *node); static void del_sw_flow_group(struct fs_node *node); static void del_sw_fte(struct fs_node *node); +static void del_sw_prio(struct fs_node *node); +static void del_sw_ns(struct fs_node *node); /* Delete rule (destination) is special case that * requires to lock the FTE for all the deletion process. */ @@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } +static void del_sw_ns(struct fs_node *node) +{ + kfree(node); +} + +static void del_sw_prio(struct fs_node *node) +{ + kfree(node); +} + static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; @@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, NULL, NULL); + tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, NULL, NULL); + tree_init_node(&ns->node, NULL, del_sw_ns); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1a0e797ad001..21d29f7936f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev) u32 fw; int i; - /* If the syndrom is 0, the device is OK and no need to print buffer */ + /* If the syndrome is 0, the device is OK and no need to print buffer */ if (!ioread8(&h->synd)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index d2a66dc4adc6..8812d7208e8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); /* RQ size in ipoib by default is 512 */ params->log_rq_size = is_kdump_kernel() ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5f323442cc5a..8a89c7e8cd63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; - struct irq_affinity irqdesc = { - .pre_vectors = MLX5_EQ_VEC_COMP_BASE, - }; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) if (!priv->irq_info) goto err_free_msix; - nvec = pci_alloc_irq_vectors_affinity(dev->pdev, + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, - &irqdesc); + PCI_IRQ_MSIX); if (nvec < 0) return nvec; @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + priv->irq_info[i].mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[i].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_eqs; } + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_affinity_hints; + } + err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1154,6 +1213,9 @@ err_sriov: mlx5_cleanup_fs(dev); err_fs: + mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); + mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index db9e665ab104..889130edb715 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); - MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); - MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index e651e4c02867..d3c33e9eea72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, return ret_entry; } -static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; - MLX5_SET(set_rate_limit_in, in, opcode, - MLX5_CMD_OP_SET_RATE_LIMIT); - MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); if (err) { mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", rate, err); @@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_rate_limit_cmd(dev, 0, entry->index); + mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); entry->rate = 0; } @@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) if (table->rl_entry[i].rate) - mlx5_set_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + mlx5_set_pp_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); kfree(dev->priv.rl_table.rl_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 07a9ba6cfc70..2f74953e4561 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); - spin_unlock(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); return vxlan; } @@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; - if (mlx5e_vxlan_lookup_port(priv, port)) + mutex_lock(&priv->state_lock); + vxlan = mlx5e_vxlan_lookup_port(priv, port); + if (vxlan) { + atomic_inc(&vxlan->refcount); goto free_work; + } if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; @@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) goto err_delete_port; vxlan->udp_port = port; + atomic_set(&vxlan->refcount, 1); - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (err) goto err_free; @@ -113,35 +118,39 @@ err_free: err_delete_port: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); free_work: + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } -static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +static void mlx5e_vxlan_del_port(struct work_struct *work) { + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + u16 port = vxlan_work->port; struct mlx5e_vxlan *vxlan; + bool remove = false; - spin_lock_irq(&vxlan_db->lock); - vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_irq(&vxlan_db->lock); - + mutex_lock(&priv->state_lock); + spin_lock_bh(&vxlan_db->lock); + vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) - return; - - mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); - - kfree(vxlan); -} + goto out_unlock; -static void mlx5e_vxlan_del_port(struct work_struct *work) -{ - struct mlx5e_vxlan_work *vxlan_work = - container_of(work, struct mlx5e_vxlan_work, work); - struct mlx5e_priv *priv = vxlan_work->priv; - u16 port = vxlan_work->port; + if (atomic_dec_and_test(&vxlan->refcount)) { + radix_tree_delete(&vxlan_db->tree, port); + remove = true; + } - __mlx5e_vxlan_core_del_port(priv, port); +out_unlock: + spin_unlock_bh(&vxlan_db->lock); + if (remove) { + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); + } + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } @@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_irq(&vxlan_db->lock); + /* Lockless since we are the only radix-tree consumers, wq is disabled */ while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_irq(&vxlan_db->lock); - __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_irq(&vxlan_db->lock); + radix_tree_delete(&vxlan_db->tree, port); + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); } - spin_unlock_irq(&vxlan_db->lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 5def12c048e3..5ef6ae7d568a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -36,6 +36,7 @@ #include "en.h" struct mlx5e_vxlan { + atomic_t refcount; u16 udp_port; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 72ef4f8025f0..be657b8533f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); -} - static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } enum mlxsw_sp_nexthop_type { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index e379b78e86ef..13190aa09faf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? "BPF" : ""; } +static int +nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + int err; + + nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL); + if (!nn->app_priv) + return -ENOMEM; + + err = nfp_app_nic_vnic_alloc(app, nn, id); + if (err) + goto err_free_priv; + + return 0; +err_free_priv: + kfree(nn->app_priv); + return err; +} + static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) { + struct nfp_bpf_vnic *bv = nn->app_priv; + if (nn->dp.bpf_offload_xdp) nfp_bpf_xdp_offload(app, nn, NULL); + WARN_ON(bv->tc_prog); + kfree(bv); } static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, @@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, { struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; + struct bpf_prog *oldprog; + struct nfp_bpf_vnic *bv; + int err; if (type != TC_SETUP_CLSBPF || !tc_can_offload(nn->dp.netdev) || @@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, cls_bpf->common.protocol != htons(ETH_P_ALL) || cls_bpf->common.chain_index) return -EOPNOTSUPP; - if (nn->dp.bpf_offload_xdp) - return -EBUSY; /* Only support TC direct action */ if (!cls_bpf->exts_integrated || @@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return -EOPNOTSUPP; } - switch (cls_bpf->command) { - case TC_CLSBPF_REPLACE: - return nfp_net_bpf_offload(nn, cls_bpf->prog, true); - case TC_CLSBPF_ADD: - return nfp_net_bpf_offload(nn, cls_bpf->prog, false); - case TC_CLSBPF_DESTROY: - return nfp_net_bpf_offload(nn, NULL, true); - default: + if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; + + bv = nn->app_priv; + oldprog = cls_bpf->oldprog; + + /* Don't remove if oldprog doesn't match driver's state */ + if (bv->tc_prog != oldprog) { + oldprog = NULL; + if (!cls_bpf->prog) + return 0; } + + err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog); + if (err) + return err; + + bv->tc_prog = cls_bpf->prog; + return 0; } static int nfp_bpf_setup_tc_block(struct net_device *netdev, @@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, - .vnic_alloc = nfp_app_nic_vnic_alloc, + .vnic_alloc = nfp_bpf_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 082a15f6dfb5..57b6043177a3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -172,6 +172,14 @@ struct nfp_prog { struct list_head insns; }; +/** + * struct nfp_bpf_vnic - per-vNIC BPF priv structure + * @tc_prog: currently loaded cls_bpf program + */ +struct nfp_bpf_vnic { + struct bpf_prog *tc_prog; +}; + int nfp_bpf_jit(struct nfp_prog *prog); extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 70c92b649b29..38c924bdd32e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev) return ret; } - ret = emac_mac_up(adpt); + ret = adpt->phy.open(adpt); if (ret) { emac_mac_rx_tx_rings_free_all(adpt); free_irq(irq->irq, irq); return ret; } - ret = adpt->phy.open(adpt); + ret = emac_mac_up(adpt); if (ret) { - emac_mac_down(adpt); emac_mac_rx_tx_rings_free_all(adpt); free_irq(irq->irq, irq); + adpt->phy.close(adpt); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e1e5ac053760..ce2ea2d491ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -409,7 +409,7 @@ struct stmmac_desc_ops { /* get timestamp value */ u64(*get_timestamp) (void *desc, u32 ats); /* get rx timestamp status */ - int (*get_rx_timestamp_status) (void *desc, u32 ats); + int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); /* Display ring */ void (*display_ring)(void *head, unsigned int size, bool rx); /* set MSS via context descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 4b286e27c4ca..7e089bf906b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc) return ret; } -static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) +static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc, + u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; int ret = -EINVAL; @@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) /* Check if timestamp is OK from context descriptor */ do { - ret = dwmac4_rx_check_timestamp(desc); + ret = dwmac4_rx_check_timestamp(next_desc); if (ret < 0) goto exit; i++; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 7546b3664113..2a828a312814 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats) return ns; } -static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) +static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc, + u32 ats) { if (ats) { struct dma_extended_desc *p = (struct dma_extended_desc *)desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index f817f8f36569..db4cee57bb24 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats) return ns; } -static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) +static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 721b61655261..08c19ebd5306 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; + u32 reg_value; /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second * formula = (1/ptp_clock) * 1000000000 @@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, data &= PTP_SSIR_SSINC_MASK; + reg_value = data; if (gmac4) - data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; + reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT; - writel(data, ioaddr + PTP_SSIR); + writel(reg_value, ioaddr + PTP_SSIR); return data; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d7250539d0bd..337d53d12e94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, desc = np; /* Check if timestamp is available */ - if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { + if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) { ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b5a8f750e433..82104edca393 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev) /* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + u32 pause; + /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev) err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; + + /* There appears to be a bug in the 88e1512 when used in + * SGMII to copper mode, where the AN advertisment register + * clears the pause bits each time a negotiation occurs. + * This means we can never be truely sure what was advertised, + * so disable Pause support. + */ + pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause; + phydev->supported &= ~pause; + phydev->advertising &= ~pause; } return m88e1121_config_init(phydev); @@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1145_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index bfd3090fb055..07c6048200c6 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata) } ret = xgene_enet_ecc_init(pdata); - if (ret) + if (ret) { + if (pdata->dev->of_node) + clk_disable_unprepare(pdata->clk); return ret; + } xgene_gmac_reset(pdata); return 0; @@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev) return ret; mdio_bus = mdiobus_alloc(); - if (!mdio_bus) - return -ENOMEM; + if (!mdio_bus) { + ret = -ENOMEM; + goto out_clk; + } mdio_bus->name = "APM X-Gene MDIO bus"; @@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev) mdio_bus->phy_mask = ~0; ret = mdiobus_register(mdio_bus); if (ret) - goto out; + goto out_mdiobus; acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, acpi_register_phy, NULL, mdio_bus, NULL); @@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev) } if (ret) - goto out; + goto out_mdiobus; pdata->mdio_bus = mdio_bus; xgene_mdio_status = true; return 0; -out: +out_mdiobus: mdiobus_free(mdio_bus); +out_clk: + if (dev->of_node) + clk_disable_unprepare(pdata->clk); + return ret; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 19b9cc51079e..31f4b7911ef8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; + if (skb_dst(skb)) { + int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, + skb, mtu); + } + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), @@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } + if (skb_dst(skb)) { + int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, + skb, mtu); + } + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); @@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev, max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + if (max_mtu < ETH_MIN_MTU) + max_mtu = ETH_MIN_MTU; + + if (!changelink && !conf->mtu) + dev->mtu = max_mtu; } if (dev->mtu > max_mtu) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 10b075a46b26..e8189c07b41f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS | (ps ? IEEE80211_FCTL_PM : 0)); hdr->duration_id = cpu_to_le16(0); memcpy(hdr->addr1, vp->bssid, ETH_ALEN); @@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) continue; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) { res = -ENOMEM; goto out_err; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e949e3302af4..c586bcdb5190 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE, 0); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE, 0); } static struct dentry *debugfs_root; @@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_err(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent, unsigned long flags) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. - * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half, flags); if (ret) @@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena) { size_t logsize = arena->info2off - arena->logoff; size_t chunk_size = SZ_4K, offset = 0; - struct log_entry log; + struct log_entry ent; void *zerobuf; int ret; u32 i; @@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena) } for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log, 0); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent, 0); if (ret) goto free; } @@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. + */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 578c2057524d..db3cb6d4d0d4 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -27,6 +27,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -50,12 +51,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -125,6 +166,8 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @err_lock: Mutex for synchronizing error clearing. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -157,6 +200,7 @@ struct arena_info { /* Arena flags */ u32 flags; struct mutex err_lock; + int log_index[2]; }; /** @@ -176,6 +220,7 @@ struct arena_info { * @init_lock: Mutex used for the BTT initialization * @init_state: Flag describing the initialization state for the BTT * @num_arenas: Number of arenas in the BTT instance + * @phys_bb: Pointer to the namespace's badblocks structure */ struct btt { struct gendisk *btt_disk; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 65cc171c721d..2adada1a5855 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", @@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, return altmap; } +static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) +{ + return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), + ALIGN_DOWN(phys, nd_pfn->align)); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; @@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) start = nsio->res.start; size = PHYS_SECTION_ALIGN_UP(start + size) - start; if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(start + resource_size(&nsio->res), + nd_pfn->align)) { size = resource_size(&nsio->res); - end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + end_trunc = start + size - phys_pmem_align_down(nd_pfn, + start + size); } if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); /* diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f837d666cbd4..1e46e60b8f10 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_alignment = size; + queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; blk_queue_max_discard_sectors(queue, UINT_MAX); @@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) @@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - nvme_setup_streams_ns(ctrl, ns); id = nvme_identify_ns(ctrl, nsid); if (!id) @@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id, &new)) goto out_free_id; + nvme_setup_streams_ns(ctrl, ns); #ifdef CONFIG_NVME_MULTIPATH /* @@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) return; if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); nvme_mpath_remove_disk_links(ns); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); @@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); } mutex_lock(&ns->ctrl->subsys->lock); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0a8af4daef89..794e66e4aa20 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* Remove core ctrl ref. */ nvme_put_ctrl(&ctrl->ctrl); diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index a25fed52f7e9..41b740aed3a3 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask) iounmap(base_addr); } + +/* + * The design of the Diva management card in rp34x0 machines (rp3410, rp3440) + * seems rushed, so that many built-in components simply don't work. + * The following quirks disable the serial AUX port and the built-in ATI RV100 + * Radeon 7000 graphics card which both don't have any external connectors and + * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as + * such makes those machines the only PARISC machines on which we can't use + * ttyS0 as boot console. + */ +static void quirk_diva_ati_card(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1292) + return; + + dev_info(&dev->dev, "Hiding Diva built-in ATI card"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY, + quirk_diva_ati_card); + +static void quirk_diva_aux_disable(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1291) + return; + + dev_info(&dev->dev, "Hiding Diva built-in AUX serial device"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX, + quirk_diva_aux_disable); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 945099d49f8f..14fd865a5120 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_update_current_state(pci_dev, PCI_D0); + /* + * pci_restore_state() requires the device to be in D0 (because of MSI + * restoration among other things), so force it into D0 in case the + * driver's "freeze" callbacks put it into a low-power state directly. + */ + pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index bdedb6325c72..4471fd94e1fe 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq.valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6c815207f4f5..3614df68830f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5386,6 +5386,13 @@ out: } EXPORT_SYMBOL_GPL(qeth_poll); +static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd) +{ + if (!cmd->hdr.return_code) + cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; + return cmd->hdr.return_code; +} + int qeth_setassparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { @@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card, (struct qeth_checksum_cmd *)reply->param; QETH_CARD_TEXT(card, 4, "chkdoccb"); - if (cmd->hdr.return_code) + if (qeth_setassparms_inspect_rc(cmd)) return 0; memset(chksum_cb, 0, sizeof(*chksum_cb)); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 6e3d81969a77..d52265416da2 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1725,6 +1725,7 @@ struct aac_dev #define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010) #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020) #define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040) +#define FIB_CONTEXT_FLAG_EH_RESET (0x00000080) /* * Define the command values diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index bdf127aaab41..d55332de08f9 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd) info = &aac->hba_map[bus][cid]; if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || info->devtype != AAC_DEVTYPE_NATIVE_RAW) { - fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; + fib->flags |= FIB_CONTEXT_FLAG_EH_RESET; cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; } } diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index a4f28b7e4c65..e18877177f1b 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write, return req; for_each_bio(bio) { - ret = blk_rq_append_bio(req, bio); + struct bio *bounce_bio = bio; + + ret = blk_rq_append_bio(req, &bounce_bio); if (ret) return ERR_PTR(ret); } diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 449ef5adbb2b..dfb8da83fa50 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model, model, compatible); if (strflags) - devinfo->flags = simple_strtoul(strflags, NULL, 0); - else - devinfo->flags = flags; - + flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0); + devinfo->flags = flags; devinfo->compatible = compatible; if (compatible) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index be5e919db0e8..0880d975eed3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, - int *bflags, int async) + blist_flags_t *bflags, int async) { int ret; @@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq, * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_probe_and_add_lun(struct scsi_target *starget, - u64 lun, int *bflagsp, + u64 lun, blist_flags_t *bflagsp, struct scsi_device **sdevp, enum scsi_scan_mode rescan, void *hostdata) { struct scsi_device *sdev; unsigned char *result; - int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256; + blist_flags_t bflags; + int res = SCSI_SCAN_NO_RESPONSE, result_len = 256; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); /* @@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, * Modifies sdevscan->lun. **/ static void scsi_sequential_lun_scan(struct scsi_target *starget, - int bflags, int scsi_level, + blist_flags_t bflags, int scsi_level, enum scsi_scan_mode rescan) { uint max_dev_lun; @@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget, * 0: scan completed (or no memory, so further scanning is futile) * 1: could not scan with REPORT LUN **/ -static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, +static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags, enum scsi_scan_mode rescan) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; @@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); - int bflags = 0; + blist_flags_t bflags = 0; int res; struct scsi_target *starget; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 50e7d7e4a861..a9996c16f4ae 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); -#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name +#define BLIST_FLAG_NAME(name) \ + [ilog2((__force unsigned int)BLIST_##name)] = #name static const char *const sdev_bflags_name[] = { #include "scsi_devinfo_tbl.c" }; @@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr, for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { const char *name = NULL; - if (!(sdev->sdev_bflags & BIT(i))) + if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i))) continue; if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) name = sdev_bflags_name[i]; diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index d0219e36080c..10ebb213ddb3 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -50,14 +50,14 @@ /* Our blacklist flags */ enum { - SPI_BLIST_NOIUS = 0x1, + SPI_BLIST_NOIUS = (__force blist_flags_t)0x1, }; /* blacklist table, modelled on scsi_devinfo.c */ static struct { char *vendor; char *model; - unsigned flags; + blist_flags_t flags; } spi_static_device_list[] __initdata = { {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, @@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc, { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; - unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], - &sdev->inquiry[16], - SCSI_DEVINFO_SPI); + blist_flags_t bflags; + + bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], + &sdev->inquiry[16], + SCSI_DEVINFO_SPI); /* Populate the target capability fields with the values * gleaned from the device inquiry */ diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 77fe55ce790c..d65345312527 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -79,6 +79,7 @@ #define A3700_SPI_BYTE_LEN BIT(5) #define A3700_SPI_CLK_PRESCALE BIT(0) #define A3700_SPI_CLK_PRESCALE_MASK (0x1f) +#define A3700_SPI_CLK_EVEN_OFFS (0x10) #define A3700_SPI_WFIFO_THRS_BIT 28 #define A3700_SPI_RFIFO_THRS_BIT 24 @@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi, prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz); + /* For prescaler values over 15, we can only set it by steps of 2. + * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to + * 30. We only use this range from 16 to 30. + */ + if (prescale > 15) + prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2); + val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); val = val & ~A3700_SPI_CLK_PRESCALE_MASK; diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index f95da364c283..669470971023 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); /* reset the hardware and block queue progress */ - spin_lock_irq(&as->lock); if (as->use_dma) { atmel_spi_stop_dma(master); atmel_spi_release_dma(master); } + spin_lock_irq(&as->lock); spi_writel(as, CR, SPI_BIT(SWRST)); spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ spi_readl(as, SR); diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 2ce875764ca6..0835a8d88fb8 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size) /* Sets SPCMD */ rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0); - /* Enables SPI function in master mode */ - rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR); + /* Sets RSPI mode */ + rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR); return 0; } diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index c5cd635c28f3..41410031f8e9 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -525,7 +525,7 @@ err_free_master: static int sun4i_spi_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); return 0; } diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index bc7100b93dfc..e0b9fe1d0e37 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) while (remaining_words) { int n_words, tx_words, rx_words; u32 sr; + int stalled; n_words = min(remaining_words, xspi->buffer_size); @@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; + stalled = 10; while (rx_words) { + if (rx_words == n_words && !(stalled--) && + !(sr & XSPI_SR_TX_EMPTY_MASK) && + (sr & XSPI_SR_RX_EMPTY_MASK)) { + dev_err(&spi->dev, + "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n"); + xspi_init_hw(xspi); + return -EIO; + } + if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { xilinx_spi_rx(xspi); rx_words--; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 7c69b4a9694d..0d99b242e82e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, " %d i: %d bio: %p, allocating another" " bio\n", bio->bi_vcnt, i, bio); - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; @@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (bio) { - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f77e499afddd..065f0b607373 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } +/* + * Host memory not allocated to dom0. We can use this range for hotplug-based + * ballooning. + * + * It's a type-less resource. Setting IORESOURCE_MEM will make resource + * management algorithms (arch_remove_reservations()) look into guest e820, + * which we don't want. + */ +static struct resource hostmem_resource = { + .name = "Host RAM", +}; + +void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) +{} + static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res; - int ret; + struct resource *res, *res_hostmem; + int ret = -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; + res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); + if (res_hostmem) { + /* Try to grab a range from hostmem */ + res_hostmem->name = "Host memory"; + ret = allocate_resource(&hostmem_resource, res_hostmem, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + } + + if (!ret) { + /* + * Insert this resource into iomem. Because hostmem_resource + * tracks portion of guest e820 marked as UNUSABLE noone else + * should try to use it. + */ + res->start = res_hostmem->start; + res->end = res_hostmem->end; + ret = insert_resource(&iomem_resource, res); + if (ret < 0) { + pr_err("Can't insert iomem_resource [%llx - %llx]\n", + res->start, res->end); + release_memory_resource(res_hostmem); + res_hostmem = NULL; + res->start = res->end = 0; + } + } + + if (ret) { + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } } #ifdef CONFIG_SPARSEMEM @@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); + release_memory_resource(res_hostmem); return NULL; } } @@ -765,6 +810,8 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); + + arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..83ed7715f856 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -702,7 +702,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1682,7 +1682,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..a76914db72ef 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..601eaa36f1ad 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. */ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..894124efb421 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1210f684d3c2..1bddbba6b80c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5136,7 +5136,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. */ struct xfs_defer_op_type { diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..c40d26763075 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. */ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..50db920ceeeb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range( } /* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + +/* * Find the extent in the rmap btree and remove it. * * The record we find should always be an exact match for the extent that we're @@ -444,33 +489,40 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. + */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); - - /* Check the offset, if necessary. */ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ @@ -664,6 +716,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..0fcd5b1ba729 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 44f8c5451210..64da90655e95 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -538,7 +538,7 @@ xfs_efi_recover( return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); - xfs_rmap_skip_owner_update(&oinfo); + xfs_rmap_any_owner_update(&oinfo); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &efip->efi_format.efi_extents[i]; error = xfs_trans_free_extent(tp, efdp, extp->ext_start, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8f22fc579dbb..60a2e128cb6a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -571,6 +571,11 @@ xfs_growfs_data_private( * this doesn't actually exist in the rmap btree. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); + error = xfs_rmap_free(tp, bp, agno, + be32_to_cpu(agf->agf_length) - new, + new, &oinfo); + if (error) + goto error0; error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, be32_to_cpu(agf->agf_length) - new), diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..3861d61fb265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -870,7 +870,7 @@ xfs_eofblocks_worker( * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). * (We'll just piggyback on the post-EOF prealloc space workqueue.) */ -STATIC void +void xfs_queue_cowblocks( struct xfs_mount *mp) { @@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks( return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); } +static inline unsigned long +xfs_iflag_for_tag( + int tag) +{ + switch (tag) { + case XFS_ICI_EOFBLOCKS_TAG: + return XFS_IEOFBLOCKS; + case XFS_ICI_COWBLOCKS_TAG: + return XFS_ICOWBLOCKS; + default: + ASSERT(0); + return 0; + } +} + static void -__xfs_inode_set_eofblocks_tag( +__xfs_inode_set_blocks_tag( xfs_inode_t *ip, void (*execute)(struct xfs_mount *mp), void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag( * Don't bother locking the AG and looking up in the radix trees * if we already know that we have the tag set. */ - if (ip->i_flags & XFS_IEOFBLOCKS) + if (ip->i_flags & xfs_iflag_for_tag(tag)) return; spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_IEOFBLOCKS; + ip->i_flags |= xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_eofblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks, trace_xfs_perag_set_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } static void -__xfs_inode_clear_eofblocks_tag( +__xfs_inode_clear_blocks_tag( xfs_inode_t *ip, void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, int error, unsigned long caller_ip), @@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag( struct xfs_perag *pag; spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_IEOFBLOCKS; + ip->i_flags &= ~xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_eofblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } @@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_cowblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks, trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_cowblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index bff4d85e5498..d4a77588eca1 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); void xfs_cowblocks_worker(struct work_struct *); +void xfs_queue_cowblocks(struct xfs_mount *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b41952a4ddd8..6f95bdb408ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1487,6 +1487,24 @@ xfs_link( return error; } +/* Clear the reflink flag and the cowblocks tag if possible. */ +static void +xfs_itruncate_clear_reflink_flags( + struct xfs_inode *ip) +{ + struct xfs_ifork *dfork; + struct xfs_ifork *cfork; + + if (!xfs_is_reflink_inode(ip)) + return; + dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (dfork->if_bytes == 0 && cfork->if_bytes == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (cfork->if_bytes == 0) + xfs_inode_clear_cowblocks_tag(ip); +} + /* * Free up the underlying blocks past new_size. The new size must be smaller * than the current size. This routine can be used both for the attribute and @@ -1583,15 +1601,7 @@ xfs_itruncate_extents( if (error) goto out; - /* - * Clear the reflink flag if there are no data fork blocks and - * there are no extents staged in the cow fork. - */ - if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { - if (ip->i_d.di_nblocks == 0) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - xfs_inode_clear_cowblocks_tag(ip); - } + xfs_itruncate_clear_reflink_flags(ip); /* * Always re-log the inode so that our permanent transaction can keep diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b2136af9289f..d383e392ec9d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * log recovery to replay a bmap operation on the inode. */ #define XFS_IRECOVERY (1 << 11) +#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cf7c8f81bebb..47aea2e82c26 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -454,6 +454,8 @@ retry: if (error) goto out_bmap_cancel; + xfs_inode_set_cowblocks_tag(ip); + /* Finish up. */ error = xfs_defer_finish(&tp, &dfops); if (error) @@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping( struct xfs_iext_cursor icur; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_is_reflink_inode(ip)) + return false; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return false; @@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + } else { + /* Didn't do anything, push cursor back. */ + xfs_iext_prev(ifp, &icur); } next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) @@ -725,7 +731,7 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, 0, &tp); + resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; @@ -1291,6 +1297,17 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* + * Clear out post-eof preallocations because we don't have page cache + * backing the delayed allocations and they'll never get freed on + * their own. + */ + if (xfs_can_free_eofblocks(dest, true)) { + ret = xfs_free_eofblocks(dest); + if (ret) + goto out_unlock; + } + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5122d3021117..1dacccc367f8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1360,6 +1360,7 @@ xfs_fs_remount( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } + xfs_queue_cowblocks(mp); /* Create the per-AG metadata reservation pool .*/ error = xfs_fs_reserve_ag_blocks(mp); @@ -1369,6 +1370,14 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { + /* Get rid of any leftover CoW reservations... */ + cancel_delayed_work_sync(&mp->m_cowblocks_work); + error = xfs_icache_free_cowblocks(mp, NULL); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index ea189d88a3cc..8ac4e68a12f0 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -7,9 +7,10 @@ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b234d54f2cb6..868e68561f91 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) struct file; int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); + +#ifndef CONFIG_X86_ESPFIX64 +static inline void init_espfix_bsp(void) { } +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index cceafa01f907..b67404fc4b34 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6e45608b2399..9da6ce22803f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -62,7 +62,7 @@ struct arch_timer_cpu { bool enabled; }; -int kvm_timer_hyp_init(void); +int kvm_timer_hyp_init(bool); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); diff --git a/include/linux/bio.h b/include/linux/bio.h index 82f0c8fd7be8..23d29b39f71e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx); #define bio_set_dev(bio, bdev) \ do { \ + if ((bio)->bi_disk != (bdev)->bd_disk) \ + bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ } while (0) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1e628e032da..9e7d8bd776d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -50,8 +50,6 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; - u8 bi_partno; - blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. @@ -59,8 +57,8 @@ struct bio { unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; - - struct bvec_iter bi_iter; + blk_status_t bi_status; + u8 bi_partno; /* Number of segments in this BIO after * physical address coalescing is performed. @@ -74,8 +72,9 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t __bi_remaining; + struct bvec_iter bi_iter; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..0ce8a372d506 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - call_single_data_t csd; + struct __call_single_data csd; u64 fifo_time; }; @@ -241,14 +241,24 @@ struct request { struct request *next_rq; }; +static inline bool blk_op_is_scsi(unsigned int op) +{ + return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; +} + +static inline bool blk_op_is_private(unsigned int op) +{ + return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; +} + static inline bool blk_rq_is_scsi(struct request *rq) { - return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; + return blk_op_is_scsi(req_op(rq)); } static inline bool blk_rq_is_private(struct request *rq) { - return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; + return blk_op_is_private(req_op(rq)); } static inline bool blk_rq_is_passthrough(struct request *rq) @@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq) return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } +static inline bool bio_is_passthrough(struct bio *bio) +{ + unsigned op = bio_op(bio); + + return blk_op_is_scsi(op) || blk_op_is_private(op); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio *bio); +extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c561b986bab0..1632bb13ad8a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -15,11 +15,11 @@ * In practice this is far bigger than any realistic pointer offset; this limit * ensures that umax_value + (int)off + (int)size cannot overflow a u64. */ -#define BPF_MAX_VAR_OFF (1ULL << 31) +#define BPF_MAX_VAR_OFF (1 << 29) /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures * that converting umax_value to int cannot overflow. */ -#define BPF_MAX_VAR_SIZ INT_MAX +#define BPF_MAX_VAR_SIZ (1 << 29) /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb18c6290ca8..8415bf1a9776 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -273,7 +273,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a2a1318a3d0c..c3d3f04d8cc6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) enum dev_aspm_mode { - DEV_ASPM_DISABLE = 0, DEV_ASPM_DYNAMIC, DEV_ASPM_BACKDOOR, DEV_ASPM_STATIC, + DEV_ASPM_DISABLE, }; /* diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..57b109c6e422 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -556,6 +556,7 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { + cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; @@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, enum mlx5_eq_type type); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); -int mlx5_stop_eqs(struct mlx5_core_dev *dev); +void mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 38a7577a9ce7..d44ec5f41d4a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -147,7 +147,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, @@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7b2170bfd6e7..bc6bb325d1bf 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when - * when not using a GPIO line) + * not using a GPIO line) * * @statistics: statistics for the spi_device * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8b8118a7fadb..cb4d92b79cd9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3226,7 +3226,6 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. - * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans. * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the * firmware. * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0105445cab83..8e08b6da72f3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -694,9 +694,7 @@ struct tc_cls_matchall_offload { }; enum tc_clsbpf_command { - TC_CLSBPF_ADD, - TC_CLSBPF_REPLACE, - TC_CLSBPF_DESTROY, + TC_CLSBPF_OFFLOAD, TC_CLSBPF_STATS, }; @@ -705,6 +703,7 @@ struct tc_cls_bpf_offload { enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; + struct bpf_prog *oldprog; const char *name; bool exts_integrated; u32 gen_flags; diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h index 758607226bfd..2cd449328aee 100644 --- a/include/trace/events/clk.h +++ b/include/trace/events/clk.h @@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent, TP_STRUCT__entry( __string( name, core->name ) - __string( pname, parent->name ) + __string( pname, parent ? parent->name : "none" ) ), TP_fast_assign( __assign_str(name, core->name); - __assign_str(pname, parent->name); + __assign_str(pname, parent ? parent->name : "none"); ), TP_printk("%s %s", __get_str(name), __get_str(pname)) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index e4b0b8e09932..2c735a3e6613 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq, { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, - TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( @@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio, __entry->type = type; __entry->len = len; __entry->gpa = gpa; - __entry->val = val; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 4914b93a23f2..61f410fd74e4 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -44,3 +44,8 @@ static inline void xen_balloon_init(void) { } #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +struct resource; +void arch_xen_balloon_init(struct resource *hostmem_resource); +#endif diff --git a/init/main.c b/init/main.c index e96e3a14533c..7b606fc48482 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,8 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); } asmlinkage __visible void __init start_kernel(void) @@ -679,10 +681,6 @@ asmlinkage __visible void __init start_kernel(void) if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); #endif -#ifdef CONFIG_X86_ESPFIX64 - /* Should be run before the first non-init thread is created */ - init_espfix_bsp(); -#endif thread_stack_cache_init(); cred_init(); fork_init(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d4593571c404..04b24876cd23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, break; case PTR_TO_STACK: pointer_desc = "stack "; + /* The stack spill tracking logic in check_stack_write() + * and check_stack_read() relies on stack accesses being + * aligned. + */ + strict = true; break; default: break; @@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +/* truncate register to smaller size (in bytes) + * must be called with size < BPF_REG_SIZE + */ +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) +{ + u64 mask; + + /* clear high bits in bit representation */ + reg->var_off = tnum_cast(reg->var_off, size); + + /* fix arithmetic bounds */ + mask = ((u64)1 << (size * 8)) - 1; + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { + reg->umin_value &= mask; + reg->umax_value &= mask; + } else { + reg->umin_value = 0; + reg->umax_value = mask; + } + reg->smin_value = reg->umin_value; + reg->smax_value = reg->umax_value; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && regs[value_regno].type == SCALAR_VALUE) { /* b/h/w load zero-extends, mark upper bits as known 0 */ - regs[value_regno].var_off = - tnum_cast(regs[value_regno].var_off, size); - __update_reg_bounds(®s[value_regno]); + coerce_reg_to_size(®s[value_regno], size); } return err; } @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); verbose(env, "invalid variable stack read R%d var_off=%s\n", regno, tn_buf); + return -EACCES; } off = regs[regno].off + regs[regno].var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || @@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } + /* With LD_ABS/IND some JITs save/restore skb from r1. */ changes_data = bpf_helper_changes_pkt_data(fn->func); + if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { + verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", + func_id_name(func_id), func_id); + return -EINVAL; + } memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; @@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return 0; } -static void coerce_reg_to_32(struct bpf_reg_state *reg) -{ - /* clear high 32 bits */ - reg->var_off = tnum_cast(reg->var_off, 4); - /* Update bounds */ - __update_reg_bounds(reg); -} - static bool signed_add_overflows(s64 a, s64 b) { /* Do the add in u64, where overflow is well-defined */ @@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b) return res > a; } +static bool check_reg_sane_offset(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + enum bpf_reg_type type) +{ + bool known = tnum_is_const(reg->var_off); + s64 val = reg->var_off.value; + s64 smin = reg->smin_value; + + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { + verbose(env, "math between %s pointer and %lld is not allowed\n", + reg_type_str[type], val); + return false; + } + + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { + verbose(env, "%s pointer offset %d is not allowed\n", + reg_type_str[type], reg->off); + return false; + } + + if (smin == S64_MIN) { + verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", + reg_type_str[type]); + return false; + } + + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { + verbose(env, "value %lld makes %s pointer be out of bounds\n", + smin, reg_type_str[type]); + return false; + } + + return true; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. * If we return -EACCES, caller may want to try again treating pointer as a @@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops on pointers produce (meaningless) scalars */ - if (!env->allow_ptr_leaks) - verbose(env, - "R%d 32-bit pointer arithmetic prohibited\n", - dst); + verbose(env, + "R%d 32-bit pointer arithmetic prohibited\n", + dst); return -EACCES; } if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", - dst); + verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", + dst); return -EACCES; } if (ptr_reg->type == CONST_PTR_TO_MAP) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", - dst); + verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", + dst); return -EACCES; } if (ptr_reg->type == PTR_TO_PACKET_END) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", - dst); + verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", + dst); return -EACCES; } @@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->type = ptr_reg->type; dst_reg->id = ptr_reg->id; + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) + return -EINVAL; + switch (opcode) { case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow @@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_SUB: if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d tried to subtract pointer from scalar\n", - dst); + verbose(env, "R%d tried to subtract pointer from scalar\n", + dst); return -EACCES; } /* We don't allow subtraction from FP, because (according to @@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, * be able to deal with it. */ if (ptr_reg->type == PTR_TO_STACK) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d subtraction from stack pointer prohibited\n", - dst); + verbose(env, "R%d subtraction from stack pointer prohibited\n", + dst); return -EACCES; } if (known && (ptr_reg->off - smin_val == @@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_AND: case BPF_OR: case BPF_XOR: - /* bitwise ops on pointers are troublesome, prohibit for now. - * (However, in principle we could allow some cases, e.g. - * ptr &= ~3 which would reduce min_value by 3.) - */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d bitwise operator %s on pointer prohibited\n", - dst, bpf_alu_string[opcode >> 4]); + /* bitwise ops on pointers are troublesome, prohibit. */ + verbose(env, "R%d bitwise operator %s on pointer prohibited\n", + dst, bpf_alu_string[opcode >> 4]); return -EACCES; default: /* other operators (e.g. MUL,LSH) produce non-pointer results */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", - dst, bpf_alu_string[opcode >> 4]); + verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", + dst, bpf_alu_string[opcode >> 4]); return -EACCES; } + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) + return -EINVAL; + __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); return 0; } +/* WARNING: This function does calculations on 64-bit values, but the actual + * execution may occur on 32-bit values. Therefore, things like bitshifts + * need extra checks in the 32-bit case. + */ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_reg_state *dst_reg, @@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, bool src_known, dst_known; s64 smin_val, smax_val; u64 umin_val, umax_val; + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; - if (BPF_CLASS(insn->code) != BPF_ALU64) { - /* 32-bit ALU ops are (32,32)->64 */ - coerce_reg_to_32(dst_reg); - coerce_reg_to_32(&src_reg); - } smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, src_known = tnum_is_const(src_reg.var_off); dst_known = tnum_is_const(dst_reg->var_off); + if (!src_known && + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { + __mark_reg_unknown(dst_reg); + return 0; + } + switch (opcode) { case BPF_ADD: if (signed_add_overflows(dst_reg->smin_value, smin_val) || @@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); break; case BPF_LSH: - if (umax_val > 63) { - /* Shifts greater than 63 are undefined. This includes - * shifts by a negative number. + if (umax_val >= insn_bitness) { + /* Shifts greater than 31 or 63 are undefined. + * This includes shifts by a negative number. */ mark_reg_unknown(env, regs, insn->dst_reg); break; @@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); break; case BPF_RSH: - if (umax_val > 63) { - /* Shifts greater than 63 are undefined. This includes - * shifts by a negative number. + if (umax_val >= insn_bitness) { + /* Shifts greater than 31 or 63 are undefined. + * This includes shifts by a negative number. */ mark_reg_unknown(env, regs, insn->dst_reg); break; } - /* BPF_RSH is an unsigned shift, so make the appropriate casts */ - if (dst_reg->smin_value < 0) { - if (umin_val) { - /* Sign bit will be cleared */ - dst_reg->smin_value = 0; - } else { - /* Lost sign bit information */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } - } else { - dst_reg->smin_value = - (u64)(dst_reg->smin_value) >> umax_val; - } + /* BPF_RSH is an unsigned shift. If the value in dst_reg might + * be negative, then either: + * 1) src_reg might be zero, so the sign bit of the result is + * unknown, so we lose our signed bounds + * 2) it's known negative, thus the unsigned bounds capture the + * signed bounds + * 3) the signed bounds cross zero, so they tell us nothing + * about the result + * If the value in dst_reg is known nonnegative, then again the + * unsigned bounts capture the signed bounds. + * Thus, in all cases it suffices to blow away our signed bounds + * and rely on inferring new ones from the unsigned bounds and + * var_off of the result. + */ + dst_reg->smin_value = S64_MIN; + dst_reg->smax_value = S64_MAX; if (src_known) dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); @@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, break; } + if (BPF_CLASS(insn->code) != BPF_ALU64) { + /* 32-bit ALU ops are (32,32)->32 */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); return 0; @@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code); - int rc; dst_reg = ®s[insn->dst_reg]; src_reg = NULL; @@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (src_reg->type != SCALAR_VALUE) { if (dst_reg->type != SCALAR_VALUE) { /* Combining two pointers by any ALU op yields - * an arbitrary scalar. + * an arbitrary scalar. Disallow all math except + * pointer subtraction */ - if (!env->allow_ptr_leaks) { - verbose(env, "R%d pointer %s pointer prohibited\n", - insn->dst_reg, - bpf_alu_string[opcode >> 4]); - return -EACCES; + if (opcode == BPF_SUB){ + mark_reg_unknown(env, regs, insn->dst_reg); + return 0; } - mark_reg_unknown(env, regs, insn->dst_reg); - return 0; + verbose(env, "R%d pointer %s pointer prohibited\n", + insn->dst_reg, + bpf_alu_string[opcode >> 4]); + return -EACCES; } else { /* scalar += pointer * This is legal, but we have to reverse our * src/dest handling in computing the range */ - rc = adjust_ptr_min_max_vals(env, insn, - src_reg, dst_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* scalar += unknown scalar */ - __mark_reg_unknown(&off_reg); - return adjust_scalar_min_max_vals( - env, insn, - dst_reg, off_reg); - } - return rc; + return adjust_ptr_min_max_vals(env, insn, + src_reg, dst_reg); } } else if (ptr_reg) { /* pointer += scalar */ - rc = adjust_ptr_min_max_vals(env, insn, - dst_reg, src_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* unknown scalar += scalar */ - __mark_reg_unknown(dst_reg); - return adjust_scalar_min_max_vals( - env, insn, dst_reg, *src_reg); - } - return rc; + return adjust_ptr_min_max_vals(env, insn, + dst_reg, src_reg); } } else { /* Pretend the src is a reg with a known value, since we only @@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, off_reg.type = SCALAR_VALUE; __mark_reg_known(&off_reg, insn->imm); src_reg = &off_reg; - if (ptr_reg) { /* pointer += K */ - rc = adjust_ptr_min_max_vals(env, insn, - ptr_reg, src_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* unknown scalar += K */ - __mark_reg_unknown(dst_reg); - return adjust_scalar_min_max_vals( - env, insn, dst_reg, off_reg); - } - return rc; - } + if (ptr_reg) /* pointer += K */ + return adjust_ptr_min_max_vals(env, insn, + ptr_reg, src_reg); } /* Got here implies adding two SCALAR_VALUEs */ @@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } mark_reg_unknown(env, regs, insn->dst_reg); - /* high 32 bits are known zero. */ - regs[insn->dst_reg].var_off = tnum_cast( - regs[insn->dst_reg].var_off, 4); - __update_reg_bounds(®s[insn->dst_reg]); + coerce_reg_to_size(®s[insn->dst_reg], 4); } } else { /* case: R = imm * remember the value we stored into this reg */ regs[insn->dst_reg].type = SCALAR_VALUE; - __mark_reg_known(regs + insn->dst_reg, insn->imm); + if (BPF_CLASS(insn->code) == BPF_ALU64) { + __mark_reg_known(regs + insn->dst_reg, + insn->imm); + } else { + __mark_reg_known(regs + insn->dst_reg, + (u32)insn->imm); + } } } else if (opcode > BPF_END) { @@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); } else { - /* if we knew anything about the old value, we're not - * equal, because we can't know anything about the - * scalar value of the pointer in the new value. + /* We're trying to use a pointer in place of a scalar. + * Even if the scalar was unbounded, this could lead to + * pointer leaks because scalars are allowed to leak + * while pointers are not. We could make this safe in + * special cases if root is calling us, but it's + * probably not worth the hassle. */ - return rold->umin_value == 0 && - rold->umax_value == U64_MAX && - rold->smin_value == S64_MIN && - rold->smax_value == S64_MAX && - tnum_is_unknown(rold->var_off); + return false; } case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and diff --git a/kernel/fork.c b/kernel/fork.c index 432eadf6b58c..2295fc69717f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index aa8812ae6776..9e9748089270 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -435,6 +435,41 @@ loop: return 0; } +static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self) +{ + struct bpf_insn *insn; + + insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + /* Due to func address being non-const, we need to + * assemble this here. + */ + insn[0] = BPF_MOV64_REG(R6, R1); + insn[1] = BPF_LD_ABS(BPF_B, 0); + insn[2] = BPF_LD_ABS(BPF_H, 0); + insn[3] = BPF_LD_ABS(BPF_W, 0); + insn[4] = BPF_MOV64_REG(R7, R6); + insn[5] = BPF_MOV64_IMM(R6, 0); + insn[6] = BPF_MOV64_REG(R1, R7); + insn[7] = BPF_MOV64_IMM(R2, 1); + insn[8] = BPF_MOV64_IMM(R3, 2); + insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + bpf_skb_vlan_push_proto.func - __bpf_call_base); + insn[10] = BPF_MOV64_REG(R6, R7); + insn[11] = BPF_LD_ABS(BPF_B, 0); + insn[12] = BPF_LD_ABS(BPF_H, 0); + insn[13] = BPF_LD_ABS(BPF_W, 0); + insn[14] = BPF_MOV64_IMM(R0, 42); + insn[15] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = 16; + + return 0; +} + static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) { unsigned int len = BPF_MAXINSNS; @@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = { {}, { {0x1, 0x42 } }, }, + { + "LD_ABS with helper changing skb data", + { }, + INTERNAL, + { 0x34 }, + { { ETH_HLEN, 42 } }, + .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, + }, }; static struct net_device dev; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 84b2dc76f140..b5f940ce0143 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (IS_ERR(dev)) return PTR_ERR(dev); - if (bdi_debug_register(bdi, dev_name(dev))) { - device_destroy(bdi_class, dev->devt); - return -ENOMEM; - } cgwb_bdi_register(bdi); bdi->dev = dev; + bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d0ef0a8e8831..015f465c514b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, struct net_bridge *br = netdev_priv(dev); int err; + err = register_netdevice(dev); + if (err) + return err; + if (tb[IFLA_ADDRESS]) { spin_lock_bh(&br->lock); br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); spin_unlock_bh(&br->lock); } - err = register_netdevice(dev); - if (err) - return err; - err = br_changelink(dev, tb, data, extack); if (err) - unregister_netdevice(dev); + br_dev_delete(dev, NULL); + return err; } diff --git a/net/core/dev.c b/net/core/dev.c index f47e96b62308..01ee854454a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) goto do_drop; - if (troom > 0 && __skb_linearize(skb)) + if (skb_linearize(skb)) goto do_drop; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b797832565d3..60a71be75aea 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a592ca025fc4..a3cb0be4c6f3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) u32 d_off; if (!num_frags) - return 0; + goto release; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; @@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; +release: skb_zcopy_clear(skb, false); return 0; } @@ -3654,8 +3655,6 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; - if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC)) - goto err; while (pos < offset + len) { if (i >= nfrags) { @@ -3681,6 +3680,8 @@ normal: if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) goto err; + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + goto err; *nskb_frag = *frag; __skb_frag_ref(nskb_frag); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f52d27a422c3..08259d078b1c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1298,14 +1298,19 @@ err_table_hash_alloc: static void ip_fib_net_exit(struct net *net) { - unsigned int i; + int i; rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + /* Destroy the tables in reverse order to guarantee that the + * local table, ID 255, is destroyed before the main table, ID + * 254. This is necessary as the local table may contain + * references to data contained in the main table. + */ + for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f04d944f8abe..c586597da20d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { int type = nla_type(nla); - u32 val; + u32 fi_val, val; if (!type) continue; @@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) val = nla_get_u32(nla); } - if (fi->fib_metrics->metrics[type - 1] != val) + fi_val = fi->fib_metrics->metrics[type - 1]; + if (type == RTAX_FEATURES) + fi_val &= ~DST_FEATURE_ECN_CA; + + if (fi_val != val) return false; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9c1735632c8c..45ffd3d045d2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = { static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &gre_tap_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c26f71234b9c..c9441ca45399 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -210,7 +210,6 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(net); np->repflow = net->ipv6.sysctl.flowlabel_reflect; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4cfd8e0696fe..416c8913f132 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1308,6 +1308,7 @@ static void ip6gre_tap_setup(struct net_device *dev) ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &ip6gre_tap_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5110a418cc4d..f7dd51c42314 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +{ + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); + else + return np->autoflowlabel; +} + /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified @@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index db84f523656d..931c38f6ff4a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1123,8 +1123,13 @@ route_lookup: max_headroom += 8; mtu -= 8; } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + } else if (mtu < 576) { + mtu = 576; + } + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b9404feabd78..2d4680e0376f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -886,6 +886,7 @@ pref_skip_coa: break; case IPV6_AUTOFLOWLABEL: np->autoflowlabel = valbool; + np->autoflowlabel_set = 1; retv = 0; break; case IPV6_RECVFRAGSIZE: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7a8d1500d374..0458b761f3c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->dst.flags |= DST_HOST; + rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; @@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - if (!fibmatch) - dst = ip6_route_input_lookup(net, dev, &fl6, flags); - else - dst = ip6_route_lookup(net, &fl6, 0); + dst = ip6_route_input_lookup(net, dev, &fl6, flags); rcu_read_unlock(); } else { fl6.flowi6_oif = oif; - if (!fibmatch) - dst = ip6_route_output(net, NULL, &fl6); - else - dst = ip6_route_lookup(net, &fl6, 0); + dst = ip6_route_output(net, NULL, &fl6); } @@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } + if (fibmatch && rt->dst.from) { + struct rt6_info *ort = container_of(rt->dst.from, + struct rt6_info, dst); + + dst_hold(&ort->dst); + ip6_rt_put(rt); + rt = ort; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dbe2379329c5..f039064ce922 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return -EINVAL; skb_reset_network_header(skb); + key->eth.type = skb->protocol; } else { eth = eth_hdr(skb); ether_addr_copy(key->eth.src, eth->h_source); @@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(parse_vlan(skb, key))) return -ENOMEM; - skb->protocol = parse_ethertype(skb); - if (unlikely(skb->protocol == htons(0))) + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) return -ENOMEM; + /* Multiple tagged packets need to retain TPID to satisfy + * skb_vlan_pop(), which will later shift the ethertype into + * skb->protocol. + */ + if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT)) + skb->protocol = key->eth.cvlan.tpid; + else + skb->protocol = key->eth.type; + skb_reset_network_header(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); } skb_reset_mac_len(skb); - key->eth.type = skb->protocol; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6fe798c2df1a..8d78e7f4ecc3 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -42,7 +42,6 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; - bool offloaded; u32 gen_flags; struct tcf_exts exts; u32 handle; @@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) } static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, - enum tc_clsbpf_command cmd) + struct cls_bpf_prog *oldprog) { - bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; struct tcf_block *block = tp->chain->block; - bool skip_sw = tc_skip_sw(prog->gen_flags); struct tc_cls_bpf_offload cls_bpf = {}; + struct cls_bpf_prog *obj; + bool skip_sw; int err; + skip_sw = prog && tc_skip_sw(prog->gen_flags); + obj = prog ?: oldprog; + tc_cls_common_offload_init(&cls_bpf.common, tp); - cls_bpf.command = cmd; - cls_bpf.exts = &prog->exts; - cls_bpf.prog = prog->filter; - cls_bpf.name = prog->bpf_name; - cls_bpf.exts_integrated = prog->exts_integrated; - cls_bpf.gen_flags = prog->gen_flags; + cls_bpf.command = TC_CLSBPF_OFFLOAD; + cls_bpf.exts = &obj->exts; + cls_bpf.prog = prog ? prog->filter : NULL; + cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; + cls_bpf.name = obj->bpf_name; + cls_bpf.exts_integrated = obj->exts_integrated; + cls_bpf.gen_flags = obj->gen_flags; err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (addorrep) { + if (prog) { if (err < 0) { - cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + cls_bpf_offload_cmd(tp, oldprog, prog); return err; } else if (err > 0) { prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; } } - if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) + if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) return -EINVAL; return 0; @@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - struct cls_bpf_prog *obj = prog; - enum tc_clsbpf_command cmd; - bool skip_sw; - int ret; - - skip_sw = tc_skip_sw(prog->gen_flags) || - (oldprog && tc_skip_sw(oldprog->gen_flags)); - - if (oldprog && oldprog->offloaded) { - if (!tc_skip_hw(prog->gen_flags)) { - cmd = TC_CLSBPF_REPLACE; - } else if (!tc_skip_sw(prog->gen_flags)) { - obj = oldprog; - cmd = TC_CLSBPF_DESTROY; - } else { - return -EINVAL; - } - } else { - if (tc_skip_hw(prog->gen_flags)) - return skip_sw ? -EINVAL : 0; - cmd = TC_CLSBPF_ADD; - } - - ret = cls_bpf_offload_cmd(tp, obj, cmd); - if (ret) - return ret; + if (prog && oldprog && prog->gen_flags != oldprog->gen_flags) + return -EINVAL; - obj->offloaded = true; - if (oldprog) - oldprog->offloaded = false; + if (prog && tc_skip_hw(prog->gen_flags)) + prog = NULL; + if (oldprog && tc_skip_hw(oldprog->gen_flags)) + oldprog = NULL; + if (!prog && !oldprog) + return 0; - return 0; + return cls_bpf_offload_cmd(tp, prog, oldprog); } static void cls_bpf_stop_offload(struct tcf_proto *tp, @@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, { int err; - if (!prog->offloaded) - return; - - err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); - if (err) { + err = cls_bpf_offload_cmd(tp, NULL, prog); + if (err) pr_err("Stopping hardware offload failed: %d\n", err); - return; - } - - prog->offloaded = false; } static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct cls_bpf_prog *prog) { - if (!prog->offloaded) - return; + struct tcf_block *block = tp->chain->block; + struct tc_cls_bpf_offload cls_bpf = {}; + + tc_cls_common_offload_init(&cls_bpf.common, tp); + cls_bpf.command = TC_CLSBPF_STATS; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + cls_bpf.gen_flags = prog->gen_flags; - cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); + tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); } static int cls_bpf_init(struct tcf_proto *tp) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 3f619fdcbf0a..291c97b07058 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid) case SCTP_CID_AUTH: return "AUTH"; + case SCTP_CID_RECONF: + return "RECONF"; + default: break; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index a71be33f3afe..e36ec5dd64c6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { - struct sctp_association *asoc; - __u16 needed, freed; - - asoc = ulpq->asoc; + struct sctp_association *asoc = ulpq->asoc; + __u32 freed = 0; + __u16 needed; - if (chunk) { - needed = ntohs(chunk->chunk_hdr->length); - needed -= sizeof(struct sctp_data_chunk); - } else - needed = SCTP_DEFAULT_MAXWINDOW; - - freed = 0; + needed = ntohs(chunk->chunk_hdr->length) - + sizeof(struct sctp_data_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_order(ulpq, needed); - if (freed < needed) { + if (freed < needed) freed += sctp_ulpq_renege_frags(ulpq, needed - freed); - } } /* If able to free enough room, accept this chunk. */ - if (chunk && (freed >= needed)) { - int retval; - retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + if (freed >= needed) { + int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); /* * Enter partial delivery if chunk has not been * delivered; otherwise, drain the reassembly queue. diff --git a/net/tipc/group.c b/net/tipc/group.c index 95fec2c057d6..7ebbdeb2a90e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len) if (m->window >= ADV_IDLE) return; - if (!list_empty(&m->congested)) - return; + list_del_init(&m->congested); /* Sort member into congested members' list */ list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { @@ -648,6 +647,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, } else if (mtyp == GRP_REMIT_MSG) { msg_set_grp_remitted(hdr, m->window); } + msg_set_dest_droppable(hdr, true); __skb_queue_tail(xmitq, skb); } @@ -689,15 +689,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); } - if (m->window < ADV_IDLE) - tipc_group_update_member(m, 0); - else - list_del_init(&m->congested); + list_del_init(&m->congested); + tipc_group_update_member(m, 0); return; case GRP_LEAVE_MSG: if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); + list_del_init(&m->list); + list_del_init(&m->congested); + *usr_wakeup = true; /* Wait until WITHDRAW event is received */ if (m->state != MBR_LEAVING) { @@ -709,8 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, ehdr = buf_msg(m->event_msg); msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); - *usr_wakeup = true; - list_del_init(&m->congested); return; case GRP_ADV_MSG: if (!m) @@ -862,6 +861,7 @@ void tipc_group_member_evt(struct tipc_group *grp, msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); __skb_queue_tail(inputq, skb); } + list_del_init(&m->list); list_del_init(&m->congested); } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d7d6cb00c47b..1d84f91bbfb0 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) cfg80211-y += extra-certs.o endif -$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) +$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) @$(kecho) " GEN $@" - @(set -e; \ - allf=""; \ - for f in $^ ; do \ - # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ - thisf=$$(od -An -v -tx1 < $$f | \ - sed -e 's/ /\n/g' | \ - sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ - sed -e 's/^/0x/;s/$$/,/'); \ - # file should not be empty - maybe command substitution failed? \ - test ! -z "$$thisf";\ - allf=$$allf$$thisf;\ - done; \ - ( \ - echo '#include "reg.h"'; \ - echo 'const u8 shipped_regdb_certs[] = {'; \ - echo "$$allf"; \ - echo '};'; \ - echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ - ) >> $@) + @(echo '#include "reg.h"'; \ + echo 'const u8 shipped_regdb_certs[] = {'; \ + cat $^ ; \ + echo '};'; \ + echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ + ) > $@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ echo "$$allf"; \ echo '};'; \ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ - ) >> $@) + ) > $@) + +clean-files += shipped-certs.c extra-certs.c diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex new file mode 100644 index 000000000000..14ea66643ffa --- /dev/null +++ b/net/wireless/certs/sforshee.hex @@ -0,0 +1,86 @@ +/* Seth Forshee's regdb certificate */ +0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c, +0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae, +0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a, +0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, +0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, +0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, +0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, +0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30, +0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a, +0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39, +0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, +0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06, +0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66, +0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82, +0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, +0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, +0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82, +0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5, +0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2, +0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac, +0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c, +0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38, +0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d, +0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20, +0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b, +0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57, +0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b, +0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51, +0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a, +0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18, +0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98, +0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1, +0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28, +0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71, +0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a, +0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85, +0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30, +0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7, +0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65, +0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3, +0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18, +0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36, +0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1, +0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96, +0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c, +0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11, +0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7, +0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6, +0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0, +0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02, +0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09, +0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, +0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00, +0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf, +0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93, +0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7, +0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9, +0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3, +0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec, +0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0, +0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3, +0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4, +0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32, +0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74, +0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22, +0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86, +0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c, +0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06, +0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1, +0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58, +0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4, +0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72, +0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79, +0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a, +0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f, +0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47, +0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a, +0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28, +0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2, +0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87, +0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d, +0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc, +0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16, +0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f, +0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14, diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509 Binary files differdeleted file mode 100644 index c6f8f9d6b988..000000000000 --- a/net/wireless/certs/sforshee.x509 +++ /dev/null diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b1ac23ca20c8..213d0c498c97 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag case NL80211_IFTYPE_AP: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) - goto nla_put_failure; + goto nla_put_failure_locked; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: @@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (!ssid_ie) break; if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) - goto nla_put_failure; + goto nla_put_failure_locked; break; } default: @@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; + nla_put_failure_locked: + wdev_unlock(wdev); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index b3b353d72527..f055ca10bbc1 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, return 0; } -int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +static int __snd_rawmidi_info_select(struct snd_card *card, + struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; - mutex_lock(®ister_mutex); rmidi = snd_rawmidi_search(card, info->device); - mutex_unlock(®ister_mutex); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) @@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info } return -ENXIO; } + +int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +{ + int ret; + + mutex_lock(®ister_mutex); + ret = __snd_rawmidi_info_select(card, info); + mutex_unlock(®ister_mutex); + return ret; +} EXPORT_SYMBOL(snd_rawmidi_info_select); static int snd_rawmidi_info_select_user(struct snd_card *card, diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 038a180d3f81..cbe818eda336 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a81aacf684b2..37e1cf8218ff 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -271,6 +271,8 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HEADSET_MIC, + CXT_FIXUP_HP_MIC_NO_PRESENCE, }; /* for hda_fixup_thinkpad_acpi() */ @@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec, } } +static void cxt_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + break; + } +} + /* OPLC XO 1.5 fixup */ /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors) @@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_headset_mic, + }, + [CXT_FIXUP_HP_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, + { } + }, + .chained = true, + .chain_id = CXT_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c19c81d230bd..b4f1b6e88305 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b) #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \ ((codec)->core.vendor_id == 0x80862800)) +#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ || is_skylake(codec) || is_broxton(codec) \ - || is_kabylake(codec)) || is_geminilake(codec) - + || is_kabylake(codec)) || is_geminilake(codec) \ + || is_cannonlake(codec) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec)) @@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), +HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4b21f71d685c..8fd2d9c62c96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0292: alc_update_coef_idx(codec, 0x4, 1<<15, 0); break; - case 0x10ec0215: case 0x10ec0225: + case 0x10ec0295: + case 0x10ec0299: + alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); + /* fallthrough */ + case 0x10ec0215: case 0x10ec0233: case 0x10ec0236: case 0x10ec0255: @@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0286: case 0x10ec0288: case 0x10ec0285: - case 0x10ec0295: case 0x10ec0298: case 0x10ec0289: - case 0x10ec0299: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: @@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec, } } +/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */ +static void alc274_fixup_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + static hda_nid_t preferred_pairs[] = { + 0x21, 0x03, 0x1b, 0x03, 0x16, 0x02, + 0 + }; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + spec->gen.preferred_dacs = preferred_pairs; +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -5302,6 +5320,8 @@ enum { ALC233_FIXUP_LENOVO_MULTI_CODECS, ALC294_FIXUP_LENOVO_MIC_LOCATION, ALC700_FIXUP_INTEL_REFERENCE, + ALC274_FIXUP_DELL_BIND_DACS, + ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, }; static const struct hda_fixup alc269_fixups[] = { @@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = { {} } }, + [ALC274_FIXUP_DELL_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_bind_dacs, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, + [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x0401102f }, + { } + }, + .chained = true, + .chain_id = ALC274_FIXUP_DELL_BIND_DACS + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), @@ -6553,6 +6589,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1b, 0x01011020}, {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, {0x21, 0x02211030}), @@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), - SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, {0x12, 0xb7a60130}, {0x13, 0xb8a61140}, {0x16, 0x90170110}, diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 9f521a55d610..b5e41df6bb3a 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev) struct resource *res; const u32 *pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + return -ENODEV; + } + audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data), GFP_KERNEL); if (audio_drv_data == NULL) @@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(audio_drv_data->acp_mmio)) + return PTR_ERR(audio_drv_data->acp_mmio); /* The following members gets populated in device 'open' * function. Till then interrupts are disabled in 'acp_init' diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..dcee145dd179 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731 config SND_ATMEL_SOC_CLASSD tristate "Atmel ASoC driver for boards using CLASSD" depends on ARCH_AT91 || COMPILE_TEST - select SND_ATMEL_SOC_DMA + select SND_SOC_GENERIC_DMAENGINE_PCM select REGMAP_MMIO help Say Y if you want to add support for Atmel ASoC driver for boards using diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index b2d42ec1dcd9..56564ce90cb6 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 5f3c42c4f74a..066ea2f4ce7b 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -267,7 +267,7 @@ #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000) #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4; diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index a10a724eb448..13354d6304a8 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -194,7 +194,7 @@ SNDRV_PCM_RATE_32000 | \ SNDRV_PCM_RATE_48000) #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) struct msm8916_wcd_digital_priv { struct clk *ahbclk, *mclk; @@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream, RX_I2S_CTL_RX_I2S_MODE_MASK, RX_I2S_CTL_RX_I2S_MODE_16); break; - case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S32_LE: snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL, TX_I2S_CTL_TX_I2S_MODE_MASK, TX_I2S_CTL_TX_I2S_MODE_32); diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 714ce17da717..e853a6dfd33b 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: + msleep(125); regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL, NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC); break; diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 2df91db765ac..64bf26cec20d 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform) dev_err(&rt5514_spi->dev, "%s Failed to reguest IRQ: %d\n", __func__, ret); + else + device_init_wakeup(rt5514_dsp->dev, true); } return 0; @@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi) return ret; } - device_init_wakeup(&spi->dev, true); - return 0; } @@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev) if (device_may_wakeup(dev)) disable_irq_wake(irq); - if (rt5514_dsp->substream) { - rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf)); - if (buf[0] & RT5514_IRQ_STATUS_BIT) - rt5514_schedule_copy(rt5514_dsp); + if (rt5514_dsp) { + if (rt5514_dsp->substream) { + rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, + sizeof(buf)); + if (buf[0] & RT5514_IRQ_STATUS_BIT) + rt5514_schedule_copy(rt5514_dsp); + } } return 0; diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2a5b5d74e697..2dd6e9f990a4 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = { SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0, rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU), SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index f020d2d1eef4..edc152c8a1fe 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, regmap_read(regmap, RT5645_VENDOR_ID, &val); rt5645->v_id = val & 0xff; + regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080); + ret = regmap_register_patch(rt5645->regmap, init_list, ARRAY_SIZE(init_list)); if (ret != 0) diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index b036c9dc0c8c..d329bf719d80 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert) RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN); snd_soc_update_bits(codec, RT5663_IRQ_1, RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_RST); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_NOR); while (true) { regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val); diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h index c5a9b69579ad..03adc8004ba9 100644 --- a/sound/soc/codecs/rt5663.h +++ b/sound/soc/codecs/rt5663.h @@ -1029,6 +1029,10 @@ #define RT5663_POL_EXT_JD_SHIFT 10 #define RT5663_POL_EXT_JD_EN (0x1 << 10) #define RT5663_POL_EXT_JD_DIS (0x0 << 10) +#define RT5663_EM_JD_MASK (0x1 << 7) +#define RT5663_EM_JD_SHIFT 7 +#define RT5663_EM_JD_NOR (0x1 << 7) +#define RT5663_EM_JD_RST (0x0 << 7) /* DACREF LDO Control (0x0112)*/ #define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 730fb2058869..1ff3edb7bbb6 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -116,7 +116,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c482b2e7a7d2..cfe72b9d4356 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 65c059b5ffd7..66e32f5d2917 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h index 0f163abe4ba3..52c27a358933 100644 --- a/sound/soc/fsl/fsl_asrc.h +++ b/sound/soc/fsl/fsl_asrc.h @@ -260,8 +260,8 @@ #define ASRFSTi_OUTPUT_FIFO_SHIFT 12 #define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT) #define ASRFSTi_IAEi_SHIFT 11 -#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_OAFi_SHIFT) -#define ASRFSTi_IAEi (1 << ASRFSTi_OAFi_SHIFT) +#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_IAEi_SHIFT) +#define ASRFSTi_IAEi (1 << ASRFSTi_IAEi_SHIFT) #define ASRFSTi_INPUT_FIFO_WIDTH 7 #define ASRFSTi_INPUT_FIFO_SHIFT 0 #define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index f2f51e06e22c..424bafaf51ef 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -38,6 +38,7 @@ #include <linux/ctype.h> #include <linux/device.h> #include <linux/delay.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/of.h> @@ -265,6 +266,8 @@ struct fsl_ssi_private { u32 fifo_watermark; u32 dma_maxburst; + + struct mutex ac97_reg_lock; }; /* @@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, if (reg > 0x7f) return; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 write clk_prepare_enable failed: %d\n", ret); - return; + goto ret_unlock; } lreg = reg << 12; @@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); + +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); } static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, @@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, { struct regmap *regs = fsl_ac97_data->regs; - unsigned short val = -1; + unsigned short val = 0; u32 reg_val; unsigned int lreg; int ret; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 read clk_prepare_enable failed: %d\n", ret); - return -1; + goto ret_unlock; } lreg = (reg & 0x7f) << 12; @@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, clk_disable_unprepare(fsl_ac97_data->clk); +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); return val; } @@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + mutex_init(&ssi_private->ac97_reg_lock); + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1657,6 +1672,13 @@ error_sound_card: fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: + if (fsl_ssi_is_ac97(ssi_private)) + mutex_destroy(&ssi_private->ac97_reg_lock); + if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); @@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev) if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi_private)) { snd_soc_set_ac97_ops(NULL); + mutex_destroy(&ssi_private->ac97_reg_lock); + } return 0; } diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 6f9a8bcf20f3..6dcad0a8a0d0 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 6072164f2d43..271ae3c2c535 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index d14c50a60289..3eaac41090ca 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, if ((epnt->virtual_bus_id == instance_id) && (epnt->linktype == link_type) && - (epnt->direction == dirn) && - (epnt->device_type == dev_type)) - return true; - else - return false; + (epnt->direction == dirn)) { + /* do not check dev_type for DMIC link type */ + if (epnt->linktype == NHLT_LINK_DMIC) + return true; + + if (epnt->device_type == dev_type) + return true; + } + + return false; } struct nhlt_specific_cfg diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index a072bcf209d2..81923da18ac2 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt, break; default: - dev_warn(bus->dev, "Control load not supported %d:%d:%d\n", + dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n", hdr->ops.get, hdr->ops.put, hdr->ops.info); break; } diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c index ee5055d47d13..a89fe9b6463b 100644 --- a/sound/soc/rockchip/rockchip_spdif.c +++ b/sound/soc/rockchip/rockchip_spdif.c @@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev) spdif->mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(spdif->mclk)) { dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n"); - return PTR_ERR(spdif->mclk); + ret = PTR_ERR(spdif->mclk); + goto err_disable_hclk; } ret = clk_prepare_enable(spdif->mclk); if (ret) { dev_err(spdif->dev, "clock enable failed %d\n", ret); - return ret; + goto err_disable_clocks; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_disable_clocks; + } spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs, &rk_spdif_regmap_config); if (IS_ERR(spdif->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(spdif->regmap); + ret = PTR_ERR(spdif->regmap); + goto err_disable_clocks; } spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR; @@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev) err_pm_runtime: pm_runtime_disable(&pdev->dev); +err_disable_clocks: + clk_disable_unprepare(spdif->mclk); +err_disable_hclk: + clk_disable_unprepare(spdif->hclk); return ret; } diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 8ddb08714faa..4672688cac32 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod, NULL, &val, NULL); val = val << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val); @@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod, in = in << shift; out = out << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; switch (id / 2) { case 0: @@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate) ckr = 0x80000000; } - rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr); + rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr); rsnd_mod_write(adg_mod, BRRA, adg->rbga); rsnd_mod_write(adg_mod, BRRB, adg->rbgb); diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index c70eb2097816..f12a88a21dfa 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd) return snd_pcm_lib_preallocate_pages_for_all( rtd->pcm, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), + SNDRV_DMA_TYPE_DEV, + rtd->card->snd_card->dev, PREALLOC_BUFFER, PREALLOC_BUFFER_MAX); } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index fd557abfe390..4d750bdf8e24 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -26,10 +26,7 @@ struct rsnd_dmaen { struct dma_chan *chan; dma_cookie_t cookie; - dma_addr_t dma_buf; unsigned int dma_len; - unsigned int dma_period; - unsigned int dma_cnt; }; struct rsnd_dmapp { @@ -71,38 +68,10 @@ static struct rsnd_mod mem = { /* * Audio DMAC */ -#define rsnd_dmaen_sync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 1) -#define rsnd_dmaen_unsync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 0) -static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io, - int i, int sync) -{ - struct device *dev = dmaen->chan->device->dev; - enum dma_data_direction dir; - int is_play = rsnd_io_is_play(io); - dma_addr_t buf; - int len, max; - size_t period; - - len = dmaen->dma_len; - period = dmaen->dma_period; - max = len / period; - i = i % max; - buf = dmaen->dma_buf + (period * i); - - dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE; - - if (sync) - dma_sync_single_for_device(dev, buf, period, dir); - else - dma_sync_single_for_cpu(dev, buf, period, dir); -} - static void __rsnd_dmaen_complete(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct rsnd_dma *dma = rsnd_mod_to_dma(mod); - struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); bool elapsed = false; unsigned long flags; @@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod, */ spin_lock_irqsave(&priv->lock, flags); - if (rsnd_io_is_working(io)) { - rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt); - - /* - * Next period is already started. - * Let's sync Next Next period - * see - * rsnd_dmaen_start() - */ - rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2); - + if (rsnd_io_is_working(io)) elapsed = true; - dmaen->dma_cnt++; - } - spin_unlock_irqrestore(&priv->lock, flags); if (elapsed) @@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); - if (dmaen->chan) { - int is_play = rsnd_io_is_play(io); - + if (dmaen->chan) dmaengine_terminate_all(dmaen->chan); - dma_unmap_single(dmaen->chan->device->dev, - dmaen->dma_buf, dmaen->dma_len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - } return 0; } @@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, struct device *dev = rsnd_priv_to_dev(priv); struct dma_async_tx_descriptor *desc; struct dma_slave_config cfg = {}; - dma_addr_t buf; - size_t len; - size_t period; int is_play = rsnd_io_is_play(io); - int i; int ret; cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; @@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, if (ret < 0) return ret; - len = snd_pcm_lib_buffer_bytes(substream); - period = snd_pcm_lib_period_bytes(substream); - buf = dma_map_single(dmaen->chan->device->dev, - substream->runtime->dma_area, - len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (dma_mapping_error(dmaen->chan->device->dev, buf)) { - dev_err(dev, "dma map failed\n"); - return -EIO; - } - desc = dmaengine_prep_dma_cyclic(dmaen->chan, - buf, len, period, + substream->runtime->dma_addr, + snd_pcm_lib_buffer_bytes(substream), + snd_pcm_lib_period_bytes(substream), is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, desc->callback = rsnd_dmaen_complete; desc->callback_param = rsnd_mod_get(dma); - dmaen->dma_buf = buf; - dmaen->dma_len = len; - dmaen->dma_period = period; - dmaen->dma_cnt = 0; - - /* - * synchronize this and next period - * see - * __rsnd_dmaen_complete() - */ - for (i = 0; i < 2; i++) - rsnd_dmaen_sync(dmaen, io, i); + dmaen->dma_len = snd_pcm_lib_buffer_bytes(substream); dmaen->cookie = dmaengine_submit(desc); if (dmaen->cookie < 0) { diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index fece1e5f582f..cbf3bf312d23 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, int byte) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + bool ret = false; + int byte_pos; - ssi->byte_pos += byte; + byte_pos = ssi->byte_pos + byte; - if (ssi->byte_pos >= ssi->next_period_byte) { + if (byte_pos >= ssi->next_period_byte) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); ssi->period_pos++; ssi->next_period_byte += ssi->byte_per_period; if (ssi->period_pos >= runtime->periods) { - ssi->byte_pos = 0; + byte_pos = 0; ssi->period_pos = 0; ssi->next_period_byte = ssi->byte_per_period; } - return true; + ret = true; } - return false; + WRITE_ONCE(ssi->byte_pos, byte_pos); + + return ret; } /* @@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod, struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - *pointer = bytes_to_frames(runtime, ssi->byte_pos); + *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos)); return 0; } diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300..6ff8a36c2c82 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, { int hdmi = rsnd_ssi_hdmi_port(io); int ret; + u32 mode = 0; ret = rsnd_ssiu_init(mod, io, priv); if (ret < 0) @@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, * see * rsnd_ssi_config_init() */ - rsnd_mod_write(mod, SSI_MODE, 0x1); + mode = 0x1; } + rsnd_mod_write(mod, SSI_MODE, mode); + if (rsnd_ssi_use_busif(io)) { rsnd_mod_write(mod, SSI_BUSIF_ADINR, rsnd_get_adinr_bit(mod, io) | diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 7c9e361b2200..2b4ceda36291 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; - nameid = uac_selector_unit_iSelector(desc); + /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); - if (len) - ; - else if (nameid) - len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, - sizeof(kctl->id.name)); - else - len = get_term_name(state, &state->oterm, - kctl->id.name, sizeof(kctl->id.name), 0); - if (!len) { - strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* no mapping ? */ + /* if iSelector is given, use it */ + nameid = uac_selector_unit_iSelector(desc); + if (nameid) + len = snd_usb_copy_string_desc(state, nameid, + kctl->id.name, + sizeof(kctl->id.name)); + /* ... or pick up the terminal name at next */ + if (!len) + len = get_term_name(state, &state->oterm, + kctl->id.name, sizeof(kctl->id.name), 0); + /* ... or use the fixed string "USB" as the last resort */ + if (!len) + strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 77eecaa4db1f..a66ef5777887 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id) /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch * between PCM/DOP and native DSD mode */ -static bool is_teac_50X_dac(unsigned int id) +static bool is_teac_dsd_dac(unsigned int id) { switch (id) { case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ + case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */ return true; } return false; @@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs, break; } mdelay(20); - } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { + } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) { /* Vendor mode switch cmd is required. */ switch (fmt->altsetting) { case 3: /* DSD mode (DSD_U32) requested */ @@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } /* TEAC devices with USB DAC functionality */ - if (is_teac_50X_dac(chip->usb_id)) { + if (is_teac_dsd_dac(chip->usb_id)) { if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; } diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h index cefe7c7cd4f6..0a8e37a519f2 100644 --- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h +++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -2,7 +2,7 @@ #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ #define _UAPI__ASM_BPF_PERF_EVENT_H__ -#include <asm/ptrace.h> +#include "ptrace.h" typedef user_pt_regs bpf_user_pt_regs_t; diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 217cf6f95c36..a5684d0968b4 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -478,7 +478,7 @@ class Provider(object): @staticmethod def is_field_wanted(fields_filter, field): """Indicate whether field is valid according to fields_filter.""" - if not fields_filter or fields_filter == "help": + if not fields_filter: return True return re.match(fields_filter, field) is not None @@ -549,8 +549,8 @@ class TracepointProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" - self._fields = [field for field in self.get_available_fields() - if self.is_field_wanted(fields_filter, field)] + self.fields = [field for field in self.get_available_fields() + if self.is_field_wanted(fields_filter, field)] @staticmethod def get_online_cpus(): @@ -950,7 +950,8 @@ class Tui(object): curses.nocbreak() curses.endwin() - def get_all_gnames(self): + @staticmethod + def get_all_gnames(): """Returns a list of (pid, gname) tuples of all running guests""" res = [] try: @@ -963,7 +964,7 @@ class Tui(object): # perform a sanity check before calling the more expensive # function to possibly extract the guest name if ' -name ' in line[1]: - res.append((line[0], self.get_gname_from_pid(line[0]))) + res.append((line[0], Tui.get_gname_from_pid(line[0]))) child.stdout.close() return res @@ -984,7 +985,8 @@ class Tui(object): except Exception: self.screen.addstr(row + 1, 2, 'Not available') - def get_pid_from_gname(self, gname): + @staticmethod + def get_pid_from_gname(gname): """Fuzzy function to convert guest name to QEMU process pid. Returns a list of potential pids, can be empty if no match found. @@ -992,7 +994,7 @@ class Tui(object): """ pids = [] - for line in self.get_all_gnames(): + for line in Tui.get_all_gnames(): if gname == line[1]: pids.append(int(line[0])) @@ -1090,15 +1092,16 @@ class Tui(object): # sort by totals return (0, -stats[x][0]) total = 0. - for val in stats.values(): - total += val[0] + for key in stats.keys(): + if key.find('(') is -1: + total += stats[key][0] if self._sorting == SORT_DEFAULT: sortkey = sortCurAvg else: sortkey = sortTotal + tavg = 0 for key in sorted(stats.keys(), key=sortkey): - - if row >= self.screen.getmaxyx()[0]: + if row >= self.screen.getmaxyx()[0] - 1: break values = stats[key] if not values[0] and not values[1]: @@ -1110,9 +1113,15 @@ class Tui(object): self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, values[0], values[0] * 100 / total, cur)) + if cur is not '' and key.find('(') is -1: + tavg += cur row += 1 if row == 3: self.screen.addstr(4, 1, 'No matching events reported yet') + else: + self.screen.addstr(row, 1, '%-40s %10d %8s' % + ('Total', total, tavg if tavg else ''), + curses.A_BOLD) self.screen.refresh() def show_msg(self, text): @@ -1358,7 +1367,7 @@ class Tui(object): if char == 'x': self.update_drilldown() # prevents display of current values on next refresh - self.stats.get() + self.stats.get(self._display_guests) except KeyboardInterrupt: break except curses.error: @@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately. try: pids = Tui.get_pid_from_gname(val) except: - raise optparse.OptionValueError('Error while searching for guest ' - '"{}", use "-p" to specify a pid ' - 'instead'.format(val)) + sys.exit('Error while searching for guest "{}". Use "-p" to ' + 'specify a pid instead?'.format(val)) if len(pids) == 0: - raise optparse.OptionValueError('No guest by the name "{}" ' - 'found'.format(val)) + sys.exit('Error: No guest by the name "{}" found'.format(val)) if len(pids) > 1: - raise optparse.OptionValueError('Multiple processes found (pids: ' - '{}) - use "-p" to specify a pid ' - 'instead'.format(" ".join(pids))) + sys.exit('Error: Multiple processes found (pids: {}). Use "-p" ' + 'to specify the desired pid'.format(" ".join(pids))) parser.values.pid = pids[0] optparser = optparse.OptionParser(description=description_text, @@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately. help='restrict statistics to guest by name', callback=cb_guest_to_pid, ) - (options, _) = optparser.parse_args(sys.argv) + options, unkn = optparser.parse_args(sys.argv) + if len(unkn) != 1: + sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:])) + try: + # verify that we were passed a valid regex up front + re.compile(options.fields) + except re.error: + sys.exit('Error: "' + options.fields + '" is not a valid regular ' + 'expression') + return options @@ -1564,16 +1579,13 @@ def main(): stats = Stats(options) - if options.fields == "help": - event_list = "\n" - s = stats.get() - for key in s.keys(): - if key.find('(') != -1: - key = key[0:key.find('(')] - if event_list.find('\n' + key + '\n') == -1: - event_list += key + '\n' - sys.stdout.write(event_list) - return "" + if options.fields == 'help': + stats.fields_filter = None + event_list = [] + for key in stats.get().keys(): + event_list.append(key.split('(', 1)[0]) + sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n') + sys.exit(0) if options.log: log(stats) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index e5cf836be8a1..b5b3810c9e94 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -50,6 +50,8 @@ INTERACTIVE COMMANDS *s*:: set update interval *x*:: toggle reporting of stats for child trace events + :: *Note*: The stats for the parents summarize the respective child trace + events Press any other key to refresh statistics immediately. @@ -86,7 +88,7 @@ OPTIONS -f<fields>:: --fields=<fields>:: - fields to display (regex) + fields to display (regex), "-f help" for a list of available events -h:: --help:: diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 792af7c3b74f..05fc4e2e7b3a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf +LDLIBS += -lcap -lelf -lrt TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 69427531408d..6761be18a91f 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -351,7 +351,7 @@ static void test_bpf_obj_id(void) info_len != sizeof(struct bpf_map_info) || strcmp((char *)map_infos[i].name, expected_map_name), "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", + "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", err, errno, map_infos[i].type, BPF_MAP_TYPE_ARRAY, info_len, sizeof(struct bpf_map_info), @@ -395,7 +395,7 @@ static void test_bpf_obj_id(void) *(int *)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", + "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -463,7 +463,7 @@ static void test_bpf_obj_id(void) memcmp(&prog_info, &prog_infos[i], info_len) || *(int *)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", + "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), *(int *)prog_info.map_ids, saved_map_id); @@ -509,7 +509,7 @@ static void test_bpf_obj_id(void) memcmp(&map_info, &map_infos[i], info_len) || array_value != array_magic_value, "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", + "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n", err, errno, info_len, sizeof(struct bpf_map_info), memcmp(&map_info, &map_infos[i], info_len), array_value, array_magic_value); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c64f30cf63c..b51017404c62 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -422,9 +422,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 subtraction from stack pointer", - .result_unpriv = REJECT, - .errstr = "R1 invalid mem access", + .errstr = "R1 subtraction from stack pointer", .result = REJECT, }, { @@ -606,7 +604,6 @@ static struct bpf_test tests[] = { }, .errstr = "misaligned stack access", .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "invalid map_fd for function call", @@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - bad alignment on reg", @@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - out of bounds low", @@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer += pointer", + .result = REJECT, + .errstr = "R1 pointer += pointer", }, { "unpriv: neg pointer", @@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct __sk_buff, data)), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, len)), BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), @@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid access to packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3, 11 }, - .errstr_unpriv = "R0 pointer += pointer", - .errstr = "R0 invalid mem access 'inv'", - .result_unpriv = REJECT, + .errstr = "R0 pointer += pointer", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 bitwise operator &= on pointer", - .errstr = "invalid mem access 'inv'", + .errstr = "R0 bitwise operator &= on pointer", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 2", @@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", - .errstr = "invalid mem access 'inv'", + .errstr = "R0 32-bit pointer arithmetic prohibited", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 3", @@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic with /= operator", - .errstr = "invalid mem access 'inv'", + .errstr = "R0 pointer arithmetic with /= operator", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 4", @@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, - .errstr = "R1 type=inv expected=map_ptr", - .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", + .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", .result = REJECT, }, { @@ -6117,6 +6103,30 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "ld_abs: tests on r6 and skb data reload helper", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { "ld_ind: check calling conv, r1", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R8 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R8 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer comparison prohibited", - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, .result_unpriv = REJECT, }, @@ -6742,6 +6751,462 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "bounds check based on zero-extended MOV", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0x0000'0000'ffff'ffff */ + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check based on sign-extended MOV. test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* r0 = <oob pointer> */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 4294967295", + .result = REJECT + }, + { + "bounds check based on sign-extended MOV. test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xfff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), + /* r0 = <oob pointer> */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 min value is outside of the array range", + .result = REJECT + }, + { + "bounds check based on reg_off + var_off + insn_off. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "value_size=8 off=1073741825", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "bounds check based on reg_off + var_off + insn_off. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "value 1073741823", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "bounds check after truncation of non-boundary-crossing range", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + /* r2 = 0x10'0000'0000 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = [0x00, 0xff] */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check after truncation of boundary-crossing range (1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check after truncation of boundary-crossing range (2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + * difference to previous test: truncation via MOV32 + * instead of ALU32. + */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check after wrapping 32-bit addition", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + /* r1 = 0x7fff'ffff */ + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), + /* r1 = 0xffff'fffe */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check after shift with oversized count operand", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_2, 32), + BPF_MOV64_IMM(BPF_REG_1, 1), + /* r1 = (u32)1 << (u32)32 = ? */ + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), + /* r1 = [0x0000, 0xffff] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT + }, + { + "bounds check after right shift of maybe-negative number", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + /* r1 = [-0x01, 0xfe] */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + /* r1 = 0 or 0xff'ffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* r1 = 0 or 0xffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 2147483646", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset 1073741822", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset -1073741822", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1000000), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 1000000000000", + .result = REJECT + }, + { + "pointer/scalar confusion in state equality check (way 1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" + }, + { + "pointer/scalar confusion in state equality check (way 2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" + }, + { "variable-offset ctx access", .insns = { /* Get an unknown value */ @@ -6783,6 +7248,71 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_LWT_IN, }, { + "indirect variable-offset stack access", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it indirectly */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 5 }, + .errstr = "variable stack read R2", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, + { + "direct stack access with 32-bit wraparound. test1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer and 2147483647", + .result = REJECT + }, + { + "direct stack access with 32-bit wraparound. test2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer and 1073741823", + .result = REJECT + }, + { + "direct stack access with 32-bit wraparound. test3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer offset 1073741822", + .result = REJECT + }, + { "liveness pruning and write screening", .insns = { /* Get an unknown value */ @@ -7104,6 +7634,19 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "pkt_end - pkt_start is allowed", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "XDP pkt read, pkt_end mangling, bad access 1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R1 offset is outside of the packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R1 offset is outside of the packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index e57b4ac40e72..7177bea1fdfa 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,3 +1,4 @@ CONFIG_USER_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m +CONFIG_NUMA=y diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 66e5ce5b91f0..0304ffb714f2 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -627,13 +627,10 @@ static void do_multicpu_tests(void) static int finish_exec_test(void) { /* - * In a sensible world, this would be check_invalid_segment(0, 1); - * For better or for worse, though, the LDT is inherited across exec. - * We can probably change this safely, but for now we test it. + * Older kernel versions did inherit the LDT on exec() which is + * wrong because exec() starts from a clean state. */ - check_valid_segment(0, 1, - AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, - 42, true); + check_invalid_segment(0, 1); return nerrs ? 1 : 0; } diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f9555b1e7f15..cc29a8148328 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; struct arch_timer_context *vtimer; + u32 cnt_ctl; - if (!vcpu) { - pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); - return IRQ_NONE; - } - vtimer = vcpu_vtimer(vcpu); + /* + * We may see a timer interrupt after vcpu_put() has been called which + * sets the CPU's vcpu pointer to NULL, because even though the timer + * has been disabled in vtimer_save_state(), the hardware interrupt + * signal may not have been retired from the interrupt controller yet. + */ + if (!vcpu) + return IRQ_HANDLED; + vtimer = vcpu_vtimer(vcpu); if (!vtimer->irq.level) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - if (kvm_timer_irq_can_fire(vtimer)) + cnt_ctl = read_sysreg_el0(cntv_ctl); + cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT | + ARCH_TIMER_CTRL_IT_MASK; + if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT)) kvm_timer_update_irq(vcpu, true, vtimer); } @@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); + isb(); vtimer->loaded = false; out: @@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } -int kvm_timer_hyp_init(void) +int kvm_timer_hyp_init(bool has_gic) { struct arch_timer_kvm_info *info; int err; @@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void) return err; } - err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); - goto out_free_irq; + if (has_gic) { + err = irq_set_vcpu_affinity(host_vtimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } } kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); @@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) no_vgic: preempt_disable(); timer->enabled = 1; - if (!irqchip_in_kernel(vcpu->kvm)) - kvm_timer_vcpu_load_user(vcpu); - else - kvm_timer_vcpu_load_vgic(vcpu); + kvm_timer_vcpu_load(vcpu); preempt_enable(); return 0; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..2e43f9d42bd5 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1326,7 +1326,7 @@ static int init_subsystems(void) /* * Init HYP architected timer support */ - err = kvm_timer_hyp_init(); + err = kvm_timer_hyp_init(vgic_present); if (err) goto out; diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index b6e715fd3c90..dac7ceb1a677 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - data); + &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), len); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); kvm_mmio_write_buf(data_buf, len, data); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); } else { trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, - fault_ipa, 0); + fault_ipa, NULL); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..b4b69c2d1012 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) */ void free_hyp_pgds(void) { - unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { @@ -521,10 +519,10 @@ void free_hyp_pgds(void) if (hyp_pgd) { unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), + (uintptr_t)high_memory - PAGE_OFFSET); + unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), + VMALLOC_END - VMALLOC_START); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; |