Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Aug 2016 20:11:27 +0000 (16:11 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Aug 2016 20:11:27 +0000 (16:11 -0400)
Pull KVM updates from Paolo Bonzini:

 - ARM: GICv3 ITS emulation and various fixes.  Removal of the
   old VGIC implementation.

 - s390: support for trapping software breakpoints, nested
   virtualization (vSIE), the STHYI opcode, initial extensions
   for CPU model support.

 - MIPS: support for MIPS64 hosts (32-bit guests only) and lots
   of cleanups, preliminary to this and the upcoming support for
   hardware virtualization extensions.

 - x86: support for execute-only mappings in nested EPT; reduced
   vmexit latency for TSC deadline timer (by about 30%) on Intel
   hosts; support for more than 255 vCPUs.

 - PPC: bugfixes.
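
A minimal userspace sketch, not part of this merge, relating to the x86 item
above about larger vCPU counts: it probes the per-VM vCPU limit advertised by
the host KVM module using the long-standing KVM_CHECK_EXTENSION ioctl and the
existing KVM_CAP_MAX_VCPUS capability. The value reported is host- and
kernel-version-dependent, so treat this only as an illustrative example:

    /* probe-kvm-vcpus.c: query the per-VM vCPU limit advertised by KVM */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int max_vcpus;

            if (kvm < 0) {
                    perror("open /dev/kvm");
                    return 1;
            }
            /* KVM_CHECK_EXTENSION returns the capability's value, 0 if absent */
            max_vcpus = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
            printf("KVM_CAP_MAX_VCPUS: %d\n", max_vcpus);
            close(kvm);
            return 0;
    }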

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
  KVM: PPC: Introduce KVM_CAP_PPC_HTM
  MIPS: Select HAVE_KVM for MIPS64_R{2,6}
  MIPS: KVM: Reset CP0_PageMask during host TLB flush
  MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
  MIPS: KVM: Sign extend MFC0/RDHWR results
  MIPS: KVM: Fix 64-bit big endian dynamic translation
  MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
  MIPS: KVM: Use 64-bit CP0_EBase when appropriate
  MIPS: KVM: Set CP0_Status.KX on MIPS64
  MIPS: KVM: Make entry code MIPS64 friendly
  MIPS: KVM: Use kmap instead of CKSEG0ADDR()
  MIPS: KVM: Use virt_to_phys() to get commpage PFN
  MIPS: Fix definition of KSEGX() for 64-bit
  KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
  kvm: x86: nVMX: maintain internal copy of current VMCS
  KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
  KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
  KVM: arm64: vgic-its: Simplify MAPI error handling
  KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
  KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
  ...

40 files changed:
arch/arm/include/asm/pgtable.h
arch/arm/kvm/arm.c
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/virt.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kvm/hyp/switch.c
arch/powerpc/include/asm/paca.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/traps.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/s390/include/asm/diag.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/mmu.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/page.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/processor.h
arch/s390/kernel/diag.c
arch/s390/kvm/intercept.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/fault.c
arch/s390/mm/gmap.c
arch/s390/mm/pgalloc.c
arch/s390/mm/pgtable.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/iommu.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/context_tracking.h
include/linux/irqchip/arm-gic-v3.h
mm/gup.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/kvm_main.c

Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index ad171e979ab067852b9fa9738453d147bbf05847,4b17bd058e01f0cc1ca228cc438426f0777e1960..148303e7771ff6f2a438b31a07984cd11c9e48dd
@@@ -25,7 -25,7 +25,8 @@@
  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
  #include <asm/kvm_book3s_asm.h>
  #endif
 +#include <asm/accounting.h>
+ #include <asm/hmi.h>
  
  register struct paca_struct *local_paca asm("r13");
  
index fe4c075bcf50eda75905e7c53b4830914cef5b00,6972a23433d349e4de099051e23971494417f04a..b2027a5cf50817649667cb148087912324b87893
@@@ -41,7 -41,8 +41,7 @@@ obj-$(CONFIG_VDSO32)          += vdso32
  obj-$(CONFIG_HAVE_HW_BREAKPOINT)      += hw_breakpoint.o
  obj-$(CONFIG_PPC_BOOK3S_64)   += cpu_setup_ppc970.o cpu_setup_pa6t.o
  obj-$(CONFIG_PPC_BOOK3S_64)   += cpu_setup_power.o
- obj-$(CONFIG_PPC_BOOK3S_64)   += mce.o mce_power.o
+ obj-$(CONFIG_PPC_BOOK3S_64)   += mce.o mce_power.o hmi.o
 -obj64-$(CONFIG_RELOCATABLE)   += reloc_64.o
  obj-$(CONFIG_PPC_BOOK3E_64)   += exceptions-64e.o idle_book3e.o
  obj-$(CONFIG_PPC64)           += vdso64/
  obj-$(CONFIG_ALTIVEC)         += vecemu.o
index 6200e4925d260a35f3640b353cc87ade3ec74ad6,0eba47e074b94ce7d8c70049fc9a098eb654234a..694def6c9d617818bf2a4cf4778a7648b0fb82f5
@@@ -669,8 -680,8 +669,10 @@@ _GLOBAL(__replay_interrupt
  BEGIN_FTR_SECTION
        cmpwi   r3,0xe80
        beq     h_doorbell_common
 +      cmpwi   r3,0xea0
 +      beq     h_virt_irq_common
+       cmpwi   r3,0xe60
+       beq     hmi_exception_common
  FTR_SECTION_ELSE
        cmpwi   r3,0xa00
        beq     doorbell_super_common
@@@ -1161,18 -1172,9 +1163,18 @@@ fwnmi_data_area
        . = 0x8000
  #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
  
 +      STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
 +      STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
 +
 +#ifdef CONFIG_CBE_RAS
 +      STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
 +      STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
 +      STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
 +#endif /* CONFIG_CBE_RAS */
 +
        .globl hmi_exception_early
  hmi_exception_early:
-       EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+       EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62)
        mr      r10,r1                  /* Save r1                      */
        ld      r1,PACAEMERGSP(r13)     /* Use emergency stack          */
        subi    r1,r1,INT_FRAME_SIZE    /* alloc stack frame            */
index 335eb6cedae5ae17bc4cd16dd52d6b02f8533e76,0000000000000000000000000000000000000000..8a56a51fc0cbc08d5a011cfe4b91a503d0c41eb8
mode 100644,000000..100644
--- /dev/null
@@@ -1,662 -1,0 +1,664 @@@
-       bl      opal_rm_handle_hmi;                                     \
 +/*
 + *  This file contains idle entry/exit functions for POWER7,
 + *  POWER8 and POWER9 CPUs.
 + *
 + *  This program is free software; you can redistribute it and/or
 + *  modify it under the terms of the GNU General Public License
 + *  as published by the Free Software Foundation; either version
 + *  2 of the License, or (at your option) any later version.
 + */
 +
 +#include <linux/threads.h>
 +#include <asm/processor.h>
 +#include <asm/page.h>
 +#include <asm/cputable.h>
 +#include <asm/thread_info.h>
 +#include <asm/ppc_asm.h>
 +#include <asm/asm-offsets.h>
 +#include <asm/ppc-opcode.h>
 +#include <asm/hw_irq.h>
 +#include <asm/kvm_book3s_asm.h>
 +#include <asm/opal.h>
 +#include <asm/cpuidle.h>
 +#include <asm/book3s/64/mmu-hash.h>
 +#include <asm/mmu.h>
 +
 +#undef DEBUG
 +
 +/*
 + * Use unused space in the interrupt stack to save and restore
 + * registers for winkle support.
 + */
 +#define _SDR1 GPR3
 +#define _RPR  GPR4
 +#define _SPURR        GPR5
 +#define _PURR GPR6
 +#define _TSCR GPR7
 +#define _DSCR GPR8
 +#define _AMOR GPR9
 +#define _WORT GPR10
 +#define _WORC GPR11
 +#define _PTCR GPR12
 +
 +#define PSSCR_HV_TEMPLATE     PSSCR_ESL | PSSCR_EC | \
 +                              PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
 +                              PSSCR_MTL_MASK
 +
 +/* Idle state entry routines */
 +
 +#define       IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
 +      /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
 +      std     r0,0(r1);                                       \
 +      ptesync;                                                \
 +      ld      r0,0(r1);                                       \
 +1:    cmp     cr0,r0,r0;                                      \
 +      bne     1b;                                             \
 +      IDLE_INST;                                              \
 +      b       .
 +
 +      .text
 +
 +/*
 + * Used by threads before entering deep idle states. Saves SPRs
 + * in interrupt stack frame
 + */
 +save_sprs_to_stack:
 +      /*
 +       * Note all register i.e per-core, per-subcore or per-thread is saved
 +       * here since any thread in the core might wake up first
 +       */
 +BEGIN_FTR_SECTION
 +      mfspr   r3,SPRN_PTCR
 +      std     r3,_PTCR(r1)
 +      /*
 +       * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
 +       * SDR1 here
 +       */
 +FTR_SECTION_ELSE
 +      mfspr   r3,SPRN_SDR1
 +      std     r3,_SDR1(r1)
 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 +      mfspr   r3,SPRN_RPR
 +      std     r3,_RPR(r1)
 +      mfspr   r3,SPRN_SPURR
 +      std     r3,_SPURR(r1)
 +      mfspr   r3,SPRN_PURR
 +      std     r3,_PURR(r1)
 +      mfspr   r3,SPRN_TSCR
 +      std     r3,_TSCR(r1)
 +      mfspr   r3,SPRN_DSCR
 +      std     r3,_DSCR(r1)
 +      mfspr   r3,SPRN_AMOR
 +      std     r3,_AMOR(r1)
 +      mfspr   r3,SPRN_WORT
 +      std     r3,_WORT(r1)
 +      mfspr   r3,SPRN_WORC
 +      std     r3,_WORC(r1)
 +
 +      blr
 +
 +/*
 + * Used by threads when the lock bit of core_idle_state is set.
 + * Threads will spin in HMT_LOW until the lock bit is cleared.
 + * r14 - pointer to core_idle_state
 + * r15 - used to load contents of core_idle_state
 + */
 +
 +core_idle_lock_held:
 +      HMT_LOW
 +3:    lwz     r15,0(r14)
 +      andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
 +      bne     3b
 +      HMT_MEDIUM
 +      lwarx   r15,0,r14
 +      blr
 +
 +/*
 + * Pass requested state in r3:
 + *    r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
 + *       - Requested STOP state in POWER9
 + *
 + * To check IRQ_HAPPENED in r4
 + *    0 - don't check
 + *    1 - check
 + *
 + * Address to 'rfid' to in r5
 + */
 +_GLOBAL(pnv_powersave_common)
 +      /* Use r3 to pass state nap/sleep/winkle */
 +      /* NAP is a state loss, we create a regs frame on the
 +       * stack, fill it up with the state we care about and
 +       * stick a pointer to it in PACAR1. We really only
 +       * need to save PC, some CR bits and the NV GPRs,
 +       * but for now an interrupt frame will do.
 +       */
 +      mflr    r0
 +      std     r0,16(r1)
 +      stdu    r1,-INT_FRAME_SIZE(r1)
 +      std     r0,_LINK(r1)
 +      std     r0,_NIP(r1)
 +
 +      /* Hard disable interrupts */
 +      mfmsr   r9
 +      rldicl  r9,r9,48,1
 +      rotldi  r9,r9,16
 +      mtmsrd  r9,1                    /* hard-disable interrupts */
 +
 +      /* Check if something happened while soft-disabled */
 +      lbz     r0,PACAIRQHAPPENED(r13)
 +      andi.   r0,r0,~PACA_IRQ_HARD_DIS@l
 +      beq     1f
 +      cmpwi   cr0,r4,0
 +      beq     1f
 +      addi    r1,r1,INT_FRAME_SIZE
 +      ld      r0,16(r1)
 +      li      r3,0                    /* Return 0 (no nap) */
 +      mtlr    r0
 +      blr
 +
 +1:    /* We mark irqs hard disabled as this is the state we'll
 +       * be in when returning and we need to tell arch_local_irq_restore()
 +       * about it
 +       */
 +      li      r0,PACA_IRQ_HARD_DIS
 +      stb     r0,PACAIRQHAPPENED(r13)
 +
 +      /* We haven't lost state ... yet */
 +      li      r0,0
 +      stb     r0,PACA_NAPSTATELOST(r13)
 +
 +      /* Continue saving state */
 +      SAVE_GPR(2, r1)
 +      SAVE_NVGPRS(r1)
 +      mfcr    r4
 +      std     r4,_CCR(r1)
 +      std     r9,_MSR(r1)
 +      std     r1,PACAR1(r13)
 +
 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 +      /* Tell KVM we're entering idle */
 +      li      r4,KVM_HWTHREAD_IN_IDLE
 +      stb     r4,HSTATE_HWTHREAD_STATE(r13)
 +#endif
 +
 +      /*
 +       * Go to real mode to do the nap, as required by the architecture.
 +       * Also, we need to be in real mode before setting hwthread_state,
 +       * because as soon as we do that, another thread can switch
 +       * the MMU context to the guest.
 +       */
 +      LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
 +      li      r6, MSR_RI
 +      andc    r6, r9, r6
 +      mtmsrd  r6, 1           /* clear RI before setting SRR0/1 */
 +      mtspr   SPRN_SRR0, r5
 +      mtspr   SPRN_SRR1, r7
 +      rfid
 +
 +      .globl pnv_enter_arch207_idle_mode
 +pnv_enter_arch207_idle_mode:
 +      stb     r3,PACA_THREAD_IDLE_STATE(r13)
 +      cmpwi   cr3,r3,PNV_THREAD_SLEEP
 +      bge     cr3,2f
 +      IDLE_STATE_ENTER_SEQ(PPC_NAP)
 +      /* No return */
 +2:
 +      /* Sleep or winkle */
 +      lbz     r7,PACA_THREAD_MASK(r13)
 +      ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
 +lwarx_loop1:
 +      lwarx   r15,0,r14
 +
 +      andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
 +      bnel    core_idle_lock_held
 +
 +      andc    r15,r15,r7                      /* Clear thread bit */
 +
 +      andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
 +
 +/*
 + * If cr0 = 0, then current thread is the last thread of the core entering
 + * sleep. Last thread needs to execute the hardware bug workaround code if
 + * required by the platform.
 + * Make the workaround call unconditionally here. The below branch call is
 + * patched out when the idle states are discovered if the platform does not
 + * require it.
 + */
 +.global pnv_fastsleep_workaround_at_entry
 +pnv_fastsleep_workaround_at_entry:
 +      beq     fastsleep_workaround_at_entry
 +
 +      stwcx.  r15,0,r14
 +      bne-    lwarx_loop1
 +      isync
 +
 +common_enter: /* common code for all the threads entering sleep or winkle */
 +      bgt     cr3,enter_winkle
 +      IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
 +
 +fastsleep_workaround_at_entry:
 +      ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
 +      stwcx.  r15,0,r14
 +      bne-    lwarx_loop1
 +      isync
 +
 +      /* Fast sleep workaround */
 +      li      r3,1
 +      li      r4,1
 +      bl      opal_rm_config_cpu_idle_state
 +
 +      /* Clear Lock bit */
 +      li      r0,0
 +      lwsync
 +      stw     r0,0(r14)
 +      b       common_enter
 +
 +enter_winkle:
 +      bl      save_sprs_to_stack
 +
 +      IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 +
 +/*
 + * r3 - requested stop state
 + */
 +power_enter_stop:
 +/*
 + * Check if the requested state is a deep idle state.
 + */
 +      LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 +      ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 +      cmpd    r3,r4
 +      bge     2f
 +      IDLE_STATE_ENTER_SEQ(PPC_STOP)
 +2:
 +/*
 + * Entering deep idle state.
 + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
 + * stack and enter stop
 + */
 +      lbz     r7,PACA_THREAD_MASK(r13)
 +      ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
 +
 +lwarx_loop_stop:
 +      lwarx   r15,0,r14
 +      andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
 +      bnel    core_idle_lock_held
 +      andc    r15,r15,r7                      /* Clear thread bit */
 +
 +      stwcx.  r15,0,r14
 +      bne-    lwarx_loop_stop
 +      isync
 +
 +      bl      save_sprs_to_stack
 +
 +      IDLE_STATE_ENTER_SEQ(PPC_STOP)
 +
 +_GLOBAL(power7_idle)
 +      /* Now check if user or arch enabled NAP mode */
 +      LOAD_REG_ADDRBASE(r3,powersave_nap)
 +      lwz     r4,ADDROFF(powersave_nap)(r3)
 +      cmpwi   0,r4,0
 +      beqlr
 +      li      r3, 1
 +      /* fall through */
 +
 +_GLOBAL(power7_nap)
 +      mr      r4,r3
 +      li      r3,PNV_THREAD_NAP
 +      LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
 +      b       pnv_powersave_common
 +      /* No return */
 +
 +_GLOBAL(power7_sleep)
 +      li      r3,PNV_THREAD_SLEEP
 +      li      r4,1
 +      LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
 +      b       pnv_powersave_common
 +      /* No return */
 +
 +_GLOBAL(power7_winkle)
 +      li      r3,PNV_THREAD_WINKLE
 +      li      r4,1
 +      LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
 +      b       pnv_powersave_common
 +      /* No return */
 +
 +#define CHECK_HMI_INTERRUPT                                           \
 +      mfspr   r0,SPRN_SRR1;                                           \
 +BEGIN_FTR_SECTION_NESTED(66);                                         \
 +      rlwinm  r0,r0,45-31,0xf;  /* extract wake reason field (P8) */  \
 +FTR_SECTION_ELSE_NESTED(66);                                          \
 +      rlwinm  r0,r0,45-31,0xe;  /* P7 wake reason field is 3 bits */  \
 +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);              \
 +      cmpwi   r0,0xa;                 /* Hypervisor maintenance ? */  \
 +      bne     20f;                                                    \
 +      /* Invoke opal call to handle hmi */                            \
 +      ld      r2,PACATOC(r13);                                        \
 +      ld      r1,PACAR1(r13);                                         \
 +      std     r3,ORIG_GPR3(r1);       /* Save original r3 */          \
++      li      r3,0;                   /* NULL argument */             \
++      bl      hmi_exception_realmode;                                 \
++      nop;                                                            \
 +      ld      r3,ORIG_GPR3(r1);       /* Restore original r3 */       \
 +20:   nop;
 +
 +
 +/*
 + * r3 - requested stop state
 + */
 +_GLOBAL(power9_idle_stop)
 +      LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
 +      or      r4,r4,r3
 +      mtspr   SPRN_PSSCR, r4
 +      li      r4, 1
 +      LOAD_REG_ADDR(r5,power_enter_stop)
 +      b       pnv_powersave_common
 +      /* No return */
 +/*
 + * Called from reset vector. Check whether we have woken up with
 + * hypervisor state loss. If yes, restore hypervisor state and return
 + * back to reset vector.
 + *
 + * r13 - Contents of HSPRG0
 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss
 + */
 +_GLOBAL(pnv_restore_hyp_resource)
 +      ld      r2,PACATOC(r13);
 +BEGIN_FTR_SECTION
 +      /*
 +       * POWER ISA 3. Use PSSCR to determine if we
 +       * are waking up from deep idle state
 +       */
 +      LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 +      ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 +
 +      mfspr   r5,SPRN_PSSCR
 +      /*
 +       * 0-3 bits correspond to Power-Saving Level Status
 +       * which indicates the idle state we are waking up from
 +       */
 +      rldicl  r5,r5,4,60
 +      cmpd    cr4,r5,r4
 +      bge     cr4,pnv_wakeup_tb_loss
 +      /*
 +       * Waking up without hypervisor state loss. Return to
 +       * reset vector
 +       */
 +      blr
 +
 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 +
 +      /*
 +       * POWER ISA 2.07 or less.
 +       * Check if last bit of HSPGR0 is set. This indicates whether we are
 +       * waking up from winkle.
 +       */
 +      clrldi  r5,r13,63
 +      clrrdi  r13,r13,1
 +      cmpwi   cr4,r5,1
 +      mtspr   SPRN_HSPRG0,r13
 +
 +      lbz     r0,PACA_THREAD_IDLE_STATE(r13)
 +      cmpwi   cr2,r0,PNV_THREAD_NAP
 +      bgt     cr2,pnv_wakeup_tb_loss  /* Either sleep or Winkle */
 +
 +      /*
 +       * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
 +       * up from nap. At this stage CR3 shouldn't contains 'gt' since that
 +       * indicates we are waking with hypervisor state loss from nap.
 +       */
 +      bgt     cr3,.
 +
 +      blr     /* Return back to System Reset vector from where
 +                 pnv_restore_hyp_resource was invoked */
 +
 +/*
 + * Called if waking up from idle state which can cause either partial or
 + * complete hyp state loss.
 + * In POWER8, called if waking up from fastsleep or winkle
 + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
 + *
 + * r13 - PACA
 + * cr3 - gt if waking up with partial/complete hypervisor state loss
 + * cr4 - eq if waking up from complete hypervisor state loss.
 + */
 +_GLOBAL(pnv_wakeup_tb_loss)
 +      ld      r1,PACAR1(r13)
 +      /*
 +       * Before entering any idle state, the NVGPRs are saved in the stack
 +       * and they are restored before switching to the process context. Hence
 +       * until they are restored, they are free to be used.
 +       *
 +       * Save SRR1 and LR in NVGPRs as they might be clobbered in
 +       * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
 +       * to determine the wakeup reason if we branch to kvm_start_guest. LR
 +       * is required to return back to reset vector after hypervisor state
 +       * restore is complete.
 +       */
 +      mflr    r17
 +      mfspr   r16,SPRN_SRR1
 +BEGIN_FTR_SECTION
 +      CHECK_HMI_INTERRUPT
 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 +
 +      lbz     r7,PACA_THREAD_MASK(r13)
 +      ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
 +lwarx_loop2:
 +      lwarx   r15,0,r14
 +      andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
 +      /*
 +       * Lock bit is set in one of the 2 cases-
 +       * a. In the sleep/winkle enter path, the last thread is executing
 +       * fastsleep workaround code.
 +       * b. In the wake up path, another thread is executing fastsleep
 +       * workaround undo code or resyncing timebase or restoring context
 +       * In either case loop until the lock bit is cleared.
 +       */
 +      bnel    core_idle_lock_held
 +
 +      cmpwi   cr2,r15,0
 +
 +      /*
 +       * At this stage
 +       * cr2 - eq if first thread to wakeup in core
 +       * cr3-  gt if waking up with partial/complete hypervisor state loss
 +       * cr4 - eq if waking up from complete hypervisor state loss.
 +       */
 +
 +      ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
 +      stwcx.  r15,0,r14
 +      bne-    lwarx_loop2
 +      isync
 +
 +BEGIN_FTR_SECTION
 +      lbz     r4,PACA_SUBCORE_SIBLING_MASK(r13)
 +      and     r4,r4,r15
 +      cmpwi   r4,0    /* Check if first in subcore */
 +
 +      or      r15,r15,r7              /* Set thread bit */
 +      beq     first_thread_in_subcore
 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 +
 +      or      r15,r15,r7              /* Set thread bit */
 +      beq     cr2,first_thread_in_core
 +
 +      /* Not first thread in core or subcore to wake up */
 +      b       clear_lock
 +
 +first_thread_in_subcore:
 +      /*
 +       * If waking up from sleep, subcore state is not lost. Hence
 +       * skip subcore state restore
 +       */
 +      bne     cr4,subcore_state_restored
 +
 +      /* Restore per-subcore state */
 +      ld      r4,_SDR1(r1)
 +      mtspr   SPRN_SDR1,r4
 +
 +      ld      r4,_RPR(r1)
 +      mtspr   SPRN_RPR,r4
 +      ld      r4,_AMOR(r1)
 +      mtspr   SPRN_AMOR,r4
 +
 +subcore_state_restored:
 +      /*
 +       * Check if the thread is also the first thread in the core. If not,
 +       * skip to clear_lock.
 +       */
 +      bne     cr2,clear_lock
 +
 +first_thread_in_core:
 +
 +      /*
 +       * First thread in the core waking up from any state which can cause
 +       * partial or complete hypervisor state loss. It needs to
 +       * call the fastsleep workaround code if the platform requires it.
 +       * Call it unconditionally here. The below branch instruction will
 +       * be patched out if the platform does not have fastsleep or does not
 +       * require the workaround. Patching will be performed during the
 +       * discovery of idle-states.
 +       */
 +.global pnv_fastsleep_workaround_at_exit
 +pnv_fastsleep_workaround_at_exit:
 +      b       fastsleep_workaround_at_exit
 +
 +timebase_resync:
 +      /*
 +       * Use cr3 which indicates that we are waking up with atleast partial
 +       * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
 +       */
 +      ble     cr3,clear_lock
 +      /* Time base re-sync */
 +      bl      opal_rm_resync_timebase;
 +      /*
 +       * If waking up from sleep, per core state is not lost, skip to
 +       * clear_lock.
 +       */
 +      bne     cr4,clear_lock
 +
 +      /*
 +       * First thread in the core to wake up and its waking up with
 +       * complete hypervisor state loss. Restore per core hypervisor
 +       * state.
 +       */
 +BEGIN_FTR_SECTION
 +      ld      r4,_PTCR(r1)
 +      mtspr   SPRN_PTCR,r4
 +      ld      r4,_RPR(r1)
 +      mtspr   SPRN_RPR,r4
 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 +
 +      ld      r4,_TSCR(r1)
 +      mtspr   SPRN_TSCR,r4
 +      ld      r4,_WORC(r1)
 +      mtspr   SPRN_WORC,r4
 +
 +clear_lock:
 +      andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
 +      lwsync
 +      stw     r15,0(r14)
 +
 +common_exit:
 +      /*
 +       * Common to all threads.
 +       *
 +       * If waking up from sleep, hypervisor state is not lost. Hence
 +       * skip hypervisor state restore.
 +       */
 +      bne     cr4,hypervisor_state_restored
 +
 +      /* Waking up from winkle */
 +
 +BEGIN_MMU_FTR_SECTION
 +      b       no_segments
 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
 +      /* Restore SLB  from PACA */
 +      ld      r8,PACA_SLBSHADOWPTR(r13)
 +
 +      .rept   SLB_NUM_BOLTED
 +      li      r3, SLBSHADOW_SAVEAREA
 +      LDX_BE  r5, r8, r3
 +      addi    r3, r3, 8
 +      LDX_BE  r6, r8, r3
 +      andis.  r7,r5,SLB_ESID_V@h
 +      beq     1f
 +      slbmte  r6,r5
 +1:    addi    r8,r8,16
 +      .endr
 +no_segments:
 +
 +      /* Restore per thread state */
 +
 +      ld      r4,_SPURR(r1)
 +      mtspr   SPRN_SPURR,r4
 +      ld      r4,_PURR(r1)
 +      mtspr   SPRN_PURR,r4
 +      ld      r4,_DSCR(r1)
 +      mtspr   SPRN_DSCR,r4
 +      ld      r4,_WORT(r1)
 +      mtspr   SPRN_WORT,r4
 +
 +      /* Call cur_cpu_spec->cpu_restore() */
 +      LOAD_REG_ADDR(r4, cur_cpu_spec)
 +      ld      r4,0(r4)
 +      ld      r12,CPU_SPEC_RESTORE(r4)
 +#ifdef PPC64_ELF_ABI_v1
 +      ld      r12,0(r12)
 +#endif
 +      mtctr   r12
 +      bctrl
 +
 +hypervisor_state_restored:
 +
 +      mtspr   SPRN_SRR1,r16
 +      mtlr    r17
 +      blr     /* Return back to System Reset vector from where
 +                 pnv_restore_hyp_resource was invoked */
 +
 +fastsleep_workaround_at_exit:
 +      li      r3,1
 +      li      r4,0
 +      bl      opal_rm_config_cpu_idle_state
 +      b       timebase_resync
 +
 +/*
 + * R3 here contains the value that will be returned to the caller
 + * of power7_nap.
 + */
 +_GLOBAL(pnv_wakeup_loss)
 +      ld      r1,PACAR1(r13)
 +BEGIN_FTR_SECTION
 +      CHECK_HMI_INTERRUPT
 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 +      REST_NVGPRS(r1)
 +      REST_GPR(2, r1)
 +      ld      r6,_CCR(r1)
 +      ld      r4,_MSR(r1)
 +      ld      r5,_NIP(r1)
 +      addi    r1,r1,INT_FRAME_SIZE
 +      mtcr    r6
 +      mtspr   SPRN_SRR1,r4
 +      mtspr   SPRN_SRR0,r5
 +      rfid
 +
 +/*
 + * R3 here contains the value that will be returned to the caller
 + * of power7_nap.
 + */
 +_GLOBAL(pnv_wakeup_noloss)
 +      lbz     r0,PACA_NAPSTATELOST(r13)
 +      cmpwi   r0,0
 +      bne     pnv_wakeup_loss
 +BEGIN_FTR_SECTION
 +      CHECK_HMI_INTERRUPT
 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 +      ld      r1,PACAR1(r13)
 +      ld      r6,_CCR(r1)
 +      ld      r4,_MSR(r1)
 +      ld      r5,_NIP(r1)
 +      addi    r1,r1,INT_FRAME_SIZE
 +      mtcr    r6
 +      mtspr   SPRN_SRR1,r4
 +      mtspr   SPRN_SRR0,r5
 +      rfid
index f7e2f2e318bd64c2fd235f04118a5311269d46aa,9ec95daccad92a2f16df44eaea7c47504808c69b..2cb589264cb748869e2228ff1806e2067af65fe4
@@@ -60,7 -60,7 +60,8 @@@
  #include <asm/switch_to.h>
  #include <asm/tm.h>
  #include <asm/debug.h>
 +#include <asm/asm-prototypes.h>
+ #include <asm/hmi.h>
  #include <sysdev/fsl_pci.h>
  
  #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
Simple merge
Simple merge
Simple merge
index 18226437a8320b1e6866d0330040e4d66c537ade,b941528cc49e69ef2259978284f71fdcf3eadd5d..6d39329c894b3e9b399174e7787efc92e5345426
@@@ -6,10 -6,11 +6,11 @@@
  
  typedef struct {
        cpumask_t cpu_attach_mask;
 -      atomic_t attach_count;
 +      atomic_t flush_count;
        unsigned int flush_mm;
-       spinlock_t list_lock;
+       spinlock_t pgtable_lock;
        struct list_head pgtable_list;
+       spinlock_t gmap_lock;
        struct list_head gmap_list;
        unsigned long asce;
        unsigned long asce_limit;
index f77c638bf3974063a941b5d9b8e537a2c7cfb1a4,3ce3854b7a41b8225c5c2b875cdfe2fe5684e8d9..c6a088c91aee36c97c1adebc9ad5063233471f55
  static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
  {
-       spin_lock_init(&mm->context.list_lock);
+       spin_lock_init(&mm->context.pgtable_lock);
        INIT_LIST_HEAD(&mm->context.pgtable_list);
+       spin_lock_init(&mm->context.gmap_lock);
        INIT_LIST_HEAD(&mm->context.gmap_list);
        cpumask_clear(&mm->context.cpu_attach_mask);
 -      atomic_set(&mm->context.attach_count, 0);
 +      atomic_set(&mm->context.flush_count, 0);
        mm->context.flush_mm = 0;
  #ifdef CONFIG_PGSTE
        mm->context.alloc_pgste = page_table_allocate_pgste;
Simple merge
Simple merge
Simple merge
index 48b37b8357e6838dd213f230eaaf274027193b74,a44faf4a045442b68a330bbf09a0aab2d755cda1..a97354c8c667161555dfa85ae3361ea3fd709bca
@@@ -162,6 -162,28 +162,30 @@@ int diag14(unsigned long rx, unsigned l
  }
  EXPORT_SYMBOL(diag14);
  
 -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
++static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+ {
 -      register unsigned long _subcode asm("0") = subcode;
++      register unsigned long _subcode asm("0") = *subcode;
+       register unsigned long _size asm("1") = size;
+       asm volatile(
+               "       diag    %2,%0,0x204\n"
 -              "0:\n"
++              "0:     nopr    %%r7\n"
+               EX_TABLE(0b,0b)
+               : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
 -      if (_subcode)
 -              return -1;
++      *subcode = _subcode;
+       return _size;
+ }
+ int diag204(unsigned long subcode, unsigned long size, void *addr)
+ {
+       diag_stat_inc(DIAG_STAT_X204);
 -      return __diag204(subcode, size, addr);
++      size = __diag204(&subcode, size, addr);
++      if (subcode)
++              return -1;
++      return size;
+ }
+ EXPORT_SYMBOL(diag204);
  /*
   * Diagnose 210: Get information about a virtual device
   */
Simple merge
index 6f5c344cd7852d2c803c3df8e60645d1607bb391,63ac7c1641a7b16fbd412d1b4dd3336512f067d4..3f3ae4865d579e8a9420cc22c6c2b39f37e70a9f
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/vmalloc.h>
+ #include <linux/bitmap.h>
  #include <asm/asm-offsets.h>
  #include <asm/lowcore.h>
 -#include <asm/etr.h>
 +#include <asm/stp.h>
  #include <asm/pgtable.h>
  #include <asm/gmap.h>
  #include <asm/nmi.h>
  #include <asm/switch_to.h>
  #include <asm/isc.h>
  #include <asm/sclp.h>
 -#include <asm/etr.h>
+ #include <asm/cpacf.h>
++#include <asm/timex.h>
  #include "kvm-s390.h"
  #include "gaccess.h"
  
@@@ -61,9 -65,9 +65,10 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
 +      { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+       { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
@@@ -188,6 -211,101 +212,103 @@@ void kvm_arch_hardware_unsetup(void
                                         &kvm_clock_notifier);
  }
  
 -              etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
+ static void allow_cpu_feat(unsigned long nr)
+ {
+       set_bit_inv(nr, kvm_s390_available_cpu_feat);
+ }
+ static inline int plo_test_bit(unsigned char nr)
+ {
+       register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+       int cc = 3; /* subfunction not available */
+       asm volatile(
+               /* Parameter registers are ignored for "test bit" */
+               "       plo     0,0,0,0(0)\n"
+               "       ipm     %0\n"
+               "       srl     %0,28\n"
+               : "=d" (cc)
+               : "d" (r0)
+               : "cc");
+       return cc == 0;
+ }
+ static void kvm_s390_cpu_feat_init(void)
+ {
+       int i;
+       for (i = 0; i < 256; ++i) {
+               if (plo_test_bit(i))
+                       kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+       }
+       if (test_facility(28)) /* TOD-clock steering */
++              ptff(kvm_s390_available_subfunc.ptff,
++                   sizeof(kvm_s390_available_subfunc.ptff),
++                   PTFF_QAF);
+       if (test_facility(17)) { /* MSA */
+               __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
+               __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
+               __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
+               __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
+               __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+       }
+       if (test_facility(76)) /* MSA3 */
+               __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+       if (test_facility(77)) { /* MSA4 */
+               __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
+               __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
+               __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
+               __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+       }
+       if (test_facility(57)) /* MSA5 */
+               __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+       if (MACHINE_HAS_ESOP)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+       /*
+        * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+        * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+        */
+       if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+           !test_facility(3) || !nested)
+               return;
+       allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+       if (sclp.has_64bscao)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+       if (sclp.has_siif)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+       if (sclp.has_gpere)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+       if (sclp.has_gsls)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+       if (sclp.has_ib)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+       if (sclp.has_cei)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+       if (sclp.has_ibs)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+       /*
+        * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+        * all skey handling functions read/set the skey from the PGSTE
+        * instead of the real storage key.
+        *
+        * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+        * pages being detected as preserved although they are resident.
+        *
+        * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+        * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+        *
+        * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+        * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+        * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+        *
+        * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+        * cannot easily shadow the SCA because of the ipte lock.
+        */
+ }
  int kvm_arch_init(void *opaque)
  {
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
Simple merge
Simple merge
Simple merge
index b98d1a152d461e05aad2f5b32ab85aae7e5c2692,293130b5aee763342f209ae4be2121328f841836..5f092015aaa75d1f2096a6162fa2ea1526806886
@@@ -456,9 -415,92 +459,93 @@@ void ptep_set_notify(struct mm_struct *
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) |= PGSTE_IN_BIT;
        pgste_set_unlock(ptep, pgste);
 +      preempt_enable();
  }
  
+ /**
+  * ptep_force_prot - change access rights of a locked pte
+  * @mm: pointer to the process mm_struct
+  * @addr: virtual address in the guest address space
+  * @ptep: pointer to the page table entry
+  * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+  * @bit: pgste bit to set (e.g. for notification)
+  *
+  * Returns 0 if the access rights were changed and -EAGAIN if the current
+  * and requested access rights are incompatible.
+  */
+ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
+                   pte_t *ptep, int prot, unsigned long bit)
+ {
+       pte_t entry;
+       pgste_t pgste;
+       int pte_i, pte_p;
+       pgste = pgste_get_lock(ptep);
+       entry = *ptep;
+       /* Check pte entry after all locks have been acquired */
+       pte_i = pte_val(entry) & _PAGE_INVALID;
+       pte_p = pte_val(entry) & _PAGE_PROTECT;
+       if ((pte_i && (prot != PROT_NONE)) ||
+           (pte_p && (prot & PROT_WRITE))) {
+               pgste_set_unlock(ptep, pgste);
+               return -EAGAIN;
+       }
+       /* Change access rights and set pgste bit */
+       if (prot == PROT_NONE && !pte_i) {
+               ptep_flush_direct(mm, addr, ptep);
+               pgste = pgste_update_all(entry, pgste, mm);
+               pte_val(entry) |= _PAGE_INVALID;
+       }
+       if (prot == PROT_READ && !pte_p) {
+               ptep_flush_direct(mm, addr, ptep);
+               pte_val(entry) &= ~_PAGE_INVALID;
+               pte_val(entry) |= _PAGE_PROTECT;
+       }
+       pgste_val(pgste) |= bit;
+       pgste = pgste_set_pte(ptep, pgste, entry);
+       pgste_set_unlock(ptep, pgste);
+       return 0;
+ }
+ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+                   pte_t *sptep, pte_t *tptep, pte_t pte)
+ {
+       pgste_t spgste, tpgste;
+       pte_t spte, tpte;
+       int rc = -EAGAIN;
+       if (!(pte_val(*tptep) & _PAGE_INVALID))
+               return 0;       /* already shadowed */
+       spgste = pgste_get_lock(sptep);
+       spte = *sptep;
+       if (!(pte_val(spte) & _PAGE_INVALID) &&
+           !((pte_val(spte) & _PAGE_PROTECT) &&
+             !(pte_val(pte) & _PAGE_PROTECT))) {
+               pgste_val(spgste) |= PGSTE_VSIE_BIT;
+               tpgste = pgste_get_lock(tptep);
+               pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
+                               (pte_val(pte) & _PAGE_PROTECT);
+               /* don't touch the storage key - it belongs to parent pgste */
+               tpgste = pgste_set_pte(tptep, tpgste, tpte);
+               pgste_set_unlock(tptep, tpgste);
+               rc = 1;
+       }
+       pgste_set_unlock(sptep, spgste);
+       return rc;
+ }
+ void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
+ {
+       pgste_t pgste;
+       pgste = pgste_get_lock(ptep);
+       /* notifier is called by the caller */
+       ptep_flush_direct(mm, saddr, ptep);
+       /* don't touch the storage key - it belongs to parent pgste */
+       pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
+       pgste_set_unlock(ptep, pgste);
+ }
  static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
  {
        if (!non_swap_entry(entry))
Simple merge
index 95e0e6481f071b90c287990d7e807eec907cb52b,4f2010c5feba711dd81509c55eb6fa6a703efbf8..b181426f67b4cf16e6cdf4f8e31c52a14c0e1752
  
  #include <linux/list.h>
  #include <linux/kvm_host.h>
 -#include <linux/module.h>
 +#include <linux/moduleparam.h>
  #include <linux/pci.h>
  #include <linux/stat.h>
- #include <linux/dmar.h>
  #include <linux/iommu.h>
- #include <linux/intel-iommu.h>
  #include "assigned-dev.h"
  
  static bool allow_unsafe_assigned_interrupts;
index 57549ed47ca5aa74f992996bb2ee8180a0e4f7af,6895fd28aae97c8d1b11c908ccfd7162011cc3ab..730cf174090affdf3c4fb4f97e6550c9b03229d5
@@@ -1310,10 -1313,111 +1313,112 @@@ void wait_lapic_expire(struct kvm_vcpu 
  
        /* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
        if (guest_tsc < tsc_deadline)
 -              __delay(tsc_deadline - guest_tsc);
 +              __delay(min(tsc_deadline - guest_tsc,
 +                      nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
  }
  
+ static void start_sw_tscdeadline(struct kvm_lapic *apic)
+ {
+       u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+       u64 ns = 0;
+       ktime_t expire;
+       struct kvm_vcpu *vcpu = apic->vcpu;
+       unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+       unsigned long flags;
+       ktime_t now;
+       if (unlikely(!tscdeadline || !this_tsc_khz))
+               return;
+       local_irq_save(flags);
+       now = apic->lapic_timer.timer.base->get_time();
+       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+       if (likely(tscdeadline > guest_tsc)) {
+               ns = (tscdeadline - guest_tsc) * 1000000ULL;
+               do_div(ns, this_tsc_khz);
+               expire = ktime_add_ns(now, ns);
+               expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
+               hrtimer_start(&apic->lapic_timer.timer,
+                               expire, HRTIMER_MODE_ABS_PINNED);
+       } else
+               apic_timer_expired(apic);
+       local_irq_restore(flags);
+ }
+ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
+ }
+ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
+ static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+ {
+       kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+       apic->lapic_timer.hv_timer_in_use = false;
+ }
+ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+       WARN_ON(swait_active(&vcpu->wq));
+       cancel_hv_tscdeadline(apic);
+       apic_timer_expired(apic);
+ }
+ EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+ static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+ {
+       u64 tscdeadline = apic->lapic_timer.tscdeadline;
+       if (atomic_read(&apic->lapic_timer.pending) ||
+               kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+               if (apic->lapic_timer.hv_timer_in_use)
+                       cancel_hv_tscdeadline(apic);
+       } else {
+               apic->lapic_timer.hv_timer_in_use = true;
+               hrtimer_cancel(&apic->lapic_timer.timer);
+               /* In case the sw timer triggered in the window */
+               if (atomic_read(&apic->lapic_timer.pending))
+                       cancel_hv_tscdeadline(apic);
+       }
+       trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+                       apic->lapic_timer.hv_timer_in_use);
+       return apic->lapic_timer.hv_timer_in_use;
+ }
+ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       WARN_ON(apic->lapic_timer.hv_timer_in_use);
+       if (apic_lvtt_tscdeadline(apic))
+               start_hv_tscdeadline(apic);
+ }
+ EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
+ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       /* Possibly the TSC deadline timer is not enabled yet */
+       if (!apic->lapic_timer.hv_timer_in_use)
+               return;
+       cancel_hv_tscdeadline(apic);
+       if (atomic_read(&apic->lapic_timer.pending))
+               return;
+       start_sw_tscdeadline(apic);
+ }
+ EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
  static void start_apic_timer(struct kvm_lapic *apic)
  {
        ktime_t now;
Simple merge
Simple merge
index df07a0a4611ffa81b059229aaa08d04a2981bc56,b2f559159f3a4e46f3eeabbc357b0c573fa93712..bc354f003ce1f7fd5b8e27e365f956c4eba72dc6
@@@ -7560,6 -7708,19 +7711,12 @@@ static int handle_pml_full(struct kvm_v
        return 1;
  }
  
 -static int handle_pcommit(struct kvm_vcpu *vcpu)
 -{
 -      /* we never catch pcommit instruct for L1 guest. */
 -      WARN_ON(1);
 -      return 1;
 -}
 -
+ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
+ {
+       kvm_lapic_expired_hv_timer(vcpu);
+       return 1;
+ }
  /*
   * The exit handlers return 1 if the exit was handled fully and guest execution
   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@@ -7610,6 -7771,8 +7767,7 @@@ static int (*const kvm_vmx_exit_handler
        [EXIT_REASON_XSAVES]                  = handle_xsaves,
        [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
 -      [EXIT_REASON_PCOMMIT]                 = handle_pcommit,
+       [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
  };
  
  static const int kvm_vmx_max_exit_handlers =
@@@ -7918,6 -8081,10 +8076,8 @@@ static bool nested_vmx_exit_handled(str
                 * the XSS exit bitmap in vmcs12.
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 -      case EXIT_REASON_PCOMMIT:
 -              return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
+       case EXIT_REASON_PREEMPTION_TIMER:
+               return false;
        default:
                return true;
        }
@@@ -8940,6 -9115,20 +9120,8 @@@ static struct kvm_vcpu *vmx_create_vcpu
        vmx->nested.current_vmptr = -1ull;
        vmx->nested.current_vmcs12 = NULL;
  
 -      /*
 -       * If PML is turned on, failure on enabling PML just results in failure
 -       * of creating the vcpu, therefore we can simplify PML logic (by
 -       * avoiding dealing with cases, such as enabling PML partially on vcpus
 -       * for the guest, etc.
 -       */
 -      if (enable_pml) {
 -              err = vmx_create_pml_buffer(vmx);
 -              if (err)
 -                      goto free_vmcs;
 -      }
 -
+       vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
        return &vmx->vcpu;
  
  free_vmcs:
@@@ -9080,6 -9267,22 +9262,13 @@@ static void vmx_cpuid_update(struct kvm
  
        if (cpu_has_secondary_exec_ctrls())
                vmcs_set_secondary_exec_control(secondary_exec_ctl);
 -      if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
 -              if (guest_cpuid_has_pcommit(vcpu))
 -                      vmx->nested.nested_vmx_secondary_ctls_high |=
 -                              SECONDARY_EXEC_PCOMMIT;
 -              else
 -                      vmx->nested.nested_vmx_secondary_ctls_high &=
 -                              ~SECONDARY_EXEC_PCOMMIT;
 -      }
 -
+       if (nested_vmx_allowed(vcpu))
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+                       FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+       else
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+                       ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
  }
  
  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
index 9c496c7e8c00e54c0ab133f1b18dec6c39297299,a27b33033700aadd08637b6cfb63981858b48ee6..19f9f9e05c2a812fd07d07ce0e69223ec2accc25
  #include <asm/div64.h>
  #include <asm/irq_remapping.h>
  
 +#define CREATE_TRACE_POINTS
 +#include "trace.h"
 +
  #define MAX_IO_MSRS 256
  #define KVM_MAX_MCE_BANKS 32
- #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
+ EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
  
  #define emul_to_vcpu(ctxt) \
        container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
Simple merge
index 107eed475b9448e607b2b06e32b21491f0c0e094,700b4216c87a356b7990a393f2595c180ae674c0..56b0b7ec66aacd6bb895deb21d818ef9b068e801
  #define GITS_BASER_PAGE_SIZE_64K      (2UL << GITS_BASER_PAGE_SIZE_SHIFT)
  #define GITS_BASER_PAGE_SIZE_MASK     (3UL << GITS_BASER_PAGE_SIZE_SHIFT)
  #define GITS_BASER_PAGES_MAX          256
 +#define GITS_BASER_PAGES_SHIFT                (0)
+ #define GITS_BASER_NR_PAGES(r)                (((r) & 0xff) + 1)
  
  #define GITS_BASER_TYPE_NONE          0
  #define GITS_BASER_TYPE_DEVICE                1
diff --cc mm/gup.c
Simple merge
Simple merge
index 2e791367c576c9b2fb7dc6ee30f49f61356d3a05,61b31a5f76c856f2d7e7f09c6b68ff5ac2fea526..cc081ccfcaa3743ca5e0f46dd350bda36bc3ec72
@@@ -3487,6 -3545,34 +3543,30 @@@ int kvm_io_bus_unregister_dev(struct kv
        return r;
  }
  
 -static struct notifier_block kvm_cpu_notifier = {
 -      .notifier_call = kvm_cpu_hotplug,
 -};
 -
+ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                                        gpa_t addr)
+ {
+       struct kvm_io_bus *bus;
+       int dev_idx, srcu_idx;
+       struct kvm_io_device *iodev = NULL;
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+       dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
+       if (dev_idx < 0)
+               goto out_unlock;
+       iodev = bus->range[dev_idx].dev;
+ out_unlock:
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       return iodev;
+ }
+ EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev);
  static int kvm_debugfs_open(struct inode *inode, struct file *file,
                           int (*get)(void *, u64 *), int (*set)(void *, u64),
                           const char *fmt)