arch/arm/kvm/arm.c

   1 /*
   2  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
   3  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License, version 2, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  17  */
  18
  19 #include <linux/errno.h>
  20 #include <linux/err.h>
  21 #include <linux/kvm_host.h>
  22 #include <linux/module.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/fs.h>
  25 #include <linux/mman.h>
  26 #include <linux/sched.h>
  27 #include <linux/kvm.h>
  28 #include <trace/events/kvm.h>
  29
  30 #define CREATE_TRACE_POINTS
  31 #include "trace.h"
  32
  33 #include <asm/unified.h>
  34 #include <asm/uaccess.h>
  35 #include <asm/ptrace.h>
  36 #include <asm/mman.h>
  37 #include <asm/cputype.h>
  38 #include <asm/tlbflush.h>
  39 #include <asm/cacheflush.h>
  40 #include <asm/virt.h>
  41 #include <asm/kvm_arm.h>
  42 #include <asm/kvm_asm.h>
  43 #include <asm/kvm_mmu.h>
  44 #include <asm/kvm_emulate.h>
  45 #include <asm/kvm_coproc.h>
  46 #include <asm/kvm_psci.h>
  47 #include <asm/opcodes.h>
  48
  49 #ifdef REQUIRES_VIRT
  50 __asm__(".arch_extension        virt");
  51 #endif
  52
  53 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
  54 static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
  55 static unsigned long hyp_default_vectors;
  56
  57 /* Per-CPU variable containing the currently running vcpu. */
  58 static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
  59
  60 /* The VMID used in the VTTBR */
  61 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
  62 static u8 kvm_next_vmid;
  63 static DEFINE_SPINLOCK(kvm_vmid_lock);
  64
  65 static bool vgic_present;
  66
  67 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
  68 {
  69         BUG_ON(preemptible());
  70         __get_cpu_var(kvm_arm_running_vcpu) = vcpu;
  71 }
  72
  73 /**
  74  * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
  75  * Must be called from non-preemptible context
  76  */
  77 struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
  78 {
  79         BUG_ON(preemptible());
  80         return __get_cpu_var(kvm_arm_running_vcpu);
  81 }
  82
  83 /**
  84  * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  85  */
  86 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
  87 {
  88         return &kvm_arm_running_vcpu;
  89 }
  90
  91 int kvm_arch_hardware_enable(void *garbage)
  92 {
  93         return 0;
  94 }
  95
  96 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  97 {
  98         return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
  99 }
 100
 101 void kvm_arch_hardware_disable(void *garbage)
 102 {
 103 }
 104
 105 int kvm_arch_hardware_setup(void)
 106 {
 107         return 0;
 108 }
 109
 110 void kvm_arch_hardware_unsetup(void)
 111 {
 112 }
 113
 114 void kvm_arch_check_processor_compat(void *rtn)
 115 {
 116         *(int *)rtn = 0;
 117 }
 118
 119 void kvm_arch_sync_events(struct kvm *kvm)
 120 {
 121 }
 122
 123 /**
 124  * kvm_arch_init_vm - initializes a VM data structure
 125  * @kvm:        pointer to the KVM struct
 126  */
 127 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 128 {
 129         int ret = 0;
 130
 131         if (type)
 132                 return -EINVAL;
 133
 134         ret = kvm_alloc_stage2_pgd(kvm);
 135         if (ret)
 136                 goto out_fail_alloc;
 137
 138         ret = create_hyp_mappings(kvm, kvm + 1);
 139         if (ret)
 140                 goto out_free_stage2_pgd;
 141
 142         /* Mark the initial VMID generation invalid */
 143         kvm->arch.vmid_gen = 0;
 144
 145         return ret;
 146 out_free_stage2_pgd:
 147         kvm_free_stage2_pgd(kvm);
 148 out_fail_alloc:
 149         return ret;
 150 }
 151
 152 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 153 {
 154         return VM_FAULT_SIGBUS;
 155 }
 156
 157 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 158                            struct kvm_memory_slot *dont)
 159 {
 160 }
 161
 162 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 163 {
 164         return 0;
 165 }
 166
 167 /**
 168  * kvm_arch_destroy_vm - destroy the VM data structure
 169  * @kvm:        pointer to the KVM struct
 170  */
 171 void kvm_arch_destroy_vm(struct kvm *kvm)
 172 {
 173         int i;
 174
 175         kvm_free_stage2_pgd(kvm);
 176
 177         for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 178                 if (kvm->vcpus[i]) {
 179                         kvm_arch_vcpu_free(kvm->vcpus[i]);
 180                         kvm->vcpus[i] = NULL;
 181                 }
 182         }
 183 }
 184
 185 int kvm_dev_ioctl_check_extension(long ext)
 186 {
 187         int r;
 188         switch (ext) {
 189         case KVM_CAP_IRQCHIP:
 190                 r = vgic_present;
 191                 break;
 192         case KVM_CAP_USER_MEMORY:
 193         case KVM_CAP_SYNC_MMU:
 194         case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 195         case KVM_CAP_ONE_REG:
 196         case KVM_CAP_ARM_PSCI:
 197                 r = 1;
 198                 break;
 199         case KVM_CAP_COALESCED_MMIO:
 200                 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 201                 break;
 202         case KVM_CAP_ARM_SET_DEVICE_ADDR:
 203                 r = 1;
 204                 break;
 205         case KVM_CAP_NR_VCPUS:
 206                 r = num_online_cpus();
 207                 break;
 208         case KVM_CAP_MAX_VCPUS:
 209                 r = KVM_MAX_VCPUS;
 210                 break;
 211         default:
 212                 r = 0;
 213                 break;
 214         }
 215         return r;
 216 }
 217
 218 long kvm_arch_dev_ioctl(struct file *filp,
 219                         unsigned int ioctl, unsigned long arg)
 220 {
 221         return -EINVAL;
 222 }
 223
 224 int kvm_arch_set_memory_region(struct kvm *kvm,
 225                                struct kvm_userspace_memory_region *mem,
 226                                struct kvm_memory_slot old,
 227                                int user_alloc)
 228 {
 229         return 0;
 230 }
 231
 232 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 233                                    struct kvm_memory_slot *memslot,
 234                                    struct kvm_memory_slot old,
 235                                    struct kvm_userspace_memory_region *mem,
 236                                    bool user_alloc)
 237 {
 238         return 0;
 239 }
 240
 241 void kvm_arch_commit_memory_region(struct kvm *kvm,
 242                                    struct kvm_userspace_memory_region *mem,
 243                                    struct kvm_memory_slot old,
 244                                    bool user_alloc)
 245 {
 246 }
 247
 248 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 249 {
 250 }
 251
 252 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 253                                    struct kvm_memory_slot *slot)
 254 {
 255 }
 256
 257 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 258 {
 259         int err;
 260         struct kvm_vcpu *vcpu;
 261
 262         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 263         if (!vcpu) {
 264                 err = -ENOMEM;
 265                 goto out;
 266         }
 267
 268         err = kvm_vcpu_init(vcpu, kvm, id);
 269         if (err)
 270                 goto free_vcpu;
 271
 272         err = create_hyp_mappings(vcpu, vcpu + 1);
 273         if (err)
 274                 goto vcpu_uninit;
 275
 276         return vcpu;
 277 vcpu_uninit:
 278         kvm_vcpu_uninit(vcpu);
 279 free_vcpu:
 280         kmem_cache_free(kvm_vcpu_cache, vcpu);
 281 out:
 282         return ERR_PTR(err);
 283 }
 284
 285 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 286 {
 287         return 0;
 288 }
 289
 290 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 291 {
 292         kvm_mmu_free_memory_caches(vcpu);
 293         kvm_timer_vcpu_terminate(vcpu);
 294         kmem_cache_free(kvm_vcpu_cache, vcpu);
 295 }
 296
 297 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 298 {
 299         kvm_arch_vcpu_free(vcpu);
 300 }
 301
 302 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 303 {
 304         return 0;
 305 }
 306
 307 int __attribute_const__ kvm_target_cpu(void)
 308 {
 309         unsigned long implementor = read_cpuid_implementor();
 310         unsigned long part_number = read_cpuid_part_number();
 311
 312         if (implementor != ARM_CPU_IMP_ARM)
 313                 return -EINVAL;
 314
 315         switch (part_number) {
 316         case ARM_CPU_PART_CORTEX_A15:
 317                 return KVM_ARM_TARGET_CORTEX_A15;
 318         default:
 319                 return -EINVAL;
 320         }
 321 }
 322
 323 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 324 {
 325         int ret;
 326
 327         /* Force users to call KVM_ARM_VCPU_INIT */
 328         vcpu->arch.target = -1;
 329
 330         /* Set up VGIC */
 331         ret = kvm_vgic_vcpu_init(vcpu);
 332         if (ret)
 333                 return ret;
 334
 335         /* Set up the timer */
 336         kvm_timer_vcpu_init(vcpu);
 337
 338         return 0;
 339 }
 340
 341 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 342 {
 343 }
 344
 345 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 346 {
 347         vcpu->cpu = cpu;
 348         vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
 349
 350         /*
 351          * Check whether this vcpu requires the cache to be flushed on
 352          * this physical CPU. This is a consequence of doing dcache
 353          * operations by set/way on this vcpu. We do it here to be in
 354          * a non-preemptible section.
 355          */
 356         if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
 357                 flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
 358
 359         kvm_arm_set_running_vcpu(vcpu);
 360 }
 361
 362 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 363 {
 364         kvm_arm_set_running_vcpu(NULL);
 365 }
 366
 367 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 368                                         struct kvm_guest_debug *dbg)
 369 {
 370         return -EINVAL;
 371 }
 372
 373
 374 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 375                                     struct kvm_mp_state *mp_state)
 376 {
 377         return -EINVAL;
 378 }
 379
 380 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 381                                     struct kvm_mp_state *mp_state)
 382 {
 383         return -EINVAL;
 384 }
 385
 386 /**
 387  * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 388  * @v:          The VCPU pointer
 389  *
 390  * If the guest CPU is not waiting for interrupts or an interrupt line is
 391  * asserted, the CPU is by definition runnable.
 392  */
 393 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 394 {
 395         return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
 396 }
 397
 398 /* Just ensure a guest exit from a particular CPU */
 399 static void exit_vm_noop(void *info)
 400 {
 401 }
 402
 403 void force_vm_exit(const cpumask_t *mask)
 404 {
 405         smp_call_function_many(mask, exit_vm_noop, NULL, true);
 406 }
 407
 408 /**
 409  * need_new_vmid_gen - check that the VMID is still valid
 410  * @kvm: The VM's VMID to checkt
 411  *
 412  * return true if there is a new generation of VMIDs being used
 413  *
 414  * The hardware supports only 256 values with the value zero reserved for the
 415  * host, so we check if an assigned value belongs to a previous generation,
 416  * which which requires us to assign a new value. If we're the first to use a
 417  * VMID for the new generation, we must flush necessary caches and TLBs on all
 418  * CPUs.
 419  */
 420 static bool need_new_vmid_gen(struct kvm *kvm)
 421 {
 422         return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
 423 }
 424
 425 /**
 426  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
 427  * @kvm The guest that we are about to run
 428  *
 429  * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
 430  * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
 431  * caches and TLBs.
 432  */
 433 static void update_vttbr(struct kvm *kvm)
 434 {
 435         phys_addr_t pgd_phys;
 436         u64 vmid;
 437
 438         if (!need_new_vmid_gen(kvm))
 439                 return;
 440
 441         spin_lock(&kvm_vmid_lock);
 442
 443         /*
 444          * We need to re-check the vmid_gen here to ensure that if another vcpu
 445          * already allocated a valid vmid for this vm, then this vcpu should
 446          * use the same vmid.
 447          */
 448         if (!need_new_vmid_gen(kvm)) {
 449                 spin_unlock(&kvm_vmid_lock);
 450                 return;
 451         }
 452
 453         /* First user of a new VMID generation? */
 454         if (unlikely(kvm_next_vmid == 0)) {
 455                 atomic64_inc(&kvm_vmid_gen);
 456                 kvm_next_vmid = 1;
 457
 458                 /*
 459                  * On SMP we know no other CPUs can use this CPU's or each
 460                  * other's VMID after force_vm_exit returns since the
 461                  * kvm_vmid_lock blocks them from reentry to the guest.
 462                  */
 463                 force_vm_exit(cpu_all_mask);
 464                 /*
 465                  * Now broadcast TLB + ICACHE invalidation over the inner
 466                  * shareable domain to make sure all data structures are
 467                  * clean.
 468                  */
 469                 kvm_call_hyp(__kvm_flush_vm_context);
 470         }
 471
 472         kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
 473         kvm->arch.vmid = kvm_next_vmid;
 474         kvm_next_vmid++;
 475
 476         /* update vttbr to be used with the new vmid */
 477         pgd_phys = virt_to_phys(kvm->arch.pgd);
 478         vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 479         kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
 480         kvm->arch.vttbr |= vmid;
 481
 482         spin_unlock(&kvm_vmid_lock);
 483 }
 484
 485 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 486 {
 487         /* SVC called from Hyp mode should never get here */
 488         kvm_debug("SVC called from Hyp mode shouldn't go here\n");
 489         BUG();
 490         return -EINVAL; /* Squash warning */
 491 }
 492
 493 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 494 {
 495         trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 496                       vcpu->arch.hsr & HSR_HVC_IMM_MASK);
 497
 498         if (kvm_psci_call(vcpu))
 499                 return 1;
 500
 501         kvm_inject_undefined(vcpu);
 502         return 1;
 503 }
 504
 505 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 506 {
 507         if (kvm_psci_call(vcpu))
 508                 return 1;
 509
 510         kvm_inject_undefined(vcpu);
 511         return 1;
 512 }
 513
 514 static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 515 {
 516         /* The hypervisor should never cause aborts */
 517         kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
 518                 vcpu->arch.hxfar, vcpu->arch.hsr);
 519         return -EFAULT;
 520 }
 521
 522 static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 523 {
 524         /* This is either an error in the ws. code or an external abort */
 525         kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
 526                 vcpu->arch.hxfar, vcpu->arch.hsr);
 527         return -EFAULT;
 528 }
 529
 530 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 531 static exit_handle_fn arm_exit_handlers[] = {
 532         [HSR_EC_WFI]            = kvm_handle_wfi,
 533         [HSR_EC_CP15_32]        = kvm_handle_cp15_32,
 534         [HSR_EC_CP15_64]        = kvm_handle_cp15_64,
 535         [HSR_EC_CP14_MR]        = kvm_handle_cp14_access,
 536         [HSR_EC_CP14_LS]        = kvm_handle_cp14_load_store,
 537         [HSR_EC_CP14_64]        = kvm_handle_cp14_access,
 538         [HSR_EC_CP_0_13]        = kvm_handle_cp_0_13_access,
 539         [HSR_EC_CP10_ID]        = kvm_handle_cp10_id,
 540         [HSR_EC_SVC_HYP]        = handle_svc_hyp,
 541         [HSR_EC_HVC]            = handle_hvc,
 542         [HSR_EC_SMC]            = handle_smc,
 543         [HSR_EC_IABT]           = kvm_handle_guest_abort,
 544         [HSR_EC_IABT_HYP]       = handle_pabt_hyp,
 545         [HSR_EC_DABT]           = kvm_handle_guest_abort,
 546         [HSR_EC_DABT_HYP]       = handle_dabt_hyp,
 547 };
 548
 549 /*
 550  * A conditional instruction is allowed to trap, even though it
 551  * wouldn't be executed.  So let's re-implement the hardware, in
 552  * software!
 553  */
 554 static bool kvm_condition_valid(struct kvm_vcpu *vcpu)
 555 {
 556         unsigned long cpsr, cond, insn;
 557
 558         /*
 559          * Exception Code 0 can only happen if we set HCR.TGE to 1, to
 560          * catch undefined instructions, and then we won't get past
 561          * the arm_exit_handlers test anyway.
 562          */
 563         BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0);
 564
 565         /* Top two bits non-zero?  Unconditional. */
 566         if (vcpu->arch.hsr >> 30)
 567                 return true;
 568
 569         cpsr = *vcpu_cpsr(vcpu);
 570
 571         /* Is condition field valid? */
 572         if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT)
 573                 cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT;
 574         else {
 575                 /* This can happen in Thumb mode: examine IT state. */
 576                 unsigned long it;
 577
 578                 it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
 579
 580                 /* it == 0 => unconditional. */
 581                 if (it == 0)
 582                         return true;
 583
 584                 /* The cond for this insn works out as the top 4 bits. */
 585                 cond = (it >> 4);
 586         }
 587
 588         /* Shift makes it look like an ARM-mode instruction */
 589         insn = cond << 28;
 590         return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
 591 }
 592
 593 /*
 594  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
 595  * proper exit to QEMU.
 596  */
 597 static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 598                        int exception_index)
 599 {
 600         unsigned long hsr_ec;
 601
 602         switch (exception_index) {
 603         case ARM_EXCEPTION_IRQ:
 604                 return 1;
 605         case ARM_EXCEPTION_UNDEFINED:
 606                 kvm_err("Undefined exception in Hyp mode at: %#08x\n",
 607                         vcpu->arch.hyp_pc);
 608                 BUG();
 609                 panic("KVM: Hypervisor undefined exception!\n");
 610         case ARM_EXCEPTION_DATA_ABORT:
 611         case ARM_EXCEPTION_PREF_ABORT:
 612         case ARM_EXCEPTION_HVC:
 613                 hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
 614
 615                 if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers)
 616                     || !arm_exit_handlers[hsr_ec]) {
 617                         kvm_err("Unknown exception class: %#08lx, "
 618                                 "hsr: %#08x\n", hsr_ec,
 619                                 (unsigned int)vcpu->arch.hsr);
 620                         BUG();
 621                 }
 622
 623                 /*
 624                  * See ARM ARM B1.14.1: "Hyp traps on instructions
 625                  * that fail their condition code check"
 626                  */
 627                 if (!kvm_condition_valid(vcpu)) {
 628                         bool is_wide = vcpu->arch.hsr & HSR_IL;
 629                         kvm_skip_instr(vcpu, is_wide);
 630                         return 1;
 631                 }
 632
 633                 return arm_exit_handlers[hsr_ec](vcpu, run);
 634         default:
 635                 kvm_pr_unimpl("Unsupported exception type: %d",
 636                               exception_index);
 637                 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 638                 return 0;
 639         }
 640 }
 641
 642 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 643 {
 644         if (likely(vcpu->arch.has_run_once))
 645                 return 0;
 646
 647         vcpu->arch.has_run_once = true;
 648
 649         /*
 650          * Initialize the VGIC before running a vcpu the first time on
 651          * this VM.
 652          */
 653         if (irqchip_in_kernel(vcpu->kvm) &&
 654             unlikely(!vgic_initialized(vcpu->kvm))) {
 655                 int ret = kvm_vgic_init(vcpu->kvm);
 656                 if (ret)
 657                         return ret;
 658         }
 659
 660         /*
 661          * Handle the "start in power-off" case by calling into the
 662          * PSCI code.
 663          */
 664         if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
 665                 *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
 666                 kvm_psci_call(vcpu);
 667         }
 668
 669         return 0;
 670 }
 671
 672 static void vcpu_pause(struct kvm_vcpu *vcpu)
 673 {
 674         wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
 675
 676         wait_event_interruptible(*wq, !vcpu->arch.pause);
 677 }
 678
 679 /**
 680  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 681  * @vcpu:       The VCPU pointer
 682  * @run:        The kvm_run structure pointer used for userspace state exchange
 683  *
 684  * This function is called through the VCPU_RUN ioctl called from user space. It
 685  * will execute VM code in a loop until the time slice for the process is used
 686  * or some emulation is needed from user space in which case the function will
 687  * return with return value 0 and with the kvm_run structure filled in with the
 688  * required data for the requested emulation.
 689  */
 690 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 691 {
 692         int ret;
 693         sigset_t sigsaved;
 694
 695         /* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */
 696         if (unlikely(vcpu->arch.target < 0))
 697                 return -ENOEXEC;
 698
 699         ret = kvm_vcpu_first_run_init(vcpu);
 700         if (ret)
 701                 return ret;
 702
 703         if (run->exit_reason == KVM_EXIT_MMIO) {
 704                 ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 705                 if (ret)
 706                         return ret;
 707         }
 708
 709         if (vcpu->sigset_active)
 710                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 711
 712         ret = 1;
 713         run->exit_reason = KVM_EXIT_UNKNOWN;
 714         while (ret > 0) {
 715                 /*
 716                  * Check conditions before entering the guest
 717                  */
 718                 cond_resched();
 719
 720                 update_vttbr(vcpu->kvm);
 721
 722                 if (vcpu->arch.pause)
 723                         vcpu_pause(vcpu);
 724
 725                 kvm_vgic_flush_hwstate(vcpu);
 726                 kvm_timer_flush_hwstate(vcpu);
 727
 728                 local_irq_disable();
 729
 730                 /*
 731                  * Re-check atomic conditions
 732                  */
 733                 if (signal_pending(current)) {
 734                         ret = -EINTR;
 735                         run->exit_reason = KVM_EXIT_INTR;
 736                 }
 737
 738                 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
 739                         local_irq_enable();
 740                         kvm_timer_sync_hwstate(vcpu);
 741                         kvm_vgic_sync_hwstate(vcpu);
 742                         continue;
 743                 }
 744
 745                 /**************************************************************
 746                  * Enter the guest
 747                  */
 748                 trace_kvm_entry(*vcpu_pc(vcpu));
 749                 kvm_guest_enter();
 750                 vcpu->mode = IN_GUEST_MODE;
 751
 752                 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 753
 754                 vcpu->mode = OUTSIDE_GUEST_MODE;
 755                 vcpu->arch.last_pcpu = smp_processor_id();
 756                 kvm_guest_exit();
 757                 trace_kvm_exit(*vcpu_pc(vcpu));
 758                 /*
 759                  * We may have taken a host interrupt in HYP mode (ie
 760                  * while executing the guest). This interrupt is still
 761                  * pending, as we haven't serviced it yet!
 762                  *
 763                  * We're now back in SVC mode, with interrupts
 764                  * disabled.  Enabling the interrupts now will have
 765                  * the effect of taking the interrupt again, in SVC
 766                  * mode this time.
 767                  */
 768                 local_irq_enable();
 769
 770                 /*
 771                  * Back from guest
 772                  *************************************************************/
 773
 774                 kvm_timer_sync_hwstate(vcpu);
 775                 kvm_vgic_sync_hwstate(vcpu);
 776
 777                 ret = handle_exit(vcpu, run, ret);
 778         }
 779
 780         if (vcpu->sigset_active)
 781                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 782         return ret;
 783 }
 784
 785 static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 786 {
 787         int bit_index;
 788         bool set;
 789         unsigned long *ptr;
 790
 791         if (number == KVM_ARM_IRQ_CPU_IRQ)
 792                 bit_index = __ffs(HCR_VI);
 793         else /* KVM_ARM_IRQ_CPU_FIQ */
 794                 bit_index = __ffs(HCR_VF);
 795
 796         ptr = (unsigned long *)&vcpu->arch.irq_lines;
 797         if (level)
 798                 set = test_and_set_bit(bit_index, ptr);
 799         else
 800                 set = test_and_clear_bit(bit_index, ptr);
 801
 802         /*
 803          * If we didn't change anything, no need to wake up or kick other CPUs
 804          */
 805         if (set == level)
 806                 return 0;
 807
 808         /*
 809          * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
 810          * trigger a world-switch round on the running physical CPU to set the
 811          * virtual IRQ/FIQ fields in the HCR appropriately.
 812          */
 813         kvm_vcpu_kick(vcpu);
 814
 815         return 0;
 816 }
 817
 818 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
 819 {
 820         u32 irq = irq_level->irq;
 821         unsigned int irq_type, vcpu_idx, irq_num;
 822         int nrcpus = atomic_read(&kvm->online_vcpus);
 823         struct kvm_vcpu *vcpu = NULL;
 824         bool level = irq_level->level;
 825
 826         irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
 827         vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
 828         irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
 829
 830         trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
 831
 832         switch (irq_type) {
 833         case KVM_ARM_IRQ_TYPE_CPU:
 834                 if (irqchip_in_kernel(kvm))
 835                         return -ENXIO;
 836
 837                 if (vcpu_idx >= nrcpus)
 838                         return -EINVAL;
 839
 840                 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 841                 if (!vcpu)
 842                         return -EINVAL;
 843
 844                 if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
 845                         return -EINVAL;
 846
 847                 return vcpu_interrupt_line(vcpu, irq_num, level);
 848         case KVM_ARM_IRQ_TYPE_PPI:
 849                 if (!irqchip_in_kernel(kvm))
 850                         return -ENXIO;
 851
 852                 if (vcpu_idx >= nrcpus)
 853                         return -EINVAL;
 854
 855                 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 856                 if (!vcpu)
 857                         return -EINVAL;
 858
 859                 if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
 860                         return -EINVAL;
 861
 862                 return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
 863         case KVM_ARM_IRQ_TYPE_SPI:
 864                 if (!irqchip_in_kernel(kvm))
 865                         return -ENXIO;
 866
 867                 if (irq_num < VGIC_NR_PRIVATE_IRQS ||
 868                     irq_num > KVM_ARM_IRQ_GIC_MAX)
 869                         return -EINVAL;
 870
 871                 return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
 872         }
 873
 874         return -EINVAL;
 875 }
 876
 877 long kvm_arch_vcpu_ioctl(struct file *filp,
 878                          unsigned int ioctl, unsigned long arg)
 879 {
 880         struct kvm_vcpu *vcpu = filp->private_data;
 881         void __user *argp = (void __user *)arg;
 882
 883         switch (ioctl) {
 884         case KVM_ARM_VCPU_INIT: {
 885                 struct kvm_vcpu_init init;
 886
 887                 if (copy_from_user(&init, argp, sizeof(init)))
 888                         return -EFAULT;
 889
 890                 return kvm_vcpu_set_target(vcpu, &init);
 891
 892         }
 893         case KVM_SET_ONE_REG:
 894         case KVM_GET_ONE_REG: {
 895                 struct kvm_one_reg reg;
 896                 if (copy_from_user(&reg, argp, sizeof(reg)))
 897                         return -EFAULT;
 898                 if (ioctl == KVM_SET_ONE_REG)
 899                         return kvm_arm_set_reg(vcpu, &reg);
 900                 else
 901                         return kvm_arm_get_reg(vcpu, &reg);
 902         }
 903         case KVM_GET_REG_LIST: {
 904                 struct kvm_reg_list __user *user_list = argp;
 905                 struct kvm_reg_list reg_list;
 906                 unsigned n;
 907
 908                 if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
 909                         return -EFAULT;
 910                 n = reg_list.n;
 911                 reg_list.n = kvm_arm_num_regs(vcpu);
 912                 if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
 913                         return -EFAULT;
 914                 if (n < reg_list.n)
 915                         return -E2BIG;
 916                 return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
 917         }
 918         default:
 919                 return -EINVAL;
 920         }
 921 }
 922
 923 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 924 {
 925         return -EINVAL;
 926 }
 927
 928 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 929                                         struct kvm_arm_device_addr *dev_addr)
 930 {
 931         unsigned long dev_id, type;
 932
 933         dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
 934                 KVM_ARM_DEVICE_ID_SHIFT;
 935         type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
 936                 KVM_ARM_DEVICE_TYPE_SHIFT;
 937
 938         switch (dev_id) {
 939         case KVM_ARM_DEVICE_VGIC_V2:
 940                 if (!vgic_present)
 941                         return -ENXIO;
 942                 return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
 943         default:
 944                 return -ENODEV;
 945         }
 946 }
 947
 948 long kvm_arch_vm_ioctl(struct file *filp,
 949                        unsigned int ioctl, unsigned long arg)
 950 {
 951         struct kvm *kvm = filp->private_data;
 952         void __user *argp = (void __user *)arg;
 953
 954         switch (ioctl) {
 955         case KVM_CREATE_IRQCHIP: {
 956                 if (vgic_present)
 957                         return kvm_vgic_create(kvm);
 958                 else
 959                         return -ENXIO;
 960         }
 961         case KVM_ARM_SET_DEVICE_ADDR: {
 962                 struct kvm_arm_device_addr dev_addr;
 963
 964                 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
 965                         return -EFAULT;
 966                 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 967         }
 968         default:
 969                 return -EINVAL;
 970         }
 971 }
 972
 973 static void cpu_init_hyp_mode(void *vector)
 974 {
 975         unsigned long long pgd_ptr;
 976         unsigned long pgd_low, pgd_high;
 977         unsigned long hyp_stack_ptr;
 978         unsigned long stack_page;
 979         unsigned long vector_ptr;
 980
 981         /* Switch from the HYP stub to our own HYP init vector */
 982         __hyp_set_vectors((unsigned long)vector);
 983
 984         pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
 985         pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
 986         pgd_high = (pgd_ptr >> 32ULL);
 987         stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 988         hyp_stack_ptr = stack_page + PAGE_SIZE;
 989         vector_ptr = (unsigned long)__kvm_hyp_vector;
 990
 991         /*
 992          * Call initialization code, and switch to the full blown
 993          * HYP code. The init code doesn't need to preserve these registers as
 994          * r1-r3 and r12 are already callee save according to the AAPCS.
 995          * Note that we slightly misuse the prototype by casing the pgd_low to
 996          * a void *.
 997          */
 998         kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
 999 }
1000
1001 /**
1002  * Inits Hyp-mode on all online CPUs
1003  */
1004 static int init_hyp_mode(void)
1005 {
1006         phys_addr_t init_phys_addr;
1007         int cpu;
1008         int err = 0;
1009
1010         /*
1011          * Allocate Hyp PGD and setup Hyp identity mapping
1012          */
1013         err = kvm_mmu_init();
1014         if (err)
1015                 goto out_err;
1016
1017         /*
1018          * It is probably enough to obtain the default on one
1019          * CPU. It's unlikely to be different on the others.
1020          */
1021         hyp_default_vectors = __hyp_get_vectors();
1022
1023         /*
1024          * Allocate stack pages for Hypervisor-mode
1025          */
1026         for_each_possible_cpu(cpu) {
1027                 unsigned long stack_page;
1028
1029                 stack_page = __get_free_page(GFP_KERNEL);
1030                 if (!stack_page) {
1031                         err = -ENOMEM;
1032                         goto out_free_stack_pages;
1033                 }
1034
1035                 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1036         }
1037
1038         /*
1039          * Execute the init code on each CPU.
1040          *
1041          * Note: The stack is not mapped yet, so don't do anything else than
1042          * initializing the hypervisor mode on each CPU using a local stack
1043          * space for temporary storage.
1044          */
1045         init_phys_addr = virt_to_phys(__kvm_hyp_init);
1046         for_each_online_cpu(cpu) {
1047                 smp_call_function_single(cpu, cpu_init_hyp_mode,
1048                                          (void *)(long)init_phys_addr, 1);
1049         }
1050
1051         /*
1052          * Unmap the identity mapping
1053          */
1054         kvm_clear_hyp_idmap();
1055
1056         /*
1057          * Map the Hyp-code called directly from the host
1058          */
1059         err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
1060         if (err) {
1061                 kvm_err("Cannot map world-switch code\n");
1062                 goto out_free_mappings;
1063         }
1064
1065         /*
1066          * Map the Hyp stack pages
1067          */
1068         for_each_possible_cpu(cpu) {
1069                 char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
1070                 err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
1071
1072                 if (err) {
1073                         kvm_err("Cannot map hyp stack\n");
1074                         goto out_free_mappings;
1075                 }
1076         }
1077
1078         /*
1079          * Map the host VFP structures
1080          */
1081         kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct);
1082         if (!kvm_host_vfp_state) {
1083                 err = -ENOMEM;
1084                 kvm_err("Cannot allocate host VFP state\n");
1085                 goto out_free_mappings;
1086         }
1087
1088         for_each_possible_cpu(cpu) {
1089                 struct vfp_hard_struct *vfp;
1090
1091                 vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
1092                 err = create_hyp_mappings(vfp, vfp + 1);
1093
1094                 if (err) {
1095                         kvm_err("Cannot map host VFP state: %d\n", err);
1096                         goto out_free_vfp;
1097                 }
1098         }
1099
1100         /*
1101          * Init HYP view of VGIC
1102          */
1103         err = kvm_vgic_hyp_init();
1104         if (err)
1105                 goto out_free_vfp;
1106
1107 #ifdef CONFIG_KVM_ARM_VGIC
1108                 vgic_present = true;
1109 #endif
1110
1111         /*
1112          * Init HYP architected timer support
1113          */
1114         err = kvm_timer_hyp_init();
1115         if (err)
1116                 goto out_free_mappings;
1117
1118         kvm_info("Hyp mode initialized successfully\n");
1119         return 0;
1120 out_free_vfp:
1121         free_percpu(kvm_host_vfp_state);
1122 out_free_mappings:
1123         free_hyp_pmds();
1124 out_free_stack_pages:
1125         for_each_possible_cpu(cpu)
1126                 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1127 out_err:
1128         kvm_err("error initializing Hyp mode: %d\n", err);
1129         return err;
1130 }
1131
1132 /**
1133  * Initialize Hyp-mode and memory mappings on all CPUs.
1134  */
1135 int kvm_arch_init(void *opaque)
1136 {
1137         int err;
1138
1139         if (!is_hyp_mode_available()) {
1140                 kvm_err("HYP mode not available\n");
1141                 return -ENODEV;
1142         }
1143
1144         if (kvm_target_cpu() < 0) {
1145                 kvm_err("Target CPU not supported!\n");
1146                 return -ENODEV;
1147         }
1148
1149         err = init_hyp_mode();
1150         if (err)
1151                 goto out_err;
1152
1153         kvm_coproc_table_init();
1154         return 0;
1155 out_err:
1156         return err;
1157 }
1158
/*
 * Intentionally empty: per the original note, compiling this code as a
 * module is not supported, so no teardown is performed here.
 */
void kvm_arch_exit(void)
{
        /* nothing to do */
}
1163
1164 static int arm_init(void)
1165 {
1166         int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1167         return rc;
1168 }
1169
1170 module_init(arm_init);