KVM: i8254: use atomic_t instead of pit.inject_lock
author Radim Krčmář <rkrcmar@redhat.com>
Wed, 2 Mar 2016 21:56:41 +0000 (22:56 +0100)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 4 Mar 2016 08:29:47 +0000 (09:29 +0100)
The lock was overkill; the same can be done with atomics.

An mb() was added in kvm_pit_ack_irq, to pair with the implicit barrier
between pit_timer_fn and pit_do_work.  The mb() prevents a race that
could happen if pending == 0 and irq_ack == 0:

  kvm_pit_ack_irq:                | pit_timer_fn:
   p = atomic_read(&ps->pending); |
                                  |  atomic_inc(&ps->pending);
                                  |  queue_work(pit_do_work);
                                  | pit_do_work:
                                  |  atomic_xchg(&ps->irq_ack, 0);
                                  |  return;
   atomic_set(&ps->irq_ack, 1);   |
   if (p == 0) return;            |

where the interrupt would not be delivered in this tick of pit_timer_fn.
PIT would have eventually delivered the interrupt, but we sacrifice
performance to make sure that interrupts are not needlessly delayed.

sfence isn't enough: atomic_dec_if_positive does atomic_read first and
x86 can reorder loads before stores.  lfence isn't enough: store can
pass lfence, turning it into a nop.  In practice a compiler barrier would
be more than enough, as the CPU would need to stall for an unbelievably
long time for the fences to matter.

This patch doesn't do anything in kvm_pit_reset_reinject, because any
order of resets can race, but the result differs by at most one
interrupt, which is ok, because it's the same result as if the reset
happened at a slightly different time.  (Original code didn't protect
the reset path with a proper lock, so users have to be robust.)

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h

index bdbb3f076e723e8e385c3d9c7915f005e7e1173d..0f5655c50e0cb1dcd1751a2afc1619cba7d94e6b 100644 (file)
@@ -237,11 +237,13 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
        struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
                                                 irq_ack_notifier);
 
-       spin_lock(&ps->inject_lock);
+       atomic_set(&ps->irq_ack, 1);
+       /* irq_ack should be set before pending is read.  Order accesses with
+        * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
+        */
+       smp_mb();
        if (atomic_dec_if_positive(&ps->pending) > 0 && ps->reinject)
                queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
-       ps->irq_ack = 1;
-       spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -272,36 +274,25 @@ static void pit_do_work(struct kthread_work *work)
        struct kvm_vcpu *vcpu;
        int i;
        struct kvm_kpit_state *ps = &pit->pit_state;
-       int inject = 0;
 
-       /* Try to inject pending interrupts when
-        * last one has been acked.
+       if (ps->reinject && !atomic_xchg(&ps->irq_ack, 0))
+               return;
+
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
+
+       /*
+        * Provides NMI watchdog support via Virtual Wire mode.
+        * The route is: PIT -> LVT0 in NMI mode.
+        *
+        * Note: Our Virtual Wire implementation does not follow
+        * the MP specification.  We propagate a PIT interrupt to all
+        * VCPUs and only when LVT0 is in NMI mode.  The interrupt can
+        * also be simultaneously delivered through PIC and IOAPIC.
         */
-       spin_lock(&ps->inject_lock);
-       if (!ps->reinject)
-               inject = 1;
-       else if (ps->irq_ack) {
-               ps->irq_ack = 0;
-               inject = 1;
-       }
-       spin_unlock(&ps->inject_lock);
-       if (inject) {
-               kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
-               kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
-
-               /*
-                * Provides NMI watchdog support via Virtual Wire mode.
-                * The route is: PIT -> PIC -> LVT0 in NMI mode.
-                *
-                * Note: Our Virtual Wire implementation is simplified, only
-                * propagating PIT interrupts to all VCPUs when they have set
-                * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-                * VCPU0, and only if its LVT0 is in EXTINT mode.
-                */
-               if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
-                       kvm_for_each_vcpu(i, vcpu, kvm)
-                               kvm_apic_nmi_wd_deliver(vcpu);
-       }
+       if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+               kvm_for_each_vcpu(i, vcpu, kvm)
+                       kvm_apic_nmi_wd_deliver(vcpu);
 }
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
@@ -324,7 +315,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
 {
        atomic_set(&pit->pit_state.pending, 0);
-       pit->pit_state.irq_ack = 1;
+       atomic_set(&pit->pit_state.irq_ack, 1);
 }
 
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
@@ -691,7 +682,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
-       spin_lock_init(&pit->pit_state.inject_lock);
 
        pid = get_pid(task_tgid(current));
        pid_nr = pid_vnr(pid);
index c84990b42b5b189550eecc904f781c0e3ecadf3d..f8cf4b84f435521c7e6d1ababd0fb23ffd5e6270 100644 (file)
@@ -33,8 +33,7 @@ struct kvm_kpit_state {
        u32    speaker_data_on;
        struct mutex lock;
        struct kvm_pit *pit;
-       spinlock_t inject_lock;
-       unsigned long irq_ack;
+       atomic_t irq_ack;
        struct kvm_irq_ack_notifier irq_ack_notifier;
 };