KVM: x86: remove irq disablement around KVM_SET_CLOCK/KVM_GET_CLOCK
authorMarcelo Tosatti <mtosatti@redhat.com>
Mon, 17 Apr 2017 15:51:37 +0000 (12:51 -0300)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 21 Apr 2017 10:50:28 +0000 (12:50 +0200)
The disablement of interrupts at KVM_SET_CLOCK/KVM_GET_CLOCK
attempts to disable software suspend from causing "non atomic behaviour" of
the operation:

    Add a helper function to compute the kernel time and convert nanoseconds
    back to CPU specific cycles.  Note that these must not be called in preemptible
    context, as that would mean the kernel could enter software suspend state,
    which would cause non-atomic operation.

However, assume the kernel can enter software suspend at the following 2 points:

        ktime_get_ts(&ts);
1.
hypothetical_ktime_get_ts(&ts)
        monotonic_to_bootbased(&ts);
2.

monotonic_to_bootbased() should be correct relative to a ktime_get_ts(&ts)
performed after point 1 (that is after resuming from software suspend),
hypothetical_ktime_get_ts()

Therefore it is also correct for the ktime_get_ts(&ts) before point 1,
which is

ktime_get_ts(&ts) = hypothetical_ktime_get_ts(&ts) + time-to-execute-suspend-code

Note CLOCK_MONOTONIC does not count during suspension.

So remove the irq disablement, which causes the following warning on
-RT kernels:

 With this reasoning, and the -RT bug that the irq disablement causes
 (because spin_lock is now a sleeping lock), remove the IRQ protection as it
 causes:

 [ 1064.668109] in_atomic(): 0, irqs_disabled(): 1, pid: 15296, name:m
 [ 1064.668110] INFO: lockdep is turned off.
 [ 1064.668110] irq event stamp: 0
 [ 1064.668112] hardirqs last  enabled at (0): [<          (null)>]  )
 [ 1064.668116] hardirqs last disabled at (0): [] c0
 [ 1064.668118] softirqs last  enabled at (0): [] c0
 [ 1064.668118] softirqs last disabled at (0): [<          (null)>]  )
 [ 1064.668121] CPU: 13 PID: 15296 Comm: qemu-kvm Not tainted 3.10.0-1
 [ 1064.668121] Hardware name: Dell Inc. PowerEdge R730/0H21J3, BIOS 5
 [ 1064.668123]  ffff8c1796b88000 00000000afe7344c ffff8c179abf3c68 f3
 [ 1064.668125]  ffff8c179abf3c90 ffffffff930ccb3d ffff8c1b992b3610 f0
 [ 1064.668126]  00007ffc1a26fbc0 ffff8c179abf3cb0 ffffffff9375f694 f0
 [ 1064.668126] Call Trace:
 [ 1064.668132]  [] dump_stack+0x19/0x1b
 [ 1064.668135]  [] __might_sleep+0x12d/0x1f0
 [ 1064.668138]  [] rt_spin_lock+0x24/0x60
 [ 1064.668155]  [] __get_kvmclock_ns+0x36/0x110 [k]
 [ 1064.668159]  [] ? futex_wait_queue_me+0x103/0x10
 [ 1064.668171]  [] kvm_arch_vm_ioctl+0xa2/0xd70 [k]
 [ 1064.668173]  [] ? futex_wait+0x1ac/0x2a0

v2: notice get_kvmclock_ns with the same problem (Pankaj).
v3: remove useless helper function (Pankaj).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/x86.c

index 2f9fe6bf7091609cd667c7ae1517edbf46b3bff9..f68c5b2ba627daba09d7bff5ca8c97b4d4189b90 100644 (file)
@@ -1759,7 +1759,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
@@ -1780,18 +1780,6 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
        return __pvclock_read_cycles(&hv_clock, rdtsc());
 }
 
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
-       unsigned long flags;
-       s64 ns;
-
-       local_irq_save(flags);
-       ns = __get_kvmclock_ns(kvm);
-       local_irq_restore(flags);
-
-       return ns;
-}
-
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
 {
        struct kvm_vcpu_arch *vcpu = &v->arch;
@@ -4202,10 +4190,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-               local_irq_enable();
                kvm_gen_update_masterclock(kvm);
                break;
        }
@@ -4213,11 +4199,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                user_ns.clock = now_ns;
                user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
-               local_irq_enable();
                memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
                r = -EFAULT;