xen: cache cr0 value to avoid trap'n'emulate for read_cr0
authorJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Fri, 24 Apr 2009 07:26:50 +0000 (00:26 -0700)
committerJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Fri, 8 May 2009 22:55:24 +0000 (15:55 -0700)
stts() is implemented in terms of read_cr0/write_cr0 to update the
state of the TS bit.  This happens during context switch, and so
is fairly performance critical.  Rather than falling back to
a trap-and-emulate native read_cr0, implement our own by caching
the last-written value from write_cr0 (the TS bit is the only one
we really care about).

Impact: optimise Xen context switches
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
arch/x86/xen/enlighten.c

index e9df942aa143dbe2f4d7ede1cb9971f674ec2529..0a1700a2be9c8c9a548822ca4deef1bb475b4059 100644 (file)
@@ -658,10 +658,26 @@ static void xen_clts(void)
        xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
+
+static unsigned long xen_read_cr0(void)
+{
+       unsigned long cr0 = percpu_read(xen_cr0_value);
+
+       if (unlikely(cr0 == 0)) {
+               cr0 = native_read_cr0();
+               percpu_write(xen_cr0_value, cr0);
+       }
+
+       return cr0;
+}
+
 static void xen_write_cr0(unsigned long cr0)
 {
        struct multicall_space mcs;
 
+       percpu_write(xen_cr0_value, cr0);
+
        /* Only pay attention to cr0.TS; everything else is
           ignored. */
        mcs = xen_mc_entry(0);
@@ -847,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
        .clts = xen_clts,
 
-       .read_cr0 = native_read_cr0,
+       .read_cr0 = xen_read_cr0,
        .write_cr0 = xen_write_cr0,
 
        .read_cr4 = native_read_cr4,