[S390] rework idle code
author Martin Schwidefsky <schwidefsky@de.ibm.com>
Sun, 11 Mar 2012 15:59:27 +0000 (11:59 -0400)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Sun, 11 Mar 2012 15:59:28 +0000 (11:59 -0400)
Whenever the cpu loads an enabled wait PSW it will appear as idle to the
underlying host system. The code in default_idle calls vtime_stop_cpu,
which does the necessary voodoo to get the cpu time accounting right.
The udelay code, however, just loads an enabled wait PSW without doing
that accounting. To correct this, rework the vtime_stop_cpu/vtime_start_cpu
logic and move the difficult parts to entry[64].S. vtime_stop_cpu can now
be called from anywhere and vtime_start_cpu is gone. The correction of the
cpu time during wakeup from an enabled wait PSW is done with a critical
section in entry[64].S. As vtime_start_cpu is gone, s390_idle_check can be
removed as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
13 files changed:
arch/s390/include/asm/cputime.h
arch/s390/include/asm/timer.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/entry64.S
arch/s390/kernel/irq.c
arch/s390/kernel/nmi.c
arch/s390/kernel/process.c
arch/s390/kernel/smp.c
arch/s390/kernel/vtime.c
arch/s390/lib/delay.c
drivers/s390/cio/cio.c

index c23c3900c3040925f253b83900ca72b8cb2481f4..24ef186a1c4f6f0b7170834986aa01d320e45163 100644 (file)
@@ -170,24 +170,17 @@ struct s390_idle_data {
        unsigned int sequence;
        unsigned long long idle_count;
        unsigned long long idle_enter;
+       unsigned long long idle_exit;
        unsigned long long idle_time;
        int nohz_delay;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void vtime_start_cpu(__u64 int_clock, __u64 enter_timer);
 cputime64_t s390_get_idle_time(int cpu);
 
 #define arch_idle_time(cpu) s390_get_idle_time(cpu)
 
-static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock,
-                                  __u64 enter_timer)
-{
-       if (regs->psw.mask & PSW_MASK_WAIT)
-               vtime_start_cpu(int_clock, enter_timer);
-}
-
 static inline int s390_nohz_delay(int cpu)
 {
        return __get_cpu_var(s390_idle).nohz_delay != 0;
index 814243cafdfebc8740ba0ef269217c942cef79ee..e63069ba39e3b274d46d691f62ae74e5d14d4249 100644 (file)
@@ -33,8 +33,8 @@ struct vtimer_queue {
        spinlock_t lock;
        __u64 timer;            /* last programmed timer */
        __u64 elapsed;          /* elapsed time of timer expire values */
-       __u64 idle;             /* temp var for idle */
-       int do_spt;             /* =1: reprogram cpu timer in idle */
+       __u64 idle_enter;       /* cpu timer on idle enter */
+       __u64 idle_exit;        /* cpu timer on idle exit */
 };
 
 extern void init_virt_timer(struct vtimer_list *timer);
index aeeaf896be9b69604566172c60218a9e6113d7f4..ed8c913db79eb6db214100cf31fe57ffbc11a39a 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/kbuild.h>
 #include <linux/sched.h>
+#include <asm/cputime.h>
+#include <asm/timer.h>
 #include <asm/vdso.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -70,6 +72,12 @@ int main(void)
        DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
        DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
        BLANK();
+       /* idle data offsets */
+       DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter));
+       DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit));
+       /* vtimer queue offsets */
+       DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter));
+       DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit));
        /* lowcore offsets */
        DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
        DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
index 6143521a4fffa5f9f311cc0ac71e97b61dd87f89..74ee563fe62b15acb0ff1e4ff2efb9561c682962 100644 (file)
@@ -105,14 +105,14 @@ STACK_SIZE  = 1 << STACK_SHIFT
 
        .macro  ADD64 high,low,timer
        al      \high,\timer
-       al      \low,\timer+4
+       al      \low,4+\timer
        brc     12,.+8
        ahi     \high,1
        .endm
 
        .macro  SUB64 high,low,timer
        sl      \high,\timer
-       sl      \low,\timer+4
+       sl      \low,4+\timer
        brc     3,.+8
        ahi     \high,-1
        .endm
@@ -471,7 +471,6 @@ io_tif:
        jnz     io_work                 # there is work to do (signals etc.)
 io_restore:
        mvc     __LC_RETURN_PSW(8),__PT_PSW(%r11)
-       ni      __LC_RETURN_PSW+1,0xfd  # clean wait state bit
        stpt    __LC_EXIT_TIMER
        lm      %r0,%r15,__PT_R0(%r11)
        lpsw    __LC_RETURN_PSW
@@ -612,6 +611,26 @@ ext_skip:
        basr    %r14,%r1                # call do_extint
        j       io_return
 
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+       st      %r4,__SF_EMPTY(%r15)
+       basr    %r1,0
+       la      %r1,psw_idle_lpsw+4-.(%r1)
+       st      %r1,__SF_EMPTY+4(%r15)
+       oi      __SF_EMPTY+4(%r15),0x80
+       la      %r1,.Lvtimer_max-psw_idle_lpsw-4(%r1)
+       stck    __IDLE_ENTER(%r2)
+       ltr     %r5,%r5
+       stpt    __VQ_IDLE_ENTER(%r3)
+       jz      psw_idle_lpsw
+       spt     0(%r1)
+psw_idle_lpsw:
+       lpsw    __SF_EMPTY(%r15)
+       br      %r14
+psw_idle_end:
+
 __critical_end:
 
 /*
@@ -673,7 +692,6 @@ mcck_skip:
        TRACE_IRQS_ON
 mcck_return:
        mvc     __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
-       ni      __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
        tm      __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
        jno     0f
        lm      %r0,%r15,__PT_R0(%r11)
@@ -748,6 +766,8 @@ cleanup_table:
        .long   io_tif + 0x80000000
        .long   io_restore + 0x80000000
        .long   io_done + 0x80000000
+       .long   psw_idle + 0x80000000
+       .long   psw_idle_end + 0x80000000
 
 cleanup_critical:
        cl      %r9,BASED(cleanup_table)        # system_call
@@ -766,6 +786,10 @@ cleanup_critical:
        jl      cleanup_io_tif
        cl      %r9,BASED(cleanup_table+28)     # io_done
        jl      cleanup_io_restore
+       cl      %r9,BASED(cleanup_table+32)     # psw_idle
+       jl      0f
+       cl      %r9,BASED(cleanup_table+36)     # psw_idle_end
+       jl      cleanup_idle
 0:     br      %r14
 
 cleanup_system_call:
@@ -849,7 +873,6 @@ cleanup_io_restore:
        jhe     0f
        l       %r9,12(%r11)            # get saved r11 pointer to pt_regs
        mvc     __LC_RETURN_PSW(8),__PT_PSW(%r9)
-       ni      __LC_RETURN_PSW+1,0xfd  # clear wait state bit
        mvc     0(32,%r11),__PT_R8(%r9)
        lm      %r0,%r7,__PT_R0(%r9)
 0:     lm      %r8,%r9,__LC_RETURN_PSW
@@ -857,11 +880,52 @@ cleanup_io_restore:
 cleanup_io_restore_insn:
        .long   io_done - 4 + 0x80000000
 
+cleanup_idle:
+       # copy interrupt clock & cpu timer
+       mvc     __IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+       mvc     __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+       chi     %r11,__LC_SAVE_AREA_ASYNC
+       je      0f
+       mvc     __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+       mvc     __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0:     # check if stck & stpt have been executed
+       cl      %r9,BASED(cleanup_idle_insn)
+       jhe     1f
+       mvc     __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+       mvc     __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+       j       2f
+1:     # check if the cpu timer has been reprogrammed
+       ltr     %r5,%r5
+       jz      2f
+       spt     __VQ_IDLE_ENTER(%r3)
+2:     # account system time going idle
+       lm      %r9,%r10,__LC_STEAL_TIMER
+       ADD64   %r9,%r10,__IDLE_ENTER(%r2)
+       SUB64   %r9,%r10,__LC_LAST_UPDATE_CLOCK
+       stm     %r9,%r10,__LC_STEAL_TIMER
+       mvc     __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+       lm      %r9,%r10,__LC_SYSTEM_TIMER
+       ADD64   %r9,%r10,__LC_LAST_UPDATE_TIMER
+       SUB64   %r9,%r10,__VQ_IDLE_ENTER(%r3)
+       stm     %r9,%r10,__LC_SYSTEM_TIMER
+       mvc     __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+       # prepare return psw
+       n       %r8,BASED(cleanup_idle_wait)    # clear wait state bit
+       l       %r9,24(%r11)                    # return from psw_idle
+       br      %r14
+cleanup_idle_insn:
+       .long   psw_idle_lpsw + 0x80000000
+cleanup_idle_wait:
+       .long   0xfffdffff
+
 /*
  * Integer constants
  */
        .align  4
-.Lnr_syscalls:         .long   NR_syscalls
+.Lnr_syscalls:
+       .long   NR_syscalls
+.Lvtimer_max:
+       .quad   0x7fffffffffffffff
 
 /*
  * Symbol constants
index 92b1617d0c951ff1fc39939516fdbf0b408365e5..4984785e3078938da4096bd99c71b3a7a3007b63 100644 (file)
@@ -4,7 +4,8 @@
 #include <linux/types.h>
 #include <linux/signal.h>
 #include <asm/ptrace.h>
-
+#include <asm/cputime.h>
+#include <asm/timer.h>
 
 extern void (*pgm_check_table[128])(struct pt_regs *);
 extern void *restart_stack;
@@ -16,6 +17,8 @@ void io_int_handler(void);
 void mcck_int_handler(void);
 void restart_int_handler(void);
 void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, struct vtimer_queue *,
+             unsigned long, int);
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
index e33789a457527b5f170ff979f9c2d16e0c8a5143..4e1c292fa7e3b21068c238f2b334b53e7b67ce21 100644 (file)
@@ -489,7 +489,6 @@ io_restore:
        lg      %r14,__LC_VDSO_PER_CPU
        lmg     %r0,%r10,__PT_R0(%r11)
        mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
-       ni      __LC_RETURN_PSW+1,0xfd  # clear wait state bit
        stpt    __LC_EXIT_TIMER
        mvc     __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
        lmg     %r11,%r15,__PT_R11(%r11)
@@ -631,6 +630,24 @@ ext_skip:
        brasl   %r14,do_extint
        j       io_return
 
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+       stg     %r4,__SF_EMPTY(%r15)
+       larl    %r1,psw_idle_lpsw+4
+       stg     %r1,__SF_EMPTY+8(%r15)
+       larl    %r1,.Lvtimer_max
+       stck    __IDLE_ENTER(%r2)
+       ltr     %r5,%r5
+       stpt    __VQ_IDLE_ENTER(%r3)
+       jz      psw_idle_lpsw
+       spt     0(%r1)
+psw_idle_lpsw:
+       lpswe   __SF_EMPTY(%r15)
+       br      %r14
+psw_idle_end:
+
 __critical_end:
 
 /*
@@ -696,7 +713,6 @@ mcck_return:
        lg      %r14,__LC_VDSO_PER_CPU
        lmg     %r0,%r10,__PT_R0(%r11)
        mvc     __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
-       ni      __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
        tm      __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
        jno     0f
        stpt    __LC_EXIT_TIMER
@@ -770,6 +786,8 @@ cleanup_table:
        .quad   io_tif
        .quad   io_restore
        .quad   io_done
+       .quad   psw_idle
+       .quad   psw_idle_end
 
 cleanup_critical:
        clg     %r9,BASED(cleanup_table)        # system_call
@@ -788,6 +806,10 @@ cleanup_critical:
        jl      cleanup_io_tif
        clg     %r9,BASED(cleanup_table+56)     # io_done
        jl      cleanup_io_restore
+       clg     %r9,BASED(cleanup_table+64)     # psw_idle
+       jl      0f
+       clg     %r9,BASED(cleanup_table+72)     # psw_idle_end
+       jl      cleanup_idle
 0:     br      %r14
 
 
@@ -877,7 +899,6 @@ cleanup_io_restore:
        je      0f
        lg      %r9,24(%r11)            # get saved r11 pointer to pt_regs
        mvc     __LC_RETURN_PSW(16),__PT_PSW(%r9)
-       ni      __LC_RETURN_PSW+1,0xfd  # clear wait state bit
        mvc     0(64,%r11),__PT_R8(%r9)
        lmg     %r0,%r7,__PT_R0(%r9)
 0:     lmg     %r8,%r9,__LC_RETURN_PSW
@@ -885,6 +906,42 @@ cleanup_io_restore:
 cleanup_io_restore_insn:
        .quad   io_done - 4
 
+cleanup_idle:
+       # copy interrupt clock & cpu timer
+       mvc     __IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+       mvc     __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+       cghi    %r11,__LC_SAVE_AREA_ASYNC
+       je      0f
+       mvc     __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+       mvc     __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0:     # check if stck & stpt have been executed
+       clg     %r9,BASED(cleanup_idle_insn)
+       jhe     1f
+       mvc     __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+       mvc     __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+       j       2f
+1:     # check if the cpu timer has been reprogrammed
+       ltr     %r5,%r5
+       jz      2f
+       spt     __VQ_IDLE_ENTER(%r3)
+2:     # account system time going idle
+       lg      %r9,__LC_STEAL_TIMER
+       alg     %r9,__IDLE_ENTER(%r2)
+       slg     %r9,__LC_LAST_UPDATE_CLOCK
+       stg     %r9,__LC_STEAL_TIMER
+       mvc     __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+       lg      %r9,__LC_SYSTEM_TIMER
+       alg     %r9,__LC_LAST_UPDATE_TIMER
+       slg     %r9,__VQ_IDLE_ENTER(%r3)
+       stg     %r9,__LC_SYSTEM_TIMER
+       mvc     __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+       # prepare return psw
+       nihh    %r8,0xfffd              # clear wait state bit
+       lg      %r9,48(%r11)            # return from psw_idle
+       br      %r14
+cleanup_idle_insn:
+       .quad   psw_idle_lpsw
+
 /*
  * Integer constants
  */
@@ -893,6 +950,8 @@ cleanup_io_restore_insn:
        .quad   __critical_start
 .Lcritical_length:
        .quad   __critical_end - __critical_start
+.Lvtimer_max:
+       .quad   0x7fffffffffffffff
 
 
 #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
index b9a7fdd9c814c95060ae982edbb557cbd2e46a50..09a014c6253710a38f5311dcbbd2c68fde783bb8 100644 (file)
@@ -219,8 +219,6 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code,
 
        code = (unsigned short) ext_int_code;
        old_regs = set_irq_regs(regs);
-       s390_idle_check(regs, S390_lowcore.int_clock,
-                       S390_lowcore.async_enter_timer);
        irq_enter();
        if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
                /* Serve timer interrupts first. */
index 0fd2e863e114e161ced171368521a3512ea2571d..8c372ca613500d93c23daaea586efb9ca585f764 100644 (file)
@@ -254,8 +254,6 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
        int umode;
 
        nmi_enter();
-       s390_idle_check(regs, S390_lowcore.mcck_clock,
-                       S390_lowcore.mcck_enter_timer);
        kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;
        mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
        mcck = &__get_cpu_var(cpu_mcck);
index e795933eb2cbaaf446d5ea62c0b43976d38479f0..78b3c149b8b6c2ef4825e4546b811b18eb7d03b7 100644 (file)
@@ -77,13 +77,8 @@ static void default_idle(void)
                local_irq_enable();
                return;
        }
-       trace_hardirqs_on();
-       /* Don't trace preempt off for idle. */
-       stop_critical_timings();
-       /* Stop virtual timer and halt the cpu. */
+       /* Halt the cpu and keep track of cpu time accounting. */
        vtime_stop_cpu();
-       /* Reenable preemption tracer. */
-       start_critical_timings();
 }
 
 void cpu_idle(void)
index 6db8526a602d7c4b50a01d967c50023e1341cc02..afd6e5113a90dee922debfa8f20b25a817cc5101 100644 (file)
@@ -972,22 +972,16 @@ static DEVICE_ATTR(capability, 0444, show_capability, NULL);
 static ssize_t show_idle_count(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct s390_idle_data *idle;
+       struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
        unsigned long long idle_count;
        unsigned int sequence;
 
-       idle = &per_cpu(s390_idle, dev->id);
-repeat:
-       sequence = idle->sequence;
-       smp_rmb();
-       if (sequence & 1)
-               goto repeat;
-       idle_count = idle->idle_count;
-       if (idle->idle_enter)
-               idle_count++;
-       smp_rmb();
-       if (idle->sequence != sequence)
-               goto repeat;
+       do {
+               sequence = ACCESS_ONCE(idle->sequence);
+               idle_count = ACCESS_ONCE(idle->idle_count);
+               if (ACCESS_ONCE(idle->idle_enter))
+                       idle_count++;
+       } while ((sequence & 1) || (idle->sequence != sequence));
        return sprintf(buf, "%llu\n", idle_count);
 }
 static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -995,24 +989,18 @@ static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
 static ssize_t show_idle_time(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct s390_idle_data *idle;
-       unsigned long long now, idle_time, idle_enter;
+       struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+       unsigned long long now, idle_time, idle_enter, idle_exit;
        unsigned int sequence;
 
-       idle = &per_cpu(s390_idle, dev->id);
-       now = get_clock();
-repeat:
-       sequence = idle->sequence;
-       smp_rmb();
-       if (sequence & 1)
-               goto repeat;
-       idle_time = idle->idle_time;
-       idle_enter = idle->idle_enter;
-       if (idle_enter != 0ULL && idle_enter < now)
-               idle_time += now - idle_enter;
-       smp_rmb();
-       if (idle->sequence != sequence)
-               goto repeat;
+       do {
+               now = get_clock();
+               sequence = ACCESS_ONCE(idle->sequence);
+               idle_time = ACCESS_ONCE(idle->idle_time);
+               idle_enter = ACCESS_ONCE(idle->idle_enter);
+               idle_exit = ACCESS_ONCE(idle->idle_exit);
+       } while ((sequence & 1) || (idle->sequence != sequence));
+       idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
        return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
index 7bacee9a546f5058a9fdb182722882a1ecbeae86..277ea712b2320a037265ed24023e0fd3e480f906 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/irq_regs.h>
 #include <asm/cputime.h>
 #include <asm/irq.h>
+#include "entry.h"
 
 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
 
@@ -123,153 +124,53 @@ void account_system_vtime(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer)
+void __kprobes vtime_stop_cpu(void)
 {
        struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
        struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-       __u64 idle_time, expires;
+       unsigned long long idle_time;
+       unsigned long psw_mask;
 
-       if (idle->idle_enter == 0ULL)
-               return;
+       trace_hardirqs_on();
+       /* Don't trace preempt off for idle. */
+       stop_critical_timings();
 
-       /* Account time spent with enabled wait psw loaded as idle time. */
-       idle_time = int_clock - idle->idle_enter;
-       account_idle_time(idle_time);
-       S390_lowcore.steal_timer +=
-               idle->idle_enter - S390_lowcore.last_update_clock;
-       S390_lowcore.last_update_clock = int_clock;
-
-       /* Account system time spent going idle. */
-       S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
-       S390_lowcore.last_update_timer = enter_timer;
-
-       /* Restart vtime CPU timer */
-       if (vq->do_spt) {
-               /* Program old expire value but first save progress. */
-               expires = vq->idle - enter_timer;
-               expires += get_vtimer();
-               set_vtimer(expires);
-       } else {
-               /* Don't account the CPU timer delta while the cpu was idle. */
-               vq->elapsed -= vq->idle - enter_timer;
-       }
+       /* Wait for external, I/O or machine check interrupt. */
+       psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
+               PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+       idle->nohz_delay = 0;
 
+       /* Call the assembler magic in entry.S */
+       psw_idle(idle, vq, psw_mask, !list_empty(&vq->list));
+
+       /* Reenable preemption tracer. */
+       start_critical_timings();
+
+       /* Account time spent with enabled wait psw loaded as idle time. */
        idle->sequence++;
        smp_wmb();
+       idle_time = idle->idle_exit - idle->idle_enter;
        idle->idle_time += idle_time;
-       idle->idle_enter = 0ULL;
+       idle->idle_enter = idle->idle_exit = 0ULL;
        idle->idle_count++;
+       account_idle_time(idle_time);
        smp_wmb();
        idle->sequence++;
 }
 
-void __kprobes vtime_stop_cpu(void)
-{
-       struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
-       struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-       psw_t psw;
-
-       /* Wait for external, I/O or machine check interrupt. */
-       psw.mask = psw_kernel_bits | PSW_MASK_WAIT |
-               PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-
-       idle->nohz_delay = 0;
-
-       /* Check if the CPU timer needs to be reprogrammed. */
-       if (vq->do_spt) {
-               __u64 vmax = VTIMER_MAX_SLICE;
-               /*
-                * The inline assembly is equivalent to
-                *      vq->idle = get_cpu_timer();
-                *      set_cpu_timer(VTIMER_MAX_SLICE);
-                *      idle->idle_enter = get_clock();
-                *      __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-                *                         PSW_MASK_DAT | PSW_MASK_IO |
-                *                         PSW_MASK_EXT | PSW_MASK_MCHECK);
-                * The difference is that the inline assembly makes sure that
-                * the last three instruction are stpt, stck and lpsw in that
-                * order. This is done to increase the precision.
-                */
-               asm volatile(
-#ifndef CONFIG_64BIT
-                       "       basr    1,0\n"
-                       "0:     ahi     1,1f-0b\n"
-                       "       st      1,4(%2)\n"
-#else /* CONFIG_64BIT */
-                       "       larl    1,1f\n"
-                       "       stg     1,8(%2)\n"
-#endif /* CONFIG_64BIT */
-                       "       stpt    0(%4)\n"
-                       "       spt     0(%5)\n"
-                       "       stck    0(%3)\n"
-#ifndef CONFIG_64BIT
-                       "       lpsw    0(%2)\n"
-#else /* CONFIG_64BIT */
-                       "       lpswe   0(%2)\n"
-#endif /* CONFIG_64BIT */
-                       "1:"
-                       : "=m" (idle->idle_enter), "=m" (vq->idle)
-                       : "a" (&psw), "a" (&idle->idle_enter),
-                         "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
-                       : "memory", "cc", "1");
-       } else {
-               /*
-                * The inline assembly is equivalent to
-                *      vq->idle = get_cpu_timer();
-                *      idle->idle_enter = get_clock();
-                *      __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-                *                         PSW_MASK_DAT | PSW_MASK_IO |
-                *                         PSW_MASK_EXT | PSW_MASK_MCHECK);
-                * The difference is that the inline assembly makes sure that
-                * the last three instruction are stpt, stck and lpsw in that
-                * order. This is done to increase the precision.
-                */
-               asm volatile(
-#ifndef CONFIG_64BIT
-                       "       basr    1,0\n"
-                       "0:     ahi     1,1f-0b\n"
-                       "       st      1,4(%2)\n"
-#else /* CONFIG_64BIT */
-                       "       larl    1,1f\n"
-                       "       stg     1,8(%2)\n"
-#endif /* CONFIG_64BIT */
-                       "       stpt    0(%4)\n"
-                       "       stck    0(%3)\n"
-#ifndef CONFIG_64BIT
-                       "       lpsw    0(%2)\n"
-#else /* CONFIG_64BIT */
-                       "       lpswe   0(%2)\n"
-#endif /* CONFIG_64BIT */
-                       "1:"
-                       : "=m" (idle->idle_enter), "=m" (vq->idle)
-                       : "a" (&psw), "a" (&idle->idle_enter),
-                         "a" (&vq->idle), "m" (psw)
-                       : "memory", "cc", "1");
-       }
-}
-
 cputime64_t s390_get_idle_time(int cpu)
 {
-       struct s390_idle_data *idle;
-       unsigned long long now, idle_time, idle_enter;
+       struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+       unsigned long long now, idle_enter, idle_exit;
        unsigned int sequence;
 
-       idle = &per_cpu(s390_idle, cpu);
-
-       now = get_clock();
-repeat:
-       sequence = idle->sequence;
-       smp_rmb();
-       if (sequence & 1)
-               goto repeat;
-       idle_time = 0;
-       idle_enter = idle->idle_enter;
-       if (idle_enter != 0ULL && idle_enter < now)
-               idle_time = now - idle_enter;
-       smp_rmb();
-       if (idle->sequence != sequence)
-               goto repeat;
-       return idle_time;
+       do {
+               now = get_clock();
+               sequence = ACCESS_ONCE(idle->sequence);
+               idle_enter = ACCESS_ONCE(idle->idle_enter);
+               idle_exit = ACCESS_ONCE(idle->idle_exit);
+       } while ((sequence & 1) || (idle->sequence != sequence));
+       return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
 }
 
 /*
@@ -346,7 +247,6 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,
        }
        spin_unlock(&vq->lock);
 
-       vq->do_spt = list_empty(&cb_list);
        do_callbacks(&cb_list);
 
        /* next event is first in list */
@@ -355,8 +255,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,
        if (!list_empty(&vq->list)) {
                event = list_first_entry(&vq->list, struct vtimer_list, entry);
                next = event->expires;
-       } else
-               vq->do_spt = 0;
+       }
        spin_unlock(&vq->lock);
        /*
         * To improve precision add the time spent by the
index db92f044024c4508993fd4b0e7c67c9b74e37b0e..9f1f71e857784096b76ddf88f92c5464813a39b4 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/irqflags.h>
 #include <linux/interrupt.h>
 #include <asm/div64.h>
+#include <asm/timer.h>
 
 void __delay(unsigned long loops)
 {
@@ -28,36 +29,33 @@ void __delay(unsigned long loops)
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-       unsigned long mask, cr0, cr0_saved;
-       u64 clock_saved;
-       u64 end;
+       unsigned long cr0, cr6, new;
+       u64 clock_saved, end;
 
-       mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT |
-               PSW_MASK_EXT | PSW_MASK_MCHECK;
        end = get_clock() + (usecs << 12);
        clock_saved = local_tick_disable();
-       __ctl_store(cr0_saved, 0, 0);
-       cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
-       __ctl_load(cr0 , 0, 0);
+       __ctl_store(cr0, 0, 0);
+       __ctl_store(cr6, 6, 6);
+       new = (cr0 &  0xffff00e0) | 0x00000800;
+       __ctl_load(new , 0, 0);
+       new = 0;
+       __ctl_load(new, 6, 6);
        lockdep_off();
        do {
                set_clock_comparator(end);
-               trace_hardirqs_on();
-               __load_psw_mask(mask);
+               vtime_stop_cpu();
                local_irq_disable();
        } while (get_clock() < end);
        lockdep_on();
-       __ctl_load(cr0_saved, 0, 0);
+       __ctl_load(cr0, 0, 0);
+       __ctl_load(cr6, 6, 6);
        local_tick_enable(clock_saved);
 }
 
 static void __udelay_enabled(unsigned long long usecs)
 {
-       unsigned long mask;
-       u64 clock_saved;
-       u64 end;
+       u64 clock_saved, end;
 
-       mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
        end = get_clock() + (usecs << 12);
        do {
                clock_saved = 0;
@@ -65,8 +63,7 @@ static void __udelay_enabled(unsigned long long usecs)
                        clock_saved = local_tick_disable();
                        set_clock_comparator(end);
                }
-               trace_hardirqs_on();
-               __load_psw_mask(mask);
+               vtime_stop_cpu();
                local_irq_disable();
                if (clock_saved)
                        local_tick_enable(clock_saved);
index dc67c397449e529f7cb377f776df6784d72944c1..a49c46c9198367dafb29aa583c7d62ca865681b9 100644 (file)
@@ -601,8 +601,6 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
        struct pt_regs *old_regs;
 
        old_regs = set_irq_regs(regs);
-       s390_idle_check(regs, S390_lowcore.int_clock,
-                       S390_lowcore.async_enter_timer);
        irq_enter();
        __this_cpu_write(s390_idle.nohz_delay, 1);
        if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)