Merge branch 'x86/urgent' into x86/xen
author    Ingo Molnar <mingo@elte.hu>
          Fri, 8 May 2009 08:50:00 +0000 (10:50 +0200)
committer Ingo Molnar <mingo@elte.hu>
          Fri, 8 May 2009 08:50:00 +0000 (10:50 +0200)
Conflicts:
arch/frv/include/asm/pgtable.h
arch/x86/include/asm/required-features.h
arch/x86/xen/mmu.c

Merge reason: x86/xen was on a .29 base still, move it to a fresher
              branch and pick up Xen fixes as well, plus resolve
              conflicts

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/paravirt.h
arch/x86/lguest/boot.c
arch/x86/mm/pageattr.c
arch/x86/xen/mmu.c
kernel/sched.c

diff --combined arch/x86/include/asm/paravirt.h
index bc384be6aa44d404ca12ce7ec350bee248dd6df4,378e3691c08c54dd76e060eb468a00e8b61c59f9..1fe583783792800da28de628b5bc9dea69382b28
@@@ -56,7 -56,6 +56,7 @@@ struct desc_ptr
  struct tss_struct;
  struct mm_struct;
  struct desc_struct;
 +struct task_struct;
  
  /*
   * Wrapper type for pointers to code which uses the non-standard
@@@ -204,8 -203,7 +204,8 @@@ struct pv_cpu_ops 
  
        void (*swapgs)(void);
  
 -      struct pv_lazy_ops lazy_mode;
 +      void (*start_context_switch)(struct task_struct *prev);
 +      void (*end_context_switch)(struct task_struct *next);
  };
  
  struct pv_irq_ops {
@@@ -349,7 -347,7 +349,7 @@@ struct pv_mmu_ops 
      /* Sometimes the physical address is a pfn, and sometimes it's
           an mfn.  We can tell which is which from the index. */
        void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
-                          unsigned long phys, pgprot_t flags);
+                          phys_addr_t phys, pgprot_t flags);
  };
  
  struct raw_spinlock;
@@@ -1401,23 -1399,25 +1401,23 @@@ enum paravirt_lazy_mode 
  };
  
  enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
 -void paravirt_enter_lazy_cpu(void);
 -void paravirt_leave_lazy_cpu(void);
 +void paravirt_start_context_switch(struct task_struct *prev);
 +void paravirt_end_context_switch(struct task_struct *next);
 +
  void paravirt_enter_lazy_mmu(void);
  void paravirt_leave_lazy_mmu(void);
 -void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
  
 -#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
 -static inline void arch_enter_lazy_cpu_mode(void)
 +#define  __HAVE_ARCH_START_CONTEXT_SWITCH
 +static inline void arch_start_context_switch(struct task_struct *prev)
  {
 -      PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
 +      PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
  }
  
 -static inline void arch_leave_lazy_cpu_mode(void)
 +static inline void arch_end_context_switch(struct task_struct *next)
  {
 -      PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 +      PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
  }
  
 -void arch_flush_lazy_cpu_mode(void);
 -
  #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
  static inline void arch_enter_lazy_mmu_mode(void)
  {
@@@ -1432,7 -1432,7 +1432,7 @@@ static inline void arch_leave_lazy_mmu_
  void arch_flush_lazy_mmu_mode(void);
  
  static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
-                               unsigned long phys, pgprot_t flags)
+                               phys_addr_t phys, pgprot_t flags)
  {
        pv_mmu_ops.set_fixmap(idx, phys, flags);
  }
diff --combined arch/x86/lguest/boot.c
index cfb2d68dc7959a6767696f509418f115a1307a07,ca7ec44bafc3b313aa1e3919339042f8ed53cf20..8f935c6d5512232dd44faf794da816313691f2c4
@@@ -166,16 -166,10 +166,16 @@@ static void lazy_hcall3(unsigned long c
  
  /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
   * issue the do-nothing hypercall to flush any stored calls. */
 -static void lguest_leave_lazy_mode(void)
 +static void lguest_leave_lazy_mmu_mode(void)
  {
 -      paravirt_leave_lazy(paravirt_get_lazy_mode());
        kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 +      paravirt_leave_lazy_mmu();
 +}
 +
 +static void lguest_end_context_switch(struct task_struct *next)
 +{
 +      kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 +      paravirt_end_context_switch(next);
  }
  
  /*G:033
@@@ -279,15 -273,15 +279,15 @@@ static void lguest_load_idt(const struc
   * controls the entire thing and the Guest asks it to make changes using the
   * LOAD_GDT hypercall.
   *
-  * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
-  * hypercall and use that repeatedly to load a new IDT.  I don't think it
-  * really matters, but wouldn't it be nice if they were the same?  Wouldn't
-  * it be even better if you were the one to send the patch to fix it?
+  * This is exactly like the IDT code.
   */
  static void lguest_load_gdt(const struct desc_ptr *desc)
  {
-       BUG_ON((desc->size + 1) / 8 != GDT_ENTRIES);
-       kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES);
+       unsigned int i;
+       struct desc_struct *gdt = (void *)desc->address;
+       for (i = 0; i < (desc->size+1)/8; i++)
+               kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b);
  }
  
  /* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
@@@ -297,7 -291,9 +297,9 @@@ static void lguest_write_gdt_entry(stru
                                   const void *desc, int type)
  {
        native_write_gdt_entry(dt, entrynum, desc, type);
-       kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
+       /* Tell Host about this new entry. */
+       kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum,
+                      dt[entrynum].a, dt[entrynum].b);
  }
  
  /* OK, I lied.  There are three "thread local storage" GDT entries which change
@@@ -667,7 -663,7 +669,7 @@@ static unsigned long lguest_tsc_khz(voi
  
  /* If we can't use the TSC, the kernel falls back to our lower-priority
   * "lguest_clock", where we read the time value given to us by the Host. */
- static cycle_t lguest_clock_read(void)
+ static cycle_t lguest_clock_read(struct clocksource *cs)
  {
        unsigned long sec, nsec;
  
@@@ -1057,8 -1053,8 +1059,8 @@@ __init void lguest_init(void
        pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
        pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
        pv_cpu_ops.wbinvd = lguest_wbinvd;
 -      pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
 -      pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
 +      pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
 +      pv_cpu_ops.end_context_switch = lguest_end_context_switch;
  
        /* pagetable management */
        pv_mmu_ops.write_cr3 = lguest_write_cr3;
        pv_mmu_ops.read_cr2 = lguest_read_cr2;
        pv_mmu_ops.read_cr3 = lguest_read_cr3;
        pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
 -      pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
 +      pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
        pv_mmu_ops.pte_update = lguest_pte_update;
        pv_mmu_ops.pte_update_defer = lguest_pte_update;
  
diff --combined arch/x86/mm/pageattr.c
index 660cac75ae11e489e2d443bee8d9a78d37eb8178,797f9f107cb6871a3797680c7dbed9430cb0f354..b81b41a0481f8a42924c78b036ab2630722fefeb
@@@ -844,6 -844,13 +844,6 @@@ static int change_page_attr_set_clr(uns
  
        vm_unmap_aliases();
  
 -      /*
 -       * If we're called with lazy mmu updates enabled, the
 -       * in-memory pte state may be stale.  Flush pending updates to
 -       * bring them up to date.
 -       */
 -      arch_flush_lazy_mmu_mode();
 -
        cpa.vaddr = addr;
        cpa.pages = pages;
        cpa.numpages = numpages;
        } else
                cpa_flush_all(cache);
  
 -      /*
 -       * If we've been called with lazy mmu updates enabled, then
 -       * make sure that everything gets flushed out before we
 -       * return.
 -       */
 -      arch_flush_lazy_mmu_mode();
 -
  out:
        return ret;
  }
@@@ -931,71 -945,94 +931,94 @@@ int _set_memory_uc(unsigned long addr, 
  
  int set_memory_uc(unsigned long addr, int numpages)
  {
+       int ret;
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
-       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-                           _PAGE_CACHE_UC_MINUS, NULL))
-               return -EINVAL;
+       ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+                           _PAGE_CACHE_UC_MINUS, NULL);
+       if (ret)
+               goto out_err;
+       ret = _set_memory_uc(addr, numpages);
+       if (ret)
+               goto out_free;
  
-       return _set_memory_uc(addr, numpages);
+       return 0;
+ out_free:
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ out_err:
+       return ret;
  }
  EXPORT_SYMBOL(set_memory_uc);
  
  int set_memory_array_uc(unsigned long *addr, int addrinarray)
  {
-       unsigned long start;
-       unsigned long end;
-       int i;
+       int i, j;
+       int ret;
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
        for (i = 0; i < addrinarray; i++) {
-               start = __pa(addr[i]);
-               for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
-                       goto out;
+               ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
+                                       _PAGE_CACHE_UC_MINUS, NULL);
+               if (ret)
+                       goto out_free;
        }
  
-       return change_page_attr_set(addr, addrinarray,
+       ret = change_page_attr_set(addr, addrinarray,
                                    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
- out:
-       for (i = 0; i < addrinarray; i++) {
-               unsigned long tmp = __pa(addr[i]);
-               if (tmp == start)
-                       break;
-               for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               free_memtype(tmp, end);
-       }
-       return -EINVAL;
+       if (ret)
+               goto out_free;
+       return 0;
+ out_free:
+       for (j = 0; j < i; j++)
+               free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
+       return ret;
  }
  EXPORT_SYMBOL(set_memory_array_uc);
  
  int _set_memory_wc(unsigned long addr, int numpages)
  {
-       return change_page_attr_set(&addr, numpages,
+       int ret;
+       ret = change_page_attr_set(&addr, numpages,
+                                   __pgprot(_PAGE_CACHE_UC_MINUS), 0);
+       if (!ret) {
+               ret = change_page_attr_set(&addr, numpages,
                                    __pgprot(_PAGE_CACHE_WC), 0);
+       }
+       return ret;
  }
  
  int set_memory_wc(unsigned long addr, int numpages)
  {
+       int ret;
        if (!pat_enabled)
                return set_memory_uc(addr, numpages);
  
-       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-               _PAGE_CACHE_WC, NULL))
-               return -EINVAL;
+       ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+               _PAGE_CACHE_WC, NULL);
+       if (ret)
+               goto out_err;
+       ret = _set_memory_wc(addr, numpages);
+       if (ret)
+               goto out_free;
+       return 0;
  
-       return _set_memory_wc(addr, numpages);
+ out_free:
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ out_err:
+       return ret;
  }
  EXPORT_SYMBOL(set_memory_wc);
  
@@@ -1007,29 -1044,31 +1030,31 @@@ int _set_memory_wb(unsigned long addr, 
  
  int set_memory_wb(unsigned long addr, int numpages)
  {
-       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+       int ret;
+       ret = _set_memory_wb(addr, numpages);
+       if (ret)
+               return ret;
  
-       return _set_memory_wb(addr, numpages);
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+       return 0;
  }
  EXPORT_SYMBOL(set_memory_wb);
  
  int set_memory_array_wb(unsigned long *addr, int addrinarray)
  {
        int i;
+       int ret;
  
-       for (i = 0; i < addrinarray; i++) {
-               unsigned long start = __pa(addr[i]);
-               unsigned long end;
-               for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               free_memtype(start, end);
-       }
-       return change_page_attr_clear(addr, addrinarray,
+       ret = change_page_attr_clear(addr, addrinarray,
                                      __pgprot(_PAGE_CACHE_MASK), 1);
+       if (ret)
+               return ret;
+       for (i = 0; i < addrinarray; i++)
+               free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
+       return 0;
  }
  EXPORT_SYMBOL(set_memory_array_wb);
  
@@@ -1122,6 -1161,8 +1147,8 @@@ int set_pages_array_wb(struct page **pa
  
        retval = cpa_clear_pages_array(pages, addrinarray,
                        __pgprot(_PAGE_CACHE_MASK));
+       if (retval)
+               return retval;
  
        for (i = 0; i < addrinarray; i++) {
                start = (unsigned long)page_address(pages[i]);
                free_memtype(start, end);
        }
  
-       return retval;
+       return 0;
  }
  EXPORT_SYMBOL(set_pages_array_wb);
  
diff --combined arch/x86/xen/mmu.c
index a96f5b9393ea9b8a0540660e9993723b5ff662be,e25a78e1113a11f8b2c057508697d48b3d99b2af..760e3a512059053041db5a9a0216ad5d4e203ca8
@@@ -451,6 -451,10 +451,6 @@@ void set_pte_mfn(unsigned long vaddr, u
  void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval)
  {
 -      /* updates to init_mm may be done without lock */
 -      if (mm == &init_mm)
 -              preempt_disable();
 -
        ADD_STATS(set_pte_at, 1);
  //    ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
        ADD_STATS(set_pte_at_current, mm == current->mm);
        }
        xen_set_pte(ptep, pteval);
  
 -out:
 -      if (mm == &init_mm)
 -              preempt_enable();
 +out:  return;
  }
  
  pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@@ -1145,8 -1151,10 +1145,8 @@@ static void drop_other_mm_ref(void *inf
  
        /* If this cpu still has a stale cr3 reference, then make sure
           it has been flushed. */
 -      if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
 +      if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
                load_cr3(swapper_pg_dir);
 -              arch_flush_lazy_cpu_mode();
 -      }
  }
  
  static void xen_drop_mm_ref(struct mm_struct *mm)
                        load_cr3(swapper_pg_dir);
                else
                        leave_mm(smp_processor_id());
 -              arch_flush_lazy_cpu_mode();
        }
  
        /* Get the "official" set of cpus referring to our pagetable. */
@@@ -1785,11 -1794,16 +1785,16 @@@ __init pgd_t *xen_setup_kernel_pagetabl
  
        pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
  
+       reserve_early(__pa(xen_start_info->pt_base),
+                     __pa(xen_start_info->pt_base +
+                          xen_start_info->nr_pt_frames * PAGE_SIZE),
+                     "XEN PAGETABLES");
        return swapper_pg_dir;
  }
  #endif        /* CONFIG_X86_64 */
  
- static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  {
        pte_t pte;
  
@@@ -1861,14 -1875,6 +1866,14 @@@ __init void xen_post_allocator_init(voi
        xen_mark_init_mm_pinned();
  }
  
 +static void xen_leave_lazy_mmu(void)
 +{
 +      preempt_disable();
 +      xen_mc_flush();
 +      paravirt_leave_lazy_mmu();
 +      preempt_enable();
 +}
 +
  const struct pv_mmu_ops xen_mmu_ops __initdata = {
        .pagetable_setup_start = xen_pagetable_setup_start,
        .pagetable_setup_done = xen_pagetable_setup_done,
  
        .lazy_mode = {
                .enter = paravirt_enter_lazy_mmu,
 -              .leave = xen_leave_lazy,
 +              .leave = xen_leave_lazy_mmu,
        },
  
        .set_fixmap = xen_set_fixmap,
diff --combined kernel/sched.c
index b38bd96098f62da8154a9edf048ca15f0c95d6b6,26efa475bdc143f6e4459067c18ce57e71608764..9e0fd1ef1a47425b1388d19af05b8d4b2bab1b7f
@@@ -1418,10 -1418,22 +1418,22 @@@ iter_move_one_task(struct rq *this_rq, 
                   struct rq_iterator *iterator);
  #endif
  
+ /* Time spent by the tasks of the cpu accounting group executing in ... */
+ enum cpuacct_stat_index {
+       CPUACCT_STAT_USER,      /* ... user mode */
+       CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
+       CPUACCT_STAT_NSTATS,
+ };
  #ifdef CONFIG_CGROUP_CPUACCT
  static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ static void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val);
  #else
  static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+ static inline void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val) {}
  #endif
  
  static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@@ -2754,7 -2766,7 +2766,7 @@@ context_switch(struct rq *rq, struct ta
         * combine the page table reload and the switch backend into
         * one hypercall.
         */
 -      arch_enter_lazy_cpu_mode();
 +      arch_start_context_switch(prev);
  
        if (unlikely(!mm)) {
                next->active_mm = oldmm;
@@@ -4511,9 -4523,25 +4523,25 @@@ DEFINE_PER_CPU(struct kernel_stat, ksta
  EXPORT_PER_CPU_SYMBOL(kstat);
  
  /*
-  * Return any ns on the sched_clock that have not yet been banked in
+  * Return any ns on the sched_clock that have not yet been accounted in
   * @p in case that task is currently running.
+  *
+  * Called with task_rq_lock() held on @rq.
   */
+ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+ {
+       u64 ns = 0;
+       if (task_current(rq, p)) {
+               update_rq_clock(rq);
+               ns = rq->clock - p->se.exec_start;
+               if ((s64)ns < 0)
+                       ns = 0;
+       }
+       return ns;
+ }
  unsigned long long task_delta_exec(struct task_struct *p)
  {
        unsigned long flags;
        u64 ns = 0;
  
        rq = task_rq_lock(p, &flags);
+       ns = do_task_delta_exec(p, rq);
+       task_rq_unlock(rq, &flags);
  
-       if (task_current(rq, p)) {
-               u64 delta_exec;
+       return ns;
+ }
  
-               update_rq_clock(rq);
-               delta_exec = rq->clock - p->se.exec_start;
-               if ((s64)delta_exec > 0)
-                       ns = delta_exec;
-       }
+ /*
+  * Return accounted runtime for the task.
+  * In case the task is currently running, return the runtime plus current's
+  * pending runtime that have not been accounted yet.
+  */
+ unsigned long long task_sched_runtime(struct task_struct *p)
+ {
+       unsigned long flags;
+       struct rq *rq;
+       u64 ns = 0;
+       rq = task_rq_lock(p, &flags);
+       ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+       task_rq_unlock(rq, &flags);
+       return ns;
+ }
+ /*
+  * Return sum_exec_runtime for the thread group.
+  * In case the task is currently running, return the sum plus current's
+  * pending runtime that have not been accounted yet.
+  *
+  * Note that the thread group might have other running tasks as well,
+  * so the return value does not include other pending runtime that other
+  * running tasks might have.
+  */
+ unsigned long long thread_group_sched_runtime(struct task_struct *p)
+ {
+       struct task_cputime totals;
+       unsigned long flags;
+       struct rq *rq;
+       u64 ns;
  
+       rq = task_rq_lock(p, &flags);
+       thread_group_cputime(p, &totals);
+       ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
        task_rq_unlock(rq, &flags);
  
        return ns;
@@@ -4559,6 -4620,8 +4620,8 @@@ void account_user_time(struct task_stru
                cpustat->nice = cputime64_add(cpustat->nice, tmp);
        else
                cpustat->user = cputime64_add(cpustat->user, tmp);
+       cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
        /* Account for user time used */
        acct_update_integrals(p);
  }
@@@ -4620,6 -4683,8 +4683,8 @@@ void account_system_time(struct task_st
        else
                cpustat->system = cputime64_add(cpustat->system, tmp);
  
+       cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
        /* Account for system time used */
        acct_update_integrals(p);
  }
@@@ -4667,7 -4732,7 +4732,7 @@@ void account_process_tick(struct task_s
  
        if (user_tick)
                account_user_time(p, one_jiffy, one_jiffy_scaled);
-       else if (p != rq->idle)
+       else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
                                    one_jiffy_scaled);
        else
@@@ -4781,7 -4846,7 +4846,7 @@@ void scheduler_tick(void
  #endif
  }
  
- unsigned long get_parent_ip(unsigned long addr)
+ notrace unsigned long get_parent_ip(unsigned long addr)
  {
        if (in_lock_functions(addr)) {
                addr = CALLER_ADDR2;
@@@ -7302,7 -7367,12 +7367,12 @@@ static int sched_domain_debug_one(struc
                cpumask_or(groupmask, groupmask, sched_group_cpus(group));
  
                cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
                printk(KERN_CONT " %s", str);
+               if (group->__cpu_power != SCHED_LOAD_SCALE) {
+                       printk(KERN_CONT " (__cpu_power = %d)",
+                               group->__cpu_power);
+               }
  
                group = group->next;
        } while (group != sd->groups);
@@@ -9925,6 -9995,7 +9995,7 @@@ struct cpuacct 
        struct cgroup_subsys_state css;
        /* cpuusage holds pointer to a u64-type object on every cpu */
        u64 *cpuusage;
+       struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
        struct cpuacct *parent;
  };
  
@@@ -9949,20 -10020,32 +10020,32 @@@ static struct cgroup_subsys_state *cpua
        struct cgroup_subsys *ss, struct cgroup *cgrp)
  {
        struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+       int i;
  
        if (!ca)
-               return ERR_PTR(-ENOMEM);
+               goto out;
  
        ca->cpuusage = alloc_percpu(u64);
-       if (!ca->cpuusage) {
-               kfree(ca);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (!ca->cpuusage)
+               goto out_free_ca;
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+               if (percpu_counter_init(&ca->cpustat[i], 0))
+                       goto out_free_counters;
  
        if (cgrp->parent)
                ca->parent = cgroup_ca(cgrp->parent);
  
        return &ca->css;
+ out_free_counters:
+       while (--i >= 0)
+               percpu_counter_destroy(&ca->cpustat[i]);
+       free_percpu(ca->cpuusage);
+ out_free_ca:
+       kfree(ca);
+ out:
+       return ERR_PTR(-ENOMEM);
  }
  
  /* destroy an existing cpu accounting group */
@@@ -9970,7 -10053,10 +10053,10 @@@ static voi
  cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
  {
        struct cpuacct *ca = cgroup_ca(cgrp);
+       int i;
  
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+               percpu_counter_destroy(&ca->cpustat[i]);
        free_percpu(ca->cpuusage);
        kfree(ca);
  }
@@@ -10057,6 -10143,25 +10143,25 @@@ static int cpuacct_percpu_seq_read(stru
        return 0;
  }
  
+ static const char *cpuacct_stat_desc[] = {
+       [CPUACCT_STAT_USER] = "user",
+       [CPUACCT_STAT_SYSTEM] = "system",
+ };
+ static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+               struct cgroup_map_cb *cb)
+ {
+       struct cpuacct *ca = cgroup_ca(cgrp);
+       int i;
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+               s64 val = percpu_counter_read(&ca->cpustat[i]);
+               val = cputime64_to_clock_t(val);
+               cb->fill(cb, cpuacct_stat_desc[i], val);
+       }
+       return 0;
+ }
  static struct cftype files[] = {
        {
                .name = "usage",
                .name = "usage_percpu",
                .read_seq_string = cpuacct_percpu_seq_read,
        },
+       {
+               .name = "stat",
+               .read_map = cpuacct_stats_show,
+       },
  };
  
  static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@@ -10089,12 -10197,38 +10197,38 @@@ static void cpuacct_charge(struct task_
                return;
  
        cpu = task_cpu(tsk);
+       rcu_read_lock();
        ca = task_ca(tsk);
  
        for (; ca; ca = ca->parent) {
                u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                *cpuusage += cputime;
        }
+       rcu_read_unlock();
+ }
+ /*
+  * Charge the system/user time to the task's accounting group.
+  */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val)
+ {
+       struct cpuacct *ca;
+       if (unlikely(!cpuacct_subsys.active))
+               return;
+       rcu_read_lock();
+       ca = task_ca(tsk);
+       do {
+               percpu_counter_add(&ca->cpustat[idx], val);
+               ca = ca->parent;
+       } while (ca);
+       rcu_read_unlock();
  }
  
  struct cgroup_subsys cpuacct_subsys = {