if INSTRUMENTATION
-source "arch/i386/oprofile/Kconfig"
+source "arch/x86/oprofile/Kconfig"
config KPROBES
bool "Kprobes"
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
-drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
drivers-$(CONFIG_PM) += arch/x86/power/
drivers-$(CONFIG_FB) += arch/i386/video/
+++ /dev/null
-config PROFILING
- bool "Profiling support (EXPERIMENTAL)"
- help
- Say Y here to enable the extended profiling support mechanisms used
- by profilers such as OProfile.
-
-
-config OPROFILE
- tristate "OProfile system profiling (EXPERIMENTAL)"
- depends on PROFILING
- help
- OProfile is a profiling system capable of profiling the
- whole system, include the kernel, kernel modules, libraries,
- and applications.
-
- If unsure, say N.
-
+++ /dev/null
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
- oprof.o cpu_buffer.o buffer_sync.o \
- event_buffer.o oprofile_files.o \
- oprofilefs.o oprofile_stats.o \
- timer_int.o )
-
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
-oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
- op_model_ppro.o op_model_p4.o
-oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
+++ /dev/null
-/**
- * @file backtrace.c
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author David Smith
- */
-
-#include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-struct frame_head {
- struct frame_head * ebp;
- unsigned long ret;
-} __attribute__((packed));
-
-static struct frame_head *
-dump_kernel_backtrace(struct frame_head * head)
-{
- oprofile_add_trace(head->ret);
-
- /* frame pointers should strictly progress back up the stack
- * (towards higher addresses) */
- if (head >= head->ebp)
- return NULL;
-
- return head->ebp;
-}
-
-static struct frame_head *
-dump_user_backtrace(struct frame_head * head)
-{
- struct frame_head bufhead[2];
-
- /* Also check accessibility of one struct frame_head beyond */
- if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
- return NULL;
- if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
- return NULL;
-
- oprofile_add_trace(bufhead[0].ret);
-
- /* frame pointers should strictly progress back up the stack
- * (towards higher addresses) */
- if (head >= bufhead[0].ebp)
- return NULL;
-
- return bufhead[0].ebp;
-}
-
-/*
- * | | /\ Higher addresses
- * | |
- * --------------- stack base (address of current_thread_info)
- * | thread info |
- * . .
- * | stack |
- * --------------- saved regs->ebp value if valid (frame_head address)
- * . .
- * --------------- saved regs->rsp value if x86_64
- * | |
- * --------------- struct pt_regs * stored on stack if 32-bit
- * | |
- * . .
- * | |
- * --------------- %esp
- * | |
- * | | \/ Lower addresses
- *
- * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
- * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
- * exceptions use special stacks, maintained by the interrupt stack table
- * (IST). These stacks are set up in trap_init() in
- * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
- * to the kernel stack; instead, it points to some location on the NMI
- * stack. On the other hand, regs->rsp is the stack pointer saved when the
- * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
- * processor does not save %esp on the kernel stack when interrupts occur
- * in the kernel mode.
- */
-#ifdef CONFIG_FRAME_POINTER
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
- unsigned long headaddr = (unsigned long)head;
-#ifdef CONFIG_X86_64
- unsigned long stack = (unsigned long)regs->rsp;
-#else
- unsigned long stack = (unsigned long)regs;
-#endif
- unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
-
- return headaddr > stack && headaddr < stack_base;
-}
-#else
-/* without fp, it's just junk */
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
- return 0;
-}
-#endif
-
-
-void
-x86_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
- struct frame_head *head;
-
-#ifdef CONFIG_X86_64
- head = (struct frame_head *)regs->rbp;
-#else
- head = (struct frame_head *)regs->ebp;
-#endif
-
- if (!user_mode_vm(regs)) {
- while (depth-- && valid_kernel_stack(head, regs))
- head = dump_kernel_backtrace(head);
- return;
- }
-
- while (depth-- && head)
- head = dump_user_backtrace(head);
-}
+++ /dev/null
-/**
- * @file init.c
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/oprofile.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-
-/* We support CPUs that have performance counters like the Pentium Pro
- * with the NMI mode driver.
- */
-
-extern int op_nmi_init(struct oprofile_operations * ops);
-extern int op_nmi_timer_init(struct oprofile_operations * ops);
-extern void op_nmi_exit(void);
-extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-
-int __init oprofile_arch_init(struct oprofile_operations * ops)
-{
- int ret;
-
- ret = -ENODEV;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- ret = op_nmi_init(ops);
-#endif
-#ifdef CONFIG_X86_IO_APIC
- if (ret < 0)
- ret = op_nmi_timer_init(ops);
-#endif
- ops->backtrace = x86_backtrace;
-
- return ret;
-}
-
-
-void oprofile_arch_exit(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- op_nmi_exit();
-#endif
-}
+++ /dev/null
-/**
- * @file nmi_int.c
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/init.h>
-#include <linux/notifier.h>
-#include <linux/smp.h>
-#include <linux/oprofile.h>
-#include <linux/sysdev.h>
-#include <linux/slab.h>
-#include <linux/moduleparam.h>
-#include <linux/kdebug.h>
-#include <asm/nmi.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-#include "op_counter.h"
-#include "op_x86_model.h"
-
-static struct op_x86_model_spec const * model;
-static struct op_msrs cpu_msrs[NR_CPUS];
-static unsigned long saved_lvtpc[NR_CPUS];
-
-static int nmi_start(void);
-static void nmi_stop(void);
-
-/* 0 == registered but off, 1 == registered and on */
-static int nmi_enabled = 0;
-
-#ifdef CONFIG_PM
-
-static int nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
- if (nmi_enabled == 1)
- nmi_stop();
- return 0;
-}
-
-
-static int nmi_resume(struct sys_device *dev)
-{
- if (nmi_enabled == 1)
- nmi_start();
- return 0;
-}
-
-
-static struct sysdev_class oprofile_sysclass = {
- set_kset_name("oprofile"),
- .resume = nmi_resume,
- .suspend = nmi_suspend,
-};
-
-
-static struct sys_device device_oprofile = {
- .id = 0,
- .cls = &oprofile_sysclass,
-};
-
-
-static int __init init_sysfs(void)
-{
- int error;
- if (!(error = sysdev_class_register(&oprofile_sysclass)))
- error = sysdev_register(&device_oprofile);
- return error;
-}
-
-
-static void exit_sysfs(void)
-{
- sysdev_unregister(&device_oprofile);
- sysdev_class_unregister(&oprofile_sysclass);
-}
-
-#else
-#define init_sysfs() do { } while (0)
-#define exit_sysfs() do { } while (0)
-#endif /* CONFIG_PM */
-
-static int profile_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
- int cpu = smp_processor_id();
-
- switch(val) {
- case DIE_NMI:
- if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
- ret = NOTIFY_STOP;
- break;
- default:
- break;
- }
- return ret;
-}
-
-static void nmi_cpu_save_registers(struct op_msrs * msrs)
-{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
- struct op_msr * counters = msrs->counters;
- struct op_msr * controls = msrs->controls;
- unsigned int i;
-
- for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr){
- rdmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
- }
-
- for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr){
- rdmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
- }
- }
-}
-
-
-static void nmi_save_registers(void * dummy)
-{
- int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
- nmi_cpu_save_registers(msrs);
-}
-
-
-static void free_msrs(void)
-{
- int i;
- for_each_possible_cpu(i) {
- kfree(cpu_msrs[i].counters);
- cpu_msrs[i].counters = NULL;
- kfree(cpu_msrs[i].controls);
- cpu_msrs[i].controls = NULL;
- }
-}
-
-
-static int allocate_msrs(void)
-{
- int success = 1;
- size_t controls_size = sizeof(struct op_msr) * model->num_controls;
- size_t counters_size = sizeof(struct op_msr) * model->num_counters;
-
- int i;
- for_each_possible_cpu(i) {
- cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
- if (!cpu_msrs[i].counters) {
- success = 0;
- break;
- }
- cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
- if (!cpu_msrs[i].controls) {
- success = 0;
- break;
- }
- }
-
- if (!success)
- free_msrs();
-
- return success;
-}
-
-
-static void nmi_cpu_setup(void * dummy)
-{
- int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
- spin_lock(&oprofilefs_lock);
- model->setup_ctrs(msrs);
- spin_unlock(&oprofilefs_lock);
- saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-}
-
-static struct notifier_block profile_exceptions_nb = {
- .notifier_call = profile_exceptions_notify,
- .next = NULL,
- .priority = 0
-};
-
-static int nmi_setup(void)
-{
- int err=0;
- int cpu;
-
- if (!allocate_msrs())
- return -ENOMEM;
-
- if ((err = register_die_notifier(&profile_exceptions_nb))){
- free_msrs();
- return err;
- }
-
- /* We need to serialize save and setup for HT because the subset
- * of msrs are distinct for save and setup operations
- */
-
- /* Assume saved/restored counters are the same on all CPUs */
- model->fill_in_addresses(&cpu_msrs[0]);
- for_each_possible_cpu (cpu) {
- if (cpu != 0) {
- memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
- sizeof(struct op_msr) * model->num_counters);
-
- memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
- sizeof(struct op_msr) * model->num_controls);
- }
-
- }
- on_each_cpu(nmi_save_registers, NULL, 0, 1);
- on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
- nmi_enabled = 1;
- return 0;
-}
-
-
-static void nmi_restore_registers(struct op_msrs * msrs)
-{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
- struct op_msr * counters = msrs->counters;
- struct op_msr * controls = msrs->controls;
- unsigned int i;
-
- for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr){
- wrmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
- }
- }
-
- for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr){
- wrmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
- }
-}
-
-
-static void nmi_cpu_shutdown(void * dummy)
-{
- unsigned int v;
- int cpu = smp_processor_id();
- struct op_msrs * msrs = &cpu_msrs[cpu];
-
- /* restoring APIC_LVTPC can trigger an apic error because the delivery
- * mode and vector nr combination can be illegal. That's by design: on
- * power on apic lvt contain a zero vector nr which are legal only for
- * NMI delivery mode. So inhibit apic err before restoring lvtpc
- */
- v = apic_read(APIC_LVTERR);
- apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
- apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
- apic_write(APIC_LVTERR, v);
- nmi_restore_registers(msrs);
- model->shutdown(msrs);
-}
-
-
-static void nmi_shutdown(void)
-{
- nmi_enabled = 0;
- on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
- unregister_die_notifier(&profile_exceptions_nb);
- free_msrs();
-}
-
-
-static void nmi_cpu_start(void * dummy)
-{
- struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
- model->start(msrs);
-}
-
-
-static int nmi_start(void)
-{
- on_each_cpu(nmi_cpu_start, NULL, 0, 1);
- return 0;
-}
-
-
-static void nmi_cpu_stop(void * dummy)
-{
- struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
- model->stop(msrs);
-}
-
-
-static void nmi_stop(void)
-{
- on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
-}
-
-
-struct op_counter_config counter_config[OP_MAX_COUNTER];
-
-static int nmi_create_files(struct super_block * sb, struct dentry * root)
-{
- unsigned int i;
-
- for (i = 0; i < model->num_counters; ++i) {
- struct dentry * dir;
- char buf[4];
-
- /* quick little hack to _not_ expose a counter if it is not
- * available for use. This should protect userspace app.
- * NOTE: assumes 1:1 mapping here (that counters are organized
- * sequentially in their struct assignment).
- */
- if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
- continue;
-
- snprintf(buf, sizeof(buf), "%d", i);
- dir = oprofilefs_mkdir(sb, root, buf);
- oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
- oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
- oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
- oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
- oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
- oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
- }
-
- return 0;
-}
-
-static int p4force;
-module_param(p4force, int, 0);
-
-static int __init p4_init(char ** cpu_type)
-{
- __u8 cpu_model = boot_cpu_data.x86_model;
-
- if (!p4force && (cpu_model > 6 || cpu_model == 5))
- return 0;
-
-#ifndef CONFIG_SMP
- *cpu_type = "i386/p4";
- model = &op_p4_spec;
- return 1;
-#else
- switch (smp_num_siblings) {
- case 1:
- *cpu_type = "i386/p4";
- model = &op_p4_spec;
- return 1;
-
- case 2:
- *cpu_type = "i386/p4-ht";
- model = &op_p4_ht2_spec;
- return 1;
- }
-#endif
-
- printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
- printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
- return 0;
-}
-
-
-static int __init ppro_init(char ** cpu_type)
-{
- __u8 cpu_model = boot_cpu_data.x86_model;
-
- if (cpu_model == 14)
- *cpu_type = "i386/core";
- else if (cpu_model == 15)
- *cpu_type = "i386/core_2";
- else if (cpu_model > 0xd)
- return 0;
- else if (cpu_model == 9) {
- *cpu_type = "i386/p6_mobile";
- } else if (cpu_model > 5) {
- *cpu_type = "i386/piii";
- } else if (cpu_model > 2) {
- *cpu_type = "i386/pii";
- } else {
- *cpu_type = "i386/ppro";
- }
-
- model = &op_ppro_spec;
- return 1;
-}
-
-/* in order to get sysfs right */
-static int using_nmi;
-
-int __init op_nmi_init(struct oprofile_operations *ops)
-{
- __u8 vendor = boot_cpu_data.x86_vendor;
- __u8 family = boot_cpu_data.x86;
- char *cpu_type;
-
- if (!cpu_has_apic)
- return -ENODEV;
-
- switch (vendor) {
- case X86_VENDOR_AMD:
- /* Needs to be at least an Athlon (or hammer in 32bit mode) */
-
- switch (family) {
- default:
- return -ENODEV;
- case 6:
- model = &op_athlon_spec;
- cpu_type = "i386/athlon";
- break;
- case 0xf:
- model = &op_athlon_spec;
- /* Actually it could be i386/hammer too, but give
- user space an consistent name. */
- cpu_type = "x86-64/hammer";
- break;
- case 0x10:
- model = &op_athlon_spec;
- cpu_type = "x86-64/family10";
- break;
- }
- break;
-
- case X86_VENDOR_INTEL:
- switch (family) {
- /* Pentium IV */
- case 0xf:
- if (!p4_init(&cpu_type))
- return -ENODEV;
- break;
-
- /* A P6-class processor */
- case 6:
- if (!ppro_init(&cpu_type))
- return -ENODEV;
- break;
-
- default:
- return -ENODEV;
- }
- break;
-
- default:
- return -ENODEV;
- }
-
- init_sysfs();
- using_nmi = 1;
- ops->create_files = nmi_create_files;
- ops->setup = nmi_setup;
- ops->shutdown = nmi_shutdown;
- ops->start = nmi_start;
- ops->stop = nmi_stop;
- ops->cpu_type = cpu_type;
- printk(KERN_INFO "oprofile: using NMI interrupt.\n");
- return 0;
-}
-
-
-void op_nmi_exit(void)
-{
- if (using_nmi)
- exit_sysfs();
-}
+++ /dev/null
-/**
- * @file nmi_timer_int.c
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Zwane Mwaikambo <zwane@linuxpower.ca>
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/oprofile.h>
-#include <linux/rcupdate.h>
-#include <linux/kdebug.h>
-
-#include <asm/nmi.h>
-#include <asm/apic.h>
-#include <asm/ptrace.h>
-
-static int profile_timer_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- switch(val) {
- case DIE_NMI:
- oprofile_add_sample(args->regs, 0);
- ret = NOTIFY_STOP;
- break;
- default:
- break;
- }
- return ret;
-}
-
-static struct notifier_block profile_timer_exceptions_nb = {
- .notifier_call = profile_timer_exceptions_notify,
- .next = NULL,
- .priority = 0
-};
-
-static int timer_start(void)
-{
- if (register_die_notifier(&profile_timer_exceptions_nb))
- return 1;
- return 0;
-}
-
-
-static void timer_stop(void)
-{
- unregister_die_notifier(&profile_timer_exceptions_nb);
- synchronize_sched(); /* Allow already-started NMIs to complete. */
-}
-
-
-int __init op_nmi_timer_init(struct oprofile_operations * ops)
-{
- if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
- return -ENODEV;
-
- ops->start = timer_start;
- ops->stop = timer_stop;
- ops->cpu_type = "timer";
- printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
- return 0;
-}
+++ /dev/null
-/**
- * @file op_counter.h
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#ifndef OP_COUNTER_H
-#define OP_COUNTER_H
-
-#define OP_MAX_COUNTER 8
-
-/* Per-perfctr configuration as set via
- * oprofilefs.
- */
-struct op_counter_config {
- unsigned long count;
- unsigned long enabled;
- unsigned long event;
- unsigned long kernel;
- unsigned long user;
- unsigned long unit_mask;
-};
-
-extern struct op_counter_config counter_config[];
-
-#endif /* OP_COUNTER_H */
+++ /dev/null
-/**
- * @file op_model_athlon.h
- * athlon / K7 model-specific MSR operations
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * @author Graydon Hoare
- */
-
-#include <linux/oprofile.h>
-#include <asm/ptrace.h>
-#include <asm/msr.h>
-#include <asm/nmi.h>
-
-#include "op_x86_model.h"
-#include "op_counter.h"
-
-#define NUM_COUNTERS 4
-#define NUM_CONTROLS 4
-
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT(val, e) (val |= e)
-
-static unsigned long reset_value[NUM_COUNTERS];
-
-static void athlon_fill_in_addresses(struct op_msrs * const msrs)
-{
- int i;
-
- for (i=0; i < NUM_COUNTERS; i++) {
- if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
- msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
- else
- msrs->counters[i].addr = 0;
- }
-
- for (i=0; i < NUM_CONTROLS; i++) {
- if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
- msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
- else
- msrs->controls[i].addr = 0;
- }
-}
-
-
-static void athlon_setup_ctrs(struct op_msrs const * const msrs)
-{
- unsigned int low, high;
- int i;
-
- /* clear all counters */
- for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
- continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_WRITE(low, high, msrs, i);
- }
-
- /* avoid a false detection of ctr overflows in NMI handler */
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs,i)))
- continue;
- CTR_WRITE(1, msrs, i);
- }
-
- /* enable active counters */
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
- reset_value[i] = counter_config[i].count;
-
- CTR_WRITE(counter_config[i].count, msrs, i);
-
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT(low, counter_config[i].event);
- CTRL_WRITE(low, high, msrs, i);
- } else {
- reset_value[i] = 0;
- }
- }
-}
-
-
-static int athlon_check_ctrs(struct pt_regs * const regs,
- struct op_msrs const * const msrs)
-{
- unsigned int low, high;
- int i;
-
- for (i = 0 ; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
- continue;
- CTR_READ(low, high, msrs, i);
- if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], msrs, i);
- }
- }
-
- /* See op_model_ppro.c */
- return 1;
-}
-
-
-static void athlon_start(struct op_msrs const * const msrs)
-{
- unsigned int low, high;
- int i;
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
- }
-}
-
-
-static void athlon_stop(struct op_msrs const * const msrs)
-{
- unsigned int low,high;
- int i;
-
- /* Subtle: stop on all counters to avoid race with
- * setting our pm callback */
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (!reset_value[i])
- continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
-}
-
-static void athlon_shutdown(struct op_msrs const * const msrs)
-{
- int i;
-
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
- release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
- }
- for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
- release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
- }
-}
-
-struct op_x86_model_spec const op_athlon_spec = {
- .num_counters = NUM_COUNTERS,
- .num_controls = NUM_CONTROLS,
- .fill_in_addresses = &athlon_fill_in_addresses,
- .setup_ctrs = &athlon_setup_ctrs,
- .check_ctrs = &athlon_check_ctrs,
- .start = &athlon_start,
- .stop = &athlon_stop,
- .shutdown = &athlon_shutdown
-};
+++ /dev/null
-/**
- * @file op_model_p4.c
- * P4 model-specific MSR operations
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Graydon Hoare
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <asm/msr.h>
-#include <asm/ptrace.h>
-#include <asm/fixmap.h>
-#include <asm/apic.h>
-#include <asm/nmi.h>
-
-#include "op_x86_model.h"
-#include "op_counter.h"
-
-#define NUM_EVENTS 39
-
-#define NUM_COUNTERS_NON_HT 8
-#define NUM_ESCRS_NON_HT 45
-#define NUM_CCCRS_NON_HT 18
-#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
-
-#define NUM_COUNTERS_HT2 4
-#define NUM_ESCRS_HT2 23
-#define NUM_CCCRS_HT2 9
-#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
-
-static unsigned int num_counters = NUM_COUNTERS_NON_HT;
-static unsigned int num_controls = NUM_CONTROLS_NON_HT;
-
-/* this has to be checked dynamically since the
- hyper-threadedness of a chip is discovered at
- kernel boot-time. */
-static inline void setup_num_counters(void)
-{
-#ifdef CONFIG_SMP
- if (smp_num_siblings == 2){
- num_counters = NUM_COUNTERS_HT2;
- num_controls = NUM_CONTROLS_HT2;
- }
-#endif
-}
-
-static int inline addr_increment(void)
-{
-#ifdef CONFIG_SMP
- return smp_num_siblings == 2 ? 2 : 1;
-#else
- return 1;
-#endif
-}
-
-
-/* tables to simulate simplified hardware view of p4 registers */
-struct p4_counter_binding {
- int virt_counter;
- int counter_address;
- int cccr_address;
-};
-
-struct p4_event_binding {
- int escr_select; /* value to put in CCCR */
- int event_select; /* value to put in ESCR */
- struct {
- int virt_counter; /* for this counter... */
- int escr_address; /* use this ESCR */
- } bindings[2];
-};
-
-/* nb: these CTR_* defines are a duplicate of defines in
- event/i386.p4*events. */
-
-
-#define CTR_BPU_0 (1 << 0)
-#define CTR_MS_0 (1 << 1)
-#define CTR_FLAME_0 (1 << 2)
-#define CTR_IQ_4 (1 << 3)
-#define CTR_BPU_2 (1 << 4)
-#define CTR_MS_2 (1 << 5)
-#define CTR_FLAME_2 (1 << 6)
-#define CTR_IQ_5 (1 << 7)
-
-static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
- { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
- { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
- { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
- { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
- { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
- { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
- { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
- { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
-};
-
-#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
-
-/* p4 event codes in libop/op_event.h are indices into this table. */
-
-static struct p4_event_binding p4_events[NUM_EVENTS] = {
-
- { /* BRANCH_RETIRED */
- 0x05, 0x06,
- { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
- {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* MISPRED_BRANCH_RETIRED */
- 0x04, 0x03,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
- { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
- },
-
- { /* TC_DELIVER_MODE */
- 0x01, 0x01,
- { { CTR_MS_0, MSR_P4_TC_ESCR0},
- { CTR_MS_2, MSR_P4_TC_ESCR1} }
- },
-
- { /* BPU_FETCH_REQUEST */
- 0x00, 0x03,
- { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
- { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
- },
-
- { /* ITLB_REFERENCE */
- 0x03, 0x18,
- { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
- { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
- },
-
- { /* MEMORY_CANCEL */
- 0x05, 0x02,
- { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
- { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
- },
-
- { /* MEMORY_COMPLETE */
- 0x02, 0x08,
- { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
- { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
- },
-
- { /* LOAD_PORT_REPLAY */
- 0x02, 0x04,
- { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
- { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
- },
-
- { /* STORE_PORT_REPLAY */
- 0x02, 0x05,
- { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
- { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
- },
-
- { /* MOB_LOAD_REPLAY */
- 0x02, 0x03,
- { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
- { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
- },
-
- { /* PAGE_WALK_TYPE */
- 0x04, 0x01,
- { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
- { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
- },
-
- { /* BSQ_CACHE_REFERENCE */
- 0x07, 0x0c,
- { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
- { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
- },
-
- { /* IOQ_ALLOCATION */
- 0x06, 0x03,
- { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
- { 0, 0 } }
- },
-
- { /* IOQ_ACTIVE_ENTRIES */
- 0x06, 0x1a,
- { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
- { 0, 0 } }
- },
-
- { /* FSB_DATA_ACTIVITY */
- 0x06, 0x17,
- { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
- { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
- },
-
- { /* BSQ_ALLOCATION */
- 0x07, 0x05,
- { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
- { 0, 0 } }
- },
-
- { /* BSQ_ACTIVE_ENTRIES */
- 0x07, 0x06,
- { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
- { 0, 0 } }
- },
-
- { /* X87_ASSIST */
- 0x05, 0x03,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
- { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* SSE_INPUT_ASSIST */
- 0x01, 0x34,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* PACKED_SP_UOP */
- 0x01, 0x08,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* PACKED_DP_UOP */
- 0x01, 0x0c,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* SCALAR_SP_UOP */
- 0x01, 0x0a,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* SCALAR_DP_UOP */
- 0x01, 0x0e,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* 64BIT_MMX_UOP */
- 0x01, 0x02,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* 128BIT_MMX_UOP */
- 0x01, 0x1a,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* X87_FP_UOP */
- 0x01, 0x04,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* X87_SIMD_MOVES_UOP */
- 0x01, 0x2e,
- { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
- { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
- },
-
- { /* MACHINE_CLEAR */
- 0x05, 0x02,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
- { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* GLOBAL_POWER_EVENTS */
- 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
- { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
- { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
- },
-
- { /* TC_MS_XFER */
- 0x00, 0x05,
- { { CTR_MS_0, MSR_P4_MS_ESCR0},
- { CTR_MS_2, MSR_P4_MS_ESCR1} }
- },
-
- { /* UOP_QUEUE_WRITES */
- 0x00, 0x09,
- { { CTR_MS_0, MSR_P4_MS_ESCR0},
- { CTR_MS_2, MSR_P4_MS_ESCR1} }
- },
-
- { /* FRONT_END_EVENT */
- 0x05, 0x08,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
- { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* EXECUTION_EVENT */
- 0x05, 0x0c,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
- { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* REPLAY_EVENT */
- 0x05, 0x09,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
- { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
- },
-
- { /* INSTR_RETIRED */
- 0x04, 0x02,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
- { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
- },
-
- { /* UOPS_RETIRED */
- 0x04, 0x01,
- { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
- { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
- },
-
- { /* UOP_TYPE */
- 0x02, 0x02,
- { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
- { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
- },
-
- { /* RETIRED_MISPRED_BRANCH_TYPE */
- 0x02, 0x05,
- { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
- { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
- },
-
- { /* RETIRED_BRANCH_TYPE */
- 0x02, 0x04,
- { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
- { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
- }
-};
-
-
-#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
-
-#define ESCR_RESERVED_BITS 0x80000003
-#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
-#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
-#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
-#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
-#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
-#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
-#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-
-#define CCCR_RESERVED_BITS 0x38030FFF
-#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
-#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
-#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
-#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
-#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
-#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
-#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
-#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
-#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
-#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
-
-
-/* this assigns a "stagger" to the current CPU, which is used throughout
- the code in this module as an extra array offset, to select the "even"
- or "odd" part of all the divided resources. */
-static unsigned int get_stagger(void)
-{
-#ifdef CONFIG_SMP
- int cpu = smp_processor_id();
- return (cpu != first_cpu(cpu_sibling_map[cpu]));
-#endif
- return 0;
-}
-
-
-/* finally, mediate access to a real hardware counter
- by passing a "virtual" counter numer to this macro,
- along with your stagger setting. */
-#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
-
-static unsigned long reset_value[NUM_COUNTERS_NON_HT];
-
-
-static void p4_fill_in_addresses(struct op_msrs * const msrs)
-{
- unsigned int i;
- unsigned int addr, cccraddr, stag;
-
- setup_num_counters();
- stag = get_stagger();
-
- /* initialize some registers */
- for (i = 0; i < num_counters; ++i) {
- msrs->counters[i].addr = 0;
- }
- for (i = 0; i < num_controls; ++i) {
- msrs->controls[i].addr = 0;
- }
-
- /* the counter & cccr registers we pay attention to */
- for (i = 0; i < num_counters; ++i) {
- addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
- cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
- if (reserve_perfctr_nmi(addr)){
- msrs->counters[i].addr = addr;
- msrs->controls[i].addr = cccraddr;
- }
- }
-
- /* 43 ESCR registers in three or four discontiguous group */
- for (addr = MSR_P4_BSU_ESCR0 + stag;
- addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
-
- /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
- * to avoid special case in nmi_{save|restore}_registers() */
- if (boot_cpu_data.x86_model >= 0x3) {
- for (addr = MSR_P4_BSU_ESCR0 + stag;
- addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
- } else {
- for (addr = MSR_P4_IQ_ESCR0 + stag;
- addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
- }
-
- for (addr = MSR_P4_RAT_ESCR0 + stag;
- addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
-
- for (addr = MSR_P4_MS_ESCR0 + stag;
- addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
-
- for (addr = MSR_P4_IX_ESCR0 + stag;
- addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
- if (reserve_evntsel_nmi(addr))
- msrs->controls[i].addr = addr;
- }
-
- /* there are 2 remaining non-contiguously located ESCRs */
-
- if (num_counters == NUM_COUNTERS_NON_HT) {
- /* standard non-HT CPUs handle both remaining ESCRs*/
- if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
- if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
-
- } else if (stag == 0) {
- /* HT CPUs give the first remainder to the even thread, as
- the 32nd control register */
- if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
-
- } else {
- /* and two copies of the second to the odd thread,
- for the 22st and 23nd control registers */
- if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
- msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
- }
- }
-}
-
-
-static void pmc_setup_one_p4_counter(unsigned int ctr)
-{
- int i;
- int const maxbind = 2;
- unsigned int cccr = 0;
- unsigned int escr = 0;
- unsigned int high = 0;
- unsigned int counter_bit;
- struct p4_event_binding *ev = NULL;
- unsigned int stag;
-
- stag = get_stagger();
-
- /* convert from counter *number* to counter *bit* */
- counter_bit = 1 << VIRT_CTR(stag, ctr);
-
- /* find our event binding structure. */
- if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
- printk(KERN_ERR
- "oprofile: P4 event code 0x%lx out of range\n",
- counter_config[ctr].event);
- return;
- }
-
- ev = &(p4_events[counter_config[ctr].event - 1]);
-
- for (i = 0; i < maxbind; i++) {
- if (ev->bindings[i].virt_counter & counter_bit) {
-
- /* modify ESCR */
- ESCR_READ(escr, high, ev, i);
- ESCR_CLEAR(escr);
- if (stag == 0) {
- ESCR_SET_USR_0(escr, counter_config[ctr].user);
- ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
- } else {
- ESCR_SET_USR_1(escr, counter_config[ctr].user);
- ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
- }
- ESCR_SET_EVENT_SELECT(escr, ev->event_select);
- ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
- ESCR_WRITE(escr, high, ev, i);
-
- /* modify CCCR */
- CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
- CCCR_CLEAR(cccr);
- CCCR_SET_REQUIRED_BITS(cccr);
- CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
- if (stag == 0) {
- CCCR_SET_PMI_OVF_0(cccr);
- } else {
- CCCR_SET_PMI_OVF_1(cccr);
- }
- CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
- return;
- }
- }
-
- printk(KERN_ERR
- "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
- counter_config[ctr].event, stag, ctr);
-}
-
-
-static void p4_setup_ctrs(struct op_msrs const * const msrs)
-{
- unsigned int i;
- unsigned int low, high;
- unsigned int stag;
-
- stag = get_stagger();
-
- rdmsr(MSR_IA32_MISC_ENABLE, low, high);
- if (! MISC_PMC_ENABLED_P(low)) {
- printk(KERN_ERR "oprofile: P4 PMC not available\n");
- return;
- }
-
- /* clear the cccrs we will use */
- for (i = 0 ; i < num_counters ; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
- continue;
- rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
- CCCR_CLEAR(low);
- CCCR_SET_REQUIRED_BITS(low);
- wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
- }
-
- /* clear all escrs (including those outside our concern) */
- for (i = num_counters; i < num_controls; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
- continue;
- wrmsr(msrs->controls[i].addr, 0, 0);
- }
-
- /* setup all counters */
- for (i = 0 ; i < num_counters ; ++i) {
- if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
- reset_value[i] = counter_config[i].count;
- pmc_setup_one_p4_counter(i);
- CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
- } else {
- reset_value[i] = 0;
- }
- }
-}
-
-
-static int p4_check_ctrs(struct pt_regs * const regs,
- struct op_msrs const * const msrs)
-{
- unsigned long ctr, low, high, stag, real;
- int i;
-
- stag = get_stagger();
-
- for (i = 0; i < num_counters; ++i) {
-
- if (!reset_value[i])
- continue;
-
- /*
- * there is some eccentricity in the hardware which
- * requires that we perform 2 extra corrections:
- *
- * - check both the CCCR:OVF flag for overflow and the
- * counter high bit for un-flagged overflows.
- *
- * - write the counter back twice to ensure it gets
- * updated properly.
- *
- * the former seems to be related to extra NMIs happening
- * during the current NMI; the latter is reported as errata
- * N15 in intel doc 249199-029, pentium 4 specification
- * update, though their suggested work-around does not
- * appear to solve the problem.
- */
-
- real = VIRT_CTR(stag, i);
-
- CCCR_READ(low, high, real);
- CTR_READ(ctr, high, real);
- if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
- oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], real);
- CCCR_CLEAR_OVF(low);
- CCCR_WRITE(low, high, real);
- CTR_WRITE(reset_value[i], real);
- }
- }
-
- /* P4 quirk: you have to re-unmask the apic vector */
- apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
-
- /* See op_model_ppro.c */
- return 1;
-}
-
-
-static void p4_start(struct op_msrs const * const msrs)
-{
- unsigned int low, high, stag;
- int i;
-
- stag = get_stagger();
-
- for (i = 0; i < num_counters; ++i) {
- if (!reset_value[i])
- continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
- CCCR_SET_ENABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
- }
-}
-
-
-static void p4_stop(struct op_msrs const * const msrs)
-{
- unsigned int low, high, stag;
- int i;
-
- stag = get_stagger();
-
- for (i = 0; i < num_counters; ++i) {
- if (!reset_value[i])
- continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
- CCCR_SET_DISABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
- }
-}
-
-static void p4_shutdown(struct op_msrs const * const msrs)
-{
- int i;
-
- for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
- release_perfctr_nmi(msrs->counters[i].addr);
- }
- /* some of the control registers are specially reserved in
- * conjunction with the counter registers (hence the starting offset).
- * This saves a few bits.
- */
- for (i = num_counters ; i < num_controls ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
- release_evntsel_nmi(msrs->controls[i].addr);
- }
-}
-
-
-#ifdef CONFIG_SMP
-struct op_x86_model_spec const op_p4_ht2_spec = {
- .num_counters = NUM_COUNTERS_HT2,
- .num_controls = NUM_CONTROLS_HT2,
- .fill_in_addresses = &p4_fill_in_addresses,
- .setup_ctrs = &p4_setup_ctrs,
- .check_ctrs = &p4_check_ctrs,
- .start = &p4_start,
- .stop = &p4_stop,
- .shutdown = &p4_shutdown
-};
-#endif
-
-struct op_x86_model_spec const op_p4_spec = {
- .num_counters = NUM_COUNTERS_NON_HT,
- .num_controls = NUM_CONTROLS_NON_HT,
- .fill_in_addresses = &p4_fill_in_addresses,
- .setup_ctrs = &p4_setup_ctrs,
- .check_ctrs = &p4_check_ctrs,
- .start = &p4_start,
- .stop = &p4_stop,
- .shutdown = &p4_shutdown
-};
+++ /dev/null
-/**
- * @file op_model_ppro.h
- * pentium pro / P6 model-specific MSR operations
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * @author Graydon Hoare
- */
-
-#include <linux/oprofile.h>
-#include <asm/ptrace.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-#include <asm/nmi.h>
-
-#include "op_x86_model.h"
-#include "op_counter.h"
-
-#define NUM_COUNTERS 2
-#define NUM_CONTROLS 2
-
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_32BIT_WRITE(l,msrs,c) \
- do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT(val, e) (val |= e)
-
-static unsigned long reset_value[NUM_COUNTERS];
-
-static void ppro_fill_in_addresses(struct op_msrs * const msrs)
-{
- int i;
-
- for (i=0; i < NUM_COUNTERS; i++) {
- if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
- msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
- else
- msrs->counters[i].addr = 0;
- }
-
- for (i=0; i < NUM_CONTROLS; i++) {
- if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
- msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
- else
- msrs->controls[i].addr = 0;
- }
-}
-
-
-static void ppro_setup_ctrs(struct op_msrs const * const msrs)
-{
- unsigned int low, high;
- int i;
-
- /* clear all counters */
- for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
- continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_WRITE(low, high, msrs, i);
- }
-
- /* avoid a false detection of ctr overflows in NMI handler */
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs,i)))
- continue;
- CTR_32BIT_WRITE(1, msrs, i);
- }
-
- /* enable active counters */
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
- reset_value[i] = counter_config[i].count;
-
- CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
-
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT(low, counter_config[i].event);
- CTRL_WRITE(low, high, msrs, i);
- } else {
- reset_value[i] = 0;
- }
- }
-}
-
-
-static int ppro_check_ctrs(struct pt_regs * const regs,
- struct op_msrs const * const msrs)
-{
- unsigned int low, high;
- int i;
-
- for (i = 0 ; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
- continue;
- CTR_READ(low, high, msrs, i);
- if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(regs, i);
- CTR_32BIT_WRITE(reset_value[i], msrs, i);
- }
- }
-
- /* Only P6 based Pentium M need to re-unmask the apic vector but it
- * doesn't hurt other P6 variant */
- apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
-
- /* We can't work out if we really handled an interrupt. We
- * might have caught a *second* counter just after overflowing
- * the interrupt for this counter then arrives
- * and we don't find a counter that's overflowed, so we
- * would return 0 and get dazed + confused. Instead we always
- * assume we found an overflow. This sucks.
- */
- return 1;
-}
-
-
-static void ppro_start(struct op_msrs const * const msrs)
-{
- unsigned int low,high;
- int i;
-
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
- }
-}
-
-
-static void ppro_stop(struct op_msrs const * const msrs)
-{
- unsigned int low,high;
- int i;
-
- for (i = 0; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
- continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
-}
-
-static void ppro_shutdown(struct op_msrs const * const msrs)
-{
- int i;
-
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
- release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
- }
- for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
- release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
- }
-}
-
-
-struct op_x86_model_spec const op_ppro_spec = {
- .num_counters = NUM_COUNTERS,
- .num_controls = NUM_CONTROLS,
- .fill_in_addresses = &ppro_fill_in_addresses,
- .setup_ctrs = &ppro_setup_ctrs,
- .check_ctrs = &ppro_check_ctrs,
- .start = &ppro_start,
- .stop = &ppro_stop,
- .shutdown = &ppro_shutdown
-};
+++ /dev/null
-/**
- * @file op_x86_model.h
- * interface to x86 model-specific MSR operations
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Graydon Hoare
- */
-
-#ifndef OP_X86_MODEL_H
-#define OP_X86_MODEL_H
-
-struct op_saved_msr {
- unsigned int high;
- unsigned int low;
-};
-
-struct op_msr {
- unsigned long addr;
- struct op_saved_msr saved;
-};
-
-struct op_msrs {
- struct op_msr * counters;
- struct op_msr * controls;
-};
-
-struct pt_regs;
-
-/* The model vtable abstracts the differences between
- * various x86 CPU model's perfctr support.
- */
-struct op_x86_model_spec {
- unsigned int const num_counters;
- unsigned int const num_controls;
- void (*fill_in_addresses)(struct op_msrs * const msrs);
- void (*setup_ctrs)(struct op_msrs const * const msrs);
- int (*check_ctrs)(struct pt_regs * const regs,
- struct op_msrs const * const msrs);
- void (*start)(struct op_msrs const * const msrs);
- void (*stop)(struct op_msrs const * const msrs);
- void (*shutdown)(struct op_msrs const * const msrs);
-};
-
-extern struct op_x86_model_spec const op_ppro_spec;
-extern struct op_x86_model_spec const op_p4_spec;
-extern struct op_x86_model_spec const op_p4_ht2_spec;
-extern struct op_x86_model_spec const op_athlon_spec;
-
-#endif /* OP_X86_MODEL_H */
--- /dev/null
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, include the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
--- /dev/null
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
+ op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
--- /dev/null
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author David Smith
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+struct frame_head {
+ struct frame_head * ebp;
+ unsigned long ret;
+} __attribute__((packed));
+
+static struct frame_head *
+dump_kernel_backtrace(struct frame_head * head)
+{
+ oprofile_add_trace(head->ret);
+
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (head >= head->ebp)
+ return NULL;
+
+ return head->ebp;
+}
+
+static struct frame_head *
+dump_user_backtrace(struct frame_head * head)
+{
+ struct frame_head bufhead[2];
+
+ /* Also check accessibility of one struct frame_head beyond */
+ if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+ return NULL;
+ if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+ return NULL;
+
+ oprofile_add_trace(bufhead[0].ret);
+
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (head >= bufhead[0].ebp)
+ return NULL;
+
+ return bufhead[0].ebp;
+}
+
+/*
+ * | | /\ Higher addresses
+ * | |
+ * --------------- stack base (address of current_thread_info)
+ * | thread info |
+ * . .
+ * | stack |
+ * --------------- saved regs->ebp value if valid (frame_head address)
+ * . .
+ * --------------- saved regs->rsp value if x86_64
+ * | |
+ * --------------- struct pt_regs * stored on stack if 32-bit
+ * | |
+ * . .
+ * | |
+ * --------------- %esp
+ * | |
+ * | | \/ Lower addresses
+ *
+ * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
+ * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
+ * exceptions use special stacks, maintained by the interrupt stack table
+ * (IST). These stacks are set up in trap_init() in
+ * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
+ * to the kernel stack; instead, it points to some location on the NMI
+ * stack. On the other hand, regs->rsp is the stack pointer saved when the
+ * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
+ * processor does not save %esp on the kernel stack when interrupts occur
+ * in the kernel mode.
+ */
+#ifdef CONFIG_FRAME_POINTER
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+ unsigned long headaddr = (unsigned long)head;
+#ifdef CONFIG_X86_64
+ unsigned long stack = (unsigned long)regs->rsp;
+#else
+ unsigned long stack = (unsigned long)regs;
+#endif
+ unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
+
+ return headaddr > stack && headaddr < stack_base;
+}
+#else
+/* without fp, it's just junk */
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+ return 0;
+}
+#endif
+
+
+void
+x86_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+ struct frame_head *head;
+
+#ifdef CONFIG_X86_64
+ head = (struct frame_head *)regs->rbp;
+#else
+ head = (struct frame_head *)regs->ebp;
+#endif
+
+ if (!user_mode_vm(regs)) {
+ while (depth-- && valid_kernel_stack(head, regs))
+ head = dump_kernel_backtrace(head);
+ return;
+ }
+
+ while (depth-- && head)
+ head = dump_user_backtrace(head);
+}
--- /dev/null
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+/* We support CPUs that have performance counters like the Pentium Pro
+ * with the NMI mode driver.
+ */
+
+extern int op_nmi_init(struct oprofile_operations * ops);
+extern int op_nmi_timer_init(struct oprofile_operations * ops);
+extern void op_nmi_exit(void);
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+ int ret;
+
+ ret = -ENODEV;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ ret = op_nmi_init(ops);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ if (ret < 0)
+ ret = op_nmi_timer_init(ops);
+#endif
+ ops->backtrace = x86_backtrace;
+
+ return ret;
+}
+
+
+void oprofile_arch_exit(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ op_nmi_exit();
+#endif
+}
--- /dev/null
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+#include <linux/kdebug.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include "op_counter.h"
+#include "op_x86_model.h"
+
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+static int nmi_start(void);
+static void nmi_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int nmi_enabled = 0;
+
+#ifdef CONFIG_PM
+
+static int nmi_suspend(struct sys_device *dev, pm_message_t state)
+{
+ if (nmi_enabled == 1)
+ nmi_stop();
+ return 0;
+}
+
+
+static int nmi_resume(struct sys_device *dev)
+{
+ if (nmi_enabled == 1)
+ nmi_start();
+ return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+ set_kset_name("oprofile"),
+ .resume = nmi_resume,
+ .suspend = nmi_suspend,
+};
+
+
+static struct sys_device device_oprofile = {
+ .id = 0,
+ .cls = &oprofile_sysclass,
+};
+
+
+static int __init init_sysfs(void)
+{
+ int error;
+ if (!(error = sysdev_class_register(&oprofile_sysclass)))
+ error = sysdev_register(&device_oprofile);
+ return error;
+}
+
+
+static void exit_sysfs(void)
+{
+ sysdev_unregister(&device_oprofile);
+ sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_sysfs() do { } while (0)
+#define exit_sysfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+static int profile_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+ int cpu = smp_processor_id();
+
+ switch(val) {
+ case DIE_NMI:
+ if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+ unsigned int const nr_ctrs = model->num_counters;
+ unsigned int const nr_ctrls = model->num_controls;
+ struct op_msr * counters = msrs->counters;
+ struct op_msr * controls = msrs->controls;
+ unsigned int i;
+
+ for (i = 0; i < nr_ctrs; ++i) {
+ if (counters[i].addr){
+ rdmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
+ }
+ }
+
+ for (i = 0; i < nr_ctrls; ++i) {
+ if (controls[i].addr){
+ rdmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
+ }
+ }
+}
+
+
+static void nmi_save_registers(void * dummy)
+{
+ int cpu = smp_processor_id();
+ struct op_msrs * msrs = &cpu_msrs[cpu];
+ nmi_cpu_save_registers(msrs);
+}
+
+
+static void free_msrs(void)
+{
+ int i;
+ for_each_possible_cpu(i) {
+ kfree(cpu_msrs[i].counters);
+ cpu_msrs[i].counters = NULL;
+ kfree(cpu_msrs[i].controls);
+ cpu_msrs[i].controls = NULL;
+ }
+}
+
+
+static int allocate_msrs(void)
+{
+ int success = 1;
+ size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+ size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+ int i;
+ for_each_possible_cpu(i) {
+ cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+ if (!cpu_msrs[i].counters) {
+ success = 0;
+ break;
+ }
+ cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+ if (!cpu_msrs[i].controls) {
+ success = 0;
+ break;
+ }
+ }
+
+ if (!success)
+ free_msrs();
+
+ return success;
+}
+
+
+static void nmi_cpu_setup(void * dummy)
+{
+ int cpu = smp_processor_id();
+ struct op_msrs * msrs = &cpu_msrs[cpu];
+ spin_lock(&oprofilefs_lock);
+ model->setup_ctrs(msrs);
+ spin_unlock(&oprofilefs_lock);
+ saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static struct notifier_block profile_exceptions_nb = {
+ .notifier_call = profile_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
+
+static int nmi_setup(void)
+{
+ int err=0;
+ int cpu;
+
+ if (!allocate_msrs())
+ return -ENOMEM;
+
+ if ((err = register_die_notifier(&profile_exceptions_nb))){
+ free_msrs();
+ return err;
+ }
+
+ /* We need to serialize save and setup for HT because the subset
+ * of msrs are distinct for save and setup operations
+ */
+
+ /* Assume saved/restored counters are the same on all CPUs */
+ model->fill_in_addresses(&cpu_msrs[0]);
+ for_each_possible_cpu (cpu) {
+ if (cpu != 0) {
+ memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
+ sizeof(struct op_msr) * model->num_counters);
+
+ memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
+ sizeof(struct op_msr) * model->num_controls);
+ }
+
+ }
+ on_each_cpu(nmi_save_registers, NULL, 0, 1);
+ on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+ nmi_enabled = 1;
+ return 0;
+}
+
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+ unsigned int const nr_ctrs = model->num_counters;
+ unsigned int const nr_ctrls = model->num_controls;
+ struct op_msr * counters = msrs->counters;
+ struct op_msr * controls = msrs->controls;
+ unsigned int i;
+
+ for (i = 0; i < nr_ctrls; ++i) {
+ if (controls[i].addr){
+ wrmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
+ }
+ }
+
+ for (i = 0; i < nr_ctrs; ++i) {
+ if (counters[i].addr){
+ wrmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
+ }
+ }
+}
+
+
+static void nmi_cpu_shutdown(void * dummy)
+{
+ unsigned int v;
+ int cpu = smp_processor_id();
+ struct op_msrs * msrs = &cpu_msrs[cpu];
+
+ /* restoring APIC_LVTPC can trigger an apic error because the delivery
+ * mode and vector nr combination can be illegal. That's by design: on
+ * power on apic lvt contain a zero vector nr which are legal only for
+ * NMI delivery mode. So inhibit apic err before restoring lvtpc
+ */
+ v = apic_read(APIC_LVTERR);
+ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+ apic_write(APIC_LVTERR, v);
+ nmi_restore_registers(msrs);
+ model->shutdown(msrs);
+}
+
+
+static void nmi_shutdown(void)
+{
+ nmi_enabled = 0;
+ on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+ unregister_die_notifier(&profile_exceptions_nb);
+ free_msrs();
+}
+
+
+static void nmi_cpu_start(void * dummy)
+{
+ struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+ model->start(msrs);
+}
+
+
+static int nmi_start(void)
+{
+ on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+ return 0;
+}
+
+
+static void nmi_cpu_stop(void * dummy)
+{
+ struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+ model->stop(msrs);
+}
+
+
+static void nmi_stop(void)
+{
+ on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int nmi_create_files(struct super_block * sb, struct dentry * root)
+{
+ unsigned int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ struct dentry * dir;
+ char buf[4];
+
+ /* quick little hack to _not_ expose a counter if it is not
+ * available for use. This should protect userspace app.
+ * NOTE: assumes 1:1 mapping here (that counters are organized
+ * sequentially in their struct assignment).
+ */
+ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
+ continue;
+
+ snprintf(buf, sizeof(buf), "%d", i);
+ dir = oprofilefs_mkdir(sb, root, buf);
+ oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+ oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+ oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+ oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+ oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+ oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+ }
+
+ return 0;
+}
+
+static int p4force;
+module_param(p4force, int, 0);
+
+static int __init p4_init(char ** cpu_type)
+{
+ __u8 cpu_model = boot_cpu_data.x86_model;
+
+ if (!p4force && (cpu_model > 6 || cpu_model == 5))
+ return 0;
+
+#ifndef CONFIG_SMP
+ *cpu_type = "i386/p4";
+ model = &op_p4_spec;
+ return 1;
+#else
+ switch (smp_num_siblings) {
+ case 1:
+ *cpu_type = "i386/p4";
+ model = &op_p4_spec;
+ return 1;
+
+ case 2:
+ *cpu_type = "i386/p4-ht";
+ model = &op_p4_ht2_spec;
+ return 1;
+ }
+#endif
+
+ printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
+ printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
+ return 0;
+}
+
+
+static int __init ppro_init(char ** cpu_type)
+{
+ __u8 cpu_model = boot_cpu_data.x86_model;
+
+ if (cpu_model == 14)
+ *cpu_type = "i386/core";
+ else if (cpu_model == 15)
+ *cpu_type = "i386/core_2";
+ else if (cpu_model > 0xd)
+ return 0;
+ else if (cpu_model == 9) {
+ *cpu_type = "i386/p6_mobile";
+ } else if (cpu_model > 5) {
+ *cpu_type = "i386/piii";
+ } else if (cpu_model > 2) {
+ *cpu_type = "i386/pii";
+ } else {
+ *cpu_type = "i386/ppro";
+ }
+
+ model = &op_ppro_spec;
+ return 1;
+}
+
+/* in order to get sysfs right */
+static int using_nmi;
+
+int __init op_nmi_init(struct oprofile_operations *ops)
+{
+ __u8 vendor = boot_cpu_data.x86_vendor;
+ __u8 family = boot_cpu_data.x86;
+ char *cpu_type;
+
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+ switch (vendor) {
+ case X86_VENDOR_AMD:
+ /* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+ switch (family) {
+ default:
+ return -ENODEV;
+ case 6:
+ model = &op_athlon_spec;
+ cpu_type = "i386/athlon";
+ break;
+ case 0xf:
+ model = &op_athlon_spec;
+ /* Actually it could be i386/hammer too, but give
+ user space an consistent name. */
+ cpu_type = "x86-64/hammer";
+ break;
+ case 0x10:
+ model = &op_athlon_spec;
+ cpu_type = "x86-64/family10";
+ break;
+ }
+ break;
+
+ case X86_VENDOR_INTEL:
+ switch (family) {
+ /* Pentium IV */
+ case 0xf:
+ if (!p4_init(&cpu_type))
+ return -ENODEV;
+ break;
+
+ /* A P6-class processor */
+ case 6:
+ if (!ppro_init(&cpu_type))
+ return -ENODEV;
+ break;
+
+ default:
+ return -ENODEV;
+ }
+ break;
+
+ default:
+ return -ENODEV;
+ }
+
+ init_sysfs();
+ using_nmi = 1;
+ ops->create_files = nmi_create_files;
+ ops->setup = nmi_setup;
+ ops->shutdown = nmi_shutdown;
+ ops->start = nmi_start;
+ ops->stop = nmi_stop;
+ ops->cpu_type = cpu_type;
+ printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+ return 0;
+}
+
+
+void op_nmi_exit(void)
+{
+ if (using_nmi)
+ exit_sysfs();
+}
--- /dev/null
+/**
+ * @file nmi_timer_int.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Zwane Mwaikambo <zwane@linuxpower.ca>
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/oprofile.h>
+#include <linux/rcupdate.h>
+#include <linux/kdebug.h>
+
+#include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/ptrace.h>
+
+static int profile_timer_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch(val) {
+ case DIE_NMI:
+ oprofile_add_sample(args->regs, 0);
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static struct notifier_block profile_timer_exceptions_nb = {
+ .notifier_call = profile_timer_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
+
+static int timer_start(void)
+{
+ if (register_die_notifier(&profile_timer_exceptions_nb))
+ return 1;
+ return 0;
+}
+
+
+static void timer_stop(void)
+{
+ unregister_die_notifier(&profile_timer_exceptions_nb);
+ synchronize_sched(); /* Allow already-started NMIs to complete. */
+}
+
+
+int __init op_nmi_timer_init(struct oprofile_operations * ops)
+{
+ if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
+ return -ENODEV;
+
+ ops->start = timer_start;
+ ops->stop = timer_stop;
+ ops->cpu_type = "timer";
+ printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
+ return 0;
+}
--- /dev/null
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+ unsigned long count;
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long kernel;
+ unsigned long user;
+ unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
--- /dev/null
+/**
+ * @file op_model_athlon.h
+ * athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/nmi.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+ int i;
+
+ for (i=0; i < NUM_COUNTERS; i++) {
+ if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+ else
+ msrs->counters[i].addr = 0;
+ }
+
+ for (i=0; i < NUM_CONTROLS; i++) {
+ if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
+ msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+ else
+ msrs->controls[i].addr = 0;
+ }
+}
+
+
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+ unsigned int low, high;
+ int i;
+
+ /* clear all counters */
+ for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
+ CTRL_READ(low, high, msrs, i);
+ CTRL_CLEAR(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+
+ /* avoid a false detection of ctr overflows in NMI handler */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ continue;
+ CTR_WRITE(1, msrs, i);
+ }
+
+ /* enable active counters */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
+ reset_value[i] = counter_config[i].count;
+
+ CTR_WRITE(counter_config[i].count, msrs, i);
+
+ CTRL_READ(low, high, msrs, i);
+ CTRL_CLEAR(low);
+ CTRL_SET_ENABLE(low);
+ CTRL_SET_USR(low, counter_config[i].user);
+ CTRL_SET_KERN(low, counter_config[i].kernel);
+ CTRL_SET_UM(low, counter_config[i].unit_mask);
+ CTRL_SET_EVENT(low, counter_config[i].event);
+ CTRL_WRITE(low, high, msrs, i);
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+}
+
+
+static int athlon_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ unsigned int low, high;
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[i])
+ continue;
+ CTR_READ(low, high, msrs, i);
+ if (CTR_OVERFLOWED(low)) {
+ oprofile_add_sample(regs, i);
+ CTR_WRITE(reset_value[i], msrs, i);
+ }
+ }
+
+ /* See op_model_ppro.c */
+ return 1;
+}
+
+
+static void athlon_start(struct op_msrs const * const msrs)
+{
+ unsigned int low, high;
+ int i;
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (reset_value[i]) {
+ CTRL_READ(low, high, msrs, i);
+ CTRL_SET_ACTIVE(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+ }
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+ unsigned int low,high;
+ int i;
+
+ /* Subtle: stop on all counters to avoid race with
+ * setting our pm callback */
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (!reset_value[i])
+ continue;
+ CTRL_READ(low, high, msrs, i);
+ CTRL_SET_INACTIVE(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+}
+
+static void athlon_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ }
+ for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+}
+
+struct op_x86_model_spec const op_athlon_spec = {
+ .num_counters = NUM_COUNTERS,
+ .num_controls = NUM_CONTROLS,
+ .fill_in_addresses = &athlon_fill_in_addresses,
+ .setup_ctrs = &athlon_setup_ctrs,
+ .check_ctrs = &athlon_check_ctrs,
+ .start = &athlon_start,
+ .stop = &athlon_stop,
+ .shutdown = &athlon_shutdown
+};
--- /dev/null
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+#include <asm/nmi.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
+
+/* this has to be checked dynamically since the
+ hyper-threadedness of a chip is discovered at
+ kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+ if (smp_num_siblings == 2){
+ num_counters = NUM_COUNTERS_HT2;
+ num_controls = NUM_CONTROLS_HT2;
+ }
+#endif
+}
+
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+ return smp_num_siblings == 2 ? 2 : 1;
+#else
+ return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+ int virt_counter;
+ int counter_address;
+ int cccr_address;
+};
+
+struct p4_event_binding {
+ int escr_select; /* value to put in CCCR */
+ int event_select; /* value to put in ESCR */
+ struct {
+ int virt_counter; /* for this counter... */
+ int escr_address; /* use this ESCR */
+ } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+ event/i386.p4*events. */
+
+
+#define CTR_BPU_0 (1 << 0)
+#define CTR_MS_0 (1 << 1)
+#define CTR_FLAME_0 (1 << 2)
+#define CTR_IQ_4 (1 << 3)
+#define CTR_BPU_2 (1 << 4)
+#define CTR_MS_2 (1 << 5)
+#define CTR_FLAME_2 (1 << 6)
+#define CTR_IQ_5 (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+ { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
+ { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
+ { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+ { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
+ { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
+ { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
+ { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+ { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+
+ { /* BRANCH_RETIRED */
+ 0x05, 0x06,
+ { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* MISPRED_BRANCH_RETIRED */
+ 0x04, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* TC_DELIVER_MODE */
+ 0x01, 0x01,
+ { { CTR_MS_0, MSR_P4_TC_ESCR0},
+ { CTR_MS_2, MSR_P4_TC_ESCR1} }
+ },
+
+ { /* BPU_FETCH_REQUEST */
+ 0x00, 0x03,
+ { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+ },
+
+ { /* ITLB_REFERENCE */
+ 0x03, 0x18,
+ { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+ { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+ },
+
+ { /* MEMORY_CANCEL */
+ 0x05, 0x02,
+ { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+ { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+ },
+
+ { /* MEMORY_COMPLETE */
+ 0x02, 0x08,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* LOAD_PORT_REPLAY */
+ 0x02, 0x04,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* STORE_PORT_REPLAY */
+ 0x02, 0x05,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* MOB_LOAD_REPLAY */
+ 0x02, 0x03,
+ { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+ { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+ },
+
+ { /* PAGE_WALK_TYPE */
+ 0x04, 0x01,
+ { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+ { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+ },
+
+ { /* BSQ_CACHE_REFERENCE */
+ 0x07, 0x0c,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+ },
+
+ { /* IOQ_ALLOCATION */
+ 0x06, 0x03,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* IOQ_ACTIVE_ENTRIES */
+ 0x06, 0x1a,
+ { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+ { 0, 0 } }
+ },
+
+ { /* FSB_DATA_ACTIVITY */
+ 0x06, 0x17,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* BSQ_ALLOCATION */
+ 0x07, 0x05,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* BSQ_ACTIVE_ENTRIES */
+ 0x07, 0x06,
+ { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
+ { 0, 0 } }
+ },
+
+ { /* X87_ASSIST */
+ 0x05, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* SSE_INPUT_ASSIST */
+ 0x01, 0x34,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_SP_UOP */
+ 0x01, 0x08,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_DP_UOP */
+ 0x01, 0x0c,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_SP_UOP */
+ 0x01, 0x0a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_DP_UOP */
+ 0x01, 0x0e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 64BIT_MMX_UOP */
+ 0x01, 0x02,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 128BIT_MMX_UOP */
+ 0x01, 0x1a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_FP_UOP */
+ 0x01, 0x04,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_SIMD_MOVES_UOP */
+ 0x01, 0x2e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* MACHINE_CLEAR */
+ 0x05, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* GLOBAL_POWER_EVENTS */
+ 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* TC_MS_XFER */
+ 0x00, 0x05,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* UOP_QUEUE_WRITES */
+ 0x00, 0x09,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* FRONT_END_EVENT */
+ 0x05, 0x08,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* EXECUTION_EVENT */
+ 0x05, 0x0c,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* REPLAY_EVENT */
+ 0x05, 0x09,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* INSTR_RETIRED */
+ 0x04, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOPS_RETIRED */
+ 0x04, 0x01,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOP_TYPE */
+ 0x02, 0x02,
+ { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+ { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+ },
+
+ { /* RETIRED_MISPRED_BRANCH_TYPE */
+ 0x02, 0x05,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ },
+
+ { /* RETIRED_BRANCH_TYPE */
+ 0x02, 0x04,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ }
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+ the code in this module as an extra array offset, to select the "even"
+ or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+ int cpu = smp_processor_id();
+ return (cpu != first_cpu(cpu_sibling_map[cpu]));
+#endif
+ return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+ by passing a "virtual" counter numer to this macro,
+ along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+ unsigned int i;
+ unsigned int addr, cccraddr, stag;
+
+ setup_num_counters();
+ stag = get_stagger();
+
+ /* initialize some registers */
+ for (i = 0; i < num_counters; ++i) {
+ msrs->counters[i].addr = 0;
+ }
+ for (i = 0; i < num_controls; ++i) {
+ msrs->controls[i].addr = 0;
+ }
+
+ /* the counter & cccr registers we pay attention to */
+ for (i = 0; i < num_counters; ++i) {
+ addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+ cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+ if (reserve_perfctr_nmi(addr)){
+ msrs->counters[i].addr = addr;
+ msrs->controls[i].addr = cccraddr;
+ }
+ }
+
+ /* 43 ESCR registers in three or four discontiguous group */
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
+ * to avoid special case in nmi_{save|restore}_registers() */
+ if (boot_cpu_data.x86_model >= 0x3) {
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+ } else {
+ for (addr = MSR_P4_IQ_ESCR0 + stag;
+ addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+ }
+
+ for (addr = MSR_P4_RAT_ESCR0 + stag;
+ addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_MS_ESCR0 + stag;
+ addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_IX_ESCR0 + stag;
+ addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ /* there are 2 remaining non-contiguously located ESCRs */
+
+ if (num_counters == NUM_COUNTERS_NON_HT) {
+ /* standard non-HT CPUs handle both remaining ESCRs*/
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else if (stag == 0) {
+ /* HT CPUs give the first remainder to the even thread, as
+ the 32nd control register */
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else {
+ /* and two copies of the second to the odd thread,
+ for the 22st and 23nd control registers */
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ }
+ }
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+ int i;
+ int const maxbind = 2;
+ unsigned int cccr = 0;
+ unsigned int escr = 0;
+ unsigned int high = 0;
+ unsigned int counter_bit;
+ struct p4_event_binding *ev = NULL;
+ unsigned int stag;
+
+ stag = get_stagger();
+
+ /* convert from counter *number* to counter *bit* */
+ counter_bit = 1 << VIRT_CTR(stag, ctr);
+
+ /* find our event binding structure. */
+ if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+ printk(KERN_ERR
+ "oprofile: P4 event code 0x%lx out of range\n",
+ counter_config[ctr].event);
+ return;
+ }
+
+ ev = &(p4_events[counter_config[ctr].event - 1]);
+
+ for (i = 0; i < maxbind; i++) {
+ if (ev->bindings[i].virt_counter & counter_bit) {
+
+ /* modify ESCR */
+ ESCR_READ(escr, high, ev, i);
+ ESCR_CLEAR(escr);
+ if (stag == 0) {
+ ESCR_SET_USR_0(escr, counter_config[ctr].user);
+ ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+ } else {
+ ESCR_SET_USR_1(escr, counter_config[ctr].user);
+ ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+ }
+ ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+ ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+ ESCR_WRITE(escr, high, ev, i);
+
+ /* modify CCCR */
+ CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+ CCCR_CLEAR(cccr);
+ CCCR_SET_REQUIRED_BITS(cccr);
+ CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+ if (stag == 0) {
+ CCCR_SET_PMI_OVF_0(cccr);
+ } else {
+ CCCR_SET_PMI_OVF_1(cccr);
+ }
+ CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+ return;
+ }
+ }
+
+ printk(KERN_ERR
+ "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+ counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+ unsigned int i;
+ unsigned int low, high;
+ unsigned int stag;
+
+ stag = get_stagger();
+
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (! MISC_PMC_ENABLED_P(low)) {
+ printk(KERN_ERR "oprofile: P4 PMC not available\n");
+ return;
+ }
+
+ /* clear the cccrs we will use */
+ for (i = 0 ; i < num_counters ; i++) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ CCCR_CLEAR(low);
+ CCCR_SET_REQUIRED_BITS(low);
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ }
+
+ /* clear all escrs (including those outside our concern) */
+ for (i = num_counters; i < num_controls; i++) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
+ wrmsr(msrs->controls[i].addr, 0, 0);
+ }
+
+ /* setup all counters */
+ for (i = 0 ; i < num_counters ; ++i) {
+ if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
+ reset_value[i] = counter_config[i].count;
+ pmc_setup_one_p4_counter(i);
+ CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+}
+
+
+static int p4_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ unsigned long ctr, low, high, stag, real;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+
+ if (!reset_value[i])
+ continue;
+
+ /*
+ * there is some eccentricity in the hardware which
+ * requires that we perform 2 extra corrections:
+ *
+ * - check both the CCCR:OVF flag for overflow and the
+ * counter high bit for un-flagged overflows.
+ *
+ * - write the counter back twice to ensure it gets
+ * updated properly.
+ *
+ * the former seems to be related to extra NMIs happening
+ * during the current NMI; the latter is reported as errata
+ * N15 in intel doc 249199-029, pentium 4 specification
+ * update, though their suggested work-around does not
+ * appear to solve the problem.
+ */
+
+ real = VIRT_CTR(stag, i);
+
+ CCCR_READ(low, high, real);
+ CTR_READ(ctr, high, real);
+ if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+ oprofile_add_sample(regs, i);
+ CTR_WRITE(reset_value[i], real);
+ CCCR_CLEAR_OVF(low);
+ CCCR_WRITE(low, high, real);
+ CTR_WRITE(reset_value[i], real);
+ }
+ }
+
+ /* P4 quirk: you have to re-unmask the apic vector */
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+ /* See op_model_ppro.c */
+ return 1;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+ unsigned int low, high, stag;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ CCCR_READ(low, high, VIRT_CTR(stag, i));
+ CCCR_SET_ENABLE(low);
+ CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ }
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+ unsigned int low, high, stag;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ CCCR_READ(low, high, VIRT_CTR(stag, i));
+ CCCR_SET_DISABLE(low);
+ CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ }
+}
+
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < num_counters ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(msrs->counters[i].addr);
+ }
+ /* some of the control registers are specially reserved in
+ * conjunction with the counter registers (hence the starting offset).
+ * This saves a few bits.
+ */
+ for (i = num_counters ; i < num_controls ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(msrs->controls[i].addr);
+ }
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+ .num_counters = NUM_COUNTERS_HT2,
+ .num_controls = NUM_CONTROLS_HT2,
+ .fill_in_addresses = &p4_fill_in_addresses,
+ .setup_ctrs = &p4_setup_ctrs,
+ .check_ctrs = &p4_check_ctrs,
+ .start = &p4_start,
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+ .num_counters = NUM_COUNTERS_NON_HT,
+ .num_controls = NUM_CONTROLS_NON_HT,
+ .fill_in_addresses = &p4_fill_in_addresses,
+ .setup_ctrs = &p4_setup_ctrs,
+ .check_ctrs = &p4_check_ctrs,
+ .start = &p4_start,
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
+};
--- /dev/null
+/**
+ * @file op_model_ppro.h
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/nmi.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_32BIT_WRITE(l,msrs,c) \
+ do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+ int i;
+
+ for (i=0; i < NUM_COUNTERS; i++) {
+ if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
+ msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+ else
+ msrs->counters[i].addr = 0;
+ }
+
+ for (i=0; i < NUM_CONTROLS; i++) {
+ if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
+ msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
+ else
+ msrs->controls[i].addr = 0;
+ }
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+ unsigned int low, high;
+ int i;
+
+ /* clear all counters */
+ for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ continue;
+ CTRL_READ(low, high, msrs, i);
+ CTRL_CLEAR(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+
+ /* avoid a false detection of ctr overflows in NMI handler */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ continue;
+ CTR_32BIT_WRITE(1, msrs, i);
+ }
+
+ /* enable active counters */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
+ reset_value[i] = counter_config[i].count;
+
+ CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
+
+ CTRL_READ(low, high, msrs, i);
+ CTRL_CLEAR(low);
+ CTRL_SET_ENABLE(low);
+ CTRL_SET_USR(low, counter_config[i].user);
+ CTRL_SET_KERN(low, counter_config[i].kernel);
+ CTRL_SET_UM(low, counter_config[i].unit_mask);
+ CTRL_SET_EVENT(low, counter_config[i].event);
+ CTRL_WRITE(low, high, msrs, i);
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+}
+
+
+static int ppro_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ unsigned int low, high;
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[i])
+ continue;
+ CTR_READ(low, high, msrs, i);
+ if (CTR_OVERFLOWED(low)) {
+ oprofile_add_sample(regs, i);
+ CTR_32BIT_WRITE(reset_value[i], msrs, i);
+ }
+ }
+
+ /* Only P6 based Pentium M need to re-unmask the apic vector but it
+ * doesn't hurt other P6 variant */
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+ /* We can't work out if we really handled an interrupt. We
+ * might have caught a *second* counter just after overflowing
+ * the interrupt for this counter then arrives
+ * and we don't find a counter that's overflowed, so we
+ * would return 0 and get dazed + confused. Instead we always
+ * assume we found an overflow. This sucks.
+ */
+ return 1;
+}
+
+
+static void ppro_start(struct op_msrs const * const msrs)
+{
+ unsigned int low,high;
+ int i;
+
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (reset_value[i]) {
+ CTRL_READ(low, high, msrs, i);
+ CTRL_SET_ACTIVE(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+ }
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+ unsigned int low,high;
+ int i;
+
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[i])
+ continue;
+ CTRL_READ(low, high, msrs, i);
+ CTRL_SET_INACTIVE(low);
+ CTRL_WRITE(low, high, msrs, i);
+ }
+}
+
+static void ppro_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ if (CTR_IS_RESERVED(msrs,i))
+ release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+ }
+ for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+ if (CTRL_IS_RESERVED(msrs,i))
+ release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
+ }
+}
+
+
+struct op_x86_model_spec const op_ppro_spec = {
+ .num_counters = NUM_COUNTERS,
+ .num_controls = NUM_CONTROLS,
+ .fill_in_addresses = &ppro_fill_in_addresses,
+ .setup_ctrs = &ppro_setup_ctrs,
+ .check_ctrs = &ppro_check_ctrs,
+ .start = &ppro_start,
+ .stop = &ppro_stop,
+ .shutdown = &ppro_shutdown
+};
--- /dev/null
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+ unsigned int high;
+ unsigned int low;
+};
+
+struct op_msr {
+ unsigned long addr;
+ struct op_saved_msr saved;
+};
+
+struct op_msrs {
+ struct op_msr * counters;
+ struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ */
+struct op_x86_model_spec {
+ unsigned int const num_counters;
+ unsigned int const num_controls;
+ void (*fill_in_addresses)(struct op_msrs * const msrs);
+ void (*setup_ctrs)(struct op_msrs const * const msrs);
+ int (*check_ctrs)(struct pt_regs * const regs,
+ struct op_msrs const * const msrs);
+ void (*start)(struct op_msrs const * const msrs);
+ void (*stop)(struct op_msrs const * const msrs);
+ void (*shutdown)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
arch/x86_64/vdso/
core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
drivers-$(CONFIG_PCI) += arch/x86_64/pci/
-drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
boot := arch/x86_64/boot