x86, hwmon: Package Level Thermal/Power: power limit
authorFenghua Yu <fenghua.yu@intel.com>
Fri, 30 Jul 2010 00:13:45 +0000 (17:13 -0700)
committerH. Peter Anvin <hpa@linux.intel.com>
Tue, 3 Aug 2010 22:58:56 +0000 (15:58 -0700)
Power limit notification feature is published in Intel 64 and IA-32
Architectures SDMV Vol 3A 14.5.6 Power Limit Notification.

It is implemented first on Intel Sandy Bridge platform.

The patch handles notification interrupt. Interrupt handler dumps power limit
information in log_buf, logs the event in mce log, and increases the event
counters (core_power_limit and package_power_limit). Upper level applications
could use the data to detect system health or diagnose functionality/performance
issues.

In the future, the event could be handled in a more fancy way.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
LKML-Reference: <1280448826-12004-5-git-send-email-fenghua.yu@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/kernel/cpu/mcheck/therm_throt.c

index d307f9f64c23081c56daba261b62390f83b709cb..c2a8b26d4feacf4ac6b6b022c0a9c590fbbcef85 100644 (file)
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL         (300 * HZ)
 
+#define THERMAL_THROTTLING_EVENT       0
+#define POWER_LIMIT_EVENT              1
+
 /*
- * Current thermal throttling state:
+ * Current thermal event state:
  */
 struct _thermal_state {
-       bool                    is_throttled;
-
+       bool                    new_event;
+       int                     event;
        u64                     next_check;
-       unsigned long           throttle_count;
-       unsigned long           last_throttle_count;
+       unsigned long           count;
+       unsigned long           last_count;
 };
 
 struct thermal_state {
-       struct _thermal_state core;
-       struct _thermal_state package;
+       struct _thermal_state core_throttle;
+       struct _thermal_state core_power_limit;
+       struct _thermal_state package_throttle;
+       struct _thermal_state package_power_limit;
 };
 
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -62,9 +67,9 @@ static u32 lvtthmr_init __read_mostly;
                           therm_throt_sysdev_show_##_name,             \
                                   NULL)                                \
 
-#define define_therm_throt_sysdev_show_func(level, name)               \
+#define define_therm_throt_sysdev_show_func(event, name)               \
                                                                        \
-static ssize_t therm_throt_sysdev_show_##level##_##name(               \
+static ssize_t therm_throt_sysdev_show_##event##_##name(               \
                        struct sys_device *dev,                         \
                        struct sysdev_attribute *attr,                  \
                        char *buf)                                      \
@@ -75,7 +80,7 @@ static ssize_t therm_throt_sysdev_show_##level##_##name(              \
        preempt_disable();      /* CPU hotplug */                       \
        if (cpu_online(cpu)) {                                          \
                ret = sprintf(buf, "%lu\n",                             \
-                             per_cpu(thermal_state, cpu).level.name);  \
+                             per_cpu(thermal_state, cpu).event.name);  \
        } else                                                          \
                ret = 0;                                                \
        preempt_enable();                                               \
@@ -83,23 +88,32 @@ static ssize_t therm_throt_sysdev_show_##level##_##name(            \
        return ret;                                                     \
 }
 
-define_therm_throt_sysdev_show_func(core, throttle_count);
+define_therm_throt_sysdev_show_func(core_throttle, count);
 define_therm_throt_sysdev_one_ro(core_throttle_count);
 
-define_therm_throt_sysdev_show_func(package, throttle_count);
+define_therm_throt_sysdev_show_func(core_power_limit, count);
+define_therm_throt_sysdev_one_ro(core_power_limit_count);
+
+define_therm_throt_sysdev_show_func(package_throttle, count);
 define_therm_throt_sysdev_one_ro(package_throttle_count);
 
+define_therm_throt_sysdev_show_func(package_power_limit, count);
+define_therm_throt_sysdev_one_ro(package_power_limit_count);
+
 static struct attribute *thermal_throttle_attrs[] = {
        &attr_core_throttle_count.attr,
        NULL
 };
 
-static struct attribute_group thermal_throttle_attr_group = {
+static struct attribute_group thermal_attr_group = {
        .attrs  = thermal_throttle_attrs,
        .name   = "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
 
+#define CORE_LEVEL     0
+#define PACKAGE_LEVEL  1
+
 /***
  * therm_throt_process - Process thermal throttling event from interrupt
  * @curr: Whether the condition is current or not (boolean), since the
@@ -116,49 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = {
  *          1 : Event should be logged further, and a message has been
  *              printed to the syslog.
  */
-#define CORE_LEVEL     0
-#define PACKAGE_LEVEL  1
-static int therm_throt_process(bool is_throttled, int level)
+static int therm_throt_process(bool new_event, int event, int level)
 {
        struct _thermal_state *state;
-       unsigned int this_cpu;
-       bool was_throttled;
+       unsigned int this_cpu = smp_processor_id();
+       bool old_event;
        u64 now;
+       struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
 
-       this_cpu = smp_processor_id();
        now = get_jiffies_64();
-       if (level == CORE_LEVEL)
-               state = &per_cpu(thermal_state, this_cpu).core;
-       else
-               state = &per_cpu(thermal_state, this_cpu).package;
+       if (level == CORE_LEVEL) {
+               if (event == THERMAL_THROTTLING_EVENT)
+                       state = &pstate->core_throttle;
+               else if (event == POWER_LIMIT_EVENT)
+                       state = &pstate->core_power_limit;
+               else
+                        return 0;
+       } else if (level == PACKAGE_LEVEL) {
+               if (event == THERMAL_THROTTLING_EVENT)
+                       state = &pstate->package_throttle;
+               else if (event == POWER_LIMIT_EVENT)
+                       state = &pstate->package_power_limit;
+               else
+                       return 0;
+       } else
+               return 0;
 
-       was_throttled = state->is_throttled;
-       state->is_throttled = is_throttled;
+       old_event = state->new_event;
+       state->new_event = new_event;
 
-       if (is_throttled)
-               state->throttle_count++;
+       if (new_event)
+               state->count++;
 
        if (time_before64(now, state->next_check) &&
-                       state->throttle_count != state->last_throttle_count)
+                       state->count != state->last_count)
                return 0;
 
        state->next_check = now + CHECK_INTERVAL;
-       state->last_throttle_count = state->throttle_count;
+       state->last_count = state->count;
 
        /* if we just entered the thermal event */
-       if (is_throttled) {
-               printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
-                     this_cpu,
-                     level == CORE_LEVEL ? "Core" : "Package",
-                     state->throttle_count);
+       if (new_event) {
+               if (event == THERMAL_THROTTLING_EVENT)
+                       printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+                               this_cpu,
+                               level == CORE_LEVEL ? "Core" : "Package",
+                               state->count);
+               else
+                       printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
+                               this_cpu,
+                               level == CORE_LEVEL ? "Core" : "Package",
+                               state->count);
 
                add_taint(TAINT_MACHINE_CHECK);
                return 1;
        }
-       if (was_throttled) {
-               printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-                      this_cpu,
-                      level == CORE_LEVEL ? "Core" : "Package");
+       if (old_event) {
+               if (event == THERMAL_THROTTLING_EVENT)
+                       printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
+                               this_cpu,
+                               level == CORE_LEVEL ? "Core" : "Package");
+               else
+                       printk(KERN_INFO "CPU%d: %s power limit normal\n",
+                               this_cpu,
+                               level == CORE_LEVEL ? "Core" : "Package");
                return 1;
        }
 
@@ -172,21 +207,29 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
        int err;
        struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
-       err = sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+       err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
        if (err)
                return err;
 
+       if (cpu_has(c, X86_FEATURE_PLN))
+               err = sysfs_add_file_to_group(&sys_dev->kobj,
+                                             &attr_core_power_limit_count.attr,
+                                             thermal_attr_group.name);
        if (cpu_has(c, X86_FEATURE_PTS))
                err = sysfs_add_file_to_group(&sys_dev->kobj,
                                              &attr_package_throttle_count.attr,
-                                             thermal_throttle_attr_group.name);
+                                             thermal_attr_group.name);
+               if (cpu_has(c, X86_FEATURE_PLN))
+                       err = sysfs_add_file_to_group(&sys_dev->kobj,
+                                       &attr_package_power_limit_count.attr,
+                                       thermal_attr_group.name);
 
        return err;
 }
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 {
-       sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+       sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);
 }
 
 /* Mutex protecting device creation against CPU hotplug: */
@@ -257,6 +300,17 @@ device_initcall(thermal_throttle_init_device);
 
 #endif /* CONFIG_SYSFS */
 
+/*
+ * Set up the most two significant bit to notify mce log that this thermal
+ * event type.
+ * This is a temp solution. May be changed in the future with mce log
+ * infrasture.
+ */
+#define CORE_THROTTLED         (0)
+#define CORE_POWER_LIMIT       ((__u64)1 << 62)
+#define PACKAGE_THROTTLED      ((__u64)2 << 62)
+#define PACKAGE_POWER_LIMIT    ((__u64)3 << 62)
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -264,21 +318,31 @@ static void intel_thermal_interrupt(void)
        struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
+                               THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
-               mce_log_therm_throt_event(msr_val);
+               mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
+
+       if (cpu_has(c, X86_FEATURE_PLN))
+               if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+                                       POWER_LIMIT_EVENT,
+                                       CORE_LEVEL) != 0)
+                       mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
        if (cpu_has(c, X86_FEATURE_PTS)) {
                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
                if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+                                       THERMAL_THROTTLING_EVENT,
                                        PACKAGE_LEVEL) != 0)
-                       /*
-                        * Set up the most significant bit to notify mce log
-                        * that this thermal event is a package level event.
-                        * This is a temp solution. May be changed in the future
-                        * with mce log infrasture.
-                        */
-                       mce_log_therm_throt_event(((__u64)1 << 63) | msr_val);
+                       mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
+               if (cpu_has(c, X86_FEATURE_PLN))
+                       if (therm_throt_process(msr_val &
+                                       PACKAGE_THERM_STATUS_POWER_LIMIT,
+                                       POWER_LIMIT_EVENT,
+                                       PACKAGE_LEVEL) != 0)
+                               mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
+                                                         | msr_val);
        }
 }
 
@@ -381,14 +445,25 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
        apic_write(APIC_LVTTHMR, h);
 
        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-       wrmsr(MSR_IA32_THERM_INTERRUPT,
-               l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+       if (cpu_has(c, X86_FEATURE_PLN))
+               wrmsr(MSR_IA32_THERM_INTERRUPT,
+                     l | (THERM_INT_LOW_ENABLE
+                       | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
+       else
+               wrmsr(MSR_IA32_THERM_INTERRUPT,
+                     l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
 
        if (cpu_has(c, X86_FEATURE_PTS)) {
                rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-               wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-                       l | (PACKAGE_THERM_INT_LOW_ENABLE
-                 | PACKAGE_THERM_INT_HIGH_ENABLE), h);
+               if (cpu_has(c, X86_FEATURE_PLN))
+                       wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+                             l | (PACKAGE_THERM_INT_LOW_ENABLE
+                               | PACKAGE_THERM_INT_HIGH_ENABLE
+                               | PACKAGE_THERM_INT_PLN_ENABLE), h);
+               else
+                       wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+                             l | (PACKAGE_THERM_INT_LOW_ENABLE
+                               | PACKAGE_THERM_INT_HIGH_ENABLE), h);
        }
 
        smp_thermal_vector = intel_thermal_interrupt;