rtc-opal: Fix handling of firmware error codes, prevent busy loops
author Stewart Smith <stewart@linux.vnet.ibm.com>
Tue, 2 Aug 2016 01:50:16 +0000 (11:50 +1000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 22 Feb 2018 14:42:25 +0000 (15:42 +0100)
commit 5b8b58063029f02da573120ef4dc9079822e3cda upstream.

According to the OPAL docs:
  skiboot-5.2.5/doc/opal-api/opal-rtc-read-3.txt
  skiboot-5.2.5/doc/opal-api/opal-rtc-write-4.txt

OPAL_HARDWARE may be returned from OPAL_RTC_READ or OPAL_RTC_WRITE and
this indicates either a transient or permanent error.

Prior to this patch, Linux was not dealing with OPAL_HARDWARE being a
permanent error particularly well, in that you could end up in a busy
loop.

This was not too hard to trigger on an AMI BMC based OpenPOWER machine
by continuously issuing "ipmitool mc reset cold" to the BMC; as a
result, we'd get stuck in an infinite loop in opal_get_rtc_time().

We now retry a few times before returning the error higher up the
stack.

Fixes: 16b1d26e77b1 ("rtc/tpo: Driver to support rtc and wakeup on PowerNV platform")
Cc: stable@vger.kernel.org # v3.19+
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/rtc/rtc-opal.c

index e2a946c0e667e150e1029728134c6d20baf4d076..304e891e35fcb060a8ff26f78122fa3c63a1eb87 100644 (file)
@@ -58,6 +58,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 {
        long rc = OPAL_BUSY;
+       int retries = 10;
        u32 y_m_d;
        u64 h_m_s_ms;
        __be32 __y_m_d;
@@ -67,8 +68,11 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
                rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
-               else
+               else if (retries-- && (rc == OPAL_HARDWARE
+                                      || rc == OPAL_INTERNAL_ERROR))
                        msleep(10);
+               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+                       break;
        }
 
        if (rc != OPAL_SUCCESS)
@@ -84,6 +88,7 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
 {
        long rc = OPAL_BUSY;
+       int retries = 10;
        u32 y_m_d = 0;
        u64 h_m_s_ms = 0;
 
@@ -92,8 +97,11 @@ static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
                rc = opal_rtc_write(y_m_d, h_m_s_ms);
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
-               else
+               else if (retries-- && (rc == OPAL_HARDWARE
+                                      || rc == OPAL_INTERNAL_ERROR))
                        msleep(10);
+               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+                       break;
        }
 
        return rc == OPAL_SUCCESS ? 0 : -EIO;