#include <asm/memory.h>
#include <asm/param.h> /* HZ */
+/*
+ * Loop (or tick) based delay:
+ *
+ * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
+ *
+ * where:
+ *
+ * jiffies_per_sec = HZ
+ * us_per_sec = 1000000
+ *
+ * Therefore the constant part is HZ / 1000000 which is a small
+ * fractional number. To make this usable with integer math, we
+ * scale up this constant by 2^31, perform the actual multiplication,
+ * and scale the result back down by 2^31 with a simple shift:
+ *
+ * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
+ *
+ * where:
+ *
+ * UDELAY_MULT = 2^31 * HZ / 1000000
+ * = (2^31 / 1000000) * HZ
+ * = 2147.483648 * HZ
+ * = 2147 * HZ + 483648 * HZ / 1000000
+ *
+ * 31 is the biggest scale shift value that won't overflow 32 bits for
+ * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
+ */
#define MAX_UDELAY_MS 2
#define UDELAY_MULT UL(2147 * HZ + 483648 * HZ / 1000000)
#define UDELAY_SHIFT 31
.LC1: .word UDELAY_MULT
/*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ *
* r0 <= 2000
* HZ <= 1000
*/
ENTRY(__loop_udelay)
ldr r2, .LC1
- mul r0, r2, r0
-ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
+ mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT
+ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0
ldr r2, .LC0
ldr r2, [r2]
- umull r1, r0, r2, r0
- adds r1, r1, #0xffffffff
- adcs r0, r0, r0
+ umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy
+ adds r1, r1, #0xffffffff @ rounding up ...
+ adcs r0, r0, r0 @ and right shift by 31
reteq lr
-/*
- * loops = r0 * HZ * loops_per_jiffy / 1000000
- */
.align 3
@ Delay routine