h8300: library functions
author Yoshinori Sato <ysato@users.sourceforge.jp>
Tue, 27 Jan 2015 17:48:15 +0000 (02:48 +0900)
committer Yoshinori Sato <ysato@users.sourceforge.jp>
Tue, 23 Jun 2015 04:35:54 +0000 (13:35 +0900)
Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
16 files changed:
arch/h8300/lib/Makefile [new file with mode: 0644]
arch/h8300/lib/abs.S [new file with mode: 0644]
arch/h8300/lib/ashldi3.c [new file with mode: 0644]
arch/h8300/lib/ashrdi3.c [new file with mode: 0644]
arch/h8300/lib/delay.c [new file with mode: 0644]
arch/h8300/lib/libgcc.h [new file with mode: 0644]
arch/h8300/lib/lshrdi3.c [new file with mode: 0644]
arch/h8300/lib/memcpy.S [new file with mode: 0644]
arch/h8300/lib/memset.S [new file with mode: 0644]
arch/h8300/lib/moddivsi3.S [new file with mode: 0644]
arch/h8300/lib/modsi3.S [new file with mode: 0644]
arch/h8300/lib/muldi3.c [new file with mode: 0644]
arch/h8300/lib/mulsi3.S [new file with mode: 0644]
arch/h8300/lib/strncpy.S [new file with mode: 0644]
arch/h8300/lib/ucmpdi2.c [new file with mode: 0644]
arch/h8300/lib/udivsi3.S [new file with mode: 0644]

diff --git a/arch/h8300/lib/Makefile b/arch/h8300/lib/Makefile
new file mode 100644 (file)
index 0000000..28ff560
--- /dev/null
@@ -0,0 +1,8 @@
+#
+# Makefile for H8/300-specific library files.
+#
+
+# Objects built from the sources in this directory.
+# NOTE(review): modsi3.o is not listed although this patch also adds
+# modsi3.S; moddivsi3.o defines the same global symbols (__modsi3,
+# __umodsi3, __divsi3). Confirm that modsi3.S is intentionally unbuilt.
+lib-y  = memcpy.o memset.o abs.o strncpy.o \
+        mulsi3.o udivsi3.o muldi3.o moddivsi3.o \
+        ashldi3.o lshrdi3.o ashrdi3.o ucmpdi2.o \
+        delay.o
diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S
new file mode 100644 (file)
index 0000000..efda749
--- /dev/null
@@ -0,0 +1,20 @@
+;;; abs.S - absolute value of a 32-bit integer
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+       .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+       .h8300s
+#endif
+       .text
+.global _abs
+
+;;; int abs(int n)
+;;;  in:  er0 = n
+;;;  out: er0 = n when n >= 0, otherwise the negated value
+;;; NOTE(review): the symbol is spelled _abs, while memcpy.S and memset.S
+;;; export their symbols without a leading underscore - confirm which
+;;; spelling the C compiler emits for abs().
+_abs:
+       mov.l   er0,er0         ; set N/Z from n without changing it
+       bpl     1f              ; n >= 0: return it unchanged
+       neg.l   er0             ; n < 0: negate in place
+1:
+       rts
diff --git a/arch/h8300/lib/ashldi3.c b/arch/h8300/lib/ashldi3.c
new file mode 100644 (file)
index 0000000..c6aa8ea
--- /dev/null
@@ -0,0 +1,24 @@
+#include "libgcc.h"
+
+/*
+ * __ashldi3 - shift a double-word value left by b bits.
+ *
+ * The value is handled as two words.  For shifts smaller than one word
+ * the bits leaving the low word are carried into the high word; for
+ * larger shifts the low word alone supplies the high word and the low
+ * word becomes zero.  A shift of zero returns u unchanged.
+ */
+DWtype __ashldi3(DWtype u, word_type b)
+{
+       /* bits of the low word that stay in the low word (<= 0: none) */
+       const word_type spill = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+       DWunion in, out;
+
+       if (b == 0)
+               return u;
+
+       in.ll = u;
+
+       if (spill > 0) {
+               /* shift within a word: carry the top bits of low into high */
+               out.s.high = ((UWtype) in.s.high << b)
+                            | ((UWtype) in.s.low >> spill);
+               out.s.low = (UWtype) in.s.low << b;
+       } else {
+               /* shift of a word or more: low moves into high */
+               out.s.high = (UWtype) in.s.low << -spill;
+               out.s.low = 0;
+       }
+
+       return out.ll;
+}
diff --git a/arch/h8300/lib/ashrdi3.c b/arch/h8300/lib/ashrdi3.c
new file mode 100644 (file)
index 0000000..070adf9
--- /dev/null
@@ -0,0 +1,24 @@
+#include "libgcc.h"
+
+/*
+ * __ashrdi3 - arithmetic (sign-propagating) right shift of a double-word
+ * value by b bits.
+ *
+ * For shifts smaller than one word the bits leaving the high word are
+ * carried into the low word; for larger shifts the high word alone
+ * supplies the low word and the high word is filled with copies of the
+ * sign bit.  A shift of zero returns u unchanged.
+ */
+DWtype __ashrdi3(DWtype u, word_type b)
+{
+       /* bits of the high word that stay in the high word (<= 0: none) */
+       const word_type spill = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+       DWunion in, out;
+
+       if (b == 0)
+               return u;
+
+       in.ll = u;
+
+       if (spill > 0) {
+               /* shift within a word: carry the low bits of high into low */
+               out.s.low = ((UWtype) in.s.low >> b)
+                           | ((UWtype) in.s.high << spill);
+               out.s.high = in.s.high >> b;
+       } else {
+               /* shift of a word or more: high moves into low */
+               out.s.low = in.s.high >> -spill;
+               /* high becomes 1..1 or 0..0 depending on the sign */
+               out.s.high = in.s.high >> (sizeof (Wtype) * BITS_PER_UNIT - 1);
+       }
+
+       return out.ll;
+}
diff --git a/arch/h8300/lib/delay.c b/arch/h8300/lib/delay.c
new file mode 100644 (file)
index 0000000..463f6b3
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * delay loops
+ *
+ * Copyright (C) 2015 Yoshinori Sato
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <asm/param.h>
+#include <asm/processor.h>
+#include <asm/timex.h>
+
+/*
+ * __delay - busy-wait for the given number of loop iterations.
+ *
+ * Each iteration decrements the counter and branches back while it is
+ * non-zero.
+ */
+void __delay(unsigned long cycles)
+{
+       /*
+        * The loop decrements before it tests, so entering it with 0 would
+        * wrap the counter to 0xffffffff and spin for about 2**32
+        * iterations.  __const_udelay() can produce 0 for short delays.
+        */
+       if (cycles == 0)
+               return;
+
+       __asm__ volatile ("1: dec.l #1,%0\n\t"
+                         "bne 1b":"=r"(cycles):"0"(cycles));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * __const_udelay - delay for a pre-scaled amount of time.
+ *
+ * xloops is multiplied by loops_per_jiffy * HZ in 64-bit arithmetic and
+ * the upper 32 bits of the product are handed to __delay() as the loop
+ * count.
+ */
+void __const_udelay(unsigned long xloops)
+{
+       u64 scaled = (u64)xloops;
+
+       scaled *= loops_per_jiffy;
+       scaled *= HZ;
+
+       __delay(scaled >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+/*
+ * __udelay - delay for usecs microseconds.
+ *
+ * The multiplication is done in unsigned long, so large usecs values
+ * wrap before being passed on.
+ */
+void __udelay(unsigned long usecs)
+{
+       /* 2**32 / 1000000 (rounded up) */
+       const unsigned long xloops_per_usec = 0x10C7UL;
+
+       __const_udelay(usecs * xloops_per_usec);
+}
+EXPORT_SYMBOL(__udelay);
+
+/*
+ * __ndelay - delay for nsecs nanoseconds.
+ *
+ * The multiplication is done in unsigned long, so large nsecs values
+ * wrap before being passed on.
+ */
+void __ndelay(unsigned long nsecs)
+{
+       /* 2**32 / 1000000000 (rounded up) */
+       const unsigned long xloops_per_nsec = 0x5UL;
+
+       __const_udelay(nsecs * xloops_per_nsec);
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/h8300/lib/libgcc.h b/arch/h8300/lib/libgcc.h
new file mode 100644 (file)
index 0000000..468a8f7
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef __H8300_LIBGCC_H__
+#define __H8300_LIBGCC_H__
+
+/*
+ * Shared definitions for the libgcc-style helper routines in this
+ * directory.  Assembly sources get register aliases; C sources get the
+ * word and double-word types used by the 64-bit arithmetic helpers.
+ */
+
+#ifdef __ASSEMBLY__
+/* 16-bit registers r0-r3 and their low/high byte halves */
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+/* 16-bit registers r4-r6 and their low/high byte halves */
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+/* push/pop of a full 32-bit register */
+#define PUSHP  push.l
+#define POPP   pop.l
+
+/* 32-bit views (er0-er6) of the registers above */
+#define A0P    er0
+#define A1P    er1
+#define A2P    er2
+#define A3P    er3
+#define S0P    er4
+#define S1P    er5
+#define S2P    er6
+
+/* upper 16-bit halves (e0-e3) of er0-er3 */
+#define A0E    e0
+#define A1E    e1
+#define A2E    e2
+#define A3E    e3
+#else
+#define BITS_PER_UNIT (8)
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+
+/* word, half-word and double-word types; half-words use the word type */
+#define Wtype   SItype
+#define UWtype  USItype
+#define HWtype  SItype
+#define UHWtype USItype
+#define DWtype  DItype
+#define UDWtype UDItype
+
+typedef          int SItype     __attribute__ ((mode (SI)));
+typedef unsigned int USItype    __attribute__ ((mode (SI)));
+typedef          int DItype     __attribute__ ((mode (DI)));
+typedef unsigned int UDItype    __attribute__ ((mode (DI)));
+
+/*
+ * The two words of a double-word value.  The high word comes first,
+ * which assumes the target stores the most significant word of a DWtype
+ * at the lower address - TODO confirm for every supported configuration.
+ */
+struct DWstruct {
+       Wtype high, low;
+};
+
+/* Access a double-word value either whole (ll) or word by word (s). */
+typedef union {
+       struct DWstruct s;
+       DWtype ll;
+} DWunion;
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#endif
+
+#endif
diff --git a/arch/h8300/lib/lshrdi3.c b/arch/h8300/lib/lshrdi3.c
new file mode 100644 (file)
index 0000000..a86bbe3
--- /dev/null
@@ -0,0 +1,23 @@
+#include "libgcc.h"
+
+/*
+ * __lshrdi3 - logical (zero-filling) right shift of a double-word value
+ * by b bits.
+ *
+ * For shifts smaller than one word the bits leaving the high word are
+ * carried into the low word; for larger shifts the high word alone
+ * supplies the low word and the high word becomes zero.  A shift of zero
+ * returns u unchanged.
+ */
+DWtype __lshrdi3(DWtype u, word_type b)
+{
+       /* bits of the high word that stay in the high word (<= 0: none) */
+       const word_type spill = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+       DWunion in, out;
+
+       if (b == 0)
+               return u;
+
+       in.ll = u;
+
+       if (spill > 0) {
+               /* shift within a word: carry the low bits of high into low */
+               out.s.low = ((UWtype) in.s.low >> b)
+                           | ((UWtype) in.s.high << spill);
+               out.s.high = (UWtype) in.s.high >> b;
+       } else {
+               /* shift of a word or more: high moves into low */
+               out.s.low = (UWtype) in.s.high >> -spill;
+               out.s.high = 0;
+       }
+
+       return out.ll;
+}
diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S
new file mode 100644 (file)
index 0000000..0c9a51f
--- /dev/null
@@ -0,0 +1,85 @@
+;;; memcpy.S - copy a block of memory
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+       .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+       .h8300s
+#endif
+       .text
+.global memcpy
+
+;;; void *memcpy(void *to, void *from, size_t n)
+;;;  in:  er0 = to, er1 = from, er2 = n
+;;;  out: er0 = to
+;;; When bit 0 of both addresses is equal, an optional leading byte is
+;;; copied, then longwords, then the remaining bytes.  When bit 0
+;;; differs, the copy is done with eepmov.w instead.
+memcpy:
+       mov.l   er2,er2         ; n == 0: nothing to copy
+       bne     1f
+       rts
+1:
+       ;; address check
+       bld     #0,r0l          ; C = bit 0 of to
+       bxor    #0,r1l          ; C = C xor bit 0 of from
+       bcs     4f              ; bit 0 differs: use the eepmov.w path
+       mov.l   er4,@-sp        ; er4 is used as scratch below
+       mov.l   er0,@-sp        ; keep to for the return value
+       btst    #0,r0l
+       beq     1f
+       ;; both addresses odd: copy one byte so that both become even
+       mov.b   @er1,r3l
+       mov.b   r3l,@er0
+       adds    #1,er1
+       adds    #1,er0
+       dec.l   #1,er2
+       beq     3f
+1:
+       ;; n < sizeof(unsigned long) check
+       sub.l   er4,er4
+       adds    #4,er4          ; loop count check value
+       cmp.l   er4,er2
+       blo     2f
+       ;; unsigned long copy
+1:
+       mov.l   @er1,er3
+       mov.l   er3,@er0
+       adds    #4,er0
+       adds    #4,er1
+       subs    #4,er2
+       cmp.l   er4,er2
+       bcc     1b              ; at least 4 bytes left: copy another longword
+       ;; rest
+2:
+       mov.l   er2,er2
+       beq     3f
+1:
+       mov.b   @er1,r3l
+       mov.b   r3l,@er0
+       adds    #1,er1
+       adds    #1,er0
+       dec.l   #1,er2
+       bne     1b
+3:
+       mov.l   @sp+,er0        ; return value: original to
+       mov.l   @sp+,er4
+       rts
+
+       ;; odd <- even / even <- odd
+       ;; eepmov.w uses r4 (count), er5 (source) and er6 (destination);
+       ;; the previous er4/er5/er6 are parked in er3/er2/er1 meanwhile.
+       ;; NOTE(review): eepmov.w takes its count from r4 only, and the
+       ;; outer loop on e4 re-runs it with r4 == 0 - confirm that counts
+       ;; above 0xffff are copied completely.
+4:
+       mov.l   er4,er3         ; save er4
+       mov.l   er2,er4         ; er4 = n
+       mov.l   er5,er2         ; save er5
+       mov.l   er1,er5         ; er5 = from
+       mov.l   er6,er1         ; save er6
+       mov.l   er0,er6         ; er6 = to (er0 itself stays untouched)
+1:
+       eepmov.w
+       mov.w   r4,r4           ; r4 != 0: the block move is not finished yet
+       bne     1b
+       dec.w   #1,e4           ; step the upper 16 bits of the count
+       bpl     1b
+       mov.l   er1,er6         ; restore er6
+       mov.l   er2,er5         ; restore er5
+       mov.l   er3,er4         ; restore er4
+       rts
+
+       .end
diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S
new file mode 100644 (file)
index 0000000..18d4e70
--- /dev/null
@@ -0,0 +1,69 @@
+/* memset.S - fill a block of memory with a byte value */
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+       .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+       .h8300s
+#endif
+       .text
+
+.global        memset
+.global        clear_user
+
+;;void *memset(*ptr, int c, size_t count)
+;; ptr = er0
+;; c   = er1(r1l)
+;; count = er2
+;; returns ptr in er0
+;;
+;; An odd start address gets one leading byte store, then the area is
+;; filled a longword at a time and the last count % 4 bytes bytewise.
+memset:
+       mov.l   er2,er2         ; count == 0: store nothing, return ptr
+       beq     7f
+       mov.l   er0,@-sp        ; er0 is advanced below; keep ptr to return
+       btst    #0,r0l
+       beq     2f
+
+       ;; odd address
+1:
+       mov.b   r1l,@er0
+       adds    #1,er0
+       dec.l   #1,er2
+       beq     6f
+
+       ;; even address
+2:
+       mov.l   er2,er3         ; keep the byte count for the tail below
+       cmp.l   #4,er2
+       blo     4f
+       ;; count>=4 -> count/4
+#if defined(CONFIG_CPU_H8300H)
+       shlr.l  er2
+       shlr.l  er2
+#endif
+#if defined(CONFIG_CPU_H8S)
+       shlr.l  #2,er2
+#endif
+       ;; byte -> long
+       mov.b   r1l,r1h         ; r1 = c repeated twice
+       mov.w   r1,e1           ; er1 = c repeated four times
+3:
+       mov.l   er1,@er0
+       adds    #4,er0
+       dec.l   #1,er2
+       bne     3b
+4:
+       ;; count % 4
+       and.b   #3,r3l
+       beq     6f
+5:
+       mov.b   r1l,@er0
+       adds    #1,er0
+       dec.b   r3l
+       bne     5b
+6:
+       mov.l   @sp+,er0        ; return the original ptr
+7:
+       rts
+
+;; clear_user: er0 = address, er1 = byte count
+;; Zero-fills the area by tail-calling memset with c = 0.
+;; NOTE(review): the value left in er0 is whatever memset returns, not a
+;; count of bytes left uncleared - confirm what callers expect.
+clear_user:
+       mov.l   er1, er2
+       sub.l   er1, er1
+       bra     memset
+
+       .end
diff --git a/arch/h8300/lib/moddivsi3.S b/arch/h8300/lib/moddivsi3.S
new file mode 100644 (file)
index 0000000..c803129
--- /dev/null
@@ -0,0 +1,72 @@
+#include "libgcc.h"
+
+; Signed 32-bit division and modulo, built on top of __udivsi3.
+; numerator in A0P (er0)
+; denominator in A1P (er1)
+; result in A0P (er0)
+; Bit 3 of S2L carries the sign the result must get; S2P is saved and
+; restored around its use.
+       .global __modsi3
+__modsi3:
+       PUSHP   S2P
+       bsr     modnorm         ; make both operands non-negative
+       ; __divsi3 restores S2P to the value set up by modnorm before it
+       ; returns, and the remainder computed by __udivsi3 is still in er3
+       bsr     __divsi3
+       mov.l   er3,er0         ; result = remainder
+       bra     exitdiv         ; negate it if the numerator was negative
+
+       .global __umodsi3
+__umodsi3:
+       bsr     __udivsi3:16
+       mov.l   er3,er0         ; result = remainder left in er3
+       rts
+
+       .global __divsi3
+__divsi3:
+       PUSHP   S2P
+       bsr     divnorm         ; make both operands non-negative
+       bsr     __udivsi3:16
+
+       ; examine what the sign should be
+exitdiv:
+       btst    #3,S2L
+       beq     reti
+
+       ; should be -ve
+       neg.l   A0P
+
+reti:
+       POPP    S2P
+       rts
+
+; Replace both operands by their absolute values and leave the sign of
+; the quotient in bit 3 of S2L (set when exactly one operand was -ve).
+divnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     postive
+
+       neg.l   A0P             ; negate arg
+
+postive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     postive2
+
+       neg.l   A1P             ; negate arg
+       xor.b   #0x08,S2L       ; toggle the result sign
+
+postive2:
+       rts
+
+;; Basically the same, except that the sign of the numerator alone
+;; determines the sign: the flag is not toggled for the denominator.
+modnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     mpostive
+
+       neg.l   A0P             ; negate arg
+
+mpostive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     mpostive2
+
+       neg.l   A1P             ; negate arg
+
+mpostive2:
+       rts
+
+       .end
diff --git a/arch/h8300/lib/modsi3.S b/arch/h8300/lib/modsi3.S
new file mode 100644 (file)
index 0000000..68b1dfc
--- /dev/null
@@ -0,0 +1,72 @@
+#include "libgcc.h"
+
+; Signed 32-bit division and modulo, built on top of __udivsi3.
+; NOTE(review): this file defines the same global symbols as
+; moddivsi3.S and is not listed in lib-y in the Makefile - confirm
+; whether it is meant to be built at all.
+; numerator in A0P (er0)
+; denominator in A1P (er1)
+; result in A0P (er0)
+; Bit 3 of S2L carries the sign the result must get; S2P is saved and
+; restored around its use.
+       .global __modsi3
+__modsi3:
+       PUSHP   S2P
+       bsr     modnorm         ; make both operands non-negative
+       ; __divsi3 restores S2P to the value set up by modnorm before it
+       ; returns, and er3 is not touched on its way back
+       bsr     __divsi3
+       mov.l   er3,er0         ; result = er3 (remainder from __udivsi3)
+       bra     exitdiv         ; negate it if the numerator was negative
+
+       .global __umodsi3
+__umodsi3:
+       bsr     __udivsi3
+       mov.l   er3,er0         ; result = er3 (remainder from __udivsi3)
+       rts
+
+       .global __divsi3
+__divsi3:
+       PUSHP   S2P
+       jsr     divnorm         ; make both operands non-negative
+       bsr     __udivsi3
+
+       ; examine what the sign should be
+exitdiv:
+       btst    #3,S2L
+       beq     reti
+
+       ; should be -ve
+       neg.l   A0P
+
+reti:
+       POPP    S2P
+       rts
+
+; Replace both operands by their absolute values and leave the sign of
+; the quotient in bit 3 of S2L (set when exactly one operand was -ve).
+divnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     postive
+
+       neg.l   A0P             ; negate arg
+
+postive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     postive2
+
+       neg.l   A1P             ; negate arg
+       xor.b   #0x08,S2L       ; toggle the result sign
+
+postive2:
+       rts
+
+;; Basically the same, except that the sign of the numerator alone
+;; determines the sign: the flag is not toggled for the denominator.
+modnorm:
+       mov.l   A0P,A0P         ; is the numerator -ve
+       stc     ccr,S2L         ; keep the sign in bit 3 of S2L
+       bge     mpostive
+
+       neg.l   A0P             ; negate arg
+
+mpostive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     mpostive2
+
+       neg.l   A1P             ; negate arg
+
+mpostive2:
+       rts
+
+       .end
diff --git a/arch/h8300/lib/muldi3.c b/arch/h8300/lib/muldi3.c
new file mode 100644 (file)
index 0000000..7905122
--- /dev/null
@@ -0,0 +1,44 @@
+#include "libgcc.h"
+
+/* Half-word helpers: __ll_B is 2**(W_TYPE_SIZE / 2). */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/*
+ * umul_ppmm(w1, w0, u, v) - multiply the words u and v and store the
+ * double-word product as high word w1 and low word w0.
+ *
+ * Both operands are split into half-words, the four partial products
+ * are formed, and the two middle products are summed with the carry
+ * out of that sum propagated into the high word.
+ */
+#define umul_ppmm(w1, w0, u, v) \
+       do {                       \
+               UWtype __x0, __x1, __x2, __x3;  \
+               UHWtype __ul, __vl, __uh, __vh; \
+               __ul = __ll_lowpart(u); \
+               __uh = __ll_highpart(u);        \
+               __vl = __ll_lowpart(v); \
+               __vh = __ll_highpart(v);        \
+               __x0 = (UWtype) __ul * __vl;    \
+               __x1 = (UWtype) __ul * __vh;    \
+               __x2 = (UWtype) __uh * __vl;    \
+               __x3 = (UWtype) __uh * __vh;    \
+               __x1 += __ll_highpart(__x0);    \
+               __x1 += __x2;                   \
+               if (__x1 < __x2)                \
+                       __x3 += __ll_B;         \
+               (w1) = __x3 + __ll_highpart(__x1);             \
+               (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
+       } while (0)
+
+/*
+ * __umulsidi3(u, v) - product of the words u and v as a DWtype value
+ * (statement expression built on umul_ppmm).
+ */
+#define __umulsidi3(u, v) (                    \
+               {                               \
+                       DWunion __w;            \
+                       umul_ppmm(__w.s.high, __w.s.low, u, v); \
+                       __w.ll; }                                       \
+               )
+
+/*
+ * __muldi3 - multiply two double-word values, keeping the low
+ * double-word of the result.
+ *
+ * The product of the two low words is computed in full; the two cross
+ * products (low * high) only affect the high word of the result and are
+ * added there.
+ */
+DWtype __muldi3(DWtype u, DWtype v)
+{
+       DWunion a, b, prod;
+       UWtype cross;
+
+       a.ll = u;
+       b.ll = v;
+
+       /* full double-word product of the low words */
+       prod.ll = __umulsidi3(a.s.low, b.s.low);
+
+       /* the cross terms land in the high word; overflow is discarded */
+       cross = (UWtype) a.s.low * (UWtype) b.s.high;
+       cross += (UWtype) a.s.high * (UWtype) b.s.low;
+       prod.s.high += cross;
+
+       return prod.ll;
+}
diff --git a/arch/h8300/lib/mulsi3.S b/arch/h8300/lib/mulsi3.S
new file mode 100644 (file)
index 0000000..451f0e0
--- /dev/null
@@ -0,0 +1,38 @@
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b =  48 states
+; 16b * 32b =  72 states
+; 32b * 32b =  92 states
+;
+; 32-bit multiply: er0 = low 32 bits of er0 * er1.
+; In the comments below er0 = a:b (e0 = a, r0 = b) and
+; er1 = c:d (e1 = c, r1 = d).  The result is b * d plus the low 16 bits
+; of a * d and of c * b added into the upper half; a cross product is
+; skipped when its upper-half factor is zero.
+; Uses er2 and er3 as scratch.
+;
+
+       .global __mulsi3
+__mulsi3:
+       mov.w   r1,r2   ; ( 2 states) b * d
+       mulxu   r0,er2  ; (22 states)
+
+       mov.w   e0,r3   ; ( 2 states) a * d
+       beq     L_skip1 ; ( 4 states)
+       mulxu   r1,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states)
+
+L_skip1:
+       mov.w   e1,r3   ; ( 2 states) c * b
+       beq     L_skip2 ; ( 4 states)
+       mulxu   r0,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states)
+
+L_skip2:
+       mov.l   er2,er0 ; ( 2 states)
+       rts             ; (10 states)
+
+       .end
diff --git a/arch/h8300/lib/strncpy.S b/arch/h8300/lib/strncpy.S
new file mode 100644 (file)
index 0000000..d00396a
--- /dev/null
@@ -0,0 +1,34 @@
+;;; strncpy.S - bounded string copy
+
+#include <asm/linkage.h>
+
+       .text
+.global strncpy_from_user
+
+;;; long strncpy_from_user(void *to, void *from, size_t n)
+;;;  in:  er0 = to, er1 = from, er2 = n
+;;;  out: er0 = number of bytes copied before the terminating NUL
+;;;       (equals n when no NUL was found within n bytes)
+;;; At most n bytes are written.  When the string is shorter than n,
+;;; the rest of the destination is filled with NUL bytes.
+strncpy_from_user:
+       mov.l   er2,er2
+       bne     1f
+       sub.l   er0,er0         ; n == 0: nothing copied
+       rts
+1:
+       mov.l   er4,@-sp        ; r4l is used as the data byte
+       sub.l   er3,er3         ; er3 = copied length
+2:
+       mov.b   @er1+,r4l
+       mov.b   r4l,@er0        ; sets Z when the byte is NUL
+       adds    #1,er0          ; adds leaves the flags alone
+       beq     3f
+       inc.l   #1,er3
+       dec.l   #1,er2
+       bne     2b
+       ;; n bytes copied without meeting a NUL: nothing left to pad.
+       ;; Falling into the pad loop here would wrap the counter.
+       bra     5f
+3:
+       dec.l   #1,er2          ; account for the NUL just stored
+       beq     5f              ; the NUL filled the last byte: no padding
+4:
+       mov.b   r4l,@er0        ; r4l is NUL here: pad the remainder
+       adds    #1,er0
+       dec.l   #1,er2
+       bne     4b
+5:
+       mov.l   er3,er0
+       mov.l   @sp+,er4
+       rts
diff --git a/arch/h8300/lib/ucmpdi2.c b/arch/h8300/lib/ucmpdi2.c
new file mode 100644 (file)
index 0000000..772399d
--- /dev/null
@@ -0,0 +1,17 @@
+#include "libgcc.h"
+
+/*
+ * __ucmpdi2 - unsigned comparison of two double-word values.
+ *
+ * Returns 0 when a < b, 1 when a == b and 2 when a > b.  The high words
+ * decide unless they are equal, in which case the low words decide.
+ */
+word_type __ucmpdi2(DWtype a, DWtype b)
+{
+       DWunion lhs, rhs;
+       UWtype x, y;
+
+       lhs.ll = a;
+       rhs.ll = b;
+
+       x = (UWtype) lhs.s.high;
+       y = (UWtype) rhs.s.high;
+       if (x == y) {
+               /* high words tie: fall back to the low words */
+               x = (UWtype) lhs.s.low;
+               y = (UWtype) rhs.s.low;
+       }
+
+       if (x < y)
+               return 0;
+
+       return (x > y) ? 2 : 1;
+}
diff --git a/arch/h8300/lib/udivsi3.S b/arch/h8300/lib/udivsi3.S
new file mode 100644 (file)
index 0000000..bbe6561
--- /dev/null
@@ -0,0 +1,76 @@
+#include "libgcc.h"
+
+       ;; Unsigned 32-bit division: er0 = er0 / er1.
+       ;; This function also computes the remainder and stores it in er3.
+       .global __udivsi3
+__udivsi3:
+       mov.w   A1E,A1E         ; denominator top word 0?
+       bne     DenHighNonZero
+
+       ; do it the easy way, see page 107 in manual
+       ; (two 32/16 divides: upper word first, its remainder feeds the
+       ; divide of the lower word)
+       mov.w   A0E,A2
+       extu.l  A2P             ; er2 = upper word of the numerator
+       divxu.w A1,A2P          ; r2 = upper quotient word, e2 = remainder
+       mov.w   A2E,A0E         ; er0 = remainder : lower numerator word
+       divxu.w A1,A0P          ; r0 = lower quotient word, e0 = remainder
+       mov.w   A0E,A3
+       mov.w   A2,A0E          ; er0 = complete quotient
+       extu.l  A3P             ; er3 = remainder
+       rts
+
+       ; er0 = er0 / er1
+       ; er3 = er0 % er1
+       ; trashes er1 er2
+       ; expects er1 >= 2^16
+DenHighNonZero:
+       mov.l   er0,er3         ; er3 = copy of the dividend
+       mov.l   er1,er2         ; er2 = copy of the divisor, shifted below
+#ifdef CONFIG_CPU_H8300H
+       ; shift dividend and divisor right together, one bit at a time
+divmod_L21:
+       shlr.l  er0
+       shlr.l  er2             ; make divisor < 2^16
+       mov.w   e2,e2
+       bne     divmod_L21
+#else
+       ; same idea using two-bit shifts, followed by a fix-up at
+       ; divmod_L22A depending on the top bit of the shifted divisor
+       shlr.l  #2,er2          ; make divisor < 2^16
+       mov.w   e2,e2
+       beq     divmod_L22A
+divmod_L21:
+       shlr.l  #2,er0
+divmod_L22:
+       shlr.l  #2,er2          ; make divisor < 2^16
+       mov.w   e2,e2
+       bne     divmod_L21
+divmod_L22A:
+       rotxl.w r2
+       bcs     divmod_L23
+       shlr.l  er0
+       bra     divmod_L24
+divmod_L23:
+       rotxr.w r2
+       shlr.l  #2,er0
+divmod_L24:
+#endif
+       ;; At this point,
+       ;;  er0 contains shifted dividend
+       ;;  er1 contains divisor
+       ;;  er2 contains shifted divisor
+       ;;  er3 contains dividend, later remainder
+       divxu.w r2,er0          ; r0 now contains the approximate quotient (AQ)
+       extu.l  er0
+       beq     divmod_L25
+       subs    #1,er0          ; er0 = AQ - 1
+       mov.w   e1,r2
+       mulxu.w r0,er2          ; er2 = upper (AQ - 1) * divisor
+       sub.w   r2,e3           ; dividend - 65536 * er2
+       mov.w   r1,r2
+       mulxu.w r0,er2          ; compute er3 = remainder (tentative)
+       sub.l   er2,er3         ; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+       cmp.l   er1,er3         ; is remainder < divisor?
+       blo     divmod_L26      ; yes: quotient and remainder are final
+       adds    #1,er0
+       sub.l   er1,er3         ; correct the remainder
+divmod_L26:
+       rts
+
+       .end