--- /dev/null
+#
+# Makefile for H8/300-specific library files.
+#
+
+# Objects that make up the H8/300 library: memory and string helpers,
+# the 32-bit and 64-bit integer arithmetic helpers, and the delay loops.
+lib-y = memcpy.o memset.o abs.o strncpy.o \
+ mulsi3.o udivsi3.o muldi3.o moddivsi3.o \
+ ashldi3.o lshrdi3.o ashrdi3.o ucmpdi2.o \
+ delay.o
--- /dev/null
+;;; abs.S
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+ .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+ .h8300s
+#endif
+ .text
+.global _abs
+
+;;; int abs(int n)
+;;;   in:  er0 = n
+;;;   out: er0 = n when n is not negative, otherwise the 32-bit negation of n
+_abs:
+	mov.l	er0,er0		; move onto itself: only sets N from the sign bit
+	bmi	1f
+	rts			; not negative, return n unchanged
+1:
+	neg.l	er0		; negative, return -n
+	rts
--- /dev/null
+#include "libgcc.h"
+
+/*
+ * __ashldi3 - shift a double-word value left by b bits.
+ *
+ * The value is handled as a high/low pair of words.  A shift of at least
+ * one full word moves the low word into the high word and clears the low
+ * word; a shorter shift moves the bits leaving the top of the low word
+ * into the bottom of the high word.
+ */
+DWtype
+__ashldi3(DWtype u, word_type b)
+{
+	/* bits of the low word that stay in the low word; <= 0 when none do */
+	const word_type keep = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+	DWunion in, out;
+
+	if (b == 0)
+		return u;
+
+	in.ll = u;
+
+	if (keep > 0) {
+		out.s.high = ((UWtype) in.s.high << b)
+			     | ((UWtype) in.s.low >> keep);
+		out.s.low = (UWtype) in.s.low << b;
+	} else {
+		out.s.high = (UWtype) in.s.low << -keep;
+		out.s.low = 0;
+	}
+
+	return out.ll;
+}
--- /dev/null
+#include "libgcc.h"
+
+/*
+ * __ashrdi3 - arithmetic (sign-propagating) right shift of a double-word
+ * value by b bits.
+ *
+ * A shift of at least one full word moves the high word into the low word
+ * and fills the high word with copies of the sign bit; a shorter shift
+ * moves the bits leaving the bottom of the high word into the top of the
+ * low word.
+ */
+DWtype __ashrdi3(DWtype u, word_type b)
+{
+	/* bits of the high word that stay in the high word; <= 0 when none do */
+	const word_type keep = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+	DWunion in, out;
+
+	if (b == 0)
+		return u;
+
+	in.ll = u;
+
+	if (keep > 0) {
+		out.s.high = in.s.high >> b;
+		out.s.low = ((UWtype) in.s.low >> b)
+			    | ((UWtype) in.s.high << keep);
+	} else {
+		/* every bit of the high word becomes the sign bit */
+		out.s.high = in.s.high >> (sizeof (Wtype) * BITS_PER_UNIT - 1);
+		out.s.low = in.s.high >> -keep;
+	}
+
+	return out.ll;
+}
--- /dev/null
+/*
+ * delay loops
+ *
+ * Copyright (C) 2015 Yoshinori Sato
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <asm/param.h>
+#include <asm/processor.h>
+#include <asm/timex.h>
+
+/*
+ * __delay - busy-wait for a number of loop iterations
+ * @cycles: iterations of the dec.l/bne loop to execute
+ *
+ * A request for zero cycles returns immediately.  The loop decrements
+ * before it tests, so entering it with a zero count would wrap the counter
+ * and spin for 2**32 iterations.  __const_udelay() passes the upper 32 bits
+ * of a 64-bit product, which is zero for sufficiently short delays.
+ */
+void __delay(unsigned long cycles)
+{
+	if (!cycles)
+		return;
+
+	__asm__ volatile ("1: dec.l #1,%0\n\t"
+			  "bne 1b":"=r"(cycles):"0"(cycles));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * __const_udelay - delay for a pre-scaled amount of time
+ * @xloops: delay expressed in units of 1/2**32 second, as produced by
+ *          __udelay() and __ndelay()
+ *
+ * Multiplies the scaled delay by the number of delay loops per second
+ * (loops_per_jiffy * HZ) in 64 bits and hands the upper 32 bits of the
+ * product to __delay().
+ */
+void __const_udelay(unsigned long xloops)
+{
+	const u64 loops_per_sec = (u64)loops_per_jiffy * HZ;
+
+	__delay(((u64)xloops * loops_per_sec) >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+/* __udelay - busy-wait for @usecs microseconds via __const_udelay(). */
+void __udelay(unsigned long usecs)
+{
+	/* 2**32 / 1000000 (rounded up) */
+	const unsigned long xloops_per_usec = 0x10C7UL;
+
+	__const_udelay(usecs * xloops_per_usec);
+}
+EXPORT_SYMBOL(__udelay);
+
+/* __ndelay - busy-wait for @nsecs nanoseconds via __const_udelay(). */
+void __ndelay(unsigned long nsecs)
+{
+	/* 2**32 / 1000000000 (rounded up) */
+	const unsigned long xloops_per_nsec = 0x5UL;
+
+	__const_udelay(nsecs * xloops_per_nsec);
+}
+EXPORT_SYMBOL(__ndelay);
--- /dev/null
+#ifndef __H8300_LIBGCC_H__
+#define __H8300_LIBGCC_H__
+
+#ifdef __ASSEMBLY__
+/*
+ * Register aliases for the assembly support routines.
+ *
+ * A0-A3 name r0-r3 and S0-S2 name r4-r6.  The L and H suffixes select the
+ * rNl and rNh forms of a register, the P suffix the 32-bit erN form and
+ * the E suffix the eN form.
+ */
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+/* push/pop of a full 32-bit register */
+#define PUSHP push.l
+#define POPP pop.l
+
+#define A0P er0
+#define A1P er1
+#define A2P er2
+#define A3P er3
+#define S0P er4
+#define S1P er5
+#define S2P er6
+
+#define A0E e0
+#define A1E e1
+#define A2E e2
+#define A3E e3
+#else
+/* Sizes used by the C helpers: 8-bit units, 4-unit (32-bit) words. */
+#define BITS_PER_UNIT (8)
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+
+/*
+ * Word, half-word and double-word type names used by the C helpers.
+ * Note that the "half word" names are mapped onto the same 32-bit types
+ * as the word names.
+ */
+#define Wtype SItype
+#define UWtype USItype
+#define HWtype SItype
+#define UHWtype USItype
+#define DWtype DItype
+#define UDWtype UDItype
+
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+
+/* The two words of a double word; the high word is stored first. */
+struct DWstruct {
+	Wtype high, low;
+};
+
+/* View of a double word either as a whole (ll) or as its two words (s). */
+typedef union {
+	struct DWstruct s;
+	DWtype ll;
+} DWunion;
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#endif
+
+#endif
--- /dev/null
+#include "libgcc.h"
+
+/*
+ * __lshrdi3 - logical (zero-filling) right shift of a double-word value
+ * by b bits.
+ *
+ * A shift of at least one full word moves the high word into the low word
+ * and clears the high word; a shorter shift moves the bits leaving the
+ * bottom of the high word into the top of the low word.
+ */
+DWtype __lshrdi3(DWtype u, word_type b)
+{
+	/* bits of the high word that stay in the high word; <= 0 when none do */
+	const word_type keep = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+	DWunion in, out;
+
+	if (b == 0)
+		return u;
+
+	in.ll = u;
+
+	if (keep > 0) {
+		out.s.high = (UWtype) in.s.high >> b;
+		out.s.low = ((UWtype) in.s.low >> b)
+			    | ((UWtype) in.s.high << keep);
+	} else {
+		out.s.high = 0;
+		out.s.low = (UWtype) in.s.high >> -keep;
+	}
+
+	return out.ll;
+}
--- /dev/null
+;;; memcpy.S
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+ .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+ .h8300s
+#endif
+ .text
+.global memcpy
+
+;;; void *memcpy(void *to, void *from, size_t n)
+;;;   in:  er0 = to, er1 = from, er2 = n
+;;;   out: er0 = to
+;;; er1, er2 and er3 are overwritten on both paths.  er4 is saved and
+;;; restored; the block-move path also restores er5 and er6.
+;;;
+;;; When to and from have the same address parity the copy is done with
+;;; long moves plus byte moves at the edges; otherwise it is handed to
+;;; eepmov.w.
+memcpy:
+ mov.l er2,er2 ; n == 0: nothing to do, er0 already holds to
+ bne 1f
+ rts
+1:
+ ;; address check: carry = bit 0 of to XOR bit 0 of from
+ bld #0,r0l
+ bxor #0,r1l
+ bcs 4f ; parities differ
+ mov.l er4,@-sp
+ mov.l er0,@-sp ; original to, reloaded as the return value at 3:
+ btst #0,r0l
+ beq 1f
+ ;; both addresses odd: copy one byte so that both become even
+ mov.b @er1,r3l
+ mov.b r3l,@er0
+ adds #1,er1
+ adds #1,er0
+ dec.l #1,er2
+ beq 3f
+1:
+ ;; n < sizeof(unsigned long) check
+ sub.l er4,er4
+ adds #4,er4 ; loop count check value
+ cmp.l er4,er2
+ blo 2f
+ ;; unsigned long copy, repeated while at least 4 bytes remain
+1:
+ mov.l @er1,er3
+ mov.l er3,@er0
+ adds #4,er0
+ adds #4,er1
+ subs #4,er2
+ cmp.l er4,er2
+ bcc 1b
+ ;; rest: copy the remaining 0 to 3 bytes one at a time
+2:
+ mov.l er2,er2
+ beq 3f
+1:
+ mov.b @er1,r3l
+ mov.b r3l,@er0
+ adds #1,er1
+ adds #1,er0
+ dec.l #1,er2
+ bne 1b
+3:
+ mov.l @sp+,er0
+ mov.l @sp+,er4
+ rts
+
+ ;; odd <- even / even <- odd
+ ;; Park er4, er5 and er6 in er3, er2 and er1, then load
+ ;; er4 = n, er5 = from, er6 = to for eepmov.w.
+4:
+ mov.l er4,er3
+ mov.l er2,er4
+ mov.l er5,er2
+ mov.l er1,er5
+ mov.l er6,er1
+ mov.l er0,er6
+1:
+ ;; presumably copies r4 bytes from @er5 to @er6 and counts r4
+ ;; down to zero; confirm against the CPU manual
+ eepmov.w
+ mov.w r4,r4 ; go round again while the low word of the count is non-zero
+ bne 1b
+ dec.w #1,e4 ; then step the high word of the count
+ bpl 1b
+ ;; NOTE(review): this repeats eepmov.w with r4 == 0 once per unit of the
+ ;; high count word.  Whether that moves a further 64 KiB or nothing
+ ;; decides if n >= 65536 is copied completely on this path; verify.
+ mov.l er1,er6 ; put er6, er5 and er4 back
+ mov.l er2,er5
+ mov.l er3,er4
+ rts
+
+ .end
--- /dev/null
+/* memset.S */
+
+#include <asm/linkage.h>
+
+#if defined(CONFIG_CPU_H8300H)
+ .h8300h
+#endif
+#if defined(CONFIG_CPU_H8S)
+ .h8300s
+#endif
+ .text
+
+.global memset
+.global clear_user
+
+;;void *memset(*ptr, int c, size_t count)
+;; ptr = er0
+;; c = er1(r1l)
+;; count = er2
+;; returns er0 = ptr, the unmodified destination address
+;; er1, er2 and er3 are overwritten
+memset:
+	mov.l	er2,er2
+	beq	7f		; count == 0: store nothing, er0 still holds ptr
+	mov.l	er0,@-sp	; keep ptr, it is the return value
+	btst	#0,r0l
+	beq	2f
+
+	;; odd address: store one byte so that the long stores below
+	;; start on an even address
+1:
+	mov.b	r1l,@er0
+	adds	#1,er0
+	dec.l	#1,er2
+	beq	6f
+
+	;; even address
+2:
+	mov.l	er2,er3		; byte count, used for the count % 4 tail
+	cmp.l	#4,er2
+	blo	4f
+	;; count>=4 -> count/4
+#if defined(CONFIG_CPU_H8300H)
+	shlr.l	er2
+	shlr.l	er2
+#endif
+#if defined(CONFIG_CPU_H8S)
+	shlr.l	#2,er2
+#endif
+	;; byte -> long: replicate the fill byte into all four bytes of er1
+	mov.b	r1l,r1h
+	mov.w	r1,e1
+3:
+	mov.l	er1,@er0
+	adds	#4,er0
+	dec.l	#1,er2
+	bne	3b
+4:
+	;; count % 4
+	and.b	#3,r3l
+	beq	6f
+5:
+	mov.b	r1l,@er0
+	adds	#1,er0
+	dec.b	r3l
+	bne	5b
+6:
+	mov.l	@sp+,er0	; hand back the original ptr
+7:
+	rts
+
+;; clear_user: zero a block of memory
+;;   in:  er0 = start address, er1 = number of bytes
+;;   out: er0 = 0
+;; NOTE(review): presumably the kernel clear_user() contract, where the
+;; return value is the number of bytes that could not be cleared; the
+;; fill itself cannot fail here, so the result is always zero.
+clear_user:
+	mov.l	er1,er2		; byte count becomes the third memset argument
+	sub.l	er1,er1		; fill value 0
+	bsr	memset
+	sub.l	er0,er0		; no bytes left uncleared
+	rts
+
+ .end
--- /dev/null
+#include "libgcc.h"
+
+; signed 32-bit remainder
+; numerator in er0, denominator in er1, remainder returned in er0
+;
+; modnorm makes both operands non-negative and records the sign of the
+; numerator in bit 3 of r6l.  The unsigned divide leaves the remainder in
+; er3 and does not touch er6, so it is called directly and the shared
+; exitdiv tail then applies the recorded sign and pops er6.
+	.global __modsi3
+__modsi3:
+	PUSHP	S2P
+	bsr	modnorm
+	bsr	__udivsi3:16
+	mov.l	er3,er0
+	bra	exitdiv
+
+ ; unsigned 32-bit remainder: passes both operands to __udivsi3 unchanged
+ ; and returns the remainder which that routine leaves in er3
+ .global __umodsi3
+__umodsi3:
+ bsr __udivsi3:16
+ mov.l er3,er0
+ rts
+
+	; signed 32-bit divide: er0 = er0 / er1
+	.global __divsi3
+__divsi3:
+	PUSHP	S2P		; the low byte of this register carries the result sign
+	bsr	divnorm
+	bsr	__udivsi3:16
+
+	; Shared tail, also entered from __modsi3 with the same register
+	; still pushed: negate the result when bit 3 of the sign byte is set.
+exitdiv:
+	btst	#3,S2L
+	bne	1f
+
+reti:
+	POPP	S2P
+	rts
+
+1:
+	neg.l	A0P		; result should be negative
+	bra	reti
+
+; divnorm: make both division operands non-negative.
+; Bit 3 of the sign byte ends up set when exactly one operand was negative,
+; that is when the quotient has to be negated.
+divnorm:
+	mov.l	A0P,A0P		; set the flags from the numerator
+	stc	ccr,S2L		; CCR copy: bit 3 holds the numerator sign
+	bge	1f
+	neg.l	A0P		; numerator was negative
+1:
+	mov.l	A1P,A1P		; set the flags from the denominator
+	blt	2f
+	rts
+2:
+	neg.l	A1P		; denominator was negative
+	xor.b	#0x08,S2L	; flip the recorded result sign
+	rts
+
+;; modnorm: like divnorm, but the recorded sign is left as the sign of
+;; the numerator; a negative denominator is negated without flipping it.
+modnorm:
+	mov.l	A0P,A0P		; set the flags from the numerator
+	stc	ccr,S2L		; CCR copy: bit 3 holds the numerator sign
+	bge	1f
+	neg.l	A0P		; numerator was negative
+1:
+	mov.l	A1P,A1P		; set the flags from the denominator
+	blt	2f
+	rts
+2:
+	neg.l	A1P		; denominator was negative
+	rts
+
+ .end
--- /dev/null
+#include "libgcc.h"
+
+; signed 32-bit remainder
+; numerator in er0, denominator in er1, remainder returned in er0
+;
+; modnorm makes both operands non-negative and records the sign of the
+; numerator in bit 3 of r6l.  The unsigned divide leaves the remainder in
+; er3 and does not touch er6, so it is called directly and the shared
+; exitdiv tail then applies the recorded sign and pops er6.
+	.global __modsi3
+__modsi3:
+	PUSHP	S2P
+	bsr	modnorm
+	bsr	__udivsi3
+	mov.l	er3,er0
+	bra	exitdiv
+
+ ; unsigned 32-bit remainder: passes both operands to __udivsi3 unchanged
+ ; and returns the remainder which that routine leaves in er3
+ .global __umodsi3
+__umodsi3:
+ bsr __udivsi3
+ mov.l er3,er0
+ rts
+
+	; signed 32-bit divide: er0 = er0 / er1
+	.global __divsi3
+__divsi3:
+	PUSHP	S2P		; the low byte of this register carries the result sign
+	jsr	divnorm
+	bsr	__udivsi3
+
+	; Shared tail, also entered from __modsi3 with the same register
+	; still pushed: negate the result when bit 3 of the sign byte is set.
+exitdiv:
+	btst	#3,S2L
+	bne	1f
+
+reti:
+	POPP	S2P
+	rts
+
+1:
+	neg.l	A0P		; result should be negative
+	bra	reti
+
+; divnorm: make both division operands non-negative.
+; Bit 3 of the sign byte ends up set when exactly one operand was negative,
+; that is when the quotient has to be negated.
+divnorm:
+	mov.l	A0P,A0P		; set the flags from the numerator
+	stc	ccr,S2L		; CCR copy: bit 3 holds the numerator sign
+	bge	1f
+	neg.l	A0P		; numerator was negative
+1:
+	mov.l	A1P,A1P		; set the flags from the denominator
+	blt	2f
+	rts
+2:
+	neg.l	A1P		; denominator was negative
+	xor.b	#0x08,S2L	; flip the recorded result sign
+	rts
+
+;; modnorm: like divnorm, but the recorded sign is left as the sign of
+;; the numerator; a negative denominator is negated without flipping it.
+modnorm:
+	mov.l	A0P,A0P		; set the flags from the numerator
+	stc	ccr,S2L		; CCR copy: bit 3 holds the numerator sign
+	bge	1f
+	neg.l	A0P		; numerator was negative
+1:
+	mov.l	A1P,A1P		; set the flags from the denominator
+	blt	2f
+	rts
+2:
+	neg.l	A1P		; denominator was negative
+	rts
+
+ .end
--- /dev/null
+#include "libgcc.h"
+
+/*
+ * Half-word helpers: __ll_B is 2 to the power of half the word size,
+ * __ll_lowpart()/__ll_highpart() extract the low and high half of a word.
+ */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/*
+ * umul_ppmm(w1, w0, u, v) - multiply the words u and v and store the
+ * double-word product with its high word in w1 and its low word in w0.
+ *
+ * Both operands are split into halves and the four partial products
+ * __x0..__x3 are formed.  The two middle products are summed into __x1
+ * together with the high half of __x0; when that sum wraps (__x1 < __x2)
+ * the lost carry is added to the top product as __ll_B.
+ */
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ __ul = __ll_lowpart(u); \
+ __uh = __ll_highpart(u); \
+ __vl = __ll_lowpart(v); \
+ __vh = __ll_highpart(v); \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ __x1 += __ll_highpart(__x0); \
+ __x1 += __x2; \
+ if (__x1 < __x2) \
+ __x3 += __ll_B; \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
+ } while (0)
+
+/*
+ * __umulsidi3(u, v) - statement expression yielding the double-word
+ * product of the words u and v, built with umul_ppmm().
+ */
+#define __umulsidi3(u, v) ( \
+ { \
+ DWunion __w; \
+ umul_ppmm(__w.s.high, __w.s.low, u, v); \
+ __w.ll; } \
+ )
+
+/*
+ * __muldi3 - multiply two double-word values, keeping the low double word
+ * of the result.
+ *
+ * The full product of the two low words supplies the result; the two
+ * cross products (low * high and high * low) only contribute to the high
+ * word, and the high * high product is not needed at all.
+ */
+DWtype __muldi3(DWtype u, DWtype v)
+{
+	DWunion a, b, prod;
+	UWtype cross;
+
+	a.ll = u;
+	b.ll = v;
+
+	prod.ll = __umulsidi3(a.s.low, b.s.low);
+
+	cross = (UWtype) a.s.low * (UWtype) b.s.high;
+	cross += (UWtype) a.s.high * (UWtype) b.s.low;
+	prod.s.high += cross;
+
+	return prod.ll;
+}
--- /dev/null
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b = 48 states
+; 16b * 32b = 72 states
+; 32b * 32b = 92 states
+;
+
+	; 32-bit multiply: er0 = er0 * er1 (low 32 bits of the product)
+	;
+	; With er0 = a:b and er1 = c:d as 16-bit halves the result is
+	; b*d + ((a*d + c*b) << 16).  A cross product is skipped when its
+	; upper-half factor is zero.  er2 and er3 are used as scratch.
+	.global __mulsi3
+__mulsi3:
+	mov.w	r1,r2
+	mulxu	r0,er2		; er2 = b * d
+
+	mov.w	e1,r3		; r3 = c, the move also tests it for zero
+	beq	1f
+	mulxu	r0,er3		; er3 = c * b
+	add.w	r3,e2		; low half of it goes into the upper result half
+
+1:
+	mov.w	e0,r3		; r3 = a, the move also tests it for zero
+	beq	2f
+	mulxu	r1,er3		; er3 = a * d
+	add.w	r3,e2		; low half of it goes into the upper result half
+
+2:
+	mov.l	er2,er0
+	rts
+
+ .end
--- /dev/null
+;;; strncpy.S
+
+#include <asm/linkage.h>
+
+ .text
+.global strncpy_from_user
+
+;;; long strncpy_from_user(void *to, void *from, size_t n)
+;;;   in:  er0 = to, er1 = from, er2 = n
+;;;   out: er0 = number of bytes copied ahead of the terminating NUL,
+;;;        or n when no NUL was found within the first n bytes
+;;; At most n bytes are ever written to the destination.  Once the NUL has
+;;; been stored, whatever is left of the n bytes is filled with zero bytes.
+;;; er1, er2 and er3 are overwritten; er4 is saved and restored.
+strncpy_from_user:
+	mov.l	er2,er2
+	bne	1f
+	sub.l	er0,er0		; n == 0: nothing copied
+	rts
+1:
+	mov.l	er4,@-sp
+	sub.l	er3,er3		; er3 = count of non-NUL bytes copied
+2:
+	mov.b	@er1+,r4l
+	mov.b	r4l,@er0	; Z is set when the byte just stored is the NUL
+	adds	#1,er0		; adds leaves the flags alone
+	beq	3f
+	inc.l	#1,er3
+	dec.l	#1,er2
+	bne	2b
+	;; n bytes copied without meeting a NUL: the buffer is full, so
+	;; there is nothing to pad
+	bra	5f
+3:
+	;; NUL stored (r4l == 0): account for it and pad what is left
+	dec.l	#1,er2
+	beq	5f		; the NUL was the last byte allowed
+4:
+	mov.b	r4l,@er0
+	adds	#1,er0
+	dec.l	#1,er2
+	bne	4b
+5:
+	mov.l	er3,er0
+	mov.l	@sp+,er4
+	rts
--- /dev/null
+#include "libgcc.h"
+
+/*
+ * __ucmpdi2 - unsigned three-way comparison of two double-word values.
+ *
+ * Returns 0 when a < b, 1 when a == b and 2 when a > b.  The high words
+ * decide the result unless they are equal, in which case the low words do.
+ */
+word_type __ucmpdi2(DWtype a, DWtype b)
+{
+	DWunion lhs, rhs;
+	UWtype x, y;
+
+	lhs.ll = a;
+	rhs.ll = b;
+
+	x = (UWtype) lhs.s.high;
+	y = (UWtype) rhs.s.high;
+	if (x == y) {
+		x = (UWtype) lhs.s.low;
+		y = (UWtype) rhs.s.low;
+	}
+
+	if (x < y)
+		return 0;
+	if (x > y)
+		return 2;
+	return 1;
+}
--- /dev/null
+#include "libgcc.h"
+
+ ;; This function also computes the remainder and stores it in er3.
+ ;;
+ ;; Unsigned 32-bit divide.
+ ;;   in:  er0 = numerator, er1 = denominator
+ ;;   out: er0 = quotient, er3 = remainder
+ ;; er2 is used as scratch on both paths.
+ .global __udivsi3
+__udivsi3:
+ mov.w A1E,A1E ; denominator top word 0?
+ bne DenHighNonZero
+
+ ; do it the easy way, see page 107 in manual
+ ; (two chained 32-by-16 divides, upper numerator word first)
+ mov.w A0E,A2
+ extu.l A2P ; zero-extended upper word of the numerator
+ divxu.w A1,A2P ; lower word = upper quotient word, upper word = remainder
+ mov.w A2E,A0E ; that remainder becomes the top of the second dividend
+ divxu.w A1,A0P ; lower word = lower quotient word, upper word = remainder
+ mov.w A0E,A3 ; final remainder
+ mov.w A2,A0E ; put the upper quotient word in place
+ extu.l A3P ; remainder as a 32-bit value
+ rts
+
+ ; er0 = er0 / er1
+ ; er3 = er0 % er1
+ ; trashes er1 er2
+ ; expects er1 >= 2^16
+DenHighNonZero:
+ mov.l er0,er3
+ mov.l er1,er2
+#ifdef CONFIG_CPU_H8300H
+ ; shift dividend and divisor right together, one bit at a time,
+ ; until the divisor fits in 16 bits
+divmod_L21:
+ shlr.l er0
+ shlr.l er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+#else
+ ; same reduction with 2-bit shifts; the tail from divmod_L22A on
+ ; settles the final shift amount from the top bit of r2
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ beq divmod_L22A
+divmod_L21:
+ shlr.l #2,er0
+divmod_L22:
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+divmod_L22A:
+ rotxl.w r2
+ bcs divmod_L23
+ shlr.l er0
+ bra divmod_L24
+divmod_L23:
+ rotxr.w r2
+ shlr.l #2,er0
+divmod_L24:
+#endif
+ ;; At this point,
+ ;; er0 contains shifted dividend
+ ;; er1 contains divisor
+ ;; er2 contains shifted divisor
+ ;; er3 contains dividend, later remainder
+ divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
+ extu.l er0
+ beq divmod_L25 ; AQ == 0: er3 still holds the whole dividend
+ subs #1,er0 ; er0 = AQ - 1
+ mov.w e1,r2
+ mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
+ sub.w r2,e3 ; dividend - 65536 * er2
+ mov.w r1,r2
+ mulxu.w r0,er2 ; compute er3 = remainder (tentative)
+ sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+ ; one correction step: when the tentative remainder is not below the
+ ; divisor, bump the quotient and take one divisor off the remainder
+ cmp.l er1,er3 ; is divisor < remainder?
+ blo divmod_L26
+ adds #1,er0
+ sub.l er1,er3 ; correct the remainder
+divmod_L26:
+ rts
+
+ .end