From: Paul Mundt Date: Thu, 8 Nov 2007 09:51:33 +0000 (+0900) Subject: sh: Move arch/sh64/lib to arch/sh/lib64. X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=dd730b8ff8adfdf8d39cd060acc7223f0d374d32;p=GitHub%2FLineageOS%2Fandroid_kernel_motorola_exynos9610.git sh: Move arch/sh64/lib to arch/sh/lib64. Signed-off-by: Paul Mundt --- diff --git a/arch/sh/lib64/.gitignore b/arch/sh/lib64/.gitignore new file mode 100644 index 000000000000..3508c2cb23c4 --- /dev/null +++ b/arch/sh/lib64/.gitignore @@ -0,0 +1 @@ +syscalltab.h diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile new file mode 100644 index 000000000000..6a4cc3f9c0b1 --- /dev/null +++ b/arch/sh/lib64/Makefile @@ -0,0 +1,19 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000, 2001 Paolo Alberelli +# Coprygith (C) 2003 Paul Mundt +# +# Makefile for the SH-5 specific library files.. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# + +# Panic should really be compiled as PIC +lib-y := udelay.o c-checksum.o dbg.o io.o panic.o memcpy.o copy_user_memcpy.o \ + page_copy.o page_clear.o iomap.o + diff --git a/arch/sh/lib64/c-checksum.c b/arch/sh/lib64/c-checksum.c new file mode 100644 index 000000000000..053137abd8a0 --- /dev/null +++ b/arch/sh/lib64/c-checksum.c @@ -0,0 +1,217 @@ +/* + * arch/sh64/lib/c-checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include + +static inline unsigned short from64to16(unsigned long long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static inline unsigned short foldto16(unsigned long x) +{ + /* add up 16-bit for 17 bits */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static inline unsigned short myfoldto16(unsigned long long x) +{ + /* Fold down to 32-bits so we don't loose in the typedef-less + network stack. */ + /* 64 to 33 */ + x = (x & 0xffffffff) + (x >> 32); + /* 33 to 32 */ + x = (x & 0xffffffff) + (x >> 32); + + /* add up 16-bit for 17 bits */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +#define odd(x) ((x)&1) +#define U16(x) ntohs(x) + +static unsigned long do_csum(const unsigned char *buff, int len) +{ + int odd, count; + unsigned long result = 0; + + pr_debug("do_csum buff %p, len %d (0x%x)\n", buff, len, len); +#ifdef DEBUG + for (i = 0; i < len; i++) { + if ((i % 26) == 0) + printk("\n"); + printk("%02X ", buff[i]); + } +#endif + + if (len <= 0) + goto out; + + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + buff += 4; + count--; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + result = foldto16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + + pr_debug("\nCHECKSUM is 0x%lx\n", result); + + out: + return result; +} + +/* computes the checksum of a memory block at buff, length len, + and adds in "sum" (32-bit) */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + unsigned long long result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + + pr_debug("csum_partial, buff %p len %d sum 0x%x result=0x%016Lx\n", + buff, len, sum, result); + + return (__force __wsum)result; +} + +/* Copy while checksumming, otherwise like csum_partial. */ +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) +{ + sum = csum_partial(src, len, sum); + memcpy(dst, src, len); + + return sum; +} + +/* Copy from userspace and compute checksum. If we catch an exception + then zero the rest of the buffer. */ +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *err_ptr) +{ + int missing; + + pr_debug + ("csum_partial_copy_from_user src %p, dest %p, len %d, sum %08x, err_ptr %p\n", + src, dst, len, sum, err_ptr); + missing = copy_from_user(dst, src, len); + pr_debug(" access_ok %d\n", __access_ok((unsigned long) src, len)); + pr_debug(" missing %d\n", missing); + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } + + return csum_partial(dst, len, sum); +} + +/* Copy to userspace and compute checksum. */ +__wsum +csum_partial_copy_to_user(const unsigned char *src, unsigned char *dst, int len, + __wsum sum, int *err_ptr) +{ + sum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) + *err_ptr = -EFAULT; + + return sum; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + pr_debug("ip_fast_csum %p,%d\n", iph, ihl); + + return (__force __sum16)~do_csum(iph, ihl * 4); +} + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, __wsum sum) +{ + unsigned long long result; + + pr_debug("ntohs(0x%x)=0x%x\n", 0xdead, ntohs(0xdead)); + pr_debug("htons(0x%x)=0x%x\n", 0xdead, htons(0xdead)); + + result = (__force u64) saddr + (__force u64) daddr + + (__force u64) sum + ((len + proto) << 8); + + /* Fold down to 32-bits so we don't loose in the typedef-less + network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + + pr_debug("%s saddr %x daddr %x len %x proto %x sum %x result %08Lx\n", + __FUNCTION__, saddr, daddr, len, proto, sum, result); + + return (__wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); diff --git a/arch/sh/lib64/copy_user_memcpy.S b/arch/sh/lib64/copy_user_memcpy.S new file mode 100644 index 000000000000..2a62816d2ddd --- /dev/null +++ b/arch/sh/lib64/copy_user_memcpy.S @@ -0,0 +1,217 @@ +! +! Fast SH memcpy +! +! by Toshiyasu Morita (tm@netcom.com) +! hacked by J"orn Rernnecke (joern.rennecke@superh.com) ("o for o-umlaut) +! SH5 code Copyright 2002 SuperH Ltd. +! +! Entry: ARG0: destination pointer +! ARG1: source pointer +! ARG2: byte count +! +! Exit: RESULT: destination pointer +! any other registers in the range r0-r7: trashed +! +! Notes: Usually one wants to do small reads and write a longword, but +! unfortunately it is difficult in some cases to concatanate bytes +! into a longword on the SH, so this does a longword read and small +! writes. +! +! This implementation makes two assumptions about how it is called: +! +! 1.: If the byte count is nonzero, the address of the last byte to be +! copied is unsigned greater than the address of the first byte to +! be copied. This could be easily swapped for a signed comparison, +! but the algorithm used needs some comparison. +! +! 2.: When there are two or three bytes in the last word of an 11-or-more +! bytes memory chunk to b copied, the rest of the word can be read +! without side effects. +! This could be easily changed by increasing the minumum size of +! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2, +! however, this would cost a few extra cyles on average. +! For SHmedia, the assumption is that any quadword can be read in its +! enirety if at least one byte is included in the copy. + +/* Imported into Linux kernel by Richard Curnow. This is used to implement the + __copy_user function in the general case, so it has to be a distinct + function from intra-kernel memcpy to allow for exception fix-ups in the + event that the user pointer is bad somewhere in the copy (e.g. due to + running off the end of the vma). + + Note, this algorithm will be slightly wasteful in the case where the source + and destination pointers are equally aligned, because the stlo/sthi pairs + could then be merged back into single stores. If there are a lot of cache + misses, this is probably offset by the stall lengths on the preloads. + +*/ + +/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020 + * erratum. The first two prefetches are nop-ed out to avoid upsetting the + * instruction counts used in the jump address calculation. + * */ + + .section .text..SHmedia32,"ax" + .little + .balign 32 + .global copy_user_memcpy + .global copy_user_memcpy_end +copy_user_memcpy: + +#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1 +#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1 +#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1 +#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1 + + nop ! ld.b r3,0,r63 ! TAKum03020 + pta/l Large,tr0 + movi 25,r0 + bgeu/u r4,r0,tr0 + nsb r4,r0 + shlli r0,5,r0 + movi (L1-L0+63*32 + 1) & 0xffff,r1 + sub r1, r0, r0 +L0: ptrel r0,tr0 + add r2,r4,r5 + ptabs r18,tr1 + add r3,r4,r6 + blink tr0,r63 + +/* Rearranged to make cut2 safe */ + .balign 8 +L4_7: /* 4..7 byte memcpy cntd. */ + stlo.l r2, 0, r0 + or r6, r7, r6 + sthi.l r5, -1, r6 + stlo.l r5, -4, r6 + blink tr1,r63 + + .balign 8 +L1: /* 0 byte memcpy */ + nop + blink tr1,r63 + nop + nop + nop + nop + +L2_3: /* 2 or 3 byte memcpy cntd. */ + st.b r5,-1,r6 + blink tr1,r63 + + /* 1 byte memcpy */ + ld.b r3,0,r0 + st.b r2,0,r0 + blink tr1,r63 + +L8_15: /* 8..15 byte memcpy cntd. */ + stlo.q r2, 0, r0 + or r6, r7, r6 + sthi.q r5, -1, r6 + stlo.q r5, -8, r6 + blink tr1,r63 + + /* 2 or 3 byte memcpy */ + ld.b r3,0,r0 + nop ! ld.b r2,0,r63 ! TAKum03020 + ld.b r3,1,r1 + st.b r2,0,r0 + pta/l L2_3,tr0 + ld.b r6,-1,r6 + st.b r2,1,r1 + blink tr0, r63 + + /* 4 .. 7 byte memcpy */ + LDUAL (r3, 0, r0, r1) + pta L4_7, tr0 + ldlo.l r6, -4, r7 + or r0, r1, r0 + sthi.l r2, 3, r0 + ldhi.l r6, -1, r6 + blink tr0, r63 + + /* 8 .. 15 byte memcpy */ + LDUAQ (r3, 0, r0, r1) + pta L8_15, tr0 + ldlo.q r6, -8, r7 + or r0, r1, r0 + sthi.q r2, 7, r0 + ldhi.q r6, -1, r6 + blink tr0, r63 + + /* 16 .. 24 byte memcpy */ + LDUAQ (r3, 0, r0, r1) + LDUAQ (r3, 8, r8, r9) + or r0, r1, r0 + sthi.q r2, 7, r0 + or r8, r9, r8 + sthi.q r2, 15, r8 + ldlo.q r6, -8, r7 + ldhi.q r6, -1, r6 + stlo.q r2, 8, r8 + stlo.q r2, 0, r0 + or r6, r7, r6 + sthi.q r5, -1, r6 + stlo.q r5, -8, r6 + blink tr1,r63 + +Large: + ! ld.b r2, 0, r63 ! TAKum03020 + pta/l Loop_ua, tr1 + ori r3, -8, r7 + sub r2, r7, r22 + sub r3, r2, r6 + add r2, r4, r5 + ldlo.q r3, 0, r0 + addi r5, -16, r5 + movi 64+8, r27 ! could subtract r7 from that. + stlo.q r2, 0, r0 + sthi.q r2, 7, r0 + ldx.q r22, r6, r0 + bgtu/l r27, r4, tr1 + + addi r5, -48, r27 + pta/l Loop_line, tr0 + addi r6, 64, r36 + addi r6, -24, r19 + addi r6, -16, r20 + addi r6, -8, r21 + +Loop_line: + ! ldx.q r22, r36, r63 ! TAKum03020 + alloco r22, 32 + synco + addi r22, 32, r22 + ldx.q r22, r19, r23 + sthi.q r22, -25, r0 + ldx.q r22, r20, r24 + ldx.q r22, r21, r25 + stlo.q r22, -32, r0 + ldx.q r22, r6, r0 + sthi.q r22, -17, r23 + sthi.q r22, -9, r24 + sthi.q r22, -1, r25 + stlo.q r22, -24, r23 + stlo.q r22, -16, r24 + stlo.q r22, -8, r25 + bgeu r27, r22, tr0 + +Loop_ua: + addi r22, 8, r22 + sthi.q r22, -1, r0 + stlo.q r22, -8, r0 + ldx.q r22, r6, r0 + bgtu/l r5, r22, tr1 + + add r3, r4, r7 + ldlo.q r7, -8, r1 + sthi.q r22, 7, r0 + ldhi.q r7, -1, r7 + ptabs r18,tr1 + stlo.q r22, 0, r0 + or r1, r7, r1 + sthi.q r5, 15, r1 + stlo.q r5, 8, r1 + blink tr1, r63 +copy_user_memcpy_end: + nop diff --git a/arch/sh/lib64/dbg.c b/arch/sh/lib64/dbg.c new file mode 100644 index 000000000000..97816e0baf19 --- /dev/null +++ b/arch/sh/lib64/dbg.c @@ -0,0 +1,430 @@ +/*-------------------------------------------------------------------------- +-- +-- Identity : Linux50 Debug Funcions +-- +-- File : arch/sh64/lib/dbg.C +-- +-- Copyright 2000, 2001 STMicroelectronics Limited. +-- Copyright 2004 Richard Curnow (evt_debug etc) +-- +--------------------------------------------------------------------------*/ +#include +#include +#include +#include +#include +#include + +typedef u64 regType_t; + +static regType_t getConfigReg(u64 id) +{ + register u64 reg __asm__("r2"); + asm volatile ("getcfg %1, 0, %0":"=r" (reg):"r"(id)); + return (reg); +} + +/* ======================================================================= */ + +static char *szTab[] = { "4k", "64k", "1M", "512M" }; +static char *protTab[] = { "----", + "---R", + "--X-", + "--XR", + "-W--", + "-W-R", + "-WX-", + "-WXR", + "U---", + "U--R", + "U-X-", + "U-XR", + "UW--", + "UW-R", + "UWX-", + "UWXR" +}; +#define ITLB_BASE 0x00000000 +#define DTLB_BASE 0x00800000 +#define MAX_TLBs 64 +/* PTE High */ +#define GET_VALID(pte) ((pte) & 0x1) +#define GET_SHARED(pte) ((pte) & 0x2) +#define GET_ASID(pte) ((pte >> 2) & 0x0ff) +#define GET_EPN(pte) ((pte) & 0xfffff000) + +/* PTE Low */ +#define GET_CBEHAVIOR(pte) ((pte) & 0x3) +#define GET_PAGE_SIZE(pte) szTab[((pte >> 3) & 0x3)] +#define GET_PROTECTION(pte) protTab[((pte >> 6) & 0xf)] +#define GET_PPN(pte) ((pte) & 0xfffff000) + +#define PAGE_1K_MASK 0x00000000 +#define PAGE_4K_MASK 0x00000010 +#define PAGE_64K_MASK 0x00000080 +#define MMU_PAGESIZE_MASK (PAGE_64K_MASK | PAGE_4K_MASK) +#define PAGE_1MB_MASK MMU_PAGESIZE_MASK +#define PAGE_1K (1024) +#define PAGE_4K (1024 * 4) +#define PAGE_64K (1024 * 64) +#define PAGE_1MB (1024 * 1024) + +#define HOW_TO_READ_TLB_CONTENT \ + "[ ID] PPN EPN ASID Share CB P.Size PROT.\n" + +void print_single_tlb(unsigned long tlb, int single_print) +{ + regType_t pteH; + regType_t pteL; + unsigned int valid, shared, asid, epn, cb, ppn; + char *pSize; + char *pProt; + + /* + ** in case of single print is true, this implies: + ** 1) print the TLB in any case also if NOT VALID + ** 2) print out the header + */ + + pteH = getConfigReg(tlb); + valid = GET_VALID(pteH); + if (single_print) + printk(HOW_TO_READ_TLB_CONTENT); + else if (!valid) + return; + + pteL = getConfigReg(tlb + 1); + + shared = GET_SHARED(pteH); + asid = GET_ASID(pteH); + epn = GET_EPN(pteH); + cb = GET_CBEHAVIOR(pteL); + pSize = GET_PAGE_SIZE(pteL); + pProt = GET_PROTECTION(pteL); + ppn = GET_PPN(pteL); + printk("[%c%2ld] 0x%08x 0x%08x %03d %02x %02x %4s %s\n", + ((valid) ? ' ' : 'u'), ((tlb & 0x0ffff) / TLB_STEP), + ppn, epn, asid, shared, cb, pSize, pProt); +} + +void print_dtlb(void) +{ + int count; + unsigned long tlb; + + printk(" ================= SH-5 D-TLBs Status ===================\n"); + printk(HOW_TO_READ_TLB_CONTENT); + tlb = DTLB_BASE; + for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP) + print_single_tlb(tlb, 0); + printk + (" =============================================================\n"); +} + +void print_itlb(void) +{ + int count; + unsigned long tlb; + + printk(" ================= SH-5 I-TLBs Status ===================\n"); + printk(HOW_TO_READ_TLB_CONTENT); + tlb = ITLB_BASE; + for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP) + print_single_tlb(tlb, 0); + printk + (" =============================================================\n"); +} + +/* ======================================================================= */ + +#ifdef CONFIG_POOR_MANS_STRACE + +#include "syscalltab.h" + +struct ring_node { + int evt; + int ret_addr; + int event; + int tra; + int pid; + unsigned long sp; + unsigned long pc; +}; + +static struct ring_node event_ring[16]; +static int event_ptr = 0; + +struct stored_syscall_data { + int pid; + int syscall_number; +}; + +#define N_STORED_SYSCALLS 16 + +static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS]; +static int syscall_next=0; +static int syscall_next_print=0; + +void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs) +{ + int syscallno = tra & 0xff; + unsigned long sp; + unsigned long stack_bottom; + int pid; + struct ring_node *rr; + + pid = current->pid; + stack_bottom = (unsigned long) task_stack_page(current); + asm volatile("ori r15, 0, %0" : "=r" (sp)); + rr = event_ring + event_ptr; + rr->evt = evt; + rr->ret_addr = ret_addr; + rr->event = event; + rr->tra = tra; + rr->pid = pid; + rr->sp = sp; + rr->pc = regs->pc; + + if (sp < stack_bottom + 3092) { + printk("evt_debug : stack underflow report\n"); + int i, j; + for (j=0, i = event_ptr; j<16; j++) { + rr = event_ring + i; + printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n", + rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc); + i--; + i &= 15; + } + panic("STACK UNDERFLOW\n"); + } + + event_ptr = (event_ptr + 1) & 15; + + if ((event == 2) && (evt == 0x160)) { + if (syscallno < NUM_SYSCALL_INFO_ENTRIES) { + /* Store the syscall information to print later. We + * can't print this now - currently we're running with + * SR.BL=1, so we can't take a tlbmiss (which could occur + * in the console drivers under printk). + * + * Just overwrite old entries on ring overflow - this + * is only for last-hope debugging. */ + stored_syscalls[syscall_next].pid = current->pid; + stored_syscalls[syscall_next].syscall_number = syscallno; + syscall_next++; + syscall_next &= (N_STORED_SYSCALLS - 1); + } + } +} + +static void drain_syscalls(void) { + while (syscall_next_print != syscall_next) { + printk("Task %d: %s()\n", + stored_syscalls[syscall_next_print].pid, + syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name); + syscall_next_print++; + syscall_next_print &= (N_STORED_SYSCALLS - 1); + } +} + +void evt_debug2(unsigned int ret) +{ + drain_syscalls(); + printk("Task %d: syscall returns %08x\n", current->pid, ret); +} + +void evt_debug_ret_from_irq(struct pt_regs *regs) +{ + int pid; + struct ring_node *rr; + + pid = current->pid; + rr = event_ring + event_ptr; + rr->evt = 0xffff; + rr->ret_addr = 0; + rr->event = 0; + rr->tra = 0; + rr->pid = pid; + rr->pc = regs->pc; + event_ptr = (event_ptr + 1) & 15; +} + +void evt_debug_ret_from_exc(struct pt_regs *regs) +{ + int pid; + struct ring_node *rr; + + pid = current->pid; + rr = event_ring + event_ptr; + rr->evt = 0xfffe; + rr->ret_addr = 0; + rr->event = 0; + rr->tra = 0; + rr->pid = pid; + rr->pc = regs->pc; + event_ptr = (event_ptr + 1) & 15; +} + +#endif /* CONFIG_POOR_MANS_STRACE */ + +/* ======================================================================= */ + +void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs) +{ + + unsigned long long ah, al, bh, bl, ch, cl; + + printk("\n"); + printk("EXCEPTION - %s: task %d; Linux trap # %d; signal = %d\n", + ((from) ? from : "???"), current->pid, trapnr, signr); + + asm volatile ("getcon " __EXPEVT ", %0":"=r"(ah)); + asm volatile ("getcon " __EXPEVT ", %0":"=r"(al)); + ah = (ah) >> 32; + al = (al) & 0xffffffff; + asm volatile ("getcon " __KCR1 ", %0":"=r"(bh)); + asm volatile ("getcon " __KCR1 ", %0":"=r"(bl)); + bh = (bh) >> 32; + bl = (bl) & 0xffffffff; + asm volatile ("getcon " __INTEVT ", %0":"=r"(ch)); + asm volatile ("getcon " __INTEVT ", %0":"=r"(cl)); + ch = (ch) >> 32; + cl = (cl) & 0xffffffff; + printk("EXPE: %08Lx%08Lx KCR1: %08Lx%08Lx INTE: %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + asm volatile ("getcon " __PEXPEVT ", %0":"=r"(ah)); + asm volatile ("getcon " __PEXPEVT ", %0":"=r"(al)); + ah = (ah) >> 32; + al = (al) & 0xffffffff; + asm volatile ("getcon " __PSPC ", %0":"=r"(bh)); + asm volatile ("getcon " __PSPC ", %0":"=r"(bl)); + bh = (bh) >> 32; + bl = (bl) & 0xffffffff; + asm volatile ("getcon " __PSSR ", %0":"=r"(ch)); + asm volatile ("getcon " __PSSR ", %0":"=r"(cl)); + ch = (ch) >> 32; + cl = (cl) & 0xffffffff; + printk("PEXP: %08Lx%08Lx PSPC: %08Lx%08Lx PSSR: %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->pc) >> 32; + al = (regs->pc) & 0xffffffff; + bh = (regs->regs[18]) >> 32; + bl = (regs->regs[18]) & 0xffffffff; + ch = (regs->regs[15]) >> 32; + cl = (regs->regs[15]) & 0xffffffff; + printk("PC : %08Lx%08Lx LINK: %08Lx%08Lx SP : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->sr) >> 32; + al = (regs->sr) & 0xffffffff; + asm volatile ("getcon " __TEA ", %0":"=r"(bh)); + asm volatile ("getcon " __TEA ", %0":"=r"(bl)); + bh = (bh) >> 32; + bl = (bl) & 0xffffffff; + asm volatile ("getcon " __KCR0 ", %0":"=r"(ch)); + asm volatile ("getcon " __KCR0 ", %0":"=r"(cl)); + ch = (ch) >> 32; + cl = (cl) & 0xffffffff; + printk("SR : %08Lx%08Lx TEA : %08Lx%08Lx KCR0: %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->regs[0]) >> 32; + al = (regs->regs[0]) & 0xffffffff; + bh = (regs->regs[1]) >> 32; + bl = (regs->regs[1]) & 0xffffffff; + ch = (regs->regs[2]) >> 32; + cl = (regs->regs[2]) & 0xffffffff; + printk("R0 : %08Lx%08Lx R1 : %08Lx%08Lx R2 : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->regs[3]) >> 32; + al = (regs->regs[3]) & 0xffffffff; + bh = (regs->regs[4]) >> 32; + bl = (regs->regs[4]) & 0xffffffff; + ch = (regs->regs[5]) >> 32; + cl = (regs->regs[5]) & 0xffffffff; + printk("R3 : %08Lx%08Lx R4 : %08Lx%08Lx R5 : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->regs[6]) >> 32; + al = (regs->regs[6]) & 0xffffffff; + bh = (regs->regs[7]) >> 32; + bl = (regs->regs[7]) & 0xffffffff; + ch = (regs->regs[8]) >> 32; + cl = (regs->regs[8]) & 0xffffffff; + printk("R6 : %08Lx%08Lx R7 : %08Lx%08Lx R8 : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + + ah = (regs->regs[9]) >> 32; + al = (regs->regs[9]) & 0xffffffff; + bh = (regs->regs[10]) >> 32; + bl = (regs->regs[10]) & 0xffffffff; + ch = (regs->regs[11]) >> 32; + cl = (regs->regs[11]) & 0xffffffff; + printk("R9 : %08Lx%08Lx R10 : %08Lx%08Lx R11 : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + printk("....\n"); + + ah = (regs->tregs[0]) >> 32; + al = (regs->tregs[0]) & 0xffffffff; + bh = (regs->tregs[1]) >> 32; + bl = (regs->tregs[1]) & 0xffffffff; + ch = (regs->tregs[2]) >> 32; + cl = (regs->tregs[2]) & 0xffffffff; + printk("T0 : %08Lx%08Lx T1 : %08Lx%08Lx T2 : %08Lx%08Lx\n", + ah, al, bh, bl, ch, cl); + printk("....\n"); + + print_dtlb(); + print_itlb(); +} + +/* ======================================================================= */ + +/* +** Depending on scan the MMU, Data or Instruction side +** looking for a valid mapping matching Eaddr & asid. +** Return -1 if not found or the TLB id entry otherwise. +** Note: it works only for 4k pages! +*/ +static unsigned long +lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid) +{ + regType_t pteH; + unsigned long epn; + int count; + + epn = Eaddr & 0xfffff000; + + for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) { + pteH = getConfigReg(base); + if (GET_VALID(pteH)) + if ((unsigned long) GET_EPN(pteH) == epn) + if ((unsigned long) GET_ASID(pteH) == asid) + break; + } + return ((unsigned long) ((count < MAX_TLBs) ? base : -1)); +} + +unsigned long lookup_dtlb(unsigned long Eaddr) +{ + unsigned long asid = get_asid(); + return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid)); +} + +unsigned long lookup_itlb(unsigned long Eaddr) +{ + unsigned long asid = get_asid(); + return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid)); +} + +void print_page(struct page *page) +{ + printk(" page[%p] -> index 0x%lx, count 0x%x, flags 0x%lx\n", + page, page->index, page_count(page), page->flags); + printk(" address_space = %p, pages =%ld\n", page->mapping, + page->mapping->nrpages); + +} diff --git a/arch/sh/lib64/io.c b/arch/sh/lib64/io.c new file mode 100644 index 000000000000..a3f3a2b8e25b --- /dev/null +++ b/arch/sh/lib64/io.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2000 David J. Mckay (david.mckay@st.com) + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * This file contains the I/O routines for use on the overdrive board + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Now for the string version of these functions */ +void outsb(unsigned long port, const void *addr, unsigned long count) +{ + int i; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p++) { + outb(*p, port); + } +} +EXPORT_SYMBOL(outsb); + +void insb(unsigned long port, void *addr, unsigned long count) +{ + int i; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p++) { + *p = inb(port); + } +} +EXPORT_SYMBOL(insb); + +/* For the 16 and 32 bit string functions, we have to worry about alignment. + * The SH does not do unaligned accesses, so we have to read as bytes and + * then write as a word or dword. + * This can be optimised a lot more, especially in the case where the data + * is aligned + */ + +void outsw(unsigned long port, const void *addr, unsigned long count) +{ + int i; + unsigned short tmp; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p += 2) { + tmp = (*p) | ((*(p + 1)) << 8); + outw(tmp, port); + } +} +EXPORT_SYMBOL(outsw); + +void insw(unsigned long port, void *addr, unsigned long count) +{ + int i; + unsigned short tmp; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p += 2) { + tmp = inw(port); + p[0] = tmp & 0xff; + p[1] = (tmp >> 8) & 0xff; + } +} +EXPORT_SYMBOL(insw); + +void outsl(unsigned long port, const void *addr, unsigned long count) +{ + int i; + unsigned tmp; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p += 4) { + tmp = (*p) | ((*(p + 1)) << 8) | ((*(p + 2)) << 16) | + ((*(p + 3)) << 24); + outl(tmp, port); + } +} +EXPORT_SYMBOL(outsl); + +void insl(unsigned long port, void *addr, unsigned long count) +{ + int i; + unsigned tmp; + unsigned char *p = (unsigned char *) addr; + + for (i = 0; i < count; i++, p += 4) { + tmp = inl(port); + p[0] = tmp & 0xff; + p[1] = (tmp >> 8) & 0xff; + p[2] = (tmp >> 16) & 0xff; + p[3] = (tmp >> 24) & 0xff; + + } +} +EXPORT_SYMBOL(insl); + +void memcpy_toio(void __iomem *to, const void *from, long count) +{ + unsigned char *p = (unsigned char *) from; + + while (count) { + count--; + writeb(*p++, to++); + } +} +EXPORT_SYMBOL(memcpy_toio); + +void memcpy_fromio(void *to, void __iomem *from, long count) +{ + int i; + unsigned char *p = (unsigned char *) to; + + for (i = 0; i < count; i++) { + p[i] = readb(from); + from++; + } +} +EXPORT_SYMBOL(memcpy_fromio); diff --git a/arch/sh/lib64/iomap.c b/arch/sh/lib64/iomap.c new file mode 100644 index 000000000000..253d1e351d49 --- /dev/null +++ b/arch/sh/lib64/iomap.c @@ -0,0 +1,54 @@ +/* + * arch/sh64/lib/iomap.c + * + * Generic sh64 iomap interface + * + * Copyright (C) 2004 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include + +void __iomem *__attribute__ ((weak)) +ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *)port; +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing .. */ +} +EXPORT_SYMBOL(ioport_unmap); + +#ifdef CONFIG_PCI +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + if (flags & IORESOURCE_IO) + return ioport_map(start + pciio_virt, len); + if (flags & IORESOURCE_MEM) + return (void __iomem *)start; + + /* What? */ + return NULL; +} +EXPORT_SYMBOL(pci_iomap); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + /* Nothing .. */ +} +EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/sh/lib64/memcpy.c b/arch/sh/lib64/memcpy.c new file mode 100644 index 000000000000..fba436a92bfa --- /dev/null +++ b/arch/sh/lib64/memcpy.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com) + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + */ + +#include +#include + +// This is a simplistic optimization of memcpy to increase the +// granularity of access beyond one byte using aligned +// loads and stores. This is not an optimal implementation +// for SH-5 (especially with regard to prefetching and the cache), +// and a better version should be provided later ... + +void *memcpy(void *dest, const void *src, size_t count) +{ + char *d = (char *) dest, *s = (char *) src; + + if (count >= 32) { + int i = 8 - (((unsigned long) d) & 0x7); + + if (i != 8) + while (i-- && count--) { + *d++ = *s++; + } + + if (((((unsigned long) d) & 0x7) == 0) && + ((((unsigned long) s) & 0x7) == 0)) { + while (count >= 32) { + unsigned long long t1, t2, t3, t4; + t1 = *(unsigned long long *) (s); + t2 = *(unsigned long long *) (s + 8); + t3 = *(unsigned long long *) (s + 16); + t4 = *(unsigned long long *) (s + 24); + *(unsigned long long *) (d) = t1; + *(unsigned long long *) (d + 8) = t2; + *(unsigned long long *) (d + 16) = t3; + *(unsigned long long *) (d + 24) = t4; + d += 32; + s += 32; + count -= 32; + } + while (count >= 8) { + *(unsigned long long *) d = + *(unsigned long long *) s; + d += 8; + s += 8; + count -= 8; + } + } + + if (((((unsigned long) d) & 0x3) == 0) && + ((((unsigned long) s) & 0x3) == 0)) { + while (count >= 4) { + *(unsigned long *) d = *(unsigned long *) s; + d += 4; + s += 4; + count -= 4; + } + } + + if (((((unsigned long) d) & 0x1) == 0) && + ((((unsigned long) s) & 0x1) == 0)) { + while (count >= 2) { + *(unsigned short *) d = *(unsigned short *) s; + d += 2; + s += 2; + count -= 2; + } + } + } + + while (count--) { + *d++ = *s++; + } + + return d; +} diff --git a/arch/sh/lib64/page_clear.S b/arch/sh/lib64/page_clear.S new file mode 100644 index 000000000000..ac0111d669a3 --- /dev/null +++ b/arch/sh/lib64/page_clear.S @@ -0,0 +1,54 @@ +/* + Copyright 2003 Richard Curnow, SuperH (UK) Ltd. + + This file is subject to the terms and conditions of the GNU General Public + License. See the file "COPYING" in the main directory of this archive + for more details. + + Tight version of memset for the case of just clearing a page. It turns out + that having the alloco's spaced out slightly due to the increment/branch + pair causes them to contend less for access to the cache. Similarly, + keeping the stores apart from the allocos causes less contention. => Do two + separate loops. Do multiple stores per loop to amortise the + increment/branch cost a little. + + Parameters: + r2 : source effective address (start of page) + + Always clears 4096 bytes. + + Note : alloco guarded by synco to avoid TAKum03020 erratum + +*/ + + .section .text..SHmedia32,"ax" + .little + + .balign 8 + .global sh64_page_clear +sh64_page_clear: + pta/l 1f, tr1 + pta/l 2f, tr2 + ptabs/l r18, tr0 + + movi 4096, r7 + add r2, r7, r7 + add r2, r63, r6 +1: + alloco r6, 0 + synco ! TAKum03020 + addi r6, 32, r6 + bgt/l r7, r6, tr1 + + add r2, r63, r6 +2: + st.q r6, 0, r63 + st.q r6, 8, r63 + st.q r6, 16, r63 + st.q r6, 24, r63 + addi r6, 32, r6 + bgt/l r7, r6, tr2 + + blink tr0, r63 + + diff --git a/arch/sh/lib64/page_copy.S b/arch/sh/lib64/page_copy.S new file mode 100644 index 000000000000..e159c3cd2582 --- /dev/null +++ b/arch/sh/lib64/page_copy.S @@ -0,0 +1,91 @@ +/* + Copyright 2003 Richard Curnow, SuperH (UK) Ltd. + + This file is subject to the terms and conditions of the GNU General Public + License. See the file "COPYING" in the main directory of this archive + for more details. + + Tight version of mempy for the case of just copying a page. + Prefetch strategy empirically optimised against RTL simulations + of SH5-101 cut2 eval chip with Cayman board DDR memory. + + Parameters: + r2 : source effective address (start of page) + r3 : destination effective address (start of page) + + Always copies 4096 bytes. + + Points to review. + * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead. + It seems like the prefetch needs to be at at least 4 lines ahead to get + the data into the cache in time, and the allocos contend with outstanding + prefetches for the same cache set, so it's better to have the numbers + different. + */ + + .section .text..SHmedia32,"ax" + .little + + .balign 8 + .global sh64_page_copy +sh64_page_copy: + + /* Copy 4096 bytes worth of data from r2 to r3. + Do prefetches 4 lines ahead. + Do alloco 2 lines ahead */ + + pta 1f, tr1 + pta 2f, tr2 + pta 3f, tr3 + ptabs r18, tr0 + +#if 0 + /* TAKum03020 */ + ld.q r2, 0x00, r63 + ld.q r2, 0x20, r63 + ld.q r2, 0x40, r63 + ld.q r2, 0x60, r63 +#endif + alloco r3, 0x00 + synco ! TAKum03020 + alloco r3, 0x20 + synco ! TAKum03020 + + movi 3968, r6 + add r3, r6, r6 + addi r6, 64, r7 + addi r7, 64, r8 + sub r2, r3, r60 + addi r60, 8, r61 + addi r61, 8, r62 + addi r62, 8, r23 + addi r60, 0x80, r22 + +/* Minimal code size. The extra branches inside the loop don't cost much + because they overlap with the time spent waiting for prefetches to + complete. */ +1: +#if 0 + /* TAKum03020 */ + bge/u r3, r6, tr2 ! skip prefetch for last 4 lines + ldx.q r3, r22, r63 ! prefetch 4 lines hence +#endif +2: + bge/u r3, r7, tr3 ! skip alloco for last 2 lines + alloco r3, 0x40 ! alloc destination line 2 lines ahead + synco ! TAKum03020 +3: + ldx.q r3, r60, r36 + ldx.q r3, r61, r37 + ldx.q r3, r62, r38 + ldx.q r3, r23, r39 + st.q r3, 0, r36 + st.q r3, 8, r37 + st.q r3, 16, r38 + st.q r3, 24, r39 + addi r3, 32, r3 + bgt/l r8, r3, tr1 + + blink tr0, r63 ! return + + diff --git a/arch/sh/lib64/panic.c b/arch/sh/lib64/panic.c new file mode 100644 index 000000000000..c9eb1cb50d97 --- /dev/null +++ b/arch/sh/lib64/panic.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2003 Richard Curnow, SuperH UK Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include + +/* THIS IS A PHYSICAL ADDRESS */ +#define HDSP2534_ADDR (0x04002100) + +#ifdef CONFIG_SH_CAYMAN + +static void poor_mans_delay(void) +{ + int i; + for (i = 0; i < 2500000; i++) { + } /* poor man's delay */ +} + +static void show_value(unsigned long x) +{ + int i; + unsigned nibble; + for (i = 0; i < 8; i++) { + nibble = ((x >> (i * 4)) & 0xf); + + ctrl_outb(nibble + ((nibble > 9) ? 55 : 48), + HDSP2534_ADDR + 0xe0 + ((7 - i) << 2)); + } +} + +#endif + +void +panic_handler(unsigned long panicPC, unsigned long panicSSR, + unsigned long panicEXPEVT) +{ +#ifdef CONFIG_SH_CAYMAN + while (1) { + /* This piece of code displays the PC on the LED display */ + show_value(panicPC); + poor_mans_delay(); + show_value(panicSSR); + poor_mans_delay(); + show_value(panicEXPEVT); + poor_mans_delay(); + } +#endif + + /* Never return from the panic handler */ + for (;;) ; + +} diff --git a/arch/sh/lib64/udelay.c b/arch/sh/lib64/udelay.c new file mode 100644 index 000000000000..327653914007 --- /dev/null +++ b/arch/sh/lib64/udelay.c @@ -0,0 +1,59 @@ +/* + * arch/sh64/lib/udelay.c + * + * Delay routines, using a pre-computed "loops_per_jiffy" value. + * + * Copyright (C) 2000, 2001 Paolo Alberelli + * Copyright (C) 2003, 2004 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include + +extern unsigned long loops_per_jiffy; + +/* + * Use only for very small delays (< 1 msec). + * + * The active part of our cycle counter is only 32-bits wide, and + * we're treating the difference between two marks as signed. On + * a 1GHz box, that's about 2 seconds. + */ + +void __delay(int loops) +{ + long long dummy; + __asm__ __volatile__("gettr tr0, %1\n\t" + "pta $+4, tr0\n\t" + "addi %0, -1, %0\n\t" + "bne %0, r63, tr0\n\t" + "ptabs %1, tr0\n\t":"=r"(loops), + "=r"(dummy) + :"0"(loops)); +} + +void __udelay(unsigned long long usecs, unsigned long lpj) +{ + usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj; + __delay((long long) usecs >> 32); +} + +void __ndelay(unsigned long long nsecs, unsigned long lpj) +{ + nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj; + __delay((long long) nsecs >> 32); +} + +void udelay(unsigned long usecs) +{ + __udelay(usecs, loops_per_jiffy); +} + +void ndelay(unsigned long nsecs) +{ + __ndelay(nsecs, loops_per_jiffy); +} + diff --git a/arch/sh64/lib/.gitignore b/arch/sh64/lib/.gitignore deleted file mode 100644 index 3508c2cb23c4..000000000000 --- a/arch/sh64/lib/.gitignore +++ /dev/null @@ -1 +0,0 @@ -syscalltab.h diff --git a/arch/sh64/lib/Makefile b/arch/sh64/lib/Makefile deleted file mode 100644 index 6a4cc3f9c0b1..000000000000 --- a/arch/sh64/lib/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 2000, 2001 Paolo Alberelli -# Coprygith (C) 2003 Paul Mundt -# -# Makefile for the SH-5 specific library files.. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# - -# Panic should really be compiled as PIC -lib-y := udelay.o c-checksum.o dbg.o io.o panic.o memcpy.o copy_user_memcpy.o \ - page_copy.o page_clear.o iomap.o - diff --git a/arch/sh64/lib/c-checksum.c b/arch/sh64/lib/c-checksum.c deleted file mode 100644 index 053137abd8a0..000000000000 --- a/arch/sh64/lib/c-checksum.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * arch/sh64/lib/c-checksum.c - * - * This file contains network checksum routines that are better done - * in an architecture-specific manner due to speed.. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include - -static inline unsigned short from64to16(unsigned long long x) -{ - /* add up 32-bit words for 33 bits */ - x = (x & 0xffffffff) + (x >> 32); - /* add up 16-bit and 17-bit words for 17+c bits */ - x = (x & 0xffff) + (x >> 16); - /* add up 16-bit and 2-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -static inline unsigned short foldto16(unsigned long x) -{ - /* add up 16-bit for 17 bits */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -static inline unsigned short myfoldto16(unsigned long long x) -{ - /* Fold down to 32-bits so we don't loose in the typedef-less - network stack. */ - /* 64 to 33 */ - x = (x & 0xffffffff) + (x >> 32); - /* 33 to 32 */ - x = (x & 0xffffffff) + (x >> 32); - - /* add up 16-bit for 17 bits */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -#define odd(x) ((x)&1) -#define U16(x) ntohs(x) - -static unsigned long do_csum(const unsigned char *buff, int len) -{ - int odd, count; - unsigned long result = 0; - - pr_debug("do_csum buff %p, len %d (0x%x)\n", buff, len, len); -#ifdef DEBUG - for (i = 0; i < len; i++) { - if ((i % 26) == 0) - printk("\n"); - printk("%02X ", buff[i]); - } -#endif - - if (len <= 0) - goto out; - - odd = 1 & (unsigned long) buff; - if (odd) { - result = *buff << 8; - len--; - buff++; - } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { - if (2 & (unsigned long) buff) { - result += *(unsigned short *) buff; - count--; - len -= 2; - buff += 2; - } - count >>= 1; /* nr of 32-bit words.. */ - if (count) { - unsigned long carry = 0; - do { - unsigned long w = *(unsigned long *) buff; - buff += 4; - count--; - result += carry; - result += w; - carry = (w > result); - } while (count); - result += carry; - result = (result & 0xffff) + (result >> 16); - } - if (len & 2) { - result += *(unsigned short *) buff; - buff += 2; - } - } - if (len & 1) - result += *buff; - result = foldto16(result); - if (odd) - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - - pr_debug("\nCHECKSUM is 0x%lx\n", result); - - out: - return result; -} - -/* computes the checksum of a memory block at buff, length len, - and adds in "sum" (32-bit) */ -__wsum csum_partial(const void *buff, int len, __wsum sum) -{ - unsigned long long result = do_csum(buff, len); - - /* add in old sum, and carry.. */ - result += (__force u32)sum; - /* 32+c bits -> 32 bits */ - result = (result & 0xffffffff) + (result >> 32); - - pr_debug("csum_partial, buff %p len %d sum 0x%x result=0x%016Lx\n", - buff, len, sum, result); - - return (__force __wsum)result; -} - -/* Copy while checksumming, otherwise like csum_partial. */ -__wsum -csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) -{ - sum = csum_partial(src, len, sum); - memcpy(dst, src, len); - - return sum; -} - -/* Copy from userspace and compute checksum. If we catch an exception - then zero the rest of the buffer. */ -__wsum -csum_partial_copy_from_user(const void __user *src, void *dst, int len, - __wsum sum, int *err_ptr) -{ - int missing; - - pr_debug - ("csum_partial_copy_from_user src %p, dest %p, len %d, sum %08x, err_ptr %p\n", - src, dst, len, sum, err_ptr); - missing = copy_from_user(dst, src, len); - pr_debug(" access_ok %d\n", __access_ok((unsigned long) src, len)); - pr_debug(" missing %d\n", missing); - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } - - return csum_partial(dst, len, sum); -} - -/* Copy to userspace and compute checksum. */ -__wsum -csum_partial_copy_to_user(const unsigned char *src, unsigned char *dst, int len, - __wsum sum, int *err_ptr) -{ - sum = csum_partial(src, len, sum); - - if (copy_to_user(dst, src, len)) - *err_ptr = -EFAULT; - - return sum; -} - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. - */ -__sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - pr_debug("ip_fast_csum %p,%d\n", iph, ihl); - - return (__force __sum16)~do_csum(iph, ihl * 4); -} - -__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, __wsum sum) -{ - unsigned long long result; - - pr_debug("ntohs(0x%x)=0x%x\n", 0xdead, ntohs(0xdead)); - pr_debug("htons(0x%x)=0x%x\n", 0xdead, htons(0xdead)); - - result = (__force u64) saddr + (__force u64) daddr + - (__force u64) sum + ((len + proto) << 8); - - /* Fold down to 32-bits so we don't loose in the typedef-less - network stack. */ - /* 64 to 33 */ - result = (result & 0xffffffff) + (result >> 32); - /* 33 to 32 */ - result = (result & 0xffffffff) + (result >> 32); - - pr_debug("%s saddr %x daddr %x len %x proto %x sum %x result %08Lx\n", - __FUNCTION__, saddr, daddr, len, proto, sum, result); - - return (__wsum)result; -} -EXPORT_SYMBOL(csum_tcpudp_nofold); diff --git a/arch/sh64/lib/copy_user_memcpy.S b/arch/sh64/lib/copy_user_memcpy.S deleted file mode 100644 index 2a62816d2ddd..000000000000 --- a/arch/sh64/lib/copy_user_memcpy.S +++ /dev/null @@ -1,217 +0,0 @@ -! -! Fast SH memcpy -! -! by Toshiyasu Morita (tm@netcom.com) -! hacked by J"orn Rernnecke (joern.rennecke@superh.com) ("o for o-umlaut) -! SH5 code Copyright 2002 SuperH Ltd. -! -! Entry: ARG0: destination pointer -! ARG1: source pointer -! ARG2: byte count -! -! Exit: RESULT: destination pointer -! any other registers in the range r0-r7: trashed -! -! Notes: Usually one wants to do small reads and write a longword, but -! unfortunately it is difficult in some cases to concatanate bytes -! into a longword on the SH, so this does a longword read and small -! writes. -! -! This implementation makes two assumptions about how it is called: -! -! 1.: If the byte count is nonzero, the address of the last byte to be -! copied is unsigned greater than the address of the first byte to -! be copied. This could be easily swapped for a signed comparison, -! but the algorithm used needs some comparison. -! -! 2.: When there are two or three bytes in the last word of an 11-or-more -! bytes memory chunk to b copied, the rest of the word can be read -! without side effects. -! This could be easily changed by increasing the minumum size of -! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2, -! however, this would cost a few extra cyles on average. -! For SHmedia, the assumption is that any quadword can be read in its -! enirety if at least one byte is included in the copy. - -/* Imported into Linux kernel by Richard Curnow. This is used to implement the - __copy_user function in the general case, so it has to be a distinct - function from intra-kernel memcpy to allow for exception fix-ups in the - event that the user pointer is bad somewhere in the copy (e.g. due to - running off the end of the vma). - - Note, this algorithm will be slightly wasteful in the case where the source - and destination pointers are equally aligned, because the stlo/sthi pairs - could then be merged back into single stores. If there are a lot of cache - misses, this is probably offset by the stall lengths on the preloads. - -*/ - -/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020 - * erratum. The first two prefetches are nop-ed out to avoid upsetting the - * instruction counts used in the jump address calculation. - * */ - - .section .text..SHmedia32,"ax" - .little - .balign 32 - .global copy_user_memcpy - .global copy_user_memcpy_end -copy_user_memcpy: - -#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1 -#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1 -#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1 -#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1 - - nop ! ld.b r3,0,r63 ! TAKum03020 - pta/l Large,tr0 - movi 25,r0 - bgeu/u r4,r0,tr0 - nsb r4,r0 - shlli r0,5,r0 - movi (L1-L0+63*32 + 1) & 0xffff,r1 - sub r1, r0, r0 -L0: ptrel r0,tr0 - add r2,r4,r5 - ptabs r18,tr1 - add r3,r4,r6 - blink tr0,r63 - -/* Rearranged to make cut2 safe */ - .balign 8 -L4_7: /* 4..7 byte memcpy cntd. */ - stlo.l r2, 0, r0 - or r6, r7, r6 - sthi.l r5, -1, r6 - stlo.l r5, -4, r6 - blink tr1,r63 - - .balign 8 -L1: /* 0 byte memcpy */ - nop - blink tr1,r63 - nop - nop - nop - nop - -L2_3: /* 2 or 3 byte memcpy cntd. */ - st.b r5,-1,r6 - blink tr1,r63 - - /* 1 byte memcpy */ - ld.b r3,0,r0 - st.b r2,0,r0 - blink tr1,r63 - -L8_15: /* 8..15 byte memcpy cntd. */ - stlo.q r2, 0, r0 - or r6, r7, r6 - sthi.q r5, -1, r6 - stlo.q r5, -8, r6 - blink tr1,r63 - - /* 2 or 3 byte memcpy */ - ld.b r3,0,r0 - nop ! ld.b r2,0,r63 ! TAKum03020 - ld.b r3,1,r1 - st.b r2,0,r0 - pta/l L2_3,tr0 - ld.b r6,-1,r6 - st.b r2,1,r1 - blink tr0, r63 - - /* 4 .. 7 byte memcpy */ - LDUAL (r3, 0, r0, r1) - pta L4_7, tr0 - ldlo.l r6, -4, r7 - or r0, r1, r0 - sthi.l r2, 3, r0 - ldhi.l r6, -1, r6 - blink tr0, r63 - - /* 8 .. 15 byte memcpy */ - LDUAQ (r3, 0, r0, r1) - pta L8_15, tr0 - ldlo.q r6, -8, r7 - or r0, r1, r0 - sthi.q r2, 7, r0 - ldhi.q r6, -1, r6 - blink tr0, r63 - - /* 16 .. 24 byte memcpy */ - LDUAQ (r3, 0, r0, r1) - LDUAQ (r3, 8, r8, r9) - or r0, r1, r0 - sthi.q r2, 7, r0 - or r8, r9, r8 - sthi.q r2, 15, r8 - ldlo.q r6, -8, r7 - ldhi.q r6, -1, r6 - stlo.q r2, 8, r8 - stlo.q r2, 0, r0 - or r6, r7, r6 - sthi.q r5, -1, r6 - stlo.q r5, -8, r6 - blink tr1,r63 - -Large: - ! ld.b r2, 0, r63 ! TAKum03020 - pta/l Loop_ua, tr1 - ori r3, -8, r7 - sub r2, r7, r22 - sub r3, r2, r6 - add r2, r4, r5 - ldlo.q r3, 0, r0 - addi r5, -16, r5 - movi 64+8, r27 ! could subtract r7 from that. - stlo.q r2, 0, r0 - sthi.q r2, 7, r0 - ldx.q r22, r6, r0 - bgtu/l r27, r4, tr1 - - addi r5, -48, r27 - pta/l Loop_line, tr0 - addi r6, 64, r36 - addi r6, -24, r19 - addi r6, -16, r20 - addi r6, -8, r21 - -Loop_line: - ! ldx.q r22, r36, r63 ! TAKum03020 - alloco r22, 32 - synco - addi r22, 32, r22 - ldx.q r22, r19, r23 - sthi.q r22, -25, r0 - ldx.q r22, r20, r24 - ldx.q r22, r21, r25 - stlo.q r22, -32, r0 - ldx.q r22, r6, r0 - sthi.q r22, -17, r23 - sthi.q r22, -9, r24 - sthi.q r22, -1, r25 - stlo.q r22, -24, r23 - stlo.q r22, -16, r24 - stlo.q r22, -8, r25 - bgeu r27, r22, tr0 - -Loop_ua: - addi r22, 8, r22 - sthi.q r22, -1, r0 - stlo.q r22, -8, r0 - ldx.q r22, r6, r0 - bgtu/l r5, r22, tr1 - - add r3, r4, r7 - ldlo.q r7, -8, r1 - sthi.q r22, 7, r0 - ldhi.q r7, -1, r7 - ptabs r18,tr1 - stlo.q r22, 0, r0 - or r1, r7, r1 - sthi.q r5, 15, r1 - stlo.q r5, 8, r1 - blink tr1, r63 -copy_user_memcpy_end: - nop diff --git a/arch/sh64/lib/dbg.c b/arch/sh64/lib/dbg.c deleted file mode 100644 index 97816e0baf19..000000000000 --- a/arch/sh64/lib/dbg.c +++ /dev/null @@ -1,430 +0,0 @@ -/*-------------------------------------------------------------------------- --- --- Identity : Linux50 Debug Funcions --- --- File : arch/sh64/lib/dbg.C --- --- Copyright 2000, 2001 STMicroelectronics Limited. --- Copyright 2004 Richard Curnow (evt_debug etc) --- ---------------------------------------------------------------------------*/ -#include -#include -#include -#include -#include -#include - -typedef u64 regType_t; - -static regType_t getConfigReg(u64 id) -{ - register u64 reg __asm__("r2"); - asm volatile ("getcfg %1, 0, %0":"=r" (reg):"r"(id)); - return (reg); -} - -/* ======================================================================= */ - -static char *szTab[] = { "4k", "64k", "1M", "512M" }; -static char *protTab[] = { "----", - "---R", - "--X-", - "--XR", - "-W--", - "-W-R", - "-WX-", - "-WXR", - "U---", - "U--R", - "U-X-", - "U-XR", - "UW--", - "UW-R", - "UWX-", - "UWXR" -}; -#define ITLB_BASE 0x00000000 -#define DTLB_BASE 0x00800000 -#define MAX_TLBs 64 -/* PTE High */ -#define GET_VALID(pte) ((pte) & 0x1) -#define GET_SHARED(pte) ((pte) & 0x2) -#define GET_ASID(pte) ((pte >> 2) & 0x0ff) -#define GET_EPN(pte) ((pte) & 0xfffff000) - -/* PTE Low */ -#define GET_CBEHAVIOR(pte) ((pte) & 0x3) -#define GET_PAGE_SIZE(pte) szTab[((pte >> 3) & 0x3)] -#define GET_PROTECTION(pte) protTab[((pte >> 6) & 0xf)] -#define GET_PPN(pte) ((pte) & 0xfffff000) - -#define PAGE_1K_MASK 0x00000000 -#define PAGE_4K_MASK 0x00000010 -#define PAGE_64K_MASK 0x00000080 -#define MMU_PAGESIZE_MASK (PAGE_64K_MASK | PAGE_4K_MASK) -#define PAGE_1MB_MASK MMU_PAGESIZE_MASK -#define PAGE_1K (1024) -#define PAGE_4K (1024 * 4) -#define PAGE_64K (1024 * 64) -#define PAGE_1MB (1024 * 1024) - -#define HOW_TO_READ_TLB_CONTENT \ - "[ ID] PPN EPN ASID Share CB P.Size PROT.\n" - -void print_single_tlb(unsigned long tlb, int single_print) -{ - regType_t pteH; - regType_t pteL; - unsigned int valid, shared, asid, epn, cb, ppn; - char *pSize; - char *pProt; - - /* - ** in case of single print is true, this implies: - ** 1) print the TLB in any case also if NOT VALID - ** 2) print out the header - */ - - pteH = getConfigReg(tlb); - valid = GET_VALID(pteH); - if (single_print) - printk(HOW_TO_READ_TLB_CONTENT); - else if (!valid) - return; - - pteL = getConfigReg(tlb + 1); - - shared = GET_SHARED(pteH); - asid = GET_ASID(pteH); - epn = GET_EPN(pteH); - cb = GET_CBEHAVIOR(pteL); - pSize = GET_PAGE_SIZE(pteL); - pProt = GET_PROTECTION(pteL); - ppn = GET_PPN(pteL); - printk("[%c%2ld] 0x%08x 0x%08x %03d %02x %02x %4s %s\n", - ((valid) ? ' ' : 'u'), ((tlb & 0x0ffff) / TLB_STEP), - ppn, epn, asid, shared, cb, pSize, pProt); -} - -void print_dtlb(void) -{ - int count; - unsigned long tlb; - - printk(" ================= SH-5 D-TLBs Status ===================\n"); - printk(HOW_TO_READ_TLB_CONTENT); - tlb = DTLB_BASE; - for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP) - print_single_tlb(tlb, 0); - printk - (" =============================================================\n"); -} - -void print_itlb(void) -{ - int count; - unsigned long tlb; - - printk(" ================= SH-5 I-TLBs Status ===================\n"); - printk(HOW_TO_READ_TLB_CONTENT); - tlb = ITLB_BASE; - for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP) - print_single_tlb(tlb, 0); - printk - (" =============================================================\n"); -} - -/* ======================================================================= */ - -#ifdef CONFIG_POOR_MANS_STRACE - -#include "syscalltab.h" - -struct ring_node { - int evt; - int ret_addr; - int event; - int tra; - int pid; - unsigned long sp; - unsigned long pc; -}; - -static struct ring_node event_ring[16]; -static int event_ptr = 0; - -struct stored_syscall_data { - int pid; - int syscall_number; -}; - -#define N_STORED_SYSCALLS 16 - -static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS]; -static int syscall_next=0; -static int syscall_next_print=0; - -void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs) -{ - int syscallno = tra & 0xff; - unsigned long sp; - unsigned long stack_bottom; - int pid; - struct ring_node *rr; - - pid = current->pid; - stack_bottom = (unsigned long) task_stack_page(current); - asm volatile("ori r15, 0, %0" : "=r" (sp)); - rr = event_ring + event_ptr; - rr->evt = evt; - rr->ret_addr = ret_addr; - rr->event = event; - rr->tra = tra; - rr->pid = pid; - rr->sp = sp; - rr->pc = regs->pc; - - if (sp < stack_bottom + 3092) { - printk("evt_debug : stack underflow report\n"); - int i, j; - for (j=0, i = event_ptr; j<16; j++) { - rr = event_ring + i; - printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n", - rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc); - i--; - i &= 15; - } - panic("STACK UNDERFLOW\n"); - } - - event_ptr = (event_ptr + 1) & 15; - - if ((event == 2) && (evt == 0x160)) { - if (syscallno < NUM_SYSCALL_INFO_ENTRIES) { - /* Store the syscall information to print later. We - * can't print this now - currently we're running with - * SR.BL=1, so we can't take a tlbmiss (which could occur - * in the console drivers under printk). - * - * Just overwrite old entries on ring overflow - this - * is only for last-hope debugging. */ - stored_syscalls[syscall_next].pid = current->pid; - stored_syscalls[syscall_next].syscall_number = syscallno; - syscall_next++; - syscall_next &= (N_STORED_SYSCALLS - 1); - } - } -} - -static void drain_syscalls(void) { - while (syscall_next_print != syscall_next) { - printk("Task %d: %s()\n", - stored_syscalls[syscall_next_print].pid, - syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name); - syscall_next_print++; - syscall_next_print &= (N_STORED_SYSCALLS - 1); - } -} - -void evt_debug2(unsigned int ret) -{ - drain_syscalls(); - printk("Task %d: syscall returns %08x\n", current->pid, ret); -} - -void evt_debug_ret_from_irq(struct pt_regs *regs) -{ - int pid; - struct ring_node *rr; - - pid = current->pid; - rr = event_ring + event_ptr; - rr->evt = 0xffff; - rr->ret_addr = 0; - rr->event = 0; - rr->tra = 0; - rr->pid = pid; - rr->pc = regs->pc; - event_ptr = (event_ptr + 1) & 15; -} - -void evt_debug_ret_from_exc(struct pt_regs *regs) -{ - int pid; - struct ring_node *rr; - - pid = current->pid; - rr = event_ring + event_ptr; - rr->evt = 0xfffe; - rr->ret_addr = 0; - rr->event = 0; - rr->tra = 0; - rr->pid = pid; - rr->pc = regs->pc; - event_ptr = (event_ptr + 1) & 15; -} - -#endif /* CONFIG_POOR_MANS_STRACE */ - -/* ======================================================================= */ - -void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs) -{ - - unsigned long long ah, al, bh, bl, ch, cl; - - printk("\n"); - printk("EXCEPTION - %s: task %d; Linux trap # %d; signal = %d\n", - ((from) ? from : "???"), current->pid, trapnr, signr); - - asm volatile ("getcon " __EXPEVT ", %0":"=r"(ah)); - asm volatile ("getcon " __EXPEVT ", %0":"=r"(al)); - ah = (ah) >> 32; - al = (al) & 0xffffffff; - asm volatile ("getcon " __KCR1 ", %0":"=r"(bh)); - asm volatile ("getcon " __KCR1 ", %0":"=r"(bl)); - bh = (bh) >> 32; - bl = (bl) & 0xffffffff; - asm volatile ("getcon " __INTEVT ", %0":"=r"(ch)); - asm volatile ("getcon " __INTEVT ", %0":"=r"(cl)); - ch = (ch) >> 32; - cl = (cl) & 0xffffffff; - printk("EXPE: %08Lx%08Lx KCR1: %08Lx%08Lx INTE: %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - asm volatile ("getcon " __PEXPEVT ", %0":"=r"(ah)); - asm volatile ("getcon " __PEXPEVT ", %0":"=r"(al)); - ah = (ah) >> 32; - al = (al) & 0xffffffff; - asm volatile ("getcon " __PSPC ", %0":"=r"(bh)); - asm volatile ("getcon " __PSPC ", %0":"=r"(bl)); - bh = (bh) >> 32; - bl = (bl) & 0xffffffff; - asm volatile ("getcon " __PSSR ", %0":"=r"(ch)); - asm volatile ("getcon " __PSSR ", %0":"=r"(cl)); - ch = (ch) >> 32; - cl = (cl) & 0xffffffff; - printk("PEXP: %08Lx%08Lx PSPC: %08Lx%08Lx PSSR: %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->pc) >> 32; - al = (regs->pc) & 0xffffffff; - bh = (regs->regs[18]) >> 32; - bl = (regs->regs[18]) & 0xffffffff; - ch = (regs->regs[15]) >> 32; - cl = (regs->regs[15]) & 0xffffffff; - printk("PC : %08Lx%08Lx LINK: %08Lx%08Lx SP : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->sr) >> 32; - al = (regs->sr) & 0xffffffff; - asm volatile ("getcon " __TEA ", %0":"=r"(bh)); - asm volatile ("getcon " __TEA ", %0":"=r"(bl)); - bh = (bh) >> 32; - bl = (bl) & 0xffffffff; - asm volatile ("getcon " __KCR0 ", %0":"=r"(ch)); - asm volatile ("getcon " __KCR0 ", %0":"=r"(cl)); - ch = (ch) >> 32; - cl = (cl) & 0xffffffff; - printk("SR : %08Lx%08Lx TEA : %08Lx%08Lx KCR0: %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->regs[0]) >> 32; - al = (regs->regs[0]) & 0xffffffff; - bh = (regs->regs[1]) >> 32; - bl = (regs->regs[1]) & 0xffffffff; - ch = (regs->regs[2]) >> 32; - cl = (regs->regs[2]) & 0xffffffff; - printk("R0 : %08Lx%08Lx R1 : %08Lx%08Lx R2 : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->regs[3]) >> 32; - al = (regs->regs[3]) & 0xffffffff; - bh = (regs->regs[4]) >> 32; - bl = (regs->regs[4]) & 0xffffffff; - ch = (regs->regs[5]) >> 32; - cl = (regs->regs[5]) & 0xffffffff; - printk("R3 : %08Lx%08Lx R4 : %08Lx%08Lx R5 : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->regs[6]) >> 32; - al = (regs->regs[6]) & 0xffffffff; - bh = (regs->regs[7]) >> 32; - bl = (regs->regs[7]) & 0xffffffff; - ch = (regs->regs[8]) >> 32; - cl = (regs->regs[8]) & 0xffffffff; - printk("R6 : %08Lx%08Lx R7 : %08Lx%08Lx R8 : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - - ah = (regs->regs[9]) >> 32; - al = (regs->regs[9]) & 0xffffffff; - bh = (regs->regs[10]) >> 32; - bl = (regs->regs[10]) & 0xffffffff; - ch = (regs->regs[11]) >> 32; - cl = (regs->regs[11]) & 0xffffffff; - printk("R9 : %08Lx%08Lx R10 : %08Lx%08Lx R11 : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - printk("....\n"); - - ah = (regs->tregs[0]) >> 32; - al = (regs->tregs[0]) & 0xffffffff; - bh = (regs->tregs[1]) >> 32; - bl = (regs->tregs[1]) & 0xffffffff; - ch = (regs->tregs[2]) >> 32; - cl = (regs->tregs[2]) & 0xffffffff; - printk("T0 : %08Lx%08Lx T1 : %08Lx%08Lx T2 : %08Lx%08Lx\n", - ah, al, bh, bl, ch, cl); - printk("....\n"); - - print_dtlb(); - print_itlb(); -} - -/* ======================================================================= */ - -/* -** Depending on scan the MMU, Data or Instruction side -** looking for a valid mapping matching Eaddr & asid. -** Return -1 if not found or the TLB id entry otherwise. -** Note: it works only for 4k pages! -*/ -static unsigned long -lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid) -{ - regType_t pteH; - unsigned long epn; - int count; - - epn = Eaddr & 0xfffff000; - - for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) { - pteH = getConfigReg(base); - if (GET_VALID(pteH)) - if ((unsigned long) GET_EPN(pteH) == epn) - if ((unsigned long) GET_ASID(pteH) == asid) - break; - } - return ((unsigned long) ((count < MAX_TLBs) ? base : -1)); -} - -unsigned long lookup_dtlb(unsigned long Eaddr) -{ - unsigned long asid = get_asid(); - return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid)); -} - -unsigned long lookup_itlb(unsigned long Eaddr) -{ - unsigned long asid = get_asid(); - return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid)); -} - -void print_page(struct page *page) -{ - printk(" page[%p] -> index 0x%lx, count 0x%x, flags 0x%lx\n", - page, page->index, page_count(page), page->flags); - printk(" address_space = %p, pages =%ld\n", page->mapping, - page->mapping->nrpages); - -} diff --git a/arch/sh64/lib/io.c b/arch/sh64/lib/io.c deleted file mode 100644 index a3f3a2b8e25b..000000000000 --- a/arch/sh64/lib/io.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2000 David J. Mckay (david.mckay@st.com) - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * This file contains the I/O routines for use on the overdrive board - * - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Now for the string version of these functions */ -void outsb(unsigned long port, const void *addr, unsigned long count) -{ - int i; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p++) { - outb(*p, port); - } -} -EXPORT_SYMBOL(outsb); - -void insb(unsigned long port, void *addr, unsigned long count) -{ - int i; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p++) { - *p = inb(port); - } -} -EXPORT_SYMBOL(insb); - -/* For the 16 and 32 bit string functions, we have to worry about alignment. - * The SH does not do unaligned accesses, so we have to read as bytes and - * then write as a word or dword. - * This can be optimised a lot more, especially in the case where the data - * is aligned - */ - -void outsw(unsigned long port, const void *addr, unsigned long count) -{ - int i; - unsigned short tmp; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p += 2) { - tmp = (*p) | ((*(p + 1)) << 8); - outw(tmp, port); - } -} -EXPORT_SYMBOL(outsw); - -void insw(unsigned long port, void *addr, unsigned long count) -{ - int i; - unsigned short tmp; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p += 2) { - tmp = inw(port); - p[0] = tmp & 0xff; - p[1] = (tmp >> 8) & 0xff; - } -} -EXPORT_SYMBOL(insw); - -void outsl(unsigned long port, const void *addr, unsigned long count) -{ - int i; - unsigned tmp; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p += 4) { - tmp = (*p) | ((*(p + 1)) << 8) | ((*(p + 2)) << 16) | - ((*(p + 3)) << 24); - outl(tmp, port); - } -} -EXPORT_SYMBOL(outsl); - -void insl(unsigned long port, void *addr, unsigned long count) -{ - int i; - unsigned tmp; - unsigned char *p = (unsigned char *) addr; - - for (i = 0; i < count; i++, p += 4) { - tmp = inl(port); - p[0] = tmp & 0xff; - p[1] = (tmp >> 8) & 0xff; - p[2] = (tmp >> 16) & 0xff; - p[3] = (tmp >> 24) & 0xff; - - } -} -EXPORT_SYMBOL(insl); - -void memcpy_toio(void __iomem *to, const void *from, long count) -{ - unsigned char *p = (unsigned char *) from; - - while (count) { - count--; - writeb(*p++, to++); - } -} -EXPORT_SYMBOL(memcpy_toio); - -void memcpy_fromio(void *to, void __iomem *from, long count) -{ - int i; - unsigned char *p = (unsigned char *) to; - - for (i = 0; i < count; i++) { - p[i] = readb(from); - from++; - } -} -EXPORT_SYMBOL(memcpy_fromio); diff --git a/arch/sh64/lib/iomap.c b/arch/sh64/lib/iomap.c deleted file mode 100644 index 253d1e351d49..000000000000 --- a/arch/sh64/lib/iomap.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * arch/sh64/lib/iomap.c - * - * Generic sh64 iomap interface - * - * Copyright (C) 2004 Paul Mundt - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include - -void __iomem *__attribute__ ((weak)) -ioport_map(unsigned long port, unsigned int len) -{ - return (void __iomem *)port; -} -EXPORT_SYMBOL(ioport_map); - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing .. */ -} -EXPORT_SYMBOL(ioport_unmap); - -#ifdef CONFIG_PCI -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) -{ - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len) - return NULL; - if (max && len > max) - len = max; - if (flags & IORESOURCE_IO) - return ioport_map(start + pciio_virt, len); - if (flags & IORESOURCE_MEM) - return (void __iomem *)start; - - /* What? */ - return NULL; -} -EXPORT_SYMBOL(pci_iomap); - -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ - /* Nothing .. */ -} -EXPORT_SYMBOL(pci_iounmap); -#endif diff --git a/arch/sh64/lib/memcpy.c b/arch/sh64/lib/memcpy.c deleted file mode 100644 index fba436a92bfa..000000000000 --- a/arch/sh64/lib/memcpy.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com) - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - */ - -#include -#include - -// This is a simplistic optimization of memcpy to increase the -// granularity of access beyond one byte using aligned -// loads and stores. This is not an optimal implementation -// for SH-5 (especially with regard to prefetching and the cache), -// and a better version should be provided later ... - -void *memcpy(void *dest, const void *src, size_t count) -{ - char *d = (char *) dest, *s = (char *) src; - - if (count >= 32) { - int i = 8 - (((unsigned long) d) & 0x7); - - if (i != 8) - while (i-- && count--) { - *d++ = *s++; - } - - if (((((unsigned long) d) & 0x7) == 0) && - ((((unsigned long) s) & 0x7) == 0)) { - while (count >= 32) { - unsigned long long t1, t2, t3, t4; - t1 = *(unsigned long long *) (s); - t2 = *(unsigned long long *) (s + 8); - t3 = *(unsigned long long *) (s + 16); - t4 = *(unsigned long long *) (s + 24); - *(unsigned long long *) (d) = t1; - *(unsigned long long *) (d + 8) = t2; - *(unsigned long long *) (d + 16) = t3; - *(unsigned long long *) (d + 24) = t4; - d += 32; - s += 32; - count -= 32; - } - while (count >= 8) { - *(unsigned long long *) d = - *(unsigned long long *) s; - d += 8; - s += 8; - count -= 8; - } - } - - if (((((unsigned long) d) & 0x3) == 0) && - ((((unsigned long) s) & 0x3) == 0)) { - while (count >= 4) { - *(unsigned long *) d = *(unsigned long *) s; - d += 4; - s += 4; - count -= 4; - } - } - - if (((((unsigned long) d) & 0x1) == 0) && - ((((unsigned long) s) & 0x1) == 0)) { - while (count >= 2) { - *(unsigned short *) d = *(unsigned short *) s; - d += 2; - s += 2; - count -= 2; - } - } - } - - while (count--) { - *d++ = *s++; - } - - return d; -} diff --git a/arch/sh64/lib/page_clear.S b/arch/sh64/lib/page_clear.S deleted file mode 100644 index ac0111d669a3..000000000000 --- a/arch/sh64/lib/page_clear.S +++ /dev/null @@ -1,54 +0,0 @@ -/* - Copyright 2003 Richard Curnow, SuperH (UK) Ltd. - - This file is subject to the terms and conditions of the GNU General Public - License. See the file "COPYING" in the main directory of this archive - for more details. - - Tight version of memset for the case of just clearing a page. It turns out - that having the alloco's spaced out slightly due to the increment/branch - pair causes them to contend less for access to the cache. Similarly, - keeping the stores apart from the allocos causes less contention. => Do two - separate loops. Do multiple stores per loop to amortise the - increment/branch cost a little. - - Parameters: - r2 : source effective address (start of page) - - Always clears 4096 bytes. - - Note : alloco guarded by synco to avoid TAKum03020 erratum - -*/ - - .section .text..SHmedia32,"ax" - .little - - .balign 8 - .global sh64_page_clear -sh64_page_clear: - pta/l 1f, tr1 - pta/l 2f, tr2 - ptabs/l r18, tr0 - - movi 4096, r7 - add r2, r7, r7 - add r2, r63, r6 -1: - alloco r6, 0 - synco ! TAKum03020 - addi r6, 32, r6 - bgt/l r7, r6, tr1 - - add r2, r63, r6 -2: - st.q r6, 0, r63 - st.q r6, 8, r63 - st.q r6, 16, r63 - st.q r6, 24, r63 - addi r6, 32, r6 - bgt/l r7, r6, tr2 - - blink tr0, r63 - - diff --git a/arch/sh64/lib/page_copy.S b/arch/sh64/lib/page_copy.S deleted file mode 100644 index e159c3cd2582..000000000000 --- a/arch/sh64/lib/page_copy.S +++ /dev/null @@ -1,91 +0,0 @@ -/* - Copyright 2003 Richard Curnow, SuperH (UK) Ltd. - - This file is subject to the terms and conditions of the GNU General Public - License. See the file "COPYING" in the main directory of this archive - for more details. - - Tight version of mempy for the case of just copying a page. - Prefetch strategy empirically optimised against RTL simulations - of SH5-101 cut2 eval chip with Cayman board DDR memory. - - Parameters: - r2 : source effective address (start of page) - r3 : destination effective address (start of page) - - Always copies 4096 bytes. - - Points to review. - * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead. - It seems like the prefetch needs to be at at least 4 lines ahead to get - the data into the cache in time, and the allocos contend with outstanding - prefetches for the same cache set, so it's better to have the numbers - different. - */ - - .section .text..SHmedia32,"ax" - .little - - .balign 8 - .global sh64_page_copy -sh64_page_copy: - - /* Copy 4096 bytes worth of data from r2 to r3. - Do prefetches 4 lines ahead. - Do alloco 2 lines ahead */ - - pta 1f, tr1 - pta 2f, tr2 - pta 3f, tr3 - ptabs r18, tr0 - -#if 0 - /* TAKum03020 */ - ld.q r2, 0x00, r63 - ld.q r2, 0x20, r63 - ld.q r2, 0x40, r63 - ld.q r2, 0x60, r63 -#endif - alloco r3, 0x00 - synco ! TAKum03020 - alloco r3, 0x20 - synco ! TAKum03020 - - movi 3968, r6 - add r3, r6, r6 - addi r6, 64, r7 - addi r7, 64, r8 - sub r2, r3, r60 - addi r60, 8, r61 - addi r61, 8, r62 - addi r62, 8, r23 - addi r60, 0x80, r22 - -/* Minimal code size. The extra branches inside the loop don't cost much - because they overlap with the time spent waiting for prefetches to - complete. */ -1: -#if 0 - /* TAKum03020 */ - bge/u r3, r6, tr2 ! skip prefetch for last 4 lines - ldx.q r3, r22, r63 ! prefetch 4 lines hence -#endif -2: - bge/u r3, r7, tr3 ! skip alloco for last 2 lines - alloco r3, 0x40 ! alloc destination line 2 lines ahead - synco ! TAKum03020 -3: - ldx.q r3, r60, r36 - ldx.q r3, r61, r37 - ldx.q r3, r62, r38 - ldx.q r3, r23, r39 - st.q r3, 0, r36 - st.q r3, 8, r37 - st.q r3, 16, r38 - st.q r3, 24, r39 - addi r3, 32, r3 - bgt/l r8, r3, tr1 - - blink tr0, r63 ! return - - diff --git a/arch/sh64/lib/panic.c b/arch/sh64/lib/panic.c deleted file mode 100644 index c9eb1cb50d97..000000000000 --- a/arch/sh64/lib/panic.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2003 Richard Curnow, SuperH UK Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include - -/* THIS IS A PHYSICAL ADDRESS */ -#define HDSP2534_ADDR (0x04002100) - -#ifdef CONFIG_SH_CAYMAN - -static void poor_mans_delay(void) -{ - int i; - for (i = 0; i < 2500000; i++) { - } /* poor man's delay */ -} - -static void show_value(unsigned long x) -{ - int i; - unsigned nibble; - for (i = 0; i < 8; i++) { - nibble = ((x >> (i * 4)) & 0xf); - - ctrl_outb(nibble + ((nibble > 9) ? 55 : 48), - HDSP2534_ADDR + 0xe0 + ((7 - i) << 2)); - } -} - -#endif - -void -panic_handler(unsigned long panicPC, unsigned long panicSSR, - unsigned long panicEXPEVT) -{ -#ifdef CONFIG_SH_CAYMAN - while (1) { - /* This piece of code displays the PC on the LED display */ - show_value(panicPC); - poor_mans_delay(); - show_value(panicSSR); - poor_mans_delay(); - show_value(panicEXPEVT); - poor_mans_delay(); - } -#endif - - /* Never return from the panic handler */ - for (;;) ; - -} diff --git a/arch/sh64/lib/udelay.c b/arch/sh64/lib/udelay.c deleted file mode 100644 index 327653914007..000000000000 --- a/arch/sh64/lib/udelay.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * arch/sh64/lib/udelay.c - * - * Delay routines, using a pre-computed "loops_per_jiffy" value. - * - * Copyright (C) 2000, 2001 Paolo Alberelli - * Copyright (C) 2003, 2004 Paul Mundt - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include - -extern unsigned long loops_per_jiffy; - -/* - * Use only for very small delays (< 1 msec). - * - * The active part of our cycle counter is only 32-bits wide, and - * we're treating the difference between two marks as signed. On - * a 1GHz box, that's about 2 seconds. - */ - -void __delay(int loops) -{ - long long dummy; - __asm__ __volatile__("gettr tr0, %1\n\t" - "pta $+4, tr0\n\t" - "addi %0, -1, %0\n\t" - "bne %0, r63, tr0\n\t" - "ptabs %1, tr0\n\t":"=r"(loops), - "=r"(dummy) - :"0"(loops)); -} - -void __udelay(unsigned long long usecs, unsigned long lpj) -{ - usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj; - __delay((long long) usecs >> 32); -} - -void __ndelay(unsigned long long nsecs, unsigned long lpj) -{ - nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj; - __delay((long long) nsecs >> 32); -} - -void udelay(unsigned long usecs) -{ - __udelay(usecs, loops_per_jiffy); -} - -void ndelay(unsigned long nsecs) -{ - __ndelay(nsecs, loops_per_jiffy); -} -