+++ /dev/null
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * memset32.S
- */
-
- .syntax unified
-
- .text
- .align
-
- .global android_memset32
- .type android_memset32, %function
- .global android_memset16
- .type android_memset16, %function
-
- /*
- * Optimized memset32 and memset16 for ARM.
- *
- * void android_memset16(uint16_t* dst, uint16_t value, size_t size);
- * void android_memset32(uint32_t* dst, uint32_t value, size_t size);
- *
- * Note that size is a byte count, not an element count.
- * (A plain-C sketch of these semantics follows this file's diff.)
- */
-
-android_memset16:
- .fnstart
- cmp r2, #1
- bxle lr /* less than one halfword: nothing to store */
-
- /* expand the data to 32 bits */
- mov r1, r1, lsl #16
- orr r1, r1, r1, lsr #16
-
- /* align to 32 bits, then fall through into android_memset32 */
- tst r0, #2
- strhne r1, [r0], #2
- subne r2, r2, #2
- .fnend
-
-android_memset32:
- .fnstart
- .cfi_startproc
- str lr, [sp, #-4]!
- .cfi_def_cfa_offset 4
- .cfi_rel_offset lr, 0
-
- /* align the destination to a 32-byte cache line */
- mov r12, r1 /* replicate the fill word so stm can store 16 bytes per instruction */
- mov lr, r1
- rsb r3, r0, #0
- ands r3, r3, #0x1C
- beq .Laligned32
- cmp r3, r2
- andhi r3, r2, #0x1C
- sub r2, r2, r3
-
- /* conditionally writes 0 to 7 words (length in r3) */
- movs r3, r3, lsl #28
- stmiacs r0!, {r1, lr}
- stmiacs r0!, {r1, lr}
- stmiami r0!, {r1, lr}
- movs r3, r3, lsl #2
- strcs r1, [r0], #4
-
-.Laligned32:
- mov r3, r1 /* fourth copy of the fill word for the 4-register stm */
-1: subs r2, r2, #32 /* main loop: 32 bytes per iteration */
- stmiahs r0!, {r1,r3,r12,lr}
- stmiahs r0!, {r1,r3,r12,lr}
- bhs 1b
- add r2, r2, #32 /* restore the remaining count (0..31 bytes) */
-
- /* conditionally stores 0 to 30 bytes */
- movs r2, r2, lsl #28
- stmiacs r0!, {r1,r3,r12,lr}
- stmiami r0!, {r1,lr}
- movs r2, r2, lsl #2
- strcs r1, [r0], #4
- strhmi lr, [r0], #2
-
- ldr lr, [sp], #4
- .cfi_def_cfa_offset 0
- .cfi_restore lr
- bx lr
- .cfi_endproc
- .fnend
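For reference, here is a minimal plain-C sketch of what the deleted ARM file computes, assuming the signatures in its header comment and the byte-count convention noted there. The _ref names are ours, not part of libcutils, and the real assembly additionally aligns dst and uses multi-register stm stores for throughput:

    #include <stddef.h>
    #include <stdint.h>

    void android_memset16_ref(uint16_t *dst, uint16_t value, size_t size) {
        /* The assembly first widens the halfword (value | value << 16)
           so it can issue word and multi-word stores; the net effect
           is simply a halfword fill. */
        size_t n = size / sizeof(*dst);   /* size is a byte count */
        while (n--)
            *dst++ = value;
    }

    void android_memset32_ref(uint32_t *dst, uint32_t value, size_t size) {
        size_t n = size / sizeof(*dst);   /* size is a byte count */
        while (n--)
            *dst++ = value;
    }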
+++ /dev/null
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Unaligned memory accesses are permitted
- *
- */
-
-/* By default we assume that the DC ZVA instruction can be used to zero
-   data blocks more efficiently.  In some circumstances this might be
-   unsafe, for example in an asymmetric multiprocessor environment where
-   CPUs report different DC ZVA block lengths (in that case neither the
-   larger nor the smaller length is safe to use).  A C sketch of the
-   DCZID_EL0 decoding used below follows this file's diff.  */
-
-#define dst x0
-#define count x2
-#define tmp1 x3
-#define tmp1w w3
-#define tmp2 x4
-#define tmp2w w4
-#define zva_len_x x5
-#define zva_len w5
-#define zva_bits_x x6
-
-#define A_l x1
-#define A_lw w1
-#define tmp3w w9
-
-#define ENTRY(f) \
- .text; \
- .globl f; \
- .align 0; \
- .type f, %function; \
- f: \
- .cfi_startproc \
-
-#define END(f) \
- .cfi_endproc; \
- .size f, .-f; \
-
-ENTRY(android_memset16)
- ands A_lw, A_lw, #0xffff /* zero-extend the halfword; Z set if value == 0 */
- b.eq .Lzero_mem /* an all-zero fill can use the DC ZVA path */
- orr A_lw, A_lw, A_lw, lsl #16 /* replicate the halfword into 32 bits */
- b .Lexpand_to_64
-END(android_memset16)
-
-ENTRY(android_memset32)
- cmp A_lw, #0
- b.eq .Lzero_mem /* an all-zero fill can use the DC ZVA path */
-.Lexpand_to_64:
- orr A_l, A_l, A_l, lsl #32 /* replicate the word into 64 bits */
-.Ltail_maybe_long:
- cmp count, #64
- b.ge .Lnot_short
-.Ltail_maybe_tiny:
- cmp count, #15
- b.le .Ltail15tiny
-.Ltail63:
- ands tmp1, count, #0x30
- b.eq .Ltail15
- add dst, dst, tmp1
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- stp A_l, A_l, [dst, #-48]
-1:
- stp A_l, A_l, [dst, #-32]
-2:
- stp A_l, A_l, [dst, #-16]
-
-.Ltail15:
- and count, count, #15
- add dst, dst, count
- stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
- ret
-
-.Ltail15tiny:
- /* Set up to 15 bytes.  Does not assume any earlier memory has
-    been set. */
- tbz count, #3, 1f
- str A_l, [dst], #8
-1:
- tbz count, #2, 1f
- str A_lw, [dst], #4
-1:
- tbz count, #1, 1f
- strh A_lw, [dst], #2
-1:
- ret
-
- /* Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line, this ensures the entire loop is in one line. */
- .p2align 6
-.Lnot_short:
- neg tmp2, dst
- ands tmp2, tmp2, #15
- b.eq 2f
- /* Bring DST to 128-bit (16-byte) alignment. We know that there's
- * more than that to set, so we simply store 16 bytes and advance by
- * the amount required to reach alignment. */
- sub count, count, tmp2
- stp A_l, A_l, [dst]
- add dst, dst, tmp2
- /* There may be 63 or fewer bytes to go now. */
- cmp count, #63
- b.le .Ltail63
-2:
- sub dst, dst, #16 /* Pre-bias. */
- sub count, count, #64
-1:
- stp A_l, A_l, [dst, #16]
- stp A_l, A_l, [dst, #32]
- stp A_l, A_l, [dst, #48]
- stp A_l, A_l, [dst, #64]!
- subs count, count, #64
- b.ge 1b
- tst count, #0x3f
- add dst, dst, #16
- b.ne .Ltail63
- ret
-
- /* For zeroing memory, check to see if we can use the ZVA feature to
- * zero entire 'cache' lines. */
-.Lzero_mem:
- mov A_l, #0
- cmp count, #63
- b.le .Ltail_maybe_tiny
- neg tmp2, dst
- ands tmp2, tmp2, #15
- b.eq 1f
- sub count, count, tmp2
- stp A_l, A_l, [dst]
- add dst, dst, tmp2
- cmp count, #63
- b.le .Ltail63
-1:
- /* For zeroing small amounts of memory, it's not worth setting up
- * the line-clear code. */
- cmp count, #128
- b.lt .Lnot_short
- mrs tmp1, dczid_el0
- tbnz tmp1, #4, .Lnot_short /* DZP set: DC ZVA is prohibited */
- mov tmp3w, #4
- and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
- lsl zva_len, tmp3w, zva_len /* block size in bytes = 4 << log2(words) */
-
-.Lzero_by_line:
- /* Compute how far we need to go to become suitably aligned. We're
- * already at quad-word alignment. */
- cmp count, zva_len_x
- b.lt .Lnot_short /* Not enough to reach alignment. */
- sub zva_bits_x, zva_len_x, #1
- neg tmp2, dst
- ands tmp2, tmp2, zva_bits_x
- b.eq 1f /* Already aligned. */
- /* Not aligned, check that there's enough to zero after alignment. */
- sub tmp1, count, tmp2
- cmp tmp1, #64
- ccmp tmp1, zva_len_x, #8, ge /* if tmp1 >= 64 compare tmp1 with zva_len_x, else force NZCV=0b1000 so b.lt is taken */
- b.lt .Lnot_short
- /* We know that there are at least 64 bytes to zero and that it's safe
- * to overrun by 64 bytes. */
- mov count, tmp1
-2:
- stp A_l, A_l, [dst]
- stp A_l, A_l, [dst, #16]
- stp A_l, A_l, [dst, #32]
- subs tmp2, tmp2, #64
- stp A_l, A_l, [dst, #48]
- add dst, dst, #64
- b.ge 2b
- /* We've overrun a bit, so adjust dst downwards. */
- add dst, dst, tmp2
-1:
- sub count, count, zva_len_x
-3:
- dc zva, dst
- add dst, dst, zva_len_x
- subs count, count, zva_len_x
- b.ge 3b
- ands count, count, zva_bits_x
- b.ne .Ltail_maybe_long
- ret
-END(android_memset32)
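For reference, the DCZID_EL0 probe in .Lzero_mem above decodes as follows in C. This is a sketch under the file's own assumptions (AArch64, GCC/Clang-style inline asm); the helper names are ours:

    #include <stdint.h>

    /* DCZID_EL0: bits [3:0] hold log2 of the DC ZVA block size in
       4-byte words; bit 4 (DZP) set means DC ZVA must not be used. */
    static inline uint64_t read_dczid_el0(void) {
        uint64_t v;
        __asm__("mrs %0, dczid_el0" : "=r"(v));
        return v;
    }

    /* Block size in bytes, or 0 if zeroing by DC ZVA is prohibited;
       mirrors the tbnz/and/lsl sequence above. */
    static inline uint64_t dc_zva_block_bytes(void) {
        uint64_t dczid = read_dczid_el0();
        if (dczid & (UINT64_C(1) << 4))   /* DZP */
            return 0;
        return UINT64_C(4) << (dczid & 0xf);
    }

On most cores the field value is 4, giving a 64-byte ZVA block, which is why the assembly checks both for at least 64 bytes and for a full block remaining after alignment before entering the line-clear loop.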