newlib: libc: memcpy M-profile PACBTI-enablement

Add function prologue/epilogue to conditionally add BTI landing pads
and/or PAC code generation & authentication instructions depending on
compilation flags.

Signed-off-by: chao an <anchao@xiaomi.com>
This commit is contained in:
chao an 2023-04-26 17:37:30 +08:00 committed by Xiang Xiao
parent d197ca9967
commit 9fa097ab69
16 changed files with 980 additions and 270 deletions

View file

@ -65,3 +65,6 @@ endif
DEPPATH += --dep-path machine/arm
VPATH += :machine/arm
AFLAGS += ${INCDIR_PREFIX}$(TOPDIR)$(DELIM)libs$(DELIM)libc$(DELIM)machine$(DELIM)arm
CFLAGS += ${INCDIR_PREFIX}$(TOPDIR)$(DELIM)libs$(DELIM)libc$(DELIM)machine$(DELIM)arm

View file

@ -0,0 +1,188 @@
/*
* libs/libc/machine/arm/arm-acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H
#ifndef __ARM_ARCH
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
These were mostly implemented in GCC around GCC-4.8; older versions
have no, or only partial support. To provide a level of backwards
compatibility we try to work out what the definitions should be, given
the older pre-defines that GCC did produce. This isn't complete, but
it should be enough for use by routines that depend on this header. */
/* Each section below maps one legacy GCC __ARM_ARCH_<x>__ macro onto the
ACLE macros __ARM_ARCH, __ARM_ARCH_ISA_*, __ARM_ARCH_PROFILE and
__ARM_FEATURE_*. Only one section is expected to fire per compile. */
/* No need to handle ARMv8, GCC had ACLE support before that. */
# ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# endif
# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
# endif
/* ARMv7E-M: Thumb-2 M-profile with the DSP/SIMD32 extension. */
# ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
/* ARMv7-M: Thumb-2 M-profile without SIMD32/DSP. */
# ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
# endif
/* ARMv6-M: Thumb-1 only M-profile; no CLZ/DSP/LDREX features declared. */
# ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
# endif
# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
# endif
# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
# endif
# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# endif
# ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
# endif
# ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# endif
# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
# endif
/* Byte order: GCC defines __ARMEB__ for big-endian targets. */
# ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
# endif
/* If we still don't know what the target architecture is, then we're
probably not using GCC. */
# ifndef __ARM_ARCH
# error Unable to determine architecture version.
# endif
#endif /* __ARM_ARCH */
#endif /* __LIBS_LIBC_MACHINE_ARM_ARM_ACLE_COMPAT_H */

View file

@ -0,0 +1,510 @@
/*
* libs/libc/machine/arm/arm_asm.h
*
* Copyright (c) 2009 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARM_ASM_H
#define __LIBS_LIBC_MACHINE_ARM_ARM_ASM_H
#include "arm-acle-compat.h"
/* Collapse the ACLE architecture macros into simple _ISA_* switches for
use by the assembly sources in this directory. */
#if __ARM_ARCH >= 7 && defined (__ARM_ARCH_ISA_ARM)
# define _ISA_ARM_7
#endif
#if __ARM_ARCH >= 6 && defined (__ARM_ARCH_ISA_ARM)
# define _ISA_ARM_6
#endif
#if __ARM_ARCH >= 5
# define _ISA_ARM_5
#endif
#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB >= 1
# define _ISA_ARM_4T
#endif
#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB == 0
# define _ISA_ARM_4
#endif
#if __ARM_ARCH_ISA_THUMB >= 2
# define _ISA_THUMB_2
#endif
#if __ARM_ARCH_ISA_THUMB >= 1
# define _ISA_THUMB_1
#endif
/* Check whether leaf function PAC signing has been requested in the
-mbranch-protect compile-time option. */
/* Bit 2 of __ARM_FEATURE_PAC_DEFAULT is set by the compiler when
`-mbranch-protection=pac-ret+leaf' is in effect. */
#define LEAF_PROTECT_BIT 2
#ifdef __ARM_FEATURE_PAC_DEFAULT
# define HAVE_PAC_LEAF \
((__ARM_FEATURE_PAC_DEFAULT & (1 << LEAF_PROTECT_BIT)) && 1)
#else
# define HAVE_PAC_LEAF 0
#endif
/* Provide default parameters for PAC-code handling in leaf-functions. */
#if HAVE_PAC_LEAF
# ifndef PAC_LEAF_PUSH_IP
# define PAC_LEAF_PUSH_IP 1
# endif
#else /* !HAVE_PAC_LEAF */
# undef PAC_LEAF_PUSH_IP
# define PAC_LEAF_PUSH_IP 0
#endif /* HAVE_PAC_LEAF */
/* Default for the prologue/epilogue `align8' argument: 0 means do not
force 8-byte stack alignment. */
#define STACK_ALIGN_ENFORCE 0
/* Ensure __ARM_FEATURE_BTI_DEFAULT is always defined so it can appear
in #if and .if expressions below. */
#ifndef __ARM_FEATURE_BTI_DEFAULT
#define __ARM_FEATURE_BTI_DEFAULT 0
#endif
#ifdef __ASSEMBLER__
/******************************************************************************
* Implementation of the prologue and epilogue assembler macros and their
* associated helper functions.
*
* These functions add support for the following:
*
* - M-profile branch target identification (BTI) landing-pads when compiled
* with `-mbranch-protection=bti'.
* - PAC-signing and verification instructions, depending on hardware support
* and whether the PAC-signing of leaf functions has been requested via the
* `-mbranch-protection=pac-ret+leaf' compiler argument.
* - 8-byte stack alignment preservation at function entry, defaulting to the
* value of STACK_ALIGN_ENFORCE.
*
* Notes:
* - Prologue stack alignment is implemented by detecting a push with an odd
* number of registers and prepending a dummy register to the list.
* - If alignment is attempted on a list containing r0, compilation will result
* in an error.
* - If alignment is attempted in a list containing r1, r0 will be prepended to
* the register list and r0 will be restored prior to function return. for
* functions with non-void return types, this will result in the corruption of
* the result register.
* - Stack alignment is enforced via the following helper macro call-chain:
*
* {prologue|epilogue} ->_align8 -> _preprocess_reglist ->
* _preprocess_reglist1 -> {_prologue|_epilogue}
*
* - Debug CFI directives are automatically added to prologues and epilogues,
* assisted by `cfisavelist' and `cfirestorelist', respectively.
*
* Arguments:
* prologue
* --------
* - first - If `last' specified, this serves as start of general-purpose
* register (GPR) range to push onto stack, otherwise represents
* single GPR to push onto stack. If omitted, no GPRs pushed
* onto stack at prologue.
* - last - If given, specifies inclusive upper-bound of GPR range.
* - push_ip - Determines whether IP register is to be pushed to stack at
* prologue. When pac-signing is requested, this holds the
* pac-key. Either 1 or 0 to push or not push, respectively.
* Default behavior: Set to value of PAC_LEAF_PUSH_IP macro.
* - push_lr - Determines whether to push lr to the stack on function entry.
* Either 1 or 0 to push or not push, respectively.
* - align8 - Whether to enforce alignment. Either 1 or 0, with 1 requesting
* alignment.
*
* epilogue
* --------
* The epilogue should be called passing the same arguments as those passed to
* the prologue to ensure the stack is not corrupted on function return.
*
* Usage examples:
*
* prologue push_ip=1 -> push {ip}
* epilogue push_ip=1, align8=1 -> pop {r2, ip}
* prologue push_ip=1, push_lr=1 -> push {ip, lr}
* epilogue 1 -> pop {r1}
* prologue 1, align8=1 -> push {r0, r1}
* epilogue 1, push_ip=1 -> pop {r1, ip}
* prologue 1, 4 -> push {r1-r4}
* epilogue 1, 4 push_ip=1 -> pop {r1-r4, ip}
*
******************************************************************************/
/* Emit .cfi_restore directives for a consecutive sequence of registers. */
/* Recursive: restores \last first, then recurses over [\first, \last-1]
until the two bounds meet. Both arguments must be DWARF register
numbers (plain integers). */
.macro cfirestorelist first, last
.cfi_restore \last
.if \last-\first
cfirestorelist \first, \last-1
.endif
.endm
/* Emit .cfi_offset directives for a consecutive sequence of registers. */
/* Recursive: records \last at CFA offset -4*\index, then recurses with
\index+1, so the highest-numbered register gets the offset closest to
the CFA — matching the layout produced by a `push' of the range. */
.macro cfisavelist first, last, index=1
.cfi_offset \last, -4*(\index)
.if \last-\first
cfisavelist \first, \last-1, \index+1
.endif
.endm
/* _prologue: core prologue emitter (no stack-alignment handling — see
_align8/prologue for that). Emits the PAC/BTI entry instructions
first, then one `push' matching the (first, last, push_ip, push_lr)
combination, each with matching CFI annotations. first/last = -1
means "no GPR range". DWARF register 143 is RA_AUTH_CODE (the PAC
code); register 12 is ip, 14 is lr. */
.macro _prologue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0
.if \push_ip & 1 != \push_ip
.error "push_ip may be either 0 or 1"
.endif
.if \push_lr & 1 != \push_lr
.error "push_lr may be either 0 or 1"
.endif
.if \first != -1
.if \last == -1
/* Upper-bound not provided: Set upper = lower. */
_prologue \first, \first, \push_ip, \push_lr
.exitm
.endif
.endif
#if HAVE_PAC_LEAF
#if __ARM_FEATURE_BTI_DEFAULT
/* pacbti doubles as the BTI landing pad and the PAC-sign of lr. */
pacbti ip, lr, sp
#else
pac ip, lr, sp
#endif /* __ARM_FEATURE_BTI_DEFAULT */
/* The PAC code (DWARF 143) now lives in ip (DWARF 12). */
.cfi_register 143, 12
#else
#if __ARM_FEATURE_BTI_DEFAULT
/* BTI only: bare landing pad, no return-address signing. */
bti
#endif /* __ARM_FEATURE_BTI_DEFAULT */
#endif /* HAVE_PAC_LEAF */
.if \first != -1
.if \last != \first
.if \last >= 13
.error "SP cannot be in the save list"
.endif
.if \push_ip
.if \push_lr
/* Case 1: push register range, ip and lr registers. */
push {r\first-r\last, ip, lr}
.cfi_adjust_cfa_offset ((\last-\first)+3)*4
.cfi_offset 14, -4
.cfi_offset 143, -8
cfisavelist \first, \last, 3
.else // !\push_lr
/* Case 2: push register range and ip register. */
push {r\first-r\last, ip}
.cfi_adjust_cfa_offset ((\last-\first)+2)*4
.cfi_offset 143, -4
cfisavelist \first, \last, 2
.endif
.else // !\push_ip
.if \push_lr
/* Case 3: push register range and lr register. */
push {r\first-r\last, lr}
.cfi_adjust_cfa_offset ((\last-\first)+2)*4
.cfi_offset 14, -4
cfisavelist \first, \last, 2
.else // !\push_lr
/* Case 4: push register range. */
push {r\first-r\last}
.cfi_adjust_cfa_offset ((\last-\first)+1)*4
cfisavelist \first, \last, 1
.endif
.endif
.else // \last == \first
.if \push_ip
.if \push_lr
/* Case 5: push single GP register plus ip and lr registers. */
push {r\first, ip, lr}
.cfi_adjust_cfa_offset 12
.cfi_offset 14, -4
.cfi_offset 143, -8
cfisavelist \first, \first, 3
.else // !\push_lr
/* Case 6: push single GP register plus ip register. */
push {r\first, ip}
.cfi_adjust_cfa_offset 8
.cfi_offset 143, -4
cfisavelist \first, \first, 2
.endif
.else // !\push_ip
.if \push_lr
/* Case 7: push single GP register plus lr register. */
push {r\first, lr}
.cfi_adjust_cfa_offset 8
.cfi_offset 14, -4
cfisavelist \first, \first, 2
.else // !\push_lr
/* Case 8: push single GP register. */
push {r\first}
.cfi_adjust_cfa_offset 4
cfisavelist \first, \first, 1
.endif
.endif
.endif
.else // \first == -1
.if \push_ip
.if \push_lr
/* Case 9: push ip and lr registers. */
push {ip, lr}
.cfi_adjust_cfa_offset 8
.cfi_offset 14, -4
.cfi_offset 143, -8
.else // !\push_lr
/* Case 10: push ip register. */
push {ip}
.cfi_adjust_cfa_offset 4
.cfi_offset 143, -4
.endif
.else // !\push_ip
.if \push_lr
/* Case 11: push lr register. */
push {lr}
.cfi_adjust_cfa_offset 4
.cfi_offset 14, -4
.endif
.endif
.endif
.endm
/* _epilogue: core epilogue emitter, mirroring _prologue. Pops the same
register combination, authenticates the return address when PAC is in
use, and emits the `bx lr' return itself — callers must NOT emit
their own return after invoking this macro. Must be called with the
same arguments as the matching _prologue. */
.macro _epilogue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0
.if \push_ip & 1 != \push_ip
.error "push_ip may be either 0 or 1"
.endif
.if \push_lr & 1 != \push_lr
.error "push_lr may be either 0 or 1"
.endif
.if \first != -1
.if \last == -1
/* Upper-bound not provided: Set upper = lower. */
_epilogue \first, \first, \push_ip, \push_lr
.exitm
.endif
.if \last != \first
.if \last >= 13
.error "SP cannot be in the save list"
.endif
.if \push_ip
.if \push_lr
/* Case 1: pop register range, ip and lr registers. */
pop {r\first-r\last, ip, lr}
.cfi_restore 14
.cfi_register 143, 12
cfirestorelist \first, \last
.else // !\push_lr
/* Case 2: pop register range and ip register. */
pop {r\first-r\last, ip}
.cfi_register 143, 12
cfirestorelist \first, \last
.endif
.else // !\push_ip
.if \push_lr
/* Case 3: pop register range and lr register. */
pop {r\first-r\last, lr}
.cfi_restore 14
cfirestorelist \first, \last
.else // !\push_lr
/* Case 4: pop register range. */
pop {r\first-r\last}
cfirestorelist \first, \last
.endif
.endif
.else // \last == \first
.if \push_ip
.if \push_lr
/* Case 5: pop single GP register plus ip and lr registers. */
pop {r\first, ip, lr}
.cfi_restore 14
.cfi_register 143, 12
cfirestorelist \first, \first
.else // !\push_lr
/* Case 6: pop single GP register plus ip register. */
pop {r\first, ip}
.cfi_register 143, 12
cfirestorelist \first, \first
.endif
.else // !\push_ip
.if \push_lr
/* Case 7: pop single GP register plus lr register. */
pop {r\first, lr}
.cfi_restore 14
cfirestorelist \first, \first
.else // !\push_lr
/* Case 8: pop single GP register. */
pop {r\first}
cfirestorelist \first, \first
.endif
.endif
.endif
.else // \first == -1
.if \push_ip
.if \push_lr
/* Case 9: pop ip and lr registers. */
pop {ip, lr}
.cfi_restore 14
.cfi_register 143, 12
.else // !\push_lr
/* Case 10: pop ip register. */
pop {ip}
.cfi_register 143, 12
.endif
.else // !\push_ip
.if \push_lr
/* Case 11: pop lr register. */
pop {lr}
.cfi_restore 14
.endif
.endif
.endif
#if HAVE_PAC_LEAF
/* Authenticate the return address against the PAC code restored into
ip; faults on mismatch. */
aut ip, lr, sp
#endif /* HAVE_PAC_LEAF */
bx lr
.endm
# clean up expressions in 'last'
# Dispatch on the value of 'last' so that \reglist_op (_prologue or
# _epilogue) receives a literal digit 0-11 rather than an assembler
# expression such as "\last-1"; the literal is required because the
# callee string-pastes it into a register name (r\last).
.macro _preprocess_reglist1 first:req, last:req, push_ip:req, push_lr:req, reglist_op:req
.if \last == 0
\reglist_op \first, 0, \push_ip, \push_lr
.elseif \last == 1
\reglist_op \first, 1, \push_ip, \push_lr
.elseif \last == 2
\reglist_op \first, 2, \push_ip, \push_lr
.elseif \last == 3
\reglist_op \first, 3, \push_ip, \push_lr
.elseif \last == 4
\reglist_op \first, 4, \push_ip, \push_lr
.elseif \last == 5
\reglist_op \first, 5, \push_ip, \push_lr
.elseif \last == 6
\reglist_op \first, 6, \push_ip, \push_lr
.elseif \last == 7
\reglist_op \first, 7, \push_ip, \push_lr
.elseif \last == 8
\reglist_op \first, 8, \push_ip, \push_lr
.elseif \last == 9
\reglist_op \first, 9, \push_ip, \push_lr
.elseif \last == 10
\reglist_op \first, 10, \push_ip, \push_lr
.elseif \last == 11
\reglist_op \first, 11, \push_ip, \push_lr
.else
.error "last (\last) out of range"
.endif
.endm
# clean up expressions in 'first'
# Dispatch on the value of 'first' so that _preprocess_reglist1 receives
# a literal digit 0-11 rather than an assembler expression such as
# "\first-1"; _preprocess_reglist1 then does the same for 'last' before
# invoking \reglist_op (_prologue or _epilogue) with literal arguments.
.macro _preprocess_reglist first:req, last, push_ip=0, push_lr=0, reglist_op:req
.ifb \last
/* No upper bound supplied: reuse \first for both ends of the range.
   NOTE(fix): forward all five arguments — the previous recursion
   (`_preprocess_reglist \first \first \push_ip \push_lr') omitted the
   mandatory \reglist_op argument (declared :req), so this path could
   never assemble. */
_preprocess_reglist \first, \first, \push_ip, \push_lr, \reglist_op
.else
.if \first > \last
.error "last (\last) must be at least as great as first (\first)"
.endif
.if \first == 0
_preprocess_reglist1 0, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 1
_preprocess_reglist1 1, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 2
_preprocess_reglist1 2, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 3
_preprocess_reglist1 3, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 4
_preprocess_reglist1 4, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 5
_preprocess_reglist1 5, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 6
_preprocess_reglist1 6, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 7
_preprocess_reglist1 7, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 8
_preprocess_reglist1 8, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 9
_preprocess_reglist1 9, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 10
_preprocess_reglist1 10, \last, \push_ip, \push_lr, \reglist_op
.elseif \first == 11
_preprocess_reglist1 11, \last, \push_ip, \push_lr, \reglist_op
.else
.error "first (\first) out of range"
.endif
.endif
.endm
/* _align8: enforce 8-byte stack alignment for a prologue/epilogue push.
Total registers pushed = (\last-\first+1) + \push_ip + \push_lr; when
that count is odd the macro extends the range downward by one register
(\first-1) so the push stays a multiple of 8 bytes, then forwards to
\reglist_op via the _preprocess_reglist expression-flattening chain.
Extending below r1 would clobber the return-value register, hence the
hard error when \first is r0. */
.macro _align8 first, last, push_ip=0, push_lr=0, reglist_op=_prologue
.ifb \first
.ifnb \last
.error "can't have last (\last) without specifying first"
.else // \last blank
.if ((\push_ip + \push_lr) % 2) == 0
\reglist_op first=-1, last=-1, push_ip=\push_ip, push_lr=\push_lr
.exitm
.else // ((\push_ip + \push_lr) % 2) odd
/* Odd ip/lr-only push: pad with r2 (caller-saved scratch). */
_align8 2, 2, \push_ip, \push_lr, \reglist_op
.exitm
.endif // ((\push_ip + \push_lr) % 2) == 0
.endif // .ifnb \last
.endif // .ifb \first
.ifb \last
_align8 \first, \first, \push_ip, \push_lr, \reglist_op
.else
.if \push_ip & 1 <> \push_ip
.error "push_ip may be 0 or 1"
.endif
.if \push_lr & 1 <> \push_lr
.error "push_lr may be 0 or 1"
.endif
.ifeq (\last - \first + \push_ip + \push_lr) % 2
/* Register count is odd: prepend r(\first-1) as padding. */
.if \first == 0
.error "Alignment required and first register is r0"
.exitm
.endif
_preprocess_reglist \first-1, \last, \push_ip, \push_lr, \reglist_op
.else
_preprocess_reglist \first \last, \push_ip, \push_lr, \reglist_op
.endif
.endif
.endm
/* prologue: public entry-point macro used by the string/memory routines.
Dispatches to _align8 when 8-byte stack alignment is requested
(align8 defaults to STACK_ALIGN_ENFORCE), otherwise emits the
prologue directly. See the usage notes at the top of this block. */
.macro prologue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
.if \align8
_align8 \first, \last, \push_ip, \push_lr, _prologue
.else
_prologue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
.endif
.endm
/* epilogue: public counterpart of `prologue'; must be invoked with the
same arguments so the pops mirror the pushes. Emits the function
return (`bx lr') via _epilogue. */
.macro epilogue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
.if \align8
_align8 \first, \last, \push_ip, \push_lr, reglist_op=_epilogue
.else
_epilogue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
.endif
.endm
#endif /* __ASSEMBLER__ */
#endif /* __LIBS_LIBC_MACHINE_ARM_ARM_ASM_H */

View file

@ -1,183 +0,0 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
#ifndef __ARM_ARCH
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
* These were mostly implemented in GCC around GCC-4.8; older versions
* have no, or only partial support. To provide a level of backwards
* compatibility we try to work out what the definitions should be, given
* the older pre-defines that GCC did produce. This isn't complete, but
* it should be enough for use by routines that depend on this header.
*/
/* No need to handle ARMv8, GCC had ACLE support before that. */
# ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# endif
# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
# endif
# ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
# endif
# ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
# endif
# ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
# endif
# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
# endif
# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
# endif
# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# endif
# ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
# endif
# ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# endif
# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
# endif
# ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
# endif
#endif
#endif /* __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H */

View file

@ -80,7 +80,8 @@
.syntax unified
#include "acle-compat.h"
#include "arm-acle-compat.h"
#include "arm_asm.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
@ -272,10 +273,14 @@ memchr:
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv6t2
#endif
.arch armv7e-m
#endif /* __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif /* __ARM_ARCH_PROFILE == 'M' */
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
@ -292,11 +297,14 @@ memchr:
.p2align 4,,15
.global memchr
.type memchr,%function
.fnstart
.cfi_startproc
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
prologue
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
@ -318,6 +326,11 @@ memchr:
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
@ -339,6 +352,11 @@ memchr:
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
@ -354,17 +372,29 @@ memchr:
bne 21b @ on r2 flags
40:
.cfi_remember_state
movs r0,#0 @ not found
bx lr
epilogue
50:
.cfi_restore_state
.cfi_remember_state
subs r0,r0,#1 @ found
bx lr
epilogue
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
@ This point is reached from cbnz midway through label 15 prior to
@ popping r4-r7 off the stack. .cfi_restore_state alone disregards
@ this, so we manually correct this.
.cfi_restore_state @ Standard post-prologue state
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
@ -384,8 +414,16 @@ memchr:
61:
pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
subs r0,r0,#1
bx lr
epilogue
.cfi_endproc
.cantunwind
.fnend
#else
/* Defined in memchr-stub.c. */
#endif
#endif

View file

@ -49,6 +49,8 @@
__OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64.
__OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16.
*/
#include "arm_asm.h"
#ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif
@ -88,6 +90,8 @@
.global memcpy
.thumb
.thumb_func
.fnstart
.cfi_startproc
.type memcpy, %function
memcpy:
@ r0: dst
@ -96,10 +100,11 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in
function body. */
prologue push_ip=HAVE_PAC_LEAF
mov ip, r0
#else
push {r0}
#endif
prologue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
orr r3, r1, r0
ands r3, r3, #3
bne .Lmisaligned_copy
@ -181,15 +186,17 @@ memcpy:
#endif /* __ARM_FEATURE_UNALIGNED */
.Ldone:
.cfi_remember_state
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
pop {r0}
#endif
bx lr
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
.align 2
.Lmisaligned_copy:
.cfi_restore_state
#ifdef __ARM_FEATURE_UNALIGNED
/* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy
once destination is adjusted to aligned. */
@ -250,6 +257,9 @@ memcpy:
/* dst is aligned, but src isn't. Misaligned copy. */
push {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
subs r2, #4
/* Backward r1 by misaligned bytes, to make r1 aligned.
@ -302,6 +312,9 @@ memcpy:
adds r2, #4
subs r1, ip
pop {r4, r5}
.cfi_restore 4
.cfi_restore 5
.cfi_adjust_cfa_offset -8
#endif /* __ARM_FEATURE_UNALIGNED */
@ -324,9 +337,11 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
pop {r0}
#endif
bx lr
.size memcpy, .-memcpy
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
.cfi_endproc
.cantunwind
.fnend
.size memcpy, .-memcpy

View file

@ -29,13 +29,17 @@
*
****************************************************************************/
#include "arm_asm.h"
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
.fnstart
.cfi_startproc
prologue 4
cmp r0, r1
push {r4}
bls 3f
adds r3, r1, r2
cmp r0, r3
@ -49,9 +53,10 @@ memmove:
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
.cfi_remember_state
epilogue 4
3:
.cfi_restore_state
cmp r2, #0
beq 2b
add r2, r2, r1
@ -61,6 +66,8 @@ memmove:
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
.size memmove, . - memmove
epilogue 4
.cfi_endproc
.cantunwind
.fnend
.size memmove, . - memmove

View file

@ -29,12 +29,16 @@
*
****************************************************************************/
#include "arm_asm.h"
.thumb
.syntax unified
.global memset
.type memset, %function
.fnstart
.cfi_startproc
memset:
push {r4, r5, r6}
prologue 4 6
lsls r4, r0, #30
beq 10f
subs r4, r2, #1
@ -99,10 +103,14 @@ memset:
cmp r3, r4
bne 8b
9:
pop {r4, r5, r6}
bx lr
.cfi_remember_state
epilogue 4 6
10:
.cfi_restore_state
mov r4, r2
mov r3, r0
b 3b
.size memset, . - memset
.cfi_endproc
.cantunwind
.fnend
.size memset, . - memset

View file

@ -32,6 +32,8 @@
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
#include "arm_asm.h"
/* Parameters and result. */
#define src1 r0
#define src2 r1
@ -47,8 +49,10 @@
.thumb
.syntax unified
def_fn strcmp
.fnstart
.cfi_sections .debug_frame
.cfi_startproc
prologue push_ip=HAVE_PAC_LEAF
eor tmp1, src1, src2
tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */
@ -85,6 +89,7 @@ def_fn strcmp
ldreq data2, [src2], #4
beq 4b
2:
.cfi_remember_state
/* There's a zero or a different byte in the word */
S2HI result, data1, #24
S2LO data1, data1, #8
@ -99,7 +104,7 @@ def_fn strcmp
both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the
representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits
up. */
#ifdef __ARM_BIG_ENDIAN
@ -109,11 +114,11 @@ def_fn strcmp
lsrs result, result, #24
subs result, result, data2
#endif
bx lr
epilogue push_ip=HAVE_PAC_LEAF
#if 0
/* The assembly code below is based on the following algorithm. */
/* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
@ -208,8 +213,10 @@ def_fn strcmp
/* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned:
.cfi_restore_state
tst src1, #3
beq 2f
.cfi_remember_state
ldrb data1, [src1], #1
ldrb data2, [src2], #1
cmp data1, #1
@ -217,12 +224,13 @@ def_fn strcmp
cmpcs data1, data2
beq .Lstrcmp_unaligned
sub result, data1, data2
bx lr
epilogue push_ip=HAVE_PAC_LEAF
2:
.cfi_restore_state
stmfd sp!, {r5}
.cfi_def_cfa_offset 4
.cfi_offset 5, -4
.cfi_adjust_cfa_offset 4
.cfi_rel_offset 5, 0
ldr data1, [src1], #4
and tmp2, src2, #3
@ -358,8 +366,8 @@ def_fn strcmp
.cfi_remember_state
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_adjust_cfa_offset -4
epilogue push_ip=HAVE_PAC_LEAF
.Lstrcmp_tail:
.cfi_restore_state
@ -375,7 +383,9 @@ def_fn strcmp
sub result, r2, result
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_adjust_cfa_offset -4
epilogue push_ip=HAVE_PAC_LEAF
.cfi_endproc
.size strcmp, . - strcmp
.cantunwind
.fnend
.size strcmp, . - strcmp

View file

@ -62,7 +62,8 @@
*
****************************************************************************/
#include "acle-compat.h"
#include "arm-acle-compat.h"
#include "arm_asm.h"
.macro def_fn f p2align=0
.text
@ -82,7 +83,11 @@
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv7e-m
#endif /* if __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif
@ -104,8 +109,10 @@
#define tmp2 r5
def_fn strlen p2align=6
.fnstart
.cfi_startproc
prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@ -155,6 +162,7 @@ def_fn strlen p2align=6
beq .Lloop_aligned
.Lnull_found:
.cfi_remember_state
cmp data1a, #0
itt eq
addeq result, result, #4
@ -163,11 +171,11 @@ def_fn strlen p2align=6
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
epilogue 4 5 push_ip=HAVE_PAC_LEAF
.Lmisaligned8:
.cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
@ -181,4 +189,7 @@ def_fn strlen p2align=6
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.size strlen, . - strlen
.cfi_endproc
.cantunwind
.fnend
.size strlen, . - strlen

View file

@ -80,6 +80,9 @@
.syntax unified
#include "arm-acle-compat.h"
#include "arm_asm.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
@ -270,10 +273,14 @@ memchr:
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv6t2
#endif
.arch armv7e-m
#endif /* __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif /* __ARM_ARCH_PROFILE == 'M' */
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
@ -290,11 +297,14 @@ memchr:
.p2align 4,,15
.global memchr
.type memchr,%function
.fnstart
.cfi_startproc
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
prologue
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
@ -316,6 +326,11 @@ memchr:
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
@ -337,6 +352,11 @@ memchr:
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
@ -352,17 +372,29 @@ memchr:
bne 21b @ on r2 flags
40:
.cfi_remember_state
movs r0,#0 @ not found
bx lr
epilogue
50:
.cfi_restore_state
.cfi_remember_state
subs r0,r0,#1 @ found
bx lr
epilogue
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
@ This point is reached from cbnz midway through label 15 prior to
@ popping r4-r7 off the stack. .cfi_restore_state alone disregards
@ this, so we manually correct this.
.cfi_restore_state @ Standard post-prologue state
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
.cfi_rel_offset 6, 8
.cfi_rel_offset 7, 12
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
@ -382,8 +414,16 @@ memchr:
61:
pop {r4,r5,r6,r7}
.cfi_restore 7
.cfi_restore 6
.cfi_restore 5
.cfi_restore 4
.cfi_adjust_cfa_offset -16
subs r0,r0,#1
bx lr
epilogue
.cfi_endproc
.cantunwind
.fnend
#else
/* Defined in memchr-stub.c. */
#endif

View file

@ -50,6 +50,8 @@
__OPT_BIG_BLOCK_SIZE: Size of big block in words. Default to 64.
__OPT_MID_BLOCK_SIZE: Size of mid block in words. Default to 16.
*/
#include "arm_asm.h"
#ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif
@ -90,11 +92,21 @@
.global memcpy
.thumb
.thumb_func
.fnstart
.cfi_startproc
.type memcpy, %function
memcpy:
@ r0: dst
@ r1: src
@ r2: len
#ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in
function body. */
prologue push_ip=HAVE_PAC_LEAF
mov ip, r0
#else
prologue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
#ifdef __ARM_FEATURE_MVE
mov r3, lr
wlstp.8 lr, r2, 2f
@ -104,15 +116,14 @@ memcpy:
vstrb.8 q0, [r2], #16
letp lr, 1b
2:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
bx r3
#else
#ifdef __ARM_FEATURE_UNALIGNED
/* In case of UNALIGNED access supported, ip is not used in
function body. */
mov ip, r0
#else
push {r0}
#endif
orr r3, r1, r0
ands r3, r3, #3
bne .Lmisaligned_copy
@ -194,15 +205,17 @@ memcpy:
#endif /* __ARM_FEATURE_UNALIGNED */
.Ldone:
.cfi_remember_state
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
pop {r0}
#endif
bx lr
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
.align 2
.Lmisaligned_copy:
.cfi_restore_state
#ifdef __ARM_FEATURE_UNALIGNED
/* Define label DST_ALIGNED to BIG_BLOCK. It will go to aligned copy
once destination is adjusted to aligned. */
@ -263,6 +276,9 @@ memcpy:
/* dst is aligned, but src isn't. Misaligned copy. */
push {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset 4, 0
.cfi_rel_offset 5, 4
subs r2, #4
/* Backward r1 by misaligned bytes, to make r1 aligned.
@ -315,6 +331,9 @@ memcpy:
adds r2, #4
subs r1, ip
pop {r4, r5}
.cfi_restore 4
.cfi_restore 5
.cfi_adjust_cfa_offset -8
#endif /* __ARM_FEATURE_UNALIGNED */
@ -337,9 +356,12 @@ memcpy:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
epilogue push_ip=HAVE_PAC_LEAF
#else
pop {r0}
#endif
bx lr
epilogue 0 push_ip=HAVE_PAC_LEAF
#endif /* __ARM_FEATURE_UNALIGNED */
#endif
.cfi_endproc
.cantunwind
.fnend
.size memcpy, .-memcpy

View file

@ -29,13 +29,17 @@
*
****************************************************************************/
#include "arm_asm.h"
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
.fnstart
.cfi_startproc
prologue 4
cmp r0, r1
push {r4}
bls 3f
adds r3, r1, r2
cmp r0, r3
@ -49,9 +53,10 @@ memmove:
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
.cfi_remember_state
epilogue 4
3:
.cfi_restore_state
cmp r2, #0
beq 2b
add r2, r2, r1
@ -61,6 +66,8 @@ memmove:
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
epilogue 4
.cfi_endproc
.cantunwind
.fnend
.size memmove, . - memmove

View file

@ -29,11 +29,16 @@
*
****************************************************************************/
#include "arm_asm.h"
.thumb
.syntax unified
.global memset
.type memset, %function
.fnstart
.cfi_startproc
memset:
prologue 4 6
#ifdef __ARM_FEATURE_MVE
vdup.8 q0, r1
mov r3, lr
@ -43,9 +48,11 @@ memset:
vstrb.8 q0, [r1], #16
letp lr, 1b
2:
.cfi_remember_state
epilogue 4 6
.cfi_restore_state
bx r3
#else
push {r4, r5, r6}
lsls r4, r0, #30
beq 10f
subs r4, r2, #1
@ -110,11 +117,15 @@ memset:
cmp r3, r4
bne 8b
9:
pop {r4, r5, r6}
bx lr
.cfi_remember_state
epilogue 4 6
10:
.cfi_restore_state
mov r4, r2
mov r3, r0
b 3b
#endif
.cfi_endproc
.cantunwind
.fnend
.size memset, . - memset

View file

@ -29,6 +29,11 @@
*
****************************************************************************/
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
#include "arm_asm.h"
#ifdef __ARM_BIG_ENDIAN
#define S2LO lsl
#define S2LOEQ lsleq
@ -59,9 +64,6 @@
\f:
.endm
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
/* Parameters and result. */
#define src1 r0
#define src2 r1
@ -77,8 +79,10 @@
.thumb
.syntax unified
def_fn strcmp
.fnstart
.cfi_sections .debug_frame
.cfi_startproc
prologue push_ip=HAVE_PAC_LEAF
eor tmp1, src1, src2
tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */
@ -115,6 +119,7 @@ def_fn strcmp
ldreq data2, [src2], #4
beq 4b
2:
.cfi_remember_state
/* There's a zero or a different byte in the word */
S2HI result, data1, #24
S2LO data1, data1, #8
@ -129,7 +134,7 @@ def_fn strcmp
both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the
representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits
up. */
#ifdef __ARM_BIG_ENDIAN
@ -139,11 +144,11 @@ def_fn strcmp
lsrs result, result, #24
subs result, result, data2
#endif
bx lr
epilogue push_ip=HAVE_PAC_LEAF
#if 0
/* The assembly code below is based on the following algorithm. */
/* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
@ -238,8 +243,10 @@ def_fn strcmp
/* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned:
.cfi_restore_state
tst src1, #3
beq 2f
.cfi_remember_state
ldrb data1, [src1], #1
ldrb data2, [src2], #1
cmp data1, #1
@ -247,12 +254,13 @@ def_fn strcmp
cmpcs data1, data2
beq .Lstrcmp_unaligned
sub result, data1, data2
bx lr
epilogue push_ip=HAVE_PAC_LEAF
2:
.cfi_restore_state
stmfd sp!, {r5}
.cfi_def_cfa_offset 4
.cfi_offset 5, -4
.cfi_adjust_cfa_offset 4
.cfi_rel_offset 5, 0
ldr data1, [src1], #4
and tmp2, src2, #3
@ -388,8 +396,8 @@ def_fn strcmp
.cfi_remember_state
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_adjust_cfa_offset -4
epilogue push_ip=HAVE_PAC_LEAF
.Lstrcmp_tail:
.cfi_restore_state
@ -405,7 +413,9 @@ def_fn strcmp
sub result, r2, result
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_adjust_cfa_offset -4
epilogue push_ip=HAVE_PAC_LEAF
.cfi_endproc
.cantunwind
.fnend
.size strcmp, . - strcmp

View file

@ -62,6 +62,9 @@
*
****************************************************************************/
#include "arm-acle-compat.h"
#include "arm_asm.h"
.macro def_fn f p2align=0
.text
.p2align \p2align
@ -80,7 +83,11 @@
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
#if __ARM_ARCH >= 8
/* keep config inherited from -march=. */
#else
.arch armv7e-m
#endif /* if __ARM_ARCH >= 8 */
#else
.arch armv6t2
#endif
@ -102,8 +109,10 @@
#define tmp2 r5
def_fn strlen p2align=6
.fnstart
.cfi_startproc
prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@ -153,6 +162,7 @@ def_fn strlen p2align=6
beq .Lloop_aligned
.Lnull_found:
.cfi_remember_state
cmp data1a, #0
itt eq
addeq result, result, #4
@ -161,11 +171,11 @@ def_fn strlen p2align=6
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
epilogue 4 5 push_ip=HAVE_PAC_LEAF
.Lmisaligned8:
.cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
@ -179,4 +189,7 @@ def_fn strlen p2align=6
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.cfi_endproc
.cantunwind
.fnend
.size strlen, . - strlen