libc: add armv7-a armv7-m armv8-m libc functions

Port optimized memory and string functions from newlib and bionic.

Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
zhangyuan21 2023-01-06 11:36:56 +08:00 committed by Xiang Xiao
parent 5f92c62874
commit 650f17169a
29 changed files with 5478 additions and 911 deletions

LICENSE

@@ -422,48 +422,153 @@ libc/string/lib_vikmemcpy.c
3. This notice may not be removed or altered from any source
distribution.
libs/libc/machine/arm/armv7-a/arch_memcpy.S
libs/libc/machine/arm/armv7-r/arch_memcpy.S
libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S
================================================
Copyright (C) 2017 Gregory Nutt. All rights reserved.
Based on the ARMv7-M version contributed by Mike Smith. Apparently in the public
domain and is re-released here under the modified BSD license:
Obtained via a posting on the Stellaris forum:
http://e2e.ti.com/support/microcontrollers/\
stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
Posted by rocksoft on Jul 24, 2008 10:19 AM
Hi,
I recently finished a "memcpy" replacement and thought it might be useful for
others...
I've put some instructions and the code here:
http://www.rock-software.net/downloads/memcpy/
Hope it works for you as well as it did for me.
Liam.
libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S
libs/libc/machine/arm/armv7-m/gnu/arch_memchr.S
libs/libc/machine/arm/armv8-m/gnu/arch_memchr.S
================================================
Copyright (c) 2010-2011, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Written by Dave Gilbert <david.gilbert@linaro.org>
Copyright (c) 2015 ARM Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Linaro nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S
libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
================================================
Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S
libs/libc/machine/arm/armv7-m/gnu/arch_memmove.S
libs/libc/machine/arm/armv7-m/gnu/arch_memset.S
libs/libc/machine/arm/armv8-m/gnu/arch_memmove.S
libs/libc/machine/arm/armv8-m/gnu/arch_memset.S
================================================
Copyright (c) 2015 ARM Ltd
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-a/gnu/arch_memset.S
================================================
Copyright (C) 2013 The Android Open Source Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -473,10 +578,160 @@ libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S
================================================
Copyright (c) 2011 The Android Open Source Project
Copyright (c) 2008 ARM Ltd
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S
libs/libc/machine/arm/armv8-m/gnu/arch_memcpy.S
================================================
Copyright (c) 2013 ARM Ltd
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-m/gnu/arch_strcmp.S
libs/libc/machine/arm/armv8-m/gnu/arch_strcmp.S
================================================
Copyright (c) 2012-2014 ARM Ltd
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-m/gnu/arch_strlen.S
libs/libc/machine/arm/armv8-m/gnu/arch_strlen.S
================================================
Copyright (c) 2010-2011,2013 Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (c) 2015 ARM Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Linaro nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
libs/libc/misc/lib_crc32.c
=========================
@@ -5267,48 +5522,6 @@ drivers/mtd/at24xx.c
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv7-a/arch_memcpy.S
libs/libc/machine/arm/armv7-r/arch_memcpy.S
==============================================
Obtained via a posting on the Stellaris forum:
http://e2e.ti.com/support/microcontrollers/\
stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
Based on the ARMv7-M version contributed by Mike Smith. Apparently in the public
domain and is re-released here under the modified BSD license:
Adapted for use with ARMv7-A and NuttX by:
Copyright (C) 2017 Gregory Nutt. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name NuttX nor the names of its contributors may be
used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
libs/libc/machine/arm/armv8-m
=============================

libs/libc/machine/arm/arm/Make.defs

@@ -19,10 +19,8 @@
############################################################################
ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
CSRCS += arch_elf.c
endif
DEPPATH += --dep-path machine/arm/arm
VPATH += :machine/arm/arm
endif

libs/libc/machine/arm/armv6-m/Make.defs

@@ -19,10 +19,8 @@
############################################################################
ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
CSRCS += arch_elf.c
endif
DEPPATH += --dep-path machine/arm/armv6-m
VPATH += :machine/arm/armv6-m
endif

libs/libc/machine/arm/armv7-a/Kconfig

@@ -3,9 +3,50 @@
# see the file kconfig-language.txt in the NuttX tools repository.
#
config ARMV7A_MEMCHR
bool "Enable optimized memchr() for ARMv7-A"
default n
select LIBC_ARCH_MEMCHR
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific memchr() library function
config ARMV7A_MEMCPY
bool "Enable optimized memcpy() for ARMv7-A"
select LIBC_ARCH_MEMCPY
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific memcpy() library function
config ARMV7A_MEMMOVE
bool "Enable optimized memmove() for ARMv7-A"
default n
select LIBC_ARCH_MEMMOVE
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific memmove() library function
config ARMV7A_MEMSET
bool "Enable optimized memset() for ARMv7-A"
default n
select LIBC_ARCH_MEMSET
depends on ARCH_TOOLCHAIN_GNU
depends on ARM_NEON
---help---
Enable optimized ARMv7-A specific memset() library function
config ARMV7A_STRCMP
bool "Enable optimized strcmp() for ARMv7-A"
default n
select LIBC_ARCH_STRCMP
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific strcmp() library function
config ARMV7A_STRLEN
bool "Enable optimized strlen() for ARMv7-A"
default n
select LIBC_ARCH_STRLEN
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-A specific strlen() library function

libs/libc/machine/arm/armv7-a/Make.defs

@@ -18,20 +18,38 @@
#
############################################################################
ifeq ($(CONFIG_ARMV7A_MEMCHR),y)
ASRCS += arch_memchr.S
endif
ifeq ($(CONFIG_ARMV7A_MEMCPY),y)
ASRCS += arch_memcpy.S
endif
ifeq ($(CONFIG_ARMV7A_MEMMOVE),y)
ASRCS += arch_memmove.S
endif
ifeq ($(CONFIG_ARMV7A_MEMSET),y)
ASRCS += arch_memset.S
endif
ifeq ($(CONFIG_ARMV7A_STRCMP),y)
ASRCS += arch_strcmp.S
endif
ifeq ($(CONFIG_ARMV7A_STRLEN),y)
ASRCS += arch_strlen.S
endif
ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
CSRCS += arch_elf.c
endif
ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
DEPPATH += --dep-path machine/arm/armv7-a/gnu
VPATH += :machine/arm/armv7-a/gnu
endif
DEPPATH += --dep-path machine/arm/armv7-a
VPATH += :machine/arm/armv7-a
endif

libs/libc/machine/arm/armv7-a/gnu/acle-compat.h

@@ -0,0 +1,187 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARMV7A_GNU_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARMV7A_GNU_ACLE_COMPAT_H
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
* These were mostly implemented in GCC around GCC-4.8; older versions
* have no, or only partial support. To provide a level of backwards
* compatibility we try to work out what the definitions should be, given
* the older pre-defines that GCC did produce. This isn't complete, but
* it should be enough for use by routines that depend on this header.
*/
/* No need to handle ARMv8, GCC had ACLE support before that. */
#ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
#endif
#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
#endif
#ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
#endif
#ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
#endif
#ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
#endif
#ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
#endif
#if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
#endif
#if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
#endif
#if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
#endif
#ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
#endif
#ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
#endif
#if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
#endif
#ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
#endif
#ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
#endif
/* If we still don't know what the target architecture is, then we're
* probably not using GCC.
*/
#ifndef __ARM_ARCH
# error Unable to determine architecture version.
#endif
#endif /* __LIBS_LIBC_MACHINE_ARM_ARMV7A_GNU_ACLE_COMPAT_H */
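The assembly files added by this commit include this header and then pick an implementation from the reconstructed macros. For context, the dispatch pattern used by arch_memchr.S below looks like this (a sketch of the guards only; the comments are mine, not the commit's):

#include "acle-compat.h"

#if defined (__ARM_NEON__) || defined (__ARM_NEON)
/* NEON path: scan 32 bytes per iteration with a syndrome mask. */
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
/* Thumb-2 + DSP path: uadd8/sel parallel byte tests. */
#else
/* Neither available: fall back to the generic C implementation. */
#endif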

libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S

@@ -0,0 +1,391 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/arch_memchr.S
*
* Copyright (c) 2010-2011, Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Written by Dave Gilbert <david.gilbert@linaro.org>
*
* This memchr routine is optimised on a Cortex-A9 and should work on
* all ARMv7 processors. It has a fast path for short sizes, and has
* an optimised path for large data sets; the worst case is finding the
* match early in a large data set.
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
@ 2011-02-07 david.gilbert@linaro.org
@ Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@ Import endianness fix from local git ea786f1b
@ 2011-10-11 david.gilbert@linaro.org
@ Import from cortex-strings bzr rev 63
@ Flip to ldrd (as suggested by Greta Yorsh)
@ Make conditional on CPU type
@ tidy
@ This code requires armv6t2 or later. Uses Thumb2.
.syntax unified
#include "acle-compat.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
.arch armv8-r
#else
.arch armv7-a
#endif
.fpu neon
/* Arguments */
#define srcin r0
#define chrin r1
#define cntin r2
/* Retval */
#define result r0 /* Live range does not overlap with srcin */
/* Working registers */
#define src r1 /* Live range does not overlap with chrin */
#define tmp r3
#define synd r0 /* No overlap with srcin or result */
#define soff r12
/* Working NEON registers */
#define vrepchr q0
#define vdata0 q1
#define vdata0_0 d2 /* Lower half of vdata0 */
#define vdata0_1 d3 /* Upper half of vdata0 */
#define vdata1 q2
#define vdata1_0 d4 /* Lower half of vhas_chr0 */
#define vdata1_1 d5 /* Upper half of vhas_chr0 */
#define vrepmask q3
#define vrepmask0 d6
#define vrepmask1 d7
#define vend q4
#define vend0 d8
#define vend1 d9
/*
* Core algorithm:
*
* For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
* byte. Each bit is set if the relevant byte matched the requested character
* and cleared otherwise. Since the bits in the syndrome reflect exactly the
* order in which things occur in the original string, counting trailing zeros
allows us to identify exactly which byte has matched.
*/
.text
.thumb_func
.align 4
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
.cfi_sections .debug_frame
.cfi_startproc
/* Use a simple loop if there are less than 8 bytes to search. */
cmp cntin, #7
bhi .Llargestr
and chrin, chrin, #0xff
.Lsmallstr:
subs cntin, cntin, #1
blo .Lnotfound /* Return not found if reached end. */
ldrb tmp, [srcin], #1
cmp tmp, chrin
bne .Lsmallstr /* Loop again if not found. */
/* Otherwise fixup address and return. */
sub result, result, #1
bx lr
.Llargestr:
vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
/*
* Magic constant 0x8040201008040201 allows us to identify which lane
* matches the requested byte.
*/
movw tmp, #0x0201
movt tmp, #0x0804
lsl soff, tmp, #4
vmov vrepmask0, tmp, soff
vmov vrepmask1, tmp, soff
/* Work with aligned 32-byte chunks */
bic src, srcin, #31
ands soff, srcin, #31
beq .Lloopintro /* Go straight to main loop if it's aligned. */
/*
* Input string is not 32-byte aligned. We calculate the syndrome
* value for the aligned 32 bytes block containing the first bytes
* and mask the irrelevant part.
*/
vld1.8 {vdata0, vdata1}, [src:256]!
sub tmp, soff, #32
adds cntin, cntin, tmp
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
/* Clear the soff lower bits */
lsr synd, synd, soff
lsl synd, synd, soff
/* The first block can also be the last */
bls .Lmasklast
/* Have we found something already? */
cbnz synd, .Ltail
.Lloopintro:
vpush {vend}
/* 264/265 correspond to d8/d9 for q4 */
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 264, 0
.cfi_rel_offset 265, 8
.p2align 3,,7
.Lloop:
vld1.8 {vdata0, vdata1}, [src:256]!
subs cntin, cntin, #32
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
/* If we're out of data we finish regardless of the result. */
bls .Lend
/* Use a fast check for the termination condition. */
vorr vend, vdata0, vdata1
vorr vend0, vend0, vend1
vmov synd, tmp, vend0
orrs synd, synd, tmp
/* We're not out of data, loop if we haven't found the character. */
beq .Lloop
.Lend:
vpop {vend}
.cfi_adjust_cfa_offset -16
.cfi_restore 264
.cfi_restore 265
/* Termination condition found, let's calculate the syndrome value. */
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
cbz synd, .Lnotfound
bhi .Ltail
.Lmasklast:
/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
neg cntin, cntin
lsl synd, synd, cntin
lsrs synd, synd, cntin
it eq
moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
.Ltail:
/* Count the trailing zeros using bit reversing */
rbit synd, synd
/* Compensate the last post-increment */
sub src, src, #32
/* Count the leading zeros */
clz synd, synd
/* Compute the potential result and return */
add result, src, synd
bx lr
.Lnotfound:
/* Set result to NULL if not found and return */
mov result, #0
bx lr
.cfi_endproc
.size memchr, . - memchr
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.text
.thumb
@ ---------------------------------------------------------------------------
.thumb_func
.align 2
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
blt 20f
tst r0, #7 @ If it's already aligned skip the next bit
beq 10f
@ Work up to an aligned point
5:
ldrb r3, [r0],#1
subs r2, r2, #1
cmp r3, r1
beq 50f @ If it matches exit found
tst r0, #7
cbz r2, 40f @ If we run off the end, exit not found
bne 5b @ If not aligned yet then do next byte
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
mvns r7, #0 @ all F's
movs r3, #0
15:
ldrd r5,r6,[r0],#8
subs r4, r4, #8
eor r5,r5, r1 @ r5,r6 have 00's where bytes match the target
eor r6,r6, r1
uadd8 r5, r5, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r5, r3, r7 @ bytes are 00 for non-00 bytes,
@ or ff for 00 bytes - NOTE INVERSION
uadd8 r6, r6, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes
@ or ff for 00 bytes - NOTE INVERSION
cbnz r6, 60f
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
20:
cbz r2, 40f @ 0 length or hit the end already then not found
21: @ Post aligned section, or just a short call
ldrb r3,[r0],#1
subs r2,r2,#1
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
cbz r3, 50f
bne 21b @ on r2 flags
40:
movs r0,#0 @ not found
bx lr
50:
subs r0,r0,#1 @ found
bx lr
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
subne r0,r0,#7 @ or 2nd byte of 1st word
@ r0 currently points to the 2nd byte of the word containing the hit
tst r5, # CHARTSTMASK(0) @ 1st character
bne 61f
adds r0,r0,#1
tst r5, # CHARTSTMASK(1) @ 2nd character
ittt eq
addeq r0,r0,#1
tsteq r5, # (3<<15) @ 2nd & 3rd character
@ If not the 3rd must be the last one
addeq r0,r0,#1
61:
pop {r4,r5,r6,r7}
subs r0,r0,#1
bx lr
#else
/* Defined in memchr-stub.c. */
#endif
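The syndrome scheme described in the header comment of arch_memchr.S can be modeled in scalar C: build one mask bit per byte of a chunk, then count trailing zeros to locate the first match. A minimal sketch, not part of the commit (the function name and the bit-by-bit loop are assumptions; the real code computes the mask with NEON compares, and handles alignment):

#include <stddef.h>
#include <stdint.h>

static void *memchr_syndrome_sketch(const void *s, int c, size_t n)
{
  const unsigned char *p = (const unsigned char *)s;
  size_t i = 0;

  while (i < n)
    {
      uint32_t synd = 0;
      size_t chunk = (n - i < 32) ? (n - i) : 32;
      size_t j;

      /* One syndrome bit per byte of the chunk, LSB = first byte. */
      for (j = 0; j < chunk; j++)
        {
          if (p[i + j] == (unsigned char)c)
            {
              synd |= (uint32_t)1 << j;
            }
        }

      if (synd != 0)
        {
          /* Trailing zeros give the offset of the first match,
           * mirroring the rbit+clz pair in the assembly. */
          return (void *)(p + i + __builtin_ctz(synd));
        }

      i += chunk;
    }

  return NULL;
}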

libs/libc/machine/arm/armv7-a/gnu/arch_memcpy.S (file diff suppressed because it is too large)

libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S

@@ -0,0 +1,66 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/arch_memmove.S
*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
cmp r0, r1
push {r4}
bls 3f
adds r3, r1, r2
cmp r0, r3
bcs 3f
adds r1, r0, r2
cbz r2, 2f
subs r2, r3, r2
1:
ldrb r4, [r3, #-1]!
cmp r2, r3
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
3:
cmp r2, #0
beq 2b
add r2, r2, r1
subs r3, r0, #1
4:
ldrb r4, [r1], #1
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
.size memmove, . - memmove
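The two loops above implement the standard overlap rule: if the destination starts inside the source region, copy from the end backwards; otherwise copy forwards. A C equivalent, as a sketch (memmove_sketch is a hypothetical name, not part of the commit):

#include <stddef.h>

static void *memmove_sketch(void *dst, const void *src, size_t n)
{
  unsigned char *d = (unsigned char *)dst;
  const unsigned char *s = (const unsigned char *)src;

  if (d > s && d < s + n)
    {
      /* Destination overlaps the tail of the source: copy from the
       * end, as the first loop in the assembly does. */
      while (n--)
        {
          d[n] = s[n];
        }
    }
  else
    {
      size_t i;

      for (i = 0; i < n; i++)
        {
          d[i] = s[i];
        }
    }

  return dst;
}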

libs/libc/machine/arm/armv7-a/gnu/arch_memset.S

@@ -0,0 +1,146 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/arch_memset.S
*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
****************************************************************************/
.arm
.syntax unified
.global memset
.type memset, %function
memset:
mov r3, r0
// At this point only d0, d1 are going to be used below.
vdup.8 q0, r1
cmp r2, #16
blo .L_set_less_than_16_unknown_align
.L_check_alignment:
// Align destination to a double word to avoid the store crossing
// a cache line boundary.
ands ip, r3, #7
bne .L_do_double_word_align
.L_double_word_aligned:
// Duplicate since the less than 64 can use d2, d3.
vmov q1, q0
subs r2, #64
blo .L_set_less_than_64
// Duplicate the copy value so that we can store 64 bytes at a time.
vmov q2, q0
vmov q3, q0
1:
// Main loop stores 64 bytes at a time.
subs r2, #64
vstmia r3!, {d0 - d7}
bge 1b
.L_set_less_than_64:
// Restore r2 to the count of bytes left to set.
add r2, #64
lsls ip, r2, #27
bcc .L_set_less_than_32
// Set 32 bytes.
vstmia r3!, {d0 - d3}
.L_set_less_than_32:
bpl .L_set_less_than_16
// Set 16 bytes.
vstmia r3!, {d0, d1}
.L_set_less_than_16:
// Less than 16 bytes to set.
lsls ip, r2, #29
bcc .L_set_less_than_8
// Set 8 bytes.
vstmia r3!, {d0}
.L_set_less_than_8:
bpl .L_set_less_than_4
// Set 4 bytes
vst1.32 {d0[0]}, [r3]!
.L_set_less_than_4:
lsls ip, r2, #31
it ne
strbne r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3]
bx lr
.L_do_double_word_align:
rsb ip, ip, #8
sub r2, r2, ip
// Do this comparison now, otherwise we'll need to save a
// register to the stack since we've used all available
// registers.
cmp ip, #4
blo 1f
// Need to do a four byte copy.
movs ip, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
vst1.32 {d0[0]}, [r3]!
b .L_double_word_aligned
1:
// No four byte copy.
movs ip, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
b .L_double_word_aligned
.L_set_less_than_16_unknown_align:
// Set up to 15 bytes.
movs ip, r2, lsl #29
bcc 1f
vst1.8 {d0}, [r3]!
1:
bge 2f
vst1.32 {d0[0]}, [r3]!
2:
movs ip, r2, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
bx lr
.size memset, . - memset
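The tail handling above avoids extra compares by shifting the residual count so the flags encode single bits: after "lsls ip, r2, #27" the carry flag holds bit 5 (a 32-byte block remains) and the sign flag holds bit 4 (16 bytes). A plain C rendering of the same dispatch, as a sketch (set_tail_sketch is a hypothetical name):

#include <stddef.h>

static void set_tail_sketch(unsigned char *p, unsigned char c, size_t r)
{
  size_t i;

  /* r < 64: remainder after the 64-byte vstmia main loop. */
  if (r & 32) for (i = 0; i < 32; i++) *p++ = c; /* vstmia {d0-d3}  */
  if (r & 16) for (i = 0; i < 16; i++) *p++ = c; /* vstmia {d0,d1}  */
  if (r & 8)  for (i = 0; i < 8; i++)  *p++ = c; /* vstmia {d0}     */
  if (r & 4)  for (i = 0; i < 4; i++)  *p++ = c; /* vst1.32 {d0[0]} */
  if (r & 2)  { *p++ = c; *p++ = c; }            /* strbcs pair     */
  if (r & 1)  *p = c;                            /* strbne          */
}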

libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S

@@ -0,0 +1,303 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/arch_strcmp.S
*
* Copyright (c) 2011 The Android Open Source Project
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#ifdef __ARMEB__
#define SHFT2LSB lsl
#define SHFT2LSBEQ lsleq
#define SHFT2MSB lsr
#define SHFT2MSBEQ lsreq
#define MSB 0x000000ff
#define LSB 0xff000000
#else
#define SHFT2LSB lsr
#define SHFT2LSBEQ lsreq
#define SHFT2MSB lsl
#define SHFT2MSBEQ lsleq
#define MSB 0xff000000
#define LSB 0x000000ff
#endif
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7
.arm
.syntax unified
.global strcmp
.type strcmp, %function
strcmp:
pld [r0, #0]
pld [r1, #0]
eor r2, r0, r1
tst r2, #3
/* Strings not at same byte offset from a word boundary. */
bne .Lstrcmp_unaligned
ands r2, r0, #3
bic r0, r0, #3
bic r1, r1, #3
ldr ip, [r0], #4
it eq
ldreq r3, [r1], #4
beq 1f
/* Although s1 and s2 have identical initial alignment, they are
* not currently word aligned. Rather than comparing bytes,
* make sure that any bytes fetched from before the addressed
* bytes are forced to 0xff. Then they will always compare
* equal.
*/
eor r2, r2, #3
lsl r2, r2, #3
mvn r3, #MSB
SHFT2LSB r2, r3, r2
ldr r3, [r1], #4
orr ip, ip, r2
orr r3, r3, r2
1:
/* Load the 'magic' constant 0x01010101. */
str r4, [sp, #-4]!
mov r4, #1
orr r4, r4, r4, lsl #8
orr r4, r4, r4, lsl #16
.p2align 2
4:
pld [r0, #8]
pld [r1, #8]
sub r2, ip, magic1(r4)
cmp ip, r3
itttt eq
/* check for any zero bytes in first word */
biceq r2, r2, ip
tsteq r2, magic2(r4)
ldreq ip, [r0], #4
ldreq r3, [r1], #4
beq 4b
2:
/* There's a zero or a different byte in the word */
SHFT2MSB r0, ip, #24
SHFT2LSB ip, ip, #8
cmp r0, #1
it cs
cmpcs r0, r3, SHFT2MSB #24
it eq
SHFT2LSBEQ r3, r3, #8
beq 2b
/* On a big-endian machine, r0 contains the desired byte in bits
* 0-7; on a little-endian machine they are in bits 24-31. In
* both cases the other bits in r0 are all zero. For r3 the
* interesting byte is at the other end of the word, but the
* other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the
* little-endian case we can't just shift the interesting bits up.
*/
#ifdef __ARMEB__
sub r0, r0, r3, lsr #24
#else
and r3, r3, #255
/* No RSB instruction in Thumb2 */
#ifdef __thumb2__
lsr r0, r0, #24
sub r0, r0, r3
#else
rsb r0, r3, r0, lsr #24
#endif
#endif
ldr r4, [sp], #4
bx lr
.Lstrcmp_unaligned:
wp1 .req r0
wp2 .req r1
b1 .req r2
w1 .req r4
w2 .req r5
t1 .req ip
@ r3 is scratch
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
1:
tst wp1, #3
beq 2f
ldrb r2, [wp1], #1
ldrb r3, [wp2], #1
cmp r2, #1
it cs
cmpcs r2, r3
beq 1b
sub r0, r2, r3
bx lr
2:
str r5, [sp, #-4]!
str r4, [sp, #-4]!
mov b1, #1
orr b1, b1, b1, lsl #8
orr b1, b1, b1, lsl #16
and t1, wp2, #3
bic wp2, wp2, #3
ldr w1, [wp1], #4
ldr w2, [wp2], #4
cmp t1, #2
beq 2f
bhi 3f
/* Critical inner Loop: Block with 3 bytes initial overlap */
.p2align 2
1:
bic t1, w1, #MSB
cmp t1, w2, SHFT2LSB #8
sub r3, w1, b1
bic r3, r3, w1
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #24
bne 6f
ldr w1, [wp1], #4
b 1b
4:
SHFT2LSB w2, w2, #8
b 8f
5:
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #0xff000000
itt ne
tstne w1, #0x00ff0000
tstne w1, #0x0000ff00
beq 7f
#else
bics r3, r3, #0xff000000
bne 7f
#endif
ldrb w2, [wp2]
SHFT2LSB t1, w1, #24
#ifdef __ARMEB__
lsl w2, w2, #24
#endif
b 8f
6:
SHFT2LSB t1, w1, #24
and w2, w2, #LSB
b 8f
/* Critical inner Loop: Block with 2 bytes initial overlap */
.p2align 2
2:
SHFT2MSB t1, w1, #16
sub r3, w1, b1
SHFT2LSB t1, t1, #16
bic r3, r3, w1
cmp t1, w2, SHFT2LSB #16
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #16
bne 6f
ldr w1, [wp1], #4
b 2b
5:
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #0xff000000
it ne
tstne w1, #0x00ff0000
beq 7f
#else
lsls r3, r3, #16
bne 7f
#endif
ldrh w2, [wp2]
SHFT2LSB t1, w1, #16
#ifdef __ARMEB__
lsl w2, w2, #16
#endif
b 8f
6:
SHFT2MSB w2, w2, #16
SHFT2LSB t1, w1, #16
4:
SHFT2LSB w2, w2, #16
b 8f
/* Critical inner Loop: Block with 1 byte initial overlap */
.p2align 2
3:
and t1, w1, #LSB
cmp t1, w2, SHFT2LSB #24
sub r3, w1, b1
bic r3, r3, w1
bne 4f
ands r3, r3, b1, lsl #7
it eq
ldreq w2, [wp2], #4
bne 5f
eor t1, t1, w1
cmp t1, w2, SHFT2MSB #8
bne 6f
ldr w1, [wp1], #4
b 3b
4:
SHFT2LSB w2, w2, #24
b 8f
5:
/* The syndrome value may contain false ones if the string ends
* with the bytes 0x01 0x00
*/
tst w1, #LSB
beq 7f
ldr w2, [wp2], #4
6:
SHFT2LSB t1, w1, #8
bic w2, w2, #MSB
b 8f
7:
mov r0, #0
ldr r4, [sp], #4
ldr r5, [sp], #4
bx lr
8:
and r2, t1, #LSB
and r0, w2, #LSB
cmp r0, #1
it cs
cmpcs r0, r2
itt eq
SHFT2LSBEQ t1, t1, #8
SHFT2LSBEQ w2, w2, #8
beq 8b
sub r0, r2, r0
ldr r4, [sp], #4
ldr r5, [sp], #4
bx lr
.size strcmp, . - strcmp
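The magic1/magic2 pair built from 0x01010101 at label 1 is the classic word-at-a-time zero-byte test: subtracting 0x01010101 and masking with ~x turns on a high bit only where a byte was zero. As a C sketch (has_zero_byte is a hypothetical helper name):

#include <stdint.h>

/* Nonzero iff some byte of x is 0x00. Mirrors "sub r2, ip, r4",
 * "bic r2, r2, ip", "tst r2, r4, lsl #7" in the loop above
 * (r4 = 0x01010101, so r4 lsl #7 = 0x80808080). */
static int has_zero_byte(uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}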

libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S

@@ -0,0 +1,184 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-a/gnu/arch_strlen.S
*
* Copyright (c) 2010-2011,2013 Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Assumes:
* ARMv6T2 or ARMv7E-M, AArch32
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#include "acle-compat.h"
.macro def_fn f p2align=0
.text
.p2align \p2align
.global \f
.type \f, %function
\f:
.endm
#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
.eabi_attribute Tag_ARM_ISA_use, 0
.thumb
.syntax unified
/* Parameters and result. */
#define srcin r0
#define result r0
/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5
def_fn strlen p2align=6
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned
.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
.Lmisaligned8:
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.size strlen, . - strlen
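The uadd8/sel pairs above are a per-byte NUL test: adding 0xff to a byte lane carries out (setting its GE bit) exactly when the lane is non-zero, so sel assembles a word that is 0x00 in non-zero lanes and 0xff in zero lanes. A minimal C sketch of the same word-at-a-time search (illustrative only, not part of this commit; strlen_ref and null_syndrome are hypothetical names, little-endian assumed):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Nonzero iff w contains a 0x00 byte; on little-endian the least
 * significant set 0x80 bit marks the first NUL (higher lanes may hold
 * false positives, which is harmless when only the first hit is used).
 */

static uint32_t null_syndrome(uint32_t w)
{
  return (w - 0x01010101u) & ~w & 0x80808080u;
}

static size_t strlen_ref(const char *s)
{
  const char *p = s;
  uint32_t w;

  while ((uintptr_t)p & 3)          /* Align first, like .Lmisaligned8 */
    {
      if (*p == '\0')
        {
          return p - s;
        }

      p++;
    }

  for (; ; p += 4)                  /* Read whole aligned words, like the
                                     * ldrd loop above */
    {
      memcpy(&w, p, 4);
      if (null_syndrome(w))
        {
          break;
        }
    }

  while (*p != '\0')                /* Locate the NUL inside the word */
    {
      p++;
    }

  return p - s;
}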


@ -5,19 +5,57 @@
if ARCH_ARMV7M
config ARMV7M_MEMCHR
bool "Enable optimized memchr() for ARMv7-M"
default n
select LIBC_ARCH_MEMCHR
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific memchr() library function
config ARMV7M_MEMCPY
bool "Enable optimized memcpy() for ARMv7-M"
default n
select MACHINE_OPTS_ARMV7M
select LIBC_ARCH_MEMCPY
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific memcpy() library function
config ARMV7M_MEMSET
bool "Enable optimized memset() for ARMv7-M"
default n
select LIBC_ARCH_MEMSET
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific memset() library function
config ARMV7M_MEMMOVE
bool "Enable optimized memmove() for ARMv7-M"
default n
select LIBC_ARCH_MEMMOVE
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific memmove() library function
config ARMV7M_STRCMP
bool "Enable optimized strcmp() for ARMv7-M"
default n
select LIBC_ARCH_STRCMP
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific strcmp() library function
config ARMV7M_STRLEN
bool "Enable optimized strlen() for ARMv7-M"
default n
select LIBC_ARCH_STRLEN
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv7-M specific strlen() library function
config ARMV7M_LIBM
bool "Architecture specific FPU optimizations"
default n
select MACHINE_OPTS_ARMV7M
select LIBM_ARCH_FABSF
select LIBM_ARCH_SQRTF
depends on ARCH_FPU
@ -26,10 +64,6 @@ config ARMV7M_LIBM
Enable ARMv7E-M specific floating point optimizations
for fabsf() and fsqrtf()
config MACHINE_OPTS_ARMV7M
bool
default n
config LIBM_ARCH_FABSF
bool
default n
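None of these options is enabled by default; a board opts in from its configuration. An illustrative defconfig fragment (option names as defined above; enable only what the board actually needs):

CONFIG_ARMV7M_MEMCHR=y
CONFIG_ARMV7M_MEMCPY=y
CONFIG_ARMV7M_MEMSET=y
CONFIG_ARMV7M_MEMMOVE=y
CONFIG_ARMV7M_STRCMP=y
CONFIG_ARMV7M_STRLEN=y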


@ -18,15 +18,34 @@
#
############################################################################
ifeq ($(CONFIG_ARMV7M_MEMCHR),y)
ASRCS += arch_memchr.S
endif
ifeq ($(CONFIG_ARMV7M_MEMCPY),y)
ASRCS += arch_memcpy.S
endif
ifeq ($(CONFIG_ARMV7M_MEMSET),y)
ASRCS += arch_memset.S
endif
ifeq ($(CONFIG_ARMV7M_MEMMOVE),y)
ASRCS += arch_memmove.S
endif
ifeq ($(CONFIG_ARMV7M_STRCMP),y)
ASRCS += arch_strcmp.S
endif
ifeq ($(CONFIG_ARMV7M_STRLEN),y)
ASRCS += arch_strlen.S
endif
ifeq ($(CONFIG_LIBC_ARCH_ELF),y)
CSRCS += arch_elf.c
endif
ifeq ($(CONFIG_MACHINE_OPTS_ARMV7M),y)
ifeq ($(CONFIG_LIBM_ARCH_FABSF),y)
CSRCS += arch_fabsf.c
endif
@ -34,7 +53,6 @@ endif
ifeq ($(CONFIG_LIBM_ARCH_SQRTF),y)
CSRCS += arch_sqrtf.c
endif
endif
ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
DEPPATH += --dep-path machine/arm/armv7-m/gnu


@ -0,0 +1,187 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/acle-compat.h
*
* Copyright (c) 2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#ifndef __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
#define __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H
/* ACLE standardises a set of pre-defines that describe the ARM architecture.
* These were mostly implemented in GCC around GCC-4.8; older versions
* have no, or only partial support. To provide a level of backwards
* compatibility we try to work out what the definitions should be, given
* the older pre-defines that GCC did produce. This isn't complete, but
* it should be enough for use by routines that depend on this header.
*/
/* No need to handle ARMv8, GCC had ACLE support before that. */
#ifdef __ARM_ARCH_7__
/* The common subset of ARMv7 in all profiles. */
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
#endif
#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 15
# define __ARM_FEATURE_UNALIGNED
# ifdef __ARM_ARCH_7A__
# define __ARM_ARCH_PROFILE 'A'
# else
# define __ARM_ARCH_PROFILE 'R'
# endif
#endif
#ifdef __ARM_ARCH_7EM__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
#endif
#ifdef __ARM_ARCH_7M__
# define __ARM_ARCH 7
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 7
# define __ARM_FEATURE_UNALIGNED
# define __ARM_ARCH_PROFILE 'M'
#endif
#ifdef __ARM_ARCH_6T2__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 2
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_LDREX 4
# define __ARM_FEATURE_UNALIGNED
#endif
#ifdef __ARM_ARCH_6M__
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_PROFILE 'M'
#endif
#if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
|| defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
|| defined (__ARM_ARCH_6ZK__)
# define __ARM_ARCH 6
# define __ARM_ARCH_ISA_THUMB 1
# define __ARM_ARCH_ISA_ARM
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_SIMD32
# define __ARM_FEATURE_DSP
# define __ARM_FEATURE_QBIT
# define __ARM_FEATURE_SAT
# define __ARM_FEATURE_UNALIGNED
# ifndef __thumb__
# if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define __ARM_FEATURE_LDREX 15
# else
# define __ARM_FEATURE_LDREX 4
# endif
# endif
#endif
#if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
# define __ARM_FEATURE_DSP
#endif
#if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)
# define __ARM_ARCH 5
# define __ARM_ARCH_ISA_ARM
# ifdef __ARM_ARCH_5TE__
# define __ARM_ARCH_ISA_THUMB 1
# endif
# define __ARM_FEATURE_CLZ
#endif
#ifdef __ARM_ARCH_4T__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
# define __ARM_ARCH_ISA_THUMB 1
#endif
#ifdef __ARM_ARCH_4__
# define __ARM_ARCH 4
# define __ARM_ARCH_ISA_ARM
#endif
#if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
# define __ARM_ARCH 3
# define __ARM_ARCH_ISA_ARM
#endif
#ifdef __ARM_ARCH_2__
# define __ARM_ARCH 2
# define __ARM_ARCH_ISA_ARM
#endif
#ifdef __ARMEB__
# define __ARM_BIG_ENDIAN
#endif
/* If we still don't know what the target architecture is, then we're
* probably not using GCC.
*/
#ifndef __ARM_ARCH
# error Unable to determine architecture version.
#endif
#endif /* __LIBS_LIBC_MACHINE_ARM_ARMV7M_GNU_ACLE_COMPAT_H */
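The macros above let the assembly sources in this commit select an ISA baseline uniformly on both old and new GCC. A preprocessor sketch of the consumption pattern (illustrative only, not taken verbatim from the sources):

#include "acle-compat.h"

#if __ARM_ARCH >= 7 && defined (__ARM_FEATURE_SIMD32)
/* Parallel byte operations such as uadd8/sel are available. */
#endif

#if __ARM_ARCH_PROFILE == 'M'
/* M profile is Thumb-only, so the sources pick .arch armv7e-m here
 * instead of armv6t2. */
#endif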


@ -0,0 +1,391 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_memchr.S
*
* Copyright (c) 2010-2011, Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Written by Dave Gilbert <david.gilbert@linaro.org>
*
* This memchr routine is optimised on a Cortex-A9 and should work on
* all ARMv7 processors. It has a fast path for short sizes, and has
* an optimised path for large data sets; the worst case is finding the
* match early in a large data set.
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
@ 2011-02-07 david.gilbert@linaro.org
@ Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@ Import endianness fix from local git ea786f1b
@ 2011-10-11 david.gilbert@linaro.org
@ Import from cortex-strings bzr rev 63
@ Flip to ldrd (as suggested by Greta Yorsh)
@ Make conditional on CPU type
@ tidy
@ This code requires armv6t2 or later. Uses Thumb2.
.syntax unified
#include "acle-compat.h"
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
.arch armv8-r
#else
.arch armv7-a
#endif
.fpu neon
/* Arguments */
#define srcin r0
#define chrin r1
#define cntin r2
/* Retval */
#define result r0 /* Live range does not overlap with srcin */
/* Working registers */
#define src r1 /* Live range does not overlap with chrin */
#define tmp r3
#define synd r0 /* No overlap with srcin or result */
#define soff r12
/* Working NEON registers */
#define vrepchr q0
#define vdata0 q1
#define vdata0_0 d2 /* Lower half of vdata0 */
#define vdata0_1 d3 /* Upper half of vdata0 */
#define vdata1 q2
#define vdata1_0 d4 /* Lower half of vhas_chr0 */
#define vdata1_1 d5 /* Upper half of vhas_chr0 */
#define vrepmask q3
#define vrepmask0 d6
#define vrepmask1 d7
#define vend q4
#define vend0 d8
#define vend1 d9
/*
* Core algorithm:
*
* For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
* byte. Each bit is set if the relevant byte matched the requested character
* and cleared otherwise. Since the bits in the syndrome reflect exactly the
* order in which things occur in the original string, counting trailing zeros
 * allows us to identify exactly which byte has matched.
*/
.text
.thumb_func
.align 4
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
.cfi_sections .debug_frame
.cfi_startproc
/* Use a simple loop if there are less than 8 bytes to search. */
cmp cntin, #7
bhi .Llargestr
and chrin, chrin, #0xff
.Lsmallstr:
subs cntin, cntin, #1
blo .Lnotfound /* Return not found if reached end. */
ldrb tmp, [srcin], #1
cmp tmp, chrin
bne .Lsmallstr /* Loop again if not found. */
/* Otherwise fixup address and return. */
sub result, result, #1
bx lr
.Llargestr:
vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
/*
* Magic constant 0x8040201008040201 allows us to identify which lane
* matches the requested byte.
*/
movw tmp, #0x0201
movt tmp, #0x0804
lsl soff, tmp, #4
vmov vrepmask0, tmp, soff
vmov vrepmask1, tmp, soff
/* Work with aligned 32-byte chunks */
bic src, srcin, #31
ands soff, srcin, #31
beq .Lloopintro /* Go straight to main loop if it's aligned. */
/*
* Input string is not 32-byte aligned. We calculate the syndrome
* value for the aligned 32 bytes block containing the first bytes
* and mask the irrelevant part.
*/
vld1.8 {vdata0, vdata1}, [src:256]!
sub tmp, soff, #32
adds cntin, cntin, tmp
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
/* Clear the soff lower bits */
lsr synd, synd, soff
lsl synd, synd, soff
/* The first block can also be the last */
bls .Lmasklast
/* Have we found something already? */
cbnz synd, .Ltail
.Lloopintro:
vpush {vend}
/* 264/265 correspond to d8/d9 for q4 */
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 264, 0
.cfi_rel_offset 265, 8
.p2align 3,,7
.Lloop:
vld1.8 {vdata0, vdata1}, [src:256]!
subs cntin, cntin, #32
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
/* If we're out of data we finish regardless of the result. */
bls .Lend
/* Use a fast check for the termination condition. */
vorr vend, vdata0, vdata1
vorr vend0, vend0, vend1
vmov synd, tmp, vend0
orrs synd, synd, tmp
/* We're not out of data, loop if we haven't found the character. */
beq .Lloop
.Lend:
vpop {vend}
.cfi_adjust_cfa_offset -16
.cfi_restore 264
.cfi_restore 265
/* Termination condition found, let's calculate the syndrome value. */
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
cbz synd, .Lnotfound
bhi .Ltail
.Lmasklast:
/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
neg cntin, cntin
lsl synd, synd, cntin
lsrs synd, synd, cntin
it eq
moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
.Ltail:
/* Count the trailing zeros using bit reversing */
rbit synd, synd
/* Compensate the last post-increment */
sub src, src, #32
/* Count the leading zeros */
clz synd, synd
/* Compute the potential result and return */
add result, src, synd
bx lr
.Lnotfound:
/* Set result to NULL if not found and return */
mov result, #0
bx lr
.cfi_endproc
.size memchr, . - memchr
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.text
.thumb
@ ---------------------------------------------------------------------------
.thumb_func
.align 2
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
blt 20f
tst r0, #7 @ If it's already aligned skip the next bit
beq 10f
@ Work up to an aligned point
5:
ldrb r3, [r0],#1
subs r2, r2, #1
cmp r3, r1
beq 50f @ If it matches exit found
tst r0, #7
cbz r2, 40f @ If we run off the end, exit not found
bne 5b @ If not aligned yet then do next byte
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
mvns r7, #0 @ all F's
movs r3, #0
15:
ldrd r5,r6,[r0],#8
subs r4, r4, #8
eor r5,r5, r1 @ r5,r6 have 00's where bytes match the target
eor r6,r6, r1
uadd8 r5, r5, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r5, r3, r7 @ bytes are 00 for non-00 bytes,
@ or ff for 00 bytes - NOTE INVERSION
uadd8 r6, r6, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes
@ or ff for 00 bytes - NOTE INVERSION
cbnz r6, 60f
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
20:
cbz r2, 40f @ 0 length or hit the end already then not found
21: @ Post aligned section, or just a short call
ldrb r3,[r0],#1
subs r2,r2,#1
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
cbz r3, 50f
bne 21b @ on r2 flags
40:
movs r0,#0 @ not found
bx lr
50:
subs r0,r0,#1 @ found
bx lr
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
subne r0,r0,#7 @ or 2nd byte of 1st word
@ r0 currently points to the 2nd byte of the word containing the hit
tst r5, # CHARTSTMASK(0) @ 1st character
bne 61f
adds r0,r0,#1
tst r5, # CHARTSTMASK(1) @ 2nd character
ittt eq
addeq r0,r0,#1
tsteq r5, # (3<<15) @ 2nd & 3rd character
@ If not the 3rd must be the last one
addeq r0,r0,#1
61:
pop {r4,r5,r6,r7}
subs r0,r0,#1
bx lr
#else
/* Defined in memchr-stub.c. */
#endif
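Both paths above share one idea: reduce each chunk to a syndrome word whose set bits mark matching bytes, then convert the first set bit back into a byte offset (rbit + clz in the NEON path, the uadd8/sel cascade in the Thumb-2 path). A portable C sketch of the same scheme on 4-byte words (illustrative only; memchr_ref is a hypothetical name, little-endian and GCC's __builtin_ctz assumed):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *memchr_ref(const void *s, int c, size_t n)
{
  const unsigned char *p = s;
  uint32_t rep = 0x01010101u * (unsigned char)c;

  while (((uintptr_t)p & 3) && n > 0) /* Align first, like label 5 above */
    {
      if (*p == (unsigned char)c)
        {
          return (void *)p;
        }

      p++;
      n--;
    }

  while (n >= 4)                      /* Word loop, like label 15 above */
    {
      uint32_t w;
      uint32_t synd;

      memcpy(&w, p, 4);
      w ^= rep;                       /* Matching bytes become 0x00 */
      synd = (w - 0x01010101u) & ~w & 0x80808080u;
      if (synd != 0)
        {
          /* Lowest set 0x80 bit -> byte index of the first match */

          return (void *)(p + (__builtin_ctz(synd) >> 3));
        }

      p += 4;
      n -= 4;
    }

  while (n > 0)                       /* Tail bytes */
    {
      if (*p == (unsigned char)c)
        {
          return (void *)p;
        }

      p++;
      n--;
    }

  return NULL;
}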


@ -1,429 +1,332 @@
/************************************************************************************
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_memcpy.S
*
* armv7m-optimized memcpy, contributed by Mike Smith. Apparently in the public
* domain and is re-released here under the modified BSD license:
*
* Obtained via a posting on the Stellaris forum:
* http://e2e.ti.com/support/microcontrollers/\
* stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
*
* Posted by rocksoft on Jul 24, 2008 10:19 AM
*
* Hi,
*
* I recently finished a "memcpy" replacement and thought it might be useful for
* others...
*
* I've put some instructions and the code here:
*
* http://www.rock-software.net/downloads/memcpy/
*
* Hope it works for you as well as it did for me.
*
* Liam.
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name NuttX nor the names of its contributors may be
* used to endorse or promote products derived from this software
* without specific prior written permission.
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
************************************************************************************/
****************************************************************************/
/************************************************************************************
* Public Symbols
************************************************************************************/
/* This memcpy routine is optimised for Cortex-M3/M4 cores with/without
unaligned access.
.global memcpy
.syntax unified
.thumb
.file "arch_memcpy.S"
If compiled with GCC, this file should be enclosed within the following
pre-processing check:
if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
/************************************************************************************
* .text
************************************************************************************/
Prototype: void *memcpy (void *dst, const void *src, size_t count);
The job will be done in 5 steps.
Step 1: Align src/dst pointers; fall back to byte-by-byte copy if both cannot be aligned
Step 2: Repeatedly copy big blocks of __OPT_BIG_BLOCK_SIZE bytes
Step 3: Repeatedly copy mid blocks of __OPT_MID_BLOCK_SIZE bytes
Step 4: Copy word by word
Step 5: Copy byte by byte
Tunable options:
__OPT_BIG_BLOCK_SIZE: Size of the big block in bytes. Defaults to 64.
__OPT_MID_BLOCK_SIZE: Size of the mid block in bytes. Defaults to 16.
*/
#ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif
#ifndef __OPT_MID_BLOCK_SIZE
#define __OPT_MID_BLOCK_SIZE (4 * 4)
#endif
#if __OPT_BIG_BLOCK_SIZE == 16
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12
#elif __OPT_BIG_BLOCK_SIZE == 32
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28
#elif __OPT_BIG_BLOCK_SIZE == 64
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
#else
#error "Illegal __OPT_BIG_BLOCK_SIZE"
#endif
#if __OPT_MID_BLOCK_SIZE == 8
#define BEGIN_UNROLL_MID_BLOCK \
.irp offset, 0,4
#elif __OPT_MID_BLOCK_SIZE == 16
#define BEGIN_UNROLL_MID_BLOCK \
.irp offset, 0,4,8,12
#else
#error "Illegal __OPT_MID_BLOCK_SIZE"
#endif
#define END_UNROLL .endr
.syntax unified
.text
/************************************************************************************
* Private Constant Data
************************************************************************************/
/* We have 16 possible alignment combinations of src and dst; this jump table
 * directs the copy operation.
*
* Bits: Src=00, Dst=00 - Long to Long copy
* Bits: Src=00, Dst=01 - Long to Byte before half word
* Bits: Src=00, Dst=10 - Long to Half word
* Bits: Src=00, Dst=11 - Long to Byte before long word
* Bits: Src=01, Dst=00 - Byte before half word to long
* Bits: Src=01, Dst=01 - Byte before half word to byte before half word -
* Same alignment
* Bits: Src=01, Dst=10 - Byte before half word to half word
* Bits: Src=01, Dst=11 - Byte before half word to byte before long word
* Bits: Src=10, Dst=00 - Half word to long word
* Bits: Src=10, Dst=01 - Half word to byte before half word
* Bits: Src=10, Dst=10 - Half word to half word - Same Alignment
* Bits: Src=10, Dst=11 - Half word to byte before long word
* Bits: Src=11, Dst=00 - Byte before long word to long word
* Bits: Src=11, Dst=01 - Byte before long word to byte before half word
 * Bits: Src=11, Dst=10 - Byte before long word to half word
* Bits: Src=11, Dst=11 - Byte before long word to Byte before long word -
* Same alignment
*/
MEM_DataCopyTable:
.byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1
.byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1
.align 2
MEM_LongCopyTable:
.byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
.byte 0 /* 4 bytes left */
.byte (1 * 10) >> 1 /* 8 bytes left */
.byte (2 * 10) >> 1 /* 12 bytes left */
.byte (3 * 10) >> 1 /* 16 bytes left */
.byte (4 * 10) >> 1 /* 20 bytes left */
.byte (5 * 10) >> 1 /* 24 bytes left */
.byte (6 * 10) >> 1 /* 28 bytes left */
.byte (7 * 10) >> 1 /* 32 bytes left */
.byte (8 * 10) >> 1 /* 36 bytes left */
.align 2
/************************************************************************************
* Public Functions
************************************************************************************/
/************************************************************************************
* Name: memcpy
*
* Description:
* Optimized "general" copy routine
*
* Input Parameters:
* r0 = destination, r1 = source, r2 = length
*
* Returned Value:
* r0 = destination r1-r3 burned
*
************************************************************************************/
.align 4
.align 2
.global memcpy
.thumb
.thumb_func
.type memcpy, %function
memcpy:
push {r14}
push {r0}
bl _do_memcpy
pop {r0}
pop {pc}
@ r0: dst
@ r1: src
@ r2: len
#ifdef __ARM_FEATURE_UNALIGNED
/* When unaligned access is supported, ip is not used in the
function body. */
mov ip, r0
#else
push {r0}
#endif
orr r3, r1, r0
ands r3, r3, #3
bne .Lmisaligned_copy
.align 4
.Lbig_block:
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block
.thumb_func
_do_memcpy:
push {r14}
/* Kernel loop for big block copy */
.align 2
.Lbig_block_loop:
BEGIN_UNROLL_BIG_BLOCK
#ifdef __ARM_ARCH_7EM__
ldr r3, [r1], #4
str r3, [r0], #4
END_UNROLL
#else /* __ARM_ARCH_7M__ */
ldr r3, [r1, \offset]
str r3, [r0, \offset]
END_UNROLL
adds r0, __OPT_BIG_BLOCK_SIZE
adds r1, __OPT_BIG_BLOCK_SIZE
#endif
subs r2, __OPT_BIG_BLOCK_SIZE
bhs .Lbig_block_loop
/* This allows the inner workings to "assume" a minimum number of bytes */
/* Quickly check for very short copies */
.Lmid_block:
adds r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE
blo .Lcopy_word_by_word
cmp r2, #4
blt.n MEM_DataCopyBytes
/* Kernel loop for mid-block copy */
.align 2
.Lmid_block_loop:
BEGIN_UNROLL_MID_BLOCK
#ifdef __ARM_ARCH_7EM__
ldr r3, [r1], #4
str r3, [r0], #4
END_UNROLL
#else /* __ARM_ARCH_7M__ */
ldr r3, [r1, \offset]
str r3, [r0, \offset]
END_UNROLL
adds r0, __OPT_MID_BLOCK_SIZE
adds r1, __OPT_MID_BLOCK_SIZE
#endif
subs r2, __OPT_MID_BLOCK_SIZE
bhs .Lmid_block_loop
and r14, r0, #3 /* Get destination alignment bits */
bfi r14, r1, #2, #2 /* Get source alignment bits */
ldr r3, =MEM_DataCopyTable /* Jump table base */
tbb [r3, r14] /* Perform jump on src/dst alignment bits */
MEM_DataCopyJump:
.Lcopy_word_by_word:
adds r2, __OPT_MID_BLOCK_SIZE - 4
blo .Lcopy_less_than_4
.align 4
/* Kernel loop for small block copy */
.align 2
.Lcopy_word_by_word_loop:
ldr r3, [r1], #4
str r3, [r0], #4
subs r2, #4
bhs .Lcopy_word_by_word_loop
/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
* 3 bytes to read for long word aligning
*/
.Lcopy_less_than_4:
adds r2, #4
beq .Ldone
MEM_DataCopy5:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
lsls r2, r2, #31
itt ne
ldrbne r3, [r1], #1
strbne r3, [r0], #1
/* Bits: Src=10, Dst=10 - Half word to half word - Same Alignment
* 2 bytes to read for long word aligning
*/
bcc .Ldone
#ifdef __ARM_FEATURE_UNALIGNED
ldrh r3, [r1]
strh r3, [r0]
#else
ldrb r3, [r1]
strb r3, [r0]
ldrb r3, [r1, #1]
strb r3, [r0, #1]
#endif /* __ARM_FEATURE_UNALIGNED */
MEM_DataCopy10:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
.Ldone:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
#else
pop {r0}
#endif
bx lr
/* Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
 * 1 byte to read for long word aligning
*/
.align 2
.Lmisaligned_copy:
#ifdef __ARM_FEATURE_UNALIGNED
/* Define the label Ldst_aligned as Lbig_block, so that control goes to
the aligned copy once the destination has been adjusted to alignment. */
#define Ldst_aligned Lbig_block
MEM_DataCopy15:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
/* Copy word by word using LDR when alignment can be handled in hardware,
i.e., SCTLR.A is clear, so LDR and STR support unaligned access. */
/* Bits: Src=00, Dst=00 - Long to Long copy */
cmp r2, #8
blo .Lbyte_copy
MEM_DataCopy0:
/* Save regs that may be used by memcpy */
/* if src is aligned, just go to the big block loop. */
lsls r3, r1, #30
beq .Ldst_aligned
#else
/* If len < 12, the misalignment adjustment has more overhead than
a plain byte-to-byte copy. Also, len must be >= 8 to guarantee that
the code afterward works correctly. */
cmp r2, #12
blo .Lbyte_copy
#endif /* __ARM_FEATURE_UNALIGNED */
push {r4-r12}
/* Align dst only; do not try to align src. That is because handling
an aligned src with a misaligned dst needs more overhead than the
other way around. This way the worst case is a src that was initially
aligned: up to 4 additional bytes are copied, which is acceptable. */
/* Check for short word-aligned copy */
ands r3, r0, #3
beq .Ldst_aligned
cmp r2, #0x28
blt.n MEM_DataCopy0_2
rsb r3, #4
subs r2, r3
/* Bulk copy loop */
lsls r3, r3, #31
itt ne
ldrbne r3, [r1], #1
strbne r3, [r0], #1
MEM_DataCopy0_1:
ldmia r1!, {r3-r12}
stmia r0!, {r3-r12}
sub r2, r2, #0x28
cmp r2, #0x28
bge.n MEM_DataCopy0_1
bcc .Ldst_aligned
/* Copy remaining long words */
#ifdef __ARM_FEATURE_UNALIGNED
ldrh r3, [r1], #2
strh r3, [r0], #2
b .Ldst_aligned
#else
ldrb r3, [r1], #1
strb r3, [r0], #1
ldrb r3, [r1], #1
strb r3, [r0], #1
/* Now that dst is aligned */
.Ldst_aligned:
/* If r1 is aligned now, r0 and r1 had the same misalignment and both
are aligned now. Go to the aligned copy. */
ands r3, r1, #3
beq .Lbig_block
MEM_DataCopy0_2:
/* Copy remaining long words */
/* dst is aligned, but src isn't. Misaligned copy. */
ldr r14, =MEM_LongCopyTable
lsr r11, r2, #0x02
tbb [r14, r11]
push {r4, r5}
subs r2, #4
/* longword copy branch table anchor */
/* Move r1 back by the misaligned bytes so that it is aligned.
Since r1 must be restored to the unaligned address after the loop,
keep the offset in ip and subtract it from r1 afterward. */
subs r1, r3
rsb ip, r3, #4
MEM_LongCopyJump:
ldr.w r3, [r1], #0x04 /* 4 bytes remain */
str.w r3, [r0], #0x04
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r4} /* 8 bytes remain */
stmia.w r0!, {r3-r4}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r5} /* 12 bytes remain */
stmia.w r0!, {r3-r5}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r6} /* 16 bytes remain */
stmia.w r0!, {r3-r6}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r7} /* 20 bytes remain */
stmia.w r0!, {r3-r7}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r8} /* 24 bytes remain */
stmia.w r0!, {r3-r8}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r9} /* 28 bytes remain */
stmia.w r0!, {r3-r9}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r10} /* 32 bytes remain */
stmia.w r0!, {r3-r10}
b.n MEM_LongCopyEnd
ldmia.w r1!, {r3-r11} /* 36 bytes remain */
stmia.w r0!, {r3-r11}
/* Pre-load on word */
ldr r4, [r1], #4
MEM_LongCopyEnd:
pop {r4-r12}
and r2, r2, #0x03 /* All the longs have been copied */
cmp r3, #2
beq .Lmisaligned_copy_2_2
cmp r3, #3
beq .Lmisaligned_copy_3_1
/* Deal with up to 3 remaining bytes */
.macro mis_src_copy shift
1:
#ifdef __ARM_BIG_ENDIAN
lsls r4, r4, \shift
#else
lsrs r4, r4, \shift
#endif
ldr r3, [r1], #4
#ifdef __ARM_BIG_ENDIAN
lsrs r5, r3, 32-\shift
#else
lsls r5, r3, 32-\shift
#endif
orr r4, r4, r5
str r4, [r0], #4
mov r4, r3
subs r2, #4
bhs 1b
.endm
MEM_DataCopyBytes:
/* Deal with up to 3 remaining bytes */
.Lmisaligned_copy_1_3:
mis_src_copy shift=8
b .Lsrc_misaligned_tail
cmp r2, #0x00
it eq
popeq {pc}
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
subs r2, r2, #0x01
it eq
popeq {pc}
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
subs r2, r2, #0x01
it eq
popeq {pc}
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
pop {pc}
.Lmisaligned_copy_3_1:
mis_src_copy shift=24
b .Lsrc_misaligned_tail
.align 4
.Lmisaligned_copy_2_2:
/* For 2_2 misalignment, ldr is still faster than 2 x ldrh. */
mis_src_copy shift=16
/* Bits: Src=01, Dst=11 - Byte before half word to byte before long word
* 3 bytes to read for long word aligning the source
*/
.Lsrc_misaligned_tail:
adds r2, #4
subs r1, ip
pop {r4, r5}
MEM_DataCopy7:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
#endif /* __ARM_FEATURE_UNALIGNED */
/* Bits: Src=10, Dst=00 - Half word to long word
* 2 bytes to read for long word aligning the source
*/
.Lbyte_copy:
subs r2, #4
blo .Lcopy_less_than_4
MEM_DataCopy8:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
.Lbyte_copy_loop:
subs r2, #1
ldrb r3, [r1], #1
strb r3, [r0], #1
bhs .Lbyte_copy_loop
/* Bits: Src=11, Dst=01 - Byte before long word to byte before half word
* 1 byte to read for long word aligning the source
*/
ldrb r3, [r1]
strb r3, [r0]
ldrb r3, [r1, #1]
strb r3, [r0, #1]
ldrb r3, [r1, #2]
strb r3, [r0, #2]
MEM_DataCopy13:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
#else
pop {r0}
#endif
bx lr
/* Bits: Src=00, Dst=10 - Long to Half word */
MEM_DataCopy2:
cmp r2, #0x28
blt.n MEM_DataCopy2_1
/* Save regs */
push {r4-r12}
/* Bulk copy loop */
MEM_DataCopy2_2:
ldmia r1!, {r3-r12}
strh r3, [r0], #0x02
lsr r3, r3, #0x10
bfi r3, r4, #0x10, #0x10
lsr r4, r4, #0x10
bfi r4, r5, #0x10, #0x10
lsr r5, r5, #0x10
bfi r5, r6, #0x10, #0x10
lsr r6, r6, #0x10
bfi r6, r7, #0x10, #0x10
lsr r7, r7, #0x10
bfi r7, r8, #0x10, #0x10
lsr r8, r8, #0x10
bfi r8, r9, #0x10, #0x10
lsr r9, r9, #0x10
bfi r9, r10, #0x10, #0x10
lsr r10, r10, #0x10
bfi r10, r11, #0x10, #0x10
lsr r11, r11, #0x10
bfi r11, r12, #0x10, #0x10
stmia r0!, {r3-r11}
lsr r12, r12, #0x10
strh r12, [r0], #0x02
sub r2, r2, #0x28
cmp r2, #0x28
bge.n MEM_DataCopy2_2
pop {r4-r12}
MEM_DataCopy2_1: /* Read longs and write 2 x half words */
cmp r2, #4
blt.n MEM_DataCopyBytes
ldr r3, [r1], #0x04
strh r3, [r0], #0x02
lsr r3, r3, #0x10
strh r3, [r0], #0x02
sub r2, r2, #0x04
b.n MEM_DataCopy2
/* Bits: Src=01, Dst=00 - Byte before half word to long
* Bits: Src=01, Dst=10 - Byte before half word to half word
* 3 bytes to read for long word aligning the source
*/
MEM_DataCopy4:
MEM_DataCopy6:
/* Read B and write B */
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
/* Bits: Src=10, Dst=01 - Half word to byte before half word
* Bits: Src=10, Dst=11 - Half word to byte before long word
* 2 bytes to read for long word aligning the source
*/
MEM_DataCopy9:
MEM_DataCopy11:
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
/* Bits: Src=11, Dst=00 - Byte before long word to long word
 * Bits: Src=11, Dst=10 - Byte before long word to half word
* 1 byte to read for long word aligning the source
*/
MEM_DataCopy12:
MEM_DataCopy14:
/* Read B and write B */
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
sub r2, r2, #0x01
/* Bits: Src=00, Dst=01 - Long to Byte before half word
* Bits: Src=00, Dst=11 - Long to Byte before long word
*/
MEM_DataCopy1: /* Read longs, write B->H->B */
MEM_DataCopy3:
cmp r2, #4
blt MEM_DataCopyBytes
ldr r3, [r1], #0x04
strb r3, [r0], #0x01
lsr r3, r3, #0x08
strh r3, [r0], #0x02
lsr r3, r3, #0x10
strb r3, [r0], #0x01
sub r2, r2, #0x04
b.n MEM_DataCopy3
.size memcpy, .-memcpy
.end
.size memcpy, .-memcpy
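The five-step structure described in the new header comment maps onto C directly. A hedged sketch of the aligned path only (illustrative; memcpy_ref is a hypothetical name, block sizes match the defaults of 64 and 16 bytes, and both pointers are assumed word-aligned as they are after step 1):

#include <stddef.h>
#include <stdint.h>

static void *memcpy_ref(void *dst, const void *src, size_t n)
{
  uint32_t *d = dst;
  const uint32_t *s = src;
  unsigned char *db;
  const unsigned char *sb;
  int i;

  while (n >= 64)                 /* Step 2: __OPT_BIG_BLOCK_SIZE */
    {
      for (i = 0; i < 16; i++)
        {
          *d++ = *s++;
        }

      n -= 64;
    }

  while (n >= 16)                 /* Step 3: __OPT_MID_BLOCK_SIZE */
    {
      for (i = 0; i < 4; i++)
        {
          *d++ = *s++;
        }

      n -= 16;
    }

  while (n >= 4)                  /* Step 4: word by word */
    {
      *d++ = *s++;
      n -= 4;
    }

  db = (unsigned char *)d;        /* Step 5: byte by byte */
  sb = (const unsigned char *)s;
  while (n-- > 0)
    {
      *db++ = *sb++;
    }

  return dst;
}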


@ -0,0 +1,66 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_memmove.S
*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
cmp r0, r1 /* Forward copy is safe when dst <= src */
push {r4}
bls 3f
adds r3, r1, r2 /* r3 = src + len */
cmp r0, r3
bcs 3f /* Regions do not overlap: forward copy */
adds r1, r0, r2 /* Overlapping: copy backward, r1 = dst + len */
cbz r2, 2f
subs r2, r3, r2 /* r2 = src, the loop termination marker */
1:
ldrb r4, [r3, #-1]! /* Copy one byte, walking down from the end */
cmp r2, r3
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
3:
cmp r2, #0 /* Forward copy */
beq 2b
add r2, r2, r1 /* r2 = src + len, the loop termination marker */
subs r3, r0, #1 /* r3 = dst - 1 for the pre-indexed store below */
4:
ldrb r4, [r1], #1 /* Copy one byte, walking up from the start */
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
.size memmove, . - memmove


@ -0,0 +1,108 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_memset.S
*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.thumb
.syntax unified
.global memset
.type memset, %function
memset:
push {r4, r5, r6}
lsls r4, r0, #30
beq 10f
subs r4, r2, #1
cmp r2, #0
beq 9f
uxtb r5, r1
mov r3, r0
b 2f
1:
subs r2, r4, #1
cbz r4, 9f
mov r4, r2
2:
strb r5, [r3], #1
lsls r2, r3, #30
bne 1b
3:
cmp r4, #3
bls 7f
uxtb r5, r1
orr r5, r5, r5, lsl #8
cmp r4, #15
orr r5, r5, r5, lsl #16
bls 5f
mov r6, r4
add r2, r3, #16
4:
subs r6, r6, #16
cmp r6, #15
str r5, [r2, #-16]
str r5, [r2, #-12]
str r5, [r2, #-8]
str r5, [r2, #-4]
add r2, r2, #16
bhi 4b
sub r2, r4, #16
bic r2, r2, #15
and r4, r4, #15
adds r2, r2, #16
cmp r4, #3
add r3, r3, r2
bls 7f
5:
mov r6, r3
mov r2, r4
6:
subs r2, r2, #4
cmp r2, #3
str r5, [r6], #4
bhi 6b
subs r2, r4, #4
bic r2, r2, #3
adds r2, r2, #4
add r3, r3, r2
and r4, r4, #3
7:
cbz r4, 9f
uxtb r1, r1
add r4, r4, r3
8:
strb r1, [r3], #1
cmp r3, r4
bne 8b
9:
pop {r4, r5, r6}
bx lr
10:
mov r4, r2
mov r3, r0
b 3b
.size memset, . - memset


@ -0,0 +1,381 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_strcmp.S
*
* Copyright (c) 2012-2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
/* Parameters and result. */
#define src1 r0
#define src2 r1
#define result r0 /* Overlaps src1. */
/* Internal variables. */
#define data1 r2
#define data2 r3
#define tmp2 r5
#define tmp1 r12
#define syndrome r12 /* Overlaps tmp1 */
.thumb
.syntax unified
def_fn strcmp
.cfi_sections .debug_frame
.cfi_startproc
eor tmp1, src1, src2
tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */
bne .Lstrcmp_unaligned
ands tmp1, src1, #3
bic src1, src1, #3
bic src2, src2, #3
ldr data1, [src1], #4
it eq
ldreq data2, [src2], #4
beq 4f
/* Although s1 and s2 have identical initial alignment, they are
not currently word aligned. Rather than comparing bytes,
make sure that any bytes fetched from before the addressed
bytes are forced to 0xff. Then they will always compare
equal. */
eor tmp1, tmp1, #3
mvn data2, #MSB
lsl tmp1, tmp1, #3
S2LO tmp1, data2, tmp1
ldr data2, [src2], #4
orr data1, data1, tmp1
orr data2, data2, tmp1
.p2align 2
/* Critical loop. */
4:
sub syndrome, data1, #0x01010101
cmp data1, data2
/* check for any zero bytes in first word */
itttt eq
biceq syndrome, syndrome, data1
tsteq syndrome, #0x80808080
ldreq data1, [src1], #4
ldreq data2, [src2], #4
beq 4b
2:
/* There's a zero or a different byte in the word */
S2HI result, data1, #24
S2LO data1, data1, #8
cmp result, #1
it cs
cmpcs result, data2, S2HI #24
it eq
S2LOEQ data2, data2, #8
beq 2b
/* On a big-endian machine, RESULT contains the desired byte in bits
0-7; on a little-endian machine they are in bits 24-31. In
both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result
 * representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits
up. */
#ifdef __ARM_BIG_ENDIAN
sub result, result, data2, lsr #24
#else
and data2, data2, #255
lsrs result, result, #24
subs result, result, data2
#endif
bx lr
#if 0
/* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif
#define body(shift) \
mask = 0xffffffffU RSHIFT shift; \
data1 = *src1++; \
data2 = *src2++; \
do \
{ \
tmp2 = data1 & mask; \
if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
{ \
data2 RSHIFT= shift; \
break; \
} \
if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
{ \
/* See comment in assembler below re syndrome on big-endian */\
if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
data2 RSHIFT= shift; \
else \
{ \
data2 = *src2; \
tmp2 = data1 RSHIFT (32 - shift); \
data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
} \
break; \
} \
data2 = *src2++; \
tmp2 ^= data1; \
if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
{ \
tmp2 = data1 >> (32 - shift); \
data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
break; \
} \
data1 = *src1++; \
} while (1)
const unsigned* src1;
const unsigned* src2;
unsigned data1, data2;
unsigned mask;
unsigned shift;
unsigned b1 = 0x01010101;
char c1, c2;
unsigned tmp2;
while (((unsigned) s1) & 3)
{
c1 = *s1++;
c2 = *s2++;
if (c1 == 0 || c1 != c2)
return c1 - (int)c2;
}
src1 = (unsigned*) (((unsigned)s1) & ~3);
src2 = (unsigned*) (((unsigned)s2) & ~3);
tmp2 = ((unsigned) s2) & 3;
if (tmp2 == 1)
{
body(8);
}
else if (tmp2 == 2)
{
body(16);
}
else
{
body (24);
}
do
{
#ifdef __ARM_BIG_ENDIAN
c1 = (char) tmp2 >> 24;
c2 = (char) data2 >> 24;
#else /* not __ARM_BIG_ENDIAN */
c1 = (char) tmp2;
c2 = (char) data2;
#endif /* not __ARM_BIG_ENDIAN */
tmp2 RSHIFT= 8;
data2 RSHIFT= 8;
} while (c1 != 0 && c1 == c2);
return c1 - c2;
#endif /* 0 */
/* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned:
tst src1, #3
beq 2f
ldrb data1, [src1], #1
ldrb data2, [src2], #1
cmp data1, #1
it cs
cmpcs data1, data2
beq .Lstrcmp_unaligned
sub result, data1, data2
bx lr
2:
stmfd sp!, {r5}
.cfi_def_cfa_offset 4
.cfi_offset 5, -4
ldr data1, [src1], #4
and tmp2, src2, #3
bic src2, src2, #3
ldr data2, [src2], #4
cmp tmp2, #2
beq .Loverlap2
bhi .Loverlap1
/* Critical inner Loop: Block with 3 bytes initial overlap */
.p2align 2
.Loverlap3:
bic tmp2, data1, #MSB
cmp tmp2, data2, S2LO #8
sub syndrome, data1, #0x01010101
bic syndrome, syndrome, data1
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #24
bne 6f
ldr data1, [src1], #4
b .Loverlap3
4:
S2LO data2, data2, #8
b .Lstrcmp_tail
5:
#ifdef __ARM_BIG_ENDIAN
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00. */
tst data1, #0xff000000
itt ne
tstne data1, #0x00ff0000
tstne data1, #0x0000ff00
beq .Lstrcmp_done_equal
#else
bics syndrome, syndrome, #0xff000000
bne .Lstrcmp_done_equal
#endif
ldrb data2, [src2]
S2LO tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #24
#endif
b .Lstrcmp_tail
6:
S2LO tmp2, data1, #24
and data2, data2, #LSB
b .Lstrcmp_tail
/* Critical inner Loop: Block with 2 bytes initial overlap. */
.p2align 2
.Loverlap2:
S2HI tmp2, data1, #16
sub syndrome, data1, #0x01010101
S2LO tmp2, tmp2, #16
bic syndrome, syndrome, data1
cmp tmp2, data2, S2LO #16
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #16
bne 6f
ldr data1, [src1], #4
b .Loverlap2
5:
#ifdef __ARM_BIG_ENDIAN
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00 */
tst data1, #0xff000000
it ne
tstne data1, #0x00ff0000
beq .Lstrcmp_done_equal
#else
lsls syndrome, syndrome, #16
bne .Lstrcmp_done_equal
#endif
ldrh data2, [src2]
S2LO tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #16
#endif
b .Lstrcmp_tail
6:
S2HI data2, data2, #16
S2LO tmp2, data1, #16
4:
S2LO data2, data2, #16
b .Lstrcmp_tail
/* Critical inner Loop: Block with 1 byte initial overlap. */
.p2align 2
.Loverlap1:
and tmp2, data1, #LSB
cmp tmp2, data2, S2LO #24
sub syndrome, data1, #0x01010101
bic syndrome, syndrome, data1
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #8
bne 6f
ldr data1, [src1], #4
b .Loverlap1
4:
S2LO data2, data2, #24
b .Lstrcmp_tail
5:
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00. */
tst data1, #LSB
beq .Lstrcmp_done_equal
ldr data2, [src2], #4
6:
S2LO tmp2, data1, #8
bic data2, data2, #MSB
b .Lstrcmp_tail
.Lstrcmp_done_equal:
mov result, #0
.cfi_remember_state
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.Lstrcmp_tail:
.cfi_restore_state
and r2, tmp2, #LSB
and result, data2, #LSB
cmp result, #1
it cs
cmpcs result, r2
itt eq
S2LOEQ tmp2, tmp2, #8
S2LOEQ data2, data2, #8
beq .Lstrcmp_tail
sub result, r2, result
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_endproc
.size strcmp, . - strcmp


@ -0,0 +1,184 @@
/****************************************************************************
* libs/libc/machine/arm/armv7-m/gnu/arch_strlen.S
*
* Copyright (c) 2010-2011,2013 Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Assumes:
* ARMv6T2 or ARMv7E-M, AArch32
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#include "acle-compat.h"
.macro def_fn f p2align=0
.text
.p2align \p2align
.global \f
.type \f, %function
\f:
.endm
#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
.eabi_attribute Tag_ARM_ISA_use, 0
.thumb
.syntax unified
/* Parameters and result. */
#define srcin r0
#define result r0
/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5
def_fn strlen p2align=6
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned
.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
.Lmisaligned8:
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.size strlen, . - strlen
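The loop above rests on the DSP uadd8/sel pair: uadd8 of a word with 0xffffffff sets the per-lane GE flags exactly for the non-zero bytes, and sel then leaves 0xff only in the lanes that held a zero byte. Below is a minimal C model of that zero-byte scan, assuming a little-endian, word-aligned input and GCC builtins; zero_byte_mask and strlen_model are illustrative names, not part of the patch (the identical armv8-m copy of this file later in the diff works the same way).

#include <stddef.h>
#include <stdint.h>

/* 0xff in every lane that held a zero byte, 0x00 elsewhere.  This is
   what one uadd8/sel pair computes per register in the loop above. */

static uint32_t zero_byte_mask(uint32_t w)
{
  uint32_t mask = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      if (((w >> (8 * i)) & 0xff) == 0)
        {
          mask |= (uint32_t)0xff << (8 * i);
        }
    }

  return mask;
}

static size_t strlen_model(const char *s)
{
  /* Assumes s is word-aligned and that reading the aligned word holding
     the terminator is safe; the real code also masks a misaligned head
     (see .Lmisaligned8) and walks 32 bytes per iteration. */

  const char *p = s;
  uint32_t w, m;

  for (; ; p += 4)
    {
      __builtin_memcpy(&w, p, 4);
      m = zero_byte_mask(w);
      if (m != 0)
        {
          break;
        }
    }

  /* Little-endian: the lowest set lane is the first zero byte. */

  return (size_t)(p - s) + (size_t)(__builtin_ctz(m) / 8);
}

The rev before clz in the assembly plays the same role as __builtin_ctz here: both turn the first 0xff lane into a byte offset.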

View file

@ -5,6 +5,54 @@
if ARCH_ARMV8M
config ARMV8M_MEMCHR
bool "Enable optimized memchr() for ARMv8-M"
default n
select LIBC_ARCH_MEMCHR
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific memchr() library function
config ARMV8M_MEMCPY
bool "Enable optimized memcpy() for ARMv8-M"
default n
select LIBC_ARCH_MEMCPY
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific memcpy() library function
config ARMV8M_MEMSET
bool "Enable optimized memset() for ARMv8-M"
default n
select LIBC_ARCH_MEMSET
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific memset() library function
config ARMV8M_MEMMOVE
bool "Enable optimized memmove() for ARMv8-M"
default n
select LIBC_ARCH_MEMMOVE
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific memmove() library function
config ARMV8M_STRCMP
bool "Enable optimized strcmp() for ARMv8-M"
default n
select LIBC_ARCH_STRCMP
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific strcmp() library function
config ARMV8M_STRLEN
bool "Enable optimized strlen() for ARMv8-M"
default n
select LIBC_ARCH_STRLEN
depends on ARCH_TOOLCHAIN_GNU
---help---
Enable optimized ARMv8-M specific strlen() library function
config ARMV8M_LIBM
bool "Architecture specific optimizations"
default n

View file

@ -74,6 +74,30 @@ endif
endif # CONFIG_ARMV8M_LIBM
ifeq ($(CONFIG_ARMV8M_MEMCHR),y)
ASRCS += arch_memchr.S
endif
ifeq ($(CONFIG_ARMV8M_MEMCPY),y)
ASRCS += arch_memcpy.S
endif
ifeq ($(CONFIG_ARMV8M_MEMSET),y)
ASRCS += arch_memset.S
endif
ifeq ($(CONFIG_ARMV8M_MEMMOVE),y)
ASRCS += arch_memmove.S
endif
ifeq ($(CONFIG_ARMV8M_STRCMP),y)
ASRCS += arch_strcmp.S
endif
ifeq ($(CONFIG_ARMV8M_STRLEN),y)
ASRCS += arch_strlen.S
endif
ifeq ($(CONFIG_ARCH_TOOLCHAIN_GNU),y)
DEPPATH += --dep-path machine/arm/armv8-m/gnu
VPATH += :machine/arm/armv8-m/gnu

View file

@ -0,0 +1,389 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_memchr.S
*
* Copyright (c) 2010-2011, Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Written by Dave Gilbert <david.gilbert@linaro.org>
*
* This memchr routine is optimised on a Cortex-A9 and should work on
* all ARMv7 processors. It has a fast path for short sizes, and has
* an optimised path for large data sets; the worst case is finding the
* match early in a large data set.
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
@ 2011-02-07 david.gilbert@linaro.org
@ Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@ Import endianness fix from local git ea786f1b
@ 2011-10-11 david.gilbert@linaro.org
@ Import from cortex-strings bzr rev 63
@ Flip to ldrd (as suggested by Greta Yorsh)
@ Make conditional on CPU type
@ tidy
@ This code requires armv6t2 or later. Uses Thumb2.
.syntax unified
@ NOTE: This ifdef MUST match the one in memchr-stub.c
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'R'
.arch armv8-r
#else
.arch armv7-a
#endif
.fpu neon
/* Arguments */
#define srcin r0
#define chrin r1
#define cntin r2
/* Retval */
#define result r0 /* Live range does not overlap with srcin */
/* Working registers */
#define src r1 /* Live range does not overlap with chrin */
#define tmp r3
#define synd r0 /* No overlap with srcin or result */
#define soff r12
/* Working NEON registers */
#define vrepchr q0
#define vdata0 q1
#define vdata0_0 d2 /* Lower half of vdata0 */
#define vdata0_1 d3 /* Upper half of vdata0 */
#define vdata1 q2
#define vdata1_0 d4 /* Lower half of vhas_chr0 */
#define vdata1_1 d5 /* Upper half of vhas_chr0 */
#define vrepmask q3
#define vrepmask0 d6
#define vrepmask1 d7
#define vend q4
#define vend0 d8
#define vend1 d9
/*
* Core algorithm:
*
* For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
* byte. Each bit is set if the relevant byte matched the requested character
* and cleared otherwise. Since the bits in the syndrome reflect exactly the
* order in which things occur in the original string, counting trailing zeros
* allows us to identify exactly which byte matched.
*/
.text
.thumb_func
.align 4
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
.cfi_sections .debug_frame
.cfi_startproc
/* Use a simple loop if there are less than 8 bytes to search. */
cmp cntin, #7
bhi .Llargestr
and chrin, chrin, #0xff
.Lsmallstr:
subs cntin, cntin, #1
blo .Lnotfound /* Return not found if reached end. */
ldrb tmp, [srcin], #1
cmp tmp, chrin
bne .Lsmallstr /* Loop again if not found. */
/* Otherwise fixup address and return. */
sub result, result, #1
bx lr
.Llargestr:
vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
/*
* Magic constant 0x8040201008040201 allows us to identify which lane
* matches the requested byte.
*/
movw tmp, #0x0201
movt tmp, #0x0804
lsl soff, tmp, #4
vmov vrepmask0, tmp, soff
vmov vrepmask1, tmp, soff
/* Work with aligned 32-byte chunks */
bic src, srcin, #31
ands soff, srcin, #31
beq .Lloopintro /* Go straight to main loop if it's aligned. */
/*
* Input string is not 32-byte aligned. We calculate the syndrome
* value for the aligned 32-byte block containing the first bytes
* and mask the irrelevant part.
*/
vld1.8 {vdata0, vdata1}, [src:256]!
sub tmp, soff, #32
adds cntin, cntin, tmp
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
/* Clear the soff lower bits */
lsr synd, synd, soff
lsl synd, synd, soff
/* The first block can also be the last */
bls .Lmasklast
/* Have we found something already? */
cbnz synd, .Ltail
.Lloopintro:
vpush {vend}
/* 264/265 correspond to d8/d9 for q4 */
.cfi_adjust_cfa_offset 16
.cfi_rel_offset 264, 0
.cfi_rel_offset 265, 8
.p2align 3,,7
.Lloop:
vld1.8 {vdata0, vdata1}, [src:256]!
subs cntin, cntin, #32
vceq.i8 vdata0, vdata0, vrepchr
vceq.i8 vdata1, vdata1, vrepchr
/* If we're out of data we finish regardless of the result. */
bls .Lend
/* Use a fast check for the termination condition. */
vorr vend, vdata0, vdata1
vorr vend0, vend0, vend1
vmov synd, tmp, vend0
orrs synd, synd, tmp
/* We're not out of data, loop if we haven't found the character. */
beq .Lloop
.Lend:
vpop {vend}
.cfi_adjust_cfa_offset -16
.cfi_restore 264
.cfi_restore 265
/* Termination condition found, let's calculate the syndrome value. */
vand vdata0, vdata0, vrepmask
vand vdata1, vdata1, vrepmask
vpadd.i8 vdata0_0, vdata0_0, vdata0_1
vpadd.i8 vdata1_0, vdata1_0, vdata1_1
vpadd.i8 vdata0_0, vdata0_0, vdata1_0
vpadd.i8 vdata0_0, vdata0_0, vdata0_0
vmov synd, vdata0_0[0]
cbz synd, .Lnotfound
bhi .Ltail
.Lmasklast:
/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
neg cntin, cntin
lsl synd, synd, cntin
lsrs synd, synd, cntin
it eq
moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
.Ltail:
/* Count the trailing zeros using bit reversing */
rbit synd, synd
/* Compensate the last post-increment */
sub src, src, #32
/* Count the leading zeros */
clz synd, synd
/* Compute the potential result and return */
add result, src, synd
bx lr
.Lnotfound:
/* Set result to NULL if not found and return */
mov result, #0
bx lr
.cfi_endproc
.size memchr, . - memchr
#elif __ARM_ARCH_ISA_THUMB >= 2 && defined (__ARM_FEATURE_DSP)
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.text
.thumb
@ ---------------------------------------------------------------------------
.thumb_func
.align 2
.p2align 4,,15
.global memchr
.type memchr,%function
memchr:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
and r1,r1,#0xff @ Don't trust the caller to pass a char
cmp r2,#16 @ If short don't bother with anything clever
blt 20f
tst r0, #7 @ If it's already aligned skip the next bit
beq 10f
@ Work up to an aligned point
5:
ldrb r3, [r0],#1
subs r2, r2, #1
cmp r3, r1
beq 50f @ If it matches exit found
tst r0, #7
cbz r2, 40f @ If we run off the end, exit not found
bne 5b @ If not aligned yet then do next byte
10:
@ We are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
orr r1, r1, r1, lsl #8 @ expand the match word across all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with * 8
mvns r7, #0 @ all F's
movs r3, #0
15:
ldrd r5,r6,[r0],#8
subs r4, r4, #8
eor r5,r5, r1 @ r5,r6 have 00's where bytes match the target
eor r6,r6, r1
uadd8 r5, r5, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r5, r3, r7 @ bytes are 00 for non-00 bytes,
@ or ff for 00 bytes - NOTE INVERSION
uadd8 r6, r6, r7 @ Par add 0xff - sets GE bits for bytes!=0
sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes
@ or ff for 00 bytes - NOTE INVERSION
cbnz r6, 60f
bne 15b @ (Flags from the subs above)
pop {r4,r5,r6,r7}
and r1,r1,#0xff @ r1 back to a single character
and r2,r2,#7 @ Leave the count remaining as the number
@ after the double words have been done
20:
cbz r2, 40f @ 0 length or hit the end already then not found
21: @ Post aligned section, or just a short call
ldrb r3,[r0],#1
subs r2,r2,#1
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
cbz r3, 50f
bne 21b @ on r2 flags
40:
movs r0,#0 @ not found
bx lr
50:
subs r0,r0,#1 @ found
bx lr
60: @ We're here because the fast path found a hit
@ now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
subne r0,r0,#7 @ or 2nd byte of 1st word
@ r0 currently points to the 2nd byte of the word containing the hit
tst r5, # CHARTSTMASK(0) @ 1st character
bne 61f
adds r0,r0,#1
tst r5, # CHARTSTMASK(1) @ 2nd character
ittt eq
addeq r0,r0,#1
tsteq r5, # (3<<15) @ 2nd & 3rd character
@ If not the 3rd must be the last one
addeq r0,r0,#1
61:
pop {r4,r5,r6,r7}
subs r0,r0,#1
bx lr
#else
/* Defined in memchr-stub.c. */
#endif
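Both paths above build, in effect, a per-byte match mask and then locate its first set lane; the NEON path's core-algorithm comment calls this the 32-bit syndrome. Here is a scalar C sketch of that scheme under the same little-endian assumption; memchr_model is an illustrative name, not part of the patch.

#include <stddef.h>
#include <stdint.h>

static void *memchr_model(const void *s, int c, size_t n)
{
  const unsigned char *p = (const unsigned char *)s;
  size_t i;

  while (n >= 32)
    {
      uint32_t synd = 0;

      for (i = 0; i < 32; i++)          /* One syndrome bit per byte */
        {
          if (p[i] == (unsigned char)c)
            {
              synd |= (uint32_t)1 << i;
            }
        }

      if (synd != 0)                    /* Trailing zeros locate the match */
        {
          return (void *)(p + __builtin_ctz(synd));
        }

      p += 32;
      n -= 32;
    }

  for (i = 0; i < n; i++)               /* Short tail, as in .Lsmallstr */
    {
      if (p[i] == (unsigned char)c)
        {
          return (void *)(p + i);
        }
    }

  return NULL;
}

In the NEON path the syndrome comes out of the vceq/vand/vpadd reductions and is consumed with lsr/lsl masking plus rbit and clz, which together count trailing zeros.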

View file

@ -0,0 +1,345 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_memcpy.S
*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#ifndef __ARM_FEATURE_MVE
/* This memcpy routine is optimised for Cortex-M3/M4 cores with/without
unaligned access.
If compiled with GCC, this file should be enclosed within the
following preprocessing check:
if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
Prototype: void *memcpy (void *dst, const void *src, size_t count);
The job is done in 5 steps.
Step 1: Align src/dst pointers; copy misaligned if both cannot be aligned
Step 2: Repeatedly copy big blocks of __OPT_BIG_BLOCK_SIZE bytes
Step 3: Repeatedly copy mid blocks of __OPT_MID_BLOCK_SIZE bytes
Step 4: Copy word by word
Step 5: Copy byte by byte
Tunable options:
__OPT_BIG_BLOCK_SIZE: Size of a big block in bytes. Defaults to 64.
__OPT_MID_BLOCK_SIZE: Size of a mid block in bytes. Defaults to 16.
*/
#ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif
#ifndef __OPT_MID_BLOCK_SIZE
#define __OPT_MID_BLOCK_SIZE (4 * 4)
#endif
#if __OPT_BIG_BLOCK_SIZE == 16
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12
#elif __OPT_BIG_BLOCK_SIZE == 32
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28
#elif __OPT_BIG_BLOCK_SIZE == 64
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
#else
#error "Illegal __OPT_BIG_BLOCK_SIZE"
#endif
#if __OPT_MID_BLOCK_SIZE == 8
#define BEGIN_UNROLL_MID_BLOCK \
.irp offset, 0,4
#elif __OPT_MID_BLOCK_SIZE == 16
#define BEGIN_UNROLL_MID_BLOCK \
.irp offset, 0,4,8,12
#else
#error "Illegal __OPT_MID_BLOCK_SIZE"
#endif
#define END_UNROLL .endr
#endif
.syntax unified
.text
.align 2
.global memcpy
.thumb
.thumb_func
.type memcpy, %function
memcpy:
@ r0: dst
@ r1: src
@ r2: len
#ifdef __ARM_FEATURE_MVE
mov r3, lr
wlstp.8 lr, r2, 2f
mov r2, r0
1:
vldrb.8 q0, [r1], #16
vstrb.8 q0, [r2], #16
letp lr, 1b
2:
bx r3
#else
#ifdef __ARM_FEATURE_UNALIGNED
/* When unaligned access is supported, ip is not otherwise needed in
the function body, so use it to preserve the return value. */
mov ip, r0
#else
push {r0}
#endif
orr r3, r1, r0
ands r3, r3, #3
bne .Lmisaligned_copy
.Lbig_block:
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block
/* Kernel loop for big block copy */
.align 2
.Lbig_block_loop:
BEGIN_UNROLL_BIG_BLOCK
#ifdef __ARM_ARCH_7EM__
ldr r3, [r1], #4
str r3, [r0], #4
END_UNROLL
#else /* __ARM_ARCH_7M__ */
ldr r3, [r1, \offset]
str r3, [r0, \offset]
END_UNROLL
adds r0, __OPT_BIG_BLOCK_SIZE
adds r1, __OPT_BIG_BLOCK_SIZE
#endif
subs r2, __OPT_BIG_BLOCK_SIZE
bhs .Lbig_block_loop
.Lmid_block:
adds r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE
blo .Lcopy_word_by_word
/* Kernel loop for mid-block copy */
.align 2
.Lmid_block_loop:
BEGIN_UNROLL_MID_BLOCK
#ifdef __ARM_ARCH_7EM__
ldr r3, [r1], #4
str r3, [r0], #4
END_UNROLL
#else /* __ARM_ARCH_7M__ */
ldr r3, [r1, \offset]
str r3, [r0, \offset]
END_UNROLL
adds r0, __OPT_MID_BLOCK_SIZE
adds r1, __OPT_MID_BLOCK_SIZE
#endif
subs r2, __OPT_MID_BLOCK_SIZE
bhs .Lmid_block_loop
.Lcopy_word_by_word:
adds r2, __OPT_MID_BLOCK_SIZE - 4
blo .Lcopy_less_than_4
/* Kernel loop for small block copy */
.align 2
.Lcopy_word_by_word_loop:
ldr r3, [r1], #4
str r3, [r0], #4
subs r2, #4
bhs .Lcopy_word_by_word_loop
.Lcopy_less_than_4:
adds r2, #4
beq .Ldone
lsls r2, r2, #31
itt ne
ldrbne r3, [r1], #1
strbne r3, [r0], #1
bcc .Ldone
#ifdef __ARM_FEATURE_UNALIGNED
ldrh r3, [r1]
strh r3, [r0]
#else
ldrb r3, [r1]
strb r3, [r0]
ldrb r3, [r1, #1]
strb r3, [r0, #1]
#endif /* __ARM_FEATURE_UNALIGNED */
.Ldone:
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
#else
pop {r0}
#endif
bx lr
.align 2
.Lmisaligned_copy:
#ifdef __ARM_FEATURE_UNALIGNED
/* Define the label Ldst_aligned as Lbig_block: once the destination
has been aligned, control goes straight to the aligned copy. */
#define Ldst_aligned Lbig_block
/* Copy word by word using LDR when the hardware handles misalignment,
i.e., alignment trapping is disabled so unaligned LDR and STR are
permitted. */
cmp r2, #8
blo .Lbyte_copy
/* if src is aligned, just go to the big block loop. */
lsls r3, r1, #30
beq .Ldst_aligned
#else
/* If len < 12, the misalignment adjustment has more overhead than a
plain byte-to-byte copy. Also, len must be >= 8 to guarantee that
the code afterward works correctly. */
cmp r2, #12
blo .Lbyte_copy
#endif /* __ARM_FEATURE_UNALIGNED */
/* Align dst only; do not try to align src as well, because handling an
aligned src with a misaligned dst needs more overhead than the
reverse. The worst case of this choice is an initially aligned src:
up to 4 additional bytes are then copied byte by byte, which is
acceptable. */
ands r3, r0, #3
beq .Ldst_aligned
rsb r3, #4
subs r2, r3
lsls r3, r3, #31
itt ne
ldrbne r3, [r1], #1
strbne r3, [r0], #1
bcc .Ldst_aligned
#ifdef __ARM_FEATURE_UNALIGNED
ldrh r3, [r1], #2
strh r3, [r0], #2
b .Ldst_aligned
#else
ldrb r3, [r1], #1
strb r3, [r0], #1
ldrb r3, [r1], #1
strb r3, [r0], #1
/* Now that dst is aligned */
.Ldst_aligned:
/* If r1 is aligned now, then r0 and r1 had the same misalignment and
both are aligned now. Go to the aligned copy. */
ands r3, r1, #3
beq .Lbig_block
/* dst is aligned, but src isn't. Misaligned copy. */
push {r4, r5}
subs r2, #4
/* Move r1 back by the misaligned bytes to make r1 aligned. Since
r1 must be restored to the unaligned address after the loop, keep
the offset in ip and subtract it from r1 afterward. */
subs r1, r3
rsb ip, r3, #4
/* Pre-load one word */
ldr r4, [r1], #4
cmp r3, #2
beq .Lmisaligned_copy_2_2
cmp r3, #3
beq .Lmisaligned_copy_3_1
.macro mis_src_copy shift
1:
#ifdef __ARM_BIG_ENDIAN
lsls r4, r4, \shift
#else
lsrs r4, r4, \shift
#endif
ldr r3, [r1], #4
#ifdef __ARM_BIG_ENDIAN
lsrs r5, r3, 32-\shift
#else
lsls r5, r3, 32-\shift
#endif
orr r4, r4, r5
str r4, [r0], #4
mov r4, r3
subs r2, #4
bhs 1b
.endm
.Lmisaligned_copy_1_3:
mis_src_copy shift=8
b .Lsrc_misaligned_tail
.Lmisaligned_copy_3_1:
mis_src_copy shift=24
b .Lsrc_misaligned_tail
.Lmisaligned_copy_2_2:
/* For 2_2 misalignment, ldr is still faster than 2 x ldrh. */
mis_src_copy shift=16
.Lsrc_misaligned_tail:
adds r2, #4
subs r1, ip
pop {r4, r5}
#endif /* __ARM_FEATURE_UNALIGNED */
.Lbyte_copy:
subs r2, #4
blo .Lcopy_less_than_4
.Lbyte_copy_loop:
subs r2, #1
ldrb r3, [r1], #1
strb r3, [r0], #1
bhs .Lbyte_copy_loop
ldrb r3, [r1]
strb r3, [r0]
ldrb r3, [r1, #1]
strb r3, [r0, #1]
ldrb r3, [r1, #2]
strb r3, [r0, #2]
#ifdef __ARM_FEATURE_UNALIGNED
mov r0, ip
#else
pop {r0}
#endif
bx lr
#endif
.size memcpy, .-memcpy
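The staged structure of the non-MVE path is easier to see in C. Below is a rough model of steps 2-5 with the default block sizes (64-byte big blocks, 16-byte mid blocks), assuming step 1 already aligned both pointers; memcpy_model is an illustrative name, not part of the patch.

#include <stddef.h>
#include <stdint.h>

static void *memcpy_model(void *dst, const void *src, size_t n)
{
  uint32_t *d = dst;                  /* Both assumed word-aligned here */
  const uint32_t *s = src;
  unsigned char *db;
  const unsigned char *sb;
  int i;

  while (n >= 64)                     /* Step 2: unrolled big blocks */
    {
      for (i = 0; i < 16; i++)
        {
          *d++ = *s++;                /* Models the 16 ldr/str pairs */
        }
      n -= 64;
    }

  while (n >= 16)                     /* Step 3: unrolled mid blocks */
    {
      for (i = 0; i < 4; i++)
        {
          *d++ = *s++;
        }
      n -= 16;
    }

  while (n >= 4)                      /* Step 4: word by word */
    {
      *d++ = *s++;
      n -= 4;
    }

  db = (unsigned char *)d;            /* Step 5: byte tail */
  sb = (const unsigned char *)s;
  while (n--)
    {
      *db++ = *sb++;
    }

  return dst;
}

When only the destination can be aligned, the mis_src_copy macro instead loads aligned source words and splices adjacent pairs together with shifts and orr before each aligned store.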

View file

@ -0,0 +1,66 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_memmove.S
*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.thumb
.syntax unified
.global memmove
.type memmove, %function
memmove:
cmp r0, r1
push {r4}
bls 3f
adds r3, r1, r2
cmp r0, r3
bcs 3f
adds r1, r0, r2
cbz r2, 2f
subs r2, r3, r2
1:
ldrb r4, [r3, #-1]!
cmp r2, r3
strb r4, [r1, #-1]!
bne 1b
2:
pop {r4}
bx lr
3:
cmp r2, #0
beq 2b
add r2, r2, r1
subs r3, r0, #1
4:
ldrb r4, [r1], #1
cmp r2, r1
strb r4, [r3, #1]!
bne 4b
pop {r4}
bx lr
.size memmove, . - memmove
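The branch structure above is the classic direction test: copy backwards only when the destination starts inside the source range, otherwise copy forwards. The same logic in C, as a sketch; memmove_model is an illustrative name, not part of the patch.

#include <stddef.h>

static void *memmove_model(void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (d > s && d < s + n)             /* dst inside [src, src+n): copy  */
    {                                 /* backwards so each byte is read */
      while (n--)                     /* before it is overwritten.      */
        {
          d[n] = s[n];
        }
    }
  else                                /* Otherwise a forward copy is safe */
    {
      size_t i;

      for (i = 0; i < n; i++)
        {
          d[i] = s[i];
        }
    }

  return dst;
}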

View file

@ -0,0 +1,120 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_memset.S
*
* Copyright (c) 2015 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.thumb
.syntax unified
.global memset
.type memset, %function
memset:
#ifdef __ARM_FEATURE_MVE
vdup.8 q0, r1
mov r3, lr
mov r1, r0
wlstp.8 lr, r2, 2f
1:
vstrb.8 q0, [r1], #16
letp lr, 1b
2:
bx r3
#else
push {r4, r5, r6}
lsls r4, r0, #30
beq 10f
subs r4, r2, #1
cmp r2, #0
beq 9f
uxtb r5, r1
mov r3, r0
b 2f
1:
subs r2, r4, #1
cbz r4, 9f
mov r4, r2
2:
strb r5, [r3], #1
lsls r2, r3, #30
bne 1b
3:
cmp r4, #3
bls 7f
uxtb r5, r1
orr r5, r5, r5, lsl #8
cmp r4, #15
orr r5, r5, r5, lsl #16
bls 5f
mov r6, r4
add r2, r3, #16
4:
subs r6, r6, #16
cmp r6, #15
str r5, [r2, #-16]
str r5, [r2, #-12]
str r5, [r2, #-8]
str r5, [r2, #-4]
add r2, r2, #16
bhi 4b
sub r2, r4, #16
bic r2, r2, #15
and r4, r4, #15
adds r2, r2, #16
cmp r4, #3
add r3, r3, r2
bls 7f
5:
mov r6, r3
mov r2, r4
6:
subs r2, r2, #4
cmp r2, #3
str r5, [r6], #4
bhi 6b
subs r2, r4, #4
bic r2, r2, #3
adds r2, r2, #4
add r3, r3, r2
and r4, r4, #3
7:
cbz r4, 9f
uxtb r1, r1
add r4, r4, r3
8:
strb r1, [r3], #1
cmp r3, r4
bne 8b
9:
pop {r4, r5, r6}
bx lr
10:
mov r4, r2
mov r3, r0
b 3b
#endif
.size memset, . - memset
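The non-MVE path stores bytes up to a word boundary, replicates the fill byte across a word with two orr ... lsl steps, drains 16-byte and then 4-byte runs, and finishes with a byte tail. A C model of the same staging follows (a sketch; memset_model is an illustrative name, not part of the patch).

#include <stddef.h>
#include <stdint.h>

static void *memset_model(void *dst, int c, size_t n)
{
  unsigned char *d = dst;
  uint32_t *dw;
  uint32_t fill = (unsigned char)c;

  fill |= fill << 8;                  /* Replicate the byte across a word, */
  fill |= fill << 16;                 /* as the two orr ... lsl steps do.  */

  while (((uintptr_t)d & 3) != 0 && n != 0)
    {
      *d++ = (unsigned char)c;        /* Byte lead-in up to word alignment */
      n--;
    }

  dw = (uint32_t *)d;
  while (n >= 16)                     /* Four word stores per iteration */
    {
      dw[0] = fill;
      dw[1] = fill;
      dw[2] = fill;
      dw[3] = fill;
      dw += 4;
      n -= 16;
    }

  while (n >= 4)                      /* Remaining whole words */
    {
      *dw++ = fill;
      n -= 4;
    }

  d = (unsigned char *)dw;
  while (n--)                         /* Byte tail */
    {
      *d++ = (unsigned char)c;
    }

  return dst;
}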

View file

@ -0,0 +1,411 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_strcmp.S
*
* Copyright (c) 2012-2014 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#ifdef __ARM_BIG_ENDIAN
#define S2LO lsl
#define S2LOEQ lsleq
#define S2HI lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not __ARM_BIG_ENDIAN */
#define S2LO lsr
#define S2LOEQ lsreq
#define S2HI lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not __ARM_BIG_ENDIAN */
.macro def_fn f p2align=0
.text
.p2align \p2align
.global \f
.type \f, %function
\f:
.endm
/* Very similar to the generic code, but uses Thumb2 as implemented
in ARMv7-M. */
/* Parameters and result. */
#define src1 r0
#define src2 r1
#define result r0 /* Overlaps src1. */
/* Internal variables. */
#define data1 r2
#define data2 r3
#define tmp2 r5
#define tmp1 r12
#define syndrome r12 /* Overlaps tmp1 */
.thumb
.syntax unified
def_fn strcmp
.cfi_sections .debug_frame
.cfi_startproc
eor tmp1, src1, src2
tst tmp1, #3
/* Strings not at same byte offset from a word boundary. */
bne .Lstrcmp_unaligned
ands tmp1, src1, #3
bic src1, src1, #3
bic src2, src2, #3
ldr data1, [src1], #4
it eq
ldreq data2, [src2], #4
beq 4f
/* Although s1 and s2 have identical initial alignment, they are
not currently word aligned. Rather than comparing bytes,
make sure that any bytes fetched from before the addressed
bytes are forced to 0xff. Then they will always compare
equal. */
eor tmp1, tmp1, #3
mvn data2, #MSB
lsl tmp1, tmp1, #3
S2LO tmp1, data2, tmp1
ldr data2, [src2], #4
orr data1, data1, tmp1
orr data2, data2, tmp1
.p2align 2
/* Critical loop. */
4:
sub syndrome, data1, #0x01010101
cmp data1, data2
/* check for any zero bytes in first word */
itttt eq
biceq syndrome, syndrome, data1
tsteq syndrome, #0x80808080
ldreq data1, [src1], #4
ldreq data2, [src2], #4
beq 4b
2:
/* There's a zero or a different byte in the word */
S2HI result, data1, #24
S2LO data1, data1, #8
cmp result, #1
it cs
cmpcs result, data2, S2HI #24
it eq
S2LOEQ data2, data2, #8
beq 2b
/* On a big-endian machine, RESULT contains the desired byte in bits
0-7; on a little-endian machine they are in bits 24-31. In
both cases the other bits in RESULT are all zero. For DATA2 the
interesting byte is at the other end of the word, but the
other bits are not necessarily zero. We need a signed result
representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits
up. */
#ifdef __ARM_BIG_ENDIAN
sub result, result, data2, lsr #24
#else
and data2, data2, #255
lsrs result, result, #24
subs result, result, data2
#endif
bx lr
#if 0
/* The assembly code below is based on the following algorithm. */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif
#define body(shift) \
mask = 0xffffffffU RSHIFT shift; \
data1 = *src1++; \
data2 = *src2++; \
do \
{ \
tmp2 = data1 & mask; \
if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
{ \
data2 RSHIFT= shift; \
break; \
} \
if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
{ \
/* See comment in assembler below re syndrome on big-endian */\
if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
data2 RSHIFT= shift; \
else \
{ \
data2 = *src2; \
tmp2 = data1 RSHIFT (32 - shift); \
data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
} \
break; \
} \
data2 = *src2++; \
tmp2 ^= data1; \
if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
{ \
tmp2 = data1 >> (32 - shift); \
data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
break; \
} \
data1 = *src1++; \
} while (1)
const unsigned* src1;
const unsigned* src2;
unsigned data1, data2;
unsigned mask;
unsigned shift;
unsigned b1 = 0x01010101;
char c1, c2;
unsigned tmp2;
while (((unsigned) s1) & 3)
{
c1 = *s1++;
c2 = *s2++;
if (c1 == 0 || c1 != c2)
return c1 - (int)c2;
}
src1 = (unsigned*) (((unsigned)s1) & ~3);
src2 = (unsigned*) (((unsigned)s2) & ~3);
tmp2 = ((unsigned) s2) & 3;
if (tmp2 == 1)
{
body(8);
}
else if (tmp2 == 2)
{
body(16);
}
else
{
body (24);
}
do
{
#ifdef __ARM_BIG_ENDIAN
c1 = (char) tmp2 >> 24;
c2 = (char) data2 >> 24;
#else /* not __ARM_BIG_ENDIAN */
c1 = (char) tmp2;
c2 = (char) data2;
#endif /* not __ARM_BIG_ENDIAN */
tmp2 RSHIFT= 8;
data2 RSHIFT= 8;
} while (c1 != 0 && c1 == c2);
return c1 - c2;
#endif /* 0 */
/* First of all, compare bytes until src1 is word-aligned. */
.Lstrcmp_unaligned:
tst src1, #3
beq 2f
ldrb data1, [src1], #1
ldrb data2, [src2], #1
cmp data1, #1
it cs
cmpcs data1, data2
beq .Lstrcmp_unaligned
sub result, data1, data2
bx lr
2:
stmfd sp!, {r5}
.cfi_def_cfa_offset 4
.cfi_offset 5, -4
ldr data1, [src1], #4
and tmp2, src2, #3
bic src2, src2, #3
ldr data2, [src2], #4
cmp tmp2, #2
beq .Loverlap2
bhi .Loverlap1
/* Critical inner Loop: Block with 3 bytes initial overlap */
.p2align 2
.Loverlap3:
bic tmp2, data1, #MSB
cmp tmp2, data2, S2LO #8
sub syndrome, data1, #0x01010101
bic syndrome, syndrome, data1
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #24
bne 6f
ldr data1, [src1], #4
b .Loverlap3
4:
S2LO data2, data2, #8
b .Lstrcmp_tail
5:
#ifdef __ARM_BIG_ENDIAN
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00. */
tst data1, #0xff000000
itt ne
tstne data1, #0x00ff0000
tstne data1, #0x0000ff00
beq .Lstrcmp_done_equal
#else
bics syndrome, syndrome, #0xff000000
bne .Lstrcmp_done_equal
#endif
ldrb data2, [src2]
S2LO tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #24
#endif
b .Lstrcmp_tail
6:
S2LO tmp2, data1, #24
and data2, data2, #LSB
b .Lstrcmp_tail
/* Critical inner Loop: Block with 2 bytes initial overlap. */
.p2align 2
.Loverlap2:
S2HI tmp2, data1, #16
sub syndrome, data1, #0x01010101
S2LO tmp2, tmp2, #16
bic syndrome, syndrome, data1
cmp tmp2, data2, S2LO #16
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #16
bne 6f
ldr data1, [src1], #4
b .Loverlap2
5:
#ifdef __ARM_BIG_ENDIAN
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00 */
tst data1, #0xff000000
it ne
tstne data1, #0x00ff0000
beq .Lstrcmp_done_equal
#else
lsls syndrome, syndrome, #16
bne .Lstrcmp_done_equal
#endif
ldrh data2, [src2]
S2LO tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #16
#endif
b .Lstrcmp_tail
6:
S2HI data2, data2, #16
S2LO tmp2, data1, #16
4:
S2LO data2, data2, #16
b .Lstrcmp_tail
/* Critical inner Loop: Block with 1 byte initial overlap. */
.p2align 2
.Loverlap1:
and tmp2, data1, #LSB
cmp tmp2, data2, S2LO #24
sub syndrome, data1, #0x01010101
bic syndrome, syndrome, data1
bne 4f
ands syndrome, syndrome, #0x80808080
it eq
ldreq data2, [src2], #4
bne 5f
eor tmp2, tmp2, data1
cmp tmp2, data2, S2HI #8
bne 6f
ldr data1, [src1], #4
b .Loverlap1
4:
S2LO data2, data2, #24
b .Lstrcmp_tail
5:
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00. */
tst data1, #LSB
beq .Lstrcmp_done_equal
ldr data2, [src2], #4
6:
S2LO tmp2, data1, #8
bic data2, data2, #MSB
b .Lstrcmp_tail
.Lstrcmp_done_equal:
mov result, #0
.cfi_remember_state
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.Lstrcmp_tail:
.cfi_restore_state
and r2, tmp2, #LSB
and result, data2, #LSB
cmp result, #1
it cs
cmpcs result, r2
itt eq
S2LOEQ tmp2, tmp2, #8
S2LOEQ data2, data2, #8
beq .Lstrcmp_tail
sub result, r2, result
ldmfd sp!, {r5}
.cfi_restore 5
.cfi_def_cfa_offset 0
bx lr
.cfi_endproc
.size strcmp, . - strcmp
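The sub/bic/tst sequence in the critical loop is the standard zero-byte bit trick: a word w contains a zero byte exactly when (w - 0x01010101) & ~w & 0x80808080 is non-zero. Here is a C sketch of the aligned fast path built on it, with both pointers word-aligned as after the entry checks; has_zero_byte and strcmp_model are illustrative names, not part of the patch.

#include <stdint.h>

static int has_zero_byte(uint32_t w)
{
  /* Bit 7 of each lane survives only where that byte was zero. */

  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

static int strcmp_model(const char *s1, const char *s2)
{
  /* Assumes both inputs are word-aligned, as in the fast path above. */

  for (; ; s1 += 4, s2 += 4)
    {
      uint32_t w1, w2;
      int i;

      __builtin_memcpy(&w1, s1, 4);
      __builtin_memcpy(&w2, s2, 4);

      if (w1 != w2 || has_zero_byte(w1))
        {
          for (i = 0; i < 4; i++)     /* Resolve within the word, in     */
            {                         /* memory order (endian-agnostic). */
              unsigned char c1 = (unsigned char)s1[i];
              unsigned char c2 = (unsigned char)s2[i];

              if (c1 != c2 || c1 == 0)
                {
                  return c1 - c2;
                }
            }
        }
    }
}

The byte-wise fallback compares in memory order, which matches strcmp semantics on either endianness; the assembly achieves the same with the S2LO/S2HI shifts defined at the top of the file.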

View file

@ -0,0 +1,182 @@
/****************************************************************************
* libs/libc/machine/arm/armv8-m/gnu/arch_strlen.S
*
* Copyright (c) 2010-2011,2013 Linaro Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Linaro Limited nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Assumes:
* ARMv6T2 or ARMv7E-M, AArch32
*
* Copyright (c) 2015 ARM Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Linaro nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
.macro def_fn f p2align=0
.text
.p2align \p2align
.global \f
.type \f, %function
\f:
.endm
#ifdef __ARMEB__
#define S2LO lsl
#define S2HI lsr
#else
#define S2LO lsr
#define S2HI lsl
#endif
/* This code requires Thumb. */
#if __ARM_ARCH_PROFILE == 'M'
.arch armv7e-m
#else
.arch armv6t2
#endif
.eabi_attribute Tag_ARM_ISA_use, 0
.thumb
.syntax unified
/* Parameters and result. */
#define srcin r0
#define result r0
/* Internal variables. */
#define src r1
#define data1a r2
#define data1b r3
#define const_m1 r12
#define const_0 r4
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5
def_fn strlen p2align=6
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
pld [src, #32]
bne.w .Lmisaligned8
mov const_0, #0
mov result, #-8
.Lloop_aligned:
/* Bytes 0-7. */
ldrd data1a, data1b, [src]
pld [src, #64]
add result, result, #8
.Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 8-15. */
ldrd data1a, data1b, [src, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 16-23. */
ldrd data1a, data1b, [src, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cbnz data1b, .Lnull_found
/* Bytes 24-31. */
ldrd data1a, data1b, [src, #24]
add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
uadd8 data1b, data1b, const_m1
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
cmp data1b, #0
beq .Lloop_aligned
.Lnull_found:
cmp data1a, #0
itt eq
addeq result, result, #4
moveq data1a, data1b
#ifndef __ARMEB__
rev data1a, data1a
#endif
clz data1a, data1a
ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
bx lr
.Lmisaligned8:
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4
pld [src, #64]
S2HI tmp2, const_m1, tmp2
orn data1a, data1a, tmp2
itt ne
ornne data1b, data1b, tmp2
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
.size strlen, . - strlen