[PATCH v5 2/8] newlib: libc: strcmp M-profile PACBTI-enablement
Christophe Lyon
christophe.lyon@arm.com
Fri Jan 6 11:09:33 GMT 2023
On 12/21/22 12:21, Victor L. Do Nascimento wrote:
> Add function prologue/epilogue to conditionally add BTI landing pads
> and/or PAC code generation & authentication instructions depending on
> compilation flags.
>
> This patch enables PACBTI for all relevant variants of strcmp:
> * Newlib for armv8.1-m.main+pacbti
> * Newlib for armv8.1-m.main+pacbti+mve
> * Newlib-nano
> ---
> newlib/libc/machine/arm/strcmp-arm-tiny.S | 8 +++-
> newlib/libc/machine/arm/strcmp-armv7.S | 57 ++++++++++++++---------
> newlib/libc/machine/arm/strcmp-armv7m.S | 26 +++++++----
> 3 files changed, 60 insertions(+), 31 deletions(-)
>
> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S b/newlib/libc/machine/arm/strcmp-arm-tiny.S
> index 607a41daf..0bd2a2e6e 100644
> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
> @@ -29,10 +29,14 @@
> /* Tiny version of strcmp in ARM state. Used only when optimizing
> for size. Also supports Thumb-2. */
>
> +#include "arm_asm.h"
> +
> .syntax unified
> def_fn strcmp
> + .fnstart
> .cfi_sections .debug_frame
> .cfi_startproc
> + prologue
why no push_ip=HAVE_PAC_LEAF ?
Is that because this is a tiny version and we don't want to use an extra
push ip even it pacbti is enabled?
> 1:
> ldrb r2, [r0], #1
> ldrb r3, [r1], #1
> @@ -42,6 +46,8 @@ def_fn strcmp
> beq 1b
> 2:
> subs r0, r2, r3
> - bx lr
> + epilogue
> .cfi_endproc
> + .cantunwind
> + .fnend
> .size strcmp, . - strcmp
> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S
> index 2f93bfb73..7cafca151 100644
> --- a/newlib/libc/machine/arm/strcmp-armv7.S
> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
> @@ -45,6 +45,8 @@
> .thumb
> .syntax unified
>
> +#include "arm_asm.h"
> +
> /* Parameters and result. */
> #define src1 r0
> #define src2 r1
> @@ -91,8 +93,9 @@
> ldrd r4, r5, [sp], #16
> .cfi_restore 4
> .cfi_restore 5
> + .cfi_adjust_cfa_offset -16
> sub result, result, r1, lsr #24
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
> #else
> /* To use the big-endian trick we'd have to reverse all three words.
> that's slower than this approach. */
> @@ -112,22 +115,21 @@
> ldrd r4, r5, [sp], #16
> .cfi_restore 4
> .cfi_restore 5
> + .cfi_adjust_cfa_offset -16
> sub result, result, r1
>
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
> #endif
> .endm
>
> +
> .text
> .p2align 5
> -.Lstrcmp_start_addr:
> -#ifndef STRCMP_NO_PRECHECK
> -.Lfastpath_exit:
> - sub r0, r2, r3
> - bx lr
> - nop
> -#endif
> def_fn strcmp
> + .fnstart
> + .cfi_sections .debug_frame
> + .cfi_startproc
> + prologue push_ip=HAVE_PAC_LEAF
> #ifndef STRCMP_NO_PRECHECK
> ldrb r2, [src1]
> ldrb r3, [src2]
> @@ -136,16 +138,14 @@ def_fn strcmp
> cmpcs r2, r3
> bne .Lfastpath_exit
> #endif
> - .cfi_sections .debug_frame
> - .cfi_startproc
> strd r4, r5, [sp, #-16]!
> - .cfi_def_cfa_offset 16
> - .cfi_offset 4, -16
> - .cfi_offset 5, -12
> + .cfi_adjust_cfa_offset 16
> + .cfi_rel_offset 4, 0
> + .cfi_rel_offset 5, 4
> orr tmp1, src1, src2
> strd r6, r7, [sp, #8]
> - .cfi_offset 6, -8
> - .cfi_offset 7, -4
> + .cfi_rel_offset 6, 8
> + .cfi_rel_offset 7, 12
> mvn const_m1, #0
> lsl r2, tmp1, #29
> cbz r2, .Lloop_aligned8
> @@ -270,7 +270,6 @@ def_fn strcmp
> ldr data1, [src1], #4
> beq .Laligned_m2
> bcs .Laligned_m1
> -
> #ifdef STRCMP_NO_PRECHECK
> ldrb data2, [src2, #1]
> uxtb tmp1, data1, ror #BYTE1_OFFSET
> @@ -314,10 +313,19 @@ def_fn strcmp
> mov result, tmp1
> ldr r4, [sp], #16
> .cfi_restore 4
> - bx lr
> + .cfi_adjust_cfa_offset -16
> + epilogue push_ip=HAVE_PAC_LEAF
>
> #ifndef STRCMP_NO_PRECHECK
> +.Lfastpath_exit:
> + .cfi_restore_state
> + .cfi_remember_state
> + sub r0, r2, r3
> + epilogue push_ip=HAVE_PAC_LEAF
> +
> .Laligned_m1:
> + .cfi_restore_state
> + .cfi_remember_state
> add src2, src2, #4
> #endif
> .Lsrc1_aligned:
> @@ -364,8 +372,9 @@ def_fn strcmp
> /* R6/7 Not used in this sequence. */
> .cfi_restore 6
> .cfi_restore 7
> + .cfi_adjust_cfa_offset -16
> neg result, result
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
>
> 6:
> .cfi_restore_state
> @@ -441,7 +450,8 @@ def_fn strcmp
> /* R6/7 not used in this sequence. */
> .cfi_restore 6
> .cfi_restore 7
> - bx lr
> + .cfi_adjust_cfa_offset -16
> + epilogue push_ip=HAVE_PAC_LEAF
>
> .Lstrcmp_tail:
> .cfi_restore_state
> @@ -463,7 +473,10 @@ def_fn strcmp
> /* R6/7 not used in this sequence. */
> .cfi_restore 6
> .cfi_restore 7
> + .cfi_adjust_cfa_offset -16
> sub result, result, data2, lsr #24
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
> .cfi_endproc
> - .size strcmp, . - .Lstrcmp_start_addr
> + .cantunwind
> + .fnend
> + .size strcmp, . - strcmp
> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S
> index cdb4912df..825b6e77f 100644
> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
> @@ -29,6 +29,8 @@
> /* Very similar to the generic code, but uses Thumb2 as implemented
> in ARMv7-M. */
>
> +#include "arm_asm.h"
> +
> /* Parameters and result. */
> #define src1 r0
> #define src2 r1
> @@ -44,8 +46,10 @@
> .thumb
> .syntax unified
> def_fn strcmp
> + .fnstart
> .cfi_sections .debug_frame
> .cfi_startproc
> + prologue push_ip=HAVE_PAC_LEAF
> eor tmp1, src1, src2
> tst tmp1, #3
> /* Strings not at same byte offset from a word boundary. */
> @@ -82,6 +86,7 @@ def_fn strcmp
> ldreq data2, [src2], #4
> beq 4b
> 2:
> + .cfi_remember_state
> /* There's a zero or a different byte in the word */
> S2HI result, data1, #24
> S2LO data1, data1, #8
> @@ -106,7 +111,7 @@ def_fn strcmp
> lsrs result, result, #24
> subs result, result, data2
> #endif
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
>
>
> #if 0
> @@ -205,8 +210,10 @@ def_fn strcmp
>
> /* First of all, compare bytes until src1(sp1) is word-aligned. */
> .Lstrcmp_unaligned:
> + .cfi_restore_state
> tst src1, #3
> beq 2f
> + .cfi_remember_state
> ldrb data1, [src1], #1
> ldrb data2, [src2], #1
> cmp data1, #1
> @@ -214,12 +221,13 @@ def_fn strcmp
> cmpcs data1, data2
> beq .Lstrcmp_unaligned
> sub result, data1, data2
> - bx lr
> + epilogue push_ip=HAVE_PAC_LEAF
>
> 2:
> + .cfi_restore_state
> stmfd sp!, {r5}
> - .cfi_def_cfa_offset 4
> - .cfi_offset 5, -4
> + .cfi_adjust_cfa_offset 4
> + .cfi_rel_offset 5, 0
>
> ldr data1, [src1], #4
> and tmp2, src2, #3
> @@ -355,8 +363,8 @@ def_fn strcmp
> .cfi_remember_state
> ldmfd sp!, {r5}
> .cfi_restore 5
> - .cfi_def_cfa_offset 0
> - bx lr
> + .cfi_adjust_cfa_offset -4
> + epilogue push_ip=HAVE_PAC_LEAF
>
> .Lstrcmp_tail:
> .cfi_restore_state
> @@ -372,7 +380,9 @@ def_fn strcmp
> sub result, r2, result
> ldmfd sp!, {r5}
> .cfi_restore 5
> - .cfi_def_cfa_offset 0
> - bx lr
> + .cfi_adjust_cfa_offset -4
> + epilogue push_ip=HAVE_PAC_LEAF
> .cfi_endproc
> + .cantunwind
> + .fnend
> .size strcmp, . - strcmp
More information about the Newlib
mailing list