[PATCH v5 2/8] newlib: libc: strcmp M-profile PACBTI-enablement
Victor Do Nascimento
Victor.DoNascimento@arm.com
Fri Jan 6 21:35:20 GMT 2023
On 1/6/23 11:09, Christophe Lyon wrote:
>
>
> On 12/21/22 12:21, Victor L. Do Nascimento wrote:
>> Add function prologue/epilogue to conditionally add BTI landing pads
>> and/or PAC code generation & authentication instructions depending on
>> compilation flags.
>>
>> This patch enables PACBTI for all relevant variants of strcmp:
>> * Newlib for armv8.1-m.main+pacbti
>> * Newlib for armv8.1-m.main+pacbti+mve
>> * Newlib-nano
>> ---
>> newlib/libc/machine/arm/strcmp-arm-tiny.S | 8 +++-
>> newlib/libc/machine/arm/strcmp-armv7.S | 57 ++++++++++++++---------
>> newlib/libc/machine/arm/strcmp-armv7m.S | 26 +++++++----
>> 3 files changed, 60 insertions(+), 31 deletions(-)
>>
>> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> index 607a41daf..0bd2a2e6e 100644
>> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> @@ -29,10 +29,14 @@
>> /* Tiny version of strcmp in ARM state. Used only when optimizing
>> for size. Also supports Thumb-2. */
>> +#include "arm_asm.h"
>> +
>> .syntax unified
>> def_fn strcmp
>> + .fnstart
>> .cfi_sections .debug_frame
>> .cfi_startproc
>> + prologue
> why no push_ip=HAVE_PAC_LEAF ?
> Is that because this is a tiny version and we don't want to use an extra
> push ip even it pacbti is enabled?
push_ip=HAVE_PAC_LEAF is reserved for a particular scenario.
If we're PAC-signing leaf functions (that is, HAVE_PAC_LEAF is set) but
the intraprocedural scratch register r12 is not used in the function
body, there's no strict need to push the pac-code onto the stack, so
push_ip defaults to a potentially overridable value of PAC_LEAF_PUSH_IP.
If, on the other hand, r12 is used as part of the function body, our
PAC-code will be corrupted. In such cases, pushing ip should be strictly
dictated by the fact that we have requested leaf function PAC-signing,
so that it can later be restored.
Therefore, if r12 is corrupted and HAVE_PAC_LEAF is set we should push
ip to the stack irrespective of any overrides, and that's where
push_ip=HAVE_PAC_LEAF is important.
as strcmp-arm-tiny.S doesn't use r12, we have flexibility over whether
or not to push ip onto stack. That's why we have simply `push_ip' and
not `push_ip=HAVE_PAC_LEAF'. strcmp-armv7.S and strcmp-armv7m.S
represent the opposite scenario. :-)
Regards,
Victor
>> 1:
>> ldrb r2, [r0], #1
>> ldrb r3, [r1], #1
>> @@ -42,6 +46,8 @@ def_fn strcmp
>> beq 1b
>> 2:
>> subs r0, r2, r3
>> - bx lr
>> + epilogue
>> .cfi_endproc
>> + .cantunwind
>> + .fnend
>> .size strcmp, . - strcmp
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S
>> b/newlib/libc/machine/arm/strcmp-armv7.S
>> index 2f93bfb73..7cafca151 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
>> @@ -45,6 +45,8 @@
>> .thumb
>> .syntax unified
>> +#include "arm_asm.h"
>> +
>> /* Parameters and result. */
>> #define src1 r0
>> #define src2 r1
>> @@ -91,8 +93,9 @@
>> ldrd r4, r5, [sp], #16
>> .cfi_restore 4
>> .cfi_restore 5
>> + .cfi_adjust_cfa_offset -16
>> sub result, result, r1, lsr #24
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> #else
>> /* To use the big-endian trick we'd have to reverse all three
>> words.
>> that's slower than this approach. */
>> @@ -112,22 +115,21 @@
>> ldrd r4, r5, [sp], #16
>> .cfi_restore 4
>> .cfi_restore 5
>> + .cfi_adjust_cfa_offset -16
>> sub result, result, r1
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> #endif
>> .endm
>> +
>> .text
>> .p2align 5
>> -.Lstrcmp_start_addr:
>> -#ifndef STRCMP_NO_PRECHECK
>> -.Lfastpath_exit:
>> - sub r0, r2, r3
>> - bx lr
>> - nop
>> -#endif
>> def_fn strcmp
>> + .fnstart
>> + .cfi_sections .debug_frame
>> + .cfi_startproc
>> + prologue push_ip=HAVE_PAC_LEAF
>> #ifndef STRCMP_NO_PRECHECK
>> ldrb r2, [src1]
>> ldrb r3, [src2]
>> @@ -136,16 +138,14 @@ def_fn strcmp
>> cmpcs r2, r3
>> bne .Lfastpath_exit
>> #endif
>> - .cfi_sections .debug_frame
>> - .cfi_startproc
>> strd r4, r5, [sp, #-16]!
>> - .cfi_def_cfa_offset 16
>> - .cfi_offset 4, -16
>> - .cfi_offset 5, -12
>> + .cfi_adjust_cfa_offset 16
>> + .cfi_rel_offset 4, 0
>> + .cfi_rel_offset 5, 4
>> orr tmp1, src1, src2
>> strd r6, r7, [sp, #8]
>> - .cfi_offset 6, -8
>> - .cfi_offset 7, -4
>> + .cfi_rel_offset 6, 8
>> + .cfi_rel_offset 7, 12
>> mvn const_m1, #0
>> lsl r2, tmp1, #29
>> cbz r2, .Lloop_aligned8
>> @@ -270,7 +270,6 @@ def_fn strcmp
>> ldr data1, [src1], #4
>> beq .Laligned_m2
>> bcs .Laligned_m1
>> -
>> #ifdef STRCMP_NO_PRECHECK
>> ldrb data2, [src2, #1]
>> uxtb tmp1, data1, ror #BYTE1_OFFSET
>> @@ -314,10 +313,19 @@ def_fn strcmp
>> mov result, tmp1
>> ldr r4, [sp], #16
>> .cfi_restore 4
>> - bx lr
>> + .cfi_adjust_cfa_offset -16
>> + epilogue push_ip=HAVE_PAC_LEAF
>> #ifndef STRCMP_NO_PRECHECK
>> +.Lfastpath_exit:
>> + .cfi_restore_state
>> + .cfi_remember_state
>> + sub r0, r2, r3
>> + epilogue push_ip=HAVE_PAC_LEAF
>> +
>> .Laligned_m1:
>> + .cfi_restore_state
>> + .cfi_remember_state
>> add src2, src2, #4
>> #endif
>> .Lsrc1_aligned:
>> @@ -364,8 +372,9 @@ def_fn strcmp
>> /* R6/7 Not used in this sequence. */
>> .cfi_restore 6
>> .cfi_restore 7
>> + .cfi_adjust_cfa_offset -16
>> neg result, result
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> 6:
>> .cfi_restore_state
>> @@ -441,7 +450,8 @@ def_fn strcmp
>> /* R6/7 not used in this sequence. */
>> .cfi_restore 6
>> .cfi_restore 7
>> - bx lr
>> + .cfi_adjust_cfa_offset -16
>> + epilogue push_ip=HAVE_PAC_LEAF
>> .Lstrcmp_tail:
>> .cfi_restore_state
>> @@ -463,7 +473,10 @@ def_fn strcmp
>> /* R6/7 not used in this sequence. */
>> .cfi_restore 6
>> .cfi_restore 7
>> + .cfi_adjust_cfa_offset -16
>> sub result, result, data2, lsr #24
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> .cfi_endproc
>> - .size strcmp, . - .Lstrcmp_start_addr
>> + .cantunwind
>> + .fnend
>> + .size strcmp, . - strcmp
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S
>> b/newlib/libc/machine/arm/strcmp-armv7m.S
>> index cdb4912df..825b6e77f 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
>> @@ -29,6 +29,8 @@
>> /* Very similar to the generic code, but uses Thumb2 as implemented
>> in ARMv7-M. */
>> +#include "arm_asm.h"
>> +
>> /* Parameters and result. */
>> #define src1 r0
>> #define src2 r1
>> @@ -44,8 +46,10 @@
>> .thumb
>> .syntax unified
>> def_fn strcmp
>> + .fnstart
>> .cfi_sections .debug_frame
>> .cfi_startproc
>> + prologue push_ip=HAVE_PAC_LEAF
>> eor tmp1, src1, src2
>> tst tmp1, #3
>> /* Strings not at same byte offset from a word boundary. */
>> @@ -82,6 +86,7 @@ def_fn strcmp
>> ldreq data2, [src2], #4
>> beq 4b
>> 2:
>> + .cfi_remember_state
>> /* There's a zero or a different byte in the word */
>> S2HI result, data1, #24
>> S2LO data1, data1, #8
>> @@ -106,7 +111,7 @@ def_fn strcmp
>> lsrs result, result, #24
>> subs result, result, data2
>> #endif
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> #if 0
>> @@ -205,8 +210,10 @@ def_fn strcmp
>> /* First of all, compare bytes until src1(sp1) is word-aligned. */
>> .Lstrcmp_unaligned:
>> + .cfi_restore_state
>> tst src1, #3
>> beq 2f
>> + .cfi_remember_state
>> ldrb data1, [src1], #1
>> ldrb data2, [src2], #1
>> cmp data1, #1
>> @@ -214,12 +221,13 @@ def_fn strcmp
>> cmpcs data1, data2
>> beq .Lstrcmp_unaligned
>> sub result, data1, data2
>> - bx lr
>> + epilogue push_ip=HAVE_PAC_LEAF
>> 2:
>> + .cfi_restore_state
>> stmfd sp!, {r5}
>> - .cfi_def_cfa_offset 4
>> - .cfi_offset 5, -4
>> + .cfi_adjust_cfa_offset 4
>> + .cfi_rel_offset 5, 0
>> ldr data1, [src1], #4
>> and tmp2, src2, #3
>> @@ -355,8 +363,8 @@ def_fn strcmp
>> .cfi_remember_state
>> ldmfd sp!, {r5}
>> .cfi_restore 5
>> - .cfi_def_cfa_offset 0
>> - bx lr
>> + .cfi_adjust_cfa_offset -4
>> + epilogue push_ip=HAVE_PAC_LEAF
>> .Lstrcmp_tail:
>> .cfi_restore_state
>> @@ -372,7 +380,9 @@ def_fn strcmp
>> sub result, r2, result
>> ldmfd sp!, {r5}
>> .cfi_restore 5
>> - .cfi_def_cfa_offset 0
>> - bx lr
>> + .cfi_adjust_cfa_offset -4
>> + epilogue push_ip=HAVE_PAC_LEAF
>> .cfi_endproc
>> + .cantunwind
>> + .fnend
>> .size strcmp, . - strcmp
More information about the Newlib
mailing list