[PATCH v5 2/8] newlib: libc: strcmp M-profile PACBTI-enablement

Victor Do Nascimento Victor.DoNascimento@arm.com
Fri Jan 6 21:35:20 GMT 2023



On 1/6/23 11:09, Christophe Lyon wrote:
> 
> 
> On 12/21/22 12:21, Victor L. Do Nascimento wrote:
>> Add function prologue/epilogue to conditionally add BTI landing pads
>> and/or PAC code generation & authentication instructions depending on
>> compilation flags.
>>
>> This patch enables PACBTI for all relevant variants of strcmp:
>>       * Newlib for armv8.1-m.main+pacbti
>>       * Newlib for armv8.1-m.main+pacbti+mve
>>       * Newlib-nano
>> ---
>>   newlib/libc/machine/arm/strcmp-arm-tiny.S |  8 +++-
>>   newlib/libc/machine/arm/strcmp-armv7.S    | 57 ++++++++++++++---------
>>   newlib/libc/machine/arm/strcmp-armv7m.S   | 26 +++++++----
>>   3 files changed, 60 insertions(+), 31 deletions(-)
>>
>> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S 
>> b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> index 607a41daf..0bd2a2e6e 100644
>> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> @@ -29,10 +29,14 @@
>>   /* Tiny version of strcmp in ARM state.  Used only when optimizing
>>      for size.  Also supports Thumb-2.  */
>> +#include "arm_asm.h"
>> +
>>       .syntax unified
>>   def_fn strcmp
>> +    .fnstart
>>       .cfi_sections .debug_frame
>>       .cfi_startproc
>> +    prologue
> Why no push_ip=HAVE_PAC_LEAF?
> Is that because this is a tiny version and we don't want to use an extra 
> push ip even if pacbti is enabled?

push_ip=HAVE_PAC_LEAF is reserved for a particular scenario.

If we're PAC-signing leaf functions (that is, HAVE_PAC_LEAF is set) but 
the intra-procedure-call scratch register r12 (ip) is not used in the 
function body, there's no strict need to push the PAC code onto the 
stack, so push_ip defaults to a potentially overridable value of 
PAC_LEAF_PUSH_IP.

If, on the other hand, r12 is used as part of the function body, our 
PAC code will be clobbered. In such cases, whether we push ip should be 
dictated strictly by whether we have requested leaf-function 
PAC-signing, so that the PAC code can later be restored.

Therefore, if r12 is clobbered and HAVE_PAC_LEAF is set, we should push 
ip onto the stack irrespective of any overrides, and that's where 
push_ip=HAVE_PAC_LEAF is important.

As strcmp-arm-tiny.S doesn't use r12, we have flexibility over whether 
or not to push ip onto the stack. That's why we have a bare `prologue' 
(leaving push_ip at its default) and not `prologue 
push_ip=HAVE_PAC_LEAF'. strcmp-armv7.S and strcmp-armv7m.S represent 
the opposite scenario. :-)

Regards,
Victor

>>   1:
>>       ldrb    r2, [r0], #1
>>       ldrb    r3, [r1], #1
>> @@ -42,6 +46,8 @@ def_fn strcmp
>>       beq    1b
>>   2:
>>       subs    r0, r2, r3
>> -    bx    lr
>> +    epilogue
>>       .cfi_endproc
>> +    .cantunwind
>> +    .fnend
>>       .size    strcmp, . - strcmp
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S 
>> b/newlib/libc/machine/arm/strcmp-armv7.S
>> index 2f93bfb73..7cafca151 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
>> @@ -45,6 +45,8 @@
>>       .thumb
>>       .syntax unified
>> +#include "arm_asm.h"
>> +
>>   /* Parameters and result.  */
>>   #define src1        r0
>>   #define src2        r1
>> @@ -91,8 +93,9 @@
>>       ldrd    r4, r5, [sp], #16
>>       .cfi_restore 4
>>       .cfi_restore 5
>> +    .cfi_adjust_cfa_offset -16
>>       sub    result, result, r1, lsr #24
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   #else
>>       /* To use the big-endian trick we'd have to reverse all three 
>> words.
>>          that's slower than this approach.  */
>> @@ -112,22 +115,21 @@
>>       ldrd    r4, r5, [sp], #16
>>       .cfi_restore 4
>>       .cfi_restore 5
>> +    .cfi_adjust_cfa_offset -16
>>       sub    result, result, r1
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   #endif
>>       .endm
>> +
>>       .text
>>       .p2align    5
>> -.Lstrcmp_start_addr:
>> -#ifndef STRCMP_NO_PRECHECK
>> -.Lfastpath_exit:
>> -    sub    r0, r2, r3
>> -    bx    lr
>> -    nop
>> -#endif
>>   def_fn    strcmp
>> +    .fnstart
>> +    .cfi_sections .debug_frame
>> +    .cfi_startproc
>> +    prologue push_ip=HAVE_PAC_LEAF
>>   #ifndef STRCMP_NO_PRECHECK
>>       ldrb    r2, [src1]
>>       ldrb    r3, [src2]
>> @@ -136,16 +138,14 @@ def_fn    strcmp
>>       cmpcs    r2, r3
>>       bne    .Lfastpath_exit
>>   #endif
>> -    .cfi_sections .debug_frame
>> -    .cfi_startproc
>>       strd    r4, r5, [sp, #-16]!
>> -    .cfi_def_cfa_offset 16
>> -    .cfi_offset 4, -16
>> -    .cfi_offset 5, -12
>> +    .cfi_adjust_cfa_offset 16
>> +    .cfi_rel_offset 4, 0
>> +    .cfi_rel_offset 5, 4
>>       orr    tmp1, src1, src2
>>       strd    r6, r7, [sp, #8]
>> -    .cfi_offset 6, -8
>> -    .cfi_offset 7, -4
>> +    .cfi_rel_offset 6, 8
>> +    .cfi_rel_offset 7, 12
>>       mvn    const_m1, #0
>>       lsl    r2, tmp1, #29
>>       cbz    r2, .Lloop_aligned8
>> @@ -270,7 +270,6 @@ def_fn    strcmp
>>       ldr    data1, [src1], #4
>>       beq    .Laligned_m2
>>       bcs    .Laligned_m1
>> -
>>   #ifdef STRCMP_NO_PRECHECK
>>       ldrb    data2, [src2, #1]
>>       uxtb    tmp1, data1, ror #BYTE1_OFFSET
>> @@ -314,10 +313,19 @@ def_fn    strcmp
>>       mov    result, tmp1
>>       ldr    r4, [sp], #16
>>       .cfi_restore 4
>> -    bx    lr
>> +    .cfi_adjust_cfa_offset -16
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   #ifndef STRCMP_NO_PRECHECK
>> +.Lfastpath_exit:
>> +    .cfi_restore_state
>> +    .cfi_remember_state
>> +    sub    r0, r2, r3
>> +    epilogue push_ip=HAVE_PAC_LEAF
>> +
>>   .Laligned_m1:
>> +    .cfi_restore_state
>> +    .cfi_remember_state
>>       add    src2, src2, #4
>>   #endif
>>   .Lsrc1_aligned:
>> @@ -364,8 +372,9 @@ def_fn    strcmp
>>       /* R6/7 Not used in this sequence.  */
>>       .cfi_restore 6
>>       .cfi_restore 7
>> +    .cfi_adjust_cfa_offset -16
>>       neg    result, result
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   6:
>>       .cfi_restore_state
>> @@ -441,7 +450,8 @@ def_fn    strcmp
>>       /* R6/7 not used in this sequence.  */
>>       .cfi_restore 6
>>       .cfi_restore 7
>> -    bx    lr
>> +    .cfi_adjust_cfa_offset -16
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   .Lstrcmp_tail:
>>       .cfi_restore_state
>> @@ -463,7 +473,10 @@ def_fn    strcmp
>>       /* R6/7 not used in this sequence.  */
>>       .cfi_restore 6
>>       .cfi_restore 7
>> +    .cfi_adjust_cfa_offset -16
>>       sub    result, result, data2, lsr #24
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>       .cfi_endproc
>> -    .size strcmp, . - .Lstrcmp_start_addr
>> +    .cantunwind
>> +    .fnend
>> +    .size strcmp, . - strcmp
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S 
>> b/newlib/libc/machine/arm/strcmp-armv7m.S
>> index cdb4912df..825b6e77f 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
>> @@ -29,6 +29,8 @@
>>   /* Very similar to the generic code, but uses Thumb2 as implemented
>>      in ARMv7-M.  */
>> +#include "arm_asm.h"
>> +
>>   /* Parameters and result.  */
>>   #define src1        r0
>>   #define src2        r1
>> @@ -44,8 +46,10 @@
>>       .thumb
>>       .syntax unified
>>   def_fn strcmp
>> +    .fnstart
>>       .cfi_sections .debug_frame
>>       .cfi_startproc
>> +    prologue push_ip=HAVE_PAC_LEAF
>>       eor    tmp1, src1, src2
>>       tst    tmp1, #3
>>       /* Strings not at same byte offset from a word boundary.  */
>> @@ -82,6 +86,7 @@ def_fn strcmp
>>       ldreq    data2, [src2], #4
>>       beq    4b
>>   2:
>> +    .cfi_remember_state
>>       /* There's a zero or a different byte in the word */
>>       S2HI    result, data1, #24
>>       S2LO    data1, data1, #8
>> @@ -106,7 +111,7 @@ def_fn strcmp
>>       lsrs    result, result, #24
>>       subs    result, result, data2
>>   #endif
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   #if 0
>> @@ -205,8 +210,10 @@ def_fn strcmp
>>       /* First of all, compare bytes until src1(sp1) is word-aligned. */
>>   .Lstrcmp_unaligned:
>> +    .cfi_restore_state
>>       tst    src1, #3
>>       beq    2f
>> +    .cfi_remember_state
>>       ldrb    data1, [src1], #1
>>       ldrb    data2, [src2], #1
>>       cmp    data1, #1
>> @@ -214,12 +221,13 @@ def_fn strcmp
>>       cmpcs    data1, data2
>>       beq    .Lstrcmp_unaligned
>>       sub    result, data1, data2
>> -    bx    lr
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   2:
>> +    .cfi_restore_state
>>       stmfd    sp!, {r5}
>> -    .cfi_def_cfa_offset 4
>> -    .cfi_offset 5, -4
>> +    .cfi_adjust_cfa_offset 4
>> +    .cfi_rel_offset 5, 0
>>       ldr    data1, [src1], #4
>>       and    tmp2, src2, #3
>> @@ -355,8 +363,8 @@ def_fn strcmp
>>       .cfi_remember_state
>>       ldmfd    sp!, {r5}
>>       .cfi_restore 5
>> -    .cfi_def_cfa_offset 0
>> -    bx    lr
>> +    .cfi_adjust_cfa_offset -4
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>   .Lstrcmp_tail:
>>       .cfi_restore_state
>> @@ -372,7 +380,9 @@ def_fn strcmp
>>       sub    result, r2, result
>>       ldmfd    sp!, {r5}
>>       .cfi_restore 5
>> -    .cfi_def_cfa_offset 0
>> -    bx    lr
>> +    .cfi_adjust_cfa_offset -4
>> +    epilogue push_ip=HAVE_PAC_LEAF
>>       .cfi_endproc
>> +    .cantunwind
>> +    .fnend
>>       .size strcmp, . - strcmp


More information about the Newlib mailing list