[PATCH 3/4] powerpc: Runtime selection between sc and scv for syscalls

Matheus Castanho msc@linux.ibm.com
Thu Nov 19 20:34:32 GMT 2020



On 11/18/20 4:00 PM, Paul A. Clarke wrote:
> On Wed, Nov 18, 2020 at 11:47:02AM -0300, Matheus Castanho via Libc-alpha wrote:
>> Linux kernel v5.9 added support for system calls using the scv
>> instruction for POWER9 and later.  The new codepath provides better
>> performance (see below) if compared to using sc.  For the
>> foreseeable future, both sc and scv mechanisms will co-exist, so this
>> patch enables glibc to do a runtime check and always use scv when it is
>> available.
> 
> nit: "always" is perhaps too strong here, as there are exceptions, as noted
> in your message further below.

Right. I'll change that on v2.

> 
>> Before issuing the system call to the kernel, we check hwcap2 in the TCB
>> for PPC_FEATURE2_SCV to see if scv is supported by the kernel.  If not,
>> we fallback to sc and keep the old behavior.
>>
>> The kernel implements a different error return convention for scv, so
>> when returning from a system call we need to handle the return value
>> differently depending on the instruction we used to enter the kernel.
>>
>> For syscalls implemented in ASM, entry and exit are implemented by
>> different macros (PSEUDO and PSEUDO_RET, resp.), which may be used in
>> sequence (e.g. for templated syscalls) or with other instructions in
>> between (e.g. clone).  To avoid accessing the TCB a second time on
>> PSEUDO_RET to check which instruction we used, the value read from
>> hwcap2 is cached on a non-volatile register.
>>
>> This is not needed when using INTERNAL_SYSCALL macro, since entry and
>> exit are bundled into the same inline asm directive.
>>
>> Since system calls may be called before the TCB has been setup (e.g.
>> inside the dynamic loader), we also check the value of the thread
>> pointer before effectively accessing the TCB.  For such situations in
>> which the availability of scv cannot be determined, sc is always used.
>>
>> Support for scv in syscalls implemented in their own ASM file (clone and
>> vfork) will be added later. For now simply use sc as before.
>>
>> Average performance over 1M calls for each syscall "type":
>>   - stat: C wrapper calling INTERNAL_SYSCALL
>>   - getpid: templated ASM syscall
>>   - syscall: call to gettid using syscall function
>>
>>   Standard:
>>      stat : 1.573445 us / ~3619 cycles
>>    getpid : 0.164986 us / ~379 cycles
>>   syscall : 0.162743 us / ~374 cycles
>>
>>   With scv:
>>      stat : 1.537049 us / ~3535 cycles <~ -84 cycles  / -2.32%
>>    getpid : 0.109923 us / ~253 cycles  <~ -126 cycles / -33.25%
>>   syscall : 0.116410 us / ~268 cycles  <~ -106 cycles / -28.34%
>>
>> Tested on powerpc, powerpc64, powerpc64le (with and without scv)
>> ---
>>  sysdeps/powerpc/powerpc32/sysdep.h            | 19 ++--
>>  sysdeps/powerpc/powerpc64/sysdep.h            | 90 ++++++++++++++++++-
>>  .../unix/sysv/linux/powerpc/powerpc64/clone.S |  9 +-
>>  .../unix/sysv/linux/powerpc/powerpc64/vfork.S |  6 +-
>>  sysdeps/unix/sysv/linux/powerpc/syscall.S     | 11 ++-
>>  sysdeps/unix/sysv/linux/powerpc/sysdep.h      | 78 +++++++++++-----
>>  6 files changed, 174 insertions(+), 39 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
>> index 829eec266a..bff18bdc8b 100644
>> --- a/sysdeps/powerpc/powerpc32/sysdep.h
>> +++ b/sysdeps/powerpc/powerpc32/sysdep.h
>> @@ -90,9 +90,12 @@ GOT_LABEL:			;					      \
>>    cfi_endproc;								      \
>>    ASM_SIZE_DIRECTIVE(name)
>>
>> -#define DO_CALL(syscall)						      \
>> -    li 0,syscall;							      \
>> -    sc
>> +#define DO_CALL(syscall) \
>> +	li 0,syscall; \
>> +	DO_CALL_SC
> 
> nit: there are some innocuous whitespace changes which could be avoided to minimize
> the diff (moving the '\' closer and changing from spaces to a tab).
> 

Fixed.

>> +
>> +#define DO_CALL_SC \
>> +	sc
>>
>>  #undef JUMPTARGET
>>  #ifdef PIC
>> @@ -106,14 +109,20 @@ GOT_LABEL:			;					      \
>>  # define HIDDEN_JUMPTARGET(name) __GI_##name##@local
>>  #endif
>>
>> +#define TAIL_CALL_SYSCALL_ERROR \
>> +    b __syscall_error@local
>> +
>>  #define PSEUDO(name, syscall_name, args)				      \
>>    .section ".text";							      \
>>    ENTRY (name)								      \
>>      DO_CALL (SYS_ify (syscall_name));
>>
>> +#define RET_SC \
>> +    bnslr+;
>> +
>>  #define PSEUDO_RET							      \
>> -    bnslr+;								      \
>> -    b __syscall_error@local
>> +    RET_SC;								      \
>> +    TAIL_CALL_SYSCALL_ERROR
>>  #define ret PSEUDO_RET
>>
>>  #undef	PSEUDO_END
>> diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
>> index d557098898..2d7dde64da 100644
>> --- a/sysdeps/powerpc/powerpc64/sysdep.h
>> +++ b/sysdeps/powerpc/powerpc64/sysdep.h
>> @@ -17,6 +17,7 @@
>>     <https://www.gnu.org/licenses/>.  */
>>
>>  #include <sysdeps/powerpc/sysdep.h>
>> +#include <tls.h>
>>
>>  #ifdef __ASSEMBLER__
>>
>> @@ -263,10 +264,72 @@ LT_LABELSUFFIX(name,_name_end): ; \
>>    TRACEBACK_MASK(name,mask);	\
>>    END_2(name)
>>
>> +/* We will allocate a new frame to save LR and the non-volatile register used to
>> +   read the TCB when checking for scv support on syscall code.  We actually just
>> +   need the minimum frame size plus room for 1 reg (64 bits).  But the ABI
> 
> nit: Since everything is in bytes below, suggest changing "64 bits" to "8 bytes".

Fixed.

> 
>> +   mandates stack frames should be aligned at 16 Bytes, so we end up allocating
>> +   a bit more space then what will actually be used.  */
>> +#define SCV_FRAME_SIZE (FRAME_MIN_SIZE+16)
> 
> 8 for the register save area + 8 more to maintain 16-byte alignment.  OK.
> 
>> +#define SCV_FRAME_NVOLREG_SAVE FRAME_MIN_SIZE
>> +
>> +/* Allocate frame and save register */
>> +#define NVOLREG_SAVE \
>> +    stdu r1,-SCV_FRAME_SIZE(r1); \
>> +    std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>> +    cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
>> +
>> +/* Restore register and destroy frame */
>> +#define NVOLREG_RESTORE	\
>> +    ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>> +    addi r1,r1,SCV_FRAME_SIZE; \
>> +    cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
>> +
>> +/* Check PPC_FEATURE2_SCV bit from hwcap2 in the TCB and update CR0
>> + * accordingly.  First, we check if the thread pointer != 0, so we don't try to
>> + * access the TCB before it has been initialized, e.g. inside the dynamic
>> + * loader.  If it is already initialized, check if scv is available.  On both
>> + * negative cases, go to JUMPFALSE (label given by the macro's caller).  We
>> + * save the value we read from the TCB in a non-volatile register so we can
>> + * reuse it later when exiting from the syscall in PSEUDO_RET.  */
> 
> Florian already mentioned removing the leading '*' for proper formatting.

Fixed.

> 
>> +    .macro CHECK_SCV_SUPPORT REG JUMPFALSE
>> +
>> +    /* Check if thread pointer has already been setup */
>> +    cmpdi r13,0
>> +    beq \JUMPFALSE
>> +
>> +    /* Read PPC_FEATURE2_SCV from TCB and store it in REG */
>> +    ld \REG,TCB_HWCAP(PT_THREAD_POINTER)
>> +    andis. \REG,\REG,PPC_FEATURE2_SCV>>16
>> +
>> +    beq \JUMPFALSE
>> +    .endm
>> +
>> +/* Before doing the syscall, check if we can use scv.  scv is supported by P9
>> + * and later with Linux v5.9 and later.  If so, use it.  Otherwise, fallback to
>> + * sc.  We use a non-volatile register to save hwcap2 from the TCB, so we need
>> + * to save its content beforehand. */
> 
> Here, too. Also need one more space after '.'.

Fixed and fixed.

> 
>>  #define DO_CALL(syscall) \
>> -    li 0,syscall; \
>> +    li r0,syscall; \
>> +    NVOLREG_SAVE; \
>> +    CHECK_SCV_SUPPORT r31 0f; \
>> +    DO_CALL_SCV; \
>> +    b 1f; \
>> +0:  DO_CALL_SC; \
>> +1:
>> +
>> +/* DO_CALL_SC and DO_CALL_SCV expect the syscall number to be loaded on r0.  */
> 
> nit: s/loaded on/in/

Fixed too.

> 
> rest looks OK.
> 
> With the comments fixed, LGTM. Fixing the nits is up to you.
> 
> PC
> 

Queued changes for v2.

Thanks,
Matheus Castanho


More information about the Libc-alpha mailing list