[PATCH] S390: Optimize __memcpy_z196.

Stefan Liebler stli@linux.ibm.com
Fri Jun 26 07:47:42 GMT 2020


committed

On 6/25/20 10:18 AM, Stefan Liebler via Libc-alpha wrote:
> Just for information: if nobody objects, I'll commit this patch tomorrow.
> 
> On 6/19/20 3:49 PM, Stefan Liebler wrote:
>> This patch introduces an extra loop without pfd instructions
>> as it turned out that the pfd instructions are useful
>> for copies >=64KB but are counterproductive for smaller copies.
>> ---
>>  sysdeps/s390/memcpy-z900.S | 21 +++++++++++++++------
>>  1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
>> index f2e9aaeb2d..dc2f491ec3 100644
>> --- a/sysdeps/s390/memcpy-z900.S
>> +++ b/sysdeps/s390/memcpy-z900.S
>> @@ -184,25 +184,34 @@ ENTRY(MEMCPY_Z196)
>>  	je      .L_Z196_4
>>  .L_Z196_start2:
>>  	aghi    %r4,-1
>> -	srlg    %r5,%r4,8
>> -	ltgr    %r5,%r5
>> +	risbg	%r5,%r4,8,128+63,56 # r5 = r4 / 256
>>  	jne     .L_Z196_5
>>  .L_Z196_3:
>>  	exrl    %r4,.L_Z196_14
>>  .L_Z196_4:
>>  	br      %r14
>>  .L_Z196_5:
>> -	cgfi    %r5,262144      # Switch to mvcle for copies >64MB
>> -	jh      __memcpy_mvcle
>> +	cgfi	%r5,255		# Switch to loop with pfd for copies >=64kB
>> +	jh	.L_Z196_6
>>  .L_Z196_2:
>> -	pfd     1,768(%r3)
>> -	pfd     2,768(%r1)
>>  	mvc     0(256,%r1),0(%r3)
>>  	aghi    %r5,-1
>>  	la      %r1,256(%r1)
>>  	la      %r3,256(%r3)
>>  	jne     .L_Z196_2
>>  	j       .L_Z196_3
>> +.L_Z196_6:
>> +	cgfi    %r5,262144      # Switch to mvcle for copies >64MB
>> +	jh      __memcpy_mvcle
>> +.L_Z196_7:
>> +	pfd     1,1024(%r3)
>> +	pfd     2,1024(%r1)
>> +	mvc     0(256,%r1),0(%r3)
>> +	aghi    %r5,-1
>> +	la      %r1,256(%r1)
>> +	la      %r3,256(%r3)
>> +	jne     .L_Z196_7
>> +	j       .L_Z196_3
>>  .L_Z196_14:
>>  	mvc     0(1,%r1),0(%r3)
>>  END(MEMCPY_Z196)
>>
> 



More information about the Libc-alpha mailing list