[PATCH v2 2/2] powerpc: Add optimized stpncpy for POWER9

Raphael M Zinsly rzinsly@linux.ibm.com
Fri Sep 4 16:59:52 GMT 2020


Benchtest output:
                             	generic_stpncpy	__stpncpy_power9	__stpncpy_power8	__stpncpy_power7	__stpncpy_ppc
Length   16, n   16, alignment  1/ 1:	6.55566	2.5481	2.74063	5.28665	9.96288
Length   16, n   16, alignment  1/ 1:	6.70016	2.54137	2.7108	4.77502	9.91703
Length   16, n   16, alignment  1/ 2:	6.55975	2.56295	2.70641	5.49298	9.59591
Length   16, n   16, alignment  2/ 1:	6.90759	2.52713	2.854	5.48949	9.37664
Length    2, n    4, alignment  7/ 2:	7.90969	2.22698	3.90151	4.6461	8.4503
Length    4, n    2, alignment  2/ 7:	6.14855	1.73403	2.67338	3.05675	6.86316
Length    2, n    4, alignment  7/ 2:	8.40868	2.22338	4.50838	4.51078	9.28489
Length    4, n    2, alignment  2/ 7:	6.14849	1.73402	2.67225	2.85349	6.34342
Length   16, n   16, alignment  2/ 2:	6.963	2.54442	2.87779	5.63547	9.85162
Length   16, n   16, alignment  2/ 2:	6.59452	2.54121	2.84662	5.57178	9.51406
Length   16, n   16, alignment  2/ 4:	6.79115	2.55835	2.84836	5.50427	9.67999
Length   16, n   16, alignment  4/ 2:	6.78419	2.54132	3.54229	5.52563	8.50938
Length    4, n    8, alignment  6/ 4:	8.45703	2.17266	4.80507	3.8714	9.04725
Length    8, n    4, alignment  4/ 6:	6.01753	1.73761	2.8185	2.41527	8.00051
Length    4, n    8, alignment  6/ 4:	7.82081	2.22612	4.80057	3.76103	8.99812
Length    8, n    4, alignment  4/ 6:	6.01752	1.73474	2.82089	2.41524	7.82703
Length   16, n   16, alignment  3/ 3:	6.78194	2.54143	3.21392	5.46447	8.90749
Length   16, n   16, alignment  3/ 3:	6.76324	2.54088	3.22883	5.39689	9.14749
Length   16, n   16, alignment  3/ 6:	7.05278	2.55795	3.22243	5.53422	9.11315
Length   16, n   16, alignment  6/ 3:	6.72881	2.54183	4.58459	5.51658	7.85006
Length    8, n   16, alignment  5/ 6:	7.67184	2.23969	4.13269	4.90728	10.2248
Length   16, n    8, alignment  6/ 5:	5.73672	1.88048	2.6693	4.35579	6.11674
Length    8, n   16, alignment  5/ 6:	7.51707	2.2284	3.67276	4.90637	10.2411
Length   16, n    8, alignment  6/ 5:	5.73665	1.88119	2.57514	3.96351	6.16253
Length   16, n   16, alignment  4/ 4:	7.03577	2.5415	3.66445	4.94157	8.98371
Length   16, n   16, alignment  4/ 4:	6.93549	2.53033	3.65577	5.53815	8.48335
Length   16, n   16, alignment  4/ 0:	6.95106	2.53483	3.48744	5.43759	8.45425
Length   16, n   16, alignment  0/ 4:	6.44601	1.87936	2.41984	5.49488	6.92169
Length   16, n   32, alignment  4/ 0:	9.2036	3.04122	5.78685	6.66434	10.9065
Length   32, n   16, alignment  0/ 4:	6.65504	1.87934	2.41817	6.08706	6.98513
Length   16, n   32, alignment  4/ 0:	9.17461	3.04153	5.77758	6.66444	10.8015
Length   32, n   16, alignment  0/ 4:	6.44123	1.87936	2.41847	5.55207	6.86039
Length   16, n   16, alignment  5/ 5:	6.56005	2.53132	4.22362	5.43527	9.25109
Length   16, n   16, alignment  5/ 5:	6.55552	2.53088	4.22655	5.59271	9.61369
Length   16, n   16, alignment  5/ 2:	6.55553	2.54559	4.31135	5.47438	8.83103
Length   16, n   16, alignment  2/ 5:	6.88992	2.56255	2.84059	5.23185	9.51441
Length   32, n   64, alignment  3/ 2:	12.5054	3.75138	6.42457	10.4719	15.0663
Length   64, n   32, alignment  2/ 3:	9.87185	2.78283	3.17042	7.66624	11.503
Length   32, n   64, alignment  3/ 2:	12.4999	3.74537	6.38161	10.4578	15.1104
Length   64, n   32, alignment  2/ 3:	9.86495	2.77889	3.19171	7.63272	13.9799
Length   16, n   16, alignment  6/ 6:	6.41353	2.5453	4.50915	5.30382	8.45391
Length   16, n   16, alignment  6/ 6:	6.49495	2.54119	4.54493	5.55909	8.1629
Length   16, n   16, alignment  6/ 4:	6.41743	2.54487	4.57202	4.98659	7.53033
Length   16, n   16, alignment  4/ 6:	6.91724	2.54649	3.67868	5.36838	8.45677
Length   64, n  128, alignment  2/ 4:	14.0687	4.93151	8.11667	11.4411	16.9533
Length  128, n   64, alignment  4/ 2:	11.7134	3.58948	4.90121	10.3018	11.6692
Length   64, n  128, alignment  2/ 4:	14.0677	4.93413	7.28129	11.439	22.2186
Length  128, n   64, alignment  4/ 2:	11.7149	3.59312	4.85286	10.3403	19.4651
Length   16, n   16, alignment  7/ 7:	6.76501	2.52563	5.55792	5.44155	8.39997
Length   16, n   16, alignment  7/ 7:	7.16923	2.5265	5.55148	5.60184	7.98311
Length   16, n   16, alignment  7/ 6:	6.76252	2.52629	5.48067	5.51161	7.61026
Length   16, n   16, alignment  6/ 7:	6.65772	2.5521	4.55758	5.48893	7.7301
Length  128, n  256, alignment  1/ 6:	16.2494	7.62034	9.3616	16.2888	19.7029
Length  256, n  128, alignment  6/ 1:	13.4311	4.94455	8.10802	12.2681	15.6941
Length  128, n  256, alignment  1/ 6:	16.2608	7.6209	9.35509	16.2856	38.0277
Length  256, n  128, alignment  6/ 1:	13.4327	4.89474	8.35934	12.2646	34.3268
Length    8, n   16, alignment  0/ 0:	7.20671	2.23256	3.75778	5.63555	7.36414
Length   32, n   16, alignment  0/ 0:	6.4449	1.88	2.41577	2.89598	6.42537
Length    8, n   16, alignment  7/ 2:	7.45976	2.21832	3.91671	4.6524	8.45825
Length   32, n   16, alignment  7/ 2:	6.78267	2.34296	5.59161	5.58598	6.88842
Length   16, n   32, alignment  0/ 0:	9.47971	3.10847	4.74758	4.75377	10.2238
Length   64, n   32, alignment  0/ 0:	8.45634	2.34747	2.59248	2.82356	9.42305
Length   16, n   32, alignment  6/ 4:	9.37784	3.05067	6.92384	9.47727	10.1826
Length   64, n   32, alignment  6/ 4:	9.89233	2.77968	4.63672	7.09838	10.2804
Length   32, n   64, alignment  0/ 0:	11.0813	3.71086	4.43777	5.3549	12.2048
Length  128, n   64, alignment  0/ 0:	9.25192	3.20123	3.53388	4.50794	10.1934
Length   32, n   64, alignment  5/ 6:	12.5099	3.75871	7.29613	9.64902	13.5821
Length  128, n   64, alignment  5/ 6:	11.6115	3.60165	5.71818	9.07288	12.7929
Length   64, n  128, alignment  0/ 0:	12.3671	4.80754	5.46926	6.84492	14.9238
Length  256, n  128, alignment  0/ 0:	8.08427	4.52607	6.47996	5.92086	11.701
Length   64, n  128, alignment  4/ 0:	12.5692	4.89717	7.11058	10.472	15.875
Length  256, n  128, alignment  4/ 0:	12.2945	4.94163	7.11645	12.3831	16.6219
Length  128, n  256, alignment  0/ 0:	13.8948	7.28911	7.78784	9.30215	17.0358
Length  512, n  256, alignment  0/ 0:	10.5266	6.56481	9.14202	9.31096	20.0531
Length  128, n  256, alignment  3/ 2:	16.3534	7.46332	9.90009	18.5282	19.5969
Length  512, n  256, alignment  3/ 2:	17.0519	7.09947	10.1635	23.5411	25.0043
Length  256, n  512, alignment  0/ 0:	15.8935	12.6195	14.0756	14.7553	28.5299
Length 1024, n  512, alignment  0/ 0:	16.3758	10.8028	16.5447	16.8966	37.8653
Length  256, n  512, alignment  2/ 4:	21.16	13.2779	14.3088	26.4475	30.1647
Length 1024, n  512, alignment  2/ 4:	25.3364	12.0899	17.5443	42.7216	47.5803
Length  512, n 1024, alignment  0/ 0:	20.5111	22.9782	19.6648	21.3857	42.4801
Length 2048, n 1024, alignment  0/ 0:	28.4023	19.1577	36.9065	35.4799	68.3555
Length  512, n 1024, alignment  1/ 6:	29.9694	24.3087	22.0513	46.7436	51.5908
Length 2048, n 1024, alignment  1/ 6:	42.9897	21.5402	38.739	78.3266	84.3956


More information about the Libc-alpha mailing list