[PATCH] Don't use SSE4_2 instructions on Intel Silvermont Micro Architecture.

Ondřej Bílka neleai@seznam.cz
Mon Jun 24 12:38:00 GMT 2013


On Fri, Jun 21, 2013 at 03:06:19PM +0200, Andi Kleen wrote:
> > To see if this is a case I added Andi. Andi, could you browse sources
> > and tell if you think that benchtests are adequate to measure
> > performance?
> 
> It's hard to say in general and just from review.
> 
> Normally I would recommend to not run the tests in a tight loop,
> but have some cache / branch prediction thrashing code in between
> to measure "cache cold" behaviour too.
> 
> One objective approach to determine how good they are would be
> to assemble benchmark tests on a set of non micro applications that are known
> to be sensitive to memcpy/etc. performance (e.g. according to 
> perf sampling). Then do a set of results for them plus run the benchmarks.
> 

I tried to cross-check my running times against data collected while I ran
my computer and recorded memcpy calls, available here:
kam.mff.cuni.cz/~ondra/dryrun_memcpy.tar.bz2
I got positive results, but I cannot fully replicate the environment.

Then I tried to measure the running time of the code in which I measure memcpy,
using the attached scripts; the results so far are not very conclusive (see below).

The exception is when I LD_PRELOAD a byte-by-byte variant of memcpy, which
increases the running time by around 50%.

> Change the memcpy et.al.  to make it slower (for example or faster if you have
> a good way). Rerun the applications. Rerun the benchmarks.
> 
> Then see how the two sets of benchmark results correlate.
> 
This is a bit problematic. The benchmarks that are currently in libc measure
the minimum over 32 runs on the same data. I am not sure how that should
correlate with my benchmark/program running time.

> This would be significant work of course.
> 
> -Andi

core2
Var1 mean: 1315.415459 standard error: 3.041025
Var2 mean: 1317.700483 standard error: 3.453736
Var3 mean: 1318.371981 standard error: 4.607385
nehalem
Var1 mean: 1243.429864 standard error: 2.983470
Var2 mean: 1239.262443 standard error: 2.217374
Var3 mean: 1242.190045 standard error: 2.989906
ivy_bridge
Var1 mean: 1158.254545 standard error: 4.059097
Var2 mean: 1170.781818 standard error: 6.427400
Var3 mean: 1155.736364 standard error: 3.514919
fx10
Var1 mean: 1457.435294 standard error: 5.376727
Var2 mean: 1460.470588 standard error: 5.031461
Var3 mean: 1461.894118 standard error: 6.820933


-------------- next part --------------
# Benchmark driver.
#
# Usage: <this script> command [args...]
#
# Runs the given command 50000 times under each of three LD_PRELOADed
# memcpy builds and prints one line per iteration consisting of six
# tab-prefixed fields:
#   elapsed ms (memcpy_empty)  elapsed ms (memcpy_new_small)
#   elapsed ms (memcpy_new)    running total 1  running total 2
#   running total 3
# Needs a date(1) that supports %N (nanoseconds), e.g. GNU date.

if [ "$#" -eq 0 ]; then
  echo "usage: $0 command [args...]" >&2
  exit 1
fi

# time_ms LIB CMD [ARGS...]
# Run CMD with LD_PRELOAD=LIB, discarding its stdout and stderr, and leave
# the elapsed wall-clock time in milliseconds in the global variable tt.
time_ms() {
  lib=$1
  shift
  ts=$(date +%s%N)
  # "$@" (not $*) keeps every argument intact, even ones containing
  # whitespace or glob characters.
  LD_PRELOAD=$lib "$@" >/dev/null 2>/dev/null
  tt=$(( ($(date +%s%N) - ts) / 1000000 ))
}

tot1=0
tot2=0
tot3=0
for I in $(seq 1 50000); do
  time_ms ~/memcpy_profile/memcpy_empty.so "$@"
  printf '\t%s' "$tt"
  tot1=$((tot1 + tt))

  time_ms ~/memcpy_profile/memcpy_new_small.so "$@"
  printf '\t%s' "$tt"
  tot2=$((tot2 + tt))

  time_ms ~/memcpy_profile/memcpy_new.so "$@"
  printf '\t%s' "$tt"
  tot3=$((tot3 + tt))

  printf '\t%s\t%s\t%s\n' "$tot1" "$tot2" "$tot3"
done

-------------- next part --------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Running accumulator for a stream of samples.  It is updated one sample
   at a time (Welford-style), so the samples themselves are never stored.  */
typedef struct {
  int n;        /* number of samples folded in so far */
  double mean;  /* mean of those samples */
  double M2;    /* running sum of delta * (sample - updated mean) */
} vars;

/* Put *v into the empty state: no samples, zero mean, zero M2.  */
void init_data(vars *v)
{
  v->M2 = 0;
  v->mean = 0;
  v->n = 0;
}

/* Fold the sample `data` into *v, updating the count, mean and M2.  */
void add_data(vars *v, double data)
{
  double offset;

  v->n += 1;
  /* Distance from the mean *before* this sample is included.  */
  offset = data - v->mean;
  v->mean += offset / v->n;
  /* The second factor uses the mean *after* the update above.  */
  v->M2 += offset * (data - v->mean);
}

int main(){int i;
int t1,t2,t3,tot1,tot2,tot3;
vars v1,v2,v3;
init_data(&v1);init_data(&v2);init_data(&v3);


while(scanf("%i %i %i %i %i %i",&t1,&t2,&t3,&tot1,&tot2,&tot3)!=EOF){
add_data(&v1,t1);add_data(&v2,t2);add_data(&v3,t3);
}
printf("Var1 mean: %f standard error: %f\n",v1.mean,sqrt(v1.M2)/v1.n);
printf("Var2 mean: %f standard error: %f\n",v2.mean,sqrt(v2.M2)/v2.n);
printf("Var3 mean: %f standard error: %f\n",v3.mean,sqrt(v3.M2)/v3.n);

}
-------------- next part --------------
# For each machine, print its name and then feed that machine's recorded
# results file to ./a.out on standard input.
for machine in core2 nehalem ivy_bridge fx10
do
  printf '%s\n' "$machine"
  ./a.out < "kam/memcpy_profile_${machine}/res"
done

-------------- next part --------------
	1141	1175	1160	1141	1175	1160
	1150	1145	1146	2291	2320	2306
	1139	1129	1156	3430	3449	3462
	1120	1158	1157	4550	4607	4619
	1138	1142	1132	5688	5749	5751
	1148	1156	1158	6836	6905	6909
	1148	1158	1170	7984	8063	8079
	1137	1157	1137	9121	9220	9216
	1150	1147	1156	10271	10367	10372
	1167	1309	1167	11438	11676	11539
	1150	1167	1146	12588	12843	12685
	1159	1147	1157	13747	13990	13842
	1162	1167	1157	14909	15157	14999
	1167	1168	1159	16076	16325	16158
	1146	1162	1175	17222	17487	17333
	1157	1149	1187	18379	18636	18520
	1174	1158	1151	19553	19794	19671
	1138	1150	1146	20691	20944	20817
	1145	1136	1212	21836	22080	22029
	1167	1127	1150	23003	23207	23179
	1146	1287	1167	24149	24494	24346
	1148	1151	1154	25297	25645	25500
	1137	1128	1177	26434	26773	26677
	1158	1170	1164	27592	27943	27841
	1229	1140	1129	28821	29083	28970
	1155	1157	1174	29976	30240	30144
	1154	1155	1127	31130	31395	31271
	1161	1154	1157	32291	32549	32428
	1159	1127	1158	33450	33676	33586
	1128	1160	1157	34578	34836	34743
	1096	1098	1137	35674	35934	35880
	1148	1130	1174	36822	37064	37054
	1128	1155	1153	37950	38219	38207
	1097	1147	1148	39047	39366	39355
	1145	1159	1162	40192	40525	40517
	1135	1154	1140	41327	41679	41657
	1155	1162	1466	42482	42841	43123
	1138	1157	1147	43620	43998	44270
	1131	1356	1148	44751	45354	45418
	1137	1165	1130	45888	46519	46548
	1177	1177	1140	47065	47696	47688
	1118	1149	1156	48183	48845	48844
	1157	1177	1139	49340	50022	49983
	1159	1145	1160	50499	51167	51143
	1137	1338	1168	51636	52505	52311
	1176	1199	1155	52812	53704	53466
	1152	1145	1117	53964	54849	54583
	1158	1348	1168	55122	56197	55751
	1138	1157	1129	56260	57354	56880
	1124	1151	1166	57384	58505	58046
	1148	1130	1157	58532	59635	59203
	1146	1139	1265	59678	60774	60468
	1170	1160	1146	60848	61934	61614
	1359	1145	1165	62207	63079	62779
	1153	1577	1159	63360	64656	63938
	1154	1168	1169	64514	65824	65107
	1129	1138	1159	65643	66962	66266
	1168	1124	1148	66811	68086	67414
	1150	1166	1151	67961	69252	68565
	1165	1148	1161	69126	70400	69726
	1158	1143	1145	70284	71543	70871
	1144	1055	1042	71428	72598	71913
	1116	1149	1159	72544	73747	73072
	1169	1167	1172	73713	74914	74244
	1172	1149	1163	74885	76063	75407
	1153	1135	1167	76038	77198	76574
	1356	1158	1149	77394	78356	77723
	1158	1149	1138	78552	79505	78861
	1169	1350	1128	79721	80855	79989
	1178	1138	1148	80899	81993	81137
	1142	1142	1159	82041	83135	82296
	1127	1155	1131	83168	84290	83427
	1176	1360	1136	84344	85650	84563
	1148	1146	1156	85492	86796	85719
	1140	1146	1169	86632	87942	86888
	1107	1360	1085	87739	89302	87973
	1148	1145	1159	88887	90447	89132
	1130	1148	1175	90017	91595	90307
	1148	1139	1147	91165	92734	91454
	1128	1149	1145	92293	93883	92599
	1159	1147	1150	93452	95030	93749
	1155	1170	1168	94607	96200	94917
	1149	1156	1157	95756	97356	96074
	1166	1167	1159	96922	98523	97233
	1348	1156	1150	98270	99679	98383
	1147	1138	1139	99417	100817	99522
	1145	1169	1157	100562	101986	100679
	1159	1170	1165	101721	103156	101844
	1158	1147	1138	102879	104303	102982
	1167	1158	1157	104046	105461	104139
	1168	1149	1156	105214	106610	105295
	1159	1160	1136	106373	107770	106431
	1149	1166	1155	107522	108936	107586
	1152	1156	1156	108674	110092	108742
	1161	1157	1167	109835	111249	109909
	1128	1168	1148	110963	112417	111057
	1159	1149	1145	112122	113566	112202
	1168	1166	1140	113290	114732	113342
	1357	1170	1168	114647	115902	114510
	1145	1137	1166	115792	117039	115676
	1139	1161	1155	116931	118200	116831
	1218	1158	1139	118149	119358	117970
	1165	1158	1170	119314	120516	119140
	1168	1138	1147	120482	121654	120287
	1167	1157	1167	121649	122811	121454
	1148	1130	1135	122797	123941	122589
	1170	1155	1128	123967	125096	123717
	1156	1163	1116	125123	126259	124833
	1149	1366	1149	126272	127625	125982
	1136	1161	1154	127408	128786	127136
	1167	1169	1167	128575	129955	128303
	1157	1160	1156	129732	131115	129459
	1108	1148	1128	130840	132263	130587
	1150	1165	1150	131990	133428	131737
	1148	1159	1143	133138	134587	132880
	1163	1144	1151	134301	135731	134031
	1146	1160	1155	135447	136891	135186
	1325	1173	1177	136772	138064	136363
	1126	1150	1156	137898	139214	137519
	1165	1162	1168	139063	140376	138687
	1145	1158	1137	140208	141534	139824
	1155	1151	1167	141363	142685	140991
	1166	1170	1148	142529	143855	142139
	1157	1160	1137	143686	145015	143276
	1146	1171	1134	144832	146186	144410
	1156	1181	1149	145988	147367	145559
	1162	1161	1160	147150	148528	146719
	1087	1153	1142	148237	149681	147861
	1151	1137	1155	149388	150818	149016
	1157	1158	1357	150545	151976	150373
	1160	1167	1155	151705	153143	151528
	1129	1175	1150	152834	154318	152678
	1171	1173	1159	154005	155491	153837
	1169	1326	1139	155174	156817	154976
	1165	1156	1131	156339	157973	156107
	1158	1147	1129	157497	159120	157236
	1159	1155	1160	158656	160275	158396
	1168	1147	1139	159824	161422	159535
	1157	1139	1168	160981	162561	160703
	1126	1139	1165	162107	163700	161868
	1170	1138	1135	163277	164838	163003
	1179	1163	1151	164456	166001	164154
	1147	1119	1158	165603	167120	165312
	1146	1138	1167	166749	168258	166479
	1159	1161	1135	167908	169419	167614
	1167	1170	1166	169075	170589	168780
	1148	1138	1146	170223	171727	169926
	1152	1035	1080	171375	172762	171006
	1069	1146	1230	172444	173908	172236
	1163	1168	1169	173607	175076	173405
	1171	1136	1157	174778	176212	174562
	1166	1158	1149	175944	177370	175711
	1356	1159	1165	177300	178529	176876
	1148	1148	1138	178448	179677	178014
	1156	1174	1147	179604	180851	179161
	1157	1342	1154	180761	182193	180315
	1146	1157	1148	181907	183350	181463
	1170	1156	1154	183077	184506	182617
	1102	1156	1159	184179	185662	183776
	1158	1148	1156	185337	186810	184932
	1158	1157	1169	186495	187967	186101
	1166	1147	1149	187661	189114	187250
	1160	1145	1148	188821	190259	188398
	1140	1137	1148	189961	191396	189546
	1155	1156	1168	191116	192552	190714
	1138	1131	1124	192254	193683	191838
	1150	1157	1157	193404	194840	192995
	1161	1158	1145	194565	195998	194140
	1170	1166	1149	195735	197164	195289
	1345	1158	1167	197080	198322	196456
	1131	1167	1168	198211	199489	197624
	1356	1157	1148	199567	200646	198772
	1140	1155	1149	200707	201801	199921
	1167	1170	1173	201874	202971	201094
	1151	1160	1335	203025	204131	202429
	1141	1164	1151	204166	205295	203580
	1145	1159	1207	205311	206454	204787
	1147	1139	1148	206458	207593	205935
	1160	1136	1157	207618	208729	207092
	1156	1171	1134	208774	209900	208226
	1159	1168	1120	209933	211068	209346
	1156	1150	1157	211089	212218	210503
	1155	1166	1120	212244	213384	211623
	1147	1140	1154	213391	214524	212777
	1139	1140	1143	214530	215664	213920
	1158	1149	1129	215688	216813	215049
	1149	1156	1168	216837	217969	216217
	1136	1158	1123	217973	219127	217340
	1152	1165	1152	219125	220292	218492
	1167	1198	1140	220292	221490	219632
	1127	1159	1158	221419	222649	220790
	1145	1150	1149	222564	223799	221939
	1157	1145	1148	223721	224944	223087
	1158	1157	1169	224879	226101	224256
	1160	1135	1169	226039	227236	225425
	1167


More information about the Libc-alpha mailing list