--- Begin Message ---
- From: Olaf Lenz <olenz at Physik dot Uni-Bielefeld dot DE>
- To: Giulio Bottazzi <giulio dot bottazzi at libero dot it>
- Date: Tue, 19 Oct 2004 13:36:13 +0200
- Subject: Re: random variate from power exponential distribution: continue
- References: <20040926144946.2e6fd425.giulio.bottazzi@libero.it> <20040927160008.GI22210@austin.ibm.com> <20040927182502.402ceda9.giulio.bottazzi@libero.it> <20040927165234.GK22210@austin.ibm.com> <20040927202044.22929898.giulio.bottazzi@libero.it> <16730.58010.538409.26222@network-theory.co.uk> <20040930100652.23464fbc.giulio.bottazzi@libero.it> <16733.35626.396867.859567@network-theory.co.uk> <20041018184332.654fb99b.giulio.bottazzi@libero.it>
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Hello!
| I hope that my surprise about this finding is due to my ignorance and
| that someone could easily explain me the reason of these differences
| across systems. It would be nice if someone can run the attached program
| on his/her machines (just compile it with the line above and issue the
| command "./test" without options) and let me know about the results.
I've got some thing to say on the architecture struggle... I'm working
with a (self-programmed) Monte-Carlo simulation, and I noticed that the
simulation is running MUCH faster (more than double speed!) on an AMD
processor than on a (comparable) Intel processor. Therefore I was
interested in your problem.
First a simple remark: the timing procedure you use is not optimal. On a
multiuser system, you would get different timings each time you run the
test program. Better would be to use the times(2) function from
"sys/times.h" and to use the user time field tms_utime.
In my case this was necessary, therefore I've modified your code
accordingly (modified version of your code is attached).
I've tested on three platforms:
- - AMD Athlon(TM) XP 2400+, SuSE Linux 9.1, gcc 3.3.3
- - Intel(R) Pentium(R) 4 CPU 2.60GHz, SuSE Linux 9.1, gcc 3.3.3
- - Power 4+, AIX 5.2.0.0, xlc (native compiler)
- - Alpha EV 6.7 667 MHz, Tru64 4.0e, gcc 2.95.2
- - Sun UltraSparc whateverversion, Solaris 9, gcc 3.2
I've attached the results in the file results.txt.
The Intel/AMD benchmarks are basically identical to your results. For
the other platforms, the results are very different. Just look into the
results. I'm afraid it's hard to give any general advice on which method
to choose on which platform.
What strikes me as most remarkable is that the AMD CPU actually seems to
be ~30% faster than the Intel CPU, even though the Athlon has a lower
clock rate! This is not your original problem, but I think this is very
interesting. I've tested compiling with "-march=pentium4" and
"-march=athlon-xp" options, but this doesn't change the results
significantly.
Furthermore, I've written a simpler test that draws 100 million random
numbers from the gsl rngs. This test program (attached as test_rng.c)
also runs ~30% faster on the AMD (3.37 sec) than on the Pentium 4 (4.27
sec). It seems as if the AMD is better on integer operations than the
Pentium 4? Can someone confirm this?
Olaf
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org
iD8DBQFBdPwstQ3riQ3oo/oRAvPjAKCgmPNnFzlTvnrZrwQsmGrVnDNe1gCgimH3
j4hweUtbQvITUrpwRnbS8jI=
=2xst
-----END PGP SIGNATURE-----
/* Giulio Bottazzi Mon Oct 18 2004*/
#define _GNU_SOURCE
#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <sys/times.h>
#include <unistd.h>
/* Rejection method using Laplace distribution */
double
exppow_ED(const gsl_rng * r, const double a, const double b)
{
const double A = 1./b ;
const double B = pow(A,A) ;
double x,y ;
do{
x = gsl_ran_laplace(r,B) ;
y = gsl_rng_uniform (r) ;
}
while(log(y) > -pow(fabs(x),b)+fabs(x)/B-1.+A);
return a*x ;
}
/* Rejection method using Gaussian distribution */
double
exppow_EN(const gsl_rng * r, const double a, const double b)
{
const double A = 1./b ;
const double B = pow(A,A) ;
double x,y ;
do{
x = gsl_ran_gaussian_ratio_method (r,B) ;
y = gsl_rng_uniform (r) ;
}
while(log(y) > -pow(fabs(x),b)+x*x/(2.*B*B)+A-.5) ;
return a*x ;
}
/* transformation of Gamma variate */
double
exppow_gamma(const gsl_rng * r, const double a, const double b)
{
double u = gsl_rng_uniform (r) ;
double b_rec = 1.0/b;
double v = gsl_ran_gamma (r, b_rec, 1.0) ;
double z = a * pow(v, b_rec) ;
if (u > 0.5)
{
return z ;
}
else
{
return -z ;
}
}
int
main(int argc,char* argv[])
{
unsigned i,j;
/* parameters */
double a=1;
double b=1;
unsigned int n=1000000;
/* GSL RNG */
gsl_rng * rng;
long seed=0; /* the seed of the RNG */
/* time */
struct tms start, end;
double gamma_time_used,ED_time_used,EN_time_used;
/* b values */
const double bvals[]
= {.5,.7,.9,
1.001,1.25,1.5,1.75,
2.001,2.25,2.5,2.75,
3.,3.5,4.,5.,6.,8.,10
};
const unsigned bnum=17;
const unsigned clocks_per_sec = sysconf(_SC_CLK_TCK);
/* COMMAND LINE PROCESSING */
/* variables for reading command line options */
/* ------------------------------------------ */
char opt;
extern char *optarg;
/* extern int optind, opterr, optopt; */
extern int optopt;
/* ------------------------------------------ */
/* read the command line */
while((opt=getopt(argc,argv,"a:N:R:"))!=EOF){
if(opt=='?'){
fprintf(stderr,"option %c not recognized\n",optopt);
return(-1);
}
else if(opt=='N'){
/*number of rv to generate*/
n= (unsigned) atoi(optarg);
}
else if(opt=='a'){
a=atof(optarg);
}
else if(opt=='R'){
seed= (long) atoi(optarg);
}
else if(opt=='h'){
/*print short help*/
printf("Compare method to generate rv\n\n");
printf("Usage: %s [options]\n",argv[0]);
printf(" Options:\n");
printf("-N\tnumber of points [100]\n");
printf("-a\tset the scale [1.0]\n");
printf("-R\tset the seed [0]\n");
exit(0);
}
}
/* initialize RNG */
rng = gsl_rng_alloc (gsl_rng_mt19937);
gsl_rng_set (rng,seed);
printf("#b\tGS\tED\tEN\n");
for(j=0;j<bnum;j++){
const double b = bvals[j];
/* benchmarking gamma */
times(&start);
for (i=0;i<n;i++){
exppow_gamma(rng,a,b);
}
times(&end);
gamma_time_used = ((double) (end.tms_utime - start.tms_utime)) / clocks_per_sec;
if(b>=1) { /* benchmarking ED ; valid for b>=1*/
times(&start);
for (i=0;i<n;i++){
exppow_ED(rng,a,b);
}
times(&end);
ED_time_used = ((double) (end.tms_utime - start.tms_utime)) / clocks_per_sec;
}
else ED_time_used=GSL_NAN;
if(b>=2){/* benchmarking EN ; valid for b>=2 */
times(&start);
for (i=0;i<n;i++){
exppow_EN(rng,a,b);
}
times(&end);
EN_time_used = ((double) (end.tms_utime - start.tms_utime)) / clocks_per_sec;
}
else EN_time_used = GSL_NAN;
/* print results */
printf("%.3f\t%.3f\t%.3f\t%.3f\n",b,
gamma_time_used,ED_time_used,EN_time_used);
}
}
#include <gsl/gsl_rng.h>
#include <sys/times.h>
#include <unistd.h>
const int N = 100000000; /* the number of rn to be drawn */
const long seed = 123456; /* the seed of the RNG */
int
main(int argc,char* argv[])
{
unsigned i;
double sum;
/* GSL RNG */
gsl_rng * rng;
/* times */
struct tms start, end;
double time_used;
const unsigned clocks_per_sec = sysconf(_SC_CLK_TCK);
/* initialize RNG */
rng = gsl_rng_alloc (gsl_rng_mt19937);
gsl_rng_set (rng,seed);
times(&start);
for (i = 0; i < N; i++)
sum += gsl_rng_uniform(rng) * 2.0 - 1.0;
times(&end);
time_used = ((double) (end.tms_utime - start.tms_utime)) / clocks_per_sec;
/* print results */
printf("%.3f\n", time_used);
}
Intel:
#b GS ED EN best
0.500 0.460 nan nan GS
0.700 1.380 nan nan
0.900 1.280 nan nan
1.001 1.160 1.040 nan ED
1.250 1.170 1.040 nan
1.500 1.170 1.110 nan
1.750 1.150 1.150 nan
2.001 1.130 1.100 0.970 EN
2.250 1.120 1.160 1.020
2.500 1.120 1.190 1.050
2.750 1.100 1.230 1.080
3.000 1.100 1.030 0.940
3.500 1.080 1.300 1.150 GS
4.000 1.080 1.230 1.130
5.000 1.050 1.260 1.140
6.000 1.050 1.310 1.210
8.000 1.030 1.390 1.270
Athlon:
#b GS ED EN best
0.500 0.440 nan nan GS
0.700 1.050 nan nan
0.900 0.950 nan nan
1.001 0.880 0.800 nan ED
1.250 0.880 0.860 nan
1.500 0.870 0.910 nan GS
1.750 0.860 0.950 nan
2.001 0.860 0.960 0.900
2.250 0.840 0.990 0.940
2.500 0.830 1.030 0.970
2.750 0.810 1.030 0.970
3.000 0.830 0.940 0.900
3.500 0.810 1.140 1.070
4.000 0.810 0.980 0.950
5.000 0.780 1.040 1.000
6.000 0.780 1.100 1.060
8.000 0.770 1.140 1.100
Power 4+:
#b GS ED EN best
0.500 0.520 NANQ NANQ GS
0.700 1.320 NANQ NANQ
0.900 1.220 NANQ NANQ
1.001 1.170 0.970 NANQ ED
1.250 1.190 1.050 NANQ
1.500 1.170 1.100 NANQ
1.750 1.170 1.160 NANQ
2.001 1.160 1.200 1.150 EN
2.250 1.150 1.260 1.210 GS
2.500 1.140 1.290 1.240
2.750 1.130 1.310 1.260
3.000 1.120 1.350 1.280
3.500 1.110 1.390 1.340
4.000 1.090 1.420 1.370
5.000 1.080 1.530 1.350
6.000 1.060 1.570 1.500
8.000 1.040 1.650 1.570
Alpha:
#b GS ED EN best
0.500 0.617 NaNQ NaNQ GS
0.700 1.133 NaNQ NaNQ
0.900 1.050 NaNQ NaNQ
1.001 1.000 0.700 NaNQ ED
1.250 1.000 0.767 NaNQ
1.500 1.000 0.800 NaNQ
1.750 1.000 0.850 NaNQ
2.001 0.967 0.883 0.833 EN
2.250 0.967 0.917 0.867
2.500 0.950 0.950 0.883
2.750 0.950 0.967 0.917
3.000 0.933 1.017 0.917
3.500 0.933 1.033 0.967 GS
4.000 0.917 1.083 1.000
5.000 0.900 1.150 1.067
6.000 0.883 1.183 1.117
8.000 0.867 1.267 1.167
Sun:
#b GS ED EN best
0.500 2.140 NaN NaN GS
0.700 4.660 NaN NaN
0.900 4.250 NaN NaN
1.001 4.010 2.400 NaN ED
1.250 4.060 2.610 NaN
1.500 4.050 2.760 NaN
1.750 4.020 2.880 NaN
2.001 3.970 3.000 3.250
2.250 3.930 3.100 3.360
2.500 3.890 3.190 3.480
2.750 3.850 3.280 3.560
3.000 3.830 3.360 3.650
3.500 3.770 3.490 3.800
4.000 3.730 3.610 3.940 GS
5.000 3.660 3.800 4.130
6.000 3.610 3.950 4.300
8.000 3.550 4.180 4.560
--- End Message ---