cat > a.c <<\EOF cat /tmp/a.c #include <dlfcn.h> const char *libs[] = { "libvclplug_gtk680lx.so", "libvclplug_gen680lx.so", "libnss_files.so.2", "libGL.so.1", "servicemgr.uno.so", "shlibloader.uno.so", "simplereg.uno.so", "nestedreg.uno.so", "typemgr.uno.so", "implreg.uno.so", "security.uno.so", "libreg.so.3", "libstore.so.3", "regtypeprov.uno.so", "configmgr2.uno.so", "typeconverter.uno.so", "gconfbe1.uno.so", "behelper.uno.so", "sax.uno.so", "localebe1.uno.so", "uriproc.uno.so", "libspl680lx.so", "libucb1.so", "ucpgvfs1.uno.so", "libgcc3_uno.so", "libpackage2.so", "libfileacc.so", "libuui680lx.so", "libfilterconfig1.so", "libdtransX11680lx.so", "i18npool.uno.so", "liblocaledata_en.so", "fsstorage.uno.so", "libxstor.so", "libdbtools680lx.so", "libcups.so.2", "libgnutls.so.13", "libgcrypt.so.11", "libgpg-error.so.0", "libmcnttype.so", "libucpchelp1.so", "svtmisc.uno.so" }; int main (int argc, char **argv) { int i; void *h; int flags = RTLD_LAZY; if (argv[1][0] == 'g') flags |= RTLD_GLOBAL; for (i = 0; i < sizeof (libs) / sizeof (libs[0]); ++i) h = dlopen (libs[i], flags); return 0; } EOF gcc -g -O2 -o a a.c -Wl,-rpath,/usr/lib64/openoffice.org2.0/program/ \ -L/usr/lib64/openoffice.org2.0/program/ -lsoffice -lsw680lx -lsvx680lx -lstdc++ -lm -shared-libgcc for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \ do ( for i in 1 2 3 4; do eval $M; time ./a $V; done 2>&1 > /dev/null | \ awk 'BEGIN { printf "'"$V $M"'\t" } /^real/ { printf "%s ", $2 } END { printf "\n" }' ); done; done local 0m0.264s 0m0.253s 0m0.256s 0m0.256s local export LD_X=1 0m0.544s 0m0.538s 0m0.538s 0m0.537s local export LD_BIND_NOW=1 0m0.480s 0m0.474s 0m0.477s 0m0.480s local export LD_X=1 LD_BIND_NOW=1 0m1.102s 0m1.094s 0m1.096s 0m1.095s global 0m0.301s 0m0.299s 0m0.294s 0m0.294s global export LD_X=1 0m0.625s 0m0.619s 0m0.619s 0m0.618s global export LD_BIND_NOW=1 0m0.553s 0m0.546s 0m0.544s 0m0.544s global export LD_X=1 LD_BIND_NOW=1 0m1.251s 0m1.245s 
0m1.244s 0m1.243s for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \ do ( echo "$V $M"; eval $M; valgrind --tool=cachegrind ./a $V 2>&1 > /dev/null | sed -n '/== I refs/,$p' ); \ done; done local ==11628== I refs: 213,572,489 ==11628== I1 misses: 11,630 ==11628== L2i misses: 10,103 ==11628== I1 miss rate: 0.00% ==11628== L2i miss rate: 0.00% ==11628== ==11628== D refs: 78,630,135 (62,272,247 rd + 16,357,888 wr) ==11628== D1 misses: 4,699,115 ( 4,544,371 rd + 154,744 wr) ==11628== L2d misses: 643,429 ( 549,365 rd + 94,064 wr) ==11628== D1 miss rate: 5.9% ( 7.2% + 0.9% ) ==11628== L2d miss rate: 0.8% ( 0.8% + 0.5% ) ==11628== ==11628== L2 refs: 4,710,745 ( 4,556,001 rd + 154,744 wr) ==11628== L2 misses: 653,532 ( 559,468 rd + 94,064 wr) ==11628== L2 miss rate: 0.2% ( 0.2% + 0.5% ) local export LD_X=1 ==11632== I refs: 306,655,479 ==11632== I1 misses: 11,612 ==11632== L2i misses: 10,459 ==11632== I1 miss rate: 0.00% ==11632== L2i miss rate: 0.00% ==11632== ==11632== D refs: 129,271,101 (99,462,385 rd + 29,808,716 wr) ==11632== D1 misses: 9,739,970 ( 9,576,214 rd + 163,756 wr) ==11632== L2d misses: 3,035,531 ( 2,930,229 rd + 105,302 wr) ==11632== D1 miss rate: 7.5% ( 9.6% + 0.5% ) ==11632== L2d miss rate: 2.3% ( 2.9% + 0.3% ) ==11632== ==11632== L2 refs: 9,751,582 ( 9,587,826 rd + 163,756 wr) ==11632== L2 misses: 3,045,990 ( 2,940,688 rd + 105,302 wr) ==11632== L2 miss rate: 0.6% ( 0.7% + 0.3% ) local export LD_BIND_NOW=1 ==11638== I refs: 416,076,941 ==11638== I1 misses: 11,145 ==11638== L2i misses: 9,847 ==11638== I1 miss rate: 0.00% ==11638== L2i miss rate: 0.00% ==11638== ==11638== D refs: 156,764,733 (123,796,220 rd + 32,968,513 wr) ==11638== D1 misses: 9,682,235 ( 9,503,136 rd + 179,099 wr) ==11638== L2d misses: 967,489 ( 865,728 rd + 101,761 wr) ==11638== D1 miss rate: 6.1% ( 7.6% + 0.5% ) ==11638== L2d miss rate: 0.6% ( 0.6% + 0.3% ) ==11638== ==11638== L2 refs: 9,693,380 ( 9,514,281 rd + 179,099 wr) 
==11638== L2 misses: 977,336 ( 875,575 rd + 101,761 wr) ==11638== L2 miss rate: 0.1% ( 0.1% + 0.3% ) local export LD_X=1 LD_BIND_NOW=1 ==11643== I refs: 612,287,612 ==11643== I1 misses: 11,141 ==11643== L2i misses: 10,057 ==11643== I1 miss rate: 0.00% ==11643== L2i miss rate: 0.00% ==11643== ==11643== D refs: 264,754,881 (202,680,154 rd + 62,074,727 wr) ==11643== D1 misses: 20,634,045 ( 20,436,902 rd + 197,143 wr) ==11643== L2d misses: 6,327,654 ( 6,214,729 rd + 112,925 wr) ==11643== D1 miss rate: 7.7% ( 10.0% + 0.3% ) ==11643== L2d miss rate: 2.3% ( 3.0% + 0.1% ) ==11643== ==11643== L2 refs: 20,645,186 ( 20,448,043 rd + 197,143 wr) ==11643== L2 misses: 6,337,711 ( 6,224,786 rd + 112,925 wr) ==11643== L2 miss rate: 0.7% ( 0.7% + 0.1% ) global ==11647== I refs: 229,660,039 ==11647== I1 misses: 11,781 ==11647== L2i misses: 10,255 ==11647== I1 miss rate: 0.00% ==11647== L2i miss rate: 0.00% ==11647== ==11647== D refs: 86,649,339 (68,557,134 rd + 18,092,205 wr) ==11647== D1 misses: 6,704,681 ( 6,545,220 rd + 159,461 wr) ==11647== L2d misses: 685,354 ( 590,853 rd + 94,501 wr) ==11647== D1 miss rate: 7.7% ( 9.5% + 0.8% ) ==11647== L2d miss rate: 0.7% ( 0.8% + 0.5% ) ==11647== ==11647== L2 refs: 6,716,462 ( 6,557,001 rd + 159,461 wr) ==11647== L2 misses: 695,609 ( 601,108 rd + 94,501 wr) ==11647== L2 miss rate: 0.2% ( 0.2% + 0.5% ) global export LD_X=1 ==11651== I refs: 331,688,345 ==11651== I1 misses: 11,730 ==11651== L2i misses: 10,602 ==11651== I1 miss rate: 0.00% ==11651== L2i miss rate: 0.00% ==11651== ==11651== D refs: 142,641,436 (109,595,921 rd + 33,045,515 wr) ==11651== D1 misses: 12,232,659 ( 12,067,731 rd + 164,928 wr) ==11651== L2d misses: 3,522,116 ( 3,416,331 rd + 105,785 wr) ==11651== D1 miss rate: 8.5% ( 11.0% + 0.4% ) ==11651== L2d miss rate: 2.4% ( 3.1% + 0.3% ) ==11651== ==11651== L2 refs: 12,244,389 ( 12,079,461 rd + 164,928 wr) ==11651== L2 misses: 3,532,718 ( 3,426,933 rd + 105,785 wr) ==11651== L2 miss rate: 0.7% ( 0.7% + 0.3% ) global export 
LD_BIND_NOW=1 ==11656== I refs: 445,261,358 ==11656== I1 misses: 11,280 ==11656== L2i misses: 9,978 ==11656== I1 miss rate: 0.00% ==11656== L2i miss rate: 0.00% ==11656== ==11656== D refs: 171,275,049 (135,170,564 rd + 36,104,485 wr) ==11656== D1 misses: 13,300,976 ( 13,111,867 rd + 189,109 wr) ==11656== L2d misses: 1,045,200 ( 943,012 rd + 102,188 wr) ==11656== D1 miss rate: 7.7% ( 9.7% + 0.5% ) ==11656== L2d miss rate: 0.6% ( 0.6% + 0.2% ) ==11656== ==11656== L2 refs: 13,312,256 ( 13,123,147 rd + 189,109 wr) ==11656== L2 misses: 1,055,178 ( 952,990 rd + 102,188 wr) ==11656== L2 miss rate: 0.1% ( 0.1% + 0.2% ) global export LD_X=1 LD_BIND_NOW=1 ==11660== I refs: 657,215,295 ==11660== I1 misses: 11,238 ==11660== L2i misses: 10,165 ==11660== I1 miss rate: 0.00% ==11660== L2i miss rate: 0.00% ==11660== ==11660== D refs: 288,810,775 (220,892,186 rd + 67,918,589 wr) ==11660== D1 misses: 25,132,151 ( 24,931,250 rd + 200,901 wr) ==11660== L2d misses: 7,240,360 ( 7,126,874 rd + 113,486 wr) ==11660== D1 miss rate: 8.7% ( 11.2% + 0.2% ) ==11660== L2d miss rate: 2.5% ( 3.2% + 0.1% ) ==11660== ==11660== L2 refs: 25,143,389 ( 24,942,488 rd + 200,901 wr) ==11660== L2 misses: 7,250,525 ( 7,137,039 rd + 113,486 wr) ==11660== L2 miss rate: 0.7% ( 0.8% + 0.1% ) for V in local global; do for M in '' '-E LD_X=1' '-E LD_BIND_NOW=1' '-E LD_X=1 -E LD_BIND_NOW=1'; \ do ( echo "$V $M"; ./timing $M ./a $V ); done; done local Strip out best and worst realtime result minimum: 0.252914000 sec real / 0.000051294 sec CPU maximum: 0.269686000 sec real / 0.000083306 sec CPU average: 0.254617928 sec real / 0.000071702 sec CPU stdev : 0.000890554 sec real / 0.000003730 sec CPU local -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.536379000 sec real / 0.000050866 sec CPU maximum: 0.539256000 sec real / 0.000079972 sec CPU average: 0.537778428 sec real / 0.000074764 sec CPU stdev : 0.000612980 sec real / 0.000002034 sec CPU local -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" 
Strip out best and worst realtime result minimum: 0.470151000 sec real / 0.000053946 sec CPU maximum: 0.481664000 sec real / 0.000084505 sec CPU average: 0.473882142 sec real / 0.000073921 sec CPU stdev : 0.001887639 sec real / 0.000002616 sec CPU local -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 1.092469000 sec real / 0.000051647 sec CPU maximum: 1.106560000 sec real / 0.000078219 sec CPU average: 1.096268250 sec real / 0.000064646 sec CPU stdev : 0.002515850 sec real / 0.000003027 sec CPU global Strip out best and worst realtime result minimum: 0.294585000 sec real / 0.000050279 sec CPU maximum: 0.304168000 sec real / 0.000078209 sec CPU average: 0.297781285 sec real / 0.000072901 sec CPU stdev : 0.002508159 sec real / 0.000004136 sec CPU global -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.617157000 sec real / 0.000064151 sec CPU maximum: 0.645039000 sec real / 0.000084488 sec CPU average: 0.621962785 sec real / 0.000075530 sec CPU stdev : 0.002484547 sec real / 0.000003147 sec CPU global -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.544103000 sec real / 0.000052304 sec CPU maximum: 0.557447000 sec real / 0.000078790 sec CPU average: 0.548014107 sec real / 0.000073886 sec CPU stdev : 0.002805780 sec real / 0.000002697 sec CPU global -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 1.241722000 sec real / 0.000058554 sec CPU maximum: 1.255916000 sec real / 0.000076953 sec CPU average: 1.247884071 sec real / 0.000063511 sec CPU stdev : 0.003259242 sec real / 0.000002160 sec CPU /usr/sbin/prelink -vmR ./a for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \ do ( for i in 1 2 3 4; do eval $M; time ./a $V; done 2>&1 > /dev/null | \ awk 'BEGIN { printf "'"$V $M"'\t" } /^real/ { printf "%s ", $2 } END 
{ printf "\n" }' ); done; done local 0m0.145s 0m0.138s 0m0.139s 0m0.139s local export LD_X=1 0m0.274s 0m0.268s 0m0.266s 0m0.269s local export LD_BIND_NOW=1 0m0.245s 0m0.238s 0m0.238s 0m0.239s local export LD_X=1 LD_BIND_NOW=1 0m0.504s 0m0.497s 0m0.498s 0m0.496s global 0m0.182s 0m0.175s 0m0.174s 0m0.175s global export LD_X=1 0m0.352s 0m0.357s 0m0.344s 0m0.346s global export LD_BIND_NOW=1 0m0.310s 0m0.305s 0m0.316s 0m0.306s global export LD_X=1 LD_BIND_NOW=1 0m0.647s 0m0.641s 0m0.640s 0m0.640s # valgrind --tool=cachegrind stats not provided for prelinked testcase, # as valgrind apparently uses LD_PRELOAD internally and thus prevents # prelinking. for V in local global; do for M in '' '-E LD_X=1' '-E LD_BIND_NOW=1' '-E LD_X=1 -E LD_BIND_NOW=1'; \ do ( echo "$V $M"; ./timing $M ./a $V ); done; done local Strip out best and worst realtime result minimum: 0.137495000 sec real / 0.000066247 sec CPU maximum: 0.142180000 sec real / 0.000086736 sec CPU average: 0.138369035 sec real / 0.000072997 sec CPU stdev : 0.000575184 sec real / 0.000002132 sec CPU local -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.264590000 sec real / 0.000060576 sec CPU maximum: 0.272804000 sec real / 0.000082688 sec CPU average: 0.266598571 sec real / 0.000072811 sec CPU stdev : 0.001817765 sec real / 0.000003394 sec CPU local -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.236854000 sec real / 0.000065925 sec CPU maximum: 0.245201000 sec real / 0.000080373 sec CPU average: 0.238382678 sec real / 0.000075591 sec CPU stdev : 0.000959453 sec real / 0.000002887 sec CPU local -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.496607000 sec real / 0.000065955 sec CPU maximum: 0.512757000 sec real / 0.000084887 sec CPU average: 0.498181678 sec real / 0.000074275 sec CPU stdev : 0.001529594 sec real / 0.000002630 sec CPU global Strip out best and worst realtime 
result minimum: 0.173740000 sec real / 0.000048699 sec CPU maximum: 0.181163000 sec real / 0.000083410 sec CPU average: 0.175901500 sec real / 0.000070443 sec CPU stdev : 0.001745144 sec real / 0.000003656 sec CPU global -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.344016000 sec real / 0.000058830 sec CPU maximum: 0.377289000 sec real / 0.000076792 sec CPU average: 0.346814392 sec real / 0.000072660 sec CPU stdev : 0.002058835 sec real / 0.000002517 sec CPU global -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.304208000 sec real / 0.000049604 sec CPU maximum: 0.314217000 sec real / 0.000077094 sec CPU average: 0.307348821 sec real / 0.000071335 sec CPU stdev : 0.002641413 sec real / 0.000003427 sec CPU global -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.640543000 sec real / 0.000044401 sec CPU maximum: 0.664382000 sec real / 0.000089763 sec CPU average: 0.646539678 sec real / 0.000071135 sec CPU stdev : 0.005879177 sec real / 0.000003697 sec CPU