From 6bff37abacf95b60a1d8d4c83c9e59e42ae1a7b9 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 9 Jun 2010 21:44:42 -0400 Subject: [PATCH] Let probe_bench work on archs besides x86_64 The cycles code is too x86_64-centric, but for now at least it's been neutered to let other archs report basic timing numbers. * scripts/probe_perf/bench.stp: Use get_cycles() instead of rdtsc(). * scripts/probe_perf/bench.c (rdtsc): Return 0 unless on x86_64. This needs more work and inline assembly to fit more architectures. --- scripts/probe_perf/bench.c | 11 ++++++++-- scripts/probe_perf/bench.stp | 42 ++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/scripts/probe_perf/bench.c b/scripts/probe_perf/bench.c index ec391c875..1cb2ea8c7 100644 --- a/scripts/probe_perf/bench.c +++ b/scripts/probe_perf/bench.c @@ -139,8 +139,12 @@ float zr, zi; long rdtsc(void) __attribute__ ((__noinline__)); long rdtsc(void) { + // XXX this only works on x86_64 + // -- not i386 because the long result is only 32-bit + // -- not other archs because they need other asm +#if defined __x86_64__ int res[2]; - + __asm__ __volatile__ ("xorl %%eax,%%eax \n push %%rbx \n cpuid \n" ::: "%rax", "%rcx", "%rdx"); // read TSC, store edx:eax in res @@ -148,8 +152,11 @@ long rdtsc(void) : "=a" (res[0]), "=d" (res[1]) ); __asm__ __volatile__ ("xorl %%eax,%%eax \n cpuid \n pop %%rbx \n" ::: "%rax", "%rcx", "%rdx"); - + return *(long*)res; +#else + return 0; +#endif } Initrand () diff --git a/scripts/probe_perf/bench.stp b/scripts/probe_perf/bench.stp index 7b9628aae..df4b6a668 100644 --- a/scripts/probe_perf/bench.stp +++ b/scripts/probe_perf/bench.stp @@ -1,86 +1,80 @@ -function rdtsc () %{ - int res[2]; - __asm__ __volatile__ ("rdtsc\n" : "=a" (res[0]), "=d" (res[1]) ); - THIS->__retvalue = *(long*)res; -%} - global process_hits %( @2 == "NO_STAP_SDT" %? 
probe process(@1).function("*").label("BENCH_PERMUTE") -{ process_hits <<< (rdtsc() - $cycles_start) } +{ process_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("permute") -{ process_hits <<< (rdtsc() - $arg1) } +{ process_hits <<< (get_cycles() - $arg1) } %) global tower_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_TOWER") -{ tower_hits <<< (rdtsc() - $cycles_start) } +{ tower_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("tower") -{ tower_hits <<< (rdtsc() - $arg1) } +{ tower_hits <<< (get_cycles() - $arg1) } %) global try_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_TRY") -{ try_hits <<< (rdtsc() - $cycles_start) } +{ try_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("try") -{ try_hits <<< (rdtsc() - $arg1) } +{ try_hits <<< (get_cycles() - $arg1) } %) global innerproduct_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_INNERPRODUCT") -{ innerproduct_hits <<< (rdtsc() - $cycles_start) } +{ innerproduct_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("innerproduct") -{ innerproduct_hits <<< (rdtsc() - $arg1) } +{ innerproduct_hits <<< (get_cycles() - $arg1) } %) global trial_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_TRIAL") -{ trial_hits <<< (rdtsc() - $cycles_start) } +{ trial_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("trial") -{ trial_hits <<< (rdtsc() - $arg1) } +{ trial_hits <<< (get_cycles() - $arg1) } %) global quicksort_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_QUICKSORT") -{ quicksort_hits <<< (rdtsc() - $cycles_start) } +{ quicksort_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("quicksort") -{ quicksort_hits <<< (rdtsc() - $arg1) } +{ quicksort_hits <<< (get_cycles() - $arg1) } %) global bubble_hits %( @2 == "NO_STAP_SDT" %? 
probe process(@1).function("*").label("BENCH_BUBBLE") -{ bubble_hits <<< (rdtsc() - $cycles_start) } +{ bubble_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("bubble") -{ bubble_hits <<< (rdtsc() - $arg1) } +{ bubble_hits <<< (get_cycles() - $arg1) } %) global insert_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_INSERT") -{ insert_hits <<< (rdtsc() - $cycles_start) } +{ insert_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("insert") -{ insert_hits <<< (rdtsc() - $arg1) } +{ insert_hits <<< (get_cycles() - $arg1) } %) global fft_hits %( @2 == "NO_STAP_SDT" %? probe process(@1).function("*").label("BENCH_FFT") -{ fft_hits <<< (rdtsc() - $cycles_start) } +{ fft_hits <<< (get_cycles() - $cycles_start) } %: probe process(@1).mark("fft") -{ fft_hits <<< (rdtsc() - $arg1) } +{ fft_hits <<< (get_cycles() - $arg1) } %) -- 2.43.5