Debian libc6 2.9-4 Formula to calculate nanosecond time in glibc is broken. Goes back in time approximately every second, depending upon the exact clock frequency. Example code to demonstrate broken nanosecond calculation: $ cat monotonic.c #include <assert.h> #include <stdio.h> #include <time.h> int main() { struct timespec start_time, current_time; assert(clock_gettime(CLOCK_MONOTONIC, &start_time)==0); while (1) { assert(clock_gettime(CLOCK_MONOTONIC, &current_time)==0); printf("start_time=%ldns; current_time=%ldns; difference=%ldns\n", start_time.tv_nsec, current_time.tv_nsec, current_time.tv_nsec - start_time.tv_nsec); assert(current_time.tv_nsec > start_time.tv_nsec); } } $ gcc -O3 -std=gnu99 -lrt monotonic.c && ./a.out start_time=23613004ns; current_time=23613118ns; difference=114ns start_time=23613004ns; current_time=23729708ns; difference=116704ns start_time=23613004ns; current_time=23771717ns; difference=158713ns ... start_time=23613004ns; current_time=977412844ns; difference=953799840ns start_time=23613004ns; current_time=977415226ns; difference=953802222ns start_time=23613004ns; current_time=977417594ns; difference=953804590ns start_time=23613004ns; current_time=2518370ns; difference=-21094634ns a.out: monotonic.c:13: main: Assertion `current_time.tv_nsec > start_time.tv_nsec' failed. Aborted My TSC clock is constant and synchronized between cores, as confirmed by kernel logs [Intel Core 2 E8400 @ 3.00GHz; dual core]. Replication of bug in clock_gettime using glibc's formula plus correct portable nanosecond formula and verification of portable formula (when __int128_t is defined) follows: $ cat monotonic_tsc.c #include <assert.h> #include <inttypes.h> #include <stdint.h> #include <stdio.h> uint64_t tsc() { uint32_t low, high; __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)); uint64_t r=((uint64_t) high<<32) | low; return r; } /* From sysdeps/unix/clock_gettime.c: "And the nanoseconds. 
This computation should be stable until we get machines with about 16GHz frequency." */ uint64_t glibc_nsec(uint64_t tsc, uint64_t freq) { return ((tsc % freq) * UINT64_C (1000000000)) / freq; } uint64_t adam_nsec(uint64_t tsc, uint64_t freq) { uint64_t quotient=tsc / freq; uint64_t remainder=tsc % freq; return (quotient * UINT64_C(1000000000)) + ((remainder * UINT64_C(1000000000)) / freq); } /* Note: gcc for IA32 doesn't support 128-bit integers */ uint64_t verify_nsec(uint64_t tsc, uint64_t freq) { __uint128_t tsc128=tsc; __uint128_t freq128=freq; return (tsc128 * UINT64_C(1000000000)) / freq128; } int main() { const uint64_t freq=2997000000; uint64_t start_tsc, start_gnsec, current_tsc, current_gnsec, current_ansec, current_vnsec; start_tsc=tsc(); start_gnsec=glibc_nsec(start_tsc, freq); while (1) { current_tsc=tsc(); current_gnsec=glibc_nsec(current_tsc, freq); current_ansec=adam_nsec(current_tsc, freq); current_vnsec=verify_nsec(current_tsc, freq); printf("start_tsc=%"PRIu64" current_tsc=%"PRIu64" diff_tsc=%"PRIi64"\n" "start_gnsec=%"PRIu64" current_gnsec=%"PRIu64" diff_gnsec=%"PRIi64 "\n", start_tsc, current_tsc, current_tsc - start_tsc, start_gnsec, current_gnsec, current_gnsec - start_gnsec); printf("tsc %% freq=%"PRIu64" (tsc %% freq)*UINT64_C(1000000000)=%" PRIu64"\n", current_tsc % freq, (current_tsc % freq) * UINT64_C (1000000000)); printf("current_adam_nsec=%"PRIu64" current_verify_nsec=%"PRIu64"\n\n", current_ansec, current_vnsec); assert(current_ansec==current_vnsec); assert(current_gnsec > start_gnsec); } } $ gcc -O3 -std=gnu99 monotonic_tsc.c && ./a.out start_tsc=31491500607801 current_tsc=31491500607909 diff_tsc=108 start_gnsec=674543810 current_gnsec=674543846 diff_gnsec=36 tsc % freq=2021607909 (tsc % freq)*UINT64_C(1000000000)=2021607909000000000 current_adam_nsec=10507674543846 current_verify_nsec=10507674543846 start_tsc=31491500607801 current_tsc=31491500747670 diff_tsc=139869 start_gnsec=674543810 current_gnsec=674590480 diff_gnsec=46670 
tsc % freq=2021747670 (tsc % freq)*UINT64_C(1000000000)=2021747670000000000 current_adam_nsec=10507674590480 current_verify_nsec=10507674590480 ... start_tsc=31491500607801 current_tsc=31492474779024 diff_tsc=974171223 start_gnsec=674543810 current_gnsec=999592600 diff_gnsec=325048790 tsc % freq=2995779024 (tsc % freq)*UINT64_C(1000000000)=2995779024000000000 current_adam_nsec=10507999592600 current_verify_nsec=10507999592600 start_tsc=31491500607801 current_tsc=31492562806188 diff_tsc=1062198387 start_gnsec=674543810 current_gnsec=28964360 diff_gnsec=-645579450 tsc % freq=86806188 (tsc % freq)*UINT64_C(1000000000)=86806188000000000 current_adam_nsec=10508028964360 current_verify_nsec=10508028964360 a.out: monotonic_tsc.c:62: main: Assertion `current_gnsec > start_gnsec' failed. Aborted The assembly generated for adam_nsec() on x86-64 is: $ objdump -d -m i386:x86-64:intel a.out ... 0000000000400560 <adam_nsec>: 400560: 48 89 f8 mov rax,rdi 400563: 31 d2 xor edx,edx 400565: 48 f7 f6 div rsi 400568: 48 89 c1 mov rcx,rax 40056b: 48 69 c2 00 ca 9a 3b imul rax,rdx,0x3b9aca00 400572: 31 d2 xor edx,edx 400574: 48 f7 f6 div rsi 400577: 48 69 d1 00 ca 9a 3b imul rdx,rcx,0x3b9aca00 40057e: 48 01 d0 add rax,rdx 400581: c3 ret ... This is slightly suboptimal due to two divisions and two multiplications. x86-64 has a 64-bit x 64-bit -> 128 bit multiply and a 128-bit dividend / 64-bit divisor -> 64 bit quotient. Thus the whole calculation could be performed with one multiplication and one division.
Is the correct frequency divisor being reliably read? My processor has a constant_tsc, as verified by grepping the flags in /proc/cpuinfo. __get_clockfreq extracts the frequency divisor from the "cpu MHz" entry of /proc/cpuinfo. Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3B: System Programming Guide, Part 2: 18.11 TIME-STAMP COUNTER: "For Pentium 4 processors, Intel Xeon processors (family [0FH], models [03H and higher]); for Intel Core Solo and Intel Core Duo processors (family [06H], model [0EH]); for the Intel Xeon processor 5100 series and Intel Core 2 Duo processors (family [06H], model [0FH]); for Intel Core 2 and Intel Xeon processors (family [06H], display_model [17H]); for Intel Atom processors (family [06H], display_model [1CH]): the time-stamp counter increments at a constant rate. That rate may be set by the maximum core-clock to bus-clock ratio of the processor or may be set by the maximum resolved frequency at which the processor is booted. The maximum resolved frequency may differ from the maximum qualified frequency of the processor, see Section 18.20.5 for more detail." 18.20 COUNTING CLOCKS: "For Intel processors that support Intel Dynamic Acceleration or XE operation, the processor core clocks may operate at a frequency that differs from the maximum qualified frequency (as indicated by brand string information reported by CPUID instruction). See Section 18.20.5 for more detail." 18.20.5 Cycle Counting and Opportunistic Processor Operation: "The TSC, IA32_MPERF, and IA32_FIXED_CTR2 operate at the same, maximum resolved frequency of the platform, which is equal to the product of scalable bus frequency and maximum resolved bus ratio." Thus the "cpu MHz" entry for a core will not be equal to the constant frequency that TSC increments when the core is operating at a frequency that differs from the maximum qualified frequency. Most of the time my cores are running at an on-demand frequency of 1998.000 MHz. 
This only rises to 2997.000 MHz (obtained from /proc/cpuinfo) when the cores are under load. If /proc/cpuinfo is read for a core that is not under load the wrong frequency divisor will be extracted. With Linux kernel 2.6.28 even maxing out one core only gives a 50% chance (on a dual-core processor) of reading the correct "cpu MHz": $ echo "Before:" && cat /proc/cpuinfo | grep 'cpu MHz' && for ((i=0; i<1000000; i+=1)); do true; done && echo "After:" && cat /proc/cpuinfo | grep 'cpu MHz' Before: cpu MHz : 1998.000 cpu MHz : 1998.000 After: cpu MHz : 1998.000 cpu MHz : 2997.000 $ echo "Before:" && cat /proc/cpuinfo | grep 'cpu MHz' && for ((i=0; i<1000000; i+=1)); do true; done && echo "After:" && cat /proc/cpuinfo | grep 'cpu MHz' Before: cpu MHz : 1998.000 cpu MHz : 1998.000 After: cpu MHz : 2997.000 cpu MHz : 1998.000 Workaround: Read all "cpu MHz" entries and return the maximum value found. Workaround will still fail if a cpufreq scaling governor is used that does not permit the processor to reach maximum qualified frequency. Potential workaround: Read maximum qualified frequency from "model name", e.g.: model name : Intel(R) Core(TM)2 Duo CPU E8400 @ 3.00GHz Maximum qualified frequency is 3.00GHz. Compare this to maximum cpu MHz read under load. If they differ significantly use the maximum qualified frequency read from the model name. If they are very similar (within say 50MHz) use the maximum "cpu MHz" entry. This approach would be appropriate for an overclocked processor if constant_tsc runs at the same rate even when the processor is overclocked [otherwise it is inappropriate and the highest "cpu MHz" entry should be used instead of the maximum qualified frequency]. Regards, Adam
Note: struct timespec also contains a tv_sec field carrying whole seconds, and tv_nsec legitimately wraps back to 0 each time tv_sec increments. A correct monotonicity check must therefore compare the full (tv_sec, tv_nsec) pair — the assertion in monotonic.c, which compares tv_nsec alone, will fail about once per second even on a correct implementation. The monotonic_tsc.c reproduction, which compares total nanoseconds derived directly from the TSC, is the test that actually demonstrates the glibc formula's discontinuity.