Below is the output from a recent analysis run (contributed by Michael Meissner).
For the following program:
/* Pseudo-random generator: advances a static seed with the step
   seed * 1103515245 + 12345 and returns the new state shifted right
   by 8 bits.  The seed starts at 47114711, so the sequence is the same
   on every run.  */
long simple_rand ()
{
  static unsigned long seed = 47114711;
  unsigned long this = seed * 1103515245 + 12345;
  seed = this;
  return this >> 8;
}

/* Build an unsigned long out of runs of identical bits.  Each pass draws
   one value from simple_rand: bit 0 selects whether the run is ones or
   zeros, and (ran >> 1) % 16 gives the run length.  The runs are shifted
   into x from the low end.  Returns x as soon as a run length of 0 is
   drawn, or once the accumulated length exceeds 8 * sizeof (long) + 6.  */
unsigned long int random_bitstring ()
{
  unsigned long int x;
  int ran, n_bits;
  int tot_bits = 0;
  x = 0;
  for (;;)
    {
      ran = simple_rand ();
      n_bits = (ran >> 1) % 16;
      tot_bits += n_bits;
      if (n_bits == 0)
        return x;
      else
        {
          /* Make room for the new run; it is left as zeros unless the
             low bit of ran asks for ones.  */
          x <<= n_bits;
          if (ran & 1)
            x |= (1 << n_bits) - 1; /* mask of n_bits one bits */
          if (tot_bits > 8 * sizeof (long) + 6)
            return x;
        }
    }
}

/* Absolute value; note that the argument is evaluated more than once.  */
#define ABS(x) ((x) >= 0 ? (x) : -(x))

/* Test driver: for 50000 pairs of random bit strings, compute quotient
   and remainder at long, int, short and char width, both unsigned and
   signed, and call abort if either of these fails:
     - the remainder is smaller in magnitude than the divisor;
     - quotient * divisor + remainder gives back the dividend.
   Any `continue' below abandons the remaining widths for that pair.
   Calls exit (0) when every iteration has passed.  */
main ()
{
  int i;
  for (i = 0; i < 50000; i++)
    {
      unsigned long x, y;
      x = random_bitstring ();
      y = random_bitstring ();
      /* When int and long have the same size, jump past the two long
         blocks and start at the int checks.  */
      if (sizeof (int) == sizeof (long))
        goto save_time;
      /* unsigned long */
      {
        unsigned long xx = x, yy = y, r1, r2;
        if (yy == 0)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* signed long */
      {
        signed long xx = x, yy = y, r1, r2;
        /* Skip when every bit of xx below the top bit is zero (xx is 0
           or has only its top bit set) and the divisor is -1.
           NOTE(review): presumably this avoids the overflowing
           most-negative / -1 quotient -- confirm.  */
        if ((unsigned long) xx << 1 == 0 && yy == -1)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned long) ABS (yy) || (signed long) (r1 * yy + r2) != xx)
          abort ();
      }
    save_time:
      /* unsigned int; x and y are narrowed by the initialisers.  */
      {
        unsigned int xx = x, yy = y, r1, r2;
        if (yy == 0)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* signed int; same skip condition as the signed long block.  */
      {
        signed int xx = x, yy = y, r1, r2;
        if ((unsigned int) xx << 1 == 0 && yy == -1)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned int) ABS (yy) || (signed int) (r1 * yy + r2) != xx)
          abort ();
      }
      /* unsigned short */
      {
        unsigned short xx = x, yy = y, r1, r2;
        if (yy == 0)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* signed short; there is no zero-divisor test in this block.  It is
         only reached after the unsigned short block above did not skip
         the iteration for the same y.  */
      {
        signed short xx = x, yy = y, r1, r2;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned short) ABS (yy) || (signed short) (r1 * yy + r2) != xx)
          abort ();
      }
      /* unsigned char */
      {
        unsigned char xx = x, yy = y, r1, r2;
        if (yy == 0)
          continue;
        r1 = xx / yy;
        r2 = xx % yy;
        if (r2 >= yy || r1 * yy + r2 != xx)
          abort ();
      }
      /* signed char; as with signed short, the zero-divisor test was
         done by the unsigned char block above.  */
      {
        signed char xx = x, yy = y, r1, r2;
        r1 = xx / yy;
        r2 = xx % yy;
        if (ABS (r2) >= (unsigned char) ABS (yy) || (signed char) (r1 * yy + r2) != xx)
          abort ();
      }
    }
  exit (0);
}
Here is the current output generated with the -I switch on a 90 MHz Pentium (the compiler used is the development version of GCC with a new scheduler replacing the old one):
CPU #1 executed 41,994 AND instructions. CPU #1 executed 519,785 AND Immediate instructions. CPU #1 executed 680,058 Add instructions. CPU #1 executed 41,994 Add Extended instructions. CPU #1 executed 921,916 Add Immediate instructions. CPU #1 executed 221,199 Add Immediate Carrying instructions. CPU #1 executed 943,823 Add Immediate Shifted instructions. CPU #1 executed 471,909 Add to Zero Extended instructions. CPU #1 executed 571,915 Branch instructions. CPU #1 executed 1,992,403 Branch Conditional instructions. CPU #1 executed 571,910 Branch Conditional to Link Register instructions. CPU #1 executed 320,431 Compare instructions. CPU #1 executed 471,911 Compare Immediate instructions. CPU #1 executed 145,867 Compare Logical instructions. CPU #1 executed 442,414 Compare Logical Immediate instructions. CPU #1 executed 1 Condition Register XOR instruction. CPU #1 executed 103,873 Divide Word instructions. CPU #1 executed 104,275 Divide Word Unsigned instructions. CPU #1 executed 132,510 Extend Sign Byte instructions. CPU #1 executed 178,895 Extend Sign Half Word instructions. CPU #1 executed 871,920 Load Word and Zero instructions. CPU #1 executed 41,994 Move From Condition Register instructions. CPU #1 executed 100,005 Move from Special Purpose Register instructions. CPU #1 executed 100,002 Move to Special Purpose Register instructions. CPU #1 executed 804,619 Multiply Low Word instructions. CPU #1 executed 421,201 OR instructions. CPU #1 executed 471,910 OR Immediate instructions. CPU #1 executed 1,292,020 Rotate Left Word Immediate then AND with Mask instructions. CPU #1 executed 663,613 Shift Left Word instructions. CPU #1 executed 1,151,564 Shift Right Algebraic Word Immediate instructions. CPU #1 executed 871,922 Store Word instructions. CPU #1 executed 100,004 Store Word with Update instructions. CPU #1 executed 887,804 Subtract From instructions. CPU #1 executed 83,988 Subtract From Immediate Carrying instructions. CPU #1 executed 1 System Call instruction. 
CPU #1 executed 207,746 XOR instructions. CPU #1 executed 23,740,856 cycles. CPU #1 executed 10,242,780 stalls waiting for data. CPU #1 executed 1 stall waiting for a function unit. CPU #1 executed 1 stall waiting for serialization. CPU #1 executed 1,757,900 times a writeback slot was unavilable. CPU #1 executed 1,088,135 branches. CPU #1 executed 2,048,093 conditional branches fell through. CPU #1 executed 1,088,135 successful branch predictions. CPU #1 executed 904,268 unsuccessful branch predictions. CPU #1 executed 742,557 branch if the condition is FALSE conditional branches. CPU #1 executed 1,249,846 branch if the condition is TRUE conditional branches. CPU #1 executed 571,910 branch always conditional branches. CPU #1 executed 9,493,653 1st single cycle integer functional unit instructions. CPU #1 executed 1,220,900 2nd single cycle integer functional unit instructions. CPU #1 executed 1,254,768 multiple cycle integer functional unit instructions. CPU #1 executed 1,843,846 load/store functional unit instructions. CPU #1 executed 3,136,229 branch functional unit instructions. CPU #1 executed 16,949,396 instructions that were accounted for in timing info. CPU #1 executed 871,920 data reads. CPU #1 executed 971,926 data writes. CPU #1 executed 221 icache misses. CPU #1 executed 16,949,396 instructions in total. Simulator speed was 250,731 instructions/second