Go to the first, previous, next, last section; table of contents; home; full screen; PSIM.

What performance analysis measurements can PSIM perform?

Below is the output from a recent analysis run (contributed by Michael Meissner).

For the following program:


/* Linear congruential pseudo-random number generator.  The state is a
   function-local static that starts at 47114711, so every run of the
   program produces the same sequence.  Each call advances the state
   and returns it with the low 8 bits shifted out.  */
long
simple_rand ()
{
  static unsigned long seed = 47114711;

  /* Unsigned arithmetic: the multiply-add wraps instead of overflowing.  */
  seed = seed * 1103515245 + 12345;
  return seed >> 8;
}

/* Build a pseudo-random bit pattern out of runs of identical bits.
   Each iteration draws one value from simple_rand (): (draw >> 1) % 16
   is the length of the next run and the lowest bit of the draw selects
   whether the run consists of ones or zeros.  The run is shifted into
   the low end of the accumulator.  Returns the accumulator as soon as a
   run length of zero is drawn, or once the total number of appended
   bits exceeds 8 * sizeof (long) + 6.  */
unsigned long int
random_bitstring ()
{
  unsigned long int x;
  /* Keep the draw in a long, matching the return type of simple_rand ().
     Narrowing it to int can give a negative value where long is wider
     than int; the run length would then be negative and the shifts
     below would have undefined behavior.  */
  long ran;
  int n_bits;
  int tot_bits = 0;

  x = 0;
  for (;;)
    {
      ran = simple_rand ();
      n_bits = (ran >> 1) % 16;
      tot_bits += n_bits;

      if (n_bits == 0)
        return x;
      else
        {
          /* Make room for the run; it stays all zeros unless the low
             bit of the draw asks for ones.  */
          x <<= n_bits;
          if (ran & 1)
            x |= (1 << n_bits) - 1;

          if (tot_bits > 8 * sizeof (long) + 6)
            return x;
        }
    }
}

/* Absolute value of X.  The argument is evaluated twice, so it must not
   have side effects.  */
#define ABS(x) ((x) < 0 ? -(x) : (x))

/* The listing includes no headers, so declare the two library functions
   that main () calls; implicit function declarations are no longer part
   of C.  */
extern void abort (void);
extern void exit (int);

/* Check integer division and remainder on pseudo-random operands.
   For 50000 pairs (x, y) obtained from random_bitstring (), and for the
   unsigned and signed variants of long, int, short and char in turn,
   verify that the remainder is smaller in magnitude than the divisor
   and that quotient * divisor + remainder reproduces the dividend.
   Calls abort () on the first mismatch and exit (0) when all pairs pass.

   A `continue' in any of the per-type blocks abandons the remaining,
   narrower types for that pair.

   NOTE(review): ABS (yy) negates yy, which overflows when yy is the most
   negative long or int value; the overflow guards below only skip the
   case where xx has that value and yy is -1.  */
int
main (void)
{
  int i;
  
  for (i = 0; i < 50000; i++)
    {
      unsigned long x, y;
      x = random_bitstring ();
      y = random_bitstring ();
      
      /* When long is no wider than int the long checks would repeat the
	 int checks, so skip them.  */
      if (sizeof (int) == sizeof (long))
	goto save_time;
      
      { unsigned long xx = x, yy = y, r1, r2;
	if (yy == 0) continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (r2 >= yy || r1 * yy + r2 != xx)
	  abort ();
      }
      { signed long xx = x, yy = y, r1, r2;
	/* xx << 1 == 0 holds when xx is zero or has only its sign bit
	   set; together with yy == -1 this skips the quotient that does
	   not fit in a signed long.  */
	if ((unsigned long) xx << 1 == 0 && yy == -1)
	  continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (ABS (r2) >= (unsigned long) ABS (yy) || (signed long) (r1 * yy + r2) != xx)
	  abort ();
      }
    save_time:
      { unsigned int xx = x, yy = y, r1, r2;
	if (yy == 0) continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (r2 >= yy || r1 * yy + r2 != xx)
	  abort ();
      }
      { signed int xx = x, yy = y, r1, r2;
	/* Same overflow guard as for signed long, at int width.  */
	if ((unsigned int) xx << 1 == 0 && yy == -1)
	  continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (ABS (r2) >= (unsigned int) ABS (yy) || (signed int) (r1 * yy + r2) != xx)
	  abort ();
      }
      { unsigned short xx = x, yy = y, r1, r2;
	if (yy == 0) continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (r2 >= yy || r1 * yy + r2 != xx)
	  abort ();
      }
      /* No zero test here: a zero divisor was already skipped by the
	 unsigned short block above, which takes the same low bits of y.  */
      { signed short xx = x, yy = y, r1, r2;
	r1 = xx / yy;
	r2 = xx % yy;
	if (ABS (r2) >= (unsigned short) ABS (yy) || (signed short) (r1 * yy + r2) != xx)
	  abort ();
      }
      { unsigned char xx = x, yy = y, r1, r2;
	if (yy == 0) continue;
	r1 = xx / yy;
	r2 = xx % yy;
	if (r2 >= yy || r1 * yy + r2 != xx)
	  abort ();
      }
      /* As for signed short: the unsigned char block above has already
	 skipped a zero divisor.  */
      { signed char xx = x, yy = y, r1, r2;
	r1 = xx / yy;
	r2 = xx % yy;
	if (ABS (r2) >= (unsigned char) ABS (yy) || (signed char) (r1 * yy + r2) != xx)
	  abort ();
      }
    }
  
  exit (0);
}

Here is the current output generated with the -I switch on a 90 MHz Pentium (the compiler used is the development version of GCC with a new scheduler replacing the old one):


CPU #1 executed     41,994 AND instructions.
CPU #1 executed    519,785 AND Immediate instructions.
CPU #1 executed    680,058 Add instructions.
CPU #1 executed     41,994 Add Extended instructions.
CPU #1 executed    921,916 Add Immediate instructions.
CPU #1 executed    221,199 Add Immediate Carrying instructions.
CPU #1 executed    943,823 Add Immediate Shifted instructions.
CPU #1 executed    471,909 Add to Zero Extended instructions.
CPU #1 executed    571,915 Branch instructions.
CPU #1 executed  1,992,403 Branch Conditional instructions.
CPU #1 executed    571,910 Branch Conditional to Link Register instructions.
CPU #1 executed    320,431 Compare instructions.
CPU #1 executed    471,911 Compare Immediate instructions.
CPU #1 executed    145,867 Compare Logical instructions.
CPU #1 executed    442,414 Compare Logical Immediate instructions.
CPU #1 executed          1 Condition Register XOR instruction.
CPU #1 executed    103,873 Divide Word instructions.
CPU #1 executed    104,275 Divide Word Unsigned instructions.
CPU #1 executed    132,510 Extend Sign Byte instructions.
CPU #1 executed    178,895 Extend Sign Half Word instructions.
CPU #1 executed    871,920 Load Word and Zero instructions.
CPU #1 executed     41,994 Move From Condition Register instructions.
CPU #1 executed    100,005 Move from Special Purpose Register instructions.
CPU #1 executed    100,002 Move to Special Purpose Register instructions.
CPU #1 executed    804,619 Multiply Low Word instructions.
CPU #1 executed    421,201 OR instructions.
CPU #1 executed    471,910 OR Immediate instructions.
CPU #1 executed  1,292,020 Rotate Left Word Immediate then AND with Mask instructions.
CPU #1 executed    663,613 Shift Left Word instructions.
CPU #1 executed  1,151,564 Shift Right Algebraic Word Immediate instructions.
CPU #1 executed    871,922 Store Word instructions.
CPU #1 executed    100,004 Store Word with Update instructions.
CPU #1 executed    887,804 Subtract From instructions.
CPU #1 executed     83,988 Subtract From Immediate Carrying instructions.
CPU #1 executed          1 System Call instruction.
CPU #1 executed    207,746 XOR instructions.

CPU #1 executed 23,740,856 cycles.
CPU #1 executed 10,242,780 stalls waiting for data.
CPU #1 executed          1 stall waiting for a function unit.
CPU #1 executed          1 stall waiting for serialization.
CPU #1 executed  1,757,900 times a writeback slot was unavilable.
CPU #1 executed  1,088,135 branches.
CPU #1 executed  2,048,093 conditional branches fell through.
CPU #1 executed  1,088,135 successful branch predictions.
CPU #1 executed    904,268 unsuccessful branch predictions.
CPU #1 executed    742,557 branch if the condition is FALSE conditional branches.
CPU #1 executed  1,249,846 branch if the condition is TRUE conditional branches.
CPU #1 executed    571,910 branch always conditional branches.
CPU #1 executed  9,493,653 1st single cycle integer functional unit instructions.
CPU #1 executed  1,220,900 2nd single cycle integer functional unit instructions.
CPU #1 executed  1,254,768 multiple cycle integer functional unit instructions.
CPU #1 executed  1,843,846 load/store functional unit instructions.
CPU #1 executed  3,136,229 branch functional unit instructions.
CPU #1 executed 16,949,396 instructions that were accounted for in timing info.
CPU #1 executed    871,920 data reads.
CPU #1 executed    971,926 data writes.
CPU #1 executed        221 icache misses.
CPU #1 executed 16,949,396 instructions in total.

Simulator speed was 250,731 instructions/second


Go to the first, previous, next, last section; table of contents; home; full screen.