Back to the top page

Time-stamp counter

Pentium class cpu has an instruction to read the current time-stamp counter variable ,which is a 64-bit variable, into registers (edx:eax). TSC(time stamp counter) is incremented every cpu tick (1/CPU_HZ). For example, at 1GHz cpu, TSC is incremented by 10^9 per second. It allows to measure time activety in an accurate fashion.

PowerPC provides similar capability but PowerPC time counter is increments at either equal the processor core clock rate or as driven by a separate timer clock input. PowerPC time counter is also a 64-bit.


#if defined(__i386__)

static __inline__ unsigned long long rdtsc(void)
{
  unsigned long long int x;
     __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
     return x;
}
#elif defined(__x86_64__)


static __inline__ unsigned long long rdtsc(void)
{
  unsigned hi, lo;
  __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
  return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}

#elif defined(__powerpc__)


static __inline__ unsigned long long rdtsc(void)
{
  unsigned long long int result=0;
  unsigned long int upper, lower,tmp;
  __asm__ volatile(
                "0:                  \n"
                "\tmftbu   %0           \n"
                "\tmftb    %1           \n"
                "\tmftbu   %2           \n"
                "\tcmpw    %2,%0        \n"
                "\tbne     0b         \n"
                : "=r"(upper),"=r"(lower),"=r"(tmp)
                );
  result = upper;
  result = result<<32;
  result = result|lower;

  return(result);
}

#endif

Click rdtsc.h to download.

Here is an example of usage:

#include <stdio.h>
#include "rdtsc.h"

int main(int argc, char* argv[])
{
  unsigned long long a,b;

  a = rdtsc();
  b = rdtsc();

  printf("%llu\n", b-a);
  return 0;
}

On my Pentimu-M machine, when this program is compiled with -O1 or more optimization level of gcc, it prints '43' which may be bigger than we expect. Assembler codes corresponding to two rdtsc()'s would be like below.

#APP
        .byte 0x0f, 0x31
#NO_APP
        movl    %eax, %ecx
        movl    %edx, %ebx
#APP
        .byte 0x0f, 0x31
#NO_APP
        subl    %ecx, %eax
        sbbl    %ebx, %edx

The next short benchmark code may show you some performance impacts from memory characteristic such as TLB miss, page fault or page swap in/out.

mem.c

#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#include "rdtsc.h"

#define N (1024*1024*2)

int main(int argc, char* argv[])
{
  unsigned long long a,b;
  unsigned long long min,max;
  char* p;
  int i;

  p = (char*)malloc(N);
  assert( p!=(char*)0 );

  max = 0;
  min = UINT64_MAX;

  for(i=0; i<N; i++ ) {
    a = rdtsc();
    p[i] = 0;
    b = rdtsc() - a;
    if( b > max ) max = b;
    else if( b < min ) min = b;
  }
  printf("min=%llu\n", min);
  printf("max=%llu\n", max);
  return 0;
}



Kazutomo Yoshii kazutomo@mcs.anl.gov
Last modified: Thu Feb 2 16:24:27 CST 2006