/*
 * Sample the performance counter with the low 10 bits dropped, yielding
 * something like microseconds.
 *
 * GETCOUNTER is defined elsewhere; it is expected to store the low and high
 * halves of the counter into pclow and pchigh respectively.
 *
 * Returns the shifted counter value masked to 30 bits.
 */
static inline unsigned long sample_ppc_10(void)
{
    unsigned long pclow, pchigh;

    GETCOUNTER(pclow, pchigh);

    /* Bits 10.. of the low half, followed by the high half, kept to 30 bits */
    return ((pclow >> 10) | (pchigh << 22)) & 0x3FFFFFFF;
}
/*
 * sm: the fastest possible sampler, trading away bits that are not very
 * important in order to get there.
 *
 * The low 20 bits and the high 14 bits of the counter are discarded, leaving
 * a 30-bit result (always a positive OCaml int).  The unit is megacycles,
 * which on GHz machines is something like milliseconds.
 *
 * GETCOUNTER is defined elsewhere; it is expected to store the low and high
 * halves of the counter into its two arguments.
 */
static unsigned long sample_ppc_20(void)
{
    unsigned long lo, hi;
    unsigned long megacycles;

    GETCOUNTER(lo, hi);

    /* Splice the surviving bits of the low half below the high half */
    megacycles = (lo >> 20) | (hi << 12);

    /* Keep 30 bits */
    return megacycles & 0x3FFFFFFF;
}
/*
 * Walk the data of one atomic-typed variable, byte-swapping it in place when
 * compiler->swap is set, and advance *offsetp past it.
 *
 * compiler: supplies the swap flag.
 * topvar:   not used by this function.
 * var:      either a variable node or a type node (var->sort == NCD4_TYPE);
 *           walkSeq passes a type node.  A type node is treated as a single
 *           instance; a variable contributes NCD4_dimproduct(var) instances.
 * offsetp:  in/out cursor into the data being walked.
 *
 * Returns THROW(NC_NOERR); no failure path exists in this function.
 */
static int
walkAtomicVar(NCD4meta* compiler, NCD4node* topvar, NCD4node* var, void** offsetp)
{
    int ret = NC_NOERR;
    void* offset;
    d4size_t i;
    nc_type subsort;
    d4size_t dimproduct;
    NCD4node* basetype;

    basetype = (var->sort == NCD4_TYPE ? var : var->basetype);
    subsort = basetype->subsort;
    dimproduct = (var->sort == NCD4_TYPE ? 1 : NCD4_dimproduct(var));

    offset = *offsetp;

    /* An enum is walked as its underlying type.  Resolve it through
       basetype, not var->basetype: when var is itself the enum type node,
       var->basetype is already the underlying type and going one level
       further would step past it. */
    if(subsort == NC_ENUM)
        subsort = basetype->basetype->subsort;

    if(subsort == NC_STRING) {
        /* Each string is a 64-bit count followed by that many bytes;
           only the count needs converting */
        for(i=0;i<dimproduct;i++) {
            COUNTERTYPE count;
            if(compiler->swap)
                swapinline64(offset);
            count = GETCOUNTER(offset);
            SKIPCOUNTER(offset);
            /* skip count bytes */
            offset = INCR(offset,count);
        }
    } else {
        /* Only need to swap multi-byte integers and floats */
        int typesize = NCD4_typesize(subsort);
        d4size_t totalsize = typesize*dimproduct;
        if(typesize == 1 || !compiler->swap) {
            /* Nothing to swap: step over the whole array at once */
            offset = INCR(offset,totalsize);
        } else {
            for(i=0;i<dimproduct;i++) {
                char* sp = (char*)offset;
                switch (typesize) {
                case 2: swapinline16(sp); break;
                case 4: swapinline32(sp); break;
                case 8: swapinline64(sp); break;
                default: break;
                }
                offset = INCR(offset,typesize);
            }
        }
    }
    *offsetp = offset;
    return THROW(ret);
}
/*
 * This is the function that actually reads the performance counter.
 *
 * The two halves delivered by GETCOUNTER are combined into one 64-bit
 * reading.  While first_value (declared elsewhere in the file) is still 0,
 * the current reading is stored into it as the baseline.  The return value
 * is the current reading minus that baseline.
 */
inline unsigned longlong read_ppc(void)
{
    unsigned long lo, hi;
    unsigned longlong now;

    GETCOUNTER(lo, hi);

    /* Put the 64-bit value together: hi supplies bits 32 and up */
    now = (unsigned longlong)hi << 32;
    now |= (unsigned longlong)lo;

    /* Latch the baseline while none has been recorded */
    if(first_value == 0) {
        first_value = now;
    }

    return now - first_value;
}
/*
 * Walk one sequence instance: a 64-bit record count followed by that many
 * records of the vlen's base type.  Counts and data are byte-swapped in
 * place when compiler->swap is set.
 *
 * Remember that the base type of var is a vlen.
 *
 * compiler: supplies the swap flag.
 * topvar:   passed through unchanged to the per-type walkers.
 * vlentype: the vlen type node; its basetype describes each record.
 * offsetp:  in/out cursor into the data being walked.  It is updated only
 *           on success; on error it is left at its value on entry.
 *
 * Returns THROW(ret), where ret is NC_NOERR or the first non-zero code
 * returned by a nested walker.
 */
static int
walkSeq(NCD4meta* compiler, NCD4node* topvar, NCD4node* vlentype, void** offsetp)
{
    int ret = NC_NOERR;
    d4size_t i; /* same type as recordcount: no signed/unsigned compare */
    void* offset;
    d4size_t recordcount;
    NCD4node* basetype;

    offset = *offsetp;

    /* process the record count; swap it in place (as the string and opaque
       counts are) so the stored count ends up converted as well */
    if(compiler->swap)
        swapinline64(offset);
    recordcount = GETCOUNTER(offset);
    SKIPCOUNTER(offset);

    basetype = vlentype->basetype; /* This may be of any type potentially */
    assert(basetype->sort == NCD4_TYPE);

    for(i=0;i<recordcount;i++) {
        switch(basetype->subsort) {
        case NC_OPAQUE:
            if((ret=walkOpaqueVar(compiler,topvar,basetype,&offset))) goto done;
            break;
        case NC_STRUCT:
            /* We can treat each record like a structure instance */
            if((ret=walkStruct(compiler,topvar,basetype,&offset))) goto done;
            break;
        case NC_SEQ:
            if((ret=walkSeq(compiler,topvar,basetype,&offset))) goto done;
            break;
        default: /* atomic basetype */
            if((ret=walkAtomicVar(compiler,topvar,basetype,&offset))) goto done;
            break;
        }
    }
    *offsetp = offset;
done:
    return THROW(ret);
}
/*
 * Walk the data of one opaque-typed variable and advance *offsetp past it.
 * Each opaque instance is a 64-bit count followed by that many bytes; the
 * count is byte-swapped in place when compiler->swap is set and the bytes
 * themselves are skipped untouched.
 *
 * compiler: supplies the swap flag.
 * topvar:   not used by this function.
 * var:      either a variable node or a type node (var->sort == NCD4_TYPE);
 *           a type node is treated as a single instance, a variable
 *           contributes NCD4_dimproduct(var) instances.
 * offsetp:  in/out cursor into the data being walked.
 *
 * Returns THROW(NC_NOERR); no failure path exists in this function.
 */
static int
walkOpaqueVar(NCD4meta* compiler, NCD4node* topvar, NCD4node* var, void** offsetp)
{
    int ret = NC_NOERR;
    void* offset;
    d4size_t i;
    unsigned long long count;
    d4size_t dimproduct;

    /* Type nodes are never handed to NCD4_dimproduct */
    dimproduct = (var->sort == NCD4_TYPE ? 1 : NCD4_dimproduct(var));

    offset = *offsetp;
    for(i=0;i<dimproduct;i++) {
        /* Get and swap opaque count */
        if(compiler->swap)
            swapinline64(offset);
        count = GETCOUNTER(offset);
        SKIPCOUNTER(offset);
        /* skip the opaque bytes */
        offset = INCR(offset,count);
    }
    *offsetp = offset;
    return THROW(ret);
}