/*
 * Passes the address of `a` to use_pointer() once for every counter value
 * from 0 through b inclusive, then calls h() with a and b.
 *
 * Control reaches the closing brace without a return statement, so callers
 * must not use the result of this function.
 */
static int
g1 (int a, int b)
{
  int round = 0;

  while (round <= b)
    {
      use_pointer (&a);
      round++;
    }

  h (a, b);
}
/*
 * Dispatches on z: values greater than 1 go to g1() with `a` unchanged,
 * every other value goes to g2() with a + 4.  Afterwards the address of
 * `a` is handed to use_pointer().
 */
static void
f (int a, int z)
{
  if (z <= 1)
    g2 (a + 4, z);
  else
    g1 (a, z);

  use_pointer (&a);
}
/*
 * Benchmark body: for each requested iteration, expands the HUNDRED macro
 * `state->len / (state->line * 100) + 1` times, then passes the final value
 * of p to use_pointer().
 *
 * iterations - number of times the whole inner loop is repeated
 * cookie     - opaque pointer, interpreted as a struct mem_state
 */
void
benchmark_loads(iter_t iterations, void *cookie)
{
	struct mem_state *state = (struct mem_state *)cookie;
	/*
	 * HUNDRED is defined outside this block; presumably it is written in
	 * terms of a variable named p, so this local must keep that name.
	 */
	register char **p = (char **)state->base;
	register int passes = state->len / (state->line * 100) + 1;
	register int pass;

	for (; iterations > 0; iterations--) {
		for (pass = 0; pass < passes; pass++) {
			HUNDRED;
		}
	}

	/* Hand the final pointer to use_pointer() so p is consumed after the loops. */
	use_pointer((void *)p);
}
/*
 * Builds a circular chain of pointers inside `array` and times how long it
 * takes to walk it.
 *
 * A pointer is stored at every multiple of `stride` below `range`.  Each node
 * points to the node `stride` bytes before it; node 0 points back to the last
 * node, closing the ring.  The ring is then walked with dependent loads
 * (p = *p) and the elapsed time is measured with now().
 *
 * A try whose elapsed time is <= MINTIME is discarded: the load count
 * multiplier j is doubled and the try is repeated.  Otherwise the time is
 * divided by j and the minimum over all accepted tries is kept.
 *
 * array  - buffer that receives the pointer chain (at least `range` bytes)
 * range  - number of bytes of `array` to cover
 * stride - distance in bytes between nodes; must be positive (it is used as
 *          a divisor and as the loop step)
 * MHz    - not referenced directly in this function; presumably consumed by
 *          the CYperIt() macro -- verify against its definition
 * fp     - optional stream for the result line; skipped when NULL
 * delay  - non-zero selects the ONEx walk (each load followed by FILL
 *          padding, with the load count divided by REDUCE); zero selects
 *          the plain HUNDRED walk
 *
 * Returns the best (smallest) normalised time.
 */
lng loads(char *array, lng range, lng stride, lng MHz, FILE *fp, int delay)
{
	register char **p = NULL;
	lng i, j = 1, tries;
	lng last;
	lng time, best = 2000000000;

	fprintf(stderr, "\r%11ld %11ld %11ld", range, stride, range/stride);
	fflush(stderr);

	/* Index of the highest node: the largest multiple of stride below range
	 * (0 when range <= stride). */
	for (i = stride; i < range; i += stride)
		;
	last = i - stride;

	/*
	 * Link the nodes from the top down.  Node 0 must wrap to the last node
	 * that was actually written.  Using range - stride here is only correct
	 * when range is a multiple of stride; otherwise it names a slot that was
	 * never initialised, and the walk would follow a garbage pointer.
	 */
	for (i = last; i >= 0; i -= stride) {
		char *next;

		p = (char **)&array[i];
		if (i < stride) {
			next = &array[last];
		} else {
			next = &array[i - stride];
		}
		*p = next;
	}
	/* p now addresses node 0, the starting point of the walk. */

#define ONE p = (char **)*p;
#define TEN ONE ONE ONE ONE ONE ONE ONE ONE ONE ONE
#define HUNDRED TEN TEN TEN TEN TEN TEN TEN TEN TEN TEN

#define FILL p++; p--; p++; p--; p++; p--; p++; p--; p++; p--;
#define ONEx p = (char **)*p; \
	FILL FILL FILL FILL FILL FILL FILL FILL FILL FILL

	if (delay) {
		for (tries = 0; tries < NUMTRIES; ++tries) {
			i = (j * NUMLOADS) / REDUCE;
			time = now();
			while (i > 0) {
				ONEx
				i -= 1;
			}
			time = now() - time;
			use_pointer((void *)p);
			if (time <= MINTIME) {
				/* Too short to measure: double the work and redo this try. */
				j *= 2;
				tries--;
			} else {
				/* Scale back up for the REDUCE-d load count, then normalise by j. */
				time *= REDUCE;
				time /= j;
				if (time < best) {
					best = time;
				}
			}
		}
	} else {
		for (tries = 0; tries < NUMTRIES; ++tries) {
			i = (j * NUMLOADS);
			time = now();
			while (i > 0) {
				HUNDRED
				i -= 100;
			}
			time = now() - time;
			use_pointer((void *)p);
			if (time <= MINTIME) {
				/* Too short to measure: double the work and redo this try. */
				j *= 2;
				tries--;
			} else {
				time /= j;
				if (time < best) {
					best = time;
				}
			}
		}
	}

	fprintf(stderr, " %11ld %11ld", best*j, best);
	fflush(stderr);

	if (fp) {
		fprintf(fp, " %06ld %05.1f %05.1f"
			, best			/* elapsed time [microseconds] */
			, NSperIt(best)		/* nanoseconds per iteration */
			, CYperIt(best)		/* clocks per iteration */
		);
		fflush(fp);
	}

	return best;
}
void loads(char *addr, size_t range, size_t stride, int cpu) { register char **p = 0 /* lint */; size_t i; int tries = 0; int result = 0x7fffffff; double time; unsigned long mask; unsigned int len = sizeof(mask); if (stride & (sizeof(char *) - 1)) { printf("lat_mem_rd: stride must be aligned.\n"); return; } if (range < stride) { return; } /* * First create a list of pointers. * * This used to go forwards, we want to go backwards to try and defeat * HP's fetch ahead. * * We really need to do a random pattern once we are doing one hit per * page. */ for (i = stride; i < range; i += stride) { *(char **)&addr[i] = (char*)&addr[i - stride]; } *(char **)&addr[0] = (char*)&addr[i - stride]; p = (char**)&addr[0]; /* * Now walk them and time it. */ for (tries = 0; tries < MEMTRIES; ++tries) { /* time loop with loads */ #define ONE p = (char **)*p; #define FIVE ONE ONE ONE ONE ONE #define TEN FIVE FIVE #define FIFTY TEN TEN TEN TEN TEN #define HUNDRED FIFTY FIFTY i = N; start(0); while (i >= 1000) { HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED i -= 1000; } i = stop(0,0); use_pointer((void *)p); if (i < result) { result = i; } } /* * We want to get to nanoseconds / load. We don't want to * lose any precision in the process. What we have is the * milliseconds it took to do N loads, where N is 1 million, * and we expect that each load took between 10 and 2000 * nanoseconds. * * We want just the memory latency time, not including the * time to execute the load instruction. We allow one clock * for the instruction itself. So we need to subtract off * N * clk nanoseconds. * * lmbench 2.0 - do the subtration later, in the summary. * Doing it here was problematic. * * XXX - we do not account for loop overhead here. */ time = (double)result; time *= 1000.; /* convert to nanoseconds */ time /= (double)N; /* nanosecs per load */ fprintf(stderr, "%08d %.5f %.3f\n", cpu, range / (1024. * 1024), time); }