int benchmarkStrcmp(const command_data_t &cmd_data) { int size = cmd_data.args[0]; int iters = cmd_data.args[1]; // Allocate a large chunk of memory to hold both strings. uint8_t *memory = (uint8_t*)malloc(2*size + 2048); if (!memory) return -1; char *string1 = reinterpret_cast<char*>(getAlignedMemory(memory, cmd_data.src_align, cmd_data.src_or_mask)); char *string2 = reinterpret_cast<char*>(getAlignedMemory((uint8_t*)string1+size, cmd_data.dst_align, cmd_data.dst_or_mask)); for (int i = 0; i < size - 1; i++) { string1[i] = (char)(32 + (i % 96)); string2[i] = string1[i]; } string1[size-1] = '\0'; string2[size-1] = '\0'; uint64_t time_ns; double avg_kb, running_avg_kb = 0.0, square_avg_kb = 0.0; double max_kb = 0.0, min_kb = 0.0; int j; bool print_average = cmd_data.print_average; bool print_each_iter = cmd_data.print_each_iter; int copies = cmd_data.data_size / size; int retval = 0; for (int i = 0; iters == -1 || i < iters; i++) { time_ns = nanoTime(); for (j = 0; j < copies; j++) { retval = strcmp(string1, string2); if (retval != 0) { printf("strcmp failed, return value %d\n", retval); } } time_ns = nanoTime() - time_ns; // Compute in kb to avoid any overflows. COMPUTE_AVERAGE_KB(avg_kb, copies * size, time_ns); if (print_average) { COMPUTE_RUNNING(avg_kb, running_avg_kb, square_avg_kb, i); COMPUTE_MIN_MAX(avg_kb, min_kb, max_kb); } if (print_each_iter) { printf("strcmp %dx%d bytes took %.06f seconds (%f MB/s)\n", copies, size, (double)time_ns / NS_PER_SEC, avg_kb / 1024.0); } } if (print_average) { printf(" strcmp %dx%d bytes average %.2f MB/s std dev %.4f min %.2f MB/s max %.2f MB/s\n", copies, size, running_avg_kb/1024.0, GET_STD_DEV(running_avg_kb, square_avg_kb) / 1024.0, min_kb / 1024.0, max_kb / 1024.0); } return 0; }
inline static void *Execute(void* Arg) { CCStackThreadState *th_state; long i, rnum; volatile int j; long id = (long) Arg; fastRandomSetSeed(id + 1); th_state = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CCStackThreadState)); CCStackThreadStateInit(&object_struct, th_state, (int)id); BarrierWait(&bar); if (id == 0) d1 = getTimeMillis(); for (i = 0; i < RUNS; i++) { // perform a push operation CCStackPush(&object_struct, th_state, id, id); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; // perform a pop operation CCStackPop(&object_struct, th_state, id); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; } return NULL; }
// Allocates a single block of num_buffers * incr bytes plus 3 * alignment
// bytes of slack, and returns the pointer that getAlignedMemory() derives
// from it for the given alignment and or_mask. Returns NULL when malloc
// fails. The pointer returned by malloc itself is not handed back to the
// caller.
uint8_t *getColdBuffer(int num_buffers, size_t incr, int alignment, int or_mask) {
    void *raw = malloc(num_buffers * incr + 3 * alignment);
    uint8_t *base = reinterpret_cast<uint8_t*>(raw);
    if (base != NULL) {
        return getAlignedMemory(base, alignment, or_mask);
    }
    return NULL;
}
/**
 * Initialises the circular buffer: allocates storage for \p size pointers
 * through getAlignedMemory() and then calls reset().
 *
 * A buffer that already owns storage is not re-initialised, because
 * overwriting \p buf would leak the existing allocation.
 *
 * \return \p true if the storage was allocated and the buffer reset,
 * \p false if the buffer was already initialised or the allocation failed.
 */
bool init() {
    // buf must still be unset at this point; refuse a second initialisation
    // instead of replacing (and leaking) the current storage.
    if (buf) return false;
    buf=(void**)getAlignedMemory(longxCacheLine*sizeof(long),size*sizeof(void*));
    if (!buf) return false;
    reset();
    return true;
}
int main(int argc, char *argv[]) { if (argc != 2) { fprintf(stderr, "ERROR: Please set an upper bound for the backoff!\n"); exit(EXIT_SUCCESS); } else { sscanf(argv[1], "%d", &MAX_BACK); } sim_struct = getAlignedMemory(CACHE_LINE_SIZE, sizeof(SimStruct)); SimInit(sim_struct, MAX_BACK); BarrierInit(&bar, N_THREADS); StartThreadsN(N_THREADS, Execute, _USE_UTHREADS_); JoinThreadsN(N_THREADS); d2 = getTimeMillis(); printf("time: %d (ms)\tthroughput: %.2f (millions ops/sec)\t", (int) (d2 - d1), RUNS*N_THREADS/(1000.0*(d2 - d1))); printStats(N_THREADS); #ifdef DEBUG SimObjectState *l = (SimObjectState *)&sim_struct->pool[((pointer_t*)&sim_struct->sp)->struct_data.index]; fprintf(stderr, "Object state long value: %d\n", l->counter); fprintf(stderr, "object counter: %d\n", l->counter); fprintf(stderr, "rounds: %d\n", l->rounds); fprintf(stderr, "Average helping: %f\n", (float)l->counter/l->rounds); fprintf(stderr, "\n"); #endif return 0; }
inline static void *Execute(void* Arg) { MSQueueThreadState *th_state; long i; long id = (long) Arg; long rnum; volatile long j; th_state = getAlignedMemory(CACHE_LINE_SIZE, sizeof(MSQueueThreadState)); MSQueueThreadStateInit(th_state, MIN_BAK, MAX_BAK); fastRandomSetSeed(id + 1); BarrierWait(&bar); if (id == 0) d1 = getTimeMillis(); for (i = 0; i < RUNS; i++) { MSQueueEnqueue(&queue, th_state, id); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; MSQueueDequeue(&queue, th_state); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; } return NULL; }
/** * It initialise the buffer. Allocate space (\p size) of possibly aligned * memory and reset the pointers (read pointer and write pointer) by * placing them at the beginning of the buffer. * * \return TODO */ bool init(const bool startatlineend=false) { if (buf || (size==0)) return false; #if defined(SWSR_MULTIPUSH) if (size<MULTIPUSH_BUFFER_SIZE) return false; #endif // getAlignedMemory is a function defined in 'sysdep.h' buf=(void**)getAlignedMemory(longxCacheLine*sizeof(long),size*sizeof(void*)); if (!buf) return false; reset(startatlineend); return true; }
/**
 * Builds an empty queue consisting of a single node with no data, shared by
 * \p head and \p tail, and allocates the node cache.
 *
 * \param cachesize number of pointer slots requested for the node cache.
 * \param fillcache if \p true, up to \p cachesize nodes are allocated up front
 * and handed to cachepush(); nodes whose allocation fails are skipped.
 *
 * The process is aborted if either the initial node or the cache storage
 * cannot be allocated.
 */
dynqueue(int cachesize=DEFAULT_CACHE_SIZE, bool fillcache=false):cachesize(cachesize) {
    Node * n = (Node *)::malloc(sizeof(Node));
    // The initial node is written to immediately below, so a failed
    // allocation is treated the same way as a failed cache allocation.
    if (!n) abort();
    n->data = NULL;
    n->next = NULL;
    head=tail=n;

    cache=(void**)getAlignedMemory(longxCacheLine*sizeof(long),cachesize*sizeof(void*));
    if (!cache) abort();

    if (fillcache) {
        for(int i=0;i<cachesize;++i) {
            n = (Node *)::malloc(sizeof(Node));
            if (n) cachepush(n);
        }
    }
}
inline static void *Execute(void* Arg) { SimThreadState *th_state; long i, rnum; long id = (long) Arg; volatile long j; th_state = getAlignedMemory(CACHE_LINE_SIZE, sizeof(SimThreadState)); SimThreadStateInit(th_state, id); fastRandomSetSeed((unsigned long)id + 1); BarrierWait(&bar); if (id == 0) d1 = getTimeMillis(); for (i = 0; i < RUNS; i++) { SimApplyOp(sim_struct, th_state, fetchAndMultiply, (Object) (id + 1), id); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; } return NULL; }
inline static void *Execute(void* Arg) { OyamaThreadState *th_state; long i, rnum; volatile int j; long id = (long) Arg; fastRandomSetSeed(id + 1); th_state = getAlignedMemory(CACHE_LINE_SIZE, sizeof(OyamaThreadState)); OyamaThreadStateInit(th_state); BarrierWait(&bar); if (id == 0) d1 = getTimeMillis(); for (i = 0; i < RUNS; i++) { // perform a fetchAndMultiply operation OyamaApplyOp((OyamaStruct *)&object_lock, th_state, fetchAndMultiply, (ArgVal) id, id); rnum = fastRandomRange(1, MAX_WORK); for (j = 0; j < rnum; j++) ; } return NULL; }
// Allocate memory with a specific alignment and return that pointer. // This function assumes an alignment value that is a power of 2. // If the alignment is 0, then use the pointer returned by malloc. uint8_t *allocateAlignedMemory(size_t size, int alignment, int or_mask) { uint64_t ptr = reinterpret_cast<uint64_t>(malloc(size + 3 * alignment)); if (!ptr) return NULL; return getAlignedMemory((uint8_t*)ptr, alignment, or_mask); }