void alloc_tex() { int i, ow = tex_w, oh = tex_h; for (tex_w = 1; tex_w < width; tex_w <<= 1); for (tex_h = 1; tex_h < height; tex_h <<= 1); if (tex_h != oh || tex_w != ow) { tex = realloc(tex, tex_h * tex_w * 3 + tex_h * sizeof(rgb_t*)); free_shared(SHARED_tex); SHARED_tex = malloc_shared(MIN(tex_h * tex_w * 3, BLOCK_SIZE), tex + tex_h); } for (tex[0] = (rgb_t *)(tex + tex_h), i = 1; i < tex_h; i++) tex[i] = tex[i - 1] + tex_w; }
int main() { int num_threads = 4; int global[num_threads]; int compare[num_threads]; int increment[num_threads]; pthread_t pool[num_threads]; struct args arg_array[num_threads]; int i, j, max = 10; //printf("Incrementing by %d for %d iterations:\n", increment, max); int *SHARED_MEM_global = malloc_shared(num_threads*sizeof(int), &global, LEGUP_RAM_LOCATION_ONCHIP); for (j = 0; j<num_threads;j++) { global[j] = 5; compare[j] = 5; increment[j] = 2*j; } for (i = 0; i<max; i++) { // pre-accelerator copy memcpy_to_shared(SHARED_MEM_global, &global, num_threads*sizeof(int)); // accelerator call for (j = 0; j < num_threads; j++) { struct args tmp = {&SHARED_MEM_global[j], increment[j]}; memcpy(arg_array+j, &tmp, sizeof(tmp)); pthread_create(pool+j, NULL, ByRef_thread, (void *)(arg_array + j)); } for (j = 0; j < num_threads; j++) { pthread_join(pool[j], NULL); } //ByRef(SHARED_MEM_global, increment); // post-accelerator copy memcpy_from_shared(&global, SHARED_MEM_global, num_threads*sizeof(int)); for (j = 0; j < num_threads; j++) { printf("value from thread %d is %d\n", j, global[j]); } } free_shared(SHARED_MEM_global); return 0; }