/*
 * Recompute the Mandelbrot image block-by-block into shared memory, copy each
 * block back into the host-side texture buffer `tex`, then upload it as an
 * RGB OpenGL texture and render.
 *
 * Globals used (defined elsewhere): scale, cx, cy, max_iter, width, height,
 * tex, tex_w, tex_h, texture, SHARED_tex.
 */
void set_texture() {
    alloc_tex();
    /* Rows of pixels that fit in one transfer block: BLOCK_SIZE bytes at
       3 bytes (RGB) per pixel, tex_w pixels per row. */
    int height_delta = BLOCK_SIZE / tex_w / 3;
    int i;
    /* Since the pcie hybrid version reuses the same memory to compute different portions of the image, the SW_ONLY executable should use a non-zero OFFSET*/
    /* NOTE: OFFSET expands to the loop variable `i` below — it is only
       meaningful inside the for loop. */
#ifdef SW_ONLY
#define OFFSET (i*tex_w)
#else
#define OFFSET (0)
#endif
    for (i = 0; i < height; i += height_delta) {
        /* Compute rows [i, MIN(i+height_delta, height)) into shared memory. */
        calc_mandel(scale, cx, cy, SHARED_tex+OFFSET, tex_w, max_iter, width, i, MIN(i+height_delta,height), height);
        /* NOTE(review): in the SW_ONLY build the source here is SHARED_tex
           without +OFFSET, while calc_mandel wrote at SHARED_tex+OFFSET —
           looks inconsistent; also OFFSET is (i*tex_w) with no *3
           bytes-per-pixel factor, and the copy length is a full
           height_delta block even on the last (possibly clamped)
           iteration. Verify against the definitions of SHARED_tex and
           memcpy_from_shared before changing anything. */
        memcpy_from_shared(tex[i], SHARED_tex, height_delta * tex_w * 3);
    }
    /* Upload the completed tex_w x tex_h RGB buffer and draw it. */
    glEnable(GL_TEXTURE_2D);
    glBindTexture(GL_TEXTURE_2D, texture);
    glTexImage2D(GL_TEXTURE_2D, 0, 3, tex_w, tex_h, 0, GL_RGB, GL_UNSIGNED_BYTE, tex[0]);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    render();
}
int main() { int num_threads = 4; int global[num_threads]; int compare[num_threads]; int increment[num_threads]; pthread_t pool[num_threads]; struct args arg_array[num_threads]; int i, j, max = 10; //printf("Incrementing by %d for %d iterations:\n", increment, max); int *SHARED_MEM_global = malloc_shared(num_threads*sizeof(int), &global, LEGUP_RAM_LOCATION_ONCHIP); for (j = 0; j<num_threads;j++) { global[j] = 5; compare[j] = 5; increment[j] = 2*j; } for (i = 0; i<max; i++) { // pre-accelerator copy memcpy_to_shared(SHARED_MEM_global, &global, num_threads*sizeof(int)); // accelerator call for (j = 0; j < num_threads; j++) { struct args tmp = {&SHARED_MEM_global[j], increment[j]}; memcpy(arg_array+j, &tmp, sizeof(tmp)); pthread_create(pool+j, NULL, ByRef_thread, (void *)(arg_array + j)); } for (j = 0; j < num_threads; j++) { pthread_join(pool[j], NULL); } //ByRef(SHARED_MEM_global, increment); // post-accelerator copy memcpy_from_shared(&global, SHARED_MEM_global, num_threads*sizeof(int)); for (j = 0; j < num_threads; j++) { printf("value from thread %d is %d\n", j, global[j]); } } free_shared(SHARED_MEM_global); return 0; }