/* Output procedures **********************/

/*
 * Output callback for "r": hands the value _V to _put_int() under the
 * label "r". The cdata argument is accepted but not used here.
 */
void inc_O_r(void* cdata, _integer _V)
{
    _put_int("r", _V);
}

/* Main procedure *************************/

/*
 * Driver loop: creates a context with inc_new_ctx(NULL), then forever
 * reads the input "n" with _get_int(), feeds it to inc_I_n() and calls
 * inc_step() on the context.
 *
 * Before each step a banner with the 1-based step number is printed when
 * ISATTY is non-zero (set from isatty(0)); otherwise a bare newline is
 * printed before every step except the first. The loop has no exit, so
 * the trailing return is never reached.
 */
int main()
{
    /* Context allocation */
    int steps_done = 0;
    struct inc_ctx* ctx = inc_new_ctx(NULL);

    /* Main loop */
    ISATTY = isatty(0);
    for (;;) {
        if (ISATTY) {
            printf("## STEP %d ##########\n", steps_done + 1);
        } else if (steps_done != 0) {
            printf("\n");
        }
        fflush(stdout);
        ++steps_done;

        inc_I_n(ctx, _get_int("n"));
        inc_step(ctx);
    }
    return 1;
}
/*
 * OpenACC atomic-update test.
 *
 * Fills a[] and b[], then runs the accelerated region `runs` times. Each
 * run computes c[i] = a[i] + b[i] and atomically adds inc_step() to d[0]
 * once per element. Afterwards the sum is recomputed sequentially on the
 * host and compared element by element against c[], and d[0] is compared
 * against runs * SIZE * inc_step().
 *
 * Returns 0 normally, or -1 as soon as an element of c[] differs from the
 * host result. The atomic check only reports success/failure on stderr;
 * it does not affect the return value.
 */
int main()
{
    /* Number of accelerated runs; d[0] accumulates across all of them. */
    const int runs = 3;

    int i;
    float a[SIZE];
    float b[SIZE];
    float c[SIZE];
    float seq[SIZE];

#ifdef __NVCUDA__
    acc_init( acc_device_nvcuda );
#endif
#ifdef __NVOPENCL__
    acc_init( acc_device_nvocl );
    acc_list_devices_spec( acc_device_nvocl );
#endif

    // Initialize matrices.
    for (i = 0; i < SIZE; ++i) {
        a[i] = (float)i;
        b[i] = (float)2*i;
        c[i] = 0.0f;
    }

    /*
     * clock() returns clock_t in units of 1/CLOCKS_PER_SEC seconds, so the
     * elapsed time is scaled by CLOCKS_PER_SEC rather than a fixed 1000 to
     * make the printed "ms" figure correct on every platform.
     */
    clock_t tic, toc;

    // Compute vector Add
    int d[1] = {0};
    int k;
    for (k = 0; k < runs; k++) {
        printf("Calculation on GPU ... ");
        tic = clock();
        /*
         * NOTE(review): the brace block after `loop independent` and the
         * plain update under `atomic capture` are kept exactly as the
         * original had them -- confirm the target OpenACC compiler
         * accepts these forms.
         */
#pragma acc data pcopyin(a[0:SIZE],b[0:SIZE]) pcopyout(c[0:SIZE]) pcopy(d[0:1])
        {
#pragma acc kernels
            {
#pragma acc loop independent
                {
                    for (i = 0; i < SIZE; ++i) {
#pragma acc atomic capture
                        { d[0]+=inc_step(); }
                        c[i] = a[i] + b[i];
                    }
                }
            }
        }
        toc = clock();
        printf(" %6.4f ms\n", (double)(toc - tic) * 1000.0 / CLOCKS_PER_SEC);
    }

    // ****************
    // double-check the OpenACC result sequentially on the host
    // ****************
    // Perform the add
    printf("Calculation on CPU ... ");
    tic = clock();
    for (i = 0; i < SIZE; ++i) {
        seq[i] = a[i] + b[i];
        if (c[i] != seq[i]) {
            fprintf(stderr,"Error %d %16.10f!=%16.10f \n", i, c[i], seq[i]);
            return -1;
        }
    }
    toc = clock();
    printf(" %6.4f ms\n", (double)(toc - tic) * 1000.0 / CLOCKS_PER_SEC);

    /* One increment per element per run. */
    const int expected = runs*SIZE*inc_step();

    printf("atomic sum> %d (should be %d)\n", d[0], expected);
    if (d[0] == expected) {
        fprintf(stderr,"OpenACC atomic operation test was successful!\n");
    } else {
        fprintf(stderr,"OpenACC atomic operation test failed!\n");
    }

    return 0;
}
static void measure(int fd, struct flow *fw) { long delay; fw->written_blocks++; fw->total_written += fw->block_size; if (fw->written_blocks < fw->blocks_per_delay) return; assert(!fdatasync(fd)); assert(!gettimeofday(&fw->t2, NULL)); /* Help the kernel to help us. */ assert(!posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED)); delay = delay_ms(&fw->t1, &fw->t2); switch (fw->state) { case FW_INC: if (delay > fw->delay_ms) { move_to_search(fw, fw->blocks_per_delay - fw->step / 2, fw->blocks_per_delay); } else if (delay < fw->delay_ms) { inc_step(fw); } else move_to_steady(fw); break; case FW_DEC: if (delay > fw->delay_ms) { dec_step(fw); } else if (delay < fw->delay_ms) { move_to_search(fw, fw->blocks_per_delay, fw->blocks_per_delay + fw->step / 2); } else move_to_steady(fw); break; case FW_SEARCH: if (fw->bpd2 - fw->bpd1 <= 3) { move_to_steady(fw); break; } if (delay > fw->delay_ms) { fw->bpd2 = fw->blocks_per_delay; fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2; } else if (delay < fw->delay_ms) { fw->bpd1 = fw->blocks_per_delay; fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2; } else move_to_steady(fw); break; case FW_STEADY: update_mean(fw); if (delay <= fw->delay_ms) { move_to_inc(fw); } else if (fw->blocks_per_delay > 1) { move_to_dec(fw); } break; default: assert(0); } if (fw->progress) { /* Instantaneous speed. */ double inst_speed = (double)fw->blocks_per_delay * fw->block_size * 1000 / fw->delay_ms; const char *unit = adjust_unit(&inst_speed); double percent; /* The following shouldn't be necessary, but sometimes * the initial free space isn't exactly reported * by the kernel; this issue has been seen on Macs. 
*/ if (fw->total_size < fw->total_written) fw->total_size = fw->total_written; percent = (double)fw->total_written * 100 / fw->total_size; erase(fw->erase); fw->erase = printf("%.2f%% -- %.2f %s/s", percent, inst_speed, unit); assert(fw->erase > 0); if (fw->measurements > 0) fw->erase += pr_time( (fw->total_size - fw->total_written) / get_avg_speed(fw)); fflush(stdout); } start_measurement(fw); }
/*
 * Runs move_to_inc_at_start(fw) and then inc_step(fw), in that order,
 * on the same flow.
 */
static inline void move_to_inc(struct flow *fw)
{
	move_to_inc_at_start(fw);
	inc_step(fw);
}