/*
 * Host-side driver for the "save_soln" loop without indirection.
 *
 * Splits the set's elements into one contiguous [start, finish) range per
 * thread and calls op_x86_save_soln on each range.  Afterwards it calls
 * set_dirtybit on every argument whose argtype is OP_ARG_DAT and updates
 * the bookkeeping record OP_kernels[0] (name, call count, wall time and
 * transfer estimate).
 *
 *   name  - stored as OP_kernels[0].name
 *   set   - iteration set; set->size is the number of elements partitioned
 *   arg0  - first argument; arg0.data is handed to the kernel as double *
 *   arg1  - second argument; arg1.data is handed to the kernel as double *
 */
void op_par_loop_save_soln(char const *name, op_set set,
                           op_arg arg0,
                           op_arg arg1 ){

  int    nargs   = 2;
  op_arg args[2] = {arg0,arg1};

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection: save_soln \n");
  }

  // initialise timers (only the wall-clock values are used below)
  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers_core(&cpu_t1, &wall_t1);

  // set number of threads
#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // execute plan: one contiguous slice of the set per thread
  #pragma omp parallel for
  for (int thr=0; thr<nthreads; thr++) {
    // Widen before multiplying: set->size * (thr+1) can exceed the range
    // of int for large sets, while the quotient never exceeds set->size.
    int start  = (int)(((long long)set->size *  thr   ) / nthreads);
    int finish = (int)(((long long)set->size * (thr+1)) / nthreads);

    op_x86_save_soln( (double *) arg0.data,
                      (double *) arg1.data,
                      start, finish );
  }

  // set dirty bit on direct/indirect datasets with access
  // OP_INC, OP_WRITE, OP_RW
  for (int i = 0; i<nargs; i++) {
    if (args[i].argtype == OP_ARG_DAT) {
      set_dirtybit(args[i]);
    }
  }

  // perform any global operations
  // - NONE

  // update kernel record
  op_timers_core(&cpu_t2, &wall_t2);
  op_timing_realloc(0);
  OP_kernels[0].name      = name;
  OP_kernels[0].count    += 1;
  OP_kernels[0].time     += wall_t2 - wall_t1;
  // NOTE(review): argN.size is presumably the per-element size in bytes,
  // making this a data-volume estimate - confirm against op_arg.
  OP_kernels[0].transfer += (double)set->size * arg0.size;
  OP_kernels[0].transfer += (double)set->size * arg1.size;
}
/*
 * Host-side driver for the "save_soln" loop without indirection
 * (single-precision variant: argument data is passed as float *).
 *
 * Splits the set's elements into one contiguous [start, finish) range per
 * thread, calls op_x86_save_soln on each range, then updates the
 * bookkeeping record OP_kernels[0] (name, call count, wall time and
 * transfer estimate).
 *
 *   name  - stored as OP_kernels[0].name
 *   set   - iteration set; set->size is the number of elements partitioned
 *   arg0  - first argument; arg0.data is handed to the kernel as float *
 *   arg1  - second argument; arg1.data is handed to the kernel as float *
 */
void op_par_loop_save_soln(char const *name, op_set set,
                           op_arg arg0,
                           op_arg arg1 ) {

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection: save_soln \n");
  }

  // initialise timers (only the wall-clock values are used below)
  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers(&cpu_t1, &wall_t1);

  // set number of threads
#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // execute plan: one contiguous slice of the set per thread
  #pragma omp parallel for
  for (int thr=0; thr<nthreads; thr++) {
    // Widen before multiplying: set->size * (thr+1) can exceed the range
    // of int for large sets, while the quotient never exceeds set->size.
    int start  = (int)(((long long)set->size *  thr   ) / nthreads);
    int finish = (int)(((long long)set->size * (thr+1)) / nthreads);

    op_x86_save_soln( (float *) arg0.data,
                      (float *) arg1.data,
                      start, finish );
  }

  // update kernel record
  op_timers(&cpu_t2, &wall_t2);
  op_timing_realloc(0);
  OP_kernels[0].name      = name;
  OP_kernels[0].count    += 1;
  OP_kernels[0].time     += wall_t2 - wall_t1;
  // NOTE(review): argN.size is presumably the per-element size in bytes,
  // making this a data-volume estimate - confirm against op_arg.
  OP_kernels[0].transfer += (float)set->size * arg0.size;
  OP_kernels[0].transfer += (float)set->size * arg1.size;
}
/*
 * Host-side driver for the "save_soln" loop without indirection
 * (variant with halo-exchange / dirty-bit calls around the kernel).
 *
 * Calls op_mpi_halo_exchanges on the arguments, splits the set's elements
 * into one contiguous [start, finish) range per thread, runs
 * op_x86_save_soln on each range (skipped when the set is empty), calls
 * op_mpi_set_dirtybit, and updates the bookkeeping record OP_kernels[0]
 * (name, call count, wall time and transfer estimate).
 *
 *   name  - stored as OP_kernels[0].name
 *   set   - iteration set; set->size is the number of elements partitioned
 *   arg0  - first argument; arg0.data is handed to the kernel as double *
 *   arg1  - second argument; arg1.data is handed to the kernel as double *
 */
void op_par_loop_save_soln(char const *name, op_set set,
                           op_arg arg0,
                           op_arg arg1 ){

  int    nargs = 2;
  op_arg args[2];

  args[0] = arg0;
  args[1] = arg1;

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection: save_soln\n");
  }

  op_mpi_halo_exchanges(set, nargs, args);

  // initialise timers (only the wall-clock values are used below)
  double cpu_t1, cpu_t2, wall_t1=0, wall_t2=0;
  op_timing_realloc(0);
  OP_kernels[0].name   = name;
  OP_kernels[0].count += 1;

  // set number of threads
#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // Take the start time unconditionally.  If it were read only when the
  // set is non-empty, wall_t1 would stay 0 for an empty set and the whole
  // value of wall_t2 would be added to the kernel time below.
  op_timers_core(&cpu_t1, &wall_t1);

  if (set->size >0) {
    // execute plan: one contiguous slice of the set per thread
    #pragma omp parallel for
    for (int thr=0; thr<nthreads; thr++) {
      // Widen before multiplying: set->size * (thr+1) can exceed the range
      // of int for large sets, while the quotient never exceeds set->size.
      int start  = (int)(((long long)set->size *  thr   ) / nthreads);
      int finish = (int)(((long long)set->size * (thr+1)) / nthreads);

      op_x86_save_soln( (double *) arg0.data,
                        (double *) arg1.data,
                        start, finish );
    }
  }

  // combine reduction data
  op_mpi_set_dirtybit(nargs, args);

  // update kernel record
  op_timers_core(&cpu_t2, &wall_t2);
  OP_kernels[0].time     += wall_t2 - wall_t1;
  // NOTE(review): argN.size is presumably the per-element size in bytes,
  // making this a data-volume estimate - confirm against op_arg.
  OP_kernels[0].transfer += (float)set->size * arg0.size;
  OP_kernels[0].transfer += (float)set->size * arg1.size;
}