/*
 * revert_kernel_c_
 *
 * Copies density0 into density1 and energy0 into energy1 for every (j,k)
 * with x_min <= j <= x_max and y_min <= k <= y_max.
 *
 * xmin, xmax, ymin, ymax : pointers to the inclusive index bounds.
 * density0, energy0      : source arrays (only read here).
 * density1, energy1      : destination arrays.
 *
 * Element offsets are produced by the FTNREF2D macro (defined elsewhere),
 * called with a row width of x_max+4 and lower bounds x_min-2 / y_min-2.
 */
void revert_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                      double *density0,
                      double *density1,
                      double *energy0,
                      double *energy1)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;

  /* Pass 1: density. */
  for (int row = y_min; row <= y_max; row++) {
#pragma ivdep
    for (int col = x_min; col <= x_max; col++) {
      const int cell = FTNREF2D(col,row,x_max+4,x_min-2,y_min-2);
      density1[cell] = density0[cell];
    }
  }

  /* Pass 2: energy. Kept as a separate sweep, like the original. */
  for (int row = y_min; row <= y_max; row++) {
#pragma ivdep
    for (int col = x_min; col <= x_max; col++) {
      const int cell = FTNREF2D(col,row,x_max+4,x_min-2,y_min-2);
      energy1[cell] = energy0[cell];
    }
  }
}
/*
 * ideal_gas_kernel_c_
 *
 * For every (j,k) with x_min <= j <= x_max and y_min <= k <= y_max:
 *   pressure   = (1.4-1.0) * density * energy
 *   soundspeed = sqrt( v*v * (pressure*pressurebyenergy - pressurebyvolume) )
 * where v = 1/density, pressurebyenergy = (1.4-1.0)*density and
 * pressurebyvolume = -density*pressure.
 *
 * xmin, xmax, ymin, ymax : pointers to the inclusive index bounds.
 * density, energy        : input arrays (only read here).
 * pressure, soundspeed   : output arrays.
 *
 * The outer loop is an OpenMP work-shared loop; all per-cell temporaries
 * are declared inside the loop body and are therefore thread-private.
 */
void ideal_gas_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                         double *density,
                         double *energy,
                         double *pressure,
                         double *soundspeed)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;

#pragma omp parallel for
  for (int k = y_min; k <= y_max; k++) {
#pragma ivdep
    for (int j = x_min; j <= x_max; j++) {
      const int cell = FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2);

      const double specific_volume = 1.0/density[cell];

      pressure[cell] = (1.4-1.0)*density[cell]*energy[cell];

      const double dp_by_energy = (1.4-1.0)*density[cell];
      const double dp_by_volume = -density[cell]*pressure[cell];

      const double speed_sq = specific_volume*specific_volume
                            *(pressure[cell]*dp_by_energy-dp_by_volume);

      soundspeed[cell] = sqrt(speed_sq);
    }
  }
}
/*
 * unpack_left_right_buffers_c_
 *
 * Copies received buffer contents into `field` on the left and/or right side
 * of the index range.
 *
 * For each side whose chunk id differs from *xtrnl_fc, and for every
 * k in [y_min-depth, y_max+y_inc+depth] and j in [1, depth]:
 *   left  : field(x_min-j,       k) = left_rcv_buffer (index)
 *   right : field(x_max+x_inc+j, k) = right_rcv_buffer(index)
 * with index = j + (k+depth-1)*depth, addressed through FTNREF1D(index,1).
 *
 * xinc / yinc widen the field row (x_max+4+x_inc) and the k range.
 * sz is accepted but never read.
 */
void unpack_left_right_buffers_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                                  int *chnk_lft,int *chnk_rght,int *xtrnl_fc,
                                  int *xinc,int *yinc,int *dpth,int *sz,
                                  double *field,
                                  double *left_rcv_buffer,
                                  double *right_rcv_buffer)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;
  const int chunk_left = *chnk_lft;
  const int chunk_right = *chnk_rght;
  const int external_face = *xtrnl_fc;
  const int x_inc = *xinc;
  const int y_inc = *yinc;
  const int depth = *dpth;

  const int first_row = y_min-depth;
  const int last_row = y_max+y_inc+depth;

  (void)sz;

  if (chunk_left != external_face) {
    for (int k = first_row; k <= last_row; k++) {
#pragma ivdep
      for (int j = 1; j <= depth; j++) {
        const int index = j+(k+depth-1)*depth;
        field[FTNREF2D(x_min-j,k,x_max+4+x_inc,x_min-2,y_min-2)] =
            left_rcv_buffer[FTNREF1D(index,1)];
      }
    }
  }

  if (chunk_right != external_face) {
    for (int k = first_row; k <= last_row; k++) {
#pragma ivdep
      for (int j = 1; j <= depth; j++) {
        const int index = j+(k+depth-1)*depth;
        field[FTNREF2D(x_max+x_inc+j,k,x_max+4+x_inc,x_min-2,y_min-2)] =
            right_rcv_buffer[FTNREF1D(index,1)];
      }
    }
  }
}
/*
 * reset_field_kernel_c_
 *
 * Copies the "1" arrays back into the "0" arrays:
 *   density0 <- density1, energy0 <- energy1  for j in [x_min, x_max],
 *                                                 k in [y_min, y_max]
 *   xvel0    <- xvel1,    yvel0   <- yvel1    for j in [x_min, x_max+1],
 *                                                 k in [y_min, y_max+1]
 *
 * density/energy are addressed with a row width of x_max+4, the velocity
 * arrays with x_max+5. All four sweeps are work-shared loops inside a single
 * OpenMP parallel region.
 */
void reset_field_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                           double *density0,
                           double *density1,
                           double *energy0,
                           double *energy1,
                           double *xvel0,
                           double *xvel1,
                           double *yvel0,
                           double *yvel1)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;

#pragma omp parallel
  {
#pragma omp for
    for (int k = y_min; k <= y_max; k++) {
#pragma ivdep
      for (int j = x_min; j <= x_max; j++) {
        const int cell = FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2);
        density0[cell] = density1[cell];
      }
    }

#pragma omp for
    for (int k = y_min; k <= y_max; k++) {
#pragma ivdep
      for (int j = x_min; j <= x_max; j++) {
        const int cell = FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2);
        energy0[cell] = energy1[cell];
      }
    }

#pragma omp for
    for (int k = y_min; k <= y_max+1; k++) {
#pragma ivdep
      for (int j = x_min; j <= x_max+1; j++) {
        const int node = FTNREF2D(j ,k ,x_max+5,x_min-2,y_min-2);
        xvel0[node] = xvel1[node];
      }
    }

#pragma omp for
    for (int k = y_min; k <= y_max+1; k++) {
#pragma ivdep
      for (int j = x_min; j <= x_max+1; j++) {
        const int node = FTNREF2D(j ,k ,x_max+5,x_min-2,y_min-2);
        yvel0[node] = yvel1[node];
      }
    }
  }
}
/*
 * unpack_top_bottom_buffers_c_
 *
 * Copies received buffer contents into `field` below and/or above the index
 * range.
 *
 * For each side whose chunk id differs from *xtrnl_fc, and for every
 * k in [1, depth] and j in [x_min-depth, x_max+x_inc+depth]:
 *   bottom : field(j, y_min-k)       = bottom_rcv_buffer(index)
 *   top    : field(j, y_max+y_inc+k) = top_rcv_buffer   (index)
 * with index = j + depth + (k-1)*(x_max+x_inc+2*depth), addressed through
 * FTNREF1D(index,1).
 *
 * sz is accepted but never read.
 */
void unpack_top_bottom_buffers_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                                  int *chnk_bttm,int *chnk_tp,int *xtrnl_fc,
                                  int *xinc,int *yinc,int *dpth,int *sz,
                                  double *field,
                                  double *bottom_rcv_buffer,
                                  double *top_rcv_buffer)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;
  const int chunk_top = *chnk_tp;
  const int chunk_bottom = *chnk_bttm;
  const int external_face = *xtrnl_fc;
  const int x_inc = *xinc;
  const int y_inc = *yinc;
  const int depth = *dpth;

  const int first_col = x_min-depth;
  const int last_col = x_max+x_inc+depth;
  /* Number of buffer entries consumed per k step. */
  const int row_stride = x_max+x_inc+(2*depth);

  (void)sz;

  if (chunk_bottom != external_face) {
    for (int k = 1; k <= depth; k++) {
      for (int j = first_col; j <= last_col; j++) {
        const int index = j+depth+(k-1)*row_stride;
        field[FTNREF2D(j,y_min-k,x_max+4+x_inc,x_min-2,y_min-2)] =
            bottom_rcv_buffer[FTNREF1D(index,1)];
      }
    }
  }

  if (chunk_top != external_face) {
    for (int k = 1; k <= depth; k++) {
      for (int j = first_col; j <= last_col; j++) {
        const int index = j+depth+(k-1)*row_stride;
        field[FTNREF2D(j,y_max+y_inc+k,x_max+4+x_inc,x_min-2,y_min-2)] =
            top_rcv_buffer[FTNREF1D(index,1)];
      }
    }
  }
}
/*
 * accelerate_kernel_c_
 *
 * For every (j,k) with x_min <= j <= x_max+1 and y_min <= k <= y_max+1:
 *
 *   nodal_mass = 0.25 * sum of density0*volume over the four entries
 *                (j-1,k-1), (j,k-1), (j,k), (j-1,k)
 *   step       = 0.5 * dt / nodal_mass
 *
 *   xvel1 = xvel0 - step * (xarea-weighted differences of pressure in j)
 *   yvel1 = yvel0 - step * (yarea-weighted differences of pressure in k)
 * and then the same two corrections again with viscosity in place of
 * pressure, applied on top of the freshly written xvel1 / yvel1.
 *
 * dbyt is the pointer to dt. density0, volume, pressure, viscosity and yarea
 * are addressed with a row width of x_max+4; xarea and the velocity arrays
 * with x_max+5.
 */
void accelerate_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                          double *dbyt,
                          double *xarea,
                          double *yarea,
                          double *volume,
                          double *density0,
                          double *pressure,
                          double *viscosity,
                          double *xvel0,
                          double *yvel0,
                          double *xvel1,
                          double *yvel1)
{
  const int x_min = *xmin;
  const int x_max = *xmax;
  const int y_min = *ymin;
  const int y_max = *ymax;
  const double dt = *dbyt;

  for (int k = y_min; k <= y_max+1; k++) {
#pragma ivdep
    for (int j = x_min; j <= x_max+1; j++) {
      /* Offsets into the x_max+4 wide arrays; suffix is (j offset, k offset). */
      const int c_00 = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
      const int c_m0 = FTNREF2D(j-1,k  ,x_max+4,x_min-2,y_min-2);
      const int c_0m = FTNREF2D(j  ,k-1,x_max+4,x_min-2,y_min-2);
      const int c_mm = FTNREF2D(j-1,k-1,x_max+4,x_min-2,y_min-2);
      /* Offsets into the x_max+5 wide arrays. */
      const int n_00 = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
      const int n_0m = FTNREF2D(j  ,k-1,x_max+5,x_min-2,y_min-2);

      /* Summation order matches the original so rounding is unchanged. */
      const double nodal_mass = (density0[c_mm]*volume[c_mm]
                                +density0[c_0m]*volume[c_0m]
                                +density0[c_00]*volume[c_00]
                                +density0[c_m0]*volume[c_m0])
                                *0.25;

      const double stepby_mass_s = 0.5*dt/nodal_mass;

      /* Pressure contribution. */
      xvel1[n_00] = xvel0[n_00]
                   -stepby_mass_s
                   *(xarea[n_00]*(pressure[c_00]-pressure[c_m0])
                    +xarea[n_0m]*(pressure[c_0m]-pressure[c_mm]));

      yvel1[n_00] = yvel0[n_00]
                   -stepby_mass_s
                   *(yarea[c_00]*(pressure[c_00]-pressure[c_0m])
                    +yarea[c_m0]*(pressure[c_m0]-pressure[c_mm]));

      /* Viscosity contribution, applied to the values just stored. */
      xvel1[n_00] = xvel1[n_00]
                   -stepby_mass_s
                   *(xarea[n_00]*(viscosity[c_00]-viscosity[c_m0])
                    +xarea[n_0m]*(viscosity[c_0m]-viscosity[c_mm]));

      yvel1[n_00] = yvel1[n_00]
                   -stepby_mass_s
                   *(yarea[c_00]*(viscosity[c_00]-viscosity[c_0m])
                    +yarea[c_m0]*(viscosity[c_m0]-viscosity[c_mm]));
    }
  }
}
/*
 * generate_chunk_kernel_c_
 *
 * Fills density0, energy0, xvel0 and yvel0 from a list of "states".
 *
 * 1. Every (j,k) with x_min-2 <= j <= x_max+2 and y_min-2 <= k <= y_max+2 is
 *    set to the values of state 1 (the background state).
 * 2. States 2..number_of_states are applied in order; later states overwrite
 *    earlier ones. A cell (j,k) receives the state when, depending on
 *    state_geometry[state]:
 *      g_rect  : [vertexx(j), vertexx(j+1)] / [vertexy(k), vertexy(k+1)]
 *                overlap the state's [xmin,xmax) / [ymin,ymax) box,
 *      g_circ  : the distance from (cellx(j), celly(k)) to
 *                (state_xmin, state_ymin) is <= state_radius,
 *      g_point : vertexx(j) == state_xmin and vertexy(k) == state_ymin.
 *    A selected cell gets the state's density and energy, and the four
 *    velocity entries (j..j+1, k..k+1) get the state's x/y velocity.
 *
 * Fixes relative to the previous revision:
 *   - the circle and point branches stored state_density into energy0;
 *     they now store state_energy, like the rectangle branch;
 *   - the point branch compared vertexy at (j, x_min-2) instead of
 *     (k, y_min-2).
 *
 * All scalar arguments are passed by pointer. g_rct / g_crc / g_pnt carry the
 * geometry codes that state_geometry entries are compared against.
 */
void generate_chunk_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                              double *vertexx,
                              double *vertexy,
                              double *cellx,
                              double *celly,
                              double *density0,
                              double *energy0,
                              double *xvel0,
                              double *yvel0,
                              int *nmbr_f_stts,
                              double *state_density,
                              double *state_energy,
                              double *state_xvel,
                              double *state_yvel,
                              double *state_xmin,
                              double *state_xmax,
                              double *state_ymin,
                              double *state_ymax,
                              double *state_radius,
                              int *state_geometry,
                              int *g_rct,
                              int *g_crc,
                              int *g_pnt)
{
  int x_min=*xmin;
  int x_max=*xmax;
  int y_min=*ymin;
  int y_max=*ymax;
  int number_of_states=*nmbr_f_stts;
  int g_rect=*g_rct;
  int g_circ=*g_crc;
  int g_point=*g_pnt;

  START_PROFILING;

  /* State 1 is always the background state */
#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      energy0[FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2)] = state_energy[FTNREF1D(1,1)];
    }
  }

#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      density0[FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2)] = state_density[FTNREF1D(1,1)];
    }
  }

#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      xvel0[FTNREF2D(j ,k ,x_max+5,x_min-2,y_min-2)] = state_xvel[FTNREF1D(1,1)];
    }
  }

#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      yvel0[FTNREF2D(j ,k ,x_max+5,x_min-2,y_min-2)] = state_yvel[FTNREF1D(1,1)];
    }
  }

  for (int state = 2; state <= number_of_states; state++) {

    /* Per-state values that every geometry branch uses. */
    const int geometry = state_geometry[FTNREF1D(state,1)];
    const double new_density = state_density[FTNREF1D(state,1)];
    const double new_energy = state_energy[FTNREF1D(state,1)];
    const double new_xvel = state_xvel[FTNREF1D(state,1)];
    const double new_yvel = state_yvel[FTNREF1D(state,1)];

    /* Centre used by the circle and point geometries. */
    const double x_cent = state_xmin[FTNREF1D(state,1)];
    const double y_cent = state_ymin[FTNREF1D(state,1)];

    /*
     * Neighbouring (j,k) iterations store to overlapping xvel0/yvel0 entries
     * (each selected cell writes j..j+1, k..k+1). Within one state every such
     * store writes the same value, so the overlap does not change the result.
     */
#pragma omp parallel for
    for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
      for (int j = x_min-2; j <= x_max+2; j++) {
        int inside = 0;

        if (geometry == g_rect) {
          inside = vertexx[FTNREF1D(j+1,x_min-2)] >= state_xmin[FTNREF1D(state,1)]
                && vertexx[FTNREF1D(j,x_min-2)] < state_xmax[FTNREF1D(state,1)]
                && vertexy[FTNREF1D(k+1,y_min-2)] >= state_ymin[FTNREF1D(state,1)]
                && vertexy[FTNREF1D(k,y_min-2)] < state_ymax[FTNREF1D(state,1)];
        }
        else if (geometry == g_circ) {
          const double dx = cellx[FTNREF1D(j,x_min-2)]-x_cent;
          const double dy = celly[FTNREF1D(k,y_min-2)]-y_cent;
          const double radius = sqrt(dx*dx+dy*dy);
          inside = radius <= state_radius[FTNREF1D(state,1)];
        }
        else if (geometry == g_point) {
          /* vertexy is indexed by k with the y lower bound. */
          inside = vertexx[FTNREF1D(j,x_min-2)] == x_cent
                && vertexy[FTNREF1D(k,y_min-2)] == y_cent;
        }

        if (inside) {
          density0[FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2)] = new_density;
          energy0[FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2)] = new_energy;
          for (int kt = k; kt <= k+1; kt++) {
            for (int jt = j; jt <= j+1; jt++) {
              xvel0[FTNREF2D(jt,kt,x_max+5,x_min-2,y_min-2)] = new_xvel;
              yvel0[FTNREF2D(jt,kt,x_max+5,x_min-2,y_min-2)] = new_yvel;
            }
          }
        }
      }
    }
  }

  STOP_PROFILING(__func__);
}
/*
 * calc_dt_kernel_c_
 *
 * Computes a per-cell timestep limit into dt_min and returns the smallest one.
 *
 * For every (j,k) with x_min <= j <= x_max and y_min <= k <= y_max four
 * candidate limits are formed and their minimum is stored in dt_min(j,k):
 *   dtct   = dtc_safe * min(celldx(j), celldy(k)) / cc, where
 *            cc = max(sqrt(soundspeed^2 + 2*viscosity/density0), g_small)
 *   dtut   = dtu_safe * 2*volume / max(|dv1|, |dv2|, g_small*volume)
 *            with dv1/dv2 built from xvel0 and xarea
 *   dtvt   = the same with yvel0 and yarea, scaled by dtv_safe
 *   dtdivt = dtdiv_safe * (-1/div) when div < -g_small, otherwise g_big,
 *            with div = (sum of dv2-dv1 over both directions) / (2*volume)
 *
 * Outputs written through pointers:
 *   *dtminval   : minimum of dt_min over the range (g_big if none is smaller)
 *   *dtlcontrol : always 1
 *   *jldt,*kldt : always 1
 *   *xlpos,*ylpos : written back with the values they had on entry
 *
 * If the minimum is below *mindt a diagnostic block is printed for cell
 * (1,1).
 *
 * Fix relative to the previous revision: the minimum search ran inside the
 * OpenMP parallel region without a work-sharing construct, so every thread
 * executed the whole loop using the shared function-scope j, k and
 * dt_min_val. It is now a work-shared loop with a per-thread minimum that is
 * merged under `omp critical`; this needs no `reduction(min:...)` support.
 *
 * NOTE(review): *smll is never written, so the caller does not see the
 * "small timestep" flag - confirm whether the caller expects it.
 * cellx and celly are accepted but not read.
 */
void calc_dt_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                       double *gsmall,double *gbig,double *mindt,
                       double *dtcsafe,
                       double *dtusafe,
                       double *dtvsafe,
                       double *dtdivsafe,
                       double *xarea,
                       double *yarea,
                       double *cellx,
                       double *celly,
                       double *celldx,
                       double *celldy,
                       double *volume,
                       double *density0,
                       double *energy0,
                       double *pressure,
                       double *viscosity,
                       double *soundspeed,
                       double *xvel0,
                       double *yvel0,
                       double *dt_min,
                       double *dtminval,
                       int *dtlcontrol,
                       double *xlpos,
                       double *ylpos,
                       int *jldt,
                       int *kldt,
                       int *smll)
{
  int x_min=*xmin;
  int x_max=*xmax;
  int y_min=*ymin;
  int y_max=*ymax;
  double g_small=*gsmall;
  double g_big=*gbig;
  double dtc_safe=*dtcsafe;
  double dtu_safe=*dtusafe;
  double dtv_safe=*dtvsafe;
  double dtdiv_safe=*dtdivsafe;
  double min_dt=*mindt;
  double xl_pos=*xlpos;
  double yl_pos=*ylpos;

  /* The location of the limiting cell is not tracked; (1,1) is reported. */
  int j_ldt=1;
  int k_ldt=1;
  int small=0;
  double dt_min_val=g_big;

  (void)cellx;
  (void)celly;
  (void)smll;

#pragma omp parallel
  {
    /* Smallest dt_min entry seen by this thread. */
    double thread_min=g_big;

#pragma omp for
    for (int k=y_min;k<=y_max;k++) {
#pragma ivdep
      for (int j=x_min;j<=x_max;j++) {
        const int cell=FTNREF2D(j,k,x_max+4,x_min-2,y_min-2);
        double dsx,dsy,cc,dv1,dv2,div,dtct,dtut,dtvt,dtdivt;

        dsx=celldx[FTNREF1D(j,x_min-2)];
        dsy=celldy[FTNREF1D(k,y_min-2)];

        cc=soundspeed[cell]*soundspeed[cell];
        cc=cc+2.0*viscosity[cell]/density0[cell];
        cc=MAX(sqrt(cc),g_small);

        dtct=dtc_safe*MIN(dsx,dsy)/cc;

        div=0.0;

        dv1=(xvel0[FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2)]
            +xvel0[FTNREF2D(j  ,k+1,x_max+5,x_min-2,y_min-2)])
            *xarea[FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2)];
        dv2=(xvel0[FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2)]
            +xvel0[FTNREF2D(j+1,k+1,x_max+5,x_min-2,y_min-2)])
            *xarea[FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2)];

        div=div+dv2-dv1;

        dtut=dtu_safe*2.0*volume[cell]
            /MAX(fabs(dv1),MAX(fabs(dv2),g_small*volume[cell]));

        dv1=(yvel0[FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2)]
            +yvel0[FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2)])
            *yarea[FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2)];
        dv2=(yvel0[FTNREF2D(j  ,k+1,x_max+5,x_min-2,y_min-2)]
            +yvel0[FTNREF2D(j+1,k+1,x_max+5,x_min-2,y_min-2)])
            *yarea[FTNREF2D(j  ,k+1,x_max+4,x_min-2,y_min-2)];

        div=div+dv2-dv1;

        dtvt=dtv_safe*2.0*volume[cell]
            /MAX(fabs(dv1),MAX(fabs(dv2),g_small*volume[cell]));

        div=div/(2.0*volume[cell]);

        if(div < -g_small) {
          dtdivt=dtdiv_safe*(-1.0/div);
        }
        else {
          dtdivt=g_big;
        }

        dt_min[FTNREF2D(j,k,x_max+5,x_min-2,y_min-2)]=MIN(dtct,MIN(dtut,MIN(dtvt,dtdivt)));
      }
    }
    /* The implicit barrier above guarantees dt_min is complete here. */

#pragma omp for nowait
    for (int k=y_min;k<=y_max;k++) {
#pragma ivdep
      for (int j=x_min;j<=x_max;j++) {
        const double candidate=dt_min[FTNREF2D(j,k,x_max+5,x_min-2,y_min-2)];
        if(candidate < thread_min) thread_min=candidate;
      }
    }

    /* Merge the per-thread results; only one thread at a time updates. */
#pragma omp critical
    {
      if(thread_min < dt_min_val) dt_min_val=thread_min;
    }
  }

  if(dt_min_val < min_dt) small=1;

  /* Report the minimum timestep information back to the caller. */
  *dtminval=dt_min_val;
  *dtlcontrol=1;
  *xlpos=xl_pos;
  *ylpos=yl_pos;
  *jldt=j_ldt;
  *kldt=k_ldt;

  if(small != 0) {
    printf("Timestep information:\n");
    printf("j, k     :%i %i \n",j_ldt,k_ldt);
    printf("x, y     :%f %f \n",xl_pos,yl_pos);
    printf("timestep : %f\n",dt_min_val);
    printf("Cell velocities;\n");
    printf("%f %f \n",xvel0[FTNREF2D(j_ldt  ,k_ldt  ,x_max+5,x_min-2,y_min-2)],yvel0[FTNREF2D(j_ldt  ,k_ldt  ,x_max+5,x_min-2,y_min-2)]);
    printf("%f %f \n",xvel0[FTNREF2D(j_ldt+1,k_ldt  ,x_max+5,x_min-2,y_min-2)],yvel0[FTNREF2D(j_ldt+1,k_ldt  ,x_max+5,x_min-2,y_min-2)]);
    printf("%f %f \n",xvel0[FTNREF2D(j_ldt+1,k_ldt+1,x_max+5,x_min-2,y_min-2)],yvel0[FTNREF2D(j_ldt+1,k_ldt+1,x_max+5,x_min-2,y_min-2)]);
    printf("%f %f \n",xvel0[FTNREF2D(j_ldt  ,k_ldt+1,x_max+5,x_min-2,y_min-2)],yvel0[FTNREF2D(j_ldt  ,k_ldt+1,x_max+5,x_min-2,y_min-2)]);
    printf("density, energy, pressure, soundspeed \n");
    printf("%f %f %f %f \n",density0[FTNREF2D(j_ldt,k_ldt,x_max+4,x_min-2,y_min-2)],energy0[FTNREF2D(j_ldt,k_ldt,x_max+4,x_min-2,y_min-2)],pressure[FTNREF2D(j_ldt,k_ldt,x_max+4,x_min-2,y_min-2)],soundspeed[FTNREF2D(j_ldt,k_ldt,x_max+4,x_min-2,y_min-2)]);
  }
}
/*
 * initialise_chunk_kernel_c_
 *
 * Fills the coordinate and geometry arrays of a uniform grid:
 *   vertexx(j) = min_x + d_x*(j - x_min),  vertexdx(j) = d_x   j in [x_min-2, x_max+3]
 *   vertexy(k) = min_y + d_y*(k - y_min),  vertexdy(k) = d_y   k in [y_min-2, y_max+3]
 *   cellx(j)   = 0.5*(vertexx(j)+vertexx(j+1)), celldx(j) = d_x   j in [x_min-2, x_max+2]
 *   celly(k)   = 0.5*(vertexy(k)+vertexy(k+1)), celldy(k) = d_y   k in [y_min-2, y_max+2]
 *   volume(j,k) = d_x*d_y
 *   xarea(j,k)  = celldy(k)
 *   yarea(j,k)  = celldx(j)
 * The 2D arrays are filled for j in [x_min-2, x_max+2], k in [y_min-2, y_max+2].
 *
 * Fix relative to the previous revision: the second vertexy read in the
 * celly loop used x_min-2 as its lower bound, which reads the wrong element
 * whenever x_min != y_min. It now uses y_min-2 like every other y-indexed
 * access.
 *
 * minx, miny, dx, dy are pointers to the origin and spacing.
 */
void initialise_chunk_kernel_c_(int *xmin,int *xmax,int *ymin,int *ymax,
                                double *minx,
                                double *miny,
                                double *dx,
                                double *dy,
                                double *vertexx,
                                double *vertexdx,
                                double *vertexy,
                                double *vertexdy,
                                double *cellx,
                                double *celldx,
                                double *celly,
                                double *celldy,
                                double *volume,
                                double *xarea,
                                double *yarea)
{
  int x_min=*xmin;
  int x_max=*xmax;
  int y_min=*ymin;
  int y_max=*ymax;
  double min_x=*minx;
  double min_y=*miny;
  double d_x=*dx;
  double d_y=*dy;

  START_PROFILING;

  /* Vertex coordinates and spacings. */
#pragma omp parallel for
#pragma ivdep
  for (int j = x_min-2; j <= x_max+3; j++) {
    vertexx[FTNREF1D(j,x_min-2)] = min_x+d_x*(double)(j-x_min);
  }

#pragma omp parallel for
#pragma ivdep
  for (int j = x_min-2; j <= x_max+3; j++) {
    vertexdx[FTNREF1D(j,x_min-2)] = d_x;
  }

#pragma omp parallel for
#pragma ivdep
  for (int k = y_min-2; k <= y_max+3; k++) {
    vertexy[FTNREF1D(k,y_min-2)] = min_y+d_y*(double)(k-y_min);
  }

#pragma omp parallel for
#pragma ivdep
  for (int k = y_min-2; k <= y_max+3; k++) {
    vertexdy[FTNREF1D(k,y_min-2)] = d_y;
  }

  /* Cell centres are the midpoints of consecutive vertices. */
#pragma omp parallel for
#pragma ivdep
  for (int j = x_min-2; j <= x_max+2; j++) {
    cellx[FTNREF1D(j,x_min-2)] = 0.5 * (vertexx[FTNREF1D(j,x_min-2)]+vertexx[FTNREF1D(j+1,x_min-2)]);
  }

#pragma omp parallel for
#pragma ivdep
  for (int j = x_min-2; j <= x_max+2; j++) {
    celldx[FTNREF1D(j,x_min-2)] = d_x;
  }

#pragma omp parallel for
#pragma ivdep
  for (int k = y_min-2; k <= y_max+2; k++) {
    /* Both vertexy reads use the y lower bound. */
    celly[FTNREF1D(k,y_min-2)] = 0.5 * (vertexy[FTNREF1D(k,y_min-2)]+vertexy[FTNREF1D(k+1,y_min-2)]);
  }

#pragma omp parallel for
#pragma ivdep
  for (int k = y_min-2; k <= y_max+2; k++) {
    celldy[FTNREF1D(k,y_min-2)] = d_y;
  }

  /* Per-cell volume and face areas. */
#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      volume[FTNREF2D(j,k,x_max+4,x_min-2,y_min-2)] = d_x*d_y;
    }
  }

#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      xarea[FTNREF2D(j,k,x_max+5,x_min-2,y_min-2)] = celldy[FTNREF1D(k,y_min-2)];
    }
  }

#pragma omp parallel for
  for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
    for (int j = x_min-2; j <= x_max+2; j++) {
      yarea[FTNREF2D(j,k,x_max+4,x_min-2,y_min-2)] = celldx[FTNREF1D(j,x_min-2)];
    }
  }

  STOP_PROFILING(__func__);
}
/*
 * advec_cell_kernel_c_
 *
 * One directional sweep that updates density1 and energy1 from volume
 * fluxes. *dr selects the direction (1 = x, 2 = y; any other value does
 * nothing) and *swp_nmbr selects how pre_vol / post_vol are formed.
 *
 * Each direction runs three work-shared loops inside one parallel region:
 *   1. pre_vol / post_vol over [x_min-2, x_max+2] x [y_min-2, y_max+2].
 *      On sweep 1 pre_vol includes the flux differences of both directions
 *      and post_vol removes the sweep direction's part again; on any other
 *      sweep pre_vol includes only the sweep direction and post_vol is the
 *      plain volume.
 *   2. mass_flux_{x,y} and ener_flux for every face in the sweep range
 *      (x: j up to x_max+2, y: k up to y_max+2). The donor entry is chosen
 *      from the sign of the volume flux, and a limiter term built from the
 *      upwind/donor/downwind differences is added to the donor value.
 *   3. pre_mass, post_mass, post_ener, advec_vol, and finally density1 and
 *      energy1 over [x_min, x_max] x [y_min, y_max].
 *
 * Arrays addressed with row width x_max+4: volume, density1, energy1,
 * vol_flux_y, mass_flux_y. Arrays addressed with x_max+5: vol_flux_x,
 * mass_flux_x, ener_flux and the pre/post work arrays.
 *
 * *vctr is read but not used. The region is wrapped in an Intel
 * `#pragma offload` targeting device *g_mic_device; locals are left
 * non-const so the offload sees the same kind of variables as before.
 */
void advec_cell_kernel_c_(int *g_mic_device,int *xmin,int *xmax,int *ymin,int *ymax,
                          int *dr,
                          int *swp_nmbr,
                          int *vctr,
                          double *vertexdx,
                          double *vertexdy,
                          double *volume,
                          double *density1,
                          double *energy1,
                          double *mass_flux_x,
                          double *vol_flux_x,
                          double *mass_flux_y,
                          double *vol_flux_y,
                          double *pre_vol,
                          double *post_vol,
                          double *pre_mass,
                          double *post_mass,
                          double *advec_vol,
                          double *post_ener,
                          double *ener_flux)
{
  int x_min = *xmin;
  int x_max = *xmax;
  int y_min = *ymin;
  int y_max = *ymax;
  int sweep_number = *swp_nmbr;
  int dir = *dr;
  int vector = *vctr;
  int g_xdir = 1;
  int g_ydir = 2;
  double one_by_six = 1.0/6.0;

  (void)vector;

#pragma offload target(mic:*g_mic_device) \
  in(density1    :length(0) alloc_if(0) free_if(0)) \
  in(energy1     :length(0) alloc_if(0) free_if(0)) \
  in(vol_flux_x  :length(0) alloc_if(0) free_if(0)) \
  in(vol_flux_y  :length(0) alloc_if(0) free_if(0)) \
  in(volume      :length(0) alloc_if(0) free_if(0)) \
  in(mass_flux_x :length(0) alloc_if(0) free_if(0)) \
  in(mass_flux_y :length(0) alloc_if(0) free_if(0)) \
  in(vertexdx    :length(0) alloc_if(0) free_if(0)) \
  in(vertexdy    :length(0) alloc_if(0) free_if(0)) \
  in(pre_vol     :length(0) alloc_if(0) free_if(0)) \
  in(post_vol    :length(0) alloc_if(0) free_if(0)) \
  in(post_ener   :length(0) alloc_if(0) free_if(0)) \
  in(pre_mass    :length(0) alloc_if(0) free_if(0)) \
  in(post_mass   :length(0) alloc_if(0) free_if(0)) \
  in(advec_vol   :length(0) alloc_if(0) free_if(0)) \
  in(ener_flux   :length(0) alloc_if(0) free_if(0))
#pragma omp parallel
  {
    if (dir == g_xdir) {

      /* ---- x sweep, step 1: volumes before / after the sweep ---- */
      if (sweep_number == 1) {
#pragma omp for
        for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
          for (int j = x_min-2; j <= x_max+2; j++) {
            int cell      = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
            int cell_up   = FTNREF2D(j  ,k+1,x_max+4,x_min-2,y_min-2);
            int wide      = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
            int wide_next = FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2);

            pre_vol[wide] = volume[cell]
                           +(vol_flux_x[wide_next]
                            -vol_flux_x[wide]
                            +vol_flux_y[cell_up]
                            -vol_flux_y[cell]);
            post_vol[wide] = pre_vol[wide]
                            -(vol_flux_x[wide_next]
                             -vol_flux_x[wide]);
          }
        }
      }
      else {
#pragma omp for
        for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
          for (int j = x_min-2; j <= x_max+2; j++) {
            int cell      = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
            int wide      = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
            int wide_next = FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2);

            pre_vol[wide] = volume[cell]
                           +vol_flux_x[wide_next]
                           -vol_flux_x[wide];
            post_vol[wide] = volume[cell];
          }
        }
      }

      /* ---- x sweep, step 2: limited mass and energy fluxes ---- */
#pragma omp for
      for (int k = y_min; k <= y_max; k++) {
        for (int j = x_min; j <= x_max+2; j++) {
          int upwind, donor, downwind, dif;
          double sigmat, sigma3, sigma4, sigmam, diffuw, diffdw, limiter;
          int face = FTNREF2D(j ,k ,x_max+5,x_min-2,y_min-2);

          /* The sign of the volume flux decides which side donates. */
          if (vol_flux_x[face] > 0.0) {
            upwind   = j-2;
            donor    = j-1;
            downwind = j;
            dif      = donor;
          }
          else {
            upwind   = MIN(j+1,x_max+2);
            donor    = j;
            downwind = j-1;
            dif      = upwind;
          }

          int donor_cell    = FTNREF2D(donor,k ,x_max+4,x_min-2,y_min-2);
          int upwind_cell   = FTNREF2D(upwind,k ,x_max+4,x_min-2,y_min-2);
          int downwind_cell = FTNREF2D(downwind,k ,x_max+4,x_min-2,y_min-2);
          int donor_wide    = FTNREF2D(donor,k ,x_max+5,x_min-2,y_min-2);

          sigmat = fabs(vol_flux_x[face]/pre_vol[donor_wide]);
          sigma3 = (1.0+sigmat)*(vertexdx[FTNREF1D(j,x_min-2)]/vertexdx[FTNREF1D(dif,x_min-2)]);
          sigma4 = 2.0-sigmat;

          /* Density: limiter is zero unless both differences share a sign. */
          diffuw = density1[donor_cell]-density1[upwind_cell];
          diffdw = density1[downwind_cell]-density1[donor_cell];
          if (diffuw*diffdw > 0.0) {
            limiter = (1.0-sigmat)*SIGN(1.0,diffdw)*MIN(fabs(diffuw),MIN(fabs(diffdw)
                      ,one_by_six*(sigma3*fabs(diffuw)+sigma4*fabs(diffdw))));
          }
          else {
            limiter = 0.0;
          }
          mass_flux_x[face] = vol_flux_x[face]
                             *(density1[donor_cell]+limiter);

          /* Energy: same form, weighted by the mass-based ratio. */
          sigmam = fabs(mass_flux_x[face])/(density1[donor_cell]
                                           *pre_vol[donor_wide]);
          diffuw = energy1[donor_cell]-energy1[upwind_cell];
          diffdw = energy1[downwind_cell]-energy1[donor_cell];
          if (diffuw*diffdw > 0.0) {
            limiter = (1.0-sigmam)*SIGN(1.0,diffdw)*MIN(fabs(diffuw),MIN(fabs(diffdw)
                      ,one_by_six*(sigma3*fabs(diffuw)+sigma4*fabs(diffdw))));
          }
          else {
            limiter = 0.0;
          }
          ener_flux[face] = mass_flux_x[face]
                           *(energy1[donor_cell]+limiter);
        }
      }

      /* ---- x sweep, step 3: apply the fluxes ---- */
#pragma omp for
      for (int k = y_min; k <= y_max; k++) {
#pragma ivdep
        for (int j = x_min; j <= x_max; j++) {
          int cell      = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
          int wide      = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
          int wide_next = FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2);

          pre_mass[wide] = density1[cell]
                          *pre_vol[wide];
          post_mass[wide] = pre_mass[wide]
                           +mass_flux_x[wide]
                           -mass_flux_x[wide_next];
          post_ener[wide] = (energy1[cell]
                            *pre_mass[wide]
                            +ener_flux[wide]
                            -ener_flux[wide_next])
                            /post_mass[wide];
          advec_vol[wide] = pre_vol[wide]
                           +vol_flux_x[wide]
                           -vol_flux_x[wide_next];
          density1[cell] = post_mass[wide]/advec_vol[wide];
          energy1[cell] = post_ener[wide];
        }
      }
    }
    else if (dir == g_ydir) {

      /* ---- y sweep, step 1: volumes before / after the sweep ---- */
      if (sweep_number == 1) {
#pragma omp for
        for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
          for (int j = x_min-2; j <= x_max+2; j++) {
            int cell      = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
            int cell_up   = FTNREF2D(j  ,k+1,x_max+4,x_min-2,y_min-2);
            int wide      = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
            int wide_next = FTNREF2D(j+1,k  ,x_max+5,x_min-2,y_min-2);

            pre_vol[wide] = volume[cell]
                           +(vol_flux_y[cell_up]
                            -vol_flux_y[cell]
                            +vol_flux_x[wide_next]
                            -vol_flux_x[wide]);
            post_vol[wide] = pre_vol[wide]
                            -(vol_flux_y[cell_up]
                             -vol_flux_y[cell]);
          }
        }
      }
      else {
#pragma omp for
        for (int k = y_min-2; k <= y_max+2; k++) {
#pragma ivdep
          for (int j = x_min-2; j <= x_max+2; j++) {
            int cell    = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
            int cell_up = FTNREF2D(j  ,k+1,x_max+4,x_min-2,y_min-2);
            int wide    = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);

            pre_vol[wide] = volume[cell]
                           +vol_flux_y[cell_up]
                           -vol_flux_y[cell];
            post_vol[wide] = volume[cell];
          }
        }
      }

      /* ---- y sweep, step 2: limited mass and energy fluxes ---- */
#pragma omp for
      for (int k = y_min; k <= y_max+2; k++) {
        for (int j = x_min; j <= x_max; j++) {
          int upwind, donor, downwind, dif;
          double sigmat, sigma3, sigma4, sigmam, diffuw, diffdw, limiter;
          int face      = FTNREF2D(j ,k ,x_max+4,x_min-2,y_min-2);
          int face_wide = FTNREF2D(j,k,x_max+5,x_min-2,y_min-2);

          /* The sign of the volume flux decides which side donates. */
          if (vol_flux_y[face] > 0.0) {
            upwind   = k-2;
            donor    = k-1;
            downwind = k;
            dif      = donor;
          }
          else {
            upwind   = MIN(k+1,y_max+2);
            donor    = k;
            downwind = k-1;
            dif      = upwind;
          }

          int donor_cell    = FTNREF2D(j ,donor,x_max+4,x_min-2,y_min-2);
          int upwind_cell   = FTNREF2D(j ,upwind,x_max+4,x_min-2,y_min-2);
          int downwind_cell = FTNREF2D(j ,downwind,x_max+4,x_min-2,y_min-2);
          int donor_wide    = FTNREF2D(j ,donor,x_max+5,x_min-2,y_min-2);

          sigmat = fabs(vol_flux_y[face]/pre_vol[donor_wide]);
          sigma3 = (1.0+sigmat)*(vertexdy[FTNREF1D(k,y_min-2)]/vertexdy[FTNREF1D(dif,y_min-2)]);
          sigma4 = 2.0-sigmat;

          /* Density: limiter is zero unless both differences share a sign. */
          diffuw = density1[donor_cell]-density1[upwind_cell];
          diffdw = density1[downwind_cell]-density1[donor_cell];
          if (diffuw*diffdw > 0.0) {
            limiter = (1.0-sigmat)*SIGN(1.0,diffdw)*MIN(fabs(diffuw),MIN(fabs(diffdw)
                      ,one_by_six*(sigma3*fabs(diffuw)+sigma4*fabs(diffdw))));
          }
          else {
            limiter = 0.0;
          }
          mass_flux_y[face] = vol_flux_y[face]
                             *(density1[donor_cell]+limiter);

          /* Energy: same form, weighted by the mass-based ratio. */
          sigmam = fabs(mass_flux_y[face])/(density1[donor_cell]
                                           *pre_vol[donor_wide]);
          diffuw = energy1[donor_cell]-energy1[upwind_cell];
          diffdw = energy1[downwind_cell]-energy1[donor_cell];
          if (diffuw*diffdw > 0.0) {
            limiter = (1.0-sigmam)*SIGN(1.0,diffdw)*MIN(fabs(diffuw),MIN(fabs(diffdw)
                      ,one_by_six*(sigma3*fabs(diffuw)+sigma4*fabs(diffdw))));
          }
          else {
            limiter = 0.0;
          }
          ener_flux[face_wide] = mass_flux_y[face]
                                *(energy1[donor_cell]+limiter);
        }
      }

      /* ---- y sweep, step 3: apply the fluxes ---- */
#pragma omp for
      for (int k = y_min; k <= y_max; k++) {
#pragma ivdep
        for (int j = x_min; j <= x_max; j++) {
          int cell    = FTNREF2D(j  ,k  ,x_max+4,x_min-2,y_min-2);
          int cell_up = FTNREF2D(j,k+1,x_max+4,x_min-2,y_min-2);
          int wide    = FTNREF2D(j  ,k  ,x_max+5,x_min-2,y_min-2);
          int wide_up = FTNREF2D(j,k+1,x_max+5,x_min-2,y_min-2);

          pre_mass[wide] = density1[cell]
                          *pre_vol[wide];
          post_mass[wide] = pre_mass[wide]
                           +mass_flux_y[cell]
                           -mass_flux_y[cell_up];
          post_ener[wide] = (energy1[cell]
                            *pre_mass[wide]
                            +ener_flux[wide]
                            -ener_flux[wide_up])
                            /post_mass[wide];
          advec_vol[wide] = pre_vol[wide]
                           +vol_flux_y[cell]
                           -vol_flux_y[cell_up];
          density1[cell] = post_mass[wide]/advec_vol[wide];
          energy1[cell] = post_ener[wide];
        }
      }
    }
  }
}