void compute_step_factor(int nelr, double* variables, double* areas, double* step_factors) { { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { double density = variables[NVAR*i + VAR_DENSITY]; cfd_double3 momentum; momentum.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy = variables[NVAR*i + VAR_DENSITY_ENERGY]; cfd_double3 velocity; compute_velocity(density, momentum, velocity); double speed_sqd = compute_speed_sqd(velocity); double pressure = compute_pressure(density, density_energy, speed_sqd); double speed_of_sound = compute_speed_of_sound(density, pressure); // dt = double(0.5) * std::sqrt(areas[i]) / (||v|| + c).... but when we do time stepping, this later would need to be divided by the area, so we just do it all at once step_factors[i] = double(0.5) / (std::sqrt(areas[i]) * (std::sqrt(speed_sqd) + speed_of_sound)); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma155_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
void compute_flux_contributions(int nelr, double* variables, double* fc_momentum_x, double* fc_momentum_y, double* fc_momentum_z, double* fc_density_energy) { #pragma acc kernels for(int i = 0; i < nelr; i++) { double density_i = variables[NVAR*i + VAR_DENSITY]; double3 momentum_i; momentum_i.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum_i.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum_i.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy_i = variables[NVAR*i + VAR_DENSITY_ENERGY]; double3 velocity_i; compute_velocity(density_i, momentum_i, velocity_i); double speed_sqd_i = compute_speed_sqd(velocity_i); double speed_i = sqrtf(speed_sqd_i); double pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i); double speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i); double3 fc_i_momentum_x, fc_i_momentum_y, fc_i_momentum_z; double3 fc_i_density_energy; compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i, fc_i_momentum_x, fc_i_momentum_y, fc_i_momentum_z, fc_i_density_energy); fc_momentum_x[i*NDIM + 0] = fc_i_momentum_x.x; fc_momentum_x[i*NDIM + 1] = fc_i_momentum_x.y; fc_momentum_x[i*NDIM+ 2] = fc_i_momentum_x.z; fc_momentum_y[i*NDIM+ 0] = fc_i_momentum_y.x; fc_momentum_y[i*NDIM+ 1] = fc_i_momentum_y.y; fc_momentum_y[i*NDIM+ 2] = fc_i_momentum_y.z; fc_momentum_z[i*NDIM+ 0] = fc_i_momentum_z.x; fc_momentum_z[i*NDIM+ 1] = fc_i_momentum_z.y; fc_momentum_z[i*NDIM+ 2] = fc_i_momentum_z.z; fc_density_energy[i*NDIM+ 0] = fc_i_density_energy.x; fc_density_energy[i*NDIM+ 1] = fc_i_density_energy.y; fc_density_energy[i*NDIM+ 2] = fc_i_density_energy.z; } }
void compute_step_factor(int nelr, double* variables, double* areas, double* step_factors) { #pragma acc kernels for(int i = 0; i < nelr; i++) { double density = variables[NVAR*i + VAR_DENSITY]; double3 momentum; momentum.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy = variables[NVAR*i + VAR_DENSITY_ENERGY]; double3 velocity; compute_velocity(density, momentum, velocity); double speed_sqd = compute_speed_sqd(velocity); double pressure = compute_pressure(density, density_energy, speed_sqd); double speed_of_sound = compute_speed_of_sound(density, pressure); // dt = double(0.5) * std::sqrt(areas[i]) / (||v|| + c).... but when we do time stepping, this later would need to be divided by the area, so we just do it all at once step_factors[i] = double(0.5) / (std::sqrt(areas[i]) * (std::sqrt(speed_sqd) + speed_of_sound)); } }
void compute_flux(int nelr, int* elements_surrounding_elements, double* normals, double* variables, double* fluxes) { double smoothing_coefficient = double(0.2f); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { int j, nb; cfd_double3 normal; double normal_len; double factor; double density_i = variables[NVAR*i + VAR_DENSITY]; cfd_double3 momentum_i; momentum_i.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum_i.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum_i.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy_i = variables[NVAR*i + VAR_DENSITY_ENERGY]; cfd_double3 velocity_i; compute_velocity(density_i, momentum_i, velocity_i); double speed_sqd_i = compute_speed_sqd(velocity_i); double speed_i = std::sqrt(speed_sqd_i); double pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i); double speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i); cfd_double3 flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z; cfd_double3 flux_contribution_i_density_energy; compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i, flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z, flux_contribution_i_density_energy); double flux_i_density = double(0.0); cfd_double3 flux_i_momentum; flux_i_momentum.x = double(0.0); flux_i_momentum.y = double(0.0); flux_i_momentum.z = double(0.0); double flux_i_density_energy = double(0.0); cfd_double3 velocity_nb; double density_nb, density_energy_nb; cfd_double3 momentum_nb; cfd_double3 flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z; cfd_double3 flux_contribution_nb_density_energy; double speed_sqd_nb, speed_of_sound_nb, pressure_nb; for(j = 0; j < NNB; j++) { nb = elements_surrounding_elements[i*NNB + j]; normal.x = normals[(i*NNB + j)*NDIM + 0]; normal.y = normals[(i*NNB + j)*NDIM + 1]; normal.z = normals[(i*NNB + j)*NDIM + 2]; normal_len = std::sqrt(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z); if(nb >= 0) // a legitimate neighbor { density_nb = variables[nb*NVAR + VAR_DENSITY]; momentum_nb.x = variables[nb*NVAR + (VAR_MOMENTUM+0)]; momentum_nb.y = variables[nb*NVAR + (VAR_MOMENTUM+1)]; momentum_nb.z = variables[nb*NVAR + (VAR_MOMENTUM+2)]; density_energy_nb = variables[nb*NVAR + VAR_DENSITY_ENERGY]; compute_velocity(density_nb, momentum_nb, velocity_nb); speed_sqd_nb = compute_speed_sqd(velocity_nb); pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb); compute_flux_contribution(density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb, flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z, flux_contribution_nb_density_energy); // artificial viscosity factor = -normal_len*smoothing_coefficient*double(0.5)*(speed_i + std::sqrt(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb); flux_i_density += factor*(density_i-density_nb); flux_i_density_energy += factor*(density_energy_i-density_energy_nb); flux_i_momentum.x += factor*(momentum_i.x-momentum_nb.x); flux_i_momentum.y += factor*(momentum_i.y-momentum_nb.y); flux_i_momentum.z += factor*(momentum_i.z-momentum_nb.z); // accumulate cell-centered fluxes factor = double(0.5)*normal.x; flux_i_density += factor*(momentum_nb.x+momentum_i.x); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.x+flux_contribution_i_density_energy.x); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.x+flux_contribution_i_momentum_x.x); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.x+flux_contribution_i_momentum_y.x); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.x+flux_contribution_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += factor*(momentum_nb.y+momentum_i.y); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.y+flux_contribution_i_density_energy.y); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.y+flux_contribution_i_momentum_x.y); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.y+flux_contribution_i_momentum_y.y); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.y+flux_contribution_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(momentum_nb.z+momentum_i.z); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.z+flux_contribution_i_density_energy.z); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.z+flux_contribution_i_momentum_x.z); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.z+flux_contribution_i_momentum_y.z); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.z+flux_contribution_i_momentum_z.z); } else if(nb == -1) // a wing boundary { flux_i_momentum.x += normal.x*pressure_i; flux_i_momentum.y += normal.y*pressure_i; flux_i_momentum.z += normal.z*pressure_i; } else if(nb == -2) // a far field boundary { factor = double(0.5)*normal.x; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+0]+momentum_i.x); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.x+flux_contribution_i_density_energy.x); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.x + flux_contribution_i_momentum_x.x); flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y.x + flux_contribution_i_momentum_y.x); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.x + flux_contribution_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+1]+momentum_i.y); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.y+flux_contribution_i_density_energy.y); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.y + flux_contribution_i_momentum_x.y); flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y.y + flux_contribution_i_momentum_y.y); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.y + flux_contribution_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+2]+momentum_i.z); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.z+flux_contribution_i_density_energy.z); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.z + flux_contribution_i_momentum_x.z); flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y.z + flux_contribution_i_momentum_y.z); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.z + flux_contribution_i_momentum_z.z); } } fluxes[i*NVAR + VAR_DENSITY] = flux_i_density; fluxes[i*NVAR + (VAR_MOMENTUM+0)] = flux_i_momentum.x; fluxes[i*NVAR + (VAR_MOMENTUM+1)] = flux_i_momentum.y; fluxes[i*NVAR + (VAR_MOMENTUM+2)] = flux_i_momentum.z; fluxes[i*NVAR + VAR_DENSITY_ENERGY] = flux_i_density_energy; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma186_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
void compute_flux(int nelr, int* elements_surrounding_elements, double* normals, double* variables, double* fc_momentum_x, double* fc_momentum_y, double* fc_momentum_z, double* fc_density_energy, double* fluxes) { const double smoothing_coefficient = double(0.2); #pragma acc kernels for(int i = 0; i < nelr; i++) { int j, nb; double3 normal; double normal_len; double factor; double density_i = variables[NVAR*i + VAR_DENSITY]; double3 momentum_i; momentum_i.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum_i.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum_i.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy_i = variables[NVAR*i + VAR_DENSITY_ENERGY]; double3 velocity_i; compute_velocity(density_i, momentum_i, velocity_i); double speed_sqd_i = compute_speed_sqd(velocity_i); double speed_i = std::sqrt(speed_sqd_i); double pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i); double speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i); double3 fc_i_momentum_x, fc_i_momentum_y, fc_i_momentum_z; double3 fc_i_density_energy; fc_i_momentum_x.x = fc_momentum_x[i*NDIM + 0]; fc_i_momentum_x.y = fc_momentum_x[i*NDIM + 1]; fc_i_momentum_x.z = fc_momentum_x[i*NDIM + 2]; fc_i_momentum_y.x = fc_momentum_y[i*NDIM + 0]; fc_i_momentum_y.y = fc_momentum_y[i*NDIM + 1]; fc_i_momentum_y.z = fc_momentum_y[i*NDIM + 2]; fc_i_momentum_z.x = fc_momentum_z[i*NDIM + 0]; fc_i_momentum_z.y = fc_momentum_z[i*NDIM + 1]; fc_i_momentum_z.z = fc_momentum_z[i*NDIM + 2]; fc_i_density_energy.x = fc_density_energy[i*NDIM + 0]; fc_i_density_energy.y = fc_density_energy[i*NDIM + 1]; fc_i_density_energy.z = fc_density_energy[i*NDIM + 2]; double flux_i_density = double(0.0); double3 flux_i_momentum; flux_i_momentum.x = double(0.0); flux_i_momentum.y = double(0.0); flux_i_momentum.z = double(0.0); double flux_i_density_energy = double(0.0); double3 velocity_nb; double density_nb, density_energy_nb; double3 momentum_nb; double3 fc_nb_momentum_x, fc_nb_momentum_y, fc_nb_momentum_z; double3 fc_nb_density_energy; double speed_sqd_nb, speed_of_sound_nb, pressure_nb; for(j = 0; j < NNB; j++) { nb = elements_surrounding_elements[i*NNB + j]; normal.x = normals[(i*NNB + j)*NDIM + 0]; normal.y = normals[(i*NNB + j)*NDIM + 1]; normal.z = normals[(i*NNB + j)*NDIM + 2]; normal_len = std::sqrt(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z); if(nb >= 0) // a legitimate neighbor { density_nb = variables[nb*NVAR + VAR_DENSITY]; momentum_nb.x = variables[nb*NVAR + (VAR_MOMENTUM+0)]; momentum_nb.y = variables[nb*NVAR + (VAR_MOMENTUM+1)]; momentum_nb.z = variables[nb*NVAR + (VAR_MOMENTUM+2)]; density_energy_nb = variables[nb*NVAR + VAR_DENSITY_ENERGY]; compute_velocity(density_nb, momentum_nb, velocity_nb); speed_sqd_nb = compute_speed_sqd(velocity_nb); pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb); fc_nb_momentum_x.x = fc_momentum_x[nb*NDIM + 0]; fc_nb_momentum_x.y = fc_momentum_x[nb*NDIM + 1]; fc_nb_momentum_x.z = fc_momentum_x[nb*NDIM + 2]; fc_nb_momentum_y.x = fc_momentum_y[nb*NDIM + 0]; fc_nb_momentum_y.y = fc_momentum_y[nb*NDIM + 1]; fc_nb_momentum_y.z = fc_momentum_y[nb*NDIM + 2]; fc_nb_momentum_z.x = fc_momentum_z[nb*NDIM + 0]; fc_nb_momentum_z.y = fc_momentum_z[nb*NDIM + 1]; fc_nb_momentum_z.z = fc_momentum_z[nb*NDIM + 2]; fc_nb_density_energy.x = fc_density_energy[nb*NDIM + 0]; fc_nb_density_energy.y = fc_density_energy[nb*NDIM + 1]; // artificial viscosity factor = -normal_len*smoothing_coefficient*double(0.5)*(speed_i + std::sqrt(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb); flux_i_density += factor*(density_i-density_nb); flux_i_density_energy += factor*(density_energy_i-density_energy_nb); flux_i_momentum.x += factor*(momentum_i.x-momentum_nb.x); flux_i_momentum.y += factor*(momentum_i.y-momentum_nb.y); flux_i_momentum.z += factor*(momentum_i.z-momentum_nb.z); // accumulate cell-centered fluxes factor = double(0.5)*normal.x; flux_i_density += factor*(momentum_nb.x+momentum_i.x); flux_i_density_energy += factor*(fc_nb_density_energy.x+fc_i_density_energy.x); flux_i_momentum.x += factor*(fc_nb_momentum_x.x+fc_i_momentum_x.x); flux_i_momentum.y += factor*(fc_nb_momentum_y.x+fc_i_momentum_y.x); flux_i_momentum.z += factor*(fc_nb_momentum_z.x+fc_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += factor*(momentum_nb.y+momentum_i.y); flux_i_density_energy += factor*(fc_nb_density_energy.y+fc_i_density_energy.y); flux_i_momentum.x += factor*(fc_nb_momentum_x.y+fc_i_momentum_x.y); flux_i_momentum.y += factor*(fc_nb_momentum_y.y+fc_i_momentum_y.y); flux_i_momentum.z += factor*(fc_nb_momentum_z.y+fc_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(momentum_nb.z+momentum_i.z); flux_i_density_energy += factor*(fc_nb_density_energy.z+fc_i_density_energy.z); flux_i_momentum.x += factor*(fc_nb_momentum_x.z+fc_i_momentum_x.z); flux_i_momentum.y += factor*(fc_nb_momentum_y.z+fc_i_momentum_y.z); flux_i_momentum.z += factor*(fc_nb_momentum_z.z+fc_i_momentum_z.z); } else if(nb == -1) // a wing boundary { flux_i_momentum.x += normal.x*pressure_i; flux_i_momentum.y += normal.y*pressure_i; flux_i_momentum.z += normal.z*pressure_i; } else if(nb == -2) // a far field boundary { factor = double(0.5)*normal.x; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+0]+momentum_i.x); flux_i_density_energy += factor*(ff_fc_density_energy.x+fc_i_density_energy.x); flux_i_momentum.x += factor*(ff_fc_momentum_x.x + fc_i_momentum_x.x); flux_i_momentum.y += factor*(ff_fc_momentum_y.x + fc_i_momentum_y.x); flux_i_momentum.z += factor*(ff_fc_momentum_z.x + fc_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+1]+momentum_i.y); flux_i_density_energy += factor*(ff_fc_density_energy.y+fc_i_density_energy.y); flux_i_momentum.x += factor*(ff_fc_momentum_x.y + fc_i_momentum_x.y); flux_i_momentum.y += factor*(ff_fc_momentum_y.y + fc_i_momentum_y.y); flux_i_momentum.z += factor*(ff_fc_momentum_z.y + fc_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+2]+momentum_i.z); flux_i_density_energy += factor*(ff_fc_density_energy.z+fc_i_density_energy.z); flux_i_momentum.x += factor*(ff_fc_momentum_x.z + fc_i_momentum_x.z); flux_i_momentum.y += factor*(ff_fc_momentum_y.z + fc_i_momentum_y.z); flux_i_momentum.z += factor*(ff_fc_momentum_z.z + fc_i_momentum_z.z); } } fluxes[i*NVAR + VAR_DENSITY] = flux_i_density; fluxes[i*NVAR + (VAR_MOMENTUM+0)] = flux_i_momentum.x; fluxes[i*NVAR + (VAR_MOMENTUM+1)] = flux_i_momentum.y; fluxes[i*NVAR + (VAR_MOMENTUM+2)] = flux_i_momentum.z; fluxes[i*NVAR + VAR_DENSITY_ENERGY] = flux_i_density_energy; } }