/*
 * RPC service routine: read args->nrb projection blocks from a storage.
 *
 * The reply lives in a function-static sp_read_ret_t; whatever the previous
 * call left in it is released with xdr_free() before it is reused.
 *
 * On success ret.status is SP_SUCCESS and the bins member describes a buffer
 * of args->nrb * rozofs_psizes[args->tid] * sizeof(bin_t) bytes filled by
 * storage_read(). On failure ret.status is SP_FAILURE and the error member
 * carries errno.
 *
 * args: request (sid, fid, tid, bid, nrb are read here).
 * req:  RPC request handle, not used in this body.
 *
 * Returns the address of the static reply.
 */
sp_read_ret_t *sp_read_1_svc(sp_read_arg_t * args, struct svc_req * req) {
    static sp_read_ret_t ret;
    uint32_t psize;
    storage_t *st = 0;
    DEBUG_FUNCTION;
    START_PROFILING_IO(read,
            args->nrb * rozofs_psizes[args->tid] * sizeof (bin_t));

    /* Drop the payload left over from the previous invocation. */
    xdr_free((xdrproc_t) xdr_sp_read_ret_t, (char *) &ret);

    ret.status = SP_FAILURE;

    if ((st = storaged_lookup(args->sid)) == 0) {
        ret.sp_read_ret_t_u.error = errno;
        goto out;
    }

    psize = rozofs_psizes[args->tid];
    ret.sp_read_ret_t_u.bins.bins_len = args->nrb * psize * sizeof (bin_t);
    ret.sp_read_ret_t_u.bins.bins_val =
            (char *) xmalloc(args->nrb * psize * sizeof (bin_t));

    if (storage_read(st, args->fid, args->tid, args->bid, args->nrb,
            (bin_t *) ret.sp_read_ret_t_u.bins.bins_val) != 0) {
        /*
         * The reply is reported as SP_FAILURE, so the next xdr_free() will
         * not walk the bins member, and storing the error code would
         * overwrite the only reference to the buffer (assuming
         * sp_read_ret_t_u is the rpcgen-style union its name suggests).
         * Release the buffer here, saving errno first because free() is
         * allowed to modify it.
         */
        int saved_errno = errno;

        free(ret.sp_read_ret_t_u.bins.bins_val);
        ret.sp_read_ret_t_u.bins.bins_val = NULL;
        ret.sp_read_ret_t_u.bins.bins_len = 0;
        ret.sp_read_ret_t_u.error = saved_errno;
        goto out;
    }

    ret.status = SP_SUCCESS;

out:
    STOP_PROFILING(read);
    return &ret;
}
/*
 * Handle an MP_LOCATE request by handing it over to a storage sub-thread.
 *
 * pt:        decoded request, interpreted as mp_locate_arg_t.
 * req_ctx_p: RPC server context of the request.
 *
 * When the request is successfully queued with
 * storaged_sub_thread_intf_send_req() the function returns immediately and
 * neither replies nor releases the context here. Otherwise an MP_FAILURE
 * reply carrying errno is forwarded and the context is released.
 */
void mp_subthread_locate(void * pt, rozorpc_srv_ctx_t *req_ctx_p) {
    static mp_locate_ret_t ret;
    mp_locate_arg_t *args = (mp_locate_arg_t *) pt;
    storage_t *st = 0;

    START_PROFILING(locate);

    /* The receive buffer is recycled as the transmit buffer. */
    req_ctx_p->xmitBuf = req_ctx_p->recv_buf;
    req_ctx_p->recv_buf = NULL;

    st = get_storage(args->cid, args->sid, req_ctx_p->socketRef);
    if (st != 0
            && storaged_sub_thread_intf_send_req(MP_LOCATE, req_ctx_p, st, tic) == 0) {
        /* Request handed over; nothing more to do in this function. */
        return;
    }

    /* Either the storage is unknown or the hand-over failed. */
    ret.status = MP_FAILURE;
    ret.mp_locate_ret_t_u.error = errno;

    rozorpc_srv_forward_reply(req_ctx_p, (char *) &ret);

    /* Release the context. */
    rozorpc_srv_release_context(req_ctx_p);

    STOP_PROFILING(locate);
}
// Shared solver kernels void run_copy_u(Chunk* chunk, Settings* settings) { START_PROFILING(settings->kernel_profile); copy_u( chunk->x, chunk->y, settings->halo_depth, chunk->u0, chunk->u); STOP_PROFILING(settings->kernel_profile, __func__); }
// Entry point for calculating p void ext_cg_calc_p_( const int* chunk, double* p, double* r, double* z, const double* beta) { START_PROFILING; #pragma omp target if(_chunk.is_offload) device(_chunk.device_id) #pragma omp parallel for for(int ii = HALO_PAD; ii < _chunk.z-HALO_PAD; ++ii) { for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj) { for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk) { const int index = ii*_chunk.x*_chunk.y+jj*_chunk.x+kk; p[index] = *beta*p[index] + r[index]; } } } STOP_PROFILING(__func__); }
/*
 * Non-blocking service routine for the remove2 request.
 *
 * pt_req:              request, interpreted as mp_remove2_arg_t.
 * rozorpc_srv_ctx_p:   RPC server context, not referenced in this body.
 * pt_resp:             reply, interpreted as mp_status_ret_t and filled here.
 * cnx_id:              connection identifier passed on to get_storage().
 *
 * The reply status is MP_SUCCESS only when the storage is found and
 * storage_rm2_file() returns 0; otherwise it is MP_FAILURE and the error
 * member holds errno.
 */
void mp_remove2_1_svc_nb(void * pt_req, rozorpc_srv_ctx_t *rozorpc_srv_ctx_p,
        void * pt_resp, uint32_t cnx_id) {
    mp_remove2_arg_t *args = (mp_remove2_arg_t *) pt_req;
    mp_status_ret_t *ret = (mp_status_ret_t *) pt_resp;
    storage_t *st = 0;

    DEBUG_FUNCTION;

    START_PROFILING(remove);

    /* Pessimistic default, overwritten once both steps have succeeded. */
    ret->status = MP_FAILURE;

    st = get_storage(args->cid, args->sid, cnx_id);
    if (st == 0) {
        ret->mp_status_ret_t_u.error = errno;
    } else if (storage_rm2_file(st, (unsigned char *) args->fid, args->spare) != 0) {
        ret->mp_status_ret_t_u.error = errno;
    } else {
        ret->status = MP_SUCCESS;
    }

    STOP_PROFILING(remove);
}
// Initialises the Chebyshev solver. void TeaLeafChunk::ChebyInit( double* alphas, double* betas, const double theta, const bool preconditionerOn) { preconditioner = preconditionerOn; this->alphas = alphas; this->betas = betas; START_PROFILING; #pragma omp parallel for for(int jj = HALO_PAD; jj < yCells-HALO_PAD; ++jj) { for(int kk = HALO_PAD; kk < xCells-HALO_PAD; ++kk) { const int index = jj*xCells+kk; const double smvp = SMVP(u); w[index] = smvp; r[index] = u0[index]-w[index]; p[index] = (preconditioner ? mi[index]*r[index] : r[index])/theta; } } STOP_PROFILING("Cheby Init"); ChebyCalcU(); }
// Entry point for calculating w void ext_cg_calc_w_( const int* chunk, double* p, double* w, double* kx, double* ky, double* kz, double* pw) { START_PROFILING; double pwTemp = 0.0; #pragma omp target if(_chunk.is_offload) device(_chunk.device_id) #pragma omp parallel for reduction(+:pwTemp) for(int ii = HALO_PAD; ii < _chunk.z-HALO_PAD; ++ii) { for(int jj = HALO_PAD; jj < _chunk.y-HALO_PAD; ++jj) { for(int kk = HALO_PAD; kk < _chunk.x-HALO_PAD; ++kk) { const int index = ii*_chunk.x*_chunk.y+jj*_chunk.x+kk; const double smvp = SMVP(p); w[index] = smvp; pwTemp += w[index]*p[index]; } } } *pw = pwTemp; STOP_PROFILING(__func__); }
void calc_scattering_cross_section(void) { START_PROFILING; #pragma acc kernels \ present(scat_cs[:scat_cs_len], gg_cs[:gg_cs_len], mat[:mat_len]) #pragma acc loop collapse(5) independent for(unsigned int g = 0; g < ng; ++g) { for (unsigned int k = 0; k < nz; k++) { for (unsigned int j = 0; j < ny; j++) { for (unsigned int i = 0; i < nx; i++) { for (unsigned int l = 0; l < nmom; l++) { scat_cs(l,i,j,k,g) = gg_cs(mat(i,j,k)-1,l,g,g); } } } } } STOP_PROFILING(__func__); }
// Calculate the inner source void calc_inner_source(void) { START_PROFILING; #pragma acc kernels \ present(source[:source_len], g2g_source[:g2g_source_len], scat_cs[:scat_cs_len], \ scalar_flux[:scalar_flux_len], lma[:lma_len], scalar_mom[:scalar_mom_len]) #pragma acc loop collapse(4) independent for (unsigned int g = 0; g < ng; g++) { for(int k = 0; k < nz; ++k) { for(int j = 0; j < ny; ++j) { for(int i = 0; i < nx; ++i) { source(0,i,j,k,g) = g2g_source(0,i,j,k,g) + scat_cs(0,i,j,k,g) * scalar_flux(g,i,j,k); unsigned int mom = 1; for (unsigned int l = 1; l < nmom; l++) { for (int m = 0; m < lma(l); m++) { source(mom,i,j,k,g) = g2g_source(mom,i,j,k,g) + scat_cs(l,i,j,k,g) * scalar_mom(g,mom-1,i,j,k); mom++; } } } } } } STOP_PROFILING(__func__); }
// Calculates key values from the current field. void TeaLeafChunk::FieldSummary( double* volOut, double* massOut, double* ieOut, double* tempOut) { double vol = 0.0; double mass = 0.0; double ie = 0.0; double temp = 0.0; START_PROFILING; #pragma omp parallel for reduction(+:vol,mass,ie,temp) for(int jj = HALO_PAD; jj < yCells-HALO_PAD; ++jj) { for(int kk = HALO_PAD; kk < xCells-HALO_PAD; ++kk) { int index = jj*xCells+kk; double cellVol = volume[index]; double cellMass = cellVol*density[index]; vol += cellVol; mass += cellMass; ie += cellMass*energy0[index]; temp += cellMass*u[index]; } } STOP_PROFILING("Field Summary"); *volOut = vol; *massOut = mass; *ieOut = ie; *tempOut = temp; }
// Runs the store_energy kernel on one chunk, passing the chunk's
// ext->raja_lists together with the energy0 and energy fields. The call is
// bracketed by START_PROFILING/STOP_PROFILING on settings->kernel_profile,
// with this function's name handed to STOP_PROFILING.
void run_store_energy(Chunk* chunk, Settings* settings)
{
    START_PROFILING(settings->kernel_profile);

    store_energy(chunk->ext->raja_lists,
                 chunk->energy0,
                 chunk->energy);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the calculate_2norm kernel on one chunk.
//
// buffer: field handed to the kernel as its input buffer argument.
// norm:   pointer handed to the kernel as its result argument.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_calculate_2norm(
        Chunk* chunk,
        Settings* settings,
        double* buffer,
        double* norm)
{
    START_PROFILING(settings->kernel_profile);

    calculate_2norm(chunk->x,
                    chunk->y,
                    settings->halo_depth,
                    buffer,
                    norm);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// PPCG solver kernels void run_ppcg_init(Chunk* chunk, Settings* settings) { START_PROFILING(settings->kernel_profile); ppcg_init( chunk->x, chunk->y, settings->halo_depth, chunk->theta, chunk->r, chunk->sd); STOP_PROFILING(settings->kernel_profile, __func__); }
// Runs the cg_calc_w kernel on one chunk, passing the chunk's
// ext->raja_lists, its x/y extents, the halo depth from settings, the pw
// output pointer and the p, w, kx and ky fields.
//
// pw: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cg_calc_w(Chunk* chunk, Settings* settings, double* pw)
{
    START_PROFILING(settings->kernel_profile);

    cg_calc_w(chunk->ext->raja_lists,
              chunk->x,
              chunk->y,
              settings->halo_depth,
              pw,
              chunk->p,
              chunk->w,
              chunk->kx,
              chunk->ky);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the finalise kernel on one chunk, passing its x/y extents, the halo
// depth from settings and the energy, density and u fields. The call is
// bracketed by START_PROFILING/STOP_PROFILING on settings->kernel_profile,
// with this function's name handed to STOP_PROFILING.
void run_finalise(Chunk* chunk, Settings* settings)
{
    START_PROFILING(settings->kernel_profile);

    finalise(chunk->x,
             chunk->y,
             settings->halo_depth,
             chunk->energy,
             chunk->density,
             chunk->u);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the calculate_residual kernel on one chunk, passing its x/y extents,
// the halo depth from settings and the u, u0, r, kx and ky fields. The call
// is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_calculate_residual(Chunk* chunk, Settings* settings)
{
    START_PROFILING(settings->kernel_profile);

    calculate_residual(chunk->x,
                       chunk->y,
                       settings->halo_depth,
                       chunk->u,
                       chunk->u0,
                       chunk->r,
                       chunk->kx,
                       chunk->ky);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the cg_calc_p kernel on one chunk, passing its x/y extents, the halo
// depth from settings, beta, and the p and r fields.
//
// beta: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cg_calc_p(Chunk* chunk, Settings* settings, double beta)
{
    START_PROFILING(settings->kernel_profile);

    cg_calc_p(chunk->x,
              chunk->y,
              settings->halo_depth,
              beta,
              chunk->p,
              chunk->r);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the pack_or_unpack kernel on one chunk.
//
// depth, face, pack, field, buffer: forwarded unchanged to the kernel,
// together with the chunk's x/y extents and the halo depth from settings.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_pack_or_unpack(
        Chunk* chunk,
        Settings* settings,
        int depth,
        int face,
        bool pack,
        double* field,
        double* buffer)
{
    START_PROFILING(settings->kernel_profile);

    pack_or_unpack(chunk->x,
                   chunk->y,
                   depth,
                   settings->halo_depth,
                   face,
                   pack,
                   field,
                   buffer);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the cg_calc_ur kernel on one chunk, passing its x/y extents, the
// halo depth from settings, alpha, the rrn pointer and the u, p, r and w
// fields.
//
// alpha, rrn: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cg_calc_ur(
        Chunk* chunk,
        Settings* settings,
        double alpha,
        double* rrn)
{
    START_PROFILING(settings->kernel_profile);

    cg_calc_ur(chunk->x,
               chunk->y,
               settings->halo_depth,
               alpha,
               rrn,
               chunk->u,
               chunk->p,
               chunk->r,
               chunk->w);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the ppcg_inner_iteration kernel on one chunk, passing the chunk's
// ext->raja_lists, its x/y extents, the halo depth from settings, alpha,
// beta and the u, r, kx, ky and sd fields.
//
// alpha, beta: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_ppcg_inner_iteration(
        Chunk* chunk,
        Settings* settings,
        double alpha,
        double beta)
{
    START_PROFILING(settings->kernel_profile);

    ppcg_inner_iteration(chunk->ext->raja_lists,
                         chunk->x,
                         chunk->y,
                         settings->halo_depth,
                         alpha,
                         beta,
                         chunk->u,
                         chunk->r,
                         chunk->kx,
                         chunk->ky,
                         chunk->sd);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the jacobi_iterate kernel on one chunk, passing its x/y extents, the
// halo depth from settings, the error pointer and the kx, ky, u0, u and r
// fields.
//
// error: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_jacobi_iterate(
        Chunk* chunk,
        Settings* settings,
        double* error)
{
    START_PROFILING(settings->kernel_profile);

    jacobi_iterate(chunk->x,
                   chunk->y,
                   settings->halo_depth,
                   error,
                   chunk->kx,
                   chunk->ky,
                   chunk->u0,
                   chunk->u,
                   chunk->r);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Chebyshev solver kernels void run_cheby_init(Chunk* chunk, Settings* settings) { START_PROFILING(settings->kernel_profile); cheby_init( chunk->ext->raja_lists, chunk->x, chunk->y, settings->halo_depth, chunk->theta, chunk->u, chunk->u0, chunk->p, chunk->r, chunk->w, chunk->kx, chunk->ky); STOP_PROFILING(settings->kernel_profile, __func__); }
// Runs the cg_calc_w kernel on one chunk, passing its x/y extents, the halo
// depth from settings, the pw output pointer, the p, w, kx and ky fields,
// and the chunk's ext->a_row_index, ext->a_col_index and ext->a_non_zeros
// arrays.
//
// pw: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cg_calc_w(Chunk* chunk, Settings* settings, double* pw)
{
    START_PROFILING(settings->kernel_profile);

    cg_calc_w(chunk->x,
              chunk->y,
              settings->halo_depth,
              pw,
              chunk->p,
              chunk->w,
              chunk->kx,
              chunk->ky,
              chunk->ext->a_row_index,
              chunk->ext->a_col_index,
              chunk->ext->a_non_zeros);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Runs the cheby_iterate kernel on one chunk, passing the chunk's
// ext->raja_lists, its x/y extents, the halo depth from settings, alpha,
// beta and the u, u0, p, r, w, kx and ky fields.
//
// alpha, beta: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cheby_iterate(
        Chunk* chunk,
        Settings* settings,
        double alpha,
        double beta)
{
    START_PROFILING(settings->kernel_profile);

    cheby_iterate(chunk->ext->raja_lists,
                  chunk->x,
                  chunk->y,
                  settings->halo_depth,
                  alpha,
                  beta,
                  chunk->u,
                  chunk->u0,
                  chunk->p,
                  chunk->r,
                  chunk->w,
                  chunk->kx,
                  chunk->ky);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Jacobi solver kernels void run_jacobi_init( Chunk* chunk, Settings* settings, double rx, double ry) { START_PROFILING(settings->kernel_profile); jacobi_init( chunk->x, chunk->y, settings->halo_depth, settings->coefficient, rx, ry, chunk->density, chunk->energy, chunk->u0, chunk->u, chunk->kx, chunk->ky); STOP_PROFILING(settings->kernel_profile, __func__); }
// Runs the field_summary kernel on one chunk, passing its x/y extents, the
// halo depth from settings, the volume, density, energy0 and u fields, and
// the four output pointers.
//
// vol, mass, ie, temp: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_field_summary(
        Chunk* chunk,
        Settings* settings,
        double* vol,
        double* mass,
        double* ie,
        double* temp)
{
    START_PROFILING(settings->kernel_profile);

    field_summary(chunk->x,
                  chunk->y,
                  settings->halo_depth,
                  chunk->volume,
                  chunk->density,
                  chunk->energy0,
                  chunk->u,
                  vol,
                  mass,
                  ie,
                  temp);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
// Solver-wide kernels void run_local_halos( Chunk* chunk, Settings* settings, int depth) { START_PROFILING(settings->kernel_profile); local_halos(chunk->x, chunk->y, depth, settings->halo_depth, chunk->neighbours, settings->fields_to_exchange, chunk->density, chunk->energy0, chunk->energy, chunk->u, chunk->p, chunk->sd); STOP_PROFILING(settings->kernel_profile, __func__); }
/*
 * Write one mattr_t record at offset 0 of the descriptor mslnk->fdattrs.
 *
 * mslnk: object whose fdattrs descriptor is written to.
 * attrs: record to write; sizeof(mattr_t) bytes are taken from it.
 *
 * Returns 0 when pwrite() reports exactly sizeof(mattr_t) bytes written,
 * -1 otherwise (short write or pwrite() failure).
 */
int mslnk_write_attributes(mslnk_t *mslnk, mattr_t *attrs) {
    int status;

    START_PROFILING(mslnk_write_attributes);

    if (pwrite(mslnk->fdattrs, attrs, sizeof(mattr_t), 0) == sizeof(mattr_t)) {
        status = 0;
    } else {
        status = -1;
    }

    STOP_PROFILING(mslnk_write_attributes);

    return status;
}
// Chebyshev solver kernels void run_cheby_init(Chunk* chunk, Settings* settings) { START_PROFILING(settings->kernel_profile); cheby_init( chunk->x, chunk->y, settings->halo_depth, chunk->theta, chunk->u, chunk->u0, chunk->p, chunk->r, chunk->w, chunk->kx, chunk->ky, chunk->ext->a_row_index, chunk->ext->a_col_index, chunk->ext->a_non_zeros); STOP_PROFILING(settings->kernel_profile, __func__); }
// Runs the cheby_iterate kernel on one chunk, passing its x/y extents, the
// halo depth from settings, alpha, beta, the u, u0, p, r, w, kx and ky
// fields, and the chunk's ext->a_row_index, ext->a_col_index and
// ext->a_non_zeros arrays.
//
// alpha, beta: forwarded unchanged to the kernel.
//
// The call is bracketed by START_PROFILING/STOP_PROFILING on
// settings->kernel_profile, with this function's name handed to
// STOP_PROFILING.
void run_cheby_iterate(
        Chunk* chunk,
        Settings* settings,
        double alpha,
        double beta)
{
    START_PROFILING(settings->kernel_profile);

    cheby_iterate(chunk->x,
                  chunk->y,
                  settings->halo_depth,
                  alpha,
                  beta,
                  chunk->u,
                  chunk->u0,
                  chunk->p,
                  chunk->r,
                  chunk->w,
                  chunk->kx,
                  chunk->ky,
                  chunk->ext->a_row_index,
                  chunk->ext->a_col_index,
                  chunk->ext->a_non_zeros);

    STOP_PROFILING(settings->kernel_profile, __func__);
}