// NOTE(review): truncated fragment. The two lines below hold the opening of main() -- its Score-P
// region handles, the start of the "INITIALIZATION" region and the local declarations (cell index
// ranges, coefficient arrays, geometry data, index mappings, neighbour send/receive lists and the
// PAPI counters) -- but the function body is cut off and never closes in this view.
// The text ends with handle_error(), which prints the PAPI error code together with
// PAPI_strerror(retval) and then calls exit(1).
int main(int argc, char *argv[]) { #ifdef SCOREP SCOREP_USER_REGION_DEFINE(handle_initialization); SCOREP_USER_REGION_DEFINE(handle_computation); SCOREP_USER_REGION_DEFINE(handle_finalization); #endif #ifdef SCOREP SCOREP_USER_REGION_BEGIN( handle_initialization, "INITIALIZATION",SCOREP_USER_REGION_TYPE_COMMON ); #endif int my_rank, num_procs, i; const int max_iters = 10000; /// maximum number of iteration to perform /** Simulation parameters parsed from the input datasets */ int nintci, nintcf; /// internal cells start and end index /// external cells start and end index. The external cells are only ghost cells. /// They are accessed only through internal cells int nextci, nextcf; int **lcc; /// link cell-to-cell array - stores neighboring information /// Boundary coefficients for each volume cell (South, East, North, West, High, Low) double *bs, *be, *bn, *bw, *bh, *bl; double *bp; /// Pole coefficient double *su; /// Source values double residual_ratio; /// the ratio between the reference and the current residual double *var; /// the variation vector -> keeps the result in the end /** Additional vectors required for the computation */ double *cgup, *oc, *cnorm; /** Geometry data */ int points_count; /// total number of points that define the geometry int** points; /// coordinates of the points that define the cells - size [points_cnt][3] int* elems; /// definition of the cells using their nodes (points) - each cell has 8 points /** Mapping between local and remote cell indices */ int* local_global_index; /// local to global index mapping int* global_local_index; /// global to local index mapping /** Lists for neighbouring information */ int nghb_cnt = 0; /// total number of neighbors of the current process int *nghb_to_rank; /// mapping of the neighbour index to the corresponding process rank int *send_cnt; /// number of cells to be sent to each neighbour (size: nghb_cnt) int **send_lst; /// lists of cells to be sent to each neighbour (size: nghb_cnt x 
send_cnt[*]) int *recv_cnt; /// number of cells to be received from each neighbour (size: nghb_cnt) int **recv_lst; /// lists of cells to be received from each neighbour (size: nghb_cnt x recv_cnt[*]) /* PAPI Parameters*/ float rtime, ptime, mflops; long long flpops; void handle_error (int retval) { printf("PAPI error %d: %s\n", retval, PAPI_strerror(retval)); exit(1); }
void seissol::checkpoint::sionlib::Wavefield::write(const void* header, size_t headerSize) { SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Checkpoint backend: Writing."; int file = open(dataFile(odd()), writeMode()); checkErr(file); // Write the header SCOREP_USER_REGION_DEFINE(r_write_header); SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON); checkErr(sion_coll_fwrite(header, headerSize, 1, file), 1); SCOREP_USER_REGION_END(r_write_header); // Save data SCOREP_USER_REGION_DEFINE(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); checkErr(sion_coll_fwrite(dofs(), sizeof(real), numDofs(), file), numDofs()); SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(file); logInfo(rank()) << "Checkpoint backend: Writing. Done."; }
void seissol::checkpoint::posix::Wavefield::write(double time, int timestepWaveField) { EPIK_TRACER("CheckPoint_write"); SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Checkpoint backend: Writing."; // Start at the beginning checkErr(lseek64(file(), 0, SEEK_SET)); // Write the header EPIK_USER_REG(r_write_header, "checkpoint_write_header"); SCOREP_USER_REGION_DEFINE(r_write_header); EPIK_USER_START(r_write_header); SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON); WavefieldHeader header; header.time = time; header.timestepWaveField = timestepWaveField; writeHeader(file(), header); EPIK_USER_END(r_write_header); SCOREP_USER_REGION_END(r_write_header); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield"); SCOREP_USER_REGION_DEFINE(r_write_wavefield); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); // Convert to char* to do pointer arithmetic const char* buffer = reinterpret_cast<const char*>(dofs()); unsigned long left = numDofs()*sizeof(real); if (alignment()) { left = (left + alignment() - 1) / alignment(); left *= alignment(); } while (left > 0) { unsigned long written = ::write(file(), buffer, left); if (written <= 0) checkErr(written, left); buffer += written; left -= written; } EPIK_USER_END(r_write_wavefield); SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Checkpoint backend: Writing. Done."; }
void seissol::checkpoint::posix::Wavefield::write(double time, int timestepWaveField) { EPIK_TRACER("CheckPoint_write"); SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Writing check point."; // Skip identifier checkErr(lseek64(file(), sizeof(unsigned long), SEEK_SET)); // Write the header EPIK_USER_REG(r_write_header, "checkpoint_write_header"); SCOREP_USER_REGION_DEFINE(r_write_header); EPIK_USER_START(r_write_header); SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON); checkErr(::write(file(), &time, sizeof(time)), sizeof(time)); checkErr(::write(file(), ×tepWaveField, sizeof(timestepWaveField)), sizeof(timestepWaveField)); EPIK_USER_END(r_write_header); SCOREP_USER_REGION_END(r_write_header); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield"); SCOREP_USER_REGION_DEFINE(r_write_wavefield); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); // Convert to char* to do pointer arithmetic const char* buffer = reinterpret_cast<const char*>(dofs()); unsigned long left = numDofs()*sizeof(real); while (left > 0) { unsigned long written = ::write(file(), buffer, left); if (written <= 0) checkErr(written, left); buffer += written; left -= written; } EPIK_USER_END(r_write_wavefield); SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Writing check point. Done."; }
void seissol::checkpoint::mpio::WavefieldAsync::writePrepare(double time, int timestepWaveField) { EPIK_TRACER("CheckPoint_writePrepare"); SCOREP_USER_REGION("CheckPoint_writePrepare", SCOREP_USER_REGION_TYPE_FUNCTION); // Write the header writeHeader(time, timestepWaveField); // Create copy of the dofs memcpy(m_dofsCopy, dofs(), numDofs()*sizeof(real)); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_begin_wavefield"); SCOREP_USER_REGION_DEFINE(r_write_wavefield); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_begin_wavefield", SCOREP_USER_REGION_TYPE_COMMON); checkMPIErr(setDataView(file())); checkMPIErr(MPI_File_write_all_begin(file(), m_dofsCopy, numDofs(), MPI_DOUBLE)); EPIK_USER_END(r_write_wavefield); SCOREP_USER_REGION_END(r_write_wavefield); m_started = true; logInfo(rank()) << "Checkpoint backend: Writing. Done."; }
void seissol::checkpoint::mpio::FaultAsync::writePrepare(int timestepFault) { EPIK_TRACER("CheckPointFault_writePrepare"); SCOREP_USER_REGION("CheckPointFault_writePrepare", SCOREP_USER_REGION_TYPE_FUNCTION); if (numSides() == 0) return; // Write the header writeHeader(timestepFault); // Create copy of the data for (unsigned int i = 0; i < NUM_VARIABLES; i++) memcpy(&m_dataCopy[i*numSides()*numBndGP()], data(i), numSides()*numBndGP()*sizeof(double)); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_begin_fault"); SCOREP_USER_REGION_DEFINE(r_write_fault); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_fault, "checkpoint_write_begin_fault", SCOREP_USER_REGION_TYPE_COMMON); checkMPIErr(setDataView(file())); checkMPIErr(MPI_File_write_all_begin(file(), m_dataCopy, numSides() * numBndGP() * NUM_VARIABLES, MPI_DOUBLE)); EPIK_USER_END(r_write_fault); SCOREP_USER_REGION_END(r_write_fault); m_started = true; logInfo(rank()) << "Writing fault check point. Done."; }
void seissol::checkpoint::mpio::Wavefield::write(double time, int timestepWaveField) { EPIK_TRACER("CheckPoint_write"); SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Writing check point."; // Write the header writeHeader(time, timestepWaveField); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield"); SCOREP_USER_REGION_DEFINE(r_write_wavefield); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); checkMPIErr(setDataView(file())); checkMPIErr(MPI_File_write_all(file(), dofs(), numDofs(), MPI_DOUBLE, MPI_STATUS_IGNORE)); EPIK_USER_END(r_write_wavefield); SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Writing check point. Done."; }
void seissol::checkpoint::mpio::Wavefield::write(const void* header, size_t headerSize) { SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Checkpoint backend: Writing."; // Write the header writeHeader(header, headerSize); // Save data SCOREP_USER_REGION_DEFINE(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); checkMPIErr(setDataView(file())); unsigned int totalIter = totalIterations(); unsigned int iter = iterations(); unsigned int count = dofsPerIteration(); if (m_useLargeBuffer) { totalIter = (totalIter + sizeof(real) - 1) / sizeof(real); iter = (iter + sizeof(real) - 1) / sizeof(real); count *= sizeof(real); } unsigned long offset = 0; for (unsigned int i = 0; i < totalIter; i++) { if (i == iter-1) // Last iteration count = numDofs() - (iter-1) * count; checkMPIErr(MPI_File_write_all(file(), const_cast<real*>(&dofs()[offset]), count, MPI_DOUBLE, MPI_STATUS_IGNORE)); if (i < iter-1) offset += count; // otherwise we just continue writing the last chunk over and over else if (i != totalIter-1) checkMPIErr(MPI_File_seek(file(), -count * sizeof(real), MPI_SEEK_CUR)); } SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Checkpoint backend: Writing. Done."; }
void seissol::checkpoint::h5::Fault::write(int timestepFault) { EPIK_TRACER("CheckPointFault_write"); SCOREP_USER_REGION("CheckPointFault_write", SCOREP_USER_REGION_TYPE_FUNCTION); if (numSides() == 0) return; logInfo(rank()) << "Writing fault check point."; // Create array with all pointers EPIK_USER_REG(r_write_fault, "checkpoint_write_fault"); SCOREP_USER_REGION_DEFINE(r_write_fault); EPIK_USER_START(r_write_fault); SCOREP_USER_REGION_BEGIN(r_write_fault, "checkpoint_write_fault", SCOREP_USER_REGION_TYPE_COMMON); // Attributes checkH5Err(H5Awrite(m_h5timestepFault[odd()], H5T_NATIVE_INT, ×tepFault)); // Set memory and file space hsize_t fStart[2] = {fileOffset(), 0}; hsize_t count[2] = {numSides(), numBndGP()}; hid_t h5memSpace = H5Screate_simple(2, count, 0L); checkH5Err(h5memSpace); checkH5Err(H5Sselect_all(h5memSpace)); checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, fStart, 0L, count, 0L)); for (unsigned int i = 0; i < NUM_VARIABLES; i++) { checkH5Err(H5Dwrite(m_h5data[odd()][i], H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData, h5XferList(), data(i))); } checkH5Err(H5Sclose(h5memSpace)); EPIK_USER_END(r_write_fault); SCOREP_USER_REGION_END(r_write_fault); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Writing fault check point. Done."; }
int main(int argc, char *argv[]) { int my_rank, num_procs; const int max_iters = 10000; /// maximum number of iteration to perform /** Simulation parameters parsed from the input datasets */ int nintci, nintcf; /// internal cells start and end index /// external cells start and end index. The external cells are only ghost cells. /// They are accessed only through internal cells int nextci, nextcf; int **lcc; /// link cell-to-cell array - stores neighboring information int *lcc_local; /// Boundary coefficients for each volume cell (South, East, North, West, High, Low) double *bs, *be, *bn, *bw, *bl, *bh; double *bp; /// Pole coefficient double *su; /// Source values double residual_ratio; /// the ratio between the reference and the current residual double *var; /// the variation vector -> keeps the result in the end /** Additional vectors required for the computation */ double *cgup, *oc, *cnorm; /** Geometry data */ int points_count; /// total number of points that define the geometry int** points; /// coordinates of the points that define the cells - size [points_cnt][3] int* elems; /// definition of the cells using their nodes (points) - each cell has 8 points int num_elems; /** Mapping between local and remote cell indices */ int* local_global_index; /// local to global index mapping int* global_local_index; /// global to local index mapping int* local_global_index_full; /** Lists of cells requires for the communication */ int neighbors_count = 0; /// total number of neighbors to communicate with int* send_count; /// number of elements to send to each neighbor (size: neighbors_count) /// send lists for the other neighbors(cell ids which should be sent)(size:[#neighbors][#cells] int** send_list; int* recv_count; /// how many elements are in the recv lists for each neighbor int** recv_list; /// send lists for the other neighbor (see send_list) /** Metis Results */ int* epart; /// partition vector for the elements of the mesh int* npart; /// partition vector for 
the points (nodes) of the mesh int objval; /// resulting edgecut of total communication volume (classical distrib->zeros) MPI_Init(&argc, &argv); /// Start MPI SCOREP_USER_REGION_DEFINE(OA_Phase); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /// Get current process id MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /// get number of processes double elapsed_time, elapsed_time_max; FILE *pFile; if ( argc < 3 ) { fprintf(stderr, "Usage: ./gccg <input_file> <output_prefix> <partition_type>\n"); MPI_Abort(MPI_COMM_WORLD, -1); } char *file_in = argv[1]; char *out_prefix = argv[2]; char *part_type = (argc == 3 ? "classical" : argv[3]); char file_vtk_out[50]; char measure_out[20]; /********** START INITIALIZATION **********/ // read-in the input file elapsed_time = - MPI_Wtime(); SCOREP_USER_OA_PHASE_BEGIN(OA_Phase, "OA_Phase", SCOREP_USER_REGION_TYPE_COMMON); int init_status = initialization(file_in, part_type, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &points_count, &points, &elems, &var, &cgup, &oc, &cnorm, &local_global_index, &global_local_index, &local_global_index_full, &lcc_local, &neighbors_count, &send_count, &send_list, &recv_count, &recv_list, &epart, &npart, &objval); elapsed_time += MPI_Wtime(); MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (my_rank == 0) { sprintf(measure_out, "time--%d.txt", num_procs); pFile = fopen(measure_out, "a"); fprintf(pFile, "%s Elapsed max --initialization time-init_status = %d: %f secs\n", argv[2], init_status, elapsed_time_max); } if (init_status != 0 && init_status != 1) { fprintf(stderr, "Failed to initialize data!\n"); MPI_Abort(MPI_COMM_WORLD, my_rank); } num_elems = nintcf - nintci + 1; // test distribution /*if (my_rank == 2) { sprintf(file_vtk_out, "%s_cgup.vtk", out_prefix); test_distribution(file_in, file_vtk_out, local_global_index, num_elems, cgup); }*/ // Implement this function in test_functions.c and call it here /*if (my_rank == 2) { 
sprintf(file_vtk_out, "%s_commlist.vtk", out_prefix); test_communication(file_in, file_vtk_out, local_global_index, num_elems, neighbors_count, send_count, send_list, recv_count, recv_list); }*/ /********** END INITIALIZATION **********/ /********** START COMPUTATIONAL LOOP **********/ elapsed_time = - MPI_Wtime(); int total_iters = compute_solution(max_iters, nintci, nintcf, nextcf, lcc, bp, bs, bw, bl, bn, be, bh, cnorm, var, su, cgup, &residual_ratio, local_global_index, global_local_index, lcc_local, neighbors_count, send_count, send_list, recv_count, recv_list); elapsed_time += MPI_Wtime(); MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (my_rank == 0) { pFile = fopen(measure_out, "a"); fprintf(pFile, "%s Elapsed max --computation time-init_status = %d: %f secs\n", argv[2], init_status, elapsed_time_max); } /********** END COMPUTATIONAL LOOP **********/ /********** START FINALIZATION **********/ elapsed_time = - MPI_Wtime(); finalization(file_in, out_prefix, total_iters, residual_ratio, nintci, nintcf, points_count, points, elems, var, cgup, su, local_global_index_full, init_status); elapsed_time += MPI_Wtime(); MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (my_rank == 0) { pFile = fopen(measure_out, "a"); fprintf(pFile, "%s Elapsed max --finalization time-init_status = %d: %f secs\n", argv[2], init_status, elapsed_time_max); } SCOREP_USER_OA_PHASE_END(OA_Phase); /********** END FINALIZATION **********/ free(var); free(cgup); free(su); free(bp); free(bh); free(bl); free(bw); free(bn); free(be); free(bs); free(lcc_local); if (my_rank == 0) { free(cnorm); free(oc); free(elems); free(local_global_index); int i; for (i = 0; i < nintcf + 1; i++) { free(lcc[i]); } free(lcc); for (i = 0; i < points_count; i++) { free(points[i]); } free(points); } MPI_Finalize(); /// Cleanup MPI return 0; }
int compute_solution(int nprocs, int myrank, const int max_iters, int nintci, int nintcf, int nextcf, int** lcc, double* bp, double* bs, double* bw, double* bl, double* bn, double* be, double* bh, double* cnorm, double* var, double *su, double* cgup, double* residual_ratio, int* local_global_index, int* global_local_index, int nghb_cnt, int* nghb_to_rank, int* send_cnt, int** send_lst, int *recv_cnt, int** recv_lst){ // Add SCOREP manual instrumentation #ifdef SCOREP SCOREP_USER_REGION_DEFINE(handle1); SCOREP_USER_REGION_DEFINE(handle2); SCOREP_USER_REGION_DEFINE(handle3); SCOREP_USER_REGION_DEFINE(handle4); SCOREP_USER_REGION_DEFINE(handle5); SCOREP_USER_REGION_DEFINE(handle6); SCOREP_USER_REGION_DEFINE(handle7); SCOREP_USER_REGION_DEFINE(handle8); SCOREP_USER_REGION_DEFINE(handle9); SCOREP_USER_REGION_DEFINE(handle10); SCOREP_USER_REGION_DEFINE(handle_break); #endif #ifdef SCOREP SCOREP_USER_REGION_BEGIN( handle1, "handle1 - Initialization of variables and reference residuals.",SCOREP_USER_REGION_TYPE_COMMON ); #endif /** parameters used in gccg */ int iter = 1; int if1 = 0; int if2 = 0; int nor = 1; int nor1 = nor - 1; int nc = 0; int nomax = 3; /** the reference residual */ double resref = 0.0; /** array storing residuals */ double *resvec = (double *) calloc(sizeof(double), (nintcf + 1)); // initialize the reference residual for ( nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } #ifdef SCOREP SCOREP_USER_REGION_END( handle1 ); SCOREP_USER_REGION_BEGIN( handle2, "handle2 - 1st Allreduce.",SCOREP_USER_REGION_TYPE_COMMON ); #endif // A2.3 double global_resref = 0; MPI_Allreduce(&resref, &global_resref, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); resref = global_resref; #ifdef SCOREP SCOREP_USER_REGION_END( handle2 ); SCOREP_USER_REGION_BEGIN( handle3, "handle3 - Calculation of the residue sum.",SCOREP_USER_REGION_TYPE_COMMON ); #endif resref = sqrt(resref); if ( resref < 1.0e-15 ) { fprintf(stderr, "Residue sum 
less than 1.e-15 - %lf\n", resref); return 0; } #ifdef SCOREP SCOREP_USER_REGION_END( handle3 ); SCOREP_USER_REGION_BEGIN( handle4, "handle4 - Memory allocation.",SCOREP_USER_REGION_TYPE_COMMON ); #endif // Counting the number of ghost cells to extend the direc1 int ghost_cells_recv = 0, ghost_cells_send = 0; int proc, i, j; for (proc = 0; proc < nghb_cnt; proc++) { ghost_cells_recv += recv_cnt[proc]; ghost_cells_send += send_cnt[proc]; } /** the computation vectors */ // TODO: double *direc1 = (double *) calloc(sizeof(double), ((nextcf + 1) + ghost_cells_recv)); double *direc2 = (double *) calloc(sizeof(double), (nextcf + 1)); double *adxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double *adxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); double *dxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double *dxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); // Determine displacements for sending int **displacements = (int **) malloc(sizeof(double)*nghb_cnt); int **blocklenghts = (int **) malloc(sizeof(double)*nghb_cnt); for(proc = 0; proc < nghb_cnt; proc++) { displacements[proc] = (int*)calloc(send_cnt[proc],sizeof(int)); blocklenghts[proc] = (int*)calloc(send_cnt[proc],sizeof(int)); } j = 0; for (proc = 0; proc < nghb_cnt; proc++) { for (i = 0; i < send_cnt[proc]; i++) { displacements[proc][i] = global_local_index[send_lst[proc][i]]; blocklenghts[proc][i] = 1; } } MPI_Request request; MPI_Datatype *indextype; indextype = (MPI_Datatype *) malloc(sizeof(*indextype)*nghb_cnt); for (proc = 0; proc < nghb_cnt; proc++) { MPI_Type_indexed(send_cnt[proc], blocklenghts[proc], displacements[proc], MPI_DOUBLE, &(indextype[proc])); MPI_Type_commit(&(indextype[proc])); } #ifdef SCOREP SCOREP_USER_REGION_END( handle4 ); SCOREP_USER_REGION_BEGIN( handle5, "handle5 - Computation phase1. 
direc1 update.",SCOREP_USER_REGION_TYPE_COMMON ); #endif while ( iter < max_iters ) { /********** START COMP PHASE 1 **********/ // update the old values of direc for ( nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } #ifdef SCOREP SCOREP_USER_REGION_END( handle5 ); SCOREP_USER_REGION_BEGIN( handle6, "handle6 - Computation phase1. direc1 communication",SCOREP_USER_REGION_TYPE_COMMON ); #endif // Communication of direc1 - start for (proc = 0; proc < nghb_cnt; proc++) { MPI_Isend(direc1, 1, indextype[proc], nghb_to_rank[proc], 0, MPI_COMM_WORLD, &request); } // Reference position in the direc1 int ref_pos = nextcf + 1; for (proc = 0; proc < nghb_cnt; proc++) { MPI_Recv(&(direc1[ref_pos]), recv_cnt[proc], MPI_DOUBLE, nghb_to_rank[proc], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); ref_pos += recv_cnt[proc]; } // Communication of direc1 - stop #ifdef SCOREP SCOREP_USER_REGION_END( handle6 ); SCOREP_USER_REGION_BEGIN( handle7, "handle7 - Computation phase1. direc2 computation",SCOREP_USER_REGION_TYPE_COMMON ); #endif // compute new guess (approximation) for direc for ( nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[global_local_index[lcc[nc][0]]] - be[nc] * direc1[global_local_index[lcc[nc][1]]] - bn[nc] * direc1[global_local_index[lcc[nc][2]]] - bw[nc] * direc1[global_local_index[lcc[nc][3]]] - bl[nc] * direc1[global_local_index[lcc[nc][4]]] - bh[nc] * direc1[global_local_index[lcc[nc][5]]]; } /********** END COMP PHASE 1 **********/ #ifdef SCOREP SCOREP_USER_REGION_END( handle7 ); SCOREP_USER_REGION_BEGIN( handle8, "handle8 - Computation phase2. 
occ computation",SCOREP_USER_REGION_TYPE_COMMON ); #endif /********** START COMP PHASE 2 **********/ // execute normalization steps double oc1, oc2, occ; if ( nor1 == 1 ) { oc1 = 0; occ = 0; for ( nc = nintci; nc <= nintcf; nc++ ) { occ = occ + direc2[nc] * adxor1[nc]; } // A2.3 double global_occ = 0.0; MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); occ = global_occ; oc1 = occ / cnorm[1]; for ( nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; } else { if ( nor1 == 2 ) { oc1 = 0; occ = 0; for ( nc = nintci; nc <= nintcf; nc++ ) { occ = occ + direc2[nc] * adxor1[nc]; } // A2.3 double global_occ = 0.0; MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); occ = global_occ; oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for ( nc = nintci; nc <= nintcf; nc++ ) { occ = occ + direc2[nc] * adxor2[nc]; } // A2.3 MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); occ = global_occ; oc2 = occ / cnorm[2]; for ( nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; } if2++; } } #ifdef SCOREP SCOREP_USER_REGION_END( handle8 ); SCOREP_USER_REGION_BEGIN( handle9, "handle9 - Computation phase2. 
residual_ratio computation - before break",SCOREP_USER_REGION_TYPE_COMMON ); #endif // compute the new residual cnorm[nor] = 0; double omega = 0; for ( nc = nintci; nc <= nintcf; nc++ ) { cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } // A2.3 double global_cnorm_nor = 0.0, global_omega = 0.0; MPI_Allreduce(&(cnorm[nor]), &global_cnorm_nor, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&omega, &global_omega, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); cnorm[nor] = global_cnorm_nor; omega = global_omega; omega = omega / cnorm[nor]; double res_updated = 0.0; for ( nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = resvec[nc] - omega * direc2[nc]; res_updated = res_updated + resvec[nc] * resvec[nc]; var[nc] = var[nc] + omega * direc1[nc]; } // A2.3 double global_res_updated = 0.0; MPI_Allreduce(&res_updated, &global_res_updated, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); res_updated = global_res_updated; res_updated = sqrt(res_updated); *residual_ratio = res_updated / resref; #ifdef SCOREP SCOREP_USER_REGION_END( handle9 ); #endif // exit on no improvements of residual if ( *residual_ratio <= 1.0e-10 ) break; #ifdef SCOREP SCOREP_USER_REGION_BEGIN( handle_break, "handle9 - Computation phase2. 
residual_ratio computation - after break",SCOREP_USER_REGION_TYPE_COMMON ); #endif iter++; // prepare additional arrays for the next iteration step if ( nor == nomax ) { nor = 1; } else { if ( nor == 1 ) { for ( nc = nintci; nc <= nintcf; nc++ ) { dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else { if ( nor == 2 ) { for ( nc = nintci; nc <= nintcf; nc++ ) { dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } } nor++; } nor1 = nor - 1; /********** END COMP PHASE 2 **********/ } #ifdef SCOREP SCOREP_USER_REGION_END( handle_break ); SCOREP_USER_REGION_BEGIN( handle10, "handle10 - Memory freeing",SCOREP_USER_REGION_TYPE_COMMON ); #endif for (i = 0; i < nghb_cnt; i++){ free(displacements[i]); } free(displacements); free(indextype); free(direc1); free(direc2); free(adxor1); free(adxor2); free(dxor1); free(dxor2); free(resvec); return iter; #ifdef SCOREP SCOREP_USER_REGION_END( handle10 ); #endif }
void seissol::checkpoint::h5::Wavefield::write(double time, int waveFieldTimeStep) { EPIK_TRACER("CheckPoint_write"); SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Writing check point."; EPIK_USER_REG(r_header, "checkpoint_write_header"); SCOREP_USER_REGION_DEFINE(r_header); EPIK_USER_START(r_header); SCOREP_USER_REGION_BEGIN(r_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON); // Time checkH5Err(H5Awrite(m_h5time[odd()], H5T_NATIVE_DOUBLE, &time)); // Wavefield writer checkH5Err(H5Awrite(m_h5timestepWavefield[odd()], H5T_NATIVE_INT, &waveFieldTimeStep)); EPIK_USER_END(r_header); SCOREP_USER_REGION_END(r_header); // Save data EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield"); SCOREP_USER_REGION_DEFINE(r_write_wavefield); EPIK_USER_START(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); // Write the wave field unsigned int offset = 0; hsize_t fStart = fileOffset(); hsize_t count = dofsPerIteration(); hid_t h5memSpace = H5Screate_simple(1, &count, 0L); checkH5Err(h5memSpace); checkH5Err(H5Sselect_all(h5memSpace)); for (unsigned int i = 0; i < totalIterations()-1; i++) { checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, &fStart, 0L, &count, 0L)); checkH5Err(H5Dwrite(m_h5data[odd()], H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData, h5XferList(), &const_cast<real*>(dofs())[offset])); // We are finished in less iterations, read data twice // so everybody needs the same number of iterations if (i < iterations()-1) { fStart += count; offset += count; } } checkH5Err(H5Sclose(h5memSpace)); // Save reminding data in the last iteration count = numDofs() - (iterations() - 1) * count; h5memSpace = H5Screate_simple(1, &count, 0L); checkH5Err(h5memSpace); checkH5Err(H5Sselect_all(h5memSpace)); checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, &fStart, 0L, &count, 0L)); checkH5Err(H5Dwrite(m_h5data[odd()], 
H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData, h5XferList(), &dofs()[offset])); checkH5Err(H5Sclose(h5memSpace)); EPIK_USER_END(r_write_wavefield); SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Writing check point. Done."; }