示例#1
0
文件: gccg.c 项目: mario1701/PoS-A2
int main(int argc, char *argv[]) {
  
    #ifdef SCOREP
    SCOREP_USER_REGION_DEFINE(handle_initialization);
    SCOREP_USER_REGION_DEFINE(handle_computation);
    SCOREP_USER_REGION_DEFINE(handle_finalization);
    #endif
  
    #ifdef SCOREP
    SCOREP_USER_REGION_BEGIN( handle_initialization, "INITIALIZATION",SCOREP_USER_REGION_TYPE_COMMON );
    #endif
    
    int my_rank, num_procs, i;

    const int max_iters = 10000;    /// maximum number of iteration to perform

    /** Simulation parameters parsed from the input datasets */
    int nintci, nintcf;    /// internal cells start and end index
    /// external cells start and end index. The external cells are only ghost cells.
    /// They are accessed only through internal cells
    int nextci, nextcf;
    int **lcc;    /// link cell-to-cell array - stores neighboring information
    /// Boundary coefficients for each volume cell (South, East, North, West, High, Low)
    double *bs, *be, *bn, *bw, *bh, *bl;
    double *bp;    /// Pole coefficient
    double *su;    /// Source values

    double residual_ratio;    /// the ratio between the reference and the current residual
    double *var;    /// the variation vector -> keeps the result in the end

    /** Additional vectors required for the computation */
    double *cgup, *oc, *cnorm;

    /** Geometry data */
    int points_count;    /// total number of points that define the geometry
    int** points;    /// coordinates of the points that define the cells - size [points_cnt][3]
    int* elems;    /// definition of the cells using their nodes (points) - each cell has 8 points

    /** Mapping between local and remote cell indices */
    int* local_global_index;    /// local to global index mapping
    int* global_local_index;    /// global to local index mapping
  

    /** Lists for neighbouring information */
    int nghb_cnt = 0;    /// total number of neighbors of the current process
    int *nghb_to_rank;  /// mapping of the neighbour index to the corresponding process rank
    int *send_cnt;    /// number of cells to be sent to each neighbour (size: nghb_cnt)
    int **send_lst;    /// lists of cells to be sent to each neighbour (size: nghb_cnt x send_cnt[*])
    int *recv_cnt;    /// number of cells to be received from each neighbour (size: nghb_cnt)
    int **recv_lst;    /// lists of cells to be received from each neighbour (size: nghb_cnt x recv_cnt[*])

    /* PAPI Parameters*/
    float rtime, ptime, mflops;
    long long flpops;

    /* Reports a failed PAPI call and terminates the program.
     * retval is the PAPI return code; its description is obtained from
     * PAPI_strerror(). This function does not return. */
    void handle_error (int retval)
        {
          /* Diagnostics go to stderr so they are not mixed into (or lost
           * with) buffered/redirected regular program output. */
          fprintf(stderr, "PAPI error %d: %s\n", retval, PAPI_strerror(retval));
          exit(EXIT_FAILURE);
        }
示例#2
0
/**
 * Writes one wavefield checkpoint through the SIONlib backend.
 *
 * Opens the data file selected by odd(), writes the caller-supplied header
 * followed by the degrees of freedom, and then finalizes the checkpoint.
 *
 * @param header Pointer to the header data; written as a single item
 * @param headerSize Size of that single header item, as passed to sion_coll_fwrite
 */
void seissol::checkpoint::sionlib::Wavefield::write(const void* header, size_t headerSize)
{
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Checkpoint backend: Writing.";

	// Handle of the checkpoint file used for every write below
	int sionFile = open(dataFile(odd()), writeMode());
	checkErr(sionFile);

	// --- Header: exactly one item is expected to be written ---
	SCOREP_USER_REGION_DEFINE(r_write_header);
	SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON);
	checkErr(sion_coll_fwrite(header, headerSize, 1, sionFile), 1);
	SCOREP_USER_REGION_END(r_write_header);

	// --- Payload: numDofs() items of sizeof(real) each ---
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);
	checkErr(sion_coll_fwrite(dofs(), sizeof(real), numDofs(), sionFile), numDofs());
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Hand the file over to the finalization step
	finalizeCheckpoint(sionFile);

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
示例#3
0
/**
 * Writes a wavefield checkpoint to the POSIX file returned by file().
 *
 * The file position is reset to the start, a WavefieldHeader carrying
 * @p time and @p timestepWaveField is written via writeHeader(), the
 * degrees of freedom follow, and finally the checkpoint is finalized.
 *
 * @param time Simulation time stored in the header
 * @param timestepWaveField Wavefield time step stored in the header
 */
void seissol::checkpoint::posix::Wavefield::write(double time, int timestepWaveField)
{
	EPIK_TRACER("CheckPoint_write");
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Checkpoint backend: Writing.";

	// Start at the beginning
	checkErr(lseek64(file(), 0, SEEK_SET));

	// Write the header
	EPIK_USER_REG(r_write_header, "checkpoint_write_header");
	SCOREP_USER_REGION_DEFINE(r_write_header);
	EPIK_USER_START(r_write_header);
	SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON);

	WavefieldHeader header;
	header.time = time;
	header.timestepWaveField = timestepWaveField;
	writeHeader(file(), header);

	EPIK_USER_END(r_write_header);
	SCOREP_USER_REGION_END(r_write_header);

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	// Convert to char* to do pointer arithmetic
	const char* buffer = reinterpret_cast<const char*>(dofs());
	unsigned long left = numDofs()*sizeof(real);
	if (alignment()) {
		// Round the byte count up to the next multiple of alignment().
		// NOTE(review): this can exceed numDofs()*sizeof(real); assumes the
		// dofs buffer is allocated with that padding - confirm.
		left = (left + alignment() - 1) / alignment();
		left *= alignment();
	}

	// ::write may write fewer bytes than requested, so loop until done
	while (left > 0) {
		// ::write returns ssize_t and signals failure with -1. Storing it in
		// an unsigned type would turn -1 into a huge positive value, skip the
		// error check and corrupt both the pointer and the remaining count.
		const ssize_t written = ::write(file(), buffer, left);
		if (written <= 0)
			// NOTE(review): checkErr is expected not to return here - confirm
			checkErr(written, static_cast<ssize_t>(left));
		buffer += written;
		left -= static_cast<unsigned long>(written);
	}

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
示例#4
0
/**
 * Writes a wavefield checkpoint to the POSIX file returned by file().
 *
 * The file position is moved past a leading identifier of
 * sizeof(unsigned long) bytes, then @p time and @p timestepWaveField are
 * written as the header, followed by the degrees of freedom. Finally the
 * checkpoint is finalized.
 *
 * @param time Simulation time written into the header
 * @param timestepWaveField Wavefield time step written into the header
 */
void seissol::checkpoint::posix::Wavefield::write(double time, int timestepWaveField)
{
	EPIK_TRACER("CheckPoint_write");
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Writing check point.";

	// Skip identifier
	checkErr(lseek64(file(), sizeof(unsigned long), SEEK_SET));

	// Write the header
	EPIK_USER_REG(r_write_header, "checkpoint_write_header");
	SCOREP_USER_REGION_DEFINE(r_write_header);
	EPIK_USER_START(r_write_header);
	SCOREP_USER_REGION_BEGIN(r_write_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON);

	checkErr(::write(file(), &time, sizeof(time)), sizeof(time));
	checkErr(::write(file(), &timestepWaveField, sizeof(timestepWaveField)),
			sizeof(timestepWaveField));

	EPIK_USER_END(r_write_header);
	SCOREP_USER_REGION_END(r_write_header);

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	// Convert to char* to do pointer arithmetic
	const char* buffer = reinterpret_cast<const char*>(dofs());
	unsigned long left = numDofs()*sizeof(real);

	// ::write may write fewer bytes than requested, so loop until done
	while (left > 0) {
		// ::write returns ssize_t and signals failure with -1. Storing it in
		// an unsigned type would turn -1 into a huge positive value, skip the
		// error check and corrupt both the pointer and the remaining count.
		const ssize_t written = ::write(file(), buffer, left);
		if (written <= 0)
			// NOTE(review): checkErr is expected not to return here - confirm
			checkErr(written, static_cast<ssize_t>(left));
		buffer += written;
		left -= static_cast<unsigned long>(written);
	}

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Writing check point. Done.";
}
示例#5
0
/**
 * Starts a split-collective MPI-IO write of the wavefield checkpoint.
 *
 * Writes the header, snapshots the degrees of freedom into m_dofsCopy and
 * begins the collective write from that copy. Sets m_started so that the
 * pending operation can be recognized later.
 *
 * @param time Simulation time forwarded to writeHeader()
 * @param timestepWaveField Wavefield time step forwarded to writeHeader()
 */
void seissol::checkpoint::mpio::WavefieldAsync::writePrepare(double time, int timestepWaveField)
{
	EPIK_TRACER("CheckPoint_writePrepare");
	SCOREP_USER_REGION("CheckPoint_writePrepare", SCOREP_USER_REGION_TYPE_FUNCTION);

	// Write the header
	writeHeader(time, timestepWaveField);

	// Create copy of the dofs; the write below reads from this copy instead
	// of the live dofs() buffer
	memcpy(m_dofsCopy, dofs(), numDofs()*sizeof(real));

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_begin_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_begin_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	checkMPIErr(setDataView(file()));

	// Only begins the split-collective write; no matching "end" call is
	// made in this function.
	// NOTE(review): the copy is sized with sizeof(real) but written as
	// MPI_DOUBLE - assumes real is double, confirm.
	checkMPIErr(MPI_File_write_all_begin(file(), m_dofsCopy, numDofs(), MPI_DOUBLE));

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Remember that a write has been started
	m_started = true;

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
示例#6
0
/**
 * Starts a split-collective MPI-IO write of the fault checkpoint.
 *
 * Does nothing when numSides() is zero. Otherwise writes the header, copies
 * all NUM_VARIABLES fault variables back to back into m_dataCopy and begins
 * the collective write from that copy. Sets m_started afterwards.
 *
 * @param timestepFault Fault time step forwarded to writeHeader()
 */
void seissol::checkpoint::mpio::FaultAsync::writePrepare(int timestepFault)
{
	EPIK_TRACER("CheckPointFault_writePrepare");
	SCOREP_USER_REGION("CheckPointFault_writePrepare", SCOREP_USER_REGION_TYPE_FUNCTION);

	if (numSides() == 0)
		return;

	// Write the header
	writeHeader(timestepFault);

	// Create copy of the data: variable i occupies the i-th contiguous
	// slice of numSides()*numBndGP() doubles
	for (unsigned int i = 0; i < NUM_VARIABLES; i++)
		memcpy(&m_dataCopy[i*numSides()*numBndGP()],
				data(i), numSides()*numBndGP()*sizeof(double));

	// Save data
	// The EPIK region uses the same handle name for register, start and end;
	// previously it was registered/started as r_write_wavefield but ended as
	// r_write_fault.
	EPIK_USER_REG(r_write_fault, "checkpoint_write_begin_fault");
	SCOREP_USER_REGION_DEFINE(r_write_fault);
	EPIK_USER_START(r_write_fault);
	SCOREP_USER_REGION_BEGIN(r_write_fault, "checkpoint_write_begin_fault", SCOREP_USER_REGION_TYPE_COMMON);

	checkMPIErr(setDataView(file()));
	// Only begins the split-collective write; no matching "end" call is
	// made in this function.
	checkMPIErr(MPI_File_write_all_begin(file(), m_dataCopy, numSides() * numBndGP() * NUM_VARIABLES, MPI_DOUBLE));

	EPIK_USER_END(r_write_fault);
	SCOREP_USER_REGION_END(r_write_fault);

	// Remember that a write has been started
	m_started = true;

	logInfo(rank()) << "Writing fault check point. Done.";
}
示例#7
0
/**
 * Writes a wavefield checkpoint with a blocking collective MPI-IO write.
 *
 * Writes the header, sets the data view on the file, writes all degrees of
 * freedom in one MPI_File_write_all call and finalizes the checkpoint.
 *
 * @param time Simulation time forwarded to writeHeader()
 * @param timestepWaveField Wavefield time step forwarded to writeHeader()
 */
void seissol::checkpoint::mpio::Wavefield::write(double time, int timestepWaveField)
{
	EPIK_TRACER("CheckPoint_write");
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Writing check point.";

	// Write the header
	writeHeader(time, timestepWaveField);

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	checkMPIErr(setDataView(file()));

	// numDofs() elements are written as MPI_DOUBLE; the status is ignored.
	// NOTE(review): assumes dofs() holds doubles - confirm against real.
	checkMPIErr(MPI_File_write_all(file(), dofs(), numDofs(), MPI_DOUBLE, MPI_STATUS_IGNORE));

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Writing check point. Done.";
}
示例#8
0
void seissol::checkpoint::mpio::Wavefield::write(const void* header, size_t headerSize)
{
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Checkpoint backend: Writing.";

	// Write the header
	writeHeader(header, headerSize);

	// Save data
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);
	checkMPIErr(setDataView(file()));

	unsigned int totalIter = totalIterations();
	unsigned int iter = iterations();
	unsigned int count = dofsPerIteration();
	if (m_useLargeBuffer) {
		totalIter = (totalIter + sizeof(real) - 1) / sizeof(real);
		iter = (iter + sizeof(real) - 1) / sizeof(real);
		count *= sizeof(real);
	}
	unsigned long offset = 0;
	for (unsigned int i = 0; i < totalIter; i++) {
		if (i == iter-1)
			// Last iteration
			count = numDofs() - (iter-1) * count;

		checkMPIErr(MPI_File_write_all(file(), const_cast<real*>(&dofs()[offset]), count, MPI_DOUBLE, MPI_STATUS_IGNORE));

		if (i < iter-1)
			offset += count;
		// otherwise we just continue writing the last chunk over and over
		else if (i != totalIter-1)
			checkMPIErr(MPI_File_seek(file(), -count * sizeof(real), MPI_SEEK_CUR));
	}

	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
示例#9
0
void seissol::checkpoint::h5::Fault::write(int timestepFault)
{
	EPIK_TRACER("CheckPointFault_write");
	SCOREP_USER_REGION("CheckPointFault_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	if (numSides() == 0)
		return;

	logInfo(rank()) << "Writing fault check point.";

	// Create array with all pointers
	EPIK_USER_REG(r_write_fault, "checkpoint_write_fault");
	SCOREP_USER_REGION_DEFINE(r_write_fault);
	EPIK_USER_START(r_write_fault);
	SCOREP_USER_REGION_BEGIN(r_write_fault, "checkpoint_write_fault", SCOREP_USER_REGION_TYPE_COMMON);

	// Attributes
	checkH5Err(H5Awrite(m_h5timestepFault[odd()], H5T_NATIVE_INT, &timestepFault));

	// Set memory and file space
	hsize_t fStart[2] = {fileOffset(), 0};
	hsize_t count[2] = {numSides(), numBndGP()};
	hid_t h5memSpace = H5Screate_simple(2, count, 0L);
	checkH5Err(h5memSpace);
	checkH5Err(H5Sselect_all(h5memSpace));
	checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, fStart, 0L, count, 0L));

	for (unsigned int i = 0; i < NUM_VARIABLES; i++) {
		checkH5Err(H5Dwrite(m_h5data[odd()][i], H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData,
				h5XferList(), data(i)));
	}

	checkH5Err(H5Sclose(h5memSpace));

	EPIK_USER_END(r_write_fault);
	SCOREP_USER_REGION_END(r_write_fault);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Writing fault check point. Done.";
}
示例#10
0
/// Appends one "Elapsed max" timing record for a program phase to the file at @p path.
/// The file is opened and closed on every call, so no handle is leaked and
/// each record is flushed as soon as it has been written. A file that cannot
/// be opened is reported on stderr and the record is skipped.
static void append_timing(const char *path, const char *run_label, const char *phase,
                          int init_status, double seconds) {
    FILE *out = fopen(path, "a");
    if (out == NULL) {
        fprintf(stderr, "Could not open timing file %s\n", path);
        return;
    }
    fprintf(out, "%s Elapsed max --%s time-init_status = %d: %f secs\n",
            run_label, phase, init_status, seconds);
    fclose(out);
}

/**
 * Entry point of the MPI GCCG solver.
 *
 * Runs the three phases initialization, computation and finalization. For
 * each phase the maximum elapsed time over all processes is reduced to rank 0
 * and appended to the file "time--<num_procs>.txt".
 *
 * Usage: ./gccg <input_file> <output_prefix> <partition_type>
 * (partition_type falls back to "classical" when exactly two arguments are given)
 *
 * @return 0 on normal completion
 */
int main(int argc, char *argv[]) {
    int my_rank, num_procs;

    const int max_iters = 10000;    /// maximum number of iteration to perform

    /** Simulation parameters parsed from the input datasets */
    int nintci, nintcf;    /// internal cells start and end index
    /// external cells start and end index. The external cells are only ghost cells.
    /// They are accessed only through internal cells
    int nextci, nextcf;
    int **lcc;    /// link cell-to-cell array - stores neighboring information
    int *lcc_local;
    /// Boundary coefficients for each volume cell (South, East, North, West, High, Low)
    double *bs, *be, *bn, *bw, *bl, *bh;
    double *bp;    /// Pole coefficient
    double *su;    /// Source values

    double residual_ratio;    /// the ratio between the reference and the current residual
    double *var;    /// the variation vector -> keeps the result in the end

    /** Additional vectors required for the computation */
    double *cgup, *oc, *cnorm;

    /** Geometry data */
    int points_count;    /// total number of points that define the geometry
    int** points;    /// coordinates of the points that define the cells - size [points_cnt][3]
    int* elems;    /// definition of the cells using their nodes (points) - each cell has 8 points
    int num_elems;

    /** Mapping between local and remote cell indices */
    int* local_global_index;    /// local to global index mapping
    int* global_local_index;    /// global to local index mapping
    int* local_global_index_full;

    /** Lists of cells requires for the communication */
    int neighbors_count = 0;    /// total number of neighbors to communicate with
    int* send_count;    /// number of elements to send to each neighbor (size: neighbors_count)
    /// send lists for the other neighbors(cell ids which should be sent)(size:[#neighbors][#cells]
    int** send_list;
    int* recv_count;    /// how many elements are in the recv lists for each neighbor
    int** recv_list;    /// send lists for the other neighbor (see send_list)

    /** Metis Results */
    int* epart;     /// partition vector for the elements of the mesh
    int* npart;     /// partition vector for the points (nodes) of the mesh
    int objval;    /// resulting edgecut of total communication volume (classical distrib->zeros)

    MPI_Init(&argc, &argv);    /// Start MPI
    SCOREP_USER_REGION_DEFINE(OA_Phase);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);    /// Get current process id
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);    /// get number of processes
    double elapsed_time, elapsed_time_max;
    if ( argc < 3 ) {
        fprintf(stderr, "Usage: ./gccg <input_file> <output_prefix> <partition_type>\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    char *file_in = argv[1];
    char *out_prefix = argv[2];
    char *part_type = (argc == 3 ? "classical" : argv[3]);
    char file_vtk_out[50];
    /// Name of the timing file; large enough for any int value of num_procs
    char measure_out[32];
    /********** START INITIALIZATION **********/
    // read-in the input file
    elapsed_time = - MPI_Wtime();
    SCOREP_USER_OA_PHASE_BEGIN(OA_Phase, "OA_Phase", SCOREP_USER_REGION_TYPE_COMMON);
    int init_status = initialization(file_in, part_type, &nintci, &nintcf, &nextci, &nextcf, &lcc,
                                     &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &points_count, &points,
                                     &elems, &var, &cgup, &oc, &cnorm, &local_global_index,
                                     &global_local_index, &local_global_index_full, &lcc_local,
                                     &neighbors_count, &send_count, &send_list,
                                     &recv_count, &recv_list, &epart, &npart, &objval);
    elapsed_time += MPI_Wtime();
    MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (my_rank == 0) {
      // Only rank 0 holds the reduced maximum and writes the timing file;
      // snprintf keeps the name inside the buffer in all cases
      snprintf(measure_out, sizeof(measure_out), "time--%d.txt", num_procs);
      append_timing(measure_out, argv[2], "initialization", init_status, elapsed_time_max);
    }
    if (init_status != 0 && init_status != 1) {
        fprintf(stderr, "Failed to initialize data!\n");
        MPI_Abort(MPI_COMM_WORLD, my_rank);
    }
    num_elems = nintcf - nintci + 1;
    // test distribution
    /*if (my_rank == 2) {
        sprintf(file_vtk_out, "%s_cgup.vtk", out_prefix);
        test_distribution(file_in, file_vtk_out, local_global_index,
        num_elems, cgup);
    }*/
    // Implement this function in test_functions.c and call it here
    /*if (my_rank == 2) {
      sprintf(file_vtk_out, "%s_commlist.vtk", out_prefix);
      test_communication(file_in, file_vtk_out, local_global_index, num_elems,
       neighbors_count, send_count, send_list, recv_count, recv_list);
    }*/
    /********** END INITIALIZATION **********/

    /********** START COMPUTATIONAL LOOP **********/
    elapsed_time = - MPI_Wtime();
    int total_iters = compute_solution(max_iters, nintci, nintcf, nextcf, lcc, bp, bs, bw, bl, bn,
                                       be, bh, cnorm, var, su, cgup, &residual_ratio,
                                       local_global_index, global_local_index,
                                       lcc_local, neighbors_count,
                                       send_count, send_list, recv_count, recv_list);
    elapsed_time += MPI_Wtime();
    MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (my_rank == 0) {
      append_timing(measure_out, argv[2], "computation", init_status, elapsed_time_max);
    }
    /********** END COMPUTATIONAL LOOP **********/

    /********** START FINALIZATION **********/
    elapsed_time = - MPI_Wtime();
    finalization(file_in, out_prefix, total_iters, residual_ratio, nintci, nintcf, points_count,
                 points, elems, var, cgup, su, local_global_index_full, init_status);
    elapsed_time += MPI_Wtime();
    MPI_Reduce(&elapsed_time, &elapsed_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (my_rank == 0) {
      append_timing(measure_out, argv[2], "finalization", init_status, elapsed_time_max);
    }
    SCOREP_USER_OA_PHASE_END(OA_Phase);
    /********** END FINALIZATION **********/
    free(var);
    free(cgup);
    free(su);
    free(bp);
    free(bh);
    free(bl);
    free(bw);
    free(bn);
    free(be);
    free(bs);
    free(lcc_local);
    // NOTE(review): the following buffers are released on rank 0 only -
    // confirm whether the other ranks own them as well
    if (my_rank == 0) {
        free(cnorm);
        free(oc);
        free(elems);
        free(local_global_index);
        int i;
        for (i = 0; i < nintcf + 1; i++) {
            free(lcc[i]);
        }
        free(lcc);
        for (i = 0; i < points_count; i++) {
            free(points[i]);
        }
        free(points);
    }
    MPI_Finalize();    /// Cleanup MPI
    return 0;
}
示例#11
0
/**
 * Runs the distributed GCCG iteration on the local part of the domain.
 *
 * Each iteration updates the direction vector direc1, exchanges the entries
 * listed in send_lst with every neighbour (received values are stored behind
 * index nextcf in direc1), computes direc2 from the cell coefficients and the
 * neighbouring direc1 values, orthogonalizes against up to two stored
 * directions and updates the residual and the solution. All scalar products
 * are summed over MPI_COMM_WORLD with MPI_Allreduce.
 *
 * @param nprocs number of processes (not used in this function)
 * @param myrank rank of this process (not used in this function)
 * @param max_iters the loop runs while the iteration counter is below this value
 * @param nintci,nintcf first and last local internal cell index
 * @param nextcf last external cell index; received ghost values start at nextcf + 1
 * @param lcc cell-to-cell links; entries are translated through global_local_index
 * @param bp,bs,bw,bl,bn,be,bh coefficient arrays indexed by local cell
 * @param cnorm normalization values; entries 1..3 are read/written here
 * @param var solution vector, updated in place
 * @param su source values; initial residual
 * @param cgup scaling applied to the residual when updating direc1
 * @param residual_ratio out: current residual divided by the reference residual
 * @param local_global_index not used in this function
 * @param global_local_index maps the indices stored in lcc / send_lst to local positions
 * @param nghb_cnt number of neighbouring processes
 * @param nghb_to_rank rank of each neighbour
 * @param send_cnt,send_lst per-neighbour count and list of cells to send
 * @param recv_cnt,recv_lst per-neighbour count of cells to receive (recv_lst is not used here)
 * @return 0 if the reference residual is below 1e-15, otherwise the value of
 *         the iteration counter when the loop ended
 */
int compute_solution(int nprocs, int myrank, const int max_iters, int nintci, int nintcf, int nextcf, int** lcc, double* bp,
                     double* bs, double* bw, double* bl, double* bn, double* be, double* bh,
                     double* cnorm, double* var, double *su, double* cgup, double* residual_ratio,
                     int* local_global_index, int* global_local_index, int nghb_cnt, 
                     int* nghb_to_rank, int* send_cnt, int** send_lst, int *recv_cnt, int** recv_lst){

  // Add SCOREP manual instrumentation
  #ifdef SCOREP
  SCOREP_USER_REGION_DEFINE(handle1);
  SCOREP_USER_REGION_DEFINE(handle2);
  SCOREP_USER_REGION_DEFINE(handle3);
  SCOREP_USER_REGION_DEFINE(handle4);
  SCOREP_USER_REGION_DEFINE(handle5);
  SCOREP_USER_REGION_DEFINE(handle6);
  SCOREP_USER_REGION_DEFINE(handle7);
  SCOREP_USER_REGION_DEFINE(handle8);
  SCOREP_USER_REGION_DEFINE(handle9);
  SCOREP_USER_REGION_DEFINE(handle10);
  SCOREP_USER_REGION_DEFINE(handle_break);
  #endif

  #ifdef SCOREP
  SCOREP_USER_REGION_BEGIN( handle1, "handle1 - Initialization of variables and reference residuals.",SCOREP_USER_REGION_TYPE_COMMON );
  #endif

    /** parameters used in gccg */
    int iter = 1;
    int if1 = 0;
    int if2 = 0;
    int nor = 1;
    int nor1 = nor - 1;
    int nc = 0;
    int nomax = 3;

    /** the reference residual */
    double resref = 0.0;

    /** array storing residuals */
    double *resvec = (double *) calloc((nintcf + 1), sizeof(double));

    // initialize the reference residual
    for ( nc = nintci; nc <= nintcf; nc++ ) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }

    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle1 );
    SCOREP_USER_REGION_BEGIN( handle2, "handle2 - 1st Allreduce.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif

    // A2.3 - sum the local contributions of all processes
    double global_resref = 0;
    MPI_Allreduce(&resref, &global_resref, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    resref = global_resref;

    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle2 );
    SCOREP_USER_REGION_BEGIN( handle3, "handle3 - Calculation of the residue sum.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif

    resref = sqrt(resref);
    if ( resref < 1.0e-15 ) {
        fprintf(stderr, "Residue sum less than 1.e-15 - %lf\n", resref);
        // Leave cleanly: close the open region and release what was allocated
        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle3 );
        #endif
        free(resvec);
        return 0;
    }

    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle3 );
    SCOREP_USER_REGION_BEGIN( handle4, "handle4 - Memory allocation.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif

    // Counting the number of ghost cells to extend the direc1
    int ghost_cells_recv = 0;
    int proc, i;

    for (proc = 0; proc < nghb_cnt; proc++) {
      ghost_cells_recv += recv_cnt[proc];
    }

    /** the computation vectors */
    // direc1 additionally holds the values received from the neighbours
    double *direc1 = (double *) calloc(((nextcf + 1) + ghost_cells_recv), sizeof(double));
    double *direc2 = (double *) calloc((nextcf + 1), sizeof(double));
    double *adxor1 = (double *) calloc((nintcf + 1), sizeof(double));
    double *adxor2 = (double *) calloc((nintcf + 1), sizeof(double));
    double *dxor1 = (double *) calloc((nintcf + 1), sizeof(double));
    double *dxor2 = (double *) calloc((nintcf + 1), sizeof(double));

    // Determine displacements for sending: one indexed datatype per neighbour
    // picks the cells of send_lst directly out of direc1
    int **displacements = (int **) malloc(sizeof(*displacements) * nghb_cnt);
    int **blocklenghts = (int **) malloc(sizeof(*blocklenghts) * nghb_cnt);

    for (proc = 0; proc < nghb_cnt; proc++) {
      displacements[proc] = (int *) calloc(send_cnt[proc], sizeof(int));
      blocklenghts[proc] = (int *) calloc(send_cnt[proc], sizeof(int));
      for (i = 0; i < send_cnt[proc]; i++) {
        displacements[proc][i] = global_local_index[send_lst[proc][i]];
        blocklenghts[proc][i] = 1;
      }
    }

    // One request per neighbour so that every non-blocking send can be completed
    MPI_Request *send_requests = (MPI_Request *) malloc(sizeof(*send_requests) * nghb_cnt);
    MPI_Datatype *indextype = (MPI_Datatype *) malloc(sizeof(*indextype) * nghb_cnt);

    for (proc = 0; proc < nghb_cnt; proc++) {
      MPI_Type_indexed(send_cnt[proc], blocklenghts[proc], displacements[proc], MPI_DOUBLE, &(indextype[proc]));
      MPI_Type_commit(&(indextype[proc]));
    }

    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle4 );
    #endif

    while ( iter < max_iters ) {
        // Every region is entered and left once per iteration so that the
        // begin/end calls stay balanced
        #ifdef SCOREP
        SCOREP_USER_REGION_BEGIN( handle5, "handle5 - Computation phase1. direc1 update.",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        /**********  START COMP PHASE 1 **********/
        // update the old values of direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle5 );
        SCOREP_USER_REGION_BEGIN( handle6, "handle6 - Computation phase1. direc1 communication",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        // Communication of direc1 - start
        for (proc = 0; proc < nghb_cnt; proc++) {
            MPI_Isend(direc1, 1, indextype[proc], nghb_to_rank[proc], 0, MPI_COMM_WORLD, &send_requests[proc]);
        }

        // Reference position in the direc1: received values are appended
        // behind the last external cell, neighbour after neighbour
        int ref_pos = nextcf + 1;

        for (proc = 0; proc < nghb_cnt; proc++) {
            MPI_Recv(&(direc1[ref_pos]), recv_cnt[proc], MPI_DOUBLE, nghb_to_rank[proc], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            ref_pos += recv_cnt[proc];
        }

        // direc1 is modified again below, so all sends reading from it must
        // have completed first; this also releases the request objects
        MPI_Waitall(nghb_cnt, send_requests, MPI_STATUSES_IGNORE);
        // Communication of direc1 - stop

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle6 );
        SCOREP_USER_REGION_BEGIN( handle7, "handle7 - Computation phase1. direc2 computation",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        // compute new guess (approximation) for direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[global_local_index[lcc[nc][0]]]
                         - be[nc] * direc1[global_local_index[lcc[nc][1]]] - bn[nc] * direc1[global_local_index[lcc[nc][2]]]
                         - bw[nc] * direc1[global_local_index[lcc[nc][3]]] - bl[nc] * direc1[global_local_index[lcc[nc][4]]]
                         - bh[nc] * direc1[global_local_index[lcc[nc][5]]];
        }
        /********** END COMP PHASE 1 **********/

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle7 );
        SCOREP_USER_REGION_BEGIN( handle8, "handle8 - Computation phase2. occ computation",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        /********** START COMP PHASE 2 **********/
        // execute normalization steps
        double oc1, oc2, occ;
        if ( nor1 == 1 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + direc2[nc] * adxor1[nc];
            }

            // A2.3
            double global_occ = 0.0;
            MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            occ = global_occ;

            oc1 = occ / cnorm[1];
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }

            if1++;
        } else if ( nor1 == 2 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + direc2[nc] * adxor1[nc];
            }

            // A2.3
            double global_occ = 0.0;
            MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            occ = global_occ;

            oc1 = occ / cnorm[1];
            oc2 = 0;
            occ = 0;
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + direc2[nc] * adxor2[nc];
            }

            // A2.3
            MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            occ = global_occ;

            oc2 = occ / cnorm[2];
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
            }

            if2++;
        }

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle8 );
        SCOREP_USER_REGION_BEGIN( handle9, "handle9 - Computation phase2. residual_ratio computation - before break",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        // compute the new residual
        cnorm[nor] = 0;
        double omega = 0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }

        // A2.3
        double global_cnorm_nor = 0.0, global_omega = 0.0;
        MPI_Allreduce(&(cnorm[nor]), &global_cnorm_nor, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce(&omega, &global_omega, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        cnorm[nor] = global_cnorm_nor;
        omega = global_omega;

        omega = omega / cnorm[nor];
        double res_updated = 0.0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            res_updated = res_updated + resvec[nc] * resvec[nc];
            var[nc] = var[nc] + omega * direc1[nc];
        }

        // A2.3
        double global_res_updated = 0.0;
        MPI_Allreduce(&res_updated, &global_res_updated, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        res_updated = global_res_updated;

        res_updated = sqrt(res_updated);
        *residual_ratio = res_updated / resref;

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle9 );
        #endif

        // stop once the residual ratio has dropped to 1e-10 or below
        if ( *residual_ratio <= 1.0e-10 ) break;

        #ifdef SCOREP
        SCOREP_USER_REGION_BEGIN( handle_break, "handle9 - Computation phase2. residual_ratio computation - after break",SCOREP_USER_REGION_TYPE_COMMON );
        #endif

        iter++;

        // prepare additional arrays for the next iteration step
        if ( nor == nomax ) {
            nor = 1;
        } else {
            if ( nor == 1 ) {
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else if ( nor == 2 ) {
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    dxor2[nc] = direc1[nc];
                    adxor2[nc] = direc2[nc];
                }
            }

            nor++;
        }
        nor1 = nor - 1;
        /********** END COMP PHASE 2 **********/

        #ifdef SCOREP
        SCOREP_USER_REGION_END( handle_break );
        #endif
    }

    #ifdef SCOREP
    SCOREP_USER_REGION_BEGIN( handle10, "handle10 - Memory freeing",SCOREP_USER_REGION_TYPE_COMMON );
    #endif

    // Release the per-neighbour datatypes together with their index arrays
    for (i = 0; i < nghb_cnt; i++){
      MPI_Type_free(&(indextype[i]));
      free(displacements[i]);
      free(blocklenghts[i]);
    }

    free(displacements);
    free(blocklenghts);
    free(indextype);
    free(send_requests);

    free(direc1);
    free(direc2);
    free(adxor1);
    free(adxor2);
    free(dxor1);
    free(dxor2);
    free(resvec);

    // The region has to be closed before returning, otherwise it never ends
    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle10 );
    #endif

    return iter;
}
示例#12
0
/**
 * Writes a wavefield checkpoint through the HDF5 backend.
 *
 * Stores @p time and @p waveFieldTimeStep as attributes and then writes the
 * degrees of freedom in totalIterations() H5Dwrite calls: totalIterations()-1
 * calls with dofsPerIteration() elements each inside the loop, followed by
 * one call with the remaining elements. Finally the checkpoint is finalized.
 *
 * @param time Simulation time written to the time attribute
 * @param waveFieldTimeStep Wavefield time step written to its attribute
 */
void seissol::checkpoint::h5::Wavefield::write(double time, int waveFieldTimeStep)
{
	EPIK_TRACER("CheckPoint_write");
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Writing check point.";

	EPIK_USER_REG(r_header, "checkpoint_write_header");
	SCOREP_USER_REGION_DEFINE(r_header);
	EPIK_USER_START(r_header);
	SCOREP_USER_REGION_BEGIN(r_header, "checkpoint_write_header", SCOREP_USER_REGION_TYPE_COMMON);

	// Time
	checkH5Err(H5Awrite(m_h5time[odd()], H5T_NATIVE_DOUBLE, &time));

	// Wavefield writer
	checkH5Err(H5Awrite(m_h5timestepWavefield[odd()], H5T_NATIVE_INT, &waveFieldTimeStep));

	EPIK_USER_END(r_header);
	SCOREP_USER_REGION_END(r_header);

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	// Write the wave field
	// offset: position in the local dofs array; fStart: position in the file
	unsigned int offset = 0;
	hsize_t fStart = fileOffset();
	hsize_t count = dofsPerIteration();
	// Memory space for one full chunk of dofsPerIteration() elements
	hid_t h5memSpace = H5Screate_simple(1, &count, 0L);
	checkH5Err(h5memSpace);
	checkH5Err(H5Sselect_all(h5memSpace));
	for (unsigned int i = 0; i < totalIterations()-1; i++) {
		checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, &fStart, 0L, &count, 0L));

		checkH5Err(H5Dwrite(m_h5data[odd()], H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData,
				h5XferList(), &const_cast<real*>(dofs())[offset]));

		// If this process needs fewer iterations than totalIterations(), the
		// positions stop advancing and the same chunk is written again, so
		// that every process performs the same number of write calls
		if (i < iterations()-1) {
			fStart += count;
			offset += count;
		}
	}
	checkH5Err(H5Sclose(h5memSpace));

	// Save remaining data in the last iteration: whatever is left after
	// iterations()-1 full chunks
	count = numDofs() - (iterations() - 1) * count;
	h5memSpace = H5Screate_simple(1, &count, 0L);
	checkH5Err(h5memSpace);
	checkH5Err(H5Sselect_all(h5memSpace));
	checkH5Err(H5Sselect_hyperslab(m_h5fSpaceData, H5S_SELECT_SET, &fStart, 0L, &count, 0L));
	checkH5Err(H5Dwrite(m_h5data[odd()], H5T_NATIVE_DOUBLE, h5memSpace, m_h5fSpaceData,
			h5XferList(), &dofs()[offset]));
	checkH5Err(H5Sclose(h5memSpace));

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Writing check point. Done.";
}