int main(int argc, char* argv[]) { std::chrono::time_point<std::chrono::high_resolution_clock> tStart; std::chrono::time_point<std::chrono::high_resolution_clock> tStop; typedef std::chrono::duration<int,std::milli> millisecs_t ; int numprocs, rank, edge, pixel_count, start, end; double max_values_sq; Uint32 max_iter; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(numprocs <= 1) { std::cerr << argv[0] << ": error: requires at least two MPI processes\n"; return 1; } max_values_sq = 4.0; max_iter = 5000; edge = (MAX_X * MAX_Y) / (numprocs - 1); if(rank > 0) { int tile = rank - 1; Uint32* pixels; start = tile * edge; end = (tile == numprocs - 2) ? MAX_X * MAX_Y : (tile + 1) * edge; pixel_count = end - start; pixels = (Uint32*) malloc(pixel_count * sizeof(Uint32)); calc_lines(start, end, pixels, max_values_sq, max_iter); MPI_Send((void*)pixels, pixel_count, MPI_INT, 0, 0, MPI_COMM_WORLD); free(pixels); } else /* rank == 0 */ { int tile, recv_count = (edge + 1); char title[100]; Uint32* field = (Uint32*) malloc(MAX_X * MAX_Y * sizeof(Uint32)); Uint32* fieldpos; SDL_Surface* sdlSurface; SDL_Event event; MPI_Status status; tStart = std::chrono::high_resolution_clock::now(); for(tile = 1; tile < numprocs; tile++) { start = (tile - 1) * edge; end = (tile == numprocs - 1) ? MAX_X * MAX_Y : tile * edge; pixel_count = end - start; recv_count = pixel_count; fieldpos = field+start; MPI_Recv(fieldpos, recv_count, MPI_INT, tile, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } tStop = std::chrono::high_resolution_clock::now(); millisecs_t duration( std::chrono::duration_cast<millisecs_t>(tStop-tStart) ) ; long elapsed = duration.count(); SDL_Init(SDL_INIT_EVERYTHING); sdlSurface = SDL_SetVideoMode(MAX_X, MAX_Y, 32, SDL_HWSURFACE | SDL_DOUBLEBUF); std::stringstream ss; ss << argv[0] << " " << numprocs << " processes " << elapsed*1.e-3 << " sec." << "\n"; SDL_WM_SetCaption(ss.str().c_str(), title); std::cout << ss.str().c_str() << "\n"; draw(sdlSurface, field); SDL_Flip(sdlSurface); do { SDL_Delay(50); SDL_PollEvent(&event); } while( event.type != SDL_QUIT && event.type != SDL_KEYDOWN ); SDL_FreeSurface(sdlSurface); SDL_Quit(); free(field); } MPI_Finalize(); return 0; }
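/* A minimal sketch of the tile-gather pattern used in the program above: each
 * worker rank computes one contiguous range of pixels and sends it to rank 0,
 * which receives every tile at its offset in the full frame buffer.  The pixel
 * type is 32-bit, so the sketch transfers MPI_UINT32_T; WIDTH, HEIGHT and
 * compute_pixels() are placeholders, not part of the original program. */
#include <mpi.h>
#include <stdint.h>
#include <stdlib.h>

enum { WIDTH = 800, HEIGHT = 600 };

/* Placeholder for the per-pixel iteration kernel. */
static void compute_pixels(int start, int end, uint32_t *out) {
    for (int i = start; i < end; ++i) out[i - start] = (uint32_t)i;
}

int main(int argc, char **argv) {
    int nprocs, rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (nprocs < 2) { MPI_Finalize(); return 1; }   /* need at least one worker */

    const int total = WIDTH * HEIGHT;
    const int chunk = total / (nprocs - 1);         /* rank 0 only collects */

    if (rank > 0) {
        int tile  = rank - 1;
        int start = tile * chunk;
        int end   = (tile == nprocs - 2) ? total : start + chunk;
        uint32_t *pixels = malloc((size_t)(end - start) * sizeof *pixels);
        compute_pixels(start, end, pixels);
        MPI_Send(pixels, end - start, MPI_UINT32_T, 0, 0, MPI_COMM_WORLD);
        free(pixels);
    } else {
        uint32_t *frame = malloc((size_t)total * sizeof *frame);
        for (int tile = 1; tile < nprocs; ++tile) {
            int start = (tile - 1) * chunk;
            int end   = (tile == nprocs - 1) ? total : tile * chunk;
            MPI_Recv(frame + start, end - start, MPI_UINT32_T, tile, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        free(frame);
    }
    MPI_Finalize();
    return 0;
}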
int main(int argc, char *argv[]) { int rank; int n_ranks, start_rank; int i,j; float gamma = 0.25, rho = -0.495266; float GLOB_SUM = 0, sum = 0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &n_ranks); MPI_Comm_rank(MPI_COMM_WORLD, &rank); printf("before get data in id %d\n", rank); get_data(rank%4); start_rank = 6; n_ranks = 4; printf("done getting dat rank %d\n", rank); MPI_Barrier(MPI_COMM_WORLD); // printf("crossing bar1 %d\n", rank); for (j = 0; j < INPUT_SIZE; ++j) { get_input(rank, start_rank, n_ranks); sum = compute_svm_sum(rank%4, gamma); if(rank == start_rank) { float tempBuff; GLOB_SUM = sum; for (i = start_rank+1; i < start_rank + n_ranks; ++i) { MPI_Recv(&tempBuff, 1, MPI_FLOAT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); GLOB_SUM = GLOB_SUM + tempBuff; } GLOB_SUM -= rho; } else { MPI_Send((float*)&sum, 1, MPI_FLOAT, start_rank, 0, MPI_COMM_WORLD); } } //if(rank != 6) //printf("before bar2 %d\n", rank); MPI_Barrier(MPI_COMM_WORLD); if(rank == 6) { #ifdef DUMP m5_dump_stats(0, 0); m5_reset_stats(0, 0); #endif } //printf("done with thread %d\n", rank); if(rank == 6) printf("global sum = %f\n", GLOB_SUM); // free_data(); MPI_Finalize(); return 0; }
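/* The receive loop above accumulates partial SVM sums on one root rank by
 * hand.  A minimal sketch of the same idea expressed as a collective,
 * assuming every rank in the communicator contributes one partial sum (the
 * original restricts this to a four-rank subgroup starting at rank 6);
 * compute_partial_sum() is a placeholder, not the original kernel. */
#include <mpi.h>
#include <stdio.h>

static float compute_partial_sum(int rank) { return (float)rank; } /* placeholder */

int main(int argc, char **argv) {
    int rank, root = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    float sum = compute_partial_sum(rank);
    float glob_sum = 0.0f;
    /* Sum all partial results onto the root in one call. */
    MPI_Reduce(&sum, &glob_sum, 1, MPI_FLOAT, MPI_SUM, root, MPI_COMM_WORLD);

    if (rank == root) printf("global sum = %f\n", glob_sum);
    MPI_Finalize();
    return 0;
}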
int main(int argc, char** argv) { int my_rank, p; int i, dest; mpz_t currentPrime; unsigned long int product; sscanf(argv[1], "%lu", &product); int secondFactor = 0; int bcastStatus; int equals; /** GMP library variables **/ mpz_t nextPrimeNumber; mpz_t testFactor; mpz_init(nextPrimeNumber); mpz_init_set_str (nextPrimeNumber, argv[1], 10); mpz_init(testFactor); mpz_init_set_ui(currentPrime, 2); mpz_nextprime(nextPrimeNumber, nextPrimeNumber); mpz_t testProduct; mpz_init(testProduct); /** MPI Initialization **/ MPI_Request finalValue; MPI_File out; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Status status; /** Get Ready to receive a factor if another process finds one */ MPI_Irecv(&secondFactor, 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &finalValue); /** Prepare initial offset for each process **/ for (i=0 ; i < my_rank ; i++) { mpz_nextprime(currentPrime, currentPrime); } /** Start Timing **/ double start = MPI_Wtime(), diff; while (!secondFactor) { /** Check if another process has found the factors **/ MPI_Test (&finalValue, &bcastStatus, &status); if(bcastStatus) { /** Somebody else has found the factors, we are done **/ MPI_Wait(&finalValue, &status); break; } /** Skip P primes before checking again **/ for (i=0 ; i < p ; i++) { mpz_nextprime(currentPrime, currentPrime); } /** Brute force check if the current working prime is a factor of the input number **/ for (mpz_set_ui(testFactor , 2) ; mpz_get_ui(testFactor) <= mpz_get_ui(currentPrime); mpz_nextprime(testFactor, testFactor)) { /** Check if another process has found the factors **/ MPI_Test (&finalValue, &bcastStatus, &status); if(bcastStatus) { MPI_Wait(&finalValue, &status); break; } mpz_mul_ui(testProduct, currentPrime, mpz_get_ui(testFactor)); equals = mpz_cmp_ui(testProduct, product); if (equals == 0){ /** We've found the factor, find the second number, secnd it to the other processes **/ secondFactor = mpz_get_ui(testFactor); printf("done by process %d, factors are %lu and %d \n", my_rank, mpz_get_ui(currentPrime), secondFactor); fflush(stdout); for (dest = 0 ; dest < p ; dest++) { if (dest != my_rank) { MPI_Send(&secondFactor, 1, MPI_UNSIGNED_LONG, dest, 0, MPI_COMM_WORLD); } } } } } diff = MPI_Wtime() - start; /** End Timing **/ /** Prepare file contents **/ char fileName[200], fileContents[200]; sprintf(fileName, "time_%lu", product); sprintf(fileContents, "%d\t%f\n", my_rank, diff); /** Write File **/ MPI_File_open( MPI_COMM_WORLD, fileName, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &out ); MPI_File_seek(out, my_rank*strlen ( fileContents ) , MPI_SEEK_SET); MPI_File_write_all(out , &fileContents, strlen ( fileContents ), MPI_CHAR, &status ); MPI_File_close(&out); /** Fin **/ MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return(0); }
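/* A minimal sketch of the early-termination pattern used above: every rank
 * posts one nonblocking receive up front, polls it with MPI_Test inside its
 * search loop, and notifies the other ranks as soon as it finds a result.
 * Types are kept consistent here (unsigned long everywhere), and
 * search_step() is a placeholder for the real factor-hunting work. */
#include <mpi.h>
#include <stdio.h>

/* Placeholder: pretend rank 0 finds the answer on its third step. */
static int search_step(int rank, unsigned long *found) {
    static int calls = 0;
    if (rank == 0 && ++calls == 3) { *found = 42UL; return 1; }
    return 0;
}

int main(int argc, char **argv) {
    int rank, size, done = 0;
    unsigned long incoming = 0, result = 0;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Receive that any other rank may satisfy later. */
    MPI_Irecv(&incoming, 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0,
              MPI_COMM_WORLD, &req);

    while (!done) {
        MPI_Test(&req, &done, MPI_STATUS_IGNORE);     /* did somebody else finish? */
        if (done) { result = incoming; break; }
        if (search_step(rank, &result)) {             /* we finished first */
            for (int dest = 0; dest < size; ++dest)
                if (dest != rank)
                    MPI_Send(&result, 1, MPI_UNSIGNED_LONG, dest, 0, MPI_COMM_WORLD);
            break;
        }
    }
    if (!done) {                                      /* our receive never matched */
        MPI_Cancel(&req);
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
    printf("rank %d: result %lu\n", rank, result);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}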
int main(int argc, char **argv) { MPI_File fp; LemonWriter *w; LemonReader *r; LemonRecordHeader *h; double *data; double tick, tock; double *timesRead; double *timesWrite; double stdRead = 0.0; double stdWrite = 0.0; int mpisize; int rank; char const *type; int ldsize; unsigned long long int fsize; int *hashMatch, *hashMatchAll; double const rscale = 1.0 / RAND_MAX; int ME_flag=1, MB_flag=1, status=0; int latDist[] = {0, 0, 0, 0}; int periods[] = {1, 1, 1, 1}; int locSizes[4]; int latSizes[4]; int localVol = 1; int latVol = localVol; MPI_Comm cartesian; int i, j; md5_state_t state; md5_byte_t before[16]; md5_byte_t after[16]; int L; int iters; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpisize); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (argc != 3) { usage(rank, argv); MPI_Finalize(); return 1; } L = atoi(argv[1]); if (L <= 0) usage(rank, argv); iters = atoi(argv[2]); if (iters <= 0) usage(rank, argv); timesWrite = (double*)calloc(iters, sizeof(double)); if (timesWrite == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } timesRead = (double*)calloc(iters, sizeof(double)); if (timesRead == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } hashMatch = (int*)calloc(iters, sizeof(int)); if (hashMatch == (int*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } hashMatchAll = (int*)calloc(iters, sizeof(int)); if (hashMatchAll == (int*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } /* Construct a Cartesian topology, adjust lattice sizes where needed */ MPI_Dims_create(mpisize, 4, latDist); for (i = 0; i < 4; ++i) { int div = (i == 3 ? (2 * L) : L) / latDist[i]; locSizes[i] = div ? div : 1; localVol *= locSizes[i]; latSizes[i] = locSizes[i] * latDist[i]; } latVol = mpisize * localVol; ldsize = localVol * 72 * sizeof(double); fsize = (unsigned long long int)latVol * 72 * sizeof(double); MPI_Cart_create(MPI_COMM_WORLD, 4, latDist, periods, 1, &cartesian); MPI_Comm_rank(cartesian, &rank); if (rank == 0) { fprintf(stdout, "Benchmark on a block of data %s in size,\n", humanForm(fsize)); fprintf(stdout, "representing a %u x %u x %u x %u lattice", latSizes[0], latSizes[1], latSizes[2], latSizes[3]); if (mpisize == 1) fprintf(stdout, ".\n\n"); else { fprintf(stdout, ",\ndistributed over %u MPI processes\n", mpisize); fprintf(stdout, "for a local %u x %u x %u x %u lattice.\n\n", locSizes[0], locSizes[1], locSizes[2], locSizes[3]); } } /* Allocate a block of memory for dummy data to write */ data = (double*)malloc(ldsize); if (data == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } srand(time(NULL) + rank); /* Start of test */ for (i = 0; i < iters; ++i) { if (rank == 0) fprintf(stdout, "Measurement %d of %d.\n", i + 1, iters); /* Create a block of dummy data to write out Fill with some random numbers to make sure we don't get coincidental matches here */ for (j = 0; j < (localVol * 72); ++j) data[j] = rscale * (double)rand(); /* Calculate a hash of the data, to check integrity against */ md5_init(&state); md5_append(&state, (md5_byte_t const *)data, ldsize); md5_finish(&state, before); /* Note that the following is the only (?) 
way to truncate the file with MPI */ MPI_File_open(cartesian, "benchmark.test", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp); MPI_File_set_size(fp, 0); w = lemonCreateWriter(&fp, cartesian); h = lemonCreateHeader(MB_flag, ME_flag, "benchmark", latVol); status = lemonWriteRecordHeader(h, w); lemonDestroyHeader(h); MPI_Barrier(cartesian); tick = MPI_Wtime(); lemonWriteLatticeParallel(w, data, 72 * sizeof(double), latSizes); tock = MPI_Wtime(); MPI_Barrier(cartesian); timesWrite[i] = tock - tick; if (rank == 0) fprintf(stdout, "Time spent writing was %4.2g s.\n", timesWrite[i]); lemonWriterCloseRecord(w); lemonDestroyWriter(w); MPI_File_close(&fp); /* Clear data to avoid an utterly failed read giving md5 hash matches from the old data */ memset(data, 0, ldsize); /* Start of reading test */ MPI_File_open(cartesian, "benchmark.test", MPI_MODE_RDONLY | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fp); r = lemonCreateReader(&fp, cartesian); if (lemonReaderNextRecord(r)) fprintf(stderr, "Node %d reports: next record failed.\n", rank); type = lemonReaderType(r); if (strncmp(type, "benchmark", 13)) fprintf(stderr, "Node %d reports: wrong type read.\n", rank); MPI_Barrier(cartesian); tick = MPI_Wtime(); lemonReadLatticeParallel(r, data, 72 * sizeof(double), latSizes); tock = MPI_Wtime(); timesRead[i] = tock - tick; MPI_Barrier(cartesian); if (rank == 0) fprintf(stdout, "Time spent reading was %4.2g s.\n", timesRead[i]); lemonDestroyReader(r); MPI_File_close(&fp); md5_init(&state); md5_append(&state, (md5_byte_t const *)data, ldsize); md5_finish(&state, after); hashMatch[i] = strncmp((char const *)before, (char const *)after, 16) != 0 ? 1 : 0; MPI_Reduce(hashMatch + i, hashMatchAll + i, 1, MPI_INT, MPI_SUM, 0, cartesian); if (rank == 0) { if (hashMatchAll[i] == 0) fprintf(stdout, "All nodes report that MD5 hash matches.\n\n"); else fprintf(stdout, "WARNING: MD5 hash failure detected!\n\n"); } } /* Aggregate the data */ hashMatch[0] = 0; stdWrite = timesWrite[0] * timesWrite[0]; stdRead = timesRead[0] * timesRead[0]; for (i = 1; i < iters; ++i) { hashMatchAll[0] += hashMatchAll[i]; timesWrite[0] += timesWrite[i]; stdWrite += timesWrite[i] * timesWrite[i]; timesRead[0] += timesRead[i]; stdRead += timesRead[i] * timesRead[i]; } stdWrite /= iters; stdRead /= iters; timesWrite[0] /= iters; timesRead[0] /= iters; stdWrite -= timesWrite[0] * timesWrite[0]; stdRead -= timesRead[0] * timesRead[0]; if (rank == 0) { fprintf(stdout, "Average time spent writing was %4.2e s, ", timesWrite[0]); fprintf(stdout, "with a standard deviation of %4.2e s.\n", sqrt(stdWrite)); fprintf(stdout, "Average time spent reading was %4.2e s, ", timesRead[0]); fprintf(stdout, "with a standard deviation of %4.2e s.\n\n", sqrt(stdRead)); stdWrite *= (double)fsize / (timesWrite[0] * timesWrite[0]); stdRead *= (double)fsize / (timesRead[0] * timesRead[0]); fprintf(stdout, "Average writing speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesWrite[0]))); fprintf(stdout, "Average reading speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesRead[0]))); if (hashMatchAll[0] == 0) fprintf(stdout, "All data hashed correctly.\n"); else fprintf(stdout, "WARNING: %d hash mismatches detected!.\n", hashMatchAll[0]); } MPI_Finalize(); free(data); free(timesWrite); free(timesRead); free(hashMatch); free(hashMatchAll); return(0); }
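/* A minimal sketch of the truncate-on-open idiom referenced in the comment in
 * the benchmark above: MPI_MODE_CREATE | MPI_MODE_WRONLY does not truncate an
 * existing file, so the size is explicitly set to zero after the collective
 * open.  The file name is a placeholder. */
#include <mpi.h>

int main(int argc, char **argv) {
    MPI_File fh;
    MPI_Init(&argc, &argv);
    MPI_File_open(MPI_COMM_WORLD, "benchmark.test",
                  MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
    MPI_File_set_size(fh, 0);   /* collective: discard whatever was there before */
    /* ... collective writes would go here ... */
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}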
int main(int argc, char** argv) { int iter_max = 1000; const float pi = 2.0 * asinf(1.0f); const float tol = 1.0e-5f; int rank = 0; int size = 1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); memset(A, 0, N * M * sizeof(float)); memset(Aref, 0, N * M * sizeof(float)); // set boundary conditions for (int j = 0; j < N; j++) { float y0 = sinf( 2.0 * pi * j / (N-1)); A[j][0] = y0; A[j][M-1] = y0; Aref[j][0] = y0; Aref[j][M-1] = y0; } #if _OPENACC int ngpus=acc_get_num_devices(acc_device_nvidia); int devicenum=rank%ngpus; acc_set_device_num(devicenum,acc_device_nvidia); // Call acc_init after acc_set_device_num to avoid multiple contexts on device 0 in multi GPU systems acc_init(acc_device_nvidia); #endif /*_OPENACC*/ // Ensure correctness if N%size != 0 int chunk_size = ceil( (1.0*N)/size ); int jstart = rank * chunk_size; int jend = jstart + chunk_size; // Do not process boundaries jstart = max( jstart, 1 ); jend = min( jend, N - 1 ); if ( rank == 0) printf("Jacobi relaxation Calculation: %d x %d mesh\n", N, M); if ( rank == 0) printf("Calculate reference solution and time serial execution.\n"); StartTimer(); laplace2d_serial( rank, iter_max, tol ); double runtime_serial = GetTimer(); //Wait for all processes to ensure correct timing of the parallel version MPI_Barrier( MPI_COMM_WORLD ); if ( rank == 0) printf("Parallel execution.\n"); StartTimer(); int iter = 0; float error = 1.0f; #pragma acc data copy(A) create(Anew) while ( error > tol && iter < iter_max ) { error = 0.f; #pragma acc kernels for (int j = jstart; j < jend; j++) { for( int i = 1; i < M-1; i++ ) { Anew[j][i] = 0.25f * ( A[j][i+1] + A[j][i-1] + A[j-1][i] + A[j+1][i]); error = fmaxf( error, fabsf(Anew[j][i]-A[j][i])); } } float globalerror = 0.0f; MPI_Allreduce( &error, &globalerror, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD ); error = globalerror; #pragma acc kernels for (int j = jstart; j < jend; j++) { for( int i = 1; i < M-1; i++ ) { A[j][i] = Anew[j][i]; } } //Periodic boundary conditions int top = (rank == 0) ? (size-1) : rank-1; int bottom = (rank == (size-1)) ? 0 : rank+1; #pragma acc host_data use_device( A ) { //1. Sent row jstart (first modified row) to top receive lower boundary (jend) from bottom MPI_Sendrecv( A[jstart], M, MPI_FLOAT, top , 0, A[jend], M, MPI_FLOAT, bottom, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); //2. Sent row (jend-1) (last modified row) to bottom receive upper boundary (jstart-1) from top MPI_Sendrecv( A[(jend-1)], M, MPI_FLOAT, bottom, 0, A[(jstart-1)], M, MPI_FLOAT, top , 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); } if(rank == 0 && (iter % 100) == 0) printf("%5d, %0.6f\n", iter, error); iter++; } MPI_Barrier( MPI_COMM_WORLD ); double runtime = GetTimer(); if (check_results( rank, jstart, jend, tol ) && rank == 0) { printf( "Num GPUs: %d\n", size ); printf( "%dx%d: 1 GPU: %8.4f s, %d GPUs: %8.4f s, speedup: %8.2f, efficiency: %8.2f%\n", N,M, runtime_serial/ 1000.f, size, runtime/ 1000.f, runtime_serial/runtime, runtime_serial/(size*runtime)*100 ); } MPI_Finalize(); return 0; }
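/* A minimal sketch of the periodic halo exchange used inside the Jacobi loop
 * above: each rank owns a band of rows, sends its first interior row to the
 * rank above and its last interior row to the rank below, and receives the
 * matching ghost rows with MPI_Sendrecv so the two directions cannot
 * deadlock.  M, LOCAL_ROWS and the field A are placeholders. */
#include <mpi.h>
#include <string.h>

enum { M = 128, LOCAL_ROWS = 16 };            /* placeholder sizes */
static float A[LOCAL_ROWS + 2][M];            /* one ghost row on each side */

int main(int argc, char **argv) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    memset(A, 0, sizeof A);

    int top    = (rank == 0)        ? size - 1 : rank - 1;  /* periodic neighbours */
    int bottom = (rank == size - 1) ? 0        : rank + 1;

    /* Send first interior row to 'top', receive the bottom ghost row from 'bottom'. */
    MPI_Sendrecv(A[1],              M, MPI_FLOAT, top,    0,
                 A[LOCAL_ROWS + 1], M, MPI_FLOAT, bottom, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* Send last interior row to 'bottom', receive the top ghost row from 'top'. */
    MPI_Sendrecv(A[LOCAL_ROWS],     M, MPI_FLOAT, bottom, 0,
                 A[0],              M, MPI_FLOAT, top,    0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    MPI_Finalize();
    return 0;
}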
int main(int argc, char* argv[]) { // // Get a default output stream from the Teuchos::VerboseObjectBase // Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream(); Teuchos::GlobalMPISession mpiSession(&argc,&argv); #ifdef HAVE_COMPLEX typedef std::complex<double> ST; // Scalar-type typedef #elif HAVE_COMPLEX_H typedef std::complex<double> ST; // Scalar-type typedef #else typedef double ST; // Scalar-type typedef #endif typedef Teuchos::ScalarTraits<ST>::magnitudeType MT; // Magnitude-type typedef typedef int OT; // Ordinal-type typedef ST one = Teuchos::ScalarTraits<ST>::one(); ST zero = Teuchos::ScalarTraits<ST>::zero(); #ifdef HAVE_MPI MPI_Comm mpiComm = MPI_COMM_WORLD; const Tpetra::MpiPlatform<OT,OT> ordinalPlatform(mpiComm); const Tpetra::MpiPlatform<OT,ST> scalarPlatform(mpiComm); #else const Tpetra::SerialPlatform<OT,OT> ordinalPlatform; const Tpetra::SerialPlatform<OT,ST> scalarPlatform; #endif // // Get the data from the HB file // // Name of input matrix file std::string matrixFile = "mhd1280b.cua"; int info=0; int dim,dim2,nnz; MT *dvals; int *colptr,*rowind; ST *cvals; nnz = -1; info = readHB_newmat_double(matrixFile.c_str(),&dim,&dim2,&nnz, &colptr,&rowind,&dvals); if (info == 0 || nnz < 0) { *out << "Error reading '" << matrixFile << "'" << std::endl; } #ifdef HAVE_MPI MPI_Finalize(); #endif // Convert interleaved doubles to std::complex values cvals = new ST[nnz]; for (int ii=0; ii<nnz; ii++) { cvals[ii] = ST(dvals[ii*2],dvals[ii*2+1]); } // Declare global dimension of the linear operator OT globalDim = dim; // Create the element space and std::vector space const Tpetra::ElementSpace<OT> elementSpace(globalDim,0,ordinalPlatform); const Tpetra::VectorSpace<OT,ST> vectorSpace(elementSpace,scalarPlatform); // Create my implementation of a Tpetra::Operator RCP<Tpetra::Operator<OT,ST> > tpetra_A = rcp( new MyOperator<OT,ST>(vectorSpace,dim,colptr,nnz,rowind,cvals) ); // Create a Thyra linear operator (A) using the Tpetra::CisMatrix (tpetra_A). RCP<Thyra::LinearOpBase<ST> > A = Teuchos::rcp( new Thyra::TpetraLinearOp<OT,ST>(tpetra_A) ); // // Set the parameters for the Belos LOWS Factory and create a parameter list. // int blockSize = 1; int maxIterations = globalDim; int maxRestarts = 15; int gmresKrylovLength = 50; int outputFrequency = 100; bool outputMaxResOnly = true; MT maxResid = 1e-5; Teuchos::RCP<Teuchos::ParameterList> belosLOWSFPL = Teuchos::rcp( new Teuchos::ParameterList() ); belosLOWSFPL->set("Solver Type","Block GMRES"); Teuchos::ParameterList& belosLOWSFPL_solver = belosLOWSFPL->sublist("Solver Types"); Teuchos::ParameterList& belosLOWSFPL_gmres = belosLOWSFPL_solver.sublist("Block GMRES"); belosLOWSFPL_gmres.set("Maximum Iterations",int(maxIterations)); belosLOWSFPL_gmres.set("Convergence Tolerance",MT(maxResid)); belosLOWSFPL_gmres.set("Maximum Restarts",int(maxRestarts)); belosLOWSFPL_gmres.set("Block Size",int(blockSize)); belosLOWSFPL_gmres.set("Num Blocks",int(gmresKrylovLength)); belosLOWSFPL_gmres.set("Output Frequency",int(outputFrequency)); belosLOWSFPL_gmres.set("Show Maximum Residual Norm Only",bool(outputMaxResOnly)); // Whether the linear solver succeeded. // (this will be set during the residual check at the end) bool success = true; // Number of random right-hand sides we will be solving for. int numRhs = 1; // Get the domain space for the Thyra linear operator Teuchos::RCP<const Thyra::VectorSpaceBase<ST> > domain = A->domain(); // Create the Belos LOWS factory. 
Teuchos::RCP<Thyra::LinearOpWithSolveFactoryBase<ST> > belosLOWSFactory = Teuchos::rcp(new Thyra::BelosLinearOpWithSolveFactory<ST>()); // Set the parameter list to specify the behavior of the factory. belosLOWSFactory->setParameterList( belosLOWSFPL ); // Set the output stream and the verbosity level (prints to std::cout by defualt) // NOTE: Set to VERB_NONE for no output from the solver. belosLOWSFactory->setVerbLevel(Teuchos::VERB_LOW); // Create a BelosLinearOpWithSolve object from the Belos LOWS factory. Teuchos::RCP<Thyra::LinearOpWithSolveBase<ST> > nsA = belosLOWSFactory->createOp(); // Initialize the BelosLinearOpWithSolve object with the Thyra linear operator. Thyra::initializeOp<ST>( *belosLOWSFactory, A, &*nsA ); // Create a right-hand side with numRhs vectors in it. Teuchos::RCP< Thyra::MultiVectorBase<ST> > b = Thyra::createMembers(domain, numRhs); // Create an initial std::vector with numRhs vectors in it and initialize it to one. Teuchos::RCP< Thyra::MultiVectorBase<ST> > x = Thyra::createMembers(domain, numRhs); Thyra::assign(&*x, one); // Initialize the right-hand side so that the solution is a std::vector of ones. A->apply( Thyra::NONCONJ_ELE, *x, &*b ); Thyra::assign(&*x, zero); // Perform solve using the linear operator to get the approximate solution of Ax=b, // where b is the right-hand side and x is the left-hand side. Thyra::SolveStatus<ST> solveStatus; solveStatus = Thyra::solve( *nsA, Thyra::NONCONJ_ELE, *b, &*x ); // Print out status of solve. *out << "\nBelos LOWS Status: "<< solveStatus << std::endl; // // Compute residual and ST check convergence. // std::vector<MT> norm_b(numRhs), norm_res(numRhs); Teuchos::RCP< Thyra::MultiVectorBase<ST> > y = Thyra::createMembers(domain, numRhs); // Compute the column norms of the right-hand side b. Thyra::norms_2( *b, &norm_b[0] ); // Compute y=A*x, where x is the solution from the linear solver. A->apply( Thyra::NONCONJ_ELE, *x, &*y ); // Compute A*x-b = y-b Thyra::update( -one, *b, &*y ); // Compute the column norms of A*x-b. Thyra::norms_2( *y, &norm_res[0] ); // Print out the final relative residual norms. MT rel_res = 0.0; *out << "Final relative residual norms" << std::endl; for (int i=0; i<numRhs; ++i) { rel_res = norm_res[i]/norm_b[i]; if (rel_res > maxResid) success = false; *out << "RHS " << i+1 << " : " << std::setw(16) << std::right << rel_res << std::endl; } return ( success ? 0 : 1 ); }
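/* The convergence check at the end of the Belos example above compares
 * ||A*x - b||_2 against ||b||_2 for each right-hand side.  A minimal serial
 * sketch of that check for one small dense system; the matrix, vectors and
 * tolerance below are made-up values, not data from the example. */
#include <math.h>
#include <stdio.h>

enum { N = 3 };

int main(void) {
    double A[N][N] = {{4, 1, 0}, {1, 4, 1}, {0, 1, 4}};
    double x[N]    = {0.25, 0.0, 0.25};      /* candidate solution */
    double b[N]    = {1.0, 1.0, 1.0};        /* right-hand side */
    double tol     = 1e-5;

    double res2 = 0.0, b2 = 0.0;
    for (int i = 0; i < N; ++i) {
        double ri = -b[i];
        for (int j = 0; j < N; ++j) ri += A[i][j] * x[j];   /* (A*x - b)_i */
        res2 += ri * ri;
        b2   += b[i] * b[i];
    }
    double rel_res = sqrt(res2) / sqrt(b2);
    printf("relative residual = %g (%s)\n", rel_res,
           rel_res <= tol ? "converged" : "not converged");
    return 0;
}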
int main(int argc, char *argv[] ) { double time1, time2; time1 = MPI_Wtime(); int rank, processors; int j; // number of iterations int k; // number of iterations to perform before creating a checkpoint int l; // number of random samples per grid point int checkpoint_resume = 0; // 1 = resume from last checkpoint int c; // used to hold a character int i=0, row = 0, col = 0, pln = 0; // array iterators char ***local_array; char **local_array_2nd; char *local_array_pointer; char ***local_array_copy; char **local_array_copy_2nd; char *local_array_copy_pointer; char ***temp, *temp_pointer; int file_open_error; int command_line_incomplete = 0; int grid_size[3] = {0,0,0}; int proc_size[3] = {0,0,0}; int local_size[3] = {0,0,0}; int remainder_size[3] = {0,0,0}; int coords[3] = {0,0,0}; int start_indices[3] = {0,0,0}; int periods[3] = {0,0,0}; int mem_size[3] = {0,0,0}; MPI_Status status; MPI_Datatype filetype, memtype; MPI_File fh; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &processors); MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Interpret the command line arguments -------------------------------- if (rank == 0) { if (argc < 6 || argc > 8) { fputs("usage: x y z j k l r\n", stderr); fputs("where: x,y,z = x, y and z dimensions\n", stderr); fputs(" j = how many times the game of life is played\n", stderr); fputs(" k = checkpoint every k iterations\n", stderr); fputs(" l = number of random samples per grid point\n", stderr); fputs(" r = resume from the last checkpoint\n", stderr); fputs(INITIAL, stderr); fputs(" must be present.\n", stderr); fputs(CHECKPOINT, stderr); fputs(" must be present if resuming from the last checkpoint.\n", stderr); exit(EXIT_FAILURE); } } j = (int) strtol(argv[4], NULL, 10); k = (int) strtol(argv[5], NULL, 10); l = (int) strtol(argv[6], NULL, 10); if ( argc == 7 ) if ( argv[6][0] == 'r' ) checkpoint_resume = 1; if (rank == 0) printf("%d iterations \ncheckpoint every %d iterations \n%d samples per grid point \ncheckpoint resume = %d\n", j,k,l,checkpoint_resume); grid_size[0] = (int) strtol(argv[1], NULL, 10); grid_size[1] = (int) strtol(argv[2], NULL, 10); grid_size[2] = (int) strtol(argv[3], NULL, 10); if (rank==0) printf("grid_size: %d, %d, %d\n", grid_size[0], grid_size[1], grid_size[2]); MPI_Dims_create(processors, 3, proc_size); if (rank==0) printf("proc_size: %d, %d, %d\n", proc_size[0], proc_size[1], proc_size[2]); local_size[0] = grid_size[0] / proc_size[0]; local_size[1] = grid_size[1] / proc_size[1]; local_size[2] = grid_size[2] / proc_size[2]; if (rank==0) printf("local_size: %d, %d, %d\n", local_size[0], local_size[1], local_size[2]); remainder_size[0] = grid_size[0] % proc_size[0]; remainder_size[1] = grid_size[1] % proc_size[1]; remainder_size[2] = grid_size[2] % proc_size[2]; if (rank==0) printf("remainder_size: %d, %d, %d\n", remainder_size[0], remainder_size[1], remainder_size[2]); if (remainder_size[0] != 0 || remainder_size[1] != 0 || remainder_size[2] != 0) { fputs("remainder size != 0, check your dimensions", stderr); MPI_Finalize(); exit(EXIT_FAILURE); } MPI_Comm comm; MPI_Cart_create(MPI_COMM_WORLD, 3, proc_size, periods, 0, &comm); MPI_Comm_rank(comm, &rank); MPI_Cart_coords(comm, rank, 3, coords); start_indices[0] = coords[0] * local_size[0]; start_indices[1] = coords[1] * local_size[1]; start_indices[2] = coords[2] * local_size[2]; /* printf("A coords R%d: (%d, %d, %d) (%d, %d, %d)\n", rank, coords[0], coords[1], coords[2], start_indices[0], start_indices[1], start_indices[2]);*/ fflush(stdout); // create the file type 
--------------------------------------------------- MPI_Type_create_subarray(3, grid_size, local_size, start_indices, MPI_ORDER_C, MPI_CHAR, &filetype); MPI_Type_commit(&filetype); // create a local memory type with ghost rows ----------------------------- mem_size[0] = local_size[0] + 2; mem_size[1] = local_size[1] + 2; mem_size[2] = local_size[2] + 2; start_indices[0] = start_indices[1] = start_indices[2] = 1; MPI_Type_create_subarray(3, mem_size, local_size, start_indices, MPI_ORDER_C, MPI_CHAR, &memtype); MPI_Type_commit(&memtype); // find my neighbors ------------------------------------------------------ int nxminus, nxplus, nyminus, nyplus, nzminus, nzplus, tag = 333, *neighbors; // Neighbors Array: row- col- col+ row+ plane- plane+ neighbors = (int *) malloc(6 * sizeof(int)); for(i=0; i<6; i++) neighbors[i] = rank; MPI_Cart_shift(comm, 0, 1, &nxminus, &nxplus); MPI_Cart_shift(comm, 1, 1, &nyminus, &nyplus); MPI_Cart_shift(comm, 2, 1, &nzminus, &nzplus); // printf(" %d sending south to %d receiving from %d \n",rank,nxplus,nxminus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nxplus, tag, &(neighbors[0]), 1, MPI_INT, nxminus, tag, comm, &status); // printf(" %d sending North to %d receiving from %d \n",rank,nxminus,nxplus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nxminus, tag, &(neighbors[3]), 1, MPI_INT, nxplus, tag, comm, &status); // printf(" %d sending East to %d receiving from %d \n",rank,nyplus,nyminus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nyplus, tag, &neighbors[1], 1, MPI_INT, nyminus, tag, comm, &status); // printf(" %d sending West to %d receiving from %d \n",rank,nyminus,nyplus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nyminus, tag, &neighbors[2], 1, MPI_INT, nyplus, tag, comm, &status); // printf(" %d sending backwards to %d receiving from %d \n",rank,nzplus,nzminus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nzplus, tag, &(neighbors[4]), 1, MPI_INT, nzminus, tag, comm, &status); // printf(" %d sending forward to %d receiving from %d \n",rank,nzminus,nzplus); // fflush(stdout); MPI_Sendrecv(&rank, 1, MPI_INT, nzminus, tag, &(neighbors[5]), 1, MPI_INT, nzplus, tag, comm, &status); /* printf("neighboors R%d : (row-) %d (col-) %d (col+) %d (row+) %d (plane-) %d (plane+) %d\n",rank,neighbors[0],neighbors[1],neighbors[2],neighbors[3],neighbors[4],neighbors[5]);*/ fflush(stdout); //init_sprng(1,time(0),SPRNG_DEFAULT); srand((unsigned int)time(NULL)); // Open the initial condition (checkpoint or not) ---------------------- if ( checkpoint_resume ) { file_open_error = MPI_File_open(MPI_COMM_WORLD, CHECKPOINT, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh,0, MPI_CHAR, filetype, "native", MPI_INFO_NULL); } else { file_open_error = MPI_File_open(MPI_COMM_WORLD, INITIAL, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh,0, MPI_CHAR, filetype, "native", MPI_INFO_NULL); } if (file_open_error != MPI_SUCCESS) { if (checkpoint_resume) fputs(CHECKPOINT, stderr); else fputs(INITIAL, stderr); fputs(" could not be opened.\n", stderr); exit(EXIT_FAILURE); } // Allocate and Populate the local array ---------------------------------- local_array_copy_pointer = (char *) malloc(mem_size[0] * mem_size[1] * mem_size[2] * sizeof(char)); local_array_copy_2nd = (char **) malloc(mem_size[0] * mem_size[1] * sizeof(char*)); local_array_copy = (char ***) malloc(mem_size[0] * sizeof(char*)); for(i = 0; i < mem_size[0] * mem_size[1]; i++) local_array_copy_2nd[i] = &local_array_copy_pointer[i * 
mem_size[2]]; for(i = 0; i < mem_size[0]; i++) local_array_copy[i] = &local_array_copy_2nd[i * mem_size[1]]; local_array_pointer = (char *) malloc(mem_size[0] * mem_size[1] * mem_size[2] * sizeof(char)); local_array_2nd = (char **) malloc(mem_size[0] * mem_size[1] * sizeof(char*)); local_array = (char ***) malloc(mem_size[0] * sizeof(char*)); for(i = 0; i < mem_size[0] * mem_size[1]; i++) local_array_2nd[i] = &local_array_pointer[i * mem_size[2]]; for(i = 0; i < mem_size[0]; i++) local_array[i] = &local_array_2nd[i * mem_size[1]]; // if (rank==0) printf("Malloc complete\n"); for(row=0; row<mem_size[0]; row++) { for(col=0; col<mem_size[1]; col++) { for(pln=0; pln<mem_size[2]; pln++) { local_array[row][col][pln] = local_array_copy[row][col][pln] = '0'; } } } // if (rank==0) printf("Setup complete\n"); MPI_File_read_all(fh, local_array_pointer, 1, memtype, &status); if (rank==0) printf("File Read\n"); // if (rank==0) { // for(row=0; row<mem_size[0]; row++) { // for(col=0; col<mem_size[1]; col++) { // for(pln=0; pln<mem_size[2]; pln++) { // printf("%c", local_array[row][col][pln]); // } // printf("\n"); // } // printf("-----------------------\n"); // } // } MPI_File_close(&fh); // Construct the plane data types MPI_Datatype yzplane; MPI_Type_vector(local_size[1], local_size[2], local_size[2]+2, MPI_CHAR, &yzplane); MPI_Type_commit(&yzplane); MPI_Datatype xzplane; MPI_Type_vector(local_size[0], local_size[2], ((local_size[2]+2)*local_size[1])+((local_size[2]+2)*2), MPI_CHAR, &xzplane); MPI_Type_commit(&xzplane); // this type will also copy the corner x columns, can't skip blocks intermittently // since we aren't worrying about the corner data, it's ok MPI_Datatype xyplane; MPI_Type_vector((local_size[0]*local_size[1])+((local_size[0]*2)-2), 1, local_size[2]+2, MPI_CHAR, &xyplane); MPI_Type_commit(&xyplane); MPI_Barrier(comm); // start the iteration loop int iterations; int kCounter = k; for (iterations = 0; iterations < j; iterations++) { // send updated planes // Neighbors Array: // 0 1 2 3 4 5 // row- col- col+ row+ plane- plane+ // Note: corners are not handled // send top yzplane if (rank != neighbors[0]) MPI_Send(&local_array[1][1][1], 1, yzplane, neighbors[0], 0, comm); // recv bottom yzplane if (rank != neighbors[3]) MPI_Recv(&local_array[local_size[0]+1][1][1], 1, yzplane, neighbors[3], 0, comm, &status); // send bottom yzplane if (rank != neighbors[3]) MPI_Send(&local_array[local_size[0]][1][1], 1, yzplane, neighbors[3], 0, comm); // recv top yzplane if (rank != neighbors[0]) MPI_Recv(&local_array[0][1][1], 1, yzplane, neighbors[0], 0, comm, &status); // send left xzplane if (rank != neighbors[1]) MPI_Send(&local_array[1][1][1], 1, xzplane, neighbors[1], 0, comm); // recv right xzplane if (rank != neighbors[2]) MPI_Recv(&local_array[1][local_size[1]+1][1], 1, xzplane, neighbors[2], 0, comm, &status); // send right xzplane if (rank != neighbors[2]) MPI_Send(&local_array[1][local_size[1]][1], 1, xzplane, neighbors[2], 0, comm); // recv left xzplane if (rank != neighbors[1]) MPI_Recv(&local_array[1][0][1], 1, xzplane, neighbors[1], 0, comm, &status); // send front xyplane if (rank != neighbors[4]) MPI_Send(&local_array[1][1][1], 1, xyplane, neighbors[4], 0, comm); // recv back xyplane if (rank != neighbors[5]) MPI_Recv(&local_array[1][1][local_size[2]+1], 1, xyplane, neighbors[5], 0, comm, &status); // send back xyplane if (rank != neighbors[5]) MPI_Send(&local_array[1][1][local_size[2]], 1, xyplane, neighbors[5], 0, comm); // recv front xyplane if (rank != neighbors[4]) 
MPI_Recv(&local_array[1][1][0], 1, xyplane, neighbors[4], 0, comm, &status); // if (rank==0) { // for(row=0; row<mem_size[0]; row++) { // for(col=0; col<mem_size[1]; col++) { // for(pln=0; pln<mem_size[2]; pln++) { // printf("%c", local_array[row][col][pln]); // } // printf("\n"); // } // printf("-----------------------\n"); // } // } // run the game of life // gameOfLife(local_array, local_array_copy, local_size[0], local_size[1], l, rank); // swap the arrays // temp1 = local_array; // local_array = local_array_copy; // local_array_copy = temp1; // // temp2 = local_array_pointer; // local_array_pointer = local_array_copy_pointer; // local_array_copy_pointer = temp2; // check to see if this iteration needs a checkpoint kCounter--; if (kCounter == 0) { kCounter = k; // checkpoint code MPI_File_open(MPI_COMM_WORLD, CHECKPOINT, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_CHAR, filetype, "native", MPI_INFO_NULL); MPI_File_write_all(fh, local_array_pointer, 1, memtype, &status); MPI_File_close(&fh); if (rank == 0) printf("Checkpoint made: Iteration %d\n", iterations+1); } // end if kCounter == 0 } // end iteration loop iterations--; // all done! repeat the checkpoint process MPI_File_open(MPI_COMM_WORLD, FINAL_RESULTS, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_CHAR, filetype, "native", MPI_INFO_NULL); MPI_File_write_all(fh, local_array_pointer, 1, memtype, &status); MPI_File_close(&fh); if (rank == 0) printf("Final Results made: Iteration %d\n", iterations+1); time2 = MPI_Wtime(); if (rank == 0) printf("Elapsed Seconds: %f\n", time2-time1);fflush(stdout); MPI_Finalize(); return EXIT_SUCCESS; }
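/* A minimal sketch of the checkpointing pattern used above: a "filetype"
 * subarray places this rank's block inside the global grid on disk, a
 * "memtype" subarray skips the ghost layer in local memory, and one
 * collective write stores the distributed state in a single file.  The grid
 * dimensions, the one-dimensional block layout and the file name are
 * placeholders. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv) {
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    int local[3] = {4, 4, 4};                                  /* cells owned per rank */
    int grid[3]  = {4 * nprocs, 4, 4};                         /* 1-D decomposition only */
    int start[3] = {4 * rank, 0, 0};                           /* offset in the global grid */
    int mem[3]   = {local[0] + 2, local[1] + 2, local[2] + 2}; /* local array with ghosts */
    int inner[3] = {1, 1, 1};                                  /* skip the ghost layer */

    MPI_Datatype filetype, memtype;
    MPI_Type_create_subarray(3, grid, local, start, MPI_ORDER_C, MPI_CHAR, &filetype);
    MPI_Type_create_subarray(3, mem,  local, inner, MPI_ORDER_C, MPI_CHAR, &memtype);
    MPI_Type_commit(&filetype);
    MPI_Type_commit(&memtype);

    char *cells = calloc((size_t)mem[0] * mem[1] * mem[2], 1);

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "checkpoint.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_CHAR, filetype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, cells, 1, memtype, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    free(cells);
    MPI_Type_free(&filetype);
    MPI_Type_free(&memtype);
    MPI_Finalize();
    return 0;
}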
int main(int argc, char* argv[]) { #ifndef QUESO_HAS_MPI // Skip this test if we're not in parallel return 77; #else MPI_Init(&argc, &argv); std::string inputFileName = argv[1]; const char * test_srcdir = std::getenv("srcdir"); if (test_srcdir) inputFileName = test_srcdir + ('/' + inputFileName); // Initialize QUESO environment QUESO::FullEnvironment env(MPI_COMM_WORLD, inputFileName, "", NULL); //================================================================ // Statistical inverse problem (SIP): find posterior PDF for 'g' //================================================================ //------------------------------------------------------ // SIP Step 1 of 6: Instantiate the parameter space //------------------------------------------------------ QUESO::VectorSpace<> paramSpace(env, "param_", 1, NULL); //------------------------------------------------------ // SIP Step 2 of 6: Instantiate the parameter domain //------------------------------------------------------ QUESO::GslVector paramMinValues(paramSpace.zeroVector()); QUESO::GslVector paramMaxValues(paramSpace.zeroVector()); paramMinValues[0] = 8.; paramMaxValues[0] = 11.; QUESO::BoxSubset<> paramDomain("param_", paramSpace, paramMinValues, paramMaxValues); //------------------------------------------------------ // SIP Step 3 of 6: Instantiate the likelihood function // object to be used by QUESO. //------------------------------------------------------ Likelihood<> lhood("like_", paramDomain); //------------------------------------------------------ // SIP Step 4 of 6: Define the prior RV //------------------------------------------------------ QUESO::UniformVectorRV<> priorRv("prior_", paramDomain); //------------------------------------------------------ // SIP Step 5 of 6: Instantiate the inverse problem //------------------------------------------------------ // Extra prefix before the default "rv_" prefix QUESO::GenericVectorRV<> postRv("post_", paramSpace); // No extra prefix before the default "ip_" prefix QUESO::StatisticalInverseProblem<> ip("", NULL, priorRv, lhood, postRv); //------------------------------------------------------ // SIP Step 6 of 6: Solve the inverse problem, that is, // set the 'pdf' and the 'realizer' of the posterior RV //------------------------------------------------------ QUESO::GslVector paramInitials(paramSpace.zeroVector()); priorRv.realizer().realization(paramInitials); QUESO::GslMatrix proposalCovMatrix(paramSpace.zeroVector()); proposalCovMatrix(0,0) = std::pow(std::abs(paramInitials[0]) / 20.0, 2.0); ip.solveWithBayesMetropolisHastings(NULL, paramInitials, &proposalCovMatrix); //================================================================ // Statistical forward problem (SFP): find the max distance // traveled by an object in projectile motion; input pdf for 'g' // is the solution of the SIP above. //================================================================ //------------------------------------------------------ // SFP Step 1 of 6: Instantiate the parameter *and* qoi spaces. // SFP input RV = FIP posterior RV, so SFP parameter space // has been already defined. //------------------------------------------------------ QUESO::VectorSpace<> qoiSpace(env, "qoi_", 1, NULL); //------------------------------------------------------ // SFP Step 2 of 6: Instantiate the parameter domain //------------------------------------------------------ // Not necessary because input RV of the SFP = output RV of SIP. // Thus, the parameter domain has been already defined. 
//------------------------------------------------------ // SFP Step 3 of 6: Instantiate the qoi object // to be used by QUESO. //------------------------------------------------------ Qoi<> qoi("qoi_", paramDomain, qoiSpace); //------------------------------------------------------ // SFP Step 4 of 6: Define the input RV //------------------------------------------------------ // Not necessary because input RV of SFP = output RV of SIP // (postRv). //------------------------------------------------------ // SFP Step 5 of 6: Instantiate the forward problem //------------------------------------------------------ QUESO::GenericVectorRV<> qoiRv("qoi_", qoiSpace); QUESO::StatisticalForwardProblem<> fp("", NULL, postRv, qoi, qoiRv); //------------------------------------------------------ // SFP Step 6 of 6: Solve the forward problem //------------------------------------------------------ fp.solveWithMonteCarlo(NULL); MPI_Finalize(); return 0; #endif // QUESO_HAS_MPI }
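/* The guard at the top of the QUESO test above returns 77 when the build has
 * no MPI support.  In Automake-style test harnesses an exit status of 77 is
 * commonly treated as "skipped" rather than "failed"; a minimal sketch of
 * that convention, with HAVE_PARALLEL standing in for the real feature
 * macro. */
#include <stdio.h>

int main(void) {
#ifndef HAVE_PARALLEL
    fprintf(stderr, "parallel support not compiled in, skipping test\n");
    return 77;            /* reported as SKIP by the test harness */
#else
    /* ... the real parallel test would run here ... */
    return 0;
#endif
}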
// main driver int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif if (Comm.NumProc() != 2) { #ifdef HAVE_MPI MPI_Finalize(); #endif return(0); } int NumMyElements = 0; // NODES assigned to this processor int NumMyExternalElements = 0; // nodes used by this proc, but not hosted int NumMyTotalElements = 0; int FE_NumMyElements = 0; // TRIANGLES assigned to this processor int * MyGlobalElements = 0; // nodes assigned to this processor Epetra_IntSerialDenseMatrix T; // store the grid connectivity int MyPID=Comm.MyPID(); cout << MyPID << endl; switch( MyPID ) { case 0: NumMyElements = 3; NumMyExternalElements = 2; NumMyTotalElements = NumMyElements + NumMyExternalElements; FE_NumMyElements = 3; MyGlobalElements = new int[NumMyTotalElements]; MyGlobalElements[0] = 0; MyGlobalElements[1] = 4; MyGlobalElements[2] = 3; MyGlobalElements[3] = 1; MyGlobalElements[4] = 5; break; case 1: NumMyElements = 3; NumMyExternalElements = 2; NumMyTotalElements = NumMyElements + NumMyExternalElements; FE_NumMyElements = 3; MyGlobalElements = new int[NumMyTotalElements]; MyGlobalElements[0] = 1; MyGlobalElements[1] = 2; MyGlobalElements[2] = 5; MyGlobalElements[3] = 0; MyGlobalElements[4] = 4; break; } // build Map corresponding to update Epetra_Map Map(-1,NumMyElements,MyGlobalElements,0,Comm); // vector containing coordinates BEFORE exchanging external nodes Epetra_Vector CoordX_noExt(Map); Epetra_Vector CoordY_noExt(Map); switch( MyPID ) { case 0: T.Shape(3,FE_NumMyElements); // fill x-coordinates CoordX_noExt[0] = 0.0; CoordX_noExt[1] = 1.0; CoordX_noExt[2] = 0.0; // fill y-coordinates CoordY_noExt[0] = 0.0; CoordY_noExt[1] = 1.0; CoordY_noExt[2] = 1.0; // fill connectivity T(0,0) = 0; T(0,1) = 4; T(0,2) = 3; T(1,0) = 0; T(1,1) = 1; T(1,2) = 4; T(2,0) = 4; T(2,1) = 1; T(2,2) = 5; break; case 1: T.Shape(3,FE_NumMyElements); // fill x-coordinates CoordX_noExt[0] = 1.0; CoordX_noExt[1] = 2.0; CoordX_noExt[2] = 2.0; // fill y-coordinates CoordY_noExt[0] = 0.0; CoordY_noExt[1] = 0.0; CoordY_noExt[2] = 1.0; // fill connectivity T(0,0) = 0; T(0,1) = 1; T(0,2) = 4; T(1,0) = 1; T(1,1) = 5; T(1,2) = 4; T(2,0) = 1; T(2,1) = 2; T(2,2) = 5; break; } // - - - - - - - - - - - - - - - - - - - - // // E X T E R N A L N O D E S S E T U P // // - - - - - - - - - - - - - - - - - - - - // // build target map to exchange the valus of external nodes Epetra_Map TargetMap(-1,NumMyTotalElements, MyGlobalElements, 0, Comm); // !@# rename Map -> SourceMap ????? 
Epetra_Import Importer(TargetMap,Map); Epetra_Vector CoordX(TargetMap); Epetra_Vector CoordY(TargetMap); CoordX.Import(CoordX_noExt,Importer,Insert); CoordY.Import(CoordY_noExt,Importer,Insert); // now CoordX_noExt and CoordY_noExt are no longer required // NOTE: better to construct CoordX and CoordY as MultiVector // - - - - - - - - - - - - // // M A T R I X S E T U P // // - - - - - - - - - - - - // // build the CRS matrix corresponding to the grid // some vectors are allocated const int MaxNnzRow = 5; Epetra_CrsMatrix A(Copy,Map,MaxNnzRow); int Element, MyRow, GlobalRow, GlobalCol, i, j, k; Epetra_IntSerialDenseMatrix Struct; // temp to create the matrix connectivity Struct.Shape(NumMyElements,MaxNnzRow); for( i=0 ; i<NumMyElements ; ++i ) for( j=0 ; j<MaxNnzRow ; ++j ) Struct(i,j) = -1; // cycle over all the finite elements for( Element=0 ; Element<FE_NumMyElements ; ++Element ) { // cycle over each row for( i=0 ; i<3 ; ++i ) { // get the global and local number of this row GlobalRow = T(Element,i); MyRow = A.LRID(GlobalRow); if( MyRow != -1 ) { // only rows stored on this proc // cycle over the columns for( j=0 ; j<3 ; ++j ) { // get the global number only of this column GlobalCol = T(Element,j); // look if GlobalCol was already put in Struct for( k=0 ; k<MaxNnzRow ; ++k ) { if( Struct(MyRow,k) == GlobalCol || Struct(MyRow,k) == -1 ) break; } if( Struct(MyRow,k) == -1 ) { // new entry Struct(MyRow,k) = GlobalCol; } else if( Struct(MyRow,k) != GlobalCol ) { // maybe not enough space has beenn allocated cerr << "ERROR: not enough space for element " << GlobalRow << "," << GlobalCol << endl; return( 0 ); } } } } } int * Indices = new int [MaxNnzRow]; double * Values = new double [MaxNnzRow]; for( i=0 ; i<MaxNnzRow ; ++i ) Values[i] = 0.0; // now use Struct to fill build the matrix structure for( int Row=0 ; Row<NumMyElements ; ++Row ) { int Length = 0; for( int j=0 ; j<MaxNnzRow ; ++j ) { if( Struct(Row,j) == -1 ) break; Indices[Length] = Struct(Row,j); Length++; } GlobalRow = MyGlobalElements[Row]; A.InsertGlobalValues(GlobalRow, Length, Values, Indices); } // replace global numbering with local one in T for( int Element=0 ; Element<FE_NumMyElements ; ++Element ) { for( int i=0 ; i<3 ; ++i ) { int global = T(Element,i); int local = find(MyGlobalElements,NumMyTotalElements, global); if( global == -1 ) { cerr << "ERROR\n"; return( EXIT_FAILURE ); } T(Element,i) = local; } } // - - - - - - - - - - - - - - // // M A T R I X F I L L - I N // // - - - - - - - - - - - - - - // // room for the local matrix Epetra_SerialDenseMatrix Ke; Ke.Shape(3,3); // now fill the matrix for( int Element=0 ; Element<FE_NumMyElements ; ++Element ) { // variables used inside int GlobalRow; int MyRow; int GlobalCol; double x_triangle[3]; double y_triangle[3]; // get the spatial coordinate of each local node for( int i=0 ; i<3 ; ++i ) { MyRow = T(Element,i); y_triangle[i] = CoordX[MyRow]; x_triangle[i] = CoordY[MyRow]; } // compute the local matrix for Element compute_loc_matrix( x_triangle, y_triangle,Ke ); // insert it in the global one // cycle over each row for( int i=0 ; i<3 ; ++i ) { // get the global and local number of this row MyRow = T(Element,i); if( MyRow < NumMyElements ) { for( int j=0 ; j<3 ; ++j ) { // get global column number GlobalRow = MyGlobalElements[MyRow]; GlobalCol = MyGlobalElements[T(Element,j)]; A.SumIntoGlobalValues(GlobalRow,1,&(Ke(i,j)),&GlobalCol); } } } } A.FillComplete(); // - - - - - - - - - - - - - // // R H S & S O L U T I O N // // - - - - - - - - - - - - - // Epetra_Vector 
x(Map), b(Map); x.Random(); b.PutScalar(0.0); // Solution can be obtained using Aztecoo // free memory before leaving delete[] MyGlobalElements; delete[] Indices; delete[] Values; #ifdef HAVE_MPI MPI_Finalize(); #endif return( EXIT_SUCCESS ); } /* main */
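/* A minimal serial sketch of the assembly step in the finite-element example
 * above: each element contributes a 3x3 local matrix Ke that is scatter-added
 * into the global matrix through the element's local-to-global node map (the
 * role of SumIntoGlobalValues).  The two-triangle mesh and the constant Ke
 * are made up for illustration. */
#include <stdio.h>

enum { NNODES = 4, NELEMS = 2 };

int main(void) {
    double A[NNODES][NNODES] = {{0}};
    int conn[NELEMS][3] = {{0, 1, 2}, {1, 3, 2}};   /* element -> global nodes */
    double Ke[3][3] = {{ 2, -1, -1},
                       {-1,  2, -1},
                       {-1, -1,  2}};               /* placeholder local matrix */

    for (int e = 0; e < NELEMS; ++e)
        for (int i = 0; i < 3; ++i)
            for (int j = 0; j < 3; ++j)
                A[conn[e][i]][conn[e][j]] += Ke[i][j];   /* scatter-add */

    for (int i = 0; i < NNODES; ++i) {
        for (int j = 0; j < NNODES; ++j) printf("%6.1f", A[i][j]);
        printf("\n");
    }
    return 0;
}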
int main(int argc, char *argv[]) { long N=20, M=30; // number of cells NxM int n=2, m=3; // number of blocks nxm int tpi=16, tpj=18; // test pressure coordinates int tai=7, taj=9; // test average coordinates int i, j, I, J; // local and global i,j int myi, myj; // my i,j in neighbor map int bi, bj; // block size in y and x direction int numprocs, myid; // number of processors and my rank id double **P, **A; // 2D array of pressures and averages int **B; // 2D array with map of neighbors MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myid); // get command line arguments if any if (argc > 1) { if (argc != 5) { if (myid==0) { fprintf(stderr, "usage: prog [N M n m]\n"); fprintf(stderr, "Parameters:\n"); fprintf(stderr, "\tN: number of rows or cells in y direction. Default: %ld\n", N); fprintf(stderr, "\tM: number of columns or cells in x direction. Default: %ld\n", M); fprintf(stderr, "\tn: number of blocks in y direction. Default: %d\n", n); fprintf(stderr, "\tm: number of blocks in x direction. Default %d\n", m); } MPI_Finalize(); exit(3); } N = atoi(argv[1]); M = atoi(argv[2]); n = atoi(argv[3]); m = atoi(argv[4]); } bi = N/n; bj = M/m; // start message if (myid==0) { printf("Terapressure v0.1\n"); printf("=================\n"); printf("Number of cells: %lu (%lu x %lu)\n", N*M, N, M); printf("Number of blocks: %d (%d x %d)\n", n*m, n, m); printf("Number of processors %d\n", numprocs); printf("Block size: (%d x %d)\n", bi, bj); } // validate parameters if (N % n != 0 || M % m != 0) { if(myid==0) fprintf(stderr,"Number of blocks in x or y axis do not fit.\n"); MPI_Finalize(); exit(1); } if (numprocs != n*m) { if (myid==0) fprintf(stderr,"Number of processors must be the same as number of blocks: %d\n", n*m); MPI_Finalize(); exit(2); } double t = MPI_Wtime(); // memory allocation // stack allocation is simple but limited in size // double P[bi][bj]; // double A[bi][bj]; // int B[n][m]; // heap allocation P = malloc(sizeof(double*) * bi); A = malloc(sizeof(double*) * bi); for (i=0; i < bi; i++) { P[i] = malloc(sizeof(double) * bj); A[i] = malloc(sizeof(double) * bj); } B = malloc(sizeof(int*) * n); for (i=0; i < n; i++) { B[i] = malloc(sizeof(int) * m); } // domain decomposition int rank = 0; //printf("Neighbors map:\n"); for (i=0; i < n; i++) { for (j=0; j < m; j++) { if (rank == myid) { myi = i; myj = j; } B[i][j] = rank++; //printf ("%3d ", W[i][j]); } //printf ("\n"); } //printf("%d: my i,j in neighbor map: %d,%d\n", myid, myi, myj); // compute pressures // printf("%d: My pressures:\n", myid); double pressure = -1; for (i=0; i < bi; i++) { I = myi * bi + i; for (j=0; j < bj; j++) { J = myj * bj + j; if (I==0 || I==N-1 || J==0 || J==M-1) P[i][j] = 0; else P[i][j] = (double)(I+J) * (double)(I*J); //printf ("L(%d,%d) G(%d,%d): %.2f\t",i,j,I,J, P[i][j]); if (I == tpi && J == tpj) pressure = P[i][j]; } //printf ("\n"); } // average pressure int neighbor; double center, left, top, right, bottom; double average = -1; for (i=0; i < bi; i++) { I = myi * bi + i; for (j=0; j < bj; j++) { J = myj * bj + j; if ( I==0 || I==N-1 || J==0 || J==M-1 ) continue; center = P[i][j]; // top cell if (i==0) { neighbor = B[myi-1][myj]; MPI_Send(¢er, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD); MPI_Recv(&top, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD, 0); //printf("%2d: send to %d (%d,%d): %.2f\n", myid, neighbor,I,J,center); //printf("%2d: recv from %d (%d,%d): %.2f\n", myid, neighbor,I-1,J,top); } else { top = P[i-1][j]; } // bottom cell if (i==bi-1) { neighbor = 
B[myi+1][myj]; MPI_Send(¢er, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD); MPI_Recv(&bottom, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD, 0); //printf("%2d: send to %d (%d,%d): %.2f\n", myid, neighbor,I,J,center); //printf("%2d: recv from %d (%d,%d): %.2f\n", myid, neighbor,I+1,J,bottom); } else { bottom = P[i+1][j]; } // left cell if (j==0) { neighbor = B[myi][myj-1]; MPI_Send(¢er, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD); MPI_Recv(&left, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD, 0); //printf("%2d: send to %d (%d,%d): %.2f\n", myid, neighbor,I,J,center); //printf("%2d: recv from %d (%d,%d): %.2f\n", myid, neighbor,I,J-1,left); } else { left = P[i][j-1]; } // right cell if (j==bj-1) { neighbor = B[myi][myj+1]; MPI_Send(¢er, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD); MPI_Recv(&right, 1, MPI_DOUBLE, neighbor, 0, MPI_COMM_WORLD, 0); //printf("%2d: send to %d (%d,%d): %.2f\n", myid, neighbor,I,J,center); //printf("%2d: recv from %d (%d,%d): %.2f\n", myid, neighbor,I,J+1,right); } else { right = P[i][j+1]; } A[i][j] = ( center + left + top + right + bottom ) / 5; //printf ("L(%d,%d) G(%d,%d): %.2f\t",i,j,I,J, A[i][j]); if (I==tai && J==taj) average = A[i][j]; } //printf ("\n"); } // cleanup memory for (i=0; i < bi; i++) { free(P[i]); free(A[i]); } free(P); free(A); for (i=0; i < n; i++) { free(B[i]); } free(B); // report result //printf("Preasure at (16,18): %.2f\n", P[16][18]); //printf("Avg at (7,9): %.2f\n", A[7][9]); if (pressure > -1) printf("Preasure at (%2d,%2d): %.2f computed by processor %d\n", tpi, tpj, pressure, myid); if (average > -1) printf("Average at (%2d,%2d): %.2f computed by processor %d\n", tai, taj, average, myid); MPI_Barrier(MPI_COMM_WORLD); if (myid==0) printf("Time elapsed: %.2f seconds.\n", MPI_Wtime()-t); MPI_Finalize(); return 0; }
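/* The pressure example above builds its 2-D map of neighbouring ranks by
 * hand and pairs blocking MPI_Send/MPI_Recv calls at the block edges.  A
 * minimal sketch of the same bookkeeping done with a Cartesian communicator:
 * MPI_Cart_shift returns MPI_PROC_NULL on the non-periodic edges, so the
 * boundary cases need no special-casing, and MPI_Sendrecv exchanges a value
 * with a neighbour in one call.  The process grid is whatever
 * MPI_Dims_create picks; the exchanged value is a placeholder. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int dims[2] = {0, 0}, periods[2] = {0, 0};
    MPI_Dims_create(size, 2, dims);                    /* pick an n x m grid */

    MPI_Comm grid;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &grid);
    MPI_Comm_rank(grid, &rank);

    int up, down, left, right;
    MPI_Cart_shift(grid, 0, 1, &up,   &down);          /* neighbours along rows */
    MPI_Cart_shift(grid, 1, 1, &left, &right);         /* neighbours along columns */

    double mine = (double)rank, from_up = 0.0;
    /* Send our value downwards, receive the value of the rank above us;
       sends to and receives from MPI_PROC_NULL are simply no-ops. */
    MPI_Sendrecv(&mine, 1, MPI_DOUBLE, down, 0,
                 &from_up, 1, MPI_DOUBLE, up, 0, grid, MPI_STATUS_IGNORE);

    printf("rank %d: up=%d down=%d left=%d right=%d\n", rank, up, down, left, right);
    MPI_Comm_free(&grid);
    MPI_Finalize();
    return 0;
}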
int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); int rank = 0, size = 1; #ifdef ADIOS2_HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); #endif /** Application variable */ std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; const std::size_t Nx = myFloats.size(); try { /** ADIOS class factory of IO class objects, DebugON is recommended */ adios2::ADIOS adios(MPI_COMM_WORLD, adios2::DebugON); /*** IO class object: settings and factory of Settings: Variables, * Parameters, Transports, and Execution: Engines */ adios2::IO &adios1IO = adios.DeclareIO("ADIOS1IO"); adios1IO.SetEngine("ADIOS1Writer"); adios1IO.AddTransport("file", {{"library", "MPI"}}); /** global array : name, { shape (total) }, { start (local) }, { count * (local) }, all are constant dimensions */ adios2::Variable<float> &bpFloats = adios1IO.DefineVariable<float>( "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims); /** Engine derived class, spawned to start IO operations */ adios2::Engine &adios1Writer = adios1IO.Open("myVector.bp", adios2::Mode::Write); /** Write variable for buffering */ adios1Writer.PutSync<float>(bpFloats, myFloats.data()); /** Create bp file, engine becomes unreachable after this*/ adios1Writer.Close(); } catch (std::invalid_argument &e) { std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n"; std::cout << e.what() << "\n"; } catch (std::ios_base::failure &e) { std::cout << "IO System base failure exception, STOPPING PROGRAM from rank " << rank << "\n"; std::cout << e.what() << "\n"; } catch (std::exception &e) { std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n"; std::cout << e.what() << "\n"; } MPI_Finalize(); return 0; }
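/* A minimal sketch of the one-dimensional decomposition behind the
 * {shape, start, count} arguments in the ADIOS2 example above: every rank
 * contributes Nx contiguous elements, so the global shape is size*Nx and
 * this rank's slice starts at rank*Nx.  Nx here is a placeholder value. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    int rank, size;
    const size_t Nx = 10;                     /* elements written per rank */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    size_t shape = (size_t)size * Nx;         /* global array length */
    size_t start = (size_t)rank * Nx;         /* this rank's offset */
    size_t count = Nx;                        /* this rank's extent */
    printf("rank %d: shape=%zu start=%zu count=%zu\n", rank, shape, start, count);

    MPI_Finalize();
    return 0;
}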
int main(int argc, char **argv) { int err; // Error handling scheme: this function has failed until proven otherwise. int ret = EXIT_FAILURE; err = MPI_Init(&argc, &argv); if(err != MPI_SUCCESS) { // Theoretically, an error at this point will abort the program, and this // code path is never followed. This is here for completeness. fprintf(stderr, "unable to initialize MPI\n"); goto die_immed; } // Install the MPI error handler that returns error codes, so we can perform // the usual process suicide ritual. err = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); if(err != MPI_SUCCESS) { // Again, theoretically, the previous error handler (MPI_Abort) gets called // instead of reaching this fail point. fprintf(stderr, "unable to reset MPI error handler\n"); goto die_finalize_mpi; } int size, rank; err = MPI_Comm_size(MPI_COMM_WORLD, &size); if(err == MPI_SUCCESS) err = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to determine rank or size\n"); goto die_finalize_mpi; } /* Create cartestian communicator */ int dims[2] = {0, 0}; int periods[2] = {1, 1}; err = MPI_Dims_create(size, 2, dims); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to create a cartestian topology\n"); goto die_finalize_mpi; } MPI_Comm cart; err = MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to create cartestian communicator\n"); goto die_finalize_mpi; } dsfmt_t *prng = malloc(sizeof(dsfmt_t)); if(prng == NULL) { fprintf(stderr, "unable to allocate PRNG\n"); goto die_free_cart_comm; } dsfmt_init_gen_rand(prng, SEED + rank); int const net_elems = proc_elems[0]*proc_elems[1]; // Allocate master source array for FFT. double *const master = fftw_malloc(net_elems*sizeof(double)); if(master == NULL) { fprintf(stderr, "unable to allocate master array\n"); goto die_free_prng; } for(unsigned int i = 0; i < net_elems; ++i) { master[i] = dsfmt_genrand_open_close(prng) * 10; } /* Allocate source array for serial array. We copy the master array to this * array, then transform it in place, then reverse transform it. The idea is * that we should get the original data back, and we use this as a consistency * check. We need the original data to compare to. 
*/ double *const source = fftw_malloc(net_elems*sizeof(double)); if(source == NULL) { fprintf(stderr, "unable to allocate source array\n"); goto die_free_master; } for(int i = 0; i < net_elems; ++i) source[i] = master[i]; /* Allocate the destination array */ double complex *const dest = fftw_malloc(net_elems*sizeof(double complex)); if(dest == NULL) { fprintf(stderr, "unable to allocate destination array\n"); goto die_free_source; } /* Allocate a plan to compute the FFT */ fft_par_plan plan = fft_par_plan_r2c(cart, proc_elems, 1, source, dest, &err); if(plan == NULL) { fprintf(stderr, "unable to initialize parallel FFT plan\n"); goto die_free_dest; } /* Execute the forward plan */ err = fft_par_execute_fwd(plan); if(err != MPI_SUCCESS) { fprintf(stderr, "error computing forward plan\n"); goto die_free_plan; } /* Execute the reverse plan */ err = fft_par_execute_rev(plan); if(err != MPI_SUCCESS) { fprintf(stderr, "error computing reverse plan\n"); goto die_free_plan; } /* Compare source to master, use supremum norm */ int norm = 0.0; for(int i = 0; i < net_elems; ++i) { /* Each FFT effectively multiplies by sqrt(net_elems*num_procs) */ norm = fmax(norm, fabs(master[i] - source[i]/net_elems/size)); } if(norm < 1.0e-6) { ret = EXIT_SUCCESS; } die_free_plan: fft_par_plan_destroy(plan); die_free_dest: fftw_free(dest); die_free_source: fftw_free(source); die_free_master: fftw_free(master); die_free_prng: free(prng); die_free_cart_comm: if(err == MPI_SUCCESS) err = MPI_Comm_free(&cart); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to free cartestian communicator\n"); ret = EXIT_FAILURE; } die_finalize_mpi: if(err == MPI_SUCCESS) err = MPI_Finalize(); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to finalize MPI\n"); ret = EXIT_FAILURE; } die_immed: fftw_cleanup(); return ret; }
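/* A minimal sketch of the round-trip consistency check performed at the end
 * of the FFT test above: compare the recovered data against the saved master
 * copy in the supremum norm, after undoing the scaling that an unnormalised
 * forward-plus-inverse transform applies.  Note that the accumulator has to
 * be a double (the listing above declares it as int); the arrays and the
 * scale factor here are placeholders. */
#include <math.h>
#include <stdio.h>

enum { N = 8 };

int main(void) {
    double master[N], recovered[N];
    double scale = (double)N;                 /* unnormalised FFT round trip */
    for (int i = 0; i < N; ++i) {
        master[i] = (double)i / N;
        recovered[i] = master[i] * scale;     /* pretend the transforms ran */
    }

    double norm = 0.0;                        /* supremum-norm accumulator */
    for (int i = 0; i < N; ++i)
        norm = fmax(norm, fabs(master[i] - recovered[i] / scale));

    printf("sup-norm error = %g -> %s\n", norm,
           norm < 1.0e-6 ? "PASS" : "FAIL");
    return 0;
}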
int main( int argc, char* argv[] ) { MPI_Comm CommWorld; int rank; int numProcessors; int procToWatch; Dictionary* dictionary; AbstractContext* abstractContext; /* Initialise MPI, get world info */ MPI_Init( &argc, &argv ); MPI_Comm_dup( MPI_COMM_WORLD, &CommWorld ); MPI_Comm_size( CommWorld, &numProcessors ); MPI_Comm_rank( CommWorld, &rank ); BaseFoundation_Init( &argc, &argv ); BaseIO_Init( &argc, &argv ); BaseContainer_Init( &argc, &argv ); BaseAutomation_Init( &argc, &argv ); BaseExtensibility_Init( &argc, &argv ); BaseContext_Init( &argc, &argv ); stream = Journal_Register( InfoStream_Type, "myStream" ); if( argc >= 2 ) { procToWatch = atoi( argv[1] ); } else { procToWatch = 0; } if( rank == procToWatch ) Journal_Printf( (void*) stream, "Watching rank: %i\n", rank ); /* Read input */ dictionary = Dictionary_New(); /* Build the context */ abstractContext = _AbstractContext_New( sizeof(AbstractContext), "TestContext", MyDelete, MyPrint, NULL, NULL, NULL, _AbstractContext_Build, _AbstractContext_Initialise, _AbstractContext_Execute, _AbstractContext_Destroy, "context", True, MySetDt, 0, 10, CommWorld, dictionary ); /* add hooks to existing entry points */ ContextEP_ReplaceAll( abstractContext, AbstractContext_EP_Build, MyBuild ); ContextEP_ReplaceAll( abstractContext, AbstractContext_EP_Initialise, MyInitialConditions ); ContextEP_ReplaceAll( abstractContext, AbstractContext_EP_Solve, MySolve ); ContextEP_ReplaceAll( abstractContext, AbstractContext_EP_Dt, MyDt ); if( rank == procToWatch ) { Journal_Printf( (void*)stream, "abstractContext->entryPointList->_size: %lu\n", abstractContext->entryPoint_Register->_size ); Journal_Printf( (void*)stream, "abstractContext->entryPointList->count: %u\n", abstractContext->entryPoint_Register->count ); } ContextEP_Append( abstractContext, AbstractContext_EP_Solve, MySolve2 ); ContextEP_ReplaceAll( abstractContext, AbstractContext_EP_Initialise, MyInitialConditions2 ); if( rank == procToWatch ) { stream = Journal_Register( InfoStream_Type, AbstractContext_Type ); AbstractContext_PrintConcise( abstractContext, stream ); Journal_Printf( (void*)stream, "abstractContext->entryPointList->_size: %lu\n", abstractContext->entryPoint_Register->_size ); Journal_Printf( (void*)stream, "abstractContext->entryPointList->count: %u\n", abstractContext->entryPoint_Register->count ); } /* Run the context */ if( rank == procToWatch ) { Stg_Component_Build( abstractContext, 0 /* dummy */, False ); Stg_Component_Initialise( abstractContext, 0 /* dummy */, False ); Stg_Component_Execute( abstractContext, 0 /* dummy */, False ); Stg_Component_Destroy( abstractContext, 0 /* dummy */, False ); } /* Stg_Class_Delete stuff */ Stg_Class_Delete( abstractContext ); Stg_Class_Delete( dictionary ); BaseContext_Finalise(); BaseExtensibility_Finalise(); BaseAutomation_Finalise(); BaseContainer_Finalise(); BaseIO_Finalise(); BaseFoundation_Finalise(); /* Close off MPI */ MPI_Finalize(); return 0; /* success */ }
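/* The test above drives StGermain's entry-point mechanism: named lists of
 * hooks that can be replaced or appended to before the context runs.  A
 * minimal generic sketch of that idea with plain function pointers; none of
 * these names belong to the real library. */
#include <stdio.h>

typedef void (*Hook)(void);

#define MAX_HOOKS 4
typedef struct { Hook hooks[MAX_HOOKS]; int count; } EntryPoint;

static void ep_replace_all(EntryPoint *ep, Hook h) { ep->hooks[0] = h; ep->count = 1; }
static void ep_append(EntryPoint *ep, Hook h)      { if (ep->count < MAX_HOOKS) ep->hooks[ep->count++] = h; }
static void ep_run(const EntryPoint *ep)           { for (int i = 0; i < ep->count; ++i) ep->hooks[i](); }

static void solve_v1(void) { puts("solve: original hook"); }
static void solve_v2(void) { puts("solve: appended hook"); }

int main(void) {
    EntryPoint solve = {{0}, 0};
    ep_replace_all(&solve, solve_v1);   /* like ContextEP_ReplaceAll */
    ep_append(&solve, solve_v2);        /* like ContextEP_Append */
    ep_run(&solve);                     /* hooks run in registration order */
    return 0;
}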
void TRAN_Input_std( MPI_Comm comm1, int Solver, /* input */ int SpinP_switch, char *filepath, double kBvalue, double TRAN_eV2Hartree, double Electronic_Temperature, /* output */ int *output_hks ) { FILE *fp; int i,po; int i_vec[20],i_vec2[20]; double r_vec[20]; char *s_vec[20]; char buf[MAXBUF]; int myid; MPI_Comm_rank(comm1,&myid); /**************************************************** parameters for TRANSPORT ****************************************************/ input_logical("NEGF.Output_HKS",&TRAN_output_hks,0); *output_hks = TRAN_output_hks; /* printf("NEGF.OutputHKS=%d\n",TRAN_output_hks); */ input_string("NEGF.filename.HKS",TRAN_hksoutfilename,"NEGF.hks"); /* printf("TRAN_hksoutfilename=%s\n",TRAN_hksoutfilename); */ input_logical("NEGF.Output.for.TranMain",&TRAN_output_TranMain,0); if ( Solver!=4 ) { return; } /**** show transport credit ****/ TRAN_Credit(comm1); input_string("NEGF.filename.hks.l",TRAN_hksfilename[0],"NEGF.hks.l"); input_string("NEGF.filename.hks.r",TRAN_hksfilename[1],"NEGF.hks.r"); /* read data of leads */ TRAN_RestartFile(comm1, "read","left", filepath,TRAN_hksfilename[0]); TRAN_RestartFile(comm1, "read","right",filepath,TRAN_hksfilename[1]); /* check b-, and c-axes of the unit cell of leads. */ po = 0; for (i=2; i<=3; i++){ if (1.0e-10<fabs(tv_e[0][i][1]-tv_e[1][i][1])) po = 1; if (1.0e-10<fabs(tv_e[0][i][2]-tv_e[1][i][2])) po = 1; if (1.0e-10<fabs(tv_e[0][i][3]-tv_e[1][i][3])) po = 1; } if (po==1){ if (myid==Host_ID){ printf("Warning: The b- or c-axis of the unit cell for the left lead is not same as that for the right lead.\n"); } MPI_Finalize(); exit(1); } /* show chemical potentials */ if (myid==Host_ID){ printf("\n"); printf("Intrinsic chemical potential (eV) of the leads\n"); printf(" Left lead: %15.12f\n",ChemP_e[0]*TRAN_eV2Hartree); printf(" Right lead: %15.12f\n",ChemP_e[1]*TRAN_eV2Hartree); } /* check the conflict of SpinP_switch */ if ( (SpinP_switch!=SpinP_switch_e[0]) || (SpinP_switch!=SpinP_switch_e[1]) ){ if (myid==Host_ID){ printf ("scf.SpinPolarization conflicts between leads or lead and center.\n"); } MPI_Finalize(); exit(0); } input_int( "NEGF.Surfgreen.iterationmax", &tran_surfgreen_iteration_max, 600); input_double("NEGF.Surfgreen.convergeeps", &tran_surfgreen_eps, 1.0e-12); /**** k-points parallel to the layer, which are used for the SCF calc. ****/ i_vec2[0]=1; i_vec2[1]=1; input_intv("NEGF.scf.Kgrid",2,i_vec,i_vec2); TRAN_Kspace_grid2 = i_vec[0]; TRAN_Kspace_grid3 = i_vec[1]; if (TRAN_Kspace_grid2<=0){ if (myid==Host_ID){ printf("NEGF.scf.Kgrid should be over 1\n"); } MPI_Finalize(); exit(1); } if (TRAN_Kspace_grid3<=0){ if (myid==Host_ID){ printf("NEGF.scf.Kgrid should be over 1\n"); } MPI_Finalize(); exit(1); } /* Poisson solver */ TRAN_Poisson_flag = 1; s_vec[0]="FD"; s_vec[1]="FFT"; i_vec[0]=1 ; i_vec[1]=2 ; input_string2int("NEGF.Poisson.Solver", &TRAN_Poisson_flag, 2, s_vec,i_vec); /* parameter to scale terms with Gpara=0 */ input_double("NEGF.Poisson_Gparazero.scaling", &TRAN_Poisson_Gpara_Scaling, 1.0); /* the number of buffer cells in FFTE */ input_int("NEGF.FFTE.Num.Buffer.Cells", &TRAN_FFTE_CpyNum, 1); /* the number of iterations by the Band calculation in the initial SCF iterations */ input_int("NEGF.SCF.Iter.Band", &TRAN_SCF_Iter_Band, 3); /* integration method */ TRAN_integration = 0; s_vec[0]="CF"; s_vec[1]="OLD"; i_vec[0]=0 ; i_vec[1]=1 ; input_string2int("NEGF.Integration", &TRAN_integration, 2, s_vec,i_vec); /**** k-points parallel to the layer, which are used for the transmission calc. 
****/ i_vec2[0]=1; i_vec2[1]=1; input_intv("NEGF.tran.Kgrid",2,i_vec,i_vec2); TRAN_TKspace_grid2 = i_vec[0]; TRAN_TKspace_grid3 = i_vec[1]; if (TRAN_TKspace_grid2<=0){ if (myid==Host_ID){ printf("NEGF.tran.Kgrid should be over 1\n"); } MPI_Finalize(); exit(1); } if (TRAN_TKspace_grid3<=0){ if (myid==Host_ID){ printf("NEGF.tran.Kgrid should be over 1\n"); } MPI_Finalize(); exit(1); } /**** source and drain bias voltage ****/ input_logical("NEGF.bias.apply",&tran_bias_apply,1); /* default=on */ if ( tran_bias_apply ) { double tmp; tran_biasvoltage_e[0] = 0.0; input_double("NEGF.bias.voltage", &tmp, 0.0); /* in eV */ tran_biasvoltage_e[1] = tmp/TRAN_eV2Hartree; } else { tran_biasvoltage_e[0]=0.0; tran_biasvoltage_e[1]=0.0; } if (tran_bias_apply) { int side; side=0; TRAN_Apply_Bias2e(comm1, side, tran_biasvoltage_e[side], TRAN_eV2Hartree, SpinP_switch_e[side], atomnum_e[side], WhatSpecies_e[side], Spe_Total_CNO_e[side], FNAN_e[side], natn_e[side], Ngrid1_e[side], Ngrid2_e[side], Ngrid3_e[side], OLP_e[side][0], &ChemP_e[side],H_e[side], dVHart_Grid_e[side] ); /* output */ side=1; TRAN_Apply_Bias2e(comm1, side, tran_biasvoltage_e[side], TRAN_eV2Hartree, SpinP_switch_e[side], atomnum_e[side], WhatSpecies_e[side], Spe_Total_CNO_e[side], FNAN_e[side], natn_e[side], Ngrid1_e[side], Ngrid2_e[side], Ngrid3_e[side], OLP_e[side][0], &ChemP_e[side], H_e[side], dVHart_Grid_e[side] ); /* output */ } /**** gate voltage ****/ input_double("NEGF.gate.voltage", &tran_gate_voltage, 0.0); tran_gate_voltage /= TRAN_eV2Hartree; /****************************************************** parameters for the DOS calculation ******************************************************/ i=0; r_vec[i++] = -10.0; r_vec[i++] = 10.0; r_vec[i++] = 5.0e-3; input_doublev("NEGF.Dos.energyrange",i, tran_dos_energyrange, r_vec); /* in eV */ /* change the unit from eV to Hartree */ tran_dos_energyrange[0] /= TRAN_eV2Hartree; tran_dos_energyrange[1] /= TRAN_eV2Hartree; tran_dos_energyrange[2] /= TRAN_eV2Hartree; input_int("NEGF.Dos.energy.div",&tran_dos_energydiv,200); i_vec2[0]=1; i_vec2[1]=1; input_intv("NEGF.Dos.Kgrid",2,i_vec,i_vec2); TRAN_dos_Kspace_grid2 = i_vec[0]; TRAN_dos_Kspace_grid3 = i_vec[1]; /******************************************************** integration on real axis with a small imaginary part for the "non-equilibrium" region ********************************************************/ input_double("NEGF.bias.neq.im.energy", &Tran_bias_neq_im_energy, 1.0e-2); /* in eV */ if (Tran_bias_neq_im_energy<0.0) { if (myid==Host_ID) printf("NEGF.bias.neq.im.energy should be positive.\n"); MPI_Finalize(); exit(1); } /* change the unit from eV to Hartree */ Tran_bias_neq_im_energy /= TRAN_eV2Hartree; input_double("NEGF.bias.neq.energy.step", &Tran_bias_neq_energy_step, 0.02); /* in eV */ if (Tran_bias_neq_energy_step<0.0) { if (myid==Host_ID) printf("NEGF.bias.neq.energy.step should be positive.\n"); MPI_Finalize(); exit(1); } /* change the unit from eV to Hartree */ Tran_bias_neq_energy_step /= TRAN_eV2Hartree; input_double("NEGF.bias.neq.cutoff", &Tran_bias_neq_cutoff, 1.0e-8); /* dimensionless */ /******************************************************** contour integration based on a continued fraction representation of the Fermi function ********************************************************/ input_int("NEGF.Num.Poles", &tran_num_poles,150); TRAN_Set_IntegPath( comm1, TRAN_eV2Hartree, kBvalue, Electronic_Temperature ); }
int main(int argc, char *argv[]) { SOS_runtime *my_sos; int i; int elem; int next_elem; char pub_title[SOS_DEFAULT_STRING_LEN] = {0}; char elem_name[SOS_DEFAULT_STRING_LEN] = {0}; SOS_pub *pub; double time_now; double time_start; int MAX_SEND_COUNT; int ITERATION_SIZE; int PUB_ELEM_COUNT; int JITTER_ENABLED; double JITTER_INTERVAL; MPI_Init(&argc, &argv); /* Process command-line arguments */ if ( argc < 5 ) { fprintf(stderr, "%s\n", USAGE); exit(1); } MAX_SEND_COUNT = -1; ITERATION_SIZE = -1; PUB_ELEM_COUNT = -1; JITTER_ENABLED = 0; JITTER_INTERVAL = 0.0; for (elem = 1; elem < argc; ) { if ((next_elem = elem + 1) == argc) { fprintf(stderr, "%s\n", USAGE); exit(1); } if ( strcmp(argv[elem], "-i" ) == 0) { ITERATION_SIZE = atoi(argv[next_elem]); } else if ( strcmp(argv[elem], "-m" ) == 0) { MAX_SEND_COUNT = atoi(argv[next_elem]); } else if ( strcmp(argv[elem], "-p" ) == 0) { PUB_ELEM_COUNT = atoi(argv[next_elem]); } else if ( strcmp(argv[elem], "-j" ) == 0) { JITTER_INTERVAL = strtod(argv[next_elem], NULL); JITTER_ENABLED = 1; } else { fprintf(stderr, "Unknown flag: %s %s\n", argv[elem], argv[next_elem]); } elem = next_elem + 1; } if ( (MAX_SEND_COUNT < 1) || (ITERATION_SIZE < 1) || (PUB_ELEM_COUNT < 1) || (JITTER_INTERVAL < 0.0) ) { fprintf(stderr, "%s\n", USAGE); exit(1); } /* Example variables. */ char *str_node_id = getenv("HOSTNAME"); char *str_prog_ver = "1.0"; char var_string[100] = {0}; int var_int; double var_double; my_sos = SOS_init( &argc, &argv, SOS_ROLE_CLIENT, SOS_LAYER_APP); SOS_SET_CONTEXT(my_sos, "demo_app.main"); srandom(my_sos->my_guid); printf("demo_app starting...\n"); fflush(stdout); dlog(0, "Creating a pub...\n"); pub = SOS_pub_create(my_sos, "demo", SOS_NATURE_CREATE_OUTPUT); dlog(0, " ... pub->guid = %ld\n", pub->guid); dlog(0, "Manually configuring some pub metadata...\n"); strcpy (pub->prog_ver, str_prog_ver); pub->meta.channel = 1; pub->meta.nature = SOS_NATURE_EXEC_WORK; pub->meta.layer = SOS_LAYER_APP; pub->meta.pri_hint = SOS_PRI_DEFAULT; pub->meta.scope_hint = SOS_SCOPE_DEFAULT; pub->meta.retain_hint = SOS_RETAIN_DEFAULT; dlog(0, "Packing a couple values...\n"); var_int = 1234567890; snprintf(var_string, 100, "Hello, world!"); var_double = 0.0; int pos = -1; for (i = 0; i < PUB_ELEM_COUNT; i++) { snprintf(elem_name, SOS_DEFAULT_STRING_LEN, "example_dbl_%d", i); pos = SOS_pack(pub, elem_name, SOS_VAL_TYPE_DOUBLE, &var_double); dlog(0, " pub->data[%d]->guid == %" SOS_GUID_FMT "\n", pos, pub->data[pos]->guid); var_double += 0.0000001; } dlog(0, "Announcing\n"); SOS_announce(pub); dlog(0, "Re-packing --> Publishing %d values for %d times per iteration:\n", PUB_ELEM_COUNT, ITERATION_SIZE); SOS_TIME( time_start ); int mils = 0; int ones = 0; for (ones = 0; ones < ITERATION_SIZE; ones++) { for (i = 0; i < PUB_ELEM_COUNT; i++) { snprintf(elem_name, SOS_DEFAULT_STRING_LEN, "example_dbl_%d", i); SOS_pack(pub, elem_name, SOS_VAL_TYPE_DOUBLE, &var_double); var_double += 0.000001; } SOS_publish(pub); } /* Catch any stragglers. */ //SOS_publish(pub); dlog(0, " ... done.\n"); dlog(0, "demo_app finished successfully!\n"); SOS_finalize(my_sos); MPI_Finalize(); return (EXIT_SUCCESS); }
int main (int argc, char* argv[]) { /* check that we got an appropriate number of arguments */ if (argc != 1 && argc != 4) { printf("Usage: test_correctness [filesize times sleep_secs]\n"); return 1; } /* read parameters from command line, if any */ if (argc > 1) { filesize = (size_t) atol(argv[1]); times = atoi(argv[2]); seconds = atoi(argv[3]); } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* time how long it takes to get through init */ MPI_Barrier(MPI_COMM_WORLD); double init_start = MPI_Wtime(); SCR_Init(); double init_end = MPI_Wtime(); double secs = init_end - init_start; MPI_Barrier(MPI_COMM_WORLD); /* compute and print the init stats */ double secsmin, secsmax, secssum; MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/ranks); } MPI_Barrier(MPI_COMM_WORLD); /* allocate space for the checkpoint data (make filesize a function of rank for some variation) */ filesize = filesize + rank; char* buf = (char*) malloc(filesize); /* get the name of our checkpoint file to open for read on restart */ char name[256]; char file[SCR_MAX_FILENAME]; sprintf(name, "rank_%d.ckpt", rank); int found_checkpoint = 0; if (SCR_Route_file(name, file) == SCR_SUCCESS) { if (read_checkpoint(file, &timestep, buf, filesize)) { /* read the file ok, now check that contents are good */ found_checkpoint = 1; //printf("%d: Successfully read checkpoint from %s\n", rank, file); if (!check_buffer(buf, filesize, rank, timestep)) { printf("%d: Invalid value in buffer\n", rank); MPI_Abort(MPI_COMM_WORLD, 1); return 1; } } else { printf("%d: Could not read checkpoint %d from %s\n", rank, timestep, file); } } else printf("%d: SCR_Route_file failed during restart attempt\n", rank); /* determine whether all tasks successfully read their checkpoint file */ int all_found_checkpoint = 0; MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); if (!all_found_checkpoint && rank == 0) { printf("At least one rank (perhaps all) did not find its checkpoint\n"); } /* check that everyone is at the same timestep */ int timestep_and, timestep_or; MPI_Allreduce(&timestep, &timestep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD); MPI_Allreduce(&timestep, &timestep_or, 1, MPI_INT, MPI_BOR, MPI_COMM_WORLD); if (timestep_and != timestep_or) { printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep); return 1; } /* make up some data for the next checkpoint */ init_buffer(buf, filesize, rank, timestep); timestep++; /* prime system once before timing */ getbw(name, buf, filesize, 1); /* now compute the bandwidth and print stats */ if (times > 0) { double bw = getbw(name, buf, filesize, times); MPI_Barrier(MPI_COMM_WORLD); /* compute stats and print them to the screen */ double bwmin, bwmax, bwsum; MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\tAgg %7.2f MB/s\n", bwmin, bwmax, bwsum/ranks, bwsum ); } } if (buf != NULL) { free(buf); buf = NULL; } SCR_Finalize(); MPI_Finalize(); return 0; }
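/* init_buffer() and check_buffer() are defined elsewhere in this test; the
 * following is only a minimal sketch (an assumption, not the actual SCR test
 * code) of what such a pair might look like: fill each byte from a simple
 * deterministic function of (position, rank, timestep) so that a restart can
 * verify the data it read back from the checkpoint file. */
#include <stddef.h>

static void init_buffer(char* buf, size_t size, int rank, int ckpt)
{
  for (size_t i = 0; i < size; i++) {
    /* deterministic pattern that depends on position, rank, and timestep */
    buf[i] = (char)((i + (size_t)rank * 7 + (size_t)ckpt * 13) & 0xFF);
  }
}

static int check_buffer(char* buf, size_t size, int rank, int ckpt)
{
  for (size_t i = 0; i < size; i++) {
    if (buf[i] != (char)((i + (size_t)rank * 7 + (size_t)ckpt * 13) & 0xFF)) {
      return 0; /* mismatch: checkpoint contents are corrupt or stale */
    }
  }
  return 1; /* buffer contents are consistent with (rank, timestep) */
}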
int main( int argc, char* argv[] ) { MPI_Comm CommWorld; int rank; int procCount; Dictionary* dictionary; Geometry* geometry; ElementLayout* eLayout; Topology* nTopology; NodeLayout* nLayout; HexaMD* meshDecomp; Index i; Processor_Index procToWatch; /* Initialise MPI, get world info */ MPI_Init(&argc, &argv); MPI_Comm_dup( MPI_COMM_WORLD, &CommWorld ); MPI_Comm_size(CommWorld, &procCount); MPI_Comm_rank(CommWorld, &rank); Base_Init( &argc, &argv ); DiscretisationGeometry_Init( &argc, &argv ); DiscretisationShape_Init( &argc, &argv ); DiscretisationMesh_Init( &argc, &argv ); MPI_Barrier( CommWorld ); /* Ensures copyright info always come first in output */ procToWatch = argc >= 2 ? atoi(argv[1]) : 0; dictionary = Dictionary_New(); Dictionary_Add( dictionary, "meshSizeI", Dictionary_Entry_Value_FromUnsignedInt( 13 ) ); Dictionary_Add( dictionary, "meshSizeJ", Dictionary_Entry_Value_FromUnsignedInt( 4 ) ); Dictionary_Add( dictionary, "meshSizeK", Dictionary_Entry_Value_FromUnsignedInt( 4 ) ); Dictionary_Add( dictionary, "maxX", Dictionary_Entry_Value_FromUnsignedInt( 6 ) ); Dictionary_Add( dictionary, "allowUnbalancing", Dictionary_Entry_Value_FromBool( True ) ); Dictionary_Add( dictionary, "shadowDepth", Dictionary_Entry_Value_FromUnsignedInt( 1 ) ); Dictionary_Add( dictionary, "isPeriodicI", Dictionary_Entry_Value_FromBool( True ) ); eLayout = (ElementLayout*)ParallelPipedHexaEL_New( "PPHexaEL", 3, dictionary ); nTopology = (Topology*)IJK6Topology_New( "IJK6Topology", dictionary ); nLayout = (NodeLayout*)CornerNL_New( "CornerNL", dictionary, eLayout, nTopology ); meshDecomp = HexaMD_New( "HexaMD", dictionary, MPI_COMM_WORLD, eLayout, nLayout ); ElementLayout_Build( eLayout, meshDecomp ); if (rank == procToWatch) { printf( "Element with point:\n" ); } geometry = eLayout->geometry; for( i = 0; i < geometry->pointCount; i++ ) { Coord point; int excEl, incEl; geometry->pointAt( geometry, i, point ); if (rank == procToWatch) { printf( "\tNode %u (%0.2f,%0.2f,%0.2f):\n", i, point[0], point[1], point[2] ); excEl = eLayout->elementWithPoint( eLayout, meshDecomp, point, NULL, EXCLUSIVE_UPPER_BOUNDARY, 0, NULL ); incEl = eLayout->elementWithPoint( eLayout, meshDecomp, point, NULL, INCLUSIVE_UPPER_BOUNDARY, 0, NULL ); printf( "\t\tIncl %4u, Excl %4u\n", incEl, excEl ); } point[0] += 0.1; point[1] += 0.1; point[2] += 0.1; if (rank == procToWatch) { printf( "\tTest point %u (%0.2f,%0.2f,%0.2f):\n", i, point[0], point[1], point[2] ); excEl = eLayout->elementWithPoint( eLayout, meshDecomp, point, NULL, EXCLUSIVE_UPPER_BOUNDARY, 0, NULL ); incEl = eLayout->elementWithPoint( eLayout, meshDecomp, point, NULL, INCLUSIVE_UPPER_BOUNDARY, 0, NULL ); printf( "\t\tIncl %4u, Excl %4u\n", incEl, excEl ); } } if (rank == procToWatch) { printf( "\n" ); } Stg_Class_Delete( dictionary ); Stg_Class_Delete( meshDecomp ); Stg_Class_Delete( nLayout ); Stg_Class_Delete( nTopology ); Stg_Class_Delete( eLayout ); DiscretisationMesh_Finalise(); DiscretisationShape_Finalise(); DiscretisationGeometry_Finalise(); Base_Finalise(); /* Close off MPI */ MPI_Finalize(); return 0; }
int main( int argc, char *argv[] ) { int numprocs, myid, server, workerid, ranks[1], request, i, iter, ix, iy, done; long rands[CHUNKSIZE], max, in, out, totalin, totalout; double x, y, Pi, error, epsilon; MPI_Comm world, workers; MPI_Group world_group, worker_group; MPI_Status status; MPI_Init( &argc, &argv ); world = MPI_COMM_WORLD; MPI_Comm_size( world, &numprocs ); MPI_Comm_rank( world, &myid ); server = numprocs-1; // Last process is a random server /*** * Now Master should read epsilon from command line * and distribute it to all processes. */ if (myid == 0) // Read epsilon from command line sscanf( argv[1], "%lf", &epsilon ); MPI_Bcast( &epsilon, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD ); /*** * Create new process group called world_group containing all * processes and its communicator called world * and a group called worker_group containing all processes * except the last one (called here server) * and its communicator called workers. */ MPI_Comm_group( world, &world_group ); ranks[0] = server; MPI_Group_excl( world_group, 1, ranks, &worker_group ); MPI_Comm_create( world, worker_group, &workers ); MPI_Group_free( &worker_group ); MPE_XGraph graph; MPE_Open_graphics(&graph,MPI_COMM_WORLD,(char*)0, -1,-1,WINDOW_SIZE,WINDOW_SIZE,MPE_GRAPH_INDEPENDENT); /*** * Server part * * Server should loop until request code is 0, in each iteration: * - receiving request code from any slave * - generating a vector of CHUNKSIZE randoms <= INT_MAX * - sending vector back to slave */ if (myid == server) { // I am the random generator server do { MPI_Recv( &request, 1, MPI_INT, MPI_ANY_SOURCE, REQUEST, world, &status ); if (request) { for (i = 0; i < CHUNKSIZE; ) { rands[i] = rand(); if ( rands[i] <= INT_MAX ) i++; } MPI_Send( rands, CHUNKSIZE, MPI_LONG, status.MPI_SOURCE, REPLY, world ); } } while( request > 0 ); } /*** * Workers (including Master) part * * Worker should send initial request to server. * Later, in a loop worker should: * - receive vector of randoms * - compute x,y point inside unit square * - check (and count result) if point is inside/outside * unit circle * - sum both counts over all workers * - calculate pi and its error (from "exact" value) * - test if error is within epsilon limit * - test continuation condition (error and max. points limit) * - print pi by master only * - send a request to server (all if more or master only if finish) * Before finishing workers should free their communicator. */ else { // I am a worker process request = 1; done = 0; in = out = 0; max = INT_MAX; // max int, for normalization MPI_Send( &request, 1, MPI_INT, server, REQUEST, world ); MPI_Comm_rank( workers, &workerid ); iter = 0; while (!done) { iter++; request = 1; MPI_Recv( rands, CHUNKSIZE, MPI_LONG, server, REPLY, world, &status ); for (i = 0; i < CHUNKSIZE - 1; ) { x = (((double) rands[i++])/max) * 2 - 1; y = (((double) rands[i++])/max) * 2 - 1; if ( x*x + y*y < 1.0 ) { MPE_Draw_point(graph,(int)(WINDOW_SIZE/2+x*WINDOW_SIZE/2),(int)(WINDOW_SIZE+y*WINDOW_SIZE/2),MPE_RED); in++; } else out++; } MPI_Allreduce( &in, &totalin, 1, MPI_LONG, MPI_SUM, workers ); MPI_Allreduce( &out, &totalout, 1, MPI_LONG, MPI_SUM, workers ); Pi = ( 4.0 * totalin ) / ( totalin + totalout ); error = fabs( Pi - PI ); done = ( error < epsilon || (totalin + totalout) > THROW_MAX ); request = (done) ? 
0 : 1; MPE_Update(graph); if (myid == 0) { printf( "\rpi = %23.20f", Pi ); MPI_Send( &request, 1, MPI_INT, server, REQUEST, world ); } else { if (request) MPI_Send( &request, 1, MPI_INT, server, REQUEST, world ); } } MPI_Comm_free( &workers ); } /*** * Master should print final point counts. */ if (myid == 0) { printf( "\npoints: %ld\nin: %ld, out: %ld, <ret> to exit\n", totalin+totalout, totalin, totalout ); getchar(); } MPE_Close_graphics(graph); MPI_Finalize(); return 0; }
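/* The loop above compares fabs(Pi - PI) against epsilon.  As an illustration
 * only (not part of the original example), the statistical uncertainty of the
 * Monte Carlo estimate can be computed from the hit fraction p = in/N: the
 * estimator is 4p, so its standard error is 4*sqrt(p*(1-p)/N), which gives a
 * sense of how small epsilon can reasonably be for a given number of points. */
#include <math.h>

double pi_standard_error(long in, long total)
{
  double p = (double)in / (double)total;   /* fraction of points inside the circle */
  return 4.0 * sqrt(p * (1.0 - p) / (double)total);
}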
int main(int argc, char *argv[]) { int rank, nprocs; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); int m, n, m_At, n_At, nnz, k = 200, p = 200, display = 10; int len, begin, end, s_indx, t_s_indx; std::map<int, int> word_kind; FILE *fp, *fp2; douban::option_parser cmd_parser; cmd_parser.parse_all(argc, argv); fp = fopen("../data/pwtk.bin", "rb"); fread(&m, sizeof(int), 1, fp); fread(&n, sizeof(int), 1, fp); fread(&nnz, sizeof(int), 1, fp); fp2 = fopen("../data/pwtk_trans.bin", "rb"); fread(&m_At, sizeof(int), 1, fp2); fread(&n_At, sizeof(int), 1, fp2); fread(&nnz, sizeof(int), 1, fp2); douban::mat_container<double> U(m, k); douban::mat_container<double> V(n, k); douban::vec_container<double> S(k); std::vector<size_t> ar, ac, Ap, Ai, atr, atc, Atp, Ati; std::vector<double> av, atv, Av, Atv; len = nnz / nprocs; begin = rank * len; end = (rank + 1) * len; if(rank == nprocs - 1) { end = nnz; len = nnz - begin; } std::cout << len << std::endl; ar.resize(len); ac.resize(len); av.resize(len); atr.resize(len); atc.resize(len); atv.resize(len); fseek(fp, (begin * (sizeof(int)*2 + sizeof(double))), SEEK_CUR); for(size_t i = 0; i < (size_t)len; ++i) { fread(&(ar[i]), sizeof(int), 1, fp); fread(&(ac[i]), sizeof(int), 1, fp); fread(&(av[i]), sizeof(double), 1, fp); } fseek(fp2, (begin * (sizeof(int)*2 + sizeof(double))), SEEK_CUR); for(size_t i = 0; i < (size_t)len; ++i) { fread(&(atr[i]), sizeof(int), 1, fp2); fread(&(atc[i]), sizeof(int), 1, fp2); fread(&(atv[i]), sizeof(double), 1, fp2); } m = 0; m_At = 0; for(size_t i = 0; i < (size_t)len; ++i) { if(ar[i] != ar[i+1]) m++; if(atr[i] != atr[i+1]) m_At++; } /* m = ar[len - 1] - ar[0] + 1; m_At = atr[len - 1] - atr[0] + 1; */ std::cout << "local m is " << m << std::endl; std::cout << "local m_At is " << m_At << std::endl; s_indx = ar[0]; t_s_indx = atr[0]; if(rank != 0) { for(size_t i = 0; i < (size_t)len; ++i) ar[i] -= s_indx; for(size_t i = 0; i < (size_t)len; ++i) atr[i] -= t_s_indx; } Ap.resize(m + 1); Av.resize(len); Ai.resize(len); Atp.resize(m_At + 1); Atv.resize(len); Ati.resize(len); douban::coo_to_csr((size_t)m, (size_t)n, (size_t)len, av, ar, ac, Av, Ap, Ai); douban::coo_to_csr((size_t)m_At, (size_t)n_At, (size_t)len, atv, atr, atc, Atv, Atp, Ati); auto A = douban::make_mat_csr((size_t)m, (size_t)n, &Av, &Ap, &Ai); auto At = douban::make_mat_csr((size_t)m_At, (size_t)n_At, &Atv, &Atp, &Ati); douban::linalg::svd_tr(A, At, U, S, V, p, 1.e-7, display, s_indx, t_s_indx); //douban::linalg::svd_tr(douban::linalg::make_gemv_ax(&A), douban::linalg::make_gemv_ax(&At), U, S, V, p, 1.e-7, display, s_indx, t_s_indx); //std::cout << "svd_tr finished!" << std::endl; //cost = douban::linalg::svd_cost(A, U, S, V); //std::cout << "cost of SVD is " << cost << std::endl; for(size_t i = 0; i < (size_t)k; i++) std::cout << "S(i) is" << S.get(i) << std::endl; fclose(fp); fclose(fp2); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { double** u = matrix(N, N); double** un = matrix(N, N); int ncpu, rank; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ncpu); MPI_Comm_rank(MPI_COMM_WORLD, &rank); int ncols = N / ncpu; int beg = rank*ncols + min(rank, N % ncpu); int end = (rank+1)*ncols + min(rank+1, N % ncpu); std::cout << rank << "\t" << beg << "\t" << end << "\n"; // Initial condition for (int i=beg; i<end; i++) for (int j=0; j<N; j++) u[i][j] = 0; // Boundary condition on the bottom wall for (int i=beg; i<end; i++) { double x = (double)i / (N-1); u[i][0] = 4*x*(1-x); } Timer stopky("vypocet"); // Iteration loop stopky.start(); for (int n=0; n<10000; n++) { // Exchange data at the strip boundaries update_ghost_zones(u,beg,end,rank,ncpu); // Update the interior points for (int i=max(1,beg); i<min(N-1,end); i++) for (int j=1; j<N-1; j++) un[i][j] = (u[i-1][j] + u[i+1][j] + u[i][j-1] + u[i][j+1]) / 4; // Copy un back into u for (int i=max(1,beg); i<min(N-1,end); i++) for (int j=1; j<N-1; j++) u[i][j] = un[i][j]; // Print progress to the screen if (beg <= N/2 && N/2 < end) if (n%100 == 0) std::cout << "n=" << n << "\t u(0.5,0.5)="<<u[N/2][N/2]<<"\n"; } stopky.stop(); MPI_Finalize(); return 0; }
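/* update_ghost_zones() is called above but not shown here.  A minimal sketch
 * (an assumption about its role, not the original implementation) of a strip
 * exchange with MPI_Sendrecv: each rank sends its first and last owned rows
 * and receives the neighbouring ranks' boundary rows into u[beg-1] and u[end].
 * It assumes N is the global grid size and that each row u[i] is a contiguous
 * array of N doubles, as suggested by matrix(N, N). */
#include <mpi.h>

void update_ghost_zones(double** u, int beg, int end, int rank, int ncpu)
{
  MPI_Status st;
  if (rank > 0)          /* exchange with the rank that owns the rows below */
    MPI_Sendrecv(u[beg],     N, MPI_DOUBLE, rank - 1, 0,
                 u[beg - 1], N, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, &st);
  if (rank < ncpu - 1)   /* exchange with the rank that owns the rows above */
    MPI_Sendrecv(u[end - 1], N, MPI_DOUBLE, rank + 1, 0,
                 u[end],     N, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, &st);
}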
int main(int argc, char *argv[]) { MPI_Status status; int num, rank, size, tag, next, from; if (argc != 2) { printf("usage: program_name number_of_rounds\n"); exit(-1); } /* Start up MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* Arbitrarily choose 201 to be our tag. Calculate the */ /* rank of the next process in the ring. Use the modulus */ /* operator so that the last process "wraps around" to rank */ /* zero. */ tag = 201; next = (rank + 1) % size; from = (rank + size - 1) % size; /* If we are the "console" process, get an integer from the */ /* command line to specify how many times we want to go around */ /* the ring */ if (rank == 0) { num = atoi (argv[1]); printf("Process %d sending %d to %d\n", rank, num, next); MPI_Send(&num, 1, MPI_INT, next, tag, MPI_COMM_WORLD); } /* Pass the message around the ring. The exit mechanism works */ /* as follows: the message (a positive integer) is passed */ /* around the ring; each time it returns to process 0 it is */ /* decremented. Once it reaches zero, every process forwards */ /* it one last time and then exits. */ while (1) { MPI_Recv(&num, 1, MPI_INT, from, tag, MPI_COMM_WORLD, &status); printf("Process %d received %d\n", rank, num); if (rank == 0) { num--; printf("Process 0 decremented num\n"); } printf("Process %d sending %d to %d\n", rank, num, next); MPI_Send(&num, 1, MPI_INT, next, tag, MPI_COMM_WORLD); if (num == 0) { printf("Process %d exiting\n", rank); break; } } /* The last process does one extra send to process 0, which needs */ /* to be received before the program can exit */ if (rank == 0) MPI_Recv(&num, 1, MPI_INT, from, tag, MPI_COMM_WORLD, &status); /* Quit */ MPI_Finalize(); return 0; }
int main(int argc, char** argv) { // Record the program start time clock_t t_start = clock(); int rank; int numtasks; int i; int stride; int vector[MAX]; for(i = 1; i <= MAX; i++) vector[ i - 1 ] = i; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); MPI_Comm_rank(MPI_COMM_WORLD, &rank); stride = MAX/(numtasks); //printf("Stride: %d\n", stride); int vtmp[stride]; int disp[numtasks]; int sendcount[numtasks]; int acum; for(i = 0; i < numtasks; i++) { disp[i] = i * stride; sendcount[i] = stride; } // vector: where the data is taken from // sendcount: how many elements are sent to each process // disp: displacement relative to the send buffer from which process i takes its values // sendtype: type of the data being sent // vtmp: where the received data is stored // recvcount: how many elements each process receives // recvtype: type of the data being received // root: rank that originates the data distribution // comm: process communicator // MPI_Scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) MPI_Scatterv(vector, sendcount, disp, MPI_INT, vtmp, stride, MPI_INT, 0, MPI_COMM_WORLD); acum = 0; for(i = 0; i < stride; i++) { acum += vtmp[i]; } printf("Subtotal %d on node %d\n", acum, rank); MPI_Reduce(&acum, vtmp, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if(rank == 0) { printf("TOTAL: %d\n", vtmp[0]); // Record the program end time clock_t t_end = clock(); // Program execution time clock_t t_run = t_end - t_start; printf ("Execution time: (%f seconds).\n",((float)t_run)/CLOCKS_PER_SEC); } MPI_Finalize(); return 0; }
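/* The example above assumes MAX is a multiple of numtasks, so every rank gets
 * exactly `stride` elements.  A minimal sketch (an illustration, not part of
 * the original program) of how the sendcount[] and disp[] arrays for
 * MPI_Scatterv could be built when the split is uneven: the first
 * total % numtasks ranks get one extra element, and displacements accumulate
 * the counts. */
void build_scatterv_layout(int total, int numtasks, int* sendcount, int* disp)
{
  int base = total / numtasks;
  int extra = total % numtasks;
  int offset = 0;
  for (int i = 0; i < numtasks; i++) {
    sendcount[i] = base + (i < extra ? 1 : 0); /* spread the remainder */
    disp[i] = offset;                          /* start of rank i's block */
    offset += sendcount[i];
  }
  /* usage: build_scatterv_layout(MAX, numtasks, sendcount, disp); then pass
   * sendcount[rank] as the receive count on each rank. */
}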
int main (int argc, char **argv) { MPI_Init (&argc, &argv); GetPot cl (argc, argv); if (cl.search (2, "-h", "--help")) { std::cerr << help_text << std::endl; return 0; } const double a = cl.follow (double (0.0), "-a"); const double b = cl.follow (double (1.0), "-b"); const unsigned int nnodes = cl.follow (100, 2, "-n", "--nnodes"); const unsigned int nel = nnodes - 1; const std::string diffusion = cl.follow ("1.0", 2, "-d", "--diffusion"); const std::string forcing = cl.follow ("1.0", 2, "-f", "--forcing"); const double L = b - a; constexpr double tol = 1e-6; constexpr unsigned int maxit = 100; constexpr unsigned int overlap = 100; MPI_Status status; int mpi_size, mpi_rank, tag; MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); const double L_loc = L / double(mpi_size); const double h = L_loc / ceil (double(nel) / double(mpi_size)); double a_loc = .0; double lval = .0; double b_loc = .0; double rval = .0; double buffer = .0; unsigned int nel_loc = 0; unsigned int ndof_loc = 1; fem_1d<double> *subproblems; coeff<double> a_coeff (diffusion); coeff<double> f_coeff (forcing); a_loc = a + mpi_rank * L_loc; b_loc = a_loc + L_loc; nel_loc = ceil (double(nel) / double(mpi_size)); if (mpi_rank > 0) { a_loc -= overlap * h; nel_loc += overlap; } if (mpi_rank < mpi_size - 1) { b_loc += overlap * h; nel_loc += overlap; } ndof_loc = nel_loc + 1; subproblems = new fem_1d<double> (new mesh<double> (a_loc, b_loc, ndof_loc)); subproblems->set_diffusion_coefficient (a_coeff); subproblems->set_source_coefficient (f_coeff); subproblems->assemble (); subproblems->set_dirichlet (fem_1d<double>::left_boundary, 0.0); subproblems->set_dirichlet (fem_1d<double>::right_boundary, 0.0); subproblems->solve (); for (unsigned int it = 0; it < maxit; ++it) { // With the following implementation // communication will occur sequentially // left to right first then right to left // Receive from left neighbour if (mpi_rank > 0) { std::cerr << "rank " << mpi_rank << " receiving lval from rank " << mpi_rank - 1 << std::endl; MPI_Recv (&buffer, 1, MPI_DOUBLE, mpi_rank - 1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); std::cerr << "rank " << mpi_rank << " received lval from rank " << mpi_rank - 1 << std::endl; lval = buffer; } tag = 10*mpi_rank; // Send to right neighbour if (mpi_rank < mpi_size - 1) { buffer = subproblems->result () [ndof_loc - 1 - 2*overlap]; std::cerr << "rank " << mpi_rank << " sending lval to rank " << mpi_rank + 1 << std::endl; MPI_Send (&buffer, 1, MPI_DOUBLE, mpi_rank + 1, tag, MPI_COMM_WORLD); std::cerr << "rank " << mpi_rank << " sent lval to rank " << mpi_rank + 1 << std::endl; } // Receive from right neighbour if (mpi_rank < mpi_size - 1) { std::cerr << "rank " << mpi_rank << " receiving rval from rank " << mpi_rank + 1 << std::endl; MPI_Recv (&buffer, 1, MPI_DOUBLE, mpi_rank + 1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); std::cerr << "rank " << mpi_rank << " received rval from rank " << mpi_rank + 1 << std::endl; rval = buffer; } tag = 10*mpi_rank + 1; // Send to left neighbour if (mpi_rank > 0) { buffer = subproblems->result () [2*overlap]; std::cerr << "rank " << mpi_rank << " sending rval to rank " << mpi_rank - 1 << std::endl; MPI_Send (&buffer, 1, MPI_DOUBLE, mpi_rank - 1, tag, MPI_COMM_WORLD); std::cerr << "rank " << mpi_rank << " sent rval to rank " << mpi_rank - 1 << std::endl; } subproblems->set_dirichlet (fem_1d<double>::left_boundary, lval); subproblems->set_dirichlet (fem_1d<double>::right_boundary, rval); subproblems->solve (); } for (int rank = 0; rank <
mpi_size; ++rank) { if (rank == mpi_rank) for (unsigned int ii = 0; ii < ndof_loc; ++ii) std::cout << subproblems->m->nodes[ii] << " " << subproblems->result ()(ii, 0) << std::endl; MPI_Barrier (MPI_COMM_WORLD); } MPI_Finalize (); return 0; }
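/* As the comments in the Schwarz loop above note, the point-to-point exchange
 * is serialised: values travel left to right first, then right to left.  A
 * minimal sketch (an alternative, not the author's code) of the same boundary
 * exchange with MPI_Sendrecv, which lets all neighbour pairs communicate
 * concurrently without deadlock.  left_trace/right_trace stand in for the
 * solution values sampled at the overlap boundaries (result()[2*overlap] and
 * result()[ndof_loc - 1 - 2*overlap] in the program above). */
#include <mpi.h>

void exchange_overlap(double left_trace, double right_trace,
                      double* lval, double* rval,
                      int mpi_rank, int mpi_size)
{
  MPI_Status status;
  int left = mpi_rank - 1, right = mpi_rank + 1;
  if (left < 0) left = MPI_PROC_NULL;          /* no neighbour: exchange is a no-op */
  if (right >= mpi_size) right = MPI_PROC_NULL;

  /* send my left-overlap trace to the left, receive my new right value */
  MPI_Sendrecv(&left_trace, 1, MPI_DOUBLE, left, 0,
               rval, 1, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, &status);
  /* send my right-overlap trace to the right, receive my new left value */
  MPI_Sendrecv(&right_trace, 1, MPI_DOUBLE, right, 1,
               lval, 1, MPI_DOUBLE, left, 1, MPI_COMM_WORLD, &status);
}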
int main(int argc, char **argv) { char *env = NULL; simulation_data sim; simulation_data_ctor(&sim); #ifdef PARALLEL /* Initialize MPI */ MPI_Init(&argc, &argv); /* Create a new communicator. */ if (MPI_Comm_dup(MPI_COMM_WORLD, &sim.par_comm) != MPI_SUCCESS) sim.par_comm = MPI_COMM_WORLD; MPI_Comm_rank (sim.par_comm, &sim.par_rank); MPI_Comm_size (sim.par_comm, &sim.par_size); #endif /* Initialize environment variables. */ SimulationArguments(argc, argv); #ifdef PARALLEL /* Install callback functions for global communication. */ VisItSetBroadcastIntFunction2(visit_broadcast_int_callback, (void*)&sim); VisItSetBroadcastStringFunction2(visit_broadcast_string_callback, (void*)&sim); /* Tell libsim whether the simulation is parallel. */ VisItSetParallel(sim.par_size > 1); VisItSetParallelRank(sim.par_rank); /* Tell libsim which communicator to use. You must pass the address of * an MPI_Comm object. */ VisItSetMPICommunicator((void *)&sim.par_comm); #endif /* Only read the environment on rank 0. This could happen before MPI_Init if * we are using an MPI that does not like to let us spawn processes but we * would not know our processor rank. */ if(sim.par_rank == 0) env = VisItGetEnvironment(); /* Pass the environment to all other processors collectively. */ VisItSetupEnvironment2(env); if(env != NULL) free(env); /* Write out .sim file that VisIt uses to connect. Only do it * on processor 0. */ /* CHANGE 3 */ if(sim.par_rank == 0) { /* Write out .sim file that VisIt uses to connect. */ VisItInitializeSocketAndDumpSimFile( #ifdef PARALLEL "multiblock_par", #else "multiblock", #endif "Demonstrates multiple blocks (collections of domains) and " "reduced/enhanced connectivity via domain boundaries.", "/path/to/where/sim/was/started", NULL, NULL, SimulationFilename()); } /* Read input problem setup, geometry, data.*/ read_input_deck(); /* Call the main loop. */ mainloop(&sim); simulation_data_dtor(&sim); #ifdef PARALLEL MPI_Finalize(); #endif return 0; }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif // set global dimension of the matrix to 5, could be any number int NumGlobalElements = 5; // create a map Epetra_Map Map(NumGlobalElements, 0, Comm); // local number of rows int NumMyElements = Map.NumMyElements(); // get update list int * MyGlobalElements = Map.MyGlobalElements( ); // Create an integer vector NumNz that is used to build the Petra Matrix. // NumNz[i] is the number of nonzero terms (including the diagonal) for the ith global equation // on this processor int* NumNz = new int[NumMyElements]; // We are building a tridiagonal matrix where each row has (-1 2 -1) // So each interior row has 3 nonzeros, and the first and last rows have 2 for (int i = 0; i < NumMyElements; i++) if (MyGlobalElements[i]==0 || MyGlobalElements[i] == NumGlobalElements-1) NumNz[i] = 2; else NumNz[i] = 3; // Create an Epetra_CrsMatrix Epetra_CrsMatrix A(Copy,Map,NumNz); // (NOTE: constructor `Epetra_CrsMatrix A(Copy,Map,3);' was ok too.) // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1, diagonal term 2 double* Values = new double[2]; Values[0] = -1.0; Values[1] = -1.0; int* Indices = new int[2]; double two = 2.0; int NumEntries; for (int i = 0 ; i < NumMyElements; ++i) { if (MyGlobalElements[i] == 0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements - 2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i] - 1; Indices[1] = MyGlobalElements[i] + 1; NumEntries = 2; } A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices); // Put in the diagonal entry A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i); } // Finish up, transforming the matrix entries into local numbering, // to optimize data transfer during matrix-vector products A.FillComplete(); // build up two distributed vectors q and z, and compute // q = A * z Epetra_Vector q(A.RowMap()); Epetra_Vector z(A.RowMap()); // Fill z with 1's z.PutScalar(1.0); A.Multiply(false, z, q); // Compute q = A*z double dotProduct; z.Dot(q, &dotProduct); if (Comm.MyPID() == 0) cout << "q dot z = " << dotProduct << endl; #ifdef HAVE_MPI MPI_Finalize(); #endif // release the helper arrays allocated with new[] delete[] Values; delete[] Indices; delete[] NumNz; return( EXIT_SUCCESS ); } /* main */
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; MPI_Comm comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int buf[BUF_SIZE * 2]; int i, j, k, index, outcount, flag; int indices[2]; MPI_Request aReq[2]; MPI_Status aStatus[2]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); if (rank == 0) { /* set up persistent sends... */ MPI_Send_init (&buf[0], BUF_SIZE, MPI_INT, 1, 0, comm, &aReq[0]); MPI_Send_init (&buf[BUF_SIZE], BUF_SIZE, MPI_INT, 1, 1, comm, &aReq[1]); /* initialize the send buffers */ for (i = 0; i < BUF_SIZE; i++) { buf[i] = i; buf[BUF_SIZE + i] = BUF_SIZE - 1 - i; } } for (k = 0; k < 4; k++) { if (rank == 1) { /* zero out the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* start the persistent sends... */ if (k % 2) { MPI_Startall (2, &aReq[0]); } else { for (j = 0; j < 2; j++) { MPI_Start (&aReq[j]); } } /* complete the sends */ if (k < 2) /* use MPI_Wait */ for (j = 0; j < 2; j++) MPI_Wait (&aReq[j], &aStatus[j]); else /* use MPI_Waitall */ MPI_Waitall (2, aReq, aStatus); } else if (rank == 1) { /* set up receives for all of the sends */ for (j = 0; j < 2; j++) { MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE, MPI_INT, 0, j, comm, &aReq[j]); } /* complete all of the receives... */ MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* free the persistent requests */ for (i = 0 ; i < 2; i++) { MPI_Request_free (&aReq[i]); } } MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main(int argc, char *argv[]) { #ifdef ENABLE_INTEL_FLOATING_POINT_EXCEPTIONS cout << "NOTE: enabling floating point exceptions for divide by zero.\n"; _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID); #endif Teuchos::GlobalMPISession mpiSession(&argc, &argv); int rank = Teuchos::GlobalMPISession::getRank(); Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options bool useCondensedSolve = false; // condensed solve not yet compatible with minimum rule meshes int numGridPoints = 32; // in x,y -- idea is to keep the overall order of approximation constant int k = 4; // poly order for u double theta = 0.5; int numTimeSteps = 2000; int numCells = -1; // in x, y (-1 so we can set a default if unset from the command line.) int numFrames = 50; int delta_k = 2; // test space enrichment: should be 2 for 2D bool useMumpsIfAvailable = true; bool convertSolutionsToVTK = false; // when true assumes we've already run with precisely the same options, except without VTK support (so we have a bunch of .soln files) bool usePeriodicBCs = false; bool useConstantConvection = false; cmdp.setOption("polyOrder",&k,"polynomial order for field variable u"); cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment"); cmdp.setOption("numCells",&numCells,"number of cells in x and y directions"); cmdp.setOption("theta",&theta,"theta weight for time-stepping"); cmdp.setOption("numTimeSteps",&numTimeSteps,"number of time steps"); cmdp.setOption("numFrames",&numFrames,"number of frames for export"); cmdp.setOption("usePeriodicBCs", "useDirichletBCs", &usePeriodicBCs); cmdp.setOption("useConstantConvection", "useVariableConvection", &useConstantConvection); cmdp.setOption("useCondensedSolve", "useUncondensedSolve", &useCondensedSolve, "use static condensation to reduce the size of the global solve"); cmdp.setOption("useMumps", "useKLU", &useMumpsIfAvailable, "use MUMPS (if available)"); cmdp.setOption("convertPreComputedSolutionsToVTK", "computeSolutions", &convertSolutionsToVTK); if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { #ifdef HAVE_MPI MPI_Finalize(); #endif return -1; } bool saveSolutionFiles = true; if (numCells==-1) numCells = numGridPoints / k; if (rank==0) { cout << "solving on " << numCells << " x " << numCells << " mesh " << "of order " << k << ".\n"; } set<int> timeStepsToExport; timeStepsToExport.insert(numTimeSteps); int timeStepsPerFrame = numTimeSteps / (numFrames - 1); if (timeStepsPerFrame==0) timeStepsPerFrame = 1; for (int n=0; n<numTimeSteps; n += timeStepsPerFrame) { timeStepsToExport.insert(n); } int H1Order = k + 1; const static double PI = 3.141592653589793238462; double dt = 2 * PI / numTimeSteps; VarFactory varFactory; // traces: VarPtr qHat = varFactory.fluxVar("\\widehat{q}"); // fields: VarPtr u = varFactory.fieldVar("u", L2); // test functions: VarPtr v = varFactory.testVar("v", HGRAD); FunctionPtr x = Function::xn(1); FunctionPtr y = Function::yn(1); FunctionPtr c; if (useConstantConvection) { c = Function::vectorize(Function::constant(0.5), Function::constant(0.5)); } else { c = Function::vectorize(y-0.5, 0.5-x); } // FunctionPtr c = Function::vectorize(y, x); FunctionPtr n = Function::normal(); BFPtr bf = Teuchos::rcp( new BF(varFactory) ); bf->addTerm(u / dt, v); bf->addTerm(- theta * u, c * v->grad()); // bf->addTerm(theta * u_hat, (c * n) * v); bf->addTerm(qHat, v); double width = 2.0, height = 2.0; int horizontalCells = numCells, 
verticalCells = numCells; double x0 = -0.5; double y0 = -0.5; if (usePeriodicBCs) { x0 = 0.0; y0 = 0.0; width = 1.0; height = 1.0; } BCPtr bc = BC::bc(); SpatialFilterPtr inflowFilter = Teuchos::rcp( new InflowFilterForClockwisePlanarRotation (x0,x0+width,y0,y0+height,0.5,0.5)); vector< PeriodicBCPtr > periodicBCs; if (! usePeriodicBCs) { // bc->addDirichlet(u_hat, SpatialFilter::allSpace(), Function::zero()); bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary. } else { periodicBCs.push_back(PeriodicBC::xIdentification(x0, x0+width)); periodicBCs.push_back(PeriodicBC::yIdentification(y0, y0+height)); } MeshPtr mesh = MeshFactory::quadMeshMinRule(bf, H1Order, delta_k, width, height, horizontalCells, verticalCells, false, x0, y0, periodicBCs); FunctionPtr u0 = Teuchos::rcp( new Cone_U0(0.0, 0.25, 0.1, 1.0, usePeriodicBCs) ); RHSPtr initialRHS = RHS::rhs(); initialRHS->addTerm(u0 / dt * v); initialRHS->addTerm((1-theta) * u0 * c * v->grad()); IPPtr ip; // ip = Teuchos::rcp( new IP ); // ip->addTerm(v); // ip->addTerm(c * v->grad()); ip = bf->graphNorm(); // create two Solution objects; we'll switch between these for time steps SolutionPtr soln0 = Solution::solution(mesh, bc, initialRHS, ip); soln0->setCubatureEnrichmentDegree(5); FunctionPtr u_soln0 = Function::solution(u, soln0); FunctionPtr qHat_soln0 = Function::solution(qHat, soln0); RHSPtr rhs1 = RHS::rhs(); rhs1->addTerm(u_soln0 / dt * v); rhs1->addTerm((1-theta) * u_soln0 * c * v->grad()); SolutionPtr soln1 = Solution::solution(mesh, bc, rhs1, ip); soln1->setCubatureEnrichmentDegree(5); FunctionPtr u_soln1 = Function::solution(u, soln1); FunctionPtr qHat_soln1 = Function::solution(qHat, soln1); RHSPtr rhs2 = RHS::rhs(); // after the first solve on soln0, we'll swap out initialRHS for rhs2 rhs2->addTerm(u_soln1 / dt * v); rhs2->addTerm((1-theta) * u_soln1 * c * v->grad()); Teuchos::RCP<Solver> solver = Teuchos::rcp( new KluSolver ); #ifdef HAVE_AMESOS_MUMPS if (useMumpsIfAvailable) solver = Teuchos::rcp( new MumpsSolver ); #endif // double energyErrorSum = 0; ostringstream filePrefix; filePrefix << "convectingCone_k" << k << "_t"; int frameNumber = 0; #ifdef USE_HDF5 ostringstream dir_name; dir_name << "convectingCone_k" << k; HDF5Exporter exporter(mesh,dir_name.str()); #endif #ifdef USE_VTK VTKExporter soln0Exporter(soln0,mesh,varFactory); VTKExporter soln1Exporter(soln1,mesh,varFactory); #endif if (convertSolutionsToVTK) { #ifdef USE_VTK if (rank==0) { cout << "Converting .soln files to VTK.\n"; for (int frameNumber=0; frameNumber<=numFrames; frameNumber++) { ostringstream filename; filename << filePrefix.str() << frameNumber << ".soln"; soln0->readFromFile(filename.str()); filename.str(""); filename << filePrefix.str() << frameNumber; soln0Exporter.exportFields(filename.str()); } } #else if (rank==0) cout << "Driver was built without USE_VTK defined. 
This must be defined to convert solution files to VTK files.\n"; #endif exit(0); } if (timeStepsToExport.find(0) != timeStepsToExport.end()) { map<int,FunctionPtr> solnMap; solnMap[u->ID()] = u0; // project field variables if (rank==0) cout << "About to project initial solution onto mesh.\n"; soln0->projectOntoMesh(solnMap); if (rank==0) cout << "...projected initial solution onto mesh.\n"; ostringstream filename; filename << filePrefix.str() << frameNumber++; if (rank==0) cout << "About to export initial solution.\n"; #ifdef USE_VTK if (rank==0) soln0Exporter.exportFields(filename.str()); #endif #ifdef USE_HDF5 exporter.exportSolution(soln0, varFactory,0); #endif if (saveSolutionFiles) { if (rank==0) { filename << ".soln"; soln0->writeToFile(filename.str()); cout << endl << "wrote " << filename.str() << endl; } } if (rank==0) cout << "...exported initial solution.\n"; } if (rank==0) cout << "About to solve initial time step.\n"; // first time step: soln0->setReportTimingResults(true); // added to gain insight into why MPI blocks in some cases on the server... if (useCondensedSolve) soln0->condensedSolve(solver); else soln0->solve(solver); soln0->setReportTimingResults(false); // energyErrorSum += soln0->energyErrorTotal(); soln0->setRHS(rhs2); if (rank==0) cout << "Solved initial time step.\n"; if (timeStepsToExport.find(1) != timeStepsToExport.end()) { ostringstream filename; filename << filePrefix.str() << frameNumber++; #ifdef USE_VTK if (rank==0) soln0Exporter.exportFields(filename.str()); #endif #ifdef USE_HDF5 exporter.exportSolution(soln0, varFactory); #endif if (saveSolutionFiles) { if (rank==0) { filename << ".soln"; soln0->writeToFile(filename.str()); cout << endl << "wrote " << filename.str() << endl; } } } bool reportTimings = false; for (int n=1; n<numTimeSteps; n++) { bool odd = (n%2)==1; SolutionPtr soln_n = odd ? soln1 : soln0; if (useCondensedSolve) soln_n->solve(solver); else soln_n->solve(solver); if (reportTimings) { if (rank==0) cout << "time step " << n << ", timing report:\n"; soln_n->reportTimings(); } if (rank==0) { cout << "\x1B[2K"; // Erase the entire current line. cout << "\x1B[0E"; // Move to the beginning of the current line. cout << "Solved time step: " << n; flush(cout); } if (timeStepsToExport.find(n+1)!=timeStepsToExport.end()) { ostringstream filename; filename << filePrefix.str() << frameNumber++; #ifdef USE_VTK if (rank==0) { if (odd) { soln1Exporter.exportFields(filename.str()); } else { soln0Exporter.exportFields(filename.str()); } } #endif #ifdef USE_HDF5 double t = n * dt; if (odd) { exporter.exportSolution(soln1, varFactory, t); } else { exporter.exportSolution(soln0, varFactory, t); } #endif if (saveSolutionFiles) { if (rank==0) { filename << ".soln"; if (odd) { soln1->writeToFile(filename.str()); } else { soln0->writeToFile(filename.str()); } cout << endl << "wrote " << filename.str() << endl; } } } // energyErrorSum += soln_n->energyErrorTotal(); } // if (rank==0) cout << "energy error, sum over all time steps: " << energyErrorSum << endl; return 0; }
int main(int argc, char **argv) { // MPI init MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_tasks); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); // See if we've been given a seed to use (for testing purposes). When you // specify a random seed, the evolution will be exactly the same each time // you use that seed number unsigned int seed = 0; for(int i=1 ; i<argc ; i++) if(strcmp(argv[i++],"seed") == 0) seed = atoi(argv[i]); // Declare variables for the GA parameters and set them to some default values. int popsize = 2; // Population int ngen = 2; // Generations float pmut = 0.03; float pcross = 0.65; // popsize / mpi_tasks must be an integer popsize = mpi_tasks * int((double)popsize/(double)mpi_tasks+0.999); // Create the phenotype for two variables. The number of bits you can use to // represent any number is limited by the type of computer you are using. // For this case we use 10 bits for each var, ranging the square domain [0,5*PI]x[0,5*PI] ///GABin2DecPhenotype map; ///GABin2DecPhenotype map; ///map.add(10, 0.0, 5.0 * M_PI); ///map.add(10, 0.0, 5.0 * M_PI); // Create the template genome using the phenotype map we just made. ///GABin2DecGenome genome(map, objective); //GA1DArrayGenome<double> genome(2, objective); GA1DArrayGenome<double> genome(3, dynamixObjective); // define own initializer, can do the same for mutator and comparator genome.initializer(::Initializer); // Now create the GA using the genome and run it. We'll use sigma truncation // scaling so that we can handle negative objective scores. GASimpleGA ga(genome); // TODO change to steady-state GALinearScaling scaling; ga.minimize(); // by default we want to minimize the objective ga.populationSize(popsize); ga.nGenerations(ngen); ga.pMutation(pmut); ga.pCrossover(pcross); ga.scaling(scaling); if(mpi_rank == 0) ga.scoreFilename("evolution.txt"); else ga.scoreFilename("/dev/null"); ga.scoreFrequency(1); ga.flushFrequency(1); ga.selectScores(GAStatistics::AllScores); // Pass MPI data to the GA class ga.mpi_rank(mpi_rank); ga.mpi_tasks(mpi_tasks); ga.evolve(seed); // Dump the GA results to file if(mpi_rank == 0) { genome = ga.statistics().bestIndividual(); printf("GA result:\n"); printf("x = %f, y = %f\n", genome.gene(0), genome.gene(1)); } MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int i, OutputNumber = 0, d; int ni, ntot; char ParameterFile[MAXLINELENGTH]; if (argc == 1) PrintUsage (argv[0]); strcpy (ParameterFile, ""); for (i = 1; i < argc; i+=d) { d=1; if (*(argv[i]) == '+') { if (strspn (argv[i], \ "+S#D") \ != strlen (argv[i])) PrintUsage (argv[0]); if (strchr (argv[i], '#')) { d=2; ArrayNb = atoi(argv[i+1]); EarlyOutputRename = YES; if (ArrayNb <= 0) { masterprint ("Incorrect Array number after +# flag\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], 'D')) { d=2; strcpy (DeviceFile, argv[i+1]); DeviceFileSpecified = YES; } if (strchr (argv[i], 'S')) { d=2; StretchNumber = atoi(argv[i+1]); StretchOldOutput = YES; } } if (*(argv[i]) == '-') { if (strspn (argv[i], \ "-tofCmkspSVBD0#") \ != strlen (argv[i])) PrintUsage (argv[0]); if (strchr (argv[i], 't')) TimeInfo = YES; if (strchr (argv[i], 'f')) ForwardOneStep = YES; if (strchr (argv[i], '0')) OnlyInit = YES; if (strchr (argv[i], 'C')) { EverythingOnCPU = YES; #ifdef GPU mastererr ("WARNING: Forcing execution of all functions on CPU\n"); #else mastererr ("WARNING: Flag -C meaningless for a CPU built\n"); #endif } if (strchr (argv[i], 'm')) { Merge = YES; } if (strchr (argv[i], 'k')) { Merge = NO; } if (strchr (argv[i], 'o')) { RedefineOptions = YES; ParseRedefinedOptions (argv[i+1]) ; d=2; } if (strchr (argv[i], 's')) { Restart = YES; d=2; NbRestart = atoi(argv[i+1]); if ((NbRestart < 0)) { masterprint ("Incorrect restart number\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], '#')) { d=2; ArrayNb = atoi(argv[i+1]); if (ArrayNb <= 0) { masterprint ("Incorrect Array number after -# flag\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], 'p')) { PostRestart = YES; } if (strchr (argv[i], 'S')) { Restart_Full = YES; d=2; NbRestart = atoi(argv[i+1]); if ((NbRestart < 0)) { masterprint ("Incorrect restart number\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], 'V')) { Dat2vtk = YES; Restart_Full = YES; d=2; NbRestart = atoi(argv[i+1]); if ((NbRestart < 0)) { masterprint ("Incorrect output number\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], 'B')) { Vtk2dat = YES; Restart_Full = YES; d=2; NbRestart = atoi(argv[i+1]); if ((NbRestart < 0)) { masterprint ("Incorrect output number\n"); PrintUsage (argv[0]); } } if (strchr (argv[i], 'D')) { d=2; DeviceManualSelection = atoi(argv[i+1]); } } else strcpy (ParameterFile, argv[i]); } #ifdef WRITEGHOSTS if (Merge == YES) { mastererr ("Cannot merge outputs when dumping ghost values.\n"); mastererr ("'make nofulldebug' could fix this problem.\n"); mastererr ("Using the -k flag could be another solution.\n"); prs_exit (1); } #endif if (ParameterFile[0] == 0) PrintUsage (argv[0]); #ifdef MPICUDA EarlyDeviceSelection(); #endif MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &CPU_Rank); MPI_Comm_size (MPI_COMM_WORLD, &CPU_Number); CPU_Master = (CPU_Rank == 0 ? 
1 : 0); #ifndef MPICUDA SelectDevice(CPU_Rank); #endif InitVariables (); MPI_Barrier(MPI_COMM_WORLD); ReadDefaultOut (); ReadVarFile (ParameterFile); if (strcmp (PLANETCONFIG, "NONE") != 0) ThereArePlanets = YES; if (ORBITALRADIUS > 1.0e-30){ YMIN *= ORBITALRADIUS; YMAX *= ORBITALRADIUS; DT *= sqrt(ORBITALRADIUS*ORBITALRADIUS*ORBITALRADIUS); } SubsDef (OUTPUTDIR, DefaultOut); /* This must be placed ***BEFORE*** reading the input files in case of a restart */ if ((ArrayNb) && (EarlyOutputRename == YES)) { i = strlen(OUTPUTDIR); if (OUTPUTDIR[i-1] == '/') OUTPUTDIR[i-1] = 0;//Remove trailing slash if any sprintf (OUTPUTDIR, "%s%06d/", OUTPUTDIR, ArrayNb); //Append numerical suffix /* There is no need to perform the wildcard (@) substitution. This has already been done */ printf ("\n\n***\n\nNew Output Directory is %s\n\n***\n\n", OUTPUTDIR); MakeDir(OUTPUTDIR); /*Create the output directory*/ } MakeDir(OUTPUTDIR); /*Create the output directory*/ #if !defined(X) NX = 1; #endif #if !defined(Y) NY = 1; #endif #if !defined(Z) NZ = 1; #endif SelectWriteMethod(); #if !defined(Y) && !defined(Z) if (CPU_Rank==1){ prs_error ("You cannot split a 1D mesh in x. Sequential runs only!"); } if (CPU_Number > 1) { MPI_Finalize(); prs_exit(EXIT_FAILURE); } #endif ListVariables ("variables.par"); //Writes all variables defined in set up ListVariablesIDL ("IDL.var"); ChangeArch(); /*Changes the name of the main functions ChangeArch adds _cpu or _gpu if GPU is activated.*/ split(&Gridd); /*Split mesh over PEs*/ InitSpace(); WriteDim(); InitSurfaces(); LightGlobalDev(); /* Copy light arrays to the device global memory */ CreateFields(); // Allocate all fields. Sys = InitPlanetarySystem(PLANETCONFIG); ListPlanets(); if(Corotating) OMEGAFRAME = GetPsysInfo(FREQUENCY); OMEGAFRAME0 = OMEGAFRAME; /* We need to keep track of initial azimuthal velocity to correct the target velocity in Stockholm's damping prescription. We copy the value above *after* rescaling, and after any initial correction to OMEGAFRAME (which is used afterwards to build the initial Vx field. */ if(Restart == YES || Restart_Full == YES) { CondInit (); //Needed even for restarts: some setups have custom //definitions (eg potential for setup MRI) or custom //scaling laws (eg. setup planetesimalsRT). begin_i = RestartSimulation(NbRestart); if (ThereArePlanets) { PhysicalTime = GetfromPlanetFile (NbRestart, 9, 0); OMEGAFRAME = GetfromPlanetFile (NbRestart, 10, 0); RestartPlanetarySystem (NbRestart, Sys); } } else { if (ThereArePlanets) EmptyPlanetSystemFiles (); CondInit(); // Initialize set up // Note: CondInit () must be called only ONCE (otherwise some // custom scaling laws may be applied several times). } if (StretchOldOutput == YES) { StretchOutput (StretchNumber); } FARGO_SAFE(comm(ENERGY)); //Very important for isothermal cases! /* This must be placed ***after*** reading the input files in case of a restart */ if ((ArrayNb) && (EarlyOutputRename == NO)) { i = strlen(OUTPUTDIR); if (OUTPUTDIR[i-1] == '/') OUTPUTDIR[i-1] = 0;//Remove trailing slash if any sprintf (OUTPUTDIR, "%s%06d/", OUTPUTDIR, ArrayNb); //Append numerical suffix /* There is no need to perform the wildcard (@) substitution. 
This has already been done */ printf ("\n\n***\n\nNew Output Directory is %s\n\n***\n\n", OUTPUTDIR); MakeDir(OUTPUTDIR); /*Create the output directory*/ ListVariables ("variables.par"); //Writes all variables defined in set up ListVariablesIDL ("IDL.var"); InitSpace(); WriteDim (); } DumpToFargo3drc(argc, argv); FillGhosts(PrimitiveVariables()); #ifdef STOCKHOLM FARGO_SAFE(init_stockholm()); #ifdef STOCKHOLMACC FARGO_SAFE(ComputeVymed(Vy)); FARGO_SAFE(ComputeRhomed(Density)); Write2D(Density0_avg, "density0_2d_avg.dat", OUTPUTDIR, GHOSTINC); Write2D(Vy0_avg, "vy0_2d_avg.dat", OUTPUTDIR, GHOSTINC); #endif #endif #ifdef GHOSTSX masterprint ("\n\nNew version with ghost zones in X activated\n"); #else masterprint ("Standard version with no ghost zones in X\n"); #endif #ifdef TIMER clock_t begin_timer_time, end_timer_time; real timer_time_elapsed; #endif ntot = NTOTINIT; for (ni = 0; ni<NITER; ni++) { // Iteration loop ntot = (ni == 0) ? NTOTINIT : NTOT; masterprint ("Start of %d iteration\n", ni); masterprint ("Evolving waves for %d DT (DT = %lg)\n", ntot,DT); for (i = begin_i; i<=ntot; i++) { // MAIN LOOP #ifdef TIMER if (i==begin_i) { begin_timer_time = clock(); } #endif if (NINTERM * (TimeStep = (i / NINTERM)) == i) { TimeStepIter = ni; #if defined(MHD) && defined(DEBUG) FARGO_SAFE(ComputeDivergence(Bx, By, Bz)); #endif if (ThereArePlanets) WritePlanetSystemFile(TimeStep, NO); #ifndef NOOUTPUTS WriteOutputsAndDisplay(ALL); if(CPU_Master) printf("OUTPUTS %d at date t = %f OK\n", TimeStep, PhysicalTime); #endif if (TimeInfo == YES) GiveTimeInfo (TimeStep); } if (NSNAP != 0) { if (NSNAP * (TimeStep = (i / NSNAP)) == i) { WriteOutputsAndDisplay(SPECIFIC); } } AlgoGas(FALSE); MonitorGlobal (MONITOR2D | MONITORY | MONITORY_RAW| \ MONITORSCALAR | MONITORZ | MONITORZ_RAW); if (ThereArePlanets) { WriteTorqueAndWork(TimeStep, 0); WritePlanetSystemFile(TimeStep, YES); SolveOrbits (Sys); } #ifdef TIMER if (i==begin_i) { end_timer_time = clock(); timer_time_elapsed =( (double)(end_timer_time-begin_timer_time))/CLOCKS_PER_SEC; masterprint("time for time_step was %g s\n",timer_time_elapsed); } #endif } masterprint ("End of %d iteration\n", ni); AlgoGas(TRUE); masterprint ("Computing steady state\n"); compute_steady_state(); add_avgs(); output_steady_state(ni); clear_averages(); } MPI_Finalize(); printf("End of simulation!\n"); return 0; }
int main(int argc, char* argv[]) { #ifdef BENCHMARKING benchmark(argc, argv); #else // mpi setup int numProcs; int rank, flag; int done = 0; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numProcs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); // create a buffer for both worker and controller static double buffer[BUFFER_SIZE]; unsigned int niter = argc > 1 ? atoi(argv[1]) : NITER; // Setting up the PSF (statically) int psfWidth, psfHeight; double* psf = ImageQueue::getPsf(&psfWidth, &psfHeight); // ---------- CONTROLLER NODE ---------- // if (rank == 0) { // Set up producer ImageQueue images(buffer, BUFFER_SIZE, "../images", numProcs); // Print out some details int numImages = images.remaining(); FPRINT("Starting %d iteration(s) on %d image(s)", niter, numImages); PerfTimer mainTimer; mainTimer.begin(); int toSend = (unsigned int)numProcs < images.remaining() ? numProcs : images.remaining(); for (int i = 0; i < toSend; i++) { images.pop(i); MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD); } while (images.remaining() > 0) { for (int i = 0; i < numProcs; i++) { // If an image is received then save it and send the next one MPI_Iprobe(i, IMG, MPI_COMM_WORLD, &flag, &status); if (flag) { MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD, &status); images.save(i); images.pop(i); MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD); } } } for (int i = 0; i < numProcs; i++) { MPI_Send(&done, 1, MPI_INT, i, END, MPI_COMM_WORLD); } FPRINT("Finished %d image(s) in %f seconds", numImages, mainTimer.getElapsed()); } // ---------- WORKER NODE ---------- // else { // worker thread // Set up consumer DeconvFilter filter(WIDTH, HEIGHT, niter, psf, psfWidth, psfHeight, buffer); bool running = true; PRINT("Worker thread initialised."); while (running) { MPI_Iprobe(0, IMG, MPI_COMM_WORLD, &flag, &status); if (flag) { // New image MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD, &status); filter.process(); MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD); } MPI_Iprobe(0, END, MPI_COMM_WORLD, &flag, &status); if (flag) { // Execution finished MPI_Recv(&done, 1, MPI_INT, 0, END, MPI_COMM_WORLD, &status); running = false; } } PRINT("Worker thread finished."); } MPI_Finalize(); #endif return 0; }