void startMPItest(MPI_Comm comm, int verbose)
{
    int bufSize = 2 * 1024 * 1024;
    char *buf = malloc(bufSize);
    mpiComm msg;
    mpiComm *c = &msg;

    c->b.recv_fn = mpi_recv_fn;
    c->b.finish_fn = mpi_finish_fn;
    c->comm = comm;
    MPI_Comm_rank(c->comm, &c->myRank);
    MPI_Buffer_attach(buf, bufSize);

    // Run the Bsend test, which is non-blocking:
    c->b.send_fn = mpi_bsend_fn;
    c->doneFlag = 0;
    msg_comm_test(&c->b, "MPI Bsend", c->myRank, verbose);

    // Run the Isend test, which may block:
    c->b.send_fn = mpi_isend_fn;
    c->doneFlag = 0;
    c->nIsend = 0;
    msg_comm_test(&c->b, "MPI Isend", c->myRank, verbose);
    while (!c->doneFlag)
        mpi_isend_poll(&c->b);

    /* MPI_Buffer_detach expects the address of the buffer pointer. */
    MPI_Buffer_detach(&buf, &bufSize);
    free(buf);
}
void echange_buffer(struct graphe_t *graphe, char **msg_snd, int *taille_msg_snd,
                    char **msg_rcv, int *taille_msg_rcv)
{
    int rang, nbv, iv, vois, tag, buff_size;
    char *buff_mpi;
    MPI_Status sta;

    MPI_Comm_rank(MPI_COMM_WORLD, &rang);
    tag = 1000;
    nbv = graphe->nb_voisins[rang];

    buff_size = 0;
    for (iv = 0; iv < nbv; iv++)
        buff_size += MPI_BSEND_OVERHEAD + taille_msg_snd[iv];

    buff_mpi = malloc(buff_size);
    MPI_Buffer_attach(buff_mpi, buff_size);

    // Note: the two loops must be kept separate.
    for (iv = 0; iv < nbv; iv++) {
        vois = graphe->voisins[rang][iv];
        MPI_Bsend(msg_snd[iv], taille_msg_snd[iv], MPI_CHAR, vois, tag, MPI_COMM_WORLD);
    }
    for (iv = 0; iv < nbv; iv++) {
        vois = graphe->voisins[rang][iv];
        MPI_Recv(msg_rcv[iv], taille_msg_rcv[iv], MPI_CHAR, vois, tag, MPI_COMM_WORLD, &sta);
    }

    // Calling MPI_Buffer_detach right after the Bsend loop could cause problems:
    // detach blocks until the buffered messages have drained, which may never
    // happen if no process has reached its receive loop yet.
    MPI_Buffer_detach(&buff_mpi, &buff_size);
    free(buff_mpi);
}
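The sizing loop above adds each raw MPI_CHAR byte count to MPI_BSEND_OVERHEAD; a minimal, hedged sketch of the more general sizing the MPI standard recommends uses MPI_Pack_size per pending message (the names `nb` and `len` below are illustrative, not from the code above):

#include <mpi.h>
#include <stdlib.h>

/* Illustrative helper: attach a buffer large enough for `nb` pending
 * MPI_CHAR Bsends of lengths len[0..nb-1]. */
static void *attach_bsend_buffer(int nb, const int *len, int *out_size)
{
    int total = 0;
    for (int iv = 0; iv < nb; iv++) {
        int packed = 0;
        /* upper bound on the packed size of one MPI_CHAR message */
        MPI_Pack_size(len[iv], MPI_CHAR, MPI_COMM_WORLD, &packed);
        total += packed + MPI_BSEND_OVERHEAD;  /* one overhead per pending Bsend */
    }
    void *buf = malloc(total);
    MPI_Buffer_attach(buf, total);
    *out_size = total;
    return buf;  /* detach and free once all Bsends have drained */
}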
// MPI_TEST will be executed every this many seconds: so this determines the
// minimum time taken for every send operation!!
//#define VERBOSE_MPISENDRECV
int MpiNode::relion_MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
{
    int result;
    double start_time = MPI_Wtime();

#define ONLY_NORMAL_SEND
#ifdef ONLY_NORMAL_SEND
    result = MPI_Send(buf, count, datatype, dest, tag, comm);
    if (result != MPI_SUCCESS)
    {
        report_MPI_ERROR(result);
    }
#else
    // Only use Bsend for larger messages, otherwise use normal send
    if (count > 100)
    {
        int size;
        MPI_Pack_size(count, datatype, comm, &size);
        char *membuff;

        // Allocate memory for the package to be sent
        int attach_result = MPI_Buffer_attach(malloc(size + MPI_BSEND_OVERHEAD), size + MPI_BSEND_OVERHEAD);
        if (attach_result != MPI_SUCCESS)
        {
            report_MPI_ERROR(attach_result);
        }

        // Actually start sending the message
        result = MPI_Bsend(buf, count, datatype, dest, tag, comm);
        if (result != MPI_SUCCESS)
        {
            report_MPI_ERROR(result);
        }

        // The following will only complete once the message has been
        // successfully sent (i.e. also received on the other side)
        int deattach_result = MPI_Buffer_detach(&membuff, &size);
        if (deattach_result != MPI_SUCCESS)
        {
            report_MPI_ERROR(deattach_result);
        }

        // Release the buffer that was malloc'ed for the attach above
        free(membuff);
    }
    else
    {
        result = MPI_Send(buf, count, datatype, dest, tag, comm);
        if (result != MPI_SUCCESS)
        {
            report_MPI_ERROR(result);
        }
    }
#endif

#ifdef VERBOSE_MPISENDRECV
    if (count > 100)
        std::cerr << " relion_MPI_Send: message to " << dest << " of size " << count
                  << " arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl;
#endif
    return result;
}
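For reference, a self-contained sketch of the per-message attach/Bsend/detach pattern used in the buffered branch above; error handling is omitted and the function name is illustrative. MPI_Buffer_detach blocks until every message still held in the buffer has drained, which is what makes freeing the buffer afterwards safe.

#include <mpi.h>
#include <stdlib.h>

/* Illustrative helper, not part of the code above. */
void bsend_one_message(void *data, int count, MPI_Datatype type,
                       int dest, int tag, MPI_Comm comm)
{
    int size = 0;
    MPI_Pack_size(count, type, comm, &size);
    size += MPI_BSEND_OVERHEAD;

    char *attached = (char *) malloc(size);
    MPI_Buffer_attach(attached, size);

    MPI_Bsend(data, count, type, dest, tag, comm);

    /* Blocks until the buffered message has drained, then returns the buffer. */
    char *detached = NULL;
    int detached_size = 0;
    MPI_Buffer_detach(&detached, &detached_size);
    free(detached);  /* detached == attached here */
}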
void mpi_process_group::set_message_buffer_size(std::size_t s)
{
    int sz;
    void* ptr;

    if (!message_buffer.empty()) {
        MPI_Buffer_detach(&ptr, &sz);
        BOOST_ASSERT(ptr == &message_buffer.front());
        BOOST_ASSERT(static_cast<std::size_t>(sz) == message_buffer.size());
    } else if (old_buffer != 0)
        MPI_Buffer_detach(&old_buffer, &old_buffer_size);

    message_buffer.resize(s);

    if (s)
        MPI_Buffer_attach(&message_buffer.front(), message_buffer.size());
    else if (old_buffer_size)
        MPI_Buffer_attach(old_buffer, old_buffer_size);
}
EXPORT_MPI_API void FORTRAN_API mpi_buffer_detach_(void **buffer, MPI_Fint *size, MPI_Fint *__ierr)
{
    void *tmp = (void *) buffer;
    int lsize;

    *__ierr = MPI_Buffer_detach(&tmp, &lsize);
    *size = (MPI_Fint) lsize;
}
int main(int argc, char *argv[]) { MPI_Status status; MPI_Comm comm, scomm; int a[10], b[10]; int buf[BUFSIZE], *bptr, bl, i, j, rank, size, color, errs = 0; MTest_Init(0, 0); MPI_Comm_rank(MPI_COMM_WORLD, &rank); color = rank % 2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &scomm); MPI_Intercomm_create(scomm, 0, MPI_COMM_WORLD, 1 - color, 52, &comm); MPI_Comm_rank(comm, &rank); MPI_Comm_remote_size(comm, &size); MPI_Buffer_attach(buf, BUFSIZE); for (j = 0; j < 10; j++) { for (i = 0; i < 10; i++) { a[i] = (rank + 10 * j) * size + i; } MPI_Bsend(a, 10, MPI_INT, 0, 27 + j, comm); } if (rank == 0) { for (i = 0; i < size; i++) { for (j = 0; j < 10; j++) { int k; status.MPI_TAG = -10; status.MPI_SOURCE = -20; MPI_Recv(b, 10, MPI_INT, i, 27 + j, comm, &status); if (status.MPI_TAG != 27 + j) { errs++; printf("Wrong tag = %d\n", status.MPI_TAG); } if (status.MPI_SOURCE != i) { errs++; printf("Wrong source = %d\n", status.MPI_SOURCE); } for (k = 0; k < 10; k++) { if (b[k] != (i + 10 * j) * size + k) { errs++; printf("received b[%d] = %d from %d tag %d\n", k, b[k], i, 27 + j); } } } } } MPI_Buffer_detach(&bptr, &bl); MPI_Comm_free(&scomm); MPI_Comm_free(&comm); MTest_Finalize(errs); return MTestReturnValue(errs); }
int main( int argc, char *argv[] ) { MPI_Status status; MPI_Request request; int a[10], b[10]; int buf[BUFSIZE], *bptr, bl, i, j, rank, size; int errs = 0; MTest_Init( 0, 0 ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Buffer_attach( buf, BUFSIZE ); for (j=0; j<10; j++) { MPI_Bsend_init( a, 10, MPI_INT, 0, 27+j, MPI_COMM_WORLD, &request ); for (i=0; i<10; i++) { a[i] = (rank + 10 * j) * size + i; } MPI_Start( &request ); MPI_Wait( &request, &status ); MPI_Request_free( &request ); } if (rank == 0) { for (i=0; i<size; i++) { for (j=0; j<10; j++) { int k; status.MPI_TAG = -10; status.MPI_SOURCE = -20; MPI_Recv( b, 10, MPI_INT, i, 27+j, MPI_COMM_WORLD, &status ); if (status.MPI_TAG != 27+j) { errs++; printf( "Wrong tag = %d\n", status.MPI_TAG ); } if (status.MPI_SOURCE != i) { errs++; printf( "Wrong source = %d\n", status.MPI_SOURCE ); } for (k=0; k<10; k++) { if (b[k] != (i + 10 * j) * size + k) { errs++; printf( "received b[%d] = %d from %d tag %d\n", k, b[k], i, 27+j ); } } } } } MPI_Buffer_detach( &bptr, &bl ); MTest_Finalize( errs ); MPI_Finalize(); return 0; }
JNIEXPORT void JNICALL Java_mpi_MPI_detachBuffer_1jni(JNIEnv *env, jclass jthis, jbyteArray buf)
{
    int size;
    jbyte* bufptr;
    int rc = MPI_Buffer_detach(&bufptr, &size);
    ompi_java_exceptionCheck(env, rc);

    if (buf != NULL)
        (*env)->ReleaseByteArrayElements(env, buf, bufptr, 0);
}
/*
 * Class:     mpi_MPI
 * Method:    detachBuffer_jni
 * Signature: ([B)V
 */
JNIEXPORT void JNICALL Java_mpi_MPI_detachBuffer_1jni(JNIEnv *env, jclass jthis, jbyteArray buf)
{
    /*jboolean isCopy;*/
    int size, rc;
    /*char* bufptr;*/
    jbyte* bufptr;

    rc = MPI_Buffer_detach(&bufptr, &size);
    ompi_java_exceptionCheck(env, rc);

    if (buf != NULL) {
        (*env)->ReleaseByteArrayElements(env, buf, bufptr, 0);
    }
}
void Foam::UPstream::exit(int errnum)
{
    if (debug)
    {
        Pout<< "UPstream::exit." << endl;
    }

#   ifndef SGIMPI
    int size;
    char* buff;
    MPI_Buffer_detach(&buff, &size);
    delete[] buff;
#   endif

    if (PstreamGlobals::outstandingRequests_.size())
    {
        label n = PstreamGlobals::outstandingRequests_.size();
        PstreamGlobals::outstandingRequests_.clear();

        WarningIn("UPstream::exit(int)")
            << "There are still " << n << " outstanding MPI_Requests." << endl
            << "This means that your code exited before doing a"
            << " UPstream::waitRequests()." << endl
            << "This should not happen for a normal code exit."
            << endl;
    }

    // Clean mpi communicators
    forAll(myProcNo_, communicator)
    {
        if (myProcNo_[communicator] != -1)
        {
            freePstreamCommunicator(communicator);
        }
    }

    if (errnum == 0)
    {
        MPI_Finalize();
        ::exit(errnum);
    }
    else
    {
        MPI_Abort(MPI_COMM_WORLD, errnum);
    }
}
/*-------------------------------------------------------------------------------*/
void OneStepCirculation(int step)
{
    MPI_Status status;

    int n = SIZE * LOCAL_SIZE;
    int m = 1;
    int sizeOneMsg;
    MPI_Pack_size(n, MPI_DOUBLE, MPI_COMM_WORLD, &sizeOneMsg);
    int size = m * (sizeOneMsg + MPI_BSEND_OVERHEAD);

    double *buf = (double*) malloc(size);
    MPI_Buffer_attach(buf, size);

    MPI_Bsend(A_Slice, SIZE * LOCAL_SIZE, MPI_DOUBLE, ((Me - 1) + NbPE) % NbPE, 0, MPI_COMM_WORLD);
    MPI_Recv(A_Slice, SIZE * LOCAL_SIZE, MPI_DOUBLE, (Me + 1) % NbPE, 0, MPI_COMM_WORLD, &status);

    MPI_Buffer_detach(&buf, &size);
/******************************** TO DO ******************************************/
}
void Foam::Pstream::exit(int errnum)
{
#   ifndef SGIMPI
    int size;
    char* buff;
    MPI_Buffer_detach(&buff, &size);
    delete[] buff;
#   endif

    if (errnum == 0)
    {
        MPI_Finalize();
        ::exit(errnum);
    }
    else
    {
        MPI_Abort(MPI_COMM_WORLD, errnum);
    }
}
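The two Foam exit routines above detach a buffer and release it with delete[]; a hedged sketch of the attach side they presuppose, with an illustrative size constant (the real size is configured elsewhere in OpenFOAM):

#include <mpi.h>

// Illustrative attach-side counterpart; the buffer size is a placeholder.
void attachPstreamBuffer()
{
    const int bufSize = 20000000 + MPI_BSEND_OVERHEAD;
    char* buff = new char[bufSize];   // matches the delete[] after MPI_Buffer_detach
    MPI_Buffer_attach(buff, bufSize);
}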
int main(int argc, char *argv[])
{
    int i, bufsize, N = 1024 * 10;
    int myrank, nprocs, src, dest, tag;
    MPI_Status status;
    double A[N], B[N], sum;
    double *buf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    for (i = 0; i < N; i++)
        A[i] = (double) myrank;

    MPI_Pack_size(N, MPI_DOUBLE, MPI_COMM_WORLD, &bufsize);
    // MPI_Type_size(MPI_DOUBLE, &bufsize);
    // bufsize = N * bufsize;
    bufsize = MPI_BSEND_OVERHEAD + bufsize;  // add the extra overhead required by buffered mode
    buf = (double *) malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    src = myrank - 1;
    if (src < 0)
        src = nprocs - 1;
    dest = myrank + 1;
    if (dest >= nprocs)
        dest = 0;
    tag = 111;

    MPI_Bsend(A, N, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD);
    MPI_Recv(B, N, MPI_DOUBLE, src, tag, MPI_COMM_WORLD, &status);

    sum = 0.0;
    for (i = 0; i < N; i++)
        sum = sum + B[i];
    printf("Process %d, values = %f\n", myrank, (double) sum / N);

    MPI_Buffer_detach(&buf, &bufsize);
    free(buf);
    MPI_Finalize();
    return 0;
}
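As a point of comparison (not part of the original example), the same ring shift can be written with MPI_Sendrecv, which needs no attached buffer and cannot deadlock; a minimal sketch with illustrative names:

#include <mpi.h>

/* Shift N doubles one rank downstream: send A, receive into B. */
void ring_shift(double *A, double *B, int N, MPI_Comm comm)
{
    int rank, nprocs;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);

    int dest = (rank + 1) % nprocs;           /* send downstream */
    int src  = (rank - 1 + nprocs) % nprocs;  /* receive from upstream */

    MPI_Sendrecv(A, N, MPI_DOUBLE, dest, 111,
                 B, N, MPI_DOUBLE, src, 111,
                 comm, MPI_STATUS_IGNORE);
}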
void Communicator::ensure_MPI_send_buffer_size(int atLeast)
{
    static char *mpi_send_buf = NULL;
    static int mpi_send_buf_size = 0;

    atLeast += MPI_BSEND_OVERHEAD;

    if (mpi_send_buf_size < atLeast)
    {
        if (mpi_send_buf_size > 0)
            MPI_Buffer_detach(&mpi_send_buf, &mpi_send_buf_size);

        mpi_send_buf = (char *) realloc(mpi_send_buf, (mpi_send_buf_size = atLeast));
        if (mpi_send_buf == 0)
            (cerr << "Error reallocating mpi_send_buf!\n").flush();

        // if (parameters.outputMPIMessages)
        // {
        //     cout.form(
        //         "[%i] Attaching MPI_Bsend buffer, size = %i\n",
        //         parameters.rank, mpi_send_buf_size);
        //     cout.flush();
        // }

        MPI_Buffer_attach(mpi_send_buf, mpi_send_buf_size);
    }
}
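A hedged usage sketch for the grow-only helper above: a caller would derive the byte count with MPI_Pack_size and then issue the buffered send. The caller name and signature are illustrative and assume access to the Communicator instance that owns ensure_MPI_send_buffer_size(); note that the attached buffer has to cover all Bsends outstanding at any one time.

#include <mpi.h>

// Illustrative caller, assuming the Communicator class shown above.
void bsend_doubles(Communicator &comm_obj, double *data, int n,
                   int dest, int tag, MPI_Comm comm)
{
    int nbytes = 0;
    MPI_Pack_size(n, MPI_DOUBLE, comm, &nbytes);
    comm_obj.ensure_MPI_send_buffer_size(nbytes);  // adds MPI_BSEND_OVERHEAD itself
    MPI_Bsend(data, n, MPI_DOUBLE, dest, tag, comm);
}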
int main(int argc, char *argv[])
{
    MPI_Comm comm = MPI_COMM_WORLD;
    int dest = 1, src = 0, tag = 1;
    int s1, s2, s3;
    char *buf, *bbuf;
    char msg1[7], msg3[17];
    double msg2[2];
    char rmsg1[64], rmsg3[64];
    double rmsg2[64];
    int errs = 0, rank;
    int bufsize, bsize;
    MPI_Status status;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Pack_size(7, MPI_CHAR, comm, &s1);
    MPI_Pack_size(2, MPI_DOUBLE, comm, &s2);
    MPI_Pack_size(17, MPI_CHAR, comm, &s3);
    bufsize = 3 * MPI_BSEND_OVERHEAD + s1 + s2 + s3;
    buf = (char*) malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    strncpy(msg1, "012345", 7);
    strncpy(msg3, "0123401234012345", 17);
    msg2[0] = 1.23;
    msg2[1] = 3.21;

    if (rank == src) {
        fprintf(stdout, "-----proc %d, before send, t=%f\n", rank, MPI_Wtime());
        MPI_Bsend(msg1, 7, MPI_CHAR, dest, tag, comm);
        fprintf(stdout, "-----proc %d, after send, t=%f\n", rank, MPI_Wtime());
    }
    if (rank == dest) {
        fprintf(stdout, "-----proc %d, before recv, t=%f\n", rank, MPI_Wtime());
        MPI_Irecv(rmsg1, 7, MPI_CHAR, src, tag, comm, &req);
        fprintf(stdout, "-----proc %d, after recv, t=%f\n", rank, MPI_Wtime());
        fprintf(stdout, "-----proc %d, before wait, t=%f\n", rank, MPI_Wtime());
        MPI_Wait(&req, &status);
        fprintf(stdout, "-----proc %d, after wait, t=%f\n", rank, MPI_Wtime());
        if (strcmp(rmsg1, msg1) != 0) {
            errs++;
            fprintf(stdout, "message 1 (%s) should be %s\n", rmsg1, msg1);
            fflush(stdout);
        } else {
            fprintf(stdout, "message transferred successfully\n");
        }
    }

    fprintf(stdout, "<<<<<<<<<<proc %d, before detach, t=%f\n", rank, MPI_Wtime());
    MPI_Buffer_detach(&buf, &bufsize);
    fprintf(stdout, ">>>>>>>>>>proc %d, after detach, t=%f\n", rank, MPI_Wtime());
    free(buf);

    MPI_Finalize();
    return 0;
}
/* ______________________________________________________________________________________________ Main ______________________________________________________________________________________________ */ int main(int argc, char *argv[]) { // --- Init Cluster variable --------------------------------------------------------- // carmen 0 => local execution i.e. show time on screen // carmen 1 => cluster execution i.e. refresh Performance.dat (default) #if defined PARMPI // --- MPI Runtime system initialization // size - total number of processors // rnak - current CPU MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); #else size=1; rank=0; #endif if (argc == 2) Cluster = atoi(argv[1]); // --- Print messages on screen- ----------------------------------------------------- cout << "carmen: begin execution.\n\n"; printf("Carmen %4.2f \n",CarmenVersion); cout << "Copyright (C) 2000-2005 by Olivier Roussel.\n"; cout << "All rights reserved.\n\n"; #if defined PARMPI //Synchronize all parallel branches MPI_Barrier(MPI_COMM_WORLD); #endif CPUTime.start(); // --- Create first node of the tree structure --------------------------------------- Node *Mesh=0; FineMesh *FMesh=0; // --- Init global values (See Parameters.h and Parameters.cpp) ---------------------------- cout << "carmen: init computation ...\n"; InitParameters(); // --- Debug output information for parallel execution ------------------------------------- #if defined PARMPI if (Multiresolution) { printf("\nParallel Multiresolution solver not implemented yet!\n"); exit(0); } printf("My Rank=%d\n",rank); // --- Each CPU print his coordinates in the virtual processor cart ------------------------ printf("Cart_i = %d; Cart_j = %d; Cart_k = %d;\n",coords[0],coords[1],coords[2]); // --- Each CPU print his computation domain printf("Xmin = %lf; XMax = %lf;\n",XMin[1],XMax[1]); printf("Ymin = %lf; YMax = %lf;\n",XMin[2],XMax[2]); printf("Zmin = %lf; ZMax = %lf;\n",XMin[3],XMax[3]); // --- And the local scale number ---------------------------------------------------------- printf("ScaleNb = %d\n",ScaleNb); #endif // --- Allocate ---------------------------------------------------------------------------- if (Multiresolution) Mesh = new Node; else FMesh = new FineMesh; // --- Init tree structure ----------------------------------------------------------------- if (Multiresolution) { InitTree(Mesh); RefreshTree(Mesh); } // --- Compute initial integral values and init time step ---------------------------------- if (Multiresolution) Mesh->computeIntegral(); else FMesh->computeIntegral(); // -- Write integral values -- // -- Compute initial time step -- InitTimeStep(); if (rank==0) PrintIntegral("Integral.dat"); // --- Save initial values into files ------------------------------------------------------ if (PrintEvery == 0) { if (Multiresolution) View(Mesh, "Tree_0.dat", "Mesh_0.dat", "Average_0.dat"); else View(FMesh,"Average_0.dat"); } // --- When PrintEvery != 0, save initial values into specific name format --- if (PrintEvery != 0) { if (Multiresolution) ViewEvery(Mesh, 0); else ViewEvery(FMesh, 0); } // --- Parallel execution only -------------------------------------------- // --- Save to disk DX header for ouput files ----------------------------- // --- This file is needed for the external postprocessing (merging files from the different processors) #if defined PARMPI real tempXMin[4]; real tempXMax[4]; // --- Save original task parameters for the parallel execution int tempScaleNb=ScaleNb; // --- 
Simulate sequantial running ScaleNb=AllTaskScaleNb; for (int i=0;i<4;i++) { tempXMin[i]=XMin[i]; tempXMax[i]=XMax[i]; // --- Simulate sequantial running XMin[i]=AllXMin[i]; XMax[i]=AllXMax[i]; } // --- Write header with parameters, as we have run sequantial code if (rank==0) FMesh->writeHeader("header.txt"); // Restore variables for (int i=0;i<4;i++) { XMin[i]=tempXMin[i]; XMax[i]=tempXMax[i]; } ScaleNb=tempScaleNb; #endif // --- Done --- cout << "carmen: done.\n"; // --- Write solver type --- if (Multiresolution) cout << "carmen: multiresolution (MR) solver.\n"; else cout << "carmen: finite volume (FV) solver.\n"; // --- Write number of iterations --- if (IterationNb == 1) cout << "carmen: compute 1 iteration ...\n"; else cout << "carmen: compute " << IterationNb << " iterations ...\n"; printf("\n\n\n"); // --- Begin time iteration ---------------------------------------------------------------- for (IterationNo = 1; IterationNo <= IterationNb; IterationNo++) { // --- Time evolution procedure --- if (Multiresolution) TimeEvolution(Mesh); else TimeEvolution(FMesh); // --- Remesh --- if (Multiresolution) Remesh(Mesh); // --- Check CPU Time --- CPUTime.check(); // --- Write information every (Refresh) iteration --- if ((IterationNo-1)%Refresh == 0) { // - Write integral values - if (rank==0) PrintIntegral("Integral.dat"); if (Cluster == 0) ShowTime(CPUTime); // Show time on screen //else if (rank==0) Performance("carmen.prf"); // Refresh file "carmen.prf" } // --- Backup data every (10*Refresh) iteration --- if ((IterationNo-1)%(10*Refresh) == 0 && UseBackup) { if (Multiresolution) Backup(Mesh); else Backup(FMesh); } // --- Print solution if IterationNo = PrintIt1 to PrintIt6 --- if (Multiresolution) ViewIteration(Mesh); else ViewIteration(FMesh); // --- Print solution if IterationNo is a multiple of PrintEvery --- if (PrintEvery != 0) { if (IterationNo%PrintEvery == 0) { if (Multiresolution) ViewEvery(Mesh, IterationNo); else ViewEvery(FMesh, IterationNo); } } // --- End time iteration ------------------------------------------------------------------ } // --- Backup final data ------------------------------------------------------------------ IterationNo--; if (UseBackup) { if (Multiresolution) Backup(Mesh); else Backup(FMesh); } // --- Write integral values --------------------------------------------------------------- if (rank==0) PrintIntegral("Integral.dat"); IterationNo++; // --- Save values into file --------------------------------------------------------------- if (Multiresolution) View(Mesh, "Tree.dat", "Mesh.dat", "Average.dat"); else View(FMesh, "Average.dat"); cout << "\ncarmen: done.\n"; // --- Analyse performance and save it into file ------------------------------------------- if (rank==0) Performance("carmen.prf"); // --- End --------------------------------------------------------------------------------- if (Multiresolution) delete Mesh; else delete FMesh; #if defined PARMPI //free memory for the MPI runtime variables delete[] disp; delete[] blocklen; int sz; MPI_Buffer_detach(&MPIbuffer,&sz); // for (int i = 0; i < 4*Dimension; i++) MPI_Request_free(&req[i]); MPI_Finalize(); #endif cout <<"carmen: end execution.\n"; return EXIT_SUCCESS; }
/*
 * This is a simple program that tests bsend.  It may be run as a single
 * process to simplify debugging; in addition, bsend allows send-to-self
 * programs.
 */
int main(int argc, char *argv[])
{
    MPI_Comm comm = MPI_COMM_WORLD;
    int dest = 0, src = 0, tag = 1;
    int s1, s2, s3;
    char *buf, *bbuf;
    char msg1[7], msg3[17];
    double msg2[2];
    char rmsg1[64], rmsg3[64];
    double rmsg2[64];
    int errs = 0, rank;
    int bufsize, bsize;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* According to the standard, we must use the PACK_SIZE length of each
     * message in the computation of the message buffer size */
    MPI_Pack_size(7, MPI_CHAR, comm, &s1);
    MPI_Pack_size(2, MPI_DOUBLE, comm, &s2);
    MPI_Pack_size(17, MPI_CHAR, comm, &s3);
    bufsize = 3 * MPI_BSEND_OVERHEAD + s1 + s2 + s3;
    buf = (char *) malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    strncpy(msg1, "012345", 7);
    strncpy(msg3, "0123401234012341", 17);
    msg2[0] = 1.23;
    msg2[1] = 3.21;

    if (rank == src) {
        /* These message sizes are chosen to expose any alignment problems */
        MPI_Bsend(msg1, 7, MPI_CHAR, dest, tag, comm);
        MPI_Bsend(msg2, 2, MPI_DOUBLE, dest, tag, comm);
        MPI_Bsend(msg3, 17, MPI_CHAR, dest, tag, comm);
    }
    if (rank == dest) {
        MPI_Recv(rmsg1, 7, MPI_CHAR, src, tag, comm, MPI_STATUS_IGNORE);
        MPI_Recv(rmsg2, 10, MPI_DOUBLE, src, tag, comm, MPI_STATUS_IGNORE);
        MPI_Recv(rmsg3, 17, MPI_CHAR, src, tag, comm, MPI_STATUS_IGNORE);

        if (strcmp(rmsg1, msg1) != 0) {
            errs++;
            fprintf(stderr, "message 1 (%s) should be %s\n", rmsg1, msg1);
        }
        if (rmsg2[0] != msg2[0] || rmsg2[1] != msg2[1]) {
            errs++;
            fprintf(stderr,
                    "message 2 incorrect, values are (%f,%f) but should be (%f,%f)\n",
                    rmsg2[0], rmsg2[1], msg2[0], msg2[1]);
        }
        if (strcmp(rmsg3, msg3) != 0) {
            errs++;
            fprintf(stderr, "message 3 (%s) should be %s\n", rmsg3, msg3);
        }
    }

    /* We can't guarantee that messages arrive until the detach */
    MPI_Buffer_detach(&bbuf, &bsize);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
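Related to the oversized receives above (rmsg2 has room for 64 doubles but only 2 are sent): a small, self-contained sketch showing how MPI_Get_count reports the number of elements actually received; the send is buffered so that the send-to-self cannot block. Names are illustrative.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double msg[2] = { 1.23, 3.21 }, rbuf[64];
    int packed, bufsize, count;
    MPI_Status status;

    /* Buffer the send so that a send-to-self cannot block. */
    MPI_Pack_size(2, MPI_DOUBLE, MPI_COMM_SELF, &packed);
    bufsize = packed + MPI_BSEND_OVERHEAD;
    char *buf = (char *) malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    MPI_Bsend(msg, 2, MPI_DOUBLE, 0, 7, MPI_COMM_SELF);
    MPI_Recv(rbuf, 64, MPI_DOUBLE, 0, 7, MPI_COMM_SELF, &status);

    MPI_Get_count(&status, MPI_DOUBLE, &count);
    printf("received %d doubles\n", count);  /* prints 2, not 64 */

    MPI_Buffer_detach(&buf, &bufsize);
    free(buf);
    MPI_Finalize();
    return 0;
}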
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, /* source, */ dest; MPI_Comm comm; MPI_Status status; MPI_Request req; static int bufsizes[4] = { 1, 100, 10000, 1000000 }; char *buf; #ifdef TEST_IRSEND int veryPicky = 0; /* Set to 1 to test "quality of implementation" in a tricky part of cancel */ #endif int cs, flag, n; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); /* source = 0; */ dest = size - 1; MTestPrintfMsg( 1, "Starting scancel test\n" ); for (cs=0; cs<4; cs++) { if (rank == 0) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "(%d) About to create isend and cancel\n",cs ); MPI_Isend( buf, n, MPI_CHAR, dest, cs+n+1, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MTestPrintfMsg( 1, "Completed wait on isend\n" ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Isend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+1; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); if (rank == 0) { char *bsendbuf; int bsendbufsize; int bf, bs; n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } bsendbufsize = n + MPI_BSEND_OVERHEAD; bsendbuf = (char *)malloc( bsendbufsize ); if (!bsendbuf) { fprintf( stderr, "Unable to allocate %d bytes for bsend\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Buffer_attach( bsendbuf, bsendbufsize ); MTestPrintfMsg( 1, "About to create and cancel ibsend\n" ); MPI_Ibsend( buf, n, MPI_CHAR, dest, cs+n+2, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Ibsend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+2; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); MPI_Buffer_detach( &bf, &bs ); free( bsendbuf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); /* Because this test is erroneous, we do not perform it unless TEST_IRSEND is defined. 
*/ #ifdef TEST_IRSEND /* We avoid ready send to self because an implementation is free to detect the error in delivering a message to itself without a pending receive; we could also check for an error return from the MPI_Irsend */ if (rank == 0 && dest != rank) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "About to create and cancel irsend\n" ); MPI_Irsend( buf, n, MPI_CHAR, dest, cs+n+3, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); /* This can be pretty ugly. The standard is clear (Section 3.8) that either a sent message is received or the sent message is successfully cancelled. Since this message can never be received, the cancel must complete successfully. However, since there is no matching receive, this program is erroneous. In this case, we can't really flag this as an error */ if (!flag && veryPicky) { errs ++; printf( "Failed to cancel an Irsend request\n" ); fflush(stdout); } if (flag) { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+3; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int n, tag; char *btemp; MPI_Recv( &n, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (n > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( n ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", n); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, n, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); #endif if (rank == 0) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "About to create and cancel issend\n" ); MPI_Issend( buf, n, MPI_CHAR, dest, cs+n+4, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Issend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+4; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); } MTest_Finalize( errs ); MPI_Finalize(); return 0; }
void declareBindings (void) { /* === Point-to-point === */ void* buf; int count; MPI_Datatype datatype; int dest; int tag; MPI_Comm comm; MPI_Send (buf, count, datatype, dest, tag, comm); // L12 int source; MPI_Status status; MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15 MPI_Get_count (&status, datatype, &count); MPI_Bsend (buf, count, datatype, dest, tag, comm); MPI_Ssend (buf, count, datatype, dest, tag, comm); MPI_Rsend (buf, count, datatype, dest, tag, comm); void* buffer; int size; MPI_Buffer_attach (buffer, size); // L22 MPI_Buffer_detach (buffer, &size); MPI_Request request; MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25 MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request); MPI_Issend (buf, count, datatype, dest, tag, comm, &request); MPI_Irsend (buf, count, datatype, dest, tag, comm, &request); MPI_Irecv (buf, count, datatype, source, tag, comm, &request); MPI_Wait (&request, &status); int flag; MPI_Test (&request, &flag, &status); // L32 MPI_Request_free (&request); MPI_Request* array_of_requests; int index; MPI_Waitany (count, array_of_requests, &index, &status); // L36 MPI_Testany (count, array_of_requests, &index, &flag, &status); MPI_Status* array_of_statuses; MPI_Waitall (count, array_of_requests, array_of_statuses); // L39 MPI_Testall (count, array_of_requests, &flag, array_of_statuses); int incount; int outcount; int* array_of_indices; MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L44--45 MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L46--47 MPI_Iprobe (source, tag, comm, &flag, &status); // L48 MPI_Probe (source, tag, comm, &status); MPI_Cancel (&request); MPI_Test_cancelled (&status, &flag); MPI_Send_init (buf, count, datatype, dest, tag, comm, &request); MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Recv_init (buf, count, datatype, source, tag, comm, &request); MPI_Start (&request); MPI_Startall (count, array_of_requests); void* sendbuf; int sendcount; MPI_Datatype sendtype; int sendtag; void* recvbuf; int recvcount; MPI_Datatype recvtype; MPI_Datatype recvtag; MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, &status); // L67--69 MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag, comm, &status); // L70--71 MPI_Datatype oldtype; MPI_Datatype newtype; MPI_Type_contiguous (count, oldtype, &newtype); // L74 int blocklength; { int stride; MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78 } { MPI_Aint stride; MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82 } int* array_of_blocklengths; { int* array_of_displacements; MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L87--88 } { MPI_Aint* array_of_displacements; MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L92--93 MPI_Datatype* array_of_types; MPI_Type_struct (count, array_of_blocklengths, array_of_displacements, array_of_types, &newtype); // L95--96 } void* location; MPI_Aint address; MPI_Address (location, &address); // L100 MPI_Aint extent; MPI_Type_extent (datatype, &extent); // L102 MPI_Type_size (datatype, &size); MPI_Aint displacement; MPI_Type_lb (datatype, &displacement); // L105 MPI_Type_ub 
(datatype, &displacement); MPI_Type_commit (&datatype); MPI_Type_free (&datatype); MPI_Get_elements (&status, datatype, &count); void* inbuf; void* outbuf; int outsize; int position; MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114 int insize; MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype, comm); // L116--117 MPI_Pack_size (incount, datatype, comm, &size); /* === Collectives === */ MPI_Barrier (comm); // L121 int root; MPI_Bcast (buffer, count, datatype, root, comm); // L123 MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L124--125 int* recvcounts; int* displs; MPI_Gatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm); // L128--130 MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L131--132 int* sendcounts; MPI_Scatterv (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm); // L134--135 MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L136--137 MPI_Allgatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); // L138--140 MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L141--142 int* sdispls; int* rdispls; MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); // L145--147 MPI_Op op; MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149 #if 0 MPI_User_function function; int commute; MPI_Op_create (function, commute, &op); // L153 #endif MPI_Op_free (&op); // L155 MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm); MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm); MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm); /* === Groups, contexts, and communicators === */ MPI_Group group; MPI_Group_size (group, &size); // L162 int rank; MPI_Group_rank (group, &rank); // L164 MPI_Group group1; int n; int* ranks1; MPI_Group group2; int* ranks2; MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170 int result; MPI_Group_compare (group1, group2, &result); // L172 MPI_Group newgroup; MPI_Group_union (group1, group2, &newgroup); // L174 MPI_Group_intersection (group1, group2, &newgroup); MPI_Group_difference (group1, group2, &newgroup); int* ranks; MPI_Group_incl (group, n, ranks, &newgroup); // L178 MPI_Group_excl (group, n, ranks, &newgroup); extern int ranges[][3]; MPI_Group_range_incl (group, n, ranges, &newgroup); // L181 MPI_Group_range_excl (group, n, ranges, &newgroup); MPI_Group_free (&group); MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); MPI_Comm comm1; MPI_Comm comm2; MPI_Comm_compare (comm1, comm2, &result); MPI_Comm newcomm; MPI_Comm_dup (comm, &newcomm); MPI_Comm_create (comm, group, &newcomm); int color; int key; MPI_Comm_split (comm, color, key, &newcomm); // L194 MPI_Comm_free (&comm); MPI_Comm_test_inter (comm, &flag); MPI_Comm_remote_size (comm, &size); MPI_Comm_remote_group (comm, &group); MPI_Comm local_comm; int local_leader; MPI_Comm peer_comm; int remote_leader; MPI_Comm newintercomm; MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag, &newintercomm); // L204--205 MPI_Comm intercomm; MPI_Comm newintracomm; int high; MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209 int keyval; #if 0 MPI_Copy_function copy_fn; MPI_Delete_function delete_fn; void* extra_state; MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215 
#endif MPI_Keyval_free (&keyval); // L217 void* attribute_val; MPI_Attr_put (comm, keyval, attribute_val); // L219 MPI_Attr_get (comm, keyval, attribute_val, &flag); MPI_Attr_delete (comm, keyval); /* === Environmental inquiry === */ char* name; int resultlen; MPI_Get_processor_name (name, &resultlen); // L226 MPI_Errhandler errhandler; #if 0 MPI_Handler_function function; MPI_Errhandler_create (function, &errhandler); // L230 #endif MPI_Errhandler_set (comm, errhandler); // L232 MPI_Errhandler_get (comm, &errhandler); MPI_Errhandler_free (&errhandler); int errorcode; char* string; MPI_Error_string (errorcode, string, &resultlen); // L237 int errorclass; MPI_Error_class (errorcode, &errorclass); // L239 MPI_Wtime (); MPI_Wtick (); int argc; char** argv; MPI_Init (&argc, &argv); // L244 MPI_Finalize (); MPI_Initialized (&flag); MPI_Abort (comm, errorcode); }
int main(int argc, char *argv[])
{
    int provided, i[2], k;
    char *buffer, *ptr_dt;
    buffer = (char *) malloc(BUFSIZE * sizeof(char));
    MPI_Status status;
    pthread_t receiver_thread, sender_thread[NUMSENDS];
    pthread_attr_t attr;
    MPI_Comm communicator;
    int bs;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided != MPI_THREAD_MULTIPLE) {
        printf("Error\n");
        /* MPI_Abort takes the communicator first, then the error code. */
        MPI_Abort(MPI_COMM_WORLD, 911);
    }
    MPI_Buffer_attach(buffer, BUFSIZE);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_dup(MPI_COMM_WORLD, &communicator);
    /* We do not use this communicator in this program, but with this call the
     * problem appears more reliably.  If the MPI_Comm_dup() call is commented
     * out, it is still evident but does not appear that often (don't know why) */

    /* Initialize and set thread detached attribute */
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    pthread_create(&receiver_thread, &attr, &receiver, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_create(&sender_thread[k], &attr, &sender_bsend, NULL);
    pthread_join(receiver_thread, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_join(sender_thread[k], NULL);
    MPI_Barrier(MPI_COMM_WORLD);

    pthread_create(&receiver_thread, &attr, &receiver, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_create(&sender_thread[k], &attr, &sender_ibsend, NULL);
    pthread_join(receiver_thread, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_join(sender_thread[k], NULL);
    MPI_Barrier(MPI_COMM_WORLD);

    pthread_create(&receiver_thread, &attr, &receiver, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_create(&sender_thread[k], &attr, &sender_isend, NULL);
    pthread_join(receiver_thread, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_join(sender_thread[k], NULL);
    MPI_Barrier(MPI_COMM_WORLD);

    pthread_create(&receiver_thread, &attr, &receiver, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_create(&sender_thread[k], &attr, &sender_send, NULL);
    pthread_join(receiver_thread, NULL);
    for (k = 0; k < NUMSENDS; k++)
        pthread_join(sender_thread[k], NULL);
    MPI_Barrier(MPI_COMM_WORLD);

    pthread_attr_destroy(&attr);

    if (!rank)
        printf(" No Errors\n");

    MPI_Comm_free(&communicator);
    MPI_Buffer_detach(&ptr_dt, &bs);
    free(buffer);
    MPI_Finalize();
    return 0;
}
void FWI_PSV(){ /* global variables */ /* ---------------- */ /* forward modelling */ extern int MYID, FDORDER, NX, NY, NT, L, READMOD, QUELLART, RUN_MULTIPLE_SHOTS, TIME_FILT; extern int LOG, SEISMO, N_STREAMER, FW, NXG, NYG, IENDX, IENDY, NTDTINV, IDXI, IDYI, NXNYI, INV_STF, DTINV; extern float FC_SPIKE_1, FC_SPIKE_2, FC, FC_START, TIME, DT; extern char LOG_FILE[STRING_SIZE], MFILE[STRING_SIZE]; extern FILE *FP; /* gravity modelling/inversion */ extern int GRAVITY, NZGRAV, NGRAVB, GRAV_TYPE, BACK_DENSITY; extern char GRAV_DATA_OUT[STRING_SIZE], GRAV_DATA_IN[STRING_SIZE], GRAV_STAT_POS[STRING_SIZE], DFILE[STRING_SIZE]; extern float LAM_GRAV, GAMMA_GRAV, LAM_GRAV_GRAD, L2_GRAV_IT1; /* full waveform inversion */ extern int GRAD_METHOD, NLBFGS, ITERMAX, IDX, IDY, INVMAT1, EPRECOND; extern int GRAD_FORM, POS[3], QUELLTYPB, MIN_ITER, MODEL_FILTER; extern float FC_END, PRO, C_vp, C_vs, C_rho; extern char MISFIT_LOG_FILE[STRING_SIZE], JACOBIAN[STRING_SIZE]; extern char *FILEINP1; /* local variables */ int ns, nseismograms=0, nt, nd, fdo3, j, i, iter, h, hin, iter_true, SHOTINC, s=0; int buffsize, ntr=0, ntr_loc=0, ntr_glob=0, nsrc=0, nsrc_loc=0, nsrc_glob=0, ishot, nshots=0, itestshot; float sum, eps_scale, opteps_vp, opteps_vs, opteps_rho, Vp_avg, Vs_avg, rho_avg, Vs_sum, Vp_sum, rho_sum; char *buff_addr, ext[10], *fileinp, jac[225], source_signal_file[STRING_SIZE]; double time1, time2, time7, time8, time_av_v_update=0.0, time_av_s_update=0.0, time_av_v_exchange=0.0, time_av_s_exchange=0.0, time_av_timestep=0.0; float L2sum, *L2t; float ** taper_coeff, * epst1, *hc=NULL; int * DTINV_help; MPI_Request *req_send, *req_rec; MPI_Status *send_statuses, *rec_statuses; /* Variables for step length calculation */ int step1, step3=0; float eps_true, tmp; /* Variables for the L-BFGS method */ float * rho_LBFGS, * alpha_LBFGS, * beta_LBFGS; float * y_LBFGS, * s_LBFGS, * q_LBFGS, * r_LBFGS; int NLBFGS_class, LBFGS_pointer, NLBFGS_vec; /* Variables for energy weighted gradient */ float ** Ws, **Wr, **We; /* parameters for FWI-workflow */ int stagemax=0, nstage; /*vector for abort criterion*/ float * L2_hist=NULL; /* help variable for MIN_ITER */ int min_iter_help=0; /* parameters for gravity inversion */ float * gz_mod, * gz_res; float ** gravpos=NULL, ** rho_grav=NULL, ** rho_grav_ext=NULL; float ** grad_grav=NULL; int ngrav=0, nxgrav, nygrav; float L2_grav, FWImax, GRAVmax, FWImax_all, GRAVmax_all ; char jac_grav[STRING_SIZE]; FILE *FPL2, *FP_stage, *FP_GRAV, *LAMBDA; if (MYID == 0){ time1=MPI_Wtime(); clock(); } /* open log-file (each PE is using different file) */ /* fp=stdout; */ sprintf(ext,".%i",MYID); strcat(LOG_FILE,ext); if ((MYID==0) && (LOG==1)) FP=stdout; else FP=fopen(LOG_FILE,"w"); fprintf(FP," This is the log-file generated by PE %d \n\n",MYID); /* ----------------------- */ /* define FD grid geometry */ /* ----------------------- */ /* domain decomposition */ initproc(); NT=iround(TIME/DT); /* number of timesteps */ /* output of parameters to log-file or stdout */ if (MYID==0) write_par(FP); /* NXG, NYG denote size of the entire (global) grid */ NXG=NX; NYG=NY; /* In the following, NX and NY denote size of the local grid ! 
*/ NX = IENDX; NY = IENDY; NTDTINV=ceil((float)NT/(float)DTINV); /* round towards next higher integer value */ /* save every IDXI and IDYI spatial point during the forward modelling */ IDXI=1; IDYI=1; NXNYI=(NX/IDXI)*(NY/IDYI); SHOTINC=1; /* use only every DTINV time sample for the inversion */ DTINV_help=ivector(1,NT); /* read parameters from workflow-file (stdin) */ FP_stage=fopen(FILEINP1,"r"); if(FP_stage==NULL) { if (MYID == 0){ printf("\n==================================================================\n"); printf(" Cannot open Denise workflow input file %s \n",FILEINP1); printf("\n==================================================================\n\n"); err(" --- "); } } /* estimate number of lines in FWI-workflow */ i=0; stagemax=0; while ((i=fgetc(FP_stage)) != EOF) if (i=='\n') ++stagemax; rewind(FP_stage); stagemax--; fclose(FP_stage); /* define data structures for PSV problem */ struct wavePSV; struct wavePSV_PML; struct matPSV; struct fwiPSV; struct mpiPSV; struct seisPSV; struct seisPSVfwi; struct acq; nd = FDORDER/2 + 1; fdo3 = 2*nd; buffsize=2.0*2.0*fdo3*(NX +NY)*sizeof(MPI_FLOAT); /* allocate buffer for buffering messages */ buff_addr=malloc(buffsize); if (!buff_addr) err("allocation failure for buffer for MPI_Bsend !"); MPI_Buffer_attach(buff_addr,buffsize); /* allocation for request and status arrays */ req_send=(MPI_Request *)malloc(REQUEST_COUNT*sizeof(MPI_Request)); req_rec=(MPI_Request *)malloc(REQUEST_COUNT*sizeof(MPI_Request)); send_statuses=(MPI_Status *)malloc(REQUEST_COUNT*sizeof(MPI_Status)); rec_statuses=(MPI_Status *)malloc(REQUEST_COUNT*sizeof(MPI_Status)); /* --------- add different modules here ------------------------ */ ns=NT; /* in a FWI one has to keep all samples of the forward modeled data at the receiver positions to calculate the adjoint sources and to do the backpropagation; look at function saveseis_glob.c to see that every NDT sample for the forward modeled wavefield is written to su files*/ if (SEISMO){ acq.recpos=receiver(FP, &ntr, ishot); acq.recswitch = ivector(1,ntr); acq.recpos_loc = splitrec(acq.recpos,&ntr_loc, ntr, acq.recswitch); ntr_glob=ntr; ntr=ntr_loc; if(N_STREAMER>0){ free_imatrix(acq.recpos,1,3,1,ntr_glob); if(ntr>0) free_imatrix(acq.recpos_loc,1,3,1,ntr); free_ivector(acq.recswitch,1,ntr_glob); } } if(N_STREAMER==0){ /* Memory for seismic data */ alloc_seisPSV(ntr,ns,&seisPSV); /* Memory for FWI seismic data */ alloc_seisPSVfwi(ntr,ntr_glob,ns,&seisPSVfwi); } /* Memory for full data seismograms */ alloc_seisPSVfull(&seisPSV,ntr_glob); /* memory allocation for abort criterion*/ L2_hist = vector(1,1000); /* estimate memory requirement of the variables in megabytes*/ switch (SEISMO){ case 1 : /* particle velocities only */ nseismograms=2; break; case 2 : /* pressure only */ nseismograms=1; break; case 3 : /* curl and div only */ nseismograms=2; break; case 4 : /* everything */ nseismograms=5; break; } /* calculate memory requirements for PSV forward problem */ mem_fwiPSV(nseismograms,ntr,ns,fdo3,nd,buffsize,ntr_glob); /* Define gradient formulation */ /* GRAD_FORM = 1 - stress-displacement gradients */ /* GRAD_FORM = 2 - stress-velocity gradients for decomposed impedance matrix */ GRAD_FORM = 1; if(GRAVITY==1 || GRAVITY==2){ if(GRAV_TYPE == 1){ sprintf(GRAV_DATA_OUT, "./gravity/grav_mod.dat"); /* output file of gravity data */ sprintf(GRAV_DATA_IN, "./gravity/grav_field.dat"); /* input file of gravity data */ } if(GRAV_TYPE == 2){ sprintf(GRAV_DATA_OUT, "./gravity/grav_grad_mod.dat"); /* output file of gravity gradient data */ 
sprintf(GRAV_DATA_IN, "./gravity/grav_grad_field.dat"); /* input file of gravity gradientdata */ } sprintf(GRAV_STAT_POS, "./gravity/grav_stat.dat"); /* file with station positions for gravity modelling */ /* size of the extended gravity model */ nxgrav = NXG + 2*NGRAVB; nygrav = NYG + NGRAVB; } /* allocate memory for PSV forward problem */ alloc_PSV(&wavePSV,&wavePSV_PML); /* calculate damping coefficients for CPMLs (PSV problem)*/ if(FW>0){PML_pro(wavePSV_PML.d_x, wavePSV_PML.K_x, wavePSV_PML.alpha_prime_x, wavePSV_PML.a_x, wavePSV_PML.b_x, wavePSV_PML.d_x_half, wavePSV_PML.K_x_half, wavePSV_PML.alpha_prime_x_half, wavePSV_PML.a_x_half, wavePSV_PML.b_x_half, wavePSV_PML.d_y, wavePSV_PML.K_y, wavePSV_PML.alpha_prime_y, wavePSV_PML.a_y, wavePSV_PML.b_y, wavePSV_PML.d_y_half, wavePSV_PML.K_y_half, wavePSV_PML.alpha_prime_y_half, wavePSV_PML.a_y_half, wavePSV_PML.b_y_half); } /* allocate memory for PSV material parameters */ alloc_matPSV(&matPSV); /* allocate memory for PSV FWI parameters */ alloc_fwiPSV(&fwiPSV); /* allocate memory for PSV MPI variables */ alloc_mpiPSV(&mpiPSV); /* Variables for the l-BFGS method */ if(GRAD_METHOD==2){ NLBFGS_class = 3; /* number of parameter classes */ NLBFGS_vec = NLBFGS_class*NX*NY; /* length of one LBFGS-parameter class */ LBFGS_pointer = 1; /* initiate pointer in the cyclic LBFGS-vectors */ y_LBFGS = vector(1,NLBFGS_vec*NLBFGS); s_LBFGS = vector(1,NLBFGS_vec*NLBFGS); q_LBFGS = vector(1,NLBFGS_vec); r_LBFGS = vector(1,NLBFGS_vec); rho_LBFGS = vector(1,NLBFGS); alpha_LBFGS = vector(1,NLBFGS); beta_LBFGS = vector(1,NLBFGS); } taper_coeff= matrix(1,NY,1,NX); /* memory for source position definition */ acq.srcpos1=fmatrix(1,8,1,1); /* memory of L2 norm */ L2t = vector(1,4); epst1 = vector(1,3); fprintf(FP," ... memory allocation for PE %d was successfull.\n\n", MYID); /* Holberg coefficients for FD operators*/ hc = holbergcoeff(); MPI_Barrier(MPI_COMM_WORLD); /* Reading source positions from SOURCE_FILE */ acq.srcpos=sources(&nsrc); nsrc_glob=nsrc; /* create model grids */ if(L){ if (READMOD) readmod_visc_PSV(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta); else model(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta); } else{ if (READMOD) readmod_elastic_PSV(matPSV.prho,matPSV.ppi,matPSV.pu); else model_elastic(matPSV.prho,matPSV.ppi,matPSV.pu); } /* check if the FD run will be stable and free of numerical dispersion */ if(L){ checkfd_ssg_visc(FP,matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta,hc); } else{ checkfd_ssg_elastic(FP,matPSV.prho,matPSV.ppi,matPSV.pu,hc); } if(GRAVITY==1 || GRAVITY==2){ /* read station positions */ MPI_Barrier(MPI_COMM_WORLD); gravpos=read_grav_pos(&ngrav); /* define model and residual data vector for gz (z-component of the gravity field) */ gz_mod = vector(1,ngrav); gz_res = vector(1,ngrav); /* only forward modelling of gravity data */ if(GRAVITY==1){ /* global density model */ rho_grav = matrix(1,NYG,1,NXG); rho_grav_ext = matrix(1,nygrav,1,nxgrav); read_density_glob(rho_grav,1); extend_mod(rho_grav,rho_grav_ext,nxgrav,nygrav); grav_mod(rho_grav_ext,ngrav,gravpos,gz_mod,nxgrav,nygrav,NZGRAV); free_matrix(rho_grav,1,NYG,1,NXG); free_matrix(rho_grav_ext,1,nygrav,1,nxgrav); } if(GRAVITY==2){ grad_grav = matrix(1,NY,1,NX); } } SHOTINC=1; iter_true=1; /* Begin of FWI-workflow */ for(nstage=1;nstage<=stagemax;nstage++){ /* read workflow input file *.inp */ FP_stage=fopen(FILEINP1,"r"); read_par_inv(FP_stage,nstage,stagemax); /*fclose(FP_stage);*/ 
if((EPRECOND==1)||(EPRECOND==3)){ Ws = matrix(1,NY,1,NX); /* total energy of the source wavefield */ Wr = matrix(1,NY,1,NX); /* total energy of the receiver wavefield */ We = matrix(1,NY,1,NX); /* total energy of source and receiver wavefield */ } FC=FC_END; iter=1; /* -------------------------------------- * Begin of Full Waveform iteration loop * -------------------------------------- */ while(iter<=ITERMAX){ if(GRAD_METHOD==2){ /* increase pointer to LBFGS-vector*/ if(iter>2){ LBFGS_pointer++; } /* if LBFGS-pointer > NLBFGS -> set LBFGS_pointer=1 */ if(LBFGS_pointer>NLBFGS){LBFGS_pointer=1;} } if (MYID==0) { time2=MPI_Wtime(); fprintf(FP,"\n\n\n ------------------------------------------------------------------\n"); fprintf(FP,"\n\n\n TDFWI ITERATION %d \t of %d \n",iter,ITERMAX); fprintf(FP,"\n\n\n ------------------------------------------------------------------\n"); } /* For the calculation of the material parameters between gridpoints they have to be averaged. For this, values lying at 0 and NX+1, for example, are required on the local grid. These are now copied from the neighbouring grids */ if (L){ matcopy_PSV(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup); } else{ matcopy_elastic_PSV(matPSV.prho,matPSV.ppi,matPSV.pu); } MPI_Barrier(MPI_COMM_WORLD); av_mue(matPSV.pu,matPSV.puipjp,matPSV.prho); av_rho(matPSV.prho,matPSV.prip,matPSV.prjp); if (L) av_tau(matPSV.ptaus,matPSV.ptausipjp); /* Preparing memory variables for update_s (viscoelastic) */ if (L) prepare_update_s_visc_PSV(matPSV.etajm,matPSV.etaip,matPSV.peta,matPSV.fipjp,matPSV.pu,matPSV.puipjp,matPSV.ppi,matPSV.prho,matPSV.ptaus,matPSV.ptaup,matPSV.ptausipjp,matPSV.f,matPSV.g, matPSV.bip,matPSV.bjm,matPSV.cip,matPSV.cjm,matPSV.dip,matPSV.d,matPSV.e); if(iter_true==1){ for (i=1;i<=NX;i=i+IDX){ for (j=1;j<=NY;j=j+IDY){ if(INVMAT1==1){ fwiPSV.Vp0[j][i] = matPSV.ppi[j][i]; fwiPSV.Vs0[j][i] = matPSV.pu[j][i]; fwiPSV.Rho0[j][i] = matPSV.prho[j][i]; } if(INVMAT1==2){ fwiPSV.Vp0[j][i] = sqrt((matPSV.ppi[j][i]+2.0*matPSV.pu[j][i])*matPSV.prho[j][i]); fwiPSV.Vs0[j][i] = sqrt(matPSV.pu[j][i]*matPSV.prho[j][i]); fwiPSV.Rho0[j][i] = matPSV.prho[j][i]; } if(INVMAT1==3){ fwiPSV.Vp0[j][i] = matPSV.ppi[j][i]; fwiPSV.Vs0[j][i] = matPSV.pu[j][i]; fwiPSV.Rho0[j][i] = matPSV.prho[j][i]; } } } /* ----------------------------- */ /* calculate Covariance matrices */ /* ----------------------------- */ Vp_avg = 0.0; Vs_avg = 0.0; rho_avg = 0.0; for (i=1;i<=NX;i=i+IDX){ for (j=1;j<=NY;j=j+IDY){ /* calculate average Vp, Vs */ Vp_avg+=matPSV.ppi[j][i]; Vs_avg+=matPSV.pu[j][i]; /* calculate average rho */ rho_avg+=matPSV.prho[j][i]; } } /* calculate average Vp, Vs and rho of all CPUs*/ Vp_sum = 0.0; MPI_Allreduce(&Vp_avg,&Vp_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); Vp_avg=Vp_sum; Vs_sum = 0.0; MPI_Allreduce(&Vs_avg,&Vs_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); Vs_avg=Vs_sum; rho_sum = 0.0; MPI_Allreduce(&rho_avg,&rho_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); rho_avg=rho_sum; Vp_avg /=NXG*NYG; Vs_avg /=NXG*NYG; rho_avg /=NXG*NYG; if(MYID==0){ printf("Vp_avg = %.0f \t Vs_avg = %.0f \t rho_avg = %.0f \n ",Vp_avg,Vs_avg,rho_avg); } C_vp = Vp_avg; C_vs = Vs_avg; C_rho = rho_avg; } /* Open Log File for L2 norm */ if(MYID==0){ if(iter_true==1){ FPL2=fopen(MISFIT_LOG_FILE,"w"); } if(iter_true>1){ FPL2=fopen(MISFIT_LOG_FILE,"a"); } } /* ---------------------------------------------------------------------------------------------------- */ /* --------- Calculate gradient and objective function using the adjoint state method ----------------- */ /* 
---------------------------------------------------------------------------------------------------- */ L2sum = grad_obj_psv(&wavePSV, &wavePSV_PML, &matPSV, &fwiPSV, &mpiPSV, &seisPSV, &seisPSVfwi, &acq, hc, iter, nsrc, ns, ntr, ntr_glob, nsrc_glob, nsrc_loc, ntr_loc, nstage, We, Ws, Wr, taper_coeff, hin, DTINV_help, req_send, req_rec); L2t[1]=L2sum; L2t[4]=L2sum; if(GRAVITY==2){ /* save seismic L2-norm of seismic data residuals */ L2sum = L2t[1]; /* global density model */ rho_grav = matrix(1,NYG,1,NXG); rho_grav_ext = matrix(1,nygrav,1,nxgrav); /* model gravity data */ /* save current density model */ sprintf(jac_grav,"%s_tmp.rho.%i%i",JACOBIAN,POS[1],POS[2]); FP_GRAV=fopen(jac_grav,"wb"); for (i=1;i<=NX;i=i+IDX){ for (j=1;j<=NY;j=j+IDY){ fwrite(&matPSV.prho[j][i],sizeof(float),1,FP_GRAV); } } fclose(FP_GRAV); MPI_Barrier(MPI_COMM_WORLD); /* merge model file */ sprintf(jac_grav,"%s_tmp.rho",JACOBIAN); if (MYID==0) mergemod(jac_grav,3); MPI_Barrier(MPI_COMM_WORLD); /* gravity forward modelling */ read_density_glob(rho_grav,2); extend_mod(rho_grav,rho_grav_ext,nxgrav,nygrav); grav_mod(rho_grav_ext,ngrav,gravpos,gz_mod,nxgrav,nygrav,NZGRAV); /* calculate gravity data residuals */ L2_grav=calc_res_grav(ngrav,gz_mod,gz_res); /* calculate lambda 1 */ if(iter==1){ LAM_GRAV = GAMMA_GRAV * (L2sum/L2_grav); } /* add gravity penalty term to the seismic objective function */ L2t[1]+=LAM_GRAV * L2_grav; L2t[4]+=LAM_GRAV * L2_grav; /* calculate gravity gradient */ for (i=1;i<=NX;i=i+IDX){ for (j=1;j<=NY;j=j+IDY){ grad_grav[j][i]=0.0; } } grav_grad(ngrav,gravpos,grad_grav,gz_res); MPI_Barrier(MPI_COMM_WORLD); /* merge model file */ sprintf(jac,"%s_grav",JACOBIAN); if (MYID==0) mergemod(jac,3); /* free memory */ free_matrix(rho_grav,1,NYG,1,NXG); free_matrix(rho_grav_ext,1,nygrav,1,nxgrav); } /* Interpolate missing spatial gradient values in case IDXI > 1 || IDXY > 1 */ /* ------------------------------------------------------------------------ */ if((IDXI>1)||(IDYI>1)){ interpol(IDXI,IDYI,fwiPSV.waveconv,1); interpol(IDXI,IDYI,fwiPSV.waveconv_u,1); interpol(IDXI,IDYI,fwiPSV.waveconv_rho,1); } /* Preconditioning of gradients after shot summation */ precond_PSV(&fwiPSV,&acq,nsrc,ntr_glob,taper_coeff,FP_GRAV); /* Add gravity gradient to FWI density gradient */ /* -------------------------------------------- */ if(GRAVITY==2){ /* calculate maximum values of waveconv_rho and grad_grav */ FWImax = 0.0; GRAVmax = 0.0; for (i=1;i<=NX;i++){ for (j=1;j<=NY;j++){ if(fabs(fwiPSV.waveconv_rho[j][i])>FWImax){FWImax=fabs(fwiPSV.waveconv_rho[j][i]);} if(fabs(grad_grav[j][i])>GRAVmax){GRAVmax=fabs(grad_grav[j][i]);} } } MPI_Allreduce(&FWImax,&FWImax_all, 1,MPI_FLOAT,MPI_MAX,MPI_COMM_WORLD); MPI_Allreduce(&GRAVmax,&GRAVmax_all,1,MPI_FLOAT,MPI_MAX,MPI_COMM_WORLD); /* calculate lambda 2, normalized with respect to the maximum gradients */ if(iter==1){ LAM_GRAV_GRAD = GAMMA_GRAV * (FWImax_all/GRAVmax_all); } /* add gravity gradient to seismic gradient with respect to the density */ for (i=1;i<=NX;i++){ for (j=1;j<=NY;j++){ fwiPSV.waveconv_rho[j][i] += LAM_GRAV_GRAD * grad_grav[j][i]; } } } /* Use preconditioned conjugate gradient optimization method */ if(GRAD_METHOD==1){ PCG(fwiPSV.waveconv, taper_coeff, nsrc, acq.srcpos, acq.recpos, ntr_glob, iter, fwiPSV.gradp, fwiPSV.waveconv_u, fwiPSV.gradp_u, fwiPSV.waveconv_rho, fwiPSV.gradp_rho); } /* Use l-BFGS optimization */ if(GRAD_METHOD==2){ /* store models and gradients in l-BFGS vectors */ store_LBFGS_PSV(taper_coeff, nsrc, acq.srcpos, acq.recpos, ntr_glob, iter, 
fwiPSV.waveconv, fwiPSV.gradp, fwiPSV.waveconv_u, fwiPSV.gradp_u, fwiPSV.waveconv_rho, fwiPSV.gradp_rho, y_LBFGS, s_LBFGS, q_LBFGS, matPSV.ppi, matPSV.pu, matPSV.prho, NXNYI, LBFGS_pointer, NLBFGS, NLBFGS_vec); /* apply l-BFGS optimization */ LBFGS(iter, y_LBFGS, s_LBFGS, rho_LBFGS, alpha_LBFGS, q_LBFGS, r_LBFGS, beta_LBFGS, LBFGS_pointer, NLBFGS, NLBFGS_vec); /* extract gradients and save old models/gradients for next l-BFGS iteration */ extract_LBFGS_PSV(iter, fwiPSV.waveconv, fwiPSV.gradp, fwiPSV.waveconv_u, fwiPSV.gradp_u, fwiPSV.waveconv_rho, fwiPSV.gradp_rho, matPSV.ppi, matPSV.pu, matPSV.prho, r_LBFGS); } opteps_vp=0.0; opteps_vs=0.0; opteps_rho=0.0; /* ============================================================================================================================*/ /* =============================================== test loop L2 ===============================================================*/ /* ============================================================================================================================*/ /* set min_iter_help to initial global value of MIN_ITER */ if(iter==1){min_iter_help=MIN_ITER;} /* Estimate optimum step length ... */ /* ... by line search (parabolic fitting) */ eps_scale = step_length_est_psv(&wavePSV,&wavePSV_PML,&matPSV,&fwiPSV,&mpiPSV,&seisPSV,&seisPSVfwi,&acq,hc,iter,nsrc,ns,ntr,ntr_glob,epst1,L2t,nsrc_glob,nsrc_loc,&step1,&step3,nxgrav,nygrav,ngrav,gravpos,gz_mod,NZGRAV, ntr_loc,Ws,Wr,hin,DTINV_help,req_send,req_rec); /* no model update due to steplength estimation failed or update with the smallest steplength if the number of iteration is smaller than the minimum number of iteration per frequency MIN_ITER */ if((iter>min_iter_help)&&(step1==0)){ eps_scale=0.0; opteps_vp=0.0; } else{ opteps_vp=eps_scale; } /* write log-parameter files */ if(MYID==0){ printf("MYID = %d \t opteps_vp = %e \t opteps_vs = %e \t opteps_rho = %e \n",MYID,opteps_vp,opteps_vs,opteps_rho); printf("MYID = %d \t L2t[1] = %e \t L2t[2] = %e \t L2t[3] = %e \t L2t[4] = %e \n",MYID,L2t[1],L2t[2],L2t[3],L2t[4]); printf("MYID = %d \t epst1[1] = %e \t epst1[2] = %e \t epst1[3] = %e \n",MYID,epst1[1],epst1[2],epst1[3]); /*output of log file for combined inversion*/ if(iter_true==1){ LAMBDA = fopen("gravity/lambda.dat","w"); } if(iter_true>1){ LAMBDA = fopen("gravity/lambda.dat","a"); } fprintf(LAMBDA,"%d \t %d \t %e \t %e \t %e \t %e \t %e \t %e \t %e \n",nstage,iter,LAM_GRAV,L2sum,L2_grav,L2t[4],LAM_GRAV_GRAD,FWImax_all,GRAVmax_all); fclose(LAMBDA); } if(MYID==0){ if (TIME_FILT==0){ fprintf(FPL2,"%e \t %e \t %e \t %e \t %e \t %e \t %e \t %e \t %d \n",opteps_vp,epst1[1],epst1[2],epst1[3],L2t[1],L2t[2],L2t[3],L2t[4],nstage);} else{ fprintf(FPL2,"%e \t %e \t %e \t %e \t %e \t %e \t %e \t %e \t %f \t %f \t %d \n",opteps_vp,epst1[1],epst1[2],epst1[3],L2t[1],L2t[2],L2t[3],L2t[4],FC_START,FC,nstage);}} /* saving history of final L2*/ L2_hist[iter]=L2t[4]; s=0; /* calculate optimal change in the material parameters */ eps_true=calc_mat_change_test_PSV(fwiPSV.waveconv,fwiPSV.waveconv_rho,fwiPSV.waveconv_u,fwiPSV.prho_old,matPSV.prho,fwiPSV.ppi_old,matPSV.ppi,fwiPSV.pu_old,matPSV.pu,iter,1,eps_scale,0); if (MODEL_FILTER){ /* smoothing the velocity models vp and vs */ smooth_model(matPSV.ppi,matPSV.pu,matPSV.prho,iter); } if(MYID==0){ /* fprintf(FPL2,"=============================================================\n"); fprintf(FPL2,"=============================================================\n"); fprintf(FPL2,"STATISTICS FOR ITERATION STEP %d \n",iter); 
fprintf(FPL2,"=============================================================\n"); fprintf(FPL2,"=============================================================\n");*/ /* fprintf(FPL2,"Low-pass filter at %e Hz\n",freq); fprintf(FPL2,"----------------------------------------------\n"); */ /*fprintf(FPL2,"L2 at iteration step n = %e \n",L2);*/ /* fprintf(FPL2,"%e \t %e \t %e \t %e \t %e \t %e \t %e \t %e \n",EPSILON,EPSILON_u,EPSILON_rho,L2t[4],betaVp,betaVs,betarho,sqrt(C_vp));*/ /*fprintf(FPL2,"----------------------------------------------\n");*/ /* fprintf(FPL2,"=============================================================\n"); fprintf(FPL2,"=============================================================\n\n\n");*/ } if(MYID==0){ fclose(FPL2); } if (iter>min_iter_help){ float diff=0.0, pro=PRO; /* calculating differnce of the actual L2 and before two iterations, dividing with L2_hist[iter-2] provide changing in procent*/ diff=fabs((L2_hist[iter-2]-L2_hist[iter])/L2_hist[iter-2]); if((diff<=pro)||(step3==1)){ /* output of the model at the end of given corner frequency */ model_freq_out_PSV(matPSV.ppi,matPSV.prho,matPSV.pu,nstage,FC); s=1; min_iter_help=0; min_iter_help=iter+MIN_ITER; iter=0; if(GRAD_METHOD==2){ zero_LBFGS(NLBFGS, NLBFGS_vec, y_LBFGS, s_LBFGS, q_LBFGS, r_LBFGS, alpha_LBFGS, beta_LBFGS, rho_LBFGS); LBFGS_pointer = 1; } if(MYID==0){ if(step3==1){ printf("\n Steplength estimation failed step3=%d \n Changing to next FWI stage \n",step3); } else{ printf("\n Reached the abort criterion of pro=%e and diff=%e \n Changing to next FWI stage \n",pro,diff); } } break; } } iter++; iter_true++; /* ====================================== */ } /* end of fullwaveform iteration loop*/ /* ====================================== */ } /* End of FWI-workflow loop */ /* deallocate memory for PSV forward problem */ dealloc_PSV(&wavePSV,&wavePSV_PML); /* deallocation of memory */ free_matrix(fwiPSV.Vp0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.Vs0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.Rho0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.prho_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prip,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ppi,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.ppi_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.pu,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.pu_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.puipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_lam,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(mpiPSV.bufferlef_to_rig,1,NY,1,fdo3); free_matrix(mpiPSV.bufferrig_to_lef,1,NY,1,fdo3); free_matrix(mpiPSV.buffertop_to_bot,1,NX,1,fdo3); free_matrix(mpiPSV.bufferbot_to_top,1,NX,1,fdo3); free_vector(hc,0,6); free_matrix(fwiPSV.gradg,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradg_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho_s,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradg_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_mu,-nd+1,NY+nd,-nd+1,NX+nd); 
free_matrix(fwiPSV.waveconv_u_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_vector(fwiPSV.forward_prop_x,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_y,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_rho_x,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_rho_y,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_u,1,NY*NX*NT); if (nsrc_loc>0){ free_matrix(acq.signals,1,nsrc_loc,1,NT); free_matrix(acq.srcpos_loc,1,8,1,nsrc_loc); free_matrix(acq.srcpos_loc_back,1,6,1,nsrc_loc); } /* free memory for global source positions */ free_matrix(acq.srcpos,1,8,1,nsrc); /* free memory for source position definition */ free_matrix(acq.srcpos1,1,8,1,1); /* free memory for abort criterion */ free_vector(L2_hist,1,1000); free_vector(L2t,1,4); free_vector(epst1,1,3); if(N_STREAMER==0){ if (SEISMO) free_imatrix(acq.recpos,1,3,1,ntr_glob); if ((ntr>0) && (SEISMO)){ free_imatrix(acq.recpos_loc,1,3,1,ntr); acq.recpos_loc = NULL; switch (SEISMO){ case 1 : /* particle velocities only */ free_matrix(seisPSV.sectionvx,1,ntr,1,ns); free_matrix(seisPSV.sectionvy,1,ntr,1,ns); seisPSV.sectionvx=NULL; seisPSV.sectionvy=NULL; break; case 2 : /* pressure only */ free_matrix(seisPSV.sectionp,1,ntr,1,ns); break; case 3 : /* curl and div only */ free_matrix(seisPSV.sectioncurl,1,ntr,1,ns); free_matrix(seisPSV.sectiondiv,1,ntr,1,ns); break; case 4 : /* everything */ free_matrix(seisPSV.sectionvx,1,ntr,1,ns); free_matrix(seisPSV.sectionvy,1,ntr,1,ns); free_matrix(seisPSV.sectionp,1,ntr,1,ns); free_matrix(seisPSV.sectioncurl,1,ntr,1,ns); free_matrix(seisPSV.sectiondiv,1,ntr,1,ns); break; } } free_matrix(seisPSVfwi.sectionread,1,ntr_glob,1,ns); free_ivector(acq.recswitch,1,ntr); if((QUELLTYPB==1)||(QUELLTYPB==3)||(QUELLTYPB==5)||(QUELLTYPB==7)){ free_matrix(seisPSVfwi.sectionvxdata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvxdiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvxdiffold,1,ntr,1,ns); } if((QUELLTYPB==1)||(QUELLTYPB==2)||(QUELLTYPB==6)||(QUELLTYPB==7)){ free_matrix(seisPSVfwi.sectionvydata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvydiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvydiffold,1,ntr,1,ns); } if(QUELLTYPB>=4){ free_matrix(seisPSVfwi.sectionpdata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionpdiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionpdiffold,1,ntr,1,ns); } } if(SEISMO){ free_matrix(seisPSV.fulldata,1,ntr_glob,1,NT); } if(SEISMO==1){ free_matrix(seisPSV.fulldata_vx,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_vy,1,ntr_glob,1,NT); } if(SEISMO==2){ free_matrix(seisPSV.fulldata_p,1,ntr_glob,1,NT); } if(SEISMO==3){ free_matrix(seisPSV.fulldata_curl,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_div,1,ntr_glob,1,NT); } if(SEISMO==4){ free_matrix(seisPSV.fulldata_vx,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_vy,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_p,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_curl,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_div,1,ntr_glob,1,NT); } free_ivector(DTINV_help,1,NT); /* free memory for viscoelastic modeling variables */ if (L) { free_matrix(matPSV.ptaus,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ptausipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ptaup,-nd+1,NY+nd,-nd+1,NX+nd); free_vector(matPSV.peta,1,L); free_vector(matPSV.etaip,1,L); free_vector(matPSV.etajm,1,L); free_vector(matPSV.bip,1,L); free_vector(matPSV.bjm,1,L); free_vector(matPSV.cip,1,L); free_vector(matPSV.cjm,1,L); free_matrix(matPSV.f,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.g,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.fipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_f3tensor(matPSV.dip,-nd+1,NY+nd,-nd+1,NX+nd,1,L); 
free_f3tensor(matPSV.d,-nd+1,NY+nd,-nd+1,NX+nd,1,L); free_f3tensor(matPSV.e,-nd+1,NY+nd,-nd+1,NX+nd,1,L); } if(GRAVITY){ free_matrix(gravpos,1,2,1,ngrav); free_vector(gz_mod,1,ngrav); free_vector(gz_res,1,ngrav); if(GRAVITY==2){ free_matrix(grad_grav,1,NY,1,NX); } } /* de-allocate buffer for messages */ MPI_Buffer_detach(buff_addr,&buffsize); MPI_Barrier(MPI_COMM_WORLD); if (MYID==0){ fprintf(FP,"\n **Info from main (written by PE %d): \n",MYID); fprintf(FP," CPU time of program per PE: %li seconds.\n",clock()/CLOCKS_PER_SEC); time8=MPI_Wtime(); fprintf(FP," Total real time of program: %4.2f seconds.\n",time8-time1); time_av_v_update=time_av_v_update/(double)NT; time_av_s_update=time_av_s_update/(double)NT; time_av_v_exchange=time_av_v_exchange/(double)NT; time_av_s_exchange=time_av_s_exchange/(double)NT; time_av_timestep=time_av_timestep/(double)NT; /* fprintf(FP," Average times for \n"); fprintf(FP," velocity update: \t %5.3f seconds \n",time_av_v_update); fprintf(FP," stress update: \t %5.3f seconds \n",time_av_s_update); fprintf(FP," velocity exchange: \t %5.3f seconds \n",time_av_v_exchange); fprintf(FP," stress exchange: \t %5.3f seconds \n",time_av_s_exchange); fprintf(FP," timestep: \t %5.3f seconds \n",time_av_timestep);*/ } fclose(FP); }
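/* Illustrative sketch only, not part of the FWI code above: the attach/Bsend/detach
 * lifecycle that code relies on, with the attach buffer sized from MPI_Pack_size plus
 * MPI_BSEND_OVERHEAD for every message that may be buffered at the same time.
 * The function name, neighbor ranks, tags and counts are assumptions for illustration. */
#include <mpi.h>
#include <stdlib.h>

static void halo_bsend_example(MPI_Comm comm, float *halo, int halo_len,
                               int left, int right)
{
    int packed, bufsize;
    char *bsend_buf;
    void *detached;
    float *recv_left, *recv_right;
    MPI_Status st;

    recv_left  = malloc(halo_len * sizeof(float));
    recv_right = malloc(halo_len * sizeof(float));

    /* Two buffered sends (to left and right) may be pending at once, so the
     * attach buffer needs the packed size plus MPI_BSEND_OVERHEAD for each. */
    MPI_Pack_size(halo_len, MPI_FLOAT, comm, &packed);
    bufsize = 2 * (packed + MPI_BSEND_OVERHEAD);
    bsend_buf = malloc(bufsize);
    MPI_Buffer_attach(bsend_buf, bufsize);

    MPI_Bsend(halo, halo_len, MPI_FLOAT, left,  0, comm);
    MPI_Bsend(halo, halo_len, MPI_FLOAT, right, 1, comm);
    MPI_Recv(recv_right, halo_len, MPI_FLOAT, right, 0, comm, &st);
    MPI_Recv(recv_left,  halo_len, MPI_FLOAT, left,  1, comm, &st);

    /* MPI_Buffer_detach blocks until both buffered messages have been
     * transmitted, so freeing bsend_buf afterwards is safe. */
    MPI_Buffer_detach(&detached, &bufsize);
    free(bsend_buf);
    free(recv_left);
    free(recv_right);
}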
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; int comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int bbuf[(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES]; int buf[BUF_SIZE * 2 * NUM_SEND_TYPES + SLOP]; int i, j, k, l, m, at_size, send_t_number, index, outcount, total, flag; int num_errors, error_count, indices[2 * NUM_SEND_TYPES]; MPI_Request aReq[2 * NUM_SEND_TYPES]; MPI_Status aStatus[2 * NUM_SEND_TYPES]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Buffer_attach (bbuf, sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES); if (rank == 0) { /* set up persistent sends... */ send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES; MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); } for (m = 0; m < NUM_RECV_TYPES; m++) { if ((m == 1) && (rank == 1)) { /* set up the persistent receives... */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Recv_init (&buf[j * BUF_SIZE], BUF_SIZE + (j % 2) * SLOP, MPI_INT, 0, j, comm, &aReq[j]); } } for (l = 0; l < (NUM_COMPLETION_MECHANISMS * 2); l++) { for (k = 0; k < (NUM_COMPLETION_MECHANISMS * 2); k++) { if (rank == 0) { /* initialize all of the send buffers */ for (j = 0; j < NUM_SEND_TYPES; j++) { for (i = 0; i < BUF_SIZE; i++) { buf[2 * j * BUF_SIZE + i] = i; buf[((2 * j + 1) * BUF_SIZE) + i] = BUF_SIZE - 1 - i; } } } else if (rank == 1) { /* zero out all of the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2 * NUM_SEND_TYPES); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* set up transient sends... */ send_t_number = 0; MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; /* Barrier to ensure receives are posted for rsends... 
*/ MPI_Barrier(MPI_COMM_WORLD); MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); /* just to be paranoid */ send_t_number++; assert (send_t_number == NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES); /* start the persistent sends... */ if (k % 2) { MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2, &aReq[2 * send_t_number]); } else { for (j = 0; j < NUM_PERSISTENT_SEND_TYPES * 2; j++) { MPI_Start (&aReq[2 * send_t_number + j]); } } /* complete the sends */ switch (k/2) { case 0: /* use MPI_Wait */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Wait (&aReq[j], &aStatus[j]); } break; case 1: /* use MPI_Waitall */ MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus); break; case 2: /* use MPI_Waitany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus); } break; case 3: /* use MPI_Waitsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { MPI_Waitsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); total += outcount; } break; case 4: /* use MPI_Test */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Test (&aReq[j], &flag, &aStatus[j]); } } break; case 5: /* use MPI_Testall */ flag = 0; while (!flag) { MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus); } break; case 6: /* use MPI_Testany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Testany (NUM_SEND_TYPES * 2, aReq, &index, &flag, aStatus); } } break; case 7: /* use MPI_Testsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { outcount = 0; while (!outcount) { MPI_Testsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); } total += outcount; } break; default: assert (0); break; } } else if (rank == 1) { /* start receives for all of the sends */ if (m == 0) { for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE + (j % 2) * SLOP, MPI_INT, 0, j, comm, &aReq[j]); } } else { /* start the persistent receives... */ if (l % 2) { MPI_Startall (NUM_SEND_TYPES * 2, aReq); } else { for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Start (&aReq[j]); } } } /* Barrier to ensure receives are posted for rsends... */ MPI_Barrier(MPI_COMM_WORLD); /* complete all of the receives... 
*/ switch (l/2) { case 0: /* use MPI_Wait */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Wait (&aReq[j], &aStatus[j]); } break; case 1: /* use MPI_Waitall */ MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus); break; case 2: /* use MPI_Waitany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus); } break; case 3: /* use MPI_Waitsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { MPI_Waitsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); total += outcount; } break; case 4: /* use MPI_Test */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Test (&aReq[j], &flag, &aStatus[j]); } } break; case 5: /* use MPI_Testall */ flag = 0; while (!flag) { MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus); } break; case 6: /* use MPI_Testany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Testany (NUM_SEND_TYPES * 2, aReq, &index, &flag, aStatus); } } break; case 7: /* use MPI_Testsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { outcount = 0; while (!outcount) { MPI_Testsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); } total += outcount; } break; default: assert (0); break; } } else { /* Barrier to ensure receives are posted for rsends... */ MPI_Barrier(MPI_COMM_WORLD); } } } } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* free the persistent send requests */ for (i = 2* (NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES); i < 2 * NUM_SEND_TYPES; i++) { MPI_Request_free (&aReq[i]); } } else if (rank == 1) { /* free the persistent receive requests */ for (i = 0; i < 2 * NUM_SEND_TYPES; i++) { MPI_Request_free (&aReq[i]); } } MPI_Buffer_detach (bbuf, &at_size); assert (at_size == sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
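/* Illustrative sketch, separate from the test program above, of the persistent
 * buffered-send pattern it exercises: MPI_Bsend_init creates the request once,
 * each MPI_Start copies the message into the currently attached buffer, and the
 * request is freed before the buffer is detached. The message length, tag and
 * buffer sizing below are assumptions, not values from the test. */
#include <mpi.h>
#include <stdlib.h>

static void persistent_bsend_example(MPI_Comm comm, int dest, int tag)
{
    int payload[256], packed, bufsize, i;
    char *bsend_buf;
    void *detached;
    MPI_Request req;
    MPI_Status st;

    for (i = 0; i < 256; i++) payload[i] = i;

    MPI_Pack_size(256, MPI_INT, comm, &packed);
    bufsize = packed + MPI_BSEND_OVERHEAD;
    bsend_buf = malloc(bufsize);
    MPI_Buffer_attach(bsend_buf, bufsize);

    MPI_Bsend_init(payload, 256, MPI_INT, dest, tag, comm, &req);
    MPI_Start(&req);        /* copies the message into the attached buffer */
    MPI_Wait(&req, &st);    /* completes locally; delivery may still be pending */
    MPI_Request_free(&req);

    MPI_Buffer_detach(&detached, &bufsize);  /* waits for pending delivery */
    free(bsend_buf);
}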
void RTM_PSV(){ /* global variables */ /* ---------------- */ /* forward modelling */ extern int MYID, FDORDER, NX, NY, NT, L, READMOD, QUELLART, RUN_MULTIPLE_SHOTS, TIME_FILT; extern int LOG, SEISMO, N_STREAMER, FW, NXG, NYG, IENDX, IENDY, NTDTINV, IDXI, IDYI, NXNYI, INV_STF, DTINV; extern float FC_SPIKE_1, FC_SPIKE_2, FC, FC_START, TIME, DT; extern char LOG_FILE[STRING_SIZE], MFILE[STRING_SIZE]; extern FILE *FP; /* gravity modelling/inversion */ extern int GRAVITY, NZGRAV, NGRAVB, GRAV_TYPE, BACK_DENSITY; extern char GRAV_DATA_OUT[STRING_SIZE], GRAV_DATA_IN[STRING_SIZE], GRAV_STAT_POS[STRING_SIZE], DFILE[STRING_SIZE]; extern float LAM_GRAV, GAMMA_GRAV, LAM_GRAV_GRAD, L2_GRAV_IT1; /* full waveform inversion */ extern int GRAD_METHOD, NLBFGS, ITERMAX, IDX, IDY, INVMAT1, EPRECOND; extern int GRAD_FORM, POS[3], QUELLTYPB, MIN_ITER, MODEL_FILTER; extern float FC_END, PRO, C_vp, C_vs, C_rho; extern char MISFIT_LOG_FILE[STRING_SIZE], JACOBIAN[STRING_SIZE]; extern char *FILEINP1; /* local variables */ int ns, nseismograms=0, nt, nd, fdo3, j, i, iter, h, hin, iter_true, SHOTINC, s=0; int buffsize, ntr=0, ntr_loc=0, ntr_glob=0, nsrc=0, nsrc_loc=0, nsrc_glob=0, ishot, nshots=0, itestshot; float sum, eps_scale, opteps_vp, opteps_vs, opteps_rho, Vp_avg, Vs_avg, rho_avg, Vs_sum, Vp_sum, rho_sum; char *buff_addr, ext[10], *fileinp, jac[225], source_signal_file[STRING_SIZE]; double time1, time2, time7, time8, time_av_v_update=0.0, time_av_s_update=0.0, time_av_v_exchange=0.0, time_av_s_exchange=0.0, time_av_timestep=0.0; float L2sum, *L2t; float ** taper_coeff, * epst1, *hc=NULL; int * DTINV_help; MPI_Request *req_send, *req_rec; MPI_Status *send_statuses, *rec_statuses; /* Variables for step length calculation */ int step1, step3=0; float eps_true, tmp; /* Variables for the L-BFGS method */ float * rho_LBFGS, * alpha_LBFGS, * beta_LBFGS; float * y_LBFGS, * s_LBFGS, * q_LBFGS, * r_LBFGS; int NLBFGS_class, LBFGS_pointer, NLBFGS_vec; /* Variables for energy weighted gradient */ float ** Ws, **Wr, **We; /* parameters for FWI-workflow */ int stagemax=0, nstage; /* help variable for MIN_ITER */ int min_iter_help=0; /* parameters for gravity inversion */ char jac_grav[STRING_SIZE]; FILE *FP_stage, *FP_GRAV; if (MYID == 0){ time1=MPI_Wtime(); clock(); } /* open log-file (each PE is using different file) */ /* fp=stdout; */ sprintf(ext,".%i",MYID); strcat(LOG_FILE,ext); if ((MYID==0) && (LOG==1)) FP=stdout; else FP=fopen(LOG_FILE,"w"); fprintf(FP," This is the log-file generated by PE %d \n\n",MYID); /* ----------------------- */ /* define FD grid geometry */ /* ----------------------- */ /* domain decomposition */ initproc(); NT=iround(TIME/DT); /* number of timesteps */ /* output of parameters to log-file or stdout */ if (MYID==0) write_par(FP); /* NXG, NYG denote size of the entire (global) grid */ NXG=NX; NYG=NY; /* In the following, NX and NY denote size of the local grid ! 
*/ NX = IENDX; NY = IENDY; NTDTINV=ceil((float)NT/(float)DTINV); /* round towards next higher integer value */ /* save every IDXI and IDYI spatial point during the forward modelling */ IDXI=1; IDYI=1; NXNYI=(NX/IDXI)*(NY/IDYI); SHOTINC=1; /* use only every DTINV time sample for the inversion */ DTINV_help=ivector(1,NT); /* Check if RTM workflow-file is defined (stdin) */ FP_stage=fopen(FILEINP1,"r"); if(FP_stage==NULL) { if (MYID == 0){ printf("\n==================================================================\n"); printf(" Cannot open Denise workflow input file %s \n",FILEINP1); printf("\n==================================================================\n\n"); err(" --- "); } } fclose(FP_stage); /* define data structures for PSV problem */ struct wavePSV; struct wavePSV_PML; struct matPSV; struct fwiPSV; struct mpiPSV; struct seisPSV; struct seisPSVfwi; struct acq; nd = FDORDER/2 + 1; fdo3 = 2*nd; buffsize=2.0*2.0*fdo3*(NX +NY)*sizeof(MPI_FLOAT); /* allocate buffer for buffering messages */ buff_addr=malloc(buffsize); if (!buff_addr) err("allocation failure for buffer for MPI_Bsend !"); MPI_Buffer_attach(buff_addr,buffsize); /* allocation for request and status arrays */ req_send=(MPI_Request *)malloc(REQUEST_COUNT*sizeof(MPI_Request)); req_rec=(MPI_Request *)malloc(REQUEST_COUNT*sizeof(MPI_Request)); send_statuses=(MPI_Status *)malloc(REQUEST_COUNT*sizeof(MPI_Status)); rec_statuses=(MPI_Status *)malloc(REQUEST_COUNT*sizeof(MPI_Status)); /* --------- add different modules here ------------------------ */ ns=NT; /* in a FWI one has to keep all samples of the forward modeled data at the receiver positions to calculate the adjoint sources and to do the backpropagation; look at function saveseis_glob.c to see that every NDT sample for the forward modeled wavefield is written to su files*/ if (SEISMO){ acq.recpos=receiver(FP, &ntr, ishot); acq.recswitch = ivector(1,ntr); acq.recpos_loc = splitrec(acq.recpos,&ntr_loc, ntr, acq.recswitch); ntr_glob=ntr; ntr=ntr_loc; if(N_STREAMER>0){ free_imatrix(acq.recpos,1,3,1,ntr_glob); if(ntr>0) free_imatrix(acq.recpos_loc,1,3,1,ntr); free_ivector(acq.recswitch,1,ntr_glob); } } if(N_STREAMER==0){ /* Memory for seismic data */ alloc_seisPSV(ntr,ns,&seisPSV); /* Memory for FWI seismic data */ alloc_seisPSVfwi(ntr,ntr_glob,ns,&seisPSVfwi); } /* Memory for full data seismograms */ alloc_seisPSVfull(&seisPSV,ntr_glob); /* estimate memory requirement of the variables in megabytes*/ switch (SEISMO){ case 1 : /* particle velocities only */ nseismograms=2; break; case 2 : /* pressure only */ nseismograms=1; break; case 3 : /* curl and div only */ nseismograms=2; break; case 4 : /* everything */ nseismograms=5; break; } /* calculate memory requirements for PSV forward problem */ mem_fwiPSV(nseismograms,ntr,ns,fdo3,nd,buffsize,ntr_glob); /* Define gradient formulation */ /* GRAD_FORM = 1 - stress-displacement gradients */ /* GRAD_FORM = 2 - stress-velocity gradients for decomposed impedance matrix */ GRAD_FORM = 1; /* allocate memory for PSV forward problem */ alloc_PSV(&wavePSV,&wavePSV_PML); /* calculate damping coefficients for CPMLs (PSV problem)*/ if(FW>0){PML_pro(wavePSV_PML.d_x, wavePSV_PML.K_x, wavePSV_PML.alpha_prime_x, wavePSV_PML.a_x, wavePSV_PML.b_x, wavePSV_PML.d_x_half, wavePSV_PML.K_x_half, wavePSV_PML.alpha_prime_x_half, wavePSV_PML.a_x_half, wavePSV_PML.b_x_half, wavePSV_PML.d_y, wavePSV_PML.K_y, wavePSV_PML.alpha_prime_y, wavePSV_PML.a_y, wavePSV_PML.b_y, wavePSV_PML.d_y_half, wavePSV_PML.K_y_half, wavePSV_PML.alpha_prime_y_half, 
wavePSV_PML.a_y_half, wavePSV_PML.b_y_half); } /* allocate memory for PSV material parameters */ alloc_matPSV(&matPSV); /* allocate memory for PSV FWI parameters */ alloc_fwiPSV(&fwiPSV); /* allocate memory for PSV MPI variables */ alloc_mpiPSV(&mpiPSV); taper_coeff= matrix(1,NY,1,NX); /* memory for source position definition */ acq.srcpos1=fmatrix(1,8,1,1); /* memory of L2 norm */ L2t = vector(1,4); epst1 = vector(1,3); fprintf(FP," ... memory allocation for PE %d was successfull.\n\n", MYID); /* Holberg coefficients for FD operators*/ hc = holbergcoeff(); MPI_Barrier(MPI_COMM_WORLD); /* Reading source positions from SOURCE_FILE */ acq.srcpos=sources(&nsrc); nsrc_glob=nsrc; /* create model grids */ if(L){ if (READMOD) readmod_visc_PSV(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta); else model(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta); } else{ if (READMOD) readmod_elastic_PSV(matPSV.prho,matPSV.ppi,matPSV.pu); else model_elastic(matPSV.prho,matPSV.ppi,matPSV.pu); } /* check if the FD run will be stable and free of numerical dispersion */ if(L){ checkfd_ssg_visc(FP,matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup,matPSV.peta,hc); } else{ checkfd_ssg_elastic(FP,matPSV.prho,matPSV.ppi,matPSV.pu,hc); } SHOTINC=1; /* For RTM read only first line from FWI workflow file and set iter = 1 */ stagemax = 1; iter_true = 1; iter = 1; /* Begin of FWI-workflow */ for(nstage=1;nstage<=stagemax;nstage++){ /* read workflow input file *.inp */ FP_stage=fopen(FILEINP1,"r"); read_par_inv(FP_stage,nstage,stagemax); /*fclose(FP_stage);*/ if((EPRECOND==1)||(EPRECOND==3)){ Ws = matrix(1,NY,1,NX); /* total energy of the source wavefield */ Wr = matrix(1,NY,1,NX); /* total energy of the receiver wavefield */ We = matrix(1,NY,1,NX); /* total energy of source and receiver wavefield */ } FC=FC_END; if (MYID==0) { time2=MPI_Wtime(); fprintf(FP,"\n\n\n ------------------------------------------------------------------\n"); fprintf(FP,"\n\n\n Elastic Reverse Time Migration RTM \n"); fprintf(FP,"\n\n\n ------------------------------------------------------------------\n"); } /* For the calculation of the material parameters between gridpoints they have to be averaged. For this, values lying at 0 and NX+1, for example, are required on the local grid. 
These are now copied from the neighbouring grids */ if (L){ matcopy_PSV(matPSV.prho,matPSV.ppi,matPSV.pu,matPSV.ptaus,matPSV.ptaup); } else{ matcopy_elastic_PSV(matPSV.prho,matPSV.ppi,matPSV.pu); } MPI_Barrier(MPI_COMM_WORLD); av_mue(matPSV.pu,matPSV.puipjp,matPSV.prho); av_rho(matPSV.prho,matPSV.prip,matPSV.prjp); if (L) av_tau(matPSV.ptaus,matPSV.ptausipjp); /* Preparing memory variables for update_s (viscoelastic) */ if (L) prepare_update_s_visc_PSV(matPSV.etajm,matPSV.etaip,matPSV.peta,matPSV.fipjp,matPSV.pu,matPSV.puipjp,matPSV.ppi,matPSV.prho,matPSV.ptaus,matPSV.ptaup,matPSV.ptausipjp,matPSV.f,matPSV.g, matPSV.bip,matPSV.bjm,matPSV.cip,matPSV.cjm,matPSV.dip,matPSV.d,matPSV.e); /* ------------------------------------- */ /* calculate average material parameters */ /* ------------------------------------- */ Vp_avg = 0.0; Vs_avg = 0.0; rho_avg = 0.0; for (i=1;i<=NX;i=i+IDX){ for (j=1;j<=NY;j=j+IDY){ /* calculate average Vp, Vs */ Vp_avg+=matPSV.ppi[j][i]; Vs_avg+=matPSV.pu[j][i]; /* calculate average rho */ rho_avg+=matPSV.prho[j][i]; } } /* calculate average Vp, Vs and rho of all CPUs */ Vp_sum = 0.0; MPI_Allreduce(&Vp_avg,&Vp_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); Vp_avg=Vp_sum; Vs_sum = 0.0; MPI_Allreduce(&Vs_avg,&Vs_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); Vs_avg=Vs_sum; rho_sum = 0.0; MPI_Allreduce(&rho_avg,&rho_sum,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD); rho_avg=rho_sum; Vp_avg /=NXG*NYG; Vs_avg /=NXG*NYG; rho_avg /=NXG*NYG; if(MYID==0){ printf("Vp_avg = %.0f \t Vs_avg = %.0f \t rho_avg = %.0f \n ",Vp_avg,Vs_avg,rho_avg); } C_vp = Vp_avg; C_vs = Vs_avg; C_rho = rho_avg; /* ---------------------------------------------------------------------------------------------------- */ /* --------- Calculate RTM P- and S- wave image using the adjoint state method ------------------------ */ /* ---------------------------------------------------------------------------------------------------- */ L2sum = grad_obj_psv(&wavePSV, &wavePSV_PML, &matPSV, &fwiPSV, &mpiPSV, &seisPSV, &seisPSVfwi, &acq, hc, iter, nsrc, ns, ntr, ntr_glob, nsrc_glob, nsrc_loc, ntr_loc, nstage, We, Ws, Wr, taper_coeff, hin, DTINV_help, req_send, req_rec); /* Interpolate missing spatial gradient values in case IDXI > 1 || IDXY > 1 */ /* ------------------------------------------------------------------------ */ if((IDXI>1)||(IDYI>1)){ interpol(IDXI,IDYI,fwiPSV.waveconv,1); interpol(IDXI,IDYI,fwiPSV.waveconv_u,1); interpol(IDXI,IDYI,fwiPSV.waveconv_rho,1); } /* Preconditioning of gradients after shot summation */ precond_PSV(&fwiPSV,&acq,nsrc,ntr_glob,taper_coeff,FP_GRAV); /* Output of RTM results */ RTM_PSV_out(&fwiPSV); } /* End of RTM-workflow loop */ /* deallocate memory for PSV forward problem */ dealloc_PSV(&wavePSV,&wavePSV_PML); /* deallocation of memory */ free_matrix(fwiPSV.Vp0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.Vs0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.Rho0,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.prho_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prip,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.prjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ppi,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.ppi_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.pu,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.pu_old,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.puipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_lam,-nd+1,NY+nd,-nd+1,NX+nd); 
free_matrix(fwiPSV.waveconv_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(mpiPSV.bufferlef_to_rig,1,NY,1,fdo3); free_matrix(mpiPSV.bufferrig_to_lef,1,NY,1,fdo3); free_matrix(mpiPSV.buffertop_to_bot,1,NX,1,fdo3); free_matrix(mpiPSV.bufferbot_to_top,1,NX,1,fdo3); free_vector(hc,0,6); free_matrix(fwiPSV.gradg,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradg_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho_s,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_rho_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradg_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.gradp_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_u,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_mu,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(fwiPSV.waveconv_u_shot,-nd+1,NY+nd,-nd+1,NX+nd); free_vector(fwiPSV.forward_prop_x,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_y,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_rho_x,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_rho_y,1,NY*NX*NT); free_vector(fwiPSV.forward_prop_u,1,NY*NX*NT); if (nsrc_loc>0){ free_matrix(acq.signals,1,nsrc_loc,1,NT); free_matrix(acq.srcpos_loc,1,8,1,nsrc_loc); free_matrix(acq.srcpos_loc_back,1,6,1,nsrc_loc); } /* free memory for global source positions */ free_matrix(acq.srcpos,1,8,1,nsrc); /* free memory for source position definition */ free_matrix(acq.srcpos1,1,8,1,1); /* free memory for abort criterion */ free_vector(L2t,1,4); free_vector(epst1,1,3); if(N_STREAMER==0){ if (SEISMO) free_imatrix(acq.recpos,1,3,1,ntr_glob); if ((ntr>0) && (SEISMO)){ free_imatrix(acq.recpos_loc,1,3,1,ntr); acq.recpos_loc = NULL; switch (SEISMO){ case 1 : /* particle velocities only */ free_matrix(seisPSV.sectionvx,1,ntr,1,ns); free_matrix(seisPSV.sectionvy,1,ntr,1,ns); seisPSV.sectionvx=NULL; seisPSV.sectionvy=NULL; break; case 2 : /* pressure only */ free_matrix(seisPSV.sectionp,1,ntr,1,ns); break; case 3 : /* curl and div only */ free_matrix(seisPSV.sectioncurl,1,ntr,1,ns); free_matrix(seisPSV.sectiondiv,1,ntr,1,ns); break; case 4 : /* everything */ free_matrix(seisPSV.sectionvx,1,ntr,1,ns); free_matrix(seisPSV.sectionvy,1,ntr,1,ns); free_matrix(seisPSV.sectionp,1,ntr,1,ns); free_matrix(seisPSV.sectioncurl,1,ntr,1,ns); free_matrix(seisPSV.sectiondiv,1,ntr,1,ns); break; } } free_matrix(seisPSVfwi.sectionread,1,ntr_glob,1,ns); free_ivector(acq.recswitch,1,ntr); if((QUELLTYPB==1)||(QUELLTYPB==3)||(QUELLTYPB==5)||(QUELLTYPB==7)){ free_matrix(seisPSVfwi.sectionvxdata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvxdiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvxdiffold,1,ntr,1,ns); } if((QUELLTYPB==1)||(QUELLTYPB==2)||(QUELLTYPB==6)||(QUELLTYPB==7)){ free_matrix(seisPSVfwi.sectionvydata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvydiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionvydiffold,1,ntr,1,ns); } if(QUELLTYPB>=4){ free_matrix(seisPSVfwi.sectionpdata,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionpdiff,1,ntr,1,ns); free_matrix(seisPSVfwi.sectionpdiffold,1,ntr,1,ns); } } if(SEISMO){ free_matrix(seisPSV.fulldata,1,ntr_glob,1,NT); } if(SEISMO==1){ free_matrix(seisPSV.fulldata_vx,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_vy,1,ntr_glob,1,NT); } if(SEISMO==2){ free_matrix(seisPSV.fulldata_p,1,ntr_glob,1,NT); } if(SEISMO==3){ free_matrix(seisPSV.fulldata_curl,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_div,1,ntr_glob,1,NT); } if(SEISMO==4){ free_matrix(seisPSV.fulldata_vx,1,ntr_glob,1,NT); 
free_matrix(seisPSV.fulldata_vy,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_p,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_curl,1,ntr_glob,1,NT); free_matrix(seisPSV.fulldata_div,1,ntr_glob,1,NT); } free_ivector(DTINV_help,1,NT); /* free memory for viscoelastic modeling variables */ if (L) { free_matrix(matPSV.ptaus,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ptausipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.ptaup,-nd+1,NY+nd,-nd+1,NX+nd); free_vector(matPSV.peta,1,L); free_vector(matPSV.etaip,1,L); free_vector(matPSV.etajm,1,L); free_vector(matPSV.bip,1,L); free_vector(matPSV.bjm,1,L); free_vector(matPSV.cip,1,L); free_vector(matPSV.cjm,1,L); free_matrix(matPSV.f,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.g,-nd+1,NY+nd,-nd+1,NX+nd); free_matrix(matPSV.fipjp,-nd+1,NY+nd,-nd+1,NX+nd); free_f3tensor(matPSV.dip,-nd+1,NY+nd,-nd+1,NX+nd,1,L); free_f3tensor(matPSV.d,-nd+1,NY+nd,-nd+1,NX+nd,1,L); free_f3tensor(matPSV.e,-nd+1,NY+nd,-nd+1,NX+nd,1,L); } /* de-allocate buffer for messages */ MPI_Buffer_detach(buff_addr,&buffsize); MPI_Barrier(MPI_COMM_WORLD); if (MYID==0){ fprintf(FP,"\n **Info from main (written by PE %d): \n",MYID); fprintf(FP," CPU time of program per PE: %li seconds.\n",clock()/CLOCKS_PER_SEC); time8=MPI_Wtime(); fprintf(FP," Total real time of program: %4.2f seconds.\n",time8-time1); time_av_v_update=time_av_v_update/(double)NT; time_av_s_update=time_av_s_update/(double)NT; time_av_v_exchange=time_av_v_exchange/(double)NT; time_av_s_exchange=time_av_s_exchange/(double)NT; time_av_timestep=time_av_timestep/(double)NT; /* fprintf(FP," Average times for \n"); fprintf(FP," velocity update: \t %5.3f seconds \n",time_av_v_update); fprintf(FP," stress update: \t %5.3f seconds \n",time_av_s_update); fprintf(FP," velocity exchange: \t %5.3f seconds \n",time_av_v_exchange); fprintf(FP," stress exchange: \t %5.3f seconds \n",time_av_s_exchange); fprintf(FP," timestep: \t %5.3f seconds \n",time_av_timestep);*/ } fclose(FP); }
std::pair<void *, int> buffer_detach() const { void *buff; int size; MPI_Buffer_detach(&buff, &size); return std::make_pair(buff, size); }
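/* Illustrative C sketch (kept in C to match the rest of the examples) of what
 * callers of a detach helper like the wrapper above typically check:
 * MPI_Buffer_detach hands back exactly the address and size that were attached,
 * so the same pointer can safely be freed or re-attached. The 1 MiB buffer size
 * is an assumption for illustration. */
#include <assert.h>
#include <stdlib.h>
#include <mpi.h>

static void detach_roundtrip_example(void)
{
    int attached_size = 1 << 20, detached_size = 0;
    char *attached = malloc(attached_size);
    void *detached = NULL;

    MPI_Buffer_attach(attached, attached_size);
    /* ... MPI_Bsend calls would go here ... */
    MPI_Buffer_detach(&detached, &detached_size);

    assert(detached == (void *) attached);
    assert(detached_size == attached_size);
    free(attached);
}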
int main(int argc, char *argv[]) { int errs = 0; int rank, size, source, dest; unsigned char *buf, *bufp; int minsize = 2; int i, msgsize, bufsize, outsize; unsigned char *msg1, *msg2, *msg3; MPI_Comm comm; MPI_Status status1, status2, status3; MTest_Init(&argc, &argv); /* The following illustrates the use of the routines to * run through a selection of communicators and datatypes. * Use subsets of these for tests that do not involve combinations * of communicators, datatypes, and counts of datatypes */ msgsize = 128 * 1024; msg1 = (unsigned char *) malloc(3 * msgsize); msg2 = msg1 + msgsize; msg3 = msg2 + msgsize; while (MTestGetIntracommGeneral(&comm, minsize, 1)) { if (comm == MPI_COMM_NULL) continue; /* Determine the sender and receiver */ MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); source = 0; dest = size - 1; /* Here is the test: The sender */ if (rank == source) { /* Get a bsend buffer. Make it large enough that the Bsend * internals will (probably) not use a eager send for the data. * Have three such messages */ bufsize = 3 * (MPI_BSEND_OVERHEAD + msgsize); buf = (unsigned char *) malloc(bufsize); if (!buf) { fprintf(stderr, "Unable to allocate a buffer of %d bytes\n", bufsize); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Buffer_attach(buf, bufsize); /* Initialize the buffers */ for (i = 0; i < msgsize; i++) { msg1[i] = 0xff ^ (i & 0xff); msg2[i] = 0xff ^ (3 * i & 0xff); msg3[i] = 0xff ^ (5 * i & 0xff); } /* Initiate the bsends */ MPI_Bsend(msg1, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm); MPI_Bsend(msg2, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm); MPI_Bsend(msg3, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm); /* Synchronize with our partner */ MPI_Sendrecv(NULL, 0, MPI_UNSIGNED_CHAR, dest, 10, NULL, 0, MPI_UNSIGNED_CHAR, dest, 10, comm, MPI_STATUS_IGNORE); /* Detach the buffers. There should be pending operations */ MPI_Buffer_detach(&bufp, &outsize); if (bufp != buf) { fprintf(stderr, "Wrong buffer returned\n"); errs++; } if (outsize != bufsize) { fprintf(stderr, "Wrong buffer size returned\n"); errs++; } } else if (rank == dest) { double tstart; /* Clear the message buffers */ for (i = 0; i < msgsize; i++) { msg1[i] = 0; msg2[i] = 0; msg3[i] = 0; } /* Wait for the synchronize */ MPI_Sendrecv(NULL, 0, MPI_UNSIGNED_CHAR, source, 10, NULL, 0, MPI_UNSIGNED_CHAR, source, 10, comm, MPI_STATUS_IGNORE); /* Wait 2 seconds */ tstart = MPI_Wtime(); while (MPI_Wtime() - tstart < 2.0); /* Now receive the messages */ MPI_Recv(msg1, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status1); MPI_Recv(msg2, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status2); MPI_Recv(msg3, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status3); /* Check that we have the correct data */ for (i = 0; i < msgsize; i++) { if (msg1[i] != (0xff ^ (i & 0xff))) { if (errs < 10) { fprintf(stderr, "msg1[%d] = %d\n", i, msg1[i]); } errs++; } if (msg2[i] != (0xff ^ (3 * i & 0xff))) { if (errs < 10) { fprintf(stderr, "msg2[%d] = %d\n", i, msg2[i]); } errs++; } if (msg3[i] != (0xff ^ (5 * i & 0xff))) { if (errs < 10) { fprintf(stderr, "msg2[%d] = %d\n", i, msg2[i]); } errs++; } } } MTestFreeComm(&comm); } free(msg1); MTest_Finalize(errs); MPI_Finalize(); return 0; }
static void test_pair (void) { int prev, next, count, tag, index, i, outcount, indices[2]; int rank, size, flag, ierr, reqcount; double send_buf[TEST_SIZE], recv_buf[TEST_SIZE]; double buffered_send_buf[TEST_SIZE * 2 + MPI_BSEND_OVERHEAD]; /* factor of two is based on guessing - only dynamic allocation would be safe */ void *buffer; MPI_Status statuses[2]; MPI_Status status; MPI_Request requests[2]; MPI_Comm dupcom, intercom; #ifdef V_T struct _VT_FuncFrameHandle { char *name; int func; int frame; }; typedef struct _VT_FuncFrameHandle VT_FuncFrameHandle_t; VT_FuncFrameHandle_t normal_sends, buffered_sends, buffered_persistent_sends, ready_sends, sync_sends, nblock_sends, nblock_rsends, nblock_ssends, pers_sends, pers_rsends, pers_ssends, sendrecv, sendrecv_repl, intercomm; int classid; VT_classdef( "Application:test_pair", &classid ); #define VT_REGION_DEF( _name, _nameframe, _class ) \ (_nameframe).name=_name; \ VT_funcdef( (_nameframe).name, _class, &((_nameframe).func) ); #define VT_BEGIN_REGION( _nameframe ) \ LOCDEF(); \ VT_begin( (_nameframe).func ) #define VT_END_REGION( _nameframe ) \ LOCDEF(); VT_end( (_nameframe).func ) #else #define VT_REGION_DEF( _name, _nameframe, _class ) #define VT_BEGIN_REGION( _nameframe ) #define VT_END_REGION( _nameframe ) #endif ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); ierr = MPI_Comm_size(MPI_COMM_WORLD, &size); if ( size < 2 ) { if ( rank == 0 ) { printf("Program needs to be run on at least 2 processes.\n"); } ierr = MPI_Abort( MPI_COMM_WORLD, 66 ); } ierr = MPI_Comm_dup(MPI_COMM_WORLD, &dupcom); if ( rank >= 2 ) { /* printf( "%d Calling finalize.\n", rank ); */ ierr = MPI_Finalize( ); exit(0); } next = rank + 1; if (next >= 2) next = 0; prev = rank - 1; if (prev < 0) prev = 1; VT_REGION_DEF( "Normal_Sends", normal_sends, classid ); VT_REGION_DEF( "Buffered_Sends", buffered_sends, classid ); VT_REGION_DEF( "Buffered_Persistent_Sends", buffered_persistent_sends, classid ); VT_REGION_DEF( "Ready_Sends", ready_sends, classid ); VT_REGION_DEF( "Sync_Sends", sync_sends, classid ); VT_REGION_DEF( "nblock_Sends", nblock_sends, classid ); VT_REGION_DEF( "nblock_RSends", nblock_rsends, classid ); VT_REGION_DEF( "nblock_SSends", nblock_ssends, classid ); VT_REGION_DEF( "Pers_Sends", pers_sends, classid ); VT_REGION_DEF( "Pers_RSends", pers_rsends, classid ); VT_REGION_DEF( "Pers_SSends", pers_ssends, classid ); VT_REGION_DEF( "SendRecv", sendrecv, classid ); VT_REGION_DEF( "SendRevc_Repl", sendrecv_repl, classid ); VT_REGION_DEF( "InterComm", intercomm, classid ); /* * Normal sends */ VT_BEGIN_REGION( normal_sends ); if (rank == 0) printf ("Send\n"); tag = 0x100; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Send(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( normal_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_sends ); if (rank == 0) printf ("Buffered Send\n"); tag = 138; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { 
init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_persistent_sends ); if (rank == 0) printf ("Buffered Persistent Send\n"); tag = 238; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Start(requests); MPI_Wait(requests, statuses); MPI_Request_free(requests); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_persistent_sends ); /* * Ready sends. Note that we must insure that the receive is posted * before the rsend; this requires using Irecv. 
*/ VT_BEGIN_REGION( ready_sends ); if (rank == 0) printf ("Rsend\n"); tag = 1456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Recv(MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD, &status); MPI_Rsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Probe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD); MPI_Wait(requests, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( ready_sends ); /* * Synchronous sends */ VT_BEGIN_REGION( sync_sends ); if (rank == 0) printf ("Ssend\n"); tag = 1789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (flag) printf ("Iprobe succeeded! source %d, tag %d\n",status.MPI_SOURCE, status.MPI_TAG); MPI_Ssend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); while (!flag) MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Ssend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sync_sends ); /* * Nonblocking normal sends */ VT_BEGIN_REGION( nblock_sends ); if (rank == 0) printf ("Isend\n"); tag = 2123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Isend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Waitall(2, requests, statuses); rq_check( requests, 2, "isend and irecv" ); msg_check(recv_buf,prev,tag,count,statuses, TEST_SIZE,"isend and irecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"isend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Isend(recv_buf, count, MPI_DOUBLE, next, tag,MPI_COMM_WORLD, (requests)); MPI_Wait((requests), &status); rq_check(requests, 1, "isend (and 
recv)"); } VT_END_REGION( nblock_sends ); /* * Nonblocking ready sends */ VT_BEGIN_REGION( nblock_rsends ); if (rank == 0) printf ("Irsend\n"); tag = 2456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); MPI_Irsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); reqcount = 0; while (reqcount != 2) { MPI_Waitany( 2, requests, &index, statuses); if( index == 0 ) { memcpy( &status, statuses, sizeof(status) ); } reqcount++; } rq_check( requests, 1, "irsend and irecv"); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"irsend and irecv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); flag = 0; while (!flag) MPI_Test(requests, &flag, &status); rq_check( requests, 1, "irsend and irecv (test)"); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "irsend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Irsend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Waitall(1, requests, statuses); rq_check( requests, 1, "irsend and irecv"); } VT_END_REGION( nblock_rsends ); /* * Nonblocking synchronous sends */ VT_BEGIN_REGION( nblock_ssends ); if (rank == 0) printf ("Issend\n"); tag = 2789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests ); init_test_data(send_buf,TEST_SIZE,0); MPI_Issend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); flag = 0; while (!flag) MPI_Testall(2, requests, &flag, statuses); rq_check( requests, 2, "issend and irecv (testall)"); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "issend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "issend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Issend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD,requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); rq_check( requests, 1, "issend and recv (testany)"); } VT_END_REGION( nblock_ssends ); /* * Persistent normal sends */ VT_BEGIN_REGION( pers_sends ); if (rank == 0) printf ("Send_init\n"); tag = 3123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); MPI_Send_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); MPI_Waitall(2, requests, statuses); msg_check( recv_buf, prev, tag, count, (statuses+1), TEST_SIZE, "persistent send/recv"); } else { MPI_Start((requests+1)); MPI_Wait((requests+1), &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "persistent send/recv"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_sends ); /* * Persistent ready sends */ VT_BEGIN_REGION( pers_rsends ); if (rank == 0) printf ("Rsend_init\n"); tag = 3456; 
count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Rsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Barrier( MPI_COMM_WORLD ); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Waitsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 1) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "waitsome"); } reqcount++; } } } else { MPI_Start((requests+1)); MPI_Barrier( MPI_COMM_WORLD ); flag = 0; while (!flag) MPI_Test((requests+1), &flag, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "test"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_rsends ); /* * Persistent synchronous sends */ VT_BEGIN_REGION( pers_ssends ); if (rank == 0) printf ("Ssend_init\n"); tag = 3789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Ssend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Testsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 0) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "testsome"); } reqcount++; } } } else { MPI_Start(requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "testany" ); init_test_data(send_buf,TEST_SIZE,1); MPI_Start((requests+1)); MPI_Wait((requests+1), &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_ssends ); /* * Send/receive. */ VT_BEGIN_REGION( sendrecv ); if (rank == 0) printf ("Sendrecv\n"); tag = 4123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv(send_buf, count, MPI_DOUBLE, next, tag, recv_buf, count, MPI_DOUBLE, prev, tag, MPI_COMM_WORLD, &status ); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "sendrecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "recv/send"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sendrecv ); #ifdef V_T VT_flush(); #endif /* * Send/receive replace. 
*/ VT_BEGIN_REGION( sendrecv_repl ); if (rank == 0) printf ("Sendrecv_replace\n"); tag = 4456; count = TEST_SIZE / 3; if (rank == 0) { init_test_data(recv_buf, TEST_SIZE,0); for (i=count; i< TEST_SIZE; i++) recv_buf[i] = 0.0; MPI_Sendrecv_replace(recv_buf, count, MPI_DOUBLE, next, tag, prev, tag, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "sendrecvreplace"); } else { clear_test_data(recv_buf,TEST_SIZE); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "recv/send for replace"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sendrecv_repl ); /* * Send/Receive via inter-communicator */ VT_BEGIN_REGION( intercomm ); MPI_Intercomm_create(MPI_COMM_SELF, 0, MPI_COMM_WORLD, next, 1, &intercom); if (rank == 0) printf ("Send via inter-communicator\n"); tag = 4018; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Send(send_buf, count, MPI_DOUBLE, 0, tag, intercom); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, intercom, &status); msg_check(recv_buf, 0, tag, count, &status, TEST_SIZE, "send and recv via inter-communicator"); } else if (rank == 1) { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, intercom, &status); msg_check( recv_buf, 0, tag, count, &status, TEST_SIZE,"send and recv via inter-communicator"); init_test_data(recv_buf,TEST_SIZE,0); MPI_Send(recv_buf, count, MPI_DOUBLE, 0, tag, intercom); } VT_END_REGION( normal_sends ); MPI_Comm_free(&intercom); MPI_Comm_free(&dupcom); }
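/* Illustrative sketch of the ready-send handshake used in the Rsend parts of the
 * test above: the receiver posts its Irecv first and then signals the sender with
 * an empty message, so MPI_Rsend is guaranteed to find a matching receive already
 * posted. The function name, tags and the use of MPI_COMM_WORLD are assumptions. */
#include <mpi.h>

static void rsend_handshake_example(double *sendbuf, double *recvbuf,
                                    int count, int peer, int am_sender)
{
    MPI_Status st;
    MPI_Request rreq;

    if (am_sender) {
        /* Wait for the receiver's "receive is posted" token, then ready-send. */
        MPI_Recv(MPI_BOTTOM, 0, MPI_INT, peer, 99, MPI_COMM_WORLD, &st);
        MPI_Rsend(sendbuf, count, MPI_DOUBLE, peer, 7, MPI_COMM_WORLD);
    } else {
        MPI_Irecv(recvbuf, count, MPI_DOUBLE, peer, 7, MPI_COMM_WORLD, &rreq);
        MPI_Send(MPI_BOTTOM, 0, MPI_INT, peer, 99, MPI_COMM_WORLD);
        MPI_Wait(&rreq, &st);
    }
}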
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    /* Room for one buffered int plus the per-message overhead. */
    enum { BUFSIZE = MPI_BSEND_OVERHEAD + sizeof(int) };
    unsigned char buf[BUFSIZE];
    void *detached_buf;
    int rank, detached_size, rbuf;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank % 2 == 0) {
        if (rank != 0) {
            MPI_Buffer_attach(buf, BUFSIZE);
            MPI_Bsend(&rank, 1, MPI_INT, rank + 1, 5, MPI_COMM_WORLD); /* sending variable rank */
            /* Detach blocks until the buffered message has been transmitted;
               it needs writable (non-const) locations for the address and size. */
            MPI_Buffer_detach(&detached_buf, &detached_size);
        }
    } else {
        if (rank != 1) {
            MPI_Recv(&rbuf, 1, MPI_INT, rank - 1, 5, MPI_COMM_WORLD, &status);
            printf("Process %i received %i from process %i\n", rank, rbuf, status.MPI_SOURCE);
        }
    }

    MPI_Finalize();
    return 0;
}