void relax(double *phi, double *b, double *tmp, param_t p) { int i, x, y; // A little trick to index phi as expected. double* phi_s = phi + p.L; // Prepare for async send/recv MPI_Request request[4]; int requests; MPI_Status status[4]; for(i=0; i<p.niter; i++) { requests = 0; // Send the higher-memory component to the next rank. MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE, (p.my_rank+1)%p.world_size, 1, MPI_COMM_WORLD, request + requests++); MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE, (p.my_rank+p.world_size-1)%p.world_size, 1, MPI_COMM_WORLD, request + requests++); // Send the lower-memory component to the previous rank. MPI_Isend(phi_s, p.L, MPI_DOUBLE, (p.my_rank+p.world_size-1)%p.world_size, 0, MPI_COMM_WORLD, request + requests++); MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE, (p.my_rank+1)%p.world_size, 0, MPI_COMM_WORLD, request + requests++); // Do some other work while we wait! // Update everything that doesn't depend on buffers. for(x = 0; x < p.L; x++) { for(y = 1; y < p.y-1; y++) { tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L] + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + ((y+1)%p.L)*p.L] + phi_s[x + ((y-1+p.L)%p.L)*p.L]) + p.dt*p.scale* b[x + y*p.L]; } } // Wait, if sync hasn't finished. MPI_Waitall ( requests, request, status ); // Update the other cells. for(x = 0; x < p.L; x++) { y = 0; tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L] + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]) + p.dt*p.scale* b[x + y*p.L]; y = p.y-1; tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L] + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]) + p.dt*p.scale* b[x + y*p.L]; } for(x = 0; x < p.L; x++) { for(y = 0; y < p.y; y++) { phi_s[x + y*p.L] = tmp[x + y*p.L]; } } } MPI_Barrier(MPI_COMM_WORLD); return; }
/*
 * Logging wrapper around MPI_Isend.
 *
 * Writes one line to the "mpi" info stream naming the call site (file, line),
 * the rank reported by Stg_Messaging_GetRank( comm ) and the tag, count,
 * datatype and destination of the send, then forwards every argument to
 * MPI_Isend unchanged.
 *
 * Returns whatever MPI_Isend returns.
 *
 * NOTE(review): datatype is printed with %d. MPI_Datatype is an opaque handle
 * and may not be an int in every MPI implementation - confirm this is safe for
 * the MPI library the project is built against.
 */
int Stg_MPI_Isend( char* file, int line, void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request ) {
	Stream* mpiInfoStream;
	int     sendResult;

	mpiInfoStream = Journal_Register( Info_Type, "mpi" );

	Journal_Printf(
		mpiInfoStream,
		"%s %d, rank %d MPI_Isend: tag = %d, count = %d, datatype = %d, dest = %d\n",
		file,
		line,
		Stg_Messaging_GetRank( comm ),
		tag,
		count,
		datatype,
		dest );

	sendResult = MPI_Isend( buf, count, datatype, dest, tag, comm, request );
	return sendResult;
}
void exchsolution_gmrfData_1(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S493, S492, S494 */ { { { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posBegin[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S496, S495, S497 */ { { { double* fieldData_Solution_GMRF_1_p1 = 
(&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+10)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posEnd[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S500, S499, S498 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for 
(; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=4); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S503, S502, S501 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=4); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for (; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + 
((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][20], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for 
(int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(&fieldData_Solution_GMRF[1][5], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_Solution_GMRF[1][25], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { 
waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchlaplacecoeff_gmrfData_0(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S1053, S1056, S1059, S1050, S1058, S1052, S1055, S1060, S1054, S1057, S1051 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+32)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+152)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+104)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+80)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); 
i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+128)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+176)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+200)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* 
fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+56)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S1071, S1065, S1068, S1062, S1070, S1064, S1067, S1061, S1069, S1063, S1066 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+201)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+57)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+81)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; 
(i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+177)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+105)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+129)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+153)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+33)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S1080, S1074, S1077, S1082, S1076, S1079, S1073, S1072, S1081, S1075, S1078 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+127)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = 
((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+199)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=3); i2 += 1) { yPos = posBegin[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+31)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+175)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+79)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+55)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+151)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+103)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S1083, S1092, S1086, S1089, S1088, S1091, S1085, S1090, S1093, S1087, S1084 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+13)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+61)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+205)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+133)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+85)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=3); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+37)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+181)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+157)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1094 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=1); i1 += 2) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=2); i1 += 1) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1095 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 3; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][14], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][8], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
(reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1096 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*4)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=3); i1 += 1) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1097 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)]; } for (; (i1<=3); i1 += 1) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(buffer_Send[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if 
((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(buffer_Recv[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1098 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+1)] = buffer_Recv_0_p1[i1]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1099 */ 
for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 4; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-14)] = buffer_Recv_1_p1[(i1-3)]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][13], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][7], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][1], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = 
true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][19], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code * in the case of error. */ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int *hole, int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code) { int i, j, k, *tmp_len, nprocs_recv, nprocs_send, err; char **send_buf = NULL; MPI_Request *requests, *send_req; MPI_Datatype *recv_types; MPI_Status *statuses, status; int *srt_len=NULL, sum; ADIO_Offset *srt_off=NULL; static char myname[] = "ADIOI_W_EXCHANGE_DATA"; /* exchange recv_size info so that each process knows how much to send to whom. */ MPI_Alltoall(recv_size, 1, MPI_INT, send_size, 1, MPI_INT, fd->comm); /* create derived datatypes for recv */ nprocs_recv = 0; for (i=0; i<nprocs; i++) if (recv_size[i]) nprocs_recv++; recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv+1)*sizeof(MPI_Datatype)); /* +1 to avoid a 0-size malloc */ tmp_len = (int *) ADIOI_Malloc(nprocs*sizeof(int)); j = 0; for (i=0; i<nprocs; i++) { if (recv_size[i]) { /* take care if the last off-len pair is a partial recv */ if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; tmp_len[i] = others_req[i].lens[k]; others_req[i].lens[k] = partial_recv[i]; } ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, recv_types+j); /* absolute displacements; use MPI_BOTTOM in recv */ MPI_Type_commit(recv_types+j); j++; } } /* To avoid a read-modify-write, check if there are holes in the data to be written. 
For this, merge the (sorted) offset lists others_req using a heap-merge. */ sum = 0; for (i=0; i<nprocs; i++) sum += count[i]; /* valgrind-detcted optimization: if there is no work on this process we do * not need to search for holes */ if (sum) { srt_off = (ADIO_Offset *) ADIOI_Malloc(sum*sizeof(ADIO_Offset)); srt_len = (int *) ADIOI_Malloc(sum*sizeof(int)); ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos, nprocs, nprocs_recv, sum); } /* for partial recvs, restore original lengths */ for (i=0; i<nprocs; i++) if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; others_req[i].lens[k] = tmp_len[i]; } ADIOI_Free(tmp_len); /* check if there are any holes. If yes, must do read-modify-write. * holes can be in three places. 'middle' is what you'd expect: the * processes are operating on noncontigous data. But holes can also show * up at the beginning or end of the file domain (see John Bent ROMIO REQ * #835). Missing these holes would result in us writing more data than * recieved by everyone else. 
*/ *hole = 0; if (sum) { if (off != srt_off[0]) /* hole at the front */ *hole = 1; else { /* coalesce the sorted offset-length pairs */ for (i=1; i<sum; i++) { if (srt_off[i] <= srt_off[0] + srt_len[0]) { /* ok to cast: operating on cb_buffer_size chunks */ int new_len = (int)srt_off[i] + srt_len[i] - (int)srt_off[0]; if (new_len > srt_len[0]) srt_len[0] = new_len; } else break; } if (i < sum || size != srt_len[0]) /* hole in middle or end */ *hole = 1; } ADIOI_Free(srt_off); ADIOI_Free(srt_len); } if (nprocs_recv) { if (*hole) { ADIO_ReadContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, &err); /* --BEGIN ERROR HANDLING-- */ if (err != MPI_SUCCESS) { *error_code = MPIO_Err_create_code(err, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**ioRMWrdwr", 0); return; } /* --END ERROR HANDLING-- */ } } nprocs_send = 0; for (i=0; i < nprocs; i++) if (send_size[i]) nprocs_send++; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ requests = (MPI_Request *) ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); send_req = requests; } else { requests = (MPI_Request *) ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post receives */ j = 0; for (i=0; i<nprocs; i++) { if (recv_size[i]) { MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter, fd->comm, requests+j); j++; } } send_req = requests + nprocs_recv; } /* post sends. if buftype_is_contig, data can be directly sent from user buf at location given by buf_idx. else use send_buf. 
*/ #ifdef AGGREGATION_PROFILE MPE_Log_event (5032, 0, NULL); #endif if (buftype_is_contig) { j = 0; for (i=0; i < nprocs; i++) if (send_size[i]) { MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, send_req+j); j++; buf_idx[i] += send_size[i]; } } else if (nprocs_send) { /* buftype is not contig */ send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*)); for (i=0; i < nprocs; i++) if (send_size[i]) send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list, len_list, send_size, send_req, sent_to_proc, nprocs, myrank, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent); /* the send is done in ADIOI_Fill_send_buffer */ } if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ j = 0; for (i=0; i<nprocs; i++) { MPI_Status wkl_status; if (recv_size[i]) { MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter, fd->comm, &wkl_status); j++; } } } for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i); ADIOI_Free(recv_types); if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \ sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } else { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } #ifdef NEEDS_MPI_TEST i = 0; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ while (!i) MPI_Testall(nprocs_send, send_req, &i, statuses); } else { while (!i) MPI_Testall(nprocs_send+nprocs_recv, requests, &i, statuses); } #else if (fd->atomicity) /* bug fix from Wei-keng Liao and Kenin Coloma */ MPI_Waitall(nprocs_send, send_req, statuses); else MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses); #endif #ifdef AGGREGATION_PROFILE MPE_Log_event (5033, 0, NULL); #endif ADIOI_Free(statuses); ADIOI_Free(requests); if 
(!buftype_is_contig && nprocs_send) { for (i=0; i < nprocs; i++) if (send_size[i]) ADIOI_Free(send_buf[i]); ADIOI_Free(send_buf); } }
int main(int argc, char **argv) { /* Variable declaration */ float *globalArray = NULL, *bBucket = NULL, **sBucket = NULL, *tempArray = NULL, value; int dataPerProc, numprocs, rank, i, p, n, size, maxTempSize, assignedIndex, bSize, bMaxSize, bTotal, *sSize, sMaxSize, sTotal, sAssigned; MPI_Status status, recvStatus; MPI_Request sendRequest, recvRequest; /* Initliaze MPI */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank ); /* Initializing from CL arguments */ n = getProblemSize(argc, argv, rank, numprocs); if(n==-1) { MPI_Finalize(); return EXIT_FAILURE; } else { dataPerProc = n/numprocs; maxTempSize = n; bMaxSize = n; sMaxSize = dataPerProc; bTotal = numprocs; sTotal = numprocs; } /* Initializing buckets and arrays */ if(rank==0) globalArray = initialiseRandomList(n); tempArray = (float*) malloc(sizeof(float)*maxTempSize); bBucket = (float*) malloc(sizeof(float)*bMaxSize); sBucket = (float**) malloc(sizeof(float*)*sTotal); for(i=0; i<sTotal; i++) sBucket[i] = (float*) malloc(sizeof(float)*sMaxSize); bSize = dataPerProc; sSize = (int*) calloc(numprocs, sizeof(int)); /* Start timer */ double startTime = MPI_Wtime(); if( rank==0 ) printf( "Starting iteration; may take a few seconds ...\n" ); /* Scatter global array into big buckets */ displayFullList(globalArray, rank, numprocs, n); MPI_Scatter(globalArray, dataPerProc, MPI_FLOAT, bBucket, dataPerProc, MPI_FLOAT, 0, MPI_COMM_WORLD); displayBigBuckets(bBucket, bSize, rank, numprocs, n); /* Pour each rank's big buckets into the correct small bucket */ /* Step 1 to Step 2 of Lecture 8 */ for(i=0; i<dataPerProc; i++) { value = bBucket[i]; sAssigned = (int) (value * numprocs); if(sAssigned == numprocs) sAssigned--; /* Resolves a bug cropping due to numerical errors; * e.g. 
int(0.99 * 1) = 1 but first bucket is index 0*/ assignedIndex = sSize[sAssigned]; sBucket[sAssigned][assignedIndex] = value; sSize[sAssigned] += 1; } // for(i=0; i<sTotal; i++) printf("\nRank %i: sBucket=%i, Size=%i\n", rank, i, sSize[i]); displaySmallBuckets(sBucket, sSize, rank, numprocs, n); /* Pour each rank's small bucket back into the correct big bucket * The use of non-blocking communication to prevent deadlock when * problem size is rather large (although there exists a way solution * that doesn't involve non-blocking communication)*/ /* Step 2 to Step 3 of Lecture 8 */ for(i=0; i<bMaxSize; i++) bBucket[i] = 0.0; bSize = 0; for(p=0; p<sTotal; p++) { if(p==rank) { for(i=0; i<sSize[p]; i++) bBucket[bSize + i] = sBucket[p][i]; bSize += sSize[p]; } else { MPI_Isend(sBucket[p], sSize[p], MPI_FLOAT, p, 0, MPI_COMM_WORLD, &sendRequest); MPI_Irecv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &recvRequest); MPI_Wait(&recvRequest, &recvStatus); MPI_Get_count(&recvStatus, MPI_FLOAT, &size); for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i]; bSize += size; } } /* All small buckets should pour their entire contents into the big buckets * before just before serial sorting of big buckets */ MPI_Barrier(MPI_COMM_WORLD); displayBigBuckets(bBucket, bSize, rank, numprocs, n); /* Swirl each rank's big bucket until sorted */ /* Step 3 to Step 4 of Lecture 8 */ serialQuicksort(bBucket, 0, bSize); displayBigBuckets(bBucket, bSize, rank, numprocs, n); /* Concatenate each rank's big bucket */ /* Step 4 to Step 5 of Lecture 8 */ if(rank!=0) MPI_Send(bBucket, bSize, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); else { for(p=1; p<bTotal; p++) { MPI_Recv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &status); MPI_Get_count(&status, MPI_FLOAT, &size); for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i]; bSize += size; } globalArray = bBucket; } /* End timer */ double timeTaken = MPI_Wtime() - startTime; if( rank==0 ) { printf( "Finished. 
Time taken: %g seconds\n", timeTaken ); // FILE *f = fopen("data.txt", "a"); // fprintf(f, "%g\n", timeTaken); } /* Display the final (hopefully sorted) list, and check all entries are indeed in order. */ displayFullList(globalArray,rank,numprocs,n); /* Again, nothing is displayed if n>100. */ if(rank==0) { for(i=0; i<n-1; i++) if(globalArray[i] > globalArray[i+1]) { printf("List not sorted correctly.\n"); break; } if(i==n-1) printf("List correctly sorted.\n"); } /* Clear up and quit. As ever, each malloc() needs a free(). */ free(bBucket); // Also frees global array for(i=0; i<sTotal; i++) free(sBucket[i]); free(sBucket); free(sSize); free(tempArray); MPI_Finalize(); return EXIT_SUCCESS; }
/*
 * Performs sparse matrix-vector multiplication.
 *
 * Arguments:
 *   abs        (input)  If nonzero, compute abs(A)*abs(x) instead of A*x.
 *   A_internal (input)  Matrix A permuted by columns.  The column indices
 *                       are translated into the relative positions in the
 *                       gathered x-vector.  The type of A can be:
 *                       Stype = NR_loc; Dtype = SLU_D; Mtype = GE.
 *   grid       (input)  Process grid.
 *   gsmv_comm  (input)  The data structure for communication.
 *   x          (input)  The distributed source vector.
 *   ax         (output) The distributed destination vector.
 *
 * The x entries needed by other processes are packed and sent with
 * non-blocking calls; the rows are first multiplied against the locally
 * owned part of x, and once every request has completed the contribution
 * of the received entries is added.
 */
void
pdgsmv
(
 int_t  abs,
 SuperMatrix *A_internal,
 gridinfo_t *grid,
 pdgsmv_comm_t *gsmv_comm,
 double x[],
 double ax[]
 )
{
    NRformat_loc *store = (NRformat_loc *) A_internal->Store;
    int      me        = grid->iam;
    int      nproc     = grid->nprow * grid->npcol;
    int_t    nrow_loc  = store->m_loc;
    int_t    row0      = store->fst_row;
    int_t   *cols      = store->colind;
    int_t   *row_beg   = store->rowptr;
    double  *vals      = (double *) store->nzval;
    int_t   *ext_beg   = gsmv_comm->extern_start;   /* first external entry of each row */
    int_t   *pack_idx  = gsmv_comm->ind_torecv;     /* indices of x packed for sending  */
    int_t   *isend_off = gsmv_comm->ptr_ind_torecv; /* per-process offset in out_vals   */
    int_t   *irecv_off = gsmv_comm->ptr_ind_tosend; /* per-process offset in in_vals    */
    int     *isend_cnt = gsmv_comm->RecvCounts;     /* counts used for the Isend calls  */
    int     *irecv_cnt = gsmv_comm->SendCounts;     /* counts used for the Irecv calls  */
    double  *out_vals  = (double *) gsmv_comm->val_tosend;
    double  *in_vals   = (double *) gsmv_comm->val_torecv;
    int_t    n_pack    = gsmv_comm->TotalValSend;
    MPI_Request *send_req, *recv_req;
    MPI_Status status;
    int_t row, k, q;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdgsmv()");
#endif

    /* ---- Pack the x values that have to travel ---- */
    for (k = 0; k < n_pack; ++k)
        out_vals[k] = x[pack_idx[k] - row0];   /* index relative to local x[] */

    /* ---- Start the exchange ---- */
    /* One allocation holds both request arrays: sends first, receives after. */
    send_req = (MPI_Request *) SUPERLU_MALLOC(2*nproc *sizeof(MPI_Request));
    if ( !send_req )
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + nproc;

    for (q = 0; q < nproc; ++q) {
        if ( isend_cnt[q] ) {
            MPI_Isend(&out_vals[isend_off[q]], isend_cnt[q], MPI_DOUBLE,
                      q, me, grid->comm, &send_req[q]);
        }
        if ( irecv_cnt[q] ) {
            MPI_Irecv(&in_vals[irecv_off[q]], irecv_cnt[q], MPI_DOUBLE,
                      q, q, grid->comm, &recv_req[q]);
        }
    }

    /* ---- Local part: overlaps with the communication ---- */
    if ( abs ) {
        for (row = 0; row < nrow_loc; ++row) {
            ax[row] = 0.0;
            for (k = row_beg[row]; k < ext_beg[row]; ++k)
                ax[row] += fabs(vals[k]) * fabs(x[cols[k]]);
        }
    } else {
        for (row = 0; row < nrow_loc; ++row) {
            ax[row] = 0.0;
            for (k = row_beg[row]; k < ext_beg[row]; ++k)
                ax[row] += vals[k] * x[cols[k]];
        }
    }

    /* ---- Finish the exchange (send then receive, per process) ---- */
    for (q = 0; q < nproc; ++q) {
        if ( isend_cnt[q] ) MPI_Wait(&send_req[q], &status);
        if ( irecv_cnt[q] ) MPI_Wait(&recv_req[q], &status);
    }

    /* ---- External part: uses the values just received ---- */
    if ( abs ) {
        for (row = 0; row < nrow_loc; ++row) {
            for (k = ext_beg[row]; k < row_beg[row+1]; ++k)
                ax[row] += fabs(vals[k]) * fabs(in_vals[cols[k]]);
        }
    } else {
        for (row = 0; row < nrow_loc; ++row) {
            for (k = ext_beg[row]; k < row_beg[row+1]; ++k)
                ax[row] += vals[k] * in_vals[cols[k]];
        }
    }

    SUPERLU_FREE(send_req);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(me, "Exit pdgsmv()");
#endif
} /* PDGSMV */
int main(int argc, char **argv){ uint32_t datarows, datacolumns; uint32_t i, j, k; int world_size, world_rank, rc; //Check input arguments if (argc != 2) { fprintf(stderr,"USAGE: %s <input_filename>\n", argv[0]); exit(1); } // Start MPI rc = MPI_Init(&argc,&argv); if (rc != MPI_SUCCESS) { printf ("Error starting MPI program. Terminating.\n"); MPI_Abort(MPI_COMM_WORLD, rc); } // Get world size (number of MPI processes) and world rank (# of this process) MPI_Comm_size(MPI_COMM_WORLD,&world_size); MPI_Comm_rank(MPI_COMM_WORLD,&world_rank); if (world_rank==0){ // Declare variables used only on the root node int buf[world_size-1], nextReady; MPI_Request reqs[world_size-1]; MPI_Status stats[world_size-1]; // Print format of output printf("Kv\tMbulk\tTliq\tTsatb\tTf\tTsat\tZrsat\tZrf\tFf\tSiO2\tZrbulk\tMZr\tTcryst\n"); // Import 2-d source data array as a flat double array. Format: // SiO2, TiO2, Al2O3, Fe2O3, Cr2O3, FeO, MnO, MgO, NiO, CoO, CaO, Na2O, K2O, P2O5, CO2, H2O, Zr, Kv; double** const data = csvparse(argv[1],',', &datarows, &datacolumns); // Listen for task requests from the worker nodes for (i=1; i<world_size; i++){ // *buf, count, datatype, dest, tag, comm, *request MPI_Irecv(&buf[i-1], 1, MPI_INT, i, 0, MPI_COMM_WORLD, &reqs[i-1]); } // Once any worker asks for a new task, send next task to that worker and keep listening for (i=0; i<datarows; i++){ MPI_Waitany(world_size-1, reqs, &nextReady, stats); // *buf, count, datatype, dest, tag, comm MPI_Send(data[i], 18, MPI_DOUBLE, nextReady+1, 1, MPI_COMM_WORLD); // *buf, count, datatype, source, tag, comm, *request MPI_Irecv(&buf[nextReady], 1, MPI_INT, nextReady+1, 0, MPI_COMM_WORLD, &reqs[nextReady]); } // Wait for all workers to complete, then send the stop signal MPI_Waitall(world_size-1, reqs, stats); double stop[18] = {-1}; for (i=1; i<world_size; i++){ MPI_Send(&stop, 18, MPI_DOUBLE, i, 1, MPI_COMM_WORLD); } } else { // Declare variables used only on the worker nodes MPI_Request sReq; MPI_Status sStat; 
double ic[18], Kd, iKd; FILE *fp; // char prefix[200], cmd_string[500]; char* prefix = malloc(500*sizeof(char)); char* cmd_string = malloc(1000*sizeof(char)); // Simulation parameters /**********************************************************/ // Version to run MELTS in (MELTS or pMELTS) const char version[]="pMELTS"; // Melts mode (isobaric, ptpath, etc) const char mode[]="isobaric"; // fO2 buffer to use (None, FMQ, etc.) const char fo2Buffer[]="FMQ"; // fO2 offset from buffer double fo2Delta=1; // Initial temperature (Celcius) double Ti=1700; //Initial Pressure (bar) double Pi=600; //Temperature step size in each simulation const int deltaT=-10; // Pressure step size; const int deltaP=0; // Stop simulations at a given percent melt const double minPercentMelt=10; // Variables that control size and location of the simulation /***********************************************************/ // Location of scratch directory (ideally local scratch for each node) // This location may vary on your system - contact your sysadmin if unsure // const char scratchdir[]="/scratch/gpfs/cbkeller/"; const char scratchdir[]="/scratch/"; // Variables that determine how much memory to allocate to imported results const int maxMinerals=100, maxSteps=1700/abs(deltaT), maxColumns=50; /***********************************************************/ // Malloc space for the imported melts array double **rawMatrix=mallocDoubleArray(maxMinerals*maxSteps,maxColumns); double ***melts=malloc(maxMinerals*sizeof(double**)); char **names=malloc(maxMinerals*sizeof(char*)); char ***elements=malloc(maxMinerals*sizeof(char**)); int *meltsrows=malloc(maxMinerals*sizeof(int)), *meltscolumns=malloc(maxMinerals*sizeof(int)); for (i=0; i<maxMinerals; i++){ names[i]=malloc(30*sizeof(char)); elements[i]=malloc(maxColumns*sizeof(char*)); for (k=0; k<maxColumns; k++){ elements[i][k]=malloc(30*sizeof(char)); } } int minerals; // Variables for finding saturation temperature int row, col, P, T, mass, SiO2, TiO2, 
Al2O3, Fe2O3, Cr2O3, FeO, MnO, MgO, NiO, CoO, CaO, Na2O, K2O, P2O5, CO2, H2O; int fspCaO, fspNa2O, fspK2O, oxideTiO2, oxideFe2O3, oxideFeO, oxideMnO; double M, Tf, Tsat, Tsatbulk, Ts, Tsmax, Zrf, Zrsat, MZr, MZrnow, Tcryst; double AnKd, AbKd, OrKd, IlmKd, MtKd; while (1) { // Ask root node for new task // *buf, count, datatype, dest, tag, comm, *request MPI_Isend(&world_rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &sReq); // *buf, count, datatype, source, tag, comm, *status MPI_Recv(&ic, 18, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &sStat); // Exit loop if stop signal recieved if (ic[0]<0) break; //Configure working directory sprintf(prefix,"%sout%i_%.0f/", scratchdir, world_rank, ic[17]); sprintf(cmd_string,"mkdir -p %s", prefix); system(cmd_string); // //Set water // ic[15]=3.0; // //Set CO2 // ic[14]=0.1; //Run MELTS runmelts(prefix,ic,version,"isobaric",fo2Buffer,fo2Delta,"1\nsc.melts\n10\n1\n3\n1\nliquid\n1\n0.99\n1\n10\n0\n4\n0\n","","!",Ti,Pi,deltaT,deltaP,0.005); // If simulation failed, clean up scratch directory and move on to next simulation sprintf(cmd_string,"%sPhase_main_tbl.txt", prefix); if ((fp = fopen(cmd_string, "r")) == NULL) { fprintf(stderr, "%s : MELTS equilibration failed to produce output.\n", prefix); sprintf(cmd_string,"rm -r %s", prefix); system(cmd_string); continue; } // Import results, if they exist. 
Format: // Pressure Temperature mass S H V Cp viscosity SiO2 TiO2 Al2O3 Fe2O3 Cr2O3 FeO MnO MgO NiO CoO CaO Na2O K2O P2O5 H2O minerals=maxMinerals; importmelts(maxSteps, maxColumns, prefix, melts, rawMatrix, meltsrows, meltscolumns, names, elements, &minerals); if (minerals<1 | strcmp(names[0],"liquid_0")!=0) { fprintf(stderr, "%s : MELTS equilibration failed to calculate liquid composition.\n", prefix); sprintf(cmd_string,"rm -r %s", prefix); system(cmd_string); continue; } // Can delete temp files after we've read them sprintf(cmd_string,"rm -r %s", prefix); system(cmd_string); // Find the columns containing useful elements for(col=0; col<meltscolumns[0]; col++){ if (strcmp(elements[0][col], "Pressure")==0) P=col; else if (strcmp(elements[0][col], "Temperature")==0) T=col; else if (strcmp(elements[0][col], "mass")==0) mass=col; else if (strcmp(elements[0][col], "SiO2")==0) SiO2=col; else if (strcmp(elements[0][col], "TiO2")==0) TiO2=col; else if (strcmp(elements[0][col], "Al2O3")==0) Al2O3=col; else if (strcmp(elements[0][col], "Fe2O3")==0) Fe2O3=col; else if (strcmp(elements[0][col], "Cr2O3")==0) Cr2O3=col; else if (strcmp(elements[0][col], "FeO")==0) FeO=col; else if (strcmp(elements[0][col], "MnO")==0) MnO=col; else if (strcmp(elements[0][col], "MgO")==0) MgO=col; else if (strcmp(elements[0][col], "NiO")==0) NiO=col; else if (strcmp(elements[0][col], "CoO")==0) CoO=col; else if (strcmp(elements[0][col], "CaO")==0) CaO=col; else if (strcmp(elements[0][col], "Na2O")==0) Na2O=col; else if (strcmp(elements[0][col], "K2O")==0) K2O=col; else if (strcmp(elements[0][col], "P2O5")==0) P2O5=col; else if (strcmp(elements[0][col], "CO2")==0) CO2=col; else if (strcmp(elements[0][col], "H2O")==0) H2O=col; } // Find the columns containing useful elements for other minerals for (i=1; i<minerals; i++){ if (strncasecmp(names[i],"feldspar",8)==0){ for(col=0; col<meltscolumns[i]; col++){ if (strcmp(elements[i][col], "CaO")==0) fspCaO=col; else if (strcmp(elements[i][col], 
"Na2O")==0) fspNa2O=col; else if (strcmp(elements[i][col], "K2O")==0) fspK2O=col; } } else if (strncasecmp(names[i],"rhm_oxide",9)==0){ for(col=0; col<meltscolumns[i]; col++){ if (strcmp(elements[i][col], "TiO2")==0) oxideTiO2=col; else if (strcmp(elements[i][col], "Fe2O3")==0) oxideFe2O3=col; else if (strcmp(elements[i][col], "FeO")==0) oxideFeO=col; else if (strcmp(elements[i][col], "MnO")==0) oxideMnO=col; } } } // Initial saturation state M = meltsM(&melts[0][0][SiO2]); Zrf = ic[16]; // Zirconium content in melt Tf = melts[0][0][T]; // Current temperature Zrsat = tzircZr(M, Tf); // Zirconium required for saturation Tsatbulk = tzirc(M, Zrf); // Temperature required for saturation // Calculate saturation temperature and minimum necessary zirconium content Tsat=0; Tcryst=0; MZr=0; Tsmax = Tsatbulk; for(row=1; row<(meltsrows[0]-1); row++){ // Calculate bulk zircon partition coefficient at present step Kd = 0; for (i=1; i<minerals; i++){ // See what minerals might be crystallizing at this temperature // so we can find their GERM partition coefficients for (j=0; j<meltsrows[i]; j++){ if (fabs(melts[0][row][T]-melts[i][j][T]) < 0.01){ if (strncasecmp(names[i],"feldspar",8)==0){ AnKd = getGERMKd("AnKdorthite","Zr",melts[0][row][SiO2]); AbKd = getGERMKd("Albite","Zr",melts[0][row][SiO2]); OrKd = getGERMKd("Orthoclase","Zr",melts[0][row][SiO2]); if (isnan(AnKd)) AnKd=0; if (isnan(OrKd)) OrKd=0; if (isnan(AbKd)) AbKd = (AnKd + OrKd)/2; iKd = (220.1298+56.18)/56.18*melts[i][j][fspCaO]/100 * AnKd\ +(228.2335+30.99)/30.99*melts[i][j][fspNa2O]/100 * AbKd\ +(228.2335+47.1)/47.1*melts[i][j][fspK2O]/100 * OrKd; } else if (strncasecmp(names[i],"rhm_oxide",9)==0){ IlmKd = getGERMKd("Ilmenite","Zr",melts[0][row][SiO2]); MtKd = getGERMKd("Magnetite","Zr",melts[0][row][SiO2]); if (isnan(IlmKd)) IlmKd = 0; if (isnan(MtKd)) MtKd = 0; iKd = (melts[i][j][oxideTiO2]+melts[i][j][oxideMnO]+(melts[i][j][oxideTiO2]\ *(71.8444/79.8768)-melts[i][j][oxideMnO]*(71.8444/70.9374)))/100 * AnKd\ + (1 
- (melts[i][j][oxideTiO2]+melts[i][j][oxideMnO]+(melts[i][j][oxideTiO2]\ *(71.8444/79.8768)-melts[i][j][oxideMnO]*(71.8444/70.9374)))/100) * MtKd; } else { iKd = getGERMKd(names[i],"Zr",melts[0][row][SiO2]); } if (isnan(iKd)){iKd = 0;} Kd += iKd * melts[i][j][mass]; } } } Kd = Kd / (100 - melts[0][row][mass]); //Calculate melt M and [Zr] M = meltsM(&melts[0][row][SiO2]); Zrf = ic[16]*100/(melts[0][row][mass] + Kd*(100-melts[0][row][mass])); // Zirconium content in melt Tf = melts[0][row][T]; // Current temperature Zrsat = tzircZr(M, Tf); // Zirconium required for saturation Ts = tzirc(M, Zrf); // Temperature required for saturation // Determine how much zircon is saturated if (Zrf>Zrsat){ MZrnow = melts[0][row][mass]/100*(Zrf-Zrsat); if (MZr < MZrnow){ Tcryst += (MZrnow - MZr)*melts[0][row][T]; MZr = MZrnow; } } // Keep track of maximum saturation temperature if (Ts > Tsmax){ Tsmax = Ts; } // Check if we've cooled below the saturation temperature yet if (Tsat==0 && Ts > melts[0][row][T]){ Tsat = Ts; } // Stop when we get to maximum SiO2 if (melts[0][row-1][SiO2]>(melts[0][row][SiO2])+0.01){ row--; break; } // Or when remaining melt falls below minimum percent if (melts[0][row][mass]<minPercentMelt){ row--; break; } } // If zircon never saturated, check what the best (highest) saturation temperature was if (Tsat==0 || MZr==0){ Tsat = Tsmax; Tcryst = NAN; } else { Tcryst = Tcryst / MZr; } // Get back bulk M M = meltsM(&melts[0][0][SiO2]); // Print results. Format: // Kv, Mbulk, Tliquidus, Tsatbulk, Tf, Tsat, Zrsat, Zrf, Ff, SiO2, Zrbulk, MZr, Tcryst printf("%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\n", ic[17], M, melts[0][0][T], Tsatbulk, Tf, Tsat, Zrsat, Zrf, melts[0][row][mass], melts[0][0][SiO2], ic[16], MZr, Tcryst); } } MPI_Finalize(); return 0; }
int main( int argc, char **argv ) { int locId ; int data [i_ntotin] ; MPI_Init(&argc, &argv) ; MPI_Comm_rank(MPI_COMM_WORLD, &locId) ; if(locId == 0) { /* The server... */ MPI_Status status[2] ; MPI_Request events [2] ; int eventId ; int dstId = 1 ; int i ; for(i = 0 ; i < i_ntotin ; i++) data [i] = i + 1 ; events [0] = MPI_REQUEST_NULL ; events [1] = MPI_REQUEST_NULL ; MPI_Isend(data, i_ntotin, MPI_INT, dstId, DAR, MPI_COMM_WORLD, events + 1) ; /* enable send of data */ /*_begin_trace_code */ /* printf("locId = %d: MPI_Isend(%x, %d, %x, %d, %d, %x, %x)\n", locId, data, i_ntotin, MPI_INT, dstId, DAR, MPI_COMM_WORLD, events [1]); */ /*_end_trace_code */ /*_begin_trace_code */ /* printf("locId = %d: MPI_Waitany(%d, [%x, %x], %x %x)...", locId, 2, events [0], events [1], &eventId, &status) ; */ /*_end_trace_code */ MPI_Waitany(2, events, &eventId, status) ; /*_begin_trace_code */ printf("done. eventId = %d\n", eventId) ; /*_end_trace_code */ } if(locId == 1) { /* The Client... */ MPI_Status status ; int srcId = MPI_ANY_SOURCE ; /*_begin_trace_code */ /* printf("locId = %d: MPI_Recv(%x, %d, %x, %d, %d, %x, %x)...", locId, data, i_ntotin, MPI_INT, srcId, DAR, MPI_COMM_WORLD, &status) ; */ /*_end_trace_code */ MPI_Recv(data, i_ntotin, MPI_INT, srcId, DAR, MPI_COMM_WORLD, &status) ; /*_begin_trace_code */ /*printf("done.\n") ;*/ /*_end_trace_code */ /* printf("locId = %d: data [0] = %d, data [%d] = %d\n", locId, data [0], i_ntotin - 1, data [i_ntotin - 1]) ; */ } MPI_Barrier( MPI_COMM_WORLD ); if (locId == 0) printf( "Test complete\n" ); MPI_Finalize() ; return 0; }
void peanoclaw::records::RepositoryStatePacked::send(int destination, int tag, bool exchangeOnlyAttributesMarkedWithParallelise, bool communicateBlocking) { _senderDestinationRank = destination; if (communicateBlocking) { const int result = MPI_Send(this, 1, exchangeOnlyAttributesMarkedWithParallelise ? Datatype : FullDatatype, destination, tag, tarch::parallel::Node::getInstance().getCommunicator()); if (result!=MPI_SUCCESS) { std::ostringstream msg; msg << "was not able to send message peanoclaw::records::RepositoryStatePacked " << toString() << " to node " << destination << ": " << tarch::parallel::MPIReturnValueToString(result); _log.error( "send(int)",msg.str() ); } } else { MPI_Request* sendRequestHandle = new MPI_Request(); MPI_Status status; int flag = 0; int result; clock_t timeOutWarning = -1; clock_t timeOutShutdown = -1; bool triggeredTimeoutWarning = false; if (exchangeOnlyAttributesMarkedWithParallelise) { result = MPI_Isend( this, 1, Datatype, destination, tag, tarch::parallel::Node::getInstance().getCommunicator(), sendRequestHandle ); } else { result = MPI_Isend( this, 1, FullDatatype, destination, tag, tarch::parallel::Node::getInstance().getCommunicator(), sendRequestHandle ); } if (result!=MPI_SUCCESS) { std::ostringstream msg; msg << "was not able to send message peanoclaw::records::RepositoryStatePacked " << toString() << " to node " << destination << ": " << tarch::parallel::MPIReturnValueToString(result); _log.error( "send(int)",msg.str() ); } result = MPI_Test( sendRequestHandle, &flag, &status ); while (!flag) { if (timeOutWarning==-1) timeOutWarning = tarch::parallel::Node::getInstance().getDeadlockWarningTimeStamp(); if (timeOutShutdown==-1) timeOutShutdown = tarch::parallel::Node::getInstance().getDeadlockTimeOutTimeStamp(); result = MPI_Test( sendRequestHandle, &flag, &status ); if (result!=MPI_SUCCESS) { std::ostringstream msg; msg << "testing for finished send task for peanoclaw::records::RepositoryStatePacked " << toString() << " 
sent to node " << destination << " failed: " << tarch::parallel::MPIReturnValueToString(result); _log.error("send(int)", msg.str() ); } // deadlock aspect if ( tarch::parallel::Node::getInstance().isTimeOutWarningEnabled() && (clock()>timeOutWarning) && (!triggeredTimeoutWarning) ) { tarch::parallel::Node::getInstance().writeTimeOutWarning( "peanoclaw::records::RepositoryStatePacked", "send(int)", destination,tag,1 ); triggeredTimeoutWarning = true; } if ( tarch::parallel::Node::getInstance().isTimeOutDeadlockEnabled() && (clock()>timeOutShutdown) ) { tarch::parallel::Node::getInstance().triggerDeadlockTimeOut( "peanoclaw::records::RepositoryStatePacked", "send(int)", destination,tag,1 ); } tarch::parallel::Node::getInstance().receiveDanglingMessages(); } delete sendRequestHandle; #ifdef Debug _log.debug("send(int,int)", "sent " + toString() ); #endif } }
void connection_handler::handle_messages() { detail::handling_messages hm(handling_messages_); // reset on exit bool bootstrapping = hpx::is_starting(); bool has_work = true; std::size_t k = 0; hpx::util::high_resolution_timer t; std::list<std::pair<int, MPI_Request> > close_requests; // We let the message handling loop spin for another 2 seconds to avoid the // costs involved with posting it to asio while(bootstrapping || has_work || (!has_work && t.elapsed() < 2.0)) { if(stopped_) break; // break the loop if someone requested to pause the parcelport if(!enable_parcel_handling_) break; // handle all send requests { hpx::lcos::local::spinlock::scoped_lock l(senders_mtx_); for( senders_type::iterator it = senders_.begin(); !stopped_ && enable_parcel_handling_ && it != senders_.end(); /**/) { if((*it)->done()) { it = senders_.erase(it); } else { ++it; } } has_work = !senders_.empty(); } // Send the pending close requests { hpx::lcos::local::spinlock::scoped_lock l(close_mtx_); typedef std::pair<int, int> pair_type; BOOST_FOREACH(pair_type p, pending_close_requests_) { header close_request = header::close(p.first, p.second); close_requests.push_back(std::make_pair(p.first, MPI_Request())); MPI_Isend( close_request.data(), // Data pointer close_request.data_size_, // Size close_request.type(), // MPI Datatype close_request.rank(), // Destination 0, // Tag communicator_, // Communicator &close_requests.back().second ); } pending_close_requests_.clear(); } // add new receive requests std::pair<bool, header> next(acceptor_.next_header()); if(next.first) { boost::shared_ptr<receiver> rcv; header h = next.second; receivers_tag_map_type & tag_map = receivers_map_[h.rank()]; receivers_tag_map_type::iterator jt = tag_map.find(h.tag()); if(jt != tag_map.end()) { rcv = jt->second; } else { rcv = boost::make_shared<receiver>( communicator_ , get_next_tag() , h.tag() , h.rank() , *this); tag_map.insert(std::make_pair(h.tag(), rcv)); } if(h.close_request()) { rcv->close(); } else { 
h.assert_valid(); if (static_cast<std::size_t>(h.size()) > this->get_max_message_size()) { // report this problem ... HPX_THROW_EXCEPTION(boost::asio::error::operation_not_supported, "mpi::connection_handler::handle_messages", "The size of this message exceeds the maximum inbound data size"); return; } if(rcv->async_read(h)) { #ifdef HPX_DEBUG receivers_type::iterator it = std::find(receivers_.begin(), receivers_.end(), rcv); HPX_ASSERT(it == receivers_.end()); #endif receivers_.push_back(rcv); } } } // handle all receive requests for(receivers_type::iterator it = receivers_.begin(); it != receivers_.end(); /**/) { boost::shared_ptr<receiver> rcv = *it; if(rcv->done()) { HPX_ASSERT(rcv->sender_tag() != -1); if(rcv->closing()) { receivers_tag_map_type & tag_map = receivers_map_[rcv->rank()]; receivers_tag_map_type::iterator jt = tag_map.find(rcv->sender_tag()); HPX_ASSERT(jt != tag_map.end()); tag_map.erase(jt); { hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_); free_tags_.push_back(rcv->tag()); } } it = receivers_.erase(it); } else { ++it; } } if(!has_work) has_work = !receivers_.empty(); // handle completed close requests for( std::list<std::pair<int, MPI_Request> >::iterator it = close_requests.begin(); !stopped_ && enable_parcel_handling_ && it != close_requests.end(); ) { int completed = 0; MPI_Status status; int ret = 0; ret = MPI_Test(&it->second, &completed, &status); HPX_ASSERT(ret == MPI_SUCCESS); if(completed && status.MPI_ERROR != MPI_ERR_PENDING) { hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_); free_tags_.push_back(it->first); it = close_requests.erase(it); } else { ++it; } } if(!has_work) has_work = !close_requests.empty(); if (bootstrapping) bootstrapping = hpx::is_starting(); if(has_work) { t.restart(); k = 0; } else { if(enable_parcel_handling_) { hpx::lcos::local::spinlock::yield(k); ++k; } } }
int main(int argc, char **argv) { int myRank; int pNum; double start_time, end_time; double *matrix; MPI_Status stat; MPI_Request req1[300], req2[300]; MPI_Init(&argc, &argv); start_time = MPI_Wtime(); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); MPI_Comm_size(MPI_COMM_WORLD, &pNum); if (myRank == 0) { double buf[N+5]; while(1) { double diff; int flag = 0; for(int i = 1;i < pNum;i++) { MPI_Recv(&diff, 1, MPI_DOUBLE, i, MPI_ANY_TAG, MPI_COMM_WORLD, &stat); if (diff > ext) flag = 1; } MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); if (flag == 0) break; } } else { // init calculate model int local_size = N / (pNum - 1) + 2; if (myRank == pNum - 1) local_size = N - (local_size - 2) * (pNum - 2) + 2; //printf("local size: %d\n", local_size); double temp[local_size][N + 2], temp2[local_size][N + 2]; for(int i = 1;i < local_size - 1;i++) { for(int j = 1;j < N + 1;j++) temp[i][j] = (int)(random())% 1000; temp[i][0] = temp[i][N + 1] = 0; } for(int j = 0;j < N + 2;j++) temp[0][j] = temp[local_size - 1][j] = 0; double maxDiff = ext + 1; while(1) { maxDiff = ext; // pass value int sendNum = 0, recNum = 0; if (myRank != 1) MPI_Isend(temp[1], N + 2, MPI_DOUBLE, myRank - 1, 0, MPI_COMM_WORLD, &req1[sendNum++]); if (myRank != pNum - 1) MPI_Isend(temp[local_size - 2], N + 2, MPI_DOUBLE, myRank + 1, 0, MPI_COMM_WORLD, &req1[sendNum++]); double preBuf[N], nextBuf[N]; if (myRank != 1) { MPI_Irecv(temp[0], N + 2, MPI_DOUBLE, myRank - 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]); //memcpy(temp[0], preBuf, N + 2); } if (myRank != pNum - 1) { MPI_Irecv(temp[local_size - 1], N + 2, MPI_DOUBLE, myRank + 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]); //memcpy(temp[local_size - 1], nextBuf, N + 2); } //calculate for(int i = 1;i < local_size - 1;i++) for(int j = 1;j <= N;j++) { temp2[i][j] = (temp[i - 1][j] + temp[i + 1][j] + temp[i][j - 1] + temp[i][j + 1] + temp[i][j]) / 5; if (fabs(temp2[i][j] - temp[i][j]) > maxDiff) maxDiff = fabs(temp2[i][j] - temp[i][j]); } for(int i = 0;i < 
recNum;i++) MPI_Wait(&req2[i], &stat); // printf("id:%d diff %lf localSize %d\n", myRank, maxDiff, local_size); MPI_Send(&maxDiff, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); int flag; MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD); for(int i = 1;i < local_size - 1;i++) for(int j = 1;j <= N;j++) temp[i][j] = temp2[i][j]; if (flag == 0) break; // printf("rank:%d ok diff %lf\n", myRank, maxDiff); } //for(int j = 1;j < local_size - 1;j++) // MPI_Send(&temp[i][1], N, MPI_DOUBLE, 0, myRank, MPI_COMM_WORLD, &stat); } end_time = MPI_Wtime(); printf("rank: %d, runtime is %fs\n", myRank, end_time - start_time); MPI_Finalize(); return 0; }
/*
 * Applies the operator to pW and stores the result in pV, exchanging
 * boundary data with the eight neighbouring ranks (T/X/Y/Z, plus and minus).
 *
 *   pV  output spinor field
 *   pU  gauge field: four consecutive arrays of qcdNsite matrices, addressed
 *       below as pUx, pUy, pUz, pUt
 *   pW  input spinor field
 *   k   value written to all four entries of the global qcdtKappa
 *
 * The body runs in one OpenMP parallel region.  Only thread 0 makes MPI
 * calls; the other threads are kept in step with `omp barrier`s.  The order
 * of statements is therefore significant:
 *   1. thread 0 posts all eight receives;
 *   2. for each direction the team fills a send buffer, a barrier follows,
 *      and thread 0 posts the matching send;
 *   3. the team runs the *in routines (presumably the interior part that
 *      needs no remote data - TODO confirm against their definitions);
 *   4. per direction, thread 0 waits for the receive, a barrier follows, and
 *      the team runs the matching Set*Bnd routine on the receive buffer;
 *   5. thread 0 waits for all sends before the region ends.
 *
 * Tag pairing: a buffer sent towards the "minus" neighbour carries the
 * "plus" tag and is received by that rank from its "plus" neighbour with
 * the same tag (and vice versa).
 *
 * NOTE(review): MPI_DOUBLE_PRECISION is the Fortran datatype handle;
 * MPI_DOUBLE is the usual choice in C - confirm this is intentional.
 */
void QCDDopr_Mult(QCDSpinor* pV,QCDMatrix* pU,QCDSpinor* pW,double k)
{
    MPI_Request reqSend[8];
    MPI_Request reqRecv[8];
    MPI_Status st;
    QCDMatrix* pUx;
    QCDMatrix* pUy;
    QCDMatrix* pUz;
    QCDMatrix* pUt;
    int i;    /* not used in this function */

    qcdtKappa[0] = k;
    qcdtKappa[1] = k;
    qcdtKappa[2] = k;
    qcdtKappa[3] = k;

    /* One block of qcdNsite matrices per direction. */
    pUx = pU;
    pUy = pU + qcdNsite;
    pUz = pU + qcdNsite*2;
    pUt = pU + qcdNsite*3;

/* #pragma omp parallel num_threads(8) */
#pragma omp parallel
    {
        int tid = 0,nid = 1;
        tid = omp_get_thread_num();
        nid = omp_get_num_threads();

        /* //debug */
        /* printf("nthreads: %d\n", nid); */
        /* printf("max_threads: %d\n", omp_get_max_threads()); */

        /* Step 1: post every receive up front so the sends can match early. */
        if(tid == 0){
            MPI_Irecv(qcdRecvBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TP,MPI_COMM_WORLD,&reqRecv[QCD_TP]);
            MPI_Irecv(qcdRecvBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TM,MPI_COMM_WORLD,&reqRecv[QCD_TM]);

            MPI_Irecv(qcdRecvBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XP,MPI_COMM_WORLD,&reqRecv[QCD_XP]);
            MPI_Irecv(qcdRecvBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XM,MPI_COMM_WORLD,&reqRecv[QCD_XM]);

            MPI_Irecv(qcdRecvBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YP,MPI_COMM_WORLD,&reqRecv[QCD_YP]);
            MPI_Irecv(qcdRecvBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YM,MPI_COMM_WORLD,&reqRecv[QCD_YM]);

            MPI_Irecv(qcdRecvBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZP,MPI_COMM_WORLD,&reqRecv[QCD_ZP]);
            MPI_Irecv(qcdRecvBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZM,MPI_COMM_WORLD,&reqRecv[QCD_ZM]);
        }

        /* Step 2: fill each send buffer with the whole team, then let
         * thread 0 send it.  The barrier before each MPI_Isend ensures every
         * thread has finished writing the buffer. */

        //Send T
        QCDDopr_MakeTPB_dirac(qcdSendBuf[QCD_TP],pW,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TP,MPI_COMM_WORLD,&reqSend[QCD_TP]);
        }

        QCDDopr_MakeTMB_dirac(qcdSendBuf[QCD_TM],pUt + qcdNsite-qcdNxyz,pW + qcdNsite-qcdNxyz,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TM,MPI_COMM_WORLD,&reqSend[QCD_TM]);
        }

        //Send X
        QCDDopr_MakeXPB(qcdSendBuf[QCD_XP],pW,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XP,MPI_COMM_WORLD,&reqSend[QCD_XP]);
        }

        QCDDopr_MakeXMB(qcdSendBuf[QCD_XM],pUx + qcdNx-1,pW + qcdNx-1,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XM,MPI_COMM_WORLD,&reqSend[QCD_XM]);
        }

        //Send Y
        QCDDopr_MakeYPB(qcdSendBuf[QCD_YP],pW,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YP,MPI_COMM_WORLD,&reqSend[QCD_YP]);
        }

        QCDDopr_MakeYMB(qcdSendBuf[QCD_YM],pUy + qcdNxy-qcdNx,pW + qcdNxy-qcdNx,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YM,MPI_COMM_WORLD,&reqSend[QCD_YM]);
        }

        //Send Z
        QCDDopr_MakeZPB(qcdSendBuf[QCD_ZP],pW,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZP,MPI_COMM_WORLD,&reqSend[QCD_ZP]);
        }

        QCDDopr_MakeZMB(qcdSendBuf[QCD_ZM],pUz + qcdNxyz-qcdNxy,pW + qcdNxyz-qcdNxy,tid,nid);
#pragma omp barrier
        if(tid == 0){
            MPI_Isend(qcdSendBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZM,MPI_COMM_WORLD,&reqSend[QCD_ZM]);
        }

        /* Step 3: each thread handles its own slice
         * [tid*qcdNsite/nid, (tid+1)*qcdNsite/nid) of the site range, starting
         * from pW, then the eight *in routines run with a barrier between
         * consecutive calls. */
        QCDLA_Equate(pV + tid*qcdNsite/nid,pW + tid*qcdNsite/nid, (tid+1)*qcdNsite/nid - tid*qcdNsite/nid);
#pragma omp barrier

        QCDDopr_TPin_dirac(pV,pUt,pW + qcdNxyz,tid,nid);
#pragma omp barrier
        QCDDopr_TMin_dirac(pV,pUt-qcdNxyz,pW - qcdNxyz,tid,nid);
#pragma omp barrier
        QCDDopr_XPin(pV,pUx,pW+1,tid,nid);
#pragma omp barrier
        QCDDopr_XMin(pV,pUx-1,pW-1,tid,nid);
#pragma omp barrier
        QCDDopr_YPin(pV,pUy,pW + qcdNx,tid,nid);
#pragma omp barrier
        QCDDopr_YMin(pV,pUy-qcdNx,pW - qcdNx,tid,nid);
#pragma omp barrier
        QCDDopr_ZPin(pV,pUz,pW + qcdNxy,tid,nid);
#pragma omp barrier
        QCDDopr_ZMin(pV,pUz-qcdNxy,pW - qcdNxy,tid,nid);

        /* Step 4: per direction, thread 0 completes the receive; the barrier
         * that follows keeps the other threads from reading the receive
         * buffer before it has arrived. */
        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_TP],&st);
        }
#pragma omp barrier
        QCDDopr_SetTPBnd_dirac(pV,pUt,qcdRecvBuf[QCD_TP],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_TM],&st);
        }
#pragma omp barrier
        QCDDopr_SetTMBnd_dirac(pV,qcdRecvBuf[QCD_TM],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_XP],&st);
        }
#pragma omp barrier
        QCDDopr_SetXPBnd(pV,pUx,qcdRecvBuf[QCD_XP],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_XM],&st);
        }
#pragma omp barrier
        QCDDopr_SetXMBnd(pV,qcdRecvBuf[QCD_XM],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_YP],&st);
        }
#pragma omp barrier
        QCDDopr_SetYPBnd(pV,pUy,qcdRecvBuf[QCD_YP],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_YM],&st);
        }
#pragma omp barrier
        QCDDopr_SetYMBnd(pV,qcdRecvBuf[QCD_YM],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_ZP],&st);
        }
#pragma omp barrier
        QCDDopr_SetZPBnd(pV,pUz,qcdRecvBuf[QCD_ZP],tid,nid);

        if(tid == 0){
            MPI_Wait(&reqRecv[QCD_ZM],&st);
        }
#pragma omp barrier
        QCDDopr_SetZMBnd(pV,qcdRecvBuf[QCD_ZM],tid,nid);

        /* Step 5: complete every send before the region ends. */
        if(tid == 0){
            MPI_Wait(&reqSend[QCD_TP],&st);
            MPI_Wait(&reqSend[QCD_TM],&st);
            MPI_Wait(&reqSend[QCD_XP],&st);
            MPI_Wait(&reqSend[QCD_XM],&st);
            MPI_Wait(&reqSend[QCD_YP],&st);
            MPI_Wait(&reqSend[QCD_YM],&st);
            MPI_Wait(&reqSend[QCD_ZP],&st);
            MPI_Wait(&reqSend[QCD_ZM],&st);
        }
#pragma omp barrier
    }
}
double GetResRoot(double *phi, double *b, param_t p) { int x,y; //true residue double residue; double ResRoot = 0.0; double Bmag = 0.0; double ResRoot_global = 0.0; double Bmag_global = 0.0; // A little trick to index phi normally. double* phi_s = phi + p.L; // Prepare for async send/recv MPI_Request request[4]; int requests; MPI_Status status[4]; requests = 0; // Send the higher-memory component to the next rank. MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE, (p.my_rank+1)%p.world_size, 1, MPI_COMM_WORLD, request + requests++); MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE, (p.my_rank+p.world_size-1)%p.world_size, 1, MPI_COMM_WORLD, request + requests++); // Send the lower-memory component to the previous rank. MPI_Isend(phi_s, p.L, MPI_DOUBLE, (p.my_rank+p.world_size-1)%p.world_size, 0, MPI_COMM_WORLD, request + requests++); MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE, (p.my_rank+1)%p.world_size, 0, MPI_COMM_WORLD, request + requests++); // Do some other work while we wait! // Update everything that doesn't depend on buffers. for(x = 0; x < p.L; x++) { for(y = 1; y < p.y-1; y++) { residue = p.scale* b[x + y*p.L] - phi_s[x + y*p.L] + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]); ResRoot += residue*residue; Bmag += b[x + y*p.L]*b[x + y*p.L]; } } // Wait, if sync hasn't finished. MPI_Waitall ( requests, request, status ); // Update the rest of the cells. 
for(x = 0; x < p.L; x++) { y = 0; residue = p.scale* b[x + y*p.L] - phi_s[x + y*p.L] + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]); ResRoot += residue*residue; Bmag += b[x + y*p.L]*b[x + y*p.L]; y = p.y-1; residue = p.scale* b[x + y*p.L] - phi_s[x + y*p.L] + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]); ResRoot += residue*residue; Bmag += b[x + y*p.L]*b[x + y*p.L]; } MPI_Allreduce(&Bmag, &Bmag_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&ResRoot, &ResRoot_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // Normalized true residue return sqrt(ResRoot_global)/sqrt(Bmag_global); }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Status statuses[2]; MPI_Request reqs[2]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); /* this code is very similar to no-error-waitall-any_src.c */ /* but deadlocks since task 2's send and recv are inverted... */ if (nprocs < 3) { printf ("not enough tasks\n"); } else if (rank == 0) { MPI_Irecv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[0]); MPI_Irecv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[1]); MPI_Waitall (2, reqs, statuses); MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD); } else if (rank == 1) { memset (buf0, 0, buf_size); MPI_Isend (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &reqs[0]); MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]); MPI_Waitall (2, reqs, statuses); MPI_Recv (buf1, buf_size, MPI_INT, 0, 1, MPI_COMM_WORLD, statuses); MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD); } else if (rank == 2) { MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses); } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
/*
 * Builds ismax sequential dense submatrices of the parallel dense matrix C,
 * one per (isrow[i], iscol[i]) pair, on the calling process.
 *
 *   C        parallel dense matrix; its local block is c->A
 *   ismax    number of index-set pairs / submatrices
 *   isrow    row index sets (global row numbers); each must be sorted
 *   iscol    column index sets; each must be sorted
 *   scall    MAT_REUSE_MATRIX reuses (and zeroes) the matrices already in
 *            submats; any other value creates them
 *   submats  array of ismax matrices, created or reused as above
 *
 * Returns 0 on success; every PETSc/MPI failure is returned via CHKERRQ, and
 * unsorted index sets or wrongly sized reused matrices raise SETERRQ errors.
 *
 * Communication happens in two rounds:
 *   tag0 - each process sends every other process the global row numbers it
 *          needs from it.  Message layout (PetscInt):
 *            [0]            number of index sets with rows in this message
 *            [2k-1], [2k]   index-set number and row count of the k-th one
 *            rest           the requested global row numbers, in IS order
 *   tag1 - the owner answers with N scalars per requested row.
 * Rows owned by the caller are copied directly from the local block.
 */
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A  = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m    = C->rmap->N;   /* global row count; sizes the row -> process table */

  /* Get a second tag so the row-value messages cannot be confused with the
     row-request messages sent on tag0 */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Both the row and the column index sets must be sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  /* Fetch the raw index arrays and their local lengths for every IS */
  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Build the lookup table global row -> owning process from the row
     ownership ranges */
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* Evaluate the communication: which processes get a message, how long each
     message is, and how much buffer space is needed.  Buffers are allocated
     and filled based on this.
       w1[2*j]   length of the message to process j
       w1[2*j+1] 1 if a message goes to process j
       w3[j]     number of index sets that have rows on process j
       w4        per-IS scratch: rows of the current IS on each process */
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
    }
  }

  nrqs       = 0; /* number of outgoing messages */
  msz        = 0; /* total message length (for all processes) */
  w1[2*rank] = 0; /* no message is sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /* pa[] lists the destination processes */
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message has a header of 1 + 2*(number of IS) entries plus the data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];
    msz     += w1[2*j];
  }
  /* Global reduction over w1; below, bsz is used as the capacity of every
     receive buffer and nrqr as the number of incoming request messages */
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate the receive buffers as one slab of nrqr*bsz entries.  The
     pointer array gets one extra slot so that rbuf1[0] exists even when
     nrqr is 0 */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives for the row requests (any source, tag0) */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate memory for the outgoing messages; sbuf1[j] points into the
     shared slab tmp, ptr[j] is the write cursor for message j */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space and place the cursor just after it */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse each isrow and copy the rows owned elsewhere into the outgoing
     buffers */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buffer */
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS: append the pair
       (IS number, row count) and bump the pair counter in slot 0 */
    for (j=0; j<size; j++) { /* Can Optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends of the row requests */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row data from the other processes:
     (message length - header length) rows of N scalars each */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr  = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr  = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive the row-number messages and then pack and send off the row
     values to the requesting processes */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);

  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE;  /* requesting process */
      rbuf1_i = rbuf1[idex];  /* actual message from s_proc */
      /* The requested rows are rbuf1_i[start..end-1]: start skips the header
         (1 + 2*number of IS entries) and end is the received length */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr    = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* Now pack the data: N values per row, stepping through the local
         array with stride C->rmap->n */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }

  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices, or check and zero the ones being reused */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);

      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the locally owned rows straight from the local block */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];   /* m is reused as the submatrix row count */
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create the row maps: rmap[i][global row] = row of submatrix i */
  /* this is a very expensive operation wrt memory usage:
     ismax * (global row count) entries */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now receive the row values and assemble the rest of each submatrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* The request that was sent to this process (sbuf1) describes which
         IS and which rows the reply contains, in the same order */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max+1;   /* index of the first row number in the request */
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) {  /* For each row */
          row     = sbuf1_i[ct1]; ct1++;
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }
  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc, char * argv[]) { int numPointsPerDimension; int verbose = 0; double omega; double epsilon; double * * points; struct timeval startTime; struct timeval endTime; double duration; double breakdown = 0; int numIterations; double maxDiff, tmpMaxDiff; int numProcesses; int workingProcesses; int myRank; MPI_Status status; MPI_Request requestUpSend, requestUpRecv; MPI_Request requestDownSend, requestDownRecv; int partitions; int remainder; int width; int i, k; int buffSize; int startRow; double * upPointsSend, * upPointsRecv; double * downPointsSend, * downPointsRecv; int upperProc, lowerProc; struct timeval startInterval; struct timeval endInterval; if (argc < 2) { fprintf(stderr, "ERROR: Too few arguments!\n"); printUsage(argv[0]); exit(1); } else if (argc > 3) { fprintf(stderr, "ERROR: Too many arguments!\n"); printUsage(argv[0]); exit(1); } else { int argIdx = 1; if (argc == 3) { if (strncmp(argv[argIdx], OPTION_VERBOSE, strlen(OPTION_VERBOSE)) != 0) { fprintf(stderr, "ERROR: Unexpected option '%s'!\n", argv[argIdx]); printUsage(argv[0]); exit(1); } verbose = 1; ++argIdx; } numPointsPerDimension = atoi(argv[argIdx]); if (numPointsPerDimension < 2) { fprintf(stderr, "ERROR: The number of points, '%s', should be " "a numeric value greater than or equal to 2!\n", argv[argIdx]); printUsage(argv[0]); exit(1); } } MPI_Init(&argc, &argv); /* get info about how may processes are running * and what is your rank number */ MPI_Comm_size(MPI_COMM_WORLD, &numProcesses); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); /* calculate nominal size of data per each process */ partitions = numPointsPerDimension / numProcesses; /* calculate number of processes with the additional row of data */ remainder = numPointsPerDimension % numProcesses; /* according to myRank, set the width of the table */ width = (myRank < remainder) ? 
partitions + 1 : partitions; /* decide how many processes are required to do the calculation */ workingProcesses = (numProcesses > numPointsPerDimension) ? numPointsPerDimension : numProcesses; /* terminate processes that won't be used */ /* start of copied part of code */ MPI_Comm MY_WORLD = MPI_COMM_WORLD; if(workingProcesses < numProcesses) { MPI_Group world_group; MPI_Comm_group(MPI_COMM_WORLD, &world_group); // Remove all unnecessary ranks MPI_Group new_group; int ranges[1][3] = {{workingProcesses, (numProcesses - 1), 1}}; MPI_Group_range_excl(world_group, 1, ranges, &new_group); // Create a new communicator MPI_Comm_create(MPI_COMM_WORLD, new_group, &MY_WORLD); if (MY_WORLD == MPI_COMM_NULL) { // Bye bye cruel world MPI_Finalize(); exit(0); } } /* end of copied part of code */ /* source: http://stackoverflow.com/questions/13774968/mpi-kill-unwanted-processes */ /* set the calculation parameters */ omega = getOmega(numPointsPerDimension); epsilon = getEpsilon(numPointsPerDimension); /* allocate points table for each process */ points = allocatePoints(numPointsPerDimension, width, numProcesses); if (points == NULL) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Malloc failed!\n"); exit(1); } /* size of the table to send per each iteration */ buffSize = numPointsPerDimension / 2 + numPointsPerDimension % 2 ; /* initialize additional buffers for communication */ upPointsSend = initializeBuffer(buffSize); upPointsRecv = initializeBuffer(buffSize); downPointsSend = initializeBuffer(buffSize); downPointsRecv = initializeBuffer(buffSize); /* process #0 sends to others separate parts of the table * others wait for incoming data */ if (myRank == 0) { startRow = numPointsPerDimension; for(k = workingProcesses - 1; k >= 0 ; --k) { width = (k < remainder) ? 
partitions + 1 : partitions; /* initialize points */ initializePoints(points, startRow - width, width, numPointsPerDimension); /* send table to k-th process */ if(k != 0) { for(i = 0; i < width; ++i) { MPI_Send(points[i], numPointsPerDimension, MPI_DOUBLE, k, 123, MY_WORLD); } } startRow -= width; } } else { if(myRank < workingProcesses) { for(i = 0; i < width; ++i) { MPI_Recv(points[i], numPointsPerDimension, MPI_DOUBLE, 0, 123, MY_WORLD, &status); } } } /* remember with which processes you comunicate */ upperProc = myRank == 0 ? MPI_PROC_NULL : myRank - 1; lowerProc = myRank == workingProcesses - 1 ? MPI_PROC_NULL : myRank + 1; /* here each process has it's own data set for computations */ if(remainder > 0) { startRow = (myRank < remainder) ? myRank * (partitions + 1) : myRank * partitions + remainder; } else { startRow = myRank * partitions; } if(gettimeofday(&startTime, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } /* Start of computations. */ numIterations = 0; do { int i, j, color; maxDiff = 0.0; for (color = 0; color < 2; ++color) { /* fill downPointsSend with the last row of points data */ setDataBuffer(downPointsSend, points, width - 1, 1 + ((startRow + width) % 2 == color ? 
1 : 0), numPointsPerDimension); if(gettimeofday(&startInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } MPI_Isend(downPointsSend, buffSize, MPI_DOUBLE, lowerProc, color, MY_WORLD, &requestDownSend); MPI_Irecv(downPointsRecv, buffSize, MPI_DOUBLE, lowerProc, color, MY_WORLD, &requestDownRecv); if(gettimeofday(&endInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) - ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0)); /* fill upPointsSend with the last row of points data */ setDataBuffer(upPointsSend, points, 0, 1 + ((startRow - 1) % 2 == color ? 1 : 0), numPointsPerDimension); if(gettimeofday(&startInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } MPI_Isend(upPointsSend, buffSize, MPI_DOUBLE, upperProc, color, MY_WORLD, &requestUpSend); MPI_Irecv(upPointsRecv, buffSize, MPI_DOUBLE, upperProc, color, MY_WORLD, &requestUpRecv); if(gettimeofday(&endInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) - ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0)); /* computations of the first row requires data that has to be recieved from other process */ MPI_Wait(&requestUpRecv, &status); for (i = 0; i < width; ++i) { /* before computing the last row of its data, * process has to be sure that it has required * row from process rank+1 */ if(i == width - 1) { MPI_Wait(&requestDownRecv, &status); } for (j = 1 + ((startRow+i) % 2 == color ? 
1 : 0); j < numPointsPerDimension - 1; j += 2) { if( (myRank != 0 || i != 0 ) && (myRank != workingProcesses - 1 || i != width - 1) ) { double tmp, diff; double down, up; int jIdx = (j - 1 - ((startRow + i) % 2 == color ? 1 : 0))/ 2; /* decide if up or down value should be taken from additional buffers */ up = (i == 0) ? upPointsRecv[jIdx] : points[i-1][j]; down = (i == width - 1) ? downPointsRecv[jIdx] : points[i+1][j]; /* calculate final value */ tmp = (up + down + points[i][j - 1] + points[i][j + 1]) / 4.0; diff = points[i][j]; points[i][j] = (1.0 - omega) * points[i][j] + omega * tmp; diff = fabs(diff - points[i][j]); if (diff > maxDiff) { maxDiff = diff; } } } } MPI_Barrier(MY_WORLD); } if(gettimeofday(&startInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } /* find new maxDiff among all processes */ MPI_Allreduce(&maxDiff, &tmpMaxDiff, 1, MPI_DOUBLE, MPI_MAX, MY_WORLD ); maxDiff = tmpMaxDiff; if(gettimeofday(&endInterval, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) - ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0)); ++numIterations; } while (maxDiff > epsilon); /* End of computations. 
*/ if(gettimeofday(&endTime, NULL)) { freePoints(points, width, myRank); fprintf(stderr, "ERROR: Gettimeofday failed!\n"); exit(1); } /* calculate how long did the computation lasted */ duration = ((double)endTime.tv_sec + ((double)endTime.tv_usec / 1000000.0)) - ((double)startTime.tv_sec + ((double)startTime.tv_usec / 1000000.0)); /* we choose the process whose execution lasted for the longest time */ double maxDuration; MPI_Allreduce(&duration, &maxDuration, 1, MPI_DOUBLE, MPI_MAX, MY_WORLD); if(myRank==0) { fprintf(stderr, "Statistics: duration(s)=%.10f breakdown=%.10f #iters=%d diff=%.10f epsilon=%.10f\n", maxDuration, breakdown, numIterations, maxDiff, epsilon); } if (verbose) { MPI_Barrier(MY_WORLD); /* process #0 is responsible for printing results of computation * others send their data straight to it */ if(myRank != 0 && myRank < workingProcesses) { for(k = 0; k < width ; ++k) { MPI_Send(points[k], numPointsPerDimension, MPI_DOUBLE, 0, 123, MY_WORLD); } } else if(myRank == 0) { printPoints(points, width, numPointsPerDimension); for(i = 1; i < workingProcesses; ++i) { width = (i < remainder) ? partitions + 1 : partitions; for (k = 0 ; k < width ; ++k) { MPI_Recv(points[k], numPointsPerDimension, MPI_DOUBLE, i, 123, MY_WORLD, &status); } printPoints(points, width, numPointsPerDimension); } } } /* free all the memory that was allocated */ freePoints(points, width, myRank); free(downPointsSend); free(upPointsSend); free(downPointsRecv); free(upPointsRecv); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int provided, wrank, wsize, nmsg, i, tag; int *(buf[MAX_TARGETS]), bufsize[MAX_TARGETS]; MPI_Request r[MAX_TARGETS]; MPI_Comm commDup, commEven; MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &wrank); MPI_Comm_size(MPI_COMM_WORLD, &wsize); if (wsize < 4) { fprintf(stderr, "This test requires at least 4 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* Create several communicators */ MPI_Comm_dup(MPI_COMM_WORLD, &commDup); MPI_Comm_set_name(commDup, "User dup of comm world"); MPI_Comm_split(MPI_COMM_WORLD, wrank & 0x1, wrank, &commEven); if (wrank & 0x1) MPI_Comm_free(&commEven); else MPI_Comm_set_name(commEven, "User split to even ranks"); /* Create a collection of pending sends and receives * We use tags on the sends and receives (when ANY_TAG isn't used) * to provide an easy way to check that the proper requests are present. * TAG values use fields, in decimal (for easy reading): * 0-99: send/recv type: * 0 - other * 1 - irecv * 2 - isend * 3 - issend * 4 - ibsend * 5 - irsend * 6 - persistent recv * 7 - persistent send * 8 - persistent ssend * 9 - persistent rsend * 10 - persistent bsend * 100-999: destination (for send) or source, if receive. 
999 = any-source * (rank is value/100) * 1000-2G: other values */ /* Create the send/receive buffers */ nmsg = 10; for (i = 0; i < nmsg; i++) { bufsize[i] = i; if (i) { buf[i] = (int *) calloc(bufsize[i], sizeof(int)); if (!buf[i]) { fprintf(stderr, "Unable to allocate %d words\n", bufsize[i]); MPI_Abort(MPI_COMM_WORLD, 2); } } else buf[i] = 0; } /* Partial implementation */ if (wrank == 0) { nmsg = 0; tag = 2 + 1 * 100; MPI_Isend(buf[0], bufsize[0], MPI_INT, 1, tag, MPI_COMM_WORLD, &r[nmsg++]); tag = 3 + 2 * 100; MPI_Issend(buf[1], bufsize[1], MPI_INT, 2, tag, MPI_COMM_WORLD, &r[nmsg++]); tag = 1 + 3 * 100; MPI_Irecv(buf[2], bufsize[2], MPI_INT, 3, tag, MPI_COMM_WORLD, &r[nmsg++]); } else if (wrank == 1) { } else if (wrank == 2) { } else if (wrank == 3) { } /* provide a convenient place to wait */ MPI_Barrier(MPI_COMM_WORLD); printf("Barrier 1 finished\n"); /* Match up (or cancel) the requests */ if (wrank == 0) { MPI_Waitall(nmsg, r, MPI_STATUSES_IGNORE); } else if (wrank == 1) { tag = 2 + 1 * 100; MPI_Recv(buf[0], bufsize[0], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (wrank == 2) { tag = 3 + 2 * 100; MPI_Recv(buf[1], bufsize[1], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (wrank == 3) { tag = 1 + 3 * 100; MPI_Send(buf[2], bufsize[2], MPI_INT, 0, tag, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); printf("Barrier 2 finished\n"); MPI_Comm_free(&commDup); if (commEven != MPI_COMM_NULL) MPI_Comm_free(&commEven); MPI_Finalize(); return 0; }
/*
 * Set up the communication structure stored in *gsmv_comm and relabel the
 * column indices of the local rows of A.
 *
 * Side effects visible in this function:
 *   - colind[] and nzval[] of A are permuted in place within each row so that
 *     entries whose column is owned by this process come first, and colind[]
 *     is then overwritten with the values held in spa[] (see third pass).
 *   - The arrays assigned to the fields of *gsmv_comm at the end are
 *     allocated here and are not freed here; spa[] and the request array are
 *     freed before returning.
 *   - Any allocation failure goes through ABORT.
 */
void pdgsmv_init
(
 SuperMatrix *A,       /* Matrix A permuted by columns (input/output).
                          The type of A can be:
                          Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. */
 int_t *row_to_proc,   /* Input. Mapping between rows and processes. */
 gridinfo_t *grid,     /* Input */
 pdgsmv_comm_t *gsmv_comm /* Output. The data structure for communication. */
 )
{
    NRformat_loc *Astore;
    int iam, p, procs;
    int *SendCounts, *RecvCounts;
    /* NOTE(review): m is assigned below but never read, and itemp is never
       referenced in this function. */
    int_t i, j, k, l, m, m_loc, n, fst_row, jcol;
    int_t TotalIndSend, TotalValSend;
    int_t *colind, *rowptr;
    int_t *ind_tosend = NULL, *ind_torecv = NULL;
    int_t *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, *spa, *itemp;
    double *nzval, *val_tosend = NULL, *val_torecv = NULL, t;
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdgsmv_init()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A->Store;
    m = A->nrow;
    n = A->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = Astore->nzval;
    /* One allocation holds both count arrays: SendCounts[0..procs-1]
       followed by RecvCounts[0..procs-1]. */
    if ( !(SendCounts = SUPERLU_MALLOC(2*procs * sizeof(int))) )
        ABORT("Malloc fails for SendCounts[]");
    /*for (i = 0; i < 2*procs; ++i) SendCounts[i] = 0;*/
    RecvCounts = SendCounts + procs;
    /* Likewise one allocation for both pointer arrays, procs+1 entries each. */
    if ( !(ptr_ind_tosend = intMalloc_dist(2*(procs+1))) )
        ABORT("Malloc fails for ptr_ind_tosend[]");
    ptr_ind_torecv = ptr_ind_tosend + procs + 1;
    if ( !(extern_start = intMalloc_dist(m_loc)) )
        ABORT("Malloc fails for extern_start[]");
    for (i = 0; i < m_loc; ++i) extern_start[i] = rowptr[i];

    /* ------------------------------------------------------------
       COUNT THE NUMBER OF X ENTRIES TO BE SENT TO EACH PROCESS.
       THIS IS THE UNION OF THE COLUMN INDICES OF MY ROWS.
       SWAP TO THE BEGINNING THE PART OF A CORRESPONDING TO THE
       LOCAL PART OF X.
       THIS ACCOUNTS FOR THE FIRST PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    if ( !(spa = intCalloc_dist(n)) ) /* Aid in global to local translation */
        ABORT("Malloc fails for spa[]");
    for (p = 0; p < procs; ++p) SendCounts[p] = 0;
    for (i = 0; i < m_loc; ++i) { /* Loop through each row */
        k = extern_start[i];      /* next slot for a locally-owned entry of row i */
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {/* Each nonzero in row i */
            jcol = colind[j];
            p = row_to_proc[jcol];
            if ( p != iam ) { /* External */
                if ( spa[jcol] == 0 ) { /* First time see this index */
                    ++SendCounts[p];
                    spa[jcol] = 1;
                }
            } else { /* Swap to beginning the part of A corresponding
                        to the local part of X */
                l = colind[k];
                t = nzval[k];
                colind[k] = jcol;
                nzval[k] = nzval[j];
                colind[j] = l;
                nzval[j] = t;
                ++k;
            }
        }
        /* k now indexes the first external entry of row i (or rowptr[i+1]
           when the row has none). */
        extern_start[i] = k;
    }

    /* ------------------------------------------------------------
       LOAD THE X-INDICES TO BE SENT TO THE OTHER PROCESSES.
       THIS ACCOUNTS FOR THE SECOND PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    /* Build pointers to ind_tosend[]. */
    ptr_ind_tosend[0] = 0;
    for (p = 0, TotalIndSend = 0; p < procs; ++p) {
        TotalIndSend += SendCounts[p]; /* Total to send. */
        ptr_ind_tosend[p+1] = ptr_ind_tosend[p] + SendCounts[p];
    }
#if 0
    ptr_ind_tosend[iam] = 0; /* Local part of X */
#endif
    /* ind_tosend stays NULL when there are no external indices at all. */
    if ( TotalIndSend ) {
        if ( !(ind_tosend = intMalloc_dist(TotalIndSend)) )
            ABORT("Malloc fails for ind_tosend[]"); /* Exclude local part of X */
    }

    /* Build SPA to aid global to local translation. */
    for (i = 0; i < n; ++i) spa[i] = EMPTY;
    for (i = 0; i < m_loc; ++i) { /* Loop through each row of A */
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
            jcol = colind[j];
            if ( spa[jcol] == EMPTY ) { /* First time see this index */
                p = row_to_proc[jcol];
                if ( p == iam ) { /* Local */
                    /*assert(jcol>=fst_row);*/
                    spa[jcol] = jcol - fst_row; /* Relative position in local X */
                } else { /* External */
                    ind_tosend[ptr_ind_tosend[p]] = jcol; /* Still global */
                    spa[jcol] = ptr_ind_tosend[p]; /* Position in ind_tosend[] */
                    /* ptr_ind_tosend[p] is used as a fill cursor here; it is
                       moved back to the segment start before the sends below. */
                    ++ptr_ind_tosend[p];
                }
            }
        }
    }

    /* ------------------------------------------------------------
       TRANSFORM THE COLUMN INDICES OF MATRIX A INTO LOCAL INDICES.
       THIS ACCOUNTS FOR THE THIRD PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    for (i = 0; i < m_loc; ++i) {
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
            jcol = colind[j];
            colind[j] = spa[jcol];
        }
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE EXTERNAL INDICES OF X.
       ------------------------------------------------------------*/
    /* Every process learns how many indices each peer will send to it. */
    MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT,
                 grid->comm);

    /* Build pointers to ind_torecv[]. */
    ptr_ind_torecv[0] = 0;
    /* Note that TotalValSend is accumulated from RecvCounts[]. */
    for (p = 0, TotalValSend = 0; p < procs; ++p) {
        TotalValSend += RecvCounts[p]; /* Total to receive. */
        ptr_ind_torecv[p+1] = ptr_ind_torecv[p] + RecvCounts[p];
    }
    if ( TotalValSend ) {
        if ( !(ind_torecv = intMalloc_dist(TotalValSend)) )
            ABORT("Malloc fails for ind_torecv[]");
    }

    /* One allocation holds the send requests followed by the receive
       requests. */
    if ( !(send_req = (MPI_Request *)
           SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + procs;
    /* Messages are tagged with the sender's rank: iam on the send side,
       p on the matching receive side. */
    for (p = 0; p < procs; ++p) {
        ptr_ind_tosend[p] -= SendCounts[p]; /* Reset pointer to beginning */
        if ( SendCounts[p] ) {
            MPI_Isend(&ind_tosend[ptr_ind_tosend[p]], SendCounts[p],
                      mpi_int_t, p, iam, grid->comm, &send_req[p]);
        }
        if ( RecvCounts[p] ) {
            MPI_Irecv(&ind_torecv[ptr_ind_torecv[p]], RecvCounts[p],
                      mpi_int_t, p, p, grid->comm, &recv_req[p]);
        }
    }
    /* Only requests that were actually started are waited on. */
    for (p = 0; p < procs; ++p) {
        if ( SendCounts[p] ) MPI_Wait(&send_req[p], &status);
        if ( RecvCounts[p] ) MPI_Wait(&recv_req[p], &status);
    }

    /* Allocate storage for the X values to be transferred. */
    if ( TotalIndSend &&
         !(val_torecv = doubleMalloc_dist(TotalIndSend)) )
        ABORT("Malloc fails for val_torecv[].");
    if ( TotalValSend &&
         !(val_tosend = doubleMalloc_dist(TotalValSend)) )
        ABORT("Malloc fails for val_tosend[].");

    /* Hand the arrays built above over to the output structure. */
    gsmv_comm->extern_start = extern_start;
    gsmv_comm->ind_tosend = ind_tosend;
    gsmv_comm->ind_torecv = ind_torecv;
    gsmv_comm->ptr_ind_tosend = ptr_ind_tosend;
    gsmv_comm->ptr_ind_torecv = ptr_ind_torecv;
    gsmv_comm->SendCounts = SendCounts;
    gsmv_comm->RecvCounts = RecvCounts;
    gsmv_comm->val_tosend = val_tosend;
    gsmv_comm->val_torecv = val_torecv;
    gsmv_comm->TotalIndSend = TotalIndSend;
    gsmv_comm->TotalValSend = TotalValSend;

    SUPERLU_FREE(spa);
    SUPERLU_FREE(send_req); /* also releases recv_req, which points into it */

#if ( DEBUGlevel>=2 )
    PrintInt10("pdgsmv_init::rowptr", m_loc+1, rowptr);
    PrintInt10("pdgsmv_init::extern_start", m_loc, extern_start);
#endif
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgsmv_init()");
#endif
} /* PDGSMV_INIT */
int main (int argc, char *argv[]) { int procid, num_procs; MPI_Status status; // derivative_time, integral_time, err_time is the local sum of runtime for each computation // tick is used to mark time double derivative_time = 0, integral_time = 0, err_time = 0, tick; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &procid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); // Calculate grid-points per process if(NGRID % num_procs > 0) { if(procid == 0) printf("NGRID should be divisible by the number of processes!"); MPI_Finalize(); return 1; } int points_per_node = NGRID / num_procs; //loop index int i; //domain array and step size FP_PREC xc[points_per_node], dx; //function array and derivative //the size will be dependent on the //number of processors used //to the program FP_PREC yc[points_per_node], dyc[points_per_node]; //integration values FP_PREC local_intg, intg; //error analysis array FP_PREC derr[points_per_node]; //error analysis values FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err; //calculate dx dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1); // get start X for each process (my_XI) int bins_before_me = procid * points_per_node; FP_PREC my_XI = XI + bins_before_me * dx; //construct grid for (i = 0; i < points_per_node; ++i) { xc[i] = my_XI + i * dx; } //define the function for(i = 0; i < points_per_node; ++i) { yc[i] = fn(xc[i]); } //define holders for left and right bound value FP_PREC left_bound_yc, right_bound_yc; if(procid == 0) left_bound_yc = fn(XI-dx); if(procid == num_procs - 1) right_bound_yc = fn(XF+dx); tick = MPI_Wtime(); #if BLOCKING if(procid == 0) printf("Using blocking message! 
\n"); //Step 1: even nodes send to the right then receive back //Step 2: even nodes receive from the left then send back if(procid % 2 == 0) { if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } if(procid > 0) { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); } } else { MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status); MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD); if(procid < num_procs - 1) { MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD); MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status); } } #else if(procid == 0) printf("Using non-blocking message! \n"); MPI_Request request[4]; int current_request = 0; if(procid < num_procs - 1) { // receive right bound yc MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // receive left bound yc MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid < num_procs - 1) { // send right bound yc to right node MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } if(procid > 0) { // send left bound yc to left node MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]); ++current_request; } #endif derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; // Overlap computation and communication BEGIN //compute the derivative using first-order finite differencing tick = MPI_Wtime(); for (i = 1; i < points_per_node-1; ++i) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } derivative_time += MPI_Wtime() - tick; //compute the 
integral using Trapazoidal rule tick = MPI_Wtime(); local_intg = 0.0; for (i = 0; i < points_per_node-1; ++i) { local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx; } integral_time += MPI_Wtime() - tick; // Overlap computation and communication END // WAIT for non-blocking message complete before continue #if !BLOCKING tick = MPI_Wtime(); MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE); derivative_time += MPI_Wtime() - tick; integral_time += MPI_Wtime() - tick; #endif // compute derivative of boundary points, runtime is not counted because it's quite small dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx); dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx); // compute integral at right boundary point, runtime is not counted because it's quite small if(procid < num_procs-1) local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx; tick = MPI_Wtime(); //compute the error, average error of the derivatives for(i = 0; i < points_per_node; ++i) { if(dfn(xc[i]) == 0) { printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid); derr[i] = 0; } else derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } //find the local average error dlocal_sum_err = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_sum_err += derr[i]; } //calculate and output errors #if SINGLE_CALL_REDUCTION if(procid == 0) printf("Using single call reduction! \n"); //all nodes collect sum err and convert it to the mean value MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); davg_err /= (FP_PREC)NGRID; // each process calculates global average #else if(procid == 0) printf("Using manual call reduction! 
\n"); //all nodes collect sum err and convert it to the mean value if(procid != 0) MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); else if(procid == 0) { davg_err = dlocal_sum_err; for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); davg_err += dlocal_sum_err; } davg_err /= (FP_PREC)NGRID; } MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); #endif //now all nodes have davg_err, find sum squared differences of local derr dlocal_std_dev = 0.0; for(i = 0; i < points_per_node; ++i) { dlocal_std_dev += pow(derr[i] - davg_err, 2); } err_time += MPI_Wtime() - tick; #if SINGLE_CALL_REDUCTION //reduce local integral & local (sum squared differences of derr) to root tick = MPI_Wtime(); MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; #else //reduce local integral & local (sum squared differences of derr) to root if(procid != 0) { tick = MPI_Wtime(); MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); integral_time += MPI_Wtime() - tick; } else if(procid == 0) { dstd_dev = dlocal_std_dev; intg = local_intg; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); dstd_dev += dlocal_std_dev; } err_time += MPI_Wtime() - tick; tick = MPI_Wtime(); for(i = 1; i < num_procs; ++i) { MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status); intg+= local_intg; } integral_time += MPI_Wtime() - tick; } #endif // print out the max runtime for each calculation double max_derivative_time, max_integral_time, max_err_time; MPI_Reduce(&derivative_time, &max_derivative_time, 1, 
MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if(procid == 0) { printf("Max runtime to calculate derivatives is %e\n", max_derivative_time); printf("Max runtime to calculate integral is %e\n", max_integral_time); printf("Max runtime to calculate derivative errors is %e\n", max_err_time); } //gather derivative results & errors for output //this part shouldn't be included in running time measurements FP_PREC *final_dyc = NULL; FP_PREC *final_derr = NULL; if(procid == 0) { final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC)); } MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD); //final output at root node (rank 0) if(procid == 0) { dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); if(ifn(XI, XF) == 0) { printf("WARNING: true integral value from XI to XF is equal zero.\n"); intg_err = 0; } else { intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF)); } print_function_data(NGRID, dx, final_dyc); print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr); free(final_dyc); free(final_derr); } MPI_Finalize(); return 0; }
// main calculation method for jacobi void calculateJacobi () { double star; double residuum; double maxresiduum_temp; double pih = 0.0; double fpisin = 0.0; int term_iteration = options.term_iteration; if (options.inf_func == FUNC_FPISIN) { pih = PI * h; fpisin = 0.25 * TWO_PI_SQUARE * h * h; } iteration = 0; while (term_iteration > 0) { // swap matrices oldmatrix = Matrix[iteration%2]; newmatrix = Matrix[(iteration+1)%2]; maxresiduum = 0; // send lines MPI_Request r1; MPI_Request r2; MPI_Status s1; MPI_Status s2; if (rank != root) { MPI_Isend(oldmatrix[1], N+1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, &r1); } if (rank != last) { MPI_Isend(oldmatrix[actuallines-2], N+1, MPI_DOUBLE, rank+1, 0, MPI_COMM_WORLD, &r2); } // alternative mode (exchange of lines and computation of non-halo lines are parallel) // - deal with the "middle" rows if (altermode) { // over all non-halo rows for (int i = 2; i < actuallines-2; i++) { double fpisin_i = 0.0; if (options.inf_func == FUNC_FPISIN) { fpisin_i = fpisin * sin(pih * (double)(i+startline-1)); } // over all columns for (int j = 1; j < N; j++) { star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]); if (options.inf_func == FUNC_FPISIN) { star += fpisin_i * sin(pih * (double)j); } if (options.termination == TERM_PREC || term_iteration == 1) { residuum = oldmatrix[i][j] - star; residuum = (residuum < 0) ? -residuum : residuum; maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum; } newmatrix[i][j] = star; } } } // wait for sending to be successfull if (rank != root) { MPI_Recv(oldmatrix[0], N+1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rank != last) { MPI_Recv(oldmatrix[actuallines-1], N+1, MPI_DOUBLE, rank+1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rank != root) { MPI_Wait(&r1, &s1); } if (rank != last) { MPI_Wait(&r2, &s2); } MPI_Barrier(MPI_COMM_WORLD); // alternative mode - deal with the remaining, i.e. 
"halo" rows if (altermode) { // first line int i = 1; double fpisin_i = 0.0; if (options.inf_func == FUNC_FPISIN) { fpisin_i = fpisin * sin(pih * (double)(i+startline-1)); } // over all columns for (int j = 1; j < N; j++) { star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]); if (options.inf_func == FUNC_FPISIN) { star += fpisin_i * sin(pih * (double)j); } if (options.termination == TERM_PREC || term_iteration == 1) { residuum = oldmatrix[i][j] - star; residuum = (residuum < 0) ? -residuum : residuum; maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum; } newmatrix[i][j] = star; } // last line i = (actuallines-2); fpisin_i = 0.0; if (options.inf_func == FUNC_FPISIN) { fpisin_i = fpisin * sin(pih * (double)(i+startline-1)); } // over all columns for (int j = 1; j < N; j++) { star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]); if (options.inf_func == FUNC_FPISIN) { star += fpisin_i * sin(pih * (double)j); } if (options.termination == TERM_PREC || term_iteration == 1) { residuum = oldmatrix[i][j] - star; residuum = (residuum < 0) ? -residuum : residuum; maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum; } newmatrix[i][j] = star; } } else { // over all rows for (int i = 1; i < actuallines-1; i++) { double fpisin_i = 0.0; if (options.inf_func == FUNC_FPISIN) { fpisin_i = fpisin * sin(pih * (double)(i+startline-1)); } // over all columns for (int j = 1; j < N; j++) { star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]); if (options.inf_func == FUNC_FPISIN) { star += fpisin_i * sin(pih * (double)j); } if (options.termination == TERM_PREC || term_iteration == 1) { residuum = oldmatrix[i][j] - star; residuum = (residuum < 0) ? -residuum : residuum; //printf("%15.14f\n", residuum); maxresiduum = (residuum < maxresiduum) ? 
maxresiduum : residuum; } newmatrix[i][j] = star; } } } /* check for stopping calculation, depending on termination method */ if (options.termination == TERM_PREC) { maxresiduum_temp = maxresiduum; MPI_Allreduce(&maxresiduum_temp, &maxresiduum, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if (maxresiduum < options.term_precision) { term_iteration = 0; } } else if (options.termination == TERM_ITER) { term_iteration--; } iteration++; } }
/*
 * Exchange the boundary slices of two spinor fields, l and k, with the
 * neighbouring MPI processes in every parallelised direction.
 *
 * l, k : the two fields whose boundaries are exchanged; halo data received
 *        from the neighbours is written behind the local volume of each field
 *        (see the receive offsets below).
 * ieo  : selects which z-gather index table (even or odd) is used for l;
 *        note that the tests for k are inverted relative to those for l.
 *
 * All sends and receives are posted non-blocking and completed by a single
 * MPI_Waitall at the end.  Up to 8 requests are posted per direction, so the
 * 32-entry request array is exactly full for PARALLELXYZT.
 *
 * Without MPI defined the function does nothing.
 */
void xchange_2fields(spinor * const l, spinor * const k, const int ieo) {

  /* The MPI bookkeeping is only declared when MPI is available, so that a
   * build without MPI (and hence without <mpi.h>) still compiles. */
#ifdef MPI
  MPI_Request requests[32];
  MPI_Status status[32];
  int reqcount = 0;
#  if defined PARALLELXYZT
  int ix=0;
#  endif
#endif

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchange2fields)
#endif

#ifdef MPI

#  if (defined BGL && defined XLC)
#    ifdef PARALLELXYZT
  __alignx(16, field_buffer_z);
  __alignx(16, field_buffer_z2);
  __alignx(16, field_buffer_z3);
  __alignx(16, field_buffer_z4);
#    endif
  __alignx(16, l);
#  endif

  /* ---------------- t direction, field l ---------------- */
  /* send the data to the neighbour on the left */
  /* receive the data from the neighbour on the right */
  MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+T*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right */
  /* receive the data from the neighbour on the left */
  MPI_Isend((void*)(l+(T-1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 82, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+(T+1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- t direction, field k ---------------- */
  /* send the data to the neighbour on the left */
  /* receive the data from the neighbour on the right */
  MPI_Isend((void*)k, 1, field_time_slice_cont, g_nb_t_dn, 83, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+T*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 83, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right */
  /* receive the data from the neighbour on the left */
  MPI_Isend((void*)(k+(T-1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 84, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+(T+1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_dn, 84, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

#  if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
  /* ---------------- x direction, field l ---------------- */
  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+(T+2)*LX*LY*LZ/2), 1, field_x_slice_cont, g_nb_x_up, 91, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in x direction */
  /* receive the data from the neighbour on the left in x direction */
  MPI_Isend((void*)(l+(LX-1)*LY*LZ/2), 1, field_x_slice_gath, g_nb_x_up, 92, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + T*LY*LZ)/2), 1, field_x_slice_cont, g_nb_x_dn, 92, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- x direction, field k ---------------- */
  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend((void*)k, 1, field_x_slice_gath, g_nb_x_dn, 93, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+(T+2)*LX*LY*LZ/2), 1, field_x_slice_cont, g_nb_x_up, 93, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in x direction */
  /* receive the data from the neighbour on the left in x direction */
  MPI_Isend((void*)(k+(LX-1)*LY*LZ/2), 1, field_x_slice_gath, g_nb_x_up, 94, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + T*LY*LZ)/2), 1, field_x_slice_cont, g_nb_x_dn, 94, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

#  if (defined PARALLELXYT || defined PARALLELXYZT)
  /* ---------------- y direction, field l ---------------- */
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + 2*T*LY*LZ)/2), 1, field_y_slice_cont, g_nb_y_up, 101, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in y direction */
  /* receive the data from the neighbour on the left in y direction */
  MPI_Isend((void*)(l+(LY-1)*LZ/2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + 2*T*LY*LZ + T*LX*LZ)/2), 1, field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- y direction, field k ---------------- */
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)k, 1, field_y_slice_gath, g_nb_y_dn, 103, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + 2*T*LY*LZ)/2), 1, field_y_slice_cont, g_nb_y_up, 103, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in y direction */
  /* receive the data from the neighbour on the left in y direction */
  MPI_Isend((void*)(k+(LY-1)*LZ/2), 1, field_y_slice_gath, g_nb_y_up, 104, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + 2*T*LY*LZ + T*LX*LZ)/2), 1, field_y_slice_cont, g_nb_y_dn, 104, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

#  if (defined PARALLELXYZT)
  /* ---------------- z direction, field l ---------------- */
  /* Gather the z boundary of l into contiguous send buffers.  The index
   * table depends on whether the field is even or odd: the first half of
   * the table fills field_buffer_z, the second half fills field_buffer_z2. */
  if(ieo == 1) {
    for(ix = 0; ix < T*LX*LY/2; ix++) {
      field_buffer_z[ix] = l[ g_field_z_ipt_even[ix] ];
    }
    for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
      field_buffer_z2[ix-T*LX*LY/2] = l[ g_field_z_ipt_even[ix] ];
    }
  }
  else {
    for(ix = 0; ix < T*LX*LY/2; ix++) {
      field_buffer_z[ix] = l[ g_field_z_ipt_odd[ix] ];
    }
    for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
      field_buffer_z2[ix-T*LX*LY/2] = l[ g_field_z_ipt_odd[ix] ];
    }
  }

  /* send the data to the neighbour on the left in z direction */
  /* receive the data from the neighbour on the right in z direction */
  /* 12*T*LX*LY doubles for T*LX*LY/2 buffer entries -- presumably 24
   * doubles per spinor; confirm against the spinor definition. */
  MPI_Isend((void*)field_buffer_z, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 503, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+(VOLUME/2 + LX*LY*LZ + T*LY*LZ +T*LX*LZ)), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 503, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in z direction */
  /* receive the data from the neighbour on the left in z direction */
  MPI_Isend((void*)field_buffer_z2, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 504, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+(VOLUME + 2*LX*LY*LZ + 2*T*LY*LZ + 2*T*LX*LZ + T*LX*LY)/2), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 504, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- z direction, field k ---------------- */
  /* Same gather for k into field_buffer_z3 / field_buffer_z4.  The even
   * table is used when ieo == 0, i.e. the opposite choice to the one made
   * for l above. */
  if(ieo == 0) {
    for(ix = 0; ix < T*LX*LY/2; ix++) {
      field_buffer_z3[ix] = k[ g_field_z_ipt_even[ix] ];
    }
    for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
      field_buffer_z4[ix-T*LX*LY/2] = k[ g_field_z_ipt_even[ix] ];
    }
  }
  else {
    for(ix = 0; ix < T*LX*LY/2; ix++) {
      field_buffer_z3[ix] = k[ g_field_z_ipt_odd[ix] ];
    }
    for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
      field_buffer_z4[ix-T*LX*LY/2] = k[ g_field_z_ipt_odd[ix] ];
    }
  }

  /* send the data to the neighbour on the left in z direction */
  /* receive the data from the neighbour on the right in z direction */
  MPI_Isend((void*)field_buffer_z3, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 505, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+(VOLUME/2 + LX*LY*LZ + T*LY*LZ +T*LX*LZ)), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 505, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in z direction */
  /* receive the data from the neighbour on the left in z direction */
  MPI_Isend((void*)field_buffer_z4, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 506, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+(VOLUME + 2*LX*LY*LZ + 2*T*LY*LZ + 2*T*LX*LZ + T*LX*LY)/2), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 506, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

  /* Block until every posted send and receive has completed. */
  MPI_Waitall(reqcount, requests, status);

#endif
  return;
#ifdef _KOJAK_INST
#pragma pomp inst end(xchange2fields)
#endif
}
int main(int argc, char *argv[]) { int errs = 0; int rank, size, dest; MPI_Comm comm; MPI_Status status; MPI_Request req; static int bufsizes[4] = { 1, 100, 10000, 1000000 }; char *buf; #ifdef TEST_IRSEND int veryPicky = 0; /* Set to 1 to test "quality of implementation" in * a tricky part of cancel */ #endif int cs, flag, n; MTest_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); dest = size - 1; MTestPrintfMsg(1, "Starting scancel test\n"); for (cs = 0; cs < 4; cs++) { if (rank == 0) { n = bufsizes[cs]; buf = (char *) malloc(n); if (!buf) { fprintf(stderr, "Unable to allocate %d bytes\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } MTestPrintfMsg(1, "(%d) About to create isend and cancel\n", cs); MPI_Isend(buf, n, MPI_CHAR, dest, cs + n + 1, comm, &req); MPI_Cancel(&req); MPI_Wait(&req, &status); MTestPrintfMsg(1, "Completed wait on isend\n"); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel an Isend request\n"); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send(&n, 1, MPI_INT, dest, 123, comm); /* Send the tag so the message can be received */ n = cs + n + 1; MPI_Send(&n, 1, MPI_INT, dest, 123, comm); free(buf); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status); MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char *) malloc(nn); if (!btemp) { fprintf(stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status); free(btemp); } } MPI_Barrier(comm); #ifdef TEST_IRSEND if (rank == 0) { char *bsendbuf; int bsendbufsize; int bf, bs; n = bufsizes[cs]; buf = (char *) malloc(n); if (!buf) { fprintf(stderr, "Unable to allocate %d bytes\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } bsendbufsize = n + MPI_BSEND_OVERHEAD; bsendbuf = (char *) malloc(bsendbufsize); 
if (!bsendbuf) { fprintf(stderr, "Unable to allocate %d bytes for bsend\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Buffer_attach(bsendbuf, bsendbufsize); MTestPrintfMsg(1, "About to create and cancel ibsend\n"); MPI_Ibsend(buf, n, MPI_CHAR, dest, cs + n + 2, comm, &req); MPI_Cancel(&req); MPI_Wait(&req, &status); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel an Ibsend request\n"); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send(&n, 1, MPI_INT, dest, 123, comm); /* Send the tag so the message can be received */ n = cs + n + 2; MPI_Send(&n, 1, MPI_INT, dest, 123, comm); free(buf); MPI_Buffer_detach(&bf, &bs); free(bsendbuf); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status); MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char *) malloc(nn); if (!btemp) { fprintf(stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status); free(btemp); } } MPI_Barrier(comm); /* Because this test is erroneous, we do not perform it unless * TEST_IRSEND is defined. */ /* We avoid ready send to self because an implementation * is free to detect the error in delivering a message to * itself without a pending receive; we could also check * for an error return from the MPI_Irsend */ if (rank == 0 && dest != rank) { n = bufsizes[cs]; buf = (char *) malloc(n); if (!buf) { fprintf(stderr, "Unable to allocate %d bytes\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } MTestPrintfMsg(1, "About to create and cancel irsend\n"); MPI_Irsend(buf, n, MPI_CHAR, dest, cs + n + 3, comm, &req); MPI_Cancel(&req); MPI_Wait(&req, &status); MPI_Test_cancelled(&status, &flag); /* This can be pretty ugly. The standard is clear (Section 3.8) * that either a sent message is received or the * sent message is successfully cancelled. 
Since this message * can never be received, the cancel must complete * successfully. * * However, since there is no matching receive, this * program is erroneous. In this case, we can't really * flag this as an error */ if (!flag && veryPicky) { errs++; printf("Failed to cancel an Irsend request\n"); fflush(stdout); } if (flag) { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send(&n, 1, MPI_INT, dest, 123, comm); /* Send the tag so the message can be received */ n = cs + n + 3; MPI_Send(&n, 1, MPI_INT, dest, 123, comm); free(buf); } else if (rank == dest) { int n, tag; char *btemp; MPI_Recv(&n, 1, MPI_INT, 0, 123, comm, &status); MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status); if (n > 0) { /* If the message was not cancelled, receive it here */ btemp = (char *) malloc(n); if (!btemp) { fprintf(stderr, "Unable to allocate %d bytes\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Recv(btemp, n, MPI_CHAR, 0, tag, comm, &status); free(btemp); } } MPI_Barrier(comm); #endif if (rank == 0) { n = bufsizes[cs]; buf = (char *) malloc(n); if (!buf) { fprintf(stderr, "Unable to allocate %d bytes\n", n); MPI_Abort(MPI_COMM_WORLD, 1); } MTestPrintfMsg(1, "About to create and cancel issend\n"); MPI_Issend(buf, n, MPI_CHAR, dest, cs + n + 4, comm, &req); MPI_Cancel(&req); MPI_Wait(&req, &status); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel an Issend request\n"); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send(&n, 1, MPI_INT, dest, 123, comm); /* Send the tag so the message can be received */ n = cs + n + 4; MPI_Send(&n, 1, MPI_INT, dest, 123, comm); free(buf); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status); MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char *) malloc(nn); if (!btemp) { fprintf(stderr, "Unable to allocate %d bytes\n", nn); 
MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status); free(btemp); } } MPI_Barrier(comm); } MTest_Finalize(errs); MPI_Finalize(); return 0; }
/*
 * Exchange the boundary slices of two spinor fields, l and k, with the
 * neighbouring MPI processes in every parallelised direction.
 *
 * l, k : the two fields; for each direction a slice starting at the
 *        g_1st_<dir>_int_{dn,up} offset is sent and the neighbour's data is
 *        received at the g_1st_<dir>_ext_{up,dn} offset (presumably the
 *        interior boundary and the exterior halo, respectively -- confirm
 *        against the definition of these globals).
 * ieo  : selects the even or odd derived datatype used to gather the z
 *        boundary (ieo == 1 selects the "even" types).
 *
 * All sends and receives are posted non-blocking and completed by a single
 * MPI_Waitall.  Up to 8 requests are posted per direction, so the 32-entry
 * request array is exactly full when all four directions are parallelised.
 *
 * Without MPI defined the function does nothing.
 */
void xchange_2fields(spinor * const l, spinor * const k, const int ieo) {

#ifdef MPI
  MPI_Request requests[32];
  MPI_Status status[32];
  int reqcount = 0;
#endif

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchange2fields)
#endif

#ifdef MPI

#  if (defined BGL && defined XLC)
  __alignx(16, l);
#  endif

#  if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT )
  /* ---------------- t direction, field l ---------------- */
  /* send the data to the neighbour on the left */
  /* receive the data from the neighbour on the right */
  MPI_Isend((void*)(l+g_1st_t_int_dn), 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_t_ext_up), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right */
  /* receive the data from the neighbour on the left */
  MPI_Isend((void*)(l+g_1st_t_int_up), 1, field_time_slice_cont, g_nb_t_up, 82, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_t_ext_dn), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- t direction, field k ---------------- */
  /* send the data to the neighbour on the left */
  /* receive the data from the neighbour on the right */
  MPI_Isend((void*)(k+g_1st_t_int_dn), 1, field_time_slice_cont, g_nb_t_dn, 83, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_t_ext_up), 1, field_time_slice_cont, g_nb_t_up, 83, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right */
  /* receive the data from the neighbour on the left */
  /* The receive must target the ext_dn offset, exactly as for l above:
   * receiving into int_dn would overwrite the slice that the tag-83 send
   * is still reading, and would leave the ext_dn region of k unfilled. */
  MPI_Isend((void*)(k+g_1st_t_int_up), 1, field_time_slice_cont, g_nb_t_up, 84, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_t_ext_dn), 1, field_time_slice_cont, g_nb_t_dn, 84, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

#  if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )
  /* ---------------- x direction, field l ---------------- */
  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend((void*)(l+g_1st_x_int_dn), 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_x_ext_up), 1, field_x_slice_cont, g_nb_x_up, 91, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in x direction */
  /* receive the data from the neighbour on the left in x direction */
  MPI_Isend((void*)(l+g_1st_x_int_up), 1, field_x_slice_gath, g_nb_x_up, 92, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_x_ext_dn), 1, field_x_slice_cont, g_nb_x_dn, 92, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- x direction, field k ---------------- */
  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend((void*)(k+g_1st_x_int_dn), 1, field_x_slice_gath, g_nb_x_dn, 93, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_x_ext_up), 1, field_x_slice_cont, g_nb_x_up, 93, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in x direction */
  /* receive the data from the neighbour on the left in x direction */
  MPI_Isend((void*)(k+g_1st_x_int_up), 1, field_x_slice_gath, g_nb_x_up, 94, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_x_ext_dn), 1, field_x_slice_cont, g_nb_x_dn, 94, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

#  if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ )
  /* ---------------- y direction, field l ---------------- */
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)(l+g_1st_y_int_dn), 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_y_ext_up), 1, field_y_slice_cont, g_nb_y_up, 101, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in y direction */
  /* receive the data from the neighbour on the left in y direction */
  MPI_Isend((void*)(l+g_1st_y_int_up), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(l+g_1st_y_ext_dn), 1, field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* ---------------- y direction, field k ---------------- */
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)(k+g_1st_y_int_dn), 1, field_y_slice_gath, g_nb_y_dn, 103, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_y_ext_up), 1, field_y_slice_cont, g_nb_y_up, 103, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;

  /* send the data to the neighbour on the right in y direction */
  /* receive the data from the neighbour on the left in y direction */
  MPI_Isend((void*)(k+g_1st_y_int_up), 1, field_y_slice_gath, g_nb_y_up, 104, g_cart_grid, &requests[reqcount]);
  MPI_Irecv((void*)(k+g_1st_y_ext_dn), 1, field_y_slice_cont, g_nb_y_dn, 104, g_cart_grid, &requests[reqcount+1]);
  reqcount=reqcount+2;
#  endif

#  if (defined PARALLELXYZ || defined PARALLELXYZT)
  {
    /* The gather datatype for the z boundary depends on ieo; the receive
     * side always uses the contiguous type.
     * NOTE(review): l and k select their datatype from the same ieo test;
     * confirm that both fields are meant to use the same (even/odd) type. */
    MPI_Datatype z_slice_dn = (ieo == 1) ? field_z_slice_even_dn : field_z_slice_odd_dn;
    MPI_Datatype z_slice_up = (ieo == 1) ? field_z_slice_even_up : field_z_slice_odd_up;

    /* send the data to the neighbour on the left in z direction */
    /* receive the data from the neighbour on the right in z direction */
    MPI_Isend((void*)(l+g_1st_z_int_dn),1,z_slice_dn,g_nb_z_dn,503,g_cart_grid,&requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,503,g_cart_grid,&requests[reqcount+1]);
    reqcount=reqcount+2;

    MPI_Isend((void*)(k+g_1st_z_int_dn),1,z_slice_dn,g_nb_z_dn,505,g_cart_grid,&requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,505,g_cart_grid,&requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in z direction */
    /* receive the data from the neighbour on the left in z direction */
    MPI_Isend((void*)(l+g_1st_z_int_up),1,z_slice_up,g_nb_z_up,504,g_cart_grid,&requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,504,g_cart_grid,&requests[reqcount+1]);
    reqcount=reqcount+2;

    MPI_Isend((void*)(k+g_1st_z_int_up),1,z_slice_up,g_nb_z_up,506,g_cart_grid,&requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,506,g_cart_grid,&requests[reqcount+1]);
    reqcount=reqcount+2;
  }
#  endif

  /* Block until every posted send and receive has completed. */
  MPI_Waitall(reqcount, requests, status);

#endif
  return;
#ifdef _KOJAK_INST
#pragma pomp inst end(xchange2fields)
#endif
}
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int myrank, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent) { /* this function is only called if buftype is not contig */ int i, p, flat_buf_idx; ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; int jj, n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; /* curr_to_proc[p] = amount of data sent to proc. p that has already been accounted for so far done_to_proc[p] = amount of data already sent to proc. p in previous iterations user_buf_idx = current location in user buffer send_buf_idx[p] = current location in send_buf of proc. p */ for (i=0; i < nprocs; i++) { send_buf_idx[i] = curr_to_proc[i] = 0; done_to_proc[i] = sent_to_proc[i]; } jj = 0; user_buf_idx = flat_buf->indices[0]; flat_buf_idx = 0; n_buftypes = 0; flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype flat_buf_sz = size of current contiguous component in flattened buf */ for (i=0; i<contig_access_count; i++) { off = offset_list[i]; rem_len = len_list[i]; /*this request may span the file domains of more than one process*/ while (rem_len != 0) { len = rem_len; /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no * longer than the single region that processor "p" is responsible * for. 
*/ p = ADIOI_Calc_aggregator(fd, off, min_st_offset, &len, fd_size, fd_start, fd_end); if (send_buf_idx[p] < send_size[p]) { if (curr_to_proc[p]+len > done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p])); buf_incr = curr_to_proc[p] + len - done_to_proc[p]; ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size)); /* ok to cast: bounded by cb buffer size */ curr_to_proc[p] = done_to_proc[p] + (int)size; ADIOI_BUF_COPY } else { size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); buf_incr = len; ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size)); curr_to_proc[p] += size; ADIOI_BUF_COPY } if (send_buf_idx[p] == send_size[p]) { MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, myrank+p+100*iter, fd->comm, requests+jj); jj++; } } else { ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len)); curr_to_proc[p] += len; buf_incr = len; ADIOI_BUF_INCR } }
// Post one non-blocking send per communication partner.
// Partner p gets message_size[p] doubles from send_buffer[p]; the message tag
// is this rank's id offset by 2999.  The resulting requests are stored in
// request_send[] and are not waited on here.
void NEKTAR_MEX::MEX_post_send(){
  const int send_tag = my_rank+2999;
  int p = 0;
  while (p < Npartners){
    MPI_Isend(send_buffer[p],
              message_size[p],
              MPI_DOUBLE,
              partner_list[p],
              send_tag,
              comm,
              &request_send[p]);
    ++p;
  }
}
int main(int argc, char * argv[]) { double *sbuf, *rbuf; int iter, maxiter, repeats[NCOUNTS]; int count[NCOUNTS]; int nc, nbytes; int taskid, ntasks; int itag = 99; int offset1,offset2; double etime; double latency, bw; sbuf = (double*) malloc(MAXPTS*sizeof(double)); rbuf = (double*) malloc(MAXPTS*sizeof(double)); MPI_Status mpi_status[2]; MPI_Request mpi_request[2]; /*----------------------------------------------*/ /* define an array of counts for 8-unsigned char objects */ /*----------------------------------------------*/ count[0] = 0; count[1] = 1; count[2] = 4; count[3] = 12; count[4] = 40; count[5] = 125; count[6] = 400; count[7] = 1250; count[8] = 4000; count[9] = 12500; count[10] = 40000; count[11] = 125000; repeats[0] = 100; repeats[1] = 100; repeats[2] = 100; repeats[3] = 100; repeats[4] = 100; repeats[5] = 100; repeats[6] = 100; repeats[7] = 100; repeats[8] = 100; repeats[9] = 50; repeats[10] = 25; repeats[11] = 10; /*-----------------------------------------------------------*/ /* set-up the parallel environment: assign ntasks and taskid */ /*-----------------------------------------------------------*/ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &ntasks); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); if ((ntasks % 2) != 0 && taskid == 0) { fprintf(stdout,"You must specify an even number of MPI tasks.\n"); exit(-1); } offset1=(NODESIZE-2); offset2=offset1+NODESIZE; if ((taskid>=2) && (taskid<NODESIZE)) { for (nc=0; nc<NCOUNTS; nc++) MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */ MPI_Finalize(); return(0); } if ((taskid>=(NODESIZE+2)) && (taskid<(2*NODESIZE))) { for (nc=0; nc<NCOUNTS; nc++) MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */ MPI_Finalize(); return(0); } if ((taskid>=NODESIZE) && (taskid<(NODESIZE+2))) taskid-=offset1; /*----------------------------------------------------*/ /* send/recv to ensure that the routines are loaded */ /*----------------------------------------------------*/ nc = 1; if ((taskid < 2)) MPI_Isend(sbuf, 
count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[0])); else MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[0])); if ((taskid >= 2)) MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[1])); else MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[1])); MPI_Waitall(2,mpi_request,mpi_status); /*--------------------------------------------------------*/ /* send or receive messages, and measure round-trip time. */ /* even tasks send, odd tasks receive, then the reverse. */ /*--------------------------------------------------------*/ for (nc=0; nc<NCOUNTS; nc++) { MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */ TEST_CLOCK_INIT maxiter = repeats[nc]; for (iter=0; iter<maxiter; iter++) { /*--------------------------------------------*/ /* send in one direction i->i+1 */ /*--------------------------------------------*/ if ((taskid <2)) MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[0])); else MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[0])); /*--------------------------------------------*/ /* send in the reverse direction i+1->i */ /*--------------------------------------------*/ if ((taskid >= 2)) MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[1])); else MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), itag, MPI_COMM_WORLD, &(mpi_request[1])); MPI_Waitall(2,mpi_request,mpi_status); } /* end the repeat loop */ TEST_CLOCK_STOP /*-----------------------------------------*/ /* write timing data for each message size */ /*-----------------------------------------*/ nbytes = 8*count[nc]; /* 8 bytes per entry */ etime = 0.5e3*(TEST_CLOCK_GET)/maxiter; if (taskid == 0) { 
fprintf(stdout,"msglen = %8d bytes, elapsed time = %.4lf msec\n", nbytes, etime); } if (nc == 0) latency = 1.0e3*etime; if (nc == (NCOUNTS-1)) bw = nbytes/(1.0e3*etime); } /* end the loop over message sizes */ /*--------------------------------------------------------*/ /*report apporximate numbers for bandwidth and latency */ /*--------------------------------------------------------*/ if (taskid == 0) { fprintf(stdout,"\nlatency = %.1lf microseconds\n", latency); fprintf(stdout,"bandwidth = %.2lf MBytes/sec\n", bw); fprintf(stdout,"(approximate values for MPI_Isend/MPI_Irecv)\n"); } MPI_Finalize(); free(sbuf); free(rbuf); return(0); }
void NEKTAR_MEX::MEX_init (int *map, int n, int *AdjacentPartitions, int NAdjacentPartitions, MPI_Comm comm_in){ /* n - [INPUT] integer - length of array "map", number of degrees of freedom processed in this partition map - [INPUT] - array of integers - global IDs of degrees of fredom processed in this partition AdjacentPartitions - [INPUT] - array of integers - list of possible adjacent partitions NAdjacentPartitions - [INPUT] integer - number of possible adjacent partitions comm - [INPUT] - communicator */ /* at the beginning we assume that NAdjacentPartitions can be greater or equall to the actuall number of partition to communicate with */ int *partner_map_size; int **partners_map; int *shared_dof; int i,j,k,ii,jj,partner; comm = comm_in; MPI_Comm_rank(comm,&my_rank); #ifdef MEX_REPORT static int FLAG_INIT = 0; #endif MPI_Request *request_recv_tmp, *request_send_tmp; //fprintf(stderr,"my_rank = %d, NAdjacentPartitions = %d\n",my_rank,NAdjacentPartitions); request_recv_tmp = new MPI_Request[NAdjacentPartitions]; request_send_tmp = new MPI_Request[NAdjacentPartitions]; partner_map_size = new int[NAdjacentPartitions]; shared_dof = new int[NAdjacentPartitions]; for (i = 0; i < NAdjacentPartitions; ++i) MPI_Irecv(&partner_map_size[i],1,MPI_INT,AdjacentPartitions[i],AdjacentPartitions[i],comm,&request_recv_tmp[i]); for (i = 0; i < NAdjacentPartitions; ++i) MPI_Isend(&n,1,MPI_INT,AdjacentPartitions[i],my_rank,comm,&request_send_tmp[i]); MPI_Waitall(NAdjacentPartitions,request_recv_tmp,MPI_STATUS_IGNORE); MPI_Waitall(NAdjacentPartitions,request_send_tmp,MPI_STATUS_IGNORE); //allocate memory for incomming messages partners_map = new int*[NAdjacentPartitions]; for (i = 0; i < NAdjacentPartitions; i++) partners_map[i] = new int[partner_map_size[i]]; //get partners map for (i = 0; i < NAdjacentPartitions; ++i) MPI_Irecv(partners_map[i],partner_map_size[i],MPI_INT,AdjacentPartitions[i],AdjacentPartitions[i],comm,&request_recv_tmp[i]); //send local map to partners for (i = 
0; i < NAdjacentPartitions; ++i) MPI_Isend(map,n,MPI_INT,AdjacentPartitions[i],my_rank,comm,&request_send_tmp[i]); MPI_Waitall(NAdjacentPartitions,request_recv_tmp,MPI_STATUS_IGNORE); MPI_Waitall(NAdjacentPartitions,request_send_tmp,MPI_STATUS_IGNORE); // compare local map and partners map for (partner = 0; partner < NAdjacentPartitions; ++partner){ shared_dof[partner] = 0; for (i = 0; i < n; ++i){ for (j = 0; j < partner_map_size[partner]; ++j){ if (map[i] == partners_map[partner][j]){ shared_dof[partner]++; break; } } } } /* calculate the number of partitions to communicate with */ for (partner = 0, Npartners = 0; partner < NAdjacentPartitions; ++partner){ if (shared_dof[partner] > 0) Npartners++; } delete[] request_recv_tmp; delete[] request_send_tmp; request_recv = (MPI_Request *) malloc(Npartners*sizeof(MPI_Request)); request_send = (MPI_Request *) malloc(Npartners*sizeof(MPI_Request)); for (i = 0; i < Npartners; ++i){ request_recv[i] = MPI_REQUEST_NULL; request_send[i] = MPI_REQUEST_NULL; } #if (defined (__bg__) || defined (__blrts__) ) posix_memalign((void**)&partner_list,16, Npartners*sizeof(int)); posix_memalign((void**)&message_size,16, Npartners*sizeof(int)); #else partner_list = (int *) malloc(Npartners*sizeof(int)); message_size = (int *) malloc(Npartners*sizeof(int)); #endif for (partner = 0, i = 0; partner < NAdjacentPartitions; ++partner){ if (shared_dof[partner] > 0){ partner_list[i] = AdjacentPartitions[partner]; message_size[i] = shared_dof[partner]; i++; } } /* the "partner_list" now can be sorted with respect to topology and message size */ int *pivot, *message_size_tmp, *partner_map_tmp; #if (defined (__bg__) || defined (__blrts__) ) posix_memalign((void**)&my_coord,16, 4*sizeof(int)); posix_memalign((void**)&partners_coordinates,16, Npartners*4*sizeof(int)); get_rank_coordinates(my_coord); get_partners_coordinates(Npartners,partner_list,my_coord,partners_coordinates,comm); posix_memalign((void**)&pivot,16, Npartners*sizeof(int)); 
posix_memalign((void**)&message_size_tmp,16, Npartners*sizeof(int)); posix_memalign((void**)&partner_map_tmp,16, Npartners*sizeof(int)); #else pivot = (int *) malloc( Npartners*sizeof(int)); message_size_tmp = (int *) malloc( Npartners*sizeof(int)); partner_map_tmp = (int *) malloc( Npartners*sizeof(int)); #endif /* initialize pivot to default values */ for (i = 0; i < Npartners; ++i) pivot[i] = i; #if (defined (__bg__) || defined (__blrts__) ) reorder_partner_list_2(Npartners,partners_coordinates,my_coord,partner_list,message_size,pivot); free(partners_coordinates); #else sort_ascending(Npartners,partner_list,message_size,pivot); #endif MPI_Barrier(comm); //if (my_rank == 0) // fprintf(stderr,"MEX: sort_ascending - done\n"); for (i = 0; i < Npartners; ++i) message_size_tmp[pivot[i]] = message_size[i]; memcpy(message_size,message_size_tmp,Npartners*sizeof(int)); /* map partner_list to AdjacentPartitions partner_map_tmp will store the index of partner_list[i] in AdjacentPartitions */ for (i = 0; i < Npartners; ++i){ for (j = 0; j < NAdjacentPartitions; ++j){ if (AdjacentPartitions[j] == partner_list[i]){ partner_map_tmp[i] = j; break; } } } free(pivot); free(message_size_tmp); MPI_Barrier(comm); //if (my_rank == 0) // fprintf(stderr,"MEX: partners reordering - done\n"); #if (defined (__bg__) || defined (__blrts__) ) posix_memalign((void**)&message_send_map,16, Npartners*sizeof(int*)); for (i = 0; i < Npartners; ++i){ posix_memalign((void**)&message_send_map[i],16, message_size[i]*sizeof(int)); memset(message_send_map[i],'\0',message_size[i]*sizeof(int)); } posix_memalign((void**)&message_recv_map,16, Npartners*sizeof(int*)); for (i = 0; i < Npartners; ++i){ posix_memalign((void**)&message_recv_map[i],16, message_size[i]*sizeof(int)); memset(message_recv_map[i],'\0',message_size[i]*sizeof(int)); } posix_memalign((void**)&send_buffer,16, Npartners*sizeof(double*)); for (i = 0; i < Npartners; ++i){ posix_memalign((void**)&send_buffer[i],16, 
message_size[i]*sizeof(double)); memset(send_buffer[i],'\0',message_size[i]*sizeof(double)); } posix_memalign((void**)&recv_buffer,16, Npartners*sizeof(double*)); for (i = 0; i < Npartners; ++i){ posix_memalign((void**)&recv_buffer[i],16, message_size[i]*sizeof(double)); memset(recv_buffer[i],'\0',message_size[i]*sizeof(double)); } #else message_send_map = (int **) malloc(Npartners*sizeof(int*)); for (i = 0; i < Npartners; ++i){ message_send_map[i] = (int *) malloc(message_size[i]*sizeof(int)); memset(message_send_map[i],'\0',message_size[i]*sizeof(int)); } message_recv_map = (int **) malloc(Npartners*sizeof(int*)); for (i = 0; i < Npartners; ++i){ message_recv_map[i] = (int *) malloc(message_size[i]*sizeof(int)); memset(message_recv_map[i],'\0',message_size[i]*sizeof(int)); } send_buffer = (double **) malloc(Npartners*sizeof(double*)); for (i = 0; i < Npartners; ++i){ send_buffer[i] = (double *) malloc(message_size[i]*sizeof(double)); memset(send_buffer[i],'\0',message_size[i]*sizeof(double)); } recv_buffer = (double **) malloc(Npartners*sizeof(double*)); for (i = 0; i < Npartners; ++i){ recv_buffer[i] = (double *) malloc(message_size[i]*sizeof(double)); memset(recv_buffer[i],'\0',message_size[i]*sizeof(double)); } #endif /* to support unsorted list of degrees of freedom two maps are created it is possible to check if the two maps are identical so only one will be kept and pointers message_send_map[k] and message_recv_map[k] will be the same - this will help to save some memory */ MPI_Barrier(comm); //if (my_rank == 0) // fprintf(stderr,"MEX: file = %s, line = %d\n",__FILE__,__LINE__); double map_time_start = MPI_Wtime(); #ifdef TEST_OMP #pragma omp parallel private(partner, i, ii, j, jj) { #pragma omp for schedule(dynamic) for (k = 0; k < Npartners; ++k){ partner = partner_map_tmp[k]; for (i = 0, ii=0; i < n; ++i){ for (j = 0; j < partner_map_size[partner]; ++j){ if (map[i] == partners_map[partner][j]){ message_send_map[k][ii] = i; ii++; break; } } } for (j = 0, 
jj = 0; j < partner_map_size[partner]; ++j){ for (i = 0; i < n; ++i){ if (map[i] == partners_map[partner][j]){ message_recv_map[k][jj] = i; jj++; break; } } } } } #else for (k = 0; k < Npartners; ++k){ partner = partner_map_tmp[k]; for (i = 0, ii=0; i < n; ++i){ for (j = 0; j < partner_map_size[partner]; ++j){ if (map[i] == partners_map[partner][j]){ message_send_map[k][ii] = i; ii++; break; } } } for (j = 0, jj = 0; j < partner_map_size[partner]; ++j){ for (i = 0; i < n; ++i){ if (map[i] == partners_map[partner][j]){ message_recv_map[k][jj] = i; jj++; break; } } } } #endif MPI_Barrier(comm); //if (my_rank == 0) // fprintf(stderr,"MEX: file = %s, line = %d map_time = %f\n",__FILE__,__LINE__,MPI_Wtime() - map_time_start); free(partner_map_tmp); /* for (partner = 0, k = 0; partner < NAdjacentPartitions; ++partner){ if (shared_dof[partner] == 0) continue; for (i = 0, ii=0; i < n; ++i){ for (j = 0; j < partner_map_size[partner]; ++j){ if (map[i] == partners_map[partner][j]){ message_send_map[k][ii] = i; ii++; break; } } } for (j = 0, jj = 0; j < partner_map_size[partner]; ++j){ for (i = 0; i < n; ++i){ if (map[i] == partners_map[partner][j]){ message_recv_map[k][jj] = i; jj++; break; } } } k++; } */ #ifdef MEX_REPORT //print report to file FILE *pFile; char fname[128]; sprintf(fname,"report_gs_init.%d.%d",FLAG_INIT,my_rank); pFile = fopen(fname,"w"); for (i = 0; i < NAdjacentPartitions; ++i){ if (shared_dof[i] > 0) fprintf(pFile,"%d %d\n",AdjacentPartitions[i],shared_dof[i]); } fprintf(pFile,"-1 -1 \n"); fprintf(pFile,"**\n"); for (partner = 0; partner < Npartners; ++partner){ fprintf(pFile,"will send to partner %d array of size %d\n",partner_list[partner],message_size[partner]); for (i = 0; i < message_size[partner]; ++i) fprintf(pFile,"%d ", message_send_map[partner][i]); fprintf(pFile,"\n"); } fprintf(pFile,"**\n"); for (partner = 0; partner < Npartners; ++partner){ fprintf(pFile,"will recv from partner %d array of size 
%d\n",partner_list[partner],message_size[partner]); for (i = 0; i < message_size[partner]; ++i) fprintf(pFile,"%d ", message_recv_map[partner][i]); fprintf(pFile,"\n"); } fprintf(pFile,"**\n"); fprintf(pFile,"My d.o.f are:\n"); for (i = 0; i < n; ++i) fprintf(pFile,"%d ",map[i]); fprintf(pFile,"\n"); fclose(pFile); FLAG_INIT++; #endif delete[] shared_dof; delete[] partner_map_size; for (i = 0; i < NAdjacentPartitions; i++) delete partners_map[i]; delete[] partners_map; }
int main(int argc, char **argv) { int rank, size, i; int root = 0; int hits = 0; // index used for 'hits' int total = 1; // index used for 'total' int msg_waiting = 0; double results[2] = {0}; MPI_Init(&argc, &argv); MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); MPI_Status status; MPI_Request request; // is root process if(rank == root) { double area; double total_hits = 0; double total_pokes = 0; while (1) { // check each slave process for results (non-blocking) for (i = 1; i < size; i++) { MPI_Iprobe(i, 0, comm, &msg_waiting, &status); // if slave process is sending results if (msg_waiting) { MPI_Recv(&results, 2, MPI_DOUBLE, i, 0, comm, &status); total_hits += results[hits]; total_pokes += results[total]; } } if (total_pokes >= 15000000000) { area = (total_hits / total_pokes) * 4; printf("Area=%.12lf\n", area); // send terminating message to each slave process for (i = 1; i < size; i++) { MPI_Isend(&area, 1, MPI_DOUBLE, i, 0, comm, &request); } break; } } // is slave process } else { int cpu_count = get_cpu_count(); double shared_results[cpu_count * 2]; double l_hits = 0; double l_total = 0; pthread_t threads[cpu_count]; t_data thread_data[cpu_count]; for (i = 0; i < cpu_count; i++) { thread_data[i].id = i; thread_data[i].rank = rank; thread_data[i].results = shared_results; pthread_create(&threads[i], NULL, &throw_darts, &thread_data[i]); } // periodically reads results from shared memory; sends to root process while(1) { sleep(3); // first checks for termination flag from root process MPI_Iprobe(root, 0, comm, &msg_waiting, &status); if (msg_waiting) { // terminate threads for (i = 0; i < cpu_count; i++) { pthread_cancel(threads[i]); } break; } else { results[hits] = 0; results[total] = 0; for (i = 0; i < cpu_count; i++) { results[hits] += shared_results[i * 2]; results[total] += shared_results[i * 2 + 1]; } results[hits] -= l_hits; results[total] -= l_total; l_hits += results[hits]; l_total += results[total]; // send 
results to root process MPI_Isend(&results, 2, MPI_DOUBLE, root, 0, comm, &request); } } } MPI_Finalize(); return 0; }
/** * Función main */ int main(int argc, char *argv[]){ //************************************************************ // 0. Variables char * matrix_name; /**< Nombre de la matrix de entrada */ int rows; /**< Número de filas de la matriz */ int cols; /**< Número de columnas */ m_type * matrix1; /**< Matrix con los datos */ m_type * matrix2; /**< Copia de la matrix para calcular nuevos valores */ double t_begin; /**< Tiempo de inicio del calculo */ double t_end; /**< Tiempo de finalización */ m_type sum; /**< Suma de verificación */ int iter = 0; /**< Iteraciones realizadas hasta llegar a un punto estable */ int rank; /**< Identificador del proceso */ int size; /**< Tamano del grupo */ int temporal = 0; int indice = 0; int i1,j1,k1; m_type * buf_col_x; // Macros para acceder a las matrices #define m1(i,j) (matrix1[(i)*(cols+2)+(j)]) #define m2(i,j) (matrix2[(i)*(cols+2)+(j)]) //************************************************************ //************************************************************ // 1. Leer los parametros de entrada. if(argc != 2){ fprintf(stderr,"USO: %s <matriz de datos>\n",argv[0]); exit(EXIT_FAILURE); } matrix_name = argv[1]; //************************************************************ //************************************************************ // 0. Inicialisacion del entorno MPI MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); //************************************************************ //************************************************************ // 2. 
Cargar la matriz // Matrix de tamaño rows+2 x cols+2 (halo de tamaño 1) // (Los datos están en [1,rows+1][1,cols+1]) cp_read_matrix_size(matrix_name, &rows, &cols); printf(" Matriz %s: %dx%d\n",matrix_name, rows, cols); int matrix_size = (rows + 2) * (cols + 2); matrix1 = malloc(sizeof(m_type) * (size_t) matrix_size); cp_read_matrix(matrix_name, matrix1, 1); //************************************************************ buf_col_x = malloc(sizeof(m_type) * (size_t) (rows+2)); for (i1 = 1;i1 <= size;i1++){ indice = i1*(cols)/size; for (k1=temporal;k1<indice;k1++) { for (j1 = 0; j1 < cols; j1++) { buf_col_x[j1] = m1(k1,j1); } MPI_Request request_col_x; //data_buf = calloc((size_t) (rows+2), sizeof(float)); // Envia al proceso col_X MPI_Isend(buf_col_x, rows, MPI_FLOAT, i1, COL_ID, MPI_COMM_WORLD, &request_col_x); //2.3 Recibircepcion de los datos MPI_Recv(buf_col_x, rows, MPI_FLOAT, i1, COL_ID, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Wait(&request_col_x, MPI_STATUS_IGNORE); temporal = indice; } printf("hola mundo\n"); } //************************************************************ // 3. Copia de la matriz matrix2 = malloc(sizeof(m_type) * (size_t) matrix_size); //************************************************************ #ifdef SHOW_DISPLAY cp_display_create("Stencil", rows+2, cols+2); cp_display_draw_matrix(matrix1,CP_RGB(255,0,0),CP_RGB(0,255,0)); cp_msleep(1000); #endif //************************************************************ // 4. 
Bucle principal t_begin = cp_Wtime(); int i,j; // 4.1 Nos mantenemos en el bucle mientras el residuo calculado // sea mayor que el residuo objetivo do { resid = 0.0; // Iteración del algoritmo update(matrix1,matrix2,rows,cols, rank, size); // Recibir los datos de los otros procesos en una matricia // Actualizar la copia for (i=1; i<rows+1; i++) { for (j=1; j<cols+1; j++) { m1(i,j) = m2 (i,j); } } #ifdef SHOW_DISPLAY cp_display_draw_matrix(matrix2,CP_RGB(255,0,0),CP_RGB(0,255,0)); cp_msleep(50); // Para ver más despacio la evolucion del proceso #endif iter++; } while (resid > MAX_RESID); //************************************************************ //************************************************************ // 5. Suma de verificación sum = check_sum(matrix1,rows,cols); t_end = cp_Wtime(); //************************************************************ //************************************************************ // 6. Mostrar resultados printf(" Check sum: %f\n", sum); printf(" Iteraciones: %d\n", iter); printf(" Tiempo de ejecución: %f\n", t_end-t_begin); //************************************************************ #ifdef SHOW_DISPLAY cp_msleep(250); cp_display_close(); #endif //************************************************************ // 7. Liberar las matrices free(matrix1); free(matrix2); //************************************************************ #undef m1 #undef m2 //************************************************************ // 8. Finalizacion del entorno MPI MPI_Finalize(); //************************************************************ return EXIT_SUCCESS; }