void relax(double *phi, double *b, double *tmp, param_t p)
{  
   int i, x, y;
   // phi includes one ghost row below the local block; phi_s skips it so
   // the local rows run from y = 0 to y = p.y-1.
   double* phi_s = phi + p.L;
   
   // Prepare for async send/recv
   MPI_Request request[4];
   int requests;
   MPI_Status status[4];
  
   for(i=0; i<p.niter; i++)
   {
      
      requests = 0;
   
      // Send the top local row to the next rank; receive the bottom ghost row from the previous rank.
      MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE, 
                  (p.my_rank+1)%p.world_size, 1, 
                  MPI_COMM_WORLD, request + requests++);
      MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE,
                   (p.my_rank+p.world_size-1)%p.world_size, 1,
                   MPI_COMM_WORLD, request + requests++);


      // Send the bottom local row to the previous rank; receive the top ghost row from the next rank.
      MPI_Isend(phi_s, p.L, MPI_DOUBLE, 
                  (p.my_rank+p.world_size-1)%p.world_size, 0, 
                  MPI_COMM_WORLD, request + requests++);
      MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE,
                   (p.my_rank+1)%p.world_size, 0,
                   MPI_COMM_WORLD, request + requests++);

      // Overlap communication with computation: update the interior rows,
      // which do not depend on the ghost rows.
      
      for(x = 0; x < p.L; x++)
      {
         for(y = 1; y < p.y-1; y++)
         {
            // x is periodic via the modulo; in y the neighbouring rows are
            // local (or ghost) rows, so no wrap-around is applied to y.
            tmp[x + y*p.L] =  (1 - p.dt)* phi_s[x + y*p.L]
                  + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
                  +  phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L])
                  + p.dt*p.scale* b[x + y*p.L];
         }
      }
      
      // Wait for the halo exchange to finish before touching the ghost rows.
      MPI_Waitall ( requests, request, status );
      
      // Update the boundary rows, which depend on the freshly received ghost rows.
      for(x = 0; x < p.L; x++)
      {
         y = 0;
         tmp[x + y*p.L] =  (1 - p.dt)* phi_s[x + y*p.L]
               + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] 
               +  phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L])
               + p.dt*p.scale* b[x + y*p.L]; 
      
         y = p.y-1;
         tmp[x + y*p.L] =  (1 - p.dt)* phi_s[x + y*p.L]
               + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] 
               +  phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L])
               + p.dt*p.scale* b[x + y*p.L]; 
      }
      
      for(x = 0; x < p.L; x++)
      {
         for(y = 0; y < p.y; y++)
         {
            phi_s[x + y*p.L] = tmp[x + y*p.L];
         }
      }
   }

   MPI_Barrier(MPI_COMM_WORLD);
   
   return;    
}
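A minimal driver sketch for relax() above, assuming param_t carries exactly the fields the routine uses (L, y, niter, dt, scale, my_rank, world_size); the struct layout, problem size, and parameter values below are illustrative assumptions, not taken from the original code. The only layout requirement inferred from relax() is that phi owns one extra row of length L on each side of the local block.

/* Hypothetical driver for relax(); param_t is reconstructed from the fields
 * the routine uses and may differ from the original definition. */
#include <mpi.h>
#include <stdlib.h>

typedef struct {
   int L;            /* local row length (x extent)              */
   int y;            /* number of local rows owned by this rank  */
   int niter;        /* relaxation sweeps                        */
   double dt;        /* damping parameter                        */
   double scale;     /* stencil scale factor                     */
   int my_rank;
   int world_size;
} param_t;

void relax(double *phi, double *b, double *tmp, param_t p);

int main(int argc, char **argv)
{
   param_t p;
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &p.my_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &p.world_size);

   p.L = 64;                       /* assumed global x extent          */
   p.y = 64 / p.world_size;        /* rows per rank (assumed to divide) */
   p.niter = 100;
   p.dt = 0.5;
   p.scale = 0.25;

   /* phi carries one ghost row below and one above the local block. */
   double *phi = calloc((size_t)(p.y + 2) * p.L, sizeof(double));
   double *b   = calloc((size_t)p.y * p.L, sizeof(double));
   double *tmp = calloc((size_t)p.y * p.L, sizeof(double));

   relax(phi, b, tmp, p);

   free(phi); free(b); free(tmp);
   MPI_Finalize();
   return 0;
}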
Example #2
int Stg_MPI_Isend( char* file, int line, void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request ) {
	Stream* stream = Journal_Register( Info_Type, "mpi" );
	Journal_Printf( stream, "%s %d, rank %d MPI_Isend: tag = %d, count = %d, datatype = %d, dest = %d\n", file, line, Stg_Messaging_GetRank( comm ), tag, count, datatype, dest );
	return MPI_Isend( buf, count, datatype, dest, tag, comm, request );
}
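The wrapper above only logs the call site and forwards to MPI_Isend. A plausible way to pull existing call sites through it is a macro that captures the file and line; the macro below is an assumption for illustration, not the library's actual definition.

/* Assumed hook-up (not from the original source): route MPI_Isend call sites
 * through the logging wrapper by capturing file and line at the call site.
 * The translation unit that defines Stg_MPI_Isend must not see this macro
 * (or must #undef it first), otherwise its internal MPI_Isend call would
 * expand recursively. */
#define MPI_Isend( buf, count, datatype, dest, tag, comm, request ) \
   Stg_MPI_Isend( __FILE__, __LINE__, buf, count, datatype, dest, tag, comm, request )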
void exchsolution_gmrfData_1(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S493, S492, S494 */
{
{
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i1 = 1;
for (; (i1<=2); i1 += 2) {
fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[((i1*6)+8)] = 0.000000e+00;
}
for (; (i1<=3); i1 += 1) {
fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00;
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<1); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<4); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(2.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<1); i1 += 4) {
/* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<4); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S496, S495, S497 */
{
{
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i1 = 1;
for (; (i1<=2); i1 += 2) {
fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[((i1*6)+10)] = 0.000000e+00;
}
for (; (i1<=3); i1 += 1) {
fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00;
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<1); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<4); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(2.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<1); i1 += 4) {
/* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<4); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S500, S499, S498 */
{
{
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=4); i2 += 1) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i2 = 2;
for (; (i2<=3); i2 += 2) {
fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[(i2+7)] = 0.000000e+00;
}
for (; (i2<=4); i2 += 1) {
fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=4); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S503, S502, S501 */
{
{
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=4); i2 += 1) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=4); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i2 = 2;
for (; (i2<=3); i2 += 2) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00;
}
for (; (i2<=4); i2 += 1) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_Solution_GMRF[1][20], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][5], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][25], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
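The generated routine above, and the Laplace-coefficient variant that follows, repeat one fixed pattern per neighbor: pack or address the boundary layer, MPI_Isend it, MPI_Irecv into the ghost layer, wait for the receive, unpack if needed, then wait for the send. The hand-written sketch below distills that skeleton with hypothetical names and a simplified tag scheme; it is illustrative only and not part of the generated code.

/* Illustrative ghost-layer exchange skeleton (not generated code).
 * Directions are assumed ordered in opposite pairs {-x,+x,-y,+y}, so a
 * message sent in direction n arrives at the receiver's direction n^1. */
#include <mpi.h>
#include <stdbool.h>

enum { NUM_NEIGHBORS = 4 };

void exchange_ghost_layers(double *field,
                           const int send_offset[NUM_NEIGHBORS],
                           const int recv_offset[NUM_NEIGHBORS],
                           MPI_Datatype layer_type[NUM_NEIGHBORS],
                           const int neighbor_rank[NUM_NEIGHBORS],
                           const bool neighbor_valid[NUM_NEIGHBORS],
                           MPI_Comm comm)
{
   MPI_Request send_req[NUM_NEIGHBORS], recv_req[NUM_NEIGHBORS];
   bool posted[NUM_NEIGHBORS] = { false };

   for (int n = 0; n < NUM_NEIGHBORS; ++n) {
      if (!neighbor_valid[n]) continue;
      /* Send our boundary layer; tag with our own direction index. */
      MPI_Isend(&field[send_offset[n]], 1, layer_type[n], neighbor_rank[n],
                n, comm, &send_req[n]);
      /* Receive into our ghost layer; the sender tagged it with n^1. */
      MPI_Irecv(&field[recv_offset[n]], 1, layer_type[n], neighbor_rank[n],
                n ^ 1, comm, &recv_req[n]);
      posted[n] = true;
   }
   /* Complete receives first (the ghost data is needed next), then sends. */
   for (int n = 0; n < NUM_NEIGHBORS; ++n)
      if (posted[n]) MPI_Wait(&recv_req[n], MPI_STATUS_IGNORE);
   for (int n = 0; n < NUM_NEIGHBORS; ++n)
      if (posted[n]) MPI_Wait(&send_req[n], MPI_STATUS_IGNORE);
}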
void exchlaplacecoeff_gmrfData_0(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1053, S1056, S1059, S1050, S1058, S1052, S1055, S1060, S1054, S1057, S1051 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+32)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+152)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+104)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+80)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<0); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<3); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+128)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+176)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+200)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<0); i1 += 4) {
/* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<3); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+56)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1071, S1065, S1068, S1062, S1070, S1064, S1067, S1061, S1069, S1063, S1066 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+201)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+57)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+81)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+177)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<0); i1 += 4) {
/* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<3); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+105)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+129)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+153)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<0); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<3); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+33)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1080, S1074, S1077, S1082, S1076, S1079, S1073, S1072, S1081, S1075, S1078 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+127)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00;
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=3); i2 += 1) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+199)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=3); i2 += 1) {
yPos = posBegin[1];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+31)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+175)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+79)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+55)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+151)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+7)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+103)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1083, S1092, S1086, S1089, S1088, S1091, S1085, S1090, S1093, S1087, S1084 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+13)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+61)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+205)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+133)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+85)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=3); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+37)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=3); i2 += 1) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+181)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+157)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1094 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)];
}
for (; (i1<=2); i1 += 1) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1095 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 3;
for (; (i1<=3); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)];
}
for (; (i1<=4); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][14], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][8], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1096 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*4)]);
int i1 = 0;
for (; (i1<=2); i1 += 2) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)];
}
for (; (i1<=3); i1 += 1) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1097 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 0;
for (; (i1<=2); i1 += 2) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)];
buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)];
}
for (; (i1<=3); i1 += 1) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Isend(buffer_Send[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Irecv(buffer_Recv[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1098 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 1;
for (; (i1<=3); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+1)] = buffer_Recv_0_p1[i1];
}
for (; (i1<=4); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1099 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 4;
for (; (i1<=6); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-14)] = buffer_Recv_1_p1[(i1-3)];
}
for (; (i1<=7); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][13], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][7], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][1], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][19], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Example #5
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
 * in the case of error.
 */
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
				  ADIOI_Flatlist_node *flat_buf, ADIO_Offset 
				  *offset_list, ADIO_Offset *len_list, int *send_size, 
				  int *recv_size, ADIO_Offset off, int size,
				  int *count, int *start_pos,
				  int *partial_recv,
				  int *sent_to_proc, int nprocs, 
				  int myrank, int
				  buftype_is_contig, int contig_access_count,
				  ADIO_Offset min_st_offset,
				  ADIO_Offset fd_size,
				  ADIO_Offset *fd_start, ADIO_Offset *fd_end, 
				  ADIOI_Access *others_req, 
				  int *send_buf_idx, int *curr_to_proc,
				  int *done_to_proc, int *hole, int iter, 
				  MPI_Aint buftype_extent, int *buf_idx,
				  int *error_code)
{
    int i, j, k, *tmp_len, nprocs_recv, nprocs_send, err;
    char **send_buf = NULL; 
    MPI_Request *requests, *send_req;
    MPI_Datatype *recv_types;
    MPI_Status *statuses, status;
    int *srt_len=NULL, sum;
    ADIO_Offset *srt_off=NULL;
    static char myname[] = "ADIOI_W_EXCHANGE_DATA";

/* exchange recv_size info so that each process knows how much to
   send to whom. */

    MPI_Alltoall(recv_size, 1, MPI_INT, send_size, 1, MPI_INT, fd->comm);

    /* create derived datatypes for recv */

    nprocs_recv = 0;
    for (i=0; i<nprocs; i++) if (recv_size[i]) nprocs_recv++;

    recv_types = (MPI_Datatype *)
	ADIOI_Malloc((nprocs_recv+1)*sizeof(MPI_Datatype)); 
/* +1 to avoid a 0-size malloc */

    tmp_len = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    j = 0;
    for (i=0; i<nprocs; i++) {
	if (recv_size[i]) {
/* take care if the last off-len pair is a partial recv */
	    if (partial_recv[i]) {
		k = start_pos[i] + count[i] - 1;
		tmp_len[i] = others_req[i].lens[k];
		others_req[i].lens[k] = partial_recv[i];
	    }
	    ADIOI_Type_create_hindexed_x(count[i],
		     &(others_req[i].lens[start_pos[i]]),
	             &(others_req[i].mem_ptrs[start_pos[i]]), 
			 MPI_BYTE, recv_types+j);
	    /* absolute displacements; use MPI_BOTTOM in recv */
	    MPI_Type_commit(recv_types+j);
	    j++;
	}
    }

    /* To avoid a read-modify-write, check if there are holes in the 
       data to be written. For this, merge the (sorted) offset lists
       others_req using a heap-merge. */

    sum = 0;
    for (i=0; i<nprocs; i++) sum += count[i];
    /* valgrind-detected optimization: if there is no work on this process we do
     * not need to search for holes */
    if (sum) {
        srt_off = (ADIO_Offset *) ADIOI_Malloc(sum*sizeof(ADIO_Offset));
        srt_len = (int *) ADIOI_Malloc(sum*sizeof(int));

        ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
                         nprocs, nprocs_recv, sum);
    }

/* for partial recvs, restore original lengths */
    for (i=0; i<nprocs; i++) 
        if (partial_recv[i]) {
            k = start_pos[i] + count[i] - 1;
            others_req[i].lens[k] = tmp_len[i];
        }
    ADIOI_Free(tmp_len);

    /* check if there are any holes. If yes, must do read-modify-write.
     * holes can be in three places.  'middle' is what you'd expect: the
     * processes are operating on noncontiguous data.  But holes can also show
     * up at the beginning or end of the file domain (see John Bent ROMIO REQ
     * #835). Missing these holes would result in us writing more data than
     * received by everyone else. */

    *hole = 0;
    if (sum) {
        if (off != srt_off[0]) /* hole at the front */
            *hole = 1;
        else { /* coalesce the sorted offset-length pairs */
            for (i=1; i<sum; i++) {
                if (srt_off[i] <= srt_off[0] + srt_len[0]) {
		    /* ok to cast: operating on cb_buffer_size chunks */
		    int new_len = (int)srt_off[i] + srt_len[i] - (int)srt_off[0];
		    if (new_len > srt_len[0]) srt_len[0] = new_len;
		}
		else
			break;
	    }
            if (i < sum || size != srt_len[0]) /* hole in middle or end */
                *hole = 1;
	}

        ADIOI_Free(srt_off);
        ADIOI_Free(srt_len);
    }

    if (nprocs_recv) {
	if (*hole) {
	    ADIO_ReadContig(fd, write_buf, size, MPI_BYTE, 
			    ADIO_EXPLICIT_OFFSET, off, &status, &err);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err != MPI_SUCCESS) {
		*error_code = MPIO_Err_create_code(err,
						   MPIR_ERR_RECOVERABLE, myname,
						   __LINE__, MPI_ERR_IO,
						   "**ioRMWrdwr", 0);
		return;
	    } 
	    /* --END ERROR HANDLING-- */
	}
    }

    nprocs_send = 0;
    for (i=0; i < nprocs; i++) if (send_size[i]) nprocs_send++;

    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        requests = (MPI_Request *)
	    ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); 
        send_req = requests;
    }
    else {
        requests = (MPI_Request *) 	
            ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); 
        /* +1 to avoid a 0-size malloc */

        /* post receives */
        j = 0;
        for (i=0; i<nprocs; i++) {
            if (recv_size[i]) {
                MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
                          fd->comm, requests+j);
                j++;
            }
        }
	send_req = requests + nprocs_recv;
    }

/* post sends. if buftype_is_contig, data can be directly sent from
   user buf at location given by buf_idx. else use send_buf. */

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5032, 0, NULL);
#endif
    if (buftype_is_contig) {
	j = 0;
	for (i=0; i < nprocs; i++) 
	    if (send_size[i]) {
		MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], 
  		            MPI_BYTE, i,  myrank+i+100*iter, fd->comm, 
                                  send_req+j);
		j++;
                buf_idx[i] += send_size[i];
	    }
    }
    else if (nprocs_send) {
	/* buftype is not contig */
	send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*));
	for (i=0; i < nprocs; i++) 
	    if (send_size[i]) 
		send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);

	ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf,
                           offset_list, len_list, send_size, 
			   send_req,
                           sent_to_proc, nprocs, myrank, 
                           contig_access_count,
                           min_st_offset, fd_size, fd_start, fd_end, 
                           send_buf_idx, curr_to_proc, done_to_proc, iter,
                           buftype_extent);
        /* the send is done in ADIOI_Fill_send_buffer */
    }

    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        j = 0;
        for (i=0; i<nprocs; i++) {
            MPI_Status wkl_status;
	    if (recv_size[i]) {
	        MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
		          fd->comm, &wkl_status);
	        j++;
	    }
        }
    }

    for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i);
    ADIOI_Free(recv_types);
    
    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \
                                         sizeof(MPI_Status)); 
         /* +1 to avoid a 0-size malloc */
    }
    else {
        statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \
                                     sizeof(MPI_Status)); 
        /* +1 to avoid a 0-size malloc */
    }

#ifdef NEEDS_MPI_TEST
    i = 0;
    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        while (!i) MPI_Testall(nprocs_send, send_req, &i, statuses);
    }
    else {
        while (!i) MPI_Testall(nprocs_send+nprocs_recv, requests, &i, statuses);
    }
#else
    if (fd->atomicity)
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        MPI_Waitall(nprocs_send, send_req, statuses);
    else
        MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
#endif

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5033, 0, NULL);
#endif
    ADIOI_Free(statuses);
    ADIOI_Free(requests);
    if (!buftype_is_contig && nprocs_send) {
	for (i=0; i < nprocs; i++) 
	    if (send_size[i]) ADIOI_Free(send_buf[i]);
	ADIOI_Free(send_buf);
    }
}
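On the receive side, ADIOI_W_Exchange_data builds hindexed datatypes from absolute memory addresses (others_req[i].mem_ptrs) and then receives with count 1 at MPI_BOTTOM, so one message lands directly in many scattered locations. The stripped-down sketch below shows that idiom in isolation; the buffer, block sizes, and offsets are hypothetical, not ROMIO code.

/* Receive into two disjoint regions of write_buf with a single MPI_Recv by
 * using absolute displacements and MPI_BOTTOM. Illustrative sketch only. */
#include <mpi.h>

void recv_scattered(char *write_buf, int src, MPI_Comm comm)
{
    int blocklens[2] = { 64, 128 };          /* assumed block sizes */
    MPI_Aint disps[2];
    MPI_Datatype scatter_type;
    MPI_Status status;

    /* Absolute addresses of the two target regions. */
    MPI_Get_address(write_buf + 0,   &disps[0]);
    MPI_Get_address(write_buf + 256, &disps[1]);

    MPI_Type_create_hindexed(2, blocklens, disps, MPI_BYTE, &scatter_type);
    MPI_Type_commit(&scatter_type);

    /* Count 1 of the derived type; the absolute displacements are resolved
     * relative to MPI_BOTTOM, as with the recv_types above. */
    MPI_Recv(MPI_BOTTOM, 1, scatter_type, src, 0, comm, &status);

    MPI_Type_free(&scatter_type);
}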
int main(int argc, char **argv)
{	
    /* Variable declaration */
    float 
        *globalArray = NULL,
        *bBucket = NULL,
        **sBucket = NULL,
        *tempArray = NULL,
        value;

    int
        dataPerProc, numprocs, rank, i, p, n,
        size, maxTempSize, assignedIndex,
        bSize, bMaxSize, bTotal,
        *sSize, sMaxSize, sTotal, sAssigned;
        
    MPI_Status status, recvStatus;
    MPI_Request sendRequest, recvRequest;

    /* Initialize MPI */
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank    );

    /* Initializing from CL arguments */
	n = getProblemSize(argc, argv, rank, numprocs);
	if(n==-1)
	{
		MPI_Finalize();
		return EXIT_FAILURE;
	}
    else
    {
        dataPerProc = n/numprocs;
        maxTempSize = n;

        bMaxSize = n;
        sMaxSize = dataPerProc;

        bTotal = numprocs;
        sTotal = numprocs;
    }

    /* Initializing buckets and arrays */
	if(rank==0) globalArray = initialiseRandomList(n);
    tempArray = (float*) malloc(sizeof(float)*maxTempSize);

    bBucket = (float*) malloc(sizeof(float)*bMaxSize);
	sBucket = (float**) malloc(sizeof(float*)*sTotal);
	for(i=0; i<sTotal; i++) sBucket[i] = (float*) malloc(sizeof(float)*sMaxSize);

    bSize = dataPerProc;
    sSize = (int*) calloc(numprocs, sizeof(int));


    /* Start timer */
	double startTime = MPI_Wtime();
	if( rank==0 ) printf( "Starting iteration; may take a few seconds ...\n" );


    /* Scatter global array into big buckets */
	displayFullList(globalArray, rank, numprocs, n);
    MPI_Scatter(globalArray, dataPerProc, MPI_FLOAT, bBucket, dataPerProc, MPI_FLOAT, 0, MPI_COMM_WORLD); 
    displayBigBuckets(bBucket, bSize, rank, numprocs, n);


    /* Pour each rank's big buckets into the correct small bucket */
    /* Step 1 to Step 2 of Lecture 8 */
    for(i=0; i<dataPerProc; i++)
    {
        value = bBucket[i];

        sAssigned = (int) (value * numprocs);
        if(sAssigned == numprocs) sAssigned--;  /* Guard against value*numprocs rounding up to
                                                 * numprocs (e.g. when value == 1.0f), since the
                                                 * last bucket index is numprocs-1. */
        assignedIndex = sSize[sAssigned];
        sBucket[sAssigned][assignedIndex] = value;
        sSize[sAssigned] += 1;
    }
//    for(i=0; i<sTotal; i++) printf("\nRank %i: sBucket=%i, Size=%i\n", rank, i, sSize[i]);
    displaySmallBuckets(sBucket, sSize, rank, numprocs, n);


    /* Pour each rank's small bucket back into the correct big bucket.
     * Non-blocking communication is used to prevent deadlock when the
     * problem size is large (although there is also a solution that does
     * not need non-blocking communication). */
    /* Step 2 to Step 3 of Lecture 8 */
    for(i=0; i<bMaxSize; i++) bBucket[i] = 0.0;
    bSize = 0;

    for(p=0; p<sTotal; p++)
    {   
        if(p==rank)
        {
            for(i=0; i<sSize[p]; i++)
                bBucket[bSize + i] = sBucket[p][i];
            bSize += sSize[p];
        }
        else
        {
            MPI_Isend(sBucket[p], sSize[p], MPI_FLOAT, p, 0, MPI_COMM_WORLD, &sendRequest);

            MPI_Irecv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &recvRequest);
            MPI_Wait(&recvRequest, &recvStatus);
            MPI_Get_count(&recvStatus, MPI_FLOAT, &size);

            /* Complete the send as well, so sendRequest can be reused safely
             * on the next iteration. */
            MPI_Wait(&sendRequest, MPI_STATUS_IGNORE);

            for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i];
            bSize += size;
        }
    }
    /* All small buckets must be fully poured into the big buckets
     * before the big buckets are sorted serially. */
    MPI_Barrier(MPI_COMM_WORLD);
    displayBigBuckets(bBucket, bSize, rank, numprocs, n);


    /* Swirl each rank's big bucket until sorted */
    /* Step 3 to Step 4 of Lecture 8 */
    serialQuicksort(bBucket, 0, bSize);
    displayBigBuckets(bBucket, bSize, rank, numprocs, n);
    

    /* Concatenate each rank's big bucket */
    /* Step 4 to Step 5 of Lecture 8 */
    if(rank!=0) MPI_Send(bBucket, bSize, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
    else
    {
        for(p=1; p<bTotal; p++)
        {
            MPI_Recv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &status);
            MPI_Get_count(&status, MPI_FLOAT, &size);

            for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i];
            bSize += size;
        }
        globalArray = bBucket;
    }


    /* End timer */
	double timeTaken = MPI_Wtime() - startTime;
	if( rank==0 ) 
    {
        printf( "Finished. Time taken: %g seconds\n", timeTaken );

//        FILE *f = fopen("data.txt", "a");
//        fprintf(f, "%g\n", timeTaken); 
    }

	/*
		Display the final (hopefully sorted) list, and check all entries are indeed in order.
	*/
	displayFullList(globalArray,rank,numprocs,n);			/* Again, nothing is displayed if n>100. */
	if(rank==0)
	{
		for(i=0; i<n-1; i++)
			if(globalArray[i] > globalArray[i+1])
			{
				printf("List not sorted correctly.\n");
				break;
			}
		if(i==n-1) printf("List correctly sorted.\n");
	}


	/*
		Clear up and quit. As ever, each malloc() needs a free().
	*/
    free(bBucket);  // Also frees global array
    for(i=0; i<sTotal; i++) free(sBucket[i]);
    free(sBucket);

    free(sSize);
    free(tempArray);

	MPI_Finalize();
	return EXIT_SUCCESS;
}
Example #7
/*
 * Performs sparse matrix-vector multiplication.
 */
void
pdgsmv
(
 int_t  abs,               /* Input. Do abs(A)*abs(x). */
 SuperMatrix *A_internal,  /* Input. Matrix A permuted by columns.
			      The column indices are translated into
			      the relative positions in the gathered x-vector.
			      The type of A can be:
			      Stype = NR_loc; Dtype = SLU_D; Mtype = GE. */
 gridinfo_t *grid,         /* Input */
 pdgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */
 double x[],       /* Input. The distributed source vector */
 double ax[]       /* Output. The distributed destination vector */
)
{
    NRformat_loc *Astore;
    int iam, procs;
    int_t i, j, p, m, m_loc, n, fst_row, jcol;
    int_t *colind, *rowptr;
    int   *SendCounts, *RecvCounts;
    int_t *ind_tosend, *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, TotalValSend;
    double *nzval, *val_tosend, *val_torecv;
    double zero = 0.0;
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdgsmv()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A_internal->Store;
    m = A_internal->nrow;
    n = A_internal->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = (double *) Astore->nzval;
    extern_start = gsmv_comm->extern_start;
    ind_torecv = gsmv_comm->ind_torecv;
    ptr_ind_tosend = gsmv_comm->ptr_ind_tosend;
    ptr_ind_torecv = gsmv_comm->ptr_ind_torecv;
    SendCounts = gsmv_comm->SendCounts;
    RecvCounts = gsmv_comm->RecvCounts;
    val_tosend = (double *) gsmv_comm->val_tosend;
    val_torecv = (double *) gsmv_comm->val_torecv;
    TotalValSend = gsmv_comm->TotalValSend;

    /* ------------------------------------------------------------
       COPY THE X VALUES INTO THE SEND BUFFER.
       ------------------------------------------------------------*/
    for (i = 0; i < TotalValSend; ++i) {
        j = ind_torecv[i] - fst_row; /* Relative index in x[] */
	val_tosend[i] = x[j];
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE X VALUES.
       ------------------------------------------------------------*/
    if ( !(send_req = (MPI_Request *)
	   SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + procs;
    for (p = 0; p < procs; ++p) {
        if ( RecvCounts[p] ) {
	    MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p],
                      MPI_DOUBLE, p, iam,
                      grid->comm, &send_req[p]);
	}
	if ( SendCounts[p] ) {
	    MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p],
                      MPI_DOUBLE, p, p,
                      grid->comm, &recv_req[p]);
	}
    }
    
    /* ------------------------------------------------------------
       PERFORM THE ACTUAL MULTIPLICATION.
       ------------------------------------------------------------*/
    if ( abs ) { /* Perform abs(A)*abs(x) */
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
	    ax[i] = 0.0;
	    for (j = rowptr[i]; j < extern_start[i]; ++j) {
	        jcol = colind[j];
		ax[i] += fabs(nzval[j]) * fabs(x[jcol]);
	    }
        }

        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
	    if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
	    for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
	        jcol = colind[j];
	        ax[i] += fabs(nzval[j]) * fabs(val_torecv[jcol]);
	    }
	}
    } else {
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
	    ax[i] = zero;
	    for (j = rowptr[i]; j < extern_start[i]; ++j) {
	        jcol = colind[j];
		ax[i] += nzval[j] * x[jcol];
	    }
        }

        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
	    if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
	    for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
	        jcol = colind[j];
	        ax[i] += nzval[j] * val_torecv[jcol];
	    }
	}
    }

    SUPERLU_FREE(send_req);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgsmv()");
#endif

} /* PDGSMV */
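/*
 * The structure of pdgsmv() above is the classic overlap pattern: post the
 * Isend/Irecv pairs, multiply the purely local part of every row, wait for
 * the exchanged x-values, then finish the rows that reference external
 * columns. A stripped-down sketch of that skeleton for a single peer
 * (the names here are placeholders, not SuperLU_DIST API):
 */
#include <mpi.h>

void spmv_overlap_sketch(double *send_buf, int send_n,
                         double *recv_buf, int recv_n,
                         int peer, MPI_Comm comm)
{
    MPI_Request req[2];

    MPI_Isend(send_buf, send_n, MPI_DOUBLE, peer, 0, comm, &req[0]);
    MPI_Irecv(recv_buf, recv_n, MPI_DOUBLE, peer, 0, comm, &req[1]);

    /* ... multiply the local block, which needs no remote data ... */

    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

    /* ... multiply the external block using the received values ... */
}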
int main(int argc, char **argv){
	uint32_t datarows, datacolumns;
	uint32_t i, j, k;
	int world_size, world_rank, rc;


	//Check input arguments
	if (argc != 2) {
		fprintf(stderr,"USAGE: %s <input_filename>\n", argv[0]);
		exit(1);
	}

	// Start MPI
	rc = MPI_Init(&argc,&argv); 
	if (rc != MPI_SUCCESS) {
		printf ("Error starting MPI program. Terminating.\n"); MPI_Abort(MPI_COMM_WORLD, rc);
	}

	// Get world size (number of MPI processes) and world rank (# of this process)
	MPI_Comm_size(MPI_COMM_WORLD,&world_size);
	MPI_Comm_rank(MPI_COMM_WORLD,&world_rank);


	if (world_rank==0){
		// Declare variables used only on the root node
		int buf[world_size-1], nextReady;
		MPI_Request reqs[world_size-1];
		MPI_Status stats[world_size-1];

		// Print format of output 
		printf("Kv\tMbulk\tTliq\tTsatb\tTf\tTsat\tZrsat\tZrf\tFf\tSiO2\tZrbulk\tMZr\tTcryst\n");

		// Import 2-d source data array as a flat double array. Format:
		// SiO2, TiO2, Al2O3, Fe2O3, Cr2O3, FeO, MnO, MgO, NiO, CoO, CaO, Na2O, K2O, P2O5, CO2, H2O, Zr, Kv;
		double** const data = csvparse(argv[1],',', &datarows, &datacolumns);

		// Listen for task requests from the worker nodes
		for (i=1; i<world_size; i++){
			//        *buf, count, datatype, dest, tag, comm, *request
			MPI_Irecv(&buf[i-1], 1, MPI_INT, i, 0, MPI_COMM_WORLD, &reqs[i-1]);
		}

		// Once any worker asks for a new task, send next task to that worker and keep listening
		for (i=0; i<datarows; i++){
			MPI_Waitany(world_size-1, reqs, &nextReady, stats);
			//       *buf, count, datatype, dest, tag, comm
			MPI_Send(data[i], 18, MPI_DOUBLE, nextReady+1, 1, MPI_COMM_WORLD);
			//        *buf, count, datatype, source, tag, comm, *request
			MPI_Irecv(&buf[nextReady], 1, MPI_INT, nextReady+1, 0, MPI_COMM_WORLD, &reqs[nextReady]);
		}

		// Wait for all workers to complete, then send the stop signal
		MPI_Waitall(world_size-1, reqs, stats);	
		double stop[18] = {-1};
		for (i=1; i<world_size; i++){
			MPI_Send(&stop, 18, MPI_DOUBLE, i, 1, MPI_COMM_WORLD);	
		}
	}

	else {
		// Declare variables used only on the worker nodes
		MPI_Request sReq;
		MPI_Status sStat;
		double ic[18], Kd, iKd;
		FILE *fp;
//		char prefix[200], cmd_string[500];
		char* prefix = malloc(500*sizeof(char));
		char* cmd_string = malloc(1000*sizeof(char));

		// Simulation parameters
		/**********************************************************/
		// Version to run MELTS in (MELTS or pMELTS)
		const char version[]="pMELTS";
		// Melts mode (isobaric, ptpath, etc)
		const char mode[]="isobaric";

		// fO2 buffer to use (None, FMQ, etc.)
		const char fo2Buffer[]="FMQ";
		// fO2 offset from buffer
		double fo2Delta=1;

		// Initial temperature (Celsius)
		double Ti=1700;
		//Initial Pressure (bar)
		double Pi=600;
		//Temperature step size in each simulation
		const int deltaT=-10;
		// Pressure step size;
		const int deltaP=0;

		// Stop simulations at a given percent melt
		const double minPercentMelt=10;

		// Variables that control size and location of the simulation
		/***********************************************************/	
		// Location of scratch directory (ideally local scratch for each node)
		// This location may vary on your system - contact your sysadmin if unsure
//		const char scratchdir[]="/scratch/gpfs/cbkeller/";
		const char scratchdir[]="/scratch/";

		// Variables that determine how much memory to allocate to imported results
		const int maxMinerals=100, maxSteps=1700/abs(deltaT), maxColumns=50;
		/***********************************************************/


		// Malloc space for the imported melts array
		double **rawMatrix=mallocDoubleArray(maxMinerals*maxSteps,maxColumns);
		double ***melts=malloc(maxMinerals*sizeof(double**));
		char **names=malloc(maxMinerals*sizeof(char*));
		char ***elements=malloc(maxMinerals*sizeof(char**));
		int *meltsrows=malloc(maxMinerals*sizeof(int)), *meltscolumns=malloc(maxMinerals*sizeof(int));
		for (i=0; i<maxMinerals; i++){
			names[i]=malloc(30*sizeof(char));
			elements[i]=malloc(maxColumns*sizeof(char*));
			for (k=0; k<maxColumns; k++){
				elements[i][k]=malloc(30*sizeof(char));
			}
		}
		int minerals;


		//  Variables for finding saturation temperature
		int row, col, P, T, mass, SiO2, TiO2, Al2O3, Fe2O3, Cr2O3, FeO, MnO, MgO, NiO, CoO, CaO, Na2O, K2O, P2O5, CO2, H2O;
		int fspCaO, fspNa2O, fspK2O, oxideTiO2, oxideFe2O3, oxideFeO, oxideMnO;
		double M, Tf, Tsat, Tsatbulk, Ts, Tsmax, Zrf, Zrsat, MZr, MZrnow, Tcryst;
		double AnKd, AbKd, OrKd, IlmKd, MtKd;

		while (1) {
			// Ask root node for new task
			//       *buf, count, datatype, dest, tag, comm, *request
			MPI_Isend(&world_rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &sReq);
			//       *buf, count, datatype, source, tag, comm, *status
			MPI_Recv(&ic, 18, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &sStat);

			// Exit loop if stop signal received
			if (ic[0]<0) break;

			//Configure working directory
			sprintf(prefix,"%sout%i_%.0f/", scratchdir, world_rank, ic[17]);
			sprintf(cmd_string,"mkdir -p %s", prefix);
			system(cmd_string);

//			//Set water
//			ic[15]=3.0;
//			//Set CO2
//			ic[14]=0.1;
			
			//Run MELTS
			runmelts(prefix,ic,version,"isobaric",fo2Buffer,fo2Delta,"1\nsc.melts\n10\n1\n3\n1\nliquid\n1\n0.99\n1\n10\n0\n4\n0\n","","!",Ti,Pi,deltaT,deltaP,0.005);

			// If simulation failed, clean up scratch directory and move on to next simulation
			sprintf(cmd_string,"%sPhase_main_tbl.txt", prefix);
			if ((fp = fopen(cmd_string, "r")) == NULL) {
				fprintf(stderr, "%s : MELTS equilibration failed to produce output.\n", prefix);
				sprintf(cmd_string,"rm -r %s", prefix);
				system(cmd_string);
				continue;
			}

			// Import results, if they exist. Format:
			// Pressure Temperature mass S H V Cp viscosity SiO2 TiO2 Al2O3 Fe2O3 Cr2O3 FeO MnO MgO NiO CoO CaO Na2O K2O P2O5 H2O
			minerals=maxMinerals;
			importmelts(maxSteps, maxColumns, prefix, melts, rawMatrix, meltsrows, meltscolumns, names, elements, &minerals);
			if (minerals<1 || strcmp(names[0],"liquid_0")!=0) {
				fprintf(stderr, "%s : MELTS equilibration failed to calculate liquid composition.\n", prefix);
				sprintf(cmd_string,"rm -r %s", prefix);
				system(cmd_string);
				continue;
			}
			// Can delete temp files after we've read them
			sprintf(cmd_string,"rm -r %s", prefix);
			system(cmd_string);


			// Find the columns containing useful elements
			for(col=0; col<meltscolumns[0]; col++){
				if (strcmp(elements[0][col], "Pressure")==0) P=col;
				else if (strcmp(elements[0][col], "Temperature")==0) T=col;
				else if (strcmp(elements[0][col], "mass")==0) mass=col;
				else if (strcmp(elements[0][col], "SiO2")==0) SiO2=col;
				else if (strcmp(elements[0][col], "TiO2")==0) TiO2=col;
				else if (strcmp(elements[0][col], "Al2O3")==0) Al2O3=col;
				else if (strcmp(elements[0][col], "Fe2O3")==0) Fe2O3=col;
				else if (strcmp(elements[0][col], "Cr2O3")==0) Cr2O3=col;
				else if (strcmp(elements[0][col], "FeO")==0) FeO=col;
				else if (strcmp(elements[0][col], "MnO")==0) MnO=col;
				else if (strcmp(elements[0][col], "MgO")==0) MgO=col;
				else if (strcmp(elements[0][col], "NiO")==0) NiO=col;
				else if (strcmp(elements[0][col], "CoO")==0) CoO=col;
				else if (strcmp(elements[0][col], "CaO")==0) CaO=col;
				else if (strcmp(elements[0][col], "Na2O")==0) Na2O=col;
				else if (strcmp(elements[0][col], "K2O")==0) K2O=col;
				else if (strcmp(elements[0][col], "P2O5")==0) P2O5=col;
				else if (strcmp(elements[0][col], "CO2")==0) CO2=col;
				else if (strcmp(elements[0][col], "H2O")==0) H2O=col;
			}

			// Find the columns containing useful elements for other minerals
			for (i=1; i<minerals; i++){
				if (strncasecmp(names[i],"feldspar",8)==0){
					for(col=0; col<meltscolumns[i]; col++){
						if (strcmp(elements[i][col], "CaO")==0) fspCaO=col;
						else if (strcmp(elements[i][col], "Na2O")==0) fspNa2O=col;
						else if (strcmp(elements[i][col], "K2O")==0) fspK2O=col;
					}

				} else if (strncasecmp(names[i],"rhm_oxide",9)==0){
					for(col=0; col<meltscolumns[i]; col++){
						if (strcmp(elements[i][col], "TiO2")==0) oxideTiO2=col;
						else if (strcmp(elements[i][col], "Fe2O3")==0) oxideFe2O3=col;
						else if (strcmp(elements[i][col], "FeO")==0) oxideFeO=col;
						else if (strcmp(elements[i][col], "MnO")==0) oxideMnO=col;
					}

				}
			}
			// Initial saturation state
			M = meltsM(&melts[0][0][SiO2]);
			Zrf = ic[16]; // Zirconium content in melt
			Tf = melts[0][0][T]; // Current temperature
			Zrsat = tzircZr(M, Tf); // Zirconium required for saturation
			Tsatbulk = tzirc(M, Zrf); // Temperature required for saturation

			// Calculate saturation temperature and minimum necessary zirconium content	
			Tsat=0;
			Tcryst=0;
			MZr=0;
			Tsmax = Tsatbulk;
			for(row=1; row<(meltsrows[0]-1); row++){
				// Calculate bulk zircon partition coefficient at present step
				Kd = 0;
				for (i=1; i<minerals; i++){
					// See what minerals might be crystallizing at this temperature
					// so we can find their GERM partition coefficients
					for (j=0; j<meltsrows[i]; j++){
						if (fabs(melts[0][row][T]-melts[i][j][T]) < 0.01){
							if (strncasecmp(names[i],"feldspar",8)==0){
								AnKd = getGERMKd("Anorthite","Zr",melts[0][row][SiO2]);
								AbKd = getGERMKd("Albite","Zr",melts[0][row][SiO2]);
								OrKd = getGERMKd("Orthoclase","Zr",melts[0][row][SiO2]);
								if (isnan(AnKd)) AnKd=0;
								if (isnan(OrKd)) OrKd=0;
								if (isnan(AbKd)) AbKd = (AnKd + OrKd)/2;

								iKd = (220.1298+56.18)/56.18*melts[i][j][fspCaO]/100 * AnKd\
								      +(228.2335+30.99)/30.99*melts[i][j][fspNa2O]/100 * AbKd\
								      +(228.2335+47.1)/47.1*melts[i][j][fspK2O]/100 * OrKd;

							} else if (strncasecmp(names[i],"rhm_oxide",9)==0){
								IlmKd = getGERMKd("Ilmenite","Zr",melts[0][row][SiO2]);
								MtKd = getGERMKd("Magnetite","Zr",melts[0][row][SiO2]);
								if (isnan(IlmKd)) IlmKd = 0;
								if (isnan(MtKd)) MtKd = 0;

								iKd = (melts[i][j][oxideTiO2]+melts[i][j][oxideMnO]+(melts[i][j][oxideTiO2]\
									*(71.8444/79.8768)-melts[i][j][oxideMnO]*(71.8444/70.9374)))/100 * IlmKd\
								      + (1 - (melts[i][j][oxideTiO2]+melts[i][j][oxideMnO]+(melts[i][j][oxideTiO2]\
									*(71.8444/79.8768)-melts[i][j][oxideMnO]*(71.8444/70.9374)))/100) * MtKd;
							} else {
								iKd = getGERMKd(names[i],"Zr",melts[0][row][SiO2]);
							}

							if (isnan(iKd)){iKd = 0;}
							Kd += iKd * melts[i][j][mass];
						}
					}		
				}
				Kd = Kd / (100 - melts[0][row][mass]);

				//Calculate melt M and [Zr]
				M = meltsM(&melts[0][row][SiO2]);
				Zrf = ic[16]*100/(melts[0][row][mass] + Kd*(100-melts[0][row][mass])); // Zirconium content in melt
				Tf = melts[0][row][T]; // Current temperature
				Zrsat = tzircZr(M, Tf); // Zirconium required for saturation
				Ts = tzirc(M, Zrf); // Temperature required for saturation

				// Determine how much zircon is saturated
				if (Zrf>Zrsat){
					MZrnow = melts[0][row][mass]/100*(Zrf-Zrsat);
					if (MZr < MZrnow){
						Tcryst += (MZrnow - MZr)*melts[0][row][T];
						MZr = MZrnow;
					}
				}

				// Keep track of maximum saturation temperature
				if (Ts > Tsmax){
					Tsmax = Ts;
				}

				// Check if we've cooled below the saturation temperature yet
				if (Tsat==0 && Ts > melts[0][row][T]){
					Tsat = Ts;
				}
				// Stop when we get to maximum SiO2
				if (melts[0][row-1][SiO2]>(melts[0][row][SiO2])+0.01){
					row--;
					break;
				}

				// Or when remaining melt falls below minimum percent
				if (melts[0][row][mass]<minPercentMelt){
					row--;
					break;
				}
			}

			// If zircon never saturated, check what the best (highest) saturation temperature was
			if (Tsat==0 || MZr==0){
				Tsat = Tsmax;
				Tcryst = NAN;
			} else {
				Tcryst = Tcryst / MZr;
			}

			// Get back bulk M
			M = meltsM(&melts[0][0][SiO2]);
			// Print results. Format:
			// Kv, Mbulk, Tliquidus, Tsatbulk, Tf, Tsat, Zrsat, Zrf, Ff, SiO2, Zrbulk, MZr, Tcryst
			printf("%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\t%g\n", ic[17], M, melts[0][0][T], Tsatbulk, Tf, Tsat, Zrsat, Zrf, melts[0][row][mass], melts[0][0][SiO2], ic[16], MZr, Tcryst);
		}
	}
	MPI_Finalize();
	return 0;
}
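/*
 * The root/worker loops above form a dynamic task pool: a worker announces
 * readiness by sending its rank, the root waits on one outstanding Irecv per
 * worker with MPI_Waitany and replies with the next row of input, and a
 * sentinel value tells the workers to stop. A minimal self-contained sketch
 * of that handshake (integer "tasks" instead of the MELTS data; assumes at
 * least one worker rank exists):
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == 0) {
        const int ntasks = 100;
        int who, ready[size - 1];
        MPI_Request reqs[size - 1];
        /* One outstanding "I am ready" receive per worker. */
        for (int i = 1; i < size; i++)
            MPI_Irecv(&ready[i - 1], 1, MPI_INT, i, 0, MPI_COMM_WORLD, &reqs[i - 1]);
        for (int t = 0; t < ntasks; t++) {
            MPI_Waitany(size - 1, reqs, &who, MPI_STATUS_IGNORE);
            MPI_Send(&t, 1, MPI_INT, who + 1, 1, MPI_COMM_WORLD);
            MPI_Irecv(&ready[who], 1, MPI_INT, who + 1, 0, MPI_COMM_WORLD, &reqs[who]);
        }
        /* Wait for the final ready messages, then send the stop sentinel. */
        MPI_Waitall(size - 1, reqs, MPI_STATUSES_IGNORE);
        int stop = -1;
        for (int i = 1; i < size; i++)
            MPI_Send(&stop, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
    } else {
        int task;
        while (1) {
            MPI_Send(&rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
            MPI_Recv(&task, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            if (task < 0) break;
            printf("rank %d got task %d\n", rank, task);
        }
    }
    MPI_Finalize();
    return 0;
}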
Exemple #9
0
int main( int argc, char **argv )
 {
  int locId ;
  int data [i_ntotin] ;

  MPI_Init(&argc, &argv) ;
  MPI_Comm_rank(MPI_COMM_WORLD, &locId) ;

  if(locId == 0) {

    /* The server... */

    MPI_Status status[2] ;
    MPI_Request events [2] ;

    int eventId ;

    int dstId = 1 ;

    int i ;

    for(i = 0 ; i < i_ntotin ; i++)
      data [i] = i + 1 ;

    events [0] = MPI_REQUEST_NULL ;
    events [1] = MPI_REQUEST_NULL ;

    MPI_Isend(data, i_ntotin, MPI_INT, dstId, DAR,
              MPI_COMM_WORLD, events + 1) ;
        /* enable send of data */

    /*_begin_trace_code  */
    /* printf("locId = %d: MPI_Isend(%x, %d, %x, %d, %d, %x, %x)\n",
      locId, data, i_ntotin, MPI_INT, dstId, DAR, MPI_COMM_WORLD, events [1]); 
      */
    /*_end_trace_code  */

    /*_begin_trace_code  */
    /* printf("locId = %d: MPI_Waitany(%d, [%x, %x], %x %x)...",
      locId, 2, events [0], events [1], &eventId, &status) ; */
    /*_end_trace_code  */

    MPI_Waitany(2, events, &eventId, status) ;

    /*_begin_trace_code  */
    printf("done.  eventId = %d\n", eventId) ;
    /*_end_trace_code  */
  }

  if(locId == 1) {

    /* The Client...  */

    MPI_Status status ;

    int srcId = MPI_ANY_SOURCE ;

    /*_begin_trace_code  */
    /*
    printf("locId = %d: MPI_Recv(%x, %d, %x, %d, %d, %x, %x)...",
      locId, data, i_ntotin, MPI_INT, srcId, DAR, MPI_COMM_WORLD, &status) ;
      */
    /*_end_trace_code  */

    MPI_Recv(data, i_ntotin, MPI_INT, srcId, DAR,
             MPI_COMM_WORLD, &status) ;

    /*_begin_trace_code  */
    /*printf("done.\n") ;*/
    /*_end_trace_code  */

    /*
    printf("locId = %d: data [0] = %d, data [%d] = %d\n",
      locId, data [0], i_ntotin - 1, data [i_ntotin - 1]) ;
       */
  }

  MPI_Barrier( MPI_COMM_WORLD );
  if (locId == 0)
      printf( "Test complete\n" );
  MPI_Finalize() ;
  return 0;
}
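/*
 * One detail worth noting in the server branch above: events[0] stays
 * MPI_REQUEST_NULL, and MPI_Waitany simply skips null entries, returning the
 * index of the single active request once it completes (or MPI_UNDEFINED if
 * every entry is null). A tiny standalone illustration of that behaviour:
 */
#include <mpi.h>
#include <assert.h>

void waitany_null_demo(MPI_Comm comm, int peer, int *payload, int n)
{
    MPI_Request reqs[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    int which = -1;

    /* Only slot 1 is active, so Waitany must report index 1. */
    MPI_Irecv(payload, n, MPI_INT, peer, 0, comm, &reqs[1]);
    MPI_Waitany(2, reqs, &which, MPI_STATUS_IGNORE);
    assert(which == 1);
}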
void peanoclaw::records::RepositoryStatePacked::send(int destination, int tag, bool exchangeOnlyAttributesMarkedWithParallelise, bool communicateBlocking) {
   _senderDestinationRank = destination;
   
   if (communicateBlocking) {
   
      const int result = MPI_Send(this, 1, exchangeOnlyAttributesMarkedWithParallelise ? Datatype : FullDatatype, destination, tag, tarch::parallel::Node::getInstance().getCommunicator());
      if  (result!=MPI_SUCCESS) {
         std::ostringstream msg;
         msg << "was not able to send message peanoclaw::records::RepositoryStatePacked "
         << toString()
         << " to node " << destination
         << ": " << tarch::parallel::MPIReturnValueToString(result);
         _log.error( "send(int)",msg.str() );
      }
      
   }
   else {
   
   MPI_Request* sendRequestHandle = new MPI_Request();
   MPI_Status   status;
   int          flag = 0;
   int          result;
   
   clock_t      timeOutWarning   = -1;
   clock_t      timeOutShutdown  = -1;
   bool         triggeredTimeoutWarning = false;
   
   if (exchangeOnlyAttributesMarkedWithParallelise) {
      result = MPI_Isend(
         this, 1, Datatype, destination,
         tag, tarch::parallel::Node::getInstance().getCommunicator(),
         sendRequestHandle
      );
      
   }
   else {
      result = MPI_Isend(
         this, 1, FullDatatype, destination,
         tag, tarch::parallel::Node::getInstance().getCommunicator(),
         sendRequestHandle
      );
      
   }
   if  (result!=MPI_SUCCESS) {
      std::ostringstream msg;
      msg << "was not able to send message peanoclaw::records::RepositoryStatePacked "
      << toString()
      << " to node " << destination
      << ": " << tarch::parallel::MPIReturnValueToString(result);
      _log.error( "send(int)",msg.str() );
   }
   result = MPI_Test( sendRequestHandle, &flag, &status );
   while (!flag) {
      if (timeOutWarning==-1)   timeOutWarning   = tarch::parallel::Node::getInstance().getDeadlockWarningTimeStamp();
      if (timeOutShutdown==-1)  timeOutShutdown  = tarch::parallel::Node::getInstance().getDeadlockTimeOutTimeStamp();
      result = MPI_Test( sendRequestHandle, &flag, &status );
      if (result!=MPI_SUCCESS) {
         std::ostringstream msg;
         msg << "testing for finished send task for peanoclaw::records::RepositoryStatePacked "
         << toString()
         << " sent to node " << destination
         << " failed: " << tarch::parallel::MPIReturnValueToString(result);
         _log.error("send(int)", msg.str() );
      }
      
      // deadlock aspect
      if (
         tarch::parallel::Node::getInstance().isTimeOutWarningEnabled() &&
         (clock()>timeOutWarning) &&
         (!triggeredTimeoutWarning)
      ) {
         tarch::parallel::Node::getInstance().writeTimeOutWarning(
         "peanoclaw::records::RepositoryStatePacked",
         "send(int)", destination,tag,1
         );
         triggeredTimeoutWarning = true;
      }
      if (
         tarch::parallel::Node::getInstance().isTimeOutDeadlockEnabled() &&
         (clock()>timeOutShutdown)
      ) {
         tarch::parallel::Node::getInstance().triggerDeadlockTimeOut(
         "peanoclaw::records::RepositoryStatePacked",
         "send(int)", destination,tag,1
         );
      }
      tarch::parallel::Node::getInstance().receiveDanglingMessages();
   }
   
   delete sendRequestHandle;
   #ifdef Debug
   _log.debug("send(int,int)", "sent " + toString() );
   #endif
   
}

}
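/*
 * The non-blocking branch of send() above is a poll-until-done pattern: post
 * the MPI_Isend, then spin on MPI_Test, interleaving other useful work (here
 * receiving dangling messages and checking deadlock time-outs) until the
 * completion flag turns non-zero. Stripped of the Peano-specific
 * bookkeeping, the skeleton looks roughly like this (do_other_work is a
 * placeholder supplied by the caller):
 */
#include <mpi.h>

void send_and_poll(void *buf, int count, MPI_Datatype type,
                   int dest, int tag, MPI_Comm comm,
                   void (*do_other_work)(void))
{
    MPI_Request req;
    int done = 0;

    MPI_Isend(buf, count, type, dest, tag, comm, &req);
    while (!done) {
        MPI_Test(&req, &done, MPI_STATUS_IGNORE);
        if (!done)
            do_other_work();
    }
}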
Exemple #11
0
    void connection_handler::handle_messages()
    {
        detail::handling_messages hm(handling_messages_);       // reset on exit

        bool bootstrapping = hpx::is_starting();
        bool has_work = true;
        std::size_t k = 0;

        hpx::util::high_resolution_timer t;
        std::list<std::pair<int, MPI_Request> > close_requests;

        // We let the message handling loop spin for another 2 seconds to avoid the
        // costs involved with posting it to asio
        while(bootstrapping || has_work || (!has_work && t.elapsed() < 2.0))
        {
            if(stopped_) break;

            // break the loop if someone requested to pause the parcelport
            if(!enable_parcel_handling_) break;

            // handle all send requests
            {
                hpx::lcos::local::spinlock::scoped_lock l(senders_mtx_);
                for(
                    senders_type::iterator it = senders_.begin();
                    !stopped_ && enable_parcel_handling_ && it != senders_.end();
                    /**/)
                {
                    if((*it)->done())
                    {
                        it = senders_.erase(it);
                    }
                    else
                    {
                        ++it;
                    }
                }
                has_work = !senders_.empty();
            }

            // Send the pending close requests
            {
                hpx::lcos::local::spinlock::scoped_lock l(close_mtx_);
                typedef std::pair<int, int> pair_type;

                BOOST_FOREACH(pair_type p, pending_close_requests_)
                {
                    header close_request = header::close(p.first, p.second);
                    close_requests.push_back(std::make_pair(p.first, MPI_Request()));
                    MPI_Isend(
                        close_request.data(),         // Data pointer
                        close_request.data_size_,     // Size
                        close_request.type(),         // MPI Datatype
                        close_request.rank(),         // Destination
                        0,                            // Tag
                        communicator_,                // Communicator
                        &close_requests.back().second
                    );
                }
                pending_close_requests_.clear();
            }

            // add new receive requests
            std::pair<bool, header> next(acceptor_.next_header());
            if(next.first)
            {
                boost::shared_ptr<receiver> rcv;
                header h = next.second;

                receivers_tag_map_type & tag_map = receivers_map_[h.rank()];

                receivers_tag_map_type::iterator jt = tag_map.find(h.tag());

                if(jt != tag_map.end())
                {
                    rcv = jt->second;
                }
                else
                {
                    rcv = boost::make_shared<receiver>(
                        communicator_
                      , get_next_tag()
                      , h.tag()
                      , h.rank()
                      , *this);
                    tag_map.insert(std::make_pair(h.tag(), rcv));
                }

                if(h.close_request())
                {
                    rcv->close();
                }
                else
                {
                    h.assert_valid();
                    if (static_cast<std::size_t>(h.size()) > this->get_max_message_size())
                    {
                        // report this problem ...
                        HPX_THROW_EXCEPTION(boost::asio::error::operation_not_supported,
                            "mpi::connection_handler::handle_messages",
                            "The size of this message exceeds the maximum inbound data size");
                        return;
                    }
                    if(rcv->async_read(h))
                    {
#ifdef HPX_DEBUG
                        receivers_type::iterator it = std::find(receivers_.begin(), receivers_.end(), rcv);
                        HPX_ASSERT(it == receivers_.end());

#endif
                        receivers_.push_back(rcv);
                    }
                }
            }

            // handle all receive requests
            for(receivers_type::iterator it = receivers_.begin();
                it != receivers_.end(); /**/)
            {
                boost::shared_ptr<receiver> rcv = *it;
                if(rcv->done())
                {
                    HPX_ASSERT(rcv->sender_tag() != -1);
                    if(rcv->closing())
                    {
                        receivers_tag_map_type & tag_map = receivers_map_[rcv->rank()];

                        receivers_tag_map_type::iterator jt = tag_map.find(rcv->sender_tag());
                        HPX_ASSERT(jt != tag_map.end());
                        tag_map.erase(jt);
                        {
                            hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                            free_tags_.push_back(rcv->tag());
                        }
                    }
                    it = receivers_.erase(it);
                }
                else
                {
                    ++it;
                }
            }
            if(!has_work) has_work = !receivers_.empty();

            // handle completed close requests
            for(
                std::list<std::pair<int, MPI_Request> >::iterator it = close_requests.begin();
                !stopped_ && enable_parcel_handling_ && it != close_requests.end();
            )
            {
                int completed = 0;
                MPI_Status status;
                int ret = 0;
                ret = MPI_Test(&it->second, &completed, &status);
                HPX_ASSERT(ret == MPI_SUCCESS);
                if(completed && status.MPI_ERROR != MPI_ERR_PENDING)
                {
                    hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                    free_tags_.push_back(it->first);
                    it = close_requests.erase(it);
                }
                else
                {
                    ++it;
                }
            }
            if(!has_work)
                has_work = !close_requests.empty();

            if (bootstrapping)
                bootstrapping = hpx::is_starting();

            if(has_work)
            {
                t.restart();
                k = 0;
            }
            else
            {
                if(enable_parcel_handling_)
                {
                    hpx::lcos::local::spinlock::yield(k);
                    ++k;
                }
            }
        }
    }
int main(int argc, char **argv)
{
	int myRank;
	int pNum;
	double start_time, end_time;
	double *matrix;
	MPI_Status stat;
	MPI_Request req1[300], req2[300];


	MPI_Init(&argc, &argv);
	start_time = MPI_Wtime();

	MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
	MPI_Comm_size(MPI_COMM_WORLD, &pNum);

	if (myRank == 0)
	{
		double buf[N+5];
		while(1)
		{
			double diff;
			int flag = 0;
			for(int i = 1;i < pNum;i++)
			{
				MPI_Recv(&diff, 1, MPI_DOUBLE, i, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
				if (diff > ext)
					flag = 1;
			}
			MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
			if (flag == 0)
				break;
		}
	}
	else
	{
		// init calculate model
		int local_size = N / (pNum - 1) + 2;
		if (myRank == pNum - 1)
			local_size = N - (local_size - 2) * (pNum - 2) + 2;
		
		//printf("local size: %d\n", local_size);
		double temp[local_size][N + 2], temp2[local_size][N + 2];
		for(int i = 1;i < local_size - 1;i++)
		{
			for(int j = 1;j < N + 1;j++)
				temp[i][j] = (int)(random())% 1000;
			temp[i][0] = temp[i][N + 1] = 0;
		}	
		for(int j = 0;j < N + 2;j++)
			temp[0][j] = temp[local_size - 1][j] = 0;

		double maxDiff = ext + 1;
		while(1)
		{
			maxDiff = ext;
			// pass value
			int sendNum = 0, recNum = 0;
			if (myRank != 1)
				MPI_Isend(temp[1], N + 2, MPI_DOUBLE, myRank - 1, 0, MPI_COMM_WORLD, &req1[sendNum++]);
			if (myRank != pNum - 1)
				MPI_Isend(temp[local_size - 2], N + 2, MPI_DOUBLE, myRank + 1, 0, MPI_COMM_WORLD, &req1[sendNum++]);
			double preBuf[N], nextBuf[N];
			if (myRank != 1)
			{
				MPI_Irecv(temp[0], N + 2, MPI_DOUBLE, myRank - 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]);
				//memcpy(temp[0], preBuf, N + 2);
			}
			if (myRank != pNum - 1)
			{
				MPI_Irecv(temp[local_size - 1], N + 2, MPI_DOUBLE, myRank + 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]);
				//memcpy(temp[local_size - 1], nextBuf, N + 2);
			}
			//calculate
			for(int i = 1;i < local_size - 1;i++)
				for(int j = 1;j <= N;j++)
				{
					temp2[i][j] = (temp[i - 1][j] + temp[i + 1][j] + temp[i][j - 1] + temp[i][j + 1] + temp[i][j]) / 5;  
					if (fabs(temp2[i][j] - temp[i][j]) > maxDiff)
						maxDiff = fabs(temp2[i][j] - temp[i][j]);
				}

			for(int i = 0;i < recNum;i++)
				MPI_Wait(&req2[i], &stat);
			// Also complete the sends before temp[] is overwritten below
			// and the request handles are reused in the next iteration.
			for(int i = 0;i < sendNum;i++)
				MPI_Wait(&req1[i], &stat);
//			printf("id:%d diff %lf localSize %d\n", myRank, maxDiff, local_size);
			MPI_Send(&maxDiff, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
			int flag;
			MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
			for(int i = 1;i < local_size - 1;i++)
				for(int j = 1;j <= N;j++)
					temp[i][j] = temp2[i][j];
			if (flag == 0)
				break;
		//	printf("rank:%d ok diff %lf\n", myRank, maxDiff);
		}
		//for(int j = 1;j < local_size - 1;j++)
		//	MPI_Send(&temp[i][1], N, MPI_DOUBLE, 0, myRank, MPI_COMM_WORLD, &stat);
	}
	end_time = MPI_Wtime();
	printf("rank: %d, runtime is %fs\n", myRank, end_time - start_time);
	MPI_Finalize();
	return 0;
}
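/*
 * In the Jacobi solver above, convergence is decided by sending every local
 * maxDiff to rank 0 and broadcasting a continue/stop flag back out. The same
 * test can be written as one collective; a hedged alternative sketch (not a
 * drop-in replacement, since rank 0 above holds no grid data):
 */
#include <mpi.h>

/* Returns non-zero on every rank once the global maximum difference has
 * dropped to the tolerance. */
int converged(double local_max_diff, double tolerance, MPI_Comm comm)
{
    double global_max_diff = 0.0;
    MPI_Allreduce(&local_max_diff, &global_max_diff, 1, MPI_DOUBLE, MPI_MAX, comm);
    return global_max_diff <= tolerance;
}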
void QCDDopr_Mult(QCDSpinor* pV,QCDMatrix* pU,QCDSpinor* pW,double k)
{
	MPI_Request reqSend[8];
	MPI_Request reqRecv[8];
	MPI_Status st;
	QCDMatrix* pUx;
	QCDMatrix* pUy;
	QCDMatrix* pUz;
	QCDMatrix* pUt;
	int i;

	qcdtKappa[0] = k;
	qcdtKappa[1] = k;
	qcdtKappa[2] = k;
	qcdtKappa[3] = k;

	pUx = pU;
	pUy = pU + qcdNsite;
	pUz = pU + qcdNsite*2;
	pUt = pU + qcdNsite*3;

/* #pragma omp parallel num_threads(8) */
#pragma omp parallel
	{
	int tid = 0,nid = 1;

	tid = omp_get_thread_num();
	nid = omp_get_num_threads();

	/* //debug */
	/* printf("nthreads: %d\n", nid); */
	/* printf("max_threads: %d\n", omp_get_max_threads()); */

	if(tid == 0){
		MPI_Irecv(qcdRecvBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TP,MPI_COMM_WORLD,&reqRecv[QCD_TP]);
		MPI_Irecv(qcdRecvBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TM,MPI_COMM_WORLD,&reqRecv[QCD_TM]);

		MPI_Irecv(qcdRecvBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XP,MPI_COMM_WORLD,&reqRecv[QCD_XP]);
		MPI_Irecv(qcdRecvBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XM,MPI_COMM_WORLD,&reqRecv[QCD_XM]);

		MPI_Irecv(qcdRecvBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YP,MPI_COMM_WORLD,&reqRecv[QCD_YP]);
		MPI_Irecv(qcdRecvBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YM,MPI_COMM_WORLD,&reqRecv[QCD_YM]);

		MPI_Irecv(qcdRecvBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZP,MPI_COMM_WORLD,&reqRecv[QCD_ZP]);
		MPI_Irecv(qcdRecvBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZM,MPI_COMM_WORLD,&reqRecv[QCD_ZM]);
	}

	//Send T
	QCDDopr_MakeTPB_dirac(qcdSendBuf[QCD_TP],pW,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TP,MPI_COMM_WORLD,&reqSend[QCD_TP]);
	}

	QCDDopr_MakeTMB_dirac(qcdSendBuf[QCD_TM],pUt + qcdNsite-qcdNxyz,pW + qcdNsite-qcdNxyz,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TM,MPI_COMM_WORLD,&reqSend[QCD_TM]);
	}

	//Send X
	QCDDopr_MakeXPB(qcdSendBuf[QCD_XP],pW,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XP,MPI_COMM_WORLD,&reqSend[QCD_XP]);
	}

	QCDDopr_MakeXMB(qcdSendBuf[QCD_XM],pUx + qcdNx-1,pW + qcdNx-1,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XM,MPI_COMM_WORLD,&reqSend[QCD_XM]);
	}


	//Send Y
	QCDDopr_MakeYPB(qcdSendBuf[QCD_YP],pW,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YP,MPI_COMM_WORLD,&reqSend[QCD_YP]);
	}

	QCDDopr_MakeYMB(qcdSendBuf[QCD_YM],pUy + qcdNxy-qcdNx,pW + qcdNxy-qcdNx,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YM,MPI_COMM_WORLD,&reqSend[QCD_YM]);
	}

	//Send Z
	QCDDopr_MakeZPB(qcdSendBuf[QCD_ZP],pW,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZP,MPI_COMM_WORLD,&reqSend[QCD_ZP]);
	}

	QCDDopr_MakeZMB(qcdSendBuf[QCD_ZM],pUz + qcdNxyz-qcdNxy,pW + qcdNxyz-qcdNxy,tid,nid);
#pragma omp barrier
	if(tid == 0){
		MPI_Isend(qcdSendBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZM,MPI_COMM_WORLD,&reqSend[QCD_ZM]);
	}

	QCDLA_Equate(pV + tid*qcdNsite/nid,pW + tid*qcdNsite/nid, (tid+1)*qcdNsite/nid - tid*qcdNsite/nid);
#pragma omp barrier

	QCDDopr_TPin_dirac(pV,pUt,pW + qcdNxyz,tid,nid);
#pragma omp barrier
	QCDDopr_TMin_dirac(pV,pUt-qcdNxyz,pW - qcdNxyz,tid,nid);
#pragma omp barrier
	QCDDopr_XPin(pV,pUx,pW+1,tid,nid);
#pragma omp barrier
	QCDDopr_XMin(pV,pUx-1,pW-1,tid,nid);
#pragma omp barrier

	QCDDopr_YPin(pV,pUy,pW + qcdNx,tid,nid);
#pragma omp barrier
	QCDDopr_YMin(pV,pUy-qcdNx,pW - qcdNx,tid,nid);
#pragma omp barrier
	QCDDopr_ZPin(pV,pUz,pW + qcdNxy,tid,nid);
#pragma omp barrier
	QCDDopr_ZMin(pV,pUz-qcdNxy,pW - qcdNxy,tid,nid);

	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_TP],&st);
	}
#pragma omp barrier
	QCDDopr_SetTPBnd_dirac(pV,pUt,qcdRecvBuf[QCD_TP],tid,nid);
	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_TM],&st);
	}
#pragma omp barrier
	QCDDopr_SetTMBnd_dirac(pV,qcdRecvBuf[QCD_TM],tid,nid);

	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_XP],&st);
	}
#pragma omp barrier
	QCDDopr_SetXPBnd(pV,pUx,qcdRecvBuf[QCD_XP],tid,nid);
	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_XM],&st);
	}
#pragma omp barrier
	QCDDopr_SetXMBnd(pV,qcdRecvBuf[QCD_XM],tid,nid);

	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_YP],&st);
	}
#pragma omp barrier
	QCDDopr_SetYPBnd(pV,pUy,qcdRecvBuf[QCD_YP],tid,nid);
	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_YM],&st);
	}
#pragma omp barrier
	QCDDopr_SetYMBnd(pV,qcdRecvBuf[QCD_YM],tid,nid);

	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_ZP],&st);
	}
#pragma omp barrier
	QCDDopr_SetZPBnd(pV,pUz,qcdRecvBuf[QCD_ZP],tid,nid);
	if(tid == 0){
		MPI_Wait(&reqRecv[QCD_ZM],&st);
	}
#pragma omp barrier
	QCDDopr_SetZMBnd(pV,qcdRecvBuf[QCD_ZM],tid,nid);

	if(tid == 0){
		MPI_Wait(&reqSend[QCD_TP],&st);
		MPI_Wait(&reqSend[QCD_TM],&st);
		MPI_Wait(&reqSend[QCD_XP],&st);
		MPI_Wait(&reqSend[QCD_XM],&st);
		MPI_Wait(&reqSend[QCD_YP],&st);
		MPI_Wait(&reqSend[QCD_YM],&st);
		MPI_Wait(&reqSend[QCD_ZP],&st);
		MPI_Wait(&reqSend[QCD_ZM],&st);
	}
#pragma omp barrier

	}
}
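/*
 * QCDDopr_Mult above mixes OpenMP and MPI in a "funneled" style: only thread
 * 0 ever calls MPI, with barriers keeping the other threads out of the
 * communication phases. For that pattern the library should be initialised
 * with at least MPI_THREAD_FUNNELED; a minimal sketch of the corresponding
 * start-up (illustrative, not taken from this code base):
 */
#include <mpi.h>
#include <stdio.h>

void init_mpi_funneled(int *argc, char ***argv)
{
    int provided = MPI_THREAD_SINGLE;

    MPI_Init_thread(argc, argv, MPI_THREAD_FUNNELED, &provided);
    if (provided < MPI_THREAD_FUNNELED) {
        fprintf(stderr, "MPI library does not provide MPI_THREAD_FUNNELED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}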
double GetResRoot(double *phi, double *b,  param_t p)
{
   int x,y;
   
   //true residue
   double residue;
   double ResRoot = 0.0;
   double Bmag = 0.0;
   
   double ResRoot_global = 0.0;
   double Bmag_global = 0.0;
   
   // A little trick to index phi normally.
   double* phi_s = phi + p.L; 
   
   // Prepare for async send/recv
   MPI_Request request[4];
   int requests;
   MPI_Status status[4];
   
   requests = 0;
   
   // Send the higher-memory component to the next rank.
   MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE, 
               (p.my_rank+1)%p.world_size, 1, 
               MPI_COMM_WORLD, request + requests++);
   MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE,
                (p.my_rank+p.world_size-1)%p.world_size, 1,
                MPI_COMM_WORLD, request + requests++);
   
   
   // Send the lower-memory component to the previous rank.
   MPI_Isend(phi_s, p.L, MPI_DOUBLE, 
               (p.my_rank+p.world_size-1)%p.world_size, 0, 
               MPI_COMM_WORLD, request + requests++);
   MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE,
                (p.my_rank+1)%p.world_size, 0,
                MPI_COMM_WORLD, request + requests++);
   
   // Do some other work while we wait! 
   // Update everything that doesn't depend on buffers. 
   
   for(x = 0; x < p.L; x++)
   {
      for(y = 1; y < p.y-1; y++)
      {
         residue = p.scale* b[x + y*p.L]
                     - phi_s[x + y*p.L]  
                     + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] 
                                 + phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L]);
         
         ResRoot += residue*residue;
         Bmag += b[x + y*p.L]*b[x + y*p.L];
      }
   }
   
   // Wait, if sync hasn't finished.
   MPI_Waitall ( requests, request, status );
   
   // Update the rest of the cells.
   for(x = 0; x < p.L; x++)
   {
      y = 0;
      residue = p.scale* b[x + y*p.L]
                  - phi_s[x + y*p.L]  
                  + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] 
                              + phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L]);

      ResRoot += residue*residue;
      Bmag += b[x + y*p.L]*b[x + y*p.L];
      
      y = p.y-1;
      residue = p.scale* b[x + y*p.L]
                  - phi_s[x + y*p.L]  
                  + p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L] 
                              + phi_s[x + (y+1)*p.L]  + phi_s[x + (y-1)*p.L]);

      ResRoot += residue*residue;
      Bmag += b[x + y*p.L]*b[x + y*p.L];
   }
   
   MPI_Allreduce(&Bmag, &Bmag_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 
   MPI_Allreduce(&ResRoot, &ResRoot_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 
   
   
   // Normalized true residue
   return sqrt(ResRoot_global)/sqrt(Bmag_global);    
}
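/*
 * GetResRoot above issues two separate MPI_Allreduce calls, one for Bmag and
 * one for ResRoot. Both are plain sums, so they can be fused into a single
 * reduction over a two-element array, saving one round of latency; a small
 * sketch of that variant (hypothetical helper, same result):
 */
#include <mpi.h>
#include <math.h>

double normalized_residual(double res_sq_local, double bmag_sq_local, MPI_Comm comm)
{
    /* local[0]: local sum of residue^2, local[1]: local sum of b^2. */
    double local[2] = { res_sq_local, bmag_sq_local };
    double global[2] = { 0.0, 0.0 };

    MPI_Allreduce(local, global, 2, MPI_DOUBLE, MPI_SUM, comm);
    return sqrt(global[0]) / sqrt(global[1]);
}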
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status statuses[2];
  MPI_Request reqs[2];

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  /* this code is very similar to no-error-waitall-any_src.c */
  /* but deadlocks since task 2's send and recv are inverted... */
  if (nprocs < 3)
    {
      printf ("not enough tasks\n");
    }
  else if (rank == 0)
    {
      MPI_Irecv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[0]);

      MPI_Irecv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[1]);

      MPI_Waitall (2, reqs, statuses);

      MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD);
    }
  else if (rank == 1)
    {
      memset (buf0, 0, buf_size * sizeof(int));

      MPI_Isend (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &reqs[0]);

      MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]);

      MPI_Waitall (2, reqs, statuses);

      MPI_Recv (buf1, buf_size, MPI_INT, 0, 1, MPI_COMM_WORLD, statuses);

      MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
  else if (rank == 2)
    {
      MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
    }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Exemple #16
0
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A  = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m    = C->rmap->N;

  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc*/
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* evaluate communication - mesg to who,length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them*/
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j];  w3[j]++;}
    }
  }

  nrqs       = 0;              /* no of outgoing messages */
  msz        = 0;              /* total mesg length (for all procs) */
  w1[2*rank] = 0;              /* no mesg sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i])  { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];
    msz     += w1[2*j];
  }
  /* Do a global reduction to determine how many messages to expect*/
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for recv buffers . Make sure rbuf1[0] exists by adding 1 to the buffer length */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate Memory for outgoing messages */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf*/
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) { /* Can Optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /*  Now  post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row_data from other procs */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr  = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr  = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive the messages (row numbers), then pack and send off the row
     values to the correct processors */

  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);

  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE;         /* send processor */
      rbuf1_i = rbuf1[idex];         /* Actual message from s_proc */
      /* no of rows = end - start; since start is array idex[], 0idex, whel end
         is length of the buffer - which is 1idex */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr    = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* Now pack the data */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);

      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create row map-> This maps c->row to submat->row for each submat*/
  /* this is a very expensive operation wrt memory usage */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now Receive the row_values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which contains the IS data_structure */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max+1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) {  /* For each row */
          row     = sbuf1_i[ct1]; ct1++;
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }
  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc, char * argv[])
{
    int numPointsPerDimension;
    int verbose = 0;
    double omega;
    double epsilon;
    double * * points;
    struct timeval startTime;
    struct timeval endTime;
    double duration;
    double breakdown = 0;
    int numIterations;
    double maxDiff, tmpMaxDiff;

    int numProcesses;
    int workingProcesses;
    int myRank;
    MPI_Status status;
    MPI_Request requestUpSend, requestUpRecv;
    MPI_Request requestDownSend, requestDownRecv;
    int partitions;
    int remainder;
    int width;
    int i, k;
    int buffSize;
    int startRow;

    double * upPointsSend, * upPointsRecv;
    double * downPointsSend, * downPointsRecv;

    int upperProc, lowerProc;
    struct timeval startInterval;
    struct timeval endInterval;

    if (argc < 2)
    {
        fprintf(stderr, "ERROR: Too few arguments!\n");
        printUsage(argv[0]);
        exit(1);
    }
    else if (argc > 3)
    {
        fprintf(stderr, "ERROR: Too many arguments!\n");
        printUsage(argv[0]);
        exit(1);
    }
    else
    {
        int argIdx = 1;
        if (argc == 3)
        {
            if (strncmp(argv[argIdx], OPTION_VERBOSE, strlen(OPTION_VERBOSE)) != 0)
            {
                fprintf(stderr, "ERROR: Unexpected option '%s'!\n", argv[argIdx]);
                printUsage(argv[0]);
                exit(1);
            }
            verbose = 1;
            ++argIdx;
        }
        numPointsPerDimension = atoi(argv[argIdx]);
        if (numPointsPerDimension < 2)
        {
            fprintf(stderr, "ERROR: The number of points, '%s', should be "
                "a numeric value greater than or equal to 2!\n", argv[argIdx]);
            printUsage(argv[0]);
            exit(1);
        }
    }
    
    MPI_Init(&argc, &argv);

    /* get info about how many processes are running 
     * and what this process's rank is */
    MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    /* calculate nominal size of data per each process */
    partitions = numPointsPerDimension / numProcesses;

    /* calculate number of processes with the additional row of data */
    remainder = numPointsPerDimension % numProcesses;

    /* according to myRank, set the width of the table */
    width = (myRank < remainder) ? partitions + 1 : partitions;
    
    /* decide how many processes are required to do the calculation */
    workingProcesses = (numProcesses > numPointsPerDimension) ? numPointsPerDimension : numProcesses;
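    /* Illustration with hypothetical numbers: for numPointsPerDimension = 10
     * and numProcesses = 4, partitions = 2 and remainder = 2, so ranks 0 and 1
     * get width 3 while ranks 2 and 3 get width 2 (3 + 3 + 2 + 2 = 10 rows). */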

    /* terminate processes that won't be used */
    /* start of copied part of code */
    MPI_Comm MY_WORLD = MPI_COMM_WORLD;
    if(workingProcesses < numProcesses)
    {
        MPI_Group world_group;
        MPI_Comm_group(MPI_COMM_WORLD, &world_group);
        
        // Remove all unnecessary ranks
        MPI_Group new_group;
        int ranges[1][3] = {{workingProcesses, (numProcesses - 1), 1}};
        MPI_Group_range_excl(world_group, 1, ranges, &new_group);

        // Create a new communicator
        MPI_Comm_create(MPI_COMM_WORLD, new_group, &MY_WORLD);

        if (MY_WORLD == MPI_COMM_NULL)
        {
            // Bye bye cruel world
            MPI_Finalize();
            exit(0);
        }
    }
    /* end of copied part of code */
    /* source: http://stackoverflow.com/questions/13774968/mpi-kill-unwanted-processes */


    /* set the calculation parameters */
    omega = getOmega(numPointsPerDimension);
    epsilon = getEpsilon(numPointsPerDimension);
    
    /* allocate points table for each process */
    points = allocatePoints(numPointsPerDimension, width, numProcesses);
    if (points == NULL)
    {
        freePoints(points, width, myRank);
        fprintf(stderr, "ERROR: Malloc failed!\n");
        exit(1);
    }
    
    /* size of the buffer exchanged in each iteration (half a row, rounded up) */
    buffSize = numPointsPerDimension / 2 + numPointsPerDimension % 2 ;
    
    /* initialize additional buffers for communication */
    upPointsSend = initializeBuffer(buffSize);
    upPointsRecv = initializeBuffer(buffSize);
    downPointsSend = initializeBuffer(buffSize);
    downPointsRecv = initializeBuffer(buffSize);
    
    /* process #0 sends each of the other processes its part of the table;
     * the others wait for the incoming data */
    if (myRank == 0)
    { 
        startRow = numPointsPerDimension;
        for(k = workingProcesses - 1; k >= 0 ; --k)
        {
            width = (k < remainder) ? partitions + 1 : partitions;
            
            /* initialize points */
            initializePoints(points, startRow - width, width, numPointsPerDimension);
        
            /* send table to k-th process */
            if(k != 0)
            {
                for(i = 0; i < width; ++i)
                {
                    MPI_Send(points[i], numPointsPerDimension, MPI_DOUBLE, k, 123, MY_WORLD);
                }
            }
            startRow -= width;
        }        
    } 
    else 
    {
        if(myRank < workingProcesses)
        {
            for(i = 0; i < width; ++i)
            {
                MPI_Recv(points[i], numPointsPerDimension, MPI_DOUBLE, 0, 123, MY_WORLD, &status);
            }
        }
    }

    /* remember which processes we communicate with */
    upperProc = myRank == 0 ? MPI_PROC_NULL : myRank - 1;
    lowerProc = myRank == workingProcesses - 1 ? MPI_PROC_NULL : myRank + 1;
    
    /* at this point each process has its own data set for the computations */

    if(remainder > 0)
    {
        startRow = (myRank < remainder) ? myRank * (partitions + 1) : myRank * partitions + remainder;
    }
    else 
    {
        startRow = myRank * partitions;
    }

    if(gettimeofday(&startTime, NULL))
    {
        freePoints(points, width, myRank);
        fprintf(stderr, "ERROR: Gettimeofday failed!\n");
        exit(1);
    }
    
    /* Start of computations. */
    
    numIterations = 0;
    do
    {
        int i, j, color;
        maxDiff = 0.0;
        for (color = 0; color < 2; ++color)
        {

            /* fill downPointsSend with the last row of points data */
            setDataBuffer(downPointsSend, points, width - 1, 1 + ((startRow + width) % 2 == color ? 1 : 0), numPointsPerDimension);

            if(gettimeofday(&startInterval, NULL))
            {
                freePoints(points, width, myRank);
                fprintf(stderr, "ERROR: Gettimeofday failed!\n");
                exit(1);
            }
            
            MPI_Isend(downPointsSend, buffSize, MPI_DOUBLE, lowerProc, color, MY_WORLD, &requestDownSend);
            MPI_Irecv(downPointsRecv, buffSize, MPI_DOUBLE, lowerProc, color, MY_WORLD, &requestDownRecv);
            
            if(gettimeofday(&endInterval, NULL))
            {
                freePoints(points, width, myRank);
                fprintf(stderr, "ERROR: Gettimeofday failed!\n");
                exit(1);
            }

            breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) -
                         ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0));

            /* fill upPointsSend with the first row of points data */
            setDataBuffer(upPointsSend, points, 0, 1 + ((startRow - 1) % 2 == color ? 1 : 0), numPointsPerDimension);

            if(gettimeofday(&startInterval, NULL))
            {
                freePoints(points, width, myRank);
                fprintf(stderr, "ERROR: Gettimeofday failed!\n");
                exit(1);
            }

            MPI_Isend(upPointsSend, buffSize, MPI_DOUBLE, upperProc, color, MY_WORLD, &requestUpSend);
            MPI_Irecv(upPointsRecv, buffSize, MPI_DOUBLE, upperProc, color, MY_WORLD, &requestUpRecv);

            if(gettimeofday(&endInterval, NULL))
            {
                freePoints(points, width, myRank);
                fprintf(stderr, "ERROR: Gettimeofday failed!\n");
                exit(1);
            }

            breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) -
                         ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0));

            /* computing the first row requires data that has to be received from the neighbouring process */
            MPI_Wait(&requestUpRecv, &status);

            for (i = 0; i < width; ++i)
            {
 
                /* before computing the last row of its data,
                 * the process has to be sure that it has received the
                 * required row from process rank+1 */
                if(i == width - 1)
                {
                    MPI_Wait(&requestDownRecv, &status);
                }

                for (j = 1 + ((startRow+i) % 2 == color ? 1 : 0); j < numPointsPerDimension - 1; j += 2)
                {
                    if( (myRank != 0 || i != 0 ) && (myRank != workingProcesses - 1 || i != width - 1) )
                    {
                        
                        double tmp, diff;
                        double down, up;
                        int jIdx = (j - 1 - ((startRow + i) % 2 == color ? 1 : 0))/ 2;
                        
                        /* decide if up or down value should be taken from additional buffers */
                        up = (i == 0) ? upPointsRecv[jIdx] : points[i-1][j];
                        down = (i == width - 1) ? downPointsRecv[jIdx] : points[i+1][j];
                        
                        /* calculate final value */
                        tmp = (up + down + points[i][j - 1] + points[i][j + 1]) / 4.0;
                        diff = points[i][j];
                        points[i][j] = (1.0 - omega) * points[i][j] + omega * tmp;
                        
                        diff = fabs(diff - points[i][j]);
                        if (diff > maxDiff)
                        {
                            maxDiff = diff;
                        }
                    }
                }
            }
            MPI_Barrier(MY_WORLD);
        }
    	
        if(gettimeofday(&startInterval, NULL))
        {
            freePoints(points, width, myRank);
            fprintf(stderr, "ERROR: Gettimeofday failed!\n");
            exit(1);
        }
        
        /* find new maxDiff among all processes */
        MPI_Allreduce(&maxDiff, &tmpMaxDiff, 1, MPI_DOUBLE, MPI_MAX, MY_WORLD );
        maxDiff = tmpMaxDiff;

        if(gettimeofday(&endInterval, NULL))
        {
            freePoints(points, width, myRank);
            fprintf(stderr, "ERROR: Gettimeofday failed!\n");
            exit(1);
        }
        
        breakdown += ((double)endInterval.tv_sec + ((double)endInterval.tv_usec / 1000000.0)) - 
                     ((double)startInterval.tv_sec + ((double)startInterval.tv_usec / 1000000.0));

        ++numIterations;
    }
    while (maxDiff > epsilon);

    /* End of computations. */
 
    if(gettimeofday(&endTime, NULL))
    {
        freePoints(points, width, myRank);
        fprintf(stderr, "ERROR: Gettimeofday failed!\n");
        exit(1);
    }

    /* calculate how long the computation lasted */
    duration =
        ((double)endTime.tv_sec + ((double)endTime.tv_usec / 1000000.0)) - 
        ((double)startTime.tv_sec + ((double)startTime.tv_usec / 1000000.0));

    /* we choose the process whose execution lasted for the longest time */     
    double maxDuration;
    MPI_Allreduce(&duration, &maxDuration, 1, MPI_DOUBLE, MPI_MAX, MY_WORLD);
   
    if(myRank==0)
    {
        fprintf(stderr,
            "Statistics: duration(s)=%.10f breakdown=%.10f #iters=%d diff=%.10f epsilon=%.10f\n",
            maxDuration, breakdown, numIterations, maxDiff, epsilon);
    }
  
    if (verbose) {
        
        MPI_Barrier(MY_WORLD);
    
        /* process #0 is responsible for printing results of computation 
         * others send their data straight to it */
        if(myRank != 0 && myRank < workingProcesses) 
        {
            for(k = 0; k < width ; ++k)
            {
                MPI_Send(points[k], numPointsPerDimension, MPI_DOUBLE, 0, 123, MY_WORLD);
            }
        }
        else if(myRank == 0)
        {
            printPoints(points, width, numPointsPerDimension);
            for(i = 1; i < workingProcesses; ++i)
            {
                width = (i < remainder) ? partitions + 1 : partitions;
                
                for (k = 0 ; k < width ; ++k)
                {
                    MPI_Recv(points[k], numPointsPerDimension, MPI_DOUBLE, i, 123, MY_WORLD, &status);
                }

                printPoints(points, width, numPointsPerDimension);
            }
        }
    }
    
    /* free all the memory that was allocated */
    freePoints(points, width, myRank);
    free(downPointsSend);
    free(upPointsSend);
    free(downPointsRecv);
    free(upPointsRecv);
     
    MPI_Finalize();
    
    return 0;
}
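Aside on the halo exchange above: because upperProc and lowerProc are set to MPI_PROC_NULL on the first and last rank, every process can run the same communication code with no boundary special cases. A minimal, self-contained sketch of that behaviour (standard MPI only; the variable names here are illustrative, not taken from the solver):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, size;
    double sendUp, sendDown, recvUp = -1.0, recvDown = -1.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* same neighbour convention as in the solver above */
    int upperProc = (rank == 0) ? MPI_PROC_NULL : rank - 1;
    int lowerProc = (rank == size - 1) ? MPI_PROC_NULL : rank + 1;

    sendUp = sendDown = (double) rank;

    /* every rank executes the same two calls; on the boundary ranks the
     * MPI_PROC_NULL sends/receives complete immediately and move no data */
    MPI_Sendrecv(&sendUp, 1, MPI_DOUBLE, upperProc, 0,
                 &recvDown, 1, MPI_DOUBLE, lowerProc, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&sendDown, 1, MPI_DOUBLE, lowerProc, 1,
                 &recvUp, 1, MPI_DOUBLE, upperProc, 1,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    /* on rank 0 recvUp stays -1, on the last rank recvDown stays -1 */
    printf("rank %d: recvUp=%g recvDown=%g\n", rank, recvUp, recvDown);

    MPI_Finalize();
    return 0;
}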
Exemple #18
0
int main(int argc, char *argv[])
{
    int provided, wrank, wsize, nmsg, i, tag;
    int *(buf[MAX_TARGETS]), bufsize[MAX_TARGETS];
    MPI_Request r[MAX_TARGETS];
    MPI_Comm commDup, commEven;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    if (wsize < 4) {
        fprintf(stderr, "This test requires at least 4 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Create several communicators */
    MPI_Comm_dup(MPI_COMM_WORLD, &commDup);
    MPI_Comm_set_name(commDup, "User dup of comm world");

    MPI_Comm_split(MPI_COMM_WORLD, wrank & 0x1, wrank, &commEven);
    if (wrank & 0x1)
        MPI_Comm_free(&commEven);
    else
        MPI_Comm_set_name(commEven, "User split to even ranks");

    /* Create a collection of pending sends and receives
     * We use tags on the sends and receives (when ANY_TAG isn't used)
     * to provide an easy way to check that the proper requests are present.
     * TAG values use fields, in decimal (for easy reading):
     * 0-99: send/recv type:
     * 0 - other
     * 1 - irecv
     * 2 - isend
     * 3 - issend
     * 4 - ibsend
     * 5 - irsend
     * 6 - persistent recv
     * 7 - persistent send
     * 8 - persistent ssend
     * 9 - persistent rsend
     * 10 - persistent bsend
     * 100-999: destination (for send) or source, if receive.  999 = any-source
     * (rank is value/100)
     * 1000-2G: other values
     */
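    /* As an illustration (not part of the original test): for the tags below
     * 1000 that this scheme produces, the two fields can be read back as
     *   kind = tag % 100;   e.g. 102 % 100 == 2  -> isend
     *   peer = tag / 100;   e.g. 102 / 100 == 1  -> destination rank 1      */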
    /* Create the send/receive buffers */
    nmsg = 10;
    for (i = 0; i < nmsg; i++) {
        bufsize[i] = i;
        if (i) {
            buf[i] = (int *) calloc(bufsize[i], sizeof(int));
            if (!buf[i]) {
                fprintf(stderr, "Unable to allocate %d words\n", bufsize[i]);
                MPI_Abort(MPI_COMM_WORLD, 2);
            }
        } else
            buf[i] = 0;
    }

    /* Partial implementation */
    if (wrank == 0) {
        nmsg = 0;
        tag = 2 + 1 * 100;
        MPI_Isend(buf[0], bufsize[0], MPI_INT, 1, tag, MPI_COMM_WORLD, &r[nmsg++]);
        tag = 3 + 2 * 100;
        MPI_Issend(buf[1], bufsize[1], MPI_INT, 2, tag, MPI_COMM_WORLD, &r[nmsg++]);
        tag = 1 + 3 * 100;
        MPI_Irecv(buf[2], bufsize[2], MPI_INT, 3, tag, MPI_COMM_WORLD, &r[nmsg++]);
    } else if (wrank == 1) {
    } else if (wrank == 2) {
    } else if (wrank == 3) {
    }

    /* provide a convenient place to wait */
    MPI_Barrier(MPI_COMM_WORLD);
    printf("Barrier 1 finished\n");

    /* Match up (or cancel) the requests */
    if (wrank == 0) {
        MPI_Waitall(nmsg, r, MPI_STATUSES_IGNORE);
    } else if (wrank == 1) {
        tag = 2 + 1 * 100;
        MPI_Recv(buf[0], bufsize[0], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else if (wrank == 2) {
        tag = 3 + 2 * 100;
        MPI_Recv(buf[1], bufsize[1], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else if (wrank == 3) {
        tag = 1 + 3 * 100;
        MPI_Send(buf[2], bufsize[2], MPI_INT, 0, tag, MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    printf("Barrier 2 finished\n");

    MPI_Comm_free(&commDup);
    if (commEven != MPI_COMM_NULL)
        MPI_Comm_free(&commEven);

    MPI_Finalize();
    return 0;
}
Exemple #19
0
void pdgsmv_init
(
 SuperMatrix *A,       /* Matrix A permuted by columns (input/output).
			  The type of A can be:
			  Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. */
 int_t *row_to_proc,   /* Input. Mapping between rows and processes. */
 gridinfo_t *grid,     /* Input */
 pdgsmv_comm_t *gsmv_comm /* Output. The data structure for communication. */
 )
{
    NRformat_loc *Astore;
    int iam, p, procs;
    int *SendCounts, *RecvCounts;
    int_t i, j, k, l, m, m_loc, n, fst_row, jcol;
    int_t TotalIndSend, TotalValSend;
    int_t *colind, *rowptr;
    int_t *ind_tosend = NULL, *ind_torecv = NULL;
    int_t *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, *spa, *itemp;
    double *nzval, *val_tosend = NULL, *val_torecv = NULL, t;
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdgsmv_init()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A->Store;
    m = A->nrow;
    n = A->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = Astore->nzval;
    if ( !(SendCounts = SUPERLU_MALLOC(2*procs * sizeof(int))) )
        ABORT("Malloc fails for SendCounts[]");
    /*for (i = 0; i < 2*procs; ++i) SendCounts[i] = 0;*/
    RecvCounts = SendCounts + procs;
    if ( !(ptr_ind_tosend = intMalloc_dist(2*(procs+1))) )
        ABORT("Malloc fails for ptr_ind_tosend[]");
    ptr_ind_torecv = ptr_ind_tosend + procs + 1;
    if ( !(extern_start = intMalloc_dist(m_loc)) )
        ABORT("Malloc fails for extern_start[]");
    for (i = 0; i < m_loc; ++i) extern_start[i] = rowptr[i];

    /* ------------------------------------------------------------
       COUNT THE NUMBER OF X ENTRIES TO BE SENT TO EACH PROCESS.
       THIS IS THE UNION OF THE COLUMN INDICES OF MY ROWS.
       SWAP TO THE BEGINNING THE PART OF A CORRESPONDING TO THE
       LOCAL PART OF X.
       THIS ACCOUNTS FOR THE FIRST PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    if ( !(spa = intCalloc_dist(n)) ) /* Aid in global to local translation */
        ABORT("Malloc fails for spa[]");
    for (p = 0; p < procs; ++p) SendCounts[p] = 0;
    for (i = 0; i < m_loc; ++i) { /* Loop through each row */
        k = extern_start[i];
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {/* Each nonzero in row i */
	    jcol = colind[j];
            p = row_to_proc[jcol];
	    if ( p != iam ) { /* External */
	        if ( spa[jcol] == 0 ) { /* First time see this index */
		    ++SendCounts[p];
		    spa[jcol] = 1;
                }
	    } else { /* Swap to beginning the part of A corresponding
			to the local part of X */
		l = colind[k];
		t = nzval[k];
		colind[k] = jcol;
		nzval[k] = nzval[j];
		colind[j] = l;
		nzval[j] = t;
		++k;
	    }
	}
	extern_start[i] = k;
    }

    /* ------------------------------------------------------------
       LOAD THE X-INDICES TO BE SENT TO THE OTHER PROCESSES.
       THIS ACCOUNTS FOR THE SECOND PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    /* Build pointers to ind_tosend[]. */
    ptr_ind_tosend[0] = 0;
    for (p = 0, TotalIndSend = 0; p < procs; ++p) {
        TotalIndSend += SendCounts[p]; /* Total to send. */
	ptr_ind_tosend[p+1] = ptr_ind_tosend[p] + SendCounts[p];
    }
#if 0
    ptr_ind_tosend[iam] = 0; /* Local part of X */
#endif
    if ( TotalIndSend ) {
        if ( !(ind_tosend = intMalloc_dist(TotalIndSend)) )
	    ABORT("Malloc fails for ind_tosend[]"); /* Exclude local part of X */
    }

    /* Build SPA to aid global to local translation. */
    for (i = 0; i < n; ++i) spa[i] = EMPTY;
    for (i = 0; i < m_loc; ++i) { /* Loop through each row of A */
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
	    jcol = colind[j];
	    if ( spa[jcol] == EMPTY ) { /* First time see this index */
	        p = row_to_proc[jcol];
		if ( p == iam ) { /* Local */
		  /*assert(jcol>=fst_row);*/
		  spa[jcol] = jcol - fst_row; /* Relative position in local X */
		} else {          /* External */
		  ind_tosend[ptr_ind_tosend[p]] = jcol; /* Still global */
		  spa[jcol] = ptr_ind_tosend[p]; /* Position in ind_tosend[] */
		  ++ptr_ind_tosend[p];
		}
	    }
	}
    }
    
    /* ------------------------------------------------------------
       TRANSFORM THE COLUMN INDICES OF MATRIX A INTO LOCAL INDICES.
       THIS ACCOUNTS FOR THE THIRD PASS OF ACCESSING MATRIX A.
       ------------------------------------------------------------*/
    for (i = 0; i < m_loc; ++i) {
        for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
	    jcol = colind[j];
	    colind[j] = spa[jcol];
	}
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE EXTERNAL INDICES OF X.
       ------------------------------------------------------------*/
    MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT,
		 grid->comm);

    /* Build pointers to ind_torecv[]. */
    ptr_ind_torecv[0] = 0;
    for (p = 0, TotalValSend = 0; p < procs; ++p) {
        TotalValSend += RecvCounts[p]; /* Total to receive. */
	ptr_ind_torecv[p+1] = ptr_ind_torecv[p] + RecvCounts[p];
    }
    if ( TotalValSend ) {
        if ( !(ind_torecv = intMalloc_dist(TotalValSend)) )
	    ABORT("Malloc fails for ind_torecv[]");
    }

    if ( !(send_req = (MPI_Request *)
	   SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + procs;
    for (p = 0; p < procs; ++p) {
        ptr_ind_tosend[p] -= SendCounts[p]; /* Reset pointer to beginning */
        if ( SendCounts[p] ) {
	    MPI_Isend(&ind_tosend[ptr_ind_tosend[p]], SendCounts[p],
		      mpi_int_t, p, iam, grid->comm, &send_req[p]);
	}
	if ( RecvCounts[p] ) {
	    MPI_Irecv(&ind_torecv[ptr_ind_torecv[p]], RecvCounts[p],
		      mpi_int_t, p, p, grid->comm, &recv_req[p]);
	}
    }
    for (p = 0; p < procs; ++p) {
        if ( SendCounts[p] ) MPI_Wait(&send_req[p], &status);
	if ( RecvCounts[p] ) MPI_Wait(&recv_req[p], &status);
    }

    /* Allocate storage for the X values to be transferred. */
    if ( TotalIndSend &&
         !(val_torecv = doubleMalloc_dist(TotalIndSend)) )
        ABORT("Malloc fails for val_torecv[].");
    if ( TotalValSend &&
         !(val_tosend = doubleMalloc_dist(TotalValSend)) )
        ABORT("Malloc fails for val_tosend[].");

    gsmv_comm->extern_start = extern_start;
    gsmv_comm->ind_tosend = ind_tosend;
    gsmv_comm->ind_torecv = ind_torecv;
    gsmv_comm->ptr_ind_tosend = ptr_ind_tosend;
    gsmv_comm->ptr_ind_torecv = ptr_ind_torecv;
    gsmv_comm->SendCounts = SendCounts;
    gsmv_comm->RecvCounts = RecvCounts;
    gsmv_comm->val_tosend = val_tosend;
    gsmv_comm->val_torecv = val_torecv;
    gsmv_comm->TotalIndSend = TotalIndSend;
    gsmv_comm->TotalValSend = TotalValSend;
    
    SUPERLU_FREE(spa);
    SUPERLU_FREE(send_req);

#if ( DEBUGlevel>=2 )
    PrintInt10("pdgsmv_init::rowptr", m_loc+1, rowptr);
    PrintInt10("pdgsmv_init::extern_start", m_loc, extern_start);
#endif
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgsmv_init()");
#endif

} /* PDGSMV_INIT */
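Aside: the index exchange in pdgsmv_init follows a common two-phase pattern: an MPI_Alltoall on the per-process counts, then one point-to-point message per non-empty pair. A stripped-down, self-contained sketch of just that pattern (plain int payloads and made-up counts; none of the SuperLU_DIST data structures are used):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    int iam, procs, p, i, nreq = 0, offset = 0, total_recv = 0, maxsend = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &iam);
    MPI_Comm_size(MPI_COMM_WORLD, &procs);

    int *SendCounts = calloc(2 * procs, sizeof(int));
    int *RecvCounts = SendCounts + procs;
    MPI_Request *req = malloc(2 * procs * sizeof(MPI_Request));

    /* made-up workload: rank 'iam' wants to send (iam + p + 1) ints to rank p */
    for (p = 0; p < procs; ++p)
        SendCounts[p] = (p == iam) ? 0 : iam + p + 1;

    /* phase 1: everyone learns how much it will receive from everyone else */
    MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT, MPI_COMM_WORLD);

    for (p = 0; p < procs; ++p) {
        total_recv += RecvCounts[p];
        if (SendCounts[p] > maxsend) maxsend = SendCounts[p];
    }
    int *inbox  = malloc((total_recv ? total_recv : 1) * sizeof(int));
    int *outbox = malloc((maxsend ? maxsend : 1) * sizeof(int));
    for (i = 0; i < maxsend; ++i) outbox[i] = iam;   /* dummy payload */

    /* phase 2: one Irecv/Isend per non-empty pairing, then wait for all */
    for (p = 0; p < procs; ++p) {
        if (RecvCounts[p]) {
            MPI_Irecv(inbox + offset, RecvCounts[p], MPI_INT, p, 0,
                      MPI_COMM_WORLD, &req[nreq++]);
            offset += RecvCounts[p];
        }
        if (SendCounts[p])
            MPI_Isend(outbox, SendCounts[p], MPI_INT, p, 0,
                      MPI_COMM_WORLD, &req[nreq++]);
    }
    MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);

    printf("rank %d received %d ints in total\n", iam, total_recv);

    free(outbox); free(inbox); free(req); free(SendCounts);
    MPI_Finalize();
    return 0;
}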
Exemple #20
0
int main (int argc, char *argv[])
{
  int procid, num_procs;
  MPI_Status status;
  // derivative_time, integral_time, err_time are the local sums of runtime for each computation
  // tick is used to mark time
  double derivative_time = 0, integral_time = 0, err_time = 0, tick;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &procid);
  MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

  // Calculate grid-points per process
  if(NGRID % num_procs > 0)
  {
	if(procid == 0) printf("NGRID should be divisible by the number of processes!");
	MPI_Finalize();
	return 1;
  }
  int points_per_node = NGRID / num_procs;

  //loop index
  int i;

  //domain array and step size
  FP_PREC xc[points_per_node], dx;

  //function array and derivative
  //the size depends on the number of
  //processes used to run the program
  FP_PREC yc[points_per_node], dyc[points_per_node];
  
  //integration values
  FP_PREC local_intg, intg;

  //error analysis array
  FP_PREC derr[points_per_node];

  //error analysis values
  FP_PREC dlocal_sum_err, davg_err, dlocal_std_dev, dstd_dev, intg_err;

  //calculate dx
  dx = (FP_PREC)(XF - XI)/(FP_PREC)(NGRID - 1);

  // get start X for each process (my_XI)
  int bins_before_me = procid * points_per_node;
  FP_PREC my_XI = XI + bins_before_me * dx;

  //construct grid
  for (i = 0; i < points_per_node; ++i)
  {
    xc[i] = my_XI + i * dx;
  }

  //define the function
  for(i = 0; i < points_per_node; ++i)
  {
    yc[i] = fn(xc[i]);
  }

  //define holders for the left and right boundary values
  FP_PREC left_bound_yc, right_bound_yc;
  if(procid == 0) left_bound_yc = fn(XI-dx);
  if(procid == num_procs - 1) right_bound_yc = fn(XF+dx);

  tick = MPI_Wtime();
#if BLOCKING
  if(procid == 0) printf("Using blocking message! \n");
  //Step 1: even nodes send to the right then receive back
  //Step 2: even nodes receive from the left then send back
  if(procid % 2 == 0)
  {
    if(procid < num_procs - 1)
    {
	MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
	MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
    }
    if(procid > 0)
    {
	MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
	MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
    }
  } else
  {
    MPI_Recv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &status);
    MPI_Send(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD);
    if(procid < num_procs - 1)
    {
    	MPI_Send(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD);
    	MPI_Recv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &status);
    }
  }
#else
  if(procid == 0) printf("Using non-blocking message! \n");
  MPI_Request request[4];
  int current_request = 0;
  if(procid < num_procs - 1)
  { // receive right bound yc
      MPI_Irecv(&right_bound_yc, 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid > 0)
  { // receive left bound yc
      MPI_Irecv(&left_bound_yc, 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid < num_procs - 1)
  { // send right bound yc to right node
      MPI_Isend(&yc[points_per_node-1], 1, MPI_DOUBLE, procid+1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
  if(procid > 0)
  { // send left bound yc to left node
      MPI_Isend(&yc[0], 1, MPI_DOUBLE, procid-1, 0, MPI_COMM_WORLD, &request[current_request]);
      ++current_request;
  }
#endif
  derivative_time += MPI_Wtime() - tick;
  integral_time += MPI_Wtime() - tick;

  // Overlap computation and communication BEGIN
  //compute the first derivative using central finite differencing
  tick = MPI_Wtime();
  for (i = 1; i < points_per_node-1; ++i)
  {
    dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx);
  }
  derivative_time += MPI_Wtime() - tick;

  //compute the integral using the trapezoidal rule
  tick = MPI_Wtime();
  local_intg = 0.0;
  for (i = 0; i < points_per_node-1; ++i)
  {
    local_intg += 0.5 * (yc[i] + yc[i + 1]) * dx;
  }
  integral_time += MPI_Wtime() - tick;
  // Overlap computation and communication END

  // wait for the non-blocking messages to complete before continuing
#if !BLOCKING
  tick = MPI_Wtime();
  MPI_Waitall(current_request, request, MPI_STATUSES_IGNORE);
  derivative_time += MPI_Wtime() - tick;
  integral_time += MPI_Wtime() - tick;
#endif

  // compute derivative of boundary points, runtime is not counted because it's quite small
  dyc[0] = (yc[1] - left_bound_yc)/(2.0 * dx);
  dyc[points_per_node-1] = (right_bound_yc - yc[points_per_node-2])/(2.0 * dx);

  // compute integral at right boundary point, runtime is not counted because it's quite small
  if(procid < num_procs-1) local_intg += 0.5 * (yc[points_per_node-1] + right_bound_yc) * dx;

  tick = MPI_Wtime();
  //compute the relative error of the derivative at each point
  for(i = 0; i < points_per_node; ++i)
  {
    if(dfn(xc[i]) == 0)
    {
      printf("WARNING: derivative at point %d on process %d is zero.\n", i, procid);
      derr[i] = 0;
    }
    else derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i]));
  }

  //find the local average error
  dlocal_sum_err = 0.0;
  for(i = 0; i < points_per_node; ++i)
  {
    dlocal_sum_err += derr[i];
  }

  //calculate and output errors
#if SINGLE_CALL_REDUCTION
  if(procid == 0) printf("Using single call reduction! \n");
  //all nodes collect sum err and convert it to the mean value
  MPI_Allreduce(&dlocal_sum_err, &davg_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  davg_err /= (FP_PREC)NGRID; // each process calculates global average
#else
  if(procid == 0) printf("Using manual call reduction! \n");
  //all nodes collect sum err and convert it to the mean value
  if(procid != 0) MPI_Send(&dlocal_sum_err, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
  else if(procid == 0)
  {
    davg_err = dlocal_sum_err;
    for(i = 1; i < num_procs; ++i)
    {
      MPI_Recv(&dlocal_sum_err, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
      davg_err += dlocal_sum_err;
    }
    davg_err /= (FP_PREC)NGRID;
  }
  MPI_Bcast(&davg_err, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
#endif

  //now all nodes have davg_err, find sum squared differences of local derr
  dlocal_std_dev = 0.0;
  for(i = 0; i < points_per_node; ++i)
  {
    dlocal_std_dev += pow(derr[i] - davg_err, 2);
  }
  err_time += MPI_Wtime() - tick;

#if SINGLE_CALL_REDUCTION
  //reduce local integral & local (sum squared differences of derr) to root
  tick = MPI_Wtime();
  MPI_Reduce(&dlocal_std_dev, &dstd_dev, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  err_time += MPI_Wtime() - tick;
  tick = MPI_Wtime();
  MPI_Reduce(&local_intg, &intg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  integral_time += MPI_Wtime() - tick;
#else
  //reduce local integral & local (sum squared differences of derr) to root
  if(procid != 0)
  {
    tick = MPI_Wtime();
    MPI_Send(&dlocal_std_dev, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    err_time += MPI_Wtime() - tick;
    tick = MPI_Wtime();
    MPI_Send(&local_intg, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
    integral_time += MPI_Wtime() - tick;
  } else if(procid == 0)
  {
    dstd_dev = dlocal_std_dev;
    intg = local_intg;
    tick = MPI_Wtime();
    for(i = 1; i < num_procs; ++i)
    {
      MPI_Recv(&dlocal_std_dev, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
      dstd_dev += dlocal_std_dev;
    }
    err_time += MPI_Wtime() - tick;
    tick = MPI_Wtime();
    for(i = 1; i < num_procs; ++i)
    {
      MPI_Recv(&local_intg, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
      intg+= local_intg;
    }
    integral_time += MPI_Wtime() - tick;
  }
#endif

  // print out the max runtime for each calculation
  double max_derivative_time, max_integral_time, max_err_time;
  MPI_Reduce(&derivative_time, &max_derivative_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&integral_time, &max_integral_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&err_time, &max_err_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  if(procid == 0)
  {
    printf("Max runtime to calculate derivatives is %e\n", max_derivative_time);
    printf("Max runtime to calculate integral is %e\n", max_integral_time);
    printf("Max runtime to calculate derivative errors is %e\n", max_err_time);
  }

  //gather derivative results & errors for output
  //this part shouldn't be included in running time measurements
  FP_PREC *final_dyc = NULL;
  FP_PREC *final_derr = NULL;
  if(procid == 0)
  {
    final_dyc = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
    final_derr = (FP_PREC*)malloc(NGRID * sizeof(FP_PREC));
  }
  MPI_Gather(dyc, points_per_node, MPI_DOUBLE, final_dyc, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Gather(derr, points_per_node, MPI_DOUBLE, final_derr, points_per_node, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  //final output at root node (rank 0)
  if(procid == 0)
  {
    dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID);
    if(ifn(XI, XF) == 0) {
      printf("WARNING: true integral value from XI to XF is equal to zero.\n");
      intg_err = 0;
    } else {
      intg_err = fabs((ifn(XI, XF) - intg)/ifn(XI, XF));
    }

    print_function_data(NGRID, dx, final_dyc);
    print_error_data(NGRID, davg_err, dstd_dev, intg_err, dx, final_derr);

    free(final_dyc);
    free(final_derr);
  }

  MPI_Finalize();
  return 0;
}
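Aside: the SINGLE_CALL_REDUCTION branches above compute the same global quantities either with one collective call or with an explicit Send/Recv loop followed by a broadcast. A small self-contained sketch (illustrative values only) showing that both variants deliver the same global sum on every rank:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, size, i;
    double local, viaAllreduce = 0.0, viaManual = 0.0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    local = (double)(rank + 1);    /* some per-process value */

    /* variant 1: a single collective call */
    MPI_Allreduce(&local, &viaAllreduce, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    /* variant 2: rank 0 accumulates explicit messages, then broadcasts */
    if (rank != 0) {
        MPI_Send(&local, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    } else {
        double tmp;
        viaManual = local;
        for (i = 1; i < size; ++i) {
            MPI_Recv(&tmp, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            viaManual += tmp;
        }
    }
    MPI_Bcast(&viaManual, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("allreduce=%g manual=%g (expected %g)\n",
               viaAllreduce, viaManual, 0.5 * size * (size + 1.0));

    MPI_Finalize();
    return 0;
}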
Exemple #21
0
// main calculation method for the Jacobi iteration
void calculateJacobi () {
    double star;
    double residuum;
    double maxresiduum_temp;

    double pih = 0.0;
    double fpisin = 0.0;

    int term_iteration = options.term_iteration;

    if (options.inf_func == FUNC_FPISIN)
    {
        pih = PI * h;
        fpisin = 0.25 * TWO_PI_SQUARE * h * h;
    }

    iteration = 0;
    while (term_iteration > 0)
    {
        // swap matrices
        oldmatrix = Matrix[iteration%2];
        newmatrix = Matrix[(iteration+1)%2];

        maxresiduum = 0;

        // send lines
        MPI_Request r1;
        MPI_Request r2;
        MPI_Status s1;
        MPI_Status s2;
        if (rank != root) {
            MPI_Isend(oldmatrix[1], N+1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, &r1);
        }
        if (rank != last) {
            MPI_Isend(oldmatrix[actuallines-2], N+1, MPI_DOUBLE, rank+1, 0, MPI_COMM_WORLD, &r2);
        }


        // alternative mode (the line exchange and the computation of the inner rows run in parallel)
        // - deal with the "middle" rows first
        if (altermode) {
            // over all non-halo rows
            for (int i = 2; i < actuallines-2; i++)
            {
                double fpisin_i = 0.0;

                if (options.inf_func == FUNC_FPISIN)
                {
                    fpisin_i = fpisin * sin(pih * (double)(i+startline-1));
                }

                // over all columns
                for (int j = 1; j < N; j++)
                {
                    star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]);

                    if (options.inf_func == FUNC_FPISIN)
                    {
                        star += fpisin_i * sin(pih * (double)j);
                    }

                    if (options.termination == TERM_PREC || term_iteration == 1)
                    {
                        residuum = oldmatrix[i][j] - star;
                        residuum = (residuum < 0) ? -residuum : residuum;
                        maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum;
                    }

                    newmatrix[i][j] = star;
                }
            }
        }

        // receive the halo lines, then wait for our sends to complete
        if (rank != root) {
            MPI_Recv(oldmatrix[0], N+1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        if (rank != last) {
            MPI_Recv(oldmatrix[actuallines-1], N+1, MPI_DOUBLE, rank+1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        if (rank != root) {
            MPI_Wait(&r1, &s1);
        }
        if (rank != last) {
            MPI_Wait(&r2, &s2);
        }
        MPI_Barrier(MPI_COMM_WORLD);

        // alternative mode - now deal with the remaining first and last local rows, which need the received halo lines
        if (altermode) {
            // first line

            int i = 1;
            double fpisin_i = 0.0;
            if (options.inf_func == FUNC_FPISIN)
            {
                fpisin_i = fpisin * sin(pih * (double)(i+startline-1));
            }
            // over all columns
            for (int j = 1; j < N; j++)
            {
                star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]);

                if (options.inf_func == FUNC_FPISIN)
                {
                    star += fpisin_i * sin(pih * (double)j);
                }

                if (options.termination == TERM_PREC || term_iteration == 1)
                {
                    residuum = oldmatrix[i][j] - star;
                    residuum = (residuum < 0) ? -residuum : residuum;
                    maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum;
                }

                newmatrix[i][j] = star;
            }

            // last line
            i = (actuallines-2);
            fpisin_i = 0.0;
            if (options.inf_func == FUNC_FPISIN)
            {
                fpisin_i = fpisin * sin(pih * (double)(i+startline-1));
            }
            // over all columns
            for (int j = 1; j < N; j++)
            {
                star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]);

                if (options.inf_func == FUNC_FPISIN)
                {
                    star += fpisin_i * sin(pih * (double)j);
                }

                if (options.termination == TERM_PREC || term_iteration == 1)
                {
                    residuum = oldmatrix[i][j] - star;
                    residuum = (residuum < 0) ? -residuum : residuum;
                    maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum;
                }

                newmatrix[i][j] = star;
            }

        }
        else {
            // over all rows
            for (int i = 1; i < actuallines-1; i++)
            {
                double fpisin_i = 0.0;

                if (options.inf_func == FUNC_FPISIN)
                {
                    fpisin_i = fpisin * sin(pih * (double)(i+startline-1));
                }

                // over all columns
                for (int j = 1; j < N; j++)
                {
                    star = 0.25 * (oldmatrix[i-1][j] + oldmatrix[i][j-1] + oldmatrix[i][j+1] + oldmatrix[i+1][j]);

                    if (options.inf_func == FUNC_FPISIN)
                    {
                        star += fpisin_i * sin(pih * (double)j);
                    }

                    if (options.termination == TERM_PREC || term_iteration == 1)
                    {
                        residuum = oldmatrix[i][j] - star;
                        residuum = (residuum < 0) ? -residuum : residuum;
                        //printf("%15.14f\n", residuum);
                        maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum;
                    }

                    newmatrix[i][j] = star;
                }
            }
        }

        /* check for stopping calculation, depending on termination method */
        if (options.termination == TERM_PREC)
        {
            maxresiduum_temp = maxresiduum;
            MPI_Allreduce(&maxresiduum_temp, &maxresiduum, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
            if (maxresiduum < options.term_precision)
            {
                term_iteration = 0;
            }
        }
        else if (options.termination == TERM_ITER)
        {
            term_iteration--;
        }

        iteration++;
    }
}
Exemple #22
0
void xchange_2fields(spinor * const l, spinor * const k, const int ieo) {

    MPI_Request requests[32];
    MPI_Status status[32];
    int reqcount = 0;
#if defined PARALLELXYZT
    int ix=0;
#endif

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchange2fields)
#endif

#  ifdef MPI

#  if (defined BGL && defined XLC)
#    ifdef PARALLELXYZT
    __alignx(16, field_buffer_z);
    __alignx(16, field_buffer_z2);
    __alignx(16, field_buffer_z3);
    __alignx(16, field_buffer_z4);
#    endif
    __alignx(16, l);
#  endif

    /* send the data to the neighbour on the left */
    /* receive the data from the neighbour on the right */
    MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+T*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right */
    /* receive the data from the neighbour on the left */
    MPI_Isend((void*)(l+(T-1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 82, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+(T+1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left */
    /* receive the data from the neighbour on the right */
    MPI_Isend((void*)k, 1, field_time_slice_cont, g_nb_t_dn, 83, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+T*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 83, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right */
    /* receive the data from the neighbour on the left */
    MPI_Isend((void*)(k+(T-1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_up, 84, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+(T+1)*LX*LY*LZ/2), 1, field_time_slice_cont, g_nb_t_dn, 84, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;


#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
    /* send the data to the neighbour on the left in x direction */
    /* receive the data from the neighbour on the right in x direction */
    MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid,  &requests[reqcount]);
    MPI_Irecv((void*)(l+(T+2)*LX*LY*LZ/2), 1, field_x_slice_cont, g_nb_x_up, 91, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in x direction */
    /* receive the data from the neighbour on the left in x direction */
    MPI_Isend((void*)(l+(LX-1)*LY*LZ/2), 1, field_x_slice_gath, g_nb_x_up, 92, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + T*LY*LZ)/2), 1, field_x_slice_cont, g_nb_x_dn, 92, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left in x direction */
    /* receive the data from the neighbour on the right in x direction */
    MPI_Isend((void*)k, 1, field_x_slice_gath, g_nb_x_dn, 93, g_cart_grid,  &requests[reqcount]);
    MPI_Irecv((void*)(k+(T+2)*LX*LY*LZ/2), 1, field_x_slice_cont, g_nb_x_up, 93, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in x direction */
    /* receive the data from the neighbour on the left in x direction */
    MPI_Isend((void*)(k+(LX-1)*LY*LZ/2), 1, field_x_slice_gath, g_nb_x_up, 94, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + T*LY*LZ)/2), 1, field_x_slice_cont, g_nb_x_dn, 94, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;
#    endif

#    if (defined PARALLELXYT || defined PARALLELXYZT)
    /* send the data to the neighbour on the left in y direction */
    /* receive the data from the neighbour on the right in y direction */
    MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + 2*T*LY*LZ)/2), 1, field_y_slice_cont, g_nb_y_up, 101, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in y direction */
    /* receive the data from the neighbour on the left in y direction */
    MPI_Isend((void*)(l+(LY-1)*LZ/2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + 2*T*LY*LZ + T*LX*LZ)/2), 1, field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left in y direction */
    /* receive the data from the neighbour on the right in y direction */
    MPI_Isend((void*)k, 1, field_y_slice_gath, g_nb_y_dn, 103, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + 2*T*LY*LZ)/2), 1, field_y_slice_cont, g_nb_y_up, 103, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in y direction */
    /* receive the data from the neighbour on the left in y direction */
    MPI_Isend((void*)(k+(LY-1)*LZ/2), 1, field_y_slice_gath, g_nb_y_up, 104, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+((T+2)*LX*LY*LZ + 2*T*LY*LZ + T*LX*LZ)/2), 1, field_y_slice_cont, g_nb_y_dn, 104, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

#    endif

#    if (defined PARALLELXYZT)
    /* fill the buffer! */
    /* This depends on whether the field is */
    /* even or odd */
    if(ieo == 1) {
        for(ix = 0; ix < T*LX*LY/2; ix++) {
            field_buffer_z[ix] = l[ g_field_z_ipt_even[ix] ];
        }
    }
    else {
        for(ix = 0; ix < T*LX*LY/2; ix++) {
            field_buffer_z[ix] = l[ g_field_z_ipt_odd[ix] ];
        }
    }
    if(ieo == 1) {
        for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
            field_buffer_z2[ix-T*LX*LY/2] = l[ g_field_z_ipt_even[ix] ];
        }
    }
    else {
        for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
            field_buffer_z2[ix-T*LX*LY/2] = l[ g_field_z_ipt_odd[ix] ];
        }
    }
    /* send the data to the neighbour on the left in z direction */
    /* receive the data from the neighbour on the right in z direction */
    MPI_Isend((void*)field_buffer_z, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 503, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+(VOLUME/2 + LX*LY*LZ + T*LY*LZ +T*LX*LZ)), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 503, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in z direction */
    /* receive the data from the neighbour on the left in z direction */
    MPI_Isend((void*)field_buffer_z2, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 504, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+(VOLUME + 2*LX*LY*LZ + 2*T*LY*LZ + 2*T*LX*LZ + T*LX*LY)/2), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 504, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* fill the buffer! */
    /* This depends on whether the field is */
    /* even or odd */
    if(ieo == 0) {
        for(ix = 0; ix < T*LX*LY/2; ix++) {
            field_buffer_z3[ix] = k[ g_field_z_ipt_even[ix] ];
        }
    }
    else {
        for(ix = 0; ix < T*LX*LY/2; ix++) {
            field_buffer_z3[ix] = k[ g_field_z_ipt_odd[ix] ];
        }
    }
    if(ieo == 0) {
        for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
            field_buffer_z4[ix-T*LX*LY/2] = k[ g_field_z_ipt_even[ix] ];
        }
    }
    else {
        for(ix = T*LX*LY/2; ix < T*LX*LY; ix++) {
            field_buffer_z4[ix-T*LX*LY/2] = k[ g_field_z_ipt_odd[ix] ];
        }
    }
    /* send the data to the neighbour on the left in z direction */
    /* receive the data from the neighbour on the right in z direction */
    MPI_Isend((void*)field_buffer_z3, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 505, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+(VOLUME/2 + LX*LY*LZ + T*LY*LZ +T*LX*LZ)), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 505, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in z direction */
    /* receive the data from the neighbour on the left in z direction */
    MPI_Isend((void*)field_buffer_z4, 12*T*LX*LY, MPI_DOUBLE, g_nb_z_up, 506, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+(VOLUME + 2*LX*LY*LZ + 2*T*LY*LZ + 2*T*LX*LZ + T*LX*LY)/2), 12*T*LX*LY, MPI_DOUBLE, g_nb_z_dn, 506, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;


#    endif


    MPI_Waitall(reqcount, requests, status);
#  endif
    return;
#ifdef _KOJAK_INST
#pragma pomp inst end(xchange2fields)
#endif
}
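Aside: the field_*_slice_cont / field_*_slice_gath datatypes used above are defined elsewhere in tmLQCD; conceptually the "gath" types describe a non-contiguous face of the local lattice. A generic sketch (not the tmLQCD definition) of how such a strided face can be described with MPI_Type_vector and sent without packing it into a separate buffer first:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    enum { nrows = 4, ncols = 6 };
    double a[nrows][ncols];      /* toy local array, row-major */
    double col[nrows];
    MPI_Datatype first_column;
    int rank, i, j;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    for (i = 0; i < nrows; ++i)
        for (j = 0; j < ncols; ++j)
            a[i][j] = 10.0 * i + j;

    /* nrows blocks of 1 double each, consecutive blocks ncols doubles apart:
     * this picks out the first column of the array */
    MPI_Type_vector(nrows, 1, ncols, MPI_DOUBLE, &first_column);
    MPI_Type_commit(&first_column);

    /* send the strided column to ourselves, receiving it contiguously */
    MPI_Sendrecv(&a[0][0], 1, first_column, rank, 0,
                 col, nrows, MPI_DOUBLE, rank, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    printf("rank %d: column 0 = %g %g %g %g\n", rank, col[0], col[1], col[2], col[3]);

    MPI_Type_free(&first_column);
    MPI_Finalize();
    return 0;
}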
Exemple #23
0
int main(int argc, char *argv[])
{
    int errs = 0;
    int rank, size, dest;
    MPI_Comm comm;
    MPI_Status status;
    MPI_Request req;
    static int bufsizes[4] = { 1, 100, 10000, 1000000 };
    char *buf;
#ifdef TEST_IRSEND
    int veryPicky = 0;          /* Set to 1 to test "quality of implementation" in
                                 * a tricky part of cancel */
#endif
    int cs, flag, n;

    MTest_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    dest = size - 1;

    MTestPrintfMsg(1, "Starting scancel test\n");
    for (cs = 0; cs < 4; cs++) {
        if (rank == 0) {
            n = bufsizes[cs];
            buf = (char *) malloc(n);
            if (!buf) {
                fprintf(stderr, "Unable to allocate %d bytes\n", n);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            MTestPrintfMsg(1, "(%d) About to create isend and cancel\n", cs);
            MPI_Isend(buf, n, MPI_CHAR, dest, cs + n + 1, comm, &req);
            MPI_Cancel(&req);
            MPI_Wait(&req, &status);
            MTestPrintfMsg(1, "Completed wait on isend\n");
            MPI_Test_cancelled(&status, &flag);
            if (!flag) {
                errs++;
                printf("Failed to cancel an Isend request\n");
                fflush(stdout);
            }
            else {
                n = 0;
            }
            /* Send the size, zero for successfully cancelled */
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            /* Send the tag so the message can be received */
            n = cs + n + 1;
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            free(buf);
        }
        else if (rank == dest) {
            int nn, tag;
            char *btemp;
            MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status);
            MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status);
            if (nn > 0) {
                /* If the message was not cancelled, receive it here */
                btemp = (char *) malloc(nn);
                if (!btemp) {
                    fprintf(stderr, "Unable to allocate %d bytes\n", nn);
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status);
                free(btemp);
            }
        }
        MPI_Barrier(comm);
#ifdef TEST_IRSEND
        if (rank == 0) {
            char *bsendbuf;
            int bsendbufsize;
            int bf, bs;
            n = bufsizes[cs];
            buf = (char *) malloc(n);
            if (!buf) {
                fprintf(stderr, "Unable to allocate %d bytes\n", n);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            bsendbufsize = n + MPI_BSEND_OVERHEAD;
            bsendbuf = (char *) malloc(bsendbufsize);
            if (!bsendbuf) {
                fprintf(stderr, "Unable to allocate %d bytes for bsend\n", n);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            MPI_Buffer_attach(bsendbuf, bsendbufsize);
            MTestPrintfMsg(1, "About to create and cancel ibsend\n");
            MPI_Ibsend(buf, n, MPI_CHAR, dest, cs + n + 2, comm, &req);
            MPI_Cancel(&req);
            MPI_Wait(&req, &status);
            MPI_Test_cancelled(&status, &flag);
            if (!flag) {
                errs++;
                printf("Failed to cancel an Ibsend request\n");
                fflush(stdout);
            }
            else {
                n = 0;
            }
            /* Send the size, zero for successfully cancelled */
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            /* Send the tag so the message can be received */
            n = cs + n + 2;
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            free(buf);
            MPI_Buffer_detach(&bf, &bs);
            free(bsendbuf);
        }
        else if (rank == dest) {
            int nn, tag;
            char *btemp;
            MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status);
            MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status);
            if (nn > 0) {
                /* If the message was not cancelled, receive it here */
                btemp = (char *) malloc(nn);
                if (!btemp) {
                    fprintf(stderr, "Unable to allocate %d bytes\n", nn);
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status);
                free(btemp);
            }
        }
        MPI_Barrier(comm);

        /* Because this test is erroneous, we do not perform it unless
         * TEST_IRSEND is defined.  */

        /* We avoid ready send to self because an implementation
         * is free to detect the error in delivering a message to
         * itself without a pending receive; we could also check
         * for an error return from the MPI_Irsend */
        if (rank == 0 && dest != rank) {
            n = bufsizes[cs];
            buf = (char *) malloc(n);
            if (!buf) {
                fprintf(stderr, "Unable to allocate %d bytes\n", n);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            MTestPrintfMsg(1, "About to create and cancel irsend\n");
            MPI_Irsend(buf, n, MPI_CHAR, dest, cs + n + 3, comm, &req);
            MPI_Cancel(&req);
            MPI_Wait(&req, &status);
            MPI_Test_cancelled(&status, &flag);
            /* This can be pretty ugly.  The standard is clear (Section 3.8)
             * that either a sent message is received or the
             * sent message is successfully cancelled.  Since this message
             * can never be received, the cancel must complete
             * successfully.
             *
             * However, since there is no matching receive, this
             * program is erroneous.  In this case, we can't really
             * flag this as an error */
            if (!flag && veryPicky) {
                errs++;
                printf("Failed to cancel an Irsend request\n");
                fflush(stdout);
            }
            if (flag) {
                n = 0;
            }
            /* Send the size, zero for successfully cancelled */
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            /* Send the tag so the message can be received */
            n = cs + n + 3;
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            free(buf);
        }
        else if (rank == dest) {
            int n, tag;
            char *btemp;
            MPI_Recv(&n, 1, MPI_INT, 0, 123, comm, &status);
            MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status);
            if (n > 0) {
                /* If the message was not cancelled, receive it here */
                btemp = (char *) malloc(n);
                if (!btemp) {
                    fprintf(stderr, "Unable to allocate %d bytes\n", n);
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                MPI_Recv(btemp, n, MPI_CHAR, 0, tag, comm, &status);
                free(btemp);
            }
        }
        MPI_Barrier(comm);
#endif

        if (rank == 0) {
            n = bufsizes[cs];
            buf = (char *) malloc(n);
            if (!buf) {
                fprintf(stderr, "Unable to allocate %d bytes\n", n);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            MTestPrintfMsg(1, "About to create and cancel issend\n");
            MPI_Issend(buf, n, MPI_CHAR, dest, cs + n + 4, comm, &req);
            MPI_Cancel(&req);
            MPI_Wait(&req, &status);
            MPI_Test_cancelled(&status, &flag);
            if (!flag) {
                errs++;
                printf("Failed to cancel an Issend request\n");
                fflush(stdout);
            }
            else {
                n = 0;
            }
            /* Send the size, zero for successfully cancelled */
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            /* Send the tag so the message can be received */
            n = cs + n + 4;
            MPI_Send(&n, 1, MPI_INT, dest, 123, comm);
            free(buf);
        }
        else if (rank == dest) {
            int nn, tag;
            char *btemp;
            MPI_Recv(&nn, 1, MPI_INT, 0, 123, comm, &status);
            MPI_Recv(&tag, 1, MPI_INT, 0, 123, comm, &status);
            if (nn > 0) {
                /* If the message was not cancelled, receive it here */
                btemp = (char *) malloc(nn);
                if (!btemp) {
                    fprintf(stderr, "Unable to allocate %d bytes\n", nn);
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                MPI_Recv(btemp, nn, MPI_CHAR, 0, tag, comm, &status);
                free(btemp);
            }
        }
        MPI_Barrier(comm);
    }

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
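Aside: the cancel protocol exercised above is always the same three steps: MPI_Cancel, MPI_Wait, MPI_Test_cancelled. It is easiest to see on a receive, where cancelling an unmatched request is well defined. A minimal self-contained sketch (illustrative tag and buffer):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, flag, dummy = 0;
    MPI_Request req;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* post a receive that nobody will ever match ... */
    MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 42, MPI_COMM_WORLD, &req);

    /* ... then run the same three-step protocol as in the test above */
    MPI_Cancel(&req);
    MPI_Wait(&req, &status);
    MPI_Test_cancelled(&status, &flag);

    printf("rank %d: receive %s cancelled\n", rank, flag ? "was" : "was NOT");

    MPI_Finalize();
    return 0;
}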
Exemple #24
0
void xchange_2fields(spinor * const l, spinor * const k, const int ieo) {

#ifdef MPI
    MPI_Request requests[32];
    MPI_Status status[32];
#endif
    int reqcount = 0;
#if defined PARALLELXYZT
    int ix=0;
#endif

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchange2fields)
#endif

#  ifdef MPI

#  if (defined BGL && defined XLC)
    __alignx(16, l);
#  endif

#    if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT )
    /* send the data to the neighbour on the left */
    /* receive the data from the neighbour on the right */
    MPI_Isend((void*)(l+g_1st_t_int_dn), 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_t_ext_up), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right */
    /* receive the data from the neighbour on the left */
    MPI_Isend((void*)(l+g_1st_t_int_up), 1, field_time_slice_cont, g_nb_t_up, 82, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_t_ext_dn), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left */
    /* receive the data from the neighbour on the right */
    MPI_Isend((void*)(k+g_1st_t_int_dn), 1, field_time_slice_cont, g_nb_t_dn, 83, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_t_ext_up), 1, field_time_slice_cont, g_nb_t_up, 83, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right */
    /* receive the data from the neighbour on the left */
    MPI_Isend((void*)(k+g_1st_t_int_up), 1, field_time_slice_cont, g_nb_t_up, 84, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_t_ext_dn), 1, field_time_slice_cont, g_nb_t_dn, 84, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;
#    endif

#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )
    /* send the data to the neighbour on the left in x direction */
    /* receive the data from the neighbour on the right in x direction */
    MPI_Isend((void*)(l+g_1st_x_int_dn), 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid,  &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_x_ext_up), 1, field_x_slice_cont, g_nb_x_up, 91, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in x direction */
    /* receive the data from the neighbour on the left in x direction */
    MPI_Isend((void*)(l+g_1st_x_int_up), 1, field_x_slice_gath, g_nb_x_up, 92, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_x_ext_dn), 1, field_x_slice_cont, g_nb_x_dn, 92, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left in x direction */
    /* receive the data from the neighbour on the right in x direction */
    MPI_Isend((void*)(k+g_1st_x_int_dn), 1, field_x_slice_gath, g_nb_x_dn, 93, g_cart_grid,  &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_x_ext_up), 1, field_x_slice_cont, g_nb_x_up, 93, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in x direction */
    /* receive the data from the neighbour on the left in x direction */
    MPI_Isend((void*)(k+g_1st_x_int_up), 1, field_x_slice_gath, g_nb_x_up, 94, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_x_ext_dn), 1, field_x_slice_cont, g_nb_x_dn, 94, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;
#    endif

#    if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ )
    /* send the data to the neighbour on the left in y direction */
    /* receive the data from the neighbour on the right in y direction */
    MPI_Isend((void*)(l+g_1st_y_int_dn), 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_y_ext_up), 1, field_y_slice_cont, g_nb_y_up, 101, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in y direction */
    /* receive the data from the neighbour on the left in y direction */
    MPI_Isend((void*)(l+g_1st_y_int_up), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(l+g_1st_y_ext_dn), 1, field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the left in y direction */
    /* receive the data from the neighbour on the right in y direction */
    MPI_Isend((void*)(k+g_1st_y_int_dn), 1, field_y_slice_gath, g_nb_y_dn, 103, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_y_ext_up), 1, field_y_slice_cont, g_nb_y_up, 103, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

    /* send the data to the neighbour on the right in y direction */
    /* receive the data from the neighbour on the left in y direction */
    MPI_Isend((void*)(k+g_1st_y_int_up), 1, field_y_slice_gath, g_nb_y_up, 104, g_cart_grid, &requests[reqcount]);
    MPI_Irecv((void*)(k+g_1st_y_ext_dn), 1, field_y_slice_cont, g_nb_y_dn, 104, g_cart_grid, &requests[reqcount+1]);
    reqcount=reqcount+2;

#    endif

#    if (defined PARALLELXYZ || defined PARALLELXYZT)
    /* send the data to the neighbour on the left in z direction */
    /* receive the data from the neighbour on the right in z direction */
    if(ieo == 1) {
        MPI_Isend((void*)(l+g_1st_z_int_dn),1,field_z_slice_even_dn,g_nb_z_dn,503,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(l+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,503,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    } else {
        MPI_Isend((void*)(l+g_1st_z_int_dn),1,field_z_slice_odd_dn,g_nb_z_dn,503,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(l+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,503,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    }
    if(ieo == 1) {
        MPI_Isend((void*)(k+g_1st_z_int_dn),1,field_z_slice_even_dn,g_nb_z_dn,505,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(k+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,505,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    } else {
        MPI_Isend((void*)(k+g_1st_z_int_dn),1,field_z_slice_odd_dn,g_nb_z_dn,505,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(k+g_1st_z_ext_up),1,field_z_slice_cont,g_nb_z_up,505,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    }

    /* send the data to the neighbour on the right in z direction */
    /* receive the data from the neighbour on the left in z direction */
    if(ieo == 1) {
        MPI_Isend((void*)(l+g_1st_z_int_up),1,field_z_slice_even_up,g_nb_z_up,504,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(l+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,504,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    } else {
        MPI_Isend((void*)(l+g_1st_z_int_up),1,field_z_slice_odd_up,g_nb_z_up,504,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(l+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,504,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    }
    if(ieo == 1) {
        MPI_Isend((void*)(k+g_1st_z_int_up),1,field_z_slice_even_up,g_nb_z_up,506,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(k+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,506,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    } else {
        MPI_Isend((void*)(k+g_1st_z_int_up),1,field_z_slice_odd_up,g_nb_z_up,506,g_cart_grid,&requests[reqcount]);
        MPI_Irecv((void*)(k+g_1st_z_ext_dn),1,field_z_slice_cont,g_nb_z_dn,506,g_cart_grid,&requests[reqcount+1]);
        reqcount=reqcount+2;
    }

#    endif


    MPI_Waitall(reqcount, requests, status);
#  endif
    return;
#ifdef _KOJAK_INST
#pragma pomp inst end(xchange2fields)
#endif
}
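Stripped of the lattice-specific slice types and preprocessor branches, the routine above is an instance of the standard non-blocking halo exchange: post matching MPI_Isend/MPI_Irecv pairs for every direction, optionally overlap independent work, then complete everything with one MPI_Waitall. A minimal one-dimensional sketch of that pattern (an illustration with an assumed field layout, not code taken from the example above):

#include <mpi.h>

/* field has nloc interior points plus one halo point at each end:
   field[0] and field[nloc+1] are filled from the neighbours. */
void halo_exchange_1d(double *field, int nloc, MPI_Comm comm)
{
    MPI_Request req[4];
    MPI_Status  st[4];
    int rank, size;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    int left  = (rank - 1 + size) % size;
    int right = (rank + 1) % size;

    /* send the first interior point left, receive the right halo from the right */
    MPI_Isend(&field[1],        1, MPI_DOUBLE, left,  81, comm, &req[0]);
    MPI_Irecv(&field[nloc + 1], 1, MPI_DOUBLE, right, 81, comm, &req[1]);
    /* send the last interior point right, receive the left halo from the left */
    MPI_Isend(&field[nloc],     1, MPI_DOUBLE, right, 82, comm, &req[2]);
    MPI_Irecv(&field[0],        1, MPI_DOUBLE, left,  82, comm, &req[3]);

    /* independent work could overlap here; then complete everything at once */
    MPI_Waitall(4, req, st);
}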
Exemple #25
0
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                           *flat_buf, char **send_buf, ADIO_Offset 
                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                           MPI_Request *requests, int *sent_to_proc, 
                           int nprocs, int myrank, 
                           int contig_access_count, 
                           ADIO_Offset min_st_offset, ADIO_Offset fd_size,
                           ADIO_Offset *fd_start, ADIO_Offset *fd_end, 
                           int *send_buf_idx, int *curr_to_proc, 
                           int *done_to_proc, int iter,
                           MPI_Aint buftype_extent)
{
/* this function is only called if buftype is not contig */

    int i, p, flat_buf_idx;
    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
    int jj, n_buftypes;
    ADIO_Offset off, len, rem_len, user_buf_idx;

/*  curr_to_proc[p] = amount of data sent to proc. p that has already
    been accounted for so far
    done_to_proc[p] = amount of data already sent to proc. p in 
    previous iterations
    user_buf_idx = current location in user buffer 
    send_buf_idx[p] = current location in send_buf of proc. p  */
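/*  Illustrative example (an assumption, not part of the original comments):
    suppose send_size[p] = 100 and earlier iterations already delivered
    done_to_proc[p] = 250 bytes of the data destined for proc. p.  While
    curr_to_proc[p] + len stays at or below 250, the chunk is only skipped
    over (ADIOI_BUF_INCR advances the bookkeeping); the part beyond 250 is
    copied into send_buf[p] (ADIOI_BUF_COPY) until send_buf_idx[p] reaches
    100, at which point the filled buffer is handed to MPI_Isend below.  */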

    for (i=0; i < nprocs; i++) {
	send_buf_idx[i] = curr_to_proc[i] = 0;
	done_to_proc[i] = sent_to_proc[i];
    }
    jj = 0;

    user_buf_idx = flat_buf->indices[0];
    flat_buf_idx = 0;
    n_buftypes = 0;
    flat_buf_sz = flat_buf->blocklens[0];

    /* flat_buf_idx = current index into flattened buftype
       flat_buf_sz = size of current contiguous component in 
	                 flattened buf */

    for (i=0; i<contig_access_count; i++) { 
	off     = offset_list[i];
	rem_len = len_list[i];

	/*this request may span the file domains of more than one process*/
	while (rem_len != 0) {
	    len = rem_len;
	    /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no
	     * longer than the single region that processor "p" is responsible
	     * for.
	     */
	    p = ADIOI_Calc_aggregator(fd,
				      off,
				      min_st_offset,
				      &len,
				      fd_size,
				      fd_start,
				      fd_end);

	    if (send_buf_idx[p] < send_size[p]) {
		if (curr_to_proc[p]+len > done_to_proc[p]) {
		    if (done_to_proc[p] > curr_to_proc[p]) {
			size = ADIOI_MIN(curr_to_proc[p] + len - 
                                done_to_proc[p], send_size[p]-send_buf_idx[p]);
			buf_incr = done_to_proc[p] - curr_to_proc[p];
			ADIOI_BUF_INCR
      ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
		        buf_incr = curr_to_proc[p] + len - done_to_proc[p];
      ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
			/* ok to cast: bounded by cb buffer size */
			curr_to_proc[p] = done_to_proc[p] + (int)size;
		        ADIOI_BUF_COPY
		    }
		    else {
			size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
			buf_incr = len;
      ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
			curr_to_proc[p] += size;
			ADIOI_BUF_COPY
		    }
		    if (send_buf_idx[p] == send_size[p]) {
			MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, 
				myrank+p+100*iter, fd->comm, requests+jj);
			jj++;
		    }
		}
		else {
        ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
		    curr_to_proc[p] += len;
		    buf_incr = len;
		    ADIOI_BUF_INCR
		}
	    }
Exemple #26
0
void NEKTAR_MEX::MEX_post_send(){
  for (int i = 0; i < Npartners; ++i)
    MPI_Isend(send_buffer[i],message_size[i],MPI_DOUBLE,partner_list[i],my_rank+2999,comm,&request_send[i]);
}
int main(int argc, char * argv[])
{
   double *sbuf, *rbuf;
   int iter, maxiter, repeats[NCOUNTS];
   int count[NCOUNTS];
   int nc, nbytes;
   int taskid, ntasks;
   int itag = 99;
   int offset1,offset2;
   double etime;
   double latency, bw;

   sbuf = (double*) malloc(MAXPTS*sizeof(double));
   rbuf = (double*) malloc(MAXPTS*sizeof(double));

   MPI_Status mpi_status[2];
   MPI_Request mpi_request[2];

   /*----------------------------------------------*/
   /* define an array of counts for 8-byte (double) objects */
   /*----------------------------------------------*/
   count[0] = 0;
   count[1] = 1;
   count[2] = 4;
   count[3] = 12;
   count[4] = 40;
   count[5] = 125;
   count[6] = 400;
   count[7] = 1250;
   count[8] = 4000;
   count[9] = 12500;
   count[10] = 40000;
   count[11] = 125000;

   repeats[0] = 100;
   repeats[1] = 100;
   repeats[2] = 100;
   repeats[3] = 100;
   repeats[4] = 100;
   repeats[5] = 100;
   repeats[6] = 100;
   repeats[7] = 100;
   repeats[8] = 100;
   repeats[9] = 50;
   repeats[10] = 25;
   repeats[11] = 10;

   /*-----------------------------------------------------------*/
   /* set-up the parallel environment: assign ntasks and taskid */
   /*-----------------------------------------------------------*/
   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
   MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
   if ((ntasks % 2) != 0)
   {
      if (taskid == 0)
         fprintf(stdout,"You must specify an even number of MPI tasks.\n");
      MPI_Finalize();
      return(-1);
   }

   offset1=(NODESIZE-2);
   offset2=offset1+NODESIZE;
   if ((taskid>=2) && (taskid<NODESIZE))
     {
       for (nc=0; nc<NCOUNTS; nc++)
	 MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */
       MPI_Finalize();
       return(0);
     }
   if ((taskid>=(NODESIZE+2)) && (taskid<(2*NODESIZE)))
     {
       for (nc=0; nc<NCOUNTS; nc++)
	 MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */
       MPI_Finalize();
       return(0);
     }
   if ((taskid>=NODESIZE) && (taskid<(NODESIZE+2)))
     taskid-=offset1;
     
   /*----------------------------------------------------*/
   /* send/recv to ensure that the routines are loaded   */
   /*----------------------------------------------------*/
   nc = 1;
   if ((taskid < 2))
      MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), 
                itag, MPI_COMM_WORLD, &(mpi_request[0]));
   else
      MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), 
                itag, MPI_COMM_WORLD, &(mpi_request[0]));

   if ((taskid >= 2))
      MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), 
                itag, MPI_COMM_WORLD, &(mpi_request[1]));
   else
      MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), 
                itag, MPI_COMM_WORLD, &(mpi_request[1]));
   MPI_Waitall(2,mpi_request,mpi_status);

   /*--------------------------------------------------------*/
   /* send or receive messages, and measure round-trip time. */
   /* tasks with taskid < 2 and their partners (taskid >= 2) */
   /* exchange messages in both directions each iteration.   */
   /*--------------------------------------------------------*/
   for (nc=0; nc<NCOUNTS; nc++)
   {

      MPI_Barrier(MPI_COMM_WORLD); /* synchronize here */

      TEST_CLOCK_INIT
      maxiter = repeats[nc];
      for (iter=0; iter<maxiter; iter++)
      {
         /*--------------------------------------------*/
         /* send in one direction i->i+1               */
         /*--------------------------------------------*/
         if ((taskid <2))
             MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), 
                       itag, MPI_COMM_WORLD, &(mpi_request[0]));
         else
             MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), 
                       itag, MPI_COMM_WORLD, &(mpi_request[0]));

         /*--------------------------------------------*/
         /* send in the reverse direction i+1->i       */
         /*--------------------------------------------*/
         if ((taskid >= 2))
             MPI_Isend(sbuf, count[nc], MPI_DOUBLE, (taskid+offset2)%(2*NODESIZE), 
                       itag, MPI_COMM_WORLD, &(mpi_request[1]));
         else
             MPI_Irecv(rbuf, count[nc], MPI_DOUBLE, (taskid+NODESIZE)%(2*NODESIZE), 
                       itag, MPI_COMM_WORLD, &(mpi_request[1]));
         MPI_Waitall(2,mpi_request,mpi_status);

      }  /* end the repeat loop */
      TEST_CLOCK_STOP

      /*-----------------------------------------*/
      /* write timing data for each message size */
      /*-----------------------------------------*/
      nbytes = 8*count[nc]; /* 8 bytes per entry */
      etime = 0.5e3*(TEST_CLOCK_GET)/maxiter;
      if (taskid == 0)
      {
	  fprintf(stdout,"msglen = %8d bytes,   elapsed time = %.4lf msec\n", 
                  nbytes, etime);
      }
      if (nc == 0) latency = 1.0e3*etime;
      if (nc == (NCOUNTS-1))  bw = nbytes/(1.0e3*etime);
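      /* Worked example of the unit conversions (an illustration, assuming
         TEST_CLOCK_GET reports elapsed seconds for all maxiter iterations):
         etime = 0.5e3*t/maxiter is half the per-iteration exchange time,
         expressed in milliseconds.  For count = 125000 doubles we have
         nbytes = 1000000, so if etime = 0.5 msec then
         bw = nbytes/(1.0e3*etime) = 1000000/500 = 2000 MBytes/sec; for the
         zero-length message, latency = 1.0e3*etime is in microseconds. */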

   }  /* end the loop over message sizes */

   /*--------------------------------------------------------*/
   /* report approximate numbers for bandwidth and latency  */
   /*--------------------------------------------------------*/
   if (taskid == 0)
   {
       fprintf(stdout,"\nlatency = %.1lf microseconds\n", latency);
       fprintf(stdout,"bandwidth =  %.2lf MBytes/sec\n", bw);
       fprintf(stdout,"(approximate values for MPI_Isend/MPI_Irecv)\n");
   }

   MPI_Finalize();

   free(sbuf);
   free(rbuf);

   return(0);

}
Exemple #28
0
void NEKTAR_MEX::MEX_init (int *map, int n, int *AdjacentPartitions, int NAdjacentPartitions, MPI_Comm comm_in){

/*  
n   - [INPUT] integer - length of array "map", number of degrees of freedom processed in this partition
map - [INPUT] - array of integers - global IDs of the degrees of freedom processed in this partition
AdjacentPartitions - [INPUT] - array of integers - list of possible adjacent partitions
NAdjacentPartitions - [INPUT] integer - number of possible adjacent partitions
comm_in - [INPUT] - communicator
*/

/* At the beginning we assume that NAdjacentPartitions can be greater than or
   equal to the actual number of partitions to communicate with. */



  int *partner_map_size;
  int **partners_map;
  int *shared_dof;
  int i,j,k,ii,jj,partner;

  comm = comm_in;
  MPI_Comm_rank(comm,&my_rank);

#ifdef MEX_REPORT
  static int FLAG_INIT = 0;
#endif

  
  MPI_Request *request_recv_tmp, *request_send_tmp;
  //fprintf(stderr,"my_rank = %d, NAdjacentPartitions = %d\n",my_rank,NAdjacentPartitions);

  request_recv_tmp = new MPI_Request[NAdjacentPartitions];
  request_send_tmp = new MPI_Request[NAdjacentPartitions];

  partner_map_size  = new int[NAdjacentPartitions];
  shared_dof = new int[NAdjacentPartitions];

  
  for (i = 0; i < NAdjacentPartitions; ++i)
	  MPI_Irecv(&partner_map_size[i],1,MPI_INT,AdjacentPartitions[i],AdjacentPartitions[i],comm,&request_recv_tmp[i]);

  for (i = 0; i < NAdjacentPartitions; ++i)
	  MPI_Isend(&n,1,MPI_INT,AdjacentPartitions[i],my_rank,comm,&request_send_tmp[i]);


  MPI_Waitall(NAdjacentPartitions,request_recv_tmp,MPI_STATUS_IGNORE);
  MPI_Waitall(NAdjacentPartitions,request_send_tmp,MPI_STATUS_IGNORE);

 // allocate memory for incoming messages


  partners_map  = new int*[NAdjacentPartitions];
  for (i = 0; i < NAdjacentPartitions; i++)
      partners_map[i] = new int[partner_map_size[i]];


  //get partners map
  for (i = 0; i < NAdjacentPartitions; ++i)
	  MPI_Irecv(partners_map[i],partner_map_size[i],MPI_INT,AdjacentPartitions[i],AdjacentPartitions[i],comm,&request_recv_tmp[i]);

  //send local map to partners
  for (i = 0; i < NAdjacentPartitions; ++i)
	  MPI_Isend(map,n,MPI_INT,AdjacentPartitions[i],my_rank,comm,&request_send_tmp[i]);
  
  MPI_Waitall(NAdjacentPartitions,request_recv_tmp,MPI_STATUS_IGNORE);
  MPI_Waitall(NAdjacentPartitions,request_send_tmp,MPI_STATUS_IGNORE);


  // compare local map and partners map
  
  for (partner = 0; partner < NAdjacentPartitions; ++partner){
      shared_dof[partner] = 0;
      for (i = 0; i < n; ++i){
	  for (j = 0; j < partner_map_size[partner]; ++j){
	      if (map[i] == partners_map[partner][j]){
		  shared_dof[partner]++;
		  break;
	      }
	  }
      }
  }

  
  /* calculate the number of partitions to communicate with */
  for (partner = 0, Npartners = 0; partner < NAdjacentPartitions; ++partner){
    if (shared_dof[partner] > 0) 
      Npartners++;
  }

  delete[] request_recv_tmp;
  delete[] request_send_tmp;

  request_recv = (MPI_Request *) malloc(Npartners*sizeof(MPI_Request));
  request_send = (MPI_Request *) malloc(Npartners*sizeof(MPI_Request));

  for (i = 0; i < Npartners; ++i){
    request_recv[i] = MPI_REQUEST_NULL;
    request_send[i] = MPI_REQUEST_NULL;
  }

#if (defined (__bg__) || defined (__blrts__) )
  posix_memalign((void**)&partner_list,16, Npartners*sizeof(int));   
  posix_memalign((void**)&message_size,16, Npartners*sizeof(int));   
#else
  partner_list = (int *) malloc(Npartners*sizeof(int));
  message_size = (int *) malloc(Npartners*sizeof(int));
#endif

  for (partner = 0, i = 0; partner < NAdjacentPartitions; ++partner){
     if (shared_dof[partner] > 0){
         partner_list[i] = AdjacentPartitions[partner];
         message_size[i] = shared_dof[partner];
         i++;
     }
   }

   /* 
     the partner_list can now be sorted with respect to topology and message size
   */
  
  int *pivot, *message_size_tmp, *partner_map_tmp;

#if (defined (__bg__) || defined (__blrts__) )
  posix_memalign((void**)&my_coord,16, 4*sizeof(int));
  posix_memalign((void**)&partners_coordinates,16, Npartners*4*sizeof(int));
  get_rank_coordinates(my_coord); 
  get_partners_coordinates(Npartners,partner_list,my_coord,partners_coordinates,comm);
  posix_memalign((void**)&pivot,16, Npartners*sizeof(int));
  posix_memalign((void**)&message_size_tmp,16, Npartners*sizeof(int));
  posix_memalign((void**)&partner_map_tmp,16, Npartners*sizeof(int));
#else
  pivot            = (int *) malloc( Npartners*sizeof(int));
  message_size_tmp = (int *) malloc( Npartners*sizeof(int)); 
  partner_map_tmp  = (int *) malloc( Npartners*sizeof(int));
#endif  


   /* initialize pivot to default values */
   for (i = 0; i < Npartners;  ++i)
       pivot[i] = i;

#if (defined (__bg__) || defined (__blrts__) )
   reorder_partner_list_2(Npartners,partners_coordinates,my_coord,partner_list,message_size,pivot);
   free(partners_coordinates); 
#else
    sort_ascending(Npartners,partner_list,message_size,pivot);
#endif


   
   MPI_Barrier(comm);
   //if (my_rank == 0)
   //    fprintf(stderr,"MEX: sort_ascending - done\n");
      

   for (i = 0; i < Npartners;  ++i)
     message_size_tmp[pivot[i]] = message_size[i];

   memcpy(message_size,message_size_tmp,Npartners*sizeof(int));
  

   /* 
   map partner_list to AdjacentPartitions 
   partner_map_tmp  will store the index of partner_list[i] in AdjacentPartitions 
   */

   for (i = 0; i < Npartners;  ++i){
     for (j = 0; j < NAdjacentPartitions;  ++j){
        if (AdjacentPartitions[j] == partner_list[i]){
            partner_map_tmp[i] = j;
            break;
        }
     }
   }
 
   free(pivot);
   free(message_size_tmp);

   MPI_Barrier(comm);
   //if (my_rank == 0)
   //    fprintf(stderr,"MEX: partners reordering - done\n");

#if (defined (__bg__) || defined (__blrts__) )
   posix_memalign((void**)&message_send_map,16, Npartners*sizeof(int*));   
   for (i = 0; i < Npartners; ++i){
     posix_memalign((void**)&message_send_map[i],16, message_size[i]*sizeof(int));   
     memset(message_send_map[i],'\0',message_size[i]*sizeof(int));
   }

   posix_memalign((void**)&message_recv_map,16, Npartners*sizeof(int*));   
   for (i = 0; i < Npartners; ++i){
     posix_memalign((void**)&message_recv_map[i],16, message_size[i]*sizeof(int));   
     memset(message_recv_map[i],'\0',message_size[i]*sizeof(int));
   }

   posix_memalign((void**)&send_buffer,16, Npartners*sizeof(double*));
   for (i = 0; i < Npartners; ++i){
     posix_memalign((void**)&send_buffer[i],16, message_size[i]*sizeof(double));
     memset(send_buffer[i],'\0',message_size[i]*sizeof(double));
   }

   posix_memalign((void**)&recv_buffer,16, Npartners*sizeof(double*));
   for (i = 0; i < Npartners; ++i){
     posix_memalign((void**)&recv_buffer[i],16, message_size[i]*sizeof(double));
     memset(recv_buffer[i],'\0',message_size[i]*sizeof(double));
   }
#else
   
   message_send_map = (int **) malloc(Npartners*sizeof(int*));
   for (i = 0; i < Npartners; ++i){
     message_send_map[i] = (int *) malloc(message_size[i]*sizeof(int));
     memset(message_send_map[i],'\0',message_size[i]*sizeof(int));
   }

   message_recv_map = (int **) malloc(Npartners*sizeof(int*));
   for (i = 0; i < Npartners; ++i){
     message_recv_map[i]  = (int *) malloc(message_size[i]*sizeof(int)); 
     memset(message_recv_map[i],'\0',message_size[i]*sizeof(int));
   }

   send_buffer = (double **) malloc(Npartners*sizeof(double*));
   for (i = 0; i < Npartners; ++i){
     send_buffer[i] = (double *) malloc(message_size[i]*sizeof(double));
     memset(send_buffer[i],'\0',message_size[i]*sizeof(double));
   }
   recv_buffer = (double **) malloc(Npartners*sizeof(double*));
   for (i = 0; i < Npartners; ++i){
     recv_buffer[i] =  (double *) malloc(message_size[i]*sizeof(double));
     memset(recv_buffer[i],'\0',message_size[i]*sizeof(double));
   }
#endif

   /* To support an unsorted list of degrees of freedom, two maps are created.
      One can check whether the two maps are identical; if so, only one of them
      needs to be kept, and the pointers message_send_map[k] and
      message_recv_map[k] can point to the same array, which saves some memory.
   */
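   /* A hypothetical sketch of that aliasing (not enabled in this code): after
      the maps are filled below, one could do for each partner k
          if (memcmp(message_send_map[k], message_recv_map[k],
                     message_size[k]*sizeof(int)) == 0) {   // memcmp from <string.h>
              free(message_recv_map[k]);
              message_recv_map[k] = message_send_map[k];
          }
      so that identical maps share a single array.  */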

   MPI_Barrier(comm);
   //if (my_rank == 0)
   //    fprintf(stderr,"MEX: file = %s, line = %d\n",__FILE__,__LINE__);

  double map_time_start = MPI_Wtime();

#ifdef TEST_OMP
#pragma omp parallel private(partner, i, ii, j, jj)
{  
   #pragma omp for schedule(dynamic)
   for (k = 0; k < Npartners; ++k){

     partner = partner_map_tmp[k];

     for (i = 0, ii=0; i < n; ++i){
           for (j = 0; j < partner_map_size[partner]; ++j){
             if (map[i] == partners_map[partner][j]){
                 message_send_map[k][ii] = i;
                 ii++;
                 break;
             }
           }
         }
        for (j = 0, jj = 0; j < partner_map_size[partner]; ++j){
          for (i = 0; i < n; ++i){
             if (map[i] == partners_map[partner][j]){
                message_recv_map[k][jj] = i;
                jj++;
                break;
             }
          }
        }
   }
}
#else
   for (k = 0; k < Npartners; ++k){ 
     partner = partner_map_tmp[k];
        for (i = 0, ii=0; i < n; ++i){
           for (j = 0; j < partner_map_size[partner]; ++j){
             if (map[i] == partners_map[partner][j]){
                 message_send_map[k][ii] = i;
                 ii++;
                 break;
             }
           }
        }
        for (j = 0, jj = 0; j < partner_map_size[partner]; ++j){
          for (i = 0; i < n; ++i){
             if (map[i] == partners_map[partner][j]){
                message_recv_map[k][jj] = i;
                jj++;
                break;
             }
          }
        }
   }
#endif 

   MPI_Barrier(comm);
   //if (my_rank == 0)
   //    fprintf(stderr,"MEX: file = %s, line = %d map_time = %f\n",__FILE__,__LINE__,MPI_Wtime() - map_time_start);
 
   free(partner_map_tmp);

/*

   for (partner = 0, k = 0; partner < NAdjacentPartitions; ++partner){
      
     if (shared_dof[partner] == 0) continue;

     for (i = 0, ii=0; i < n; ++i){
	   for (j = 0; j < partner_map_size[partner]; ++j){
 	     if (map[i] == partners_map[partner][j]){
		 message_send_map[k][ii] = i;
		 ii++;
		 break;
	     }
	   }
	 }

        for (j = 0, jj = 0; j < partner_map_size[partner]; ++j){
          for (i = 0; i < n; ++i){
             if (map[i] == partners_map[partner][j]){
                message_recv_map[k][jj] = i;
		jj++;
                break;
	     }
	  }
	}
        k++;
   }
*/

#ifdef MEX_REPORT
 //print report to file
  FILE *pFile;
  char fname[128];
  sprintf(fname,"report_gs_init.%d.%d",FLAG_INIT,my_rank);
  pFile = fopen(fname,"w");
  for (i = 0; i < NAdjacentPartitions; ++i){
	  if (shared_dof[i] > 0)
		  fprintf(pFile,"%d  %d\n",AdjacentPartitions[i],shared_dof[i]);
  }
  fprintf(pFile,"-1 -1 \n");
  
  fprintf(pFile,"**\n");
  for (partner = 0; partner < Npartners; ++partner){
	  fprintf(pFile,"will send to partner %d array of size %d\n",partner_list[partner],message_size[partner]);
	  for (i = 0; i < message_size[partner]; ++i)
           fprintf(pFile,"%d  ", message_send_map[partner][i]);
      fprintf(pFile,"\n");
  }
  fprintf(pFile,"**\n");
  for (partner = 0; partner < Npartners; ++partner){
	  fprintf(pFile,"will recv from partner %d array of size %d\n",partner_list[partner],message_size[partner]);
	  for (i = 0; i < message_size[partner]; ++i)
           fprintf(pFile,"%d  ", message_recv_map[partner][i]);
      fprintf(pFile,"\n");
  }
  fprintf(pFile,"**\n");
  fprintf(pFile,"My d.o.f are:\n");
  for (i = 0; i < n; ++i)
    fprintf(pFile,"%d  ",map[i]);
  fprintf(pFile,"\n");


  fclose(pFile);
  FLAG_INIT++;
#endif

  delete[] shared_dof;
  delete[] partner_map_size;
  for (i = 0; i < NAdjacentPartitions; i++)
	  delete[] partners_map[i];
  delete[] partners_map;
}
int main(int argc, char **argv) {
    int rank, size, i;
    int root = 0;
    int hits = 0;  // index used for 'hits'
    int total = 1; // index used for 'total'
    int msg_waiting = 0;

    double results[2] = {0};

    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPI_Status status;
    MPI_Request request;

    // is root process
    if(rank == root) {
        double area;
        double total_hits = 0;
        double total_pokes = 0;

        while (1) {
            // check each slave process for results (non-blocking)
            for (i = 1; i < size; i++) {
                MPI_Iprobe(i, 0, comm, &msg_waiting, &status);
                // if slave process is sending results
                if (msg_waiting) {
                    MPI_Recv(&results, 2, MPI_DOUBLE, i, 0, comm, &status);
                    total_hits += results[hits];
                    total_pokes += results[total];
                }
            }
            if (total_pokes >= 15000000000) {
                area = (total_hits / total_pokes) * 4;
                printf("Area=%.12lf\n", area);
                // send terminating message to each slave process
                for (i = 1; i < size; i++) {
                    MPI_Isend(&area, 1, MPI_DOUBLE, i, 0, comm, &request);
                }
                break;
            }
        }
    // is slave process
    } else {
        int cpu_count = get_cpu_count();
        double shared_results[cpu_count * 2];
        double l_hits = 0;
        double l_total = 0;

        pthread_t threads[cpu_count];
        t_data thread_data[cpu_count];

        for (i = 0; i < cpu_count; i++) {
            thread_data[i].id = i;
            thread_data[i].rank = rank;
            thread_data[i].results = shared_results;
            pthread_create(&threads[i], NULL, &throw_darts, &thread_data[i]);
        }

        // periodically reads results from shared memory; sends to root process
        while(1) {
            sleep(3);
            // first checks for termination flag from root process
            MPI_Iprobe(root, 0, comm, &msg_waiting, &status);
            if (msg_waiting) {
                // terminate threads
                for (i = 0; i < cpu_count; i++) {
                    pthread_cancel(threads[i]);
                }
                break;
            } else {
                results[hits] = 0;
                results[total] = 0;
                for (i = 0; i < cpu_count; i++) {
                    results[hits] += shared_results[i * 2];
                    results[total] += shared_results[i * 2 + 1];
                }
                results[hits] -= l_hits;
                results[total] -= l_total;
                l_hits += results[hits];
                l_total += results[total];
                // send results to root process
                MPI_Isend(&results, 2, MPI_DOUBLE, root, 0, comm, &request);
            }
        }
    }

    MPI_Finalize();
    return 0;
}
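The struct t_data, the worker throw_darts and get_cpu_count are defined elsewhere in the original example. The sketch below is a hypothetical stand-in, included only to make explicit the shared_results layout that the loop above reads: slot 2*id holds the hit count of thread id, and slot 2*id+1 its total number of throws.

#include <pthread.h>
#include <stdlib.h>

typedef struct {
    int id;           /* thread index within this rank        */
    int rank;         /* MPI rank, used here only to seed RNG */
    double *results;  /* shared array, two entries per thread */
} t_data;

static void *throw_darts(void *arg)
{
    t_data *d = (t_data *) arg;
    unsigned int seed = 1234u + 101u * (unsigned) d->rank + (unsigned) d->id;
    double hits = 0.0, total = 0.0;

    /* allow the pthread_cancel() issued by main to interrupt the loop */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

    for (;;) {
        double x = (double) rand_r(&seed) / RAND_MAX;
        double y = (double) rand_r(&seed) / RAND_MAX;
        if (x * x + y * y <= 1.0)
            hits += 1.0;
        total += 1.0;
        d->results[2 * d->id]     = hits;   /* read periodically by the  */
        d->results[2 * d->id + 1] = total;  /* MPI loop and sent to root */
    }
    return NULL;
}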
Exemple #30
0
/**
 * main function
 */
int main(int argc, char *argv[]){

	//************************************************************
	// 0. Variables
	char * matrix_name; /**< Name of the input matrix */
	int rows;           /**< Number of rows of the matrix */
	int cols;           /**< Number of columns */
	m_type * matrix1;   /**< Matrix holding the data */
	m_type * matrix2;   /**< Copy of the matrix used to compute the new values */
	double t_begin;     /**< Start time of the computation */
	double t_end;       /**< End time */
	m_type sum;         /**< Verification checksum */
	int iter = 0;		/**< Iterations performed until a stable point is reached */
	int rank; /**< Rank of this process */
	int size; /**< Size of the group */
	int temporal = 0;
	int indice = 0;
	int i1,j1,k1;
	m_type * buf_col_x;
	// Macros to access the matrices
#define m1(i,j) (matrix1[(i)*(cols+2)+(j)])
#define m2(i,j) (matrix2[(i)*(cols+2)+(j)])
	//************************************************************
	
	//************************************************************
	// 1. Read the input parameters.
	if(argc != 2){
		fprintf(stderr,"USAGE: %s <data matrix>\n",argv[0]);
		exit(EXIT_FAILURE);
	}
	
	matrix_name = argv[1];
	//************************************************************
	//************************************************************
	// 0. Initialize the MPI environment
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	//************************************************************

	
	//************************************************************
	// 2. Load the matrix
	// Matrix of size rows+2 x cols+2 (halo of width 1)
	// (The data live in [1,rows+1][1,cols+1])
	cp_read_matrix_size(matrix_name, &rows, &cols);
	printf(" Matrix %s: %dx%d\n",matrix_name, rows, cols);
	int matrix_size = (rows + 2) 	* (cols + 2);
	matrix1 = malloc(sizeof(m_type) * (size_t) matrix_size);
	cp_read_matrix(matrix_name, matrix1, 1);
	//************************************************************

	buf_col_x = malloc(sizeof(m_type) * (size_t) (rows+2));	

	for (i1 = 1; i1 <= size; i1++) {
		indice = i1*(cols)/size;
		for (k1 = temporal; k1 < indice; k1++)
		{
			for (j1 = 0; j1 < cols; j1++) {
				buf_col_x[j1] = m1(k1,j1);
			}

			MPI_Request request_col_x;
			//data_buf = calloc((size_t) (rows+2), sizeof(float));

			// Send to process col_X
			MPI_Isend(buf_col_x, rows, MPI_FLOAT, i1, COL_ID,
				MPI_COMM_WORLD, &request_col_x);

			// 2.3 Receive the data
			MPI_Recv(buf_col_x, rows, MPI_FLOAT, i1,
				COL_ID, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

			MPI_Wait(&request_col_x, MPI_STATUS_IGNORE);

			temporal = indice;
		}
		printf("hello world\n");
	}
	//************************************************************
	// 3. Copy of the matrix
	matrix2 = malloc(sizeof(m_type) * (size_t) matrix_size);
	//************************************************************

#ifdef SHOW_DISPLAY
	cp_display_create("Stencil", rows+2, cols+2);
	cp_display_draw_matrix(matrix1,CP_RGB(255,0,0),CP_RGB(0,255,0));
	cp_msleep(1000);
#endif

	//************************************************************
	// 4. Main loop
	t_begin = cp_Wtime(); 
	int i,j;
	
	// 4.1 Stay in the loop while the computed residual
	//  is greater than the target residual
	do {
		
		resid = 0.0;

		// One iteration of the algorithm
		update(matrix1,matrix2,rows,cols, rank, size);

		// Receive the data from the other processes into a matrix
		// Update the copy
		for (i=1; i<rows+1; i++) {
			for (j=1; j<cols+1; j++) {
				m1(i,j) = m2 (i,j);
			}
		}



#ifdef SHOW_DISPLAY
		cp_display_draw_matrix(matrix2,CP_RGB(255,0,0),CP_RGB(0,255,0));
		cp_msleep(50); // Slow down to watch the process evolve
#endif

	iter++;

	} while (resid > MAX_RESID);
	//************************************************************
	
	
	//************************************************************
	// 5. Verification checksum
	sum = check_sum(matrix1,rows,cols);
	t_end = cp_Wtime();
	//************************************************************
	
	
	//************************************************************
	// 6. Print the results
	printf(" Check sum: %f\n", sum);
	printf(" Iterations: %d\n", iter);
	printf(" Execution time: %f\n", t_end-t_begin);
	//************************************************************

#ifdef SHOW_DISPLAY	
	cp_msleep(250);
	cp_display_close();
#endif


	//************************************************************
	// 7. Free the matrices
	free(matrix1);
	free(matrix2);
	//************************************************************

#undef m1
#undef m2

	//************************************************************
	// 8. Finalize the MPI environment
	MPI_Finalize();
	//************************************************************
	return EXIT_SUCCESS;
}
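update() and the global resid tested in the main loop are defined elsewhere in the original. As a point of reference, a plausible serial version of the stencil step (assuming resid records the largest point-wise change and m_type is the element type used above) might look like this:

#include <math.h>

extern double resid;   /* assumed global, as used in the main loop above */

void update_serial(const m_type *old_m, m_type *new_m, int rows, int cols)
{
#define OLD(i,j) (old_m[(i)*(cols+2)+(j)])
#define NEW(i,j) (new_m[(i)*(cols+2)+(j)])
	int i, j;
	for (i = 1; i <= rows; i++) {
		for (j = 1; j <= cols; j++) {
			/* 4-point Jacobi average over the interior */
			NEW(i,j) = 0.25 * (OLD(i-1,j) + OLD(i+1,j) + OLD(i,j-1) + OLD(i,j+1));
			double diff = fabs((double)(NEW(i,j) - OLD(i,j)));
			if (diff > resid)
				resid = diff;
		}
	}
#undef OLD
#undef NEW
}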