Пример #1
0
/*
 * GatherScatterIvert
 *
 * Accumulates every rank's local sparse matrix into this rank's dense
 * block `ivert_local`, then factorizes and inverts the distributed operator.
 *
 * Steps, as visible in this function:
 *   1. Apply this rank's own sparse matrix (SM_local[0]) to ivert_local via
 *      update_inva_LOC().
 *   2. Exchange the per-rank non-zero counts (gisum over an array that is
 *      zero everywhere except this rank's slot).
 *   3. Pass the sparse matrices around a ring (send to procid+1, receive
 *      from procid-1, with wrap-around) for nprocs-1 passes, calling
 *      update_inva_LOC() on each matrix received.
 *   4. Call blacs_pdgetrf_nektar() (LU factorization with pivoting) and then
 *      blacs_pdgetri_nektar() (inversion) on ivert_local.
 *
 * Parameters:
 *   Bsys - system whose Pmat->info.lenergy fields (BLACS_PARAMS,
 *          ivert_local, SM_local, DESC_ivert, ivert_ipvt) are read/updated.
 *
 * Side effects:
 *   - SM_local[0] is deallocated (deallocate_SMatrix) after being copied
 *     into a temporary; callers must not use it afterwards.
 *   - ivert_local is modified in place.
 *   - The root process prints three timing lines to stderr.
 *
 * NOTE(review): the exact semantics of update_inva_LOC, gisum, the mpi_*
 * wrappers and the blacs_* wrappers are not visible here; the descriptions
 * above follow the original inline comments and the call patterns only.
 */
void  GatherScatterIvert(Bsystem *Bsys){

  int i,j;
  int *BLACS_PARAMS;
  double **ivert_local;
  SMatrix *SM,SM_cont;

  /* The same pair of timers is reused for each timed phase below. */
  double start_time_GatherScatterIvert, end_time_GatherScatterIvert;
    //start_time_GatherScatterIvert_tmp, end_time_GatherScatterIvert_tmp;

  /* Local aliases for the fields of Bsys used throughout. */
  BLACS_PARAMS = Bsys->Pmat->info.lenergy.BLACS_PARAMS;
  ivert_local = Bsys->Pmat->info.lenergy.ivert_local;
  SM = Bsys->Pmat->info.lenergy.SM_local;


  /*  copy values from  SM_local  to  ivert_local  */
  update_inva_LOC(SM, BLACS_PARAMS, ivert_local, 'L');


  /***************************/
  /* initialize ring-pattern communication */
  /* create two send buffers - one for indices and one for values */
  /* communicate over all CPUs to get the "nnz" value */
  /* copy values from the sparse matrix to these buffers and send them */


  /* Build a table of non-zero counts indexed by rank: only this rank's slot
     is non-zero before the reduction.
     NOTE(review): presumably gisum is an in-place global integer sum using
     temp_nnz_per_rank as scratch, so afterwards every rank holds all counts
     - confirm against gisum's definition. */
  int *nnz_per_rank,*temp_nnz_per_rank;
  nnz_per_rank = ivector(0,pllinfo[get_active_handle()].nprocs-1);
  izero(pllinfo[get_active_handle()].nprocs,nnz_per_rank,1);
  nnz_per_rank[pllinfo[get_active_handle()].procid] = SM[0].nz;

  temp_nnz_per_rank = ivector(0,pllinfo[get_active_handle()].nprocs-1);
  izero(pllinfo[get_active_handle()].nprocs,temp_nnz_per_rank,1);

  gisum (nnz_per_rank,pllinfo[get_active_handle()].nprocs,temp_nnz_per_rank);
  free(temp_nnz_per_rank);


  /* NOTE(review): send_buf_Jndex is assigned inside the loop but never read
     in this function; the column indices travel as the second half of
     send_buf_Index. */
  int *send_buf_Index,*send_buf_Jndex;
  double *send_buf_value;
  int index,partner_to_send_data,partner_to_receive_data;

  start_time_GatherScatterIvert = dclock();

  /* Move the local matrix into SM_cont and release the original.
     SM_cont is the matrix that gets forwarded around the ring. */
  SM_cont.allocate_SMatrix_cont(SM[0].nz,1,1);
  memcpy(SM_cont.AA,SM[0].AA,SM[0].nz*sizeof(double));
  memcpy(SM_cont.IA,SM[0].IA,SM[0].nz*sizeof(int));
  memcpy(SM_cont.JA,SM[0].JA,SM[0].nz*sizeof(int));
  SM[0].deallocate_SMatrix();

  /* nprocs-1 passes: in each pass forward the matrix currently held and
     receive the next one from the previous rank in the ring. */
  for (i = 1; i < pllinfo[get_active_handle()].nprocs; i++){
    /* One int buffer holds both index arrays back to back (2*nz ints). */
    send_buf_Index = ivector(0,SM_cont.nz*2-1);
    send_buf_Jndex = send_buf_Index + SM_cont.nz;
    send_buf_value = dvector(0,SM_cont.nz-1);

    /* Copies 2*nz ints starting at IA.
       NOTE(review): this assumes allocate_SMatrix_cont lays out JA directly
       after IA in one contiguous allocation - confirm. */
    memcpy (send_buf_Index,SM_cont.IA,SM_cont.nz*2*sizeof(int));
    memcpy (send_buf_value,SM_cont.AA,SM_cont.nz*sizeof(double));

    /* Ring neighbours with wrap-around at both ends. */
    partner_to_send_data = pllinfo[get_active_handle()].procid+1;
    partner_to_receive_data =  pllinfo[get_active_handle()].procid-1;
    if (partner_to_send_data == pllinfo[get_active_handle()].nprocs)
      partner_to_send_data = 0;
    if (partner_to_receive_data == -1)
      partner_to_receive_data = pllinfo[get_active_handle()].nprocs-1;

    /* trace number of SM transfers in order to get nz value right:
       index = (procid - i) mod nprocs selects the nnz_per_rank entry used
       to size the matrix received in this pass */
    index = pllinfo[get_active_handle()].procid-i;
    if (index < 0)
      index = index+pllinfo[get_active_handle()].nprocs;

    j = SM_cont.nz; // length of message to be sent

    /* Resize SM_cont for the incoming matrix; the outgoing data already
       lives in the send buffers.
       NOTE(review): SM_cont.nz is used below as the receive length, so
       allocate_SMatrix_cont presumably sets nz to its first argument. */
    SM_cont.deallocate_SMatrix_cont();
    SM_cont.allocate_SMatrix_cont(nnz_per_rank[index],1,1);

    /* Index exchange (tag 0). Rank 0 sends first, all other ranks receive
       first.
       NOTE(review): presumably these wrappers are blocking and the
       asymmetric order is what prevents a ring deadlock - confirm. */
    if (pllinfo[get_active_handle()].procid == 0){
      mpi_isend (send_buf_Index,j*2,partner_to_send_data ,0);
      mpi_irecv (SM_cont.IA,SM_cont.nz*2,partner_to_receive_data,0);
    }
    else{
      mpi_irecv (SM_cont.IA,SM_cont.nz*2,partner_to_receive_data,0);
      mpi_isend (send_buf_Index,j*2,partner_to_send_data ,0);
    }

    /* Value exchange (tag 2), same send/receive ordering as above. */
    if (pllinfo[get_active_handle()].procid == 0){
      mpi_dsend (send_buf_value,j,partner_to_send_data ,2);
      mpi_drecv (SM_cont.AA,SM_cont.nz,partner_to_receive_data,2);
    }
    else{
      mpi_drecv (SM_cont.AA,SM_cont.nz,partner_to_receive_data,2);
      mpi_dsend (send_buf_value,j,partner_to_send_data ,2);
    }


    /*  copy values from the received matrix (SM_cont) to  ivert_local  */
    update_inva_LOC(&SM_cont, BLACS_PARAMS, ivert_local, 'L');

    //if (pllinfo[get_active_handle()].procid == 0)
    //    printf(" pass No. %d completed ! \n", i);

    /* Send buffers are per-pass; send_buf_Jndex aliases send_buf_Index and
       must not be freed separately. */
    free(send_buf_Index);free(send_buf_value);

  }
  free(nnz_per_rank);
  SM_cont.deallocate_SMatrix_cont();

  end_time_GatherScatterIvert = dclock();
  ROOTONLY
     fprintf(stderr,"ring communication was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert);


/* Disabled debug dump: writes ivert_local to a per-rank text file before
   the factorization. */
#if 0
            static int FLAG_INDEX_IVERT_M = 0;
            int itmp, jtmp;
            FILE *Fivert_local;
            char fname_ivert_local[128];
            sprintf(fname_ivert_local,"ivert_localM_%d_%d.dat",FLAG_INDEX_IVERT_M,mynode());
            Fivert_local = fopen(fname_ivert_local,"w");
            for (itmp = 0; itmp < BLACS_PARAMS[12]; itmp++){
              for (jtmp = 0; jtmp < BLACS_PARAMS[11]; jtmp++)
                fprintf(Fivert_local," %2.16f ",ivert_local[itmp][jtmp]);
              fprintf(Fivert_local," \n");
            }
            fclose(Fivert_local);


#endif

  /* compute LU decomposition and pivot vector,
     LU stored in "ivert_local", pivot stored in "ivert_ipvt"  */

  start_time_GatherScatterIvert = dclock();

  blacs_pdgetrf_nektar(BLACS_PARAMS,
           Bsys->Pmat->info.lenergy.DESC_ivert,
           Bsys->Pmat->info.lenergy.ivert_ipvt,
                       ivert_local);

  end_time_GatherScatterIvert = dclock();

  ROOTONLY
     fprintf(stderr,"Parallel LU was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert);



#if 1

/* invert operator (passes the same array that the local alias ivert_local
   points to, read here directly from Bsys)  */

   start_time_GatherScatterIvert = dclock();
   blacs_pdgetri_nektar(BLACS_PARAMS,
                       Bsys->Pmat->info.lenergy.DESC_ivert,
                       Bsys->Pmat->info.lenergy.ivert_ipvt,
                       Bsys->Pmat->info.lenergy.ivert_local);

   end_time_GatherScatterIvert = dclock();

   ROOTONLY
     fprintf(stderr,"Operator Inversion was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert);

#endif


/* Disabled debug dump: writes invA_local to a per-rank text file after the
   inversion. Relies on the variables declared in the first disabled block. */
#if 0
            sprintf(fname_ivert_local,"ivert_localIM_%d_%d.dat",FLAG_INDEX_IVERT_M,mynode());
            Fivert_local = fopen(fname_ivert_local,"w");
            for (itmp = 0; itmp < BLACS_PARAMS[12]; itmp++){
              for (jtmp = 0; jtmp < BLACS_PARAMS[11]; jtmp++)
                fprintf(Fivert_local," %2.16f ",Bsys->Pmat->info.lenergy.invA_local[itmp][jtmp]);
              fprintf(Fivert_local," \n");
            }
            fclose(Fivert_local);

            FLAG_INDEX_IVERT_M++;
#endif

}
Пример #2
0
/*
 * mpi_issend - forwarding wrapper around mpi_isend().
 *
 * All four arguments (m, data, size, to) are passed through unchanged and
 * the request handle produced by mpi_isend() is returned as-is.
 *
 * NOTE(review): the name suggests a synchronous-mode send, but this
 * implementation does nothing beyond calling mpi_isend(); confirm that
 * callers do not depend on different completion semantics.
 */
mpi_request* mpi_issend(mpi* m, void* data, unsigned size, int to)
{
  mpi_request* request = mpi_isend(m, data, size, to);

  return request;
}