void GatherScatterIvert(Bsystem *Bsys){ int i,j; int *BLACS_PARAMS; double **ivert_local; SMatrix *SM,SM_cont; double start_time_GatherScatterIvert, end_time_GatherScatterIvert; //start_time_GatherScatterIvert_tmp, end_time_GatherScatterIvert_tmp; BLACS_PARAMS = Bsys->Pmat->info.lenergy.BLACS_PARAMS; ivert_local = Bsys->Pmat->info.lenergy.ivert_local; SM = Bsys->Pmat->info.lenergy.SM_local; /* copy values from SM_local to invert_local */ update_inva_LOC(SM, BLACS_PARAMS, ivert_local, 'L'); /***************************/ /* innitialize ring pattern communication */ /* create two send buffers - one for indeces and one for values*/ /* communicate over all CPUs to get the "nnz" value */ /* copy values from sparse matrix to this buffers and send them */ int *nnz_per_rank,*temp_nnz_per_rank; nnz_per_rank = ivector(0,pllinfo[get_active_handle()].nprocs-1); izero(pllinfo[get_active_handle()].nprocs,nnz_per_rank,1); nnz_per_rank[pllinfo[get_active_handle()].procid] = SM[0].nz; temp_nnz_per_rank = ivector(0,pllinfo[get_active_handle()].nprocs-1); izero(pllinfo[get_active_handle()].nprocs,temp_nnz_per_rank,1); gisum (nnz_per_rank,pllinfo[get_active_handle()].nprocs,temp_nnz_per_rank); free(temp_nnz_per_rank); int *send_buf_Index,*send_buf_Jndex; double *send_buf_value; int index,partner_to_send_data,partner_to_receive_data; start_time_GatherScatterIvert = dclock(); SM_cont.allocate_SMatrix_cont(SM[0].nz,1,1); memcpy(SM_cont.AA,SM[0].AA,SM[0].nz*sizeof(double)); memcpy(SM_cont.IA,SM[0].IA,SM[0].nz*sizeof(int)); memcpy(SM_cont.JA,SM[0].JA,SM[0].nz*sizeof(int)); SM[0].deallocate_SMatrix(); for (i = 1; i < pllinfo[get_active_handle()].nprocs; i++){ send_buf_Index = ivector(0,SM_cont.nz*2-1); send_buf_Jndex = send_buf_Index + SM_cont.nz; send_buf_value = dvector(0,SM_cont.nz-1); memcpy (send_buf_Index,SM_cont.IA,SM_cont.nz*2*sizeof(int)); memcpy (send_buf_value,SM_cont.AA,SM_cont.nz*sizeof(double)); partner_to_send_data = pllinfo[get_active_handle()].procid+1; 
partner_to_receive_data = pllinfo[get_active_handle()].procid-1; if (partner_to_send_data == pllinfo[get_active_handle()].nprocs) partner_to_send_data = 0; if (partner_to_receive_data == -1) partner_to_receive_data = pllinfo[get_active_handle()].nprocs-1; /* trace number of SM transfers in order to get nz value right */ index = pllinfo[get_active_handle()].procid-i; if (index < 0) index = index+pllinfo[get_active_handle()].nprocs; j = SM_cont.nz; //length of massege to be sent SM_cont.deallocate_SMatrix_cont(); SM_cont.allocate_SMatrix_cont(nnz_per_rank[index],1,1); if (pllinfo[get_active_handle()].procid == 0){ mpi_isend (send_buf_Index,j*2,partner_to_send_data ,0); mpi_irecv (SM_cont.IA,SM_cont.nz*2,partner_to_receive_data,0); } else{ mpi_irecv (SM_cont.IA,SM_cont.nz*2,partner_to_receive_data,0); mpi_isend (send_buf_Index,j*2,partner_to_send_data ,0); } if (pllinfo[get_active_handle()].procid == 0){ mpi_dsend (send_buf_value,j,partner_to_send_data ,2); mpi_drecv (SM_cont.AA,SM_cont.nz,partner_to_receive_data,2); } else{ mpi_drecv (SM_cont.AA,SM_cont.nz,partner_to_receive_data,2); mpi_dsend (send_buf_value,j,partner_to_send_data ,2); } /* copy values from SM_local to invert_local */ update_inva_LOC(&SM_cont, BLACS_PARAMS, ivert_local, 'L'); //if (pllinfo[get_active_handle()].procid == 0) // printf(" pass No. %d completed ! 
\n", i); free(send_buf_Index);free(send_buf_value); } free(nnz_per_rank); SM_cont.deallocate_SMatrix_cont(); end_time_GatherScatterIvert = dclock(); ROOTONLY fprintf(stderr,"ring communication was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert); #if 0 static int FLAG_INDEX_IVERT_M = 0; int itmp, jtmp; FILE *Fivert_local; char fname_ivert_local[128]; sprintf(fname_ivert_local,"ivert_localM_%d_%d.dat",FLAG_INDEX_IVERT_M,mynode()); Fivert_local = fopen(fname_ivert_local,"w"); for (itmp = 0; itmp < BLACS_PARAMS[12]; itmp++){ for (jtmp = 0; jtmp < BLACS_PARAMS[11]; jtmp++) fprintf(Fivert_local," %2.16f ",ivert_local[itmp][jtmp]); fprintf(Fivert_local," \n"); } fclose(Fivert_local); #endif /* compute LU decompisition and pivot vector, LU stored in "ivert_local", pivot stored in "ivert_ipvt */ start_time_GatherScatterIvert = dclock(); blacs_pdgetrf_nektar(BLACS_PARAMS, Bsys->Pmat->info.lenergy.DESC_ivert, Bsys->Pmat->info.lenergy.ivert_ipvt, ivert_local); end_time_GatherScatterIvert = dclock(); ROOTONLY fprintf(stderr,"Parallel LU was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert); #if 1 /* invert operator */ start_time_GatherScatterIvert = dclock(); blacs_pdgetri_nektar(BLACS_PARAMS, Bsys->Pmat->info.lenergy.DESC_ivert, Bsys->Pmat->info.lenergy.ivert_ipvt, Bsys->Pmat->info.lenergy.ivert_local); end_time_GatherScatterIvert = dclock(); ROOTONLY fprintf(stderr,"Operator Inversion was done in : %f sec \n", end_time_GatherScatterIvert-start_time_GatherScatterIvert); #endif #if 0 sprintf(fname_ivert_local,"ivert_localIM_%d_%d.dat",FLAG_INDEX_IVERT_M,mynode()); Fivert_local = fopen(fname_ivert_local,"w"); for (itmp = 0; itmp < BLACS_PARAMS[12]; itmp++){ for (jtmp = 0; jtmp < BLACS_PARAMS[11]; jtmp++) fprintf(Fivert_local," %2.16f ",Bsys->Pmat->info.lenergy.invA_local[itmp][jtmp]); fprintf(Fivert_local," \n"); } fclose(Fivert_local); FLAG_INDEX_IVERT_M++; #endif }
/*
 * mpi_issend -- "issend" entry point of this wrapper layer.
 *
 * In this implementation it adds nothing of its own: the arguments are
 * passed unchanged to mpi_isend() and that call's request handle is
 * returned as-is.
 *
 *   m    - communicator/context object handed through to mpi_isend()
 *   data - buffer to send
 *   size - size argument handed through to mpi_isend()
 *   to   - destination handed through to mpi_isend()
 *
 * Returns whatever mpi_isend() returns for the same arguments.
 */
mpi_request *mpi_issend(mpi *m, void *data, unsigned size, int to)
{
    mpi_request *request = mpi_isend(m, data, size, to);

    return request;
}