void setup (int N, const Parameter ¶m, Array<double, 1> &WR, Array<double,2> &ev, Array<double,2> &evInv) { int Nm1 = N; int i; Array<double, 1> x; Array<double, 2> D; Array<double, 1> r; Array<double, 2> Dsec; Array<double, 1> XX; Array<double, 1> YY; Array<double, 2> A(N,N); Array<double, 2> B(N,N); Array<int, 1> IPIV(Nm1); char BALANC[1]; char JOBVL[1]; char JOBVR[1]; char SENSE[1]; int LDA; int LDVL; int LDVR; int NRHS; int LDB; int INFO; //resize output arrays WR.resize(N); ev.resize(N, N); evInv.resize(N, N); // parameters for DGEEVX Array<double, 1> WI(Nm1); // WR(Nm1), // The real and imaginary part of the eig.values Array<double, 2> VL(N, N); Array<double, 2> VR(Nm1,Nm1); //VR(Nm1,Nm1); // The left and rigth eigenvectors int ILO, IHI; // Info on the balanced output matrix Array<double, 1> SCALE(Nm1); // Scaling factors applied for balancing double ABNRM; // 1-Norm of the balanced matrix Array<double, 1> RCONDE(Nm1); // the reciprocal cond. numb of the respective eig.val Array<double, 1> RCONDV(Nm1); // the reciprocal cond. numb of the respective eig.vec int LWORK = (N+1)*(N+7); // Depending on SENSE Array<double, 1> WORK(LWORK); Array<int, 1> IWORK(2*(N+1)-2); // Compute the Chebyshev differensiation matrix and D*D // cheb(N, x, D); cheb(N, x, D); Dsec.resize(D.shape()); MatrixMatrixMultiply(D, D, Dsec); // Compute the 1. and 2. derivatives of the transformations XYmat(N, param, XX, YY, r); // Set up the full timepropagation matrix A // dy/dt = - i A y Range range(1, N); //Dsec and D have range 0, N+1. //We don't want the edge points in A A = XX(tensor::i) * Dsec(range, range) + YY(tensor::i) * D(range, range); //Transpose A for (int i=0; i<A.extent(0); i++) { for (int j=0; j<i; j++) { double t = A(i,j); A(i,j) = A(j, i); A(j,i) = t; } } // Add radialpart of non-time dependent potential here /* 2D radial for (int i=0; i<A.extent(0); i++) { A(i, i) += 0.25 / (r(i)*r(i)); } */ // Compute eigen decomposition BALANC[0] ='B'; JOBVL[0] ='V'; JOBVR[0] ='V'; SENSE[0] ='B'; LDA = Nm1; LDVL = Nm1; LDVR = Nm1; FORTRAN_NAME(dgeevx)(BALANC, JOBVL, JOBVR, SENSE, &Nm1, A.data(), &LDA, WR.data(), WI.data(), VL.data(), &LDVL, VR.data(), &LDVR, &ILO, &IHI, SCALE.data(), &ABNRM, RCONDE.data(), RCONDV.data(), WORK.data(), &LWORK, IWORK.data(), &INFO); // Compute the inverse of the eigen vector matrix NRHS = Nm1; evInv = VR ;// VL; LDB = LDA; B = 0.0; for (i=0; i<Nm1; i++) B(i,i) = 1.0; FORTRAN_NAME(dgesv)(&Nm1, &NRHS, evInv.data(), &LDA, IPIV.data(), B.data(), &LDB, &INFO); ev = VR(tensor::j, tensor::i); //Transpose evInv = B(tensor::j, tensor::i); //Transpose //cout << "Eigenvectors (right): " << ev << endl; //cout << "Eigenvectors (inv): " << evInv << endl; //printf(" Done inverse, INFO = %d \n", INFO); } // done
int main(int argc, char *argv[]) { //Declaration of variables int my_rank, num_procs; int num_rows, num_cols; double **A, **B, **C; int nrA, ncA, nrB, ncB; double **a, **b, **c; int nra, nca, nrb, ncb; int sqrt_p; int *rsA, *csA, *rsB, *csB; int mrA, mcA, mrB, mcB; //Declare MPI-suff MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); char *Mat_A, *Mat_B, *Mat_C; Mat_A = argv[1]; Mat_B = argv[2]; Mat_C = argv[3]; sqrt_p = sqrt(num_procs); if(my_rank == 0){ //read_matrix_binaryformat(Mat_A, &A, &nrA, &ncA ); //read_matrix_binaryformat(Mat_B, &B, &nrB, &ncB ); nrA = 100; ncA = 50; nrB = 50; ncB = 100; allocate_matrix(&A, nrA, ncA); allocate_matrix(&B, nrB, ncB); fill_matrix(&A, nrA, ncA); fill_matrix(&B, nrB, ncB); char A_id = 'A', B_id = 'B'; print_matrix(nrA, ncA, A, my_rank, A_id); print_matrix(nrB, ncB, B, my_rank, B_id); } MPI_Bcast(&nrA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ncA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nrB, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ncB, 1, MPI_INT, 0, MPI_COMM_WORLD); find_size(nrA, ncA, &nra, &nca, my_rank, sqrt_p); find_size(nrB, ncB, &nrb, &ncb, my_rank, sqrt_p); rsA = (int*)malloc(num_procs*sizeof(int)); csA = (int*)malloc(num_procs*sizeof(int)); rsB = (int*)malloc(num_procs*sizeof(int)); csB = (int*)malloc(num_procs*sizeof(int)); for(int i=0; i<num_procs; ++i) { if(i == my_rank) { rsA[i] = nra; csA[i] = nca; rsB[i] = nrb; csB[i] = ncb; } else { MPI_Sendrecv(&nra, 1, MPI_INT, i, 1, &(rsA[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&nca, 1, MPI_INT, i, 1, &(csA[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&nrb, 1, MPI_INT, i, 1, &(rsB[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(&ncb, 1, MPI_INT, i, 1, &(csB[i]), 1, MPI_INT, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } //printf("nra[%i] = %i, my_rank:%i\n", i, rsA[i], my_rank); } // root finds large enough sizes if(my_rank == 0) { find_max(rsA, &mrA, num_procs); find_max(csA, &mcA, num_procs); find_max(rsB, &mrB, num_procs); find_max(csB, &mcB, num_procs); } // send bigbuffs to all MPI_Bcast(&mrA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mcA, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mrB, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&mcB, 1, MPI_INT, 0, MPI_COMM_WORLD); // allocate sub matrices allocate_matrix(&a, mrA, mcA); allocate_matrix(&b, mrB, mcB); allocate_matrix(&c, mrA, mcB); if(my_rank == 0) { // root sets its own matrices for(int i=0; i<nra; ++i) { for(int j=0; j<nca; ++j) { a[i][j] = A[i][j]; } } for(int i=0; i<nrb; ++i) { for(int j=0; j<ncb; ++j) { b[i][j] = B[i][j]; } } MPI_Datatype btA; MPI_Datatype btB; // initialize row and column index variables int riA = 0; int riB = 0; int ciA = csA[0]; int ciB = csB[0]; int tmpk = 1; for(int k=1; k<num_procs; ++k) { //send to slaves // create strided types (blocks/chunks/blarg) MPI_Type_vector(rsA[k], csA[k], ncA, MPI_DOUBLE, &btA); MPI_Type_create_resized(btA, 0, sizeof(double), &btA); MPI_Type_commit(&btA); MPI_Type_vector(rsB[k], csB[k], ncB, MPI_DOUBLE, &btB); MPI_Type_create_resized(btB, 0, sizeof(double), &btB); MPI_Type_commit(&btB); // send to slaves MPI_Send(&((*A)[ncA*riA + ciA]), 1, btA, k, 1, MPI_COMM_WORLD); MPI_Send(&((*B)[ncB*riB + ciB]), 1, btB, k, 2, MPI_COMM_WORLD); // free types for next iteration MPI_Type_free(&btA); MPI_Type_free(&btB); ciA += csA[k]; ciB += csB[k]; tmpk++; if(tmpk == sqrt_p) { // jump down to next chunk row riA += rsA[k-1]; riB += rsB[k-1]; ciA = 0; ciB = 0; tmpk = 0; } } // deallocate A and B //deallocate(&A); //deallocate(&B); } else { // slaves recv from root a chunk/block/somethgin // create data types MPI_Datatype btA; MPI_Datatype btB; // create strided data types MPI_Type_vector(nra, nca, mcA, MPI_DOUBLE, &btA); MPI_Type_create_resized(btA, 0, sizeof(double), &btA); MPI_Type_commit(&btA); MPI_Type_vector(nrb, ncb, mcB, MPI_DOUBLE, &btB); MPI_Type_create_resized(btB, 0, sizeof(double), &btB); MPI_Type_commit(&btB); // recv from root a chunk MPI_Recv(&((*a)[0]), 1, btA, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&((*b)[0]), 1, btB, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // free types MPI_Type_free(&btA); MPI_Type_free(&btB); //char a_id = 'a', b_id = 'b'; //print_matrix(nra, nca, a, my_rank, a_id); } char a_id = 'a', b_id = 'b'; //print_matrix(nra, nca, a, my_rank, a_id); //print_matrix(nrb, ncb, b, my_rank, b_id); MatrixMatrixMultiply(&a, &b, &c, mrA, mcA, mrB, mcB, rsA, csA, rsB, csB, MPI_COMM_WORLD); if(my_rank == 0) { // allocate result allocate_matrix(&C, nrA, ncB); // root sets its own matrices for(int i=0; i<nra; ++i) { for(int j=0; j<ncb; ++j) { C[i][j] = c[i][j]; } } MPI_Datatype btC; int riC = 0; int ciC = csB[0]; int tmpk = 1; for(int k=1; k<num_procs; ++k) { MPI_Type_vector(rsA[k], csB[k], ncB, MPI_DOUBLE, &btC); MPI_Type_create_resized(btC, 0, sizeof(double), &btC); MPI_Type_commit(&btC); printf("%i\n", ncB*riC+ciC); MPI_Recv(&((*C)[ncB*riC+ciC]), 1, btC, k, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Type_free(&btC); ciC += csB[k]; if(tmpk == sqrt_p) { riC += rsA[k-1]; ciC = 0; tmpk = 0; } } //read_matrix_binaryformat(Mat_A, &A, &nrA, &ncA ); //read_matrix_binaryformat(Mat_B, &B, &nrB, &ncB ); test_result(nrA, ncA, nrB, ncB, A, B, C); //test_result(nrA, ncA, nrB, ncB, A, B, C); deallocate(&A); deallocate(&B); deallocate(&C); } else { MPI_Datatype btC; MPI_Type_vector(nra, ncb, mcB, MPI_DOUBLE, &btC); MPI_Type_create_resized(btC, 0, sizeof(double), &btC); MPI_Type_commit(&btC); MPI_Send(&((*c)[0]), 1, btC, 0, my_rank, MPI_COMM_WORLD); MPI_Type_free(&btC); } // deallocate sub matrices deallocate(&a); deallocate(&b); deallocate(&c); free(rsA); free(csA); free(rsB); free(csB); MPI_Finalize(); return 0; }