/*
 * Demo driver for the Item and mat APIs.
 *
 * First shows the two scalar operands (5 and 2) followed by the results of
 * item_add / item_mul / item_sub / item_div, each passed to item_print.
 * Then creates a 3x2 matrix filled with 5, a 2x3 matrix filled with 2 and
 * two 2x2 matrices filled with 6 and 3, and shows the 2x2 sum and
 * difference as well as the 3x2 * 2x3 product via mat_print.
 *
 * Always returns 0.
 */
int main(void)
{
    Item lhs_item = 5, rhs_item = 2;
    Item k1_fill = 6, k2_fill = 3;
    mat tall, wide, square_a, square_b;
    int row, col;

    /* ---- scalar Item arithmetic ---- */
    printf("\nItems: ");
    item_print(lhs_item);
    printf(" ");
    item_print(rhs_item);
    printf("\n");

    printf("Item Addition: ");
    item_print(item_add(lhs_item, rhs_item));
    printf("\n");

    printf("Item Multiplication: ");
    item_print(item_mul(lhs_item, rhs_item));
    printf("\n");

    printf("Item Substraction: ");
    item_print(item_sub(lhs_item, rhs_item));
    printf("\n");

    printf("Item Division: ");
    item_print(item_div(lhs_item, rhs_item));
    printf("\n\n");

    /* ---- matrix set-up ---- */
    tall = mat_create(3, 2);
    wide = mat_create(2, 3);
    square_a = mat_create(2, 2);
    square_b = mat_create(2, 2);

    /* Every cell of the 3x2 matrix receives the first scalar. */
    for (row = 0; row < mat_rows(tall); row++) {
        for (col = 0; col < mat_cols(tall); col++) {
            mat_add_elem(tall, row, col, lhs_item);
        }
    }

    /* Every cell of the 2x3 matrix receives the second scalar. */
    for (row = 0; row < mat_rows(wide); row++) {
        for (col = 0; col < mat_cols(wide); col++) {
            mat_add_elem(wide, row, col, rhs_item);
        }
    }

    /* Both 2x2 matrices are filled in one sweep, bounded by the first one. */
    for (row = 0; row < mat_rows(square_a); row++) {
        for (col = 0; col < mat_cols(square_a); col++) {
            mat_add_elem(square_a, row, col, k1_fill);
            mat_add_elem(square_b, row, col, k2_fill);
        }
    }

    /* ---- matrix arithmetic ---- */
    printf("k1 Matrix: \n");
    mat_print(square_a);
    printf("k2 Matrix: \n");
    mat_print(square_b);

    printf("Addition: \n");
    mat_print(mat_add(square_a, square_b));
    printf("Substraction: \n");
    mat_print(mat_sub(square_a, square_b));

    printf("m1 Matrix: \n");
    mat_print(tall);
    printf("m2 Matrix: \n");
    mat_print(wide);

    printf("Multiplication: \n");
    mat_print(mat_mul(tall, wide));

    return 0;
}
void bidiag_gkl_restart( int locked, int l, int n, CAX && Ax, CATX && Atx, CD && D, CE && E, CRho && rho, CP && P, CQ && Q, int s_indx, int t_s_indx) { // enhancements version from SLEPc const double eta = 1.e-10; double t_start = 0.0, t_end = 0.0; double local_start = 0.0, local_end = 0.0; double t_total3 = 0.0, t_total4 = 0.0, t_total5 = 0.0, t_total6 = 0.0, t_total7 = 0.0; int rank, nprocs; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); // Step 1 int recv_len = (int)P.dim0() * nprocs; vec_container<double> tmp(Ax.dim0()); vec_container<double> recv_tmp(recv_len); auto m_Ax = make_gemv_ax(&Ax); auto m_Atx = make_gemv_ax(&Atx); m_Ax(Q.col(l), tmp, P.dim0() > 1000); vec_container<double> send_data(P.dim0(),0); for(size_t i = s_indx; i < s_indx + Ax.dim0(); ++i) send_data[i] = tmp.get(i-s_indx); MPI_Gather(&send_data[0], P.dim0(), MPI_DOUBLE, &recv_tmp[0], P.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD); P.col(l) = 0; // Generate truly P.col(l) if(rank == 0) { local_union(P, recv_tmp, l, nprocs); // Step 2 & also in rank 0 for (int j = locked; j < l; ++j) { P.col(l) += -rho(j) * P.col(j); } } MPI_Bcast(&(P.col(0)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); //MPI_Bcast(&(P.col(l)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); // Main loop vec_container<double> T(n); int recv_l = Q.dim0() * nprocs; vec_container<double> recv_t(recv_l); for (int j = l; j < n; ++j) { // Step 3 vec_container<double> tmp2(Atx.dim0()); /* for print */ if(rank == 0) t_start = currenttime(); local_start = currenttime(); m_Atx(P.col(j), tmp2, Q.dim0() > 1000); local_end = currenttime(); std::cout << "parallel mv time cost is " << (local_end - local_start) / 1.0e6 << std::endl; vec_container<double> s_data(Q.dim0(), 0); for(size_t i = t_s_indx; i < t_s_indx + Atx.dim0(); ++i) s_data[i] = tmp2[i-t_s_indx]; MPI_Gather(&s_data[0], Q.dim0(), MPI_DOUBLE, &recv_t[0], Q.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD); local_start = currenttime(); std::cout << "parallel mv time cost2 
is " << (local_start - local_end) / 1.0e6 << std::endl; //Q.col(j+1) = 0; if(rank == 0) { // Generate truly Q.col(j+1) local_union(Q, recv_t, j + 1, nprocs); local_end = currenttime(); t_end = currenttime(); std::cout << "parallel mv time cost3 is " << (local_end - local_start) / 1.0e6 << std::endl; std::cout << "time of step 3 is : " << (t_end - t_start) / 1.0e6 << std::endl; t_total3 += (t_end - t_start) / 1.0e6; } // Step 4 for(size_t aa = 0; aa < Q.dim0(); ++aa) // row MPI_Bcast(&(Q.row(aa)[0]), j + 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); // MPI_Bcast(&(Q.col(0)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); if(rank == 0) t_end = currenttime(); auto Qj = mat_cols(Q, 0, j + 1); auto Tj = make_vec(&T, j + 1); //Tj.assign(gemv(Qj.trans(), Q.col(j + 1)), j >= 3); parallel_gemv_task(Qj.trans(), Q.col(j+1), Tj); if(rank == 0) { t_start = currenttime(); t_total4 += (t_start - t_end) / 1.0e6; std::cout << "time of step 4 is : " << (t_start - t_end) / 1.0e6 << std::endl; } // Step 5 if(rank == 0) { double r = Q.col(j + 1).norm2(); D[j] = vec_unit(P.col(j)); Q.col(j + 1).scale(1. / D[j]); Tj = Tj / D[j]; r /= D[j]; Q.col(j + 1).plus_assign(- gemv(Qj, Tj), Q.dim0() > 1000); t_end = currenttime(); t_total5 += (t_end - t_start) / 1.0e6; std::cout << "time of step 5 is : " << (t_end - t_start) / 1.0e6 << std::endl; // Step 6 double beta = r * r - Tj.square_sum(); if (beta < eta * r * r) { Tj.assign(gemv(Qj.trans(), Q.col(j + 1)), Q.dim0() > 1000); r = Q.col(j + 1).square_sum(); Q.col(j + 1).plus_assign(-gemv(Qj, Tj), Q.dim0() > 1000); beta = r * r - Tj.square_sum(); } beta = std::sqrt(beta); E[j] = beta; Q.col(j + 1).scale(1. 
/ E[j]); t_start = currenttime(); t_total6 += (t_start - t_end) / 1.0e6; std::cout << "time of step 6 is : " << (t_start - t_end) / 1.0e6 << std::endl; } // Step 7 // MPI_Bcast(&(Q.col(j+1)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); // MPI_Bcast(&(Q.col(0)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); for(size_t aa = 0; aa < Q.dim0(); ++aa) MPI_Bcast(&(Q.col(j+1)[aa]), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); if (j + 1 < n) { if(rank == 0) t_start = currenttime(); vec_container<double> tmp3(Ax.dim0()); vec_container<double> se_data(P.dim0(), 0); m_Ax(Q.col(j + 1), tmp3, P.dim0() > 1000); for(size_t k1 = s_indx; k1 < s_indx + Ax.dim0(); ++k1) se_data[k1] = tmp3[k1-s_indx]; MPI_Gather(&se_data[0], P.dim0(), MPI_DOUBLE, &recv_tmp[0], P.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD); // P.col(j+1) = 0; if(rank == 0) { local_union(P, recv_tmp, j + 1, nprocs); P.col(j + 1).plus_assign(- E[j] * P.col(j), P.dim0() > 1000); } /* for print */ if(rank == 0) { t_end = currenttime(); t_total7 += (t_end - t_start) / 1.0e6; std::cout << "time of step 7 is : " << (t_end - t_start) / 1.0e6 << std::endl; } // MPI_Bcast(&(P.col(l)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); // MPI_Bcast(&(P.col(0)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); for(size_t aa = 0; aa < P.dim0(); ++aa) MPI_Bcast(&(P.col(j+1)[aa]), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); } // end if } // end while /* for print time of each step. */ if(rank == 0) { std::cout << "total step 3 time is : " << t_total3 << std::endl; std::cout << "total step 4 time is : " << t_total4 << std::endl; std::cout << "total step 5 time is : " << t_total5 << std::endl; std::cout << "total step 6 time is : " << t_total6 << std::endl; std::cout << "total step 7 time is : " << t_total7 << std::endl; } return ; }