int main(void) 
{
  Item x = 5, y = 2, x2 = 6, y2 = 3;
  mat m1, m2, k1, k2;
  int i, j;

  printf("\nItems: ");
  item_print(x);
  printf(" ");
  item_print(y);
  printf("\n");
  printf("Item Addition: ");
  item_print(item_add(x, y));
  printf("\n");
  printf("Item Multiplication: ");
  item_print(item_mul(x, y));
  printf("\n");
  printf("Item Substraction: ");
  item_print(item_sub(x, y));
  printf("\n");
  printf("Item Division: ");
  item_print(item_div(x, y));
  printf("\n\n");
  m1 = mat_create(3, 2);
  m2 = mat_create(2, 3);
  k1 = mat_create(2, 2);
  k2 = mat_create(2, 2);
  for (i = 0; i < mat_rows(m1); i++)
    for (j = 0; j < mat_cols(m1); j++)
      mat_add_elem(m1, i, j, x);
  for (i = 0; i < mat_rows(m2); i++)
    for (j = 0; j < mat_cols(m2); j++)
      mat_add_elem(m2, i, j, y);
  for (i = 0; i < mat_rows(k1); i++)
    for (j = 0; j < mat_cols(k1); j++)
      {
        mat_add_elem(k1, i, j, x2);
        mat_add_elem(k2, i, j, y2);
      }
  printf("k1 Matrix: \n");
  mat_print(k1);
  printf("k2 Matrix: \n");
  mat_print(k2);
  printf("Addition: \n");
  mat_print(mat_add(k1, k2));
  printf("Substraction: \n");
  mat_print(mat_sub(k1, k2));
  printf("m1 Matrix: \n");
  mat_print(m1);
  printf("m2 Matrix: \n");
  mat_print(m2);
  printf("Multiplication: \n");
  mat_print(mat_mul(m1, m2));
  return 0;
}
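The Item/mat header this example compiles against is not shown. For reference, here is a minimal sketch of the interfaces the calls above appear to assume; the names are taken from the example, but every signature is inferred and the real header may differ (Item could well be a struct rather than a plain int):

/* Hypothetical declarations inferred from the usage above -- not the real header. */
typedef int Item;                /* assumption: integer-like scalar */
typedef struct matrix *mat;      /* assumption: opaque matrix handle */

void item_print(Item a);
Item item_add(Item a, Item b);
Item item_sub(Item a, Item b);
Item item_mul(Item a, Item b);
Item item_div(Item a, Item b);

mat  mat_create(int rows, int cols);
int  mat_rows(mat m);
int  mat_cols(mat m);
void mat_add_elem(mat m, int row, int col, Item value);  /* set entry (row, col) */
void mat_print(mat m);
mat  mat_add(mat a, mat b);   /* same dimensions */
mat  mat_sub(mat a, mat b);   /* same dimensions */
mat  mat_mul(mat a, mat b);   /* cols(a) == rows(b) */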
Example #2
void bidiag_gkl_restart(
    int locked, int l, int n,
    CAX && Ax, CATX && Atx, CD && D, CE && E, CRho && rho, CP && P, CQ && Q, int s_indx, int t_s_indx) {
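  // Distributed Golub-Kahan-Lanczos (GKL) bidiagonalization with restart.
  // D and E receive the diagonal (alpha_j) and superdiagonal (beta_j) of the
  // bidiagonal factor; P and Q accumulate the left/right Lanczos vectors.
  // s_indx / t_s_indx are this rank's row offsets into the distributed
  // A and A^T blocks.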
  // enhanced version of the scheme from SLEPc
  const double eta = 1.e-10;
  double t_start = 0.0, t_end = 0.0;
  double local_start = 0.0, local_end = 0.0; 
  double t_total3 = 0.0, t_total4 = 0.0, t_total5 = 0.0, t_total6 = 0.0, t_total7 = 0.0;
  
  int rank, nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  
  // Step 1: P.col(l) = A * Q.col(l); each rank computes its local slice, rank 0 gathers
  int recv_len = (int)P.dim0() * nprocs;
  vec_container<double> tmp(Ax.dim0());
  vec_container<double> recv_tmp(recv_len);
  
  auto m_Ax = make_gemv_ax(&Ax);
  auto m_Atx = make_gemv_ax(&Atx);
  
  m_Ax(Q.col(l), tmp, P.dim0() > 1000);

  vec_container<double> send_data(P.dim0(),0);
  for(size_t i = s_indx; i < s_indx + Ax.dim0(); ++i)
    send_data[i] = tmp.get(i-s_indx);
  MPI_Gather(&send_data[0], P.dim0(), MPI_DOUBLE, &recv_tmp[0], P.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD);

  P.col(l) = 0;
  // Assemble the true P.col(l) from the slices gathered on rank 0
  if(rank == 0) {
    local_union(P, recv_tmp, l, nprocs);
    // Step 2, also on rank 0: subtract the locked directions (restart correction)
    for (int j = locked; j < l; ++j) {
      P.col(l) += -rho(j) * P.col(j);
    }
  }
  
  MPI_Bcast(&(P.col(0)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);  // broadcast all of P in one message
  //MPI_Bcast(&(P.col(l)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); 
  // Main loop
  vec_container<double> T(n);
  int recv_l = Q.dim0() * nprocs;
  vec_container<double> recv_t(recv_l);
  for (int j = l; j < n; ++j) {
    // Step 3: Q.col(j+1) = A^T * P.col(j), computed in distributed slices
    vec_container<double> tmp2(Atx.dim0());
    
    /* for print */
    if(rank == 0)
    	t_start = currenttime();
   
    local_start = currenttime();
    m_Atx(P.col(j), tmp2, Q.dim0() > 1000);
    local_end = currenttime();
    std::cout << "parallel mv time cost is " << (local_end - local_start) / 1.0e6 << std::endl;
     
    vec_container<double> s_data(Q.dim0(), 0); 
    for(size_t i = t_s_indx; i < t_s_indx + Atx.dim0(); ++i)
      s_data[i] = tmp2[i-t_s_indx];
    MPI_Gather(&s_data[0], Q.dim0(), MPI_DOUBLE, &recv_t[0], Q.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    local_start = currenttime();
    std::cout << "parallel mv time cost2 is " << (local_start - local_end) / 1.0e6 << std::endl;
    
    //Q.col(j+1) = 0;
    if(rank == 0) {
      // Assemble the true Q.col(j+1) from the gathered slices
      local_union(Q, recv_t, j + 1, nprocs);
      local_end = currenttime();
      t_end = currenttime();
      std::cout << "parallel mv time cost3 is " << (local_end - local_start) / 1.0e6 << std::endl;
      std::cout << "time of step 3 is : " << (t_end - t_start) / 1.0e6 << std::endl;
      t_total3 += (t_end - t_start) / 1.0e6;
    }
      
    // Step 4: broadcast the first j+2 columns of Q row by row, then compute T_j = Q_j^T * Q.col(j+1)
    for(size_t aa = 0; aa < Q.dim0(); ++aa) // row
      MPI_Bcast(&(Q.row(aa)[0]), j + 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); 
    // MPI_Bcast(&(Q.col(0)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD); 
    
    if(rank == 0)
      t_end = currenttime();
    auto Qj = mat_cols(Q, 0, j + 1);
    auto Tj = make_vec(&T, j + 1);
    
    //Tj.assign(gemv(Qj.trans(), Q.col(j + 1)), j >= 3);
    parallel_gemv_task(Qj.trans(), Q.col(j+1), Tj);
    
    if(rank == 0) {
      t_start = currenttime();
      t_total4 += (t_start - t_end) / 1.0e6;
      std::cout << "time of step 4 is : " << (t_start - t_end) / 1.0e6 << std::endl;
    }

    // Step 5: D[j] = alpha_j = ||P.col(j)||; rescale, then Q.col(j+1) -= Q_j * T_j (one reorthogonalization)
    if(rank == 0) {
      double r = Q.col(j + 1).norm2();
      D[j] = vec_unit(P.col(j));
      Q.col(j + 1).scale(1. / D[j]);
      Tj = Tj / D[j];
      r /= D[j];
      Q.col(j + 1).plus_assign(- gemv(Qj, Tj), Q.dim0() > 1000);
      
      t_end = currenttime();
      t_total5 += (t_end - t_start) / 1.0e6;
      std::cout << "time of step 5 is : " << (t_end - t_start) / 1.0e6 << std::endl;

      // Step 6: beta_j^2 = r^2 - ||T_j||^2 (Pythagoras); reorthogonalize once more if cancellation is severe
      double beta = r * r - Tj.square_sum();
      if (beta < eta * r * r) {
        Tj.assign(gemv(Qj.trans(), Q.col(j + 1)), Q.dim0() > 1000);
        r = Q.col(j + 1).norm2();  // norm (not square_sum), so that beta = r*r - ||Tj||^2 below stays consistent
        Q.col(j + 1).plus_assign(-gemv(Qj, Tj), Q.dim0() > 1000);
        beta = r * r - Tj.square_sum();
      }
      beta = std::sqrt(beta);
      E[j] = beta;
      Q.col(j + 1).scale(1. / E[j]);
      
      t_start = currenttime();
      t_total6 += (t_start - t_end) / 1.0e6;
      std::cout << "time of step 6 is : " << (t_start - t_end) / 1.0e6 << std::endl;
    } 
    
    // Step 7: broadcast Q.col(j+1) element by element (a column need not be contiguous in memory)
    // MPI_Bcast(&(Q.col(j+1)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // MPI_Bcast(&(Q.col(0)[0]), Q.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for(size_t aa = 0; aa < Q.dim0(); ++aa)
      MPI_Bcast(&(Q.col(j+1)[aa]), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (j + 1 < n) {
      if(rank == 0) 
        t_start = currenttime();
      vec_container<double> tmp3(Ax.dim0());
      vec_container<double> se_data(P.dim0(), 0);
      
      m_Ax(Q.col(j + 1), tmp3, P.dim0() > 1000);
      
      for(size_t k1 = s_indx; k1 < s_indx + Ax.dim0(); ++k1)
        se_data[k1] = tmp3[k1-s_indx];
      MPI_Gather(&se_data[0], P.dim0(), MPI_DOUBLE, &recv_tmp[0], P.dim0(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
      
      // P.col(j+1) = 0;
      if(rank == 0) {
        local_union(P, recv_tmp, j + 1, nprocs);
        // P.col(j+1) = A * Q.col(j+1) - E[j] * P.col(j)
        P.col(j + 1).plus_assign(- E[j] * P.col(j), P.dim0() > 1000);
      }
      
      /* for print */
      if(rank == 0) {
        t_end = currenttime();
        t_total7 += (t_end - t_start) / 1.0e6;
        std::cout << "time of step 7 is : " << (t_end - t_start) / 1.0e6 << std::endl;
      }

      // MPI_Bcast(&(P.col(l)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
      // MPI_Bcast(&(P.col(0)[0]), P.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
      for(size_t aa = 0; aa < P.dim0(); ++aa)
        MPI_Bcast(&(P.col(j+1)[aa]), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    }  // end if
  }    // end for j
  
  /* for print time of each step. */
  if(rank == 0) {
    std::cout << "total step 3 time is : " << t_total3 << std::endl; 
    std::cout << "total step 4 time is : " << t_total4 << std::endl; 
    std::cout << "total step 5 time is : " << t_total5 << std::endl; 
    std::cout << "total step 6 time is : " << t_total6 << std::endl; 
    std::cout << "total step 7 time is : " << t_total7 << std::endl; 
  }
  
}
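local_union is not defined in this listing. Under the assumption that each rank contributes a zero-padded, full-length copy of the column whose nonzero slice (starting at that rank's offset) is disjoint from the others, the merge on rank 0 reduces to an element-wise sum across the gathered copies. A minimal sketch under that assumption:

// Hypothetical sketch of local_union, NOT the original implementation.
// Assumes recv holds nprocs zero-padded length-dim0() copies of the column,
// one per rank, with pairwise-disjoint nonzero slices.
template <typename Mat>
void local_union(Mat& M, const vec_container<double>& recv, int col, int nprocs) {
  const size_t dim = M.dim0();
  for (size_t i = 0; i < dim; ++i) {
    double v = 0.0;
    for (int r = 0; r < nprocs; ++r)
      v += recv[r * dim + i];  // at most one rank contributes a nonzero here
    M.col(col)[i] = v;
  }
}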