Example #1
void DistributedVector<T>::localize (NumericVector<T>& v_local_in) const
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  DistributedVector<T>* v_local = libmesh_cast_ptr<DistributedVector<T>*>(&v_local_in);

  v_local->_first_local_index = 0;

  v_local->_global_size =
    v_local->_local_size =
    v_local->_last_local_index = size();

  v_local->_is_initialized =
    v_local->_is_closed = true;

  // Call localize on the vector's values.  This will help
  // prevent code duplication
  localize (v_local->_values);

#ifndef LIBMESH_HAVE_MPI

  libmesh_assert_equal_to (local_size(), size());

#endif
}
Example #2
void DistributedVector<T>::conjugate()
{
  for (numeric_index_type i=0; i<local_size(); i++)
    {
      // Replace values by complex conjugate
      _values[i] = libmesh_conj(_values[i]);
    }
}
Example #3
void DistributedVector<T>::abs()
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  for (std::size_t i=0; i<local_size(); i++)
    this->set(i,std::abs(_values[i]));
}
Example #4
void DistributedVector<T>::scale (const T factor)
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  for (std::size_t i=0; i<local_size(); i++)
    _values[i] *= factor;
}
Example #5
void DistributedVector<T>::add (const T v)
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  for (numeric_index_type i=0; i<local_size(); i++)
    _values[i] += v;
}
Example #6
void DistributedVector<T>::reciprocal()
{
  for (numeric_index_type i=0; i<local_size(); i++)
    {
      // Don't divide by zero
      libmesh_assert_not_equal_to (_values[i], T(0));

      _values[i] = 1. / _values[i];
    }
}
Example #7
NumericVector<T>&
DistributedVector<T>::operator = (const T s)
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  for (std::size_t i=0; i<local_size(); i++)
    _values[i] = s;

  return *this;
}
Example #8
std::vector<Node*>::const_iterator AbstractLayer::local_end(int_t depth) const
{
  if (depth >= depth_)
    throw BadProperty("Selected depth out of range");
  index min_nodes_per_layer = local_size()/depth_;
  index last_gid_at_depth = gids_[(depth+1)*(global_size()/depth_)-1];
  std::vector<Node*>::const_iterator iter = local_begin();
  for (iter += (depth+1)*min_nodes_per_layer; iter != local_end(); ++iter) {
    if ((*iter)->get_gid() > last_gid_at_depth)
      break;
  }
  return iter;
}
Example #9
void PetscVector<Real>::localize_to_one (std::vector<Real>& v_local,
                                         const processor_id_type pid) const
{
  this->_restore_array();

  PetscErrorCode ierr=0;
  const PetscInt n  = size();
  const PetscInt nl = local_size();
  PetscScalar *values;

  v_local.resize(n);

  // only one processor
  if (n == nl)
    {
      ierr = VecGetArray (_vec, &values);
      CHKERRABORT(libMesh::COMM_WORLD,ierr);

      for (PetscInt i=0; i<n; i++)
        v_local[i] = static_cast<Real>(values[i]);

      ierr = VecRestoreArray (_vec, &values);
      CHKERRABORT(libMesh::COMM_WORLD,ierr);
    }

  // otherwise multiple processors
  else
    {
      numeric_index_type ioff = this->first_local_index();

      // Zero-initialized send buffer of global length; each processor fills
      // in its own slice, then MPI_Reduce with MPI_SUM assembles the full
      // vector on processor 'pid'.
      std::vector<Real> local_values (n, 0.);

      {
        ierr = VecGetArray (_vec, &values);
        CHKERRABORT(libMesh::COMM_WORLD,ierr);

        for (PetscInt i=0; i<nl; i++)
          local_values[i+ioff] = static_cast<Real>(values[i]);

        ierr = VecRestoreArray (_vec, &values);
        CHKERRABORT(libMesh::COMM_WORLD,ierr);
      }

      MPI_Reduce (&local_values[0], &v_local[0], n, MPI_REAL, MPI_SUM,
                  pid, libMesh::COMM_WORLD);
    }
}
Example #10
void DistributedVector<T>::localize (std::vector<T>& v_local) const
{
  // This function must be run on all processors at once
  parallel_object_only();

  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  v_local = this->_values;

  this->comm().allgather (v_local);

#ifndef LIBMESH_HAVE_MPI
  libmesh_assert_equal_to (local_size(), size());
#endif
}
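
The allgather call above is where the distributed pieces become a full serial copy on every processor. For reference, here is a minimal standalone sketch of that gather pattern written directly against MPI rather than the libMesh Parallel API; the free function gather_all is hypothetical, and it assumes contiguous double storage and an already-initialized MPI environment.

#include <mpi.h>
#include <vector>

// Hypothetical helper: gather every rank's local block into a full copy on
// every rank, the same effect this->comm().allgather(v_local) has above.
std::vector<double> gather_all (const std::vector<double>& local, MPI_Comm comm)
{
  int nprocs = 0;
  MPI_Comm_size(comm, &nprocs);

  // Ranks may own different numbers of entries, so exchange the counts first.
  int local_n = static_cast<int>(local.size());
  std::vector<int> counts(nprocs), displs(nprocs);
  MPI_Allgather(&local_n, 1, MPI_INT, counts.data(), 1, MPI_INT, comm);

  int total = 0;
  for (int p = 0; p < nprocs; p++)
    {
      displs[p] = total;
      total += counts[p];
    }

  // Variable-count gather: every rank ends up with the full global vector.
  std::vector<double> global(total);
  MPI_Allgatherv(local.data(), local_n, MPI_DOUBLE,
                 global.data(), counts.data(), displs.data(), MPI_DOUBLE, comm);
  return global;
}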
Example #11
Real DistributedVector<T>::l2_norm () const
{
  // This function must be run on all processors at once
  parallel_object_only();

  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  double local_l2 = 0.;

  for (numeric_index_type i=0; i<local_size(); i++)
    local_l2 += TensorTools::norm_sq(_values[i]);

  this->comm().sum(local_l2);

  return std::sqrt(local_l2);
}
Example #12
T DistributedVector<T>::sum () const
{
  // This function must be run on all processors at once
  parallel_object_only();

  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  T local_sum = 0.;

  for (numeric_index_type i=0; i<local_size(); i++)
    local_sum += _values[i];

  this->comm().sum(local_sum);

  return local_sum;
}
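
The comm().sum(local_sum) call is an in-place all-reduce: every processor contributes its partial sum and every processor receives the global total. A minimal sketch of the same step in raw MPI, offered as an illustration rather than the libMesh implementation, assuming T is double and MPI is initialized:

#include <mpi.h>

// Hypothetical helper mirroring this->comm().sum(x).
double global_sum (double local_sum, MPI_Comm comm)
{
  double total = 0.;
  MPI_Allreduce(&local_sum, &total, 1, MPI_DOUBLE, MPI_SUM, comm);
  return total;
}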
Example #13
/*
 * MPID_Pack_size
 *
 * NOTE: MPID_Msg_pack_t msgact ignored for reasons stated at top of file
 *
 * NOTE: there's no way to report an error condition here, since there is
 *       no *error_code arg.  In case of an error, return a size of 0 and
 *       print an error message to stderr.
 */
void MPID_Pack_size(int count,
                    struct MPIR_DATATYPE *datatype,
                    MPID_Msg_pack_t msgact, /* ignored */
                    int *size)
{
    int tmp_size;

    tmp_size = local_size(count, datatype);

    if (tmp_size < 0)
    {
        globus_libc_fprintf(stderr,
        "ERROR: MPID_Pack_size could not calculate pack size, returning 0\n");
        *size = 0;
        return;
    } /* endif */

    *size = tmp_size + sizeof(unsigned char);
}
Example #14
NumericVector<T>&
DistributedVector<T>::operator = (const std::vector<T>& v)
{
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  if (v.size() == local_size())
    _values = v;

  else if (v.size() == size())
    for (std::size_t i=first_local_index(); i<last_local_index(); i++)
      _values[i-first_local_index()] = v[i];

  else
    libmesh_error_msg("Incompatible sizes in DistributedVector::operator=");

  return *this;
}
Example #15
Real DistributedVector<T>::linfty_norm () const
{
  // This function must be run on all processors at once
  parallel_object_only();

  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (_values.size(), _local_size);
  libmesh_assert_equal_to ((_last_local_index - _first_local_index), _local_size);

  Real local_linfty = 0.;

  for (numeric_index_type i=0; i<local_size(); i++)
    local_linfty  = std::max(local_linfty,
                             static_cast<Real>(std::abs(_values[i]))
                             ); // Note we static_cast so that both
                                // types are the same, as required
                                // by std::max

  this->comm().max(local_linfty);

  return local_linfty;
}
Example #16
void PetscVector<Complex>::localize_to_one (std::vector<Complex>& v_local,
                                            const processor_id_type pid) const
{
  this->_restore_array();

  PetscErrorCode ierr=0;
  const PetscInt n  = size();
  const PetscInt nl = local_size();
  PetscScalar *values;

  v_local.resize(n);

  for (PetscInt i=0; i<n; i++)
    v_local[i] = 0.;

  // only one processor
  if (n == nl)
    {
      ierr = VecGetArray (_vec, &values);
      CHKERRABORT(libMesh::COMM_WORLD,ierr);

      for (PetscInt i=0; i<n; i++)
        v_local[i] = static_cast<Complex>(values[i]);

      ierr = VecRestoreArray (_vec, &values);
      CHKERRABORT(libMesh::COMM_WORLD,ierr);
    }

  // otherwise multiple processors
  else
    {
      numeric_index_type ioff = this->first_local_index();

      /* The local values are stored here, acting as send buffers for MPI.
       * They are initialized to zero, since we collect using MPI_SUM.
       */
      std::vector<Real> real_local_values(n, 0.);
      std::vector<Real> imag_local_values(n, 0.);

      {
        ierr = VecGetArray (_vec, &values);
        CHKERRABORT(libMesh::COMM_WORLD,ierr);

        // provide my local share to the real and imag buffers
        for (PetscInt i=0; i<nl; i++)
          {
            real_local_values[i+ioff] = static_cast<Complex>(values[i]).real();
            imag_local_values[i+ioff] = static_cast<Complex>(values[i]).imag();
          }

        ierr = VecRestoreArray (_vec, &values);
        CHKERRABORT(libMesh::COMM_WORLD,ierr);
      }

      /* Receive buffers for the real and imaginary parts of v_local.
       * Once MPI_Reduce() has collected all the real and imaginary
       * parts into these std::vector<Real>s, the values can be
       * copied to v_local.
       */
      std::vector<Real> real_v_local(n);
      std::vector<Real> imag_v_local(n);

      // collect entries from the other procs in real_v_local, imag_v_local
      MPI_Reduce (&real_local_values[0], &real_v_local[0], n,
                  MPI_REAL, MPI_SUM,
                  pid, libMesh::COMM_WORLD);

      MPI_Reduce (&imag_local_values[0], &imag_v_local[0], n,
                  MPI_REAL, MPI_SUM,
                  pid, libMesh::COMM_WORLD);

      // copy real_v_local and imag_v_local to v_local
      for (PetscInt i=0; i<n; i++)
        v_local[i] = Complex(real_v_local[i], imag_v_local[i]);
    }
}
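
The real/imaginary split above exists because the code reduces through the predefined MPI_SUM operation on buffers of Real. An alternative, which is not what libMesh does here, is to reduce the complex buffer in one call by viewing it as 2n doubles, which the standard array layout of std::complex<double> permits. A minimal sketch with a hypothetical helper name:

#include <mpi.h>
#include <complex>
#include <vector>

// Hypothetical alternative: reduce a zero-padded, global-length complex send
// buffer onto rank 'root' in one call by treating each entry as two doubles.
void reduce_complex_to_one (const std::vector<std::complex<double>>& send,
                            std::vector<std::complex<double>>& recv,
                            int root, MPI_Comm comm)
{
  recv.resize(send.size());
  MPI_Reduce(reinterpret_cast<const double*>(send.data()),
             reinterpret_cast<double*>(recv.data()),
             2*static_cast<int>(send.size()),
             MPI_DOUBLE, MPI_SUM, root, comm);
}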
Example #17
/*
 * local_size
 *
 * Returns -1 when there is a problem.
 *
 * NOTE: there is one more datatype found in datatype.h ... MPIR_FORT_INT.
 *       It has been explained to me by Bill that we do not have to
 *       support an explicit case for that type because it is a
 *       synonym for one of the other types we already have a case
 *       statement for (which type it is a synonym for is architecture
 *       dependent and determined during mpich configuration).
 *
 */
int local_size(int count, struct MPIR_DATATYPE *datatype)
{
    int rc;

    if (count < 0)
    {
	globus_libc_fprintf(stderr,
	    "ERROR: local_size: passed count %d .... must be >= 0\n", 
	    count);
	return -1;
    } /* endif */

    switch(datatype->dte_type)
    {
        case MPIR_CHAR:           rc = globus_dc_sizeof_char(count);      break;
        case MPIR_UCHAR:          rc = globus_dc_sizeof_u_char(count);    break;
	/* MPIR_PACKED are always raw bytes and are never converted */
        case MPIR_PACKED:         rc = count;                             break;
        case MPIR_BYTE:           rc = count;                             break;
        case MPIR_SHORT:          rc = globus_dc_sizeof_short(count);     break;
        case MPIR_USHORT:         rc = globus_dc_sizeof_u_short(count);   break;
        case MPIR_LOGICAL: /* 'logical' in FORTRAN is always same as 'int' */
        case MPIR_INT:            rc = globus_dc_sizeof_int(count);       break;
        case MPIR_UINT:           rc = globus_dc_sizeof_u_int(count);     break;
        case MPIR_LONG:           rc = globus_dc_sizeof_long(count);      break;
        case MPIR_LONGLONGINT:    rc = globus_dc_sizeof_long_long(count); break;
        case MPIR_ULONG:          rc = globus_dc_sizeof_u_long(count);    break;
        case MPIR_FLOAT:          rc = globus_dc_sizeof_float(count);     break;
        case MPIR_DOUBLE:         rc = globus_dc_sizeof_double(count);    break;
        case MPIR_LONGDOUBLE: /* not supported by Globus */ rc = 0;       break;
        case MPIR_UB:             
        case MPIR_LB:             rc = 0;                                break;
        case MPIR_COMPLEX:        rc = globus_dc_sizeof_float(2*count);  break;
        case MPIR_DOUBLE_COMPLEX: rc = globus_dc_sizeof_double(2*count); break;
        case MPIR_CONTIG:         
            rc = local_size(count*datatype->count, datatype->old_type);
            break;
        case MPIR_VECTOR:         
        case MPIR_HVECTOR:        
	    {
		int tmp = local_size(datatype->blocklen, datatype->old_type);
		rc = (tmp == -1 ? -1 : tmp*count*datatype->count);
	    }
            break;
        case MPIR_INDEXED:        
        case MPIR_HINDEXED:       
	    {
		int i, tmp, tmp2;
		for (rc = tmp = tmp2 = i = 0; 
		    tmp2 != -1 && i < datatype->count; 
			i++)
		{
		    tmp2 = local_size(datatype->blocklens[i], 
					datatype->old_type);
		    if (tmp2 == -1)
			tmp = -1;
		    else
			tmp += tmp2;
		} /* endfor */
		if (tmp != -1)
		    rc = tmp*count;
		else
		    rc = -1;
	    }
	    break;
        case MPIR_STRUCT:
	    {
		int i, tmp, tmp2;
		for (rc = tmp = tmp2 = i = 0; 
		    tmp2 != -1 && i < datatype->count; 
			i++)
		{
		    tmp2 = local_size(datatype->blocklens[i], 
					datatype->old_types[i]);
		    if (tmp2 == -1)
			tmp = -1;
		    else
			tmp += tmp2;
		} /* endfor */
		if (tmp != -1)
		    rc = tmp*count;
		else
		    rc = -1;
	    }
	    break;
        default:        
            globus_libc_fprintf(stderr,
                "ERROR: local_size: encountered unrecognizable MPIR type %d\n", 
		    datatype->dte_type);
            rc = -1;
	    break;
    } /* end switch */

    return rc;

} /* end local_size() */
Example #18
///  The computed inverse is stored in the matrix ai
///  n_global: the order of the matrix
static void inv_driver(blas_idx_t n_global)
{

    auto grid = std::make_shared<blacs_grid_t>();
	
    //// disabled debug code: build a small hand-filled 3x3 matrix instead of the random one below
    //n_global = 3;
    //double *aaa = new double(n_global*n_global);
    //for (int i = 0; i < 9; i++)
    //{
    //    aaa[i] = i + 1;
    //}
    //aaa[8] = 10;
    //auto a = block_cyclic_mat_t::createWithArray(grid, n_global, n_global, aaa);


    // Create a NxN random matrix A
    auto a = block_cyclic_mat_t::random(grid, n_global, n_global);        

    // Create a NxN matrix to hold A^{-1}
    auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global);

    // Copy A to A^{-1} since it will be overwritten during factorization
    std::copy_n(a->local_data(), a->local_size(), ai->local_data());

    MPI_Barrier (MPI_COMM_WORLD);

    double t0 = MPI_Wtime();
    
    // Factorize A 
    blas_idx_t ia = 1, ja = 1;
    std::vector<blas_idx_t> ipiv(a->local_rows() + a->row_block_size() + 100);
    blas_idx_t info;

    // The name means D-GE-TRF:
    //   D   - the matrix entries are of type double
    //   GE  - the matrix is a General (dense) matrix
    //   TRF - TRiangular Factorization, i.e. what is usually called the LU decomposition
    pdgetrf_(n_global, n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        info);
    assert(info == 0);
    double t_factor = MPI_Wtime() - t0;

    // Compute A^{-1} based on the LU factorization

    // Compute workspace for double and integer work arrays on each process
    blas_idx_t lwork  = 10;
    blas_idx_t liwork = 10;
    std::vector<double>     work (lwork); 
    std::vector<blas_idx_t> iwork(liwork);

    lwork = liwork = -1;   

    // Workspace query: compute the required values of lwork and liwork
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    lwork  = static_cast<blas_idx_t>(work[0]);
    liwork = static_cast<blas_idx_t>(iwork[0]);
    work.resize(lwork);
    iwork.resize(liwork);

    // Now compute the inverse
    t0 = MPI_Wtime();
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    double t_solve = MPI_Wtime() - t0;

    // Verify that the inverse is correct using A*A^{-1} = I
    auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global);

    // Compute I = A * A^{-1} - I and verify that the ||I|| is small    
    char nein = 'N';
    double alpha = 1.0, beta = -1.0;
    pdgemm_(nein, nein, n_global, n_global, n_global, alpha, 
        a->local_data() , ia, ja, a->descriptor(),
        ai->local_data(), ia, ja, ai->descriptor(),
        beta,
        identity->local_data(), ia, ja, identity->descriptor());

    // Compute 1-norm of the result
    char norm='1';
    work.resize(identity->local_cols());
    double err = pdlange_(norm, n_global, n_global, 
        identity->local_data(), ia, ja, identity->descriptor(), work.data());

    double t_total = t_factor + t_solve;
    double t_glob;
    MPI_Reduce(&t_total, &t_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if (grid->iam() == 0) 
    {
        double gflops = getri_flops(n_global)/t_glob/grid->nprocs();
        printf("\n"
            "MATRIX INVERSE BENCHMARK SUMMARY\n"
            "================================\n"
            "N = %d\tNP = %d\tNP_ROW = %d\tNP_COL = %d\n"
            "Time for PxGETRF + PxGETRI = %10.7f seconds\tGflops/Proc = %10.7f, Error = %f\n",
            n_global, grid->nprocs(), grid->nprows(), grid->npcols(), 
            t_glob, gflops, err);fflush(stdout);
    }
}
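
For context, a driver like this is typically launched under mpirun with a main() along the following lines. This sketch is an assumption about the surrounding program rather than part of the example, and it presumes that the blacs_grid_t constructor and destructor manage the underlying BLACS grid; only the MPI calls are standard API.

#include <mpi.h>
#include <cstdlib>

int main (int argc, char** argv)
{
  MPI_Init(&argc, &argv);

  // Matrix order from the command line; 4096 is an arbitrary default.
  blas_idx_t n = (argc > 1) ? std::atoll(argv[1]) : 4096;
  inv_driver(n);

  MPI_Finalize();
  return 0;
}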