예제 #1
0
void HPCoarsenTest::add_projection(const System & system,
                                   const Elem * elem,
                                   unsigned int var)
{
  // If we have children, we need to add their projections instead
  if (!elem->active())
    {
      libmesh_assert(!elem->subactive());
      for (unsigned int c = 0; c != elem->n_children(); ++c)
        this->add_projection(system, elem->child(c), var);
      return;
    }

  // The DofMap for this system
  const DofMap & dof_map = system.get_dof_map();

  // The type of finite element to use for this variable
  const FEType & fe_type = dof_map.variable_type (var);

  const FEContinuity cont = fe->get_continuity();

  fe->reinit(elem);

  dof_map.dof_indices(elem, dof_indices, var);

  const unsigned int n_dofs =
    cast_int<unsigned int>(dof_indices.size());

  FEInterface::inverse_map (system.get_mesh().mesh_dimension(),
                            fe_type, coarse, *xyz_values, coarse_qpoints);

  fe_coarse->reinit(coarse, &coarse_qpoints);

  const unsigned int n_coarse_dofs =
    cast_int<unsigned int>(phi_coarse->size());

  if (Uc.size() == 0)
    {
      Ke.resize(n_coarse_dofs, n_coarse_dofs);
      Ke.zero();
      Fe.resize(n_coarse_dofs);
      Fe.zero();
      Uc.resize(n_coarse_dofs);
      Uc.zero();
    }
  libmesh_assert_equal_to (Uc.size(), phi_coarse->size());

  // Loop over the quadrature points
  for (unsigned int qp=0; qp<qrule->n_points(); qp++)
    {
      // The solution value at the quadrature point
      Number val = libMesh::zero;
      Gradient grad;
      Tensor hess;

      for (unsigned int i=0; i != n_dofs; i++)
        {
          dof_id_type dof_num = dof_indices[i];
          val += (*phi)[i][qp] *
            system.current_solution(dof_num);
          if (cont == C_ZERO || cont == C_ONE)
            grad.add_scaled((*dphi)[i][qp],system.current_solution(dof_num));
          // grad += (*dphi)[i][qp] *
          //  system.current_solution(dof_num);
          if (cont == C_ONE)
            hess.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num));
          // hess += (*d2phi)[i][qp] *
          //  system.current_solution(dof_num);
        }

      // The projection matrix and vector
      for (unsigned int i=0; i != Fe.size(); ++i)
        {
          Fe(i) += (*JxW)[qp] *
            (*phi_coarse)[i][qp]*val;
          if (cont == C_ZERO || cont == C_ONE)
            Fe(i) += (*JxW)[qp] *
              (grad*(*dphi_coarse)[i][qp]);
          if (cont == C_ONE)
            Fe(i) += (*JxW)[qp] *
              hess.contract((*d2phi_coarse)[i][qp]);
          // Fe(i) += (*JxW)[qp] *
          //  (*d2phi_coarse)[i][qp].contract(hess);

          for (unsigned int j=0; j != Fe.size(); ++j)
            {
              Ke(i,j) += (*JxW)[qp] *
                (*phi_coarse)[i][qp]*(*phi_coarse)[j][qp];
              if (cont == C_ZERO || cont == C_ONE)
                Ke(i,j) += (*JxW)[qp] *
                  (*dphi_coarse)[i][qp]*(*dphi_coarse)[j][qp];
              if (cont == C_ONE)
                Ke(i,j) += (*JxW)[qp] *
                  ((*d2phi_coarse)[i][qp].contract((*d2phi_coarse)[j][qp]));
            }
        }
    }
}
예제 #2
0
void MeshFunction::hessian (const Point& p,
                            const Real,
                            std::vector<Tensor>& output,
                            const std::set<subdomain_id_type>* subdomain_ids)
{
  libmesh_assert (this->initialized());

  const Elem* element = this->find_element(p,subdomain_ids);

  if (!element)
    {
      output.resize(0);
    }
  else
    {
      // resize the output vector to the number of output values
      // that the user told us
      output.resize (this->_system_vars.size());


      {
        const unsigned int dim = element->dim();


        /*
         * Get local coordinates to feed these into compute_data().
         * Note that the fe_type can safely be used from the 0-variable,
         * since the inverse mapping is the same for all FEFamilies
         */
        const Point mapped_point (FEInterface::inverse_map (dim,
                                                            this->_dof_map.variable_type(0),
                                                            element,
                                                            p));

        std::vector<Point> point_list (1, mapped_point);

        // loop over all vars
        for (unsigned int index=0; index < this->_system_vars.size(); index++)
          {
            /*
             * the data for this variable
             */
            const unsigned int var = _system_vars[index];
            const FEType& fe_type = this->_dof_map.variable_type(var);

            UniquePtr<FEBase> point_fe (FEBase::build(dim, fe_type));
            const std::vector<std::vector<RealTensor> >& d2phi =
              point_fe->get_d2phi();
            point_fe->reinit(element, &point_list);

            // where the solution values for the var-th variable are stored
            std::vector<dof_id_type> dof_indices;
            this->_dof_map.dof_indices (element, dof_indices, var);

            // interpolate the solution
            Tensor hess;

            for (unsigned int i=0; i<dof_indices.size(); i++)
              hess.add_scaled(d2phi[i][0], this->_vector(dof_indices[i]));

            output[index] = hess;
          }
      }
    }

  // all done
  return;
}
예제 #3
0
파일: tensor.cpp 프로젝트: pranjul23/NASA
// Assembles a dense linear system from the contraction "X times A gives B".
//
// On return B_vec holds the elements of B in linear order, and A_matrix
// (initialised to B.NumElements() x X.NumElements()) has, in row n, the
// entry A.At(indicesA) stored at column X.ComputeIndex(indicesX) for every
// combination of indices over the multiplied modes.
//
//   B_vec        output; elements of B are appended to it
//   A_matrix     output; coefficient matrix of the system
//   X, A, B      the two operands and the result tensor
//   mult_modesX  modes of X that are contracted
//   mult_modesA  modes of A that are contracted (same length as mult_modesX)
void Tensor::CreateLinearSystem(vector<double>& B_vec, Matrix& A_matrix,
						Tensor& X, Tensor& A, Tensor& B,
						vector<int>& mult_modesX, vector<int>& mult_modesA)
{
	assert(mult_modesX.size() == mult_modesA.size());

	// Contracting every mode of both operands is rejected.
	if (X.Order() == mult_modesX.size() && A.Order() == mult_modesA.size())
	{
		assert(0);
	}

	// Extents of the contracted modes and the number of index
	// combinations they span.
	int numMultElements = 1;
	vector<int> mult_dims(mult_modesX.size(), 0);

	for (size_t i = 0; i < mult_modesX.size(); ++i)
	{
		assert(X.Dim(mult_modesX[i]) == A.Dim(mult_modesA[i]));
		mult_dims[i] = X.Dim(mult_modesX[i]);
		numMultElements = numMultElements * mult_dims[i];
	}

	// The offsets are not read in this function.  The call is kept because
	// ComputeOffsets is defined elsewhere and its effects are not visible here.
	vector<int> mult_offsets;
	ComputeOffsets(mult_offsets, mult_dims);

	int result_order = X.Order() + A.Order() - mult_modesX.size() - mult_modesA.size();

	if (result_order == 0)
		assert(0);

	// Modes of each operand that are NOT contracted, in increasing order.
	vector<int> free_modesX;
	vector<int> free_modesA;

	for (int i = 0; i < X.Order(); ++i)
	{
		if (!VectorPlus::Contains(mult_modesX, i))
		{
			free_modesX.push_back(i);
		}
	}

	for (int i = 0; i < A.Order(); ++i)
	{
		if (!VectorPlus::Contains(mult_modesA, i))
		{
			free_modesA.push_back(i);
		}
	}

	vector<int> a_mat_dims = VectorPlus::CreatePair(B.NumElements(), X.NumElements());
	A_matrix.Initialize(a_mat_dims);
	B_vec.reserve(B.NumElements());

	FastIndexer B_indexer(B.Dims());

	// Scratch buffers reused across iterations; clear() keeps their
	// capacity, so no allocation happens per element.
	vector<int> free_indicesX;
	vector<int> free_indicesA;
	vector<int> indicesX;
	vector<int> indicesA;

	for (int n = 0; n < B.NumElements(); ++n)
	{
		B_vec.push_back(B.At(n));

		vector<int>& indices = B_indexer.GetNext();

		// Split the index tuple of B's n-th element between X and A.
		// NOTE(review): the split tests whether the *position* i in B is
		// listed in mult_modesX, which are mode numbers of X — confirm
		// this is the intended mapping from result modes to operands.
		free_indicesX.clear();
		free_indicesA.clear();
		for (int i = 0; i < B.Order(); ++i)
		{
			if (!VectorPlus::Contains(mult_modesX, i))
				free_indicesX.push_back(indices[i]);
			else
				free_indicesA.push_back(indices[i]);
		}

		// Visit every combination of contracted indices and record the
		// matching element of A as the coefficient of the matching X entry.
		FastIndexer mult_indexer(mult_dims);
		for (int k = 0; k < numMultElements; ++k)
		{
			vector<int>& mult_indices = mult_indexer.GetNext();

			indicesX.clear();
			indicesA.clear();

			MergeIndices(indicesX, mult_modesX, free_modesX, mult_indices, free_indicesX);
			MergeIndices(indicesA, mult_modesA, free_modesA, mult_indices, free_indicesA);

			A_matrix.Set(n, X.ComputeIndex(indicesX), A.At(indicesA));
		}
	}
}
// Finalises the vessel once the task loop has finished: locates the two
// stored frames with the smallest distance to the current point, then sets
// the s (sp) and z (zp) values together with their derivatives with respect
// to the arguments, the atoms and the virial.
void TrigonometricPathVessel::finish( const std::vector<double>& buffer ) {
  // Store the data calculated during mpi loop
  StoreDataVessel::finish( buffer );
  // Get current value of all arguments
  for(unsigned i=0; i<cargs.size(); ++i) cargs[i]=mymap->getArgument(i);

  // Determine closest and second closest point to current position
  double lambda=mymap->getLambda();
  std::vector<double> dist( getNumberOfComponents() ), dist2( getNumberOfComponents() );;
  retrieveSequentialValue( 0, false, dist );
  retrieveSequentialValue( 1, false, dist2 );
  iclose1=getStoreIndex(0); iclose2=getStoreIndex(1);
  double mindist1=dist[0], mindist2=dist2[0];
  // With a positive lambda the stored values are mapped through
  // -log(x)/lambda before they are compared
  if( lambda>0.0 ) {
    mindist1=-std::log( dist[0] ) / lambda;
    mindist2=-std::log( dist2[0] ) / lambda;
  }
  // Order the first two candidates so that mindist1 <= mindist2
  if( mindist2<mindist1 ) {
    double tmp=mindist1; mindist1=mindist2; mindist2=tmp;
    iclose1=getStoreIndex(1); iclose2=getStoreIndex(0);
  }
  // Scan the remaining stored values, keeping the two smallest
  for(unsigned i=2; i<getNumberOfStoredValues(); ++i) {
    retrieveSequentialValue( i, false, dist );
    double ndist=dist[0];
    if( lambda>0.0 ) ndist=-std::log( dist[0] ) / lambda;
    if( ndist<mindist1 ) {
      mindist2=mindist1; iclose2=iclose1;
      mindist1=ndist; iclose1=getStoreIndex(i);
    } else if( ndist<mindist2 ) {
      mindist2=ndist; iclose2=getStoreIndex(i);
    }
  }
  // And find third closest point: one index step from iclose1, on the side
  // facing away from iclose2 (isign is the sign of iclose1-iclose2)
  int isign = iclose1 - iclose2;
  if( isign>1 ) isign=1; else if( isign<-1 ) isign=-1;
  int iclose3 = iclose1 + isign; double v2v2;
  // We now have to compute vectors connecting the three closest points to the
  // new point
  double v1v1 = (mymap->getReferenceConfiguration( iclose1 ))->calculate( mymap->getPositions(), mymap->getPbc(), mymap->getArguments(), mypack1, true );
  double v3v3 = (mymap->getReferenceConfiguration( iclose2 ))->calculate( mymap->getPositions(), mymap->getPbc(), mymap->getArguments(), mypack3, true );
  // If iclose3 is not a valid task index, the frames iclose1 and iclose2 are
  // used for v2v2 and projdir; otherwise iclose1 and iclose3 are used
  if( iclose3<0 || iclose3>=mymap->getFullNumberOfTasks() ) {
    ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose1 );
    v2v2=(mymap->getReferenceConfiguration( iclose2 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(),
         conf2->getReferenceArguments(), mypack2, true );
    (mymap->getReferenceConfiguration( iclose2 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(),
        conf2->getReferenceArguments(), false, projdir );
  } else {
    ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose3 );
    v2v2=(mymap->getReferenceConfiguration( iclose1 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(),
         conf2->getReferenceArguments(), mypack2, true );
    (mymap->getReferenceConfiguration( iclose1 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(),
        conf2->getReferenceArguments(), false, projdir );
  }

  // Stash derivatives of v1v1 (mypack1 is handed to
  // projectDisplacementOnVector below, so its current contents are saved first)
  for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) mypack1_stashd_args[i]=mypack1.getArgumentDerivative(i);
  if( mymap->getNumberOfAtoms()>0 ) {
    // NOTE(review): the dynamic_cast result is dereferenced without a null
    // check — confirm the configuration is always a ReferenceAtoms here
    ReferenceAtoms* at = dynamic_cast<ReferenceAtoms*>( mymap->getReferenceConfiguration( iclose1 ) );
    const std::vector<double> & displace( at->getDisplace() );
    for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) {
      mypack1_stashd_atoms[i]=mypack1.getAtomDerivative(i); mypack1.getAtomsDisplacementVector()[i] /= displace[i];
    }
  }
  // Calculate the dot product of v1 with v2
  double v1v2 = (mymap->getReferenceConfiguration(iclose1))->projectDisplacementOnVector( projdir, mymap->getArguments(), cargs, mypack1 );

  // This computes s value
  double spacing = mymap->getPropertyValue( iclose1, (mymap->property.begin())->first ) - mymap->getPropertyValue( iclose2, (mymap->property.begin())->first );
  // NOTE(review): the sqrt argument is not checked for being negative and
  // v2v2 is used as a divisor without a zero check — confirm both are
  // guaranteed by the callers/inputs
  double root = sqrt( v1v2*v1v2 - v2v2 * ( v1v1 - v3v3) );
  dx = 0.5 * ( (root + v1v2) / v2v2 - 1.);
  double path_s = mymap->getPropertyValue(iclose1, (mymap->property.begin())->first ) + spacing * dx; sp->set( path_s );
  double fact = 0.25*spacing / v2v2;
  // Derivative of s wrt arguments
  for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) {
    sp->setDerivative( i, fact*( mypack2.getArgumentDerivative(i) + (v2v2 * (-mypack1_stashd_args[i] + mypack3.getArgumentDerivative(i))
                                 + v1v2*mypack2.getArgumentDerivative(i) )/root ) );
  }
  // Derivative of s wrt atoms; atom derivatives are stored after the narg
  // argument derivatives, three components per atom
  unsigned narg=mymap->getNumberOfArguments(); Tensor vir; vir.zero(); fact = 0.5*spacing / v2v2;
  if( mymap->getNumberOfAtoms()>0 ) {
    for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) {
      Vector ader = fact*(( v1v2*mypack1.getAtomDerivative(i) + 0.5*v2v2*(-mypack1_stashd_atoms[i] + mypack3.getAtomDerivative(i) ) )/root + mypack1.getAtomDerivative(i) );
      for(unsigned k=0; k<3; ++k) sp->setDerivative( narg+3*i+k, ader[k] );
      vir-=Tensor( mymap->getPosition(i), ader );
    }
    // Set the virial (nine entries stored after the atom derivatives)
    unsigned nbase=narg+3*mymap->getNumberOfAtoms();
    for(unsigned i=0; i<3; ++i) for(unsigned j=0; j<3; ++j) sp->setDerivative( nbase+3*i+j, vir(i,j) );
  }
  // Now compute z value
  ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose1 );
  double v4v4=(mymap->getReferenceConfiguration( iclose2 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(),
              conf2->getReferenceArguments(), mypack2, true );
  // Extract vector connecting frames
  (mymap->getReferenceConfiguration( iclose2 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(),
      conf2->getReferenceArguments(), false, projdir );
  // Calculate projection of vector on line connecting frames
  double proj = (mymap->getReferenceConfiguration(iclose1))->projectDisplacementOnVector( projdir, mymap->getArguments(), cargs, mypack1 );
  double path_z = v1v1 + dx*dx*v4v4 - 2*dx*proj;
  // Derivatives for z path; path_z is replaced by its square root first, and
  // that root is the divisor used in the derivative expressions below
  path_z = sqrt(path_z); zp->set( path_z ); vir.zero();
  for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) zp->setDerivative( i, (mypack1_stashd_args[i] - 2*dx*mypack1.getArgumentDerivative(i))/(2.0*path_z) );
  // Derivative wrt atoms
  if( mymap->getNumberOfAtoms()>0 ) {
    for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) {
      // dxder reuses the s derivatives that were stored in sp above
      Vector dxder; for(unsigned k=0; k<3; ++k) dxder[k] = ( 2*v4v4*dx - 2*proj )*spacing*sp->getDerivative( narg + 3*i+k );
      Vector ader = ( mypack1_stashd_atoms[i] - 2.*dx*mypack1.getAtomDerivative(i) + dxder )/ (2.0*path_z);
      for(unsigned k=0; k<3; ++k) zp->setDerivative( narg+3*i+k, ader[k] );
      vir-=Tensor( mymap->getPosition(i), ader );
    }
    // Set the virial
    unsigned nbase=narg+3*mymap->getNumberOfAtoms();
    for(unsigned i=0; i<3; ++i) for(unsigned j=0; j<3; ++j) zp->setDerivative( nbase+3*i+j, vir(i,j) );
  }
}
예제 #5
0
std::tuple<Tensor, Tensor, Tensor, Tensor>
embedding_bag_cpu(const Tensor &weight, const Tensor &indices__,
                  const Tensor &offsets__, const bool scale_grad_by_freq,
                  const int64_t mode, bool sparse) {
  // Dtype validation: indices and offsets must be kLong, weight must be
  // kFloat or kDouble.  The checks are interleaved with the contiguous()
  // calls in the same order as before.
  auto idx_check = TensorArg(indices__, "indices__", 1);
  checkScalarType("embedding_bag", idx_check, kLong);
  auto off_check = TensorArg(offsets__, "offsets__", 1);
  checkScalarType("embedding_bag", off_check, kLong);
  Tensor indices = indices__.contiguous();
  Tensor offsets = offsets__.contiguous();
  auto weight_check = TensorArg(weight, "weight", 1);
  checkScalarTypes("embedding_bag", weight_check, {kFloat, kDouble});

  const int64_t num_indices = indices.sizes()[0];

  auto bag_size = at::zeros(offsets.sizes(), indices.type());
  make_bag_size(offsets, indices, mode, bag_size);

  // offset2bag is allocated with one extra trailing entry.  When the last
  // bags are empty their offsets do not affect the index -> bag assignment,
  // yet index_add would report an out-of-bounds error without the extra
  // slot.  The slot is dropped again right after make_offset2bag.
  auto offset2bag = at::zeros({num_indices + 1}, indices__.type());
  make_offset2bag(offsets, indices, offset2bag);
  offset2bag.resize_({num_indices});

  auto output = at::zeros({offsets.size(0), weight.size(1)}, weight.type());

  // Every mode other than sum/mean is handled by the max kernel.
  if (mode != MODE_MEAN && mode != MODE_SUM) {
    return AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      weight.type(), "embedding_bag_cpu_max", [&]() {
        return embedding_bag_cpu_max<scalar_t>(weight, indices, offset2bag, output, bag_size, offsets);
      }
    );
  }

  // Sum / mean: accumulate the selected rows into output, then let
  // apply_bag_size post-process the result for the given mode.
  const auto weight_dtype = weight.type().scalarType();
  if (weight_dtype == kFloat) {
    index_select_add<float>(indices, offset2bag, weight, output);
  } else if (weight_dtype == kDouble) {
    index_select_add<double>(indices, offset2bag, weight, output);
  }
  auto ret = apply_bag_size(offsets, indices, mode, output, bag_size);
  return std::tuple<Tensor, Tensor, Tensor, Tensor>(ret, offset2bag, bag_size, bag_size);
}
예제 #6
0
// Forwards to cuda::MapPlan<Saver> with the destination tensor flattened
// to 2D via FlatTo2D() and the given expression plan.
// NOTE(review): Saver, dim and E are not declared in this snippet;
// presumably a template<...> header precedes the definition — confirm.
inline void MapPlan(Tensor<gpu,dim> _dst, const expr::Plan<E> &plan){
  cuda::MapPlan<Saver>( _dst.FlatTo2D(), plan );
}
예제 #7
0
// Makes this tensor use the memory handle returned by other.mem().
// The two tensors must have the same element count (checked via ASSERT,
// which is given an empty message).
// NOTE(review): the template<...> header for Dtype is not visible in this
// snippet, and the ownership semantics of mem_ cannot be told from here.
void Tensor<Dtype>::ShareMem(const Tensor& other) {
  ASSERT(count_ == other.count(), "");
  mem_ = other.mem();
}
예제 #8
0
파일: tensor.cpp 프로젝트: JoeHowse/opencv
void Tensor::copyTo(Tensor& dst)
{
    // Map this tensor, hand the mapped pointer together with our shape and
    // format to dst.reshape(), then release the mapping again.
    void* mapped = map();
    dst.reshape(static_cast<const char*>(mapped), shape_, format_);
    unMap();
}
예제 #9
0
void MeshFunction::hessian (const Point& p,
                            const Real,
                            std::vector<Tensor>& output)
{
  libmesh_assert (this->initialized());

  /* Ensure that in the case of a master mesh function, the
     out-of-mesh mode is enabled either for both or for none.  This is
     important because the out-of-mesh mode is also communicated to
     the point locator.  Since this is time consuming, enable it only
     in debug mode.  */
#ifdef DEBUG
  if (this->_master != NULL)
    {
      const MeshFunction* master =
        cast_ptr<const MeshFunction*>(this->_master);
      if(_out_of_mesh_mode!=master->_out_of_mesh_mode)
        libmesh_error_msg("ERROR: If you use out-of-mesh-mode in connection with master mesh " \
                          << "functions, you must enable out-of-mesh mode for both the master and the slave mesh function.");
    }
#endif

  // locate the point in the other mesh
  const Elem* element = this->_point_locator->operator()(p);

  // If we have an element, but it's not a local element, then we
  // either need to have a serialized vector or we need to find a
  // local element sharing the same point.
  if (element &&
      (element->processor_id() != this->processor_id()) &&
      _vector.type() != SERIAL)
    {
      // look for a local element containing the point
      std::set<const Elem*> point_neighbors;
      element->find_point_neighbors(p, point_neighbors);
      element = NULL;
      std::set<const Elem*>::const_iterator       it  = point_neighbors.begin();
      const std::set<const Elem*>::const_iterator end = point_neighbors.end();
      for (; it != end; ++it)
        {
          const Elem* elem = *it;
          if (elem->processor_id() == this->processor_id())
            {
              element = elem;
              break;
            }
        }
    }

  if (!element)
    {
      output.resize(0);
    }
  else
    {
      // resize the output vector to the number of output values
      // that the user told us
      output.resize (this->_system_vars.size());


      {
        const unsigned int dim = this->_eqn_systems.get_mesh().mesh_dimension();


        /*
         * Get local coordinates to feed these into compute_data().
         * Note that the fe_type can safely be used from the 0-variable,
         * since the inverse mapping is the same for all FEFamilies
         */
        const Point mapped_point (FEInterface::inverse_map (dim,
                                                            this->_dof_map.variable_type(0),
                                                            element,
                                                            p));

        std::vector<Point> point_list (1, mapped_point);

        // loop over all vars
        for (unsigned int index=0; index < this->_system_vars.size(); index++)
          {
            /*
             * the data for this variable
             */
            const unsigned int var = _system_vars[index];
            const FEType& fe_type = this->_dof_map.variable_type(var);

            AutoPtr<FEBase> point_fe (FEBase::build(dim, fe_type));
            const std::vector<std::vector<RealTensor> >& d2phi =
              point_fe->get_d2phi();
            point_fe->reinit(element, &point_list);

            // where the solution values for the var-th variable are stored
            std::vector<dof_id_type> dof_indices;
            this->_dof_map.dof_indices (element, dof_indices, var);

            // interpolate the solution
            Tensor hess;

            for (unsigned int i=0; i<dof_indices.size(); i++)
              hess.add_scaled(d2phi[i][0], this->_vector(dof_indices[i]));

            output[index] = hess;
          }
      }
    }

  // all done
  return;
}
예제 #10
0
bool
test_fundamentals(Index const dimension)
{
  Index const
  number_components = integer_power(dimension, Tensor::ORDER);

  // Reference data 1, 2, 3, ... used to build and refill tensors
  std::vector<Scalar> const
  X = generate_sequence<Scalar>(number_components, 1.0, 1.0);

  // Constructor taking a pointer to the component data
  Tensor const
  A(dimension, &X[0]);

  // Copy constructor
  Tensor
  B = A;

  // Copy assignment: the difference of a tensor and its copy must vanish
  Tensor
  C;

  C = B - A;

  Scalar
  error = norm_f(C);

  bool
  passed = error <= machine_epsilon<Scalar>();

  // Refilling B from the same data must reproduce A
  B.fill(&X[0]);
  C = B - A;
  error = norm_f(C);
  passed = (error <= machine_epsilon<Scalar>()) && passed;

  // Increment: (-X) + X must vanish
  std::vector<Scalar> const
  Y = generate_sequence<Scalar>(number_components, -1.0, -1.0);

  C.fill(&Y[0]);
  C += A;
  error = norm_f(C);
  passed = (error <= machine_epsilon<Scalar>()) && passed;

  // Decrement: X - X must vanish
  C.fill(&X[0]);
  C -= A;
  error = norm_f(C);
  passed = (error <= machine_epsilon<Scalar>()) && passed;

#ifdef HAVE_INTREPID_KOKKOSCORE
  // Fill a tensor from Kokkos views of rank 1 to 4
  Kokkos::View<Scalar *, Kokkos::DefaultExecutionSpace>
  X1("X1_kokkos", dimension);

  Kokkos::View<Scalar **, Kokkos::DefaultExecutionSpace>
  X2("X2_kokkos", dimension, dimension);

  Kokkos::View<Scalar ***, Kokkos::DefaultExecutionSpace>
  X3("X3_kokkos", dimension, dimension, dimension);

  Kokkos::View<Scalar ****, Kokkos::DefaultExecutionSpace>
  X4("X4_kokkos", dimension, dimension, dimension, dimension);

  Kokkos::deep_copy(X1, 3.1);
  Kokkos::deep_copy(X2, 3.2);
  Kokkos::deep_copy(X3, 3.3);
  Kokkos::deep_copy(X4, 3.4);

  Tensor
  Z(dimension);

  // Recover the tensor rank by counting how many divisions by
  // dimension reduce the component count to one
  Index
  rank = 0;

  for (Index temp = number_components; temp != 1; ++rank) {
    temp = temp / dimension;
    assert(temp > 0);
  }

  // Pick the view whose rank matches the tensor
  switch (rank) {
  case 1:
    Z.fill(X1, 0);
    break;

  case 2:
    Z.fill(X2, 0, 0);
    break;

  case 3:
    Z.fill(X3, 0, 0, 0);
    break;

  case 4:
    Z.fill(X4, 0, 0, 0, 0);
    break;

  default:
    assert(false);
    break;
  }

  // Copy-construct from the Kokkos-filled tensor and compare
  Tensor const
  U = Z;

  Tensor
  V;

  V = U - Z;
  error = norm_f(V);
  passed = (error <= machine_epsilon<Scalar>()) && passed;
#endif

  return passed;
}
예제 #11
0
#define CATCH_CONFIG_MAIN
#include "catch.hpp"

#include "ATen/ATen.h"
#include "ATen/DLConvertor.h"

#include <iostream>
#include <string.h>
#include <sstream>
#include "test_seed.h"

using namespace at;

TEST_CASE( "parallel", "[cpu]" ) {

  manual_seed(123, at::Backend::CPU);
  set_num_threads(1);

  // Values written over the randomly initialised tensors
  const int expected[3] = {1, 0, 0};

  // 1x3 tensor whose single row becomes [1, 0, 0]
  Tensor a = rand(CPU(at::kFloat), {1,3});
  for (int col = 0; col < 3; ++col)
    a[0][col] = expected[col];

  // 1-D tensor holding the same three values
  Tensor as = rand(CPU(at::kFloat), {3});
  for (int col = 0; col < 3; ++col)
    as[col] = expected[col];

  // Summing over dimension 0 of a must give as
  REQUIRE(a.sum(0).equal(as));
}

예제 #12
0
// Returns the result of upsample_nearest1d_out_cpu_template for `input` and
// `output_size`.  The output tensor is created with shape {0} and input's
// options, and is then passed to the template by name.
// NOTE(review): presumably the template resizes `output` itself — confirm.
Tensor upsample_nearest1d_cpu(const Tensor& input, IntArrayRef output_size) {
  auto output = at::empty({0}, input.options());
  upsample_nearest1d_out_cpu_template(output, input, output_size);
  return output;
}
예제 #13
0
// Checks Tensor::resize(): the reported dimensions and total size after each
// call, and whether the data pointer is kept or replaced.
static void test_resize()
{
  // Initial resize of a default-constructed rank-3 tensor
  Tensor<int, 3> epsilon;
  epsilon.resize(2,3,7);
  VERIFY_IS_EQUAL(epsilon.dimension(0), 2);
  VERIFY_IS_EQUAL(epsilon.dimension(1), 3);
  VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);

  // Same total size, different shape: the test expects the data pointer to
  // stay the same
  const int* old_data = epsilon.data();
  epsilon.resize(3,2,7);
  VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
  VERIFY_IS_EQUAL(epsilon.dimension(1), 2);
  VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);
  VERIFY_IS_EQUAL(epsilon.data(), old_data);

  // Different total size: the test expects the data pointer to change
  epsilon.resize(3,5,7);
  VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
  VERIFY_IS_EQUAL(epsilon.dimension(1), 5);
  VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 3*5*7);
  VERIFY_IS_NOT_EQUAL(epsilon.data(), old_data);
}
예제 #14
0
파일: ERMSD.cpp 프로젝트: JFDama/plumed2
// Fills `mat` with one 4-component vector per ordered residue pair (i,j),
// stored at index i*nresidues+j, and `Gderi` with six 4x3 derivative blocks
// per pair, stored at index i*nresidues*6 + j*6 + l.
//
// A local frame of three vectors and its derivatives are first built for
// every residue from its three atoms.  Pairs that fail inPair(i,j), have
// i==j, or fall outside the cutoffs are left at zero in `mat`.
//
//   positions  atom positions, three consecutive entries per residue
//   pbc        not referenced in this function body
//   mat        output, indexed up to nresidues*nresidues (must be pre-sized)
//   Gderi      output, indexed up to nresidues*nresidues*6 (must be pre-sized)
void ERMSD::calcMat(const std::vector<Vector> & positions,const Pbc& pbc, std::vector<Vector4d> &mat, std::vector<TensorGeneric<4,3> > &Gderi) {

  // NOTE(review): the buffers below are sized from nresidues while the loop
  // that fills them runs to natoms/3 — presumably natoms == 3*nresidues;
  // confirm
  std::vector<Vector3d> pos;
  pos.resize(3*nresidues);

  std::vector<Tensor3d> deri;
  deri.resize(nresidues*9);

  // NOTE(review): centers are accumulated with += below, which relies on
  // Vector being zero-initialised by resize() — confirm
  std::vector<Vector> centers;
  centers.resize(nresidues);

  unsigned idx_deri = 0;

  // Derivatives of a and b (the vectors built below from the center and the
  // first two atoms) with respect to the three atom positions
  Tensor da_dxa = (2./3.)*Tensor::identity();
  Tensor da_dxb = -(1./3.)*Tensor::identity();
  Tensor da_dxc = -(1./3.)*Tensor::identity();

  Tensor db_dxa = -(1./3.)*Tensor::identity();
  Tensor db_dxb = (2./3.)*Tensor::identity();
  Tensor db_dxc = -(1./3.)*Tensor::identity();

  // Form factors - should this be somewhere else?

  double w = 1./3.;
  Vector form_factor = Vector(2.0,2.0,1.0/0.3);

  // Pass 1: per-residue center, local frame (pos) and frame derivatives (deri)
  for(unsigned res_idx=0; res_idx<natoms/3; res_idx++) {


    const unsigned at_idx = 3*res_idx;
    //center: average of the residue's three atom positions
    for (unsigned j=0; j<3; j++) {
      centers[res_idx] += w*positions[at_idx+j];
    }

    Vector3d a = delta(centers[res_idx],positions[at_idx]);
    Vector3d b = delta(centers[res_idx],positions[at_idx+1]);
    Vector3d d = crossProduct(a,b);
    double ianorm = 1./a.modulo();
    double idnorm = 1./d.modulo();

    // X vector: COM-C2
    pos[at_idx] = a*ianorm;
    // Z versor: C2 x (COM-C4/C6)
    pos[at_idx+2] = d*idnorm;
    // Y versor: Z x X
    pos[at_idx+1] = crossProduct(pos[at_idx+2],pos[at_idx]);

    // Derivatives ////////
    // Layout of deri for this residue: entry 3*atom + axis, i.e. slots
    // 0,3,6 hold dv1/dx{a,b,c}, slots 1,4,7 dv2 and slots 2,5,8 dv3
    Tensor3d t1 = ianorm*(Tensor::identity()-extProduct(pos[at_idx],pos[at_idx]));
    // dv1/dxa
    deri[idx_deri] = (2./3. )*t1;
    // dv1/dxb
    deri[idx_deri+3] = -(1./3.)*t1;
    // dv1/dxc
    deri[idx_deri+6] = -(1./3.)*t1;

    Tensor dd_dxa =  VcrossTensor(a,db_dxa) -VcrossTensor(b,da_dxa);
    Tensor dd_dxb =  VcrossTensor(a,db_dxb)-VcrossTensor(b,da_dxb);
    Tensor dd_dxc =  VcrossTensor(a,db_dxc)-VcrossTensor(b,da_dxc);

    // dv3/dxa
    deri[idx_deri+2] = deriNorm(d,dd_dxa);
    // dv3/dxb
    deri[idx_deri+5] = deriNorm(d,dd_dxb);
    // dv3/dxc
    deri[idx_deri+8] = deriNorm(d,dd_dxc);

    // dv2/dxa = dv3/dxa cross v1 + v3 cross dv1/dxa
    deri[idx_deri+1] =  (VcrossTensor(deri[idx_deri+2],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri]));
    // dv2/dxb
    deri[idx_deri+4] =  (VcrossTensor(deri[idx_deri+5],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri+3]));
    // dv2/dxc
    deri[idx_deri+7] =  (VcrossTensor(deri[idx_deri+8],pos[at_idx]) + \
                         VcrossTensor(pos[at_idx+2],deri[idx_deri+6]));

    idx_deri += 9;
    // End derivatives ///////

  }


  // Initialization (unnecessary?)
  for (unsigned i1=0; i1<nresidues*nresidues; i1++) {
    for (unsigned i2=0; i2<4; i2++) {
      mat[i1][i2] = 0.0;
    }
  }

  // maxdist is a cheap pre-filter on the plain center distance before the
  // scaled vector rtilde is computed
  double maxdist = cutoff/form_factor[0];
  double gamma = pi/cutoff;
  unsigned idx;
  unsigned idx1 = 0;
  // Calculate mat
  for (unsigned i=0; i<nresidues; i++) {
    for (unsigned j=0; j<nresidues; j++) {

      // skip i==j
      if(inPair(i,j) and i != j) {
        //if(i!=j){


        // Calculate normal distance first
        Vector diff = delta(centers[i],centers[j]);
        double d1 = diff.modulo();
        //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n";
        //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n";
        if(d1<maxdist) {

          // calculate r_tilde_ij: diff expressed in residue i's frame, each
          // component scaled by its form factor
          // NOTE(review): rtilde is accumulated with += and so relies on
          // Vector3d being zero-initialised by its default constructor
          Vector3d rtilde;
          for (unsigned k=0; k<3; k++) {
            for (unsigned l=0; l<3; l++) {
              rtilde[l] += pos[3*i+l][k]*diff[k]*form_factor[l];
            }
          }
          double rtilde_norm = rtilde.modulo();

          double irnorm = 1./rtilde_norm;

          // ellipsoidal cutoff
          if(rtilde_norm < cutoff) {
            idx = i*nresidues + j;
            //std::cout << i << " " << j << " " << rtilde_norm << " " << idx <<"\n";


            // fill 4d matrix
            double dummy = sin(gamma*rtilde_norm)/(rtilde_norm*gamma);
            mat[idx][0] = dummy*rtilde[0];
            mat[idx][1] = dummy*rtilde[1];
            mat[idx][2] = dummy*rtilde[2];
            mat[idx][3] = (1.+ cos(gamma*rtilde_norm))/gamma;

            // Derivative (drtilde_dx): slots 0..2 are filled from residue
            // i's frame derivatives (deri[9*i+...]) minus rvec, slots 3..5
            // hold rvec alone
            std::vector<Tensor3d> drtilde_dx;
            drtilde_dx.resize(6);
            unsigned pos_idx = 3*i;
            unsigned deri_idx = 9*i;
            for (unsigned at=0; at<3; at++) {
              for (unsigned l=0; l<3; l++) {
                Vector3d rvec = form_factor[l]*((pos[pos_idx+l])/3.);
                Vector3d vvec = form_factor[l]*(matmul(deri[deri_idx+3*at+l],diff));
                drtilde_dx[at].setRow(l,vvec-rvec);
                drtilde_dx[at+3].setRow(l,rvec);
              }
            }

            //std::vector<TensorGeneric<4,3> > dG_dx;
            //dG_dx.resize(6);

            double dummy1 = (cos(gamma*rtilde_norm) - dummy);

            idx1 = i*nresidues*6 + j*6;

            for (unsigned l=0; l<6; l++) {
              //std::cout << i << " " << j << " " << idx1 << " " << idx1+l << "\n";

              // components 1,2,3
              // sin(gamma*|rtilde|)/gamma*|rtilde|*d_rtilde +
              // + ((d_rtilde*r_tilde/r_tilde^2) out r_tilde)*
              // (cos(gamma*|rtilde| - sin(gamma*|rtilde|)/gamma*|rtilde|))
              Vector3d rdr = matmul(rtilde,drtilde_dx[l]);
              Tensor tt = dummy*drtilde_dx[l] + (dummy1*irnorm*irnorm)*Tensor(rtilde,rdr);
              for (unsigned m=0; m<3; m++) {
                // Transpose here
                //dG_dx[l].setRow(m,tt.getRow(m));
                Gderi[idx1+l].setRow(m,tt.getRow(m));
              }
              // component 4
              // - sin(gamma*|rtilde|)/|rtilde|*(r_tilde*d_rtilde)
              //dG_dx[l].setRow(3,-dummy*gamma*rdr);
              Gderi[idx1+l].setRow(3,-dummy*gamma*rdr);
            }




          }
        }
      }

    }
  }

}
예제 #15
0
// Applies the `opsqrt` functor element-wise through unary_vexpr_kernel,
// reading from `in` and writing into this tensor's buffer. The kernel is
// launched on the stream returned by get_calc_stream().
//
// `in` must contain exactly as many elements as this tensor (CHECK_EQ).
void Tensor<XPU, DT>::blas_vsqrt(const Tensor<XPU, DT> &in)
{
  const int count = size();
  CHECK_EQ (count, in.size());

  unary_vexpr_kernel<opsqrt<DT>>
    <<<cuda_get_blocks(count), CUDA_NUM_THREADS, 0, get_calc_stream()>>>
    (count, in.dptr, dptr);

  cuda_sync_check ("cublas_vsqrt");
};
예제 #16
0
// Smoke test covering Scalar construction/conversion, the default CPU
// generator, basic tensor factories and arithmetic, device round-trips (only
// when CUDA is available), scalar_to_tensor dtype selection, type dispatch
// and Tensor::item<T>() conversions.
TEST(TestScalar, TestScalar) {
  // Seed before any random tensor is created below.
  manual_seed(123);

  // One integral-backed and one floating-point-backed Scalar.
  Scalar what = 257;
  Scalar bar = 3.0;
  // Round-trip the floating-point Scalar through Half and back to Scalar.
  Half h = bar.toHalf();
  Scalar h2 = h;
  cout << "H2: " << h2.toDouble() << " " << what.toFloat() << " "
       << bar.toDouble() << " " << what.isIntegral() << "\n";
  // Re-seeding the default CPU generator must not throw.
  Generator& gen = at::globalContext().defaultGenerator(at::kCPU);
  ASSERT_NO_THROW(gen.seed());
  // NOTE(review): `C` is not used again in this test.
  auto&& C = at::globalContext();
  if (at::hasCUDA()) {
    auto t2 = zeros({4, 4}, at::kCUDA);
    cout << &t2 << "\n";
  }
  auto t = ones({4, 4});

  // zeros + ones summed over a 4x4 tensor gives 16.
  auto wha2 = zeros({4, 4}).add(t).sum();
  ASSERT_EQ(wha2.item<double>(), 16.0);

  // Sizes and strides of the freshly created 4x4 tensor.
  ASSERT_EQ(t.sizes()[0], 4);
  ASSERT_EQ(t.sizes()[1], 4);
  ASSERT_EQ(t.strides()[0], 4);
  ASSERT_EQ(t.strides()[1], 1);

  // Small recurrent-style update: next_h = tanh(W_x * x^T + W_h * prev_h^T).
  TensorOptions options = dtype(kFloat);
  Tensor x = randn({1, 10}, options);
  Tensor prev_h = randn({1, 20}, options);
  Tensor W_h = randn({20, 20}, options);
  Tensor W_x = randn({20, 10}, options);
  Tensor i2h = at::mm(W_x, x.t());
  Tensor h2h = at::mm(W_h, prev_h.t());
  Tensor next_h = i2h.add(h2h);
  next_h = next_h.tanh();

  // item() on a default-constructed Tensor is expected to throw.
  ASSERT_ANY_THROW(Tensor{}.item());

  test_overflow();

  // Copy to CUDA and back; the result must equal the CPU original.
  if (at::hasCUDA()) {
    auto r = next_h.to(at::Device(kCUDA), kFloat, /*non_blocking=*/ false, /*copy=*/ true);
    ASSERT_TRUE(r.to(at::Device(kCPU), kFloat, /*non_blocking=*/ false, /*copy=*/ true).equal(next_h));
  }
  ASSERT_NO_THROW(randn({10, 10, 2}, options));

  // check Scalar.toTensor on Scalars backed by different data types
  ASSERT_EQ(scalar_to_tensor(bar).scalar_type(), kDouble);
  ASSERT_EQ(scalar_to_tensor(what).scalar_type(), kLong);
  ASSERT_EQ(scalar_to_tensor(ones({}).item()).scalar_type(), kDouble);

  // Exercise the type-dispatch macro; `scalar_t` is supplied by the macro
  // inside the lambda.
  if (x.scalar_type() != ScalarType::Half) {
    AT_DISPATCH_ALL_TYPES(x.scalar_type(), "foo", [&] {
      scalar_t s = 1;
      std::stringstream ss;
      ASSERT_NO_THROW(
          ss << "hello, dispatch" << x.dispatch_type().toString() << s << "\n");
      auto data = (scalar_t*)x.data_ptr();
      (void)data;
    });
  }

  // test direct C-scalar type conversions
  {
    // item<T>() on a two-element tensor is expected to throw.
    auto x = ones({1, 2}, options);
    ASSERT_ANY_THROW(x.item<float>());
  }
  // A zero-dimensional float tensor converts to several C scalar types.
  auto float_one = ones({}, options);
  ASSERT_EQ(float_one.item<float>(), 1);
  ASSERT_EQ(float_one.item<int32_t>(), 1);
  ASSERT_EQ(float_one.item<at::Half>(), 1);
}
예제 #17
0
// Applies the `opdiv` functor element-wise through binary_vexpr_kernel,
// reading from A and B and writing into this tensor's buffer. The kernel is
// launched on the stream returned by get_calc_stream().
//
// The kernel is launched over N = size() elements of the destination, so both
// operands must have exactly N elements. Checking only A against B (as was
// done previously) would let a mismatched destination through unchecked.
void Tensor<XPU, DT>::blas_vdiv (const Tensor<XPU, DT> &A, const Tensor<XPU, DT> &B)
{
  const int N = size();
  CHECK_EQ (N, A.size());
  CHECK_EQ (N, B.size());

  binary_vexpr_kernel<opdiv <DT>><<<cuda_get_blocks(N), CUDA_NUM_THREADS, 0, get_calc_stream()>>>
    (N, A.dptr, B.dptr, dptr);
  cuda_sync_check ("cublas_vdiv");
};
void SpatialDivisiveNormalization::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);
  Tensor<float>* in = TO_TENSOR_PTR(input.get());

  RASSERT(in->dim() == 3);

  if (output != nullptr) {
    if (!in->isSameSizeAs(*TO_TENSOR_PTR(output.get()))) {
      // Input dimension has changed!
      cleanup();
    }
  }

  if (output == nullptr) {
    output.reset(new Tensor<float>(in->dim(), in->size()));
    std_pass1_.reset(new Tensor<float>(in->dim(), in->size()));
    std_pass2_.reset(new Tensor<float>(in->dim(), in->size()));
  }
  if (kernel_norm_ == nullptr) {
    bool onedim_kernel = kernel_->dim() == 1;
    const float n_feats = (float)in->size()[2];

    // Clone and normalize the input kernel
    kernel_norm_.reset(Tensor<float>::clone(*kernel_));
    float sum = Tensor<float>::slowSum(*kernel_norm_);
    float div_val = onedim_kernel ? (sum * sqrtf(n_feats)) : (sum * n_feats);
    Tensor<float>::div(*kernel_norm_, div_val);
  }
  if (std_coef_ == nullptr) {
    uint32_t std_coeff_size[2];
    std_coeff_size[0] = TO_TENSOR_PTR(output.get())->size()[0];
    std_coeff_size[1] = TO_TENSOR_PTR(output.get())->size()[1];
    std_coef_.reset(new Tensor<float>(2, std_coeff_size));

    std::unique_ptr<float[]> std_coef_cpu(new float[std_coef_->nelems()]);
    std::unique_ptr<float[]> kernel_norm_cpu(new float[kernel_norm_->nelems()]);
    kernel_norm_->getData(kernel_norm_cpu.get());
    bool onedim_kernel = kernel_->dim() == 1;

    // Filter an image of all 1 values to create the normalization constants
    // See norm_test.lua for proof that this works as well as:
    // https://github.com/andresy/torch/blob/master/extra/nn/SpatialDivisiveNormalization.lua
    int32_t n_feats = TO_TENSOR_PTR(output.get())->size()[2];
    int32_t height = TO_TENSOR_PTR(output.get())->size()[1];
    int32_t width = TO_TENSOR_PTR(output.get())->size()[0];
    if (onedim_kernel) {
      // 1D case - The filter is seperable, but we'll just do the dumb 2D
      // version since we only do this once on startup.  --> O(n * m)
      int32_t kernel_size = kernel_norm_->size()[0];
      int32_t filt_rad = (kernel_size - 1) / 2;
      for (int32_t v = 0; v < height; v++) {
        for (int32_t u = 0; u < width; u++) {
          float tmp = 0.0f;
          for (int32_t v_filt = -filt_rad; v_filt <= filt_rad; v_filt++) {
            for (int32_t u_filt = -filt_rad; u_filt <= filt_rad; u_filt++) {
              int32_t u_in = u + u_filt;
              int32_t v_in = v + v_filt;
              if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) {
                // Pixel is inside --> We'll effectively clamp zeros elsewhere.
                tmp += (kernel_norm_cpu[v_filt + filt_rad] *
                        kernel_norm_cpu[u_filt + filt_rad]);
              }
            }
          }
          std_coef_cpu[v * width + u] = tmp / n_feats;
        }
      }
    } else {
      // 2D case
      int32_t kernel_size_u = kernel_norm_->size()[0];
      int32_t kernel_size_v = kernel_norm_->size()[1];
      int32_t filt_rad_u = (kernel_size_u - 1) / 2;
      int32_t filt_rad_v = (kernel_size_v - 1) / 2;
      for (int32_t v = 0; v < height; v++) {
        for (int32_t u = 0; u < width; u++) {
          float tmp = 0.0f;
          for (int32_t v_filt = -filt_rad_v; v_filt <= filt_rad_v; v_filt++) {
            for (int32_t u_filt = -filt_rad_u; u_filt <= filt_rad_u; u_filt++) {
              int32_t u_in = u + u_filt;
              int32_t v_in = v + v_filt;
              if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) {
                // Pixel is inside --> We'll effectively clamp zeros elsewhere.
                tmp += kernel_norm_cpu[(v_filt + filt_rad_v) * kernel_size_u +
                                       (u_filt + filt_rad_u)];
              }
            }
          }
          std_coef_cpu[v * width + u] = tmp / n_feats;
        }
      }
    }
    std_coef_->setData(std_coef_cpu.get());
  }
  if (std_ == nullptr) {
    uint32_t std_coeff_size[2];
    std_coeff_size[0] = TO_TENSOR_PTR(output.get())->size()[0];
    std_coeff_size[1] = TO_TENSOR_PTR(output.get())->size()[1];
    std_.reset(new Tensor<float>(2, std_coeff_size));
  }
}
예제 #19
0
// Reshapes this tensor to the shape reported by `other` by forwarding
// other.shape() to Reshape().
void Tensor<Dtype>::ReshapeLike(const Tensor<Dtype>& other) {
  this->Reshape(other.shape());
}
void SpatialDivisiveNormalization::forwardProp(
    std::shared_ptr<TorchData> input) {
  init(input);
  bool onedim_kernel = kernel_->dim() == 1;

  Tensor<float>* in = TO_TENSOR_PTR(input.get());
  Tensor<float>* out = TO_TENSOR_PTR(output.get());
  if (onedim_kernel) {
    int32_t filt_rad = ((int32_t)kernel_norm_->size()[0] - 1) / 2;

    // Perform horizontal filter pass
    cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel,
                          "SpatialDivisiveNormalizationHoriz");
    cl_context->setArg(0, in->storage());
    cl_context->setArg(1, std_pass1_->storage());
    cl_context->setArg(2, kernel_norm_->storage());
    cl_context->setArg(3, filt_rad);
    cl_context->runKernel(jtorch::deviceid, std_pass1_->dim(),
                          std_pass1_->size(), false);

    // Perform vertical filter pass
    cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel,
                          "SpatialDivisiveNormalizationVert");
    cl_context->setArg(0, std_pass1_->storage());
    cl_context->setArg(1, std_pass2_->storage());
    cl_context->setArg(2, kernel_norm_->storage());
    cl_context->setArg(3, filt_rad);
    cl_context->runKernel(jtorch::deviceid, std_pass2_->dim(),
                          std_pass2_->size(), false);
  } else {
    int32_t filt_rad_u = ((int32_t)kernel_norm_->size()[0] - 1) / 2;
    int32_t filt_rad_v = ((int32_t)kernel_norm_->size()[1] - 1) / 2;

    // Perform vertical filter pass
    cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel,
                          "SpatialDivisiveNormalization2D");
    cl_context->setArg(0, in->storage());
    cl_context->setArg(1, std_pass2_->storage());
    cl_context->setArg(2, kernel_norm_->storage());
    cl_context->setArg(3, filt_rad_u);
    cl_context->setArg(4, filt_rad_v);
    cl_context->runKernel(jtorch::deviceid, std_pass2_->dim(),
                          std_pass2_->size(), false);
  }

  // Perform accumulation and division pass
  cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel,
                        "SpatialDivisiveNormalizationAccumDiv");
  cl_context->setArg(0, std_pass2_->storage());
  cl_context->setArg(1, std_->storage());
  cl_context->setArg(2, std_coef_->storage());
  cl_context->setArg(3, (int)out->size()[2]);
  cl_context->setArg(4, threshold_);
  cl_context->runKernel(jtorch::deviceid, std_->dim(), std_->size(), false);

  // Perform normalization pass
  cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel,
                        "SpatialDivisiveNormalization");
  cl_context->setArg(0, in->storage());
  cl_context->setArg(1, out->storage());
  cl_context->setArg(2, std_->storage());
  cl_context->runKernel(jtorch::deviceid, out->dim(), out->size(), false);
}
예제 #21
0
  // Singular value decomposition that tries to exploit block structure in A.
  //
  // find_blocks() is asked to split A into `nblocks` groups of row/column
  // indices. Each block is decomposed separately, the partial results are
  // scattered into s / *pU / *pVT, and finally everything is reordered
  // according to sort_indices(s, true).
  //
  // Falls back to a plain svd(A, ...) when:
  //   - A is not square and `economic` is false,
  //   - find_blocks() reports failure,
  //   - there is a single block covering at least half of the rows and half
  //     of the columns.
  //
  // pU and pVT may be null, in which case the corresponding factor is not
  // produced. Returns the vector of singular values.
  RTensor
  do_block_svd(const Tensor &A, Tensor *pU, Tensor *pVT, bool economic)
  {
    index rows = A.rows();
    index cols = A.columns();
    if (rows != cols && !economic)
      return svd(A, pU, pVT, economic);
    index minrc = std::min(rows, cols);

    // block_rows / block_cols are arrays allocated by find_blocks() on
    // success; every successful path below releases them with delete[].
    // NOTE(review): they would leak if svd() or an indexing operation throws
    // between here and the delete[] calls -- confirm exception policy.
    index nblocks;
    Indices *block_rows, *block_cols;
    if (!find_blocks<Tensor>(A, &nblocks, &block_rows, &block_cols)) {
      return svd(A, pU, pVT, economic);
    }

    // One dominant block: the block-wise path is not used.
    if ((nblocks == 1) &&
	(block_rows[0].size() >= rows/2) &&
	(block_cols[0].size() >= cols/2)) {
      RTensor s = svd(A, pU, pVT, economic);
      delete[] block_rows;
      delete[] block_cols;
      return s;
    }

    // Zero-initialized outputs; each block only writes into its own slots.
    RTensor s(minrc);
    s.fill_with_zeros();
    if (pU) {
      *pU = Tensor::zeros(rows, economic? minrc : rows);
    }
    if (pVT) {
      *pVT = Tensor::zeros(economic? minrc : cols, cols);
    }

    RTensor stemp;
    Tensor Utemp, Vtemp;
    // Only request per-block factors that the caller asked for.
    Tensor *pUtemp = pU? &Utemp : 0;
    Tensor *pVtemp = pVT? &Vtemp : 0;
    // sndx is the next free position in s (and the matching column of U /
    // row of VT).
    for (index b = 0, sndx = 0; b < nblocks; b++) {
      Tensor m = A(range(block_rows[b]), range(block_cols[b]));
      // NOTE(review): `n` is never read.
      index n = m.size();
      if (m.size() > 1) {
	stemp = svd(m, pUtemp, pVtemp, economic);
        index slast = sndx + stemp.size() - 1;
	s.at(range(sndx, slast)) = stemp;
	if (pU) {
          (*pU).at(range(block_rows[b]), range(sndx, slast)) = Utemp;
	}
	if (pVT) {
          (*pVT).at(range(sndx, slast), range(block_cols[b])) = Vtemp;
	}
        sndx = slast + 1;
      } else {
	// Block is not larger than one element: handled without calling svd().
	// The singular value is |m[0]|, U gets a 1 and VT gets m[0]/|m[0]|.
	// NOTE(review): m[0]/aux is not finite if m[0] is zero; presumably
	// find_blocks() never yields such a block -- confirm.
	index row = block_rows[b][0];
	index col = block_cols[b][0];
	double aux = abs(m[0]);
	s.at(sndx) = aux;
	if (pU) {
	  (*pU).at(row,sndx) = 1.0;
	}
	if (pVT) {
	  (*pVT).at(sndx,col) = m[0]/aux;
	}
	++sndx;
      }
    }
    delete[] block_rows;
    delete[] block_cols;

    // Reorder singular values and apply the same permutation to the columns
    // of U and the rows of VT.
    Indices ndx = sort_indices(s, true);
    s = s(range(ndx));
    if (pU)
      *pU = (*pU)(range(), range(ndx));
    if (pVT)
      *pVT = (*pVT)(range(ndx), range());
    return s;
  }
예제 #22
0
// CPU entry point for `unique`: dispatches on the type of `self` and forwards
// all arguments unchanged to _unique_cpu_template<scalar_t>, returning its
// pair of tensors. `scalar_t` is provided by AT_DISPATCH_ALL_TYPES inside the
// lambda, so the lambda has to be written inline in the macro invocation.
std::tuple<Tensor, Tensor>
_unique_cpu(const Tensor& self, const bool sorted, const bool return_inverse) {
  return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] {
    return _unique_cpu_template<scalar_t>(self, sorted, return_inverse);
  });
}
// Validates `input` and makes sure `output` is a tensor with exactly the same
// dimensionality and extents as the input, (re)allocating it when needed.
//
// Requirements checked with RASSERT:
//   - input holds tensor data,
//   - the input has at least 3 dimensions,
//   - size()[2] of the input equals nfeats_.
//
// The existing output is kept only if *every* dimension matches the input.
// Comparing just the first three extents is not enough: inputs with more than
// three dimensions are accepted, and a change in a higher dimension must also
// trigger a re-allocation.
void SpatialBatchNormalization::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);

  Tensor<float>* in = TO_TENSOR_PTR(input.get());

  RASSERT(in->dim() >= 3);
  RASSERT(in->size()[2] == nfeats_);

  if (output != nullptr) {
    // Only convert the output pointer once we know it is non-null.
    Tensor<float>* out = TO_TENSOR_PTR(output.get());

    bool same_shape = in->dim() == out->dim();
    for (uint32_t i = 0; same_shape && i < in->dim(); i++) {
      same_shape = in->size()[i] == out->size()[i];
    }
    if (!same_shape) {
      output = nullptr;
    }
  }

  if (output == nullptr) {
    output.reset(new Tensor<float>(in->dim(), in->size()));
  }
}
예제 #24
0
static void test_simple_reductions() {
  Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
  tensor.setRandom();
  array<ptrdiff_t, 2> reduction_axis2;
  reduction_axis2[0] = 1;
  reduction_axis2[1] = 3;

  Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis2);
  VERIFY_IS_EQUAL(result.dimension(0), 2);
  VERIFY_IS_EQUAL(result.dimension(1), 5);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 5; ++j) {
      float sum = 0.0f;
      for (int k = 0; k < 3; ++k) {
        for (int l = 0; l < 7; ++l) {
          sum += tensor(i, k, j, l);
        }
      }
      VERIFY_IS_APPROX(result(i, j), sum);
    }
  }

  {
    Tensor<float, 0, DataLayout> sum1 = tensor.sum();
    VERIFY_IS_EQUAL(sum1.rank(), 0);

    array<ptrdiff_t, 4> reduction_axis4;
    reduction_axis4[0] = 0;
    reduction_axis4[1] = 1;
    reduction_axis4[2] = 2;
    reduction_axis4[3] = 3;
    Tensor<float, 0, DataLayout> sum2 = tensor.sum(reduction_axis4);
    VERIFY_IS_EQUAL(sum2.rank(), 0);

    VERIFY_IS_APPROX(sum1(), sum2());
  }

  reduction_axis2[0] = 0;
  reduction_axis2[1] = 2;
  result = tensor.prod(reduction_axis2);
  VERIFY_IS_EQUAL(result.dimension(0), 3);
  VERIFY_IS_EQUAL(result.dimension(1), 7);
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 7; ++j) {
      float prod = 1.0f;
      for (int k = 0; k < 2; ++k) {
        for (int l = 0; l < 5; ++l) {
          prod *= tensor(k, i, l, j);
        }
      }
      VERIFY_IS_APPROX(result(i, j), prod);
    }
  }

  {
    Tensor<float, 0, DataLayout> prod1 = tensor.prod();
    VERIFY_IS_EQUAL(prod1.rank(), 0);

    array<ptrdiff_t, 4> reduction_axis4;
    reduction_axis4[0] = 0;
    reduction_axis4[1] = 1;
    reduction_axis4[2] = 2;
    reduction_axis4[3] = 3;
    Tensor<float, 0, DataLayout> prod2 = tensor.prod(reduction_axis4);
    VERIFY_IS_EQUAL(prod2.rank(), 0);

    VERIFY_IS_APPROX(prod1(), prod2());
  }

  reduction_axis2[0] = 0;
  reduction_axis2[1] = 2;
  result = tensor.maximum(reduction_axis2);
  VERIFY_IS_EQUAL(result.dimension(0), 3);
  VERIFY_IS_EQUAL(result.dimension(1), 7);
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 7; ++j) {
      float max_val = std::numeric_limits<float>::lowest();
      for (int k = 0; k < 2; ++k) {
        for (int l = 0; l < 5; ++l) {
          max_val = (std::max)(max_val, tensor(k, i, l, j));
        }
      }
      VERIFY_IS_APPROX(result(i, j), max_val);
    }
  }

  {
    Tensor<float, 0, DataLayout> max1 = tensor.maximum();
    VERIFY_IS_EQUAL(max1.rank(), 0);

    array<ptrdiff_t, 4> reduction_axis4;
    reduction_axis4[0] = 0;
    reduction_axis4[1] = 1;
    reduction_axis4[2] = 2;
    reduction_axis4[3] = 3;
    Tensor<float, 0, DataLayout> max2 = tensor.maximum(reduction_axis4);
    VERIFY_IS_EQUAL(max2.rank(), 0);

    VERIFY_IS_APPROX(max1(), max2());
  }

  reduction_axis2[0] = 0;
  reduction_axis2[1] = 1;
  result = tensor.minimum(reduction_axis2);
  VERIFY_IS_EQUAL(result.dimension(0), 5);
  VERIFY_IS_EQUAL(result.dimension(1), 7);
  for (int i = 0; i < 5; ++i) {
    for (int j = 0; j < 7; ++j) {
      float min_val = (std::numeric_limits<float>::max)();
      for (int k = 0; k < 2; ++k) {
        for (int l = 0; l < 3; ++l) {
          min_val = (std::min)(min_val, tensor(k, l, i, j));
        }
      }
      VERIFY_IS_APPROX(result(i, j), min_val);
    }
  }

  {
    Tensor<float, 0, DataLayout> min1 = tensor.minimum();
    VERIFY_IS_EQUAL(min1.rank(), 0);

    array<ptrdiff_t, 4> reduction_axis4;
    reduction_axis4[0] = 0;
    reduction_axis4[1] = 1;
    reduction_axis4[2] = 2;
    reduction_axis4[3] = 3;
    Tensor<float, 0, DataLayout> min2 = tensor.minimum(reduction_axis4);
    VERIFY_IS_EQUAL(min2.rank(), 0);

    VERIFY_IS_APPROX(min1(), min2());
  }

  reduction_axis2[0] = 0;
  reduction_axis2[1] = 1;
  result = tensor.mean(reduction_axis2);
  VERIFY_IS_EQUAL(result.dimension(0), 5);
  VERIFY_IS_EQUAL(result.dimension(1), 7);
  for (int i = 0; i < 5; ++i) {
    for (int j = 0; j < 7; ++j) {
      float sum = 0.0f;
      int count = 0;
      for (int k = 0; k < 2; ++k) {
        for (int l = 0; l < 3; ++l) {
          sum += tensor(k, l, i, j);
          ++count;
        }
      }
      VERIFY_IS_APPROX(result(i, j), sum / count);
    }
  }

  {
    Tensor<float, 0, DataLayout> mean1 = tensor.mean();
    VERIFY_IS_EQUAL(mean1.rank(), 0);

    array<ptrdiff_t, 4> reduction_axis4;
    reduction_axis4[0] = 0;
    reduction_axis4[1] = 1;
    reduction_axis4[2] = 2;
    reduction_axis4[3] = 3;
    Tensor<float, 0, DataLayout> mean2 = tensor.mean(reduction_axis4);
    VERIFY_IS_EQUAL(mean2.rank(), 0);

    VERIFY_IS_APPROX(mean1(), mean2());
  }

  {
    Tensor<int, 1> ints(10);
    std::iota(ints.data(), ints.data() + ints.dimension(0), 0);

    TensorFixedSize<bool, Sizes<> > all;
    all = ints.all();
    VERIFY(!all());
    all = (ints >= ints.constant(0)).all();
    VERIFY(all());

    TensorFixedSize<bool, Sizes<> > any;
    any = (ints > ints.constant(10)).any();
    VERIFY(!any());
    any = (ints < ints.constant(1)).any();
    VERIFY(any());
  }
}
예제 #25
0
// Dense CPU backward for embedding_bag: builds the gradient with respect to
// the embedding weight matrix, a (num_weights x grad.size(1)) tensor.
//
// Arguments (as used below):
//   grad_              gradient w.r.t. the bag outputs; float or double.
//   indices__          int64 indices into the weight matrix.
//   offsets__          int64 start offset of each bag within indices__.
//   offset2bag__       int64, contiguous; bag number for each index position.
//   bag_size_          per-bag sizes; only read for MODE_MAX.
//   max_indices_       per-bag, per-column arg-max indices; only read for
//                      MODE_MAX.
//   num_weights        number of rows of the weight matrix.
//   scale_grad_by_freq when true, each contribution is divided by how often
//                      its index occurs in indices__ (sum/mean modes).
//   mode               MODE_SUM, MODE_MEAN or MODE_MAX.
//
// For sum/mean the indices are sorted so that equal indices are adjacent;
// each run of equal indices is then accumulated into its row of the result.
Tensor embedding_bag_backward_cpu(const Tensor &grad_, const Tensor &indices__,
                                  const Tensor &offsets__,
                                  const Tensor &offset2bag__,
                                  const Tensor &bag_size_,
                                  const Tensor& max_indices_, int64_t num_weights,
                                  bool scale_grad_by_freq, int64_t mode) {
  auto grad = grad_.contiguous();
  auto grad_arg = TensorArg(grad, "grad_", 1);
  checkScalarTypes("embedding_bag", grad_arg, {kFloat, kDouble});
  auto indices_arg = TensorArg(indices__, "indices__", 1);
  checkScalarType("embedding_bag", indices_arg, kLong);
  auto offsets_arg = TensorArg(offsets__, "offsets__", 1);
  checkScalarType("embedding_bag", offsets_arg, kLong);
  auto offset2bag_arg = TensorArg(offset2bag__, "offset2bag__", 1);
  checkScalarType("embedding_bag", offset2bag_arg, kLong);
  checkContiguous("embedding_bag", offset2bag_arg);
  Tensor indices_ = indices__.contiguous();
  Tensor offsets_ = offsets__.contiguous();

  // Sort the indices and permute offset2bag the same way so position j of the
  // sorted arrays still pairs an index with the bag it came from.
  auto ind_sort_ = indices_.sort();
  auto indices = std::get<0>(ind_sort_);
  auto ind_sort = std::get<1>(ind_sort_);
  auto offset2bag = offset2bag__.index_select(0, ind_sort);

  auto indices_data = indices.data<int64_t>();
  auto offsets_data = offsets_.data<int64_t>();
  auto offset2bag_data = offset2bag.data<int64_t>();
  const int64_t numel = indices.numel();

  // counts[w] = number of occurrences of weight row w in the indices.
  // The vector is value-initialized, so every entry already starts at zero.
  std::vector<int64_t> counts(num_weights);
  for (int64_t i = 0; i < numel; i++) {
    counts[indices_data[i]]++;
  }

  auto index_grad_weight =
      at::zeros({num_weights, grad.size(1)}, grad.type()).contiguous();

  // counts_uniq[g] = one-past-the-end position (in the sorted indices) of the
  // g-th run of equal indices, i.e. a running sum of the run lengths.
  std::vector<int64_t> counts_uniq;
  counts_uniq.reserve(num_weights);
  int64_t o = 0;
  for (int64_t i = 0; i < numel; i += counts[indices_data[i]]) {
    counts_uniq.push_back(counts[indices_data[i]]);
    if (o > 0) {
      counts_uniq[o] += counts_uniq[o - 1];
    }
    o++;
  }

  if (mode == MODE_MEAN || mode == MODE_SUM) {
    const int64_t num_unique = static_cast<int64_t>(counts_uniq.size());
    const int64_t ddim = grad.size(1);
    const int64_t num_bags = offsets_.size(0);
    const int64_t num_indices = indices.size(0);

    // Each iteration owns one distinct row `index` of the result, so the
    // iterations write to disjoint memory.
    #pragma omp parallel for if (numel > 1000)
    for (int64_t i = 0; i < num_unique; i++) {
      const int64_t start = i == 0 ? 0 : counts_uniq[i - 1];
      const int64_t index = indices_data[start];
      for (int64_t j = start; j < counts_uniq[i]; j++) {
        const int64_t source = offset2bag_data[j];
        double scale = 1.0;
        if (scale_grad_by_freq) {
          // Frequency of the weight row handled by this run. `i` is the
          // ordinal of the run, not a position in the sorted indices, so the
          // lookup has to go through `index`.
          scale /= counts[index];
        }
        if (mode == MODE_MEAN) {
          // Divide by the size of the bag this element came from.
          if (num_bags == 1) {
            scale /= num_indices;
          } else if (source == num_bags - 1) {
            // Last bag extends to the end of the indices.
            scale /= num_indices - offsets_data[num_bags - 1];
          } else {
            scale /= offsets_data[source + 1] - offsets_data[source];
          }
        }
        // result[index, :] += scale * grad[source, :]
        if (grad.type().scalarType() == kFloat) {
          auto igwd = index_grad_weight.data<float>();
          auto gd = grad.data<float>();
          THBlas_axpy<float>(ddim, static_cast<float>(scale),
                             gd + ddim * source, 1, igwd + ddim * index, 1);
        } else if (grad.type().scalarType() == kDouble) {
          auto igwd = index_grad_weight.data<double>();
          auto gd = grad.data<double>();
          THBlas_axpy<double>(ddim, scale, gd + ddim * source, 1,
                              igwd + ddim * index, 1);
        }
      }
    }
  } else if (mode == MODE_MAX) {
    // Only bags with a non-zero size contribute.
    auto nonempty_bags = bag_size_.nonzero().view(-1);
    auto nonempty_max_indices = max_indices_.index_select(0, nonempty_bags);
    auto nonempty_grad = grad_.index_select(0, nonempty_bags);

    // Column by column, add each bag's gradient to the row that produced the
    // maximum for that column.
    for (int64_t dim = 0; dim < grad.size(1); dim++) {
      index_grad_weight.select(1, dim).index_add_(
        0, nonempty_max_indices.select(1, dim), nonempty_grad.select(1, dim));
    }
  }

  return index_grad_weight;
}
예제 #26
0
파일: MDAtoms.cpp 프로젝트: apoma/plumed2
// Fills `box` from the stored flat box array, reading element (i,j) from
// position 3*i+j and multiplying it by scaleb. When no box pointer is set,
// `box` is zeroed instead.
void MDAtomsTyped<T>::getBox(Tensor&box)const{
  if(!this->box){
    box.zero();
    return;
  }
  for(int row=0;row<3;row++){
    for(int col=0;col<3;col++){
      box(row,col)=this->box[3*row+col]*scaleb;
    }
  }
}
예제 #27
0
// Computes the collective variable value, its atomic derivatives and the
// virial from the chemical-shift back end held in meth_list[0].
//
// Steps:
//   1. Copy the atom positions (scaled by len_pl2alm) into `coor`, which
//      stores atoms with a stride of 4 entries.
//   2. Non-ensemble mode: a single calc_cs_force() call yields the energy and
//      the forces.
//      Ensemble mode: calc_cs() computes the shifts, which are copied into
//      `sh`, averaged across replicas, written back into the back end, and
//      then ens_calc_cs_force() yields the energy and the forces.
//      In both modes the shifts are optionally written to a file every
//      `pperiod` steps by rank 0.
//   3. Convert forces into derivatives (scaled by fact*for_pl2alm),
//      accumulate the virial, and publish value, derivatives and box
//      derivatives.
void CH3Shifts::calculate()
{
  double energy=0.;
  Tensor virial;
  virial.zero();
  vector<Vector> deriv(getNumberOfAtoms());
  int N = getNumberOfAtoms();
  Coor<double> coor(N); 
  Coor<double> forces(N);

  forces.clear();
  // NOTE(review): this clears 6 entries per row, while the ensemble branch
  // below addresses sh[0..7][...] and reduces numResidues*8 values. The
  // allocation of `sh` is not visible here -- confirm the intended layout.
  for(int i=0; i<numResidues; i++) for(unsigned j=0; j<6; j++) sh[i][j]=0.;

  // Positions -> back-end coordinates (4 slots per atom, 3 of them written).
  for (int i = 0; i < N; i++) {
     int ipos = 4 * i;
     Vector Pos = getPosition(i);
     coor.coor[ipos]   = len_pl2alm*Pos[0];
     coor.coor[ipos+1] = len_pl2alm*Pos[1];
     coor.coor[ipos+2] = len_pl2alm*Pos[2];
  }

  // Scaling applied to the forces; stays 1 unless ensemble averaging is on.
  double fact=1.0;
  if(!ensemble) { 
     energy = meth_list[0]->calc_cs_force(coor, forces);
     // Only rank 0 prints, and only on steps that are multiples of pperiod.
     bool printout=false;
     if(pperiod>0&&comm.Get_rank()==0) printout = (!(getStep()%pperiod));
     if(printout) {
       string csfile;
       char tmps1[21];
       // add to the name the label of the cv in such a way to have different files
       // when there is more than one defined variable
       sprintf(tmps1, "%li", getStep());
       csfile = string("cs")+tmps1+string(".dat");
       meth_list[0]->write_cs(csfile.c_str());
     }
  } else {
     meth_list[0]->calc_cs(coor);
     bool printout=false;
     if(pperiod>0&&comm.Get_rank()==0) printout = (!(getStep()%pperiod));
     if(printout) {
       string csfile;
       char tmps1[21], tmps2[21];
       // add to the name the label of the cv in such a way to have different files
       // when there is more than one defined variable
       sprintf(tmps1, "%li", getStep());
       sprintf(tmps2, "%i", multi_sim_comm.Get_rank());
       csfile = string("cs")+tmps2+"-"+tmps1+string(".dat");
       meth_list[0]->write_cs(csfile.c_str());
     }
     // Gather the eight kinds of computed shifts into rows 0..7 of sh.
     unsigned size = meth_list[0]->ala_calc_hb.size();
     for(unsigned j=0;j<size;j++) sh[0][j] = meth_list[0]->ala_calc_hb[j];
     size = meth_list[0]->ile_calc_hd.size();
     for(unsigned j=0;j<size;j++) sh[1][j] = meth_list[0]->ile_calc_hd[j];
     size = meth_list[0]->ile_calc_hg2.size();
     for(unsigned j=0;j<size;j++) sh[2][j] = meth_list[0]->ile_calc_hg2[j];
     size = meth_list[0]->leu_calc_hd1.size();
     for(unsigned j=0;j<size;j++) sh[3][j] = meth_list[0]->leu_calc_hd1[j];
     size = meth_list[0]->leu_calc_hd2.size();
     for(unsigned j=0;j<size;j++) sh[4][j] = meth_list[0]->leu_calc_hd2[j];
     size = meth_list[0]->thr_calc_hg2.size();
     for(unsigned j=0;j<size;j++) sh[5][j] = meth_list[0]->thr_calc_hg2[j];
     size = meth_list[0]->val_calc_hg1.size();
     for(unsigned j=0;j<size;j++) sh[6][j] = meth_list[0]->val_calc_hg1[j];
     size = meth_list[0]->val_calc_hg2.size();
     for(unsigned j=0;j<size;j++) sh[7][j] = meth_list[0]->val_calc_hg2[j];
     // Average over ens_dim replicas.
     fact = 1./((double) ens_dim);
     // Rank 0 of each replica sums across replicas and scales; all other
     // ranks zero their copy so the intra-replica sum below leaves every
     // rank holding the averaged values.
     // NOTE(review): indices are used as sh[j][i] here (j<numResidues, i<8)
     // but as sh[kind][residue] above -- consistent only if `sh` is one
     // contiguous buffer; confirm.
     if(comm.Get_rank()==0) { // I am the master of my replica
       // among replicas
       multi_sim_comm.Sum(&sh[0][0], numResidues*8);
       multi_sim_comm.Barrier(); 
       for(unsigned i=0;i<8;i++) for(int j=0;j<numResidues;j++) sh[j][i] *= fact; 
     } else for(unsigned i=0;i<8;i++) for(int j=0;j<numResidues;j++) sh[j][i] = 0.;
     // inside each replica
     comm.Sum(&sh[0][0], numResidues*8);
     // now send the averaged shifts back to almost
     size = meth_list[0]->ala_calc_hb.size();
     for(unsigned j=0;j<size;j++)  meth_list[0]->ala_calc_hb[j] = sh[0][j];
     size = meth_list[0]->ile_calc_hd.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->ile_calc_hd[j] = sh[1][j];
     size = meth_list[0]->ile_calc_hg2.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->ile_calc_hg2[j] = sh[2][j];
     size = meth_list[0]->leu_calc_hd1.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->leu_calc_hd1[j] = sh[3][j];
     size = meth_list[0]->leu_calc_hd2.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->leu_calc_hd2[j] = sh[4][j];
     size = meth_list[0]->thr_calc_hg2.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->thr_calc_hg2[j] = sh[5][j];
     size = meth_list[0]->val_calc_hg1.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->val_calc_hg1[j] = sh[6][j];
     size = meth_list[0]->val_calc_hg2.size();
     for(unsigned j=0;j<size;j++) meth_list[0]->val_calc_hg2[j] = sh[7][j];
     // calculate all the forces now
     energy = meth_list[0]->ens_calc_cs_force(coor, forces);
  }

  // Back-end forces -> per-atom derivatives, plus the virial contribution
  // -(position outer derivative) of each atom.
  for (int i = 0; i < N; i++)
  {
    Vector For;
    int ipos = 4 * i;
    For[0] = forces.coor[ipos];
    For[1] = forces.coor[ipos+1];
    For[2] = forces.coor[ipos+2];
    deriv[i] = fact*for_pl2alm*For;
    virial=virial+(-1.*Tensor(getPosition(i),deriv[i]));
  }

  for(unsigned i=0;i<getNumberOfAtoms();++i) setAtomsDerivatives(i,deriv[i]);
  setValue           (ene_pl2alm*energy);
  setBoxDerivatives  (virial);
}
예제 #28
0
// Two-argument form of apply: leaves `x` untouched by cloning it first, then
// invokes the single-argument apply(lambda) on the clone and returns that
// call's result.
Tensor< S > Tensor< S >::apply(
    const Tensor &x,
    const ::std::function< void(value_type*) > &lambda) {
  auto &&duplicate = x.clone();
  return duplicate.apply(lambda);
}
예제 #29
0
파일: tensor.cpp 프로젝트: pranjul23/NASA
// Builds `Umat` from the left singular vectors of a matrix formed by laying
// several tensors side by side.
//
// Every tensor in `extra_tensor_list` is matricized with the modes NOT listed
// in `mult_modes` ("free" modes) as rows and the `mult_modes` as columns; the
// matricizations are concatenated horizontally and decomposed with a Jacobi
// SVD. The first nonzerovals^mult_modes.size() columns of U are written into
// `Umat`, whose dimensions are the free dimensions followed by
// free_modes.size() copies of `nonzerovals`. When that slice of U is square,
// `Umat` is overwritten with an identity pattern instead (1 where the row and
// column multi-indices are equal, 0 elsewhere).
//
// Mode/dimension information is taken from the first tensor of the list, so
// the list must not be empty (vector::at throws otherwise). The number of
// free elements must equal the number of multiplied elements (assert).
//
// Always returns true.
bool Tensor::ComputeJointSVD(Tensor& Umat, vector<Tensor*>& extra_tensor_list, vector<int>& mult_modes, int nonzerovals)
{
	// nonzerovals ^ mult_modes.size(), computed in integer arithmetic.
	// Going through floating-point pow() and truncating can come out one too
	// small when the result is not represented exactly.
	int num_sing_vals = 1;
	for (size_t m = 0; m < mult_modes.size(); ++m)
	{
		num_sing_vals *= nonzerovals;
	}

	vector<int> free_modes;
	vector<int> mult_dims;
	vector<int> free_dims;
	vector<int> mult_offsets;
	vector<int> free_offsets;

	Tensor& temp_tensor = *(extra_tensor_list.at(0));
	VectorPlus::SetDiff(free_modes, temp_tensor.Modes(), mult_modes);
	VectorPlus::Subset(mult_dims, temp_tensor.Dims(), mult_modes);
	VectorPlus::CSubset(free_dims, temp_tensor.Dims(), mult_modes);
	// NOTE(review): the offset vectors are filled but not read afterwards in
	// this function; the calls are kept in case ComputeOffsets matters
	// elsewhere -- confirm and remove if not.
	ComputeOffsets(mult_offsets, mult_dims);
	ComputeOffsets(free_offsets, free_dims);

	vector<int> usmalldims(free_modes.size(), nonzerovals);
	vector<int> udims;
	vector<int> usmall_offsets;
	ComputeOffsets(usmall_offsets, usmalldims);

	const int numMultElements = VectorPlus::Product(mult_dims);
	const int numFreeElements = VectorPlus::Product(free_dims);

	assert(numMultElements == numFreeElements);

	const int num_tensors = static_cast<int>(extra_tensor_list.size());

	Eigen::MatrixXd matricized_tensor(numFreeElements, num_tensors * numMultElements);

	// Fill the concatenated matricization: tensor z occupies the column range
	// [z * numMultElements, (z + 1) * numMultElements).
	for (int z = 0; z < num_tensors; ++z)
	{
		const int z_offset = z * numMultElements;
		FastIndexer i_indexer(free_dims);
		for (int i = 0; i < numFreeElements; ++i)
		{
			vector<int>& free_indices = i_indexer.GetNext();
			FastIndexer j_indexer(mult_dims);
			for (int j = 0; j < numMultElements; ++j)
			{
				vector<int>& mult_indices = j_indexer.GetNext();
				vector<int> total_indices;
				VectorPlus::Concat(total_indices, free_indices, mult_indices);
				matricized_tensor(i, z_offset + j) = extra_tensor_list.at(z)->At(total_indices);
			}
		}
	}

	// Left singular vectors; keep only the leading num_sing_vals columns.
	Eigen::JacobiSVD<Eigen::MatrixXd> svd(matricized_tensor, Eigen::ComputeFullU);
	Eigen::MatrixXd U = svd.matrixU();
	Eigen::MatrixXd thinU = U.leftCols(num_sing_vals);

	VectorPlus::Concat(udims, free_dims, usmalldims);
	Umat.Initialize(udims);

	// When the kept part of U is square, Umat receives an identity pattern
	// rather than the entries of U.
	const bool square = thinU.rows() == thinU.cols();

	FastIndexer i_indexer(free_dims);
	for (int i = 0; i < thinU.rows(); ++i)
	{
		vector<int>& left_indices = i_indexer.GetNext();

		FastIndexer j_indexer(usmalldims);
		for (int j = 0; j < thinU.cols(); ++j)
		{
			vector<int>& right_indices = j_indexer.GetNext();
			vector<int> indices;
			VectorPlus::Concat(indices, left_indices, right_indices);
			Umat.Set(indices, thinU(i,j));

			if (square)
			{
				if (VectorPlus::Equals(left_indices, right_indices))
				{
					Umat.Set(indices, 1);
				}
				else
				{
					Umat.Set(indices, 0);
				}
			}
		}
	}
	return true;
}
예제 #30
0
void HPCoarsenTest::select_refinement (System & system)
{
  START_LOG("select_refinement()", "HPCoarsenTest");

  // The current mesh
  MeshBase & mesh = system.get_mesh();

  // The dimensionality of the mesh
  const unsigned int dim = mesh.mesh_dimension();

  // The number of variables in the system
  const unsigned int n_vars = system.n_vars();

  // The DofMap for this system
  const DofMap & dof_map = system.get_dof_map();

  // The system number (for doing bad hackery)
  const unsigned int sys_num = system.number();

  // Check for a valid component_scale
  if (!component_scale.empty())
    {
      if (component_scale.size() != n_vars)
        libmesh_error_msg("ERROR: component_scale is the wrong size:\n" \
                          << " component_scale.size()=" \
                          << component_scale.size()     \
                          << "\n n_vars=" \
                          << n_vars);
    }
  else
    {
      // No specified scaling.  Scale all variables by one.
      component_scale.resize (n_vars, 1.0);
    }

  // Resize the error_per_cell vectors to handle
  // the number of elements, initialize them to 0.
  std::vector<ErrorVectorReal> h_error_per_cell(mesh.max_elem_id(), 0.);
  std::vector<ErrorVectorReal> p_error_per_cell(mesh.max_elem_id(), 0.);

  // Loop over all the variables in the system
  for (unsigned int var=0; var<n_vars; var++)
    {
      // Possibly skip this variable
      if (!component_scale.empty())
        if (component_scale[var] == 0.0) continue;

      // The type of finite element to use for this variable
      const FEType & fe_type = dof_map.variable_type (var);

      // Finite element objects for a fine (and probably a coarse)
      // element will be needed
      fe = FEBase::build (dim, fe_type);
      fe_coarse = FEBase::build (dim, fe_type);

      // Any cached coarse element results have expired
      coarse = libmesh_nullptr;
      unsigned int cached_coarse_p_level = 0;

      const FEContinuity cont = fe->get_continuity();
      libmesh_assert (cont == DISCONTINUOUS || cont == C_ZERO ||
                      cont == C_ONE);

      // Build an appropriate quadrature rule
      qrule = fe_type.default_quadrature_rule(dim);

      // Tell the refined finite element about the quadrature
      // rule.  The coarse finite element need not know about it
      fe->attach_quadrature_rule (qrule.get());

      // We will always do the integration
      // on the fine elements.  Get their Jacobian values, etc..
      JxW = &(fe->get_JxW());
      xyz_values = &(fe->get_xyz());

      // The shape functions
      phi = &(fe->get_phi());
      phi_coarse = &(fe_coarse->get_phi());

      // The shape function derivatives
      if (cont == C_ZERO || cont == C_ONE)
        {
          dphi = &(fe->get_dphi());
          dphi_coarse = &(fe_coarse->get_dphi());
        }

#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
      // The shape function second derivatives
      if (cont == C_ONE)
        {
          d2phi = &(fe->get_d2phi());
          d2phi_coarse = &(fe_coarse->get_d2phi());
        }
#endif // defined (LIBMESH_ENABLE_SECOND_DERIVATIVES)

      // Iterate over all the active elements in the mesh
      // that live on this processor.

      MeshBase::const_element_iterator       elem_it  =
        mesh.active_local_elements_begin();
      const MeshBase::const_element_iterator elem_end =
        mesh.active_local_elements_end();

      for (; elem_it != elem_end; ++elem_it)
        {
          const Elem * elem = *elem_it;

          // We're only checking elements that are already flagged for h
          // refinement
          if (elem->refinement_flag() != Elem::REFINE)
            continue;

          const dof_id_type e_id = elem->id();

          // Find the projection onto the parent element,
          // if necessary
          if (elem->parent() &&
              (coarse != elem->parent() ||
               cached_coarse_p_level != elem->p_level()))
            {
              Uc.resize(0);

              coarse = elem->parent();
              cached_coarse_p_level = elem->p_level();

              unsigned int old_parent_level = coarse->p_level();
              (const_cast<Elem *>(coarse))->hack_p_level(elem->p_level());

              this->add_projection(system, coarse, var);

              (const_cast<Elem *>(coarse))->hack_p_level(old_parent_level);

              // Solve the h-coarsening projection problem
              Ke.cholesky_solve(Fe, Uc);
            }

          fe->reinit(elem);

          // Get the DOF indices for the fine element
          dof_map.dof_indices (elem, dof_indices, var);

          // The number of quadrature points
          const unsigned int n_qp = qrule->n_points();

          // The number of DOFS on the fine element
          const unsigned int n_dofs =
            cast_int<unsigned int>(dof_indices.size());

          // The number of nodes on the fine element
          const unsigned int n_nodes = elem->n_nodes();

          // The average element value (used as an ugly hack
          // when we have nothing p-coarsened to compare to)
          // Real average_val = 0.;
          Number average_val = 0.;

          // Calculate this variable's contribution to the p
          // refinement error

          if (elem->p_level() == 0)
            {
              unsigned int n_vertices = 0;
              for (unsigned int n = 0; n != n_nodes; ++n)
                if (elem->is_vertex(n))
                  {
                    n_vertices++;
                    const Node * const node = elem->get_node(n);
                    average_val += system.current_solution
                      (node->dof_number(sys_num,var,0));
                  }
              average_val /= n_vertices;
            }
          else
            {
              unsigned int old_elem_level = elem->p_level();
              (const_cast<Elem *>(elem))->hack_p_level(old_elem_level - 1);

              fe_coarse->reinit(elem, &(qrule->get_points()));

              const unsigned int n_coarse_dofs =
                cast_int<unsigned int>(phi_coarse->size());

              (const_cast<Elem *>(elem))->hack_p_level(old_elem_level);

              Ke.resize(n_coarse_dofs, n_coarse_dofs);
              Ke.zero();
              Fe.resize(n_coarse_dofs);
              Fe.zero();

              // Loop over the quadrature points
              for (unsigned int qp=0; qp<qrule->n_points(); qp++)
                {
                  // The solution value at the quadrature point
                  Number val = libMesh::zero;
                  Gradient grad;
                  Tensor hess;

                  for (unsigned int i=0; i != n_dofs; i++)
                    {
                      dof_id_type dof_num = dof_indices[i];
                      val += (*phi)[i][qp] *
                        system.current_solution(dof_num);
                      if (cont == C_ZERO || cont == C_ONE)
                        grad.add_scaled((*dphi)[i][qp], system.current_solution(dof_num));
                      // grad += (*dphi)[i][qp] *
                      //  system.current_solution(dof_num);
                      if (cont == C_ONE)
                        hess.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num));
                      // hess += (*d2phi)[i][qp] *
                      //  system.current_solution(dof_num);
                    }

                  // The projection matrix and vector
                  for (unsigned int i=0; i != Fe.size(); ++i)
                    {
                      Fe(i) += (*JxW)[qp] *
                        (*phi_coarse)[i][qp]*val;
                      if (cont == C_ZERO || cont == C_ONE)
                        Fe(i) += (*JxW)[qp] *
                          grad * (*dphi_coarse)[i][qp];
                      if (cont == C_ONE)
                        Fe(i) += (*JxW)[qp] *
                          hess.contract((*d2phi_coarse)[i][qp]);

                      for (unsigned int j=0; j != Fe.size(); ++j)
                        {
                          Ke(i,j) += (*JxW)[qp] *
                            (*phi_coarse)[i][qp]*(*phi_coarse)[j][qp];
                          if (cont == C_ZERO || cont == C_ONE)
                            Ke(i,j) += (*JxW)[qp] *
                              (*dphi_coarse)[i][qp]*(*dphi_coarse)[j][qp];
                          if (cont == C_ONE)
                            Ke(i,j) += (*JxW)[qp] *
                              ((*d2phi_coarse)[i][qp].contract((*d2phi_coarse)[j][qp]));
                        }
                    }
                }

              // Solve the p-coarsening projection problem
              Ke.cholesky_solve(Fe, Up);
            }

          // loop over the integration points on the fine element
          for (unsigned int qp=0; qp<n_qp; qp++)
            {
              Number value_error = 0.;
              Gradient grad_error;
              Tensor hessian_error;
              for (unsigned int i=0; i<n_dofs; i++)
                {
                  const dof_id_type dof_num = dof_indices[i];
                  value_error += (*phi)[i][qp] *
                    system.current_solution(dof_num);
                  if (cont == C_ZERO || cont == C_ONE)
                    grad_error.add_scaled((*dphi)[i][qp], system.current_solution(dof_num));
                  // grad_error += (*dphi)[i][qp] *
                  //  system.current_solution(dof_num);
                  if (cont == C_ONE)
                    hessian_error.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num));
                  // hessian_error += (*d2phi)[i][qp] *
                  //    system.current_solution(dof_num);
                }
              if (elem->p_level() == 0)
                {
                  value_error -= average_val;
                }
              else
                {
                  for (unsigned int i=0; i<Up.size(); i++)
                    {
                      value_error -= (*phi_coarse)[i][qp] * Up(i);
                      if (cont == C_ZERO || cont == C_ONE)
                        grad_error.subtract_scaled((*dphi_coarse)[i][qp], Up(i));
                      // grad_error -= (*dphi_coarse)[i][qp] * Up(i);
                      if (cont == C_ONE)
                        hessian_error.subtract_scaled((*d2phi_coarse)[i][qp], Up(i));
                      // hessian_error -= (*d2phi_coarse)[i][qp] * Up(i);
                    }
                }

              p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                (component_scale[var] *
                 (*JxW)[qp] * TensorTools::norm_sq(value_error));
              if (cont == C_ZERO || cont == C_ONE)
                p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                  (component_scale[var] *
                   (*JxW)[qp] * grad_error.norm_sq());
              if (cont == C_ONE)
                p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                  (component_scale[var] *
                   (*JxW)[qp] * hessian_error.norm_sq());
            }

          // Calculate this variable's contribution to the h
          // refinement error

          if (!elem->parent())
            {
              // For now, we'll always start with an h refinement
              h_error_per_cell[e_id] =
                std::numeric_limits<ErrorVectorReal>::max() / 2;
            }
          else
            {
              FEInterface::inverse_map (dim, fe_type, coarse,
                                        *xyz_values, coarse_qpoints);

              unsigned int old_parent_level = coarse->p_level();
              (const_cast<Elem *>(coarse))->hack_p_level(elem->p_level());

              fe_coarse->reinit(coarse, &coarse_qpoints);

              (const_cast<Elem *>(coarse))->hack_p_level(old_parent_level);

              // The number of DOFS on the coarse element
              unsigned int n_coarse_dofs =
                cast_int<unsigned int>(phi_coarse->size());

              // Loop over the quadrature points
              for (unsigned int qp=0; qp<n_qp; qp++)
                {
                  // The solution difference at the quadrature point
                  Number value_error = libMesh::zero;
                  Gradient grad_error;
                  Tensor hessian_error;

                  for (unsigned int i=0; i != n_dofs; ++i)
                    {
                      const dof_id_type dof_num = dof_indices[i];
                      value_error += (*phi)[i][qp] *
                        system.current_solution(dof_num);
                      if (cont == C_ZERO || cont == C_ONE)
                        grad_error.add_scaled((*dphi)[i][qp], system.current_solution(dof_num));
                      // grad_error += (*dphi)[i][qp] *
                      //  system.current_solution(dof_num);
                      if (cont == C_ONE)
                        hessian_error.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num));
                      // hessian_error += (*d2phi)[i][qp] *
                      //  system.current_solution(dof_num);
                    }

                  for (unsigned int i=0; i != n_coarse_dofs; ++i)
                    {
                      value_error -= (*phi_coarse)[i][qp] * Uc(i);
                      if (cont == C_ZERO || cont == C_ONE)
                        // grad_error -= (*dphi_coarse)[i][qp] * Uc(i);
                        grad_error.subtract_scaled((*dphi_coarse)[i][qp], Uc(i));
                      if (cont == C_ONE)
                        hessian_error.subtract_scaled((*d2phi_coarse)[i][qp], Uc(i));
                      // hessian_error -= (*d2phi_coarse)[i][qp] * Uc(i);
                    }

                  h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                    (component_scale[var] *
                     (*JxW)[qp] * TensorTools::norm_sq(value_error));
                  if (cont == C_ZERO || cont == C_ONE)
                    h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                      (component_scale[var] *
                       (*JxW)[qp] * grad_error.norm_sq());
                  if (cont == C_ONE)
                    h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                      (component_scale[var] *
                       (*JxW)[qp] * hessian_error.norm_sq());
                }

            }
        }
    }

  // Now that we've got our approximations for p_error and h_error, let's see
  // if we want to switch any h refinement flags to p refinement

  // Iterate over all the active elements in the mesh
  // that live on this processor.

  MeshBase::element_iterator       elem_it  =
    mesh.active_local_elements_begin();
  const MeshBase::element_iterator elem_end =
    mesh.active_local_elements_end();

  for (; elem_it != elem_end; ++elem_it)
    {
      Elem * elem = *elem_it;

      // We're only checking elements that are already flagged for h
      // refinement
      if (elem->refinement_flag() != Elem::REFINE)
        continue;

      const dof_id_type e_id = elem->id();

      unsigned int dofs_per_elem = 0, dofs_per_p_elem = 0;

      // Loop over all the variables in the system
      for (unsigned int var=0; var<n_vars; var++)
        {
          // The type of finite element to use for this variable
          const FEType & fe_type = dof_map.variable_type (var);

          // FIXME: we're overestimating the number of DOFs added by h
          // refinement
          FEType elem_fe_type = fe_type;
          elem_fe_type.order =
            static_cast<Order>(fe_type.order + elem->p_level());
          dofs_per_elem +=
            FEInterface::n_dofs(dim, elem_fe_type, elem->type());

          elem_fe_type.order =
            static_cast<Order>(fe_type.order + elem->p_level() + 1);
          dofs_per_p_elem +=
            FEInterface::n_dofs(dim, elem_fe_type, elem->type());
        }

      const unsigned int new_h_dofs = dofs_per_elem *
        (elem->n_children() - 1);

      const unsigned int new_p_dofs = dofs_per_p_elem -
        dofs_per_elem;

      /*
        libMesh::err << "Cell " << e_id << ": h = " << elem->hmax()
        << ", p = " << elem->p_level() + 1 << "," << std::endl
        << "     h_error = " << h_error_per_cell[e_id]
        << ", p_error = " << p_error_per_cell[e_id] << std::endl
        << "     new_h_dofs = " << new_h_dofs
        << ", new_p_dofs = " << new_p_dofs << std::endl;
      */
      const Real p_value =
        std::sqrt(p_error_per_cell[e_id]) * p_weight / new_p_dofs;
      const Real h_value =
        std::sqrt(h_error_per_cell[e_id]) /
        static_cast<Real>(new_h_dofs);
      if (p_value > h_value)
        {
          elem->set_p_refinement_flag(Elem::REFINE);
          elem->set_refinement_flag(Elem::DO_NOTHING);
        }
    }

  STOP_LOG("select_refinement()", "HPCoarsenTest");
}