void HPCoarsenTest::add_projection(const System & system, const Elem * elem, unsigned int var) { // If we have children, we need to add their projections instead if (!elem->active()) { libmesh_assert(!elem->subactive()); for (unsigned int c = 0; c != elem->n_children(); ++c) this->add_projection(system, elem->child(c), var); return; } // The DofMap for this system const DofMap & dof_map = system.get_dof_map(); // The type of finite element to use for this variable const FEType & fe_type = dof_map.variable_type (var); const FEContinuity cont = fe->get_continuity(); fe->reinit(elem); dof_map.dof_indices(elem, dof_indices, var); const unsigned int n_dofs = cast_int<unsigned int>(dof_indices.size()); FEInterface::inverse_map (system.get_mesh().mesh_dimension(), fe_type, coarse, *xyz_values, coarse_qpoints); fe_coarse->reinit(coarse, &coarse_qpoints); const unsigned int n_coarse_dofs = cast_int<unsigned int>(phi_coarse->size()); if (Uc.size() == 0) { Ke.resize(n_coarse_dofs, n_coarse_dofs); Ke.zero(); Fe.resize(n_coarse_dofs); Fe.zero(); Uc.resize(n_coarse_dofs); Uc.zero(); } libmesh_assert_equal_to (Uc.size(), phi_coarse->size()); // Loop over the quadrature points for (unsigned int qp=0; qp<qrule->n_points(); qp++) { // The solution value at the quadrature point Number val = libMesh::zero; Gradient grad; Tensor hess; for (unsigned int i=0; i != n_dofs; i++) { dof_id_type dof_num = dof_indices[i]; val += (*phi)[i][qp] * system.current_solution(dof_num); if (cont == C_ZERO || cont == C_ONE) grad.add_scaled((*dphi)[i][qp],system.current_solution(dof_num)); // grad += (*dphi)[i][qp] * // system.current_solution(dof_num); if (cont == C_ONE) hess.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num)); // hess += (*d2phi)[i][qp] * // system.current_solution(dof_num); } // The projection matrix and vector for (unsigned int i=0; i != Fe.size(); ++i) { Fe(i) += (*JxW)[qp] * (*phi_coarse)[i][qp]*val; if (cont == C_ZERO || cont == C_ONE) Fe(i) += (*JxW)[qp] * (grad*(*dphi_coarse)[i][qp]); if (cont == C_ONE) Fe(i) += (*JxW)[qp] * hess.contract((*d2phi_coarse)[i][qp]); // Fe(i) += (*JxW)[qp] * // (*d2phi_coarse)[i][qp].contract(hess); for (unsigned int j=0; j != Fe.size(); ++j) { Ke(i,j) += (*JxW)[qp] * (*phi_coarse)[i][qp]*(*phi_coarse)[j][qp]; if (cont == C_ZERO || cont == C_ONE) Ke(i,j) += (*JxW)[qp] * (*dphi_coarse)[i][qp]*(*dphi_coarse)[j][qp]; if (cont == C_ONE) Ke(i,j) += (*JxW)[qp] * ((*d2phi_coarse)[i][qp].contract((*d2phi_coarse)[j][qp])); } } } }
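The Ke/Fe assembly above is a (gradient/Hessian-weighted) projection problem onto the coarse basis. A minimal standalone sketch of the same pattern, reduced to a plain L2 projection of f(x)=x^2 onto {1, x} with a hand-rolled 2-point quadrature rule and a 2x2 solve (illustrative only, none of this is libMesh API):

#include <array>
#include <cmath>
#include <cstdio>

int main() {
  // 2-point Gauss rule on [0,1]
  const std::array<double,2> q = {0.5 - 0.5/std::sqrt(3.0), 0.5 + 0.5/std::sqrt(3.0)};
  const std::array<double,2> w = {0.5, 0.5};
  auto phi0 = [](double)   { return 1.0; };
  auto phi1 = [](double x) { return x;   };
  auto f    = [](double x) { return x*x; };

  double Ke[2][2] = {{0,0},{0,0}}, Fe[2] = {0,0};
  for (int qp = 0; qp < 2; ++qp) {
    const double x = q[qp], JxW = w[qp];
    const double p[2] = {phi0(x), phi1(x)};
    for (int i = 0; i < 2; ++i) {
      Fe[i] += JxW * p[i] * f(x);                 // projection vector
      for (int j = 0; j < 2; ++j)
        Ke[i][j] += JxW * p[i] * p[j];            // projection ("mass") matrix
    }
  }
  // Solve the 2x2 system Ke*u = Fe directly (Cramer's rule)
  const double det = Ke[0][0]*Ke[1][1] - Ke[0][1]*Ke[1][0];
  const double u0 = ( Fe[0]*Ke[1][1] - Fe[1]*Ke[0][1]) / det;
  const double u1 = (-Fe[0]*Ke[1][0] + Fe[1]*Ke[0][0]) / det;
  std::printf("projection of x^2 onto {1, x}: %.4f + %.4f x\n", u0, u1);  // -1/6 + x
  return 0;
}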
void MeshFunction::hessian (const Point& p, const Real, std::vector<Tensor>& output, const std::set<subdomain_id_type>* subdomain_ids) { libmesh_assert (this->initialized()); const Elem* element = this->find_element(p,subdomain_ids); if (!element) { output.resize(0); } else { // resize the output vector to the number of output values // that the user told us output.resize (this->_system_vars.size()); { const unsigned int dim = element->dim(); /* * Get local coordinates to feed these into compute_data(). * Note that the fe_type can safely be used from the 0-variable, * since the inverse mapping is the same for all FEFamilies */ const Point mapped_point (FEInterface::inverse_map (dim, this->_dof_map.variable_type(0), element, p)); std::vector<Point> point_list (1, mapped_point); // loop over all vars for (unsigned int index=0; index < this->_system_vars.size(); index++) { /* * the data for this variable */ const unsigned int var = _system_vars[index]; const FEType& fe_type = this->_dof_map.variable_type(var); UniquePtr<FEBase> point_fe (FEBase::build(dim, fe_type)); const std::vector<std::vector<RealTensor> >& d2phi = point_fe->get_d2phi(); point_fe->reinit(element, &point_list); // where the solution values for the var-th variable are stored std::vector<dof_id_type> dof_indices; this->_dof_map.dof_indices (element, dof_indices, var); // interpolate the solution Tensor hess; for (unsigned int i=0; i<dof_indices.size(); i++) hess.add_scaled(d2phi[i][0], this->_vector(dof_indices[i])); output[index] = hess; } } } // all done return; }
void Tensor::CreateLinearSystem(vector<double>& B_vec, Matrix& A_matrix, Tensor& X, Tensor& A, Tensor& B, vector<int>& mult_modesX, vector<int>& mult_modesA) { // fake multiply x and A together to create B, creating the linear system in the process assert(mult_modesX.size() == mult_modesA.size()); if (X.Order() == mult_modesX.size() && A.Order() == mult_modesA.size()) { assert(0); } int numMultElements = 1; vector<int> mult_dims(mult_modesX.size(), 0); for (int i = 0; i < mult_modesX.size(); ++i) { assert(X.Dim(mult_modesX[i]) == A.Dim(mult_modesA[i])); mult_dims[i] = X.Dim(mult_modesX[i]); numMultElements = numMultElements * mult_dims[i]; } vector<int> mult_offsets; ComputeOffsets(mult_offsets, mult_dims); int result_order = X.Order() + A.Order() - mult_modesX.size() - mult_modesA.size(); if (result_order == 0) assert(0); vector<int> result_dims; vector<int> free_modesX; vector<int> free_modesA; // find free indices from X for (int i = 0; i < X.Order(); ++i) { if (!VectorPlus::Contains(mult_modesX, i)) { free_modesX.push_back(i); } } // find free indices from A for (int i = 0; i < A.Order(); ++i) { if (!VectorPlus::Contains(mult_modesA, i)) { free_modesA.push_back(i); } } vector<int> a_mat_dims = VectorPlus::CreatePair(B.NumElements(), X.NumElements()); A_matrix.Initialize(a_mat_dims); B_vec.reserve(B.NumElements()); // fill in elements from result tensor FastIndexer B_indexer(B.Dims()); for (int n = 0; n < B.NumElements(); ++n) { B_vec.push_back(B.At(n)); vector<int>& indices = B_indexer.GetNext(); vector<int> free_indicesX; vector<int> free_indicesA; // B.ComputeIndexArray(indices, n); for (int i = 0; i < B.Order(); ++i) { if (!VectorPlus::Contains(mult_modesX, i)) free_indicesX.push_back(indices[i]); else free_indicesA.push_back(indices[i]); } // sum over elementwise products of mult-mode elements double temp_sum = 0; FastIndexer mult_indexer(mult_dims); for (int k = 0; k < numMultElements; ++k) { vector<int>& mult_indices = mult_indexer.GetNext(); // ComputeIndexArray(mult_indices, mult_offsets, k); vector<int> indicesX; vector<int> indicesA; MergeIndices(indicesX, mult_modesX, free_modesX, mult_indices, free_indicesX); MergeIndices(indicesA, mult_modesA, free_modesA, mult_indices, free_indicesA); A_matrix.Set(n, X.ComputeIndex(indicesX), A.At(indicesA)); } } }
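A minimal standalone sketch (plain arrays, not the Tensor/Matrix/VectorPlus classes used above) of the flattening idea behind CreateLinearSystem: a one-mode contraction B(i,k) = sum_j X(i,j) A(j,k) is linear in the entries of X, so it can be rewritten as B_vec = A_matrix * vec(X):

#include <cstdio>

int main() {
  const double X[2][2] = {{1, 2}, {3, 4}};
  const double A[2][2] = {{5, 6}, {7, 8}};

  // Row index of A_matrix = flattened (i,k) of B; column index = flattened (i2,j) of X.
  double A_matrix[4][4] = {};
  for (int i = 0; i < 2; ++i)
    for (int k = 0; k < 2; ++k)
      for (int j = 0; j < 2; ++j)
        A_matrix[i*2 + k][i*2 + j] = A[j][k];   // nonzero only when i2 == i

  // Check: A_matrix * vec(X) reproduces the direct contraction.
  for (int i = 0; i < 2; ++i)
    for (int k = 0; k < 2; ++k) {
      double direct = X[i][0]*A[0][k] + X[i][1]*A[1][k];
      double viaMat = 0;
      for (int c = 0; c < 4; ++c) viaMat += A_matrix[i*2 + k][c] * X[c/2][c%2];
      std::printf("B(%d,%d): direct=%g via-matrix=%g\n", i, k, direct, viaMat);
    }
  return 0;
}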
void TrigonometricPathVessel::finish( const std::vector<double>& buffer ) { // Store the data calculated during mpi loop StoreDataVessel::finish( buffer ); // Get current value of all arguments for(unsigned i=0; i<cargs.size(); ++i) cargs[i]=mymap->getArgument(i); // Determine closest and second closest point to current position double lambda=mymap->getLambda(); std::vector<double> dist( getNumberOfComponents() ), dist2( getNumberOfComponents() );; retrieveSequentialValue( 0, false, dist ); retrieveSequentialValue( 1, false, dist2 ); iclose1=getStoreIndex(0); iclose2=getStoreIndex(1); double mindist1=dist[0], mindist2=dist2[0]; if( lambda>0.0 ) { mindist1=-std::log( dist[0] ) / lambda; mindist2=-std::log( dist2[0] ) / lambda; } if( mindist2<mindist1 ) { double tmp=mindist1; mindist1=mindist2; mindist2=tmp; iclose1=getStoreIndex(1); iclose2=getStoreIndex(0); } for(unsigned i=2; i<getNumberOfStoredValues(); ++i) { retrieveSequentialValue( i, false, dist ); double ndist=dist[0]; if( lambda>0.0 ) ndist=-std::log( dist[0] ) / lambda; if( ndist<mindist1 ) { mindist2=mindist1; iclose2=iclose1; mindist1=ndist; iclose1=getStoreIndex(i); } else if( ndist<mindist2 ) { mindist2=ndist; iclose2=getStoreIndex(i); } } // And find third closest point int isign = iclose1 - iclose2; if( isign>1 ) isign=1; else if( isign<-1 ) isign=-1; int iclose3 = iclose1 + isign; double v2v2; // We now have to compute vectors connecting the three closest points to the // new point double v1v1 = (mymap->getReferenceConfiguration( iclose1 ))->calculate( mymap->getPositions(), mymap->getPbc(), mymap->getArguments(), mypack1, true ); double v3v3 = (mymap->getReferenceConfiguration( iclose2 ))->calculate( mymap->getPositions(), mymap->getPbc(), mymap->getArguments(), mypack3, true ); if( iclose3<0 || iclose3>=mymap->getFullNumberOfTasks() ) { ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose1 ); v2v2=(mymap->getReferenceConfiguration( iclose2 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(), conf2->getReferenceArguments(), mypack2, true ); (mymap->getReferenceConfiguration( iclose2 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(), conf2->getReferenceArguments(), false, projdir ); } else { ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose3 ); v2v2=(mymap->getReferenceConfiguration( iclose1 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(), conf2->getReferenceArguments(), mypack2, true ); (mymap->getReferenceConfiguration( iclose1 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(), conf2->getReferenceArguments(), false, projdir ); } // Stash derivatives of v1v1 for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) mypack1_stashd_args[i]=mypack1.getArgumentDerivative(i); if( mymap->getNumberOfAtoms()>0 ) { ReferenceAtoms* at = dynamic_cast<ReferenceAtoms*>( mymap->getReferenceConfiguration( iclose1 ) ); const std::vector<double> & displace( at->getDisplace() ); for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) { mypack1_stashd_atoms[i]=mypack1.getAtomDerivative(i); mypack1.getAtomsDisplacementVector()[i] /= displace[i]; } } // Calculate the dot product of v1 with v2 double v1v2 = (mymap->getReferenceConfiguration(iclose1))->projectDisplacementOnVector( projdir, mymap->getArguments(), cargs, mypack1 ); // This computes s value double spacing = mymap->getPropertyValue( iclose1, (mymap->property.begin())->first ) - mymap->getPropertyValue( iclose2, 
(mymap->property.begin())->first ); double root = sqrt( v1v2*v1v2 - v2v2 * ( v1v1 - v3v3) ); dx = 0.5 * ( (root + v1v2) / v2v2 - 1.); double path_s = mymap->getPropertyValue(iclose1, (mymap->property.begin())->first ) + spacing * dx; sp->set( path_s ); double fact = 0.25*spacing / v2v2; // Derivative of s wrt arguments for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) { sp->setDerivative( i, fact*( mypack2.getArgumentDerivative(i) + (v2v2 * (-mypack1_stashd_args[i] + mypack3.getArgumentDerivative(i)) + v1v2*mypack2.getArgumentDerivative(i) )/root ) ); } // Derivative of s wrt atoms unsigned narg=mymap->getNumberOfArguments(); Tensor vir; vir.zero(); fact = 0.5*spacing / v2v2; if( mymap->getNumberOfAtoms()>0 ) { for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) { Vector ader = fact*(( v1v2*mypack1.getAtomDerivative(i) + 0.5*v2v2*(-mypack1_stashd_atoms[i] + mypack3.getAtomDerivative(i) ) )/root + mypack1.getAtomDerivative(i) ); for(unsigned k=0; k<3; ++k) sp->setDerivative( narg+3*i+k, ader[k] ); vir-=Tensor( mymap->getPosition(i), ader ); } // Set the virial unsigned nbase=narg+3*mymap->getNumberOfAtoms(); for(unsigned i=0; i<3; ++i) for(unsigned j=0; j<3; ++j) sp->setDerivative( nbase+3*i+j, vir(i,j) ); } // Now compute z value ReferenceConfiguration* conf2=mymap->getReferenceConfiguration( iclose1 ); double v4v4=(mymap->getReferenceConfiguration( iclose2 ))->calc( conf2->getReferencePositions(), mymap->getPbc(), mymap->getArguments(), conf2->getReferenceArguments(), mypack2, true ); // Extract vector connecting frames (mymap->getReferenceConfiguration( iclose2 ))->extractDisplacementVector( conf2->getReferencePositions(), mymap->getArguments(), conf2->getReferenceArguments(), false, projdir ); // Calculate projection of vector on line connnecting frames double proj = (mymap->getReferenceConfiguration(iclose1))->projectDisplacementOnVector( projdir, mymap->getArguments(), cargs, mypack1 ); double path_z = v1v1 + dx*dx*v4v4 - 2*dx*proj; // Derivatives for z path path_z = sqrt(path_z); zp->set( path_z ); vir.zero(); for(unsigned i=0; i<mymap->getNumberOfArguments(); ++i) zp->setDerivative( i, (mypack1_stashd_args[i] - 2*dx*mypack1.getArgumentDerivative(i))/(2.0*path_z) ); // Derivative wrt atoms if( mymap->getNumberOfAtoms()>0 ) { for(unsigned i=0; i<mymap->getNumberOfAtoms(); ++i) { Vector dxder; for(unsigned k=0; k<3; ++k) dxder[k] = ( 2*v4v4*dx - 2*proj )*spacing*sp->getDerivative( narg + 3*i+k ); Vector ader = ( mypack1_stashd_atoms[i] - 2.*dx*mypack1.getAtomDerivative(i) + dxder )/ (2.0*path_z); for(unsigned k=0; k<3; ++k) zp->setDerivative( narg+3*i+k, ader[k] ); vir-=Tensor( mymap->getPosition(i), ader ); } // Set the virial unsigned nbase=narg+3*mymap->getNumberOfAtoms(); for(unsigned i=0; i<3; ++i) for(unsigned j=0; j<3; ++j) zp->setDerivative( nbase+3*i+j, vir(i,j) ); } }
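For reference, a hedged restatement of the closed form evaluated by finish() above, collected into one illustrative helper. The parameter names mirror the local variables (v1v1/v3v3 are squared distances from the instantaneous configuration to the two closest frames, v2v2/v4v4 the squared inter-frame distances used in the s and z branches, v1v2/proj the projections, s_close the property value of the closest frame); this is not PLUMED API:

#include <cmath>

struct PathSZ { double s, z; };

inline PathSZ path_sz(double v1v1, double v2v2, double v3v3, double v4v4,
                      double v1v2, double proj, double s_close, double spacing) {
  const double root = std::sqrt(v1v2*v1v2 - v2v2*(v1v1 - v3v3));
  const double dx   = 0.5*((root + v1v2)/v2v2 - 1.0);   // fractional position between frames
  const double s    = s_close + spacing*dx;             // progress along the path
  const double z    = std::sqrt(v1v1 + dx*dx*v4v4 - 2.0*dx*proj);  // distance from the path
  return {s, z};
}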
std::tuple<Tensor, Tensor, Tensor, Tensor> embedding_bag_cpu(const Tensor &weight, const Tensor &indices__, const Tensor &offsets__, const bool scale_grad_by_freq, const int64_t mode, bool sparse) { auto indices_arg = TensorArg(indices__, "indices__", 1); checkScalarType("embedding_bag", indices_arg, kLong); auto offsets_arg = TensorArg(offsets__, "offsets__", 1); checkScalarType("embedding_bag", offsets_arg, kLong); Tensor indices = indices__.contiguous(); Tensor offsets = offsets__.contiguous(); auto weight_arg = TensorArg(weight, "weight", 1); checkScalarTypes("embedding_bag", weight_arg, {kFloat, kDouble}); auto bag_size = at::zeros(offsets.sizes(), indices.type()); make_bag_size(offsets, indices, mode, bag_size); // If the last entries are empty, that the last offsets are irrelevant as they // won't change anything in the assignment of ID -> bag, but index_add would // throw out of bounds error. So to keep it simple we just add one more // entry to the end then get rid of it after make_offset2bag. auto offset2bag = at::zeros( {indices.sizes()[0] + 1}, indices__.type()); // offset2bag = [0 0 0 0 0] make_offset2bag(offsets, indices, offset2bag); offset2bag.resize_({indices.sizes()[0]}); auto output = at::zeros({offsets.size(0), weight.size(1)}, weight.type()); if (mode == MODE_MEAN || mode == MODE_SUM) { if (weight.type().scalarType() == kFloat) { index_select_add<float>(indices, offset2bag, weight, output); } else if (weight.type().scalarType() == kDouble) { index_select_add<double>(indices, offset2bag, weight, output); } auto ret = apply_bag_size(offsets, indices, mode, output, bag_size); return std::tuple<Tensor, Tensor, Tensor, Tensor>(ret, offset2bag, bag_size, bag_size); } else { // MODE_MAX return AT_DISPATCH_FLOATING_TYPES_AND_HALF( weight.type(), "embedding_bag_cpu_max", [&]() { return embedding_bag_cpu_max<scalar_t>(weight, indices, offset2bag, output, bag_size, offsets); } ); } }
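A small self-contained sketch (illustrative only, not the ATen helpers) of the bookkeeping that make_offset2bag and make_bag_size perform above: each index position is assigned to the bag whose offset range contains it, and each bag's size is the difference of consecutive offsets:

#include <cstdio>
#include <vector>

int main() {
  const std::vector<long> offsets = {0, 2, 5};   // 3 bags over 7 indices
  const long n_indices = 7;

  std::vector<long> offset2bag(n_indices, 0), bag_size(offsets.size(), 0);
  for (size_t b = 0; b < offsets.size(); ++b) {
    const long end = (b + 1 < offsets.size()) ? offsets[b + 1] : n_indices;
    for (long i = offsets[b]; i < end; ++i) offset2bag[i] = (long)b;  // index i lands in bag b
    bag_size[b] = end - offsets[b];                                   // used by MODE_MEAN scaling
  }

  for (long i = 0; i < n_indices; ++i) std::printf("index %ld -> bag %ld\n", i, offset2bag[i]);
  for (size_t b = 0; b < offsets.size(); ++b) std::printf("bag %zu size %ld\n", b, bag_size[b]);
  return 0;
}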
inline void MapPlan(Tensor<gpu,dim> _dst, const expr::Plan<E> &plan){ cuda::MapPlan<Saver>( _dst.FlatTo2D(), plan ); }
void Tensor<Dtype>::ShareMem(const Tensor& other) { ASSERT(count_ == other.count(), "ShareMem requires tensors with equal element counts"); mem_ = other.mem(); }
void Tensor::copyTo(Tensor& dst) { void* p = map(); dst.reshape((const char*)p, shape_, format_); unMap(); }
void MeshFunction::hessian (const Point& p, const Real, std::vector<Tensor>& output) { libmesh_assert (this->initialized()); /* Ensure that in the case of a master mesh function, the out-of-mesh mode is enabled either for both or for none. This is important because the out-of-mesh mode is also communicated to the point locator. Since this is time consuming, enable it only in debug mode. */ #ifdef DEBUG if (this->_master != NULL) { const MeshFunction* master = cast_ptr<const MeshFunction*>(this->_master); if(_out_of_mesh_mode!=master->_out_of_mesh_mode) libmesh_error_msg("ERROR: If you use out-of-mesh-mode in connection with master mesh " \ << "functions, you must enable out-of-mesh mode for both the master and the slave mesh function."); } #endif // locate the point in the other mesh const Elem* element = this->_point_locator->operator()(p); // If we have an element, but it's not a local element, then we // either need to have a serialized vector or we need to find a // local element sharing the same point. if (element && (element->processor_id() != this->processor_id()) && _vector.type() != SERIAL) { // look for a local element containing the point std::set<const Elem*> point_neighbors; element->find_point_neighbors(p, point_neighbors); element = NULL; std::set<const Elem*>::const_iterator it = point_neighbors.begin(); const std::set<const Elem*>::const_iterator end = point_neighbors.end(); for (; it != end; ++it) { const Elem* elem = *it; if (elem->processor_id() == this->processor_id()) { element = elem; break; } } } if (!element) { output.resize(0); } else { // resize the output vector to the number of output values // that the user told us output.resize (this->_system_vars.size()); { const unsigned int dim = this->_eqn_systems.get_mesh().mesh_dimension(); /* * Get local coordinates to feed these into compute_data(). * Note that the fe_type can safely be used from the 0-variable, * since the inverse mapping is the same for all FEFamilies */ const Point mapped_point (FEInterface::inverse_map (dim, this->_dof_map.variable_type(0), element, p)); std::vector<Point> point_list (1, mapped_point); // loop over all vars for (unsigned int index=0; index < this->_system_vars.size(); index++) { /* * the data for this variable */ const unsigned int var = _system_vars[index]; const FEType& fe_type = this->_dof_map.variable_type(var); AutoPtr<FEBase> point_fe (FEBase::build(dim, fe_type)); const std::vector<std::vector<RealTensor> >& d2phi = point_fe->get_d2phi(); point_fe->reinit(element, &point_list); // where the solution values for the var-th variable are stored std::vector<dof_id_type> dof_indices; this->_dof_map.dof_indices (element, dof_indices, var); // interpolate the solution Tensor hess; for (unsigned int i=0; i<dof_indices.size(); i++) hess.add_scaled(d2phi[i][0], this->_vector(dof_indices[i])); output[index] = hess; } } } // all done return; }
bool test_fundamentals(Index const dimension) { bool passed = true; Index const number_components = integer_power(dimension, Tensor::ORDER); std::vector<Scalar> const X = generate_sequence<Scalar>(number_components, 1.0, 1.0); // Test constructor with pointer Tensor const A(dimension, &X[0]); // Test copy constructor Tensor B = A; Tensor C; // Test copy assignment C = B - A; Scalar error = norm_f(C); bool const copy_assigned = error <= machine_epsilon<Scalar>(); passed = passed && copy_assigned; // Test fill with pointer B.fill(&X[0]); C = B - A; error = norm_f(C); bool const filled_pointer = error <= machine_epsilon<Scalar>(); passed = passed && filled_pointer; std::vector<Scalar> const Y = generate_sequence<Scalar>(number_components, -1.0, -1.0); C.fill(&Y[0]); // Test increment C += A; error = norm_f(C); bool const incremented = error <= machine_epsilon<Scalar>(); passed = passed && incremented; C.fill(&X[0]); // Test decrement C -= A; error = norm_f(C); bool const decremented = error <= machine_epsilon<Scalar>(); passed = passed && decremented; #ifdef HAVE_INTREPID_KOKKOSCORE //test Tensor fill and create for Kokkos data types Kokkos::View<Scalar *, Kokkos::DefaultExecutionSpace> X1("X1_kokkos", dimension); Kokkos::View<Scalar **, Kokkos::DefaultExecutionSpace> X2("X2_kokkos", dimension, dimension); Kokkos::View<Scalar ***, Kokkos::DefaultExecutionSpace> X3("X3_kokkos", dimension, dimension, dimension); Kokkos::View<Scalar ****, Kokkos::DefaultExecutionSpace> X4("X4_kokkos", dimension, dimension, dimension, dimension); Kokkos::deep_copy(X1, 3.1); Kokkos::deep_copy(X2, 3.2); Kokkos::deep_copy(X3, 3.3); Kokkos::deep_copy(X4, 3.4); Tensor Z(dimension); //(X1_k,0); Index rank = 0; Index temp = number_components; while (temp != 1) { temp = temp / dimension; rank = rank + 1; assert(temp > 0); } switch (rank) { default: assert(false); break; case 1: Z.fill(X1, 0); break; case 2: Z.fill(X2, 0, 0); break; case 3: Z.fill(X3, 0, 0, 0); break; case 4: Z.fill(X4, 0, 0, 0, 0); break; } // Test copy constructor. Tensor const U = Z; // Test copy assignment. Tensor V; V = U - Z; error = norm_f(V); bool const tensor_create_from_1d_kokkos = error <= machine_epsilon<Scalar>(); passed = passed && tensor_create_from_1d_kokkos; #endif return passed; }
#define CATCH_CONFIG_MAIN #include "catch.hpp" #include "ATen/ATen.h" #include "ATen/DLConvertor.h" #include <iostream> #include <string.h> #include <sstream> #include "test_seed.h" using namespace at; TEST_CASE( "parallel", "[cpu]" ) { manual_seed(123, at::Backend::CPU); set_num_threads(1); Tensor a = rand(CPU(at::kFloat), {1,3}); a[0][0] = 1; a[0][1] = 0; a[0][2] = 0; Tensor as = rand(CPU(at::kFloat), {3}); as[0] = 1; as[1] = 0; as[2] = 0; REQUIRE(a.sum(0).equal(as)); }
Tensor upsample_nearest1d_cpu(const Tensor& input, IntArrayRef output_size) { auto output = at::empty({0}, input.options()); upsample_nearest1d_out_cpu_template(output, input, output_size); return output; }
static void test_resize() { Tensor<int, 3> epsilon; epsilon.resize(2,3,7); VERIFY_IS_EQUAL(epsilon.dimension(0), 2); VERIFY_IS_EQUAL(epsilon.dimension(1), 3); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7); const int* old_data = epsilon.data(); epsilon.resize(3,2,7); VERIFY_IS_EQUAL(epsilon.dimension(0), 3); VERIFY_IS_EQUAL(epsilon.dimension(1), 2); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7); VERIFY_IS_EQUAL(epsilon.data(), old_data); epsilon.resize(3,5,7); VERIFY_IS_EQUAL(epsilon.dimension(0), 3); VERIFY_IS_EQUAL(epsilon.dimension(1), 5); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 3*5*7); VERIFY_IS_NOT_EQUAL(epsilon.data(), old_data); }
void ERMSD::calcMat(const std::vector<Vector> & positions,const Pbc& pbc, std::vector<Vector4d> &mat, std::vector<TensorGeneric<4,3> > &Gderi) { std::vector<Vector3d> pos; pos.resize(3*nresidues); std::vector<Tensor3d> deri; deri.resize(nresidues*9); std::vector<Vector> centers; centers.resize(nresidues); unsigned idx_deri = 0; Tensor da_dxa = (2./3.)*Tensor::identity(); Tensor da_dxb = -(1./3.)*Tensor::identity(); Tensor da_dxc = -(1./3.)*Tensor::identity(); Tensor db_dxa = -(1./3.)*Tensor::identity(); Tensor db_dxb = (2./3.)*Tensor::identity(); Tensor db_dxc = -(1./3.)*Tensor::identity(); // Form factors - should this be somewhere else? double w = 1./3.; Vector form_factor = Vector(2.0,2.0,1.0/0.3); for(unsigned res_idx=0; res_idx<natoms/3; res_idx++) { const unsigned at_idx = 3*res_idx; //center for (unsigned j=0; j<3; j++) { centers[res_idx] += w*positions[at_idx+j]; } Vector3d a = delta(centers[res_idx],positions[at_idx]); Vector3d b = delta(centers[res_idx],positions[at_idx+1]); Vector3d d = crossProduct(a,b); double ianorm = 1./a.modulo(); double idnorm = 1./d.modulo(); // X versor: COM-C2 pos[at_idx] = a*ianorm; // Z versor: C2 x (COM-C4/C6) pos[at_idx+2] = d*idnorm; // Y versor: Z x X pos[at_idx+1] = crossProduct(pos[at_idx+2],pos[at_idx]); // Derivatives //////// Tensor3d t1 = ianorm*(Tensor::identity()-extProduct(pos[at_idx],pos[at_idx])); // dv1/dxa deri[idx_deri] = (2./3. )*t1; // dv1/dxb deri[idx_deri+3] = -(1./3.)*t1; // dv1/dxc deri[idx_deri+6] = -(1./3.)*t1; Tensor dd_dxa = VcrossTensor(a,db_dxa) -VcrossTensor(b,da_dxa); Tensor dd_dxb = VcrossTensor(a,db_dxb)-VcrossTensor(b,da_dxb); Tensor dd_dxc = VcrossTensor(a,db_dxc)-VcrossTensor(b,da_dxc); // dv3/dxa deri[idx_deri+2] = deriNorm(d,dd_dxa); // dv3/dxb deri[idx_deri+5] = deriNorm(d,dd_dxb); // dv3/dxc deri[idx_deri+8] = deriNorm(d,dd_dxc); // dv2/dxa = dv3/dxa cross v1 + v3 cross dv1/dxa deri[idx_deri+1] = (VcrossTensor(deri[idx_deri+2],pos[at_idx]) + \ VcrossTensor(pos[at_idx+2],deri[idx_deri])); // dv2/dxb deri[idx_deri+4] = (VcrossTensor(deri[idx_deri+5],pos[at_idx]) + \ VcrossTensor(pos[at_idx+2],deri[idx_deri+3])); // dv2/dxc deri[idx_deri+7] = (VcrossTensor(deri[idx_deri+8],pos[at_idx]) + \ VcrossTensor(pos[at_idx+2],deri[idx_deri+6])); idx_deri += 9; // End derivatives /////// } // Initialization (unnecessary?)
for (unsigned i1=0; i1<nresidues*nresidues; i1++) { for (unsigned i2=0; i2<4; i2++) { mat[i1][i2] = 0.0; } } double maxdist = cutoff/form_factor[0]; double gamma = pi/cutoff; unsigned idx; unsigned idx1 = 0; // Calculate mat for (unsigned i=0; i<nresidues; i++) { for (unsigned j=0; j<nresidues; j++) { // skip i==j if(inPair(i,j) and i != j) { //if(i!=j){ // Calculate normal distance first Vector diff = delta(centers[i],centers[j]); double d1 = diff.modulo(); //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n"; //std::cout << inPair(i,j) << " " << i << " " << j << " "<< d1 <<"\n"; if(d1<maxdist) { // calculate r_tilde_ij Vector3d rtilde; for (unsigned k=0; k<3; k++) { for (unsigned l=0; l<3; l++) { rtilde[l] += pos[3*i+l][k]*diff[k]*form_factor[l]; } } double rtilde_norm = rtilde.modulo(); double irnorm = 1./rtilde_norm; // ellipsoidal cutoff if(rtilde_norm < cutoff) { idx = i*nresidues + j; //std::cout << i << " " << j << " " << rtilde_norm << " " << idx <<"\n"; // fill 4d matrix double dummy = sin(gamma*rtilde_norm)/(rtilde_norm*gamma); mat[idx][0] = dummy*rtilde[0]; mat[idx][1] = dummy*rtilde[1]; mat[idx][2] = dummy*rtilde[2]; mat[idx][3] = (1.+ cos(gamma*rtilde_norm))/gamma; // Derivative (drtilde_dx) std::vector<Tensor3d> drtilde_dx; drtilde_dx.resize(6); unsigned pos_idx = 3*i; unsigned deri_idx = 9*i; for (unsigned at=0; at<3; at++) { for (unsigned l=0; l<3; l++) { Vector3d rvec = form_factor[l]*((pos[pos_idx+l])/3.); Vector3d vvec = form_factor[l]*(matmul(deri[deri_idx+3*at+l],diff)); drtilde_dx[at].setRow(l,vvec-rvec); drtilde_dx[at+3].setRow(l,rvec); } } //std::vector<TensorGeneric<4,3> > dG_dx; //dG_dx.resize(6); double dummy1 = (cos(gamma*rtilde_norm) - dummy); idx1 = i*nresidues*6 + j*6; for (unsigned l=0; l<6; l++) { //std::cout << i << " " << j << " " << idx1 << " " << idx1+l << "\n"; // components 1,2,3 // sin(gamma*|rtilde|)/gamma*|rtilde|*d_rtilde + // + ((d_rtilde*r_tilde/r_tilde^2) out r_tilde)* // (cos(gamma*|rtilde| - sin(gamma*|rtilde|)/gamma*|rtilde|)) Vector3d rdr = matmul(rtilde,drtilde_dx[l]); Tensor tt = dummy*drtilde_dx[l] + (dummy1*irnorm*irnorm)*Tensor(rtilde,rdr); for (unsigned m=0; m<3; m++) { // Transpose here //dG_dx[l].setRow(m,tt.getRow(m)); Gderi[idx1+l].setRow(m,tt.getRow(m)); } // component 4 // - sin(gamma*|rtilde|)/|rtilde|*(r_tilde*d_rtilde) //dG_dx[l].setRow(3,-dummy*gamma*rdr); Gderi[idx1+l].setRow(3,-dummy*gamma*rdr); } } } } } } }
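A hedged standalone sketch of the 4-component entry written into mat above, with gamma = pi/cutoff and a zero entry outside the ellipsoidal cutoff (illustrative helper, not PLUMED API):

#include <array>
#include <cmath>

std::array<double,4> ermsd_G(const std::array<double,3>& rtilde, double cutoff) {
  const double pi    = std::acos(-1.0);
  const double gamma = pi / cutoff;
  const double r = std::sqrt(rtilde[0]*rtilde[0] + rtilde[1]*rtilde[1] + rtilde[2]*rtilde[2]);
  if (r >= cutoff) return {0.0, 0.0, 0.0, 0.0};            // outside the ellipsoidal cutoff
  const double dummy = std::sin(gamma*r) / (gamma*r);      // radial smoothing factor
  return { dummy*rtilde[0], dummy*rtilde[1], dummy*rtilde[2],
           (1.0 + std::cos(gamma*r)) / gamma };            // fourth component
}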
void Tensor<XPU, DT>::blas_vsqrt(const Tensor<XPU, DT> &in) { const int N = size(); CHECK_EQ (N, in.size()); unary_vexpr_kernel<opsqrt<DT>><<<cuda_get_blocks(N), CUDA_NUM_THREADS, 0, get_calc_stream()>>> (N, in.dptr, dptr); cuda_sync_check ("cublas_vsqrt"); };
TEST(TestScalar, TestScalar) { manual_seed(123); Scalar what = 257; Scalar bar = 3.0; Half h = bar.toHalf(); Scalar h2 = h; cout << "H2: " << h2.toDouble() << " " << what.toFloat() << " " << bar.toDouble() << " " << what.isIntegral() << "\n"; Generator& gen = at::globalContext().defaultGenerator(at::kCPU); ASSERT_NO_THROW(gen.seed()); auto&& C = at::globalContext(); if (at::hasCUDA()) { auto t2 = zeros({4, 4}, at::kCUDA); cout << &t2 << "\n"; } auto t = ones({4, 4}); auto wha2 = zeros({4, 4}).add(t).sum(); ASSERT_EQ(wha2.item<double>(), 16.0); ASSERT_EQ(t.sizes()[0], 4); ASSERT_EQ(t.sizes()[1], 4); ASSERT_EQ(t.strides()[0], 4); ASSERT_EQ(t.strides()[1], 1); TensorOptions options = dtype(kFloat); Tensor x = randn({1, 10}, options); Tensor prev_h = randn({1, 20}, options); Tensor W_h = randn({20, 20}, options); Tensor W_x = randn({20, 10}, options); Tensor i2h = at::mm(W_x, x.t()); Tensor h2h = at::mm(W_h, prev_h.t()); Tensor next_h = i2h.add(h2h); next_h = next_h.tanh(); ASSERT_ANY_THROW(Tensor{}.item()); test_overflow(); if (at::hasCUDA()) { auto r = next_h.to(at::Device(kCUDA), kFloat, /*non_blocking=*/ false, /*copy=*/ true); ASSERT_TRUE(r.to(at::Device(kCPU), kFloat, /*non_blocking=*/ false, /*copy=*/ true).equal(next_h)); } ASSERT_NO_THROW(randn({10, 10, 2}, options)); // check Scalar.toTensor on Scalars backed by different data types ASSERT_EQ(scalar_to_tensor(bar).scalar_type(), kDouble); ASSERT_EQ(scalar_to_tensor(what).scalar_type(), kLong); ASSERT_EQ(scalar_to_tensor(ones({}).item()).scalar_type(), kDouble); if (x.scalar_type() != ScalarType::Half) { AT_DISPATCH_ALL_TYPES(x.scalar_type(), "foo", [&] { scalar_t s = 1; std::stringstream ss; ASSERT_NO_THROW( ss << "hello, dispatch" << x.dispatch_type().toString() << s << "\n"); auto data = (scalar_t*)x.data_ptr(); (void)data; }); } // test direct C-scalar type conversions { auto x = ones({1, 2}, options); ASSERT_ANY_THROW(x.item<float>()); } auto float_one = ones({}, options); ASSERT_EQ(float_one.item<float>(), 1); ASSERT_EQ(float_one.item<int32_t>(), 1); ASSERT_EQ(float_one.item<at::Half>(), 1); }
void Tensor<XPU, DT>::blas_vdiv (const Tensor<XPU, DT> &A, const Tensor<XPU, DT> &B) { const int N = size(); CHECK_EQ (N, A.size()); CHECK_EQ (A.size(), B.size()); binary_vexpr_kernel<opdiv <DT>><<<cuda_get_blocks(N), CUDA_NUM_THREADS, 0, get_calc_stream()>>> (N, A.dptr, B.dptr, dptr); cuda_sync_check ("cublas_vdiv"); };
void SpatialDivisiveNormalization::init(std::shared_ptr<TorchData> input) { RASSERT(input->type() == TorchDataType::TENSOR_DATA); Tensor<float>* in = TO_TENSOR_PTR(input.get()); RASSERT(in->dim() == 3); if (output != nullptr) { if (!in->isSameSizeAs(*TO_TENSOR_PTR(output.get()))) { // Input dimension has changed! cleanup(); } } if (output == nullptr) { output.reset(new Tensor<float>(in->dim(), in->size())); std_pass1_.reset(new Tensor<float>(in->dim(), in->size())); std_pass2_.reset(new Tensor<float>(in->dim(), in->size())); } if (kernel_norm_ == nullptr) { bool onedim_kernel = kernel_->dim() == 1; const float n_feats = (float)in->size()[2]; // Clone and normalize the input kernel kernel_norm_.reset(Tensor<float>::clone(*kernel_)); float sum = Tensor<float>::slowSum(*kernel_norm_); float div_val = onedim_kernel ? (sum * sqrtf(n_feats)) : (sum * n_feats); Tensor<float>::div(*kernel_norm_, div_val); } if (std_coef_ == nullptr) { uint32_t std_coeff_size[2]; std_coeff_size[0] = TO_TENSOR_PTR(output.get())->size()[0]; std_coeff_size[1] = TO_TENSOR_PTR(output.get())->size()[1]; std_coef_.reset(new Tensor<float>(2, std_coeff_size)); std::unique_ptr<float[]> std_coef_cpu(new float[std_coef_->nelems()]); std::unique_ptr<float[]> kernel_norm_cpu(new float[kernel_norm_->nelems()]); kernel_norm_->getData(kernel_norm_cpu.get()); bool onedim_kernel = kernel_->dim() == 1; // Filter an image of all 1 values to create the normalization constants // See norm_test.lua for proof that this works as well as: // https://github.com/andresy/torch/blob/master/extra/nn/SpatialDivisiveNormalization.lua int32_t n_feats = TO_TENSOR_PTR(output.get())->size()[2]; int32_t height = TO_TENSOR_PTR(output.get())->size()[1]; int32_t width = TO_TENSOR_PTR(output.get())->size()[0]; if (onedim_kernel) { // 1D case - The filter is seperable, but we'll just do the dumb 2D // version since we only do this once on startup. --> O(n * m) int32_t kernel_size = kernel_norm_->size()[0]; int32_t filt_rad = (kernel_size - 1) / 2; for (int32_t v = 0; v < height; v++) { for (int32_t u = 0; u < width; u++) { float tmp = 0.0f; for (int32_t v_filt = -filt_rad; v_filt <= filt_rad; v_filt++) { for (int32_t u_filt = -filt_rad; u_filt <= filt_rad; u_filt++) { int32_t u_in = u + u_filt; int32_t v_in = v + v_filt; if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) { // Pixel is inside --> We'll effectively clamp zeros elsewhere. tmp += (kernel_norm_cpu[v_filt + filt_rad] * kernel_norm_cpu[u_filt + filt_rad]); } } } std_coef_cpu[v * width + u] = tmp / n_feats; } } } else { // 2D case int32_t kernel_size_u = kernel_norm_->size()[0]; int32_t kernel_size_v = kernel_norm_->size()[1]; int32_t filt_rad_u = (kernel_size_u - 1) / 2; int32_t filt_rad_v = (kernel_size_v - 1) / 2; for (int32_t v = 0; v < height; v++) { for (int32_t u = 0; u < width; u++) { float tmp = 0.0f; for (int32_t v_filt = -filt_rad_v; v_filt <= filt_rad_v; v_filt++) { for (int32_t u_filt = -filt_rad_u; u_filt <= filt_rad_u; u_filt++) { int32_t u_in = u + u_filt; int32_t v_in = v + v_filt; if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) { // Pixel is inside --> We'll effectively clamp zeros elsewhere. 
tmp += kernel_norm_cpu[(v_filt + filt_rad_v) * kernel_size_u + (u_filt + filt_rad_u)]; } } } std_coef_cpu[v * width + u] = tmp / n_feats; } } } std_coef_->setData(std_coef_cpu.get()); } if (std_ == nullptr) { uint32_t std_coeff_size[2]; std_coeff_size[0] = TO_TENSOR_PTR(output.get())->size()[0]; std_coeff_size[1] = TO_TENSOR_PTR(output.get())->size()[1]; std_.reset(new Tensor<float>(2, std_coeff_size)); } }
void Tensor<Dtype>::ReshapeLike(const Tensor<Dtype>& other) { Reshape(other.shape()); }
void SpatialDivisiveNormalization::forwardProp( std::shared_ptr<TorchData> input) { init(input); bool onedim_kernel = kernel_->dim() == 1; Tensor<float>* in = TO_TENSOR_PTR(input.get()); Tensor<float>* out = TO_TENSOR_PTR(output.get()); if (onedim_kernel) { int32_t filt_rad = ((int32_t)kernel_norm_->size()[0] - 1) / 2; // Perform horizontal filter pass cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel, "SpatialDivisiveNormalizationHoriz"); cl_context->setArg(0, in->storage()); cl_context->setArg(1, std_pass1_->storage()); cl_context->setArg(2, kernel_norm_->storage()); cl_context->setArg(3, filt_rad); cl_context->runKernel(jtorch::deviceid, std_pass1_->dim(), std_pass1_->size(), false); // Perform vertical filter pass cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel, "SpatialDivisiveNormalizationVert"); cl_context->setArg(0, std_pass1_->storage()); cl_context->setArg(1, std_pass2_->storage()); cl_context->setArg(2, kernel_norm_->storage()); cl_context->setArg(3, filt_rad); cl_context->runKernel(jtorch::deviceid, std_pass2_->dim(), std_pass2_->size(), false); } else { int32_t filt_rad_u = ((int32_t)kernel_norm_->size()[0] - 1) / 2; int32_t filt_rad_v = ((int32_t)kernel_norm_->size()[1] - 1) / 2; // Perform vertical filter pass cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel, "SpatialDivisiveNormalization2D"); cl_context->setArg(0, in->storage()); cl_context->setArg(1, std_pass2_->storage()); cl_context->setArg(2, kernel_norm_->storage()); cl_context->setArg(3, filt_rad_u); cl_context->setArg(4, filt_rad_v); cl_context->runKernel(jtorch::deviceid, std_pass2_->dim(), std_pass2_->size(), false); } // Perform accumulation and division pass cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel, "SpatialDivisiveNormalizationAccumDiv"); cl_context->setArg(0, std_pass2_->storage()); cl_context->setArg(1, std_->storage()); cl_context->setArg(2, std_coef_->storage()); cl_context->setArg(3, (int)out->size()[2]); cl_context->setArg(4, threshold_); cl_context->runKernel(jtorch::deviceid, std_->dim(), std_->size(), false); // Perform normalization pass cl_context->useKernelCStr(kSpatialDivisiveNormalizationKernel, "SpatialDivisiveNormalization"); cl_context->setArg(0, in->storage()); cl_context->setArg(1, out->storage()); cl_context->setArg(2, std_->storage()); cl_context->runKernel(jtorch::deviceid, out->dim(), out->size(), false); }
RTensor do_block_svd(const Tensor &A, Tensor *pU, Tensor *pVT, bool economic) { index rows = A.rows(); index cols = A.columns(); if (rows != cols && !economic) return svd(A, pU, pVT, economic); index minrc = std::min(rows, cols); index nblocks; Indices *block_rows, *block_cols; if (!find_blocks<Tensor>(A, &nblocks, &block_rows, &block_cols)) { return svd(A, pU, pVT, economic); } if ((nblocks == 1) && (block_rows[0].size() >= rows/2) && (block_cols[0].size() >= cols/2)) { RTensor s = svd(A, pU, pVT, economic); delete[] block_rows; delete[] block_cols; return s; } RTensor s(minrc); s.fill_with_zeros(); if (pU) { *pU = Tensor::zeros(rows, economic? minrc : rows); } if (pVT) { *pVT = Tensor::zeros(economic? minrc : cols, cols); } RTensor stemp; Tensor Utemp, Vtemp; Tensor *pUtemp = pU? &Utemp : 0; Tensor *pVtemp = pVT? &Vtemp : 0; for (index b = 0, sndx = 0; b < nblocks; b++) { Tensor m = A(range(block_rows[b]), range(block_cols[b])); index n = m.size(); if (m.size() > 1) { stemp = svd(m, pUtemp, pVtemp, economic); index slast = sndx + stemp.size() - 1; s.at(range(sndx, slast)) = stemp; if (pU) { (*pU).at(range(block_rows[b]), range(sndx, slast)) = Utemp; } if (pVT) { (*pVT).at(range(sndx, slast), range(block_cols[b])) = Vtemp; } sndx = slast + 1; } else { index row = block_rows[b][0]; index col = block_cols[b][0]; double aux = abs(m[0]); s.at(sndx) = aux; if (pU) { (*pU).at(row,sndx) = 1.0; } if (pVT) { (*pVT).at(sndx,col) = m[0]/aux; } ++sndx; } } delete[] block_rows; delete[] block_cols; Indices ndx = sort_indices(s, true); s = s(range(ndx)); if (pU) *pU = (*pU)(range(), range(ndx)); if (pVT) *pVT = (*pVT)(range(ndx), range()); return s; }
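A hedged Eigen-based sketch of the property do_block_svd exploits: the singular values of a block-diagonal matrix are the union of the blocks' singular values, so each block can be decomposed independently and the results merged and sorted (Eigen::JacobiSVD is used here only for illustration, not the tensor library above):

#include <Eigen/Dense>
#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main() {
  Eigen::Matrix2d B1, B2;
  B1 << 3, 1, 0, 2;
  B2 << 5, 0, 1, 1;
  Eigen::Matrix4d A = Eigen::Matrix4d::Zero();
  A.topLeftCorner<2,2>() = B1;        // block structure: A = diag(B1, B2)
  A.bottomRightCorner<2,2>() = B2;

  std::vector<double> s_blocks;
  for (const auto& B : {B1, B2}) {
    Eigen::JacobiSVD<Eigen::Matrix2d> svd(B);   // singular values only
    s_blocks.push_back(svd.singularValues()(0));
    s_blocks.push_back(svd.singularValues()(1));
  }
  std::sort(s_blocks.begin(), s_blocks.end(), std::greater<double>());

  Eigen::JacobiSVD<Eigen::Matrix4d> full(A);
  for (int i = 0; i < 4; ++i)
    std::cout << s_blocks[i] << " vs " << full.singularValues()(i) << "\n";
  return 0;
}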
std::tuple<Tensor, Tensor> _unique_cpu(const Tensor& self, const bool sorted, const bool return_inverse) { return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] { return _unique_cpu_template<scalar_t>(self, sorted, return_inverse); }); }
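A hedged standalone sketch of the dispatch pattern behind AT_DISPATCH_ALL_TYPES above: a runtime dtype tag selects the compile-time scalar type that a generic lambda is instantiated with. DType and dispatch_by_dtype below are illustrative names, not ATen API:

#include <cstdio>

enum class DType { Float, Double, Int64 };

template <typename F>
void dispatch_by_dtype(DType t, F&& f) {
  switch (t) {
    case DType::Float:  f(float{});  break;
    case DType::Double: f(double{}); break;
    case DType::Int64:  f(long{});   break;
  }
}

int main() {
  dispatch_by_dtype(DType::Double, [](auto zero) {
    using scalar_t = decltype(zero);   // plays the role of scalar_t in the macro body
    std::printf("dispatched with sizeof(scalar_t) = %zu\n", sizeof(scalar_t));
  });
  return 0;
}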
void SpatialBatchNormalization::init(std::shared_ptr<TorchData> input) { RASSERT(input->type() == TorchDataType::TENSOR_DATA); Tensor<float>* in = TO_TENSOR_PTR(input.get()); Tensor<float>* out = TO_TENSOR_PTR(output.get()); RASSERT(in->dim() >= 3); RASSERT(in->size()[2] == nfeats_); if (output != nullptr && in->dim() != out->dim()) { output = nullptr; } // Check that the input and output size are the same. if (output != nullptr) { if (in->size()[0] != out->size()[0] || in->size()[1] != out->size()[1] || in->size()[2] != out->size()[2]) { output = nullptr; } } if (output == nullptr) { output.reset(new Tensor<float>(in->dim(), in->size())); } }
static void test_simple_reductions() { Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); tensor.setRandom(); array<ptrdiff_t, 2> reduction_axis2; reduction_axis2[0] = 1; reduction_axis2[1] = 3; Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis2); VERIFY_IS_EQUAL(result.dimension(0), 2); VERIFY_IS_EQUAL(result.dimension(1), 5); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 5; ++j) { float sum = 0.0f; for (int k = 0; k < 3; ++k) { for (int l = 0; l < 7; ++l) { sum += tensor(i, k, j, l); } } VERIFY_IS_APPROX(result(i, j), sum); } } { Tensor<float, 0, DataLayout> sum1 = tensor.sum(); VERIFY_IS_EQUAL(sum1.rank(), 0); array<ptrdiff_t, 4> reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; Tensor<float, 0, DataLayout> sum2 = tensor.sum(reduction_axis4); VERIFY_IS_EQUAL(sum2.rank(), 0); VERIFY_IS_APPROX(sum1(), sum2()); } reduction_axis2[0] = 0; reduction_axis2[1] = 2; result = tensor.prod(reduction_axis2); VERIFY_IS_EQUAL(result.dimension(0), 3); VERIFY_IS_EQUAL(result.dimension(1), 7); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { float prod = 1.0f; for (int k = 0; k < 2; ++k) { for (int l = 0; l < 5; ++l) { prod *= tensor(k, i, l, j); } } VERIFY_IS_APPROX(result(i, j), prod); } } { Tensor<float, 0, DataLayout> prod1 = tensor.prod(); VERIFY_IS_EQUAL(prod1.rank(), 0); array<ptrdiff_t, 4> reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; Tensor<float, 0, DataLayout> prod2 = tensor.prod(reduction_axis4); VERIFY_IS_EQUAL(prod2.rank(), 0); VERIFY_IS_APPROX(prod1(), prod2()); } reduction_axis2[0] = 0; reduction_axis2[1] = 2; result = tensor.maximum(reduction_axis2); VERIFY_IS_EQUAL(result.dimension(0), 3); VERIFY_IS_EQUAL(result.dimension(1), 7); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 7; ++j) { float max_val = std::numeric_limits<float>::lowest(); for (int k = 0; k < 2; ++k) { for (int l = 0; l < 5; ++l) { max_val = (std::max)(max_val, tensor(k, i, l, j)); } } VERIFY_IS_APPROX(result(i, j), max_val); } } { Tensor<float, 0, DataLayout> max1 = tensor.maximum(); VERIFY_IS_EQUAL(max1.rank(), 0); array<ptrdiff_t, 4> reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; Tensor<float, 0, DataLayout> max2 = tensor.maximum(reduction_axis4); VERIFY_IS_EQUAL(max2.rank(), 0); VERIFY_IS_APPROX(max1(), max2()); } reduction_axis2[0] = 0; reduction_axis2[1] = 1; result = tensor.minimum(reduction_axis2); VERIFY_IS_EQUAL(result.dimension(0), 5); VERIFY_IS_EQUAL(result.dimension(1), 7); for (int i = 0; i < 5; ++i) { for (int j = 0; j < 7; ++j) { float min_val = (std::numeric_limits<float>::max)(); for (int k = 0; k < 2; ++k) { for (int l = 0; l < 3; ++l) { min_val = (std::min)(min_val, tensor(k, l, i, j)); } } VERIFY_IS_APPROX(result(i, j), min_val); } } { Tensor<float, 0, DataLayout> min1 = tensor.minimum(); VERIFY_IS_EQUAL(min1.rank(), 0); array<ptrdiff_t, 4> reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; Tensor<float, 0, DataLayout> min2 = tensor.minimum(reduction_axis4); VERIFY_IS_EQUAL(min2.rank(), 0); VERIFY_IS_APPROX(min1(), min2()); } reduction_axis2[0] = 0; reduction_axis2[1] = 1; result = tensor.mean(reduction_axis2); VERIFY_IS_EQUAL(result.dimension(0), 5); VERIFY_IS_EQUAL(result.dimension(1), 7); for (int i = 0; i < 5; ++i) { for (int j = 0; j < 7; ++j) { float sum = 0.0f; int count = 0; for (int k = 0; k < 2; ++k) { for 
(int l = 0; l < 3; ++l) { sum += tensor(k, l, i, j); ++count; } } VERIFY_IS_APPROX(result(i, j), sum / count); } } { Tensor<float, 0, DataLayout> mean1 = tensor.mean(); VERIFY_IS_EQUAL(mean1.rank(), 0); array<ptrdiff_t, 4> reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; Tensor<float, 0, DataLayout> mean2 = tensor.mean(reduction_axis4); VERIFY_IS_EQUAL(mean2.rank(), 0); VERIFY_IS_APPROX(mean1(), mean2()); } { Tensor<int, 1> ints(10); std::iota(ints.data(), ints.data() + ints.dimension(0), 0); TensorFixedSize<bool, Sizes<> > all; all = ints.all(); VERIFY(!all()); all = (ints >= ints.constant(0)).all(); VERIFY(all()); TensorFixedSize<bool, Sizes<> > any; any = (ints > ints.constant(10)).any(); VERIFY(!any()); any = (ints < ints.constant(1)).any(); VERIFY(any()); } }
Tensor embedding_bag_backward_cpu(const Tensor &grad_, const Tensor &indices__, const Tensor &offsets__, const Tensor &offset2bag__, const Tensor &bag_size_, const Tensor& max_indices_, int64_t num_weights, bool scale_grad_by_freq, int64_t mode) { auto grad = grad_.contiguous(); auto grad_arg = TensorArg(grad, "grad_", 1); checkScalarTypes("embedding_bag", grad_arg, {kFloat, kDouble}); auto indices_arg = TensorArg(indices__, "indices__", 1); checkScalarType("embedding_bag", indices_arg, kLong); auto offsets_arg = TensorArg(offsets__, "offsets__", 1); checkScalarType("embedding_bag", offsets_arg, kLong); auto offset2bag_arg = TensorArg(offset2bag__, "offset2bag__", 1); checkScalarType("embedding_bag", offset2bag_arg, kLong); checkContiguous("embedding_bag", offset2bag_arg); Tensor indices_ = indices__.contiguous(); Tensor offsets_ = offsets__.contiguous(); Tensor &offset2bag_ = const_cast<Tensor &>(offset2bag__); auto ind_sort_ = indices_.sort(); auto indices = std::get<0>(ind_sort_); auto ind_sort = std::get<1>(ind_sort_); auto offset2bag = offset2bag_.index_select(0, ind_sort); auto indices_data = indices.data<int64_t>(); auto offsets_data = offsets_.data<int64_t>(); auto offset2bag_data = offset2bag.data<int64_t>(); int64_t numel = indices.numel(); std::vector<int64_t> counts(num_weights); for (int i = 0; i < numel; i++) { counts[indices_data[i]] = 0; } for (int i = 0; i < numel; i++) { counts[indices_data[i]]++; } auto index_grad_weight = at::zeros({num_weights, grad.size(1)}, grad.type()).contiguous(); std::vector<int64_t> counts_uniq; counts_uniq.reserve(num_weights); int64_t o = 0; for (int64_t i = 0; i < numel; i += counts[indices_data[i]]) { counts_uniq.push_back(counts[indices_data[i]]); if (o > 0) { counts_uniq[o] += counts_uniq[o - 1]; } o++; } if (mode == MODE_MEAN || mode == MODE_SUM) { #pragma omp parallel for if (numel > 1000) for (int64_t i = 0; i < (int64_t)counts_uniq.size(); i++) { int64_t start = i == 0 ? 0 : counts_uniq[i - 1]; int64_t index = indices_data[start]; for (int64_t j = start; j < counts_uniq[i]; j++) { int64_t source = offset2bag_data[j]; double scale = 1.0; if (scale_grad_by_freq) { scale /= counts[indices_data[i]]; } if (mode == 1) { // MODE_MEAN if (offsets_.size(0) == 1) { auto bag_size = indices.size(0); scale /= bag_size; } else { if (source == offsets_.size(0) - 1) { scale /= indices.size(0) - offsets_data[offsets_.size(0) - 1]; } else { scale /= offsets_data[source + 1] - offsets_data[source]; } } } int64_t ddim = grad.size(1); if (grad.type().scalarType() == kFloat) { auto igwd = index_grad_weight.data<float>(); auto gd = grad.data<float>(); THBlas_axpy<float>(ddim, (float)scale, gd + ddim * source, 1, igwd + ddim * index, 1); } else if (grad.type().scalarType() == kDouble) { auto igwd = index_grad_weight.data<double>(); auto gd = grad.data<double>(); THBlas_axpy<double>(ddim, (double)scale, gd + ddim * source, 1, igwd + ddim * index, 1); } } } } else if (mode == MODE_MAX) { auto nonempty_max_indices = max_indices_.index_select(0, bag_size_.nonzero().view(-1)); auto nonempty_grad = grad_.index_select(0, bag_size_.nonzero().view(-1)); for (int64_t dim = 0; dim < grad.size(1); dim++) { index_grad_weight.select(1, dim).index_add_( 0, nonempty_max_indices.select(1, dim), nonempty_grad.select(1, dim)); } } return index_grad_weight; }
void MDAtomsTyped<T>::getBox(Tensor&box)const{ if(this->box) for(int i=0;i<3;i++)for(int j=0;j<3;j++) box(i,j)=this->box[3*i+j]*scaleb; else box.zero(); }
void CH3Shifts::calculate() { double energy=0.; Tensor virial; virial.zero(); vector<Vector> deriv(getNumberOfAtoms()); int N = getNumberOfAtoms(); Coor<double> coor(N); Coor<double> forces(N); forces.clear(); for(int i=0; i<numResidues; i++) for(unsigned j=0; j<6; j++) sh[i][j]=0.; for (int i = 0; i < N; i++) { int ipos = 4 * i; Vector Pos = getPosition(i); coor.coor[ipos] = len_pl2alm*Pos[0]; coor.coor[ipos+1] = len_pl2alm*Pos[1]; coor.coor[ipos+2] = len_pl2alm*Pos[2]; } double fact=1.0; if(!ensemble) { energy = meth_list[0]->calc_cs_force(coor, forces); bool printout=false; if(pperiod>0&&comm.Get_rank()==0) printout = (!(getStep()%pperiod)); if(printout) { string csfile; char tmps1[21]; // add to the name the label of the cv in such a way to have different files // when there is more than one defined variable sprintf(tmps1, "%li", getStep()); csfile = string("cs")+tmps1+string(".dat"); meth_list[0]->write_cs(csfile.c_str()); } } else { meth_list[0]->calc_cs(coor); bool printout=false; if(pperiod>0&&comm.Get_rank()==0) printout = (!(getStep()%pperiod)); if(printout) { string csfile; char tmps1[21], tmps2[21]; // add to the name the label of the cv in such a way to have different files // when there is more than one defined variable sprintf(tmps1, "%li", getStep()); sprintf(tmps2, "%i", multi_sim_comm.Get_rank()); csfile = string("cs")+tmps2+"-"+tmps1+string(".dat"); meth_list[0]->write_cs(csfile.c_str()); } unsigned size = meth_list[0]->ala_calc_hb.size(); for(unsigned j=0;j<size;j++) sh[0][j] = meth_list[0]->ala_calc_hb[j]; size = meth_list[0]->ile_calc_hd.size(); for(unsigned j=0;j<size;j++) sh[1][j] = meth_list[0]->ile_calc_hd[j]; size = meth_list[0]->ile_calc_hg2.size(); for(unsigned j=0;j<size;j++) sh[2][j] = meth_list[0]->ile_calc_hg2[j]; size = meth_list[0]->leu_calc_hd1.size(); for(unsigned j=0;j<size;j++) sh[3][j] = meth_list[0]->leu_calc_hd1[j]; size = meth_list[0]->leu_calc_hd2.size(); for(unsigned j=0;j<size;j++) sh[4][j] = meth_list[0]->leu_calc_hd2[j]; size = meth_list[0]->thr_calc_hg2.size(); for(unsigned j=0;j<size;j++) sh[5][j] = meth_list[0]->thr_calc_hg2[j]; size = meth_list[0]->val_calc_hg1.size(); for(unsigned j=0;j<size;j++) sh[6][j] = meth_list[0]->val_calc_hg1[j]; size = meth_list[0]->val_calc_hg2.size(); for(unsigned j=0;j<size;j++) sh[7][j] = meth_list[0]->val_calc_hg2[j]; fact = 1./((double) ens_dim); if(comm.Get_rank()==0) { // I am the master of my replica // among replicas multi_sim_comm.Sum(&sh[0][0], numResidues*8); multi_sim_comm.Barrier(); for(unsigned i=0;i<8;i++) for(int j=0;j<numResidues;j++) sh[j][i] *= fact; } else for(unsigned i=0;i<8;i++) for(int j=0;j<numResidues;j++) sh[j][i] = 0.; // inside each replica comm.Sum(&sh[0][0], numResidues*8); // now send the averaged shifts back to almost size = meth_list[0]->ala_calc_hb.size(); for(unsigned j=0;j<size;j++) meth_list[0]->ala_calc_hb[j] = sh[0][j]; size = meth_list[0]->ile_calc_hd.size(); for(unsigned j=0;j<size;j++) meth_list[0]->ile_calc_hd[j] = sh[1][j]; size = meth_list[0]->ile_calc_hg2.size(); for(unsigned j=0;j<size;j++) meth_list[0]->ile_calc_hg2[j] = sh[2][j]; size = meth_list[0]->leu_calc_hd1.size(); for(unsigned j=0;j<size;j++) meth_list[0]->leu_calc_hd1[j] = sh[3][j]; size = meth_list[0]->leu_calc_hd2.size(); for(unsigned j=0;j<size;j++) meth_list[0]->leu_calc_hd2[j] = sh[4][j]; size = meth_list[0]->thr_calc_hg2.size(); for(unsigned j=0;j<size;j++) meth_list[0]->thr_calc_hg2[j] = sh[5][j]; size = meth_list[0]->val_calc_hg1.size(); for(unsigned j=0;j<size;j++) meth_list[0]->val_calc_hg1[j] 
= sh[6][j]; size = meth_list[0]->val_calc_hg2.size(); for(unsigned j=0;j<size;j++) meth_list[0]->val_calc_hg2[j] = sh[7][j]; // calculate all the forces now energy = meth_list[0]->ens_calc_cs_force(coor, forces); } for (int i = 0; i < N; i++) { Vector For; int ipos = 4 * i; For[0] = forces.coor[ipos]; For[1] = forces.coor[ipos+1]; For[2] = forces.coor[ipos+2]; deriv[i] = fact*for_pl2alm*For; virial=virial+(-1.*Tensor(getPosition(i),deriv[i])); } for(unsigned i=0;i<getNumberOfAtoms();++i) setAtomsDerivatives(i,deriv[i]); setValue (ene_pl2alm*energy); setBoxDerivatives (virial); }
Tensor< S > Tensor< S >::apply( const Tensor &x, const ::std::function< void(value_type*) > &lambda) { return x.clone().apply(lambda); }
bool Tensor::ComputeJointSVD(Tensor& Umat, vector<Tensor*>& extra_tensor_list, vector<int>& mult_modes, int nonzerovals) { int num_sing_vals = (int)pow((double)nonzerovals, (double)mult_modes.size()); vector<int> free_modes; vector<int> mult_dims; vector<int> free_dims; vector<int> mult_offsets; vector<int> free_offsets; Tensor& temp_tensor = *(extra_tensor_list.at(0)); VectorPlus::SetDiff(free_modes, temp_tensor.Modes(), mult_modes); VectorPlus::Subset(mult_dims, temp_tensor.Dims(), mult_modes); VectorPlus::CSubset(free_dims, temp_tensor.Dims(), mult_modes); ComputeOffsets(mult_offsets, mult_dims); ComputeOffsets(free_offsets, free_dims); vector<int> usmalldims(free_modes.size(), nonzerovals); vector<int> udims; vector<int> usmall_offsets; ComputeOffsets(usmall_offsets, usmalldims); int numMultElements = VectorPlus::Product(mult_dims); int numFreeElements = VectorPlus::Product(free_dims); assert(numMultElements == numFreeElements); Eigen::MatrixXd matricized_tensor(numFreeElements,extra_tensor_list.size() * numMultElements); //cout << "copy start 1\n"; for (int z = 0; z < extra_tensor_list.size(); ++z) { int z_offset = z * numMultElements; FastIndexer i_indexer(free_dims); for (int i = 0; i < numFreeElements; ++i) { vector<int>& free_indices = i_indexer.GetNext(); // ComputeIndexArray(free_indices, free_offsets, i); FastIndexer j_indexer(mult_dims); for (int j = 0; j < numMultElements; ++j) { vector<int>& mult_indices = j_indexer.GetNext(); // ComputeIndexArray(mult_indices, mult_offsets, j); vector<int> total_indices; VectorPlus::Concat(total_indices, free_indices, mult_indices); matricized_tensor(i,z_offset + j) = extra_tensor_list.at(z)->At(total_indices); } } } // cout << "copy end 1\n"; // MatrixXd matricized_inverse = matricized_tensor.inverse(); // cout << matricized_inverse; // cout << "\n"; //compute pseudoinverse //cout << "svd start 1\n"; Eigen::JacobiSVD<Eigen::MatrixXd> svd(matricized_tensor, Eigen::ComputeFullU); Eigen::MatrixXd U = svd.matrixU(); // cout << "svd end 1\n"; Eigen::MatrixXd thinU = U.leftCols(num_sing_vals); VectorPlus::Concat(udims, free_dims, usmalldims); Umat.Initialize(udims); vector<int> semi_dims; semi_dims.push_back(thinU.rows()); semi_dims.push_back(thinU.cols()); // cout << "copy start 2 \n"; FastIndexer i_indexer(free_dims); for (int i = 0; i < thinU.rows(); ++i) { vector<int>& left_indices = i_indexer.GetNext(); // ComputeIndexArray(left_indices, free_offsets, i); FastIndexer j_indexer(usmalldims); for (int j = 0; j < thinU.cols(); ++j) { vector<int>& right_indices = j_indexer.GetNext(); // ComputeIndexArray(right_indices, usmall_offsets, j); vector<int> indices; VectorPlus::Concat(indices, left_indices, right_indices); // Umat.Set(indices, rand_matrix.At(i,j)); Umat.Set(indices, thinU(i,j)); if (thinU.rows() == thinU.cols()) { if (VectorPlus::Equals(left_indices, right_indices)) { Umat.Set(indices, 1); } else { Umat.Set(indices, 0); } } } } // cout << "copy end 2 \n"; return true; }
void HPCoarsenTest::select_refinement (System & system) { START_LOG("select_refinement()", "HPCoarsenTest"); // The current mesh MeshBase & mesh = system.get_mesh(); // The dimensionality of the mesh const unsigned int dim = mesh.mesh_dimension(); // The number of variables in the system const unsigned int n_vars = system.n_vars(); // The DofMap for this system const DofMap & dof_map = system.get_dof_map(); // The system number (for doing bad hackery) const unsigned int sys_num = system.number(); // Check for a valid component_scale if (!component_scale.empty()) { if (component_scale.size() != n_vars) libmesh_error_msg("ERROR: component_scale is the wrong size:\n" \ << " component_scale.size()=" \ << component_scale.size() \ << "\n n_vars=" \ << n_vars); } else { // No specified scaling. Scale all variables by one. component_scale.resize (n_vars, 1.0); } // Resize the error_per_cell vectors to handle // the number of elements, initialize them to 0. std::vector<ErrorVectorReal> h_error_per_cell(mesh.max_elem_id(), 0.); std::vector<ErrorVectorReal> p_error_per_cell(mesh.max_elem_id(), 0.); // Loop over all the variables in the system for (unsigned int var=0; var<n_vars; var++) { // Possibly skip this variable if (!component_scale.empty()) if (component_scale[var] == 0.0) continue; // The type of finite element to use for this variable const FEType & fe_type = dof_map.variable_type (var); // Finite element objects for a fine (and probably a coarse) // element will be needed fe = FEBase::build (dim, fe_type); fe_coarse = FEBase::build (dim, fe_type); // Any cached coarse element results have expired coarse = libmesh_nullptr; unsigned int cached_coarse_p_level = 0; const FEContinuity cont = fe->get_continuity(); libmesh_assert (cont == DISCONTINUOUS || cont == C_ZERO || cont == C_ONE); // Build an appropriate quadrature rule qrule = fe_type.default_quadrature_rule(dim); // Tell the refined finite element about the quadrature // rule. The coarse finite element need not know about it fe->attach_quadrature_rule (qrule.get()); // We will always do the integration // on the fine elements. Get their Jacobian values, etc.. JxW = &(fe->get_JxW()); xyz_values = &(fe->get_xyz()); // The shape functions phi = &(fe->get_phi()); phi_coarse = &(fe_coarse->get_phi()); // The shape function derivatives if (cont == C_ZERO || cont == C_ONE) { dphi = &(fe->get_dphi()); dphi_coarse = &(fe_coarse->get_dphi()); } #ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES // The shape function second derivatives if (cont == C_ONE) { d2phi = &(fe->get_d2phi()); d2phi_coarse = &(fe_coarse->get_d2phi()); } #endif // defined (LIBMESH_ENABLE_SECOND_DERIVATIVES) // Iterate over all the active elements in the mesh // that live on this processor. 
MeshBase::const_element_iterator elem_it = mesh.active_local_elements_begin(); const MeshBase::const_element_iterator elem_end = mesh.active_local_elements_end(); for (; elem_it != elem_end; ++elem_it) { const Elem * elem = *elem_it; // We're only checking elements that are already flagged for h // refinement if (elem->refinement_flag() != Elem::REFINE) continue; const dof_id_type e_id = elem->id(); // Find the projection onto the parent element, // if necessary if (elem->parent() && (coarse != elem->parent() || cached_coarse_p_level != elem->p_level())) { Uc.resize(0); coarse = elem->parent(); cached_coarse_p_level = elem->p_level(); unsigned int old_parent_level = coarse->p_level(); (const_cast<Elem *>(coarse))->hack_p_level(elem->p_level()); this->add_projection(system, coarse, var); (const_cast<Elem *>(coarse))->hack_p_level(old_parent_level); // Solve the h-coarsening projection problem Ke.cholesky_solve(Fe, Uc); } fe->reinit(elem); // Get the DOF indices for the fine element dof_map.dof_indices (elem, dof_indices, var); // The number of quadrature points const unsigned int n_qp = qrule->n_points(); // The number of DOFS on the fine element const unsigned int n_dofs = cast_int<unsigned int>(dof_indices.size()); // The number of nodes on the fine element const unsigned int n_nodes = elem->n_nodes(); // The average element value (used as an ugly hack // when we have nothing p-coarsened to compare to) // Real average_val = 0.; Number average_val = 0.; // Calculate this variable's contribution to the p // refinement error if (elem->p_level() == 0) { unsigned int n_vertices = 0; for (unsigned int n = 0; n != n_nodes; ++n) if (elem->is_vertex(n)) { n_vertices++; const Node * const node = elem->get_node(n); average_val += system.current_solution (node->dof_number(sys_num,var,0)); } average_val /= n_vertices; } else { unsigned int old_elem_level = elem->p_level(); (const_cast<Elem *>(elem))->hack_p_level(old_elem_level - 1); fe_coarse->reinit(elem, &(qrule->get_points())); const unsigned int n_coarse_dofs = cast_int<unsigned int>(phi_coarse->size()); (const_cast<Elem *>(elem))->hack_p_level(old_elem_level); Ke.resize(n_coarse_dofs, n_coarse_dofs); Ke.zero(); Fe.resize(n_coarse_dofs); Fe.zero(); // Loop over the quadrature points for (unsigned int qp=0; qp<qrule->n_points(); qp++) { // The solution value at the quadrature point Number val = libMesh::zero; Gradient grad; Tensor hess; for (unsigned int i=0; i != n_dofs; i++) { dof_id_type dof_num = dof_indices[i]; val += (*phi)[i][qp] * system.current_solution(dof_num); if (cont == C_ZERO || cont == C_ONE) grad.add_scaled((*dphi)[i][qp], system.current_solution(dof_num)); // grad += (*dphi)[i][qp] * // system.current_solution(dof_num); if (cont == C_ONE) hess.add_scaled((*d2phi)[i][qp], system.current_solution(dof_num)); // hess += (*d2phi)[i][qp] * // system.current_solution(dof_num); } // The projection matrix and vector for (unsigned int i=0; i != Fe.size(); ++i) { Fe(i) += (*JxW)[qp] * (*phi_coarse)[i][qp]*val; if (cont == C_ZERO || cont == C_ONE) Fe(i) += (*JxW)[qp] * grad * (*dphi_coarse)[i][qp]; if (cont == C_ONE) Fe(i) += (*JxW)[qp] * hess.contract((*d2phi_coarse)[i][qp]); for (unsigned int j=0; j != Fe.size(); ++j) { Ke(i,j) += (*JxW)[qp] * (*phi_coarse)[i][qp]*(*phi_coarse)[j][qp]; if (cont == C_ZERO || cont == C_ONE) Ke(i,j) += (*JxW)[qp] * (*dphi_coarse)[i][qp]*(*dphi_coarse)[j][qp]; if (cont == C_ONE) Ke(i,j) += (*JxW)[qp] * ((*d2phi_coarse)[i][qp].contract((*d2phi_coarse)[j][qp])); } } } // Solve the p-coarsening projection problem 
              Ke.cholesky_solve(Fe, Up);
            }

          // Loop over the integration points on the fine element
          for (unsigned int qp=0; qp<n_qp; qp++)
            {
              Number value_error = 0.;
              Gradient grad_error;
              Tensor hessian_error;

              for (unsigned int i=0; i<n_dofs; i++)
                {
                  const dof_id_type dof_num = dof_indices[i];
                  value_error += (*phi)[i][qp] *
                    system.current_solution(dof_num);

                  if (cont == C_ZERO || cont == C_ONE)
                    grad_error.add_scaled((*dphi)[i][qp],
                                          system.current_solution(dof_num));
                  // grad_error += (*dphi)[i][qp] *
                  //   system.current_solution(dof_num);

                  if (cont == C_ONE)
                    hessian_error.add_scaled((*d2phi)[i][qp],
                                             system.current_solution(dof_num));
                  // hessian_error += (*d2phi)[i][qp] *
                  //   system.current_solution(dof_num);
                }

              if (elem->p_level() == 0)
                {
                  value_error -= average_val;
                }
              else
                {
                  for (unsigned int i=0; i<Up.size(); i++)
                    {
                      value_error -= (*phi_coarse)[i][qp] * Up(i);

                      if (cont == C_ZERO || cont == C_ONE)
                        grad_error.subtract_scaled((*dphi_coarse)[i][qp], Up(i));
                      // grad_error -= (*dphi_coarse)[i][qp] * Up(i);

                      if (cont == C_ONE)
                        hessian_error.subtract_scaled((*d2phi_coarse)[i][qp], Up(i));
                      // hessian_error -= (*d2phi_coarse)[i][qp] * Up(i);
                    }
                }

              p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                (component_scale[var] *
                 (*JxW)[qp] * TensorTools::norm_sq(value_error));

              if (cont == C_ZERO || cont == C_ONE)
                p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                  (component_scale[var] *
                   (*JxW)[qp] * grad_error.norm_sq());

              if (cont == C_ONE)
                p_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                  (component_scale[var] *
                   (*JxW)[qp] * hessian_error.norm_sq());
            }

          // Calculate this variable's contribution to the h
          // refinement error
          if (!elem->parent())
            {
              // For now, we'll always start with an h refinement
              h_error_per_cell[e_id] =
                std::numeric_limits<ErrorVectorReal>::max() / 2;
            }
          else
            {
              FEInterface::inverse_map (dim, fe_type, coarse,
                                        *xyz_values, coarse_qpoints);

              unsigned int old_parent_level = coarse->p_level();
              (const_cast<Elem *>(coarse))->hack_p_level(elem->p_level());

              fe_coarse->reinit(coarse, &coarse_qpoints);

              (const_cast<Elem *>(coarse))->hack_p_level(old_parent_level);

              // The number of DOFS on the coarse element
              unsigned int n_coarse_dofs =
                cast_int<unsigned int>(phi_coarse->size());

              // Loop over the quadrature points
              for (unsigned int qp=0; qp<n_qp; qp++)
                {
                  // The solution difference at the quadrature point
                  Number value_error = libMesh::zero;
                  Gradient grad_error;
                  Tensor hessian_error;

                  for (unsigned int i=0; i != n_dofs; ++i)
                    {
                      const dof_id_type dof_num = dof_indices[i];
                      value_error += (*phi)[i][qp] *
                        system.current_solution(dof_num);

                      if (cont == C_ZERO || cont == C_ONE)
                        grad_error.add_scaled((*dphi)[i][qp],
                                              system.current_solution(dof_num));
                      // grad_error += (*dphi)[i][qp] *
                      //   system.current_solution(dof_num);

                      if (cont == C_ONE)
                        hessian_error.add_scaled((*d2phi)[i][qp],
                                                 system.current_solution(dof_num));
                      // hessian_error += (*d2phi)[i][qp] *
                      //   system.current_solution(dof_num);
                    }

                  for (unsigned int i=0; i != n_coarse_dofs; ++i)
                    {
                      value_error -= (*phi_coarse)[i][qp] * Uc(i);

                      if (cont == C_ZERO || cont == C_ONE)
                        // grad_error -= (*dphi_coarse)[i][qp] * Uc(i);
                        grad_error.subtract_scaled((*dphi_coarse)[i][qp], Uc(i));

                      if (cont == C_ONE)
                        hessian_error.subtract_scaled((*d2phi_coarse)[i][qp], Uc(i));
                      // hessian_error -= (*d2phi_coarse)[i][qp] * Uc(i);
                    }

                  h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                    (component_scale[var] *
                     (*JxW)[qp] * TensorTools::norm_sq(value_error));

                  if (cont == C_ZERO || cont == C_ONE)
                    h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                      (component_scale[var] *
                       (*JxW)[qp] *
                       grad_error.norm_sq());

                  if (cont == C_ONE)
                    h_error_per_cell[e_id] += static_cast<ErrorVectorReal>
                      (component_scale[var] *
                       (*JxW)[qp] * hessian_error.norm_sq());
                }
            }
        }
    }

  // Now that we've got our approximations for p_error and h_error, let's see
  // if we want to switch any h refinement flags to p refinement

  // Iterate over all the active elements in the mesh
  // that live on this processor.
  MeshBase::element_iterator       elem_it  = mesh.active_local_elements_begin();
  const MeshBase::element_iterator elem_end = mesh.active_local_elements_end();

  for (; elem_it != elem_end; ++elem_it)
    {
      Elem * elem = *elem_it;

      // We're only checking elements that are already flagged for h
      // refinement
      if (elem->refinement_flag() != Elem::REFINE)
        continue;

      const dof_id_type e_id = elem->id();

      unsigned int dofs_per_elem = 0, dofs_per_p_elem = 0;

      // Loop over all the variables in the system
      for (unsigned int var=0; var<n_vars; var++)
        {
          // The type of finite element to use for this variable
          const FEType & fe_type = dof_map.variable_type (var);

          // FIXME: we're overestimating the number of DOFs added by h
          // refinement
          FEType elem_fe_type = fe_type;
          elem_fe_type.order =
            static_cast<Order>(fe_type.order + elem->p_level());
          dofs_per_elem +=
            FEInterface::n_dofs(dim, elem_fe_type, elem->type());

          elem_fe_type.order =
            static_cast<Order>(fe_type.order + elem->p_level() + 1);
          dofs_per_p_elem +=
            FEInterface::n_dofs(dim, elem_fe_type, elem->type());
        }

      const unsigned int new_h_dofs = dofs_per_elem *
        (elem->n_children() - 1);

      const unsigned int new_p_dofs = dofs_per_p_elem -
        dofs_per_elem;

      /*
        libMesh::err << "Cell " << e_id << ": h = " << elem->hmax()
                     << ", p = " << elem->p_level() + 1 << "," << std::endl
                     << "     h_error = " << h_error_per_cell[e_id]
                     << ", p_error = " << p_error_per_cell[e_id] << std::endl
                     << "     new_h_dofs = " << new_h_dofs
                     << ", new_p_dofs = " << new_p_dofs << std::endl;
      */

      const Real p_value =
        std::sqrt(p_error_per_cell[e_id]) * p_weight / new_p_dofs;
      const Real h_value =
        std::sqrt(h_error_per_cell[e_id]) /
        static_cast<Real>(new_h_dofs);

      if (p_value > h_value)
        {
          elem->set_p_refinement_flag(Elem::REFINE);
          elem->set_refinement_flag(Elem::DO_NOTHING);
        }
    }

  STOP_LOG("select_refinement()", "HPCoarsenTest");
}
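
// ---------------------------------------------------------------------
// Usage sketch (illustrative only, not part of this source file): one
// way select_refinement() is typically driven from an application-side
// adaptivity step.  The system name "primary", the fraction values, and
// the function name adapt_once are assumptions for the sketch; the
// libMesh classes and calls themselves (KellyErrorEstimator,
// MeshRefinement, HPCoarsenTest) are standard API.
// ---------------------------------------------------------------------
//
// #include "libmesh/equation_systems.h"
// #include "libmesh/error_vector.h"
// #include "libmesh/kelly_error_estimator.h"
// #include "libmesh/mesh_refinement.h"
// #include "libmesh/hp_coarsentest.h"
//
// using namespace libMesh;
//
// void adapt_once (EquationSystems & es, MeshBase & mesh)
// {
//   System & system = es.get_system("primary"); // assumed system name
//
//   // Estimate the error on every active element
//   ErrorVector error;
//   KellyErrorEstimator error_estimator;
//   error_estimator.estimate_error(system, error);
//
//   // Flag elements for h refinement based on that estimate
//   MeshRefinement mesh_refinement(mesh);
//   mesh_refinement.refine_fraction() = 0.3;   // assumed fraction
//   mesh_refinement.coarsen_fraction() = 0.0;  // assumed fraction
//   mesh_refinement.flag_elements_by_error_fraction(error);
//
//   // Let HPCoarsenTest switch some of those h flags to p flags,
//   // as implemented above
//   HPCoarsenTest hp_selector;
//   hp_selector.select_refinement(system);
//
//   // Carry out the refinement and rebuild the DOF structures
//   mesh_refinement.refine_and_coarsen_elements();
//   es.reinit();
// }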