// Tangent specialization: globally sums the response value and its derivative
// components, stores the summed result back into `value`, and copies each
// derivative component into entry 0 of the matching output vector.
void Response_Functional<panzer::Traits::Tangent>::
scatterResponse()
{
  const int localDerivCount = value.size();
  const int num_deriv = this->numDeriv();
  TEUCHOS_ASSERT(localDerivCount == 0 || localDerivCount == num_deriv);

  ScalarT summed = ScalarT(num_deriv, 0.0);

  // Reduce the value and the derivative array separately. A reduceAll() on
  // the Fad objects themselves is possible, but it is awkward for DFad because
  // of the temporaries that may be created; since this is a plain sum, the
  // component-wise reduction is simpler.
  Teuchos::reduceAll(*this->getComm(), Teuchos::REDUCE_SUM, Thyra::Ordinal(1),
                     &value.val(), &summed.val());
  if (num_deriv > 0) {
    Teuchos::reduceAll(*this->getComm(), Teuchos::REDUCE_SUM, Thyra::Ordinal(localDerivCount),
                       value.dx(), &summed.fastAccessDx(0));
  }

  value = summed;

  // Copy the derivative components into whichever output container is active.
  if (!this->useEpetra()) {
    // Thyra storage
    TEUCHOS_ASSERT(this->useThyra());
    Thyra::ArrayRCP< Thyra::ArrayRCP<double> > thyraDeriv = this->getThyraMultiVector();
    for (int d = 0; d < num_deriv; ++d) {
      thyraDeriv[d][0] = summed.dx(d);
    }
    return;
  }

  // Epetra storage
  Epetra_MultiVector& epetraDeriv = this->getEpetraMultiVector();
  for (int d = 0; d < num_deriv; ++d) {
    epetraDeriv[d][0] = summed.dx(d);
  }
}
// Tangent specialization: for every node of the node set, computes the
// boundary concentration from the nodal pressure and fills, when the
// corresponding containers are present, the residual (fT), the
// Jacobian-vector product (JVT) and the parameter tangent (fpT) at the
// concentration unknown.
void EquilibriumConcentrationBC<PHAL::AlbanyTraits::Tangent, Traits>::
evaluateFields(typename Traits::EvalData dirichletWorkset)
{
  Teuchos::RCP<Tpetra_Vector> fT = dirichletWorkset.fT;
  Teuchos::RCP<Tpetra_MultiVector> fpT = dirichletWorkset.fpT;
  Teuchos::RCP<Tpetra_MultiVector> JVT = dirichletWorkset.JVT;
  Teuchos::RCP<const Tpetra_Vector> xT = dirichletWorkset.xT;
  Teuchos::RCP<const Tpetra_MultiVector> VxT = dirichletWorkset.VxT;

  Teuchos::ArrayRCP<const ST> directionView;
  Teuchos::ArrayRCP<ST> residualView;
  if (fT != Teuchos::null) {
    residualView = fT->get1dViewNonConst();
  }
  Teuchos::ArrayRCP<const ST> solutionView = xT->get1dView();

  const RealType j_coeff = dirichletWorkset.j_coeff;
  const std::vector<std::vector<int>>& nsNodes =
    dirichletWorkset.nodeSets->find(this->nodeSetID)->second;

  ScalarT Cval;
  ScalarT pressure;

  for (unsigned int inode = 0; inode < nsNodes.size(); inode++) {
    // local unknown indices of the concentration and pressure dofs
    const int cDof = nsNodes[inode][this->coffset_];
    const int pDof = nsNodes[inode][this->poffset_];

    pressure = solutionView[pDof];
    this->computeBCs(pressure, Cval);

    // residual: current concentration minus the prescribed value
    if (fT != Teuchos::null) {
      residualView[cDof] = solutionView[cDof] - Cval.val();
    }

    // Jacobian-vector product, one column per direction
    if (JVT != Teuchos::null) {
      Teuchos::ArrayRCP<ST> jvColumn;
      for (int col = 0; col < dirichletWorkset.num_cols_x; col++) {
        jvColumn = JVT->getDataNonConst(col);
        directionView = VxT->getData(col);
        jvColumn[cDof] = j_coeff * directionView[cDof];
      }
    }

    // parameter tangent, one column per parameter
    if (fpT != Teuchos::null) {
      Teuchos::ArrayRCP<ST> fpColumn;
      for (int col = 0; col < dirichletWorkset.num_cols_p; col++) {
        fpColumn = fpT->getDataNonConst(col);
        fpColumn[cDof] = -Cval.dx(dirichletWorkset.param_offset + col);
      }
    }
  }
}
// Jacobian specialization: for every node of the node set, replaces the
// Jacobian row of the concentration unknown by a row that is zero except for
// j_coeff on the diagonal, and (when a residual vector is present) writes the
// Dirichlet residual "current concentration - prescribed concentration".
void EquilibriumConcentrationBC<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData dirichletWorkset)
{
  Teuchos::RCP<Tpetra_Vector> fT = dirichletWorkset.fT;
  Teuchos::RCP<const Tpetra_Vector> xT = dirichletWorkset.xT;
  Teuchos::ArrayRCP<const ST> xT_constView = xT->get1dView();

  Teuchos::RCP<Tpetra_CrsMatrix> jacT = dirichletWorkset.JacT;

  const RealType j_coeff = dirichletWorkset.j_coeff;
  const std::vector<std::vector<int>>& nsNodes =
    dirichletWorkset.nodeSets->find(this->nodeSetID)->second;

  // the residual is only filled when the workset carries one
  bool fillResid = (fT != Teuchos::null);
  Teuchos::ArrayRCP<ST> fT_nonconstView;
  if (fillResid) fT_nonconstView = fT->get1dViewNonConst();

  int cunk, punk;
  ScalarT Cval;
  ScalarT pressure;

  // single-entry arrays used to write the diagonal value
  Teuchos::Array<LO> index(1);
  Teuchos::Array<ST> value(1);
  size_t numEntriesT;
  value[0] = j_coeff;

  // scratch storage for the row being overwritten
  Teuchos::Array<ST> matrixEntriesT;
  Teuchos::Array<LO> matrixIndicesT;

  for (unsigned int inode = 0; inode < nsNodes.size(); inode++) {
    cunk = nsNodes[inode][this->coffset_];
    punk = nsNodes[inode][this->poffset_];

    pressure = xT_constView[punk];
    this->computeBCs(pressure, Cval);

    // replace jac values for the C dof: zero the whole row first ...
    numEntriesT = jacT->getNumEntriesInLocalRow(cunk);
    matrixEntriesT.resize(numEntriesT);
    matrixIndicesT.resize(numEntriesT);

    jacT->getLocalRowCopy(cunk, matrixIndicesT(), matrixEntriesT(), numEntriesT);
    // size_t index: numEntriesT is unsigned, so avoid a signed/unsigned comparison
    for (size_t i = 0; i < numEntriesT; ++i) matrixEntriesT[i] = 0;
    jacT->replaceLocalValues(cunk, matrixIndicesT(), matrixEntriesT());

    // ... then put j_coeff on the diagonal
    index[0] = cunk;
    jacT->replaceLocalValues(cunk, index(), value());

    if (fillResid) {
      fT_nonconstView[cunk] = xT_constView[cunk] - Cval.val();
    }
  }
}
//********************************************************************** TEUCHOS_UNIT_TEST(hessian_test,correctness) { typedef InputConditionsEvaluator<panzer::Traits::Hessian,panzer::Traits> InputCondEval; typedef HessianTestEvaluator<panzer::Traits::Hessian,panzer::Traits> HessTestEval; typedef panzer::Traits::HessianType ScalarT; typedef Sacado::ScalarValue<ScalarT> Value; using Teuchos::RCP; using Teuchos::rcp; // the one and only evaluator Teuchos::ParameterList empty_pl; RCP<InputCondEval> ic_eval = rcp(new InputCondEval(empty_pl)); RCP<HessTestEval> ht_eval = rcp(new HessTestEval(empty_pl)); Teuchos::RCP<PHX::FieldManager<panzer::Traits> > fm = Teuchos::rcp(new PHX::FieldManager<panzer::Traits>); fm->registerEvaluator<panzer::Traits::Hessian>(ic_eval); fm->registerEvaluator<panzer::Traits::Hessian>(ht_eval); fm->requireField<panzer::Traits::Hessian>(ht_eval->result.fieldTag()); std::vector<PHX::index_size_type> derivative_dimensions; derivative_dimensions.push_back(4); fm->setKokkosExtendedDataTypeDimensions<panzer::Traits::Hessian>(derivative_dimensions); panzer::Traits::SetupData setupData; fm->postRegistrationSetup(setupData); panzer::Workset workset; panzer::Traits::PreEvalData preEvalData; fm->preEvaluate<panzer::Traits::Hessian>(preEvalData); fm->evaluateFields<panzer::Traits::Hessian>(workset); fm->postEvaluate<panzer::Traits::Hessian>(0); for(int i=0;i<5;i++) { double x = Value::eval(ic_eval->x(i)); double y = Value::eval(ic_eval->y(i)); double dx = Value::eval(ic_eval->dx(i)); double dy = Value::eval(ic_eval->dy(i)); double f = func(x,y); std::vector<double> hess = hess_func(x,y,dx,dy); ScalarT r = ht_eval->result(i); TEST_EQUALITY(Value::eval(r),f); TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(0),hess[0]); TEST_EQUALITY(r.fastAccessDx(1).fastAccessDx(0),hess[1]); } }
//********************************************************************** TEUCHOS_UNIT_TEST(hessian_test_k,correctness) { typedef HessianType ScalarT; typedef Sacado::ScalarValue<ScalarT> Value; using Teuchos::RCP; using Teuchos::rcp; double x_val = 0.25; double y_val = 0.5; double dx_val = 2.0; double dy_val = 3.0; Kokkos::View<ScalarT*> x("x",5); Kokkos::View<ScalarT*> y("y",5); Kokkos::View<ScalarT*> dx("dx",5); Kokkos::View<ScalarT*> dy("dy",5); Kokkos::View<ScalarT*> result("result",5); for(int i=0;i<5;++i) { dx(i) = ScalarT(dx_val); dy(i) = ScalarT(dy_val); x(i) = seed_second_deriv(2,0,x_val,dx_val); y(i) = seed_second_deriv(2,1,y_val,dy_val); } for(int i=0;i<5;++i) result(i) = std::sin(x(i)*y(i))+0.25*std::cos(y(i)); for(int i=0;i<5;i++) { double x_val = Value::eval(x(i)); double y_val = Value::eval(y(i)); double dx_val = Value::eval(dx(i)); double dy_val = Value::eval(dy(i)); double f = func(x_val,y_val); std::vector<double> hess = hess_func(x_val,y_val,dx_val,dy_val); ScalarT r = result(i); TEST_EQUALITY(Value::eval(r),f); TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(0),hess[0]); TEST_EQUALITY(r.fastAccessDx(0).fastAccessDx(1),hess[1]); out << "RESULT = " << r << std::endl; } }
// Hessian specialization: for every cell of the workset and every scattered
// field, sums the inner-derivative component 0 of each outer sensitivity into
// the corresponding row of the Epetra matrix.
void panzer::ScatterResidual_Epetra<panzer::Traits::Hessian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
  std::vector<int> cLIDs, rLIDs;
  std::vector<double> jacRow;

  // a dedicated column indexer is optional
  bool useColumnIndexer = colGlobalIndexer_!=Teuchos::null;

  // objects pulled from the workset and the linear object container
  std::string blockId = this->wda(workset).block_id;
  const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

  Teuchos::RCP<Epetra_Vector> r = epetraContainer_->get_f();
  Teuchos::RCP<Epetra_CrsMatrix> Jac = epetraContainer_->get_A();

  const Teuchos::RCP<const panzer::UniqueGlobalIndexer<LO,GO> >&
    colGlobalIndexer = useColumnIndexer ? colGlobalIndexer_ : globalIndexer_;

  // NOTE: reordering these loops would likely improve performance.
  //       "getGIDFieldOffsets" may be expensive, "getElementGIDs" can be
  //       cheaper, but the LID lookup may then be more expensive.

  const std::size_t numCells = localCellIds.size();
  for(std::size_t cell = 0; cell < numCells; ++cell) {
    std::size_t cellLocalId = localCellIds[cell];

    rLIDs = globalIndexer_->getElementLIDs(cellLocalId);
    cLIDs = colGlobalIndexer->getElementLIDs(cellLocalId);

    // when a second workset is attached, append its column LIDs
    if (Teuchos::nonnull(workset.other)) {
      const std::size_t other_cellLocalId = workset.other->cell_local_ids[cell];
      const std::vector<int> other_cLIDs = colGlobalIndexer->getElementLIDs(other_cellLocalId);
      cLIDs.insert(cLIDs.end(), other_cLIDs.begin(), other_cLIDs.end());
    }

    // each field to be scattered
    for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
      int fieldNum = fieldIds_[fieldIndex];
      const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

      // each basis function of the field (currently nodes)
      for(std::size_t rowBasisNum = 0; rowBasisNum < elmtOffset.size(); rowBasisNum++) {
        const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,rowBasisNum);
        int row = rLIDs[elmtOffset[rowBasisNum]];

        // one matrix entry per sensitivity index: all DOFs on the cell
        jacRow.resize(scatterField.size());
        for(int sens = 0; sens < scatterField.size(); ++sens) {
          jacRow[sens] = scatterField.fastAccessDx(sens).fastAccessDx(0);
        }

        // never pass more entries than there are column LIDs or sensitivities
        int err = Jac->SumIntoMyValues(
                    row,
                    std::min(cLIDs.size(), static_cast<size_t>(scatterField.size())),
                    panzer::ptrFromStlVector(jacRow),
                    panzer::ptrFromStlVector(cLIDs));
        TEUCHOS_ASSERT_EQUALITY(err,0);
      } // end rowBasisNum
    } // end fieldIndex
  }
}
// Tangent specialization: copies the nodal solution (and, for transient
// problems, its time derivative) into the evaluated fields. When a direction
// multivector is present and its coefficient is non-zero, the first
// num_cols_x derivative slots are seeded with coeff * V; otherwise the field
// receives the plain value.
//
// The equations of a node are visited in this order, with `n` counting the
// position inside the node's equation-ID list:
//   1. node variables                     (cell,node)
//   2. per level: vector level variables  (cell,node,level,dim)
//      followed by scalar level variables (cell,node,level)
//   3. per level: tracer variables        (cell,node,level)
void GatherSolution<PHAL::AlbanyTraits::Tangent, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  Teuchos::RCP<const Epetra_Vector> x = workset.x;
  Teuchos::RCP<const Epetra_Vector> xdot = workset.xdot;
  Teuchos::RCP<const Epetra_MultiVector> Vx = workset.Vx;
  Teuchos::RCP<const Epetra_MultiVector> Vxdot = workset.Vxdot;

  // total number of derivative components carried by each seeded value
  const int num_cols_tot = workset.param_offset + workset.num_cols_p;

  // loop-invariant: whether derivative slots are seeded at all
  const bool seedX    = (Vx != Teuchos::null && workset.j_coeff != 0.0);
  const bool seedXdot = (Vxdot != Teuchos::null && workset.m_coeff != 0.0);

  ScalarT* valptr;

  for (int cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& nodeID = workset.wsElNodeEqID[cell];

    for (int node = 0; node < this->numNodes; ++node) {
      const Teuchos::ArrayRCP<int>& eqID = nodeID[node];

      // ---------------- solution ----------------
      int n = 0, eq = 0;

      // node variables
      for (int j = eq; j < eq+this->numNodeVar; j++, ++n) {
        valptr = &(this->val[j])(cell,node);
        if (seedX) {
          *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
          for (int k=0; k<workset.num_cols_x; k++)
            valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
        }
        else
          *valptr = TanFadType((*x)[eqID[n]]);
      }
      eq += this->numNodeVar;

      // level variables: vectors first, then scalars
      for (int level = 0; level < this->numLevels; level++) {
        for (int j = eq; j < eq+this->numVectorLevelVar; j++) {
          for (int dim = 0; dim < this->numDims; ++dim, ++n) {
            valptr = &(this->val[j])(cell,node,level,dim);
            if (seedX) {
              *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
            }
            else
              *valptr = TanFadType((*x)[eqID[n]]);
          }
        }
        for (int j = eq+this->numVectorLevelVar;
                 j < eq+this->numVectorLevelVar+this->numScalarLevelVar; j++, ++n) {
          valptr = &(this->val[j])(cell,node,level);
          if (seedX) {
            *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
          }
          else
            *valptr = TanFadType((*x)[eqID[n]]);
        }
      }
      eq += this->numVectorLevelVar+this->numScalarLevelVar;

      // tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = eq; j < eq+this->numTracerVar; ++j, ++n) {
          valptr = &(this->val[j])(cell,node,level);
          if (seedX) {
            *valptr = TanFadType(num_cols_tot, (*x)[eqID[n]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) = workset.j_coeff*(*Vx)[k][eqID[n]];
          }
          else
            *valptr = TanFadType((*x)[eqID[n]]);
        }
      }
      eq += this->numTracerVar;

      // ---------------- time derivative ----------------
      if (workset.transientTerms) {
        // restart the equation counters for the val_dot fields
        int nd = 0, eqd = 0;

        // node variables
        for (int j = eqd; j < eqd+this->numNodeVar; j++, ++nd) {
          valptr = &(this->val_dot[j])(cell,node);
          if (seedXdot) {
            *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[nd]]);
            for (int k=0; k<workset.num_cols_x; k++)
              valptr->fastAccessDx(k) = workset.m_coeff*(*Vxdot)[k][eqID[nd]];
          }
          else
            *valptr = TanFadType((*xdot)[eqID[nd]]);
        }
        eqd += this->numNodeVar;

        // level variables: vectors first, then scalars
        for (int level = 0; level < this->numLevels; level++) {
          for (int j = eqd; j < eqd+this->numVectorLevelVar; j++) {
            for (int dim = 0; dim < this->numDims; ++dim, ++nd) {
              valptr = &(this->val_dot[j])(cell,node,level,dim);
              if (seedXdot) {
                *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[nd]]);
                for (int k=0; k<workset.num_cols_x; k++)
                  valptr->fastAccessDx(k) = workset.m_coeff*(*Vxdot)[k][eqID[nd]];
              }
              else
                *valptr = TanFadType((*xdot)[eqID[nd]]);
            }
          }
          // The upper bound must be vector + scalar level variables, exactly
          // as in the solution gather above; using the scalar count twice
          // visits the wrong number of fields.
          for (int j = eqd+this->numVectorLevelVar;
                   j < eqd+this->numVectorLevelVar+this->numScalarLevelVar; j++, ++nd) {
            valptr = &(this->val_dot[j])(cell,node,level);
            if (seedXdot) {
              *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[nd]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) = workset.m_coeff*(*Vxdot)[k][eqID[nd]];
            }
            else
              *valptr = TanFadType((*xdot)[eqID[nd]]);
          }
        }
        eqd += this->numVectorLevelVar+this->numScalarLevelVar;

        // tracer variables
        for (int level = 0; level < this->numLevels; ++level) {
          for (int j = eqd; j < eqd+this->numTracerVar; ++j, ++nd) {
            valptr = &(this->val_dot[j])(cell,node,level);
            if (seedXdot) {
              *valptr = TanFadType(num_cols_tot, (*xdot)[eqID[nd]]);
              for (int k=0; k<workset.num_cols_x; k++)
                valptr->fastAccessDx(k) = workset.m_coeff*(*Vxdot)[k][eqID[nd]];
            }
            else
              *valptr = TanFadType((*xdot)[eqID[nd]]);
          }
        }
        eqd += this->numTracerVar;
      }
    }
  }
}
// Jacobian specialization: copies the nodal solution (and, for transient
// problems, its time derivative) into the evaluated fields as Fad values with
// one derivative slot per element degree of freedom. The slot belonging to the
// gathered unknown is set to j_coeff (solution) or m_coeff (time derivative).
//
// Equations of a node are visited as: node variables, then per level the
// vector level variables followed by the scalar level variables, then per
// level the tracer variables. `slot` is the position inside the node's
// equation-ID list.
void GatherSolution<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const Teuchos::RCP<const Epetra_Vector> x = workset.x;
  const Teuchos::RCP<const Epetra_Vector> xdot = workset.xdot;

  for (int cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<Teuchos::ArrayRCP<int> >& cellEqIDs = workset.wsElNodeEqID[cell];
    const int neq = cellEqIDs[0].size();
    const int num_dof = neq * this->numNodes;

    for (int node = 0; node < this->numNodes; ++node) {
      const Teuchos::ArrayRCP<int>& eqID = cellEqIDs[node];
      // first derivative slot owned by this node
      const int firstunk = neq * node;

      // ---------------- solution ----------------
      int slot = 0;
      int base = 0;

      // node variables
      for (int j = base; j < base+this->numNodeVar; ++j, ++slot) {
        ScalarT& v = (this->val[j])(cell,node);
        v = FadType(num_dof, (*x)[eqID[slot]]);
        v.setUpdateValue(!workset.ignore_residual);
        v.fastAccessDx(firstunk + slot) = workset.j_coeff;
      }
      base += this->numNodeVar;

      // level variables: vectors first, then scalars
      for (int level = 0; level < this->numLevels; level++) {
        for (int j = base; j < base+this->numVectorLevelVar; j++) {
          for (int dim = 0; dim < this->numDims; ++dim, ++slot) {
            ScalarT& v = (this->val[j])(cell,node,level,dim);
            v = FadType(num_dof, (*x)[eqID[slot]]);
            v.setUpdateValue(!workset.ignore_residual);
            v.fastAccessDx(firstunk + slot) = workset.j_coeff;
          }
        }
        for (int j = base+this->numVectorLevelVar;
                 j < base+this->numVectorLevelVar+this->numScalarLevelVar; ++j, ++slot) {
          ScalarT& v = (this->val[j])(cell,node,level);
          v = FadType(num_dof, (*x)[eqID[slot]]);
          v.setUpdateValue(!workset.ignore_residual);
          v.fastAccessDx(firstunk + slot) = workset.j_coeff;
        }
      }
      base += this->numVectorLevelVar+this->numScalarLevelVar;

      // tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = base; j < base+this->numTracerVar; ++j, ++slot) {
          ScalarT& v = (this->val[j])(cell,node,level);
          v = FadType(num_dof, (*x)[eqID[slot]]);
          v.setUpdateValue(!workset.ignore_residual);
          v.fastAccessDx(firstunk + slot) = workset.j_coeff;
        }
      }
      base += this->numTracerVar;

      if (!workset.transientTerms) {
        continue;
      }

      // ---------------- time derivative ----------------
      // counters restart for the val_dot fields
      int dotSlot = 0;
      int dotBase = 0;

      // node variables
      for (int j = dotBase; j < dotBase+this->numNodeVar; ++j, ++dotSlot) {
        ScalarT& v = (this->val_dot[j])(cell,node);
        v = FadType(num_dof, (*xdot)[eqID[dotSlot]]);
        v.fastAccessDx(firstunk + dotSlot) = workset.m_coeff;
      }
      dotBase += this->numNodeVar;

      // level variables: vectors first, then scalars
      for (int level = 0; level < this->numLevels; level++) {
        for (int j = dotBase; j < dotBase+this->numVectorLevelVar; j++) {
          for (int dim = 0; dim < this->numDims; ++dim, ++dotSlot) {
            ScalarT& v = (this->val_dot[j])(cell,node,level,dim);
            v = FadType(num_dof, (*xdot)[eqID[dotSlot]]);
            v.fastAccessDx(firstunk + dotSlot) = workset.m_coeff;
          }
        }
        for (int j = dotBase+this->numVectorLevelVar;
                 j < dotBase+this->numVectorLevelVar+this->numScalarLevelVar; j++, ++dotSlot) {
          ScalarT& v = (this->val_dot[j])(cell,node,level);
          v = FadType(num_dof, (*xdot)[eqID[dotSlot]]);
          v.fastAccessDx(firstunk + dotSlot) = workset.m_coeff;
        }
      }
      dotBase += this->numVectorLevelVar+this->numScalarLevelVar;

      // tracer variables
      for (int level = 0; level < this->numLevels; ++level) {
        for (int j = dotBase; j < dotBase+this->numTracerVar; ++j, ++dotSlot) {
          ScalarT& v = (this->val_dot[j])(cell,node,level);
          v = FadType(num_dof, (*xdot)[eqID[dotSlot]]);
          v.fastAccessDx(firstunk + dotSlot) = workset.m_coeff;
        }
      }
      dotBase += this->numTracerVar;
    }
  }
}
// Jacobian specialization: for every locally owned Dirichlet degree of freedom
// on the side, zeroes the matrix row, writes the field value into the residual
// vector, marks the row in the Dirichlet counter, and replaces the row entries
// with the field's sensitivities.
void panzer::ScatterDirichletResidual_Tpetra<panzer::Traits::Jacobian, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
  std::vector<GO> GIDs;

  // objects pulled from the workset and the linear object container
  std::string blockId = workset.block_id;
  const std::vector<std::size_t> & localCellIds = workset.cell_local_ids;

  Teuchos::RCP<typename LOC::VectorType> r = tpetraContainer_->get_f();
  Teuchos::RCP<typename LOC::CrsMatrixType> Jac = tpetraContainer_->get_A();

  Teuchos::ArrayRCP<double> residualView = r->get1dViewNonConst();
  Teuchos::ArrayRCP<double> counterView = dirichletCounter_->get1dViewNonConst();

  // NOTE: reordering these loops would likely improve performance.
  //       "getGIDFieldOffsets" may be expensive, "getElementGIDs" can be
  //       cheaper, but the LID lookup may then be more expensive.

  for(std::size_t cell = 0; cell < localCellIds.size(); ++cell) {
    std::size_t cellLocalId = localCellIds[cell];

    globalIndexer_->getElementGIDs(cellLocalId,GIDs);
    const std::vector<LO> & LIDs = globalIndexer_->getElementLIDs(cellLocalId);

    // each field to be scattered
    for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
      int fieldNum = fieldIds_[fieldIndex];

      // this call "should" get the right ordering according to the Intrepid basis
      const std::pair<std::vector<int>,std::vector<int> > & closure
        = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
      const std::vector<int> & elmtOffset = closure.first;
      const std::vector<int> & basisIdMap = closure.second;

      // each basis function on the side
      for(std::size_t basis = 0; basis < elmtOffset.size(); basis++) {
        int offset = elmtOffset[basis];
        LO lid = LIDs[offset];
        if(lid<0) {
          continue; // not on this processor
        }

        int basisId = basisIdMap[basis];

        // optionally skip degrees of freedom the BC is not applied to
        if (checkApplyBC_ && !applyBC_[fieldIndex](cell,basisId)) {
          continue;
        }

        // zero out the matrix row
        {
          std::size_t rowLength = Jac->getNumEntriesInLocalRow(lid);
          std::size_t numEntries = 0;
          Teuchos::Array<LO> rowIndices(rowLength);
          Teuchos::Array<double> rowValues(rowLength);

          Jac->getLocalRowCopy(lid,rowIndices,rowValues,numEntries);

          for(std::size_t e = 0; e < numEntries; e++) {
            rowValues[e] = 0.0;
          }

          Jac->replaceLocalValues(lid,rowIndices,rowValues);
        }

        GO gid = GIDs[offset];
        const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,basisId);

        residualView[lid] = scatterField.val();
        counterView[lid] = 1.0; // mark row as dirichlet

        // one matrix entry per sensitivity index: all DOFs on the cell
        std::vector<double> jacRow(scatterField.size(),0.0);
        for(int sens = 0; sens < scatterField.size(); ++sens) {
          jacRow[sens] = scatterField.fastAccessDx(sens);
        }
        TEUCHOS_ASSERT(jacRow.size()==GIDs.size());

        Jac->replaceGlobalValues(gid, GIDs, jacRow);
      }
    }
  }
}
// Jacobian specialization for blocked Tpetra containers: for every locally
// owned Dirichlet degree of freedom on the side, zeroes the row in each
// non-null column block, writes the field value into the block residual,
// marks the row in the Dirichlet counter, and replaces the row entries of each
// column block with the matching slice of the field's sensitivities.
void panzer::ScatterDirichletResidual_BlockedTpetra<panzer::Traits::Jacobian, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
  using Teuchos::RCP;
  using Teuchos::ArrayRCP;
  using Teuchos::ptrFromRef;
  using Teuchos::rcp_dynamic_cast;

  using Thyra::VectorBase;
  using Thyra::SpmdVectorBase;
  using Thyra::ProductVectorBase;
  using Thyra::BlockedLinearOpBase;

  std::vector<std::pair<int,GO> > GIDs;
  std::vector<LO> LIDs;

  // objects pulled from the workset and the linear object container
  std::string blockId = this->wda(workset).block_id;
  const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

  RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f());
  Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer_->get_A());

  // offsets of each field block inside the element GID list; the last entry
  // is a sentinel filled in per cell
  int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
  std::vector<int> blockOffsets(numFieldBlocks+1);
  for(int blk = 0; blk < numFieldBlocks; blk++) {
    blockOffsets[blk] = globalIndexer_->getBlockGIDOffset(blockId,blk);
  }

  // cache of the sub-block matrices already looked up
  std::unordered_map<std::pair<int,int>,Teuchos::RCP<CrsMatrixType>,panzer::pair_hash> jacTpetraBlocks;

  // NOTE: reordering these loops would likely improve performance.
  //       "getGIDFieldOffsets" may be expensive, "getElementGIDs" can be
  //       cheaper, but the LID lookup may then be more expensive.

  for(std::size_t cell = 0; cell < localCellIds.size(); ++cell) {
    std::size_t cellLocalId = localCellIds[cell];

    globalIndexer_->getElementGIDs(cellLocalId,GIDs);
    blockOffsets[numFieldBlocks] = GIDs.size();

    // calculate the local IDs for this element
    LIDs.resize(GIDs.size());
    for(std::size_t g = 0; g < GIDs.size(); g++) {
      // map of the block the GID belongs to, used for the local ID lookup
      RCP<const MapType> blockMap = blockedContainer_->getMapForBlock(GIDs[g].first);
      LIDs[g] = blockMap->getLocalElement(GIDs[g].second);
    }

    // each field to be scattered
    Teuchos::ArrayRCP<double> local_r, local_dc;
    for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
      int fieldNum = fieldIds_[fieldIndex];
      int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

      // local data of the Dirichlet counter block
      RCP<SpmdVectorBase<double> > dc = rcp_dynamic_cast<SpmdVectorBase<double> >(dirichletCounter_->getNonconstVectorBlock(blockRowIndex));
      dc->getNonconstLocalData(ptrFromRef(local_dc));

      // local data of the residual block
      RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
      block_r->getNonconstLocalData(ptrFromRef(local_r));

      // this call "should" get the right ordering according to the Intrepid basis
      const std::pair<std::vector<int>,std::vector<int> > & closure
        = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
      const std::vector<int> & elmtOffset = closure.first;
      const std::vector<int> & basisIdMap = closure.second;

      // each basis function on the side
      for(std::size_t basis = 0; basis < elmtOffset.size(); basis++) {
        int offset = elmtOffset[basis];
        int lid = LIDs[offset];
        if(lid<0) {
          continue; // not on this processor
        }

        int basisId = basisIdMap[basis];

        // optionally skip degrees of freedom the BC is not applied to
        if (checkApplyBC_ && !applyBC_[fieldIndex](cell,basisId)) {
          continue;
        }

        // zero out the matrix row in every column block
        for(int blockColIndex = 0; blockColIndex < numFieldBlocks; blockColIndex++) {
          int start = blockOffsets[blockColIndex];
          int end = blockOffsets[blockColIndex+1];

          if(end-start<=0) {
            continue;
          }

          // check the cache for the jacobian sub block
          std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
          Teuchos::RCP<CrsMatrixType> subJac = jacTpetraBlocks[blockIndex];

          // not cached yet: fetch it from the blocked operator and remember it
          if(subJac==Teuchos::null) {
            Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

            // block operator is null, don't do anything (it is excluded)
            if(Teuchos::is_null(tOp)) {
              continue;
            }

            Teuchos::RCP<OperatorType> tpetra_Op = rcp_dynamic_cast<ThyraLinearOp>(tOp)->getTpetraOperator();
            subJac = rcp_dynamic_cast<CrsMatrixType>(tpetra_Op,true);
            jacTpetraBlocks[blockIndex] = subJac;
          }

          std::size_t rowLength = subJac->getNumEntriesInLocalRow(lid);
          std::size_t numEntries = 0;
          Teuchos::Array<LO> rowIndices(rowLength);
          Teuchos::Array<double> rowValues(rowLength);

          subJac->getLocalRowCopy(lid,rowIndices,rowValues,numEntries);

          for(std::size_t e = 0; e < numEntries; e++) {
            rowValues[e] = 0.0;
          }

          subJac->replaceLocalValues(lid,rowIndices,rowValues);
        }

        const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,basisId);

        local_r[lid] = scatterField.val();
        local_dc[lid] = 1.0; // mark row as dirichlet

        // one matrix entry per sensitivity index: all DOFs on the cell
        std::vector<double> jacRow(scatterField.size(),0.0);
        for(int sens = 0; sens < scatterField.size(); ++sens) {
          jacRow[sens] = scatterField.fastAccessDx(sens);
        }
        TEUCHOS_ASSERT(jacRow.size()==GIDs.size());

        // write the sensitivities block by block
        for(int blockColIndex = 0; blockColIndex < numFieldBlocks; blockColIndex++) {
          int start = blockOffsets[blockColIndex];
          int end = blockOffsets[blockColIndex+1];

          if(end-start<=0) {
            continue;
          }

          // check the cache for the jacobian sub block
          std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
          Teuchos::RCP<CrsMatrixType> subJac = jacTpetraBlocks[blockIndex];

          // not cached yet: fetch it from the blocked operator and remember it
          if(subJac==Teuchos::null) {
            Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

            // block operator is null, don't do anything (it is excluded)
            if(Teuchos::is_null(tOp)) {
              continue;
            }

            Teuchos::RCP<OperatorType> tpetra_Op = rcp_dynamic_cast<ThyraLinearOp>(tOp)->getTpetraOperator();
            subJac = rcp_dynamic_cast<CrsMatrixType>(tpetra_Op,true);
            jacTpetraBlocks[blockIndex] = subJac;
          }

          // replace the row entries belonging to this column block
          subJac->replaceLocalValues(lid,
                                     Teuchos::arrayViewFromVector(LIDs).view(start,end-start),
                                     Teuchos::arrayViewFromVector(jacRow).view(start,end-start));
        }
      }
    }
  }
}
// Jacobian specialization for blocked Epetra containers: for every cell and
// scattered field, sums the field value into the block residual (when one is
// present) and sums the field's sensitivities, sliced per column block, into
// the corresponding Epetra sub-block matrix. A failed insertion raises a
// std::runtime_error describing the offending row.
void panzer::ScatterResidual_BlockedEpetra<panzer::Traits::Jacobian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
  using Teuchos::RCP;
  using Teuchos::ArrayRCP;
  using Teuchos::ptrFromRef;
  using Teuchos::rcp_dynamic_cast;

  using Thyra::VectorBase;
  using Thyra::SpmdVectorBase;
  using Thyra::ProductVectorBase;
  using Thyra::BlockedLinearOpBase;

  typedef BlockedEpetraLinearObjContainer BLOC;

  std::vector<std::pair<int,GO> > GIDs;
  std::vector<LO> LIDs;
  std::vector<double> jacRow;

  // objects pulled from the workset and the linear object container
  std::string blockId = this->wda(workset).block_id;
  const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

  RCP<const BLOC> blockedContainer = blockedContainer_;

  RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer->get_f());
  Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer->get_A());

  // offsets of each field block inside the sensitivity array; the last entry
  // is a sentinel filled in per basis function
  int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
  std::vector<int> blockOffsets(numFieldBlocks+1);
  for(int blk = 0; blk < numFieldBlocks; blk++) {
    blockOffsets[blk] = globalIndexer_->getBlockGIDOffset(blockId,blk);
  }

  // cache of the sub-block matrices already looked up
  std::unordered_map<std::pair<int,int>,Teuchos::RCP<Epetra_CrsMatrix>,panzer::pair_hash> jacEpetraBlocks;

  // NOTE: reordering these loops would likely improve performance.
  //       "getGIDFieldOffsets" may be expensive, "getElementGIDs" can be
  //       cheaper, but the LID lookup may then be more expensive.

  for(std::size_t cell = 0; cell < localCellIds.size(); ++cell) {
    std::size_t cellLocalId = localCellIds[cell];

    globalIndexer_->getElementGIDs(cellLocalId,GIDs,blockId);

    // calculate the local IDs for this element
    LIDs.resize(GIDs.size());
    for(std::size_t g = 0; g < GIDs.size(); g++) {
      // map of the block the GID belongs to, used for the local ID lookup
      RCP<const Epetra_Map> blockMap = blockedContainer->getMapForBlock(GIDs[g].first);
      LIDs[g] = blockMap->LID(GIDs[g].second);
    }

    // each field to be scattered
    Teuchos::ArrayRCP<double> local_r;
    for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
      int fieldNum = fieldIds_[fieldIndex];
      int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

      // local data of the residual block, when a residual exists
      if(r!=Teuchos::null) {
        RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
        block_r->getNonconstLocalData(ptrFromRef(local_r));
      }

      const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

      // each basis function of the field (currently nodes)
      for(std::size_t rowBasisNum = 0; rowBasisNum < elmtOffset.size(); rowBasisNum++) {
        const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,rowBasisNum);
        int rowOffset = elmtOffset[rowBasisNum];
        int r_lid = LIDs[rowOffset];

        // sum residual
        if(local_r!=Teuchos::null) {
          local_r[r_lid] += (scatterField.val());
        }

        blockOffsets[numFieldBlocks] = scatterField.size(); // add the sentinel

        // one matrix entry per sensitivity index: all DOFs on the cell
        jacRow.resize(scatterField.size());

        // For Neumann conditions with no dependence on degrees of freedom,
        // there should be no Jacobian contribution
        if(scatterField.size() == 0) {
          continue;
        }

        for(int sens = 0; sens < scatterField.size(); ++sens) {
          jacRow[sens] = scatterField.fastAccessDx(sens);
        }

        for(int blockColIndex = 0; blockColIndex < numFieldBlocks; blockColIndex++) {
          int start = blockOffsets[blockColIndex];
          int end = blockOffsets[blockColIndex+1];

          if(end-start<=0) {
            continue;
          }

          // check the cache for the jacobian sub block
          std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
          Teuchos::RCP<Epetra_CrsMatrix> subJac = jacEpetraBlocks[blockIndex];

          // not cached yet: fetch it from the blocked operator and remember it
          if(subJac==Teuchos::null) {
            Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

            // block operator is null, don't do anything (it is excluded)
            if(Teuchos::is_null(tOp)) {
              continue;
            }

            Teuchos::RCP<Epetra_Operator> eOp = Thyra::get_Epetra_Operator(*tOp);
            subJac = rcp_dynamic_cast<Epetra_CrsMatrix>(eOp,true);
            jacEpetraBlocks[blockIndex] = subJac;
          }

          // sum Jacobian
          int err = subJac->SumIntoMyValues(r_lid, end-start, &jacRow[start],&LIDs[start]);
          if(err==0) {
            continue;
          }

          // insertion failed: build a diagnostic message and throw
          RCP<const Epetra_Map> rr = blockedContainer->getMapForBlock(GIDs[start].first);
          bool sameColMap = subJac->ColMap().SameAs(*rr);

          std::stringstream ss;
          ss << "Failed inserting row: " << GIDs[rowOffset].second << " (" << r_lid << "): ";
          for(int i = start; i < end; i++) {
            ss << GIDs[i].second << " (" << LIDs[i] << ") ";
          }
          ss << std::endl;
          ss << "Into block " << blockRowIndex << ", " << blockColIndex << std::endl;

          ss << "scatter field = ";
          scatterFields_[fieldIndex].print(ss);
          ss << std::endl;

          ss << "Same map = " << (sameColMap ? "true" : "false") << std::endl;

          TEUCHOS_TEST_FOR_EXCEPTION(err!=0,std::runtime_error,ss.str());
        }
      } // end rowBasisNum
    } // end fieldIndex
  }
}
// Tangent specialization: returns the value part of `t` as a double.
double QCAD::EvaluatorTools<PHAL::AlbanyTraits::Tangent, Traits>::
getDoubleValue(const ScalarT& t) const
{
  const double plainValue = t.val();
  return plainValue;
}
// Tangent specialization: for every locally owned degree of freedom of each
// scattered field, writes the field's derivative components into the
// dfdp_vectors_ (zero when the field carries no derivatives) and marks the
// entry in the Dirichlet counter. Without scatterIC_ only the side closure of
// the field is visited and the optional applyBC_ mask is honoured; with
// scatterIC_ all field offsets of the element are visited.
void panzer::ScatterDirichletResidual_Tpetra<panzer::Traits::Tangent, TRAITS,LO,GO,NodeT>::
evaluateFields(typename TRAITS::EvalData workset)
{
  std::vector<GO> GIDs;
  std::vector<LO> LIDs;

  // objects pulled from the workset and the linear object container
  std::string blockId = this->wda(workset).block_id;
  const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

  // target vector: residual, or solution when scattering initial conditions
  Teuchos::RCP<typename LOC::VectorType> r = (!scatterIC_) ?
    tpetraContainer_->get_f() :
    tpetraContainer_->get_x();

  Teuchos::ArrayRCP<double> r_array = r->get1dViewNonConst();
  Teuchos::ArrayRCP<double> counterView = dirichletCounter_->get1dViewNonConst();

  // NOTE: reordering these loops would likely improve performance.
  //       "getGIDFieldOffsets" may be expensive, "getElementGIDs" can be
  //       cheaper, but the LID lookup may then be more expensive.

  for(std::size_t cell = 0; cell < localCellIds.size(); ++cell) {
    std::size_t cellLocalId = localCellIds[cell];

    globalIndexer_->getElementGIDs(cellLocalId,GIDs);

    // calculate the local IDs for this element
    LIDs.resize(GIDs.size());
    for(std::size_t g = 0; g < GIDs.size(); g++) {
      LIDs[g] = r->getMap()->getLocalElement(GIDs[g]);
    }

    // each field to be scattered
    for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
      int fieldNum = fieldIds_[fieldIndex];

      if (scatterIC_) {
        // this call "should" get the right ordering according to the Intrepid2 basis
        const std::vector<int> & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum);

        // each basis function of the field
        for(std::size_t basis = 0; basis < elmtOffset.size(); basis++) {
          int offset = elmtOffset[basis];
          LO lid = LIDs[offset];
          if(lid<0) {
            continue; // not on this processor!
          }

          ScalarT value = (scatterFields_[fieldIndex])(cell,basis);

          // scatter the sensitivity vectors
          if(value.size()==0) {
            for(std::size_t d = 0; d < dfdp_vectors_.size(); d++) {
              dfdp_vectors_[d][lid] = 0.0;
            }
          }
          else {
            for(int d = 0; d < value.size(); d++) {
              dfdp_vectors_[d][lid] = value.fastAccessDx(d);
            }
          }

          // record that you set a dirichlet condition
          counterView[lid] = 1.0;
        }
      }
      else {
        // this call "should" get the right ordering according to the Intrepid2 basis
        const std::pair<std::vector<int>,std::vector<int> > & closure
          = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
        const std::vector<int> & elmtOffset = closure.first;
        const std::vector<int> & basisIdMap = closure.second;

        // each basis function on the side
        for(std::size_t basis = 0; basis < elmtOffset.size(); basis++) {
          int offset = elmtOffset[basis];
          LO lid = LIDs[offset];
          if(lid<0) {
            continue; // not on this processor!
          }

          int basisId = basisIdMap[basis];

          // optionally skip degrees of freedom the BC is not applied to
          if (checkApplyBC_ && !applyBC_[fieldIndex](cell,basisId)) {
            continue;
          }

          ScalarT value = (scatterFields_[fieldIndex])(cell,basisId);

          // scatter the sensitivity vectors
          if(value.size()==0) {
            for(std::size_t d = 0; d < dfdp_vectors_.size(); d++) {
              dfdp_vectors_[d][lid] = 0.0;
            }
          }
          else {
            for(int d = 0; d < value.size(); d++) {
              dfdp_vectors_[d][lid] = value.fastAccessDx(d);
            }
          }

          // record that you set a dirichlet condition
          counterView[lid] = 1.0;
        }
      }
    }
  }
}