// hex tests
//
// Builds a DOFManager on the mesh returned by buildQuadMesh(Comm,2,2,2,1,1,1),
// registers three fields ("ux","uy","p") that all use the HGRAD HEX C1 pattern,
// and checks either the element GIDs (serial run) or the raw element
// connectivity (two-process run).
TEUCHOS_UNIT_TEST(tCubeHexMeshDOFManager, buildTest_hex)
{
   // build global (or serial communicator)
   #ifdef HAVE_MPI
      stk::ParallelMachine Comm = MPI_COMM_WORLD;
   #else
      stk::ParallelMachine Comm = WHAT_TO_DO_COMM;
   #endif

   int numProcs = stk::parallel_machine_size(Comm);
   int myRank = stk::parallel_machine_rank(Comm);

   // the expected values below only cover one or two processes
   TEUCHOS_ASSERT(numProcs<=2);

   // build a geometric pattern from a single basis
   RCP<const panzer::FieldPattern> patternC1
         = buildFieldPattern<Intrepid::Basis_HGRAD_HEX_C1_FEM<double,FieldContainer> >();

   Teuchos::RCP<panzer_stk::STK_Interface> mesh = buildQuadMesh(Comm,2,2,2,1,1,1);
   RCP<panzer::ConnManager<int,int> > connManager
         = Teuchos::rcp(new panzer_stk::STKConnManager(mesh));
   RCP<panzer::DOFManager<int,int> > dofManager = rcp(new panzer::DOFManager<int,int>());

   // a freshly constructed manager has no orientations and no connection manager
   TEST_EQUALITY(dofManager->getOrientationsRequired(),false);
   TEST_EQUALITY(dofManager->getConnManager(),Teuchos::null);

   dofManager->setConnManager(connManager,MPI_COMM_WORLD);
   TEST_EQUALITY(dofManager->getConnManager(),connManager);

   dofManager->addField("ux",patternC1);
   dofManager->addField("uy",patternC1);
   dofManager->addField("p",patternC1);

   std::vector<std::string> fieldOrder;
   fieldOrder.push_back("ux");
   fieldOrder.push_back("uy");
   fieldOrder.push_back("p");
   dofManager->setFieldOrder(fieldOrder);

   dofManager->buildGlobalUnknowns();
   dofManager->printFieldInformation(out);

   if(numProcs==1) {
      std::vector<int> gids_v;
      int * gids = 0;

      // NOTE(review): this unconditionally fails the serial run; presumably it
      // marks the serial expectations below as unverified - confirm before removing.
      TEST_ASSERT(false);

      // element 0
      dofManager->getElementGIDs(0,gids_v);
      TEST_EQUALITY(gids_v.size(),24);
      // TEST_EQUALITY does not stop the test, so bail out explicitly instead of
      // indexing past the end of a short (or empty) GID vector.
      if(gids_v.size()!=24)
         return;

      gids = &gids_v[0];
      TEST_EQUALITY(gids[0],0); TEST_EQUALITY(gids[1],1); TEST_EQUALITY(gids[2],2);
      TEST_EQUALITY(gids[3],3); TEST_EQUALITY(gids[4],4); TEST_EQUALITY(gids[5],5);
      TEST_EQUALITY(gids[6],12); TEST_EQUALITY(gids[7],13); TEST_EQUALITY(gids[8],14);
      TEST_EQUALITY(gids[9], 9); TEST_EQUALITY(gids[10],10); TEST_EQUALITY(gids[11],11);

      // second half of the element's GIDs
      gids = &gids_v[12];
      TEST_EQUALITY(gids[0],27); TEST_EQUALITY(gids[1],28); TEST_EQUALITY(gids[2],29);
      TEST_EQUALITY(gids[3],30); TEST_EQUALITY(gids[4],31); TEST_EQUALITY(gids[5],32);
      TEST_EQUALITY(gids[6],39); TEST_EQUALITY(gids[7],40); TEST_EQUALITY(gids[8],41);
      TEST_EQUALITY(gids[9],36); TEST_EQUALITY(gids[10],37); TEST_EQUALITY(gids[11],38);

      // element looked up through mesh->elementLocalId(5)
      // (the original comment called this "element 6")
      dofManager->getElementGIDs(mesh->elementLocalId(5),gids_v);
      TEST_EQUALITY(gids_v.size(),24);
      if(gids_v.size()!=24)
         return;

      gids = &gids_v[0];
      TEST_EQUALITY(gids[0],27); TEST_EQUALITY(gids[1],28); TEST_EQUALITY(gids[2],29);
      TEST_EQUALITY(gids[3],30); TEST_EQUALITY(gids[4],31); TEST_EQUALITY(gids[5],32);
      TEST_EQUALITY(gids[6],39); TEST_EQUALITY(gids[7],40); TEST_EQUALITY(gids[8],41);
      TEST_EQUALITY(gids[9],36); TEST_EQUALITY(gids[10],37); TEST_EQUALITY(gids[11],38);

      gids = &gids_v[12];
      TEST_EQUALITY(gids[0],54); TEST_EQUALITY(gids[1],55); TEST_EQUALITY(gids[2],56);
      TEST_EQUALITY(gids[3],57); TEST_EQUALITY(gids[4],58); TEST_EQUALITY(gids[5],59);
      TEST_EQUALITY(gids[6],66); TEST_EQUALITY(gids[7],67); TEST_EQUALITY(gids[8],68);
      TEST_EQUALITY(gids[9],63); TEST_EQUALITY(gids[10],64); TEST_EQUALITY(gids[11],65);
   }
   else if(myRank==0) {
      // element 7: check the connectivity reported by the connection manager
      const int * gids = connManager->getConnectivity(mesh->elementLocalId(7));
      TEST_EQUALITY(connManager->getConnectivitySize(mesh->elementLocalId(7)),8);

      TEST_EQUALITY(gids[0],12); TEST_EQUALITY(gids[1],13);
      TEST_EQUALITY(gids[2],16); TEST_EQUALITY(gids[3],15);
      TEST_EQUALITY(gids[4],21); TEST_EQUALITY(gids[5],22);
      TEST_EQUALITY(gids[6],25); TEST_EQUALITY(gids[7],24);
   }
   else if(myRank==1) {
      // element 2: check the connectivity reported by the connection manager
      const int * gids = connManager->getConnectivity(mesh->elementLocalId(2));
      TEST_EQUALITY(connManager->getConnectivitySize(mesh->elementLocalId(2)),8);

      TEST_EQUALITY(gids[0],1); TEST_EQUALITY(gids[1],2);
      TEST_EQUALITY(gids[2],5); TEST_EQUALITY(gids[3],4);
      TEST_EQUALITY(gids[4],10); TEST_EQUALITY(gids[5],11);
      TEST_EQUALITY(gids[6],14); TEST_EQUALITY(gids[7],13);
   }
}
int main(int argc, char* argv[]) { #ifdef HAVE_MPI MPI::Init(argc, argv); #endif RCP<MxComm> myComm = rcp(new MxComm()); #if 0 #ifdef HAVE_MPI MPI::Init(argc, argv); //MPI_Init(argc, argv); Epetra_MpiComm myComm(MPI_COMM_WORLD); #else Epetra_SerialComm myComm; #endif #endif // input file method #if 1 std::string inFile; Teuchos::CommandLineProcessor cmdp(false, true); cmdp.setOption("infile", &inFile, "XML format input file."); if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { return -1; } if (inFile == "") { std::cout << "Please specify an input file using --infile=your_file.mx\n"; exit(0); } // now read the input file with trilinos XML reader Teuchos::XMLObject xmlObj(Teuchos::FileInputSource(inFile).getObject()); // get simulation dimension int dim = atoi(MxUtil::XML::getAttr("dim", xmlObj).c_str()); if (dim < 1 or dim > 3) { std::cout << "Simulation dimension invalid or not given, using 3D.\n"; dim = 3; } // get simulation type std::string domain = MxUtil::XML::getAttr("domain", xmlObj).c_str(); if (domain != "frequency" and domain != "time") { std::cout << "Simulation domain invalid or not given, using frequency-domain.\n"; domain = "frequency"; } // create problem MxProblem<1> * prob1d; MxProblem<2> * prob2d; MxProblem<3> * prob3d; switch (dim) { case 1: prob1d = new MxProblem<1>(xmlObj, myComm); prob1d->solve(); delete prob1d; break; case 2: prob2d = new MxProblem<2>(xmlObj, myComm); prob2d->solve(); delete prob2d; break; case 3: prob3d = new MxProblem<3>(xmlObj, myComm); prob3d->solve(); delete prob3d; break; } #endif #if 0 // epetra stuff test MxMap map(10, 0, myComm); Epetra_CrsMatrix mat(Copy, map, 0); int ind = 2; double val = 0; mat.InsertGlobalValues(1, 1, &val, &ind); ind = 3; val = 4; mat.InsertGlobalValues(1, 1, &val, &ind); mat.FillComplete(map, map); Epetra_Vector myvec(map); myvec.Random(); std::cout << myvec; mat.Apply(myvec, myvec); std::cout << myvec; Epetra_CrsMatrix copy(mat); std::cout << mat; 
MxUtil::Epetra::stripZeros(mat); std::cout << mat; //throw 1; #endif typedef MxDimVector<double, 3> vecd3; typedef MxDimVector<int, 3> veci3; vecd3 midPt(0); #if 0 //std::cout << "Crab cavity setup:\n"; int crabNumCells = 4; double crabCellLen = 2.0 * 0.0192; //meters double crabCavRad = 0.04719; double crabIrisRad = 0.015; double crabCavRho = 0.0136; double crabIrisRho = 0.00331; int crabCellRes = 40; int padCells = 2; int cnx, cny, cnz; double clx, cly, clz; double cox, coy, coz; double crabDelta = crabCellLen / double(crabCellRes); cnz = crabNumCells * crabCellRes + 2 * padCells; clz = double(cnz) * crabDelta; coz = -0.5 * clz; cny = cnx = 2 * (int(ceil(crabCavRad / crabDelta)) + padCells); cly = clx = double(cnx) * crabDelta; coy = cox = -0.5 * clx; veci3 crabN; crabN[0] = cnx; crabN[1] = cny; crabN[2] = cnz; vecd3 crabL; crabL[0] = clx; crabL[1] = cly; crabL[2] = clz; vecd3 crabO; crabO[0] = cox; crabO[1] = coy; crabO[2] = coz; //crabN.print(); //crabL.print(); //crabO.print(); MxGrid<3> crabGrid(crabO, crabN, crabL, &myComm); crabGrid.print(); MxCrabCav crabCav(midPt, crabNumCells, crabCellLen, crabIrisRad, crabCavRad, crabIrisRho, crabCavRho); crabCav.save(crabGrid); Teuchos::ParameterList crabList; crabList.set("geo-mg : levels", 1); crabList.set("geo-mg : smoothers : sweeps", 5); crabList.set("amg : smoothers : sweeps", 1); crabList.set("amg : smoothers : type", "Chebyshev"); crabList.set("eigensolver : output", 2); crabList.set("eigensolver : nev", 15); crabList.set("eigensolver : tol", 1.e-8); crabList.set("eigensolver : block size", 2); crabList.set("eigensolver : num blocks", 30); crabList.set("eigensolver : spectrum", "LM"); crabList.set("wave operator : invert", true); crabList.set("wave operator : invert : tol", 1.e-10); //crabList.set("wave operator : invert : shift", 1000.0); crabList.set("wave operator : invert : max basis size", 40); MxEMSim<dim> crabSim; crabSim.setGrid(&crabGrid); crabSim.setPEC(&crabCav); //crabSim.setGrid(&sphGrid); 
//crabSim.setPEC(&ell); crabSim.setParameters(crabList); crabSim.setup(); MxSolver<dim> * solver; solver = new MxSolver<dim>(&crabSim, crabList); solver->solve(); delete solver; //return 1; #endif // optimized phc cavity #if 0 double rodRad = 0.003175; // meters const int numRods = 24; double rodx[numRods] = {0.0158406582694, 0.0551748491968, 0.0209567636489, 0.0384658321918, 0.00792032913471, 0.0338604938991, 0.00477355412058, 0.00485955186622, -0.00792032913471, -0.0213143552977, -0.0161832095283, -0.0336062803256, -0.0158406582694, -0.0551748491968, -0.0209567636489, -0.0384658321918, -0.00792032913471, -0.0338604938991, -0.00477355412058, -0.00485955186622, 0.00792032913471, 0.0213143552977, 0.0161832095283, 0.0336062803256}; double rody[numRods] = {0.0, -0.00724351649877, 0.006587367621, 0.0165969314144, 0.013718412474, 0.044161062805, 0.0214427735115, 0.041610853563, 0.013718412474, 0.0514045793038, 0.0148554058905, 0.0250139221487, 1.9399211446e-18, 0.00724351649877, -0.006587367621, -0.0165969314144, -0.013718412474, -0.044161062805, -0.0214427735115, -0.041610853563, -0.013718412474, -0.0514045793038, -0.0148554058905, -0.0250139221487}; std::vector<MxShape<3> *> rods; MxShapeUnion<3> rodsShape; vecd3 rodPos; vecd3 zhat(0); zhat[2] = 1.0; for (int i = 0; i < numRods; i++) { rodPos[0] = rodx[i]; rodPos[1] = rody[i]; rodPos[2] = 0.0; rods.push_back(new MxCylinder(rodPos, zhat, rodRad)); rodsShape.add(rods[i]); } MxDimMatrix<double, 3> sapphEps(0); sapphEps(0, 0) = 9.3; sapphEps(1, 1) = 9.3; sapphEps(2, 2) = 11.5; MxDielectric<3> phcDiel; phcDiel.add(&rodsShape, sapphEps); // conducting cavity double cavLen = 0.019624116824498831; double cavRad = 0.1; MxCylinder cavCyl(0, zhat, cavRad); MxSlab<3> cavCaps(0, zhat, cavLen); MxShapeIntersection<3> phcCav; phcCav.add(&cavCyl); phcCav.add(&cavCaps); // setup grid int rodDiaCells = 6; int pad = 2; double delta = 2.0 * rodRad / double(rodDiaCells); veci3 phcN; phcN[0] = phcN[1] = int(2.0 * cavRad / delta) + 2 * pad; 
phcN[2] = int(cavLen / delta) + 2 * pad; vecd3 phcL; phcL[0] = phcL[1] = delta * double(phcN[0]); phcL[2] = delta * double(phcN[2]); vecd3 phcO; phcO[0] = phcO[1] = -0.5 * phcL[0]; phcO[2] = -0.5 * phcL[2]; MxGrid<3> phcGrid(phcO, phcN, phcL, &myComm); phcGrid.print(); Teuchos::ParameterList phcList; phcList.set("geo-mg : levels", 1); phcList.set("geo-mg : smoothers : sweeps", 5); phcList.set("eigensolver : output", 2); phcList.set("eigensolver : nev", 15); phcList.set("eigensolver : tol", 1.e-8); phcList.set("eigensolver : block size", 1); phcList.set("eigensolver : num blocks", 30); phcList.set("eigensolver : spectrum", "LM"); phcList.set("wave operator : invert", true); phcList.set("wave operator : invert : tol", 1.e-8); //phcList.set("wave operator : invert : shift", 1000.0); phcList.set("wave operator : invert : max basis size", 40); MxEMSim<dim> phcSim; phcSim.setGrid(&phcGrid); //phcSim.setPEC(&phcCav); phcSim.setDielectric(&phcDiel); phcSim.setParameters(phcList); phcSim.setup(); MxSolver<dim> * solver; solver = new MxSolver<dim>(&phcSim, phcList); solver->solve(); delete solver; for (int i = 0; i < numRods; i++) delete rods[i]; #endif #if 0 double sphR = 0.37; int sphN = 64; MxEllipsoid ell(0.0, sphR); MxGrid<3> sphGrid(-0.5, sphN, 1.0, &myComm); sphGrid.print(); MxDimMatrix<double, 3> rotSapphEps(0); rotSapphEps(0, 0) = 10.225; rotSapphEps(1, 1) = 10.225; rotSapphEps(2, 2) = 9.95; rotSapphEps(0, 1) = rotSapphEps(1, 0) = -0.825; rotSapphEps(0, 2) = rotSapphEps(2, 0) = -0.67360967926537398; rotSapphEps(1, 2) = rotSapphEps(2, 1) = 0.67360967926537398; MxDielectric<3> phcDiel; phcDiel.add(&ell, rotSapphEps); vecd3 ell2Loc(0); ell2Loc[0] = 0.6; vecd3 ell3Loc(0); ell3Loc[0] = 0.3; ell3Loc[2] = 0.3; MxEllipsoid ell2(ell2Loc, sphR); MxEllipsoid ell3(ell3Loc, sphR); MxShapeUnion<3> shUnion; shUnion.add(&ell); shUnion.add(&ell2); shUnion.add(&ell3); //shUnion.save(sphGrid); MxShapeIntersection<3> shInt; shInt.add(&ell); shInt.add(&ell2); shInt.add(&ell3); 
//shInt.save(sphGrid); MxShapeSubtract<3> shSub; shSub.setBaseShape(&ell); shSub.subtractShape(&ell2); shSub.subtractShape(&ell3); //shSub.save(sphGrid); MxDielectric<3> dielEll; MxDimMatrix<double, 3> epsEll(vecd3(10.0)); // isotropic eps = 10 dielEll.add(&ell, epsEll); Teuchos::ParameterList sphList; sphList.set("geo-mg : levels", 1); sphList.set("geo-mg : smoothers : sweeps", 4); sphList.set("eigensolver : output", 2); sphList.set("eigensolver : nev", 12); sphList.set("eigensolver : tol", 1.e-8); sphList.set("eigensolver : block size", 1); sphList.set("eigensolver : num blocks", 30); sphList.set("eigensolver : spectrum", "LM"); sphList.set("wave operator : invert", true); sphList.set("wave operator : invert : tol", 1.e-8); //sphList.set("wave operator : invert : shift", -0.1); sphList.set("wave operator : invert : shift", 1.0); sphList.set("wave operator : invert : max basis size", 40); MxEMSim<dim> sphSim; sphSim.setGrid(&sphGrid); //sphSim.setDielectric(&dielEll); sphSim.setDielectric(&phcDiel); //sphSim.setPEC(&sphCav); //sphSim.setPEC(&ell); sphSim.setParameters(sphList); sphSim.setup(); MxSolver<dim> * solver; solver = new MxSolver<dim>(&sphSim, sphList); solver->solve(); delete solver; #endif #ifdef HAVE_MPI MPI::Finalize(); //MPI_Finalize(); #endif return 0; }
//! Returns a newly allocated, default-constructed Hierarchy owned by an RCP.
virtual RCP<Hierarchy> CreateHierarchy() const {
  RCP<Hierarchy> hierarchy = rcp(new Hierarchy());
  return hierarchy;
}
/// Solves the linear system `mat * x = b`.
///
/// Depending on `sb.getSolverMethod()` either a direct solver or an iterative
/// (Belos) solver is used. The solver/problem objects are cached in
/// `m_direct` / `m_solver` / `m_preconditioner` and reused by later calls.
///
/// @param x  receives a copy of the solution (`x.size()` entries are written)
/// @param b  right-hand side values
/// @param sb solver options; for the iterative path the diagnostics
///           "converged", "set_up_time", "net_time", "time", "num_iter" and
///           "residual_norm" are updated on it
void CrsMatrixWrapper<ST>::solve(const Teuchos::ArrayView<ST>& x,
                                 const Teuchos::ArrayView<const ST>& b,
                                 escript::SolverBuddy& sb) const
{
    typedef VectorType<ST> Vector;

    RCP<Vector> X = rcp(new Vector(mat.getDomainMap(), 1));
    RCP<Vector> B = rcp(new Vector(mat.getRangeMap(), b, b.size(), 1));
    RCP<const Matrix> A = rcpFromRef(mat);

    if (escript::isDirectSolver(sb.getSolverMethod())) {
        RCP<DirectSolverType<Matrix,Vector> > solver(m_direct);
        if (solver.is_null()) {
            // first call: create the solver, cache it and factorise
            solver = createDirectSolver<Matrix,Vector>(sb, A, X, B);
            m_direct = solver;
            if (sb.isVerbose()) {
                std::cout << "Using " << solver->description() << std::endl;
                std::cout << "Performing symbolic factorization..." << std::flush;
            }
            solver->symbolicFactorization();
            if (sb.isVerbose()) {
                std::cout << "done\nPerforming numeric factorization..." << std::flush;
            }
            solver->numericFactorization();
            if (sb.isVerbose()) {
                std::cout << "done\n" << std::flush;
            }
        } else {
            // cached solver: only hand over the new matrix values / vectors
            if (sb.isVerbose()) {
                std::cout << "Using " << solver->description() << std::endl;
            }
            if (m_resetCalled) {
                // matrix structure never changes
                solver->setA(A, Amesos2::SYMBFACT);
                m_resetCalled = false;
            }
            solver->setX(X);
            solver->setB(B);
        }
        if (sb.isVerbose()) {
            std::cout << "Solving system..." << std::flush;
        }
        solver->solve();
        if (sb.isVerbose()) {
            std::cout << "done" << std::endl;
            RCP<Teuchos::FancyOStream> fos(Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)));
            solver->printTiming(*fos, Teuchos::VERB_HIGH);
        }

    } else { // iterative solver
        double t0 = Teuchos::Time::wallTime();
        RCP<ProblemType<ST> > problem(m_solver);
        if (problem.is_null()) {
            // first call: build and cache the linear problem and preconditioner
            problem = rcp(new ProblemType<ST>(A, X, B));
            m_solver = problem;
            RCP<OpType<ST> > prec = createPreconditioner<ST>(A, sb);
            m_preconditioner = prec;
            if (!prec.is_null()) {
                // Trilinos BiCGStab does not support left preconditioners
                if (sb.getSolverMethod() == escript::SO_METHOD_BICGSTAB)
                    problem->setRightPrec(prec);
                else
                    problem->setLeftPrec(prec);
            }
            problem->setHermitian(sb.isSymmetric());
            problem->setProblem();
        } else {
            // cached problem: restart the timers so "net_time" covers this solve only
            for (const auto& t: problem->getTimers()) {
                t->reset();
            }
            if (m_resetCalled) {
                // special case for MueLu preconditioner - call Reuse...
                // which honours the "reuse: type" parameter.
                // NOTE(review): unlike the direct path, m_resetCalled is not
                // cleared here, so the reuse call repeats on every solve -
                // confirm whether that is intended.
                RCP<MueLu::TpetraOperator<ST,LO,GO,NT> > mlOp =
                    Teuchos::rcp_dynamic_cast<MueLu::TpetraOperator<ST,LO,GO,NT> >(m_preconditioner);
                if (mlOp.get()) {
                    RCP<Matrix> A_(Teuchos::rcp_const_cast<Matrix>(A));
                    MueLu::ReuseTpetraPreconditioner(A_, *mlOp);
                }
            }
            problem->setProblem(X, B);
        }

        double t1 = Teuchos::Time::wallTime();
        RCP<SolverType<ST> > solver = createSolver<ST>(sb);
        if (sb.isVerbose()) {
            std::cout << "Using " << solver->description() << std::endl;
        }
        solver->setProblem(problem);
        Belos::ReturnType result = solver->solve();
        double t2 = Teuchos::Time::wallTime();
        const int numIters = solver->getNumIters();

        // Best effort: fall back to the requested tolerance when the solver
        // cannot report the achieved one (achievedTol() throws in that case).
        double tol = sb.getTolerance();
        try {
            tol = solver->achievedTol();
        } catch (...) {
        }

        // Record convergence independently of verbosity; previously the
        // diagnostic was only set when verbose output was enabled.
        const bool converged = (result == Belos::Converged);
        if (converged) {
            sb.updateDiagnostics("converged", true);
        }
        if (sb.isVerbose()) {
            if (converged) {
                std::cout << "The solver took " << numIters
                   << " iteration(s) to reach a residual tolerance of "
                   << tol << "." << std::endl;
            } else {
                std::cout << "The solver took " << numIters
                   << " iteration(s), but did not reach a relative residual "
                   "tolerance of " << sb.getTolerance() << "." << std::endl;
            }
        }

        // accumulate the time spent in all timers attached to the problem
        double solverTime = 0.;
        for (const auto& t: problem->getTimers()) {
            solverTime += t->totalElapsedTime();
        }
        sb.updateDiagnostics("set_up_time", t1-t0);
        sb.updateDiagnostics("net_time", solverTime);
        sb.updateDiagnostics("time", t2-t0);
        sb.updateDiagnostics("num_iter", numIters);
        sb.updateDiagnostics("residual_norm", tol);
    }
    X->get1dCopy(x, x.size());
}
/// Creates the DivisionFunction for the given operand and returns it through
/// the SimpleFunctionObject base-class handle.
RCP<SimpleFunctionObject<OperandType> >
DivisionFunctionXMLConverter<OperandType>::getSpecificSimpleFunction(
  OperandType operand) const
{
  RCP<SimpleFunctionObject<OperandType> > divisionFunction =
    rcp(new DivisionFunction<OperandType>(operand));
  return divisionFunction;
}
// Exercises panzer::PointValues with Fad scalars stored in PHX::MDField
// arrays: four quadrilateral cells of size 0.5 x 0.5 and three reference
// points; checks reference coordinates, mapped coordinates, Jacobian,
// Jacobian determinant and inverse Jacobian.
TEUCHOS_UNIT_TEST(point_values, md_field_evaluate)
{
  typedef panzer::Traits::FadType ScalarType;
  typedef PHX::MDField<ScalarType> ArrayType;
  typedef PHX::KokkosViewFactory<ScalarType,PHX::Device> ViewFactory;
  typedef PHX::MDField<double>::size_type size_type;

  // ---- cell description and point rule ---------------------------------
  const int num_cells = 4;
  const int base_cell_dimension = 2;
  int num_points = 3;

  Teuchos::RCP<shards::CellTopology> topo =
    Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData< shards::Quadrilateral<4> >()));
  const panzer::CellData cell_data(num_cells,topo);

  RCP<PointRule> point_rule = rcp(new PointRule("RandomPoints",num_points, cell_data));
  TEST_EQUALITY(point_rule->num_points,num_points);

  panzer::PointValues<ScalarType,PHX::MDField<ScalarType> > point_values;
  panzer::MDFieldArrayFactory af("prefix_");
  point_values.setupArrays(point_rule,af);

  // ---- node coordinates --------------------------------------------------
  // Vertex ordering used below (must agree with the shards topology,
  // otherwise the determinants come out negative):
  //
  //    3(0,1)---2(1,1)
  //      |    0  |
  //      |       |
  //    0(0,0)---1(1,0)
  const size_type derivative_dim = 4;
  const std::vector<PHX::index_size_type> ddims(1,derivative_dim);

  const int num_vertices = point_rule->topology->getNodeCount();
  ArrayType node_coordinates =
    af.buildArray<ScalarType,Cell,NODE,Dim>("node_coordinates",num_cells, num_vertices, base_cell_dimension);
  node_coordinates.setFieldData(ViewFactory::buildView(node_coordinates.fieldTag(),ddims));

  const size_type x = 0;
  const size_type y = 1;
  for (size_type cell = 0; cell < node_coordinates.dimension(0); ++cell) {
    // cells are laid out two per row
    int xleft = cell % 2;
    int yleft = int(cell/2);

    node_coordinates(cell,0,x) = xleft*0.5;
    node_coordinates(cell,0,y) = yleft*0.5;

    node_coordinates(cell,1,x) = (xleft+1)*0.5;
    node_coordinates(cell,1,y) = yleft*0.5;

    node_coordinates(cell,2,x) = (xleft+1)*0.5;
    node_coordinates(cell,2,y) = (yleft+1)*0.5;

    node_coordinates(cell,3,x) = xleft*0.5;
    node_coordinates(cell,3,y) = (yleft+1)*0.5;

    out << "Cell " << cell << " = ";
    for (int vertex = 0; vertex < 4; vertex++) {
      out << "(" << node_coordinates(cell,vertex,x) << ", "
                 << node_coordinates(cell,vertex,y) << ") ";
    }
    out << std::endl;
  }

  // ---- evaluation points (reference coordinates) -------------------------
  ArrayType point_coordinates =
    af.buildArray<ScalarType,IP,Dim>("points",num_points, base_cell_dimension);
  point_coordinates.setFieldData(ViewFactory::buildView(point_coordinates.fieldTag(),ddims));

  // point 0: (0, 0)
  point_coordinates(0,0) = 0.0;
  point_coordinates(0,1) = 0.0;
  // point 1: (0.5, 0.5)
  point_coordinates(1,0) = 0.5;
  point_coordinates(1,1) = 0.5;
  // point 2: (-0.5, 0)
  point_coordinates(2,0) = -0.5;
  point_coordinates(2,1) = 0.0;

  // ---- allocate the PointValues fields and evaluate ----------------------
  point_values.coords_ref.setFieldData(ViewFactory::buildView(point_values.coords_ref.fieldTag(),ddims));
  point_values.node_coordinates.setFieldData(ViewFactory::buildView(point_values.node_coordinates.fieldTag(),ddims));
  point_values.point_coords.setFieldData(ViewFactory::buildView(point_values.point_coords.fieldTag(),ddims));
  point_values.jac.setFieldData(ViewFactory::buildView(point_values.jac.fieldTag(),ddims));
  point_values.jac_inv.setFieldData(ViewFactory::buildView(point_values.jac_inv.fieldTag(),ddims));
  point_values.jac_det.setFieldData(ViewFactory::buildView(point_values.jac_det.fieldTag(),ddims));

  point_values.evaluateValues(node_coordinates,point_coordinates);

  // every cell is a 0.5 x 0.5 square
  const double dx = 0.5;
  const double dy = 0.5;

  // check the reference values (ensure copying)
  for (int p = 0; p < num_points; p++) {
    for (size_type d = 0; d < base_cell_dimension; d++) {
      TEST_EQUALITY(point_values.coords_ref(p,d).val(),point_coordinates(p,d).val());
    }
  }

  // check the shifted values (ensure physical mapping)
  for (int c = 0; c < num_cells; c++) {
    for (int p = 0; p < num_points; p++) {
      // map [-1,1] onto the cell, starting at its vertex 0
      double x = dx*(point_coordinates(p,0).val()+1.0)/2.0 + node_coordinates(c,0,0).val();
      double y = dy*(point_coordinates(p,1).val()+1.0)/2.0 + node_coordinates(c,0,1).val();
      TEST_FLOATING_EQUALITY(point_values.point_coords(c,p,0).val(),x,1e-10);
      TEST_FLOATING_EQUALITY(point_values.point_coords(c,p,1).val(),y,1e-10);
    }
  }

  // check the jacobian
  for (int c = 0; c < num_cells; c++) {
    for (int p = 0; p < num_points; p++) {
      TEST_FLOATING_EQUALITY(point_values.jac(c,p,0,0).val(),dx/2.0,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac(c,p,0,1).val(),0.0,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac(c,p,1,0).val(),0.0,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac(c,p,1,1).val(),dy/2.0,1e-10);
    }
  }

  // check the jacobian determinant
  for (int c = 0; c < num_cells; c++) {
    for (int p = 0; p < num_points; p++) {
      TEST_FLOATING_EQUALITY(point_values.jac_det(c,p).val(),dy*dx/4.0,1e-10);
    }
  }

  // overwrite jac_det through flat indexing and echo it back (output only)
  out << "TESTING" << std::endl;
  for (size_type c = 0; c < point_values.jac_det.size(); c++) {
    point_values.jac_det[c] = c+1.0;
    out << " " << point_values.jac_det[c] << ", " << c+1.0 << std::endl;
  }

  out << "TESTING B" << std::endl;
  for (size_type c = 0; c < point_values.jac_det.size(); c++) {
    out << " " << point_values.jac_det[c] << ", " << c << std::endl;
  }

  // check the inverse jacobian
  for (int c = 0; c < num_cells; c++) {
    for (int p = 0; p < num_points; p++) {
      TEST_FLOATING_EQUALITY(point_values.jac_inv(c,p,0,0).val(),2.0/dx,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac_inv(c,p,0,1).val(),0.0,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac_inv(c,p,1,0).val(),0.0,1e-10);
      TEST_FLOATING_EQUALITY(point_values.jac_inv(c,p,1,1).val(),2.0/dy,1e-10);
    }
  }
}
// Builds the wrapped Epetra_Import from a (source, target) map pair.
// Warning: the argument order is swapped on purpose when forwarding -
// Epetra(Target, Source) vs. Tpetra(Source, Target).
EpetraImport::EpetraImport(const Teuchos::RCP<const Map<int,int> > & source,
                           const Teuchos::RCP<const Map<int,int> > & target)
  : import_(rcp(new Epetra_Import(toEpetra(target), toEpetra(source))))
{
}
/// Builds the "Aggregates" object for the given level.
///
/// The enabled aggregation algorithms (selected through boolean entries of
/// the factory's parameter list) are run one after another on the level's
/// graph. Nodes flagged as Dirichlet boundary, or contained in the optional
/// "OnePt"/"SmallAgg" maps, get a special status before aggregation starts.
///
/// @param currentLevel level that provides "Graph" and "DofsPerNode" and
///                     receives the resulting "Aggregates"
/// @throws Exceptions::RuntimeError if nodes remain unaggregated after all
///         algorithms have run
void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList& pL = GetParameterList();
  bDefinitionPhase_ = false;  // definition phase is finished, now all aggregation algorithm information is fixed

  const bool bUseOnePtAggregationAlgorithm             = pL.get<bool>("UseOnePtAggregationAlgorithm");
  const bool bUseSmallAggregationAlgorithm             = pL.get<bool>("UseSmallAggregatesAggregationAlgorithm");
  const bool bUsePreserveDirichletAggregationAlgorithm = pL.get<bool>("UsePreserveDirichletAggregationAlgorithm");
  const bool bUseUncoupledAggregationAlgorithm         = pL.get<bool>("UseUncoupledAggregationAlgorithm");
  const bool bUseMaxLinkAggregationAlgorithm           = pL.get<bool>("UseMaxLinkAggregationAlgorithm");
  const bool bUseIsolatedNodeAggregationAlgorithm      = pL.get<bool>("UseIsolatedNodeAggregationAlgorithm");
  const bool bUseEmergencyAggregationAlgorithm         = pL.get<bool>("UseEmergencyAggregationAlgorithm");

  // define aggregation algorithms
  RCP<const FactoryBase> graphFact = GetFactory("Graph");

  // TODO Can we keep different aggregation algorithms over more Build calls?
  // The push_back order below is the order in which the phases are executed.
  algos_.clear();
  if (bUseOnePtAggregationAlgorithm)             algos_.push_back(rcp(new OnePtAggregationAlgorithm             (graphFact)));
  if (bUseSmallAggregationAlgorithm)             algos_.push_back(rcp(new SmallAggregationAlgorithm             (graphFact)));
  if (bUseUncoupledAggregationAlgorithm)         algos_.push_back(rcp(new UncoupledAggregationAlgorithm         (graphFact)));
  if (bUseMaxLinkAggregationAlgorithm)           algos_.push_back(rcp(new MaxLinkAggregationAlgorithm           (graphFact)));
  if (bUsePreserveDirichletAggregationAlgorithm) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
  if (bUseIsolatedNodeAggregationAlgorithm)      algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm      (graphFact)));
  if (bUseEmergencyAggregationAlgorithm)         algos_.push_back(rcp(new EmergencyAggregationAlgorithm         (graphFact)));

  // optional user-provided maps; an empty name means "not used"
  std::string mapOnePtName    = pL.get<std::string>("OnePt aggregate map name"),
              mapSmallAggName = pL.get<std::string>("SmallAgg aggregate map name");
  RCP<const Map> OnePtMap, SmallAggMap;
  if (mapOnePtName.length()) {
    RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
    OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get());
  }
  if (mapSmallAggName.length()) {
    RCP<const FactoryBase> mapSmallAggFact = GetFactory("SmallAgg aggregate map factory");
    SmallAggMap = currentLevel.Get<RCP<const Map> >(mapSmallAggName, mapSmallAggFact.get());
  }

  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

  // Build
  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
  aggregates->setObjectLabel("UC");

  const LO nRows = graph->GetNodeNumVertices();

  // construct aggStat information
  std::vector<unsigned> aggStat(nRows, NodeStats::READY);

  ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
  if (dirichletBoundaryMap != Teuchos::null) {
    for (LO i = 0; i < nRows; i++)
      if (dirichletBoundaryMap[i] == true)
        aggStat[i] = NodeStats::BOUNDARY;
  }

  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
  GO indexBase = graph->GetDomainMap()->getIndexBase();
  if (SmallAggMap != Teuchos::null || OnePtMap != Teuchos::null) {
    for (LO i = 0; i < nRows; i++) {
      // reconstruct global row id (FIXME only works for contiguous maps)
      GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;

      // a node is flagged as soon as any of its dofs is in the map;
      // OnePt is applied after SmallAgg and therefore takes precedence
      if (SmallAggMap != null) {
        for (LO kr = 0; kr < nDofsPerNode; kr++) {
          if (SmallAggMap->isNodeGlobalElement(grid + kr))
            aggStat[i] = MueLu::NodeStats::SMALLAGG;
        }
      }

      if (OnePtMap != null) {
        for (LO kr = 0; kr < nDofsPerNode; kr++) {
          if (OnePtMap->isNodeGlobalElement(grid + kr))
            aggStat[i] = MueLu::NodeStats::ONEPT;
        }
      }
    }
  }

  const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
  GO numGlobalRows = 0;
  if (IsPrint(Statistics1))
    sumAll(comm, as<GO>(nRows), numGlobalRows);

  LO numNonAggregatedNodes = nRows;
  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
  for (size_t a = 0; a < algos_.size(); a++) {
    std::string phase = algos_[a]->description();
    SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);

    algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);

    if (IsPrint(Statistics1)) {
      // per-phase statistics are differences against the previous totals
      GO numLocalAggregated = nRows - numNonAggregatedNodes, numGlobalAggregated = 0;
      GO numLocalAggs       = aggregates->GetNumAggregates(), numGlobalAggs = 0;
      sumAll(comm, numLocalAggregated, numGlobalAggregated);
      sumAll(comm, numLocalAggs,       numGlobalAggs);

      double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
      GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
                              << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
                              << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
                              << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
      numGlobalAggregatedPrev = numGlobalAggregated;
      numGlobalAggsPrev       = numGlobalAggs;
    }
  }

  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");

  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0) << aggregates->description() << std::endl;
}
// quad tests TEUCHOS_UNIT_TEST(tSquareQuadMeshDOFManager_edgetests, buildTest_quad_edge_orientations_fail) { // build global (or serial communicator) #ifdef HAVE_MPI stk_classic::ParallelMachine Comm = MPI_COMM_WORLD; #else stk_classic::ParallelMachine Comm = WHAT_TO_DO_COMM; #endif int numProcs = stk_classic::parallel_machine_size(Comm); TEUCHOS_ASSERT(numProcs==1); // build a geometric pattern from a single basis RCP<const panzer::FieldPattern> patternI1 = buildFieldPattern<Intrepid2::Basis_HCURL_QUAD_I1_FEM<double,FieldContainer> >(); out << *patternI1 << std::endl; RCP<panzer::ConnManager<int,int> > connManager = buildQuadMesh(Comm,2,2,1,1); RCP<panzer::DOFManagerFEI<int,int> > dofManager = rcp(new panzer::DOFManagerFEI<int,int>()); dofManager->setOrientationsRequired(true); TEST_EQUALITY(dofManager->getOrientationsRequired(),true); TEST_EQUALITY(dofManager->getConnManager(),Teuchos::null); dofManager->setConnManager(connManager,MPI_COMM_WORLD); TEST_EQUALITY(dofManager->getConnManager(),connManager); dofManager->addField("b",patternI1); dofManager->buildGlobalUnknowns(); for(int i=0;i<4;i++) { const int * indices = connManager->getConnectivity(i); TEST_EQUALITY(connManager->getConnectivitySize(i),8); out << "cell = " << i << ": "; for(int j=0;j<4;j++) out << indices[j+4] << " "; out << std::endl; } out << "GIDS" << std::endl; for(int i=0;i<4;i++) { std::vector<int> gids; dofManager->getElementGIDs(i,gids); TEST_EQUALITY(gids.size(),4); out << "cell = " << i << ": "; for(int j=0;j<4;j++) out << gids[j] << " "; out << std::endl; } std::vector<int> total; dofManager->getOwnedIndices(total); TEST_EQUALITY(total.size(),12); dofManager->printFieldInformation(out); }
// Sets up an Ifpack2 Schwarz-type preconditioner for A_.
//
// When the "subdomain solver parameters" sublist requests the "user"
// partitioner, A_ must be a BlockedCrsMatrix: a partition map ("block
// seeds") is generated, the blocked matrix is merged into a single
// CrsMatrix, and the preconditioner is built on the merged matrix.
// Otherwise the preconditioner is built directly on A_.
void Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetupSchwarz(Level& currentLevel) {
  if (this->IsSetup())
    this->GetOStream(Warnings0) << "MueLu::Ifpack2Smoother::Setup(): Setup() has already been called" << std::endl;

  // If we are doing "user" partitioning, we assume that what the user
  // really wants to do is make tiny little subdomains with one row
  // assigned to each subdomain. The rows used for these little
  // subdomains correspond to those in the 2nd block row. Then,
  // if we overlap these mini-subdomains, we will do something that
  // looks like Vanka (grabbing all velocities associated with
  // each pressure unknown). In addition, we put all Dirichlet points
  // as a little mini-domain.
  ParameterList& paramList = const_cast<ParameterList&>(this->GetParameterList());

  const std::string sublistName = "subdomain solver parameters";
  const std::string partName    = "partitioner: type";

  bool        isBlockedMatrix = false;
  RCP<Matrix> merged2Mat;

  if (paramList.isSublist(sublistName)) {
    ParameterList& subList = paramList.sublist(sublistName);

    const bool userPartitioning =
        subList.isParameter(partName) && subList.get<std::string>(partName) == "user";

    if (userPartitioning) {
      isBlockedMatrix = true;

      RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A_);
      TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null(), Exceptions::BadCast,
                                 "Matrix A must be of type BlockedCrsMatrix.");

      const size_t numVels = bA->getMatrix(0,0)->getNodeNumRows();
      const size_t numPres = bA->getMatrix(1,0)->getNodeNumRows();
      const size_t numRows = A_->getNodeNumRows();

      // one seed (i.e. one block) per row of the 2nd block row; all other
      // rows start out as "invalid"
      ArrayRCP<LocalOrdinal> blockSeeds(numRows, Teuchos::OrdinalTraits<LocalOrdinal>::invalid());

      size_t numBlocks = 0;
      const size_t endOfPressureRows = numVels + numPres;
      for (size_t rowOfB = numVels; rowOfB < endOfPressureRows; ++rowOfB) {
        blockSeeds[rowOfB] = numBlocks;
        ++numBlocks;
      }

      // bA was already verified above to be a BlockedCrsMatrix view of A_
      RCP<CrsMatrix> mergedMat = bA->Merge();
      merged2Mat = rcp(new CrsMatrixWrap(mergedMat));

      // Add Dirichlet rows to the list of seeds
      ArrayRCP<const bool> boundaryNodes = Utilities::DetectDirichletRows(*merged2Mat, 0.0);

      bool haveBoundary = false;
      for (LO i = 0; i < boundaryNodes.size(); i++) {
        if (!boundaryNodes[i])
          continue;

        // FIXME:
        // 1. would not this [] overlap with some in the previous blockSeed loop?
        // 2. do we need to distinguish between pressure and velocity Dirichlet b.c.
        blockSeeds[i] = numBlocks;
        haveBoundary  = true;
      }
      // all Dirichlet rows share one extra block
      if (haveBoundary)
        numBlocks++;

      subList.set("partitioner: map",         blockSeeds);
      subList.set("partitioner: local parts", as<int>(numBlocks));
    }
  }

  RCP<const Tpetra::RowMatrix<SC, LO, GO, NO> > tpA;
  if (isBlockedMatrix)
    tpA = Utilities::Op2NonConstTpetraRow(merged2Mat);
  else
    tpA = Utilities::Op2NonConstTpetraRow(A_);

  prec_ = Ifpack2::Factory::create(type_, tpA, overlap_);
  SetPrecParameters();

  prec_->initialize();
  prec_->compute();
}
// Returns a copy-constructed clone of this smoother, with this smoother's
// parameter list set on it again.
RCP<MueLu::SmootherPrototype<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Copy() const {
  RCP<Ifpack2Smoother> clone = rcp(new Ifpack2Smoother(*this));
  clone->SetParameterList(this->GetParameterList());
  return clone;
}
// Finish assembly of the A22 block: flush pending rows, fill-complete the
// sub-matrices, build the diagonal inverses/preconditioner and, on the first
// call, count the stored entries. Does nothing if the linear problem has
// already been finalized (isLinearProblemSet_).
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
finalizeProblemValues
()
{
  if (isLinearProblemSet_) {
    return; // nothing to do
  }

  insertRow(); // Dump any remaining entries

  if (firstTime_) {
    RCP<ParameterList> pl = rcp(new ParameterList(parameterList_->sublist("fillCompleteList")));
    pl->set( "Preserve Local Graph", true );
    cmsOnCmsMatrix_->fillComplete(pl);

    // The row count does not change inside this block; query it once.
    const size_t numCmsRows = cmsMap_->getNodeNumElements();

    // Pass 1: per-row entry counts, used to size the static graph.
    ArrayRCP<size_t> numEntriesPerRow(numCmsRows);
    for (LocalOrdinal i = OTLO::zero(); static_cast<size_t>(i) < numCmsRows; ++i) {
      numEntriesPerRow[i] = cmsOnCmsMatrix_->getNumEntriesInLocalRow( i );
    }

    // Pass 2: copy the sparsity pattern into a static-profile graph.
    cmsOnCmsGraph_ = rcp(new GRAPH(cmsMap_, cmsOnCmsMatrix_->getColMap(), numEntriesPerRow, Tpetra::StaticProfile));
    for (LocalOrdinal i = OTLO::zero(); static_cast<size_t>(i) < numCmsRows; ++i) {
      // Local row views carry local column indices, hence LocalOrdinal.
      ArrayView<const LocalOrdinal> indices;
      ArrayView<const MatScalar> values;
      cmsOnCmsMatrix_->getLocalRowView( i, indices, values );
      cmsOnCmsGraph_->insertLocalIndices( i, indices );
    }
    cmsOnCmsGraph_->fillComplete();

    // Pass 3: copy the values into a matrix built on the static graph.
    cmsOnCmsMatrixStatic_ = rcp(new MAT(cmsOnCmsGraph_));
    cmsOnCmsMatrixStatic_->setAllToScalar(STMS::zero());
    for (LocalOrdinal i = OTLO::zero(); static_cast<size_t>(i) < numCmsRows; ++i) {
      ArrayView<const LocalOrdinal> indices;
      ArrayView<const MatScalar> values;
      cmsOnCmsMatrix_->getLocalRowView( i, indices, values );
      cmsOnCmsMatrixStatic_->sumIntoLocalValues( i, indices(), values() );
    }
    cmsOnCmsMatrixStatic_->fillComplete();
    cmsOnCmsMatrixOp_ = rcp(new MMOP(cmsOnCmsMatrixStatic_));
  }

  if (!cmsOnCmsMatrixStatic_->isFillComplete()) {
    RCP<ParameterList> pl = rcp(new ParameterList(parameterList_->sublist("fillCompleteList")));
    cmsOnCmsMatrixStatic_->fillComplete(pl);
  }

  RCP<ParameterList> pl = rcp(new ParameterList(parameterList_->sublist("fillCompleteList")));

  if (!isFLinear_) {
    insertRow(); // Dump any remaining entries
    cmsOnDensityMatrix_->fillComplete(densityMap_, cmsMap_, pl);
  }

  if (!hasDensityOnCms_) // Confirm that densityOnCmsMatrix is zero
  {
    // Check intentionally disabled in the original code:
    // Scalar normvalue = densityOnCmsMatrix_->normInf();
    // TEUCHOS_TEST_FOR_EXCEPT(normvalue!=0.0);
  } else {
    insertRow(); // Dump any remaining entries
    densityOnCmsMatrix_->fillComplete(cmsMap_, densityMap_, pl);
  }

  // Form the inverse of the densityOnDensityMatrix
  densityOnDensityInverse_->reciprocal(*densityOnDensityMatrix_);

  // Use a diagonal preconditioner for the cmsOnCmsMatrix
  if (firstTime_) {
    RCP<const MAT> const_matrix = Teuchos::rcp_implicit_cast<const MAT>(cmsOnCmsMatrixStatic_);
    cmsOnCmsInverse_ = rcp(new DIAGONAL(const_matrix));
    cmsOnCmsInverseOp_ = rcp(new DIAGONAL_OP(cmsOnCmsInverse_));
#ifdef KDEBUG
    TEUCHOS_TEST_FOR_EXCEPT(cmsOnCmsInverse_==Teuchos::null);
#endif
    cmsOnCmsInverse_->initialize();
  }
  cmsOnCmsInverse_->compute();

  // Compute the total number of entries in the A22 block
  // NOTE(review): densityOnCmsMatrix_ is queried even when hasDensityOnCms_
  // is false and it was not fill-completed above -- confirm this is intended.
  if (firstTime_) {
    nnz_ = cmsOnCmsMatrixStatic_->getGlobalNumEntries() +
           cmsOnDensityMatrix_->getGlobalNumEntries() +
           densityOnCmsMatrix_->getGlobalNumEntries() +
           densityOnDensityMatrix_->getGlobalLength();
  }

  isLinearProblemSet_ = true;
  firstTime_ = false;
} //end finalizeProblemValues
// Checks intrepid_same_geom / intrepid_equals on pairs of HGRAD bases
// defined on hexahedra and quadrilaterals (C1 and C2).
TEUCHOS_UNIT_TEST(tFieldPattern, test_equals)
{
   out << note << std::endl;

   RCP<Intrepid::Basis<double,FieldContainer> > basisA;
   RCP<Intrepid::Basis<double,FieldContainer> > basisB;

   // HEX C1 vs HEX C1: same geometry, equal
   basisA = rcp(new Intrepid::Basis_HGRAD_HEX_C1_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_HEX_C1_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // HEX C1 vs HEX C2: same geometry, not equal
   basisA = rcp(new Intrepid::Basis_HGRAD_HEX_C1_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_HEX_C2_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(not intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // HEX C2 vs HEX C2: same geometry, equal
   basisA = rcp(new Intrepid::Basis_HGRAD_HEX_C2_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_HEX_C2_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // QUAD C1 vs HEX C1: different geometry, not equal
   basisA = rcp(new Intrepid::Basis_HGRAD_QUAD_C1_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_HEX_C1_FEM<double,FieldContainer>);
   TEST_ASSERT(not intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(not intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // QUAD C1 vs QUAD C1: same geometry, equal
   basisA = rcp(new Intrepid::Basis_HGRAD_QUAD_C1_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_QUAD_C1_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // QUAD C2 vs QUAD C1: same geometry, not equal
   basisA = rcp(new Intrepid::Basis_HGRAD_QUAD_C2_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_QUAD_C1_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(not intrepid_equals(basisA,basisB,__FILE__,__LINE__));

   // QUAD C2 vs QUAD C2: same geometry, equal
   basisA = rcp(new Intrepid::Basis_HGRAD_QUAD_C2_FEM<double,FieldContainer>);
   basisB = rcp(new Intrepid::Basis_HGRAD_QUAD_C2_FEM<double,FieldContainer>);
   TEST_ASSERT(intrepid_same_geom(basisA,basisB,__FILE__,__LINE__));
   TEST_ASSERT(intrepid_equals(basisA,basisB,__FILE__,__LINE__));
}
/** Wrap this object in an RCP<ExprBase> built with rcp(this).
 * NOTE(review): presumably the returned handle owns the object; confirm it
 * is heap-allocated and not already owned by another RCP. */
virtual RCP<ExprBase> getRcp()
{
  RCP<ExprBase> self = rcp(this);
  return self;
}
// Expand (sum)^n for an Integer exponent n and an Add base using
// multinomial coefficients. Any other Pow is returned unchanged.
RCP<const Basic> pow_expand(const RCP<const Pow> &self)
{
    // Guard: only Integer exponent on an Add base is handled here.
    if (!is_a<Integer>(*self->exp_) || !is_a<Add>(*self->base_))
        return self;

    map_vec_mpz r;
    int n = rcp_static_cast<const Integer>(self->exp_)->as_int();

    RCP<const Add> sum = rcp_static_cast<const Add>(self->base_);
    umap_basic_int base_dict = sum->dict_;
    if (!(sum->coef_->is_zero())) {
        // Add the numerical coefficient into the dictionary. This
        // allows a little bit easier treatment below.
        insert(base_dict, sum->coef_, one);
    }
    int m = base_dict.size();
    multinomial_coefficients_mpz(m, n, r);

    umap_basic_int rd;
    // This speeds up overall expansion. For example for the benchmark
    // (y + x + z + w)^60 it improves the timing from 135ms to 124ms.
    rd.reserve(2*r.size());

    RCP<const Number> add_overall_coeff = zero;
    for (auto &p: r) {
        // p.first: one power per entry of base_dict; p.second: coefficient.
        auto power = p.first.begin();
        auto i2 = base_dict.begin();
        map_basic_basic d;
        RCP<const Number> overall_coeff = one;

        for (; power != p.first.end(); ++power, ++i2) {
            if (!(*power > 0))
                continue;

            RCP<const Integer> term_exp = rcp(new Integer(*power));
            RCP<const Basic> factor = i2->first;

            if (is_a<Integer>(*factor)) {
                // Integer factor: fold its power into the numeric coefficient.
                imulnum(outArg(overall_coeff),
                        rcp_static_cast<const Number>(
                            rcp_static_cast<const Integer>(factor)->powint(*term_exp)));
            } else if (is_a<Symbol>(*factor)) {
                Mul::dict_add_term(d, term_exp, factor);
            } else {
                RCP<const Basic> exp2, t, tmp;
                tmp = pow(factor, term_exp);
                Mul::as_base_exp(tmp, outArg(exp2), outArg(t));
                Mul::dict_add_term(d, exp2, t);
            }

            if (!(i2->second->is_one())) {
                imulnum(outArg(overall_coeff),
                        pownum(i2->second,
                               rcp_static_cast<const Number>(term_exp)));
            }
        }

        RCP<const Basic> term = Mul::from_dict(overall_coeff, d);
        RCP<const Number> coef2 = rcp(new Integer(p.second));
        if (is_a_Number(*term)) {
            iaddnum(outArg(add_overall_coeff),
                    mulnum(rcp_static_cast<const Number>(term), coef2));
        } else {
            if (is_a<Mul>(*term) &&
                !(rcp_static_cast<const Mul>(term)->coef_->is_one())) {
                // Tidy up things like {2x: 3} -> {x: 6}
                imulnum(outArg(coef2),
                        rcp_static_cast<const Mul>(term)->coef_);
                term = Mul::from_dict(one,
                        rcp_static_cast<const Mul>(term)->dict_);
            }
            Add::dict_add_term(rd, coef2, term);
        }
    }

    RCP<const Basic> result = Add::from_dict(add_overall_coeff, rd);
    return result;
}
// Runs setupArrays/evaluateValues on PointValues with a Fad scalar type and
// Intrepid2 field containers on a 2x2 arrangement of quad cells.
TEUCHOS_UNIT_TEST(point_values, intrepid_container_dfad)
{
  PHX::KokkosDeviceSession session;

  Teuchos::RCP<shards::CellTopology> topo =
    Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData< shards::Quadrilateral<4> >()));

  const int num_cells = 4;
  const int base_cell_dimension = 2;
  const panzer::CellData cell_data(num_cells,topo);
  int num_points = 3;

  RCP<PointRule> point_rule = rcp(new PointRule("RandomPoints",num_points, cell_data));

  TEST_EQUALITY(point_rule->num_points,num_points);

  typedef panzer::Traits::FadType ScalarType;

  panzer::PointValues<ScalarType,Intrepid2::FieldContainer<ScalarType> > point_values;
  panzer::Intrepid2FieldContainerFactory af;

  point_values.setupArrays(point_rule,af);

  // Set up node coordinates.  Here we assume the following
  // ordering.  This needs to be consistent with shards topology,
  // otherwise we will get negative determinates

  // 3(0,1)---2(1,1)
  //   |    0  |
  //   |       |
  // 0(0,0)---1(1,0)

  const int num_vertices = point_rule->topology->getNodeCount();
  Intrepid2::FieldContainer<ScalarType> node_coordinates(num_cells, num_vertices,
                                                         base_cell_dimension);

  typedef panzer::ArrayTraits<ScalarType,FieldContainer<ScalarType> >::size_type size_type;
  const size_type x = 0;
  const size_type y = 1;
  for (size_type cell = 0; cell < node_coordinates.dimension(0); ++cell) {
    // Cells are laid out two per row, each 0.5 wide and 0.5 high.
    int xleft = cell % 2;
    int yleft = int(cell/2);

    const double x_lo = xleft*0.5;
    const double x_hi = (xleft+1)*0.5;
    const double y_lo = yleft*0.5;
    const double y_hi = (yleft+1)*0.5;

    node_coordinates(cell,0,x) = x_lo;
    node_coordinates(cell,0,y) = y_lo;

    node_coordinates(cell,1,x) = x_hi;
    node_coordinates(cell,1,y) = y_lo;

    node_coordinates(cell,2,x) = x_hi;
    node_coordinates(cell,2,y) = y_hi;

    node_coordinates(cell,3,x) = x_lo;
    node_coordinates(cell,3,y) = y_hi;

    out << "Cell " << cell << " = ";
    for(int i=0; i<4; i++)
      out << "(" << node_coordinates(cell,i,x) << ", " << node_coordinates(cell,i,y) << ") ";
    out << std::endl;
  }

  // Build the evaluation points
  Intrepid2::FieldContainer<ScalarType> point_coordinates(num_points, base_cell_dimension);
  point_coordinates(0,0) =  0.0;  point_coordinates(0,1) = 0.0; // point (0, 0)
  point_coordinates(1,0) =  0.5;  point_coordinates(1,1) = 0.5; // point (0.5, 0.5)
  point_coordinates(2,0) = -0.5;  point_coordinates(2,1) = 0.0; // point (-0.5, 0)

  point_values.evaluateValues(node_coordinates,point_coordinates);
}
// Verifies the data layouts and field names that setupArrays() produces
// when PointValues is backed by PHX::MDField with the "prefix_" factory.
TEUCHOS_UNIT_TEST(point_values, md_field_setup)
{
  typedef PHX::KokkosViewFactory<double,PHX::Device> ViewFactory;
  typedef PHX::MDField<double>::size_type size_type;

  Teuchos::RCP<shards::CellTopology> topo =
    Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData< shards::Quadrilateral<4> >()));

  const int num_cells = 20;
  const int base_cell_dimension = 2;
  const panzer::CellData cell_data(num_cells,topo);
  unsigned int num_points = 3;

  RCP<PointRule> point_rule = rcp(new PointRule("RandomPoints",num_points, cell_data));

  TEST_EQUALITY(point_rule->num_points,Teuchos::as<int>(num_points));

  panzer::PointValues<double,PHX::MDField<double> > point_values;
  panzer::MDFieldArrayFactory af("prefix_");

  point_values.setupArrays(point_rule,af);

  // check to make sure all data layouts and field names are as
  // expected. In a simulation environment the field manager will
  // build these values.

  // reference coordinates: (point, dim)
  TEST_EQUALITY(point_values.coords_ref.fieldTag().dataLayout().rank(),2);
  TEST_EQUALITY(point_values.coords_ref.fieldTag().dataLayout().dimension(0),num_points);
  TEST_EQUALITY(point_values.coords_ref.fieldTag().dataLayout().dimension(1),base_cell_dimension);
  TEST_EQUALITY(point_values.coords_ref.fieldTag().name(),"prefix_coords_ref");

  // node coordinates: (cell, node, dim)
  TEST_EQUALITY(point_values.node_coordinates.fieldTag().dataLayout().rank(),3);
  TEST_EQUALITY(point_values.node_coordinates.fieldTag().dataLayout().dimension(0),num_cells);
  TEST_EQUALITY(point_values.node_coordinates.fieldTag().dataLayout().dimension(1),4);
  TEST_EQUALITY(point_values.node_coordinates.fieldTag().dataLayout().dimension(2),base_cell_dimension);
  TEST_EQUALITY(point_values.node_coordinates.fieldTag().name(),"prefix_node_coordinates");

  // physical point coordinates: (cell, point, dim)
  TEST_EQUALITY(point_values.point_coords.fieldTag().dataLayout().rank(),3);
  TEST_EQUALITY(point_values.point_coords.fieldTag().dataLayout().dimension(0),num_cells);
  TEST_EQUALITY(point_values.point_coords.fieldTag().dataLayout().dimension(1),num_points);
  TEST_EQUALITY(point_values.point_coords.fieldTag().dataLayout().dimension(2),base_cell_dimension);
  TEST_EQUALITY(point_values.point_coords.fieldTag().name(),"prefix_point_coords");

  // jacobian: (cell, point, dim, dim)
  TEST_EQUALITY(point_values.jac.fieldTag().dataLayout().rank(),4);
  TEST_EQUALITY(point_values.jac.fieldTag().dataLayout().dimension(0),num_cells);
  TEST_EQUALITY(point_values.jac.fieldTag().dataLayout().dimension(1),num_points);
  TEST_EQUALITY(point_values.jac.fieldTag().dataLayout().dimension(2),base_cell_dimension);
  TEST_EQUALITY(point_values.jac.fieldTag().dataLayout().dimension(3),base_cell_dimension);
  TEST_EQUALITY(point_values.jac.fieldTag().name(),"prefix_jac");

  // inverse jacobian: (cell, point, dim, dim)
  TEST_EQUALITY(point_values.jac_inv.fieldTag().dataLayout().rank(),4);
  TEST_EQUALITY(point_values.jac_inv.fieldTag().dataLayout().dimension(0),num_cells);
  TEST_EQUALITY(point_values.jac_inv.fieldTag().dataLayout().dimension(1),num_points);
  TEST_EQUALITY(point_values.jac_inv.fieldTag().dataLayout().dimension(2),base_cell_dimension);
  TEST_EQUALITY(point_values.jac_inv.fieldTag().dataLayout().dimension(3),base_cell_dimension);
  TEST_EQUALITY(point_values.jac_inv.fieldTag().name(),"prefix_jac_inv");

  // jacobian determinant: (cell, point)
  TEST_EQUALITY(point_values.jac_det.fieldTag().dataLayout().rank(),2);
  TEST_EQUALITY(point_values.jac_det.fieldTag().dataLayout().dimension(0),num_cells);
  TEST_EQUALITY(point_values.jac_det.fieldTag().dataLayout().dimension(1),num_points);
  TEST_EQUALITY(point_values.jac_det.fieldTag().name(),"prefix_jac_det");

  // Attach storage to coords_ref, built from its tag plus one extra dimension.
  const size_type derivative_dim = 4;
  const std::vector<PHX::index_size_type> ddims(1,derivative_dim);
  point_values.coords_ref.setFieldData(ViewFactory::buildView(point_values.coords_ref.fieldTag(),ddims));
}
/** Return a ref count pointer to self, built with rcp(this).
 * NOTE(review): presumably the returned handle owns the object; confirm it
 * is heap-allocated and not already owned by another RCP. */
virtual RCP<CurveBase> getRcp()
{
  RCP<CurveBase> self = rcp(this);
  return self;
}
// Evaluates PointValues on a 2x2 arrangement of quad cells (each 0.5 x 0.5)
// using Kokkos::DynRankView storage, then checks that the reference points
// are stored unchanged and that the mapped physical points match the
// expected mapping from [-1,1]^2 into each cell.
TEUCHOS_UNIT_TEST(point_values, intrepid_container)
{
  Teuchos::RCP<shards::CellTopology> topo =
    Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData< shards::Quadrilateral<4> >()));

  const int num_cells = 4;
  const int base_cell_dimension = 2;
  const panzer::CellData cell_data(num_cells,topo);
  int num_points = 3;

  RCP<PointRule> point_rule = rcp(new PointRule("RandomPoints",num_points, cell_data));

  TEST_EQUALITY(point_rule->num_points,num_points);

  panzer::PointValues<double,Kokkos::DynRankView<double,PHX::Device> > point_values;
  panzer::Intrepid2FieldContainerFactory af;

  point_values.setupArrays(point_rule,af);

  // Set up node coordinates.  Here we assume the following
  // ordering.  This needs to be consistent with shards topology,
  // otherwise we will get negative determinates

  // 3(0,1)---2(1,1)
  //   |    0  |
  //   |       |
  // 0(0,0)---1(1,0)

  const int num_vertices = point_rule->topology->getNodeCount();
  // The view is given a label, like point_coordinates below.
  Kokkos::DynRankView<double,PHX::Device> node_coordinates("node_coordinates",
                                                           num_cells, num_vertices,
                                                           base_cell_dimension);

  typedef panzer::ArrayTraits<double,Kokkos::DynRankView<double,PHX::Device> >::size_type size_type;
  const size_type x = 0;
  const size_type y = 1;
  for (size_type cell = 0; cell < node_coordinates.dimension(0); ++cell) {
    int xleft = cell % 2;
    int yleft = int(cell/2);

    node_coordinates(cell,0,x) = xleft*0.5;
    node_coordinates(cell,0,y) = yleft*0.5;

    node_coordinates(cell,1,x) = (xleft+1)*0.5;
    node_coordinates(cell,1,y) = yleft*0.5;

    node_coordinates(cell,2,x) = (xleft+1)*0.5;
    node_coordinates(cell,2,y) = (yleft+1)*0.5;

    node_coordinates(cell,3,x) = xleft*0.5;
    node_coordinates(cell,3,y) = (yleft+1)*0.5;

    out << "Cell " << cell << " = ";
    for(int i=0;i<4;i++)
      out << "(" << node_coordinates(cell,i,x) << ", " << node_coordinates(cell,i,y) << ") ";
    out << std::endl;
  }

  // Build the evaluation points
  Kokkos::DynRankView<double,PHX::Device> point_coordinates("a",num_points, base_cell_dimension);
  point_coordinates(0,0) =  0.0;  point_coordinates(0,1) = 0.0; // point (0, 0)
  point_coordinates(1,0) =  0.5;  point_coordinates(1,1) = 0.5; // point (0.5, 0.5)
  point_coordinates(2,0) = -0.5;  point_coordinates(2,1) = 0.0; // point (-0.5, 0)

  point_values.evaluateValues(node_coordinates,point_coordinates);

  // Reference coordinates must be stored unchanged.
  for(size_type p=0;p<num_points;p++)
    for(size_type d=0;d<base_cell_dimension;d++)
      TEST_EQUALITY(point_values.coords_ref(p,d),point_coordinates(p,d));

  // Physical coordinates: map [-1,1] onto the cell, offset by its node 0.
  for(int c=0;c<num_cells;c++) {
    double dx = 0.5;
    double dy = 0.5;
    for(size_type p=0;p<num_points;p++) {
      // Named phys_x/phys_y so they do not shadow the index constants x and y.
      double phys_x = dx*(point_coordinates(p,0)+1.0)/2.0 + node_coordinates(c,0,0);
      double phys_y = dy*(point_coordinates(p,1)+1.0)/2.0 + node_coordinates(c,0,1);
      TEST_FLOATING_EQUALITY(point_values.point_coords(c,p,0),phys_x,1e-10);
      TEST_FLOATING_EQUALITY(point_values.point_coords(c,p,1),phys_y,1e-10);
    }
  }
}
// Wrap a Tpetra::Export in a TpetraExport; a null input yields Teuchos::null.
RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> >
toXpetra(const RCP<const Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> >& exp) {
  if (exp.is_null())
    return Teuchos::null;

  return rcp(new TpetraExport<LocalOrdinal,GlobalOrdinal,Node>(exp));
}
// Wrap a copy of *import in an Xpetra::EpetraImport.
// A null pointer yields Teuchos::null instead of being dereferenced.
RCP< const Import<int, int > > toXpetra(const Epetra_Import *import) {
  if (!import)
    return Teuchos::null;

  RCP<const Epetra_Import> imp = rcp(new Epetra_Import(*import)); //NOTE: non consistent: return pointer, take ref
  return rcp ( new Xpetra::EpetraImport(imp) );
}
/*
 * This test was created at the request of Chris Siefert to verify
 * that some inexplicable behaviour in MueLu was not due to a faulty
 * assumption in the Matrix Matrix Multiply Kernel.
 * KLN 15/06/2011
 */
TEUCHOS_UNIT_TEST(Tpetra_MatMat, range_row_test){
  RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform().getComm();
  ParameterList defaultParameters;
  RCP<node_type> node = rcp(new node_type(defaultParameters));
  int numProcs = comm->getSize();
  //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  //THIS NUMBER MUST BE EVEN SO THAT WHEN I CALCULATE THE NUMBER
  //OF ROWS IN THE DOMAIN MAP I DON'T ENCOUNTER ANY
  //WEIRD RESULTS DUE TO INTEGER DIVISION
  //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  int numRowsPerProc = 4;
  int rank = comm->getRank();
  global_size_t globalNumRows = numRowsPerProc*numProcs;

  // The range maps below use the row block of the previous rank
  // (rank 0 wraps around to the last rank).
  const int rangeOwner = (rank == 0) ? (numProcs-1) : (rank-1);

  RCP<CrsMatrix<double,int,int,node_type> > identityMatrix =
    getIdentityMatrix<double,int,int,int>(globalNumRows, comm, node);

  //Create "B"
  Array<int> myRows = tuple<int>(
    rank*numRowsPerProc,
    rank*numRowsPerProc+1,
    rank*numRowsPerProc+2,
    rank*numRowsPerProc+3);
  Array<int> rangeElements = tuple<int>(
    rangeOwner*numRowsPerProc+1,
    rangeOwner*numRowsPerProc+2,
    rangeOwner*numRowsPerProc,
    rangeOwner*numRowsPerProc+3);

  RCP<const Map<int,int,node_type> > bRowMap =
    Tpetra::createNonContigMapWithNode<int,int,node_type>(myRows, comm, node);
  RCP<const Map<int,int,node_type> > bRangeMap =
    Tpetra::createNonContigMapWithNode<int,int,node_type>(rangeElements, comm, node);
  //We divide by 2 to make the matrix tall and "skinny"
  RCP<const Map<int,int,node_type> > bDomainMap =
    Tpetra::createUniformContigMapWithNode<int,int,node_type>(
      globalNumRows/2, comm, node);
  RCP<CrsMatrix<double,int,int,node_type> > bMatrix =
    Tpetra::createCrsMatrix<double,int,int,node_type>(bRowMap, 1);

  // One entry (value 3.0) per locally owned row g, in column g/2.
  {
    const ArrayView<const int> ownedRows = bRowMap->getNodeElementList();
    for (ArrayView<const int>::iterator row = ownedRows.begin();
         row != ownedRows.end(); ++row)
    {
      Array<int> col(1,(*row)/2);
      Array<double> val(1,3.0);
      bMatrix->insertGlobalValues(*row, col(), val());
    }
  }
  bMatrix->fillComplete(bDomainMap, bRangeMap);

  out << "Regular I*P" << std::endl;
  mult_test_results results = multiply_test(
    "Different Range and Row Maps",
    identityMatrix,
    bMatrix,
    false,
    false,
    bMatrix,
    comm,
    out);
  if(verbose){
    out << "Results:" <<std::endl;
    out << "\tEpsilon: " << results.epsilon << std::endl;
    out << "\tcNorm: " << results.cNorm << std::endl;
    out << "\tcompNorm: " << results.compNorm << std::endl;
  }
  TEST_COMPARE(results.epsilon, <, defaultEpsilon)

  RCP<CrsMatrix<double,int,int,node_type> > identity2 =
    getIdentityMatrix<double,int,int,int>(globalNumRows/2, comm, node);

  RCP<const Map<int,int,node_type> > bTransRowMap =
    Tpetra::createUniformContigMapWithNode<int,int,node_type>(globalNumRows/2,comm,node);

  RCP<CrsMatrix<double,int,int,node_type> > bTrans =
    Tpetra::createCrsMatrix<double,int,int,node_type>(bTransRowMap, 1);

  Array<int> bTransRangeElements = tuple<int>(
    rangeOwner*(numRowsPerProc/2)+1,
    rangeOwner*(numRowsPerProc/2));
  out << bTransRangeElements << std::endl;

  RCP<const Map<int,int,node_type> > bTransRangeMap =
    Tpetra::createNonContigMapWithNode<int,int,node_type>(bTransRangeElements, comm, node);
  RCP<const Map<int,int,node_type> > bTransDomainMap =
    Tpetra::createUniformContigMapWithNode<int,int,node_type>(globalNumRows,comm,node);

  // bTrans = identity2 * B^T, computed by the kernel under test.
  Tpetra::MatrixMatrix::Multiply(*identity2,false,*bMatrix, true, *bTrans, false);
  bTrans->fillComplete(bTransDomainMap, bTransRangeMap);

  // Reference transpose assembled by hand: entry 3.0 at (g/2, g).
  RCP<CrsMatrix<double,int,int,node_type> > bTransTest =
    Tpetra::createCrsMatrix<double,int,int,node_type>(bTransRowMap, 1);
  {
    const ArrayView<const int> ownedRows = bRowMap->getNodeElementList();
    for (ArrayView<const int>::iterator row = ownedRows.begin();
         row != ownedRows.end(); ++row)
    {
      Array<int> col(1,*row);
      Array<double> val(1,3.0);
      bTransTest->insertGlobalValues((*row)/2, col(), val());
    }
  }
  bTransTest->fillComplete(bTransDomainMap, bTransRangeMap);

  out << "Regular I*P^T" << std::endl;

  // Difference between the hand-built and the computed transpose.
  RCP<CrsMatrix<double,int,int,node_type> > bTransDiff =
    Tpetra::createCrsMatrix<double,int,int,node_type>(bTransRowMap, 1);
  Tpetra::MatrixMatrix::Add<double,int,int,node_type>(*bTransTest, false, -1.0, *bTrans, false, 1.0,bTransDiff);

  bTransDiff->fillComplete(bTransDomainMap, bDomainMap);

  double diffNorm = bTransDiff->getFrobeniusNorm ();
  double realNorm = bTransTest->getFrobeniusNorm ();
  double calcEpsilon = diffNorm/realNorm;

  out << "B" << std::endl;

  if(verbose){
    out << "Results:" <<std::endl;
    out << "\tEpsilon: " << calcEpsilon<< std::endl;
    out << "\treal norm: " << realNorm<< std::endl;
    out << "\tcompNorm: " << diffNorm<< std::endl;
  }
  TEST_COMPARE(calcEpsilon, <, defaultEpsilon)
}
/*! Estimate the radiance arriving along the last ray of the given light
 *  path: trace the ray, add environment or area-light emission, continue
 *  the path by sampling one BRDF component (recursive call), and add
 *  direct lighting from all scene lights. */
Color PathTraceIntegrator::Li(LightPath& lightPath, const Ref<BackendScene>& scene, IntegratorState& state)
{
  /*! Terminate path if too long or contribution too low. */
  if (lightPath.depth >= maxDepth || reduce_max(lightPath.throughput) < minContribution)
    return zero;

  /*! Traverse ray. */
  DifferentialGeometry dg;
  //scene->intersector->intersect(lightPath.lastRay);
  rtcIntersect(scene->scene,(RTCRay&)lightPath.lastRay);
  scene->postIntersect(lightPath.lastRay,dg);
  state.numRays++;

  //return Color(dg.st.x,dg.st.y,0.0f);

  Color L = zero;
  const Vector3f wo = -lightPath.lastRay.dir;

  /*! Select which BRDF types get direct lighting and which are followed
   *  by the recursive path. */
  const BRDFType directLightingBRDFTypes =
    sampleLightForGlossy ? (BRDFType)(DIFFUSE|GLOSSY) : (BRDFType)(DIFFUSE);
  const BRDFType giBRDFTypes =
    sampleLightForGlossy ? (BRDFType)(SPECULAR) : (BRDFType)(ALL);

  /*! Environment shading when nothing hit. */
  if (!lightPath.lastRay)
  {
    if (backplate && lightPath.unbend) {
      const int x = clamp(int(state.pixel.x * backplate->width ), 0, int(backplate->width )-1);
      const int y = clamp(int(state.pixel.y * backplate->height), 0, int(backplate->height)-1);
      L = backplate->get(x, y);
    }
    else if (!lightPath.ignoreVisibleLights) {
      for (size_t i=0; i<scene->envLights.size(); i++)
        L += scene->envLights[i]->Le(wo);
    }
    return L;
  }

  /*! face forward normals */
  bool backfacing = false;
  if (dot(dg.Ng, lightPath.lastRay.dir) > 0) {
    backfacing = true;
    dg.Ng = -dg.Ng;
    dg.Ns = -dg.Ns;
  }

  /*! Shade surface. */
  CompositedBRDF brdfs;
  if (dg.material)
    dg.material->shade(lightPath.lastRay, lightPath.lastMedium, dg, brdfs);

  /*! Add light emitted by hit area light source. */
  if (!lightPath.ignoreVisibleLights && dg.light && !backfacing)
    L += dg.light->Le(dg,wo);

  /*! Global illumination. Pick one BRDF component and sample it. */
  if (lightPath.depth < maxDepth)
  {
    /*! sample brdf */
    Sample3f wi; BRDFType type;
    Vec2f scatterSample = state.sample->getVec2f(firstScatterSampleID + lightPath.depth);
    float typeSample = state.sample->getFloat(firstScatterTypeSampleID + lightPath.depth);
    Color weight = brdfs.sample(wo, dg, wi, type, scatterSample, typeSample, giBRDFTypes);

    /*! Continue only if we hit something valid. */
    if (weight != Color(zero) && wi.pdf > 0.0f)
    {
      /*! Compute simple volumetric effect. */
      const Color& transmission = lightPath.lastMedium.transmission;
      if (transmission != Color(one))
        weight *= pow(transmission,lightPath.lastRay.tfar);

      /*! Tracking medium if we hit a medium interface. */
      Medium nextMedium = lightPath.lastMedium;
      if (type & TRANSMISSION)
        nextMedium = dg.material->nextMedium(lightPath.lastMedium);

      /*! Continue the path. */
      LightPath scatteredPath = lightPath.extended(
        Ray(dg.P, wi, dg.error*epsilon, inf, lightPath.lastRay.time),
        nextMedium, weight, (type & directLightingBRDFTypes) != NONE);
      L += weight * Li(scatteredPath, scene, state) * rcp(wi.pdf);
    }
  }

  /*! Check if any BRDF component uses direct lighting. */
  bool useDirectLighting = false;
  for (size_t i=0; i<brdfs.size(); i++)
    useDirectLighting |= (brdfs[i]->type & directLightingBRDFTypes) != NONE;

  /*! Direct lighting. Shoot shadow rays to all light sources. */
  if (useDirectLighting)
  {
    for (size_t i=0; i<scene->allLights.size(); i++)
    {
      /*! Skip lights whose illumination mask excludes this surface. */
      if ((scene->allLights[i]->illumMask & dg.illumMask) == 0)
        continue;

      /*! Either use precomputed samples for the light or sample light now. */
      LightSample ls;
      if (scene->allLights[i]->precompute())
        ls = state.sample->getLightSample(precomputedLightSampleID[i]);
      else
        ls.L = scene->allLights[i]->sample(dg, ls.wi, ls.tMax, state.sample->getVec2f(lightSampleID));

      /*! Ignore zero radiance or illumination from the back. */
      //if (ls.L == Color(zero) || ls.wi.pdf == 0.0f || dot(dg.Ns,Vector3f(ls.wi)) <= 0.0f) continue;
      if (ls.L == Color(zero) || ls.wi.pdf == 0.0f)
        continue;

      /*! Evaluate BRDF */
      Color brdf = brdfs.eval(wo, dg, ls.wi, directLightingBRDFTypes);
      if (brdf == Color(zero))
        continue;

      /*! Test for shadows. */
      Ray shadowRay(dg.P, ls.wi, dg.error*epsilon, ls.tMax-dg.error*epsilon, lightPath.lastRay.time,dg.shadowMask);
      rtcOccluded(scene->scene,(RTCRay&)shadowRay);
      state.numRays++;
      if (shadowRay)
        continue;

      /*! Accumulate the light contribution. */
      L += ls.L * brdf * rcp(ls.wi.pdf);
    }
  }

  return L;
}
/**
 * This test was written at the request of Chris Siefert
 * in order to verify that A^T * I produces correct results
 * when A's rowmap and rangemap are different.
 * KLN 23/06/2011
 */
TEUCHOS_UNIT_TEST(Tpetra_MatMat, ATI_range_row_test){
  RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform().getComm();
  ParameterList defaultParameters;
  RCP<node_type> node = rcp(new node_type(defaultParameters));
  int numProcs = comm->getSize();
  //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  //THIS NUMBER MUST BE EVEN SO THAT WHEN I CALCULATE THE NUMBER
  //OF ROWS IN THE DOMAIN MAP I DON'T ENCOUNTER ANY
  //WEIRD RESULTS DUE TO INTEGER DIVISION
  //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  int numRowsPerProc = 4;
  int rank = comm->getRank();
  global_size_t globalNumRows = numRowsPerProc*numProcs;

  // The range map uses the row block of the previous rank
  // (rank 0 wraps around to the last rank).
  const int rangeOwner = (rank == 0) ? (numProcs-1) : (rank-1);

  //Create identity matrix
  RCP<CrsMatrix<double,int,int,node_type> > identityMatrix =
    getIdentityMatrix<double,int,int,int>(globalNumRows, comm, node);

  //Create A
  Array<int> aMyRows = tuple<int>(
    rank*numRowsPerProc,
    rank*numRowsPerProc+1,
    rank*numRowsPerProc+2,
    rank*numRowsPerProc+3);
  RCP<const Map<int,int,node_type> > aRowMap =
    Tpetra::createNonContigMapWithNode<int,int,node_type>(
      aMyRows, comm, node);
  RCP<const Map<int,int,node_type> > aDomainMap =
    Tpetra::createUniformContigMapWithNode<int,int,node_type>(
      globalNumRows/2, comm, node);

  Array<int> aRangeElements = tuple<int>(
    rangeOwner*numRowsPerProc+1,
    rangeOwner*numRowsPerProc+2,
    rangeOwner*numRowsPerProc,
    rangeOwner*numRowsPerProc+3);
  RCP<const Map<int,int,node_type> > aRangeMap =
    Tpetra::createNonContigMapWithNode<int,int,node_type>(
      aRangeElements, comm, node);

  RCP<CrsMatrix<double,int,int,node_type> > aMat =
    Tpetra::createCrsMatrix<double,int,int,node_type>(aRowMap, 1);
  // One entry (value 3.0) per locally owned row g, in column g/2.
  {
    const ArrayView<const int> ownedRows = aRowMap->getNodeElementList();
    for (ArrayView<const int>::iterator row = ownedRows.begin();
         row != ownedRows.end(); ++row)
    {
      Array<int> col(1,(*row)/2);
      Array<double> val(1,3.0);
      aMat->insertGlobalValues(*row, col(), val());
    }
  }
  aMat->fillComplete(aDomainMap, aRangeMap);

  // Reference result: explicit transpose of A.
  RowMatrixTransposer<double,int,int,node_type> transposer (aMat);
  RCP<CrsMatrix<double, int, int, node_type> > knownAMat =
    transposer.createTranspose();

  out << "Regular I*P" << std::endl;
  mult_test_results results = multiply_test(
    "Different Range and Row Maps",
    aMat,
    identityMatrix,
    true,
    false,
    knownAMat,
    comm,
    out);
  if(verbose){
    out << "Results:" <<std::endl;
    out << "\tEpsilon: " << results.epsilon << std::endl;
    out << "\tcNorm: " << results.cNorm << std::endl;
    out << "\tcompNorm: " << results.compNorm << std::endl;
  }
  TEST_COMPARE(results.epsilon, <, defaultEpsilon)
}
/*
 * Occlusion (any-hit) query for a packet of 16 rays against a motion-blur
 * BVH4. The active rays are processed one after another: for each ray the
 * tree is traversed with a private node stack, node bounds and triangle
 * vertices are blended between their two stored time steps using the ray's
 * time value, and traversal of that ray stops at the first triangle hit.
 *
 * valid_i : per-lane activity flags; a lane is active when its value is non-zero.
 * bvh     : hierarchy providing the root, the node array and the triangle array.
 * ray16   : ray packet; its geomID member is stored to with the value 0 for
 *           lanes that were active and got terminated by a hit.
 */
void BVH4mbIntersector16Single::occluded(mic_i* valid_i, BVH4mb* bvh, Ray16& ray16)
{
  /* near and node stack */
  __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

  /* setup */
  const mic_m m_valid = *(mic_i*)valid_i != mic_i(0);
  const mic3f rdir16 = rcp_safe(ray16.dir);
  /* inactive lanes start out as "terminated" so the all-terminated test below can fire */
  unsigned int terminated = toInt(!m_valid);
  const mic_f inf = mic_f(pos_inf);
  const mic_f zero = mic_f::zero();

  const Node * __restrict__ nodes = (Node *)bvh->nodePtr();
  const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

  /* sentinel at the bottom of the stack: popping it ends the traversal of a ray */
  stack_node[0] = BVH4i::invalidNode;

  /* iterate over the set bits of the valid mask, i.e. over the active rays */
  long rayIndex = -1;
  while((rayIndex = bitscan64(rayIndex,toInt(m_valid))) != BITSCAN_NO_BIT_SET_64)
  {
    /* restart traversal at the root for every ray */
    stack_node[1] = bvh->root;
    size_t sindex = 2;

    /* per-ray data, replicated across the vector register */
    const mic_f org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
    const mic_f dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
    const mic_f rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
    const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
    const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
    const mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
    const mic_f time = broadcast1to16f(&ray16.time[rayIndex]);

    const unsigned int leaf_mask = BVH4I_LEAF_MASK;

    while (1)
    {
      /* pop the next node to process */
      NodeRef curNode = stack_node[sindex-1];
      sindex--;

      /* weight of the first time step; 'time' is the weight of the second one */
      const mic_f one_time = (mic_f::one() - time);

      /* descend until a leaf (or the sentinel, presumably flagged as a leaf) is reached */
      while (1)
      {
        /* test if this is a leaf node */
        if (unlikely(curNode.isLeaf(leaf_mask))) break;

        const Node* __restrict__ const node = curNode.node(nodes);
        const float* __restrict const plower = (float*)node->lower;
        /* NOTE(review): pupper is not referenced anywhere below */
        const float* __restrict const pupper = (float*)node->upper;

        prefetch<PFHINT_L1>((char*)node + 0*64);
        prefetch<PFHINT_L1>((char*)node + 1*64);
        prefetch<PFHINT_L1>((char*)node + 2*64);
        prefetch<PFHINT_L1>((char*)node + 3*64);

        /* blend the bounds of both time steps linearly with the ray time */
        const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;
        const mic_f lower = one_time * load16f((float*)nodeMB->lower) + time * load16f((float*)nodeMB->lower_t1);
        const mic_f upper = one_time * load16f((float*)nodeMB->upper) + time * load16f((float*)nodeMB->upper_t1);

        /* intersect single ray with 4 bounding boxes */
        const mic_f tLowerXYZ = lower * rdir_xyz - org_rdir_xyz;
        const mic_f tUpperXYZ = upper * rdir_xyz - org_rdir_xyz;
        const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
        const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

        const Node* __restrict__ const next = curNode.node(nodes);
        prefetch<PFHINT_L2>((char*)next + 0);
        prefetch<PFHINT_L2>((char*)next + 64);

        /* speculative pop; undone by the sindex++ below when a child is hit */
        sindex--;
        const mic_f tNear = vreduce_max4(tLower);
        const mic_f tFar = vreduce_min4(tUpper);
        const mic_m hitm = le(0x8888,tNear,tFar);
        const mic_f tNear_pos = select(hitm,tNear,inf);

        curNode = stack_node[sindex]; // early pop of next node

        /* if no child is hit, continue with early popped child */
        if (unlikely(none(hitm))) continue;
        sindex++;

        const unsigned long hiti = toInt(hitm);
        const unsigned long pos_first = bitscan64(hiti);
        const unsigned long num_hitm = countbits(hiti);

        /* if a single child is hit, continue with that child */
        curNode = ((unsigned int *)plower)[pos_first];
        if (likely(num_hitm == 1)) continue;

        /* if two children are hit, push in correct order */
        const unsigned long pos_second = bitscan64(pos_first,hiti);
        if (likely(num_hitm == 2))
        {
          /* distances are compared through their raw 32-bit patterns;
             presumably valid because hit distances are non-negative -- TODO confirm */
          const unsigned int dist_first = ((unsigned int*)&tNear)[pos_first];
          const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
          const unsigned int node_first = curNode;
          const unsigned int node_second = ((unsigned int*)plower)[pos_second];

          if (dist_first <= dist_second)
          {
            /* first child is at least as close: keep it, push the second */
            stack_node[sindex] = node_second;
            sindex++;
            assert(sindex < 3*BVH4i::maxDepth+1);
            continue;
          }
          else
          {
            /* second child is closer: push the first, continue with the second */
            stack_node[sindex] = curNode;
            curNode = node_second;
            sindex++;
            assert(sindex < 3*BVH4i::maxDepth+1);
            continue;
          }
        }

        /* continue with closest child and push all others */
        const mic_f min_dist = set_min_lanes(tNear_pos);
        const unsigned int old_sindex = sindex;
        sindex += countbits(hiti) - 1;
        assert(sindex < 3*BVH4i::maxDepth+1);

        const mic_m closest_child = eq(hitm,min_dist,tNear);
        const unsigned long closest_child_pos = bitscan64(closest_child);
        /* m_pos: hit lanes excluding the one chosen as closest child
           (presumably the lowest set bit of closest_child -- TODO confirm andn operand order) */
        const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
        const mic_i plower_node = load16i((int*)plower);
        curNode = ((unsigned int*)plower)[closest_child_pos];
        /* store the remaining hit children contiguously onto the stack */
        compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
      }

      /* return if stack is empty */
      if (unlikely(curNode == BVH4i::invalidNode)) break;

      /* intersect one ray against four triangles */

      //////////////////////////////////////////////////////////////////////////////////////////////////

      const BVH4mb::Triangle01* tptr = (BVH4mb::Triangle01*) curNode.leaf(accel);

      prefetch<PFHINT_L1>((mic_f*)tptr + 0);
      prefetch<PFHINT_L1>((mic_f*)tptr + 1);
      prefetch<PFHINT_L1>((mic_f*)tptr + 2);
      prefetch<PFHINT_L1>((mic_f*)tptr + 3);

      /* gather the vertices of four triangles at the first time step */
      const mic_i and_mask = broadcast4to16i(zlc4);

      const mic_f v0_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v0, (float*)&tptr[1].t0.v0, (float*)&tptr[2].t0.v0, (float*)&tptr[3].t0.v0);
      const mic_f v1_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v1, (float*)&tptr[1].t0.v1, (float*)&tptr[2].t0.v1, (float*)&tptr[3].t0.v1);
      const mic_f v2_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v2, (float*)&tptr[1].t0.v2, (float*)&tptr[2].t0.v2, (float*)&tptr[3].t0.v2);

      prefetch<PFHINT_L2>((mic_f*)tptr + 4);
      prefetch<PFHINT_L2>((mic_f*)tptr + 5);
      prefetch<PFHINT_L2>((mic_f*)tptr + 6);
      prefetch<PFHINT_L2>((mic_f*)tptr + 7);

      /* ... and at the second time step */
      const mic_f v0_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v0, (float*)&tptr[1].t1.v0, (float*)&tptr[2].t1.v0, (float*)&tptr[3].t1.v0);
      const mic_f v1_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v1, (float*)&tptr[1].t1.v1, (float*)&tptr[2].t1.v1, (float*)&tptr[3].t1.v1);
      const mic_f v2_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v2, (float*)&tptr[1].t1.v2, (float*)&tptr[2].t1.v2, (float*)&tptr[3].t1.v2);

      /* blend both time steps linearly with the ray time */
      const mic_f v0 = v0_t0 * one_time + time * v0_t1;
      const mic_f v1 = v1_t0 * one_time + time * v1_t1;
      const mic_f v2 = v2_t0 * one_time + time * v2_t1;

      /* edges, normal and the u/v/t terms of the ray-triangle test */
      const mic_f e1 = v1 - v0;
      const mic_f e2 = v0 - v2;
      const mic_f normal = lcross_zxy(e1,e2);
      const mic_f org = v0 - org_xyz;
      const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
      const mic_f den = ldot3_zxy(dir_xyz,normal);
      const mic_f rcp_den = rcp(den);
      const mic_f uu = ldot3_zxy(e2,odzxy);
      const mic_f vv = ldot3_zxy(e1,odzxy);
      const mic_f u = uu * rcp_den;
      const mic_f v = vv * rcp_den;

      /* 0x1111 selects one lane out of every group of four, i.e. one lane per triangle */
#if defined(__BACKFACE_CULLING__)
      const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
      const mic_m m_init = 0x1111;
#endif

      /* keep triangles with u >= 0, v >= 0 and u + v <= 1 */
      const mic_m valid_u = ge((mic_m)m_init,u,zero);
      const mic_m valid_v = ge(valid_u,v,zero);
      const mic_m m_aperture = le(valid_v,u+v,mic_f::one());

      const mic_f nom = ldot3_zxy(org,normal);
      const mic_f t = rcp_den*nom;

      /* no candidate triangle in this leaf: go on with the next stack entry */
      if (unlikely(none(m_aperture))) continue;

      /* keep hits whose distance lies strictly between tnear and tfar */
      mic_m m_final = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
      /* the per-triangle mask is read from the lanes gathered at the v2 address (swDDDD component) */
      const mic_i rayMask(ray16.mask[rayIndex]);
      const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].t0.v2,&tptr[1].t0.v2,&tptr[2].t0.v2,&tptr[3].t0.v2));
      const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
      m_final &= m_ray_mask;
#endif

      /* any hit is sufficient for occlusion: flag this ray and stop traversing it */
      if (unlikely(any(m_final)))
      {
        terminated |= mic_m::shift1[rayIndex];
        break;
      }
      //////////////////////////////////////////////////////////////////////////////////////////////////

    }

    /* stop early once every lane is flagged as terminated */
    if (unlikely(all(toMask(terminated)))) break;
  }

  /* store 0 to geomID under the mask of lanes that were active and terminated */
  store16i(m_valid & toMask(terminated),&ray16.geomID,0);
}
/*
 * Default constructor.
 *
 * Creates a fresh Environment for env_, disables the hash-table lookup
 * (useHashTable_ = false) and value-initializes the gid list and both
 * index containers.
 */
GidLookupHelper<T, lno_t>::GidLookupHelper()
  : env_(rcp(new Environment)),
    gidList_(),
    useHashTable_(false),
    indexMap_(),
    indexHash_()
{
}
/**
 * Split one hexahedral cell of the structured grid into 12 tetrahedra.
 *
 * A new node is added at the average of the hex's node coordinates, and
 * each of the six hex faces contributes two tetrahedra formed by a face
 * triangle plus that new node.
 *
 * \param[in] meshDesc  Total number of hex cells in the x, y and z directions.
 * \param[in] element   (x,y,z) index of the hex cell being split.
 * \param[in] block     Element block the new tetrahedra are added to.
 * \param[in] h_nodes   Global node ids of the hex; entries 0..7 are read,
 *                      so at least eight ids are required.
 * \param[in,out] mesh  Mesh that receives the centroid node and the elements.
 *
 * All ids are computed in stk::mesh::EntityId arithmetic so that the
 * products of the grid dimensions are not evaluated in (overflow-prone)
 * int arithmetic for large meshes.
 */
void CubeTetMeshFactory::buildTetsOnHex(const Teuchos::Tuple<int,3> & meshDesc,
                                        const Teuchos::Tuple<int,3> & element,
                                        stk::mesh::Part * block,
                                        const std::vector<stk::mesh::EntityId> & h_nodes,
                                        STK_Interface & mesh) const
{
   typedef stk::mesh::EntityId EntityId;

   // widen to the id type BEFORE multiplying
   const EntityId totalXElems = static_cast<EntityId>(meshDesc[0]);
   const EntityId totalYElems = static_cast<EntityId>(meshDesc[1]);
   const EntityId totalZElems = static_cast<EntityId>(meshDesc[2]);

   const EntityId nx = static_cast<EntityId>(element[0]);
   const EntityId ny = static_cast<EntityId>(element[1]);
   const EntityId nz = static_cast<EntityId>(element[2]);

   // one-based id of the hex, and id of the first of its 12 tetrahedra
   const EntityId hex_id = totalXElems*totalYElems*nz+totalXElems*ny+nx+1;
   const EntityId gid_0 = 12*(hex_id-1)+1;

   // add centroid node; its id is placed after the ids of the grid nodes
   const EntityId largestNode = (totalXElems+1)*(totalYElems+1)*(totalZElems+1);
   const EntityId centroid = hex_id+largestNode;
   {
      // compute average of coordinates (a hex has 8 nodes)
      std::vector<double> coord(3,0.0);
      for(std::size_t i=0;i<h_nodes.size();i++) {
         const double * node_coord = mesh.getNodeCoordinates(h_nodes[i]);
         coord[0] += node_coord[0];
         coord[1] += node_coord[1];
         coord[2] += node_coord[2];
      }
      coord[0] /= 8.0;
      coord[1] /= 8.0;
      coord[2] /= 8.0;

      mesh.addNode(centroid,coord);
   }

   // local hex node indices of the two triangles on each face
   static const int idSet[12][3] = {
       { 0, 1, 2}, // back
       { 0, 2, 3},
       { 0, 5, 1}, // bottom
       { 0, 4, 5},
       { 0, 7, 4}, // left
       { 0, 3, 7},
       { 6, 1, 5}, // right
       { 6, 2, 1},
       { 6, 3, 2}, // top
       { 6, 7, 3},
       { 6, 4, 7}, // front
       { 6, 5, 4}
   };

   // every tetrahedron shares the centroid as its fourth node
   std::vector<EntityId> nodes(4);
   nodes[3] = centroid;

   for(int i=0;i<12;i++) {
      nodes[0] = h_nodes[idSet[i][0]];
      nodes[1] = h_nodes[idSet[i][1]];
      nodes[2] = h_nodes[idSet[i][2]];

      // add element to mesh
      mesh.addElement(rcp(new ElementDescriptor(gid_0+i,nodes)),block);
   }
}
/** Nonmember constructor: returns a newly allocated, default-constructed
 * HermiteInterpolator<Scalar> wrapped in an RCP.
 */
RCP<HermiteInterpolator<Scalar> > hermiteInterpolator()
{
  return rcp(new HermiteInterpolator<Scalar>());
}
/**
 * Return a ref count pointer to self.
 *
 * The pointer is produced by calling rcp(this).
 * NOTE(review): assuming rcp is Teuchos::rcp with its default ownership
 * flag, every call creates a separate owning RCP for this object --
 * confirm that callers invoke this at most once per heap-allocated object.
 */
virtual RCP<MeshTransformationBase> getRcp()
{
  return rcp(this);
}
/*
 * Specialization for <double,int,int,KDNT,KDKSO>: wraps an Epetra_CrsMatrix
 * in an Xpetra::EpetraCrsMatrix, views that object through the
 * Xpetra::CrsMatrix interface, and returns an Xpetra::CrsMatrixWrap built
 * on top of it.
 */
inline RCP<Xpetra::CrsMatrixWrap<double,int,int,KDNT,KDKSO> >
Convert_Epetra_CrsMatrix_ToXpetra_CrsMatrixWrap<double,int,int,KDNT,KDKSO > (RCP<Epetra_CrsMatrix> &epAB)
{
  typedef Xpetra::CrsMatrix<double,int,int,KDNT,KDKSO>     xpetra_crs_type;
  typedef Xpetra::CrsMatrixWrap<double,int,int,KDNT,KDKSO> xpetra_wrap_type;

  // Step 1: Epetra-backed Xpetra matrix around the input.
  RCP<Xpetra::EpetraCrsMatrix> epetraBacked =
    rcp(new Xpetra::EpetraCrsMatrix(epAB));

  // Step 2: same object, seen through the CrsMatrix interface.
  RCP<xpetra_crs_type> asCrsMatrix =
    rcp_implicit_cast<xpetra_crs_type>(epetraBacked);

  // Step 3: wrap it.
  RCP<xpetra_wrap_type> wrapped = rcp(new xpetra_wrap_type(asCrsMatrix));
  return wrapped;
}