// Jacobian specialization: for every node of this boundary condition's node
// set, overwrite the Jacobian rows of the X and Y unknowns with a single
// diagonal entry (j_coeff) and, when a residual vector is present, store
// (x - prescribed value) in the residual for both unknowns.
void KfieldBC<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData dirichletWorkset)
{
  // Linear-algebra objects and scalars carried by the workset.
  Teuchos::RCP<Epetra_Vector>       residual = dirichletWorkset.f;
  Teuchos::RCP<Epetra_CrsMatrix>    jacobian = dirichletWorkset.Jac;
  Teuchos::RCP<const Epetra_Vector> solution = dirichletWorkset.x;
  RealType       currentTime = dirichletWorkset.current_time;
  const RealType j_coeff     = dirichletWorkset.j_coeff;

  // Per-node unknown indices and coordinates for this node set.
  const std::vector<std::vector<int> >& nodeDofs =
    dirichletWorkset.nodeSets->find(this->nodeSetID)->second;
  const std::vector<double*>& nodeCoords =
    dirichletWorkset.nodeSetCoords->find(this->nodeSetID)->second;

  // Outputs of ExtractMyRowView, shared by every row that gets processed.
  RealType* rowValues;
  int*      rowColumns;
  int       rowLength;

  RealType   diagValue    = j_coeff;
  const bool haveResidual = (residual != Teuchos::null);

  // Zero every stored entry of `row`, then write j_coeff on its diagonal.
  auto pinRow = [&](int row) {
    jacobian->ExtractMyRowView(row, rowLength, rowValues, rowColumns);
    for (int k = 0; k < rowLength; ++k)
      rowValues[k] = 0;
    jacobian->ReplaceMyValues(row, 1, &diagValue, &row);
  };

  ScalarT Xval, Yval;

  for (std::size_t node = 0; node < nodeDofs.size(); ++node) {
    // Local indices into the unknown vector for the two components.
    int xDof = nodeDofs[node][0];
    int yDof = nodeDofs[node][1];

    double* coord = nodeCoords[node];
    this->computeBCs(coord, Xval, Yval, currentTime);

    pinRow(xDof);
    pinRow(yDof);

    if (haveResidual) {
      (*residual)[xDof] = ((*solution)[xDof] - Xval.val());
      (*residual)[yDof] = ((*solution)[yDof] - Yval.val());
    }
  }
}
int main(int argc, char *argv[]) { // Initialize MPI #ifdef HAVE_MPI MPI_Init(&argc,&argv); #endif // Create a communicator for Epetra objects #ifdef HAVE_MPI Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif int * testInt = new int[100]; delete [] testInt; bool verbose = false; if (argc > 1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; // Get the process ID and the total number of processors int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); // Set up theolver options parameter list Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr = Teuchos::rcp(new Teuchos::ParameterList); Teuchos::ParameterList & noxParams = *(noxParamsPtr.get()); // Set up the printing utilities // Only print output if the "-v" flag is set on the command line Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); printParams.set("MyPID", MyPID); printParams.set("Output Precision", 5); printParams.set("Output Processor", 0); if( verbose ) printParams.set("Output Information", NOX::Utils::OuterIteration + NOX::Utils::OuterIterationStatusTest + NOX::Utils::InnerIteration + NOX::Utils::Parameters + NOX::Utils::Details + NOX::Utils::Warning + NOX::Utils::TestDetails); else printParams.set("Output Information", NOX::Utils::Error + NOX::Utils::TestDetails); Teuchos::RCP<NOX::Utils> printing = Teuchos::rcp( new NOX::Utils(printParams) ); // Identify the test problem if (printing->isPrintType(NOX::Utils::TestDetails)) printing->out() << "Starting epetra/NOX_Operators/NOX_BroydenOp.exe" << std::endl; // Identify processor information #ifdef HAVE_MPI if (printing->isPrintType(NOX::Utils::TestDetails)) { printing->out() << "Parallel Run" << std::endl; printing->out() << "Number of processors = " << NumProc << std::endl; printing->out() << "Print Process = " << MyPID << std::endl; } Comm.Barrier(); if (printing->isPrintType(NOX::Utils::TestDetails)) printing->out() << "Process " << MyPID << " is alive!" 
<< std::endl; Comm.Barrier(); #else if (printing->isPrintType(NOX::Utils::TestDetails)) printing->out() << "Serial Run" << std::endl; #endif int status = 0; // Create a TestCompare class NOX::Epetra::TestCompare tester( printing->out(), *printing); double abstol = 1.e-4; double reltol = 1.e-4 ; // Test NOX::Epetra::BroydenOperator int numGlobalElems = 3 * NumProc; Epetra_Map broydenRowMap ( numGlobalElems, 0, Comm ); Epetra_Vector broydenWorkVec ( broydenRowMap ); Epetra_CrsGraph broydenWorkGraph( Copy, broydenRowMap, 0 ); std::vector<int> globalIndices(3); for( int lcol = 0; lcol < 3; ++lcol ) globalIndices[lcol] = 3 * MyPID + lcol; std::vector<int> myGlobalIndices(2); // Row 1 structure myGlobalIndices[0] = globalIndices[0]; myGlobalIndices[1] = globalIndices[2]; broydenWorkGraph.InsertGlobalIndices( globalIndices[0], 2, &myGlobalIndices[0] ); // Row 2 structure myGlobalIndices[0] = globalIndices[0]; myGlobalIndices[1] = globalIndices[1]; broydenWorkGraph.InsertGlobalIndices( globalIndices[1], 2, &myGlobalIndices[0] ); // Row 3 structure myGlobalIndices[0] = globalIndices[1]; myGlobalIndices[1] = globalIndices[2]; broydenWorkGraph.InsertGlobalIndices( globalIndices[2], 2, &myGlobalIndices[0] ); broydenWorkGraph.FillComplete(); Teuchos::RCP<Epetra_CrsMatrix> broydenWorkMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph ) ); // Create an identity matrix broydenWorkVec.PutScalar(1.0); broydenWorkMatrix->ReplaceDiagonalValues(broydenWorkVec); NOX::Epetra::BroydenOperator broydenOp( noxParams, printing, broydenWorkVec, broydenWorkMatrix, true ); broydenWorkVec[0] = 1.0; broydenWorkVec[1] = -1.0; broydenWorkVec[2] = 2.0; broydenOp.setStepVector( broydenWorkVec ); broydenWorkVec[0] = 2.0; broydenWorkVec[1] = 1.0; broydenWorkVec[2] = 3.0; broydenOp.setYieldVector( broydenWorkVec ); broydenOp.computeSparseBroydenUpdate(); // Create the gold matrix for comparison Teuchos::RCP<Epetra_CrsMatrix> goldMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, 
broydenWorkGraph ) ); int numCols ; double * values ; // Row 1 answers goldMatrix->ExtractMyRowView( 0, numCols, values ); values[0] = 6.0 ; values[1] = 2.0 ; // Row 2 answers goldMatrix->ExtractMyRowView( 1, numCols, values ); values[0] = 5.0 ; values[1] = 0.0 ; // Row 3 structure goldMatrix->ExtractMyRowView( 2, numCols, values ); values[0] = -1.0 ; values[1] = 7.0 ; goldMatrix->Scale(0.2); status += tester.testCrsMatrices( broydenOp.getBroydenMatrix(), *goldMatrix, reltol, abstol, "Broyden Sparse Operator Update Test" ); // Now try a dense Broyden Update Epetra_CrsGraph broydenWorkGraph2( Copy, broydenRowMap, 0 ); myGlobalIndices.resize(3); // All Rowsstructure myGlobalIndices[0] = globalIndices[0]; myGlobalIndices[1] = globalIndices[1]; myGlobalIndices[2] = globalIndices[2]; broydenWorkGraph2.InsertGlobalIndices( globalIndices[0], 3, &myGlobalIndices[0] ); broydenWorkGraph2.InsertGlobalIndices( globalIndices[1], 3, &myGlobalIndices[0] ); broydenWorkGraph2.InsertGlobalIndices( globalIndices[2], 3, &myGlobalIndices[0] ); broydenWorkGraph2.FillComplete(); Teuchos::RCP<Epetra_CrsMatrix> broydenWorkMatrix2 = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph2 ) ); // Create an identity matrix broydenWorkVec.PutScalar(1.0); broydenWorkMatrix2->ReplaceDiagonalValues(broydenWorkVec); NOX::Epetra::BroydenOperator broydenOp2( noxParams, printing, broydenWorkVec, broydenWorkMatrix2, true ); broydenWorkVec[0] = 1.0; broydenWorkVec[1] = -1.0; broydenWorkVec[2] = 2.0; broydenOp2.setStepVector( broydenWorkVec ); broydenWorkVec[0] = 2.0; broydenWorkVec[1] = 1.0; broydenWorkVec[2] = 3.0; broydenOp2.setYieldVector( broydenWorkVec ); broydenOp2.computeSparseBroydenUpdate(); // Create the gold matrix for comparison Teuchos::RCP<Epetra_CrsMatrix> goldMatrix2 = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph2 ) ); // Row 1 answers goldMatrix2->ExtractMyRowView( 0, numCols, values ); values[0] = 7.0 ; values[1] = -1.0 ; values[2] = 2.0 ; // Row 2 answers 
goldMatrix2->ExtractMyRowView( 1, numCols, values ); values[0] = 2.0 ; values[1] = 4.0 ; values[2] = 4.0 ; // Row 3 structure goldMatrix2->ExtractMyRowView( 2, numCols, values ); values[0] = 1.0 ; values[1] = -1.0 ; values[2] = 8.0 ; double scaleF = 1.0 / 6.0; goldMatrix2->Scale( scaleF ); status += tester.testCrsMatrices( broydenOp2.getBroydenMatrix(), *goldMatrix2, reltol, abstol, "Broyden Sparse Operator Update Test (Dense)" ); // Now test the ability to remove active entries in the Broyden update Epetra_CrsGraph inactiveGraph( Copy, broydenRowMap, 0 ); // Row 1 structure inactiveGraph.InsertGlobalIndices( globalIndices[0], 1, &myGlobalIndices[1] ); // Row 2 structure inactiveGraph.InsertGlobalIndices( globalIndices[1], 1, &myGlobalIndices[2] ); // Row 3 structure inactiveGraph.InsertGlobalIndices( globalIndices[2], 1, &myGlobalIndices[0] ); inactiveGraph.FillComplete(); // Inactivate entries in dense matrix to arrive again at the original sparse structure broydenOp2.removeEntriesFromBroydenUpdate( inactiveGraph ); #ifdef HAVE_NOX_DEBUG if( verbose ) broydenOp2.outputActiveEntries(); #endif // Reset to the identity matrix broydenOp2.resetBroydenMatrix( *broydenWorkMatrix2 ); // Step and Yield vectors are already set broydenOp2.computeSparseBroydenUpdate(); status += tester.testCrsMatrices( broydenOp2.getBroydenMatrix(), *goldMatrix, reltol, abstol, "Broyden Sparse Operator Update Test (Entry Removal)", false ); // Summarize test results if( status == 0 ) printing->out() << "Test passed!" << std::endl; else printing->out() << "Test failed!" << std::endl; #ifdef HAVE_MPI MPI_Finalize(); #endif // Final return value (0 = successfull, non-zero = failure) return status; }
// Jacobian specialization of the blocked-Epetra Dirichlet scatter.
//
// For every cell of the workset and every scattered field, each basis
// function returned by getGIDFieldOffsets_closure for the configured side
// whose row has a non-negative local ID (and that passes the optional
// applyBC_ filter) is processed as follows:
//   1. the row is zeroed in every available Jacobian sub-block of the
//      field's block row,
//   2. the field value is written to the residual block and the Dirichlet
//      counter entry is set to 1,
//   3. the derivative components of the field are written into the row with
//      ReplaceMyValues, one sub-block at a time.
// A failing ReplaceMyValues is reported through a std::runtime_error.
void panzer::ScatterDirichletResidual_BlockedEpetra<panzer::Traits::Jacobian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
  using Teuchos::RCP;
  using Teuchos::ArrayRCP;
  using Teuchos::ptrFromRef;
  using Teuchos::rcp_dynamic_cast;

  using Thyra::VectorBase;
  using Thyra::SpmdVectorBase;
  using Thyra::ProductVectorBase;
  using Thyra::BlockedLinearOpBase;

  typedef BlockedEpetraLinearObjContainer BLOC;
  typedef std::pair<int,int> BlockKey;

  // Scratch storage reused for every cell.
  std::vector<std::pair<int,GO> > cellGIDs;
  std::vector<int> cellLIDs;

  // Frequently used workset data.
  std::string blockId = this->wda(workset).block_id;
  const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

  RCP<const BLOC> container = blockedContainer_;
  RCP<ProductVectorBase<double> > residual =
    rcp_dynamic_cast<ProductVectorBase<double> >(container->get_f());
  RCP<BlockedLinearOpBase<double> > jacobian =
    rcp_dynamic_cast<BlockedLinearOpBase<double> >(container->get_A());

  // Offset of each field block inside a cell's GID list; the final slot is a
  // sentinel that is refreshed per cell with the total number of GIDs.
  const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
  std::vector<int> blockOffsets(numFieldBlocks+1);
  for(int blk=0; blk<numFieldBlocks; ++blk)
    blockOffsets[blk] = globalIndexer_->getBlockGIDOffset(blockId,blk);

  // Cache of Jacobian sub-blocks already converted to Epetra_CrsMatrix.
  std::unordered_map<BlockKey,RCP<Epetra_CrsMatrix>,panzer::pair_hash> subJacobians;

  // Return the (blockRow, blockCol) sub-block as an Epetra_CrsMatrix, filling
  // the cache on first use. A null result means the block operator itself is
  // null (the block is excluded) and must be skipped by the caller.
  auto fetchSubJacobian = [&](int blockRow, int blockCol) -> RCP<Epetra_CrsMatrix> {
    const BlockKey key(blockRow,blockCol);
    RCP<Epetra_CrsMatrix> & cached = subJacobians[key];
    if(cached==Teuchos::null) {
      RCP<Thyra::LinearOpBase<double> > thyraOp = jacobian->getNonconstBlock(key.first,key.second);
      if(Teuchos::is_null(thyraOp))
        return Teuchos::null;

      RCP<Epetra_Operator> epetraOp = Thyra::get_Epetra_Operator(*thyraOp);
      cached = rcp_dynamic_cast<Epetra_CrsMatrix>(epetraOp,true);
    }
    return cached;
  };

  // NOTE: reordering these loops may improve performance; the relative cost
  // of getGIDFieldOffsets, getElementGIDs and the LID lookups decides.

  for(std::size_t cell=0; cell<localCellIds.size(); ++cell) {
    const std::size_t cellLocalId = localCellIds[cell];

    globalIndexer_->getElementGIDs(cellLocalId,cellGIDs);
    blockOffsets[numFieldBlocks] = cellGIDs.size();

    // Translate each (block, global ID) pair into a local ID of that block.
    cellLIDs.resize(cellGIDs.size());
    for(std::size_t i=0; i<cellGIDs.size(); i++) {
      RCP<const Epetra_Map> blockMap = container->getMapForBlock(cellGIDs[i].first);
      cellLIDs[i] = blockMap->LID(cellGIDs[i].second);
    }

    std::vector<bool> is_owned(cellGIDs.size(), false);
    globalIndexer_->ownedIndices(cellGIDs,is_owned);

    ArrayRCP<double> local_r, local_dc;
    for(std::size_t fieldIndex=0; fieldIndex<scatterFields_.size(); fieldIndex++) {
      const int fieldNum = fieldIds_[fieldIndex];
      const int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

      // Local views of the Dirichlet counter and the residual for this block row.
      RCP<SpmdVectorBase<double> > counterBlock =
        rcp_dynamic_cast<SpmdVectorBase<double> >(dirichletCounter_->getNonconstVectorBlock(blockRowIndex));
      counterBlock->getNonconstLocalData(ptrFromRef(local_dc));

      RCP<SpmdVectorBase<double> > residualBlock =
        rcp_dynamic_cast<SpmdVectorBase<double> >(residual->getNonconstVectorBlock(blockRowIndex));
      residualBlock->getNonconstLocalData(ptrFromRef(local_r));

      // Offsets into the cell's GID list and matching basis ids for the side.
      const std::pair<std::vector<int>,std::vector<int> > & closure =
        globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum,
                                                   side_subcell_dim_,
                                                   local_side_id_);
      const std::vector<int> & elmtOffset = closure.first;
      const std::vector<int> & basisIdMap = closure.second;

      for(std::size_t basis=0; basis<elmtOffset.size(); basis++) {
        const int offset = elmtOffset[basis];
        const int lid = cellLIDs[offset];
        if(lid<0) // not on this processor
          continue;

        const int basisId = basisIdMap[basis];

        if(checkApplyBC_ && !applyBC_[fieldIndex](cell,basisId))
          continue;

        // Step 1: zero the row in every sub-block of this block row.
        for(int blockCol=0; blockCol<numFieldBlocks; ++blockCol) {
          const int start = blockOffsets[blockCol];
          const int end = blockOffsets[blockCol+1];
          if(end-start<=0)
            continue;

          RCP<Epetra_CrsMatrix> subJac = fetchSubJacobian(blockRowIndex,blockCol);
          if(Teuchos::is_null(subJac))
            continue;

          int numEntries = 0;
          int * rowIndices = 0;
          double * rowValues = 0;
          subJac->ExtractMyRowView(lid,numEntries,rowValues,rowIndices);

          for(int i=0; i<numEntries; i++)
            rowValues[i] = 0.0;
        }

        // Step 2: residual value and Dirichlet marker.
        const ScalarT scatterField = (scatterFields_[fieldIndex])(cell,basisId);

        local_r[lid] = scatterField.val();
        local_dc[lid] = 1.0; // mark row as dirichlet

        // Step 3: one derivative component per DOF of the cell.
        std::vector<double> jacRow(scatterField.size(),0.0);
        for(int sensIndex=0; sensIndex<scatterField.size(); ++sensIndex)
          jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
        TEUCHOS_ASSERT(jacRow.size()==cellGIDs.size());

        for(int blockCol=0; blockCol<numFieldBlocks; ++blockCol) {
          const int start = blockOffsets[blockCol];
          const int end = blockOffsets[blockCol+1];
          if(end-start<=0)
            continue;

          RCP<Epetra_CrsMatrix> subJac = fetchSubJacobian(blockRowIndex,blockCol);
          if(Teuchos::is_null(subJac))
            continue;

          // Overwrite this sub-block's slice of the row with the derivatives.
          int err = subJac->ReplaceMyValues(lid, end-start, &jacRow[start],&cellLIDs[start]);
          if(err!=0) {
            std::stringstream ss;
            ss << "Failed inserting row: " << cellGIDs[offset].second << " (" << lid << "): ";
            for(int i=start; i<end; i++)
              ss << cellGIDs[i].second << " (" << cellLIDs[i] << ") ";
            ss << std::endl;
            ss << "Into block " << blockRowIndex << ", " << blockCol << std::endl;

            ss << "scatter field = ";
            scatterFields_[fieldIndex].print(ss);
            ss << std::endl;

            TEUCHOS_TEST_FOR_EXCEPTION(err!=0,std::runtime_error,ss.str());
          }
        }
      }
    }
  }
}
static int run_test(Teuchos::RCP<Epetra_CrsMatrix> matrix, bool verbose, // display the graph before & after bool contract, // set global number of partitions to 1/2 num procs int partitioningType, // hypergraph or graph partitioning, or simple int vertexWeightType, // use vertex weights? int edgeWeightType, // use edge/hyperedge weights? int objectType) // use isorropia's CrsMatrix or CrsGraph { int rc=0, fail = 0; #ifdef HAVE_EPETRAEXT int localProc = 0; double balance1, balance2, cutn1, cutn2, cutl1, cutl2; double balance3, cutn3, cutl3; double cutWgt1, cutWgt2, cutWgt3; int numCuts1, numCuts2, numCuts3, valid; int numPartitions = 0; int keepDenseEdges = 0; int numProcs = 1; #ifdef HAVE_MPI const Epetra_MpiComm &Comm = dynamic_cast<const Epetra_MpiComm &>(matrix->Comm()); localProc = Comm.MyPID(); numProcs = Comm.NumProc(); #else const Epetra_SerialComm &Comm = dynamic_cast<const Epetra_SerialComm &>(matrix->Comm()); #endif int numRows = matrix->NumGlobalRows(); if (numRows < (numProcs * 100)){ // By default Zoltan throws out dense edges, defined as those // whose number of non-zeros exceeds 25% of the number of vertices. // // If dense edges are thrown out of a small matrix, there may be nothing left. keepDenseEdges = 1; } double myShareBefore = 1.0 / numProcs; double myShare = myShareBefore; if (contract){ numPartitions = numProcs / 2; if (numPartitions > numRows) numPartitions = numRows; if (numPartitions > 0){ if (localProc < numPartitions){ myShare = 1.0 / numPartitions; } else{ myShare = 0.0; } } else{ contract = 0; } } // If we want Zoltan's or Isorropia's default weights, then we don't // need to supply a CostDescriber object to createBalancedCopy, // so we get to test the API functions that don't take a CostDescriber. 
bool noCosts = ((vertexWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS) && (edgeWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS)); // Test the interface that has no parameters, if possible bool noParams = ((partitioningType == HYPERGRAPH_PARTITIONING) && // default, so requires no params (numPartitions == 0) && // >0 would require a parameter (keepDenseEdges == 0)); // >0 would require a parameter // Maps for original object const Epetra_Map &sourceRowMap = matrix->RowMap(); const Epetra_Map &sourceRangeMap = matrix->RangeMap(); // const Epetra_Map &sourceColMap = matrix->ColMap(); const Epetra_Map &sourceDomainMap = matrix->DomainMap(); int numCols = matrix->NumGlobalCols(); int nMyRows = sourceRowMap.NumMyElements(); int base = sourceRowMap.IndexBase(); // Compute vertex and edge weights Isorropia::Epetra::CostDescriber costs; Teuchos::RCP<Epetra_Vector> vptr; Teuchos::RCP<Epetra_CrsMatrix> eptr; Teuchos::RCP<Epetra_Vector> hyperEdgeWeights; if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){ if (partitioningType == GRAPH_PARTITIONING){ // Create graph edge weights. eptr = Teuchos::rcp(new Epetra_CrsMatrix(*matrix)); if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){ eptr->PutScalar(1.0); // set all nonzeros to 1.0 } else{ int maxRowSize = eptr->MaxNumEntries(); double *newVal = NULL; if (maxRowSize > 0){ newVal = new double [maxRowSize]; for (int j=0; j<maxRowSize; j++){ newVal[j] = localProc + 1 + j; } } int numEntries; int *idx; double *val; for (int i=0; i<nMyRows; i++){ rc = eptr->ExtractMyRowView(i, numEntries, val, idx); for (int j=0; j<numEntries; j++){ val[j] = newVal[j]; } } if (newVal) delete [] newVal; } eptr->FillComplete(sourceDomainMap, sourceRangeMap); costs.setGraphEdgeWeights(eptr); } else{ // Create hyperedge weights. (Note that the list of hyperedges that a // process provides weights for has no relation to the columns // that it has non-zeroes for, or the rows that is has. Hypergraphs // in general are not square. 
Also more than one process can provide // a weight for the same edge. Zoltan combines the weights according // to the value of the PHG_EDGE_WEIGHT_OPERATION parameter. The default // for this parameter is to use the maximum edge weight provided by any // process for a given hyperedge.) Epetra_Map hyperEdgeMap(numCols, base, Comm); hyperEdgeWeights = Teuchos::rcp(new Epetra_Vector(hyperEdgeMap)); int *edgeGIDs = NULL; double *weights = NULL; int numHEweights = hyperEdgeMap.NumMyElements(); if (numHEweights){ edgeGIDs = new int [numHEweights]; weights = new double [numHEweights]; if (edgeWeightType == SUPPLY_EQUAL_WEIGHTS){ for (int i=0; i<numHEweights; i++){ edgeGIDs[i] = hyperEdgeMap.GID(i); weights[i] = 1.0; } } else{ int hiVolumeStart = matrix->NumGlobalCols() / 3; int hiVolumeEnd = hiVolumeStart * 2; for (int i=0; i<numHEweights; i++){ edgeGIDs[i] = hyperEdgeMap.GID(i); if ((edgeGIDs[i] < hiVolumeStart) || (edgeGIDs[i] >= hiVolumeEnd)){ weights[i] = 1.0; } else{ weights[i] = 3.0; } } } hyperEdgeWeights->ReplaceGlobalValues(numHEweights, weights, edgeGIDs); } if (weights){ delete [] weights; delete [] edgeGIDs; } costs.setHypergraphEdgeWeights(hyperEdgeWeights); } } bool need_importer = false; if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){ need_importer = true; // to redistribute row weights double *val = NULL; if (nMyRows){ val = new double [nMyRows]; if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){ for (int i=0; i<nMyRows; i++){ val[i] = 1.0; } } else if (vertexWeightType == SUPPLY_UNEQUAL_WEIGHTS){ for (int i=0; i<nMyRows; i++){ val[i] = 1.0 + ((localProc+1) / 2); } } } vptr = Teuchos::rcp(new Epetra_Vector(Copy, sourceRowMap, val)); if (val) delete [] val; costs.setVertexWeights(vptr); } // Calculate partition quality metrics before calling Zoltan if (partitioningType == GRAPH_PARTITIONING){ rc = ispatest::compute_graph_metrics(matrix->Graph(), costs, myShare, balance1, numCuts1, cutWgt1, cutn1, cutl1); if (contract){ // balance wrt target of 
balancing weight over *all* procs rc = ispatest::compute_graph_metrics(matrix->Graph(), costs, myShareBefore, balance3, numCuts3, cutWgt3, cutn3, cutl3); } } else{ rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs, myShare, balance1, cutn1, cutl1); if (contract){ // balance wrt target of balancing weight over *all* procs rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs, myShareBefore, balance3, cutn3, cutl3); } } if (rc){ ERROREXIT((localProc==0), "Error in computing partitioning metrics") } Teuchos::ParameterList params; #ifdef HAVE_ISORROPIA_ZOLTAN if (!noParams){ // We're using Zoltan for partitioning and supplying // parameters, overriding defaults. Teuchos::ParameterList &sublist = params.sublist("Zoltan"); if (partitioningType == GRAPH_PARTITIONING){ params.set("PARTITIONING METHOD", "GRAPH"); sublist.set("GRAPH_PACKAGE", "PHG"); } else{ params.set("PARTITIONING METHOD", "HYPERGRAPH"); sublist.set("LB_APPROACH", "PARTITION"); sublist.set("PHG_CUT_OBJECTIVE", "CONNECTIVITY"); // "cutl" } if (keepDenseEdges){ // only throw out rows that have no zeroes, default is to // throw out if .25 or more of the columns are non-zero sublist.set("PHG_EDGE_SIZE_THRESHOLD", "1.0"); } if (numPartitions > 0){ // test #Partitions < #Processes std::ostringstream os; os << numPartitions; std::string s = os.str(); // sublist.set("NUM_GLOBAL_PARTS", s); params.set("NUM PARTS", s); } //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters //sublist.set("DEBUG_LEVEL", "5"); // proc 0 will trace Zoltan calls //sublist.set("DEBUG_MEMORY", "2"); // Zoltan will trace alloc & free } #else ERROREXIT((localProc==0), "Zoltan partitioning required but Zoltan not available.") #endif // Function scope values Teuchos::RCP<Epetra_Vector> newvwgts; Teuchos::RCP<Epetra_CrsMatrix> newewgts; // Function scope values required for LinearProblem Epetra_LinearProblem *problem = NULL; Epetra_Map *LHSmap = NULL; Epetra_MultiVector *RHS = NULL; Epetra_MultiVector 
*LHS = NULL; // Reference counted pointer to balanced object Epetra_CrsMatrix *matrixPtr=NULL; Epetra_CrsGraph *graphPtr=NULL; Epetra_RowMatrix *rowMatrixPtr=NULL; Epetra_LinearProblem *problemPtr=NULL; // Row map for balanced object const Epetra_BlockMap *targetBlockRowMap=NULL; // for input CrsGraph const Epetra_Map *targetRowMap=NULL; // for all other inputs // Column map for balanced object const Epetra_BlockMap *targetBlockColMap=NULL; // for input CrsGraph const Epetra_Map *targetColMap=NULL; // for all other inputs if (objectType == EPETRA_CRSMATRIX){ if (noParams && noCosts){ matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix); } else if (noCosts){ matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params); } targetRowMap = &(matrixPtr->RowMap()); targetColMap = &(matrixPtr->ColMap()); } else if (objectType == EPETRA_CRSGRAPH){ const Epetra_CrsGraph graph = matrix->Graph(); if (noParams && noCosts){ graphPtr = Isorropia::Epetra::createBalancedCopy(graph); } else if (noCosts){ graphPtr = Isorropia::Epetra::createBalancedCopy(graph, params); } targetBlockRowMap = &(graphPtr->RowMap()); targetBlockColMap = &(graphPtr->ColMap()); } else if (objectType == EPETRA_ROWMATRIX){ if (noParams && noCosts){ rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix); } else if (noCosts){ rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params); } targetRowMap = &(rowMatrixPtr->RowMatrixRowMap()); targetColMap = &(rowMatrixPtr->RowMatrixColMap()); } else if (objectType == EPETRA_LINEARPROBLEM){ // Create a linear problem with this matrix. LHSmap = new Epetra_Map(numCols, base, Comm); int myRHSsize = sourceRowMap.NumMyElements(); int myLHSsize = LHSmap->NumMyElements(); int valSize = ((myRHSsize > myLHSsize) ? 
myRHSsize : myLHSsize); double *vals = NULL; if (valSize){ vals = new double [valSize]; } if (valSize){ for (int i=0; i < valSize; i++){ // put my rank in my portion of LHS and my portion of RHS vals[i] = localProc; } } RHS = new Epetra_MultiVector(Copy, sourceRowMap, vals, 1, 1); LHS = new Epetra_MultiVector(Copy, *LHSmap, vals, 1, 1); if (valSize){ delete [] vals; } problem = new Epetra_LinearProblem(matrix.get(), LHS, RHS); Epetra_LinearProblem lp = *problem; if (lp.CheckInput()){ ERROREXIT((localProc==0), "Error creating a LinearProblem"); } if (noParams && noCosts){ problemPtr = Isorropia::Epetra::createBalancedCopy(lp); } else if (noCosts){ problemPtr = Isorropia::Epetra::createBalancedCopy(lp, params); } targetRowMap = &(problemPtr->GetMatrix()->RowMatrixRowMap()); targetColMap = &(problemPtr->GetMatrix()->RowMatrixColMap()); } // Redistribute the edge weights // Comment this out since we don't redistribute columns if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){ if (partitioningType == GRAPH_PARTITIONING){ Epetra_Import *importer = NULL; if (objectType == EPETRA_CRSGRAPH){ newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graphPtr)); targetRowMap = &(newewgts->RowMap()); targetColMap = &(newewgts->ColMap()); } else{ newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *targetRowMap, *targetColMap, 0)); } importer = new Epetra_Import(*targetRowMap, sourceRowMap); newewgts->Import(*eptr, *importer, Insert); newewgts->FillComplete(*targetColMap, *targetRowMap); costs.setGraphEdgeWeights(newewgts); } } // Redistribute the vertex weights if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){ Epetra_Import *importer = NULL; if (objectType == EPETRA_CRSGRAPH){ newvwgts = Teuchos::rcp(new Epetra_Vector(*targetBlockRowMap)); importer = new Epetra_Import(*targetBlockRowMap, sourceRowMap); } else{ newvwgts = Teuchos::rcp(new Epetra_Vector(*targetRowMap)); importer = new Epetra_Import(*targetRowMap, sourceRowMap); } newvwgts->Import(*vptr, *importer, 
Insert); costs.setVertexWeights(newvwgts); } if (localProc == 0){ test_type(numPartitions, partitioningType, vertexWeightType, edgeWeightType, objectType); } if (verbose){ // Picture of problem before balancing if (objectType == EPETRA_LINEARPROBLEM){ ispatest::show_matrix("Before load balancing", *problem, Comm); } else{ ispatest::show_matrix("Before load balancing", matrix->Graph(), Comm); } // Picture of problem after balancing if (objectType == EPETRA_LINEARPROBLEM){ ispatest::show_matrix("After load balancing (x in Ax=b is not redistributed)", *problemPtr, Comm); } else if (objectType == EPETRA_ROWMATRIX){ ispatest::show_matrix("After load balancing", *rowMatrixPtr, Comm); } else if (objectType == EPETRA_CRSMATRIX){ ispatest::show_matrix("After load balancing", matrixPtr->Graph(), Comm); } else if (objectType == EPETRA_CRSGRAPH){ ispatest::show_matrix("After load balancing", *graphPtr, Comm); } } // After partitioning, recompute the metrics if (partitioningType == GRAPH_PARTITIONING){ if (objectType == EPETRA_LINEARPROBLEM){ rc = ispatest::compute_graph_metrics(*(problemPtr->GetMatrix()), costs, myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2); } else if (objectType == EPETRA_ROWMATRIX){ rc = ispatest::compute_graph_metrics(*rowMatrixPtr, costs, myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2); } else if (objectType == EPETRA_CRSMATRIX){ rc = ispatest::compute_graph_metrics(matrixPtr->Graph(), costs, myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2); } else { rc = ispatest::compute_graph_metrics(*graphPtr, costs, myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2); } } else{ if (objectType == EPETRA_LINEARPROBLEM){ rc = ispatest::compute_hypergraph_metrics(*(problemPtr->GetMatrix()), costs, myShare, balance2, cutn2, cutl2); } else if (objectType == EPETRA_ROWMATRIX){ rc = ispatest::compute_hypergraph_metrics(*rowMatrixPtr, costs, myShare, balance2, cutn2, cutl2); } else if (objectType == EPETRA_CRSMATRIX){ rc = 
ispatest::compute_hypergraph_metrics(matrixPtr->Graph(), costs, myShare, balance2, cutn2, cutl2); } else{ rc = ispatest::compute_hypergraph_metrics(*graphPtr, costs, myShare, balance2, cutn2, cutl2); } } if (rc){ ERROREXIT((localProc==0), "Error in computing partitioning metrics") } std::string why; if (partitioningType == GRAPH_PARTITIONING){ fail = (cutWgt2 > cutWgt1); why = "New weighted edge cuts are worse"; if (localProc == 0){ std::cout << "Before partitioning: Balance " << balance1 ; std::cout << " cutn " << cutn1 ; std::cout << " cutl " << cutl1 ; if (contract){ std::cout << " (wrt balancing over " << numPartitions << " partitions)" << std::endl; std::cout << "Before partitioning: Balance " << balance3 ; std::cout << " cutn " << cutn3 ; std::cout << " cutl " << cutl3 ; std::cout << " (wrt balancing over " << numProcs << " partitions)" ; } std::cout << std::endl; std::cout << " Total edge cuts: " << numCuts1; std::cout << " Total weighted edge cuts: " << cutWgt1 << std::endl; std::cout << "After partitioning: Balance " << balance2 ; std::cout << " cutn " << cutn2 ; std::cout << " cutl " << cutl2 << std::endl; std::cout << " Total edge cuts: " << numCuts2; std::cout << " Total weighted edge cuts: " << cutWgt2 << std::endl; } } else{ fail = (cutl2 > cutl1); why = "New cutl is worse"; if (localProc == 0){ std::cout << "Before partitioning: Balance " << balance1 ; std::cout << " cutn " << cutn1 ; std::cout << " cutl " << cutl1 ; if (contract){ std::cout << " (wrt balancing over " << numPartitions << " partitions)" << std::endl; std::cout << "Before partitioning: Balance " << balance3 ; std::cout << " cutn " << cutn3 ; std::cout << " cutl " << cutl3 ; std::cout << " (wrt balancing over " << numProcs << " partitions)" ; } std::cout << std::endl; std::cout << "After partitioning: Balance " << balance2 ; std::cout << " cutn " << cutn2 ; std::cout << " cutl " << cutl2 << std::endl; } } if (fail){ if (localProc == 0) std::cout << "ERROR: "+why << std::endl; } // Check 
that input matrix is valid. This test constructs an "x" // with the matrix->DomainMap() and a "y" with matrix->RangeMap() // and then calculates y = Ax. if (objectType == EPETRA_LINEARPROBLEM){ valid = ispatest::test_matrix_vector_multiply(*problemPtr); } else if (objectType == EPETRA_ROWMATRIX){ valid = ispatest::test_row_matrix_vector_multiply(*rowMatrixPtr); } else if (objectType == EPETRA_CRSMATRIX){ valid = ispatest::test_matrix_vector_multiply(*matrixPtr); } else{ valid = ispatest::test_matrix_vector_multiply(*graphPtr); } if (!valid){ if (localProc == 0) std::cout << "Rebalanced matrix is not a valid Epetra matrix" << std::endl; fail = 1; } else{ if (localProc == 0) std::cout << "Rebalanced matrix is a valid Epetra matrix" << std::endl; } if (localProc == 0) std::cout << std::endl; #else std::cout << "test_simple main: currently can only test " << "with Epetra and EpetraExt enabled." << std::endl; rc = -1; #endif return fail; }
// Jacobian specialization of the Dirichlet scatter.
//
// For every cell of the workset and every scattered field, the DOFs reported
// by getGIDFieldOffsets_closure() for (side_subcell_dim_, local_side_id_) are
// processed as follows:
//   1. the locally owned matrix row is zeroed (the entry whose local column
//      index equals the local row index is kept when preserveDiagonal_ is set),
//   2. the value of the scattered field is written into the residual vector,
//      if the container holds one,
//   3. the row is flagged in dirichletCounter_, if one is attached,
//   4. unless preserveDiagonal_ is set, the row is refilled with the
//      derivative components of the scattered field.
// DOFs whose local id is negative (not on this processor) are skipped.
void panzer::ScatterDirichletResidual_Epetra<panzer::Traits::Jacobian, Traits,LO,GO>::
evaluateFields(typename Traits::EvalData workset)
{
   std::vector<GO> cellGIDs;
   std::vector<int> cellLIDs;

   // objects pulled out of the workset for convenience
   std::string elementBlock = workset.block_id;
   const std::vector<std::size_t> & cellIds = workset.cell_local_ids;

   Teuchos::RCP<const EpetraLinearObjContainer> container = epetraContainer_;
   TEUCHOS_ASSERT(container!=Teuchos::null);
   Teuchos::RCP<Epetra_Vector> residual = container->get_f();
   Teuchos::RCP<Epetra_CrsMatrix> jacobian = container->get_A();

   // The residual vector is optional; when it is absent the local ids are
   // looked up through the row map of the Jacobian instead.
   const bool haveResidual = (residual!=Teuchos::null);

   // NOTE: Reordering these loops will likely improve performance.
   //       "getGIDFieldOffsets" may be expensive, while "getElementGIDs"
   //       can be cheaper; the LID lookup, however, may cost more.

   // scatter operation for each cell in the workset
   for(std::size_t cell=0;cell<cellIds.size();++cell) {
      const std::size_t cellLocalId = cellIds[cell];

      globalIndexer_->getElementGIDs(cellLocalId,cellGIDs);

      // calculate the local IDs for this element
      cellLIDs.resize(cellGIDs.size());
      for(std::size_t dof=0;dof<cellGIDs.size();++dof) {
         cellLIDs[dof] = haveResidual ? residual->Map().LID(cellGIDs[dof])
                                      : jacobian->RowMap().LID(cellGIDs[dof]);
      }

      // loop over each field to be scattered
      for(std::size_t field=0;field<scatterFields_.size();++field) {
         const int fieldNum = fieldIds_[field];

         // this call "should" return the ordering according to the Intrepid basis
         const std::pair<std::vector<int>,std::vector<int> > & closure
               = globalIndexer_->getGIDFieldOffsets_closure(elementBlock,fieldNum,
                                                            side_subcell_dim_,
                                                            local_side_id_);
         const std::vector<int> & dofOffsets = closure.first;
         const std::vector<int> & basisIds = closure.second;

         // loop over the basis functions on the closure
         for(std::size_t b=0;b<dofOffsets.size();++b) {
            const int offset = dofOffsets[b];
            const int rowLid = cellLIDs[offset];
            if(rowLid<0) // not on this processor
               continue;

            // zero out the matrix row through a view of its storage
            {
               int rowLength = 0;
               int * colIndices = 0;
               double * rowData = 0;

               jacobian->ExtractMyRowView(rowLid,rowLength,rowData,colIndices);

               for(int k=0;k<rowLength;++k) {
                  // with preserveDiagonal_ the entry on the diagonal survives
                  if(!preserveDiagonal_ || rowLid!=colIndices[k])
                     rowData[k] = 0.0;
               }
            }

            const int basisId = basisIds[b];
            const int rowGid = cellGIDs[offset];
            const ScalarT & value = (scatterFields_[field])(cell,basisId);

            if(haveResidual)
               (*residual)[rowLid] = value.val();

            if(dirichletCounter_!=Teuchos::null)
               (*dirichletCounter_)[rowLid] = 1.0; // mark row as dirichlet

            // one slot per sensitivity index: all DOFs on a cell
            std::vector<double> rowDerivs(value.size(),0.0);

            if(preserveDiagonal_)
               continue; // row already holds what it should

            // default case: replace the row with the derivatives
            for(int s=0;s<value.size();++s)
               rowDerivs[s] = value.fastAccessDx(s);
            TEUCHOS_ASSERT(rowDerivs.size()==cellGIDs.size());

            const int status = jacobian->ReplaceGlobalValues(rowGid, value.size(),
                                                             &rowDerivs[0],&cellGIDs[0]);
            TEUCHOS_ASSERT(status==0);
         }
      }
   }
}
// helper routines bool SplitMatrix2x2(Teuchos::RCP<const Epetra_CrsMatrix> A, const Epetra_Map& A11rowmap, const Epetra_Map& A22rowmap, Teuchos::RCP<Epetra_CrsMatrix>& A11, Teuchos::RCP<Epetra_CrsMatrix>& A12, Teuchos::RCP<Epetra_CrsMatrix>& A21, Teuchos::RCP<Epetra_CrsMatrix>& A22) { if (A==Teuchos::null) { std::cout << "ERROR: SplitMatrix2x2: A==null on entry" << std::endl; return false; } const Epetra_Comm& Comm = A->Comm(); const Epetra_Map& A22map = A22rowmap; const Epetra_Map& A11map = A11rowmap; //----------------------------- create a parallel redundant map of A22map std::map<int,int> a22gmap; { std::vector<int> a22global(A22map.NumGlobalElements()); int count=0; for (int proc=0; proc<Comm.NumProc(); ++proc) { int length = 0; if (proc==Comm.MyPID()) { for (int i=0; i<A22map.NumMyElements(); ++i) { a22global[count+length] = A22map.GID(i); ++length; } } Comm.Broadcast(&length,1,proc); Comm.Broadcast(&a22global[count],length,proc); count += length; } if (count != A22map.NumGlobalElements()) { std::cout << "ERROR SplitMatrix2x2: mismatch in dimensions" << std::endl; return false; } // create the map for (int i=0; i<count; ++i) a22gmap[a22global[i]] = 1; a22global.clear(); } //--------------------------------------------------- create matrix A22 A22 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100)); { std::vector<int> a22gcindices(100); std::vector<double> a22values(100); for (int i=0; i<A->NumMyRows(); ++i) { const int grid = A->GRID(i); if (A22map.MyGID(grid)==false) continue; int numentries; double* values; int* cindices; int err = A->ExtractMyRowView(i,numentries,values,cindices); if (err) { std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl; return false; } if (numentries>(int)a22gcindices.size()) { a22gcindices.resize(numentries); a22values.resize(numentries); } int count=0; for (int j=0; j<numentries; ++j) { const int gcid = A->ColMap().GID(cindices[j]); // see whether we have gcid in a22gmap 
std::map<int,int>::iterator curr = a22gmap.find(gcid); if (curr==a22gmap.end()) continue; //std::cout << gcid << " "; a22gcindices[count] = gcid; a22values[count] = values[j]; ++count; } //std::cout << std::endl; fflush(stdout); // add this filtered row to A22 err = A22->InsertGlobalValues(grid,count,&a22values[0],&a22gcindices[0]); if (err<0) { std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl; return false; } } //for (int i=0; i<A->NumMyRows(); ++i) a22gcindices.clear(); a22values.clear(); } A22->FillComplete(); A22->OptimizeStorage(); //----------------------------------------------------- create matrix A11 A11 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100)); { std::vector<int> a11gcindices(100); std::vector<double> a11values(100); for (int i=0; i<A->NumMyRows(); ++i) { const int grid = A->GRID(i); if (A11map.MyGID(grid)==false) continue; int numentries; double* values; int* cindices; int err = A->ExtractMyRowView(i,numentries,values,cindices); if (err) { std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl; return false; } if (numentries>(int)a11gcindices.size()) { a11gcindices.resize(numentries); a11values.resize(numentries); } int count=0; for (int j=0; j<numentries; ++j) { const int gcid = A->ColMap().GID(cindices[j]); // see whether we have gcid as part of a22gmap std::map<int,int>::iterator curr = a22gmap.find(gcid); if (curr!=a22gmap.end()) continue; a11gcindices[count] = gcid; a11values[count] = values[j]; ++count; } err = A11->InsertGlobalValues(grid,count,&a11values[0],&a11gcindices[0]); if (err<0) { std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl; return false; } } // for (int i=0; i<A->NumMyRows(); ++i) a11gcindices.clear(); a11values.clear(); } A11->FillComplete(); A11->OptimizeStorage(); //---------------------------------------------------- create matrix A12 A12 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100)); { 
std::vector<int> a12gcindices(100); std::vector<double> a12values(100); for (int i=0; i<A->NumMyRows(); ++i) { const int grid = A->GRID(i); if (A11map.MyGID(grid)==false) continue; int numentries; double* values; int* cindices; int err = A->ExtractMyRowView(i,numentries,values,cindices); if (err) { std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl; return false; } if (numentries>(int)a12gcindices.size()) { a12gcindices.resize(numentries); a12values.resize(numentries); } int count=0; for (int j=0; j<numentries; ++j) { const int gcid = A->ColMap().GID(cindices[j]); // see whether we have gcid as part of a22gmap std::map<int,int>::iterator curr = a22gmap.find(gcid); if (curr==a22gmap.end()) continue; a12gcindices[count] = gcid; a12values[count] = values[j]; ++count; } err = A12->InsertGlobalValues(grid,count,&a12values[0],&a12gcindices[0]); if (err<0) { std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl; return false; } } // for (int i=0; i<A->NumMyRows(); ++i) a12values.clear(); a12gcindices.clear(); } A12->FillComplete(A22map,A11map); A12->OptimizeStorage(); //----------------------------------------------------------- create A21 A21 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100)); { std::vector<int> a21gcindices(100); std::vector<double> a21values(100); for (int i=0; i<A->NumMyRows(); ++i) { const int grid = A->GRID(i); if (A22map.MyGID(grid)==false) continue; int numentries; double* values; int* cindices; int err = A->ExtractMyRowView(i,numentries,values,cindices); if (err) { std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl; return false; } if (numentries>(int)a21gcindices.size()) { a21gcindices.resize(numentries); a21values.resize(numentries); } int count=0; for (int j=0; j<numentries; ++j) { const int gcid = A->ColMap().GID(cindices[j]); // see whether we have gcid as part of a22gmap std::map<int,int>::iterator curr = a22gmap.find(gcid); if 
(curr!=a22gmap.end()) continue; a21gcindices[count] = gcid; a21values[count] = values[j]; ++count; } err = A21->InsertGlobalValues(grid,count,&a21values[0],&a21gcindices[0]); if (err<0) { std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl; return false; } } // for (int i=0; i<A->NumMyRows(); ++i) a21values.clear(); a21gcindices.clear(); } A21->FillComplete(A11map,A22map); A21->OptimizeStorage(); //-------------------------------------------------------------- tidy up a22gmap.clear(); return true; }