Teuchos::RCP<Epetra_CrsGraph>
sparse3Tensor2CrsGraph(
  const Stokhos::Sparse3Tensor<ordinal_type,value_type>& Cijk,
  const Epetra_BlockMap& map)
{
  typedef Stokhos::Sparse3Tensor<ordinal_type,value_type> Cijk_type;

  // Graph to be created
  Teuchos::RCP<Epetra_CrsGraph> graph =
    Teuchos::rcp(new Epetra_CrsGraph(Copy, map, 0));

  // Loop over Cijk entries, inserting a non-zero in the graph at
  // indices (i,j) if there is any k for which Cijk is non-zero
  for (typename Cijk_type::k_iterator k_it=Cijk.k_begin();
       k_it!=Cijk.k_end(); ++k_it) {
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it);
         j_it != Cijk.j_end(k_it); ++j_it) {
      ordinal_type j = index(j_it);
      for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
           i_it != Cijk.i_end(j_it); ++i_it) {
        ordinal_type i = index(i_it);
        graph->InsertGlobalIndices(i, 1, &j);
      }
    }
  }

  // Sort, remove redundancies, transform to local, ...
  graph->FillComplete();

  return graph;
}
Teuchos::RCP<Epetra_CrsGraph>
sparse3Tensor2CrsGraph(
  const Stokhos::OrthogPolyBasis<ordinal_type,value_type>& basis,
  const Stokhos::Sparse3Tensor<ordinal_type,value_type>& Cijk,
  const Epetra_Comm& comm)
{
  // Number of stochastic rows
  ordinal_type num_rows = basis.size();

  // Replicated local map
  Epetra_LocalMap map(num_rows, 0, comm);

  // Graph to be created
  Teuchos::RCP<Epetra_CrsGraph> graph =
    Teuchos::rcp(new Epetra_CrsGraph(Copy, map, 0));

  // Loop over Cijk entries, inserting a non-zero in the graph at
  // indices (i,j) if there is any k for which Cijk is non-zero
  ordinal_type Cijk_size = Cijk.size();
  for (ordinal_type k=0; k<Cijk_size; k++) {
    ordinal_type nj = Cijk.num_j(k);
    const Teuchos::Array<int>& j_indices = Cijk.Jindices(k);
    for (ordinal_type jj=0; jj<nj; jj++) {
      ordinal_type j = j_indices[jj];
      const Teuchos::Array<int>& i_indices = Cijk.Iindices(k,jj);
      ordinal_type ni = i_indices.size();
      for (ordinal_type ii=0; ii<ni; ii++) {
        ordinal_type i = i_indices[ii];
        graph->InsertGlobalIndices(i, 1, &j);
      }
    }
  }

  // Sort, remove redundancies, transform to local, ...
  graph->FillComplete();

  return graph;
}
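// A minimal call sketch for the second overload above, assuming
// ordinal_type = int and value_type = double for the templates, and a
// hypothetical buildBasis() helper that constructs an orthogonal polynomial
// basis (the unit tests further down build one the same way):
void sparse3Tensor2CrsGraph_example()
{
  Epetra_SerialComm comm;
  Teuchos::RCP<const Stokhos::OrthogPolyBasis<int,double> > basis =
    buildBasis();                            // hypothetical helper
  Teuchos::RCP<Stokhos::Sparse3Tensor<int,double> > Cijk =
    basis->computeTripleProductTensor();     // full triple-product tensor

  // One (i,j) non-zero for every k with C_ijk != 0, on a replicated map
  Teuchos::RCP<Epetra_CrsGraph> graph =
    sparse3Tensor2CrsGraph(*basis, *Cijk, comm);
}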
//=============================================================================
int Amesos_Dscpack::PerformSymbolicFactorization()
{
  ResetTimer(0);
  ResetTimer(1);

  MyPID_    = Comm().MyPID();
  NumProcs_ = Comm().NumProc();

  Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix();
  if (RowMatrixA == 0)
    AMESOS_CHK_ERR(-1);

  const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ;
  const Epetra_MpiComm& comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm());
  int numrows = RowMatrixA->NumGlobalRows();
  int numentries = RowMatrixA->NumGlobalNonzeros();

  Teuchos::RCP<Epetra_CrsGraph> Graph;

  Epetra_CrsMatrix* CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA);

  if (CastCrsMatrixA)
  {
    Graph = Teuchos::rcp(const_cast<Epetra_CrsGraph*>(&(CastCrsMatrixA->Graph())), false);
  }
  else
  {
    int MaxNumEntries = RowMatrixA->MaxNumEntries();
    Graph = Teuchos::rcp(new Epetra_CrsGraph(Copy, OriginalMap, MaxNumEntries));

    std::vector<int>    Indices(MaxNumEntries);
    std::vector<double> Values(MaxNumEntries);

    for (int i = 0 ; i < RowMatrixA->NumMyRows() ; ++i)
    {
      int NumEntries;
      RowMatrixA->ExtractMyRowCopy(i, MaxNumEntries, NumEntries,
                                   &Values[0], &Indices[0]);

      for (int j = 0 ; j < NumEntries ; ++j)
        Indices[j] = RowMatrixA->RowMatrixColMap().GID(Indices[j]);

      int GlobalRow = RowMatrixA->RowMatrixRowMap().GID(i);
      Graph->InsertGlobalIndices(GlobalRow, NumEntries, &Indices[0]);
    }

    Graph->FillComplete();
  }

  //
  //  Create a replicated map and graph
  //
  std::vector<int> AllIDs( numrows ) ;
  for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ;

  Epetra_Map      ReplicatedMap( -1, numrows, &AllIDs[0], 0, Comm());
  Epetra_Import   ReplicatedImporter(ReplicatedMap, OriginalMap);
  Epetra_CrsGraph ReplicatedGraph( Copy, ReplicatedMap, 0 );

  AMESOS_CHK_ERR(ReplicatedGraph.Import(*Graph, ReplicatedImporter, Insert));
  AMESOS_CHK_ERR(ReplicatedGraph.FillComplete());

  //
  //  Convert the matrix to Ap, Ai
  //
  std::vector <int> Replicates(numrows);
  std::vector <int> Ap(numrows + 1);
  std::vector <int> Ai(EPETRA_MAX(numrows, numentries));

  for( int i = 0 ; i < numrows; i++ ) Replicates[i] = 1;

  int NumEntriesPerRow ;
  int *ColIndices = 0 ;
  int Ai_index = 0 ;
  for ( int MyRow = 0; MyRow < numrows; MyRow++ )
  {
    AMESOS_CHK_ERR( ReplicatedGraph.ExtractMyRowView( MyRow, NumEntriesPerRow,
                                                      ColIndices ) );
    Ap[MyRow] = Ai_index ;
    for ( int j = 0; j < NumEntriesPerRow; j++ )
    {
      Ai[Ai_index] = ColIndices[j] ;
      Ai_index++;
    }
  }
  assert( Ai_index == numentries ) ;
  Ap[ numrows ] = Ai_index ;

  MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);
  ResetTimer(0);

  //
  //  Call Dscpack Symbolic Factorization
  //
  int OrderCode = 2;
  std::vector<double> MyANonZ;

  NumLocalNonz = 0 ;
  GlobalStructNewColNum = 0 ;
  GlobalStructNewNum = 0 ;
  GlobalStructOwner = 0 ;
  LocalStructOldNum = 0 ;

  NumGlobalCols = 0 ;

  // MS // Have to define the maximum number of processes to be used
  // MS // This is only a suggestion as Dscpack uses a number of processes
  // MS // that is a power of 2

  int NumGlobalNonzeros = GetProblem()->GetMatrix()->NumGlobalNonzeros();
  int NumRows = GetProblem()->GetMatrix()->NumGlobalRows();

  // optimal value for MaxProcs == -1
  int OptNumProcs1 = 1 + EPETRA_MAX( NumRows/10000, NumGlobalNonzeros/1000000 );
  OptNumProcs1 = EPETRA_MIN(NumProcs_, OptNumProcs1);

  // optimal value for MaxProcs == -2
  int OptNumProcs2 = (int)sqrt(1.0 * NumProcs_);
  if( OptNumProcs2 < 1 ) OptNumProcs2 = 1;

  // fix the value of MaxProcs
  switch (MaxProcs_)
  {
  case -1:
    MaxProcs_ = EPETRA_MIN(OptNumProcs1, NumProcs_);
    break;
  case -2:
    MaxProcs_ = EPETRA_MIN(OptNumProcs2, NumProcs_);
    break;
  case -3:
    MaxProcs_ = NumProcs_;
    break;
  }

#if 0
  if (MyDscRank>=0 && A_and_LU_built) {
    DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject);
  }
#endif
  //  if ( ! A_and_LU_built ) {
  //    DSC_End( PrivateDscpackData_->MyDSCObject ) ;
  //    PrivateDscpackData_->MyDSCObject = DSC_Begin() ;
  //  }

  // MS // here I continue with the old code...

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  DscNumProcs = 1 ;
  int DscMax = DSC_Analyze( numrows, &Ap[0], &Ai[0], &Replicates[0] );

  while ( DscNumProcs * 2 <= EPETRA_MIN( MaxProcs_, DscMax ) )
    DscNumProcs *= 2 ;

  MyDscRank = -1;
  DSC_Open0( PrivateDscpackData_->MyDSCObject_, DscNumProcs, &MyDscRank, comm1.Comm()) ;

  NumLocalCols = 0 ; // This is for those processes not in the Dsc grid
  if ( MyDscRank >= 0 ) {
    assert( MyPID_ == MyDscRank ) ;
    AMESOS_CHK_ERR( DSC_Order ( PrivateDscpackData_->MyDSCObject_, OrderCode,
                                numrows, &Ap[0], &Ai[0], &Replicates[0],
                                &NumGlobalCols, &NumLocalStructs,
                                &NumLocalCols, &NumLocalNonz,
                                &GlobalStructNewColNum, &GlobalStructNewNum,
                                &GlobalStructOwner, &LocalStructOldNum ) ) ;
    assert( NumGlobalCols == numrows ) ;
    assert( NumLocalCols == NumLocalStructs ) ;
  }

  if ( MyDscRank >= 0 ) {
    int MaxSingleBlock;
    const int Limit = 5000000 ;  //  Memory Limit set to 5 Terabytes
    AMESOS_CHK_ERR( DSC_SFactor ( PrivateDscpackData_->MyDSCObject_,
                                  &TotalMemory_, &MaxSingleBlock, Limit,
                                  DSC_LBLAS3, DSC_DBLAS2 ) ) ;
  }

  //  A_and_LU_built = true;   // If you uncomment this, TestOptions fails

  SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0);

  return(0);
}
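// The DscNumProcs loop above simply picks the largest power of two that does
// not exceed min(MaxProcs_, DscMax), since DSCPACK only runs on a
// power-of-two process grid. A standalone sketch of that selection logic
// (illustrative only; the helper name is ours, not part of Amesos):
static int largestPowerOfTwoAtMost(int bound)
{
  int p = 1;
  while (p * 2 <= bound)  // same test as the while loop in the code above
    p *= 2;
  return p;               // e.g. bound = 13 -> 8, bound = 16 -> 16
}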
/* Computes the approximate Schur complement for the wide separator
   using guided probing */
Teuchos::RCP<Epetra_CrsMatrix> computeSchur_GuidedProbing
(
    shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    shylu_data *data,       // numeric structure
    Epetra_Map *localDRowMap
)
{
    int i;
    double relative_thres = config->relative_threshold;

    Epetra_CrsMatrix *G = ssym->G.getRawPtr();
    Epetra_CrsMatrix *R = ssym->R.getRawPtr();
    Epetra_LinearProblem *LP = ssym->LP.getRawPtr();
    Amesos_BaseSolver *solver = ssym->Solver.getRawPtr();
    Ifpack_Preconditioner *ifSolver = ssym->ifSolver.getRawPtr();
    Epetra_CrsMatrix *C = ssym->C.getRawPtr();

    // Need to create local G (block diagonal portion), R, C

    // Get row map of G
    Epetra_Map CrMap = C->RowMap();
    int *c_rows = CrMap.MyGlobalElements();
    int *c_cols = (C->ColMap()).MyGlobalElements();
    //int c_totalElems = CrMap.NumGlobalElements();
    int c_localElems = CrMap.NumMyElements();
    int c_localcolElems = (C->ColMap()).NumMyElements();

    Epetra_Map GrMap = G->RowMap();
    int *g_rows = GrMap.MyGlobalElements();
    //int g_totalElems = GrMap.NumGlobalElements();
    int g_localElems = GrMap.NumMyElements();

    Epetra_Map RrMap = R->RowMap();
    int *r_rows = RrMap.MyGlobalElements();
    int *r_cols = (R->ColMap()).MyGlobalElements();
    //int r_totalElems = RrMap.NumGlobalElements();
    int r_localElems = RrMap.NumMyElements();
    int r_localcolElems = (R->ColMap()).NumMyElements();

    Epetra_SerialComm LComm;
    Epetra_Map C_localRMap (-1, c_localElems, c_rows, 0, LComm);
    Epetra_Map C_localCMap (-1, c_localcolElems, c_cols, 0, LComm);
    Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);
    Epetra_Map R_localRMap (-1, r_localElems, r_rows, 0, LComm);
    Epetra_Map R_localCMap (-1, r_localcolElems, r_cols, 0, LComm);

    //cout << "#local rows" << g_localElems
    //     << "#non zero local cols" << c_localcolElems << endl;

#ifdef DEBUG
    cout << "DEBUG MODE" << endl;
    int nrows = C->RowMap().NumMyElements();
    assert(nrows == localDRowMap->NumGlobalElements());

    int gids[nrows], gids1[nrows];
    C_localRMap.MyGlobalElements(gids);
    localDRowMap->MyGlobalElements(gids1);

    cout << "Comparing R's domain map with D's row map" << endl;

    for (int i = 0; i < nrows; i++)
    {
        assert(gids[i] == gids1[i]);
    }
#endif

    int nentries1, gid;
    // maxentries is the maximum of all three possible matrices as the arrays
    // are reused between the three
    int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
    maxentries = max(maxentries, G->MaxNumEntries());

    double *values1 = new double[maxentries];
    double *values2 = new double[maxentries];
    double *values3 = new double[maxentries];
    int *indices1 = new int[maxentries];
    int *indices2 = new int[maxentries];
    int *indices3 = new int[maxentries];

    //cout << "Creating local matrices" << endl;
    int err;

    Epetra_CrsMatrix localC(Copy, C_localRMap, C->MaxNumEntries(), false);
    for (i = 0; i < c_localElems ; i++)
    {
        gid = c_rows[i];
        err = C->ExtractGlobalRowCopy(gid, maxentries, nentries1,
                                      values1, indices1);
        assert (err == 0);
        //if (nentries1 > 0) // TODO: Later
        //{
        err = localC.InsertGlobalValues(gid, nentries1, values1, indices1);
        assert (err == 0);
        //}
    }
    localC.FillComplete(G_localRMap, C_localRMap);
    //cout << "Created local C matrix" << endl;

    Epetra_CrsMatrix localR(Copy, R_localRMap, R->MaxNumEntries(), false);
    for (i = 0; i < r_localElems ; i++)
    {
        gid = r_rows[i];
        R->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);
        localR.InsertGlobalValues(gid, nentries1, values1, indices1);
    }
    localR.FillComplete(*localDRowMap, R_localRMap);
    //cout << "Created local R matrix" << endl;

    // Sbar - Approximate Schur complement
    Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, g_localElems));

    // Include only the block diagonal elements of G in localG
    Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
    int cnt, scnt;
    for (i = 0; i < g_localElems ; i++)
    {
        gid = g_rows[i];
        G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);

        cnt = 0;
        scnt = 0;
        for (int j = 0 ; j < nentries1 ; j++)
        {
            if (G->LRID(indices1[j]) != -1)
            {
                values2[cnt] = values1[j];
                indices2[cnt++] = indices1[j];
            }
            else
            {
                // Add it to Sbar immediately
                values3[scnt] = values1[j];
                indices3[scnt++] = indices1[j];
            }
        }

        localG.InsertGlobalValues(gid, cnt, values2, indices2);
        Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
    }
    localG.FillComplete();
    cout << "Created local G matrix" << endl;

    int nvectors = 16;
    ShyLU_Probing_Operator probeop(config, ssym, &localG, &localR, LP, solver,
                                   ifSolver, &localC, localDRowMap, nvectors);

#ifdef DUMP_MATRICES
    //ostringstream fnamestr;
    //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
    //string Cfname = fnamestr.str();
    //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);

    //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
    //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
    //                    new EpetraExt::CrsMatrix_Reindex( defMapg );
    //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
    //ReIdx_MatTransg->fwd();
    //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
#endif

    //cout << " totalElems in Schur Complement" << totalElems << endl;
    //cout << myPID << " localElems" << localElems << endl;

    // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
#endif
#ifdef TIMING_OUTPUT
    Teuchos::Time app_time("setup time");
#endif

    Teuchos::RCP<Epetra_CrsGraph> lSGraph = Teuchos::RCP<Epetra_CrsGraph> (
                        new Epetra_CrsGraph(Copy, G_localRMap, maxentries));

    if (data->num_compute % config->reset_iter == 0)
    {
        int nentries;
        // size > maxentries as there could be fill
        // TODO: Currently the size of the two arrays can be one; even if we
        // switch the loops below, the size of the array required is nvectors.
        // Fix it.
        double *values = new double[g_localElems];
        int *indices = new int[g_localElems];
        double *vecvalues;
        int dropped = 0;
        double *maxvalue = new double[nvectors];
#ifdef TIMING_OUTPUT
        ftime.start();
#endif
        int findex = g_localElems / nvectors ;

        int cindex;
        // int mypid = C->Comm().MyPID(); // unused
        Epetra_MultiVector probevec(G_localRMap, nvectors);
        Epetra_MultiVector Scol(G_localRMap, nvectors);
        for (i = 0 ; i < findex*nvectors ; i+=nvectors)
        {
            probevec.PutScalar(0.0); // TODO: Move it out
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                // TODO: Can do better than this, just need to go to the
                // column map of C, there might be null columns in C
                // Not much of use for Shasta 2x2 .. Later.
                probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
                //if (mypid == 0)
                //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
            }

#ifdef TIMING_OUTPUT
            app_time.start();
#endif
            probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
            app_time.stop();
#endif

            Scol.MaxValue(maxvalue);
            for (int k = 0; k < nvectors; k++) //TODO: Need to switch these loops
            {
                cindex = k+i;
                vecvalues = Scol[k];
                //cout << "MAX" << maxvalue << endl;
                for (int j = 0 ; j < g_localElems ; j++)
                {
                    nentries = 0; // inserting one entry in each row for now
                    if (g_rows[cindex] == g_rows[j]) // diagonal entry
                    {
                        values[nentries] = vecvalues[j];
                        indices[nentries] = g_rows[cindex];
                        nentries++;
                        err = Sbar->InsertGlobalValues(g_rows[j], nentries,
                                                       values, indices);
                        assert(err >= 0);
                        err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
                                                           indices);
                        assert(err >= 0);
                    }
                    else if (abs(vecvalues[j]/maxvalue[k]) > relative_thres)
                    {
                        values[nentries] = vecvalues[j];
                        indices[nentries] = g_rows[cindex];
                        nentries++;
                        err = Sbar->InsertGlobalValues(g_rows[j], nentries,
                                                       values, indices);
                        assert(err >= 0);
                        err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
                                                           indices);
                        assert(err >= 0);
                    }
                    else
                    {
                        if (vecvalues[j] != 0.0)
                        {
                            dropped++;
                            //cout << "vecvalues[j]" << vecvalues[j] <<
                            //        " max" << maxvalue[k] << endl;
                        }
                    }
                }
            }
        }

        probeop.ResetTempVectors(1);

        for ( ; i < g_localElems ; i++)
        {
            // TODO: Can move the next two declarations outside the loop
            Epetra_MultiVector probevec(G_localRMap, 1);
            Epetra_MultiVector Scol(G_localRMap, 1);

            probevec.PutScalar(0.0);
            // TODO: Can do better than this, just need to go to the column
            // map of C, there might be null columns in C
            probevec.ReplaceGlobalValue(g_rows[i], 0, 1.0);

#ifdef TIMING_OUTPUT
            app_time.start();
#endif
            probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
            app_time.stop();
#endif
            vecvalues = Scol[0];
            Scol.MaxValue(maxvalue);
            //cout << "MAX" << maxvalue << endl;
            for (int j = 0 ; j < g_localElems ; j++)
            {
                nentries = 0; // inserting one entry in each row for now
                if (g_rows[i] == g_rows[j]) // diagonal entry
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries] = g_rows[i];
                    nentries++;
                    err = Sbar->InsertGlobalValues(g_rows[j], nentries,
                                                   values, indices);
                    assert(err >= 0);
                    err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
                                                       indices);
                    assert(err >= 0);
                }
                else if (abs(vecvalues[j]/maxvalue[0]) > relative_thres)
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries] = g_rows[i];
                    nentries++;
                    err = Sbar->InsertGlobalValues(g_rows[j], nentries,
                                                   values, indices);
                    assert(err >= 0);
                    err = lSGraph->InsertGlobalIndices(g_rows[j], nentries,
                                                       indices);
                    assert(err >= 0);
                }
                else
                {
                    if (vecvalues[j] != 0.0) dropped++;
                }
            }
        }
#ifdef TIMING_OUTPUT
        ftime.stop();
        cout << "Time in finding and dropping entries"
             << ftime.totalElapsedTime() << endl;
        ftime.reset();
#endif
#ifdef TIMING_OUTPUT
        cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
#endif
        probeop.PrintTimingInfo();
        Sbar->FillComplete();
        lSGraph->FillComplete();

        data->localSbargraph = lSGraph;

#ifdef DUMP_MATRICES
        Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
        EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                             new EpetraExt::CrsMatrix_Reindex( defMap2 );
        Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
        ReIdx_MatTrans2->fwd();
        EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif
        cout << "#dropped entries" << dropped << endl;
        delete[] values;
        delete[] indices;
        delete[] maxvalue;
    }
    else
    {
        if (((data->num_compute-1) % config->reset_iter) == 0)
        {
            // We recomputed the Schur complement with dropping for the last
            // compute. Reset the prober with the new orthogonal vectors for
            // the Sbar from the previous iteration.
            Teuchos::ParameterList pList;
            Teuchos::RCP<Isorropia::Epetra::Prober> gprober =
                Teuchos::RCP<Isorropia::Epetra::Prober> (new
                    Isorropia::Epetra::Prober(
                                    data->localSbargraph.getRawPtr(),
                                    pList, false));
            gprober->color();
            data->guided_prober = gprober;
        }

        // Use the prober to probe the probeop for the sparsity pattern,
        // add that to Sbar, and call FillComplete
        int nvectors = data->guided_prober->getNumOrthogonalVectors();
        cout << "Number of Orthogonal Vectors for guided probing" << nvectors
             << endl;

        probeop.ResetTempVectors(nvectors);
        Teuchos::RCP<Epetra_CrsMatrix> blockdiag_Sbar =
                                 data->guided_prober->probe(probeop);
        int maxentries = blockdiag_Sbar->GlobalMaxNumEntries();
        int *indices = new int[maxentries];
        double *values = new double[maxentries];

        int numentries;
        for (int i = 0; i < blockdiag_Sbar->NumGlobalRows() ; i++)
        {
            int gid = blockdiag_Sbar->GRID(i);
            blockdiag_Sbar->ExtractGlobalRowCopy(gid, maxentries, numentries,
                                                 values, indices);
            Sbar->InsertGlobalValues(gid, numentries, values, indices);
        }

        Sbar->FillComplete();

        delete[] indices;
        delete[] values;
    }

    delete[] values1;
    delete[] indices1;
    delete[] values2;
    delete[] indices2;
    delete[] values3;
    delete[] indices3;
    return Sbar;
}
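// Context sketch: with the matrix partitioned into blocks [D C; R G], the
// Schur complement is S = G - R * inv(D) * C, and applying S to a unit vector
// e_j recovers column j of S; that is what probeop.Apply() does above, with
// the sparse D solves hidden inside ShyLU_Probing_Operator. A tiny dense
// illustration with scalar "blocks" (ours, purely illustrative):
#include <cstdio>

void schur_probe_demo()
{
  // scalar "blocks": D = 4, C = 2, R = 1, G = 3  =>  S = 3 - 1*(1/4)*2 = 2.5
  double D = 4.0, C = 2.0, R = 1.0, G = 3.0;
  double e = 1.0;             // probe vector (unit "column")
  double y = C * e;           // C * e_j
  double z = y / D;           // inv(D) * C * e_j  (the D solve)
  double s = G * e - R * z;   // column of S = G - R inv(D) C
  std::printf("S column from probing: %g\n", s);
}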
Teuchos::RCP<Epetra_CrsGraph> BlockAdjacencyGraph::compute( Epetra_CrsGraph& B, int nbrr,
                std::vector<int>& r, std::vector<double>& weights, bool verbose)
{
  // Check if the graph is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = B.Comm().MyPID();
  for (int proc=0; proc<B.Comm().NumProc(); proc++)
  {
    if (B.NumGlobalEntries() == B.NumMyEntries())
      myMatProc = myPID;
  }
  B.Comm().MaxAll( &myMatProc, &matProc, 1 );

  if( matProc == -1)
  {
    cout << "FAIL for Global!  All CrsGraph entries must be on one processor!\n";
    abort();
  }

  int i = 0, j = 0, k, l = 0, p, pm, q = -1, ns;
  int tree_height;
  int error = -1;    /* error detected, possibly a problem with the input */
  int nrr;           /* number of rows in B */
  int nzM = 0;       /* number of edges in graph */
  int m = 0;         /* maximum number of nonzeros in any block row of B */
  int* colstack = 0; /* stack used to process each block row */
  int* bstree = 0;   /* binary search tree */
  std::vector<int> Mi, Mj, Mnum(nbrr+1,0);
  nrr = B.NumMyRows();
  if ( matProc == myPID && verbose )
    std::printf(" Matrix Size = %d      Number of Blocks = %d\n", nrr, nbrr);
  else
    nrr = -1;  /* Prevent processor from doing any computations */
  bstree = csr_bst(nbrr);  /* 0 : nbrr-1 */
  tree_height = ceil31log2(nbrr) + 1;
  error = -1;

  l = 0; j = 0; m = 0;
  for( i = 0; i < nrr; i++ ){
    if( i >= r[l+1] ){
      ++l;                  /* new block row */
      m = EPETRA_MAX(m,j);  /* nonzeros in block row */
      j = B.NumGlobalIndices(i);
    }else{
      j += B.NumGlobalIndices(i);
    }
  }
  /* one more time for the final block */
  m = EPETRA_MAX(m,j);  /* nonzeros in block row */

  colstack = (int*) malloc( EPETRA_MAX(m,1) * sizeof(int) );
  // The compressed graph is actually computed twice,
  // due to concerns about memory limitations.  First,
  // without memory allocation, just nzM is computed.
  // Next Mj is allocated. Then, the second time, the
  // arrays are actually populated.
  nzM = 0; q = -1; l = 0;
  int * indices;
  int numEntries;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(int),compare_ints); /* sort stack */
      if( q >= 0 ) ns = 1; /* l, colstack[0] M */
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ) ++ns;
      }
      nzM += ns; /* M->p[l+1] = M->p[l] + ns; */
      ++l;
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k];
        ns = 0; p = 0;
        while( (r[bstree[p]] > j) || (j >= r[bstree[p]+1]) ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
          if( p > nbrr || ns > tree_height ) {
            error = j;
            std::printf("error: p %d  nbrr %d  ns %d %d\n", p, nbrr, ns, j);
            break;
          }
        }
        colstack[++q] = bstree[p];
      }
      //if( error > -1 ){ std::printf("%d\n", error); break; }
      // p > nbrr is a fatal error that is ignored
    }
  }

  if ( matProc == myPID && verbose )
    std::printf("nzM = %d \n", nzM );
  Mi.resize( nzM );
  Mj.resize( nzM );
  q = -1; l = 0; pm = -1;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(colstack[0]),compare_ints); /* sort stack */
      if( q >= 0 ){
        Mi[++pm] = l;
        Mj[pm] = colstack[0];
      }
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ){ /* l, colstack[j] */
          Mi[++pm] = l;
          Mj[pm] = colstack[j];
        }
      }
      ++l;
      Mnum[l] = pm + 1;  /* sparse row format: M->p[l+1] = M->p[l] + ns; */
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k];
        ns = 0; p = 0;
        while( (r[bstree[p]] > j) || (j >= r[bstree[p]+1]) ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
        }
        colstack[++q] = bstree[p];
      }
    }
  }
  if ( bstree ) free( bstree );
  if ( colstack ) free( colstack );

  // Compute weights as number of rows in each block.
  weights.resize( nbrr );
  for( l=0; l<nbrr; l++) weights[l] = r[l+1] - r[l];

  // Compute Epetra_CrsGraph and return
  Teuchos::RCP<Epetra_Map> newMap;
  if ( matProc == myPID )
    newMap = Teuchos::rcp( new Epetra_Map(nbrr, nbrr, 0, B.Comm() ) );
  else
    newMap = Teuchos::rcp( new Epetra_Map( nbrr, 0, 0, B.Comm() ) );
  Teuchos::RCP<Epetra_CrsGraph> newGraph =
    Teuchos::rcp( new Epetra_CrsGraph( Copy, *newMap, 0 ) );
  for( l=0; l<newGraph->NumMyRows(); l++) {
    newGraph->InsertGlobalIndices( l, Mnum[l+1]-Mnum[l], &Mj[Mnum[l]] );
  }
  newGraph->FillComplete();

  return (newGraph);
}
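// The inner while loops above locate, for a column index j, the block l with
// r[l] <= j < r[l+1], by walking a complete binary search tree of block ids
// stored in level order (bstree, built by csr_bst: the children of node p
// live at 2p+1 and 2p+2). A standalone sketch of that lookup, assuming the
// same level-order layout and omitting the error guard (blockOf is our
// illustrative name, not part of the class):
#include <vector>

static int blockOf(int j, const std::vector<int>& r,
                   const std::vector<int>& bstree)
{
  int p = 0;
  while (r[bstree[p]] > j || j >= r[bstree[p] + 1]) {
    if (r[bstree[p]] > j)
      p = 2 * p + 1;    // descend left: this block starts past j
    else
      p = 2 * p + 2;    // descend right: this block ends at or before j
  }
  return bstree[p];     // block index l with r[l] <= j < r[l+1]
}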
int extract_matrices
(
    Epetra_CrsMatrix *A,    // i/p: A matrix
    shylu_symbolic *ssym,   // symbolic structure
    shylu_data *data,       // numeric structure, TODO: Required ?
    shylu_config *config,   // i/p: library configuration
    bool insertValues       // true implies values will be inserted and fill
                            // complete will be called. false implies values
                            // will be replaced.
)
{
    Teuchos::RCP<Epetra_CrsMatrix> D = ssym->D;
    Teuchos::RCP<Epetra_CrsMatrix> C = ssym->C;
    Teuchos::RCP<Epetra_CrsMatrix> R = ssym->R;
    Teuchos::RCP<Epetra_CrsMatrix> G = ssym->G;
    Teuchos::RCP<Epetra_CrsGraph> Sg = ssym->Sg;
    int *DColElems = data->DColElems;
    int *gvals = data->gvals;
    double Sdiagfactor = config->Sdiagfactor;

    int *LeftIndex = new int[data->lmax];
    double *LeftValues = new double[data->lmax];
    int *RightIndex = new int[data->rmax];
    double *RightValues = new double[data->rmax];
    int err;
    int lcnt, rcnt ;
    int gcid;
    int gid;
    int *Ai;
    double *Ax;

    int nrows = A->RowMap().NumMyElements();
    int *rows = A->RowMap().MyGlobalElements();

    for (int i = 0; i < nrows ; i++)
    {
        int NumEntries;
        err = A->ExtractMyRowView(i, NumEntries, Ax, Ai);

        lcnt = 0; rcnt = 0;
        // Place the entry in the correct sub matrix. Works only for sym
        gid = rows[i];
        int lcid;
        for (int j = 0 ; j < NumEntries ; j++)
        { // O(nnz) ! Careful what you do inside
            // Row permutation does not matter here
            gcid = A->GCID(Ai[j]);
            assert(gcid != -1);
            // Either in D or R
            if ((gvals[gid] != 1 && gvals[gcid] == 1)
               || (gvals[gid] == 1 && A->LRID(gcid) != -1 && gvals[gcid] == 1))
            {
                assert(lcnt < data->lmax);
                if (insertValues)
                    LeftIndex[lcnt] = gcid;
                else
                {
                    // local column id
                    lcid = (gvals[gid] == 1 ? D->LCID(gcid) : R->LCID(gcid));
                    assert(lcid != -1);
                    LeftIndex[lcnt] = lcid;
                }
                LeftValues[lcnt++] = Ax[j];
            }
            else
            {
                assert(rcnt < data->rmax);
                if (insertValues)
                    RightIndex[rcnt] = gcid;
                else
                {
                    // local column id
                    lcid = (gvals[gid] == 1 ? C->LCID(gcid) : G->LCID(gcid));
                    assert(lcid != -1);
                    RightIndex[rcnt] = lcid;
                }
                RightValues[rcnt++] = Ax[j];
            }
        }

        if (gvals[gid] == 1)
        { // D or C row
            if (insertValues)
            {
                err = D->InsertGlobalValues(gid, lcnt, LeftValues, LeftIndex);
                assert(err == 0);
                err = C->InsertGlobalValues(gid, rcnt, RightValues, RightIndex);
                assert(err == 0);
            }
            else
            {
                err = D->ReplaceMyValues(D->LRID(gid), lcnt, LeftValues, LeftIndex);
                assert(err == 0);
                err = C->ReplaceMyValues(C->LRID(gid), rcnt, RightValues, RightIndex);
                assert(err == 0);
            }
        }
        else
        { // R or S row
            //assert(lcnt > 0); // TODO: Enable this once using narrow sep.
            if (insertValues)
            {
                assert(rcnt > 0);
                err = R->InsertGlobalValues(gid, lcnt, LeftValues, LeftIndex);
                assert(err == 0);
                err = G->InsertGlobalValues(gid, rcnt, RightValues, RightIndex);
                assert(err == 0);
                if (config->schurApproxMethod == 1)
                {
                    err = Sg->InsertGlobalIndices(gid, rcnt, RightIndex);
                    assert(err == 0);
                }
            }
            else
            {
                assert(rcnt > 0);
                err = R->ReplaceMyValues(R->LRID(gid), lcnt, LeftValues, LeftIndex);
                assert(err == 0);
                err = G->ReplaceMyValues(G->LRID(gid), rcnt, RightValues, RightIndex);
                assert(err == 0);
            }
        }
    }

    if (insertValues)
    {
        /* ------------- Create the maps for the DBBD form ------------------ */
        Epetra_Map *DRowMap, *SRowMap, *DColMap;
        Epetra_SerialComm LComm;
        if (config->sep_type != 1)
        {
            DRowMap = new Epetra_Map(-1, data->Dnr, data->DRowElems, 0, A->Comm());
            SRowMap = new Epetra_Map(-1, data->Snr, data->SRowElems, 0, A->Comm());
            DColMap = new Epetra_Map(-1, data->Dnc, DColElems, 0, A->Comm());
        }
        else
        {
            DRowMap = new Epetra_Map(-1, data->Dnr, data->DRowElems, 0, LComm);
            SRowMap = new Epetra_Map(-1, data->Snr, data->SRowElems, 0, LComm);
            DColMap = new Epetra_Map(-1, data->Dnc, DColElems, 0, LComm);
        }

        D->FillComplete();
        //config->dm.print(5, "Done D fillcomplete");

        G->FillComplete();
        //config->dm.print(5, "Done G fillcomplete");

        C->FillComplete(*SRowMap, *DRowMap); //TODO: Won't work if permutation
                                             // is unsymmetric SRowMap
        //config->dm.print(5, "Done C fillcomplete");

        R->FillComplete(*DColMap, *SRowMap);
        //config->dm.print(5, "Done R fillcomplete");

        int Sdiag = (int) data->Snr * Sdiagfactor;
        Sdiag = MIN(Sdiag, data->Snr-1);
        Sdiag = MAX(Sdiag, 0);

        // Add the diagonals to Sg
        for (int i = 0; config->schurApproxMethod == 1 && i < nrows ; i++)
        {
            gid = rows[i];
            if (gvals[gid] == 1) continue; // not a row in S
            if (data->Snr == 0) assert(0 == 1);

            rcnt = 0;
            //TODO Will be trouble if SNumGlobalCols != Snc
            //assert(SNumGlobalCols == Snc);
            //for (int j = MAX(i-Sdiag,0) ; j<MIN(SNumGlobalCols, i+Sdiag); j++)
            for (int j = MAX(i-Sdiag, 0) ; j < MIN(data->Snr, i+Sdiag); j++)
            {
                // find the adjacent columns from the row map of S
                //assert (j >= 0 && j < Snr);
                RightIndex[rcnt++] = data->SRowElems[j];
            }
            err = Sg->InsertGlobalIndices(gid, rcnt, RightIndex);
            assert(err == 0);
            // Always insert the diagonals, if it is added twice that is fine.
            err = Sg->InsertGlobalIndices(gid, 1, &gid);
            assert(err == 0);
        }

        if (config->schurApproxMethod == 1)
            Sg->FillComplete();

        delete DRowMap;
        delete SRowMap;
        delete DColMap;
    }

#if 0
    if (insertValues)
    {
#ifdef TIMING_OUTPUT
        Teuchos::Time ttime("transpose time");
        ttime.start();
#endif
        bool MakeDataContiguous = true;
        ssym->transposer = Teuchos::RCP<EpetraExt::RowMatrix_Transpose>(new
                        EpetraExt::RowMatrix_Transpose(MakeDataContiguous));
        ssym->DT = Teuchos::rcp(dynamic_cast<Epetra_CrsMatrix *>
                        (&(*ssym->transposer)(*D)), false);
#ifdef TIMING_OUTPUT
        ttime.stop();
        cout << "Transpose Time" << ttime.totalElapsedTime() << endl;
        ttime.reset();
#endif
    }
    else
    {
        ssym->transposer->fwd();
        //ssym->ReIdx_LP->fwd(); // TODO: Needed ?
    }
#endif
    // A is no longer needed

    delete[] LeftIndex;
    delete[] LeftValues;
    delete[] RightIndex;
    delete[] RightValues;

    //cout << msg << "S rows=" << S.NumGlobalRows() << " S cols=" <<
        //S.NumGlobalCols() << "#cols in column map="<<
        //S.ColMap().NumMyElements() << endl;
    //cout << msg << "C rows=" << Cptr->NumGlobalRows() << " C cols=" <<
        //Cptr->NumGlobalCols() << "#cols in column map="<<
        //Cptr->ColMap().NumMyElements() << endl;
    //cout << msg << "D rows=" << D.NumGlobalRows() << " D cols=" <<
        //D.NumGlobalCols() << "#cols in column map="<<
        //D.ColMap().NumMyElements() << endl;
    //cout << msg << "R rows=" << Rptr->NumGlobalRows() << " R cols=" <<
        //Rptr->NumGlobalCols() << "#cols in column map="<<
        //Rptr->ColMap().NumMyElements() << endl;
    // ]

    return 0;
}
TEUCHOS_UNIT_TEST(interlaced_op, test)
{
#ifdef HAVE_MPI
   Teuchos::RCP<const Epetra_Comm> comm =
     Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD));
#else
   Teuchos::RCP<const Epetra_Comm> comm = Teuchos::rcp(new Epetra_SerialComm);
#endif

   //int rank = comm->MyPID();
   int numProc = comm->NumProc();

   int num_KL = 1;
   int porder = 5;
   bool full_expansion = false;

   Teuchos::RCP<const Stokhos::CompletePolynomialBasis<int,double> > basis =
     buildBasis(num_KL,porder);
   Teuchos::RCP<Stokhos::Sparse3Tensor<int,double> > Cijk;
   Teuchos::RCP<Stokhos::ParallelData> sg_parallel_data;
   Teuchos::RCP<Stokhos::OrthogPolyExpansion<int,double> > expansion;
   {
      if(full_expansion)
        Cijk = basis->computeTripleProductTensor();
      else
        Cijk = basis->computeLinearTripleProductTensor();

      Teuchos::ParameterList parallelParams;
      parallelParams.set("Number of Spatial Processors", numProc);
      sg_parallel_data =
        Teuchos::rcp(new Stokhos::ParallelData(basis, Cijk, comm,
                                               parallelParams));

      expansion =
        Teuchos::rcp(new Stokhos::AlgebraicOrthogPolyExpansion<int,double>(
                     basis, Cijk));
   }
   Teuchos::RCP<const EpetraExt::MultiComm> sg_comm =
     sg_parallel_data->getMultiComm();

   // deterministic PDE graph
   Teuchos::RCP<Epetra_Map> determRowMap =
     Teuchos::rcp(new Epetra_Map(-1,10,0,*comm));
   Teuchos::RCP<Epetra_CrsGraph> determGraph =
     Teuchos::rcp(new Epetra_CrsGraph(Copy,*determRowMap,1));
   for(int row=0;row<determRowMap->NumMyElements();row++) {
      int gid = determRowMap->GID(row);
      determGraph->InsertGlobalIndices(gid,1,&gid);
   }
   for(int row=1;row<determRowMap->NumMyElements()-1;row++) {
      int gid = determRowMap->GID(row);
      int indices[2] = {gid-1,gid+1};
      determGraph->InsertGlobalIndices(gid,2,indices);
   }
   determGraph->FillComplete();

   Teuchos::RCP<Teuchos::ParameterList> params =
     Teuchos::rcp(new Teuchos::ParameterList);
   params->set("Scale Operator by Inverse Basis Norms", false);
   params->set("Include Mean", true);
   params->set("Only Use Linear Terms", false);

   Teuchos::RCP<Stokhos::EpetraSparse3Tensor> epetraCijk =
     Teuchos::rcp(new Stokhos::EpetraSparse3Tensor(basis,Cijk,sg_comm));
   Teuchos::RCP<Stokhos::EpetraOperatorOrthogPoly> W_sg_blocks =
     Teuchos::rcp(new Stokhos::EpetraOperatorOrthogPoly(basis,
                  epetraCijk->getStochasticRowMap(), determRowMap,
                  determRowMap, sg_comm));
   for(int i=0; i<W_sg_blocks->size(); i++) {
      Teuchos::RCP<Epetra_CrsMatrix> crsMat =
        Teuchos::rcp(new Epetra_CrsMatrix(Copy,*determGraph));
      crsMat->PutScalar(1.0 + i);
      W_sg_blocks->setCoeffPtr(i,crsMat); // allocate a bunch of matrices
   }

   Teuchos::RCP<const Epetra_Map> sg_map =
     Teuchos::rcp(EpetraExt::BlockUtility::GenerateBlockMap(
                  *determRowMap, *(epetraCijk->getStochasticRowMap()),
                  *(epetraCijk->getMultiComm())));

   // build an interlaced operator (object under test) and a benchmark
   // fully assembled operator
   ///////////////////////////////////////////////////////////////////////

   Stokhos::InterlacedOperator op(sg_comm,basis,epetraCijk,determGraph,params);
   op.PutScalar(0.0);
   op.setupOperator(W_sg_blocks);

   Stokhos::FullyAssembledOperator full_op(sg_comm,basis,epetraCijk,
                                           determGraph,sg_map,sg_map,params);
   full_op.PutScalar(0.0);
   full_op.setupOperator(W_sg_blocks);

   // here we test the interlaced operator against the fully assembled operator
   ///////////////////////////////////////////////////////////////////////
   bool result = true;
   for(int i=0;i<100;i++) {
      // build vector for fully assembled operator (blockwise)
      Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> x_vec_blocks =
        Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly(basis,
                     epetraCijk->getStochasticRowMap(), determRowMap,
                     epetraCijk->getMultiComm()));
      Teuchos::RCP<Stokhos::EpetraVectorOrthogPoly> f_vec_blocks =
        Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly(basis,
                     epetraCijk->getStochasticRowMap(), determRowMap,
                     epetraCijk->getMultiComm()));
      Teuchos::RCP<Epetra_Vector> x_vec_blocked = x_vec_blocks->getBlockVector();
      Teuchos::RCP<Epetra_Vector> f_vec_blocked = f_vec_blocks->getBlockVector();
      x_vec_blocked->Random();        // build an initial vector
      f_vec_blocked->PutScalar(0.0);

      // build interlaced vectors
      Teuchos::RCP<Epetra_Vector> x_vec_inter =
        Teuchos::rcp(new Epetra_Vector(op.OperatorDomainMap()));
      Teuchos::RCP<Epetra_Vector> f_vec_inter =
        Teuchos::rcp(new Epetra_Vector(op.OperatorRangeMap()));
      Teuchos::RCP<Epetra_Vector> f_vec_blk_inter =
        Teuchos::rcp(new Epetra_Vector(op.OperatorRangeMap()));
      Stokhos::SGModelEvaluator_Interlaced::copyToInterlacedVector(
        *x_vec_blocks,*x_vec_inter);  // copy random x to interlaced vector
      f_vec_inter->PutScalar(0.0);

      full_op.Apply(*x_vec_blocked,*f_vec_blocked);
      op.Apply(*x_vec_inter,*f_vec_inter);

      // copy blocked action to interlaced for comparison
      Stokhos::SGModelEvaluator_Interlaced::copyToInterlacedVector(
        *f_vec_blocks,*f_vec_blk_inter);

      // compute norm
      double error = 0.0;
      double true_norm = 0.0;
      f_vec_blk_inter->NormInf(&true_norm);
      f_vec_blk_inter->Update(-1.0,*f_vec_inter,1.0);
      f_vec_blk_inter->NormInf(&error);

      out << "rel error = " << error/true_norm
          << " ( " << true_norm << " ), ";
      result &= (error/true_norm < 1e-14);
   }
   out << std::endl;

   TEST_ASSERT(result);
}
// Can't be a constructor because MPI will not be initialized
void setup()
{
  Epetra_Object::SetTracebackMode(2);

  // Test tolerance
  tol = 1.0e-12;

  // Basis of dimension 2, order 3
  const int d = 2;
  const int p = 3;
  Teuchos::Array< Teuchos::RCP<const Stokhos::OneDOrthogPolyBasis<int,double> > > bases(d);
  for (int i=0; i<d; i++) {
    bases[i] = Teuchos::rcp(new Stokhos::LegendreBasis<int,double>(p));
  }
  Teuchos::RCP<const Stokhos::CompletePolynomialBasis<int,double> > basis =
    Teuchos::rcp(new Stokhos::CompletePolynomialBasis<int,double>(bases));

  // Triple product tensor
  Teuchos::RCP<Stokhos::Sparse3Tensor<int,double> > Cijk =
    basis->computeTripleProductTensor(basis->size());

  // Create a communicator for Epetra objects
  Teuchos::RCP<const Epetra_Comm> globalComm;
#ifdef HAVE_MPI
  globalComm = Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD));
#else
  globalComm = Teuchos::rcp(new Epetra_SerialComm);
#endif

  // Create stochastic parallel distribution
  int num_spatial_procs = -1;
  int num_procs = globalComm->NumProc();
  if (num_procs > 1)
    num_spatial_procs = num_procs / 2;
  Teuchos::ParameterList parallelParams;
  parallelParams.set("Number of Spatial Processors", num_spatial_procs);
  Teuchos::RCP<Stokhos::ParallelData> sg_parallel_data =
    Teuchos::rcp(new Stokhos::ParallelData(basis, Cijk, globalComm,
                                           parallelParams));
  Teuchos::RCP<const EpetraExt::MultiComm> sg_comm =
    sg_parallel_data->getMultiComm();
  Teuchos::RCP<const Epetra_Comm> app_comm =
    sg_parallel_data->getSpatialComm();
  Teuchos::RCP<const Stokhos::EpetraSparse3Tensor> epetraCijk =
    sg_parallel_data->getEpetraCijk();

  // Deterministic domain map
  const int num_x = 5;
  Teuchos::RCP<Epetra_Map> x_map =
    Teuchos::rcp(new Epetra_Map(num_x, 0, *app_comm));

  // Deterministic column map
  Teuchos::RCP<Epetra_Map> x_overlap_map =
    Teuchos::rcp(new Epetra_LocalMap(num_x, 0, *app_comm));

  // Deterministic range map
  const int num_f = 3;
  Teuchos::RCP<Epetra_Map> f_map =
    Teuchos::rcp(new Epetra_Map(num_f, 0, *app_comm));

  // Product domain & range maps
  Teuchos::RCP<const Epetra_BlockMap> stoch_row_map =
    epetraCijk->getStochasticRowMap();
  sg_x_map = Teuchos::rcp(EpetraExt::BlockUtility::GenerateBlockMap(
    *x_map, *stoch_row_map, *sg_comm));
  sg_f_map = Teuchos::rcp(EpetraExt::BlockUtility::GenerateBlockMap(
    *f_map, *stoch_row_map, *sg_comm));

  // Deterministic matrix graph
  const int num_indices = num_x;
  Teuchos::RCP<Epetra_CrsGraph> graph =
    Teuchos::rcp(new Epetra_CrsGraph(Copy, *f_map, num_indices));
  int indices[num_indices];
  for (int j=0; j<num_indices; j++)
    indices[j] = x_overlap_map->GID(j);
  for (int i=0; i<f_map->NumMyElements(); i++)
    graph->InsertGlobalIndices(f_map->GID(i), num_indices, indices);
  graph->FillComplete(*x_map, *f_map);

  // Create matrix expansion
  Teuchos::RCP<Epetra_BlockMap> sg_overlap_map =
    Teuchos::rcp(new Epetra_LocalMap(
      basis->size(), 0, *(sg_parallel_data->getStochasticComm())));
  Teuchos::RCP< Stokhos::EpetraOperatorOrthogPoly > mat_sg =
    Teuchos::rcp(new Stokhos::EpetraOperatorOrthogPoly(
      basis, sg_overlap_map, x_map, f_map, sg_f_map, sg_comm));
  for (int block=0; block<basis->size(); block++) {
    Teuchos::RCP<Epetra_CrsMatrix> mat =
      Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graph));
    TEUCHOS_TEST_FOR_EXCEPTION(!mat->IndicesAreLocal(), std::logic_error,
                               "Indices are not local!");
    double values[num_indices];
    for (int i=0; i<f_map->NumMyElements(); i++) {
      for (int j=0; j<num_indices; j++) {
        indices[j] = x_overlap_map->GID(j);
        values[j] = 0.1*(i+1)*(j+1)*(block+1);
      }
      mat->ReplaceMyValues(i, num_indices, values, indices);
    }
    mat->FillComplete(*x_map, *f_map);
    mat_sg->setCoeffPtr(block, mat);
  }

  // Matrix-free operator
  Teuchos::RCP<Teuchos::ParameterList> op_params =
    Teuchos::rcp(new Teuchos::ParameterList);
  mat_free_op = Teuchos::rcp(new Stokhos::MatrixFreeOperator(
    sg_comm, basis, epetraCijk, x_map, f_map, sg_x_map, sg_f_map, op_params));
  mat_free_op->setupOperator(mat_sg);

  // Fully assembled operator
  assembled_op = Teuchos::rcp(new Stokhos::FullyAssembledOperator(
    sg_comm, basis, epetraCijk, graph, sg_x_map, sg_f_map, op_params));
  assembled_op->setupOperator(mat_sg);
}
Teuchos::RCP<Epetra_CrsGraph> create_epetra_graph(int numProcs, int localProc)
{
  if (localProc == 0) {
    std::cout << " creating Epetra_CrsGraph with un-even distribution..."
              << std::endl;
  }

  //create an Epetra_CrsGraph with rows spread un-evenly over
  //processors.
  Epetra_MpiComm comm(MPI_COMM_WORLD);
  int local_num_rows = 800;
  int nnz_per_row = local_num_rows/4+1;
  int global_num_rows = numProcs*local_num_rows;

  int mid_proc = numProcs/2;
  bool num_procs_even = numProcs%2==0 ? true : false;

  int adjustment = local_num_rows/2;

  //adjust local_num_rows so that it's not equal on all procs.
  if (localProc < mid_proc) {
    local_num_rows -= adjustment;
  }
  else {
    local_num_rows += adjustment;
  }

  //if numProcs is not an even number, undo the local_num_rows adjustment
  //on one proc so that the total will still be correct.
  if (localProc == numProcs-1) {
    if (num_procs_even == false) {
      local_num_rows -= adjustment;
    }
  }

  //now we're ready to create a row-map.
  Epetra_Map rowmap(global_num_rows, local_num_rows, 0, comm);

  //create a graph
  Teuchos::RCP<Epetra_CrsGraph> graph =
    Teuchos::rcp(new Epetra_CrsGraph(Copy, rowmap, nnz_per_row));

  std::vector<int> indices(nnz_per_row);
  std::vector<double> coefs(nnz_per_row);

  int err = 0;

  for(int i=0; i<local_num_rows; ++i) {
    int global_row = rowmap.GID(i);
    int first_col = global_row - nnz_per_row/2;

    if (first_col < 0) {
      first_col = 0;
    }
    else if (first_col > (global_num_rows - nnz_per_row)) {
      first_col = global_num_rows - nnz_per_row;
    }

    for(int j=0; j<nnz_per_row; ++j) {
      indices[j] = first_col + j;
      coefs[j] = 1.0;
    }

    err = graph->InsertGlobalIndices(global_row, nnz_per_row, &indices[0]);
    if (err < 0) {
      throw Isorropia::Exception(
        "create_epetra_graph: error inserting indices in graph");
    }
  }

  err = graph->FillComplete();
  if (err != 0) {
    throw Isorropia::Exception(
      "create_epetra_graph: error in graph.FillComplete()");
  }

  return(graph);
}
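// A minimal driver sketch for the function above, assuming an MPI build
// (create_epetra_graph comes from this file; the rest is standard MPI
// boilerplate):
#include <mpi.h>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);

  int numProcs, localProc;
  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
  MPI_Comm_rank(MPI_COMM_WORLD, &localProc);

  {
    // build the unevenly distributed graph; the RCP frees it at scope exit,
    // before MPI_Finalize tears down the communicator
    Teuchos::RCP<Epetra_CrsGraph> graph =
      create_epetra_graph(numProcs, localProc);
  }

  MPI_Finalize();
  return 0;
}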