/* ----------------------------------------- purpose -- produce a map from each column to the front that contains it created -- 98may04, cca ----------------------------------------- */ IV * FrontMtx_colmapIV ( FrontMtx *frontmtx ) { int ii, J, ncolJ, neqns, nfront, nJ ; int *colindJ, *colmap ; IV *colmapIV ; /* ----------------------------------------- get the map from columns to owning fronts ----------------------------------------- */ neqns = FrontMtx_neqns(frontmtx) ; nfront = FrontMtx_nfront(frontmtx) ; colmapIV = IV_new() ; IV_init(colmapIV, neqns, NULL) ; colmap = IV_entries(colmapIV) ; IVfill(neqns, colmap, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; if ( ncolJ > 0 && colindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { colmap[colindJ[ii]] = J ; } } } } return(colmapIV) ; }
/* -------------------------------------------------------------------- purpose -- produce a map from each row to the front that contains it created -- 98may04, cca -------------------------------------------------------------------- */ IV * FrontMtx_rowmapIV ( FrontMtx *frontmtx ) { int ii, J, nrowJ, neqns, nfront, nJ ; int *rowindJ, *rowmap ; IV *rowmapIV ; /* -------------------------------------- get the map from rows to owning fronts -------------------------------------- */ neqns = FrontMtx_neqns(frontmtx) ; nfront = FrontMtx_nfront(frontmtx) ; rowmapIV = IV_new() ; IV_init(rowmapIV, neqns, NULL) ; rowmap = IV_entries(rowmapIV) ; IVfill(neqns, rowmap, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_rowIndices(frontmtx, J, &nrowJ, &rowindJ) ; if ( nrowJ > 0 && rowindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { rowmap[rowindJ[ii]] = J ; } } } } return(rowmapIV) ; }
/* ----------------------------- input a chevron in the matrix created -- 98jan28, cca ----------------------------- */ static void inputChevron ( InpMtx *inpmtx, int chv, int chvsize, int chvind[], double chvent[] ) { int col, ii, jj, nent, offset, row ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, chvsize) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; if ( INPMTX_IS_BY_ROWS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < chvsize ; ii++, jj++ ) { if ( (offset = chvind[ii]) >= 0 ) { row = chv ; col = chv + offset ; } else { col = chv ; row = chv - offset ; } ivec1[jj] = row ; ivec2[jj] = col ; } } else if ( INPMTX_IS_BY_COLUMNS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < chvsize ; ii++, jj++ ) { if ( (offset = chvind[ii]) >= 0 ) { row = chv ; col = chv + offset ; } else { col = chv ; row = chv - offset ; } ivec1[jj] = col ; ivec2[jj] = row ; } } else if ( INPMTX_IS_BY_CHEVRONS(inpmtx) ) { IVfill(chvsize, ivec1 + nent, chv) ; IVcopy(chvsize, ivec2 + nent, chvind) ; } IV_setSize(&inpmtx->ivec1IV, nent + chvsize) ; IV_setSize(&inpmtx->ivec2IV, nent + chvsize) ; if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + nent ; DVcopy(chvsize, dvec, chvent) ; DV_setSize(&inpmtx->dvecDV, nent + chvsize) ; } else if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + 2*nent ; ZVcopy(chvsize, dvec, chvent) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + chvsize)) ; } inpmtx->nent += chvsize ; inpmtx->storageMode = INPMTX_RAW_DATA ; return ; }
/* ------------------------------------ input a complex column in the matrix created -- 98jan28, cca ------------------------------------ */ static void inputColumn ( InpMtx *inpmtx, int col, int colsize, int colind[], double colent[] ) { int ii, jj, nent, row ; int *ivec1, *ivec2 ; prepareToAddNewEntries(inpmtx, colsize) ; nent = inpmtx->nent ; ivec1 = IV_entries(&inpmtx->ivec1IV) ; ivec2 = IV_entries(&inpmtx->ivec2IV) ; if ( INPMTX_IS_BY_ROWS(inpmtx) ) { IVcopy(colsize, ivec1 + nent, colind) ; IVfill(colsize, ivec2 + nent, col) ; } else if ( INPMTX_IS_BY_COLUMNS(inpmtx) ) { IVfill(colsize, ivec1 + nent, col) ; IVcopy(colsize, ivec2 + nent, colind) ; } else if ( INPMTX_IS_BY_CHEVRONS(inpmtx) ) { for ( ii = 0, jj = nent ; ii < colsize ; ii++, jj++ ) { row = colind[jj] ; ivec1[jj] = (row <= col) ? row : col ; ivec2[jj] = col - row ; } } IV_setSize(&inpmtx->ivec1IV, nent + colsize) ; IV_setSize(&inpmtx->ivec2IV, nent + colsize) ; if ( INPMTX_IS_REAL_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + nent ; DVcopy(colsize, dvec, colent) ; DV_setSize(&inpmtx->dvecDV, nent + colsize) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(inpmtx) ) { double *dvec = DV_entries(&inpmtx->dvecDV) + 2*nent ; ZVcopy(colsize, dvec, colent) ; DV_setSize(&inpmtx->dvecDV, 2*(nent + colsize)) ; } inpmtx->nent = nent + colsize ; inpmtx->storageMode = INPMTX_RAW_DATA ; return ; }
/* --------------------------------------------------- purpose -- allocate a int array with size entries and fill with value ival return value -- a pointer to the start of the array created : 95sep22, cca --------------------------------------------------- */ int * IVinit ( int size, int ival ) { int *y = NULL ; if ( size > 0 ) { y = IVinit2(size) ; IVfill(size, y, ival) ; } return(y) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------------------- read BPG from file and get the Dulmage-Mendelsohn decomposition created -- 96mar08, cca --------------------------------------------------------------- */ { char *inBPGFileName ; double t1, t2 ; int ierr, msglvl, rc ; int *dmflags, *stats ; BPG *bpg ; FILE *msgFile ; if ( argc != 4 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inFile " "\n msglvl -- message level" "\n msgFile -- message file" "\n inFile -- input file, must be *.bpgf or *.bpgb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inBPGFileName = argv[3] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inFile -- %s" "\n", argv[0], msglvl, argv[2], inBPGFileName) ; fflush(msgFile) ; /* ---------------------- read in the BPG object ---------------------- */ if ( strcmp(inBPGFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } bpg = BPG_new() ; MARKTIME(t1) ; rc = BPG_readFromFile(bpg, inBPGFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inBPGFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from BPG_readFromFile(%p,%s)", rc, bpg, inBPGFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading BPG object from file %s", inBPGFileName) ; if ( msglvl > 2 ) { BPG_writeForHumanEye(bpg, msgFile) ; } else { BPG_writeStats(bpg, msgFile) ; } fflush(msgFile) ; /* -------------------------------------------- test out the max flow DMdecomposition method -------------------------------------------- */ dmflags = IVinit(bpg->nX + bpg->nY, -1) ; stats = IVinit(6, 0) ; MARKTIME(t1) ; BPG_DMviaMaxFlow(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via maxflow", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMviaMaxFlow" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ------------------------------------------ test out the matching DMcomposition method ------------------------------------------ */ IVfill(bpg->nX + bpg->nY, dmflags, -1) ; IVfill(6, stats, -1) ; MARKTIME(t1) ; BPG_DMdecomposition(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via matching", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMdecomposition" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ---------------- free the storage ---------------- */ IVfree(dmflags) ; IVfree(stats) ; BPG_free(bpg) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/* ------------------------------------------------------------- purpose --- to compute a matrix-vector multiply y[] = C * x[] where C is the identity, A or B (depending on *pprbtype). *pnrows -- # of rows in x[] *pncols -- # of columns in x[] *pprbtype -- problem type *pprbtype = 1 --> vibration problem, matrix is A *pprbtype = 2 --> buckling problem, matrix is B *pprbtype = 3 --> matrix is identity, y[] = x[] x[] -- vector to be multiplied NOTE: the x[] vector is global, not a portion y[] -- product vector NOTE: the y[] vector is global, not a portion created -- 98aug28, cca & jcp ------------------------------------------------------------- */ void JimMatMulMPI ( int *pnrows, int *pncols, double x[], double y[], int *pprbtype, void *data ) { BridgeMPI *bridge = (BridgeMPI *) data ; int ncols, nent, nrows ; #if MYDEBUG > 0 double t1, t2 ; count_JimMatMul++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(bridge->msgFile) ; #endif nrows = *pnrows ; ncols = *pncols ; nent = nrows*ncols ; if ( *pprbtype == 3 ) { /* -------------------------- ... matrix is the identity -------------------------- */ DVcopy(nent, y, x) ; } else { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jcol, jj, kk, myid, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; /* --------------------------------------------- slide the owned rows of x[] down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { y[kk++] = x[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n inside JimMatMulMPI, calling MatMulMpi" "\n prbtype %d, nrows %d, ncols %d, nowned %d", *pprbtype, *pnrows, *pncols, nowned) ; fflush(bridge->msgFile) ; } MatMulMPI(&nowned, pncols, y, y, pprbtype, data) ; /* ------------------------------------------------- gather all the entries of y[] onto processor zero ------------------------------------------------- */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), y) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n mtx: %d rows x %d columns", mtx->nrow, mtx->ncol) ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n newmtx: %d rows x %d columns", newmtx->nrow, newmtx->ncol) ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { if ( mtx->nrow != neqns || mtx->ncol != ncols ) { fprintf(bridge->msgFile, "\n\n WHOA: mtx->nrows %d, mtx->ncols %d" ", neqns %d, ncols %d", mtx->nrow, mtx->ncol, neqns, ncols) ; exit(-1) ; } DVcopy(neqns*ncols, y, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ MPI_Bcast((void *) y, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n after the broadcast") ; fflush(bridge->msgFile) ; } } MPI_Barrier(bridge->comm) ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimMatMul += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(bridge->msgFile) ; #endif return ; }
NM_Status SpoolesSolver :: solve(SparseMtrx *A, FloatArray *b, FloatArray *x) { int errorValue, mtxType, symmetryflag; int seed = 30145, pivotingflag = 0; int *oldToNew, *newToOld; double droptol = 0.0, tau = 1.e300; double cpus [ 10 ]; int stats [ 20 ]; ChvManager *chvmanager; Chv *rootchv; InpMtx *mtxA; DenseMtx *mtxY, *mtxX; // first check whether Lhs is defined if ( !A ) { _error("solveYourselfAt: unknown Lhs"); } // and whether Rhs if ( !b ) { _error("solveYourselfAt: unknown Rhs"); } // and whether previous Solution exist if ( !x ) { _error("solveYourselfAt: unknown solution array"); } if ( x->giveSize() != b->giveSize() ) { _error("solveYourselfAt: size mismatch"); } Timer timer; timer.startTimer(); if ( A->giveType() != SMT_SpoolesMtrx ) { _error("solveYourselfAt: SpoolesSparseMtrx Expected"); } mtxA = ( ( SpoolesSparseMtrx * ) A )->giveInpMtrx(); mtxType = ( ( SpoolesSparseMtrx * ) A )->giveValueType(); symmetryflag = ( ( SpoolesSparseMtrx * ) A )->giveSymmetryFlag(); int i; int neqns = A->giveNumberOfRows(); int nrhs = 1; /* convert right-hand side to DenseMtx */ mtxY = DenseMtx_new(); DenseMtx_init(mtxY, mtxType, 0, 0, neqns, nrhs, 1, neqns); DenseMtx_zero(mtxY); for ( i = 0; i < neqns; i++ ) { DenseMtx_setRealEntry( mtxY, i, 0, b->at(i + 1) ); } if ( ( Lhs != A ) || ( this->lhsVersion != A->giveVersion() ) ) { // // lhs has been changed -> new factorization // Lhs = A; this->lhsVersion = A->giveVersion(); if ( frontmtx ) { FrontMtx_free(frontmtx); } if ( newToOldIV ) { IV_free(newToOldIV); } if ( oldToNewIV ) { IV_free(oldToNewIV); } if ( frontETree ) { ETree_free(frontETree); } if ( symbfacIVL ) { IVL_free(symbfacIVL); } if ( mtxmanager ) { SubMtxManager_free(mtxmanager); } if ( graph ) { Graph_free(graph); } /* * ------------------------------------------------- * STEP 3 : find a low-fill ordering * (1) create the Graph object * (2) order the graph using multiple minimum degree * ------------------------------------------------- */ int nedges; graph = Graph_new(); adjIVL = InpMtx_fullAdjacency(mtxA); nedges = IVL_tsize(adjIVL); Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n graph of the input matrix"); Graph_writeForHumanEye(graph, msgFile); fflush(msgFile); } frontETree = orderViaMMD(graph, seed, msglvl, msgFile); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n front tree from ordering"); ETree_writeForHumanEye(frontETree, msgFile); fflush(msgFile); } /* * ---------------------------------------------------- * STEP 4: get the permutation, permute the front tree, * permute the matrix and right hand side, and * get the symbolic factorization * ---------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree); oldToNew = IV_entries(oldToNewIV); newToOldIV = ETree_newToOldVtxPerm(frontETree); newToOld = IV_entries(newToOldIV); ETree_permuteVertices(frontETree, oldToNewIV); InpMtx_permute(mtxA, oldToNew, oldToNew); if ( symmetryflag == SPOOLES_SYMMETRIC || symmetryflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangle(mtxA); } InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS); InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS); symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n old-to-new permutation vector"); IV_writeForHumanEye(oldToNewIV, msgFile); fprintf(msgFile, "\n\n new-to-old permutation vector"); IV_writeForHumanEye(newToOldIV, msgFile); fprintf(msgFile, "\n\n front tree after permutation"); ETree_writeForHumanEye(frontETree, msgFile); fprintf(msgFile, "\n\n input matrix after permutation"); InpMtx_writeForHumanEye(mtxA, msgFile); fprintf(msgFile, "\n\n symbolic factorization"); IVL_writeForHumanEye(symbfacIVL, msgFile); fflush(msgFile); } Tree_writeToFile(frontETree->tree, (char*)"haggar.treef"); /*--------------------------------------------------------------------*/ /* * ------------------------------------------ * STEP 5: initialize the front matrix object * ------------------------------------------ */ frontmtx = FrontMtx_new(); mtxmanager = SubMtxManager_new(); SubMtxManager_init(mtxmanager, NO_LOCK, 0); FrontMtx_init(frontmtx, frontETree, symbfacIVL, mtxType, symmetryflag, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile); /*--------------------------------------------------------------------*/ /* * ----------------------------------------- * STEP 6: compute the numeric factorization * ----------------------------------------- */ chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); DVfill(10, cpus, 0.0); IVfill(20, stats, 0); rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, & errorValue, cpus, stats, msglvl, msgFile); ChvManager_free(chvmanager); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n factor matrix"); FrontMtx_writeForHumanEye(frontmtx, msgFile); fflush(msgFile); } if ( rootchv != NULL ) { fprintf(msgFile, "\n\n matrix found to be singular\n"); exit(-1); } if ( errorValue >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d", errorValue); exit(-1); } /*--------------------------------------------------------------------*/ /* * -------------------------------------- * STEP 7: post-process the factorization * -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n factor matrix after post-processing"); FrontMtx_writeForHumanEye(frontmtx, msgFile); fflush(msgFile); } /*--------------------------------------------------------------------*/ } /* * ---------------------------------------------------- * STEP 4: permute the right hand side * ---------------------------------------------------- */ DenseMtx_permuteRows(mtxY, oldToNewIV); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n right hand side matrix after permutation"); DenseMtx_writeForHumanEye(mtxY, msgFile); } /* * ------------------------------- * STEP 8: solve the linear system * ------------------------------- */ mtxX = DenseMtx_new(); DenseMtx_init(mtxX, mtxType, 0, 0, neqns, nrhs, 1, neqns); DenseMtx_zero(mtxX); FrontMtx_solve(frontmtx, mtxX, mtxY, mtxmanager, cpus, msglvl, msgFile); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n solution matrix in new ordering"); DenseMtx_writeForHumanEye(mtxX, msgFile); fflush(msgFile); } /*--------------------------------------------------------------------*/ /* * ------------------------------------------------------- * STEP 9: permute the solution into the original ordering * ------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering"); DenseMtx_writeForHumanEye(mtxX, msgFile); fflush(msgFile); } // DenseMtx_writeForMatlab(mtxX, "x", msgFile) ; /*--------------------------------------------------------------------*/ /* fetch data to oofem vectors */ double *xptr = x->givePointer(); for ( i = 0; i < neqns; i++ ) { DenseMtx_realEntry(mtxX, i, 0, xptr + i); // printf ("x(%d) = %e\n", i+1, *(xptr+i)); } // DenseMtx_copyRowIntoVector(mtxX, 0, x->givePointer()); timer.stopTimer(); OOFEM_LOG_DEBUG( "SpoolesSolver info: user time consumed by solution: %.2fs\n", timer.getUtime() ); /* * ----------- * free memory * ----------- */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); /*--------------------------------------------------------------------*/ return ( 1 ); }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) { /* -------------------------------------------------- all-in-one program to solve A X = B using a multithreaded factorization and solve We use a patch-and-go strategy for the factorization without pivoting (1) read in matrix entries and form DInpMtx object (2) form Graph object (3) order matrix and form front tree (4) get the permutation, permute the matrix and front tree and get the symbolic factorization (5) compute the numeric factorization (6) read in right hand side entries (7) compute the solution created -- 98jun04, cca -------------------------------------------------- */ /*--------------------------------------------------------------------*/ char *matrixFileName, *rhsFileName ; DenseMtx *mtxB, *mtxX ; Chv *rootchv ; ChvManager *chvmanager ; double fudge, imag, real, tau = 100., toosmall, value ; double cpus[10] ; DV *cumopsDV ; ETree *frontETree ; FrontMtx *frontmtx ; FILE *inputFile, *msgFile ; Graph *graph ; InpMtx *mtxA ; int error, ient, irow, jcol, jrhs, jrow, lookahead, msglvl, ncol, nedges, nent, neqns, nfront, nrhs, nrow, nthread, patchAndGoFlag, seed, storeids, storevalues, symmetryflag, type ; int *newToOld, *oldToNew ; int stats[20] ; IV *newToOldIV, *oldToNewIV, *ownersIV ; IVL *adjIVL, *symbfacIVL ; SolveMap *solvemap ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ /* -------------------- get input parameters -------------------- */ if ( argc != 14 ) { fprintf(stdout, "\n" "\n usage: %s msglvl msgFile type symmetryflag patchAndGoFlag" "\n fudge toosmall storeids storevalues" "\n matrixFileName rhsFileName seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of entries" "\n 1 (SPOOLES_REAL) -- real entries" "\n 2 (SPOOLES_COMPLEX) -- complex entries" "\n symmetryflag -- type of matrix" "\n 0 (SPOOLES_SYMMETRIC) -- symmetric entries" "\n 1 (SPOOLES_HERMITIAN) -- Hermitian entries" "\n 2 (SPOOLES_NONSYMMETRIC) -- nonsymmetric entries" "\n patchAndGoFlag -- flag for the patch-and-go strategy" "\n 0 -- none, stop factorization" "\n 1 -- optimization strategy" "\n 2 -- structural analysis strategy" "\n fudge -- perturbation parameter" "\n toosmall -- upper bound on a small pivot" "\n storeids -- flag to store ids of small pivots" "\n storevalues -- flag to store perturbations" "\n matrixFileName -- matrix file name, format" "\n nrow ncol nent" "\n irow jcol entry" "\n ..." "\n note: indices are zero based" "\n rhsFileName -- right hand side file name, format" "\n nrow nrhs " "\n ..." "\n jrow entry(jrow,0) ... entry(jrow,nrhs-1)" "\n ..." "\n seed -- random number seed, used for ordering" "\n nthread -- number of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; symmetryflag = atoi(argv[4]) ; patchAndGoFlag = atoi(argv[5]) ; fudge = atof(argv[6]) ; toosmall = atof(argv[7]) ; storeids = atoi(argv[8]) ; storevalues = atoi(argv[9]) ; matrixFileName = argv[10] ; rhsFileName = argv[11] ; seed = atoi(argv[12]) ; nthread = atoi(argv[13]) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 1: read the entries from the input file and create the InpMtx object -------------------------------------------- */ if ( (inputFile = fopen(matrixFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", matrixFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d %d", &nrow, &ncol, &nent) ; neqns = nrow ; mtxA = InpMtx_new() ; InpMtx_init(mtxA, INPMTX_BY_ROWS, type, nent, 0) ; if ( type == SPOOLES_REAL ) { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le", &irow, &jcol, &value) ; InpMtx_inputRealEntry(mtxA, irow, jcol, value) ; } } else { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le %le", &irow, &jcol, &real, &imag) ; InpMtx_inputComplexEntry(mtxA, irow, jcol, real, imag) ; } } fclose(inputFile) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n input matrix") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------- STEP 2 : find a low-fill ordering (1) create the Graph object (2) order the graph using multiple minimum degree ------------------------------------------------- */ graph = Graph_new() ; adjIVL = InpMtx_fullAdjacency(mtxA) ; nedges = IVL_tsize(adjIVL) ; Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n graph of the input matrix") ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n front tree from ordering") ; ETree_writeForHumanEye(frontETree, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 3: get the permutation, permute the matrix and front tree and get the symbolic factorization ----------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; oldToNew = IV_entries(oldToNewIV) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; newToOld = IV_entries(newToOldIV) ; ETree_permuteVertices(frontETree, oldToNewIV) ; InpMtx_permute(mtxA, oldToNew, oldToNew) ; InpMtx_mapToUpperTriangle(mtxA) ; InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n old-to-new permutation vector") ; IV_writeForHumanEye(oldToNewIV, msgFile) ; fprintf(msgFile, "\n\n new-to-old permutation vector") ; IV_writeForHumanEye(newToOldIV, msgFile) ; fprintf(msgFile, "\n\n front tree after permutation") ; ETree_writeForHumanEye(frontETree, msgFile) ; fprintf(msgFile, "\n\n input matrix after permutation") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n\n symbolic factorization") ; IVL_writeForHumanEye(symbfacIVL, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 4: initialize the front matrix object and the PatchAndGoInfo object to handle small pivots ------------------------------------------ */ frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 1, toosmall, fudge, storeids, storevalues) ; } else if ( patchAndGoFlag == 2 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 2, toosmall, fudge, storeids, storevalues) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 5: setup the domain decomposition map ------------------------------------------ */ if ( nthread > (nfront = FrontMtx_nfront(frontmtx)) ) { nthread = nfront ; } cumopsDV = DV_new() ; DV_init(cumopsDV, nthread, NULL) ; ownersIV = ETree_ddMap(frontETree, type, symmetryflag, cumopsDV, 1./(2.*nthread)) ; DV_free(cumopsDV) ; /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 6: compute the numeric factorization in parallel ----------------------------------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ; DVfill(10, cpus, 0.0) ; IVfill(20, stats, 0) ; lookahead = 0 ; rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, 0.0, chvmanager, ownersIV, lookahead, &error, cpus, stats, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } else if ( patchAndGoFlag == 2 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } if ( frontmtx->patchinfo->fudgeDV != NULL ) { fprintf(msgFile, "\n perturbations") ; DV_writeForHumanEye(frontmtx->patchinfo->fudgeDV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } ChvManager_free(chvmanager) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } if ( rootchv != NULL ) { fprintf(msgFile, "\n\n matrix found to be singular\n") ; spoolesFatal(); } if ( error >= 0 ) { fprintf(msgFile, "\n\n fatal error at front %d\n", error) ; spoolesFatal(); } /* -------------------------------------- STEP 7: post-process the factorization -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------- STEP 8: read the right hand side matrix B ----------------------------------------- */ if ( (inputFile = fopen(rhsFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", rhsFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d", &nrow, &nrhs) ; mtxB = DenseMtx_new() ; DenseMtx_init(mtxB, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxB) ; if ( type == SPOOLES_REAL ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le", &value) ; DenseMtx_setRealEntry(mtxB, jrow, jrhs, value) ; } } } else { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le %le", &real, &imag) ; DenseMtx_setComplexEntry(mtxB, jrow, jrhs, real, imag) ; } } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n rhs matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------------- STEP 9: permute the right hand side into the original ordering -------------------------------------------------------------- */ DenseMtx_permuteRows(mtxB, oldToNewIV) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n right hand side matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 10: get the solve map object for the parallel solve -------------------------------------------------------- */ solvemap = SolveMap_new() ; SolveMap_ddMap(solvemap, type, FrontMtx_upperBlockIVL(frontmtx), FrontMtx_lowerBlockIVL(frontmtx), nthread, ownersIV, FrontMtx_frontTree(frontmtx), seed, msglvl, msgFile) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 11: solve the linear system in parallel -------------------------------------------- */ mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxX) ; FrontMtx_MT_solve(frontmtx, mtxX, mtxB, mtxmanager, solvemap, cpus, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n solution matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 12: permute the solution into the original ordering -------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------- free memory ----------- */ FrontMtx_free(frontmtx) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; IV_free(newToOldIV) ; IV_free(oldToNewIV) ; InpMtx_free(mtxA) ; ETree_free(frontETree) ; IVL_free(symbfacIVL) ; SubMtxManager_free(mtxmanager) ; Graph_free(graph) ; SolveMap_free(solvemap) ; IV_free(ownersIV) ; /*--------------------------------------------------------------------*/ return(1) ; }
/* -------------------------------------------------- purpose -- to solve a linear system (A - sigma*B) sol[] = rhs[] data -- pointer to bridge data object *pnrows -- # of rows in x[] and y[] *pncols -- # of columns in x[] and y[] rhs[] -- vector that holds right hand sides NOTE: the rhs[] vector is global, not a portion sol[] -- vector to hold solutions NOTE: the sol[] vector is global, not a portion note: rhs[] and sol[] can be the same array. on return, *perror holds an error code. created -- 98aug28, cca & jcp -------------------------------------------------- */ void JimSolveMPI ( int *pnrows, int *pncols, double rhs[], double sol[], void *data, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jj, jcol, kk, myid, ncols = *pncols, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; #if MYDEBUG > 0 double t1, t2 ; count_JimSolve++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(bridge->msgFile) ; #endif MPI_Barrier(bridge->comm) ; /* --------------------------------------------- slide the owned rows of rhs down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { sol[kk++] = rhs[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling SolveMPI()") ; fflush(bridge->msgFile) ; } SolveMPI(&nowned, pncols, sol, sol, data, perror) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from SolveMPI()") ; fflush(bridge->msgFile) ; } /* ------------------------------------------ gather all the entries onto processor zero ------------------------------------------ */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), sol) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling DenseMtx_split()()") ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from DenseMtx_split()()") ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { DVcopy(neqns*ncols, sol, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Bcast((void *) sol, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Barrier(bridge->comm) ; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimSolve += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(bridge->msgFile) ; #endif return ; }
/* -------------------------------------------------------------- purpose -- create and return an A2 object that contains rows of A and rows from update matrices of the children. the matrix may not be in staircase form created -- 98may25, cca -------------------------------------------------------------- */ A2 * FrontMtx_QR_assembleFront ( FrontMtx *frontmtx, int J, InpMtx *mtxA, IVL *rowsIVL, int firstnz[], int colmap[], Chv *firstchild, DV *workDV, int msglvl, FILE *msgFile ) { A2 *frontJ ; Chv *chvI ; double *rowI, *rowJ, *rowentA ; int ii, irow, irowA, irowI, jcol, jj, jrow, ncolI, ncolJ, nentA, nrowI, nrowJ, nrowFromA ; int *colindA, *colindI, *colindJ, *map, *rowids, *rowsFromA ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || mtxA == NULL || rowsIVL == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_QR_assembleFront()" "\n bad input\n") ; exit(-1) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n inside FrontMtx_QR_assembleFront(%d)", J) ; fflush(msgFile) ; } /* -------------------------------------------------- set up the map from global to local column indices -------------------------------------------------- */ FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; for ( jcol = 0 ; jcol < ncolJ ; jcol++ ) { colmap[colindJ[jcol]] = jcol ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n front %d's column indices", J) ; IVfprintf(msgFile, ncolJ, colindJ) ; fflush(msgFile) ; } /* ------------------------------------------------- compute the size of the front and map the global indices of the update matrices into local indices ------------------------------------------------- */ IVL_listAndSize(rowsIVL, J, &nrowFromA, &rowsFromA) ; nrowJ = nrowFromA ; if ( msglvl > 3 ) { fprintf(msgFile, "\n %d rows from A", nrowFromA) ; fflush(msgFile) ; } for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowJ += chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; for ( jcol = 0 ; jcol < ncolI ; jcol++ ) { colindI[jcol] = colmap[colindI[jcol]] ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n %d rows from child %d", chvI->nD, chvI->id) ; fflush(msgFile) ; } } /* ---------------------------------------------------------- get workspace for the rowids[nrowJ] and map[nrowJ] vectors ---------------------------------------------------------- */ if ( sizeof(int) == sizeof(double) ) { DV_setSize(workDV, 2*nrowJ) ; } else if ( 2*sizeof(int) == sizeof(double) ) { DV_setSize(workDV, nrowJ) ; } rowids = (int *) DV_entries(workDV) ; map = rowids + nrowJ ; IVramp(nrowJ, rowids, 0, 1) ; IVfill(nrowJ, map, -1) ; /* ----------------------------------------------------------------- get the map from incoming rows to their place in the front matrix ----------------------------------------------------------------- */ for ( irow = jrow = 0 ; irow < nrowFromA ; irow++, jrow++ ) { irowA = rowsFromA[irow] ; map[jrow] = colmap[firstnz[irowA]] ; } for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowI = chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; for ( irow = 0 ; irow < nrowI ; irow++, jrow++ ) { map[jrow] = colindI[irow] ; } } IV2qsortUp(nrowJ, map, rowids) ; for ( irow = 0 ; irow < nrowJ ; irow++ ) { map[rowids[irow]] = irow ; } /* ---------------------------- allocate the A2 front object ---------------------------- */ frontJ = A2_new() ; A2_init(frontJ, frontmtx->type, nrowJ, ncolJ, ncolJ, 1, NULL) ; A2_zero(frontJ) ; /* ------------------------------------ load the original rows of the matrix ------------------------------------ */ for ( jrow = 0 ; jrow < nrowFromA ; jrow++ ) { irowA = rowsFromA[jrow] ; rowJ = A2_row(frontJ, map[jrow]) ; if ( A2_IS_REAL(frontJ) ) { InpMtx_realVector(mtxA, irowA, &nentA, &colindA, &rowentA) ; } else if ( A2_IS_COMPLEX(frontJ) ) { InpMtx_complexVector(mtxA, irowA, &nentA, &colindA, &rowentA) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n loading row %d", irowA) ; fprintf(msgFile, "\n global indices") ; IVfprintf(msgFile, nentA, colindA) ; fflush(msgFile) ; } if ( A2_IS_REAL(frontJ) ) { for ( ii = 0 ; ii < nentA ; ii++ ) { jj = colmap[colindA[ii]] ; rowJ[jj] = rowentA[ii] ; } } else if ( A2_IS_COMPLEX(frontJ) ) { for ( ii = 0 ; ii < nentA ; ii++ ) { jj = colmap[colindA[ii]] ; rowJ[2*jj] = rowentA[2*ii] ; rowJ[2*jj+1] = rowentA[2*ii+1] ; } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n after assembling rows of A") ; A2_writeForHumanEye(frontJ, msgFile) ; fflush(msgFile) ; } /* ---------------------------------- load the updates from the children ---------------------------------- */ for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) { nrowI = chvI->nD ; Chv_columnIndices(chvI, &ncolI, &colindI) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n loading child %d", chvI->id) ; fprintf(msgFile, "\n child's column indices") ; IVfprintf(msgFile, ncolI, colindI) ; Chv_writeForHumanEye(chvI, msgFile) ; fflush(msgFile) ; } rowI = Chv_entries(chvI) ; for ( irowI = 0 ; irowI < nrowI ; irowI++, jrow++ ) { rowJ = A2_row(frontJ, map[jrow]) ; if ( A2_IS_REAL(frontJ) ) { for ( ii = irowI ; ii < ncolI ; ii++ ) { jj = colindI[ii] ; rowJ[jj] = rowI[ii] ; } rowI += ncolI - irowI - 1 ; } else if ( A2_IS_COMPLEX(frontJ) ) { for ( ii = irowI ; ii < ncolI ; ii++ ) { jj = colindI[ii] ; rowJ[2*jj] = rowI[2*ii] ; rowJ[2*jj+1] = rowI[2*ii+1] ; } rowI += 2*(ncolI - irowI - 1) ; } } if ( msglvl > 3 ) { fprintf(msgFile, "\n after assembling child %d", chvI->id) ; A2_writeForHumanEye(frontJ, msgFile) ; fflush(msgFile) ; } } return(frontJ) ; }
/* -------------------------------------------- purpose -- to solve the linear system MPI version if permuteflag is 1 then rhs is permuted into new ordering solution is permuted into old ordering return value --- 1 -- normal return -1 -- bridge is NULL -2 -- X is NULL -3 -- Y is NULL -4 -- frontmtx is NULL -5 -- mtxmanager is NULL -6 -- oldToNewIV not available -7 -- newToOldIV not available created -- 98sep18, cca -------------------------------------------- */ int BridgeMPI_solve ( BridgeMPI *bridge, int permuteflag, DenseMtx *X, DenseMtx *Y ) { DenseMtx *Xloc, *Yloc ; double cputotal, t0, t1, t2 ; double cpus[6] ; FILE *msgFile ; FrontMtx *frontmtx ; int firsttag, msglvl, myid, nmycol, nrhs, nrow ; int *mycolind, *rowind ; int stats[4] ; IV *mapIV, *ownersIV ; MPI_Comm comm ; SubMtxManager *mtxmanager ; /* --------------- check the input --------------- */ MARKTIME(t0) ; if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n bridge is NULL\n") ; return(-1) ; } if ( (frontmtx = bridge->frontmtx) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n frontmtx is NULL\n") ; return(-4) ; } if ( (mtxmanager = bridge->mtxmanager) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n mtxmanager is NULL\n") ; return(-5) ; } myid = bridge->myid ; comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; frontmtx = bridge->frontmtx ; ownersIV = bridge->ownersIV ; Xloc = bridge->Xloc ; Yloc = bridge->Yloc ; if ( myid != 0 ) { X = Y = NULL ; } else { if ( X == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, X is NULL\n") ; return(-2) ; } if ( Y == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, Y is NULL\n") ; return(-3) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inside BridgeMPI_solve()") ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile , "\n\n Xloc") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fprintf(msgFile , "\n\n Yloc") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ if ( myid == 0 ) { /* -------------------------- optionally permute the rhs -------------------------- */ if ( permuteflag == 1 ) { int rc ; IV *oldToNewIV ; MARKTIME(t1) ; rc = BridgeMPI_oldToNewIV(bridge, &oldToNewIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_oldToNewIV()\n", rc) ; return(-6) ; } DenseMtx_permuteRows(Y, oldToNewIV) ; MARKTIME(t2) ; bridge->cpus[15] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile , "\n\n permuted Y") ; DenseMtx_writeForHumanEye(Y, msgFile) ; fflush(msgFile) ; } } } /*--------------------------------------------------------------------*/ /* ------------------------------------- distribute the right hand side matrix ------------------------------------- */ MARKTIME(t1) ; mapIV = bridge->rowmapIV ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n row map IV object") ; IV_writeForHumanEye(mapIV, msgFile) ; fflush(msgFile) ; } if ( myid == 0 ) { nrhs = Y->ncol ; } else { nrhs = 0 ; } MPI_Bcast((void *) &nrhs, 1, MPI_INT, 0, comm) ; firsttag = 0 ; IVfill(4, stats, 0) ; DenseMtx_MPI_splitFromGlobalByRows(Y, Yloc, mapIV, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[16] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix Y after the split") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------- initialize the local solution X object -------------------------------------- */ MARKTIME(t1) ; IV_sizeAndEntries(bridge->ownedColumnsIV, &nmycol, &mycolind) ; DenseMtx_init(Xloc, bridge->type, -1, -1, nmycol, nrhs, 1, nmycol) ; if ( nmycol > 0 ) { DenseMtx_rowIndices(Xloc, &nrow, &rowind) ; IVcopy(nmycol, rowind, mycolind) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix X") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[17] += t2 - t1 ; /*--------------------------------------------------------------------*/ /* ---------------- solve the system ---------------- */ MARKTIME(t1) ; DVzero(6, cpus) ; FrontMtx_MPI_solve(frontmtx, Xloc, Yloc, mtxmanager, bridge->solvemap, cpus, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[18] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : solve the system", t2 - t1) ; } cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cputotal) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(stdout) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------- gather the solution on processor zero ------------------------------------- */ MARKTIME(t1) ; DenseMtx_MPI_mergeToGlobalByRows(X, Xloc, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[19] += t2 - t1 ; if ( myid == 0 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in new ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } /*--------------------------------------------------------------------*/ /* ------------------------------- optionally permute the solution ------------------------------- */ if ( myid == 0 ) { if ( permuteflag == 1 ) { int rc ; IV *newToOldIV ; rc = BridgeMPI_newToOldIV(bridge, &newToOldIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_newToOldIV()\n", rc) ; return(-7) ; } DenseMtx_permuteRows(X, newToOldIV) ; } MARKTIME(t2) ; bridge->cpus[20] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in old ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[21] += t2 - t0 ; return(1) ; }
//static void factor_MT(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag) void factor_MT(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag) { Graph *graph; IV *ownersIV; IVL *symbfacIVL; Chv *rootchv; /* Initialize pfi: */ pfi->size = size; pfi->msgFile = msgFile; DVfill(10, pfi->cpus, 0.0); /* * STEP 1 : find a low-fill ordering * (1) create the Graph object */ ssolve_creategraph(&graph, &pfi->frontETree, mtxA, size, msgFile); /* * STEP 2: get the permutation, permute the matrix and * front tree and get the symbolic factorization */ ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbfacIVL, pfi->frontETree, mtxA, msgFile, symmetryflag); /* * STEP 3: Prepare distribution to multiple threads/cpus */ { DV *cumopsDV; int nfront; nfront = ETree_nfront(pfi->frontETree); pfi->nthread = num_cpus; if (pfi->nthread > nfront) pfi->nthread = nfront; cumopsDV = DV_new(); DV_init(cumopsDV, pfi->nthread, NULL); ownersIV = ETree_ddMap(pfi->frontETree, SPOOLES_REAL, symmetryflag, cumopsDV, 1. / (2. * pfi->nthread)); if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n map from fronts to threads"); IV_writeForHumanEye(ownersIV, msgFile); fprintf(msgFile, "\n\n factor operations for each front"); DV_writeForHumanEye(cumopsDV, msgFile); fflush(msgFile); } else { fprintf(msgFile, "\n\n Using %d threads\n", pfi->nthread); } DV_free(cumopsDV); } /* * STEP 4: initialize the front matrix object */ { pfi->frontmtx = FrontMtx_new(); pfi->mtxmanager = SubMtxManager_new(); SubMtxManager_init(pfi->mtxmanager, LOCK_IN_PROCESS, 0); FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbfacIVL, SPOOLES_REAL, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_PIVOTING, LOCK_IN_PROCESS, 0, NULL, pfi->mtxmanager, DEBUG_LVL, pfi->msgFile); } /* * STEP 5: compute the numeric factorization in parallel */ { ChvManager *chvmanager; int stats[20]; int error; chvmanager = ChvManager_new(); ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1); IVfill(20, stats, 0); rootchv = FrontMtx_MT_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU, MAGIC_DTOL, chvmanager, ownersIV, 0, &error, pfi->cpus, stats, DEBUG_LVL, pfi->msgFile); ChvManager_free(chvmanager); if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n factor matrix"); FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile); fflush(pfi->msgFile); } if (rootchv != NULL) { fprintf(pfi->msgFile, "\n\n matrix found to be singular\n"); exit(-1); } if (error >= 0) { fprintf(pfi->msgFile, "\n\n fatal error at front %d", error); exit(-1); } } /* * STEP 6: post-process the factorization */ ssolve_postfactor(pfi->frontmtx, pfi->msgFile); /* * STEP 7: get the solve map object for the parallel solve */ { pfi->solvemap = SolveMap_new(); SolveMap_ddMap(pfi->solvemap, symmetryflag, FrontMtx_upperBlockIVL(pfi->frontmtx), FrontMtx_lowerBlockIVL(pfi->frontmtx), pfi->nthread, ownersIV, FrontMtx_frontTree(pfi->frontmtx), RNDSEED, DEBUG_LVL, pfi->msgFile); } /* cleanup: */ InpMtx_free(mtxA); IVL_free(symbfacIVL); Graph_free(graph); IV_free(ownersIV); }
void factor(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag) { Graph *graph; IVL *symbfacIVL; Chv *rootchv; /* Initialize pfi: */ pfi->size = size; pfi->msgFile = msgFile; pfi->solvemap = NULL; DVfill(10, pfi->cpus, 0.0); /* * STEP 1 : find a low-fill ordering * (1) create the Graph object */ ssolve_creategraph(&graph, &pfi->frontETree, mtxA, size, pfi->msgFile); /* * STEP 2: get the permutation, permute the matrix and * front tree and get the symbolic factorization */ ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbfacIVL, pfi->frontETree, mtxA, pfi->msgFile, symmetryflag); /* * STEP 3: initialize the front matrix object */ { pfi->frontmtx = FrontMtx_new(); pfi->mtxmanager = SubMtxManager_new(); SubMtxManager_init(pfi->mtxmanager, NO_LOCK, 0); FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbfacIVL, SPOOLES_REAL, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_PIVOTING, NO_LOCK, 0, NULL, pfi->mtxmanager, DEBUG_LVL, pfi->msgFile); } /* * STEP 4: compute the numeric factorization */ { ChvManager *chvmanager; int stats[20]; int error; chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); IVfill(20, stats, 0); rootchv = FrontMtx_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU, MAGIC_DTOL, chvmanager, &error, pfi->cpus, stats, DEBUG_LVL, pfi->msgFile); ChvManager_free(chvmanager); if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n factor matrix"); FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile); fflush(msgFile); } if (rootchv != NULL) { fprintf(pfi->msgFile, "\n\n matrix found to be singular\n"); exit(-1); } if (error >= 0) { fprintf(pfi->msgFile, "\n\nerror encountered at front %d", error); exit(-1); } } /* * STEP 5: post-process the factorization */ ssolve_postfactor(pfi->frontmtx, pfi->msgFile); /* cleanup: */ IVL_free(symbfacIVL); InpMtx_free(mtxA); Graph_free(graph); }
/* --------------------------------------------------------------------- purpose -- to compute the factorization of A - sigma * B note: all variables in the calling sequence are references to allow call from fortran. input parameters data -- pointer to bridge data object psigma -- shift for the matrix pencil ppvttol -- pivot tolerance *ppvttol = 0.0 --> no pivoting used *ppvttol != 0.0 --> pivoting used, entries in factor are bounded above by 1/pvttol in magnitude output parameters *pinertia -- on return contains the number of negative eigenvalues *perror -- on return contains an error code 1 -- error found during factorization 0 -- normal return -1 -- psigma is NULL -2 -- ppvttol is NULL -3 -- data is NULL -4 -- pinertia is NULL created -- 98aug10, cca & jcp --------------------------------------------------------------------- */ void Factor ( double *psigma, double *ppvttol, void *data, int *pinertia, int *perror ) { Bridge *bridge = (Bridge *) data ; Chv *rootchv ; ChvManager *chvmanager ; double droptol=0.0, tau ; double cpus[10] ; int stats[20] ; int nnegative, nzero, npositive, pivotingflag ; #if MYDEBUG > 0 double t1, t2 ; MARKTIME(t1) ; count_Factor++ ; fprintf(stdout, "\n (%d) Factor()", count_Factor) ; fflush(stdout) ; #endif /* --------------- check the input --------------- */ if ( psigma == NULL ) { fprintf(stderr, "\n error in Factor()" "\n psigma is NULL\n") ; *perror = -1 ; return ; } if ( ppvttol == NULL ) { fprintf(stderr, "\n error in Factor()" "\n ppvttol is NULL\n") ; *perror = -2 ; return ; } if ( data == NULL ) { fprintf(stderr, "\n error in Factor()" "\n data is NULL\n") ; *perror = -3 ; return ; } if ( pinertia == NULL ) { fprintf(stderr, "\n error in Factor()" "\n pinertia is NULL\n") ; *perror = -4 ; return ; } if ( perror == NULL ) { fprintf(stderr, "\n error in Factor()" "\n perror is NULL\n") ; return ; } /* ---------------------------------- set the shift in the pencil object ---------------------------------- */ bridge->pencil->sigma[0] = -(*psigma) ; bridge->pencil->sigma[1] = 0.0 ; /* ----------------------------------------------------- clear the front matrix and submatrix mananger objects ----------------------------------------------------- */ FrontMtx_clearData(bridge->frontmtx); SubMtxManager_clearData(bridge->mtxmanager); /* ----------------------------------------------------------- set the pivot tolerance. NOTE: spooles's "tau" parameter is a bound on the magnitude of the factor entries, and is the recipricol of that of the pivot tolerance of the lanczos code ----------------------------------------------------------- */ if ( *ppvttol == 0.0 ) { tau = 10.0 ; pivotingflag = SPOOLES_NO_PIVOTING ; } else { tau = (1.0)/(*ppvttol) ; pivotingflag = SPOOLES_PIVOTING ; } /* ---------------------------------- initialize the front matrix object ---------------------------------- */ FrontMtx_init(bridge->frontmtx, bridge->frontETree, bridge->symbfacIVL, SPOOLES_REAL, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, 0, NULL, bridge->mtxmanager, bridge->msglvl, bridge->msgFile) ; /* ------------------------- compute the factorization ------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1); IVfill(20, stats, 0) ; DVfill(10, cpus, 0.0) ; rootchv = FrontMtx_factorPencil(bridge->frontmtx, bridge->pencil, tau, droptol, chvmanager, perror, cpus, stats, bridge->msglvl, bridge->msgFile); ChvManager_free(chvmanager); /* ---------------------------- if matrix is singular then set error flag and return ---------------------------- */ if ( rootchv != NULL ) { *perror = 1 ; return ; } /* ------------------------------------------------------------------ post-process the factor matrix, convert from fronts to submatrices ------------------------------------------------------------------ */ FrontMtx_postProcess(bridge->frontmtx, bridge->msglvl, bridge->msgFile); /* ------------------- compute the inertia ------------------- */ FrontMtx_inertia(bridge->frontmtx, &nnegative, &nzero, &npositive) ; *pinertia = nnegative; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_Factor += t2 - t1 ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(stdout) ; #endif return ; }
PetscErrorCode MatFactorNumeric_SeqSpooles(Mat F,Mat A,const MatFactorInfo *info) { Mat_Spooles *lu = (Mat_Spooles*)(F)->spptr; ChvManager *chvmanager ; Chv *rootchv ; IVL *adjIVL; PetscErrorCode ierr; PetscInt nz,nrow=A->rmap->n,irow,nedges,neqns=A->cmap->n,*ai,*aj,i,*diag=0,fierr; PetscScalar *av; double cputotal,facops; #if defined(PETSC_USE_COMPLEX) PetscInt nz_row,*aj_tmp; PetscScalar *av_tmp; #else PetscInt *ivec1,*ivec2,j; double *dvec; #endif PetscBool isSeqAIJ,isMPIAIJ; PetscFunctionBegin; if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */ (F)->ops->solve = MatSolve_SeqSpooles; (F)->assembled = PETSC_TRUE; /* set Spooles options */ ierr = SetSpoolesOptions(A, &lu->options);CHKERRQ(ierr); lu->mtxA = InpMtx_new(); } /* copy A to Spooles' InpMtx object */ ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isMPIAIJ);CHKERRQ(ierr); if (isSeqAIJ){ Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; if (lu->options.symflag == SPOOLES_NONSYMMETRIC) { nz=mat->nz; } else { /* SPOOLES_SYMMETRIC || SPOOLES_HERMITIAN */ nz=(mat->nz + A->rmap->n)/2; diag=mat->diag; } } else { /* A is SBAIJ */ Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; nz=mat->nz; } InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0); #if defined(PETSC_USE_COMPLEX) for (irow=0; irow<nrow; irow++) { if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !(isSeqAIJ || isMPIAIJ)){ nz_row = ai[irow+1] - ai[irow]; aj_tmp = aj + ai[irow]; av_tmp = av + ai[irow]; } else { nz_row = ai[irow+1] - diag[irow]; aj_tmp = aj + diag[irow]; av_tmp = av + diag[irow]; } for (i=0; i<nz_row; i++){ InpMtx_inputComplexEntry(lu->mtxA, irow, *aj_tmp++,PetscRealPart(*av_tmp),PetscImaginaryPart(*av_tmp)); av_tmp++; } } #else ivec1 = InpMtx_ivec1(lu->mtxA); ivec2 = InpMtx_ivec2(lu->mtxA); dvec = InpMtx_dvec(lu->mtxA); if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !isSeqAIJ){ for (irow = 0; irow < nrow; irow++){ for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow; } IVcopy(nz, ivec2, aj); DVcopy(nz, dvec, av); } else { nz = 0; for (irow = 0; irow < nrow; irow++){ for (j = diag[irow]; j<ai[irow+1]; j++) { ivec1[nz] = irow; ivec2[nz] = aj[j]; dvec[nz] = av[j]; nz++; } } } InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec); #endif InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 0 ) { int err; printf("\n\n input matrix"); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */ /*--------------------------------------------------- find a low-fill ordering (1) create the Graph object (2) order the graph -------------------------------------------------------*/ if (lu->options.useQR){ adjIVL = InpMtx_adjForATA(lu->mtxA); } else { adjIVL = InpMtx_fullAdjacency(lu->mtxA); } nedges = IVL_tsize(adjIVL); lu->graph = Graph_new(); Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL); if ( lu->options.msglvl > 2 ) { int err; if (lu->options.useQR){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of A^T A");CHKERRQ(ierr); } else { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");CHKERRQ(ierr); } Graph_writeForHumanEye(lu->graph, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } switch (lu->options.ordering) { case 0: lu->frontETree = orderViaBestOfNDandMS(lu->graph, lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize, lu->options.seed, lu->options.msglvl, lu->options.msgFile); break; case 1: lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 2: lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 3: lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering"); } if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* get the permutation, permute the front tree */ lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree); lu->oldToNew = IV_entries(lu->oldToNewIV); lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree); if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); /* permute the matrix */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC) { InpMtx_mapToUpperTriangle(lu->mtxA); } #if defined(PETSC_USE_COMPLEX) if ( lu->options.symflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangleH(lu->mtxA); } #endif InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); /* get symbolic factorization */ if (lu->options.useQR){ lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph); IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV); IVL_sortUp(lu->symbfacIVL); ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); } else { lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n old-to-new permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n new-to-old permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree after permutation");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n symbolic factorization");CHKERRQ(ierr); IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->frontmtx = FrontMtx_new(); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); } else { /* new num factorization using previously computed symbolic factor */ if (lu->options.pivotingflag) { /* different FrontMtx is required */ FrontMtx_free(lu->frontmtx); lu->frontmtx = FrontMtx_new(); } else { FrontMtx_clearData (lu->frontmtx); } SubMtxManager_free(lu->mtxmanager); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); /* permute mtxA */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { InpMtx_mapToUpperTriangle(lu->mtxA); } InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 2 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); } } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */ if (lu->options.useQR){ FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag, FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } else if ( lu->options.patchAndGoFlag == 2 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } } /* numerical factorization */ chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); DVfill(10, lu->cpus, 0.0); if (lu->options.useQR){ facops = 0.0 ; FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager, lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 1 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n facops = %9.2f", facops);CHKERRQ(ierr); } } else { IVfill(20, lu->stats, 0); rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0, chvmanager, &fierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile); if (rootchv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"\n matrix found to be singular"); if (fierr >= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n error encountered at front %D", fierr); if(lu->options.FrontMtxInfo){ ierr = PetscPrintf(PETSC_COMM_SELF,"\n %8d pivots, %8d pivot tests, %8d delayed rows and columns\n",lu->stats[0], lu->stats[1], lu->stats[2]);CHKERRQ(ierr); cputotal = lu->cpus[8] ; if ( cputotal > 0.0 ) { ierr = PetscPrintf(PETSC_COMM_SELF, "\n cpus cpus/totaltime" "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f \n", lu->cpus[0], 100.*lu->cpus[0]/cputotal, lu->cpus[1], 100.*lu->cpus[1]/cputotal, lu->cpus[2], 100.*lu->cpus[2]/cputotal, lu->cpus[3], 100.*lu->cpus[3]/cputotal, lu->cpus[4], 100.*lu->cpus[4]/cputotal, lu->cpus[5], 100.*lu->cpus[5]/cputotal, lu->cpus[6], 100.*lu->cpus[6]/cputotal, lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal);CHKERRQ(ierr); } } } ChvManager_free(chvmanager); if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { if (lu->options.msglvl > 0 ){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } else if ( lu->options.patchAndGoFlag == 2 ) { if (lu->options.msglvl > 0 ){ if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");CHKERRQ(ierr); DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } } /* post-process the factorization */ FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix after post-processing");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->flg = SAME_NONZERO_PATTERN; lu->CleanUpSpooles = PETSC_TRUE; PetscFunctionReturn(0); }
/* ------------------------------------------------------------------- make an element graph for a n1 x n2 x n3 grid with ncomp components created -- 95nov03, cca ------------------------------------------------------------------- */ EGraph * EGraph_make27P ( int n1, int n2, int n3, int ncomp ) { EGraph *egraph ; int eid, icomp, ijk, ielem, jelem, kelem, m, nelem, nvtx ; int *list ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || n3 <= 0 || ncomp <= 0 ) { fprintf(stderr, "\n fatal error in EGraph_make27P(%d,%d,%d,%d)" "\n bad input\n", n1, n2, n3, ncomp) ; exit(-1) ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside EGraph_make27P(%d,%d,%d,%d)", n1, n2, n3, ncomp) ; fflush(stdout) ; #endif /* ----------------- create the object ----------------- */ nelem = (n1 - 1)*(n2 - 1)*(n3 - 1) ; nvtx = n1*n2*n3*ncomp ; egraph = EGraph_new() ; if ( ncomp == 1 ) { EGraph_init(egraph, 0, nelem, nvtx, IVL_CHUNKED) ; } else { EGraph_init(egraph, 1, nelem, nvtx, IVL_CHUNKED) ; IVfill(nvtx, egraph->vwghts, ncomp) ; } /* ---------------------------- fill the adjacency structure ---------------------------- */ list = IVinit(8*ncomp, -1) ; for ( kelem = 0 ; kelem < n3 - 1 ; kelem++ ) { for ( jelem = 0 ; jelem < n2 - 1 ; jelem++ ) { for ( ielem = 0 ; ielem < n1 - 1 ; ielem++ ) { eid = ielem + jelem*(n1-1) + kelem*(n1-1)*(n2-1); m = 0 ; ijk = ncomp*(ielem + jelem*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + jelem*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + (jelem+1)*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + (jelem+1)*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + jelem*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + jelem*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + (jelem+1)*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + (jelem+1)*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } IVqsortUp(m, list) ; IVL_setList(egraph->adjIVL, eid, m, list) ; } } } IVfree(list) ; return(egraph) ; }
/* -------------------------------------------------------------------- fill *pndom with ndom, the number of domains. fill *pnseg with nseg, the number of segments. domains are numbered in [0, ndom), segments in [ndom,ndom+nseg). return -- an IV object that contains the map from vertices to segments created -- 99feb29, cca -------------------------------------------------------------------- */ IV * GPart_domSegMap ( GPart *gpart, int *pndom, int *pnseg ) { FILE *msgFile ; Graph *g ; int adjdom, count, d, first, ierr, ii, jj1, jj2, last, ndom, msglvl, nextphi, nPhi, nPsi, nV, phi, phi0, phi1, phi2, phi3, psi, sigma, size, size0, size1, size2, v, vsize, w ; int *adj, *adj0, *adj1, *adj2, *compids, *dmark, *dsmap, *head, *link, *list, *offsets, *PhiToPsi, *PhiToV, *PsiToSigma, *vadj, *VtoPhi ; IV *dsmapIV ; IVL *PhiByPhi, *PhiByPowD, *PsiByPowD ; /* -------------------- set the initial time -------------------- */ icputimes = 0 ; MARKTIME(cputimes[icputimes]) ; /* --------------- check the input --------------- */ if ( gpart == NULL || (g = gpart->g) == NULL || pndom == NULL || pnseg == NULL ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n bad input\n", gpart, pndom, pnseg) ; exit(-1) ; } compids = IV_entries(&gpart->compidsIV) ; msglvl = gpart->msglvl ; msgFile = gpart->msgFile ; /* ------------------------ create the map IV object ------------------------ */ nV = g->nvtx ; dsmapIV = IV_new() ; IV_init(dsmapIV, nV, NULL) ; dsmap = IV_entries(dsmapIV) ; /* ---------------------------------- check compids[] and get the number of domains and interface vertices ---------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; ndom = nPhi = 0 ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) < 0 ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n compids[%d] = %d\n", gpart, pndom, pnseg, v, compids[v]) ; exit(-1) ; } else if ( d == 0 ) { nPhi++ ; } else if ( ndom < d ) { ndom = d ; } } *pndom = ndom ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Inside GPart_domSegMap") ; fprintf(msgFile, "\n %d domains, %d Phi vertices", ndom, nPhi) ; } if ( ndom == 1 ) { IVfill(nV, dsmap, 0) ; *pndom = 1 ; *pnseg = 0 ; return(dsmapIV) ; } /* -------------------------------- get the maps PhiToV : [0,nPhi) |---> [0,nV) VtoPhi : [0,nV) |---> [0,nPhi) -------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToV = IVinit(nPhi, -1) ; VtoPhi = IVinit(nV, -1) ; for ( v = 0, phi = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) == 0 ) { PhiToV[phi] = v ; VtoPhi[v] = phi++ ; } } if ( phi != nPhi ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n phi = %d != %d = nPhi\n", gpart, pndom, pnseg, phi, nPhi) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToV(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToV, 15, &ierr) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n VtoPhi(%d) :", nV) ; IVfp80(msgFile, nV, VtoPhi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------------------------- create an IVL object, PhiByPowD, to hold lists from the interface vertices to their adjacent domains --------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; dmark = IVinit(ndom+1, -1) ; if ( nPhi >= ndom ) { list = IVinit(nPhi, -1) ; } else { list = IVinit(ndom, -1) ; } PhiByPowD = IVL_new() ; IVL_init1(PhiByPowD, IVL_CHUNKED, nPhi) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; /* if ( phi == 0 ) { int ierr ; fprintf(msgFile, "\n adj(%d,%d) = ", v, phi) ; IVfp80(msgFile, vsize, vadj, 15, &ierr) ; fflush(msgFile) ; } */ count = 0 ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != phi ) { dmark[d] = phi ; list[count++] = d ; } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PhiByPowD, phi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPowD : interface x adjacent domains") ; IVL_writeForHumanEye(PhiByPowD, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------- create an IVL object, PhiByPhi to hold lists from the interface vertices to interface vertices. (s,t) are in the list if (s,t) is an edge in the graph and s and t do not share an adjacent domain ------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiByPhi = IVL_new() ; IVL_init1(PhiByPhi, IVL_CHUNKED, nPhi) ; offsets = IVinit(nPhi, 0) ; head = IVinit(nPhi, -1) ; link = IVinit(nPhi, -1) ; for ( phi1 = 0 ; phi1 < nPhi ; phi1++ ) { count = 0 ; if ( msglvl > 2 ) { v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; fprintf(msgFile, "\n checking out phi = %d, v = %d", phi1, v) ; fprintf(msgFile, "\n adj(%d) : ", v) ; IVfp80(msgFile, vsize, vadj, 10, &ierr) ; } /* ------------------------------------------------------------- get (phi1, phi0) edges that were previously put into the list ------------------------------------------------------------- */ if ( msglvl > 3 ) { if ( head[phi1] == -1 ) { fprintf(msgFile, "\n no previous edges") ; } else { fprintf(msgFile, "\n previous edges :") ; } } for ( phi0 = head[phi1] ; phi0 != -1 ; phi0 = nextphi ) { if ( msglvl > 3 ) { fprintf(msgFile, " %d", phi0) ; } nextphi = link[phi0] ; list[count++] = phi0 ; IVL_listAndSize(PhiByPhi, phi0, &size0, &adj0) ; if ( (ii = ++offsets[phi0]) < size0 ) { /* ---------------------------- link phi0 into the next list ---------------------------- */ phi2 = adj0[ii] ; link[phi0] = head[phi2] ; head[phi2] = phi0 ; } } /* -------------------------- get new edges (phi1, phi2) -------------------------- */ IVL_listAndSize(PhiByPowD, phi1, &size1, &adj1) ; v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && compids[w] == 0 && (phi2 = VtoPhi[w]) > phi1 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n checking out phi2 = %d", phi2) ; } /* -------------------------------------------------- see if phi1 and phi2 have a common adjacent domain -------------------------------------------------- */ IVL_listAndSize(PhiByPowD, phi2, &size2, &adj2) ; adjdom = 0 ; jj1 = jj2 = 0 ; while ( jj1 < size1 && jj2 < size2 ) { if ( adj1[jj1] < adj2[jj2] ) { jj1++ ; } else if ( adj1[jj1] > adj2[jj2] ) { jj2++ ; } else { if ( msglvl > 3 ) { fprintf(msgFile, ", common adj domain %d", adj1[jj1]) ; } adjdom = 1 ; break ; } } if ( adjdom == 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, ", no adjacent domain") ; } list[count++] = phi2 ; } } } if ( count > 0 ) { /* --------------------- set the list for phi1 --------------------- */ IVqsortUp(count, list) ; IVL_setList(PhiByPhi, phi1, count, list) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n edge list for %d :", phi1) ; IVfp80(msgFile, count, list, 15, &ierr) ; } for ( ii = 0, phi2 = -1 ; ii < count ; ii++ ) { if ( list[ii] > phi1 ) { offsets[phi1] = ii ; phi2 = list[ii] ; break ; } } if ( phi2 != -1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n linking %d into list for %d", phi1, phi2) ; } link[phi1] = head[phi2] ; head[phi2] = phi1 ; } /* phi2 = list[0] ; link[phi1] = head[phi2] ; head[phi2] = phi1 ; */ } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPhi : interface x interface") ; IVL_writeForHumanEye(PhiByPhi, msgFile) ; fflush(msgFile) ; } /* -------------------- get the PhiToPsi map -------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToPsi = IVinit(nPhi, -1) ; nPsi = 0 ; for ( phi = 0 ; phi < nPhi ; phi++ ) { if ( PhiToPsi[phi] == -1 ) { /* --------------------------- phi not yet mapped to a psi --------------------------- */ first = last = 0 ; list[0] = phi ; PhiToPsi[phi] = nPsi ; while ( first <= last ) { phi2 = list[first++] ; IVL_listAndSize(PhiByPhi, phi2, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { phi3 = adj[ii] ; if ( PhiToPsi[phi3] == -1 ) { PhiToPsi[phi3] = nPsi ; list[++last] = phi3 ; } } } nPsi++ ; } } if ( msglvl > 1 ) { fprintf(msgFile, "\n nPsi = %d", nPsi) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToPsi(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToPsi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------- create an IVL object, Psi --> 2^D --------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVfill(nPsi, head, -1) ; IVfill(nPhi, link, -1) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { psi = PhiToPsi[phi] ; link[phi] = head[psi] ; head[psi] = phi ; } PsiByPowD = IVL_new() ; IVL_init1(PsiByPowD, IVL_CHUNKED, nPsi) ; IVfill(ndom+1, dmark, -1) ; for ( psi = 0 ; psi < nPsi ; psi++ ) { count = 0 ; for ( phi = head[psi] ; phi != -1 ; phi = link[phi] ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { if ( (w = adj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != psi ) { dmark[d] = psi ; list[count++] = d ; } } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PsiByPowD, psi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PsiByPowD(%d) :", nPhi) ; IVL_writeForHumanEye(PsiByPowD, msgFile) ; fflush(msgFile) ; } icputimes++ ; MARKTIME(cputimes[icputimes]) ; /* ------------------------------------- now get the map Psi |---> Sigma that is the equivalence map over PhiByPowD ------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PsiToSigma = IVL_equivMap1(PsiByPowD) ; *pnseg = 1 + IVmax(nPsi, PsiToSigma, &ii) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n nSigma = %d", *pnseg) ; fprintf(msgFile, "\n PsiToSigma(%d) :", nPhi) ; IVfp80(msgFile, nPsi, PsiToSigma, 15, &ierr) ; fflush(msgFile) ; } /* -------------------------------------------------------------- now fill the map from the vertices to the domains and segments -------------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) > 0 ) { dsmap[v] = d - 1 ; } else { phi = VtoPhi[v] ; psi = PhiToPsi[phi] ; sigma = PsiToSigma[psi] ; dsmap[v] = ndom + sigma ; } } /* ------------------------ free the working storage ------------------------ */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVL_free(PhiByPhi) ; IVL_free(PhiByPowD) ; IVL_free(PsiByPowD) ; IVfree(PhiToV) ; IVfree(VtoPhi) ; IVfree(dmark) ; IVfree(list) ; IVfree(PhiToPsi) ; IVfree(head) ; IVfree(link) ; IVfree(offsets) ; IVfree(PsiToSigma) ; icputimes++ ; MARKTIME(cputimes[icputimes]) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n domain/segment map timings split") ; fprintf(msgFile, "\n %9.5f : create the DSmap object" "\n %9.5f : get numbers of domain and interface vertices" "\n %9.5f : generate PhiToV and VtoPhi" "\n %9.5f : generate PhiByPowD" "\n %9.5f : generate PhiByPhi" "\n %9.5f : generate PhiToPsi" "\n %9.5f : generate PsiByPowD" "\n %9.5f : generate PsiToSigma" "\n %9.5f : generate dsmap" "\n %9.5f : free working storage" "\n %9.5f : total time", cputimes[1] - cputimes[0], cputimes[2] - cputimes[1], cputimes[3] - cputimes[2], cputimes[4] - cputimes[3], cputimes[5] - cputimes[4], cputimes[6] - cputimes[5], cputimes[7] - cputimes[6], cputimes[8] - cputimes[7], cputimes[9] - cputimes[8], cputimes[10] - cputimes[9], cputimes[11] - cputimes[0]) ; } return(dsmapIV) ; }
/* -------------------------------------------------------------- mark the nodes 1 or 2 to define a min-cut, where source in X and sink in Y. start the search from the source node. for x in X and y in Y arc (x,y) is in the min-cut when flow(x,y) == capacity(x,y) arc (y,x) is in the min-cut when flow(y,x) == 0 on return, mark[*] is filled with 1 or 2, where the mark[source] = 1 and mark[sink] = 2 created -- 96jun08, cca -------------------------------------------------------------- */ void Network_findMincutFromSource ( Network *network, Ideq *deq, int mark[] ) { Arc *arc ; Arc **inheads, **outheads ; FILE *msgFile ; int msglvl, nnode, source, x, z ; /* --------------- check the input --------------- */ if ( network == NULL || (nnode = network->nnode) <= 0 || deq == NULL || mark == NULL ) { fprintf(stderr, "\n fatal error in Network_findMincutFromSource(%p,%p,%p)" "\n bad input\n", network, deq, mark) ; exit(-1) ; } source = 0 ; inheads = network->inheads ; outheads = network->outheads ; msglvl = network->msglvl ; msgFile = network->msgFile ; if ( msglvl > 2 ) { fprintf(msgFile, "\n Network_findMincutFromSource") ; fflush(msgFile) ; } /* ----------------------------------------------- load all the nodes into Y except for the source ----------------------------------------------- */ IVfill(nnode, mark, 2) ; mark[source] = 1 ; /* --------------------------------------------------------- do a breadth first traversal from the source visit x in X out edge (x,z), add z to X if flow(x,z) < capacity(x,z) in edge (z,x), add z to X if flow(z,x) > 0 --------------------------------------------------------- */ Ideq_clear(deq) ; Ideq_insertAtHead(deq, source) ; while ( (x = Ideq_removeFromHead(deq)) != -1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n checking out node %d", x) ; fflush(msgFile) ; } for ( arc = outheads[x] ; arc != NULL ; arc = arc->nextOut ) { z = arc->second ; if ( mark[z] != 1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n out-arc (%d,%d), flow %d, capacity %d", x, z, arc->flow, arc->capacity) ; fflush(msgFile) ; } if ( arc->flow < arc->capacity ) { if ( msglvl > 2 ) { fprintf(msgFile, ", adding %d to X", z) ; fflush(msgFile) ; } Ideq_insertAtTail(deq, z) ; mark[z] = 1 ; } } } for ( arc = inheads[x] ; arc != NULL ; arc = arc->nextIn ) { z = arc->first ; if ( mark[z] != 1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n in-arc (%d,%d), flow %d", z, x, arc->flow) ; fflush(msgFile) ; } if ( arc->flow > 0 ) { if ( msglvl > 2 ) { fprintf(msgFile, ", adding %d to X", z) ; fflush(msgFile) ; } Ideq_insertAtTail(deq, z) ; mark[z] = 1 ; } } } } if ( msglvl > 2 ) { fprintf(msgFile, "\n leaving FindMincutFromSource") ; fflush(msgFile) ; } return ; }
/* --------------------------------------------------------------------- purpose -- to compute the factorization of A - sigma * B note: all variables in the calling sequence are references to allow call from fortran. input parameters data -- pointer to bridge data object psigma -- shift for the matrix pencil ppvttol -- pivot tolerance *ppvttol = 0.0 --> no pivoting used *ppvttol != 0.0 --> pivoting used, entries in factor are bounded above by 1/pvttol in magnitude output parameters *pinertia -- on return contains the number of negative eigenvalues *perror -- on return contains an error code 1 -- error found during factorization 0 -- normal return -1 -- psigma is NULL -2 -- ppvttol is NULL -3 -- data is NULL -4 -- pinertia is NULL created -- 98aug10, cca & jcp --------------------------------------------------------------------- */ void FactorMPI ( double *psigma, double *ppvttol, void *data, int *pinertia, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; Chv *rootchv ; ChvManager *chvmanager ; double droptol=0.0, tau ; double cpus[20] ; FILE *msgFile ; int recvtemp[3], sendtemp[3], stats[20] ; int msglvl, nnegative, nzero, npositive, pivotingflag, tag ; MPI_Comm comm ; int nproc ; #if MYDEBUG > 0 double t1, t2 ; count_Factor++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) FactorMPI()", count_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) FactorMPI()", count_Factor) ; fflush(bridge->msgFile) ; #endif nproc = bridge->nproc ; /* --------------- check the input --------------- */ if ( psigma == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n psigma is NULL\n") ; *perror = -1 ; return ; } if ( ppvttol == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n ppvttol is NULL\n") ; *perror = -2 ; return ; } if ( data == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n data is NULL\n") ; *perror = -3 ; return ; } if ( pinertia == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n pinertia is NULL\n") ; *perror = -4 ; return ; } if ( perror == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n perror is NULL\n") ; return ; } comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /* ---------------------------------- set the shift in the pencil object ---------------------------------- */ bridge->pencil->sigma[0] = -(*psigma) ; bridge->pencil->sigma[1] = 0.0 ; /* ----------------------------------------- if the matrices are in local coordinates (i.e., this is the first factorization following a matrix-vector multiply) then map the matrix into global coordinates ----------------------------------------- */ if ( bridge->coordFlag == LOCAL ) { if ( bridge->prbtype == 1 ) { MatMul_setGlobalIndices(bridge->info, bridge->B) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix B in local coordinates") ; InpMtx_writeForHumanEye(bridge->B, msgFile) ; fflush(msgFile) ; } } if ( bridge->prbtype == 2 ) { MatMul_setGlobalIndices(bridge->info, bridge->A) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix A in local coordinates") ; InpMtx_writeForHumanEye(bridge->A, msgFile) ; fflush(msgFile) ; } } bridge->coordFlag = GLOBAL ; } /* ----------------------------------------------------- clear the front matrix and submatrix mananger objects ----------------------------------------------------- */ FrontMtx_clearData(bridge->frontmtx); SubMtxManager_clearData(bridge->mtxmanager); SolveMap_clearData(bridge->solvemap) ; if ( bridge->rowmapIV != NULL ) { IV_free(bridge->rowmapIV) ; bridge->rowmapIV = NULL ; } /* ----------------------------------------------------------- set the pivot tolerance. NOTE: spooles's "tau" parameter is a bound on the magnitude of the factor entries, and is the recipricol of that of the pivot tolerance of the lanczos code ----------------------------------------------------------- */ if ( *ppvttol == 0.0 ) { tau = 10.0 ; pivotingflag = SPOOLES_NO_PIVOTING ; } else { tau = (1.0)/(*ppvttol) ; pivotingflag = SPOOLES_PIVOTING ; } /* ---------------------------------- initialize the front matrix object ---------------------------------- */ FrontMtx_init(bridge->frontmtx, bridge->frontETree, bridge->symbfacIVL, SPOOLES_REAL, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, bridge->myid, bridge->ownersIV, bridge->mtxmanager, bridge->msglvl, bridge->msgFile) ; /* ------------------------- compute the factorization ------------------------- */ tag = 0 ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 0); IVfill(20, stats, 0) ; DVfill(20, cpus, 0.0) ; rootchv = FrontMtx_MPI_factorPencil(bridge->frontmtx, bridge->pencil, tau, droptol, chvmanager, bridge->ownersIV, 0, perror, cpus, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; ChvManager_free(chvmanager); tag += 3*FrontMtx_nfront(bridge->frontmtx) + 2 ; if ( msglvl > 3 ) { fprintf(msgFile, "\n\n numeric factorization") ; FrontMtx_writeForHumanEye(bridge->frontmtx, bridge->msgFile) ; fflush(bridge->msgFile) ; } /* ---------------------------- if matrix is singular then set error flag and return ---------------------------- */ if ( rootchv != NULL ) { fprintf(msgFile, "\n WHOA NELLY!, matrix is singular") ; fflush(msgFile) ; *perror = 1 ; return ; } /* ------------------------------------------------------------------ post-process the factor matrix, convert from fronts to submatrices ------------------------------------------------------------------ */ FrontMtx_MPI_postProcess(bridge->frontmtx, bridge->ownersIV, stats, bridge->msglvl, bridge->msgFile, tag, comm); tag += 5*bridge->nproc ; /* ------------------- compute the inertia ------------------- */ FrontMtx_inertia(bridge->frontmtx, &nnegative, &nzero, &npositive) ; sendtemp[0] = nnegative ; sendtemp[1] = nzero ; sendtemp[2] = npositive ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n local inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } MPI_Allreduce((void *) sendtemp, (void *) recvtemp, 3, MPI_INT, MPI_SUM, comm) ; nnegative = recvtemp[0] ; nzero = recvtemp[1] ; npositive = recvtemp[2] ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n global inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } *pinertia = nnegative; /* --------------------------- create the solve map object --------------------------- */ SolveMap_ddMap(bridge->solvemap, SPOOLES_REAL, FrontMtx_upperBlockIVL(bridge->frontmtx), FrontMtx_lowerBlockIVL(bridge->frontmtx), nproc, bridge->ownersIV, FrontMtx_frontTree(bridge->frontmtx), bridge->seed, bridge->msglvl, bridge->msgFile) ; /* ------------------------------- redistribute the front matrices ------------------------------- */ FrontMtx_MPI_split(bridge->frontmtx, bridge->solvemap, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; if ( *ppvttol != 0.0 ) { /* ------------------------------------------------------------- pivoting for stability may have taken place. create rowmapIV, the map from rows in the factorization to processes. ------------------------------------------------------------- */ bridge->rowmapIV = FrontMtx_MPI_rowmapIV(bridge->frontmtx, bridge->ownersIV, bridge->msglvl, bridge->msgFile, bridge->comm) ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n\n bridge->rowmapIV") ; IV_writeForHumanEye(bridge->rowmapIV, bridge->msgFile) ; fflush(bridge->msgFile) ; } } else { bridge->rowmapIV = NULL ; } /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_Factor += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(bridge->msgFile) ; #endif return; }