/*
   ---------------------------------------------------------
   purpose -- create a double vector holding size entries,
              every one of them set to dval

   the storage comes from DVinit2(size) and is filled by
   DVfill(size, ..., dval)

   return value -- pointer to the first entry of the vector,
                   or NULL when size is not positive

   created : 95sep22, cca
   ---------------------------------------------------------
*/
double *
DVinit (
   int      size,
   double   dval
) {
double   *vec ;

/* nothing to allocate for an empty (or negative) request */
if ( size <= 0 ) {
   return(NULL) ;
}
vec = DVinit2(size) ;
DVfill(size, vec, dval) ;

return(vec) ;
}
/*
   ----------------------------------------------------------
   purpose -- set every entry of a DV vector object to value

   dv    -- vector object; must not be NULL, otherwise an
            error message is written to stderr and the
            program exits with status -1
   value -- value stored into each of the dv->size entries

   nothing is done when dv->size is not positive

   created -- 96jun22, cca
   ----------------------------------------------------------
*/
void
DV_fill (
   DV       *dv,
   double   value
) {
/*
   ---------------
   check the input
   ---------------
*/
if ( dv == NULL ) {
   /* %p requires a void pointer argument, hence the cast */
   fprintf(stderr, "\n fatal error in DV_fill(%p,%f)"
           "\n bad input\n", (void *) dv, value) ;
   exit(-1) ;
}
if ( dv->size > 0 ) {
   DVfill(dv->size, dv->vec, value) ;
}
return ; }
/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------------
   purpose -- compute local coordinates xloc[] for the n values x[]
      so that the extent of the drawn items, i.e., each xloc[J]
      widened on both sides by rscale (radius == NULL) or by
      rscale*radius[J], has a total width within one percent of
      xmax - xmin.

   n      -- number of coordinates
   x      -- input coordinates
   xloc   -- output, local coordinates
   rscale -- scaling factor applied to the radii
   radius -- per-item radii, may be NULL (every item then uses rscale)
   xmin   -- left end of the target interval
   xmax   -- right end of the target interval

   special cases
      all x[] equal -- every xloc[] is set to the interval midpoint
      one item is both the leftmost and rightmost extent --
         every xloc[] is set to the interval midpoint
      the radii do not fit into the interval --
         an error message is printed and the program exits
   ------------------------------------------------------------------
*/
static void
findLocalCoords (
   int      n,
   double   x[],
   double   xloc[],
   double   rscale,
   double   radius[],
   double   xmin,
   double   xmax
) {
double   extent, hiTarget, leftEdge, loTarget, offset, rightEdge,
         scale, span, xhigh, xlow ;
int      ii, ileft, iright, where ;

extent = xmax - xmin ;
xlow   = DVmin(n, x, &where) ;
xhigh  = DVmax(n, x, &where) ;
#if MYDEBUG > 0
fprintf(stdout, "\n ximin %f, ximax %f", xlow, xhigh) ;
#endif
if ( xhigh == xlow ) {
/*
   all coordinates coincide, center everything
*/
   DVfill(n, xloc, 0.5*(xmax+xmin)) ;
   return ;
}
/*
   first guess: linear map of [xlow, xhigh] onto [xmin, xmax]
*/
scale = extent/(xhigh - xlow) ;
#if MYDEBUG > 0
fprintf(stdout, "\n initial xscale %f", scale) ;
#endif
for ( ii = 0 ; ii < n ; ii++ ) {
   xloc[ii] = xmin + scale*(x[ii] - xlow) ;
}
/*
   iterate: find the items whose extents reach furthest left and
   right, then recompute the linear map so these two extents touch
   the ends of the interval
*/
for ( ; ; ) {
   ileft = iright = 0 ;
   if ( radius != NULL ) {
      leftEdge  = xloc[0] - rscale*radius[0] ;
      rightEdge = xloc[0] + rscale*radius[0] ;
      for ( ii = 1 ; ii < n ; ii++ ) {
         if ( leftEdge > xloc[ii] - rscale*radius[ii] ) {
            leftEdge = xloc[ii] - rscale*radius[ii] ;
            ileft    = ii ;
         }
         if ( rightEdge < xloc[ii] + rscale*radius[ii] ) {
            rightEdge = xloc[ii] + rscale*radius[ii] ;
            iright    = ii ;
         }
      }
   } else {
      leftEdge  = xloc[0] - rscale ;
      rightEdge = xloc[0] + rscale ;
      for ( ii = 1 ; ii < n ; ii++ ) {
         if ( leftEdge > xloc[ii] - rscale ) {
            leftEdge = xloc[ii] - rscale ;
            ileft    = ii ;
         }
         if ( rightEdge < xloc[ii] + rscale ) {
            rightEdge = xloc[ii] + rscale ;
            iright    = ii ;
         }
      }
   }
#if MYDEBUG > 0
   fprintf(stdout, "\n\n Jmin = %d, Jmax = %d", ileft, iright) ;
   fprintf(stdout, "\n xlocmin %f, xlocmax %f", leftEdge, rightEdge) ;
#endif
   if ( ileft == iright ) {
/*
      a single item determines both extents, center everything
*/
      DVfill(n, xloc, (xmin + xmax)/2) ;
#if MYDEBUG > 0
      fprintf(stdout, "\n leaving") ;
#endif
      return ;
   }
   span = rightEdge - leftEdge ;
#if MYDEBUG > 0
   fprintf(stdout, "\n width %f, locwidth %f", extent, span) ;
#endif
   if ( !(span > 1.01*extent || span < 0.99*extent) ) {
/*
      occupied width is within one percent of the target, done
*/
      return ;
   }
/*
   positions the two extreme items must take so that their
   extents end exactly at xmin and xmax
*/
   if ( radius != NULL ) {
      loTarget = xmin + rscale*radius[ileft] ;
      hiTarget = xmax - rscale*radius[iright] ;
   } else {
      loTarget = xmin + rscale ;
      hiTarget = xmax - rscale ;
   }
   if ( loTarget > hiTarget ) {
      fprintf(stderr, "\n\n error in Tree_drawEPS()"
              "\n nonlinear process is unable to converge"
              "\n reduce radius scaling factor\n") ;
      exit(-1) ;
   }
#if MYDEBUG > 0
   fprintf(stdout, "\n 1. x[%d] = %f, x[%d] = %f",
           ileft, x[ileft], iright, x[iright]) ;
   fprintf(stdout, "\n 1. b1 = %f, b2 = %f", loTarget, hiTarget) ;
#endif
   scale  = (hiTarget - loTarget)/(x[iright] - x[ileft]) ;
   offset = -(hiTarget*x[ileft] - loTarget*x[iright])
            /(x[iright] - x[ileft]) ;
#if MYDEBUG > 0
   fprintf(stdout, "\n 1. xscale = %f, xoff = %f", scale, offset) ;
#endif
   for ( ii = 0 ; ii < n ; ii++ ) {
      xloc[ii] = offset + scale*x[ii] ;
   }
}
}
/*
   MatFactorNumeric_SeqSpooles - numeric factorization of A using SPOOLES.

   Input parameters:
     F    - factor matrix; F->spptr holds the Mat_Spooles context (lu)
     A    - matrix to factor; handled as SeqAIJ when the type comparison
            succeeds, otherwise its data is read as Mat_SeqSBAIJ
     info - not referenced in this routine

   When lu->flg == DIFFERENT_NONZERO_PATTERN (first factorization) the
   ordering, permutations and symbolic factorization are computed and stored
   in lu; otherwise the stored permutation and symbolic factorization are
   reused and only the numeric factorization is redone.

   On success lu->flg is set to SAME_NONZERO_PATTERN and
   lu->CleanUpSpooles to PETSC_TRUE.

   Returns 0 on success; errors are raised with SETERRQ/CHKERRQ
   (PETSC_ERR_MAT_LU_ZRPVT for a singular matrix, PETSC_ERR_LIB for a
   failure at a front, PETSC_ERR_ARG_WRONG for an unknown ordering,
   PETSC_ERR_SYS when fflush() fails).
*/
PetscErrorCode MatFactorNumeric_SeqSpooles(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Spooles    *lu = (Mat_Spooles*)(F)->spptr;
  ChvManager     *chvmanager;
  Chv            *rootchv;
  IVL            *adjIVL;
  PetscErrorCode ierr;
  PetscInt       nz,nrow=A->rmap->n,irow,nedges,neqns=A->cmap->n,*ai,*aj,i,*diag=0,fierr;
  PetscScalar    *av;
  double         cputotal,facops;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       nz_row,*aj_tmp;
  PetscScalar    *av_tmp;
#else
  PetscInt       *ivec1,*ivec2,j;
  double         *dvec;
#endif
  PetscBool      isSeqAIJ,isMPIAIJ,singular;

  PetscFunctionBegin;
  if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
    (F)->ops->solve = MatSolve_SeqSpooles;
    (F)->assembled  = PETSC_TRUE;

    /* set Spooles options */
    ierr = SetSpoolesOptions(A, &lu->options);CHKERRQ(ierr);

    lu->mtxA = InpMtx_new();
  }

  /* copy A to Spooles' InpMtx object */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  /* NOTE(review): this second comparison also tests MATSEQAIJ, so isMPIAIJ
     always equals isSeqAIJ; left unchanged -- confirm the intended type */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isMPIAIJ);CHKERRQ(ierr);
  if (isSeqAIJ){
    Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data;
    ai=mat->i; aj=mat->j; av=mat->a;
    if (lu->options.symflag == SPOOLES_NONSYMMETRIC) {
      nz=mat->nz;
    } else { /* SPOOLES_SYMMETRIC || SPOOLES_HERMITIAN */
      /* only the entries from the diagonal to the end of each row are passed on */
      nz=(mat->nz + A->rmap->n)/2;
      diag=mat->diag;
    }
  } else { /* A is SBAIJ */
    Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data;
    ai=mat->i; aj=mat->j; av=mat->a;
    nz=mat->nz;
  }
  InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);

#if defined(PETSC_USE_COMPLEX)
  for (irow=0; irow<nrow; irow++) {
    if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !(isSeqAIJ || isMPIAIJ)){
      /* whole row */
      nz_row = ai[irow+1] - ai[irow];
      aj_tmp = aj + ai[irow];
      av_tmp = av + ai[irow];
    } else {
      /* row entries starting at the diagonal */
      nz_row = ai[irow+1] - diag[irow];
      aj_tmp = aj + diag[irow];
      av_tmp = av + diag[irow];
    }
    for (i=0; i<nz_row; i++){
      InpMtx_inputComplexEntry(lu->mtxA, irow, *aj_tmp++,PetscRealPart(*av_tmp),PetscImaginaryPart(*av_tmp));
      av_tmp++;
    }
  }
#else
  ivec1 = InpMtx_ivec1(lu->mtxA);
  ivec2 = InpMtx_ivec2(lu->mtxA);
  dvec  = InpMtx_dvec(lu->mtxA);
  if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !isSeqAIJ){
    for (irow = 0; irow < nrow; irow++){
      for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow;
    }
    IVcopy(nz, ivec2, aj);
    DVcopy(nz, dvec, av);
  } else {
    /* copy the entries from the diagonal to the end of each row; nz is recounted */
    nz = 0;
    for (irow = 0; irow < nrow; irow++){
      for (j = diag[irow]; j<ai[irow+1]; j++) {
        ivec1[nz] = irow;
        ivec2[nz] = aj[j];
        dvec[nz]  = av[j];
        nz++;
      }
    }
  }
  InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec);
#endif

  InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
  if ( lu->options.msglvl > 0 ) {
    int err;
    /* NOTE(review): this message also goes to stdout, unlike every other
       message in this routine -- possibly a debugging leftover */
    printf("\n\n input matrix");
    ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix");CHKERRQ(ierr);
    InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
    err = fflush(lu->options.msgFile);
    if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
  }

  if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
    /*---------------------------------------------------
    find a low-fill ordering
         (1) create the Graph object
         (2) order the graph
    -------------------------------------------------------*/
    if (lu->options.useQR){
      adjIVL = InpMtx_adjForATA(lu->mtxA);
    } else {
      adjIVL = InpMtx_fullAdjacency(lu->mtxA);
    }
    nedges = IVL_tsize(adjIVL);

    lu->graph = Graph_new();
    Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL);
    if ( lu->options.msglvl > 2 ) {
      int err;

      if (lu->options.useQR){
        ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of A^T A");CHKERRQ(ierr);
      } else {
        ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");CHKERRQ(ierr);
      }
      Graph_writeForHumanEye(lu->graph, lu->options.msgFile);
      err = fflush(lu->options.msgFile);
      if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    }

    switch (lu->options.ordering) {
    case 0:
      lu->frontETree = orderViaBestOfNDandMS(lu->graph,
                         lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
                         lu->options.seed, lu->options.msglvl, lu->options.msgFile);
      break;
    case 1:
      lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile);
      break;
    case 2:
      lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize,
                         lu->options.seed,lu->options.msglvl,lu->options.msgFile);
      break;
    case 3:
      lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize,
                         lu->options.seed,lu->options.msglvl,lu->options.msgFile);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
    }

    if ( lu->options.msglvl > 0 ) {
      int err;

      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");CHKERRQ(ierr);
      ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
      err = fflush(lu->options.msgFile);
      if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    }

    /* get the permutation, permute the front tree */
    lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
    lu->oldToNew   = IV_entries(lu->oldToNewIV);
    lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
    if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);

    /* permute the matrix */
    if (lu->options.useQR){
      InpMtx_permute(lu->mtxA, NULL, lu->oldToNew);
    } else {
      InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew);
      if ( lu->options.symflag == SPOOLES_SYMMETRIC) {
        InpMtx_mapToUpperTriangle(lu->mtxA);
      }
#if defined(PETSC_USE_COMPLEX)
      if ( lu->options.symflag == SPOOLES_HERMITIAN ) {
        InpMtx_mapToUpperTriangleH(lu->mtxA);
      }
#endif
      InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
    }
    InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

    /* get symbolic factorization */
    if (lu->options.useQR){
      lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph);
      IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV);
      IVL_sortUp(lu->symbfacIVL);
      ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
    } else {
      lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA);
    }
    if ( lu->options.msglvl > 2 ) {
      int err;

      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n old-to-new permutation vector");CHKERRQ(ierr);
      IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile);
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n new-to-old permutation vector");CHKERRQ(ierr);
      IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile);
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree after permutation");CHKERRQ(ierr);
      ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr);
      InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n symbolic factorization");CHKERRQ(ierr);
      IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
      err = fflush(lu->options.msgFile);
      if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    }

    lu->frontmtx   = FrontMtx_new();
    lu->mtxmanager = SubMtxManager_new();
    SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);

  } else { /* new num factorization using previously computed symbolic factor */

    if (lu->options.pivotingflag) { /* different FrontMtx is required */
      FrontMtx_free(lu->frontmtx);
      lu->frontmtx = FrontMtx_new();
    } else {
      FrontMtx_clearData (lu->frontmtx);
    }

    SubMtxManager_free(lu->mtxmanager);
    lu->mtxmanager = SubMtxManager_new();
    SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);

    /* permute mtxA */
    if (lu->options.useQR){
      InpMtx_permute(lu->mtxA, NULL, lu->oldToNew);
    } else {
      InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew);
      if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
        InpMtx_mapToUpperTriangle(lu->mtxA);
      }
#if defined(PETSC_USE_COMPLEX)
      /* same Hermitian mapping as in the first-factorization path above */
      if ( lu->options.symflag == SPOOLES_HERMITIAN ) {
        InpMtx_mapToUpperTriangleH(lu->mtxA);
      }
#endif
      InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
    }
    InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
    if ( lu->options.msglvl > 2 ) {
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr);
      InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
    }
  } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */

  if (lu->options.useQR){
    FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag,
                  SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
                  SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL,
                  lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
  } else {
    FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
                  FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL,
                  lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
  }

  if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */
    if ( lu->options.patchAndGoFlag == 1 ) {
      lu->frontmtx->patchinfo = PatchAndGoInfo_new();
      PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
                          lu->options.storeids, lu->options.storevalues);
    } else if ( lu->options.patchAndGoFlag == 2 ) {
      lu->frontmtx->patchinfo = PatchAndGoInfo_new();
      PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
                          lu->options.storeids, lu->options.storevalues);
    }
  }

  /* numerical factorization */
  chvmanager = ChvManager_new();
  ChvManager_init(chvmanager, NO_LOCK, 1);
  DVfill(10, lu->cpus, 0.0);
  if (lu->options.useQR){
    facops = 0.0 ;
    FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager,
                       lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile);
    /* release the manager right away so the error returns below cannot leak it */
    ChvManager_free(chvmanager);
    if ( lu->options.msglvl > 1 ) {
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr);
      ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n facops = %9.2f", facops);CHKERRQ(ierr);
    }
  } else {
    IVfill(20, lu->stats, 0);
    rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0,
                                    chvmanager, &fierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile);
    /* record the outcome, then release the manager before any error return */
    singular = rootchv ? PETSC_TRUE : PETSC_FALSE;
    ChvManager_free(chvmanager);
    if (singular) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"\n matrix found to be singular");
    if (fierr >= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n error encountered at front %D", fierr);

    if(lu->options.FrontMtxInfo){
      ierr = PetscPrintf(PETSC_COMM_SELF,"\n %8d pivots, %8d pivot tests, %8d delayed rows and columns\n",lu->stats[0], lu->stats[1], lu->stats[2]);CHKERRQ(ierr);
      cputotal = lu->cpus[8] ;
      if ( cputotal > 0.0 ) {
        ierr = PetscPrintf(PETSC_COMM_SELF,
           "\n cpus cpus/totaltime"
           "\n initialize fronts %8.3f %6.2f"
           "\n load original entries %8.3f %6.2f"
           "\n update fronts %8.3f %6.2f"
           "\n assemble postponed data %8.3f %6.2f"
           "\n factor fronts %8.3f %6.2f"
           "\n extract postponed data %8.3f %6.2f"
           "\n store factor entries %8.3f %6.2f"
           "\n miscellaneous %8.3f %6.2f"
           "\n total time %8.3f \n",
           lu->cpus[0], 100.*lu->cpus[0]/cputotal,
           lu->cpus[1], 100.*lu->cpus[1]/cputotal,
           lu->cpus[2], 100.*lu->cpus[2]/cputotal,
           lu->cpus[3], 100.*lu->cpus[3]/cputotal,
           lu->cpus[4], 100.*lu->cpus[4]/cputotal,
           lu->cpus[5], 100.*lu->cpus[5]/cputotal,
           lu->cpus[6], 100.*lu->cpus[6]/cputotal,
           lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal);CHKERRQ(ierr);
      }
    }
  }

  if ( lu->options.msglvl > 0 ) {
    int err;

    ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr);
    FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
    err = fflush(lu->options.msgFile);
    if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
  }

  if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */
    if ( lu->options.patchAndGoFlag == 1 ) {
      if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
        if (lu->options.msglvl > 0 ){
          ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr);
          IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
        }
      }
      PatchAndGoInfo_free(lu->frontmtx->patchinfo);
      lu->frontmtx->patchinfo = NULL; /* do not keep a pointer to freed storage */
    } else if ( lu->options.patchAndGoFlag == 2 ) {
      if (lu->options.msglvl > 0 ){
        if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
          ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr);
          IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
        }
        if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
          ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");CHKERRQ(ierr);
          DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
        }
      }
      PatchAndGoInfo_free(lu->frontmtx->patchinfo);
      lu->frontmtx->patchinfo = NULL; /* do not keep a pointer to freed storage */
    }
  }

  /* post-process the factorization */
  FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile);
  if ( lu->options.msglvl > 2 ) {
    int err;

    ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix after post-processing");CHKERRQ(ierr);
    FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
    err = fflush(lu->options.msgFile);
    if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
  }

  lu->flg            = SAME_NONZERO_PATTERN;
  lu->CleanUpSpooles = PETSC_TRUE;
  PetscFunctionReturn(0);
}
/*
 * Solves A x = b with the SPOOLES direct solver.
 *
 * A -- left-hand side; must report type SMT_SpoolesMtrx, it is cast to
 *      SpoolesSparseMtrx to obtain the InpMtx, value type and symmetry flag
 * b -- right-hand side
 * x -- receives the solution; must have the same size as b
 *
 * The factorization (ordering, permutations, symbolic and numeric factors)
 * is kept in members of the solver and is recomputed only when A is a
 * different object than the stored Lhs or its version has changed;
 * otherwise only the permute / solve / permute-back steps are performed.
 *
 * Returns 1. A singular matrix or a failure reported by the factorization
 * writes a message to msgFile and calls exit(-1).
 */
NM_Status
SpoolesSolver :: solve(SparseMtrx *A, FloatArray *b, FloatArray *x)
{
    int errorValue, mtxType, symmetryflag;
    int seed = 30145, pivotingflag = 0;
    int *oldToNew, *newToOld;
    double droptol = 0.0, tau = 1.e300;
    double cpus [ 10 ];
    int stats [ 20 ];

    ChvManager *chvmanager;
    Chv *rootchv;
    InpMtx *mtxA;
    DenseMtx *mtxY, *mtxX;

    // first check whether Lhs is defined
    if ( !A ) {
        _error("solveYourselfAt: unknown Lhs");
    }

    // and whether Rhs
    if ( !b ) {
        _error("solveYourselfAt: unknown Rhs");
    }

    // and whether the solution array is given
    if ( !x ) {
        _error("solveYourselfAt: unknown solution array");
    }

    if ( x->giveSize() != b->giveSize() ) {
        _error("solveYourselfAt: size mismatch");
    }

    Timer timer;
    timer.startTimer();

    if ( A->giveType() != SMT_SpoolesMtrx ) {
        _error("solveYourselfAt: SpoolesSparseMtrx Expected");
    }

    mtxA = ( ( SpoolesSparseMtrx * ) A )->giveInpMtrx();
    mtxType = ( ( SpoolesSparseMtrx * ) A )->giveValueType();
    symmetryflag = ( ( SpoolesSparseMtrx * ) A )->giveSymmetryFlag();

    int i;
    int neqns = A->giveNumberOfRows();
    int nrhs = 1;

    /* convert right-hand side to DenseMtx */
    mtxY = DenseMtx_new();
    DenseMtx_init(mtxY, mtxType, 0, 0, neqns, nrhs, 1, neqns);
    DenseMtx_zero(mtxY);
    for ( i = 0; i < neqns; i++ ) {
        DenseMtx_setRealEntry( mtxY, i, 0, b->at(i + 1) );
    }

    if ( ( Lhs != A ) || ( this->lhsVersion != A->giveVersion() ) ) {
        //
        // lhs has been changed -> new factorization
        //

        Lhs = A;
        this->lhsVersion = A->giveVersion();

        // release everything that belongs to the previous factorization
        if ( frontmtx ) {
            FrontMtx_free(frontmtx);
        }

        if ( newToOldIV ) {
            IV_free(newToOldIV);
        }

        if ( oldToNewIV ) {
            IV_free(oldToNewIV);
        }

        if ( frontETree ) {
            ETree_free(frontETree);
        }

        if ( symbfacIVL ) {
            IVL_free(symbfacIVL);
        }

        if ( mtxmanager ) {
            SubMtxManager_free(mtxmanager);
        }

        if ( graph ) {
            Graph_free(graph);
        }

        /*
         * -------------------------------------------------
         * STEP 3 : find a low-fill ordering
         * (1) create the Graph object
         * (2) order the graph using multiple minimum degree
         * -------------------------------------------------
         */
        int nedges;
        graph = Graph_new();
        adjIVL = InpMtx_fullAdjacency(mtxA);
        nedges = IVL_tsize(adjIVL);
        Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL,
                    NULL, NULL);
        if ( msglvl > 2 ) {
            fprintf(msgFile, "\n\n graph of the input matrix");
            Graph_writeForHumanEye(graph, msgFile);
            fflush(msgFile);
        }

        frontETree = orderViaMMD(graph, seed, msglvl, msgFile);
        if ( msglvl > 0 ) {
            fprintf(msgFile, "\n\n front tree from ordering");
            ETree_writeForHumanEye(frontETree, msgFile);
            fflush(msgFile);
        }

        /*
         * ----------------------------------------------------
         * STEP 4: get the permutation, permute the front tree,
         * permute the matrix and right hand side, and
         * get the symbolic factorization
         * ----------------------------------------------------
         */
        oldToNewIV = ETree_oldToNewVtxPerm(frontETree);
        oldToNew = IV_entries(oldToNewIV);
        newToOldIV = ETree_newToOldVtxPerm(frontETree);
        newToOld = IV_entries(newToOldIV);
        ETree_permuteVertices(frontETree, oldToNewIV);
        InpMtx_permute(mtxA, oldToNew, oldToNew);
        if ( symmetryflag == SPOOLES_SYMMETRIC ||
             symmetryflag == SPOOLES_HERMITIAN ) {
            InpMtx_mapToUpperTriangle(mtxA);
        }

        InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS);
        InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
        symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA);
        if ( msglvl > 2 ) {
            fprintf(msgFile, "\n\n old-to-new permutation vector");
            IV_writeForHumanEye(oldToNewIV, msgFile);
            fprintf(msgFile, "\n\n new-to-old permutation vector");
            IV_writeForHumanEye(newToOldIV, msgFile);
            fprintf(msgFile, "\n\n front tree after permutation");
            ETree_writeForHumanEye(frontETree, msgFile);
            fprintf(msgFile, "\n\n input matrix after permutation");
            InpMtx_writeForHumanEye(mtxA, msgFile);
            fprintf(msgFile, "\n\n symbolic factorization");
            IVL_writeForHumanEye(symbfacIVL, msgFile);
            fflush(msgFile);
        }

        // NOTE(review): writes the front tree to a fixed file name on every
        // new factorization -- looks like a debugging leftover, confirm
        Tree_writeToFile(frontETree->tree, (char*)"haggar.treef");
        /*--------------------------------------------------------------------*/
        /*
         * ------------------------------------------
         * STEP 5: initialize the front matrix object
         * ------------------------------------------
         */
        frontmtx = FrontMtx_new();
        mtxmanager = SubMtxManager_new();
        SubMtxManager_init(mtxmanager, NO_LOCK, 0);
        FrontMtx_init(frontmtx, frontETree, symbfacIVL, mtxType, symmetryflag,
                      FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, 0, NULL,
                      mtxmanager, msglvl, msgFile);
        /*--------------------------------------------------------------------*/
        /*
         * -----------------------------------------
         * STEP 6: compute the numeric factorization
         * -----------------------------------------
         */
        chvmanager = ChvManager_new();
        ChvManager_init(chvmanager, NO_LOCK, 1);
        DVfill(10, cpus, 0.0);
        IVfill(20, stats, 0);
        rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol,
                                        chvmanager, & errorValue, cpus, stats, msglvl, msgFile);
        ChvManager_free(chvmanager);
        if ( msglvl > 0 ) {
            fprintf(msgFile, "\n\n factor matrix");
            FrontMtx_writeForHumanEye(frontmtx, msgFile);
            fflush(msgFile);
        }

        if ( rootchv != NULL ) {
            fprintf(msgFile, "\n\n matrix found to be singular\n");
            exit(-1);
        }

        if ( errorValue >= 0 ) {
            fprintf(msgFile, "\n\n error encountered at front %d", errorValue);
            exit(-1);
        }

        /*--------------------------------------------------------------------*/
        /*
         * --------------------------------------
         * STEP 7: post-process the factorization
         * --------------------------------------
         */
        FrontMtx_postProcess(frontmtx, msglvl, msgFile);
        if ( msglvl > 2 ) {
            fprintf(msgFile, "\n\n factor matrix after post-processing");
            FrontMtx_writeForHumanEye(frontmtx, msgFile);
            fflush(msgFile);
        }

        /*--------------------------------------------------------------------*/
    }

    /*
     * ----------------------------------------------------
     * permute the right hand side (stored old-to-new map)
     * ----------------------------------------------------
     */
    DenseMtx_permuteRows(mtxY, oldToNewIV);
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n right hand side matrix after permutation");
        DenseMtx_writeForHumanEye(mtxY, msgFile);
    }

    /*
     * -------------------------------
     * STEP 8: solve the linear system
     * -------------------------------
     */
    mtxX = DenseMtx_new();
    DenseMtx_init(mtxX, mtxType, 0, 0, neqns, nrhs, 1, neqns);
    DenseMtx_zero(mtxX);
    // cpus is a local array that is only filled in the factorization branch
    // above; when a stored factorization is reused it would otherwise be
    // handed to the solve uninitialized
    DVfill(10, cpus, 0.0);
    FrontMtx_solve(frontmtx, mtxX, mtxY, mtxmanager,
                   cpus, msglvl, msgFile);
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n solution matrix in new ordering");
        DenseMtx_writeForHumanEye(mtxX, msgFile);
        fflush(msgFile);
    }

    /*--------------------------------------------------------------------*/
    /*
     * -------------------------------------------------------
     * STEP 9: permute the solution into the original ordering
     * -------------------------------------------------------
     */
    DenseMtx_permuteRows(mtxX, newToOldIV);
    if ( msglvl > 0 ) {
        fprintf(msgFile, "\n\n solution matrix in original ordering");
        DenseMtx_writeForHumanEye(mtxX, msgFile);
        fflush(msgFile);
    }

    // DenseMtx_writeForMatlab(mtxX, "x", msgFile) ;
    /*--------------------------------------------------------------------*/
    /* fetch data to oofem vectors */
    double *xptr = x->givePointer();
    for ( i = 0; i < neqns; i++ ) {
        DenseMtx_realEntry(mtxX, i, 0, xptr + i);
        // printf ("x(%d) = %e\n", i+1, *(xptr+i));
    }

    // DenseMtx_copyRowIntoVector(mtxX, 0, x->givePointer());

    timer.stopTimer();
    OOFEM_LOG_DEBUG( "SpoolesSolver info: user time consumed by solution: %.2fs\n", timer.getUtime() );

    /*
     * -----------
     * free memory
     * -----------
     */
    DenseMtx_free(mtxX);
    DenseMtx_free(mtxY);
    /*--------------------------------------------------------------------*/
    return ( 1 );
}
/* --------------------------------------------------------------------- purpose -- to compute the factorization of A - sigma * B note: all variables in the calling sequence are references to allow call from fortran. input parameters data -- pointer to bridge data object psigma -- shift for the matrix pencil ppvttol -- pivot tolerance *ppvttol = 0.0 --> no pivoting used *ppvttol != 0.0 --> pivoting used, entries in factor are bounded above by 1/pvttol in magnitude output parameters *pinertia -- on return contains the number of negative eigenvalues *perror -- on return contains an error code 1 -- error found during factorization 0 -- normal return -1 -- psigma is NULL -2 -- ppvttol is NULL -3 -- data is NULL -4 -- pinertia is NULL created -- 98aug10, cca & jcp --------------------------------------------------------------------- */ void FactorMPI ( double *psigma, double *ppvttol, void *data, int *pinertia, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; Chv *rootchv ; ChvManager *chvmanager ; double droptol=0.0, tau ; double cpus[20] ; FILE *msgFile ; int recvtemp[3], sendtemp[3], stats[20] ; int msglvl, nnegative, nzero, npositive, pivotingflag, tag ; MPI_Comm comm ; int nproc ; #if MYDEBUG > 0 double t1, t2 ; count_Factor++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) FactorMPI()", count_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) FactorMPI()", count_Factor) ; fflush(bridge->msgFile) ; #endif nproc = bridge->nproc ; /* --------------- check the input --------------- */ if ( psigma == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n psigma is NULL\n") ; *perror = -1 ; return ; } if ( ppvttol == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n ppvttol is NULL\n") ; *perror = -2 ; return ; } if ( data == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n data is NULL\n") ; *perror = -3 ; return ; } if ( pinertia == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n 
pinertia is NULL\n") ; *perror = -4 ; return ; } if ( perror == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n perror is NULL\n") ; return ; } comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /* ---------------------------------- set the shift in the pencil object ---------------------------------- */ bridge->pencil->sigma[0] = -(*psigma) ; bridge->pencil->sigma[1] = 0.0 ; /* ----------------------------------------- if the matrices are in local coordinates (i.e., this is the first factorization following a matrix-vector multiply) then map the matrix into global coordinates ----------------------------------------- */ if ( bridge->coordFlag == LOCAL ) { if ( bridge->prbtype == 1 ) { MatMul_setGlobalIndices(bridge->info, bridge->B) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix B in local coordinates") ; InpMtx_writeForHumanEye(bridge->B, msgFile) ; fflush(msgFile) ; } } if ( bridge->prbtype == 2 ) { MatMul_setGlobalIndices(bridge->info, bridge->A) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix A in local coordinates") ; InpMtx_writeForHumanEye(bridge->A, msgFile) ; fflush(msgFile) ; } } bridge->coordFlag = GLOBAL ; } /* ----------------------------------------------------- clear the front matrix and submatrix mananger objects ----------------------------------------------------- */ FrontMtx_clearData(bridge->frontmtx); SubMtxManager_clearData(bridge->mtxmanager); SolveMap_clearData(bridge->solvemap) ; if ( bridge->rowmapIV != NULL ) { IV_free(bridge->rowmapIV) ; bridge->rowmapIV = NULL ; } /* ----------------------------------------------------------- set the pivot tolerance. 
NOTE: spooles's "tau" parameter is a bound on the magnitude of the factor entries, and is the recipricol of that of the pivot tolerance of the lanczos code ----------------------------------------------------------- */ if ( *ppvttol == 0.0 ) { tau = 10.0 ; pivotingflag = SPOOLES_NO_PIVOTING ; } else { tau = (1.0)/(*ppvttol) ; pivotingflag = SPOOLES_PIVOTING ; } /* ---------------------------------- initialize the front matrix object ---------------------------------- */ FrontMtx_init(bridge->frontmtx, bridge->frontETree, bridge->symbfacIVL, SPOOLES_REAL, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, bridge->myid, bridge->ownersIV, bridge->mtxmanager, bridge->msglvl, bridge->msgFile) ; /* ------------------------- compute the factorization ------------------------- */ tag = 0 ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 0); IVfill(20, stats, 0) ; DVfill(20, cpus, 0.0) ; rootchv = FrontMtx_MPI_factorPencil(bridge->frontmtx, bridge->pencil, tau, droptol, chvmanager, bridge->ownersIV, 0, perror, cpus, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; ChvManager_free(chvmanager); tag += 3*FrontMtx_nfront(bridge->frontmtx) + 2 ; if ( msglvl > 3 ) { fprintf(msgFile, "\n\n numeric factorization") ; FrontMtx_writeForHumanEye(bridge->frontmtx, bridge->msgFile) ; fflush(bridge->msgFile) ; } /* ---------------------------- if matrix is singular then set error flag and return ---------------------------- */ if ( rootchv != NULL ) { fprintf(msgFile, "\n WHOA NELLY!, matrix is singular") ; fflush(msgFile) ; *perror = 1 ; return ; } /* ------------------------------------------------------------------ post-process the factor matrix, convert from fronts to submatrices ------------------------------------------------------------------ */ FrontMtx_MPI_postProcess(bridge->frontmtx, bridge->ownersIV, stats, bridge->msglvl, bridge->msgFile, tag, comm); tag += 5*bridge->nproc ; /* ------------------- compute the inertia ------------------- 
*/ FrontMtx_inertia(bridge->frontmtx, &nnegative, &nzero, &npositive) ; sendtemp[0] = nnegative ; sendtemp[1] = nzero ; sendtemp[2] = npositive ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n local inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } MPI_Allreduce((void *) sendtemp, (void *) recvtemp, 3, MPI_INT, MPI_SUM, comm) ; nnegative = recvtemp[0] ; nzero = recvtemp[1] ; npositive = recvtemp[2] ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n global inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } *pinertia = nnegative; /* --------------------------- create the solve map object --------------------------- */ SolveMap_ddMap(bridge->solvemap, SPOOLES_REAL, FrontMtx_upperBlockIVL(bridge->frontmtx), FrontMtx_lowerBlockIVL(bridge->frontmtx), nproc, bridge->ownersIV, FrontMtx_frontTree(bridge->frontmtx), bridge->seed, bridge->msglvl, bridge->msgFile) ; /* ------------------------------- redistribute the front matrices ------------------------------- */ FrontMtx_MPI_split(bridge->frontmtx, bridge->solvemap, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; if ( *ppvttol != 0.0 ) { /* ------------------------------------------------------------- pivoting for stability may have taken place. create rowmapIV, the map from rows in the factorization to processes. ------------------------------------------------------------- */ bridge->rowmapIV = FrontMtx_MPI_rowmapIV(bridge->frontmtx, bridge->ownersIV, bridge->msglvl, bridge->msgFile, bridge->comm) ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n\n bridge->rowmapIV") ; IV_writeForHumanEye(bridge->rowmapIV, bridge->msgFile) ; fflush(bridge->msgFile) ; } } else { bridge->rowmapIV = NULL ; } /* ------------------------------------------------------------------ set the error. 
(this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_Factor += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(bridge->msgFile) ; #endif return; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) { /* -------------------------------------------------- all-in-one program to solve A X = B using a multithreaded factorization and solve We use a patch-and-go strategy for the factorization without pivoting (1) read in matrix entries and form DInpMtx object (2) form Graph object (3) order matrix and form front tree (4) get the permutation, permute the matrix and front tree and get the symbolic factorization (5) compute the numeric factorization (6) read in right hand side entries (7) compute the solution created -- 98jun04, cca -------------------------------------------------- */ /*--------------------------------------------------------------------*/ char *matrixFileName, *rhsFileName ; DenseMtx *mtxB, *mtxX ; Chv *rootchv ; ChvManager *chvmanager ; double fudge, imag, real, tau = 100., toosmall, value ; double cpus[10] ; DV *cumopsDV ; ETree *frontETree ; FrontMtx *frontmtx ; FILE *inputFile, *msgFile ; Graph *graph ; InpMtx *mtxA ; int error, ient, irow, jcol, jrhs, jrow, lookahead, msglvl, ncol, nedges, nent, neqns, nfront, nrhs, nrow, nthread, patchAndGoFlag, seed, storeids, storevalues, symmetryflag, type ; int *newToOld, *oldToNew ; int stats[20] ; IV *newToOldIV, *oldToNewIV, *ownersIV ; IVL *adjIVL, *symbfacIVL ; SolveMap *solvemap ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ /* -------------------- get input parameters -------------------- */ if ( argc != 14 ) { fprintf(stdout, "\n" "\n usage: %s msglvl msgFile type symmetryflag patchAndGoFlag" "\n fudge toosmall storeids storevalues" "\n matrixFileName rhsFileName seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of entries" "\n 1 (SPOOLES_REAL) -- real entries" "\n 2 (SPOOLES_COMPLEX) -- complex entries" "\n symmetryflag -- type of matrix" "\n 0 (SPOOLES_SYMMETRIC) -- symmetric entries" "\n 1 
(SPOOLES_HERMITIAN) -- Hermitian entries" "\n 2 (SPOOLES_NONSYMMETRIC) -- nonsymmetric entries" "\n patchAndGoFlag -- flag for the patch-and-go strategy" "\n 0 -- none, stop factorization" "\n 1 -- optimization strategy" "\n 2 -- structural analysis strategy" "\n fudge -- perturbation parameter" "\n toosmall -- upper bound on a small pivot" "\n storeids -- flag to store ids of small pivots" "\n storevalues -- flag to store perturbations" "\n matrixFileName -- matrix file name, format" "\n nrow ncol nent" "\n irow jcol entry" "\n ..." "\n note: indices are zero based" "\n rhsFileName -- right hand side file name, format" "\n nrow nrhs " "\n ..." "\n jrow entry(jrow,0) ... entry(jrow,nrhs-1)" "\n ..." "\n seed -- random number seed, used for ordering" "\n nthread -- number of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; symmetryflag = atoi(argv[4]) ; patchAndGoFlag = atoi(argv[5]) ; fudge = atof(argv[6]) ; toosmall = atof(argv[7]) ; storeids = atoi(argv[8]) ; storevalues = atoi(argv[9]) ; matrixFileName = argv[10] ; rhsFileName = argv[11] ; seed = atoi(argv[12]) ; nthread = atoi(argv[13]) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 1: read the entries from the input file and create the InpMtx object -------------------------------------------- */ if ( (inputFile = fopen(matrixFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", matrixFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d %d", &nrow, &ncol, &nent) ; neqns = nrow ; mtxA = InpMtx_new() ; InpMtx_init(mtxA, INPMTX_BY_ROWS, type, nent, 0) ; if ( type == SPOOLES_REAL ) { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le", 
&irow, &jcol, &value) ; InpMtx_inputRealEntry(mtxA, irow, jcol, value) ; } } else { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le %le", &irow, &jcol, &real, &imag) ; InpMtx_inputComplexEntry(mtxA, irow, jcol, real, imag) ; } } fclose(inputFile) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n input matrix") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------- STEP 2 : find a low-fill ordering (1) create the Graph object (2) order the graph using multiple minimum degree ------------------------------------------------- */ graph = Graph_new() ; adjIVL = InpMtx_fullAdjacency(mtxA) ; nedges = IVL_tsize(adjIVL) ; Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n graph of the input matrix") ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n front tree from ordering") ; ETree_writeForHumanEye(frontETree, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 3: get the permutation, permute the matrix and front tree and get the symbolic factorization ----------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; oldToNew = IV_entries(oldToNewIV) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; newToOld = IV_entries(newToOldIV) ; ETree_permuteVertices(frontETree, oldToNewIV) ; InpMtx_permute(mtxA, oldToNew, oldToNew) ; InpMtx_mapToUpperTriangle(mtxA) ; InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ; if ( msglvl > 1 ) { 
fprintf(msgFile, "\n\n old-to-new permutation vector") ; IV_writeForHumanEye(oldToNewIV, msgFile) ; fprintf(msgFile, "\n\n new-to-old permutation vector") ; IV_writeForHumanEye(newToOldIV, msgFile) ; fprintf(msgFile, "\n\n front tree after permutation") ; ETree_writeForHumanEye(frontETree, msgFile) ; fprintf(msgFile, "\n\n input matrix after permutation") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n\n symbolic factorization") ; IVL_writeForHumanEye(symbfacIVL, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 4: initialize the front matrix object and the PatchAndGoInfo object to handle small pivots ------------------------------------------ */ frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 1, toosmall, fudge, storeids, storevalues) ; } else if ( patchAndGoFlag == 2 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 2, toosmall, fudge, storeids, storevalues) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 5: setup the domain decomposition map ------------------------------------------ */ if ( nthread > (nfront = FrontMtx_nfront(frontmtx)) ) { nthread = nfront ; } cumopsDV = DV_new() ; DV_init(cumopsDV, nthread, NULL) ; ownersIV = ETree_ddMap(frontETree, type, symmetryflag, cumopsDV, 1./(2.*nthread)) ; DV_free(cumopsDV) ; /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 6: compute the numeric 
factorization in parallel ----------------------------------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ; DVfill(10, cpus, 0.0) ; IVfill(20, stats, 0) ; lookahead = 0 ; rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, 0.0, chvmanager, ownersIV, lookahead, &error, cpus, stats, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } else if ( patchAndGoFlag == 2 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } if ( frontmtx->patchinfo->fudgeDV != NULL ) { fprintf(msgFile, "\n perturbations") ; DV_writeForHumanEye(frontmtx->patchinfo->fudgeDV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } ChvManager_free(chvmanager) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } if ( rootchv != NULL ) { fprintf(msgFile, "\n\n matrix found to be singular\n") ; spoolesFatal(); } if ( error >= 0 ) { fprintf(msgFile, "\n\n fatal error at front %d\n", error) ; spoolesFatal(); } /* -------------------------------------- STEP 7: post-process the factorization -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------- STEP 8: read the right hand side matrix B ----------------------------------------- */ if ( (inputFile = fopen(rhsFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", 
rhsFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d", &nrow, &nrhs) ; mtxB = DenseMtx_new() ; DenseMtx_init(mtxB, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxB) ; if ( type == SPOOLES_REAL ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le", &value) ; DenseMtx_setRealEntry(mtxB, jrow, jrhs, value) ; } } } else { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le %le", &real, &imag) ; DenseMtx_setComplexEntry(mtxB, jrow, jrhs, real, imag) ; } } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n rhs matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------------- STEP 9: permute the right hand side into the original ordering -------------------------------------------------------------- */ DenseMtx_permuteRows(mtxB, oldToNewIV) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n right hand side matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 10: get the solve map object for the parallel solve -------------------------------------------------------- */ solvemap = SolveMap_new() ; SolveMap_ddMap(solvemap, type, FrontMtx_upperBlockIVL(frontmtx), FrontMtx_lowerBlockIVL(frontmtx), nthread, ownersIV, FrontMtx_frontTree(frontmtx), seed, msglvl, msgFile) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 11: solve the linear system in parallel -------------------------------------------- */ mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, 0, neqns, 
nrhs, 1, neqns) ; DenseMtx_zero(mtxX) ; FrontMtx_MT_solve(frontmtx, mtxX, mtxB, mtxmanager, solvemap, cpus, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n solution matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 12: permute the solution into the original ordering -------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------- free memory ----------- */ FrontMtx_free(frontmtx) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; IV_free(newToOldIV) ; IV_free(oldToNewIV) ; InpMtx_free(mtxA) ; ETree_free(frontETree) ; IVL_free(symbfacIVL) ; SubMtxManager_free(mtxmanager) ; Graph_free(graph) ; SolveMap_free(solvemap) ; IV_free(ownersIV) ; /*--------------------------------------------------------------------*/ return(1) ; }
//static void factor_MT(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag) void factor_MT(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag) { Graph *graph; IV *ownersIV; IVL *symbfacIVL; Chv *rootchv; /* Initialize pfi: */ pfi->size = size; pfi->msgFile = msgFile; DVfill(10, pfi->cpus, 0.0); /* * STEP 1 : find a low-fill ordering * (1) create the Graph object */ ssolve_creategraph(&graph, &pfi->frontETree, mtxA, size, msgFile); /* * STEP 2: get the permutation, permute the matrix and * front tree and get the symbolic factorization */ ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbfacIVL, pfi->frontETree, mtxA, msgFile, symmetryflag); /* * STEP 3: Prepare distribution to multiple threads/cpus */ { DV *cumopsDV; int nfront; nfront = ETree_nfront(pfi->frontETree); pfi->nthread = num_cpus; if (pfi->nthread > nfront) pfi->nthread = nfront; cumopsDV = DV_new(); DV_init(cumopsDV, pfi->nthread, NULL); ownersIV = ETree_ddMap(pfi->frontETree, SPOOLES_REAL, symmetryflag, cumopsDV, 1. / (2. 
* pfi->nthread)); if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n map from fronts to threads"); IV_writeForHumanEye(ownersIV, msgFile); fprintf(msgFile, "\n\n factor operations for each front"); DV_writeForHumanEye(cumopsDV, msgFile); fflush(msgFile); } else { fprintf(msgFile, "\n\n Using %d threads\n", pfi->nthread); } DV_free(cumopsDV); } /* * STEP 4: initialize the front matrix object */ { pfi->frontmtx = FrontMtx_new(); pfi->mtxmanager = SubMtxManager_new(); SubMtxManager_init(pfi->mtxmanager, LOCK_IN_PROCESS, 0); FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbfacIVL, SPOOLES_REAL, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_PIVOTING, LOCK_IN_PROCESS, 0, NULL, pfi->mtxmanager, DEBUG_LVL, pfi->msgFile); } /* * STEP 5: compute the numeric factorization in parallel */ { ChvManager *chvmanager; int stats[20]; int error; chvmanager = ChvManager_new(); ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1); IVfill(20, stats, 0); rootchv = FrontMtx_MT_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU, MAGIC_DTOL, chvmanager, ownersIV, 0, &error, pfi->cpus, stats, DEBUG_LVL, pfi->msgFile); ChvManager_free(chvmanager); if (DEBUG_LVL > 1) { fprintf(msgFile, "\n\n factor matrix"); FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile); fflush(pfi->msgFile); } if (rootchv != NULL) { fprintf(pfi->msgFile, "\n\n matrix found to be singular\n"); exit(-1); } if (error >= 0) { fprintf(pfi->msgFile, "\n\n fatal error at front %d", error); exit(-1); } } /* * STEP 6: post-process the factorization */ ssolve_postfactor(pfi->frontmtx, pfi->msgFile); /* * STEP 7: get the solve map object for the parallel solve */ { pfi->solvemap = SolveMap_new(); SolveMap_ddMap(pfi->solvemap, symmetryflag, FrontMtx_upperBlockIVL(pfi->frontmtx), FrontMtx_lowerBlockIVL(pfi->frontmtx), pfi->nthread, ownersIV, FrontMtx_frontTree(pfi->frontmtx), RNDSEED, DEBUG_LVL, pfi->msgFile); } /* cleanup: */ InpMtx_free(mtxA); IVL_free(symbfacIVL); Graph_free(graph); IV_free(ownersIV); }
/*
 * factor -- order, permute and factor the matrix mtxA using the serial
 * SPOOLES factorization.
 *
 *   pfi          -- factorization record filled in by this routine
 *                   (size, msgFile, cpus, frontETree, permutations,
 *                   frontmtx, mtxmanager); solvemap is set to NULL
 *   mtxA         -- input matrix; it is freed before returning, so the
 *                   caller must not use it afterwards
 *   size         -- passed to ssolve_creategraph() and stored in pfi
 *   msgFile      -- stream for diagnostic output
 *   symmetryflag -- symmetry flag handed to the SPOOLES routines
 *
 * The process exits with status -1 when the factorization returns a
 * non-NULL root chevron or reports an error front index >= 0.
 */
void factor(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int symmetryflag)
{
	Graph *adjGraph;
	IVL *symbolicIVL;
	ChvManager *chvPool;
	Chv *leftover;
	int failedFront;
	int factorStats[20];

	/* record the basics; the serial path has no solve map */
	pfi->size = size;
	pfi->msgFile = msgFile;
	pfi->solvemap = NULL;
	DVfill(10, pfi->cpus, 0.0);

	/* step 1: graph of the matrix and low-fill ordering */
	ssolve_creategraph(&adjGraph, &pfi->frontETree, mtxA, size, pfi->msgFile);

	/* step 2: permutations, permuted matrix, symbolic factorization */
	ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbolicIVL,
			pfi->frontETree, mtxA, pfi->msgFile, symmetryflag);

	/* step 3: front matrix and its submatrix manager (no locking) */
	pfi->frontmtx = FrontMtx_new();
	pfi->mtxmanager = SubMtxManager_new();
	SubMtxManager_init(pfi->mtxmanager, NO_LOCK, 0);
	FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbolicIVL,
		      SPOOLES_REAL, symmetryflag, FRONTMTX_DENSE_FRONTS,
		      SPOOLES_PIVOTING, NO_LOCK, 0, NULL,
		      pfi->mtxmanager, DEBUG_LVL, pfi->msgFile);

	/* step 4: numeric factorization */
	chvPool = ChvManager_new();
	ChvManager_init(chvPool, NO_LOCK, 1);
	IVfill(20, factorStats, 0);
	leftover = FrontMtx_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU,
					 MAGIC_DTOL, chvPool, &failedFront,
					 pfi->cpus, factorStats, DEBUG_LVL,
					 pfi->msgFile);
	ChvManager_free(chvPool);
	if (DEBUG_LVL > 1) {
		fprintf(msgFile, "\n\n factor matrix");
		FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile);
		fflush(msgFile);
	}
	if (leftover != NULL) {
		fprintf(pfi->msgFile, "\n\n matrix found to be singular\n");
		exit(-1);
	}
	if (failedFront >= 0) {
		fprintf(pfi->msgFile, "\n\nerror encountered at front %d", failedFront);
		exit(-1);
	}

	/* step 5: post-process the factorization */
	ssolve_postfactor(pfi->frontmtx, pfi->msgFile);

	/* release everything that is not kept in pfi */
	IVL_free(symbolicIVL);
	InpMtx_free(mtxA);
	Graph_free(adjGraph);
}
/* --------------------------------------------------------------------- purpose -- to compute the factorization of A - sigma * B note: all variables in the calling sequence are references to allow call from fortran. input parameters data -- pointer to bridge data object psigma -- shift for the matrix pencil ppvttol -- pivot tolerance *ppvttol = 0.0 --> no pivoting used *ppvttol != 0.0 --> pivoting used, entries in factor are bounded above by 1/pvttol in magnitude output parameters *pinertia -- on return contains the number of negative eigenvalues *perror -- on return contains an error code 1 -- error found during factorization 0 -- normal return -1 -- psigma is NULL -2 -- ppvttol is NULL -3 -- data is NULL -4 -- pinertia is NULL created -- 98aug10, cca & jcp --------------------------------------------------------------------- */ void Factor ( double *psigma, double *ppvttol, void *data, int *pinertia, int *perror ) { Bridge *bridge = (Bridge *) data ; Chv *rootchv ; ChvManager *chvmanager ; double droptol=0.0, tau ; double cpus[10] ; int stats[20] ; int nnegative, nzero, npositive, pivotingflag ; #if MYDEBUG > 0 double t1, t2 ; MARKTIME(t1) ; count_Factor++ ; fprintf(stdout, "\n (%d) Factor()", count_Factor) ; fflush(stdout) ; #endif /* --------------- check the input --------------- */ if ( psigma == NULL ) { fprintf(stderr, "\n error in Factor()" "\n psigma is NULL\n") ; *perror = -1 ; return ; } if ( ppvttol == NULL ) { fprintf(stderr, "\n error in Factor()" "\n ppvttol is NULL\n") ; *perror = -2 ; return ; } if ( data == NULL ) { fprintf(stderr, "\n error in Factor()" "\n data is NULL\n") ; *perror = -3 ; return ; } if ( pinertia == NULL ) { fprintf(stderr, "\n error in Factor()" "\n pinertia is NULL\n") ; *perror = -4 ; return ; } if ( perror == NULL ) { fprintf(stderr, "\n error in Factor()" "\n perror is NULL\n") ; return ; } /* ---------------------------------- set the shift in the pencil object ---------------------------------- */ 
bridge->pencil->sigma[0] = -(*psigma) ; bridge->pencil->sigma[1] = 0.0 ; /* ----------------------------------------------------- clear the front matrix and submatrix mananger objects ----------------------------------------------------- */ FrontMtx_clearData(bridge->frontmtx); SubMtxManager_clearData(bridge->mtxmanager); /* ----------------------------------------------------------- set the pivot tolerance. NOTE: spooles's "tau" parameter is a bound on the magnitude of the factor entries, and is the recipricol of that of the pivot tolerance of the lanczos code ----------------------------------------------------------- */ if ( *ppvttol == 0.0 ) { tau = 10.0 ; pivotingflag = SPOOLES_NO_PIVOTING ; } else { tau = (1.0)/(*ppvttol) ; pivotingflag = SPOOLES_PIVOTING ; } /* ---------------------------------- initialize the front matrix object ---------------------------------- */ FrontMtx_init(bridge->frontmtx, bridge->frontETree, bridge->symbfacIVL, SPOOLES_REAL, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, 0, NULL, bridge->mtxmanager, bridge->msglvl, bridge->msgFile) ; /* ------------------------- compute the factorization ------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1); IVfill(20, stats, 0) ; DVfill(10, cpus, 0.0) ; rootchv = FrontMtx_factorPencil(bridge->frontmtx, bridge->pencil, tau, droptol, chvmanager, perror, cpus, stats, bridge->msglvl, bridge->msgFile); ChvManager_free(chvmanager); /* ---------------------------- if matrix is singular then set error flag and return ---------------------------- */ if ( rootchv != NULL ) { *perror = 1 ; return ; } /* ------------------------------------------------------------------ post-process the factor matrix, convert from fronts to submatrices ------------------------------------------------------------------ */ FrontMtx_postProcess(bridge->frontmtx, bridge->msglvl, bridge->msgFile); /* ------------------- compute the inertia ------------------- */ 
FrontMtx_inertia(bridge->frontmtx, &nnegative, &nzero, &npositive) ; *pinertia = nnegative; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_Factor += t2 - t1 ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(stdout) ; #endif return ; }
/*
   ------------------------------------------------------------------
   to fill xDV and yDV with a log10 profile of the magnitudes of
   the entries in the DV object. tausmall and taubig provide
   cutoffs within which to examine the entries. pnzero, pnsmall
   and pnbig are addresses to hold the number of entries zero,
   no larger than tausmall and no smaller than taubig, respectively.

   the entries whose magnitude lies strictly between tausmall and
   taubig are placed into npts buckets that are equally spaced in
   log10 between the smallest and largest such magnitude.
      yDV[ipt] -- number of entries in bucket ipt
      xDV[ipt] -- log10 of the mean magnitude in bucket ipt, or the
                  left edge of the bucket when the bucket is empty

   special cases
      no entry lies strictly between the cutoffs
         --> xDV and yDV are resized to npts and filled with zeros
      npts == 1, or all in-range magnitudes are equal
         --> the bucket width is zero and every in-range entry
             is placed in bucket 0

   the program exits with status -1 on bad input.

   created -- 97feb14, cca
   ------------------------------------------------------------------
*/
void
DV_log10profile (
   DV       *dv,
   int      npts,
   DV       *xDV,
   DV       *yDV,
   double   tausmall,
   double   taubig,
   int      *pnzero,
   int      *pnsmall,
   int      *pnbig
) {
double   deltaVal, maxval, minval, val ;
double   *dvec, *sums, *x, *y ;
int      ii, ipt, nbig, nsmall, nzero, size ;
/*
   ---------------
   check the input
   ---------------
*/
if (  dv == NULL || npts <= 0 || xDV == NULL || yDV == NULL
   || tausmall < 0.0 || taubig < 0.0 || tausmall > taubig
   || pnzero == NULL || pnsmall == NULL || pnbig == NULL ) {
   fprintf(stderr,
      "\n fatal error in DV_log10profile(%p,%d,%p,%p,%f,%f,%p,%p,%p)"
      "\n bad input\n",
      (void *) dv, npts, (void *) xDV, (void *) yDV, tausmall, taubig,
      (void *) pnzero, (void *) pnsmall, (void *) pnbig) ;
   exit(-1) ;
}
/*
   -------------------------------------
   find the largest and smallest entries
   in the range (tausmall, taubig)
   -------------------------------------
*/
nbig = nsmall = nzero = 0 ;
minval = maxval = 0.0 ;
DV_sizeAndEntries(dv, &size, &dvec) ;
for ( ii = 0 ; ii < size ; ii++ ) {
   val = fabs(dvec[ii]) ;
   if ( val == 0.0 ) {
      nzero++ ;
   } else if ( val <= tausmall ) {
      nsmall++ ;
   } else if ( val >= taubig ) {
      nbig++ ;
   } else {
      if ( minval == 0.0 || minval > val ) {
         minval = val ;
      }
      if ( maxval < val ) {
         maxval = val ;
      }
   }
}
*pnzero  = nzero  ;
*pnsmall = nsmall ;
*pnbig   = nbig   ;
#if MYDEBUG > 0
fprintf(stdout,
  "\n nzero = %d, minval = %e, nsmall = %d, maxval = %e, nbig = %d",
  nzero, minval, nsmall, maxval, nbig) ;
#endif
/*
   ------------------
   set up the buckets
   ------------------
*/
DV_setSize(xDV, npts) ;
DV_setSize(yDV, npts) ;
x = DV_entries(xDV) ;
y = DV_entries(yDV) ;
DVfill(npts, x, 0.0) ;
DVfill(npts, y, 0.0) ;
if ( minval == 0.0 ) {
/*
   minval is only changed by an in-range (hence nonzero) entry, so
   nothing lies strictly between the cutoffs. return the zero-filled
   vectors instead of taking log10(0).
*/
   return ;
}
sums = DVinit(npts, 0.0) ;
minval = log10(minval) ;
maxval = log10(maxval) ;
if ( npts > 1 ) {
   deltaVal = (maxval - minval)/(npts - 1) ;
} else {
/*
   a single bucket has no width; avoid the division by zero
*/
   deltaVal = 0.0 ;
}
/*
   --------------------------------
   fill the sums and counts vectors
   --------------------------------
*/
for ( ii = 0 ; ii < size ; ii++ ) {
   val = fabs(dvec[ii]) ;
   if ( tausmall < val && val < taubig ) {
      if ( deltaVal > 0.0 ) {
         ipt = (int) ((log10(val) - minval) / deltaVal) ;
/*
         guard against roundoff pushing the index out of range
*/
         if ( ipt < 0 ) {
            ipt = 0 ;
         } else if ( ipt >= npts ) {
            ipt = npts - 1 ;
         }
      } else {
         ipt = 0 ;
      }
      sums[ipt] += val ;
      y[ipt]++ ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n sum(y) = %.0f", DV_sum(yDV)) ;
#endif
/*
   ---------------------------
   set the x-coordinate vector
   ---------------------------
*/
for ( ipt = 0 ; ipt < npts ; ipt++ ) {
   if ( sums[ipt] == 0.0 ) {
      x[ipt] = minval + ipt*deltaVal ;
   } else {
      x[ipt] = log10(sums[ipt]/y[ipt]) ;
   }
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DVfree(sums) ;

return ; }