DenseMtx *fsolve(struct factorinfo *pfi, DenseMtx *mtxB) { DenseMtx *mtxX; /* * STEP 6: permute the right hand side into the new ordering */ { DenseMtx_permuteRows(mtxB, pfi->oldToNewIV); if (DEBUG_LVL > 1) { fprintf(pfi->msgFile, "\n\n right hand side matrix in new ordering"); DenseMtx_writeForHumanEye(mtxB, pfi->msgFile); fflush(pfi->msgFile); } } /* * STEP 7: solve the linear system */ { mtxX = DenseMtx_new(); DenseMtx_init(mtxX, SPOOLES_REAL, 0, 0, pfi->size, 1, 1, pfi->size); DenseMtx_zero(mtxX); FrontMtx_solve(pfi->frontmtx, mtxX, mtxB, pfi->mtxmanager, pfi->cpus, DEBUG_LVL, pfi->msgFile); if (DEBUG_LVL > 1) { fprintf(pfi->msgFile, "\n\n solution matrix in new ordering"); DenseMtx_writeForHumanEye(mtxX, pfi->msgFile); fflush(pfi->msgFile); } } /* * STEP 8: permute the solution into the original ordering */ ssolve_permuteout(mtxX, pfi->newToOldIV, pfi->msgFile); /* cleanup: */ DenseMtx_free(mtxB); return mtxX; }
int tfqmrl ( int n_matrixSize, int type, int symmetryflag, InpMtx *mtxA, FrontMtx *Precond, DenseMtx *mtxX, DenseMtx *mtxB, int itermax, double convergetol, int msglvl, FILE *msgFile ) { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *vecD, *vecR, *vecT, *vecU1, *vecU2, *vecV, *vecW; DenseMtx *vecX, *vecY1, *vecY2 ; double Alpha, Beta, Cee, Eta, Rho, Rho_new ; double Sigma, Tau, Theta; double Init_norm, ratio, Res_norm; double error_trol, m, Rtmp; double t1, t2, cpus[9] ; double one[2] = {1.0, 0.0}, zero[2] ={0.0, 0.0} ; double Tiny = 0.1e-28; int Iter, Imv, neqns; int stats[6] ; neqns = n_matrixSize; /* -------------------- init the vectors in TFQMRL -------------------- */ vecD = DenseMtx_new() ; DenseMtx_init(vecD, type, 0, 0, neqns, 1, 1, neqns) ; vecR = DenseMtx_new() ; DenseMtx_init(vecR, type, 0, 0, neqns, 1, 1, neqns) ; vecT = DenseMtx_new() ; DenseMtx_init(vecT, type, 0, 0, neqns, 1, 1, neqns) ; vecU1 = DenseMtx_new() ; DenseMtx_init(vecU1, type, 0, 0, neqns, 1, 1, neqns) ; vecU2 = DenseMtx_new() ; DenseMtx_init(vecU2, type, 0, 0, neqns, 1, 1, neqns) ; vecV = DenseMtx_new() ; DenseMtx_init(vecV, type, 0, 0, neqns, 1, 1, neqns) ; vecW = DenseMtx_new() ; DenseMtx_init(vecW, type, 0, 0, neqns, 1, 1, neqns) ; vecX = DenseMtx_new() ; DenseMtx_init(vecX, type, 0, 0, neqns, 1, 1, neqns) ; vecY1 = DenseMtx_new() ; DenseMtx_init(vecY1, type, 0, 0, neqns, 1, 1, neqns) ; vecY2 = DenseMtx_new() ; DenseMtx_init(vecY2, type, 0, 0, neqns, 1, 1, neqns) ; /* -------------------------- Initialize the iterations -------------------------- */ /* ---- Set initial guess as zero ---- */ DenseMtx_zero(vecX) ; DenseMtx_colCopy(vecT, 0, mtxB, 0); /* */ FrontMtx_solve(Precond, vecR, vecT, Precond->manager, cpus, msglvl, msgFile) ; /* */ Init_norm = DenseMtx_twoNormOfColumn(vecR,0); if ( Init_norm == 0.0 ){ Init_norm = 1.0; }; error_trol = Init_norm * convergetol ; fprintf(msgFile, "\n TFQMRL Initial norml: %6.2e ", Init_norm ) ; fprintf(msgFile, "\n TFQMRL Conveg. Control: %7.3e ", convergetol ) ; fprintf(msgFile, "\n TFQMRL Convergen Control: %7.3e ",error_trol ) ; DenseMtx_zero(vecD) ; DenseMtx_zero(vecU1) ; DenseMtx_zero(vecU2) ; DenseMtx_zero(vecY2) ; /* DenseMtx_copy(vecR, mtxB); */ DenseMtx_colCopy(vecW, 0, vecR, 0); DenseMtx_colCopy(vecY1, 0, vecR, 0); Iter = 0; Imv = 0; switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } /* */ FrontMtx_solve(Precond, vecV, vecT, Precond->manager, cpus, msglvl, msgFile) ; /* */ Imv++; DenseMtx_colCopy(vecU1, 0, vecV, 0); /* */ Theta = 0.0; Eta = 0.0; Tau = Init_norm ; Rho = Tau * Tau ; /* ------------------------------ TFQMRL Iteration start ------------------------------ */ MARKTIME(t1) ; while ( Iter <= itermax ) { Iter++; DenseMtx_colDotProduct(vecV, 0, vecR, 0, &Sigma); if (Sigma == 0){ fprintf(msgFile, "\n\n Fatal Error, \n" " TFQMRL Breakdown, Sigma = 0 !!") ; Imv = -1; goto end; }; Alpha = Rho/Sigma; /* ---------------- Odd step --------------- */ m = 2 * Iter - 1; Rtmp=-Alpha; DenseMtx_colGenAxpy(one, vecW, 0, &Rtmp, vecU1, 0); Rtmp = Theta * Theta * Eta / Alpha ; DenseMtx_colGenAxpy(&Rtmp, vecD, 0, one, vecY1, 0); Theta = DenseMtx_twoNormOfColumn(vecW,0)/Tau; Cee = 1.0/sqrt(1.0 + Theta*Theta); Tau = Tau * Theta * Cee ; Eta = Cee * Cee * Alpha ; DenseMtx_colGenAxpy(one, vecX, 0, &Eta, vecD, 0); fprintf(msgFile, "\n\n Odd step at %d", Imv); fprintf(msgFile, " \n Tau is : %7.3e", Tau) ; /* Debug purpose: Check the convergence history for the true residual norm */ /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecX) ; DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; */ /* ---------------- Convergence Test --------------- */ if (Tau * sqrt(m + 1) <= error_trol ) { /* */ DenseMtx_colCopy(mtxX, 0, vecX, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, mtxX) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n after TFQMRL") ; goto end; }; /* ---------------- Even step --------------- */ DenseMtx_colCopy(vecY2, 0, vecY1, 0); Rtmp=-Alpha; DenseMtx_colGenAxpy(one, vecY2, 0, &Rtmp, vecV, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecY2) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY2) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY2) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } FrontMtx_solve(Precond, vecU2, vecT, Precond->manager, cpus, msglvl, msgFile) ; Imv++; m = 2 * Iter ; Rtmp = -Alpha; DenseMtx_colGenAxpy(one, vecW, 0, &Rtmp, vecU2, 0); Rtmp = Theta * Theta * Eta / Alpha ; DenseMtx_colGenAxpy(&Rtmp, vecD, 0, one, vecY2, 0); Theta = DenseMtx_twoNormOfColumn(vecW,0)/Tau; Cee = 1.0/sqrt(1.0 + Theta*Theta); Tau = Tau * Theta * Cee ; Eta = Cee * Cee * Alpha ; DenseMtx_colGenAxpy(one, vecX, 0, &Eta, vecD, 0); fprintf(msgFile, "\n\n Even step at %d", Imv) ; /* ---------------- Convergence Test for even step --------------- */ if (Tau * sqrt(m + 1) <= error_trol ) { DenseMtx_colCopy(mtxX, 0, vecX, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, mtxX) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n after TFQMRL") ; goto end; }; if (Rho == 0){ fprintf(msgFile, "\n\n Fatal Error, \n" " TFQMRL Breakdown, Rho = 0 !!") ; Imv = -1; goto end; }; DenseMtx_colDotProduct(vecW, 0, vecR, 0, &Rho_new); Beta = Rho_new / Rho; Rho = Rho_new ; DenseMtx_colCopy(vecY1, 0, vecY2, 0); DenseMtx_colGenAxpy(&Beta, vecY1, 0, one, vecW, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecY1) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } FrontMtx_solve(Precond, vecU1, vecT, Precond->manager, cpus, msglvl, msgFile) ; Imv++; /* */ DenseMtx_colCopy(vecT, 0, vecU2, 0); DenseMtx_colGenAxpy(one, vecT, 0, &Beta, vecV, 0); DenseMtx_colCopy(vecV, 0, vecT, 0); DenseMtx_colGenAxpy(&Beta, vecV, 0, one, vecU1, 0); Rtmp = Tau*sqrt(m + 1)/Init_norm ; fprintf(msgFile, "\n\n At iteration %d" " the convergence ratio is %12.4e", Imv, Rtmp) ; } /* End of while loop */ MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Total iteration time is : %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n TFQMRL did not Converge !") ; fprintf(msgFile, "\n\n after TFQMRL") ; DenseMtx_colCopy(mtxX, 0, vecX, 0); /* ------------------------ free the working storage ------------------------ */ end: DenseMtx_free(vecD) ; DenseMtx_free(vecR) ; DenseMtx_free(vecT) ; DenseMtx_free(vecU1) ; DenseMtx_free(vecU2) ; DenseMtx_free(vecV) ; DenseMtx_free(vecW) ; DenseMtx_free(vecX) ; DenseMtx_free(vecY1) ; DenseMtx_free(vecY2) ; fprintf(msgFile, "\n") ; return(Imv) ; }
PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x) { Mat_Spooles *lu = (Mat_Spooles*)A->spptr; PetscScalar *array; DenseMtx *mtxY, *mtxX ; PetscErrorCode ierr; PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv; #if defined(PETSC_USE_COMPLEX) double x_real,x_imag; #else double *entX; #endif PetscFunctionBegin; mtxY = DenseMtx_new(); DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */ ierr = VecGetArray(b,&array);CHKERRQ(ierr); if (lu->options.useQR) { /* copy b to mtxY */ for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, irow, 0, *array++); #else DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow])); #endif } else { /* copy permuted b to mtxY */ iv = IV_entries(lu->oldToNewIV); for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++); #else DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow])); #endif } ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); mtxX = DenseMtx_new(); DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns); if (lu->options.useQR) { FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* permute solution into original ordering, then copy to x */ DenseMtx_permuteRows(mtxX, lu->newToOldIV); ierr = VecGetArray(x,&array);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) entX = DenseMtx_entries(mtxX); DVcopy(neqns, array, entX); #else for (irow=0; irow<nrow; irow++){ DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag); array[irow] = x_real+x_imag*PETSC_i; } #endif ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); /* free memory */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); PetscFunctionReturn(0); }
int main ( int argc, char *argv[] ) /* ----------------------------------------------------- test the factor method for a grid matrix (1) construct a linear system for a nested dissection ordering on a regular grid (2) create a solution matrix object (3) multiply the solution with the matrix to get a right hand side matrix object (4) factor the matrix (5) solve the system created -- 98may16, cca ----------------------------------------------------- */ { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX, *mtxZ ; FrontMtx *frontmtx ; InpMtx *mtxA ; SubMtxManager *mtxmanager ; double cputotal, droptol, factorops ; double cpus[9] ; Drand drand ; double nops, tau, t1, t2 ; ETree *frontETree ; FILE *msgFile ; int error, lockflag, maxsize, maxzeros, msglvl, neqns, n1, n2, n3, nrhs, nzf, pivotingflag, seed, sparsityflag, symmetryflag, type ; int stats[6] ; IVL *symbfacIVL ; if ( argc != 17 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n1 n2 n3 maxzeros maxsize" "\n seed type symmetryflag sparsityflag " "\n pivotingflag tau droptol lockflag nrhs" "\n msglvl -- message level" "\n msgFile -- message file" "\n n1 -- number of grid points in the first direction" "\n n2 -- number of grid points in the second direction" "\n n3 -- number of grid points in the third direction" "\n maxzeros -- max number of zeroes in a front" "\n maxsize -- max number of internal nodes in a front" "\n seed -- random number seed" "\n type -- type of entries" "\n 1 --> real" "\n 2 --> complex" "\n symmetryflag -- symmetry flag" "\n 0 --> symmetric " "\n 1 --> hermitian" "\n 2 --> nonsymmetric" "\n sparsityflag -- sparsity flag" "\n 0 --> store dense fronts" "\n 1 --> store sparse fronts, use droptol to drop entries" "\n pivotingflag -- pivoting flag" "\n 0 --> do not pivot" "\n 1 --> enable pivoting" "\n tau -- upper bound on factor entries" "\n used only with pivoting" "\n droptol -- lower bound on factor entries" "\n used only with sparse fronts" "\n lockflag -- flag to specify lock status" "\n 0 --> mutex lock is not allocated or initialized" "\n 1 --> mutex lock is allocated and it can synchronize" "\n only threads in this process." "\n 2 --> mutex lock is allocated and it can synchronize" "\n only threads in this and other processes." "\n nrhs -- # of right hand sides" "\n", argv[0]) ; return(-1) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } n1 = atoi(argv[3]) ; n2 = atoi(argv[4]) ; n3 = atoi(argv[5]) ; maxzeros = atoi(argv[6]) ; maxsize = atoi(argv[7]) ; seed = atoi(argv[8]) ; type = atoi(argv[9]) ; symmetryflag = atoi(argv[10]) ; sparsityflag = atoi(argv[11]) ; pivotingflag = atoi(argv[12]) ; tau = atof(argv[13]) ; droptol = atof(argv[14]) ; lockflag = atoi(argv[15]) ; nrhs = atoi(argv[16]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n n1 -- %d" "\n n2 -- %d" "\n n3 -- %d" "\n maxzeros -- %d" "\n maxsize -- %d" "\n seed -- %d" "\n type -- %d" "\n symmetryflag -- %d" "\n sparsityflag -- %d" "\n pivotingflag -- %d" "\n tau -- %e" "\n droptol -- %e" "\n lockflag -- %d" "\n nrhs -- %d" "\n", argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed, type, symmetryflag, sparsityflag, pivotingflag, tau, droptol, lockflag, nrhs) ; fflush(msgFile) ; neqns = n1 * n2 * n3 ; /* -------------------------------------- initialize the random number generator -------------------------------------- */ Drand_setDefaultFields(&drand) ; Drand_init(&drand) ; Drand_setSeed(&drand, seed) ; /* Drand_setUniform(&drand, 0.0, 1.0) ; */ Drand_setNormal(&drand, 0.0, 1.0) ; /* -------------------------- generate the linear system -------------------------- */ mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type, symmetryflag, nrhs, seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n mtxA") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n mtxX") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fprintf(msgFile, "\n mtxB") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /* fprintf(msgFile, "\n neqns = %d ;", n1*n2*n3) ; fprintf(msgFile, "\n nrhs = %d ;", nrhs) ; fprintf(msgFile, "\n A = zeros(neqns, neqns) ;") ; fprintf(msgFile, "\n X = zeros(neqns, nrhs) ;") ; fprintf(msgFile, "\n B = zeros(neqns, nrhs) ;") ; InpMtx_writeForMatlab(mtxA, "A", msgFile) ; DenseMtx_writeForMatlab(mtxX, "X", msgFile) ; DenseMtx_writeForMatlab(mtxB, "B", msgFile) ; { int *ivec1 = InpMtx_ivec1(mtxA) ; int *ivec2 = InpMtx_ivec2(mtxA) ; double *dvec = InpMtx_dvec(mtxA) ; int ichv, ii, col, offset, row, nent = InpMtx_nent(mtxA) ; fprintf(msgFile, "\n coordType = %d", mtxA->coordType) ; fprintf(msgFile, "\n start of matrix output file") ; fprintf(msgFile, "\n %d %d %d", n1*n2*n3, n1*n2*n3, nent) ; for ( ii = 0 ; ii < nent ; ii++ ) { ichv = ivec1[ii] ; if ( (offset = ivec2[ii]) >= 0 ) { row = ichv, col = row + offset ; } else { col = ichv, row = col - offset ; } fprintf(msgFile, "\n %d %d %24.16e %24.16e", row, col, dvec[2*ii], dvec[2*ii+1]) ; } } { int ii, jj ; double imag, real ; fprintf(msgFile, "\n start of rhs output file") ; fprintf(msgFile, "\n %d %d", n1*n2*n3, nrhs) ; for ( ii = 0 ; ii < n1*n2*n3 ; ii++ ) { fprintf(msgFile, "\n %d ", ii) ; for ( jj = 0 ; jj < nrhs ; jj++ ) { DenseMtx_complexEntry(mtxB, ii, jj, &real, &imag) ; fprintf(msgFile, " %24.16e %24.16e", real, imag) ; } } } */ /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, lockflag, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, sparsityflag, pivotingflag, lockflag, 0, NULL, mtxmanager, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n nendD = %d, nentL = %d, nentU = %d", frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n front matrix initialized") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(frontETree, symmetryflag) ; factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ; fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, factorops, factorops/nzf) ; IVzero(6, stats) ; DVzero(9, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, lockflag, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, &error, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; if ( rootchv != NULL ) { fprintf(msgFile, "\n\n factorization did not complete") ; for ( chv = rootchv ; chv != NULL ; chv = chv->next ) { fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d", chv->id, chv->nD, chv->nL, chv->nU) ; } } if ( error >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d\n", error) ; exit(-1) ; } fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns", stats[0], stats[1], stats[2]) ; if ( frontmtx->rowadjIVL != NULL ) { fprintf(msgFile, "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ; } if ( frontmtx->coladjIVL != NULL ) { fprintf(msgFile, ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ; } if ( frontmtx->upperblockIVL != NULL ) { fprintf(msgFile, "\n %d fronts, %d entries in upperblockIVL", frontmtx->nfront, frontmtx->upperblockIVL->tsize) ; } if ( frontmtx->lowerblockIVL != NULL ) { fprintf(msgFile, ", %d entries in lowerblockIVL", frontmtx->lowerblockIVL->tsize) ; } fprintf(msgFile, "\n %d entries in D, %d entries in L, %d entries in U", stats[3], stats[4], stats[5]) ; fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ; cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %% MATLAB file: front factor matrix") ; FrontMtx_writeForMatlab(frontmtx, "L", "D", "U", msgFile) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* code to test out the IO methods. write the matrix to a file, free it, then read it back in. note: formatted files do not have much accuracy. */ /* FrontMtx_writeToFile(frontmtx, "temp.frontmtxb") ; FrontMtx_free(frontmtx) ; frontmtx = FrontMtx_new() ; FrontMtx_readFromFile(frontmtx, "temp.frontmtxb") ; frontmtx->manager = mtxmanager ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; */ /* ---------------- solve the system ---------------- */ neqns = mtxB->nrow ; nrhs = mtxB->ncol ; mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxZ) ; if ( type == SPOOLES_REAL ) { nops = frontmtx->nentD + 2*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 2*frontmtx->nentL ; } else { nops += 2*frontmtx->nentU ; } } else if ( type == SPOOLES_COMPLEX ) { nops = 8*frontmtx->nentD + 8*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 8*frontmtx->nentL ; } else { nops += 8*frontmtx->nentU ; } } nops *= nrhs ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } DVzero(6, cpus) ; MARKTIME(t1) ; FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager, cpus, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops", t2 - t1, 1.e-6*nops/(t2 - t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cputotal) ; } /* fprintf(msgFile, "\n Z = zeros(neqns, nrhs) ;") ; DenseMtx_writeForMatlab(mtxZ, "Z", msgFile) ; */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } DenseMtx_sub(mtxZ, mtxX) ; fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n after solve") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; DenseMtx_free(mtxZ) ; FrontMtx_free(frontmtx) ; ETree_free(frontETree) ; IVL_free(symbfacIVL) ; ChvManager_free(chvmanager) ; SubMtxManager_free(mtxmanager) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
NM_Status SpoolesSolver :: solve(SparseMtrx *A, FloatArray *b, FloatArray *x) { int errorValue, mtxType, symmetryflag; int seed = 30145, pivotingflag = 0; int *oldToNew, *newToOld; double droptol = 0.0, tau = 1.e300; double cpus [ 10 ]; int stats [ 20 ]; ChvManager *chvmanager; Chv *rootchv; InpMtx *mtxA; DenseMtx *mtxY, *mtxX; // first check whether Lhs is defined if ( !A ) { _error("solveYourselfAt: unknown Lhs"); } // and whether Rhs if ( !b ) { _error("solveYourselfAt: unknown Rhs"); } // and whether previous Solution exist if ( !x ) { _error("solveYourselfAt: unknown solution array"); } if ( x->giveSize() != b->giveSize() ) { _error("solveYourselfAt: size mismatch"); } Timer timer; timer.startTimer(); if ( A->giveType() != SMT_SpoolesMtrx ) { _error("solveYourselfAt: SpoolesSparseMtrx Expected"); } mtxA = ( ( SpoolesSparseMtrx * ) A )->giveInpMtrx(); mtxType = ( ( SpoolesSparseMtrx * ) A )->giveValueType(); symmetryflag = ( ( SpoolesSparseMtrx * ) A )->giveSymmetryFlag(); int i; int neqns = A->giveNumberOfRows(); int nrhs = 1; /* convert right-hand side to DenseMtx */ mtxY = DenseMtx_new(); DenseMtx_init(mtxY, mtxType, 0, 0, neqns, nrhs, 1, neqns); DenseMtx_zero(mtxY); for ( i = 0; i < neqns; i++ ) { DenseMtx_setRealEntry( mtxY, i, 0, b->at(i + 1) ); } if ( ( Lhs != A ) || ( this->lhsVersion != A->giveVersion() ) ) { // // lhs has been changed -> new factorization // Lhs = A; this->lhsVersion = A->giveVersion(); if ( frontmtx ) { FrontMtx_free(frontmtx); } if ( newToOldIV ) { IV_free(newToOldIV); } if ( oldToNewIV ) { IV_free(oldToNewIV); } if ( frontETree ) { ETree_free(frontETree); } if ( symbfacIVL ) { IVL_free(symbfacIVL); } if ( mtxmanager ) { SubMtxManager_free(mtxmanager); } if ( graph ) { Graph_free(graph); } /* * ------------------------------------------------- * STEP 3 : find a low-fill ordering * (1) create the Graph object * (2) order the graph using multiple minimum degree * ------------------------------------------------- */ int nedges; graph = Graph_new(); adjIVL = InpMtx_fullAdjacency(mtxA); nedges = IVL_tsize(adjIVL); Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n graph of the input matrix"); Graph_writeForHumanEye(graph, msgFile); fflush(msgFile); } frontETree = orderViaMMD(graph, seed, msglvl, msgFile); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n front tree from ordering"); ETree_writeForHumanEye(frontETree, msgFile); fflush(msgFile); } /* * ---------------------------------------------------- * STEP 4: get the permutation, permute the front tree, * permute the matrix and right hand side, and * get the symbolic factorization * ---------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree); oldToNew = IV_entries(oldToNewIV); newToOldIV = ETree_newToOldVtxPerm(frontETree); newToOld = IV_entries(newToOldIV); ETree_permuteVertices(frontETree, oldToNewIV); InpMtx_permute(mtxA, oldToNew, oldToNew); if ( symmetryflag == SPOOLES_SYMMETRIC || symmetryflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangle(mtxA); } InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS); InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS); symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n old-to-new permutation vector"); IV_writeForHumanEye(oldToNewIV, msgFile); fprintf(msgFile, "\n\n new-to-old permutation vector"); IV_writeForHumanEye(newToOldIV, msgFile); fprintf(msgFile, "\n\n front tree after permutation"); ETree_writeForHumanEye(frontETree, msgFile); fprintf(msgFile, "\n\n input matrix after permutation"); InpMtx_writeForHumanEye(mtxA, msgFile); fprintf(msgFile, "\n\n symbolic factorization"); IVL_writeForHumanEye(symbfacIVL, msgFile); fflush(msgFile); } Tree_writeToFile(frontETree->tree, (char*)"haggar.treef"); /*--------------------------------------------------------------------*/ /* * ------------------------------------------ * STEP 5: initialize the front matrix object * ------------------------------------------ */ frontmtx = FrontMtx_new(); mtxmanager = SubMtxManager_new(); SubMtxManager_init(mtxmanager, NO_LOCK, 0); FrontMtx_init(frontmtx, frontETree, symbfacIVL, mtxType, symmetryflag, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile); /*--------------------------------------------------------------------*/ /* * ----------------------------------------- * STEP 6: compute the numeric factorization * ----------------------------------------- */ chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); DVfill(10, cpus, 0.0); IVfill(20, stats, 0); rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, & errorValue, cpus, stats, msglvl, msgFile); ChvManager_free(chvmanager); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n factor matrix"); FrontMtx_writeForHumanEye(frontmtx, msgFile); fflush(msgFile); } if ( rootchv != NULL ) { fprintf(msgFile, "\n\n matrix found to be singular\n"); exit(-1); } if ( errorValue >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d", errorValue); exit(-1); } /*--------------------------------------------------------------------*/ /* * -------------------------------------- * STEP 7: post-process the factorization * -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n factor matrix after post-processing"); FrontMtx_writeForHumanEye(frontmtx, msgFile); fflush(msgFile); } /*--------------------------------------------------------------------*/ } /* * ---------------------------------------------------- * STEP 4: permute the right hand side * ---------------------------------------------------- */ DenseMtx_permuteRows(mtxY, oldToNewIV); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n right hand side matrix after permutation"); DenseMtx_writeForHumanEye(mtxY, msgFile); } /* * ------------------------------- * STEP 8: solve the linear system * ------------------------------- */ mtxX = DenseMtx_new(); DenseMtx_init(mtxX, mtxType, 0, 0, neqns, nrhs, 1, neqns); DenseMtx_zero(mtxX); FrontMtx_solve(frontmtx, mtxX, mtxY, mtxmanager, cpus, msglvl, msgFile); if ( msglvl > 2 ) { fprintf(msgFile, "\n\n solution matrix in new ordering"); DenseMtx_writeForHumanEye(mtxX, msgFile); fflush(msgFile); } /*--------------------------------------------------------------------*/ /* * ------------------------------------------------------- * STEP 9: permute the solution into the original ordering * ------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV); if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering"); DenseMtx_writeForHumanEye(mtxX, msgFile); fflush(msgFile); } // DenseMtx_writeForMatlab(mtxX, "x", msgFile) ; /*--------------------------------------------------------------------*/ /* fetch data to oofem vectors */ double *xptr = x->givePointer(); for ( i = 0; i < neqns; i++ ) { DenseMtx_realEntry(mtxX, i, 0, xptr + i); // printf ("x(%d) = %e\n", i+1, *(xptr+i)); } // DenseMtx_copyRowIntoVector(mtxX, 0, x->givePointer()); timer.stopTimer(); OOFEM_LOG_DEBUG( "SpoolesSolver info: user time consumed by solution: %.2fs\n", timer.getUtime() ); /* * ----------- * free memory * ----------- */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); /*--------------------------------------------------------------------*/ return ( 1 ); }
int zpcgr ( int n_matrixSize, int type, int symmetryflag, InpMtx *mtxA, FrontMtx *Precond, DenseMtx *mtxX, DenseMtx *mtxB, int itermax, double convergetol, int msglvl, FILE *msgFile ) { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxZ ; DenseMtx *vecP, *vecR, *vecQ ; DenseMtx *vecX, *vecZ ; double Alpha[2], Beta[2], Rho[2], Rho0[2], Rtmp[2]; double Init_norm, ratio, Res_norm ; double t1, t2, cpus[9] ; double one[2] = {1.0, 0.0}, zero[2] = {0.0, 0.0} ; double Tiny[2] = {0.1e-28, 0.0}; int Iter, neqns; int stats[6] ; if (symmetryflag != SPOOLES_HERMITIAN){ fprintf(msgFile, "\n\n Fatal Error, \n" " Matrix is not Hermitian in ZPCGR !!") ; spoolesFatal(); }; neqns = n_matrixSize; /* -------------------- init the vectors in ZPCGR -------------------- */ vecP = DenseMtx_new() ; DenseMtx_init(vecP, type, 0, 0, neqns, 1, 1, neqns) ; vecR = DenseMtx_new() ; DenseMtx_init(vecR, type, 0, 0, neqns, 1, 1, neqns) ; vecX = DenseMtx_new() ; DenseMtx_init(vecX, type, 0, 0, neqns, 1, 1, neqns) ; vecQ = DenseMtx_new() ; DenseMtx_init(vecQ, type, 0, 0, neqns, 1, 1, neqns) ; vecZ = DenseMtx_new() ; DenseMtx_init(vecZ, type, 0, 0, neqns, 1, 1, neqns) ; /* -------------------------- Initialize the iterations -------------------------- */ Init_norm = DenseMtx_twoNormOfColumn (mtxB, 0); if ( Init_norm == 0.0 ){ Init_norm = 1.0; }; ratio = 1.0; DenseMtx_zero(vecX) ; DenseMtx_colCopy (vecR, 0, mtxB, 0); MARKTIME(t1) ; Iter = 0; /* ------------------------------ Main Loop of the iterations ------------------------------ */ while ( ratio > convergetol && Iter <= itermax ) { Iter++; /* */ FrontMtx_solve(Precond, vecZ, vecR, Precond->manager, cpus, msglvl, msgFile) ; DenseMtx_colDotProduct(vecR, 0, vecZ, 0, Rho); if ( Rho[0] == 0.0 & Rho[1] == 0.0){ fprintf(stderr, "\n breakdown in ZPCGR !! " "\n Fatal error \n"); spoolesFatal(); } /* */ if ( Iter == 1 ) { DenseMtx_colCopy (vecP, 0, vecZ, 0); } else { zdiv(Rho, Rho0, Beta); DenseMtx_colGenAxpy (Beta, vecP, 0, one, vecZ, 0); }; InpMtx_herm_gmmm(mtxA, zero, vecQ, one, vecP) ; DenseMtx_colDotProduct (vecP, 0, vecQ,0, Rtmp); zdiv(Rho, Rtmp, Alpha); DenseMtx_colGenAxpy (one, vecX, 0, Alpha, vecP, 0); Rtmp[0] = -Alpha[0]; Rtmp[1] = -Alpha[1]; DenseMtx_colGenAxpy (one, vecR, 0, Rtmp, vecQ, 0); Rho0[0] = Rho[0]; Rho0[1] = Rho[1]; /* */ Res_norm = DenseMtx_twoNormOfColumn (vecR, 0); ratio = Res_norm/Init_norm; fprintf(msgFile, "\n\n At iteration %d" " the convergence ratio is %12.4e", Iter, ratio) ; } /* End of while loop */ MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Iter) ; fprintf(msgFile, "\n\n after ZPCGR") ; DenseMtx_colCopy (mtxX, 0, vecX, 0); /* ------------------------ free the working storage ------------------------ */ DenseMtx_free(vecP) ; DenseMtx_free(vecR) ; DenseMtx_free(vecX) ; DenseMtx_free(vecQ) ; DenseMtx_free(vecZ) ; fprintf(msgFile, "\n") ; return(1) ; }