/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------- test the QR factor method for a FrontMtx object on an n1 x n2 x n3 grid (1) generate an overdetermined system AX = B (2) factor the matrix (3) solve the systems created -- 97apr11, dkw modified -- 98may28, cca --------------------------------------------------- */ { ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX, *mtxZ ; double cputotal, factorops ; double cpus[9] ; double nops, t1, t2 ; ETree *frontETree ; FILE *msgFile ; FrontMtx *frontmtx ; InpMtx *mtxA ; int msglvl, neqns, nrhs, n1, n2, n3, seed, type ; IVL *symbfacIVL ; SubMtxManager *mtxmanager ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n1 n2 n3 seed nrhs " "\n msglvl -- message level" "\n msgFile -- message file" "\n n1 -- # of points in the first direction" "\n n2 -- # of points in the second direction" "\n n3 -- # of points in the third direction" "\n seed -- random number seed" "\n nrhs -- # of right hand sides" "\n type -- type of linear system" "\n 1 -- real" "\n 2 -- complex" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } n1 = atoi(argv[3]) ; n2 = atoi(argv[4]) ; n3 = atoi(argv[5]) ; seed = atoi(argv[6]) ; nrhs = atoi(argv[7]) ; type = atoi(argv[8]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n n1 -- %d" "\n n2 -- %d" "\n n3 -- %d" "\n seed -- %d" "\n nrhs -- %d" "\n type -- %d" "\n", argv[0], msglvl, argv[2], n1, n2, n3, seed, nrhs, type) ; fflush(msgFile) ; neqns = n1*n2*n3 ; if ( type != SPOOLES_REAL && type != SPOOLES_COMPLEX ) { fprintf(stderr, "\n fatal error, type must be real or complex") ; exit(-1) ; } /* ------------------------------------------ generate the A X = B overdetermined system ------------------------------------------ */ mkNDlinsysQR(n1, n2, n3, type, nrhs, seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ; /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; frontmtx = FrontMtx_new() ; if ( type == SPOOLES_REAL ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } else if ( type == SPOOLES_COMPLEX ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : FrontMtx initialized", t2 - t1) ; fflush(msgFile) ; /* ----------------- factor the matrix ----------------- */ DVzero(6, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 0) ; factorops = 0.0 ; MARKTIME(t1) ; FrontMtx_QR_factor(frontmtx, mtxA, chvmanager, cpus, &factorops, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n after QR_factor() call, facops = %8.2f",factorops) ; fprintf(msgFile, "\n CPU %8.3f : FrontMtx_QR_factor, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n" "\n setup factorization %8.3f %6.2f" "\n setup fronts %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n store factor %8.3f %6.2f" "\n store update %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cputotal) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ---------------- solve the system ---------------- */ mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxZ) ; if ( type == SPOOLES_REAL ) { nops = frontmtx->nentD + 2*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 2*frontmtx->nentL ; } else { nops += 2*frontmtx->nentU ; } } else if ( type == SPOOLES_COMPLEX ) { nops = 8*frontmtx->nentD + 8*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 8*frontmtx->nentL ; } else { nops += 8*frontmtx->nentU ; } } nops *= nrhs ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } DVzero(6, cpus) ; MARKTIME(t1) ; FrontMtx_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager, cpus, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : solve the system, %.3f mflops", t2 - t1, 1.e-6*nops/(t2 - t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n CPU %%" "\n A^TB matrix-matrix multiply %8.3f %6.2f" "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total solve time %8.3f %6.2f" "\n total QR solve time %8.3f", cpus[6], 100.*cpus[6]/cputotal, cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cputotal) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ----------------- compute the error ----------------- */ DenseMtx_sub(mtxZ, mtxX) ; fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n after solve") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxZ) ; DenseMtx_free(mtxB) ; FrontMtx_free(frontmtx) ; IVL_free(symbfacIVL) ; ETree_free(frontETree) ; SubMtxManager_free(mtxmanager) ; ChvManager_free(chvmanager) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x) { Mat_Spooles *lu = (Mat_Spooles*)A->spptr; PetscScalar *array; DenseMtx *mtxY, *mtxX ; PetscErrorCode ierr; PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv; #if defined(PETSC_USE_COMPLEX) double x_real,x_imag; #else double *entX; #endif PetscFunctionBegin; mtxY = DenseMtx_new(); DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */ ierr = VecGetArray(b,&array);CHKERRQ(ierr); if (lu->options.useQR) { /* copy b to mtxY */ for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, irow, 0, *array++); #else DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow])); #endif } else { /* copy permuted b to mtxY */ iv = IV_entries(lu->oldToNewIV); for ( irow = 0 ; irow < nrow; irow++ ) #if !defined(PETSC_USE_COMPLEX) DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++); #else DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow])); #endif } ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); mtxX = DenseMtx_new(); DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns); if (lu->options.useQR) { FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager, lu->cpus, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");CHKERRQ(ierr); DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* permute solution into original ordering, then copy to x */ DenseMtx_permuteRows(mtxX, lu->newToOldIV); ierr = VecGetArray(x,&array);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) entX = DenseMtx_entries(mtxX); DVcopy(neqns, array, entX); #else for (irow=0; irow<nrow; irow++){ DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag); array[irow] = x_real+x_imag*PETSC_i; } #endif ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); /* free memory */ DenseMtx_free(mtxX); DenseMtx_free(mtxY); PetscFunctionReturn(0); }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) { /* -------------------------------------------------- QR all-in-one program (1) read in matrix entries and form InpMtx object of A and A^TA (2) form Graph object of A^TA (3) order matrix and form front tree (4) get the permutation, permute the matrix and front tree and get the symbolic factorization (5) compute the numeric factorization (6) read in right hand side entries (7) compute the solution created -- 98jun11, cca -------------------------------------------------- */ /*--------------------------------------------------------------------*/ char *matrixFileName, *rhsFileName ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX ; double facops, imag, real, value ; double cpus[10] ; ETree *frontETree ; FILE *inputFile, *msgFile ; FrontMtx *frontmtx ; Graph *graph ; int ient, irow, jcol, jrhs, jrow, msglvl, neqns, nedges, nent, nrhs, nrow, seed, type ; InpMtx *mtxA ; IV *newToOldIV, *oldToNewIV ; IVL *adjIVL, *symbfacIVL ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ /* -------------------- get input parameters -------------------- */ if ( argc != 7 ) { fprintf(stdout, "\n usage: %s msglvl msgFile type matrixFileName rhsFileName seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of entries" "\n 1 (SPOOLES_REAL) -- real entries" "\n 2 (SPOOLES_COMPLEX) -- complex entries" "\n matrixFileName -- matrix file name, format" "\n nrow ncol nent" "\n irow jcol entry" "\n ..." "\n note: indices are zero based" "\n rhsFileName -- right hand side file name, format" "\n nrow " "\n entry[0]" "\n ..." "\n entry[nrow-1]" "\n seed -- random number seed, used for ordering" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; matrixFileName = argv[4] ; rhsFileName = argv[5] ; seed = atoi(argv[6]) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 1: read the entries from the input file and create the InpMtx object of A -------------------------------------------- */ inputFile = fopen(matrixFileName, "r") ; fscanf(inputFile, "%d %d %d", &nrow, &neqns, &nent) ; mtxA = InpMtx_new() ; InpMtx_init(mtxA, INPMTX_BY_ROWS, type, nent, 0) ; if ( type == SPOOLES_REAL ) { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le", &irow, &jcol, &value) ; InpMtx_inputRealEntry(mtxA, irow, jcol, value) ; } } else { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le %le", &irow, &jcol, &real, &imag) ; InpMtx_inputComplexEntry(mtxA, irow, jcol, real, imag) ; } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n input matrix") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ---------------------------------------- STEP 2: read the right hand side entries ---------------------------------------- */ inputFile = fopen(rhsFileName, "r") ; fscanf(inputFile, "%d %d", &nrow, &nrhs) ; mtxB = DenseMtx_new() ; DenseMtx_init(mtxB, type, 0, 0, nrow, nrhs, 1, nrow) ; DenseMtx_zero(mtxB) ; if ( type == SPOOLES_REAL ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le", &value) ; DenseMtx_setRealEntry(mtxB, jrow, jrhs, value) ; } } } else { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le %le", &real, &imag) ; DenseMtx_setComplexEntry(mtxB, jrow, jrhs, real, imag) ; } } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n rhs matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------- STEP 3 : find a low-fill ordering (1) create the Graph object for A^TA or A^HA (2) order the graph using multiple minimum degree ------------------------------------------------- */ graph = Graph_new() ; adjIVL = InpMtx_adjForATA(mtxA) ; nedges = IVL_tsize(adjIVL) ; Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n graph of A^T A") ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n front tree from ordering") ; ETree_writeForHumanEye(frontETree, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 4: get the permutation, permute the matrix and front tree and get the symbolic factorization ----------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; InpMtx_permute(mtxA, NULL, IV_entries(oldToNewIV)) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; symbfacIVL = SymbFac_initFromGraph(frontETree, graph) ; IVL_overwrite(symbfacIVL, oldToNewIV) ; IVL_sortUp(symbfacIVL) ; ETree_permuteVertices(frontETree, oldToNewIV) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n old-to-new permutation vector") ; IV_writeForHumanEye(oldToNewIV, msgFile) ; fprintf(msgFile, "\n\n new-to-old permutation vector") ; IV_writeForHumanEye(newToOldIV, msgFile) ; fprintf(msgFile, "\n\n front tree after permutation") ; ETree_writeForHumanEye(frontETree, msgFile) ; fprintf(msgFile, "\n\n input matrix after permutation") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n\n symbolic factorization") ; IVL_writeForHumanEye(symbfacIVL, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 5: initialize the front matrix object ------------------------------------------ */ frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; if ( type == SPOOLES_REAL ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } else { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------- STEP 6: compute the numeric factorization ----------------------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1) ; DVzero(10, cpus) ; facops = 0.0 ; FrontMtx_QR_factor(frontmtx, mtxA, chvmanager, cpus, &facops, msglvl, msgFile) ; ChvManager_free(chvmanager) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix") ; fprintf(msgFile, "\n facops = %9.2f", facops) ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------- STEP 7: post-process the factorization -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------- STEP 8: solve the linear system ------------------------------- */ mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, 0, neqns, nrhs, 1, neqns) ; FrontMtx_QR_solve(frontmtx, mtxA, mtxX, mtxB, mtxmanager, cpus, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n solution matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------------- STEP 9: permute the solution into the original ordering ------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; FrontMtx_free(frontmtx) ; Graph_free(graph) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; ETree_free(frontETree) ; IV_free(newToOldIV) ; IV_free(oldToNewIV) ; IVL_free(symbfacIVL) ; SubMtxManager_free(mtxmanager) ; /*--------------------------------------------------------------------*/ return(1) ; }