/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------------ generate a random matrix and test a matrix-matrix multiply method. the output is a matlab file to test correctness. created -- 98jan29, cca -------------------------------------------------------------------- */ { DenseMtx *X, *Y, *Y2 ; double alpha[2] ; double alphaImag, alphaReal, t1, t2 ; double *zvec ; Drand *drand ; int col, dataType, ii, msglvl, ncolA, nitem, nops, nrhs, nrowA, nrowX, nrowY, nthread, row, seed, storageMode, symflag, transposeflag ; int *colids, *rowids ; InpMtx *A ; FILE *msgFile ; if ( argc != 15 ) { fprintf(stdout, "\n\n %% usage : %s msglvl msgFile symflag storageMode " "\n %% nrow ncol nent nrhs seed alphaReal alphaImag nthread" "\n %% msglvl -- message level" "\n %% msgFile -- message file" "\n %% dataType -- type of matrix entries" "\n %% 1 -- real" "\n %% 2 -- complex" "\n %% symflag -- symmetry flag" "\n %% 0 -- symmetric" "\n %% 1 -- hermitian" "\n %% 2 -- nonsymmetric" "\n %% storageMode -- storage mode" "\n %% 1 -- by rows" "\n %% 2 -- by columns" "\n %% 3 -- by chevrons, (requires nrow = ncol)" "\n %% transpose -- transpose flag" "\n %% 0 -- Y := Y + alpha * A * X" "\n %% 1 -- Y := Y + alpha * A^H * X, nonsymmetric only" "\n %% 2 -- Y := Y + alpha * A^T * X, nonsymmetric only" "\n %% nrowA -- number of rows in A" "\n %% ncolA -- number of columns in A" "\n %% nitem -- number of items" "\n %% nrhs -- number of right hand sides" "\n %% seed -- random number seed" "\n %% alphaReal -- y := y + alpha*A*x" "\n %% alphaImag -- y := y + alpha*A*x" "\n %% nthread -- # of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } dataType = atoi(argv[3]) ; symflag = atoi(argv[4]) ; storageMode = atoi(argv[5]) ; transposeflag = atoi(argv[6]) ; nrowA = atoi(argv[7]) ; ncolA = atoi(argv[8]) ; nitem = atoi(argv[9]) ; nrhs = atoi(argv[10]) ; seed = atoi(argv[11]) ; alphaReal = atof(argv[12]) ; alphaImag = atof(argv[13]) ; nthread = atoi(argv[14]) ; fprintf(msgFile, "\n %% %s " "\n %% msglvl -- %d" "\n %% msgFile -- %s" "\n %% dataType -- %d" "\n %% symflag -- %d" "\n %% storageMode -- %d" "\n %% transposeflag -- %d" "\n %% nrowA -- %d" "\n %% ncolA -- %d" "\n %% nitem -- %d" "\n %% nrhs -- %d" "\n %% seed -- %d" "\n %% alphaReal -- %e" "\n %% alphaImag -- %e" "\n %% nthread -- %d" "\n", argv[0], msglvl, argv[2], dataType, symflag, storageMode, transposeflag, nrowA, ncolA, nitem, nrhs, seed, alphaReal, alphaImag, nthread) ; fflush(msgFile) ; if ( dataType != 1 && dataType != 2 ) { fprintf(stderr, "\n invalid value %d for dataType\n", dataType) ; spoolesFatal(); } if ( symflag != 0 && symflag != 1 && symflag != 2 ) { fprintf(stderr, "\n invalid value %d for symflag\n", symflag) ; spoolesFatal(); } if ( storageMode != 1 && storageMode != 2 && storageMode != 3 ) { fprintf(stderr, "\n invalid value %d for storageMode\n", storageMode) ; spoolesFatal(); } if ( transposeflag < 0 || transposeflag > 2 ) { fprintf(stderr, "\n error, transposeflag = %d, must be 0, 1 or 2", transposeflag) ; spoolesFatal(); } if ( (transposeflag == 1 && symflag != 2) || (transposeflag == 2 && symflag != 2) ) { fprintf(stderr, "\n error, transposeflag = %d, symflag = %d", transposeflag, symflag) ; spoolesFatal(); } if ( transposeflag == 1 && dataType != 2 ) { fprintf(stderr, "\n error, transposeflag = %d, dataType = %d", transposeflag, dataType) ; spoolesFatal(); } if ( symflag == 1 && dataType != 2 ) { fprintf(stderr, "\n symflag = 1 (hermitian), dataType != 2 (complex)") ; spoolesFatal(); } if ( nrowA <= 0 || ncolA <= 0 || nitem <= 0 ) { fprintf(stderr, "\n invalid value: nrow = %d, ncol = %d, nitem = %d", nrowA, ncolA, nitem) ; spoolesFatal(); } if ( symflag < 2 && nrowA != ncolA ) { fprintf(stderr, "\n invalid data: symflag = %d, nrow = %d, ncol = %d", symflag, nrowA, ncolA) ; spoolesFatal(); } alpha[0] = alphaReal ; alpha[1] = alphaImag ; /* ---------------------------- initialize the matrix object ---------------------------- */ A = InpMtx_new() ; InpMtx_init(A, storageMode, dataType, 0, 0) ; drand = Drand_new() ; /* ---------------------------------- generate a vector of nitem triples ---------------------------------- */ rowids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, nrowA) ; Drand_fillIvector(drand, nitem, rowids) ; colids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, ncolA) ; Drand_fillIvector(drand, nitem, colids) ; Drand_setUniform(drand, 0.0, 1.0) ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { zvec = DVinit(nitem, 0.0) ; Drand_fillDvector(drand, nitem, zvec) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { zvec = ZVinit(nitem, 0.0, 0.0) ; Drand_fillDvector(drand, 2*nitem, zvec) ; } /* ----------------------------------- assemble the entries entry by entry ----------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n A = zeros(%d,%d) ;", nrowA, ncolA) ; } if ( symflag == 1 ) { /* ---------------- hermitian matrix ---------------- */ for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] == colids[ii] ) { zvec[2*ii+1] = 0.0 ; } if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } else if ( symflag == 0 ) { /* ---------------- symmetric matrix ---------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputRealEntry(A, row, col, zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } } else { /* ------------------- nonsymmetric matrix ------------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputRealEntry(A, rowids[ii], colids[ii], zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputComplexEntry(A, rowids[ii], colids[ii], zvec[2*ii], zvec[2*ii+1]) ; } } } InpMtx_changeStorageMode(A, INPMTX_BY_VECTORS) ; DVfree(zvec) ; if ( symflag == 0 || symflag == 1 ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 4*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 16*A->nent*nrhs ; } } else { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 2*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 8*A->nent*nrhs ; } } if ( msglvl > 1 ) { /* ------------------------------------------- write the assembled matrix to a matlab file ------------------------------------------- */ InpMtx_writeForMatlab(A, "A", msgFile) ; if ( symflag == 0 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = A(k,j) ;" "\n end" "\n end", nrowA, ncolA) ; } else if ( symflag == 1 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = ctranspose(A(k,j)) ;" "\n end" "\n end", nrowA, ncolA) ; } } /* ------------------------------- generate dense matrices X and Y ------------------------------- */ if ( transposeflag == 0 ) { nrowX = ncolA ; nrowY = nrowA ; } else { nrowX = nrowA ; nrowY = ncolA ; } X = DenseMtx_new() ; Y = DenseMtx_new() ; Y2 = DenseMtx_new() ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_REAL, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_COMPLEX, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, 2*nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, 2*nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(2*nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, nrhs) ; DenseMtx_writeForMatlab(X, "X", msgFile) ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowY, nrhs) ; DenseMtx_writeForMatlab(Y, "Y", msgFile) ; } /* -------------------------------------------- perform the matrix-matrix multiply in serial -------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_sym_mmm(A, Y, alpha, X) ; } else if ( symflag == 1 ) { InpMtx_herm_mmm(A, Y, alpha, X) ; } else if ( symflag == 2 ) { InpMtx_nonsym_mmm(A, Y, alpha, X) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_H(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_T(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f serial mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* -------------------------------------------------------- perform the matrix-matrix multiply in multithreaded mode -------------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_MT_sym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 1 ) { InpMtx_MT_herm_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 2 ) { InpMtx_MT_nonsym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_H(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_T(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f MT mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(A) ; DenseMtx_free(X) ; DenseMtx_free(Y) ; DenseMtx_free(Y2) ; IVfree(rowids) ; IVfree(colids) ; Drand_free(drand) ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) { /* -------------------------------------------------- QR all-in-one program (1) read in matrix entries and form InpMtx object of A and A^TA (2) form Graph object of A^TA (3) order matrix and form front tree (4) get the permutation, permute the matrix and front tree and get the symbolic factorization (5) compute the numeric factorization (6) read in right hand side entries (7) compute the solution created -- 98jun11, cca -------------------------------------------------- */ /*--------------------------------------------------------------------*/ char *matrixFileName, *rhsFileName ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX ; double facops, imag, real, value ; double cpus[10] ; ETree *frontETree ; FILE *inputFile, *msgFile ; FrontMtx *frontmtx ; Graph *graph ; int ient, irow, jcol, jrhs, jrow, msglvl, neqns, nedges, nent, nrhs, nrow, seed, type ; InpMtx *mtxA ; IV *newToOldIV, *oldToNewIV ; IVL *adjIVL, *symbfacIVL ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ /* -------------------- get input parameters -------------------- */ if ( argc != 7 ) { fprintf(stdout, "\n usage: %s msglvl msgFile type matrixFileName rhsFileName seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of entries" "\n 1 (SPOOLES_REAL) -- real entries" "\n 2 (SPOOLES_COMPLEX) -- complex entries" "\n matrixFileName -- matrix file name, format" "\n nrow ncol nent" "\n irow jcol entry" "\n ..." "\n note: indices are zero based" "\n rhsFileName -- right hand side file name, format" "\n nrow " "\n entry[0]" "\n ..." "\n entry[nrow-1]" "\n seed -- random number seed, used for ordering" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; matrixFileName = argv[4] ; rhsFileName = argv[5] ; seed = atoi(argv[6]) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 1: read the entries from the input file and create the InpMtx object of A -------------------------------------------- */ inputFile = fopen(matrixFileName, "r") ; fscanf(inputFile, "%d %d %d", &nrow, &neqns, &nent) ; mtxA = InpMtx_new() ; InpMtx_init(mtxA, INPMTX_BY_ROWS, type, nent, 0) ; if ( type == SPOOLES_REAL ) { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le", &irow, &jcol, &value) ; InpMtx_inputRealEntry(mtxA, irow, jcol, value) ; } } else { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le %le", &irow, &jcol, &real, &imag) ; InpMtx_inputComplexEntry(mtxA, irow, jcol, real, imag) ; } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n input matrix") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ---------------------------------------- STEP 2: read the right hand side entries ---------------------------------------- */ inputFile = fopen(rhsFileName, "r") ; fscanf(inputFile, "%d %d", &nrow, &nrhs) ; mtxB = DenseMtx_new() ; DenseMtx_init(mtxB, type, 0, 0, nrow, nrhs, 1, nrow) ; DenseMtx_zero(mtxB) ; if ( type == SPOOLES_REAL ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le", &value) ; DenseMtx_setRealEntry(mtxB, jrow, jrhs, value) ; } } } else { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le %le", &real, &imag) ; DenseMtx_setComplexEntry(mtxB, jrow, jrhs, real, imag) ; } } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n rhs matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------- STEP 3 : find a low-fill ordering (1) create the Graph object for A^TA or A^HA (2) order the graph using multiple minimum degree ------------------------------------------------- */ graph = Graph_new() ; adjIVL = InpMtx_adjForATA(mtxA) ; nedges = IVL_tsize(adjIVL) ; Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n graph of A^T A") ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n front tree from ordering") ; ETree_writeForHumanEye(frontETree, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 4: get the permutation, permute the matrix and front tree and get the symbolic factorization ----------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; InpMtx_permute(mtxA, NULL, IV_entries(oldToNewIV)) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; symbfacIVL = SymbFac_initFromGraph(frontETree, graph) ; IVL_overwrite(symbfacIVL, oldToNewIV) ; IVL_sortUp(symbfacIVL) ; ETree_permuteVertices(frontETree, oldToNewIV) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n old-to-new permutation vector") ; IV_writeForHumanEye(oldToNewIV, msgFile) ; fprintf(msgFile, "\n\n new-to-old permutation vector") ; IV_writeForHumanEye(newToOldIV, msgFile) ; fprintf(msgFile, "\n\n front tree after permutation") ; ETree_writeForHumanEye(frontETree, msgFile) ; fprintf(msgFile, "\n\n input matrix after permutation") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n\n symbolic factorization") ; IVL_writeForHumanEye(symbfacIVL, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 5: initialize the front matrix object ------------------------------------------ */ frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; if ( type == SPOOLES_REAL ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } else { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------- STEP 6: compute the numeric factorization ----------------------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1) ; DVzero(10, cpus) ; facops = 0.0 ; FrontMtx_QR_factor(frontmtx, mtxA, chvmanager, cpus, &facops, msglvl, msgFile) ; ChvManager_free(chvmanager) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix") ; fprintf(msgFile, "\n facops = %9.2f", facops) ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------- STEP 7: post-process the factorization -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------- STEP 8: solve the linear system ------------------------------- */ mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, 0, neqns, nrhs, 1, neqns) ; FrontMtx_QR_solve(frontmtx, mtxA, mtxX, mtxB, mtxmanager, cpus, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n solution matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------------- STEP 9: permute the solution into the original ordering ------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; FrontMtx_free(frontmtx) ; Graph_free(graph) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; ETree_free(frontETree) ; IV_free(newToOldIV) ; IV_free(oldToNewIV) ; IVL_free(symbfacIVL) ; SubMtxManager_free(mtxmanager) ; /*--------------------------------------------------------------------*/ return(1) ; }
/* ------------------------------------------------------------------- InpMtx_readFromAIJ2file reads in an AIJ2 matrix from file and converts to InpMtx object. created -- 98dec01, jwu ------------------------------------------------------------------- */ int InpMtx_readFromAIJ2file(InpMtx *mtxA, char *srcFileName) { FILE *srcFile; int rc, nrow, ncol, irow, icol, jrow, jcol, ient, nentInRow; double ent; /* ---------------------------- open the input file and read #rows #columns ---------------------------- */ if ( (srcFile = fopen(srcFileName, "r")) == NULL ) { fprintf(stderr, "\n fatal error in InpMtx_readFromAIJ2file" "\n unable to open file %s\n", srcFileName) ; return(-1) ; } rc = fscanf(srcFile, "%d %d", &nrow, &ncol) ; if ( rc != 2 ) { fprintf(stderr, "\n fatal error in InpMtx_readFromAIJ2file" "\n %d of 2 fields read on first line of file %s", rc, srcFileName) ; return(-1) ; } /* -------------------------------------------------- initialize the object set coordType = INPMTX_BY_ROWS --> row coordinates -------------------------------------------------- */ InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, 10*nrow, 0) ; /* ------------------------------------------------- read in the entries and load them into the object ------------------------------------------------- */ ient=0; for ( irow = 0 ; irow < nrow ; irow++ ) { rc = fscanf(srcFile, "%d %d", &jrow, &nentInRow) ; if ( rc != 2 ) { fprintf(stderr, "\n fatal error in InpMtx_readFromAIJ2file" "\n %d of 2 fields read on entry %d of file %s", rc, ient, srcFileName) ; return(-1) ; } for ( icol = 0 ; icol < nentInRow ; icol++ ) { rc = fscanf(srcFile, "%d %lf", &jcol, &ent) ; if ( rc != 2 ) { fprintf(stderr, "\n fatal error in InpMtx_readFromAIJ2file" "\n %d of 2 fields read on entry %d of file %s", rc, ient, srcFileName) ; return(-1) ; } InpMtx_inputRealEntry(mtxA, jrow-1, jcol-1, ent) ; ient++; } } /* ----------------------------- sort and compress the entries ----------------------------- */ InpMtx_changeStorageMode(mtxA, 3) ; fclose(srcFile); return(1) ; }
SPOOLESSolverMT::SPOOLESSolverMT(const SparseMatrix * A, int numThreads, int verbose) { n = A->Getn(); this->verbose = verbose; msgFile = fopen("SPOOLES.message","w"); // prepare SPOOLES input matrix if (verbose >= 1) printf("Converting matrix to SPOOLES format...\n"); InpMtx * mtxA = InpMtx_new(); InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, A->GetNumEntries(), n); for(int row=0; row<n; row++) { int rowLength = A->GetRowLength(row); for(int j=0; j< rowLength; j++) { if (A->GetColumnIndex(row,j) >= row) InpMtx_inputRealEntry(mtxA, row, A->GetColumnIndex(row, j), A->GetEntry(row, j) ); } } InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS); //InpMtx_writeForHumanEye(mtxA, msgFile); // compute the factorization if (verbose >= 1) printf("Factoring the %d x %d matrix...\n",n,n); BridgeMT * bridgeMT = BridgeMT_new(); BridgeMT_setMatrixParams(bridgeMT, n, SPOOLES_REAL, SPOOLES_SYMMETRIC); BridgeMT_setMessageInfo(bridgeMT, 1, msgFile); int rc = BridgeMT_setup(bridgeMT, mtxA); if (rc != 1) { printf("Error: BridgeMT setup returned exit code %d.\n", rc); throw 1; } int type = 1; // real entries int nfront, nfind, nfent; double nfactorops; rc = BridgeMT_factorStats(bridgeMT, type, SPOOLES_SYMMETRIC, &nfront, &nfind, &nfent, &nsolveops, &nfactorops); if ( rc != 1 ) { printf("Error: BridgeMT_factorStats returned exit code %d.\n", rc); throw 1; } fprintf(msgFile, "\n\n factor matrix statistics" "\n %d fronts, %d indices, %d entries" "\n %d solve operations, %12.4e factor operations", nfront, nfind, nfent, nsolveops, nfactorops) ; fflush(msgFile) ; // setup the parallel factorization rc = BridgeMT_factorSetup(bridgeMT, numThreads, 0, 0.0) ; fprintf(msgFile, "\n\n ----- PARALLEL FACTOR SETUP -----\n") ; fprintf(msgFile, "\n CPU %8.3f : time to setup parallel factorization", bridgeMT->cpus[5]) ; fprintf(msgFile, "\n total factor operations = %.0f", DV_sum(bridgeMT->cumopsDV)) ; fprintf(msgFile, "\n upper bound on speedup due to load balance = %.2f", DV_sum(bridgeMT->cumopsDV)/DV_max(bridgeMT->cumopsDV)) ; fprintf(msgFile, "\n operations distributions over threads") ; DV_writeForHumanEye(bridgeMT->cumopsDV, msgFile) ; fflush(msgFile) ; // factor the matrix int permuteflag = 1 ; int error; rc = BridgeMT_factor(bridgeMT, mtxA, permuteflag, &error); if ( rc == 1 ) { fprintf(msgFile, "\n\n factorization completed successfully\n") ; } else { printf("Error: factorization returned exit code %d (error %d).\n", rc, error); throw 1; } fprintf(msgFile, "\n\n ----- FACTORIZATION -----\n") ; fprintf(msgFile, "\n CPU %8.3f : time to permute original matrix" "\n CPU %8.3f : time to initialize factor matrix" "\n CPU %8.3f : time to compute factorization" "\n CPU %8.3f : time to post-process factorization" "\n CPU %8.3f : total factorization time\n", bridgeMT->cpus[6], bridgeMT->cpus[7], bridgeMT->cpus[8], bridgeMT->cpus[9], bridgeMT->cpus[10]) ; fprintf(msgFile, "\n\n factorization statistics" "\n %d pivots, %d pivot tests, %d delayed vertices" "\n %d entries in D, %d entries in L, %d entries in U", bridgeMT->stats[0], bridgeMT->stats[1], bridgeMT->stats[2], bridgeMT->stats[3], bridgeMT->stats[4], bridgeMT->stats[5]) ; fprintf(msgFile, "\n\n factorization: raw mflops %8.3f, overall mflops %8.3f", 1.e-6*nfactorops/bridgeMT->cpus[8], 1.e-6*nfactorops/bridgeMT->cpus[10]) ; fflush(msgFile) ; // construct dense SPOOLES matrix for rhs and x DenseMtx *mtx_rhs = DenseMtx_new(); DenseMtx_init(mtx_rhs, SPOOLES_REAL, 0, 0, n, 1, 1, n); mtx_rhsPointer = (void*) mtx_rhs; DenseMtx *mtx_x = DenseMtx_new(); DenseMtx_init(mtx_x, SPOOLES_REAL, 0, 0, n, 1, 1, n); mtx_xPointer = (void*) mtx_x; bridgeMTPointer = (void*) bridgeMT; APointer = (void*) mtxA; if (verbose >= 1) printf("Factorization completed.\n"); }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) { /* -------------------------------------------------- all-in-one program to solve A X = B using a multithreaded factorization and solve We use a patch-and-go strategy for the factorization without pivoting (1) read in matrix entries and form DInpMtx object (2) form Graph object (3) order matrix and form front tree (4) get the permutation, permute the matrix and front tree and get the symbolic factorization (5) compute the numeric factorization (6) read in right hand side entries (7) compute the solution created -- 98jun04, cca -------------------------------------------------- */ /*--------------------------------------------------------------------*/ char *matrixFileName, *rhsFileName ; DenseMtx *mtxB, *mtxX ; Chv *rootchv ; ChvManager *chvmanager ; double fudge, imag, real, tau = 100., toosmall, value ; double cpus[10] ; DV *cumopsDV ; ETree *frontETree ; FrontMtx *frontmtx ; FILE *inputFile, *msgFile ; Graph *graph ; InpMtx *mtxA ; int error, ient, irow, jcol, jrhs, jrow, lookahead, msglvl, ncol, nedges, nent, neqns, nfront, nrhs, nrow, nthread, patchAndGoFlag, seed, storeids, storevalues, symmetryflag, type ; int *newToOld, *oldToNew ; int stats[20] ; IV *newToOldIV, *oldToNewIV, *ownersIV ; IVL *adjIVL, *symbfacIVL ; SolveMap *solvemap ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ /* -------------------- get input parameters -------------------- */ if ( argc != 14 ) { fprintf(stdout, "\n" "\n usage: %s msglvl msgFile type symmetryflag patchAndGoFlag" "\n fudge toosmall storeids storevalues" "\n matrixFileName rhsFileName seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of entries" "\n 1 (SPOOLES_REAL) -- real entries" "\n 2 (SPOOLES_COMPLEX) -- complex entries" "\n symmetryflag -- type of matrix" "\n 0 (SPOOLES_SYMMETRIC) -- symmetric entries" "\n 1 (SPOOLES_HERMITIAN) -- Hermitian entries" "\n 2 (SPOOLES_NONSYMMETRIC) -- nonsymmetric entries" "\n patchAndGoFlag -- flag for the patch-and-go strategy" "\n 0 -- none, stop factorization" "\n 1 -- optimization strategy" "\n 2 -- structural analysis strategy" "\n fudge -- perturbation parameter" "\n toosmall -- upper bound on a small pivot" "\n storeids -- flag to store ids of small pivots" "\n storevalues -- flag to store perturbations" "\n matrixFileName -- matrix file name, format" "\n nrow ncol nent" "\n irow jcol entry" "\n ..." "\n note: indices are zero based" "\n rhsFileName -- right hand side file name, format" "\n nrow nrhs " "\n ..." "\n jrow entry(jrow,0) ... entry(jrow,nrhs-1)" "\n ..." "\n seed -- random number seed, used for ordering" "\n nthread -- number of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; symmetryflag = atoi(argv[4]) ; patchAndGoFlag = atoi(argv[5]) ; fudge = atof(argv[6]) ; toosmall = atof(argv[7]) ; storeids = atoi(argv[8]) ; storevalues = atoi(argv[9]) ; matrixFileName = argv[10] ; rhsFileName = argv[11] ; seed = atoi(argv[12]) ; nthread = atoi(argv[13]) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 1: read the entries from the input file and create the InpMtx object -------------------------------------------- */ if ( (inputFile = fopen(matrixFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", matrixFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d %d", &nrow, &ncol, &nent) ; neqns = nrow ; mtxA = InpMtx_new() ; InpMtx_init(mtxA, INPMTX_BY_ROWS, type, nent, 0) ; if ( type == SPOOLES_REAL ) { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le", &irow, &jcol, &value) ; InpMtx_inputRealEntry(mtxA, irow, jcol, value) ; } } else { for ( ient = 0 ; ient < nent ; ient++ ) { fscanf(inputFile, "%d %d %le %le", &irow, &jcol, &real, &imag) ; InpMtx_inputComplexEntry(mtxA, irow, jcol, real, imag) ; } } fclose(inputFile) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n input matrix") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------------- STEP 2 : find a low-fill ordering (1) create the Graph object (2) order the graph using multiple minimum degree ------------------------------------------------- */ graph = Graph_new() ; adjIVL = InpMtx_fullAdjacency(mtxA) ; nedges = IVL_tsize(adjIVL) ; Graph_init2(graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n graph of the input matrix") ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n front tree from ordering") ; ETree_writeForHumanEye(frontETree, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 3: get the permutation, permute the matrix and front tree and get the symbolic factorization ----------------------------------------------------- */ oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; oldToNew = IV_entries(oldToNewIV) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; newToOld = IV_entries(newToOldIV) ; ETree_permuteVertices(frontETree, oldToNewIV) ; InpMtx_permute(mtxA, oldToNew, oldToNew) ; InpMtx_mapToUpperTriangle(mtxA) ; InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n old-to-new permutation vector") ; IV_writeForHumanEye(oldToNewIV, msgFile) ; fprintf(msgFile, "\n\n new-to-old permutation vector") ; IV_writeForHumanEye(newToOldIV, msgFile) ; fprintf(msgFile, "\n\n front tree after permutation") ; ETree_writeForHumanEye(frontETree, msgFile) ; fprintf(msgFile, "\n\n input matrix after permutation") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n\n symbolic factorization") ; IVL_writeForHumanEye(symbfacIVL, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 4: initialize the front matrix object and the PatchAndGoInfo object to handle small pivots ------------------------------------------ */ frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 1, toosmall, fudge, storeids, storevalues) ; } else if ( patchAndGoFlag == 2 ) { frontmtx->patchinfo = PatchAndGoInfo_new() ; PatchAndGoInfo_init(frontmtx->patchinfo, 2, toosmall, fudge, storeids, storevalues) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------------ STEP 5: setup the domain decomposition map ------------------------------------------ */ if ( nthread > (nfront = FrontMtx_nfront(frontmtx)) ) { nthread = nfront ; } cumopsDV = DV_new() ; DV_init(cumopsDV, nthread, NULL) ; ownersIV = ETree_ddMap(frontETree, type, symmetryflag, cumopsDV, 1./(2.*nthread)) ; DV_free(cumopsDV) ; /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- STEP 6: compute the numeric factorization in parallel ----------------------------------------------------- */ chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ; DVfill(10, cpus, 0.0) ; IVfill(20, stats, 0) ; lookahead = 0 ; rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, 0.0, chvmanager, ownersIV, lookahead, &error, cpus, stats, msglvl, msgFile) ; if ( patchAndGoFlag == 1 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } else if ( patchAndGoFlag == 2 ) { if ( frontmtx->patchinfo->fudgeIV != NULL ) { fprintf(msgFile, "\n small pivots found at these locations") ; IV_writeForHumanEye(frontmtx->patchinfo->fudgeIV, msgFile) ; } if ( frontmtx->patchinfo->fudgeDV != NULL ) { fprintf(msgFile, "\n perturbations") ; DV_writeForHumanEye(frontmtx->patchinfo->fudgeDV, msgFile) ; } PatchAndGoInfo_free(frontmtx->patchinfo) ; } ChvManager_free(chvmanager) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } if ( rootchv != NULL ) { fprintf(msgFile, "\n\n matrix found to be singular\n") ; spoolesFatal(); } if ( error >= 0 ) { fprintf(msgFile, "\n\n fatal error at front %d\n", error) ; spoolesFatal(); } /* -------------------------------------- STEP 7: post-process the factorization -------------------------------------- */ FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------- STEP 8: read the right hand side matrix B ----------------------------------------- */ if ( (inputFile = fopen(rhsFileName, "r")) == NULL ) { fprintf(stderr, "\n unable to open file %s", rhsFileName) ; spoolesFatal(); } fscanf(inputFile, "%d %d", &nrow, &nrhs) ; mtxB = DenseMtx_new() ; DenseMtx_init(mtxB, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxB) ; if ( type == SPOOLES_REAL ) { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le", &value) ; DenseMtx_setRealEntry(mtxB, jrow, jrhs, value) ; } } } else { for ( irow = 0 ; irow < nrow ; irow++ ) { fscanf(inputFile, "%d", &jrow) ; for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { fscanf(inputFile, "%le %le", &real, &imag) ; DenseMtx_setComplexEntry(mtxB, jrow, jrhs, real, imag) ; } } } fclose(inputFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n rhs matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------------- STEP 9: permute the right hand side into the original ordering -------------------------------------------------------------- */ DenseMtx_permuteRows(mtxB, oldToNewIV) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n right hand side matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 10: get the solve map object for the parallel solve -------------------------------------------------------- */ solvemap = SolveMap_new() ; SolveMap_ddMap(solvemap, type, FrontMtx_upperBlockIVL(frontmtx), FrontMtx_lowerBlockIVL(frontmtx), nthread, ownersIV, FrontMtx_frontTree(frontmtx), seed, msglvl, msgFile) ; /*--------------------------------------------------------------------*/ /* -------------------------------------------- STEP 11: solve the linear system in parallel -------------------------------------------- */ mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxX) ; FrontMtx_MT_solve(frontmtx, mtxX, mtxB, mtxmanager, solvemap, cpus, msglvl, msgFile) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n solution matrix in new ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------------------------- STEP 12: permute the solution into the original ordering -------------------------------------------------------- */ DenseMtx_permuteRows(mtxX, newToOldIV) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n solution matrix in original ordering") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* ----------- free memory ----------- */ FrontMtx_free(frontmtx) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; IV_free(newToOldIV) ; IV_free(oldToNewIV) ; InpMtx_free(mtxA) ; ETree_free(frontETree) ; IVL_free(symbfacIVL) ; SubMtxManager_free(mtxmanager) ; Graph_free(graph) ; SolveMap_free(solvemap) ; IV_free(ownersIV) ; /*--------------------------------------------------------------------*/ return(1) ; }
void *spooles_factor(long *row, long *col, double *data, long neq, long nnz, int symmetryflag) { InpMtx *mtxA; struct factorinfo *pfi_ = (struct factorinfo *)malloc(sizeof(struct factorinfo)); //printf(" Factoring the system of equations using the symmetric spooles solver\n"); if ((msgFile = fopen("spooles.out", "a")) == NULL) { fprintf(stderr, "\n fatal error in spooles.c" "\n unable to open file spooles.out\n"); } /* * Create the InpMtx object from the CalculiX matrix * representation */ mtxA = InpMtx_new(); InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, nnz, neq); long i; for(i = 0 ; i<nnz ; i++) { InpMtx_inputRealEntry(mtxA, row[i], col[i], data[i]); } /* solve it! */ #ifdef USE_MT /* Rules for parallel solve: a. determining the maximum number of cpus: - if NUMBER_OF_CPUS>0 this is taken as the number of cpus in the system - else it is taken from _SC_NPROCESSORS_CONF, if strictly positive - else 1 cpu is assumed (default) b. determining the number of cpus to use - if CCX_NPROC_EQUATION_SOLVER>0 then use CCX_NPROC_EQUATION_SOLVER cpus - else if CCX_NPROC>0 use CCX_NPROC cpus - else use the maximum number of cpus */ if (num_cpus < 0) { int sys_cpus; char *env,*envloc,*envsys; num_cpus = 0; sys_cpus=0; /* explicit user declaration prevails */ envsys=getenv("NUMBER_OF_CPUS"); if(envsys){ sys_cpus=atoi(envsys); if(sys_cpus<0) sys_cpus=0; } /* automatic detection of available number of processors */ if(sys_cpus==0){ sys_cpus = sysconf(_SC_NPROCESSORS_CONF); if(sys_cpus<1) sys_cpus=1; } /* local declaration prevails, if strictly positive */ envloc = getenv("CCX_NPROC_EQUATION_SOLVER"); if(envloc){ num_cpus=atoi(envloc); if(num_cpus<0){ num_cpus=0; }else if(num_cpus>sys_cpus){ num_cpus=sys_cpus; } } /* else global declaration, if any, applies */ env = getenv("OMP_NUM_THREADS"); if(num_cpus==0){ if (env) num_cpus = atoi(env); if (num_cpus < 1) { num_cpus=1; }else if(num_cpus>sys_cpus){ num_cpus=sys_cpus; } } } //printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus); if (num_cpus > 1) { /* do not use the multithreaded solver unless * we have multiple threads - avoid the * locking overhead */ factor_MT(pfi_, mtxA, neq, msgFile,symmetryflag); } else { factor(pfi_, mtxA, neq, msgFile,symmetryflag); } #else //printf(" Using 1 cpu for spooles.\n\n"); factor(pfi_, mtxA, neq, msgFile,symmetryflag); #endif return (void *)pfi_; }