/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------- read in an ETree object and an equivalence map, expand the ETree object and optionally write to a file. created -- 98sep05, cca ------------------------------------------------------- */ { char *inEqmapFileName, *inETreeFileName, *outETreeFileName ; double t1, t2 ; ETree *etree, *etree2 ; FILE *msgFile ; int msglvl, rc ; IV *eqmapIV ; if ( argc != 6 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inETreeFile inEqmapFile outETreeFile" "\n msglvl -- message level" "\n msgFile -- message file" "\n inETreeFile -- input file, must be *.etreef or *.etreeb" "\n inEqmapFile -- input file, must be *.ivf or *.ivb" "\n outETreeFile -- output file, must be *.etreef or *.etreeb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inETreeFileName = argv[3] ; inEqmapFileName = argv[4] ; outETreeFileName = argv[5] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inETreeFile -- %s" "\n inEqmapFile -- %s" "\n outETreeFile -- %s" "\n", argv[0], msglvl, argv[2], inETreeFileName, inEqmapFileName, outETreeFileName) ; fflush(msgFile) ; /* ------------------------ read in the ETree object ------------------------ */ if ( strcmp(inETreeFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } etree = ETree_new() ; MARKTIME(t1) ; rc = ETree_readFromFile(etree, inETreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in etree from file %s", t2 - t1, inETreeFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)", rc, etree, inETreeFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading ETree object from file %s", inETreeFileName) ; if ( msglvl > 2 ) { ETree_writeForHumanEye(etree, msgFile) ; } else { ETree_writeStats(etree, msgFile) ; } fflush(msgFile) ; /* ------------------------------------- read in the equivalence map IV object ------------------------------------- */ if ( strcmp(inEqmapFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } eqmapIV = IV_new() ; MARKTIME(t1) ; rc = IV_readFromFile(eqmapIV, inEqmapFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in eqmapIV from file %s", t2 - t1, inEqmapFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from IV_readFromFile(%p,%s)", rc, eqmapIV, inEqmapFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading IV object from file %s", inEqmapFileName) ; if ( msglvl > 2 ) { IV_writeForHumanEye(eqmapIV, msgFile) ; } else { IV_writeStats(eqmapIV, msgFile) ; } fflush(msgFile) ; /* ----------------------- expand the ETree object ----------------------- */ etree2 = ETree_expand(etree, eqmapIV) ; fprintf(msgFile, "\n\n after expanding the ETree object") ; if ( msglvl > 2 ) { ETree_writeForHumanEye(etree2, msgFile) ; } else { ETree_writeStats(etree2, msgFile) ; } fflush(msgFile) ; /* -------------------------- write out the ETree object -------------------------- */ if ( strcmp(outETreeFileName, "none") != 0 ) { MARKTIME(t1) ; rc = ETree_writeToFile(etree2, outETreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write etree to file %s", t2 - t1, outETreeFileName) ; } if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from ETree_writeToFile(%p,%s)", rc, etree2, outETreeFileName) ; } /* --------------------- free the ETree object --------------------- */ ETree_free(etree) ; IV_free(eqmapIV) ; ETree_free(etree2) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
int tfqmrl ( int n_matrixSize, int type, int symmetryflag, InpMtx *mtxA, FrontMtx *Precond, DenseMtx *mtxX, DenseMtx *mtxB, int itermax, double convergetol, int msglvl, FILE *msgFile ) { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *vecD, *vecR, *vecT, *vecU1, *vecU2, *vecV, *vecW; DenseMtx *vecX, *vecY1, *vecY2 ; double Alpha, Beta, Cee, Eta, Rho, Rho_new ; double Sigma, Tau, Theta; double Init_norm, ratio, Res_norm; double error_trol, m, Rtmp; double t1, t2, cpus[9] ; double one[2] = {1.0, 0.0}, zero[2] ={0.0, 0.0} ; double Tiny = 0.1e-28; int Iter, Imv, neqns; int stats[6] ; neqns = n_matrixSize; /* -------------------- init the vectors in TFQMRL -------------------- */ vecD = DenseMtx_new() ; DenseMtx_init(vecD, type, 0, 0, neqns, 1, 1, neqns) ; vecR = DenseMtx_new() ; DenseMtx_init(vecR, type, 0, 0, neqns, 1, 1, neqns) ; vecT = DenseMtx_new() ; DenseMtx_init(vecT, type, 0, 0, neqns, 1, 1, neqns) ; vecU1 = DenseMtx_new() ; DenseMtx_init(vecU1, type, 0, 0, neqns, 1, 1, neqns) ; vecU2 = DenseMtx_new() ; DenseMtx_init(vecU2, type, 0, 0, neqns, 1, 1, neqns) ; vecV = DenseMtx_new() ; DenseMtx_init(vecV, type, 0, 0, neqns, 1, 1, neqns) ; vecW = DenseMtx_new() ; DenseMtx_init(vecW, type, 0, 0, neqns, 1, 1, neqns) ; vecX = DenseMtx_new() ; DenseMtx_init(vecX, type, 0, 0, neqns, 1, 1, neqns) ; vecY1 = DenseMtx_new() ; DenseMtx_init(vecY1, type, 0, 0, neqns, 1, 1, neqns) ; vecY2 = DenseMtx_new() ; DenseMtx_init(vecY2, type, 0, 0, neqns, 1, 1, neqns) ; /* -------------------------- Initialize the iterations -------------------------- */ /* ---- Set initial guess as zero ---- */ DenseMtx_zero(vecX) ; DenseMtx_colCopy(vecT, 0, mtxB, 0); /* */ FrontMtx_solve(Precond, vecR, vecT, Precond->manager, cpus, msglvl, msgFile) ; /* */ Init_norm = DenseMtx_twoNormOfColumn(vecR,0); if ( Init_norm == 0.0 ){ Init_norm = 1.0; }; error_trol = Init_norm * convergetol ; fprintf(msgFile, "\n TFQMRL Initial norml: %6.2e ", Init_norm ) ; fprintf(msgFile, "\n TFQMRL Conveg. Control: %7.3e ", convergetol ) ; fprintf(msgFile, "\n TFQMRL Convergen Control: %7.3e ",error_trol ) ; DenseMtx_zero(vecD) ; DenseMtx_zero(vecU1) ; DenseMtx_zero(vecU2) ; DenseMtx_zero(vecY2) ; /* DenseMtx_copy(vecR, mtxB); */ DenseMtx_colCopy(vecW, 0, vecR, 0); DenseMtx_colCopy(vecY1, 0, vecR, 0); Iter = 0; Imv = 0; switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } /* */ FrontMtx_solve(Precond, vecV, vecT, Precond->manager, cpus, msglvl, msgFile) ; /* */ Imv++; DenseMtx_colCopy(vecU1, 0, vecV, 0); /* */ Theta = 0.0; Eta = 0.0; Tau = Init_norm ; Rho = Tau * Tau ; /* ------------------------------ TFQMRL Iteration start ------------------------------ */ MARKTIME(t1) ; while ( Iter <= itermax ) { Iter++; DenseMtx_colDotProduct(vecV, 0, vecR, 0, &Sigma); if (Sigma == 0){ fprintf(msgFile, "\n\n Fatal Error, \n" " TFQMRL Breakdown, Sigma = 0 !!") ; Imv = -1; goto end; }; Alpha = Rho/Sigma; /* ---------------- Odd step --------------- */ m = 2 * Iter - 1; Rtmp=-Alpha; DenseMtx_colGenAxpy(one, vecW, 0, &Rtmp, vecU1, 0); Rtmp = Theta * Theta * Eta / Alpha ; DenseMtx_colGenAxpy(&Rtmp, vecD, 0, one, vecY1, 0); Theta = DenseMtx_twoNormOfColumn(vecW,0)/Tau; Cee = 1.0/sqrt(1.0 + Theta*Theta); Tau = Tau * Theta * Cee ; Eta = Cee * Cee * Alpha ; DenseMtx_colGenAxpy(one, vecX, 0, &Eta, vecD, 0); fprintf(msgFile, "\n\n Odd step at %d", Imv); fprintf(msgFile, " \n Tau is : %7.3e", Tau) ; /* Debug purpose: Check the convergence history for the true residual norm */ /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecX) ; DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; */ /* ---------------- Convergence Test --------------- */ if (Tau * sqrt(m + 1) <= error_trol ) { /* */ DenseMtx_colCopy(mtxX, 0, vecX, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, mtxX) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n after TFQMRL") ; goto end; }; /* ---------------- Even step --------------- */ DenseMtx_colCopy(vecY2, 0, vecY1, 0); Rtmp=-Alpha; DenseMtx_colGenAxpy(one, vecY2, 0, &Rtmp, vecV, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecY2) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY2) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY2) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } FrontMtx_solve(Precond, vecU2, vecT, Precond->manager, cpus, msglvl, msgFile) ; Imv++; m = 2 * Iter ; Rtmp = -Alpha; DenseMtx_colGenAxpy(one, vecW, 0, &Rtmp, vecU2, 0); Rtmp = Theta * Theta * Eta / Alpha ; DenseMtx_colGenAxpy(&Rtmp, vecD, 0, one, vecY2, 0); Theta = DenseMtx_twoNormOfColumn(vecW,0)/Tau; Cee = 1.0/sqrt(1.0 + Theta*Theta); Tau = Tau * Theta * Cee ; Eta = Cee * Cee * Alpha ; DenseMtx_colGenAxpy(one, vecX, 0, &Eta, vecD, 0); fprintf(msgFile, "\n\n Even step at %d", Imv) ; /* ---------------- Convergence Test for even step --------------- */ if (Tau * sqrt(m + 1) <= error_trol ) { DenseMtx_colCopy(mtxX, 0, vecX, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, mtxX) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, mtxX) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } DenseMtx_sub(vecT, mtxB) ; Rtmp = DenseMtx_twoNormOfColumn(vecT,0); fprintf(msgFile, "\n TFQMRL Residual norm: %6.2e ", Rtmp) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n after TFQMRL") ; goto end; }; if (Rho == 0){ fprintf(msgFile, "\n\n Fatal Error, \n" " TFQMRL Breakdown, Rho = 0 !!") ; Imv = -1; goto end; }; DenseMtx_colDotProduct(vecW, 0, vecR, 0, &Rho_new); Beta = Rho_new / Rho; Rho = Rho_new ; DenseMtx_colCopy(vecY1, 0, vecY2, 0); DenseMtx_colGenAxpy(&Beta, vecY1, 0, one, vecW, 0); /* DenseMtx_zero(vecT) ; InpMtx_nonsym_mmm(mtxA, vecT, one, vecY1) ; */ switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; case SPOOLES_HERMITIAN : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_gmmm(mtxA, zero, vecT, one, vecY1) ; break ; default : fprintf(msgFile, "\n TFQMRL Matrix type wrong"); fprintf(msgFile, "\n Fatal error"); goto end; } FrontMtx_solve(Precond, vecU1, vecT, Precond->manager, cpus, msglvl, msgFile) ; Imv++; /* */ DenseMtx_colCopy(vecT, 0, vecU2, 0); DenseMtx_colGenAxpy(one, vecT, 0, &Beta, vecV, 0); DenseMtx_colCopy(vecV, 0, vecT, 0); DenseMtx_colGenAxpy(&Beta, vecV, 0, one, vecU1, 0); Rtmp = Tau*sqrt(m + 1)/Init_norm ; fprintf(msgFile, "\n\n At iteration %d" " the convergence ratio is %12.4e", Imv, Rtmp) ; } /* End of while loop */ MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Total iteration time is : %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Imv) ; fprintf(msgFile, "\n\n TFQMRL did not Converge !") ; fprintf(msgFile, "\n\n after TFQMRL") ; DenseMtx_colCopy(mtxX, 0, vecX, 0); /* ------------------------ free the working storage ------------------------ */ end: DenseMtx_free(vecD) ; DenseMtx_free(vecR) ; DenseMtx_free(vecT) ; DenseMtx_free(vecU1) ; DenseMtx_free(vecU2) ; DenseMtx_free(vecV) ; DenseMtx_free(vecW) ; DenseMtx_free(vecX) ; DenseMtx_free(vecY1) ; DenseMtx_free(vecY2) ; fprintf(msgFile, "\n") ; return(Imv) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------ (1) read in an ETree object. (2) read in an Graph object. (3) find the optimal domain/schur complement partition for a semi-implicit factorization created -- 96oct03, cca ------------------------------------------------------ */ { char *inETreeFileName, *inGraphFileName, *outIVfileName ; double alpha, nA21, nfent1, nfops1, nL11, nL22, nPhi, nV, t1, t2 ; Graph *graph ; int ii, inside, J, K, msglvl, nfind1, nfront, nJ, nleaves1, nnode1, nvtx, rc, sizeJ, totalgain, vsize, v, w ; int *adjJ, *compids, *nodwghts, *vadj, *vtxToFront, *vwghts ; IV *compidsIV ; IVL *symbfacIVL ; ETree *etree ; FILE *msgFile ; Tree *tree ; if ( argc != 7 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inETreeFile inGraphFile alpha" "\n outIVfile " "\n msglvl -- message level" "\n msgFile -- message file" "\n inETreeFile -- input file, must be *.etreef or *.etreeb" "\n inGraphFile -- input file, must be *.graphf or *.graphb" "\n alpha -- weight parameter" "\n alpha = 0 --> minimize storage" "\n alpha = 1 --> minimize solve ops" "\n outIVfile -- output file for oldToNew vector," "\n must be *.ivf or *.ivb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inETreeFileName = argv[3] ; inGraphFileName = argv[4] ; alpha = atof(argv[5]) ; outIVfileName = argv[6] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inETreeFile -- %s" "\n inGraphFile -- %s" "\n alpha -- %f" "\n outIVfile -- %s" "\n", argv[0], msglvl, argv[2], inETreeFileName, inGraphFileName, alpha, outIVfileName) ; fflush(msgFile) ; /* ------------------------ read in the ETree object ------------------------ */ if ( strcmp(inETreeFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; spoolesFatal(); } etree = ETree_new() ; MARKTIME(t1) ; rc = ETree_readFromFile(etree, inETreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in etree from file %s", t2 - t1, inETreeFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)", rc, etree, inETreeFileName) ; spoolesFatal(); } ETree_leftJustify(etree) ; fprintf(msgFile, "\n\n after reading ETree object from file %s", inETreeFileName) ; if ( msglvl > 2 ) { ETree_writeForHumanEye(etree, msgFile) ; } else { ETree_writeStats(etree, msgFile) ; } fflush(msgFile) ; nfront = ETree_nfront(etree) ; tree = ETree_tree(etree) ; nodwghts = ETree_nodwghts(etree) ; vtxToFront = ETree_vtxToFront(etree) ; /* ------------------------ read in the Graph object ------------------------ */ if ( strcmp(inGraphFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; spoolesFatal(); } graph = Graph_new() ; MARKTIME(t1) ; rc = Graph_readFromFile(graph, inGraphFileName) ; nvtx = graph->nvtx ; vwghts = graph->vwghts ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inGraphFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from Graph_readFromFile(%p,%s)", rc, graph, inGraphFileName) ; spoolesFatal(); } fprintf(msgFile, "\n\n after reading Graph object from file %s", inGraphFileName) ; if ( msglvl > 2 ) { Graph_writeForHumanEye(graph, msgFile) ; } else { Graph_writeStats(graph, msgFile) ; } fflush(msgFile) ; /* ---------------------- compute the statistics ---------------------- */ nnode1 = etree->tree->n ; nfind1 = ETree_nFactorIndices(etree) ; nfent1 = ETree_nFactorEntries(etree, SPOOLES_SYMMETRIC) ; nfops1 = ETree_nFactorOps(etree, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; nleaves1 = Tree_nleaves(etree->tree) ; fprintf(stdout, "\n root front %d has %d vertices", etree->tree->root, etree->nodwghtsIV->vec[etree->tree->root]) ; /* --------------------------------- create the symbolic factorization --------------------------------- */ symbfacIVL = SymbFac_initFromGraph(etree, graph) ; if ( msglvl > 2 ) { IVL_writeForHumanEye(symbfacIVL, msgFile) ; } else { IVL_writeStats(symbfacIVL, msgFile) ; } fflush(msgFile) ; /* -------------------------- find the optimal partition -------------------------- */ compidsIV = ETree_optPart(etree, graph, symbfacIVL, alpha, &totalgain, msglvl, msgFile) ; if ( msglvl > 2 ) { IV_writeForHumanEye(compidsIV, msgFile) ; } else { IV_writeStats(compidsIV, msgFile) ; } fflush(msgFile) ; compids = IV_entries(compidsIV) ; /* ------------------------------------------------------ compute the number of vertices in the schur complement ------------------------------------------------------ */ for ( J = 0, nPhi = nV = 0. ; J < nfront ; J++ ) { if ( compids[J] == 0 ) { nPhi += nodwghts[J] ; } nV += nodwghts[J] ; } /* -------------------------------------------- compute the number of entries in L11 and L22 -------------------------------------------- */ nL11 = nL22 = 0 ; for ( J = Tree_postOTfirst(tree) ; J != -1 ; J = Tree_postOTnext(tree, J) ) { nJ = nodwghts[J] ; if ( msglvl > 3 ) { fprintf(msgFile, "\n\n front %d, nJ = %d", J, nJ) ; } IVL_listAndSize(symbfacIVL, J, &sizeJ, &adjJ) ; for ( ii = 0, inside = 0 ; ii < sizeJ ; ii++ ) { w = adjJ[ii] ; K = vtxToFront[w] ; if ( msglvl > 3 ) { fprintf(msgFile, "\n w = %d, K = %d", w, K) ; } if ( K > J && compids[K] == compids[J] ) { inside += (vwghts == NULL) ? 1 : vwghts[w] ; if ( msglvl > 3 ) { fprintf(msgFile, ", inside") ; } } } if ( compids[J] != 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n inside = %d, adding %d to L11", inside, nJ*nJ + 2*nJ*inside) ; } nL11 += (nJ*(nJ+1))/2 + nJ*inside ; } else { if ( msglvl > 3 ) { fprintf(msgFile, "\n inside = %d, adding %d to L22", inside, (nJ*(nJ+1))/2 + nJ*inside) ; } nL22 += (nJ*(nJ+1))/2 + nJ*inside ; } } if ( msglvl > 0 ) { fprintf(msgFile, "\n |L| = %.0f, |L11| = %.0f, |L22| = %.0f", nfent1, nL11, nL22) ; } /* ------------------------------------ compute the number of entries in A21 ------------------------------------ */ nA21 = 0 ; if ( vwghts != NULL ) { for ( v = 0 ; v < nvtx ; v++ ) { J = vtxToFront[v] ; if ( compids[J] != 0 ) { Graph_adjAndSize(graph, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; K = vtxToFront[w] ; if ( compids[K] == 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n A21 : v = %d, w = %d", v, w) ; } nA21 += vwghts[v] * vwghts[w] ; } } } } } else { for ( v = 0 ; v < nvtx ; v++ ) { J = vtxToFront[v] ; if ( compids[J] != 0 ) { Graph_adjAndSize(graph, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; K = vtxToFront[w] ; if ( compids[K] == 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n A21 : v = %d, w = %d", v, w) ; } nA21++ ; } } } } } if ( msglvl > 0 ) { fprintf(msgFile, "\n |L| = %.0f, |L11| = %.0f, |L22| = %.0f, |A21| = %.0f", nfent1, nL11, nL22, nA21) ; fprintf(msgFile, "\n storage: explicit = %.0f, semi-implicit = %.0f, ratio = %.3f" "\n opcount: explicit = %.0f, semi-implicit = %.0f, ratio = %.3f", nfent1, nL11 + nA21 + nL22, nfent1/(nL11 + nA21 + nL22), 2*nfent1, 4*nL11 + 2*nA21 + 2*nL22, 2*nfent1/(4*nL11 + 2*nA21 + 2*nL22)) ; fprintf(msgFile, "\n ratios %8.3f %8.3f %8.3f", nPhi/nV, nfent1/(nL11 + nA21 + nL22), 2*nfent1/(4*nL11 + 2*nA21 + 2*nL22)) ; } /* ---------------- free the objects ---------------- */ ETree_free(etree) ; Graph_free(graph) ; IVL_free(symbfacIVL) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------------------- read BPG from file and get the Dulmage-Mendelsohn decomposition created -- 96mar08, cca --------------------------------------------------------------- */ { char *inBPGFileName ; double t1, t2 ; int ierr, msglvl, rc ; int *dmflags, *stats ; BPG *bpg ; FILE *msgFile ; if ( argc != 4 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inFile " "\n msglvl -- message level" "\n msgFile -- message file" "\n inFile -- input file, must be *.bpgf or *.bpgb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inBPGFileName = argv[3] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inFile -- %s" "\n", argv[0], msglvl, argv[2], inBPGFileName) ; fflush(msgFile) ; /* ---------------------- read in the BPG object ---------------------- */ if ( strcmp(inBPGFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } bpg = BPG_new() ; MARKTIME(t1) ; rc = BPG_readFromFile(bpg, inBPGFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inBPGFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from BPG_readFromFile(%p,%s)", rc, bpg, inBPGFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading BPG object from file %s", inBPGFileName) ; if ( msglvl > 2 ) { BPG_writeForHumanEye(bpg, msgFile) ; } else { BPG_writeStats(bpg, msgFile) ; } fflush(msgFile) ; /* -------------------------------------------- test out the max flow DMdecomposition method -------------------------------------------- */ dmflags = IVinit(bpg->nX + bpg->nY, -1) ; stats = IVinit(6, 0) ; MARKTIME(t1) ; BPG_DMviaMaxFlow(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via maxflow", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMviaMaxFlow" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ------------------------------------------ test out the matching DMcomposition method ------------------------------------------ */ IVfill(bpg->nX + bpg->nY, dmflags, -1) ; IVfill(6, stats, -1) ; MARKTIME(t1) ; BPG_DMdecomposition(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via matching", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMdecomposition" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ---------------- free the storage ---------------- */ IVfree(dmflags) ; IVfree(stats) ; BPG_free(bpg) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------ test the copyEntriesToVector routine created -- 98may01, cca, ------------------------------------ */ { Chv *chvJ, *chvI ; double imag, real, t1, t2 ; double *dvec, *entries ; Drand *drand ; FILE *msgFile ; int count, first, ierr, ii, iilast, ipivot, irow, jcol, jj, jjlast, maxnent, mm, msglvl, ncol, nD, nent, nentD, nentL, nentL11, nentL21, nentU, nentU11, nentU12, nL, npivot, nrow, nU, pivotingflag, seed, storeflag, symflag, total, type ; int *colind, *pivotsizes, *rowind ; if ( argc != 10 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile nD nU type symflag " "\n pivotingflag storeflag seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n nD -- # of rows and columns in the (1,1) block" "\n nU -- # of columns in the (1,2) block" "\n type -- entries type" "\n 1 --> real" "\n 2 --> complex" "\n symflag -- symmetry flag" "\n 0 --> symmetric" "\n 1 --> nonsymmetric" "\n pivotingflag -- pivoting flag" "\n if symflag = 1 and pivotingflag = 1 then" "\n construct pivotsizes[] vector" "\n endif" "\n storeflag -- flag to denote how to store entries" "\n 0 --> store by rows" "\n 1 --> store by columns" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } nD = atoi(argv[3]) ; nU = atoi(argv[4]) ; type = atoi(argv[5]) ; symflag = atoi(argv[6]) ; pivotingflag = atoi(argv[7]) ; storeflag = atoi(argv[8]) ; seed = atoi(argv[9]) ; if ( msglvl > 0 ) { switch ( storeflag ) { case 0 : fprintf(msgFile, "\n\n %% STORE BY ROWS") ; break ; case 1 : fprintf(msgFile, "\n\n %% STORE BY COLUMNS") ; break ; default : fprintf(stderr, "\n bad value %d for storeflag", storeflag) ; break ; } } nL = nU ; if ( symflag == SPOOLES_NONSYMMETRIC ) { pivotingflag = 0 ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setNormal(drand, 0.0, 1.0) ; Drand_setSeed(drand, seed) ; /* -------------------------- initialize the chvJ object -------------------------- */ MARKTIME(t1) ; chvJ = Chv_new() ; Chv_init(chvJ, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix objects", t2 - t1) ; nent = Chv_nent(chvJ) ; entries = Chv_entries(chvJ) ; if ( CHV_IS_REAL(chvJ) ) { Drand_fillDvector(drand, nent, entries) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { Drand_fillDvector(drand, 2*nent, entries) ; } Chv_columnIndices(chvJ, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; if ( CHV_IS_NONSYMMETRIC(chvJ) ) { Chv_rowIndices(chvJ, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron a") ; Chv_writeForMatlab(chvJ, "a", msgFile) ; fflush(msgFile) ; } /* -------------------------- initialize the chvI object -------------------------- */ MARKTIME(t1) ; chvI = Chv_new() ; Chv_init(chvI, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix objects", t2 - t1) ; Chv_zero(chvI) ; Chv_columnIndices(chvI, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; if ( CHV_IS_NONSYMMETRIC(chvI) ) { Chv_rowIndices(chvI, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; } if ( symflag == 0 && pivotingflag == 1 ) { /* ------------------------------ create the pivotsizes[] vector ------------------------------ */ Drand_setUniform(drand, 1, 2.999) ; pivotsizes = IVinit(nD, 0) ; Drand_fillIvector(drand, nD, pivotsizes) ; /* fprintf(msgFile, "\n initial pivotsizes[] : ") ; IVfp80(msgFile, nD, pivotsizes, 80, &ierr) ; */ for ( npivot = count = 0 ; npivot < nD ; npivot++ ) { count += pivotsizes[npivot] ; if ( count > nD ) { pivotsizes[npivot]-- ; count-- ; } if ( count == nD ) { break ; } } npivot++ ; /* fprintf(msgFile, "\n final pivotsizes[] : ") ; IVfp80(msgFile, npivot, pivotsizes, 80, &ierr) ; */ } else { npivot = 0 ; pivotsizes = NULL ; } /* -------------------------------------------------- first test: copy lower, diagonal and upper entries -------------------------------------------------- */ if ( CHV_IS_NONSYMMETRIC(chvJ) ) { nentL = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_LOWER); } else { nentL = 0 ; } nentD = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_DIAGONAL) ; nentU = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_UPPER) ; maxnent = nentL ; if ( maxnent < nentD ) { maxnent = nentD ; } if ( maxnent < nentU ) { maxnent = nentU ; } if ( CHV_IS_REAL(chvJ) ) { dvec = DVinit(maxnent, 0.0) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { dvec = DVinit(2*maxnent, 0.0) ; } if ( CHV_IS_NONSYMMETRIC(chvJ) ) { /* -------------------------------------- copy the entries in the lower triangle, then move into the chvI object -------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_LOWER, storeflag) ; if ( nent != nentL ) { fprintf(stderr, "\n error: nentL = %d, nent = %d", nentL, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = 0, mm = 0 ; irow < nrow ; irow++ ) { jjlast = (irow < nD) ? irow - 1 : nD - 1 ; for ( jj = 0 ; jj <= jjlast ; jj++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jj, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jj, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = jcol + 1 ; irow < nrow ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; /* fprintf(msgFile, "\n %% mm = %d, a(%d,%d) = %20.12e + %20.12e*i", mm, irow, jcol, real, imag) ; */ Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } /* --------------------------------------- copy the entries in the diagonal matrix then move into the chvI object --------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_DIAGONAL, storeflag) ; if ( nent != nentD ) { fprintf(stderr, "\n error: nentD = %d, nent = %d", nentD, nent) ; exit(-1) ; } if ( pivotsizes == NULL ) { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, jcol, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, jcol, jcol, real, imag) ; } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; } mm++ ; irow++ ; } else { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow+1, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, irow+1, real) ; mm++ ; irow += 2 ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow+1, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, irow+1, real, imag) ; mm++ ; irow += 2 ; } } } } /* -------------------------------------- copy the entries in the upper triangle, then move into the chvI object -------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_UPPER, storeflag) ; if ( nent != nentU ) { fprintf(stderr, "\n error: nentU = %d, nent = %d", nentU, nent) ; exit(-1) ; } if ( storeflag == 1 ) { if ( pivotsizes == NULL ) { for ( jcol = mm = 0 ; jcol < ncol ; jcol++ ) { iilast = (jcol < nD) ? jcol - 1 : nD - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } } } else { for ( ipivot = jcol = mm = 0 ; ipivot < npivot ; ipivot++ ) { iilast = jcol - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; if ( pivotsizes[ipivot] == 2 ) { for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; } } for ( jcol = nD ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } else { if ( pivotsizes == NULL ) { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = irow + 1 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { for ( jcol = irow + 1 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } irow++ ; } else { for ( jcol = irow + 2 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } for ( jcol = irow + 2 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, jcol, real, imag) ; } } irow += 2 ; } } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron b") ; Chv_writeForMatlab(chvI, "b", msgFile) ; fprintf(msgFile, "\n\n emtx1 = abs(a - b) ; enorm1 = max(max(emtx1))") ; fflush(msgFile) ; } DVfree(dvec) ; /* ----------------------------------------------------- second test: copy lower (1,1), lower (2,1), diagonal, upper(1,1) and upper(1,2) blocks ----------------------------------------------------- */ if ( CHV_IS_NONSYMMETRIC(chvJ) ) { nentL11 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_LOWER_11) ; nentL21 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_LOWER_21) ; } else { nentL11 = 0 ; nentL21 = 0 ; } nentD = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_DIAGONAL) ; nentU11 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_UPPER_11) ; nentU12 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_UPPER_12) ; maxnent = nentL11 ; if ( maxnent < nentL21 ) { maxnent = nentL21 ; } if ( maxnent < nentD ) { maxnent = nentD ; } if ( maxnent < nentU11 ) { maxnent = nentU11 ; } if ( maxnent < nentU12 ) { maxnent = nentU12 ; } fprintf(msgFile, "\n %% nentL11 = %d, nentL21 = %d" "\n %% nentD = %d, nentU11 = %d, nentU12 = %d", nentL11, nentL21, nentD, nentU11, nentU12) ; if ( CHV_IS_REAL(chvJ) ) { dvec = DVinit(maxnent, 0.0) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { dvec = DVinit(2*maxnent, 0.0) ; } Chv_zero(chvI) ; if ( CHV_IS_NONSYMMETRIC(chvJ) ) { /* ------------------------------------------ copy the entries in the lower (1,1) block, then move into the chvI object ------------------------------------------ */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_LOWER_11, storeflag) ; if ( nent != nentL11 ) { fprintf(stderr, "\n error: nentL = %d, nent = %d", nentL, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = 0, mm = 0 ; irow < nD ; irow++ ) { jjlast = (irow < nD) ? irow - 1 : nD - 1 ; for ( jj = 0 ; jj <= jjlast ; jj++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jj, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jj, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = jcol + 1 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } /* ------------------------------------------ copy the entries in the lower (2,1) block, then move into the chvI object ------------------------------------------ */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_LOWER_21, storeflag); if ( nent != nentL21 ) { fprintf(stderr, "\n error: nentL21 = %d, nent = %d", nentL21, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = nD, mm = 0 ; irow < nrow ; irow++ ) { for ( jcol = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = nD ; irow < nrow ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } /* --------------------------------------- copy the entries in the diagonal matrix then move into the chvI object --------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_DIAGONAL, storeflag) ; if ( nent != nentD ) { fprintf(stderr, "\n error: nentD = %d, nent = %d", nentD, nent) ; exit(-1) ; } if ( pivotsizes == NULL ) { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, jcol, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, jcol, jcol, real, imag) ; } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; } mm++ ; irow++ ; } else { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow+1, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, irow+1, real) ; mm++ ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow+1, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, irow+1, real, imag) ; mm++ ; } irow += 2 ; } } } /* ----------------------------------------- copy the entries in the upper (1,1) block then move into the chvI object ----------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_UPPER_11, storeflag) ; if ( nent != nentU11 ) { fprintf(stderr, "\n error: nentU11 = %d, nent = %d", nentU11, nent) ; exit(-1) ; } if ( storeflag == 1 ) { if ( pivotsizes == NULL ) { for ( jcol = mm = 0 ; jcol < nD ; jcol++ ) { iilast = (jcol < nD) ? jcol - 1 : nD - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } } } else { for ( ipivot = jcol = mm = 0 ; ipivot < npivot ; ipivot++ ) { iilast = jcol - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; if ( pivotsizes[ipivot] == 2 ) { for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; } } } } else { if ( pivotsizes == NULL ) { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = irow + 1 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { for ( jcol = irow + 1 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } irow++ ; } else { for ( jcol = irow + 2 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } for ( jcol = irow + 2 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, jcol, real, imag) ; } } irow += 2 ; } } } } /* ----------------------------------------- copy the entries in the upper (1,2) block then move into the chvI object ----------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_UPPER_12, storeflag) ; if ( nent != nentU12 ) { fprintf(stderr, "\n error: nentU12 = %d, nent = %d", nentU12, nent) ; exit(-1) ; } if ( storeflag == 1 ) { for ( jcol = nD, mm = 0 ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = nD ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron b") ; Chv_writeForMatlab(chvI, "b", msgFile) ; fprintf(msgFile, "\n\n emtx2 = abs(a - b) ; enorm2 = max(max(emtx2))") ; fprintf(msgFile, "\n\n [ enorm1 enorm2]") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ if ( pivotsizes != NULL ) { IVfree(pivotsizes) ; } Chv_free(chvJ) ; Chv_free(chvI) ; Drand_free(drand) ; DVfree(dvec) ; fprintf(msgFile, "\n") ; return(1) ; }
int main ( int argc, char *argv[] ) /* ----------------------------------------------------- test the factor method for a grid matrix (1) construct a linear system for a nested dissection ordering on a regular grid (2) create a solution matrix object (3) multiply the solution with the matrix to get a right hand side matrix object (4) factor the matrix (5) solve the system created -- 98may16, cca ----------------------------------------------------- */ { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX, *mtxZ ; FrontMtx *frontmtx ; InpMtx *mtxA ; SubMtxManager *mtxmanager ; double cputotal, droptol, factorops ; double cpus[9] ; Drand drand ; double nops, tau, t1, t2 ; ETree *frontETree ; FILE *msgFile ; int error, lockflag, maxsize, maxzeros, msglvl, neqns, n1, n2, n3, nrhs, nzf, pivotingflag, seed, sparsityflag, symmetryflag, type ; int stats[6] ; IVL *symbfacIVL ; if ( argc != 17 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n1 n2 n3 maxzeros maxsize" "\n seed type symmetryflag sparsityflag " "\n pivotingflag tau droptol lockflag nrhs" "\n msglvl -- message level" "\n msgFile -- message file" "\n n1 -- number of grid points in the first direction" "\n n2 -- number of grid points in the second direction" "\n n3 -- number of grid points in the third direction" "\n maxzeros -- max number of zeroes in a front" "\n maxsize -- max number of internal nodes in a front" "\n seed -- random number seed" "\n type -- type of entries" "\n 1 --> real" "\n 2 --> complex" "\n symmetryflag -- symmetry flag" "\n 0 --> symmetric " "\n 1 --> hermitian" "\n 2 --> nonsymmetric" "\n sparsityflag -- sparsity flag" "\n 0 --> store dense fronts" "\n 1 --> store sparse fronts, use droptol to drop entries" "\n pivotingflag -- pivoting flag" "\n 0 --> do not pivot" "\n 1 --> enable pivoting" "\n tau -- upper bound on factor entries" "\n used only with pivoting" "\n droptol -- lower bound on factor entries" "\n used only with sparse fronts" "\n lockflag -- flag to specify lock status" "\n 0 --> mutex lock is not allocated or initialized" "\n 1 --> mutex lock is allocated and it can synchronize" "\n only threads in this process." "\n 2 --> mutex lock is allocated and it can synchronize" "\n only threads in this and other processes." "\n nrhs -- # of right hand sides" "\n", argv[0]) ; return(-1) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } n1 = atoi(argv[3]) ; n2 = atoi(argv[4]) ; n3 = atoi(argv[5]) ; maxzeros = atoi(argv[6]) ; maxsize = atoi(argv[7]) ; seed = atoi(argv[8]) ; type = atoi(argv[9]) ; symmetryflag = atoi(argv[10]) ; sparsityflag = atoi(argv[11]) ; pivotingflag = atoi(argv[12]) ; tau = atof(argv[13]) ; droptol = atof(argv[14]) ; lockflag = atoi(argv[15]) ; nrhs = atoi(argv[16]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n n1 -- %d" "\n n2 -- %d" "\n n3 -- %d" "\n maxzeros -- %d" "\n maxsize -- %d" "\n seed -- %d" "\n type -- %d" "\n symmetryflag -- %d" "\n sparsityflag -- %d" "\n pivotingflag -- %d" "\n tau -- %e" "\n droptol -- %e" "\n lockflag -- %d" "\n nrhs -- %d" "\n", argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed, type, symmetryflag, sparsityflag, pivotingflag, tau, droptol, lockflag, nrhs) ; fflush(msgFile) ; neqns = n1 * n2 * n3 ; /* -------------------------------------- initialize the random number generator -------------------------------------- */ Drand_setDefaultFields(&drand) ; Drand_init(&drand) ; Drand_setSeed(&drand, seed) ; /* Drand_setUniform(&drand, 0.0, 1.0) ; */ Drand_setNormal(&drand, 0.0, 1.0) ; /* -------------------------- generate the linear system -------------------------- */ mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type, symmetryflag, nrhs, seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n mtxA") ; InpMtx_writeForHumanEye(mtxA, msgFile) ; fprintf(msgFile, "\n mtxX") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fprintf(msgFile, "\n mtxB") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } /* fprintf(msgFile, "\n neqns = %d ;", n1*n2*n3) ; fprintf(msgFile, "\n nrhs = %d ;", nrhs) ; fprintf(msgFile, "\n A = zeros(neqns, neqns) ;") ; fprintf(msgFile, "\n X = zeros(neqns, nrhs) ;") ; fprintf(msgFile, "\n B = zeros(neqns, nrhs) ;") ; InpMtx_writeForMatlab(mtxA, "A", msgFile) ; DenseMtx_writeForMatlab(mtxX, "X", msgFile) ; DenseMtx_writeForMatlab(mtxB, "B", msgFile) ; { int *ivec1 = InpMtx_ivec1(mtxA) ; int *ivec2 = InpMtx_ivec2(mtxA) ; double *dvec = InpMtx_dvec(mtxA) ; int ichv, ii, col, offset, row, nent = InpMtx_nent(mtxA) ; fprintf(msgFile, "\n coordType = %d", mtxA->coordType) ; fprintf(msgFile, "\n start of matrix output file") ; fprintf(msgFile, "\n %d %d %d", n1*n2*n3, n1*n2*n3, nent) ; for ( ii = 0 ; ii < nent ; ii++ ) { ichv = ivec1[ii] ; if ( (offset = ivec2[ii]) >= 0 ) { row = ichv, col = row + offset ; } else { col = ichv, row = col - offset ; } fprintf(msgFile, "\n %d %d %24.16e %24.16e", row, col, dvec[2*ii], dvec[2*ii+1]) ; } } { int ii, jj ; double imag, real ; fprintf(msgFile, "\n start of rhs output file") ; fprintf(msgFile, "\n %d %d", n1*n2*n3, nrhs) ; for ( ii = 0 ; ii < n1*n2*n3 ; ii++ ) { fprintf(msgFile, "\n %d ", ii) ; for ( jj = 0 ; jj < nrhs ; jj++ ) { DenseMtx_complexEntry(mtxB, ii, jj, &real, &imag) ; fprintf(msgFile, " %24.16e %24.16e", real, imag) ; } } } */ /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, lockflag, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, sparsityflag, pivotingflag, lockflag, 0, NULL, mtxmanager, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n nendD = %d, nentL = %d, nentU = %d", frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n front matrix initialized") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(frontETree, symmetryflag) ; factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ; fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, factorops, factorops/nzf) ; IVzero(6, stats) ; DVzero(9, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, lockflag, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, &error, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; if ( rootchv != NULL ) { fprintf(msgFile, "\n\n factorization did not complete") ; for ( chv = rootchv ; chv != NULL ; chv = chv->next ) { fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d", chv->id, chv->nD, chv->nL, chv->nU) ; } } if ( error >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d\n", error) ; exit(-1) ; } fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns", stats[0], stats[1], stats[2]) ; if ( frontmtx->rowadjIVL != NULL ) { fprintf(msgFile, "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ; } if ( frontmtx->coladjIVL != NULL ) { fprintf(msgFile, ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ; } if ( frontmtx->upperblockIVL != NULL ) { fprintf(msgFile, "\n %d fronts, %d entries in upperblockIVL", frontmtx->nfront, frontmtx->upperblockIVL->tsize) ; } if ( frontmtx->lowerblockIVL != NULL ) { fprintf(msgFile, ", %d entries in lowerblockIVL", frontmtx->lowerblockIVL->tsize) ; } fprintf(msgFile, "\n %d entries in D, %d entries in L, %d entries in U", stats[3], stats[4], stats[5]) ; fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ; cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %% MATLAB file: front factor matrix") ; FrontMtx_writeForMatlab(frontmtx, "L", "D", "U", msgFile) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* code to test out the IO methods. write the matrix to a file, free it, then read it back in. note: formatted files do not have much accuracy. */ /* FrontMtx_writeToFile(frontmtx, "temp.frontmtxb") ; FrontMtx_free(frontmtx) ; frontmtx = FrontMtx_new() ; FrontMtx_readFromFile(frontmtx, "temp.frontmtxb") ; frontmtx->manager = mtxmanager ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; */ /* ---------------- solve the system ---------------- */ neqns = mtxB->nrow ; nrhs = mtxB->ncol ; mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxZ) ; if ( type == SPOOLES_REAL ) { nops = frontmtx->nentD + 2*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 2*frontmtx->nentL ; } else { nops += 2*frontmtx->nentU ; } } else if ( type == SPOOLES_COMPLEX ) { nops = 8*frontmtx->nentD + 8*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 8*frontmtx->nentL ; } else { nops += 8*frontmtx->nentU ; } } nops *= nrhs ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } DVzero(6, cpus) ; MARKTIME(t1) ; FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager, cpus, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops", t2 - t1, 1.e-6*nops/(t2 - t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cputotal) ; } /* fprintf(msgFile, "\n Z = zeros(neqns, nrhs) ;") ; DenseMtx_writeForMatlab(mtxZ, "Z", msgFile) ; */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } DenseMtx_sub(mtxZ, mtxX) ; fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n after solve") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxB) ; DenseMtx_free(mtxZ) ; FrontMtx_free(frontmtx) ; ETree_free(frontETree) ; IVL_free(symbfacIVL) ; ChvManager_free(chvmanager) ; SubMtxManager_free(mtxmanager) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------- test Perm_readFromFile and Perm_writeToFile, useful for translating between formatted *.permf and binary *.permb files. created -- 96may02, cca ------------------------------------------------- */ { char *inPermFileName, *outPermFileName ; double t1, t2 ; int msglvl, rc ; Perm *perm ; FILE *msgFile ; if ( argc != 5 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inFile outFile" "\n msglvl -- message level" "\n msgFile -- message file" "\n inFile -- input file, must be *.permf or *.permb" "\n outFile -- output file, must be *.permf or *.permb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inPermFileName = argv[3] ; outPermFileName = argv[4] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inFile -- %s" "\n outFile -- %s" "\n", argv[0], msglvl, argv[2], inPermFileName, outPermFileName) ; fflush(msgFile) ; /* ----------------------- read in the Perm object ----------------------- */ if ( strcmp(inPermFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } perm = Perm_new() ; MARKTIME(t1) ; rc = Perm_readFromFile(perm, inPermFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in perm from file %s", t2 - t1, inPermFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from Perm_readFromFile(%p,%s)", rc, perm, inPermFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading Perm object from file %s", inPermFileName) ; if ( msglvl > 2 ) { Perm_writeForHumanEye(perm, msgFile) ; } else { Perm_writeStats(perm, msgFile) ; } fflush(msgFile) ; /* - ------------------------ write out the Perm object - ------------------------ */ if ( strcmp(outPermFileName, "none") != 0 ) { MARKTIME(t1) ; rc = Perm_writeToFile(perm, outPermFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write perm to file %s", t2 - t1, outPermFileName) ; } if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from Perm_writeToFile(%p,%s)", rc, perm, outPermFileName) ; } /* -------------------- free the Perm object -------------------- */ Perm_free(perm) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/* -------------------------------------------------------------------- fill *pndom with ndom, the number of domains. fill *pnseg with nseg, the number of segments. domains are numbered in [0, ndom), segments in [ndom,ndom+nseg). return -- an IV object that contains the map from vertices to segments created -- 99feb29, cca -------------------------------------------------------------------- */ IV * GPart_domSegMap ( GPart *gpart, int *pndom, int *pnseg ) { FILE *msgFile ; Graph *g ; int adjdom, count, d, first, ierr, ii, jj1, jj2, last, ndom, msglvl, nextphi, nPhi, nPsi, nV, phi, phi0, phi1, phi2, phi3, psi, sigma, size, size0, size1, size2, v, vsize, w ; int *adj, *adj0, *adj1, *adj2, *compids, *dmark, *dsmap, *head, *link, *list, *offsets, *PhiToPsi, *PhiToV, *PsiToSigma, *vadj, *VtoPhi ; IV *dsmapIV ; IVL *PhiByPhi, *PhiByPowD, *PsiByPowD ; /* -------------------- set the initial time -------------------- */ icputimes = 0 ; MARKTIME(cputimes[icputimes]) ; /* --------------- check the input --------------- */ if ( gpart == NULL || (g = gpart->g) == NULL || pndom == NULL || pnseg == NULL ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n bad input\n", gpart, pndom, pnseg) ; exit(-1) ; } compids = IV_entries(&gpart->compidsIV) ; msglvl = gpart->msglvl ; msgFile = gpart->msgFile ; /* ------------------------ create the map IV object ------------------------ */ nV = g->nvtx ; dsmapIV = IV_new() ; IV_init(dsmapIV, nV, NULL) ; dsmap = IV_entries(dsmapIV) ; /* ---------------------------------- check compids[] and get the number of domains and interface vertices ---------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; ndom = nPhi = 0 ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) < 0 ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n compids[%d] = %d\n", gpart, pndom, pnseg, v, compids[v]) ; exit(-1) ; } else if ( d == 0 ) { nPhi++ ; } else if ( ndom < d ) { ndom = d ; } } *pndom = ndom ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Inside GPart_domSegMap") ; fprintf(msgFile, "\n %d domains, %d Phi vertices", ndom, nPhi) ; } if ( ndom == 1 ) { IVfill(nV, dsmap, 0) ; *pndom = 1 ; *pnseg = 0 ; return(dsmapIV) ; } /* -------------------------------- get the maps PhiToV : [0,nPhi) |---> [0,nV) VtoPhi : [0,nV) |---> [0,nPhi) -------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToV = IVinit(nPhi, -1) ; VtoPhi = IVinit(nV, -1) ; for ( v = 0, phi = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) == 0 ) { PhiToV[phi] = v ; VtoPhi[v] = phi++ ; } } if ( phi != nPhi ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n phi = %d != %d = nPhi\n", gpart, pndom, pnseg, phi, nPhi) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToV(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToV, 15, &ierr) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n VtoPhi(%d) :", nV) ; IVfp80(msgFile, nV, VtoPhi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------------------------- create an IVL object, PhiByPowD, to hold lists from the interface vertices to their adjacent domains --------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; dmark = IVinit(ndom+1, -1) ; if ( nPhi >= ndom ) { list = IVinit(nPhi, -1) ; } else { list = IVinit(ndom, -1) ; } PhiByPowD = IVL_new() ; IVL_init1(PhiByPowD, IVL_CHUNKED, nPhi) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; /* if ( phi == 0 ) { int ierr ; fprintf(msgFile, "\n adj(%d,%d) = ", v, phi) ; IVfp80(msgFile, vsize, vadj, 15, &ierr) ; fflush(msgFile) ; } */ count = 0 ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != phi ) { dmark[d] = phi ; list[count++] = d ; } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PhiByPowD, phi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPowD : interface x adjacent domains") ; IVL_writeForHumanEye(PhiByPowD, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------- create an IVL object, PhiByPhi to hold lists from the interface vertices to interface vertices. (s,t) are in the list if (s,t) is an edge in the graph and s and t do not share an adjacent domain ------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiByPhi = IVL_new() ; IVL_init1(PhiByPhi, IVL_CHUNKED, nPhi) ; offsets = IVinit(nPhi, 0) ; head = IVinit(nPhi, -1) ; link = IVinit(nPhi, -1) ; for ( phi1 = 0 ; phi1 < nPhi ; phi1++ ) { count = 0 ; if ( msglvl > 2 ) { v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; fprintf(msgFile, "\n checking out phi = %d, v = %d", phi1, v) ; fprintf(msgFile, "\n adj(%d) : ", v) ; IVfp80(msgFile, vsize, vadj, 10, &ierr) ; } /* ------------------------------------------------------------- get (phi1, phi0) edges that were previously put into the list ------------------------------------------------------------- */ if ( msglvl > 3 ) { if ( head[phi1] == -1 ) { fprintf(msgFile, "\n no previous edges") ; } else { fprintf(msgFile, "\n previous edges :") ; } } for ( phi0 = head[phi1] ; phi0 != -1 ; phi0 = nextphi ) { if ( msglvl > 3 ) { fprintf(msgFile, " %d", phi0) ; } nextphi = link[phi0] ; list[count++] = phi0 ; IVL_listAndSize(PhiByPhi, phi0, &size0, &adj0) ; if ( (ii = ++offsets[phi0]) < size0 ) { /* ---------------------------- link phi0 into the next list ---------------------------- */ phi2 = adj0[ii] ; link[phi0] = head[phi2] ; head[phi2] = phi0 ; } } /* -------------------------- get new edges (phi1, phi2) -------------------------- */ IVL_listAndSize(PhiByPowD, phi1, &size1, &adj1) ; v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && compids[w] == 0 && (phi2 = VtoPhi[w]) > phi1 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n checking out phi2 = %d", phi2) ; } /* -------------------------------------------------- see if phi1 and phi2 have a common adjacent domain -------------------------------------------------- */ IVL_listAndSize(PhiByPowD, phi2, &size2, &adj2) ; adjdom = 0 ; jj1 = jj2 = 0 ; while ( jj1 < size1 && jj2 < size2 ) { if ( adj1[jj1] < adj2[jj2] ) { jj1++ ; } else if ( adj1[jj1] > adj2[jj2] ) { jj2++ ; } else { if ( msglvl > 3 ) { fprintf(msgFile, ", common adj domain %d", adj1[jj1]) ; } adjdom = 1 ; break ; } } if ( adjdom == 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, ", no adjacent domain") ; } list[count++] = phi2 ; } } } if ( count > 0 ) { /* --------------------- set the list for phi1 --------------------- */ IVqsortUp(count, list) ; IVL_setList(PhiByPhi, phi1, count, list) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n edge list for %d :", phi1) ; IVfp80(msgFile, count, list, 15, &ierr) ; } for ( ii = 0, phi2 = -1 ; ii < count ; ii++ ) { if ( list[ii] > phi1 ) { offsets[phi1] = ii ; phi2 = list[ii] ; break ; } } if ( phi2 != -1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n linking %d into list for %d", phi1, phi2) ; } link[phi1] = head[phi2] ; head[phi2] = phi1 ; } /* phi2 = list[0] ; link[phi1] = head[phi2] ; head[phi2] = phi1 ; */ } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPhi : interface x interface") ; IVL_writeForHumanEye(PhiByPhi, msgFile) ; fflush(msgFile) ; } /* -------------------- get the PhiToPsi map -------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToPsi = IVinit(nPhi, -1) ; nPsi = 0 ; for ( phi = 0 ; phi < nPhi ; phi++ ) { if ( PhiToPsi[phi] == -1 ) { /* --------------------------- phi not yet mapped to a psi --------------------------- */ first = last = 0 ; list[0] = phi ; PhiToPsi[phi] = nPsi ; while ( first <= last ) { phi2 = list[first++] ; IVL_listAndSize(PhiByPhi, phi2, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { phi3 = adj[ii] ; if ( PhiToPsi[phi3] == -1 ) { PhiToPsi[phi3] = nPsi ; list[++last] = phi3 ; } } } nPsi++ ; } } if ( msglvl > 1 ) { fprintf(msgFile, "\n nPsi = %d", nPsi) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToPsi(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToPsi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------- create an IVL object, Psi --> 2^D --------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVfill(nPsi, head, -1) ; IVfill(nPhi, link, -1) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { psi = PhiToPsi[phi] ; link[phi] = head[psi] ; head[psi] = phi ; } PsiByPowD = IVL_new() ; IVL_init1(PsiByPowD, IVL_CHUNKED, nPsi) ; IVfill(ndom+1, dmark, -1) ; for ( psi = 0 ; psi < nPsi ; psi++ ) { count = 0 ; for ( phi = head[psi] ; phi != -1 ; phi = link[phi] ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { if ( (w = adj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != psi ) { dmark[d] = psi ; list[count++] = d ; } } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PsiByPowD, psi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PsiByPowD(%d) :", nPhi) ; IVL_writeForHumanEye(PsiByPowD, msgFile) ; fflush(msgFile) ; } icputimes++ ; MARKTIME(cputimes[icputimes]) ; /* ------------------------------------- now get the map Psi |---> Sigma that is the equivalence map over PhiByPowD ------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PsiToSigma = IVL_equivMap1(PsiByPowD) ; *pnseg = 1 + IVmax(nPsi, PsiToSigma, &ii) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n nSigma = %d", *pnseg) ; fprintf(msgFile, "\n PsiToSigma(%d) :", nPhi) ; IVfp80(msgFile, nPsi, PsiToSigma, 15, &ierr) ; fflush(msgFile) ; } /* -------------------------------------------------------------- now fill the map from the vertices to the domains and segments -------------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) > 0 ) { dsmap[v] = d - 1 ; } else { phi = VtoPhi[v] ; psi = PhiToPsi[phi] ; sigma = PsiToSigma[psi] ; dsmap[v] = ndom + sigma ; } } /* ------------------------ free the working storage ------------------------ */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVL_free(PhiByPhi) ; IVL_free(PhiByPowD) ; IVL_free(PsiByPowD) ; IVfree(PhiToV) ; IVfree(VtoPhi) ; IVfree(dmark) ; IVfree(list) ; IVfree(PhiToPsi) ; IVfree(head) ; IVfree(link) ; IVfree(offsets) ; IVfree(PsiToSigma) ; icputimes++ ; MARKTIME(cputimes[icputimes]) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n domain/segment map timings split") ; fprintf(msgFile, "\n %9.5f : create the DSmap object" "\n %9.5f : get numbers of domain and interface vertices" "\n %9.5f : generate PhiToV and VtoPhi" "\n %9.5f : generate PhiByPowD" "\n %9.5f : generate PhiByPhi" "\n %9.5f : generate PhiToPsi" "\n %9.5f : generate PsiByPowD" "\n %9.5f : generate PsiToSigma" "\n %9.5f : generate dsmap" "\n %9.5f : free working storage" "\n %9.5f : total time", cputimes[1] - cputimes[0], cputimes[2] - cputimes[1], cputimes[3] - cputimes[2], cputimes[4] - cputimes[3], cputimes[5] - cputimes[4], cputimes[6] - cputimes[5], cputimes[7] - cputimes[6], cputimes[8] - cputimes[7], cputimes[9] - cputimes[8], cputimes[10] - cputimes[9], cputimes[11] - cputimes[0]) ; } return(dsmapIV) ; }
/* ----------------------------------------------------------------- given a domain decomposition, find a bisector 1. construct the domain/segment graph 2. use block kernihan-lin to get an initial bisector alpha -- cost function parameter for BKL seed -- random number seed cpus -- array to store CPU times cpus[0] -- time to find domain/segment map cpus[1] -- time to find domain/segment bipartite graph cpus[2] -- time to find two-set partition return value -- cost of the partition created -- 96mar09, cca ----------------------------------------------------------------- */ double GPart_TwoSetViaBKL ( GPart *gpart, double alpha, int seed, double cpus[] ) { BKL *bkl ; BPG *bpg ; double t1, t2 ; FILE *msgFile ; float bestcost ; Graph *g, *gc ; int c, flag, ierr, msglvl, ndom, nseg, nvtx, v ; int *compids, *cweights, *dscolors, *dsmap, *vwghts ; IV *dsmapIV ; /* --------------- check the input --------------- */ if ( gpart == NULL || cpus == NULL ) { fprintf(stderr, "\n fatal error in GPart_DDsep(%p,%f,%d,%p)" "\n bad input\n", gpart, alpha, seed, cpus) ; exit(-1) ; } g = gpart->g ; nvtx = gpart->nvtx ; compids = IV_entries(&gpart->compidsIV) ; cweights = IV_entries(&gpart->cweightsIV) ; vwghts = g->vwghts ; msglvl = gpart->msglvl ; msgFile = gpart->msgFile ; /* HARDCODE THE ALPHA PARAMETER. */ alpha = 1.0 ; /* ------------------------------ (1) get the domain/segment map (2) get the compressed graph (3) create the bipartite graph ------------------------------ */ MARKTIME(t1) ; dsmapIV = GPart_domSegMap(gpart, &ndom, &nseg) ; dsmap = IV_entries(dsmapIV) ; MARKTIME(t2) ; cpus[0] = t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %9.5f : generate domain-segment map", t2 - t1) ; fprintf(msgFile, "\n ndom = %d, nseg = %d", ndom, nseg) ; fflush(msgFile) ; } /* ----------------------------------------- create the domain/segment bipartite graph ----------------------------------------- */ MARKTIME(t1) ; gc = Graph_compress(gpart->g, dsmap, 1) ; bpg = BPG_new() ; BPG_init(bpg, ndom, nseg, gc) ; MARKTIME(t2) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %9.5f : create domain-segment graph", t2 - t1) ; fflush(msgFile) ; } cpus[1] = t2 - t1 ; if ( msglvl > 2 ) { if ( bpg->graph->vwghts != NULL ) { fprintf(msgFile, "\n domain weights :") ; IVfp80(msgFile, bpg->nX, bpg->graph->vwghts, 17, &ierr) ; fprintf(msgFile, "\n segment weights :") ; IVfp80(msgFile, bpg->nY, bpg->graph->vwghts+bpg->nX, 18, &ierr) ; fflush(msgFile) ; } } if ( msglvl > 3 ) { fprintf(msgFile, "\n dsmapIV ") ; IV_writeForHumanEye(dsmapIV, msgFile) ; fprintf(msgFile, "\n\n domain/segment bipartite graph ") ; BPG_writeForHumanEye(bpg, msgFile) ; fflush(msgFile) ; } /* ------------------------------------ create and initialize the BKL object ------------------------------------ */ MARKTIME(t1) ; flag = 5 ; bkl = BKL_new() ; BKL_init(bkl, bpg, alpha) ; BKL_setInitPart(bkl, flag, seed, NULL) ; bestcost = BKL_evalfcn(bkl) ; gpart->ncomp = 2 ; MARKTIME(t2) ; cpus[2] = t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %9.5f : initialize BKL object", t2 - t1) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n BKL : flag = %d, seed = %d", flag, seed) ; fprintf(msgFile, ", initial cost = %.2f", bestcost) ; fflush(msgFile) ; fprintf(msgFile, ", cweights = < %d %d %d >", bkl->cweights[0], bkl->cweights[1], bkl->cweights[2]) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n colors") ; IVfp80(msgFile, bkl->nreg, bkl->colors, 80, &ierr) ; fflush(msgFile) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n BKL initial weights : ") ; IVfp80(msgFile, 3, bkl->cweights, 25, &ierr) ; fflush(msgFile) ; } /* -------------------------------- improve the partition via fidmat -------------------------------- */ MARKTIME(t1) ; bestcost = BKL_fidmat(bkl) ; MARKTIME(t2) ; cpus[2] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %9.5f : improve the partition via fidmat", t2 - t1) ; fflush(msgFile) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n BKL : %d passes", bkl->npass) ; fprintf(msgFile, ", %d flips", bkl->nflips) ; fprintf(msgFile, ", %d gainevals", bkl->ngaineval) ; fprintf(msgFile, ", %d improve steps", bkl->nimprove) ; fprintf(msgFile, ", cost = %9.2f", bestcost) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n BKL STATS < %9d %9d %9d > %9.2f < %4d %4d %4d %4d %4d >", bkl->cweights[0], bkl->cweights[1], bkl->cweights[2], bestcost, bkl->npass, bkl->npatch, bkl->nflips, bkl->nimprove, bkl->ngaineval) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n colors") ; IVfp80(msgFile, bkl->nreg, bkl->colors, 80, &ierr) ; fflush(msgFile) ; } /* ---------------------------- set compids[] and cweights[] ---------------------------- */ MARKTIME(t1) ; dscolors = bkl->colors ; gpart->ncomp = 2 ; IV_setSize(&gpart->cweightsIV, 3) ; cweights = IV_entries(&gpart->cweightsIV) ; cweights[0] = cweights[1] = cweights[2] = 0 ; if ( vwghts == NULL ) { for ( v = 0 ; v < nvtx ; v++ ) { compids[v] = c = dscolors[dsmap[v]] ; cweights[c]++ ; } } else { for ( v = 0 ; v < nvtx ; v++ ) { compids[v] = c = dscolors[dsmap[v]] ; cweights[c] += vwghts[v] ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n BKL partition : < %d %d %d >", cweights[0], cweights[1], cweights[2]) ; fflush(msgFile) ; } /* ------------------------------------ free the BKL object, the BPG object and the domain/segment map IV object ------------------------------------ */ BKL_free(bkl) ; IV_free(dsmapIV) ; BPG_free(bpg) ; MARKTIME(t2) ; cpus[2] += t2 - t1 ; return((double) bestcost) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------------------------------- test the DenseMtx_frobNorm routine. when msglvl > 1, the output of this program can be fed into Matlab to check for errors created -- 98dec03, ycp ----------------------------------------------- */ { DenseMtx *A ; double t1, t2, value ; Drand *drand ; FILE *msgFile ; int inc1, inc2, msglvl, nrow, ncol, seed, type ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type nrow ncol inc1 inc2 " "\n , seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- entries type" "\n 1 -- real" "\n 2 -- complex" "\n nrow -- # of rows " "\n ncol -- # of columns " "\n inc1 -- row increment " "\n inc2 -- column increment " "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; spoolesFatal(); } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; nrow = atoi(argv[4]) ; ncol = atoi(argv[5]) ; inc1 = atoi(argv[6]) ; inc2 = atoi(argv[7]) ; if ( type < 1 || type > 2 || nrow < 0 || ncol < 0 || inc1 < 1 || inc2 < 1 ) { fprintf(stderr, "\n fatal error, type %d, nrow %d, ncol %d, inc1 %d, inc2 %d", type, nrow, ncol, inc1, inc2) ; spoolesFatal(); } seed = atoi(argv[8]) ; fprintf(msgFile, "\n\n %% %s :" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% nrow = %d" "\n %% ncol = %d" "\n %% inc1 = %d" "\n %% inc2 = %d" "\n %% seed = %d" "\n", argv[0], msglvl, argv[2], type, nrow, ncol, inc1, inc2, seed) ; /* ---------------------------- initialize the matrix object ---------------------------- */ MARKTIME(t1) ; A = DenseMtx_new() ; DenseMtx_init(A, type, 0, 0, nrow, ncol, inc1, inc2) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix object", t2 - t1) ; MARKTIME(t1) ; drand = Drand_new() ; Drand_setSeed(drand, seed) ; seed++ ; Drand_setUniform(drand, -1.0, 1.0) ; DenseMtx_fillRandomEntries(A, drand) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to fill matrix with random numbers", t2 - t1) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n matrix A") ; DenseMtx_writeForHumanEye(A, msgFile) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n %% matrix A") ; fprintf(msgFile, "\n nrow = %d ;", nrow) ; fprintf(msgFile, "\n ncol = %d ;", ncol) ; fprintf(msgFile, "\n"); DenseMtx_writeForMatlab(A, "A", msgFile) ; } /* -------------------------- compute the frobenius norm -------------------------- */ value = DenseMtx_frobNorm(A); if ( msglvl > 1 ) { fprintf(msgFile, "\n %% Frobenius Norm = %e", value) ; fprintf(msgFile, "\n"); fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ DenseMtx_free(A) ; Drand_free(drand) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------ make ETree objects for nested dissection on a regular grid 1 -- vertex elimination tree 2 -- fundamental supernode front tree 3 -- merge only children if possible 4 -- merge all children if possible 5 -- split large non-leaf fronts created -- 98feb05, cca ------------------------------------------------------------ */ { char *outETreeFileName ; double ops[6] ; double t1, t2 ; EGraph *egraph ; ETree *etree0, *etree1, *etree2, *etree3, *etree4, *etree5 ; FILE *msgFile ; Graph *graph ; int nfronts[6], nfind[6], nzf[6] ; int maxsize, maxzeros, msglvl, n1, n2, n3, nvtx, rc, v ; int *newToOld, *oldToNew ; IV *nzerosIV ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n1 n2 n3 maxzeros maxsize outFile" "\n msglvl -- message level" "\n msgFile -- message file" "\n n1 -- number of points in the first direction" "\n n2 -- number of points in the second direction" "\n n3 -- number of points in the third direction" "\n maxzeros -- number of points in the third direction" "\n maxsize -- maximum number of vertices in a front" "\n outFile -- output file, must be *.etreef or *.etreeb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } n1 = atoi(argv[3]) ; n2 = atoi(argv[4]) ; n3 = atoi(argv[5]) ; maxzeros = atoi(argv[6]) ; maxsize = atoi(argv[7]) ; outETreeFileName = argv[8] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n n1 -- %d" "\n n2 -- %d" "\n n3 -- %d" "\n maxzeros -- %d" "\n maxsize -- %d" "\n outFile -- %s" "\n", argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, outETreeFileName) ; fflush(msgFile) ; /* ---------------------------- create the grid graph object ---------------------------- */ if ( n1 == 1 ) { egraph = EGraph_make9P(n2, n3, 1) ; } else if ( n2 == 1 ) { egraph = EGraph_make9P(n1, n3, 1) ; } else if ( n3 == 1 ) { egraph = EGraph_make9P(n1, n2, 1) ; } else { egraph = EGraph_make27P(n1, n2, n3, 1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %d x %d x %d grid EGraph", n1, n2, n3) ; EGraph_writeForHumanEye(egraph, msgFile) ; fflush(msgFile) ; } graph = EGraph_mkAdjGraph(egraph) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %d x %d x %d grid Graph", n1, n2, n3) ; Graph_writeForHumanEye(graph, msgFile) ; fflush(msgFile) ; } /* ---------------------------------- get the nested dissection ordering ---------------------------------- */ nvtx = n1*n2*n3 ; newToOld = IVinit(nvtx, -1) ; oldToNew = IVinit(nvtx, -1) ; mkNDperm(n1, n2, n3, newToOld, 0, n1-1, 0, n2-1, 0, n3-1) ; for ( v = 0 ; v < nvtx ; v++ ) { oldToNew[newToOld[v]] = v ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %d x %d x %d nd ordering", n1, n2, n3) ; IVfprintf(msgFile, nvtx, oldToNew) ; fflush(msgFile) ; } /* ------------------------------------------ create the vertex elimination ETree object ------------------------------------------ */ etree0 = ETree_new() ; ETree_initFromGraphWithPerms(etree0, graph, newToOld, oldToNew) ; nfronts[0] = ETree_nfront(etree0) ; nfind[0] = ETree_nFactorIndices(etree0) ; nzf[0] = ETree_nFactorEntries(etree0, SPOOLES_SYMMETRIC) ; ops[0] = ETree_nFactorOps(etree0, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n vtx tree : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[0], nfind[0], nzf[0], ops[0]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n vertex elimination tree") ; ETree_writeForHumanEye(etree0, msgFile) ; fflush(msgFile) ; } /* --------------------------------------------- create the fundamental supernode ETree object --------------------------------------------- */ nzerosIV = IV_new() ; IV_init(nzerosIV, nvtx, NULL) ; IV_fill(nzerosIV, 0) ; etree1 = ETree_mergeFrontsOne(etree0, 0, nzerosIV) ; nfronts[1] = ETree_nfront(etree1) ; nfind[1] = ETree_nFactorIndices(etree1) ; nzf[1] = ETree_nFactorEntries(etree1, SPOOLES_SYMMETRIC) ; ops[1] = ETree_nFactorOps(etree1, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n fs tree : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[1], nfind[1], nzf[1], ops[1]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n fundamental supernode front tree") ; ETree_writeForHumanEye(etree1, msgFile) ; fprintf(msgFile, "\n\n nzerosIV") ; IV_writeForHumanEye(nzerosIV, msgFile) ; fflush(msgFile) ; } /* --------------------------- try to absorb only children --------------------------- */ etree2 = ETree_mergeFrontsOne(etree1, maxzeros, nzerosIV) ; nfronts[2] = ETree_nfront(etree2) ; nfind[2] = ETree_nFactorIndices(etree2) ; nzf[2] = ETree_nFactorEntries(etree2, SPOOLES_SYMMETRIC) ; ops[2] = ETree_nFactorOps(etree2, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n merge one : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[2], nfind[2], nzf[2], ops[2]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front tree after mergeOne") ; ETree_writeForHumanEye(etree2, msgFile) ; fprintf(msgFile, "\n\n nzerosIV") ; IV_writeForHumanEye(nzerosIV, msgFile) ; fflush(msgFile) ; } /* -------------------------- try to absorb all children -------------------------- */ etree3 = ETree_mergeFrontsAll(etree2, maxzeros, nzerosIV) ; nfronts[3] = ETree_nfront(etree3) ; nfind[3] = ETree_nFactorIndices(etree3) ; nzf[3] = ETree_nFactorEntries(etree3, SPOOLES_SYMMETRIC) ; ops[3] = ETree_nFactorOps(etree3, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n merge all : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[3], nfind[3], nzf[3], ops[3]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front tree after mergeAll") ; ETree_writeForHumanEye(etree3, msgFile) ; fprintf(msgFile, "\n\n nzerosIV") ; IV_writeForHumanEye(nzerosIV, msgFile) ; fflush(msgFile) ; } /* -------------------------------- try to absorb any other children -------------------------------- */ etree4 = etree3 ; /* etree4 = ETree_mergeFrontsAny(etree3, maxzeros, nzerosIV) ; nfronts[4] = ETree_nfront(etree4) ; nfind[4] = ETree_nFactorIndices(etree4) ; nzf[4] = ETree_nFactorEntries(etree4, SPOOLES_SYMMETRIC) ; ops[4] = ETree_nFactorOps(etree4, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n merge any : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[4], nfind[4], nzf[4], ops[4]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front tree after mergeAny") ; ETree_writeForHumanEye(etree3, msgFile) ; fprintf(msgFile, "\n\n nzerosIV") ; IV_writeForHumanEye(nzerosIV, msgFile) ; fflush(msgFile) ; } */ /* -------------------- split the front tree -------------------- */ etree5 = ETree_splitFronts(etree4, NULL, maxsize, 0) ; nfronts[5] = ETree_nfront(etree5) ; nfind[5] = ETree_nFactorIndices(etree5) ; nzf[5] = ETree_nFactorEntries(etree5, SPOOLES_SYMMETRIC) ; ops[5] = ETree_nFactorOps(etree5, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; fprintf(msgFile, "\n split : %8d fronts, %8d indices, %8d |L|, %12.0f ops", nfronts[5], nfind[5], nzf[5], ops[5]) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front tree after split") ; ETree_writeForHumanEye(etree4, msgFile) ; fflush(msgFile) ; } fprintf(msgFile, "\n\n complex symmetric ops %.0f", ETree_nFactorOps(etree5, SPOOLES_COMPLEX, SPOOLES_SYMMETRIC)) ; /* -------------------------- write out the ETree object -------------------------- */ if ( strcmp(outETreeFileName, "none") != 0 ) { MARKTIME(t1) ; rc = ETree_writeToFile(etree5, outETreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write etree to file %s", t2 - t1, outETreeFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from ETree_writeToFile(%p,%s)", rc, etree5, outETreeFileName) ; } } /* ---------------- free the objects ---------------- */ ETree_free(etree0) ; ETree_free(etree1) ; ETree_free(etree2) ; ETree_free(etree3) ; /* ETree_free(etree4) ; */ ETree_free(etree5) ; EGraph_free(egraph) ; Graph_free(graph) ; IVfree(newToOld) ; IVfree(oldToNew) ; IV_free(nzerosIV) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/* ------------------------------------------------------------ purpose -- compute a QR factorization using multiple threads created -- 98may29, cca ------------------------------------------------------------ */ void FrontMtx_MT_QR_factor ( FrontMtx *frontmtx, InpMtx *mtxA, ChvManager *chvmanager, IV *ownersIV, double cpus[], double *pfacops, int msglvl, FILE *msgFile ) { ChvList *updlist ; double t0, t1 ; IVL *rowsIVL ; int ithread, myid, nthread, rc ; int *firstnz ; QR_factorData *data, *dataObjects ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || mtxA == NULL || chvmanager == NULL || ownersIV == NULL || cpus == NULL || pfacops == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()" "\n bad input\n") ; exit(-1) ; } nthread = 1 + IV_max(ownersIV) ; /* ---------------------------------------------------------------- create the update Chv list object create the rowsIVL object, where list(J) = list of rows that are assembled in front J firstnz[irowA] = first column with nonzero element in A(irowA,*) ---------------------------------------------------------------- */ MARKTIME(t0) ; updlist = FrontMtx_postList(frontmtx, ownersIV, LOCK_IN_PROCESS) ; FrontMtx_QR_setup(frontmtx, mtxA, &rowsIVL, &firstnz, msglvl, msgFile) ; MARKTIME(t1) ; cpus[0] = t1 - t0 ; /* ------------------------------------ create and load nthread data objects ------------------------------------ */ ALLOCATE(dataObjects, struct _QR_factorData, nthread) ; for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) { data->mtxA = mtxA ; data->rowsIVL = rowsIVL ; data->firstnz = firstnz ; data->ownersIV = ownersIV ; data->frontmtx = frontmtx ; data->chvmanager = chvmanager ; data->updlist = updlist ; data->myid = myid ; DVzero(7, data->cpus) ; data->facops = 0.0 ; data->msglvl = msglvl ; if ( msglvl > 0 ) { char buffer[20] ; sprintf(buffer, "res.%d", myid) ; if ( (data->msgFile = fopen(buffer, "w")) == NULL ) { fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()" "\n unable to open file %s", buffer) ; exit(-1) ; } } else { data->msgFile = NULL ; } } #if THREAD_TYPE == TT_SOLARIS /* ---------------------------------- Solaris threads. (1) set the concurrency (2) create nthread - 1 new threads (3) execute own thread (4) join the threads ---------------------------------- */ thr_setconcurrency(nthread) ; for ( myid = 0, data = dataObjects ; myid < nthread - 1 ; myid++, data++ ) { rc = thr_create(NULL, 0, FrontMtx_QR_workerFactor, data, 0, NULL) ; if ( rc != 0 ) { fprintf(stderr, "\n fatal error, myid = %d, rc = %d from thr_create()", myid, rc) ; exit(-1) ; } } FrontMtx_QR_workerFactor(data) ; for ( myid = 0 ; myid < nthread - 1 ; myid++ ) { thr_join(0, 0, 0) ; } #endif #if THREAD_TYPE == TT_POSIX /* ---------------------------------- POSIX threads. (1) if SGI, set the concurrency (2) create nthread new threads (3) join the threads ---------------------------------- */ { pthread_t *tids ; pthread_attr_t attr ; void *status ; /* --------------------------------------------------------- #### NOTE: for SGI machines, this command must be present #### for the thread scheduling to be efficient. #### this is NOT a POSIX call, but necessary --------------------------------------------------------- pthread_setconcurrency(nthread) ; */ pthread_attr_init(&attr) ; /* pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ; */ pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ; ALLOCATE(tids, pthread_t, nthread) ; for ( myid = 0 ; myid < nthread ; myid++ ) { #ifdef _MSC_VER tids[myid].p = 0; tids[myid].x = 0; #else tids[myid] = 0 ; #endif } for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) { rc = pthread_create(&tids[myid], &attr, FrontMtx_QR_workerFactor, data) ; if ( rc != 0 ) { fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()" "\n myid = %d, rc = %d from pthread_create()", myid, rc) ; exit(-1) ; } else if ( msglvl > 2 ) { fprintf(stderr, "\n thread %d created", myid) ; } } for ( myid = 0 ; myid < nthread ; myid++ ) { pthread_join(tids[myid], &status) ; } FREE(tids) ; pthread_attr_destroy(&attr) ; } #endif /* ---------------------------------------------- fill the cpu vector and factor operation count ---------------------------------------------- */ *pfacops = 0 ; for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n thread %d cpus", myid) ; DVfprintf(msgFile, 7, data->cpus) ; } for ( ithread = 0 ; ithread < 7 ; ithread++ ) { cpus[ithread] += data->cpus[ithread] ; } *pfacops += data->facops ; } /* ------------- free the data ------------- */ ChvList_free(updlist) ; IVL_free(rowsIVL) ; IVfree(firstnz) ; FREE(dataObjects) ; return ; }
/* ---------------------------------------------------- purpose -- worker method to factor the matrix created -- 98may29, cca ---------------------------------------------------- */ static void * FrontMtx_QR_workerFactor ( void *arg ) { char *status ; ChvList *updlist ; ChvManager *chvmanager ; double facops, t0, t1 ; double *cpus ; DV workDV ; FILE *msgFile ; FrontMtx *frontmtx ; Ideq *dequeue ; InpMtx *mtxA ; int J, K, myid, neqns, nfront, msglvl ; int *colmap, *firstnz, *nactiveChild, *owners, *par ; IVL *rowsIVL ; QR_factorData *data ; MARKTIME(t0) ; data = (QR_factorData *) arg ; mtxA = data->mtxA ; rowsIVL = data->rowsIVL ; firstnz = data->firstnz ; IV_sizeAndEntries(data->ownersIV, &nfront, &owners) ; frontmtx = data->frontmtx ; chvmanager = data->chvmanager ; updlist = data->updlist ; myid = data->myid ; cpus = data->cpus ; msglvl = data->msglvl ; msgFile = data->msgFile ; par = frontmtx->tree->par ; neqns = FrontMtx_neqns(frontmtx) ; /* -------------------------------------------------------- status[J] = 'F' --> J finished = 'W' --> J waiting to be finished create the Ideq object to handle the bottom-up traversal nactiveChild[K] = # of unfinished children of K, when zero, K can be placed on the dequeue -------------------------------------------------------- */ status = CVinit(nfront, 'F') ; dequeue = FrontMtx_setUpDequeue(frontmtx, owners, myid, status, NULL, 'W', 'F', msglvl, msgFile) ; FrontMtx_loadActiveLeaves(frontmtx, status, 'W', dequeue) ; nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ; colmap = IVinit(neqns, -1) ; DV_setDefaultFields(&workDV) ; facops = 0.0 ; if ( msglvl > 3 ) { fprintf(msgFile, "\n owners") ; IVfprintf(msgFile, nfront, owners) ; fprintf(msgFile, "\n Ideq") ; Ideq_writeForHumanEye(dequeue, msgFile) ; fflush(msgFile) ; } MARKTIME(t1) ; cpus[0] += t1 - t0 ; /* --------------------------- loop while a path is active --------------------------- */ while ( (J = Ideq_removeFromHead(dequeue)) != -1 ) { if ( msglvl > 1 ) { fprintf(msgFile, "\n\n ### checking out front %d, owner %d", J, owners[J]) ; } if ( owners[J] == myid ) { /* -------------------------------- front J is ready to be processed -------------------------------- */ FrontMtx_QR_factorVisit(frontmtx, J, mtxA, rowsIVL, firstnz, updlist, chvmanager, status, colmap, &workDV, cpus, &facops, msglvl, msgFile) ; if ( status[J] == 'F' ) { /* ------------------------------------------ front J is finished, put parent on dequeue if it exists or all children are finished ------------------------------------------ */ if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) { Ideq_insertAtHead(dequeue, K) ; } } else { /* ----------------------------------------------- front J is not complete, put on tail of dequeue ----------------------------------------------- */ Ideq_insertAtTail(dequeue, J) ; } } else { /* ------------------------------------------- front J is not owned, put parent on dequeue if it exists and all children are finished ------------------------------------------- */ if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) { Ideq_insertAtHead(dequeue, K) ; } } } data->facops = facops ; /* ------------------------ free the working storage ------------------------ */ CVfree(status) ; Ideq_free(dequeue) ; IVfree(nactiveChild) ; IVfree(colmap) ; DV_clearData(&workDV) ; MARKTIME(t1) ; cpus[6] = t1 - t0 ; cpus[5] = t1 - t0 - cpus[0] - cpus[1] - cpus[2] - cpus[3] - cpus[4] ; return(NULL) ; }
void main ( int argc, char *argv[] ) /* ---------------------------------------------------------- read in Harwell-Boeing matrices, use serial factor, solve, and multiply routines based on spooles, invoke eigensolver created -- 98mar31 jcp modified -- 98dec18, cca ---------------------------------------------------------- */ { Bridge bridge ; char *inFileName_A, *inFileName_B, *outFileName, *parmFileName, *type ; char buffer[20], pbtype[4], which[4] ; double lftend, rhtend, center, shfscl, t1, t2 ; double c__1 = 1.0, c__4 = 4.0, tolact = 2.309970868130169e-11 ; double eigval[1000], sigma[2]; double *evec; int error, fstevl, lfinit, lstevl, mxbksz, msglvl, ncol, ndiscd, neig, neigvl, nfound, nnonzeros, nrhs, nrow, prbtyp, rc, retc, rfinit, seed, warnng ; int c__5 = 5, output = 6 ; int *lanczos_wksp; InpMtx *inpmtxA, *inpmtxB ; FILE *msgFile, *parmFile; /*--------------------------------------------------------------------*/ if ( argc != 7 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile parmFile seed inFileA inFileB" "\n msglvl -- message level" "\n msgFile -- message file" "\n parmFile -- input parameters file" "\n seed -- random number seed, used for ordering" "\n inFileA -- stiffness matrix in Harwell-Boeing format" "\n inFileB -- mass matrix in Harwell-Boeing format" "\n used for prbtyp = 1 or 2" "\n", argv[0]) ; return ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; exit(-1) ; } parmFileName = argv[3] ; seed = atoi(argv[4]) ; inFileName_A = argv[5] ; inFileName_B = argv[6] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n parmFile -- %s" "\n seed -- %d" "\n stiffness file -- %s" "\n mass file -- %s" "\n", argv[0], msglvl, argv[2], parmFileName, seed, inFileName_A, inFileName_B) ; fflush(msgFile) ; /* --------------------------------------------- read in the Harwell-Boeing matrix information --------------------------------------------- */ if ( strcmp(inFileName_A, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } MARKTIME(t1) ; readHB_info (inFileName_A, &nrow, &ncol, &nnonzeros, &type, &nrhs) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in header information for A", t2 - t1) ; /*--------------------------------------------------------------------*/ /* --------------------------------------------------------------- read in eigenvalue problem data neigvl -- # of desired eigenvalues which -- which eigenvalues to compute 'l' or 'L' lowest (smallest magnitude) 'h' or 'H' highest (largest magnitude) 'n' or 'N' nearest to central value 'c' or 'C' nearest to central value 'a' or 'A' all eigenvalues in interval pbtype -- type of problem 'v' or 'V' generalized symmetric problem (K,M) with M positive semidefinite (vibration problem) 'b' or 'B' generalized symmetric problem (K,K_s) with K positive semidefinite with K_s posibly indefinite (buckling problem) 'o' or 'O' ordinary symmetric eigenproblem lfinit -- if true, lftend is restriction on lower bound of eigenvalues. if false, no restriction on lower bound lftend -- left endpoint of interval rfinit -- if true, rhtend is restriction on upper bound of eigenvalues. if false, no restriction on upper bound rhtend -- right endpoint of interval center -- center of interval mxbksz -- upper bound on block size for Lanczos recurrence shfscl -- shift scaling parameter, an estimate on the magnitude of the smallest nonzero eigenvalues --------------------------------------------------------------- */ MARKTIME(t1) ; parmFile = fopen(parmFileName, "r"); fscanf(parmFile, "%d %s %s %d %le %d %le %le %d %le", &neigvl, which, pbtype, &lfinit, &lftend, &rfinit, &rhtend, ¢er, &mxbksz, &shfscl) ; fclose(parmFile); MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in eigenvalue problem data", t2 - t1) ; /* ---------------------------------------- check and set the problem type parameter ---------------------------------------- */ switch ( pbtype[1] ) { case 'v' : case 'V' : prbtyp = 1 ; break ; case 'b' : case 'B' : prbtyp = 2 ; break ; case 'o' : case 'O' : prbtyp = 3 ; break ; default : fprintf(stderr, "\n invalid problem type %s", pbtype) ; exit(-1) ; } /* ---------------------------- Initialize Lanczos workspace ---------------------------- */ MARKTIME(t1) ; lanczos_init_ ( &lanczos_wksp ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : initialize lanczos workspace", t2 - t1) ; /* ---------------------------------- initialize communication structure ---------------------------------- */ MARKTIME(t1) ; lanczos_set_parm( &lanczos_wksp, "order-of-problem", &nrow, &retc ); lanczos_set_parm( &lanczos_wksp, "accuracy-tolerance", &tolact, &retc ); lanczos_set_parm( &lanczos_wksp, "max-block-size", &mxbksz, &retc ); lanczos_set_parm( &lanczos_wksp, "shift-scale", &shfscl, &retc ); lanczos_set_parm( &lanczos_wksp, "message_level", &msglvl, &retc ); MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : init lanczos communication structure", t2 - t1) ; /*--------------------------------------------------------------------*/ /* --------------------------------------------- create the InpMtx objects for matrix A and B --------------------------------------------- */ if ( strcmp(inFileName_A, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } MARKTIME(t1) ; inpmtxA = InpMtx_new() ; InpMtx_readFromHBfile ( inpmtxA, inFileName_A ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in A", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n InpMtx A object after loading") ; InpMtx_writeForHumanEye(inpmtxA, msgFile) ; fflush(msgFile) ; } MARKTIME(t1) ; lanczos_set_parm( &lanczos_wksp, "matrix-type", &c__1, &retc ); MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : set A's parameters", t2 - t1) ; if ( prbtyp != 3 ) { if ( strcmp(inFileName_B, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } MARKTIME(t1) ; inpmtxB = InpMtx_new() ; InpMtx_readFromHBfile ( inpmtxB, inFileName_B ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in B", t2 - t1) ; } else { MARKTIME(t1) ; inpmtxB = NULL ; lanczos_set_parm( &lanczos_wksp, "matrix-type", &c__4, &retc ); MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : set B's parameters", t2 - t1) ; } if ( msglvl > 2 && prbtyp != 3 ) { fprintf(msgFile, "\n\n InpMtx B object after loading") ; InpMtx_writeForHumanEye(inpmtxB, msgFile) ; fflush(msgFile) ; } /* ----------------------------- set up the solver environment ----------------------------- */ MARKTIME(t1) ; rc = Setup((void *) &bridge, &prbtyp, &nrow, &mxbksz, inpmtxA, inpmtxB, &seed, &msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : set up solver environment", t2 - t1) ; if ( rc != 1 ) { fprintf(stderr, "\n fatal error %d from Setup()", rc) ; exit(-1) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------- invoke eigensolver nfound -- # of eigenvalues found and kept ndisc -- # of additional eigenvalues discarded ----------------------------------------------- */ MARKTIME(t1) ; lanczos_run(&neigvl, &which[1] , &pbtype[1], &lfinit, &lftend, &rfinit, &rhtend, ¢er, &lanczos_wksp, &bridge, &nfound, &ndiscd, &warnng, &error, Factor, MatMul, Solve ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : time for lanczos run", t2 - t1) ; /* ------------------------- get eigenvalues and print ------------------------- */ MARKTIME(t1) ; neig = nfound + ndiscd ; lstevl = nfound ; lanczos_eigenvalues (&lanczos_wksp, eigval, &neig, &retc); fstevl = 1 ; if ( nfound == 0 ) fstevl = -1 ; if ( ndiscd > 0 ) lstevl = -ndiscd ; hdslp5_ ("computed eigenvalues returned by hdserl", &neig, eigval, &output, 39L ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : get and print eigenvalues ", t2 - t1) ; /* ------------------------- get eigenvectors and print ------------------------- */ /* MARKTIME(t1) ; neig = min ( 50, nrow ); Lncz_ALLOCATE(evec, double, nrow, retc); for ( i = 1 ; i <= nfound ; i++ ) { lanczos_eigenvector ( &lanczos_wksp, &i, &i, newToOld, evec, &nrow, &retc ) ; hdslp5_ ( "computed eigenvector returned by hdserc", &neig, evec, &output, 39L ) ; } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : get and print eigenvectors ", t2 - t1) ; */ /* ------------------------ free the working storage ------------------------ */ MARKTIME(t1) ; lanczos_free( &lanczos_wksp ) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : free lanczos workspace ", t2 - t1) ; MARKTIME(t1) ; rc = Cleanup(&bridge) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : free solver workspace ", t2 - t1) ; if ( rc != 1 ) { fprintf(stderr, "\n error return %d from Cleanup()", rc) ; exit(-1) ; } fprintf(msgFile, "\n") ; fclose(msgFile) ; return ; }
/* ------------------------------------------------------------------ purpose -- return an ETree object for a nested dissection ordering graph -- graph to order maxdomainsize -- used to control the incomplete nested dissection process. any subgraph whose weight is less than maxdomainsize is not split further. seed -- random number seed msglvl -- message level, 0 --> no output, 1 --> timings msgFile -- message file created -- 97nov06, cca ------------------------------------------------------------------ */ ETree * orderViaND ( Graph *graph, int maxdomainsize, int seed, int msglvl, FILE *msgFile ) { double t1, t2 ; DSTree *dstree ; ETree *etree ; int nvtx, Nvtx ; IV *eqmapIV, *stagesIV ; /* --------------- check the input --------------- */ if ( graph == NULL || maxdomainsize <= 0 || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in orderViaND(%p,%d,%d,%d,%p)" "\n bad input\n", graph, maxdomainsize, seed, msglvl, msgFile) ; exit(-1) ; } /* ------------------------------ compress the graph if worth it ------------------------------ */ nvtx = graph->nvtx ; MARKTIME(t1) ; eqmapIV = Graph_equivMap(graph) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n CPU %8.3f : get equivalence map", t2 - t1) ; fflush(msgFile) ; } Nvtx = 1 + IV_max(eqmapIV) ; if ( Nvtx <= COMPRESS_FRACTION * nvtx ) { MARKTIME(t1) ; graph = Graph_compress2(graph, eqmapIV, 1) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n CPU %8.3f : compress graph", t2 - t1) ; fflush(msgFile) ; } } else { IV_free(eqmapIV) ; eqmapIV = NULL ; } MARKTIME(t1) ; IVL_sortUp(graph->adjIVL) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n CPU %8.3f : sort adjacency", t2 - t1) ; fflush(msgFile) ; } /* ----------------------------- get the domain separator tree ----------------------------- */ { GPart *gpart ; DDsepInfo *info ; info = DDsepInfo_new() ; info->seed = seed ; info->maxcompweight = maxdomainsize ; info->alpha = 0.1 ; gpart = GPart_new() ; GPart_init(gpart, graph) ; GPart_setMessageInfo(gpart, msglvl, msgFile) ; dstree = GPart_RBviaDDsep(gpart, info) ; DSTree_renumberViaPostOT(dstree) ; if ( msglvl > 0 ) { DDsepInfo_writeCpuTimes(info, msgFile) ; } DDsepInfo_free(info) ; GPart_free(gpart) ; } /* --------------------- get the stages vector --------------------- */ stagesIV = DSTree_NDstages(dstree) ; DSTree_free(dstree) ; /* --------------------------------------------- order the vertices and extract the front tree --------------------------------------------- */ { MSMDinfo *info ; MSMD *msmd ; info = MSMDinfo_new() ; info->seed = seed ; info->compressFlag = 2 ; info->msglvl = msglvl ; info->msgFile = msgFile ; msmd = MSMD_new() ; MSMD_order(msmd, graph, IV_entries(stagesIV), info) ; etree = MSMD_frontETree(msmd) ; if ( msglvl > 0 ) { MSMDinfo_print(info, msgFile) ; } MSMDinfo_free(info) ; MSMD_free(msmd) ; IV_free(stagesIV) ; } /* ------------------------------------------------- expand the front tree if the graph was compressed ------------------------------------------------- */ if ( eqmapIV != NULL ) { ETree *etree2 = ETree_expand(etree, eqmapIV) ; ETree_free(etree) ; etree = etree2 ; Graph_free(graph) ; IV_free(eqmapIV) ; } else { MARKTIME(t1) ; IVL_sortUp(graph->adjIVL) ; MARKTIME(t2) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n CPU %8.3f : sort adjacency", t2 - t1) ; fflush(msgFile) ; } } return(etree) ; }
/* --------------------------------------------------------------------- purpose -- to compute the factorization of A - sigma * B note: all variables in the calling sequence are references to allow call from fortran. input parameters data -- pointer to bridge data object psigma -- shift for the matrix pencil ppvttol -- pivot tolerance *ppvttol = 0.0 --> no pivoting used *ppvttol != 0.0 --> pivoting used, entries in factor are bounded above by 1/pvttol in magnitude output parameters *pinertia -- on return contains the number of negative eigenvalues *perror -- on return contains an error code 1 -- error found during factorization 0 -- normal return -1 -- psigma is NULL -2 -- ppvttol is NULL -3 -- data is NULL -4 -- pinertia is NULL created -- 98aug10, cca & jcp --------------------------------------------------------------------- */ void FactorMPI ( double *psigma, double *ppvttol, void *data, int *pinertia, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; Chv *rootchv ; ChvManager *chvmanager ; double droptol=0.0, tau ; double cpus[20] ; FILE *msgFile ; int recvtemp[3], sendtemp[3], stats[20] ; int msglvl, nnegative, nzero, npositive, pivotingflag, tag ; MPI_Comm comm ; int nproc ; #if MYDEBUG > 0 double t1, t2 ; count_Factor++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) FactorMPI()", count_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) FactorMPI()", count_Factor) ; fflush(bridge->msgFile) ; #endif nproc = bridge->nproc ; /* --------------- check the input --------------- */ if ( psigma == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n psigma is NULL\n") ; *perror = -1 ; return ; } if ( ppvttol == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n ppvttol is NULL\n") ; *perror = -2 ; return ; } if ( data == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n data is NULL\n") ; *perror = -3 ; return ; } if ( pinertia == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n pinertia is NULL\n") ; *perror = -4 ; return ; } if ( perror == NULL ) { fprintf(stderr, "\n error in FactorMPI()" "\n perror is NULL\n") ; return ; } comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /* ---------------------------------- set the shift in the pencil object ---------------------------------- */ bridge->pencil->sigma[0] = -(*psigma) ; bridge->pencil->sigma[1] = 0.0 ; /* ----------------------------------------- if the matrices are in local coordinates (i.e., this is the first factorization following a matrix-vector multiply) then map the matrix into global coordinates ----------------------------------------- */ if ( bridge->coordFlag == LOCAL ) { if ( bridge->prbtype == 1 ) { MatMul_setGlobalIndices(bridge->info, bridge->B) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix B in local coordinates") ; InpMtx_writeForHumanEye(bridge->B, msgFile) ; fflush(msgFile) ; } } if ( bridge->prbtype == 2 ) { MatMul_setGlobalIndices(bridge->info, bridge->A) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n matrix A in local coordinates") ; InpMtx_writeForHumanEye(bridge->A, msgFile) ; fflush(msgFile) ; } } bridge->coordFlag = GLOBAL ; } /* ----------------------------------------------------- clear the front matrix and submatrix mananger objects ----------------------------------------------------- */ FrontMtx_clearData(bridge->frontmtx); SubMtxManager_clearData(bridge->mtxmanager); SolveMap_clearData(bridge->solvemap) ; if ( bridge->rowmapIV != NULL ) { IV_free(bridge->rowmapIV) ; bridge->rowmapIV = NULL ; } /* ----------------------------------------------------------- set the pivot tolerance. NOTE: spooles's "tau" parameter is a bound on the magnitude of the factor entries, and is the recipricol of that of the pivot tolerance of the lanczos code ----------------------------------------------------------- */ if ( *ppvttol == 0.0 ) { tau = 10.0 ; pivotingflag = SPOOLES_NO_PIVOTING ; } else { tau = (1.0)/(*ppvttol) ; pivotingflag = SPOOLES_PIVOTING ; } /* ---------------------------------- initialize the front matrix object ---------------------------------- */ FrontMtx_init(bridge->frontmtx, bridge->frontETree, bridge->symbfacIVL, SPOOLES_REAL, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, pivotingflag, NO_LOCK, bridge->myid, bridge->ownersIV, bridge->mtxmanager, bridge->msglvl, bridge->msgFile) ; /* ------------------------- compute the factorization ------------------------- */ tag = 0 ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 0); IVfill(20, stats, 0) ; DVfill(20, cpus, 0.0) ; rootchv = FrontMtx_MPI_factorPencil(bridge->frontmtx, bridge->pencil, tau, droptol, chvmanager, bridge->ownersIV, 0, perror, cpus, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; ChvManager_free(chvmanager); tag += 3*FrontMtx_nfront(bridge->frontmtx) + 2 ; if ( msglvl > 3 ) { fprintf(msgFile, "\n\n numeric factorization") ; FrontMtx_writeForHumanEye(bridge->frontmtx, bridge->msgFile) ; fflush(bridge->msgFile) ; } /* ---------------------------- if matrix is singular then set error flag and return ---------------------------- */ if ( rootchv != NULL ) { fprintf(msgFile, "\n WHOA NELLY!, matrix is singular") ; fflush(msgFile) ; *perror = 1 ; return ; } /* ------------------------------------------------------------------ post-process the factor matrix, convert from fronts to submatrices ------------------------------------------------------------------ */ FrontMtx_MPI_postProcess(bridge->frontmtx, bridge->ownersIV, stats, bridge->msglvl, bridge->msgFile, tag, comm); tag += 5*bridge->nproc ; /* ------------------- compute the inertia ------------------- */ FrontMtx_inertia(bridge->frontmtx, &nnegative, &nzero, &npositive) ; sendtemp[0] = nnegative ; sendtemp[1] = nzero ; sendtemp[2] = npositive ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n local inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } MPI_Allreduce((void *) sendtemp, (void *) recvtemp, 3, MPI_INT, MPI_SUM, comm) ; nnegative = recvtemp[0] ; nzero = recvtemp[1] ; npositive = recvtemp[2] ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n global inertia = < %d, %d, %d >", nnegative, nzero, npositive) ; fflush(bridge->msgFile) ; } *pinertia = nnegative; /* --------------------------- create the solve map object --------------------------- */ SolveMap_ddMap(bridge->solvemap, SPOOLES_REAL, FrontMtx_upperBlockIVL(bridge->frontmtx), FrontMtx_lowerBlockIVL(bridge->frontmtx), nproc, bridge->ownersIV, FrontMtx_frontTree(bridge->frontmtx), bridge->seed, bridge->msglvl, bridge->msgFile) ; /* ------------------------------- redistribute the front matrices ------------------------------- */ FrontMtx_MPI_split(bridge->frontmtx, bridge->solvemap, stats, bridge->msglvl, bridge->msgFile, tag, comm) ; if ( *ppvttol != 0.0 ) { /* ------------------------------------------------------------- pivoting for stability may have taken place. create rowmapIV, the map from rows in the factorization to processes. ------------------------------------------------------------- */ bridge->rowmapIV = FrontMtx_MPI_rowmapIV(bridge->frontmtx, bridge->ownersIV, bridge->msglvl, bridge->msgFile, bridge->comm) ; if ( bridge->msglvl > 2 && bridge->msgFile != NULL ) { fprintf(bridge->msgFile, "\n\n bridge->rowmapIV") ; IV_writeForHumanEye(bridge->rowmapIV, bridge->msgFile) ; fflush(bridge->msgFile) ; } } else { bridge->rowmapIV = NULL ; } /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_Factor += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_Factor) ; fflush(bridge->msgFile) ; #endif return; }
/* ---------------------------------------------------------- purpose -- to construct the map from fronts to processors, and compute operations for each processor. maptype -- type of map for parallel factorization maptype = 1 --> wrap map maptype = 2 --> balanced map maptype = 3 --> subtree-subset map maptype = 4 --> domain decomposition map cutoff -- used when maptype = 4 as upper bound on relative domain size return value -- 1 -- success -1 -- bridge is NULL -2 -- front tree is NULL created -- 98sep25, cca ---------------------------------------------------------- */ int BridgeMPI_factorSetup ( BridgeMPI *bridge, int maptype, double cutoff ) { double t1, t2 ; DV *cumopsDV ; ETree *frontETree ; FILE *msgFile ; int msglvl, nproc ; /* --------------- check the input --------------- */ MARKTIME(t1) ; if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMPI_factorSetup()" "\n bridge is NULL") ; return(-1) ; } if ( (frontETree = bridge->frontETree) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_factorSetup()" "\n frontETree is NULL") ; return(-2) ; } nproc = bridge->nproc ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /* ------------------------------------------- allocate and initialize the cumopsDV object ------------------------------------------- */ if ( (cumopsDV = bridge->cumopsDV) == NULL ) { cumopsDV = bridge->cumopsDV = DV_new() ; } DV_setSize(cumopsDV, nproc) ; DV_zero(cumopsDV) ; /* ---------------------------- create the owners map object ---------------------------- */ switch ( maptype ) { case 1 : bridge->ownersIV = ETree_wrapMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 2 : bridge->ownersIV = ETree_balancedMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 3 : bridge->ownersIV = ETree_subtreeSubsetMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 4 : bridge->ownersIV = ETree_ddMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV, cutoff) ; break ; default : bridge->ownersIV = ETree_ddMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV, 1./(2*nproc)) ; break ; } MARKTIME(t2) ; bridge->cpus[7] = t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n parallel factor setup") ; fprintf(msgFile, "\n type = %d, symmetryflag = %d", bridge->type, bridge->symmetryflag) ; fprintf(msgFile, "\n total factor operations = %.0f", DV_sum(cumopsDV)) ; fprintf(msgFile, "\n upper bound on speedup due to load balance = %.2f", DV_max(cumopsDV)/DV_sum(cumopsDV)) ; fprintf(msgFile, "\n operations distributions over threads") ; DV_writeForHumanEye(cumopsDV, msgFile) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n owners map IV object") ; IV_writeForHumanEye(bridge->ownersIV, msgFile) ; fflush(msgFile) ; } /* ---------------------------- create the vertex map object ---------------------------- */ bridge->vtxmapIV = IV_new() ; IV_init(bridge->vtxmapIV, bridge->neqns, NULL) ; IVgather(bridge->neqns, IV_entries(bridge->vtxmapIV), IV_entries(bridge->ownersIV), ETree_vtxToFront(bridge->frontETree)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n vertex map IV object") ; IV_writeForHumanEye(bridge->vtxmapIV, msgFile) ; fflush(msgFile) ; } return(1) ; }
int zpcgr ( int n_matrixSize, int type, int symmetryflag, InpMtx *mtxA, FrontMtx *Precond, DenseMtx *mtxX, DenseMtx *mtxB, int itermax, double convergetol, int msglvl, FILE *msgFile ) { Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxZ ; DenseMtx *vecP, *vecR, *vecQ ; DenseMtx *vecX, *vecZ ; double Alpha[2], Beta[2], Rho[2], Rho0[2], Rtmp[2]; double Init_norm, ratio, Res_norm ; double t1, t2, cpus[9] ; double one[2] = {1.0, 0.0}, zero[2] = {0.0, 0.0} ; double Tiny[2] = {0.1e-28, 0.0}; int Iter, neqns; int stats[6] ; if (symmetryflag != SPOOLES_HERMITIAN){ fprintf(msgFile, "\n\n Fatal Error, \n" " Matrix is not Hermitian in ZPCGR !!") ; spoolesFatal(); }; neqns = n_matrixSize; /* -------------------- init the vectors in ZPCGR -------------------- */ vecP = DenseMtx_new() ; DenseMtx_init(vecP, type, 0, 0, neqns, 1, 1, neqns) ; vecR = DenseMtx_new() ; DenseMtx_init(vecR, type, 0, 0, neqns, 1, 1, neqns) ; vecX = DenseMtx_new() ; DenseMtx_init(vecX, type, 0, 0, neqns, 1, 1, neqns) ; vecQ = DenseMtx_new() ; DenseMtx_init(vecQ, type, 0, 0, neqns, 1, 1, neqns) ; vecZ = DenseMtx_new() ; DenseMtx_init(vecZ, type, 0, 0, neqns, 1, 1, neqns) ; /* -------------------------- Initialize the iterations -------------------------- */ Init_norm = DenseMtx_twoNormOfColumn (mtxB, 0); if ( Init_norm == 0.0 ){ Init_norm = 1.0; }; ratio = 1.0; DenseMtx_zero(vecX) ; DenseMtx_colCopy (vecR, 0, mtxB, 0); MARKTIME(t1) ; Iter = 0; /* ------------------------------ Main Loop of the iterations ------------------------------ */ while ( ratio > convergetol && Iter <= itermax ) { Iter++; /* */ FrontMtx_solve(Precond, vecZ, vecR, Precond->manager, cpus, msglvl, msgFile) ; DenseMtx_colDotProduct(vecR, 0, vecZ, 0, Rho); if ( Rho[0] == 0.0 & Rho[1] == 0.0){ fprintf(stderr, "\n breakdown in ZPCGR !! " "\n Fatal error \n"); spoolesFatal(); } /* */ if ( Iter == 1 ) { DenseMtx_colCopy (vecP, 0, vecZ, 0); } else { zdiv(Rho, Rho0, Beta); DenseMtx_colGenAxpy (Beta, vecP, 0, one, vecZ, 0); }; InpMtx_herm_gmmm(mtxA, zero, vecQ, one, vecP) ; DenseMtx_colDotProduct (vecP, 0, vecQ,0, Rtmp); zdiv(Rho, Rtmp, Alpha); DenseMtx_colGenAxpy (one, vecX, 0, Alpha, vecP, 0); Rtmp[0] = -Alpha[0]; Rtmp[1] = -Alpha[1]; DenseMtx_colGenAxpy (one, vecR, 0, Rtmp, vecQ, 0); Rho0[0] = Rho[0]; Rho0[1] = Rho[1]; /* */ Res_norm = DenseMtx_twoNormOfColumn (vecR, 0); ratio = Res_norm/Init_norm; fprintf(msgFile, "\n\n At iteration %d" " the convergence ratio is %12.4e", Iter, ratio) ; } /* End of while loop */ MARKTIME(t2) ; fprintf(msgFile, "\n CPU : Converges in time: %8.3f ", t2 - t1) ; fprintf(msgFile, "\n # iterations = %d", Iter) ; fprintf(msgFile, "\n\n after ZPCGR") ; DenseMtx_colCopy (mtxX, 0, vecX, 0); /* ------------------------ free the working storage ------------------------ */ DenseMtx_free(vecP) ; DenseMtx_free(vecR) ; DenseMtx_free(vecX) ; DenseMtx_free(vecQ) ; DenseMtx_free(vecZ) ; fprintf(msgFile, "\n") ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------- test the QR factor method for a FrontMtx object on an n1 x n2 x n3 grid (1) generate an overdetermined system AX = B (2) factor the matrix (3) solve the systems created -- 97apr11, dkw modified -- 98may28, cca --------------------------------------------------- */ { ChvManager *chvmanager ; DenseMtx *mtxB, *mtxX, *mtxZ ; double cputotal, factorops ; double cpus[9] ; double nops, t1, t2 ; ETree *frontETree ; FILE *msgFile ; FrontMtx *frontmtx ; InpMtx *mtxA ; int msglvl, neqns, nrhs, n1, n2, n3, seed, type ; IVL *symbfacIVL ; SubMtxManager *mtxmanager ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n1 n2 n3 seed nrhs " "\n msglvl -- message level" "\n msgFile -- message file" "\n n1 -- # of points in the first direction" "\n n2 -- # of points in the second direction" "\n n3 -- # of points in the third direction" "\n seed -- random number seed" "\n nrhs -- # of right hand sides" "\n type -- type of linear system" "\n 1 -- real" "\n 2 -- complex" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } n1 = atoi(argv[3]) ; n2 = atoi(argv[4]) ; n3 = atoi(argv[5]) ; seed = atoi(argv[6]) ; nrhs = atoi(argv[7]) ; type = atoi(argv[8]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n n1 -- %d" "\n n2 -- %d" "\n n3 -- %d" "\n seed -- %d" "\n nrhs -- %d" "\n type -- %d" "\n", argv[0], msglvl, argv[2], n1, n2, n3, seed, nrhs, type) ; fflush(msgFile) ; neqns = n1*n2*n3 ; if ( type != SPOOLES_REAL && type != SPOOLES_COMPLEX ) { fprintf(stderr, "\n fatal error, type must be real or complex") ; exit(-1) ; } /* ------------------------------------------ generate the A X = B overdetermined system ------------------------------------------ */ mkNDlinsysQR(n1, n2, n3, type, nrhs, seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ; /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; frontmtx = FrontMtx_new() ; if ( type == SPOOLES_REAL ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } else if ( type == SPOOLES_COMPLEX ) { FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : FrontMtx initialized", t2 - t1) ; fflush(msgFile) ; /* ----------------- factor the matrix ----------------- */ DVzero(6, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 0) ; factorops = 0.0 ; MARKTIME(t1) ; FrontMtx_QR_factor(frontmtx, mtxA, chvmanager, cpus, &factorops, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n after QR_factor() call, facops = %8.2f",factorops) ; fprintf(msgFile, "\n CPU %8.3f : FrontMtx_QR_factor, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n" "\n setup factorization %8.3f %6.2f" "\n setup fronts %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n store factor %8.3f %6.2f" "\n store update %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cputotal) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ---------------- solve the system ---------------- */ mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; DenseMtx_zero(mtxZ) ; if ( type == SPOOLES_REAL ) { nops = frontmtx->nentD + 2*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 2*frontmtx->nentL ; } else { nops += 2*frontmtx->nentU ; } } else if ( type == SPOOLES_COMPLEX ) { nops = 8*frontmtx->nentD + 8*frontmtx->nentU ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { nops += 8*frontmtx->nentL ; } else { nops += 8*frontmtx->nentU ; } } nops *= nrhs ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } DVzero(6, cpus) ; MARKTIME(t1) ; FrontMtx_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager, cpus, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : solve the system, %.3f mflops", t2 - t1, 1.e-6*nops/(t2 - t1)) ; cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n CPU %%" "\n A^TB matrix-matrix multiply %8.3f %6.2f" "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total solve time %8.3f %6.2f" "\n total QR solve time %8.3f", cpus[6], 100.*cpus[6]/cputotal, cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cputotal) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ----------------- compute the error ----------------- */ DenseMtx_sub(mtxZ, mtxX) ; fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n after solve") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(mtxA) ; DenseMtx_free(mtxX) ; DenseMtx_free(mtxZ) ; DenseMtx_free(mtxB) ; FrontMtx_free(frontmtx) ; IVL_free(symbfacIVL) ; ETree_free(frontETree) ; SubMtxManager_free(mtxmanager) ; ChvManager_free(chvmanager) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* -------------------------------------------------- test EGraph_readFromFile and EGraph_writeToFile, useful for translating between formatted *.egraphf and binary *.egraphb files. created -- 95nov03, cca -------------------------------------------------- */ { double t1, t2 ; int msglvl, rc ; EGraph egraph ; FILE *msgFile ; if ( argc != 5 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inFile outFile" "\n msglvl -- message level" "\n msgFile -- message file" "\n inFile -- input file, must be *.egraphf or *.egraphb" "\n outFile -- output file, must be *.egraphf or *.egraphb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inFile -- %s" "\n outFile -- %s" "\n", argv[0], msglvl, argv[2], argv[3], argv[4]) ; fflush(msgFile) ; /* ---------------------- set the default fields ---------------------- */ EGraph_setDefaultFields(&egraph) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n after setting default fields") ; EGraph_writeForHumanEye(&egraph, msgFile) ; fflush(msgFile) ; } /* ------------------------- read in the EGraph object ------------------------- */ if ( strcmp(argv[3], "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } MARKTIME(t1) ; rc = EGraph_readFromFile(&egraph, argv[3]) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in egraph from file %s", t2 - t1, argv[3]) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from EGraph_readFromFile(%p,%s)", rc, &egraph, argv[3]) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n after reading EGraph object from file %s", argv[3]) ; EGraph_writeForHumanEye(&egraph, msgFile) ; fflush(msgFile) ; } /* --------------------------- write out the EGraph object --------------------------- */ if ( strcmp(argv[4], "none") != 0 ) { MARKTIME(t1) ; rc = EGraph_writeToFile(&egraph, argv[4]) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write egraph to file %s", t2 - t1, argv[4]) ; } if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from EGraph_writeToFile(%p,%s)", rc, &egraph, argv[4]) ; } fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------------ generate a random matrix and test a matrix-matrix multiply method. the output is a matlab file to test correctness. created -- 98jan29, cca -------------------------------------------------------------------- */ { DenseMtx *X, *Y, *Y2 ; double alpha[2] ; double alphaImag, alphaReal, t1, t2 ; double *zvec ; Drand *drand ; int col, dataType, ii, msglvl, ncolA, nitem, nops, nrhs, nrowA, nrowX, nrowY, nthread, row, seed, storageMode, symflag, transposeflag ; int *colids, *rowids ; InpMtx *A ; FILE *msgFile ; if ( argc != 15 ) { fprintf(stdout, "\n\n %% usage : %s msglvl msgFile symflag storageMode " "\n %% nrow ncol nent nrhs seed alphaReal alphaImag nthread" "\n %% msglvl -- message level" "\n %% msgFile -- message file" "\n %% dataType -- type of matrix entries" "\n %% 1 -- real" "\n %% 2 -- complex" "\n %% symflag -- symmetry flag" "\n %% 0 -- symmetric" "\n %% 1 -- hermitian" "\n %% 2 -- nonsymmetric" "\n %% storageMode -- storage mode" "\n %% 1 -- by rows" "\n %% 2 -- by columns" "\n %% 3 -- by chevrons, (requires nrow = ncol)" "\n %% transpose -- transpose flag" "\n %% 0 -- Y := Y + alpha * A * X" "\n %% 1 -- Y := Y + alpha * A^H * X, nonsymmetric only" "\n %% 2 -- Y := Y + alpha * A^T * X, nonsymmetric only" "\n %% nrowA -- number of rows in A" "\n %% ncolA -- number of columns in A" "\n %% nitem -- number of items" "\n %% nrhs -- number of right hand sides" "\n %% seed -- random number seed" "\n %% alphaReal -- y := y + alpha*A*x" "\n %% alphaImag -- y := y + alpha*A*x" "\n %% nthread -- # of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } dataType = atoi(argv[3]) ; symflag = atoi(argv[4]) ; storageMode = atoi(argv[5]) ; transposeflag = atoi(argv[6]) ; nrowA = atoi(argv[7]) ; ncolA = atoi(argv[8]) ; nitem = atoi(argv[9]) ; nrhs = atoi(argv[10]) ; seed = atoi(argv[11]) ; alphaReal = atof(argv[12]) ; alphaImag = atof(argv[13]) ; nthread = atoi(argv[14]) ; fprintf(msgFile, "\n %% %s " "\n %% msglvl -- %d" "\n %% msgFile -- %s" "\n %% dataType -- %d" "\n %% symflag -- %d" "\n %% storageMode -- %d" "\n %% transposeflag -- %d" "\n %% nrowA -- %d" "\n %% ncolA -- %d" "\n %% nitem -- %d" "\n %% nrhs -- %d" "\n %% seed -- %d" "\n %% alphaReal -- %e" "\n %% alphaImag -- %e" "\n %% nthread -- %d" "\n", argv[0], msglvl, argv[2], dataType, symflag, storageMode, transposeflag, nrowA, ncolA, nitem, nrhs, seed, alphaReal, alphaImag, nthread) ; fflush(msgFile) ; if ( dataType != 1 && dataType != 2 ) { fprintf(stderr, "\n invalid value %d for dataType\n", dataType) ; spoolesFatal(); } if ( symflag != 0 && symflag != 1 && symflag != 2 ) { fprintf(stderr, "\n invalid value %d for symflag\n", symflag) ; spoolesFatal(); } if ( storageMode != 1 && storageMode != 2 && storageMode != 3 ) { fprintf(stderr, "\n invalid value %d for storageMode\n", storageMode) ; spoolesFatal(); } if ( transposeflag < 0 || transposeflag > 2 ) { fprintf(stderr, "\n error, transposeflag = %d, must be 0, 1 or 2", transposeflag) ; spoolesFatal(); } if ( (transposeflag == 1 && symflag != 2) || (transposeflag == 2 && symflag != 2) ) { fprintf(stderr, "\n error, transposeflag = %d, symflag = %d", transposeflag, symflag) ; spoolesFatal(); } if ( transposeflag == 1 && dataType != 2 ) { fprintf(stderr, "\n error, transposeflag = %d, dataType = %d", transposeflag, dataType) ; spoolesFatal(); } if ( symflag == 1 && dataType != 2 ) { fprintf(stderr, "\n symflag = 1 (hermitian), dataType != 2 (complex)") ; spoolesFatal(); } if ( nrowA <= 0 || ncolA <= 0 || nitem <= 0 ) { fprintf(stderr, "\n invalid value: nrow = %d, ncol = %d, nitem = %d", nrowA, ncolA, nitem) ; spoolesFatal(); } if ( symflag < 2 && nrowA != ncolA ) { fprintf(stderr, "\n invalid data: symflag = %d, nrow = %d, ncol = %d", symflag, nrowA, ncolA) ; spoolesFatal(); } alpha[0] = alphaReal ; alpha[1] = alphaImag ; /* ---------------------------- initialize the matrix object ---------------------------- */ A = InpMtx_new() ; InpMtx_init(A, storageMode, dataType, 0, 0) ; drand = Drand_new() ; /* ---------------------------------- generate a vector of nitem triples ---------------------------------- */ rowids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, nrowA) ; Drand_fillIvector(drand, nitem, rowids) ; colids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, ncolA) ; Drand_fillIvector(drand, nitem, colids) ; Drand_setUniform(drand, 0.0, 1.0) ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { zvec = DVinit(nitem, 0.0) ; Drand_fillDvector(drand, nitem, zvec) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { zvec = ZVinit(nitem, 0.0, 0.0) ; Drand_fillDvector(drand, 2*nitem, zvec) ; } /* ----------------------------------- assemble the entries entry by entry ----------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n A = zeros(%d,%d) ;", nrowA, ncolA) ; } if ( symflag == 1 ) { /* ---------------- hermitian matrix ---------------- */ for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] == colids[ii] ) { zvec[2*ii+1] = 0.0 ; } if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } else if ( symflag == 0 ) { /* ---------------- symmetric matrix ---------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputRealEntry(A, row, col, zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } } else { /* ------------------- nonsymmetric matrix ------------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputRealEntry(A, rowids[ii], colids[ii], zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputComplexEntry(A, rowids[ii], colids[ii], zvec[2*ii], zvec[2*ii+1]) ; } } } InpMtx_changeStorageMode(A, INPMTX_BY_VECTORS) ; DVfree(zvec) ; if ( symflag == 0 || symflag == 1 ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 4*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 16*A->nent*nrhs ; } } else { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 2*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 8*A->nent*nrhs ; } } if ( msglvl > 1 ) { /* ------------------------------------------- write the assembled matrix to a matlab file ------------------------------------------- */ InpMtx_writeForMatlab(A, "A", msgFile) ; if ( symflag == 0 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = A(k,j) ;" "\n end" "\n end", nrowA, ncolA) ; } else if ( symflag == 1 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = ctranspose(A(k,j)) ;" "\n end" "\n end", nrowA, ncolA) ; } } /* ------------------------------- generate dense matrices X and Y ------------------------------- */ if ( transposeflag == 0 ) { nrowX = ncolA ; nrowY = nrowA ; } else { nrowX = nrowA ; nrowY = ncolA ; } X = DenseMtx_new() ; Y = DenseMtx_new() ; Y2 = DenseMtx_new() ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_REAL, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_COMPLEX, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, 2*nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, 2*nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(2*nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, nrhs) ; DenseMtx_writeForMatlab(X, "X", msgFile) ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowY, nrhs) ; DenseMtx_writeForMatlab(Y, "Y", msgFile) ; } /* -------------------------------------------- perform the matrix-matrix multiply in serial -------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_sym_mmm(A, Y, alpha, X) ; } else if ( symflag == 1 ) { InpMtx_herm_mmm(A, Y, alpha, X) ; } else if ( symflag == 2 ) { InpMtx_nonsym_mmm(A, Y, alpha, X) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_H(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_T(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f serial mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* -------------------------------------------------------- perform the matrix-matrix multiply in multithreaded mode -------------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_MT_sym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 1 ) { InpMtx_MT_herm_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 2 ) { InpMtx_MT_nonsym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_H(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_T(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f MT mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(A) ; DenseMtx_free(X) ; DenseMtx_free(Y) ; DenseMtx_free(Y2) ; IVfree(rowids) ; IVfree(colids) ; Drand_free(drand) ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------- this program tests the IVL_MPI_allgather() method (1) each process generates the same owners[n] map (2) each process creates an IVL object and fills its owned lists with random numbers (3) the processes gather-all's the lists of ivl created -- 98apr03, cca ------------------------------------------------- */ { char *buffer ; double chksum, globalsum, t1, t2 ; Drand drand ; int ilist, length, myid, msglvl, nlist, nproc, rc, seed, size, tag ; int *list, *owners, *vec ; int stats[4], tstats[4] ; IV *ownersIV ; IVL *ivl ; FILE *msgFile ; /* --------------------------------------------------------------- find out the identity of this process and the number of process --------------------------------------------------------------- */ MPI_Init(&argc, &argv) ; MPI_Comm_rank(MPI_COMM_WORLD, &myid) ; MPI_Comm_size(MPI_COMM_WORLD, &nproc) ; fprintf(stdout, "\n process %d of %d, argc = %d", myid, nproc, argc) ; fflush(stdout) ; if ( argc != 5 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile n seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n nlist -- number of lists in the IVL object" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else { length = strlen(argv[2]) + 1 + 4 ; buffer = CVinit(length, '\0') ; sprintf(buffer, "%s.%d", argv[2], myid) ; if ( (msgFile = fopen(buffer, "w")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } CVfree(buffer) ; } nlist = atoi(argv[3]) ; seed = atoi(argv[4]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n nlist -- %d" "\n seed -- %d" "\n", argv[0], msglvl, argv[2], nlist, seed) ; fflush(msgFile) ; /* ---------------------------- generate the ownersIV object ---------------------------- */ MARKTIME(t1) ; ownersIV = IV_new() ; IV_init(ownersIV, nlist, NULL) ; owners = IV_entries(ownersIV) ; Drand_setDefaultFields(&drand) ; Drand_setSeed(&drand, seed) ; Drand_setUniform(&drand, 0, nproc) ; Drand_fillIvector(&drand, nlist, owners) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : initialize the ownersIV object", t2 - t1) ; fflush(msgFile) ; fprintf(msgFile, "\n\n ownersIV generated") ; if ( msglvl > 2 ) { IV_writeForHumanEye(ownersIV, msgFile) ; } else { IV_writeStats(ownersIV, msgFile) ; } fflush(msgFile) ; /* -------------------------------------------- set up the IVL object and fill owned entries -------------------------------------------- */ MARKTIME(t1) ; ivl = IVL_new() ; IVL_init1(ivl, IVL_CHUNKED, nlist) ; vec = IVinit(nlist, -1) ; Drand_setSeed(&drand, seed + myid) ; Drand_setUniform(&drand, 0, nlist) ; for ( ilist = 0 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] == myid ) { size = (int) Drand_value(&drand) ; Drand_fillIvector(&drand, size, vec) ; IVL_setList(ivl, ilist, size, vec) ; } } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : initialize the IVL object", t2 - t1) ; fflush(msgFile) ; if ( msglvl > 2 ) { IVL_writeForHumanEye(ivl, msgFile) ; } else { IVL_writeStats(ivl, msgFile) ; } fflush(msgFile) ; /* -------------------------------------------- compute the local checksum of the ivl object -------------------------------------------- */ for ( ilist = 0, chksum = 0.0 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] == myid ) { IVL_listAndSize(ivl, ilist, &size, &list) ; chksum += 1 + ilist + size + IVsum(size, list) ; } } fprintf(msgFile, "\n\n local partial chksum = %12.4e", chksum) ; fflush(msgFile) ; /* ----------------------- get the global checksum ----------------------- */ rc = MPI_Allreduce((void *) &chksum, (void *) &globalsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) ; /* -------------------------------- execute the all-gather operation -------------------------------- */ tag = 47 ; IVzero(4, stats) ; IVL_MPI_allgather(ivl, ownersIV, stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n return from IVL_MPI_allgather()") ; fprintf(msgFile, "\n local send stats : %10d messages with %10d bytes" "\n local recv stats : %10d messages with %10d bytes", stats[0], stats[2], stats[1], stats[3]) ; fflush(msgFile) ; } MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD) ; if ( myid == 0 ) { fprintf(msgFile, "\n total send stats : %10d messages with %10d bytes" "\n total recv stats : %10d messages with %10d bytes", tstats[0], tstats[2], tstats[1], tstats[3]) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ivl") ; IVL_writeForHumanEye(ivl, msgFile) ; fflush(msgFile) ; } /* ----------------------------------------- compute the checksum of the entire object ----------------------------------------- */ for ( ilist = 0, chksum = 0.0 ; ilist < nlist ; ilist++ ) { IVL_listAndSize(ivl, ilist, &size, &list) ; chksum += 1 + ilist + size + IVsum(size, list) ; } fprintf(msgFile, "\n globalsum = %12.4e, chksum = %12.4e, error = %12.4e", globalsum, chksum, fabs(globalsum - chksum)) ; fflush(msgFile) ; /* ---------------- free the objects ---------------- */ IV_free(ownersIV) ; IVL_free(ivl) ; /* ------------------------ exit the MPI environment ------------------------ */ MPI_Finalize() ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(0) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------- read in a DSTree object, read in a Graph file, read in a DV cutoffs file, get the stages IV object based on domain weight and write it to a file. created -- 97jun12, cca --------------------------------------------------- */ { char *inCutoffDVfileName, *inDSTreeFileName, *inGraphFileName, *outIVfileName ; double t1, t2 ; DV *cutoffDV ; Graph *graph ; int msglvl, rc ; IV *stagesIV ; DSTree *dstree ; FILE *msgFile ; if ( argc != 7 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inDSTreeFile inGraphFile " "\n inCutoffDVfile outFile" "\n msglvl -- message level" "\n msgFile -- message file" "\n inDSTreeFile -- input file, must be *.dstreef or *.dstreeb" "\n inGraphFile -- input file, must be *.graphf or *.graphb" "\n inCutoffDVfile -- input file, must be *.dvf or *.dvb" "\n outFile -- output file, must be *.ivf or *.ivb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inDSTreeFileName = argv[3] ; inGraphFileName = argv[4] ; inCutoffDVfileName = argv[5] ; outIVfileName = argv[6] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inDSTreeFileName -- %s" "\n inGraphFileName -- %s" "\n inCutoffDVfileName -- %s" "\n outFile -- %s" "\n", argv[0], msglvl, argv[2], inDSTreeFileName, inGraphFileName, inCutoffDVfileName, outIVfileName) ; fflush(msgFile) ; /* ------------------------- read in the DSTree object ------------------------- */ if ( strcmp(inDSTreeFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; spoolesFatal(); } dstree = DSTree_new() ; MARKTIME(t1) ; rc = DSTree_readFromFile(dstree, inDSTreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in dstree from file %s", t2 - t1, inDSTreeFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from DSTree_readFromFile(%p,%s)", rc, dstree, inDSTreeFileName) ; spoolesFatal(); } fprintf(msgFile, "\n\n after reading DSTree object from file %s", inDSTreeFileName) ; if ( msglvl > 2 ) { DSTree_writeForHumanEye(dstree, msgFile) ; } else { DSTree_writeStats(dstree, msgFile) ; } fflush(msgFile) ; /* ------------------------- read in the Graph object ------------------------- */ if ( strcmp(inGraphFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; spoolesFatal(); } graph = Graph_new() ; MARKTIME(t1) ; rc = Graph_readFromFile(graph, inGraphFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inGraphFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from Graph_readFromFile(%p,%s)", rc, graph, inGraphFileName) ; spoolesFatal(); } fprintf(msgFile, "\n\n after reading Graph object from file %s", inGraphFileName) ; if ( msglvl > 2 ) { Graph_writeForHumanEye(graph, msgFile) ; } else { Graph_writeStats(graph, msgFile) ; } fflush(msgFile) ; /* ----------------------------- read in the cutoffs DV object ----------------------------- */ if ( strcmp(inCutoffDVfileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; spoolesFatal(); } cutoffDV = DV_new() ; MARKTIME(t1) ; rc = DV_readFromFile(cutoffDV, inCutoffDVfileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inCutoffDVfileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from DV_readFromFile(%p,%s)", rc, cutoffDV, inCutoffDVfileName) ; spoolesFatal(); } fprintf(msgFile, "\n\n after reading DV object from file %s", inCutoffDVfileName) ; if ( msglvl > 0 ) { DV_writeForHumanEye(cutoffDV, msgFile) ; } else { DV_writeStats(cutoffDV, msgFile) ; } fflush(msgFile) ; /* --------------------- get the stages vector --------------------- */ stagesIV = DSTree_stagesViaDomainWeight(dstree, graph->vwghts, cutoffDV) ; if ( msglvl > 2 ) { IV_writeForHumanEye(stagesIV, msgFile) ; } else { IV_writeStats(stagesIV, msgFile) ; } fflush(msgFile) ; /* --------------------------- write out the DSTree object --------------------------- */ if ( stagesIV != NULL && strcmp(outIVfileName, "none") != 0 ) { MARKTIME(t1) ; rc = IV_writeToFile(stagesIV, outIVfileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write dstree to file %s", t2 - t1, outIVfileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from IV_writeToFile(%p,%s)", rc, stagesIV, outIVfileName) ; } } /* ---------------------- free the DSTree object ---------------------- */ DSTree_free(dstree) ; if ( stagesIV != NULL ) { IV_free(stagesIV) ; } fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------------------- read in a ETree object, create an IV object with the same size, mark the vertices in the top level separator(s), write the IV object to a file created -- 96may02, cca --------------------------------------------------------------- */ { char *inETreeFileName, *outIVfileName ; double t1, t2 ; int msglvl, rc, J, K, ncomp, nfront, nvtx, v ; int *bndwghts, *compids, *fch, *map, *nodwghts, *par, *sib, *vtxToFront ; IV *compidsIV, *mapIV ; ETree *etree ; FILE *msgFile ; Tree *tree ; if ( argc != 5 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inETreeFile outIVfile" "\n msglvl -- message level" "\n msgFile -- message file" "\n inETreeFile -- input file, must be *.etreef or *.etreeb" "\n outIVfile -- output file, must be *.ivf or *.ivb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inETreeFileName = argv[3] ; outIVfileName = argv[4] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inETreeFile -- %s" "\n outIVfile -- %s" "\n", argv[0], msglvl, argv[2], inETreeFileName, outIVfileName) ; fflush(msgFile) ; /* ------------------------ read in the ETree object ------------------------ */ if ( strcmp(inETreeFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } etree = ETree_new() ; MARKTIME(t1) ; rc = ETree_readFromFile(etree, inETreeFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in etree from file %s", t2 - t1, inETreeFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)", rc, etree, inETreeFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading ETree object from file %s", inETreeFileName) ; if ( msglvl > 2 ) { ETree_writeForHumanEye(etree, msgFile) ; } else { ETree_writeStats(etree, msgFile) ; } fflush(msgFile) ; nfront = ETree_nfront(etree) ; nvtx = ETree_nvtx(etree) ; bndwghts = ETree_bndwghts(etree) ; vtxToFront = ETree_vtxToFront(etree) ; nodwghts = ETree_nodwghts(etree) ; par = ETree_par(etree) ; fch = ETree_fch(etree) ; sib = ETree_sib(etree) ; tree = ETree_tree(etree) ; /* ----------------------------------------- create the map from fronts to components, top level separator(s) are component zero ----------------------------------------- */ mapIV = IV_new() ; IV_init(mapIV, nfront, NULL) ; map = IV_entries(mapIV) ; ncomp = 0 ; for ( J = Tree_preOTfirst(tree) ; J != -1 ; J = Tree_preOTnext(tree, J) ) { if ( (K = par[J]) == -1 ) { map[J] = 0 ; } else if ( map[K] != 0 ) { map[J] = map[K] ; } else if ( J == fch[K] && sib[J] == -1 && bndwghts[J] == nodwghts[K] + bndwghts[K] ) { map[J] = 0 ; } else { map[J] = ++ncomp ; } } fprintf(msgFile, "\n\n mapIV object") ; if ( msglvl > 2 ) { IV_writeForHumanEye(mapIV, msgFile) ; } else { IV_writeStats(mapIV, msgFile) ; } /* ---------------------------------------- fill the map from vertices to components ---------------------------------------- */ compidsIV = IV_new() ; IV_init(compidsIV, nvtx, NULL) ; compids = IV_entries(compidsIV) ; for ( v = 0 ; v < nvtx ; v++ ) { compids[v] = map[vtxToFront[v]] ; } fprintf(msgFile, "\n\n compidsIV object") ; if ( msglvl > 2 ) { IV_writeForHumanEye(compidsIV, msgFile) ; } else { IV_writeStats(compidsIV, msgFile) ; } fflush(msgFile) ; /* ----------------------- write out the IV object ----------------------- */ if ( strcmp(outIVfileName, "none") != 0 ) { MARKTIME(t1) ; rc = IV_writeToFile(compidsIV, outIVfileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : write etree to file %s", t2 - t1, outIVfileName) ; } if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from IV_writeToFile(%p,%s)", rc, compidsIV, outIVfileName) ; } /* ---------------- free the objects ---------------- */ ETree_free(etree) ; IV_free(mapIV) ; IV_free(compidsIV) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------ test the SubMtx_scale{1,2,3}vec() methods. created -- 98may02, cca ------------------------------------------ */ { SubMtx *mtxA ; double t1, t2 ; double *x0, *x1, *x2, *y0, *y1, *y2 ; Drand *drand ; DV *xdv0, *xdv1, *xdv2, *ydv0, *ydv1, *ydv2 ; ZV *xzv0, *xzv1, *xzv2, *yzv0, *yzv1, *yzv2 ; FILE *msgFile ; int mode, msglvl, nrowA, seed, type ; if ( argc != 7 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type nrowA seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of matrix A" "\n 1 -- real" "\n 2 -- complex" "\n mode -- mode of matrix A" "\n 7 -- diagonal" "\n 8 -- block diagonal symmetric" "\n 9 -- block diagonal hermitian (complex only)" "\n nrowA -- # of rows in matrix A" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; mode = atoi(argv[4]) ; nrowA = atoi(argv[5]) ; seed = atoi(argv[6]) ; fprintf(msgFile, "\n %% %s:" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% mode = %d" "\n %% nrowA = %d" "\n %% seed = %d", argv[0], msglvl, argv[2], type, mode, nrowA, seed) ; /* ----------------------------- check for errors in the input ----------------------------- */ if ( nrowA <= 0 ) { fprintf(stderr, "\n invalid input\n") ; exit(-1) ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setNormal(drand, 0.0, 1.0) ; /* ---------------------------- initialize the X ZV objects ---------------------------- */ MARKTIME(t1) ; if ( type == SPOOLES_REAL ) { xdv0 = DV_new() ; DV_init(xdv0, nrowA, NULL) ; x0 = DV_entries(xdv0) ; Drand_fillDvector(drand, nrowA, x0) ; xdv1 = DV_new() ; DV_init(xdv1, nrowA, NULL) ; x1 = DV_entries(xdv1) ; Drand_fillDvector(drand, nrowA, x1) ; xdv2 = DV_new() ; DV_init(xdv2, nrowA, NULL) ; x2 = DV_entries(xdv2) ; Drand_fillDvector(drand, nrowA, x2) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize X ZV objects", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X DV objects") ; fprintf(msgFile, "\n X0 = zeros(%d,1) ;", nrowA) ; DV_writeForMatlab(xdv0, "X0", msgFile) ; fprintf(msgFile, "\n X1 = zeros(%d,1) ;", nrowA) ; DV_writeForMatlab(xdv1, "X1", msgFile) ; fprintf(msgFile, "\n X2 = zeros(%d,1) ;", nrowA) ; DV_writeForMatlab(xdv2, "X2", msgFile) ; fflush(msgFile) ; } } else if ( type == SPOOLES_COMPLEX ) { xzv0 = ZV_new() ; ZV_init(xzv0, nrowA, NULL) ; x0 = ZV_entries(xzv0) ; Drand_fillDvector(drand, 2*nrowA, x0) ; xzv1 = ZV_new() ; ZV_init(xzv1, nrowA, NULL) ; x1 = ZV_entries(xzv1) ; Drand_fillDvector(drand, 2*nrowA, x1) ; xzv2 = ZV_new() ; ZV_init(xzv2, nrowA, NULL) ; x2 = ZV_entries(xzv2) ; Drand_fillDvector(drand, 2*nrowA, x2) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize X ZV objects", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X ZV objects") ; fprintf(msgFile, "\n X0 = zeros(%d,1) ;", nrowA) ; ZV_writeForMatlab(xzv0, "X0", msgFile) ; fprintf(msgFile, "\n X1 = zeros(%d,1) ;", nrowA) ; ZV_writeForMatlab(xzv1, "X1", msgFile) ; fprintf(msgFile, "\n X2 = zeros(%d,1) ;", nrowA) ; ZV_writeForMatlab(xzv2, "X2", msgFile) ; fflush(msgFile) ; } } /* --------------------------------- initialize the Y DV or ZV objects --------------------------------- */ MARKTIME(t1) ; if ( type == SPOOLES_REAL ) { ydv0 = DV_new() ; DV_init(ydv0, nrowA, NULL) ; y0 = DV_entries(ydv0) ; ydv1 = DV_new() ; DV_init(ydv1, nrowA, NULL) ; y1 = DV_entries(ydv1) ; ydv2 = DV_new() ; DV_init(ydv2, nrowA, NULL) ; y2 = DV_entries(ydv2) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize Y DV objects", t2 - t1) ; } else if ( type == SPOOLES_COMPLEX ) { yzv0 = ZV_new() ; ZV_init(yzv0, nrowA, NULL) ; y0 = ZV_entries(yzv0) ; yzv1 = ZV_new() ; ZV_init(yzv1, nrowA, NULL) ; y1 = ZV_entries(yzv1) ; yzv2 = ZV_new() ; ZV_init(yzv2, nrowA, NULL) ; y2 = ZV_entries(yzv2) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize Y ZV objects", t2 - t1) ; } /* ----------------------------------- initialize the A matrix SubMtx object ----------------------------------- */ seed++ ; mtxA = SubMtx_new() ; switch ( mode ) { case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : SubMtx_initRandom(mtxA, type, mode, 0, 0, nrowA, nrowA, 0, seed) ; break ; default : fprintf(stderr, "\n fatal error in test_solve" "\n invalid mode = %d", mode) ; exit(-1) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% A SubMtx object") ; fprintf(msgFile, "\n A = zeros(%d,%d) ;", nrowA, nrowA) ; SubMtx_writeForMatlab(mtxA, "A", msgFile) ; fflush(msgFile) ; } /* ------------------- compute Y0 = A * X0 ------------------- */ if ( type == SPOOLES_REAL ) { DVzero(nrowA, y0) ; } else if ( type == SPOOLES_COMPLEX ) { DVzero(2*nrowA, y0) ; } SubMtx_scale1vec(mtxA, y0, x0) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Z0 = zeros(%d,1) ;", nrowA) ; if ( type == SPOOLES_REAL ) { DV_writeForMatlab(ydv0, "Z0", msgFile) ; } else if ( type == SPOOLES_COMPLEX ) { ZV_writeForMatlab(yzv0, "Z0", msgFile) ; } fprintf(msgFile, "\n err0 = Z0 - A*X0 ;") ; fprintf(msgFile, "\n error0 = max(abs(err0))") ; fflush(msgFile) ; } if ( type == SPOOLES_REAL ) { DVzero(nrowA, y0) ; DVzero(nrowA, y1) ; } else if ( type == SPOOLES_COMPLEX ) { DVzero(2*nrowA, y0) ; DVzero(2*nrowA, y1) ; } SubMtx_scale2vec(mtxA, y0, y1, x0, x1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Z0 = zeros(%d,1) ;", nrowA) ; fprintf(msgFile, "\n\n Z1 = zeros(%d,1) ;", nrowA) ; if ( type == SPOOLES_REAL ) { DV_writeForMatlab(ydv0, "Z0", msgFile) ; DV_writeForMatlab(ydv1, "Z1", msgFile) ; } else if ( type == SPOOLES_COMPLEX ) { ZV_writeForMatlab(yzv0, "Z0", msgFile) ; ZV_writeForMatlab(yzv1, "Z1", msgFile) ; } fprintf(msgFile, "\n err1 = [Z0 Z1] - A*[X0 X1] ;") ; fprintf(msgFile, "\n error1 = max(abs(err1))") ; fflush(msgFile) ; } if ( type == SPOOLES_REAL ) { DVzero(nrowA, y0) ; DVzero(nrowA, y1) ; DVzero(nrowA, y2) ; } else if ( type == SPOOLES_COMPLEX ) { DVzero(2*nrowA, y0) ; DVzero(2*nrowA, y1) ; DVzero(2*nrowA, y2) ; } SubMtx_scale3vec(mtxA, y0, y1, y2, x0, x1, x2) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Z0 = zeros(%d,1) ;", nrowA) ; fprintf(msgFile, "\n\n Z1 = zeros(%d,1) ;", nrowA) ; fprintf(msgFile, "\n\n Z2 = zeros(%d,1) ;", nrowA) ; if ( type == SPOOLES_REAL ) { DV_writeForMatlab(ydv0, "Z0", msgFile) ; DV_writeForMatlab(ydv1, "Z1", msgFile) ; DV_writeForMatlab(ydv2, "Z2", msgFile) ; } else if ( type == SPOOLES_COMPLEX ) { ZV_writeForMatlab(yzv0, "Z0", msgFile) ; ZV_writeForMatlab(yzv1, "Z1", msgFile) ; ZV_writeForMatlab(yzv2, "Z2", msgFile) ; } fprintf(msgFile, "\n err2 = [Z0 Z1 Z2] - A*[X0 X1 X2] ;") ; fprintf(msgFile, "\n error3 = max(abs(err2))") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ SubMtx_free(mtxA) ; if ( type == SPOOLES_REAL ) { DV_free(xdv0) ; DV_free(xdv1) ; DV_free(xdv2) ; DV_free(ydv0) ; DV_free(ydv1) ; DV_free(ydv2) ; } else if ( type == SPOOLES_COMPLEX ) { ZV_free(xzv0) ; ZV_free(xzv1) ; ZV_free(xzv2) ; ZV_free(yzv0) ; ZV_free(yzv1) ; ZV_free(yzv2) ; } Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------------- test the SubMtx_solve() method. created -- 98apr15, cca ----------------------------- */ { SubMtx *mtxA, *mtxB, *mtxX ; double idot, rdot, t1, t2 ; double *entB, *entX ; Drand *drand ; FILE *msgFile ; int inc1, inc2, mode, msglvl, ncolA, nentA, nrowA, ncolB, nrowB, ncolX, nrowX, seed, type ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type mode nrowA nentA ncolB seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of matrix A" "\n 1 -- real" "\n 2 -- complex" "\n mode -- mode of matrix A" "\n 2 -- sparse stored by rows" "\n 3 -- sparse stored by columns" "\n 5 -- sparse stored by subrows" "\n 6 -- sparse stored by subcolumns" "\n 7 -- diagonal" "\n 8 -- block diagonal symmetric" "\n 9 -- block diagonal hermitian" "\n nrowA -- # of rows in matrix A" "\n nentA -- # of entries in matrix A" "\n ncolB -- # of columns in matrix B" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; spoolesFatal(); } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; mode = atoi(argv[4]) ; nrowA = atoi(argv[5]) ; nentA = atoi(argv[6]) ; ncolB = atoi(argv[7]) ; seed = atoi(argv[8]) ; fprintf(msgFile, "\n %% %s:" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% mode = %d" "\n %% nrowA = %d" "\n %% nentA = %d" "\n %% ncolB = %d" "\n %% seed = %d", argv[0], msglvl, argv[2], type, mode, nrowA, nentA, ncolB, seed) ; ncolA = nrowA ; nrowB = nrowA ; nrowX = nrowA ; ncolX = ncolB ; /* ----------------------------- check for errors in the input ----------------------------- */ if ( nrowA <= 0 || nentA <= 0 || ncolB <= 0 ) { fprintf(stderr, "\n invalid input\n") ; spoolesFatal(); } switch ( type ) { case SPOOLES_REAL : switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : break ; default : fprintf(stderr, "\n invalid mode %d\n", mode) ; spoolesFatal(); } break ; case SPOOLES_COMPLEX : switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : break ; default : fprintf(stderr, "\n invalid mode %d\n", mode) ; spoolesFatal(); } break ; default : fprintf(stderr, "\n invalid type %d\n", type) ; spoolesFatal(); break ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setNormal(drand, 0.0, 1.0) ; /* ------------------------------ initialize the X SubMtx object ------------------------------ */ MARKTIME(t1) ; mtxX = SubMtx_new() ; SubMtx_initRandom(mtxX, type, SUBMTX_DENSE_COLUMNS, 0, 0, nrowX, ncolX, nrowX*ncolX, ++seed) ; SubMtx_denseInfo(mtxX, &nrowX, &ncolX, &inc1, &inc2, &entX) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize X SubMtx object", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X SubMtx object") ; fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, ncolX) ; SubMtx_writeForMatlab(mtxX, "X", msgFile) ; fflush(msgFile) ; } /* ------------------------------ initialize the B SubMtx object ------------------------------ */ MARKTIME(t1) ; mtxB = SubMtx_new() ; SubMtx_init(mtxB, type, SUBMTX_DENSE_COLUMNS, 0, 0, nrowB, ncolB, nrowB*ncolB) ; SubMtx_denseInfo(mtxB, &nrowB, &ncolB, &inc1, &inc2, &entB) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : if ( SUBMTX_IS_REAL(mtxX) ) { DVcopy(nrowB*ncolB, entB, entX) ; } else if ( SUBMTX_IS_COMPLEX(mtxX) ) { ZVcopy(nrowB*ncolB, entB, entX) ; } break ; default : if ( SUBMTX_IS_REAL(mtxX) ) { DVzero(nrowB*ncolB, entB) ; } else if ( SUBMTX_IS_COMPLEX(mtxX) ) { DVzero(2*nrowB*ncolB, entB) ; } break ; } MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize B SubMtx object", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% B SubMtx object") ; fprintf(msgFile, "\n B = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "B", msgFile) ; fflush(msgFile) ; } /* ------------------------------------- initialize the A matrix SubMtx object ------------------------------------- */ seed++ ; mtxA = SubMtx_new() ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : SubMtx_initRandomLowerTriangle(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed, 1) ; break ; case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : SubMtx_initRandomUpperTriangle(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed, 1) ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : SubMtx_initRandom(mtxA, type, mode, 0, 0, nrowA, ncolA, nentA, seed) ; break ; default : fprintf(stderr, "\n fatal error in test_solve" "\n invalid mode = %d", mode) ; spoolesFatal(); } if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% A SubMtx object") ; fprintf(msgFile, "\n A = zeros(%d,%d) ;", nrowA, ncolA) ; SubMtx_writeForMatlab(mtxA, "A", msgFile) ; fflush(msgFile) ; } /* -------------------------------------------------------- compute B = A * X (for diagonal and block diagonal) or B = (I + A) * X (for lower and upper triangular) -------------------------------------------------------- */ if ( SUBMTX_IS_REAL(mtxA) ) { DV *colDV, *rowDV ; double value, *colX, *rowA, *pBij, *pXij ; int irowA, jcolX ; colDV = DV_new() ; DV_init(colDV, nrowA, NULL) ; colX = DV_entries(colDV) ; rowDV = DV_new() ; DV_init(rowDV, nrowA, NULL) ; rowA = DV_entries(rowDV) ; for ( jcolX = 0 ; jcolX < ncolB ; jcolX++ ) { SubMtx_fillColumnDV(mtxX, jcolX, colDV) ; for ( irowA = 0 ; irowA < nrowA ; irowA++ ) { SubMtx_fillRowDV(mtxA, irowA, rowDV) ; SubMtx_locationOfRealEntry(mtxX, irowA, jcolX, &pXij) ; SubMtx_locationOfRealEntry(mtxB, irowA, jcolX, &pBij) ; value = DVdot(nrowA, rowA, colX) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : *pBij = *pXij + value ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : *pBij = value ; break ; } } } DV_free(colDV) ; DV_free(rowDV) ; } else if ( SUBMTX_IS_COMPLEX(mtxA) ) { ZV *colZV, *rowZV ; double *colX, *rowA, *pBIij, *pBRij, *pXIij, *pXRij ; int irowA, jcolX ; colZV = ZV_new() ; ZV_init(colZV, nrowA, NULL) ; colX = ZV_entries(colZV) ; rowZV = ZV_new() ; ZV_init(rowZV, nrowA, NULL) ; rowA = ZV_entries(rowZV) ; for ( jcolX = 0 ; jcolX < ncolB ; jcolX++ ) { SubMtx_fillColumnZV(mtxX, jcolX, colZV) ; for ( irowA = 0 ; irowA < nrowA ; irowA++ ) { SubMtx_fillRowZV(mtxA, irowA, rowZV) ; SubMtx_locationOfComplexEntry(mtxX, irowA, jcolX, &pXRij, &pXIij) ; SubMtx_locationOfComplexEntry(mtxB, irowA, jcolX, &pBRij, &pBIij) ; ZVdotU(nrowA, rowA, colX, &rdot, &idot) ; switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : *pBRij = *pXRij + rdot ; *pBIij = *pXIij + idot ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : *pBRij = rdot ; *pBIij = idot ; break ; } } } ZV_free(colZV) ; ZV_free(rowZV) ; } /* ---------------------- print out the matrices ---------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% X SubMtx object") ; fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, ncolX) ; SubMtx_writeForMatlab(mtxX, "X", msgFile) ; fprintf(msgFile, "\n\n %% A SubMtx object") ; fprintf(msgFile, "\n A = zeros(%d,%d) ;", nrowA, ncolA) ; SubMtx_writeForMatlab(mtxA, "A", msgFile) ; fprintf(msgFile, "\n\n %% B SubMtx object") ; fprintf(msgFile, "\n B = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "B", msgFile) ; fflush(msgFile) ; } /* ----------------- check with matlab ----------------- */ if ( msglvl > 1 ) { switch ( mode ) { case SUBMTX_DENSE_SUBROWS : case SUBMTX_SPARSE_ROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_SPARSE_COLUMNS : fprintf(msgFile, "\n\n emtx = abs(B - X - A*X) ;" "\n\n condA = cond(eye(%d,%d) + A)" "\n\n maxabsZ = max(max(abs(emtx))) ", nrowA, nrowA) ; fflush(msgFile) ; break ; case SUBMTX_DIAGONAL : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : fprintf(msgFile, "\n\n emtx = abs(B - A*X) ;" "\n\n condA = cond(A)" "\n\n maxabsZ = max(max(abs(emtx))) ") ; fflush(msgFile) ; break ; } } /* ---------------------------------------- compute the solve DY = B or (I + A)Y = B ---------------------------------------- */ SubMtx_solve(mtxA, mtxB) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% Y SubMtx object") ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowB, ncolB) ; SubMtx_writeForMatlab(mtxB, "Y", msgFile) ; fprintf(msgFile, "\n\n %% solerror = abs(Y - X) ;" "\n\n solerror = abs(Y - X) ;" "\n\n maxabserror = max(max(solerror)) ") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ SubMtx_free(mtxA) ; SubMtx_free(mtxX) ; SubMtx_free(mtxB) ; Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------- test the Chv_r1upd() method. the program's output is a matlab file to check correctness of the code. created -- 98apr30, cca ------------------------------------- */ { Chv *chv ; double imag, real, t1, t2 ; double *entries ; Drand *drand ; FILE *msgFile ; int ii, irow, jcol, msglvl, ncol, nD, nent, nL, nrow, nU, rc, seed, symflag, tag, type ; int *colind, *rowind ; if ( argc != 8 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile nD nU type symflag seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n nD -- # of rows and columns in the (1,1) block" "\n nU -- # of columns in the (1,2) block" "\n type -- entries type" "\n 1 --> real" "\n 2 --> complex" "\n symflag -- symmetry flag" "\n 0 --> hermitian" "\n 1 --> symmetric" "\n 2 --> nonsymmetric " "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } nD = atoi(argv[3]) ; nU = atoi(argv[4]) ; type = atoi(argv[5]) ; symflag = atoi(argv[6]) ; seed = atoi(argv[7]) ; fprintf(msgFile, "\n %% testChv:" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% nD = %d" "\n %% nU = %d" "\n %% type = %d" "\n %% symflag = %d" "\n %% seed = %d", msglvl, argv[2], nD, nU, type, symflag, seed) ; nL = nU ; /* ----------------------------- check for errors in the input ----------------------------- */ if ( nD <= 0 || nL < 0 || nU < 0 || symflag < 0 || symflag > 3 ) { fprintf(stderr, "\n invalid input" "\n nD = %d, nL = %d, nU = %d, symflag = %d\n", nD, nL, nU, symflag) ; exit(-1) ; } if ( symflag <= 2 && nL != nU ) { fprintf(stderr, "\n invalid input" "\n symflag = %d, nL = %d, nU = %d", symflag, nL, nU) ; exit(-1) ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setNormal(drand, 0.0, 1.0) ; /* ---------------------------- initialize the Chv object ---------------------------- */ MARKTIME(t1) ; chv = Chv_new() ; Chv_init(chv, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize chv object", t2 - t1) ; fflush(msgFile) ; Chv_columnIndices(chv, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; if ( CHV_IS_NONSYMMETRIC(chv) ) { Chv_rowIndices(chv, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; } /* ------------------------------------ load the entries with random entries ------------------------------------ */ nent = Chv_nent(chv) ; entries = Chv_entries(chv) ; if ( CHV_IS_REAL(chv) ) { Drand_fillDvector(drand, nent, entries) ; } else if ( CHV_IS_COMPLEX(chv) ) { Drand_fillDvector(drand, 2*nent, entries) ; } if ( CHV_IS_HERMITIAN(chv) ) { for ( irow = 0 ; irow < nD ; irow++ ) { Chv_complexEntry(chv, irow, irow, &real, &imag) ; Chv_setComplexEntry(chv, irow, irow, real, 0.0) ; } } fprintf(msgFile, "\n %% matrix entries") ; Chv_writeForMatlab(chv, "a", msgFile) ; /* --------------------------------------- write out matlab code for rank-1 update --------------------------------------- */ fprintf(msgFile, "\n nD = %d ;" "\n nL = %d ;" "\n nU = %d ;" "\n nrow = nD + nL ;" "\n ncol = nD + nU ;" "\n b = a ; " "\n d = a(1,1) ;" "\n l = a(2:nrow,1) / d ; " "\n u = a(1,2:ncol) ; " "\n b(2:nrow,2:ncol) = a(2:nrow,2:ncol) - l * u ; " "\n u = u / d ; " "\n b(1,1) = d ; " "\n b(1,2:ncol) = u ; " "\n b(2:nrow,1) = l ; ", nD, nL, nU) ; if ( nL > 0 && nU > 0 ) { fprintf(msgFile, "\n b(nD+1:nrow,nD+1:ncol) = 0.0 ;") ; } /* ------------------------- perform the rank-1 update ------------------------- */ rc = Chv_r1upd(chv) ; /* fprintf(msgFile, "\n raw entries vector") ; DVfprintf(msgFile, 2*nent, entries) ; */ fprintf(msgFile, "\n %% matrix entries after update") ; Chv_writeForMatlab(chv, "c", msgFile) ; fprintf(msgFile, "\n maxerr = max(max(abs(c-b)))") ; /* ------------------------ free the working storage ------------------------ */ Chv_free(chv) ; Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/* ------------------------------------------------------------- purpose --- to compute a matrix-vector multiply y[] = C * x[] where C is the identity, A or B (depending on *pprbtype). *pnrows -- # of rows in x[] *pncols -- # of columns in x[] *pprbtype -- problem type *pprbtype = 1 --> vibration problem, matrix is A *pprbtype = 2 --> buckling problem, matrix is B *pprbtype = 3 --> matrix is identity, y[] = x[] x[] -- vector to be multiplied NOTE: the x[] vector is global, not a portion y[] -- product vector NOTE: the y[] vector is global, not a portion created -- 98aug28, cca & jcp ------------------------------------------------------------- */ void JimMatMulMPI ( int *pnrows, int *pncols, double x[], double y[], int *pprbtype, void *data ) { BridgeMPI *bridge = (BridgeMPI *) data ; int ncols, nent, nrows ; #if MYDEBUG > 0 double t1, t2 ; count_JimMatMul++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(bridge->msgFile) ; #endif nrows = *pnrows ; ncols = *pncols ; nent = nrows*ncols ; if ( *pprbtype == 3 ) { /* -------------------------- ... matrix is the identity -------------------------- */ DVcopy(nent, y, x) ; } else { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jcol, jj, kk, myid, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; /* --------------------------------------------- slide the owned rows of x[] down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { y[kk++] = x[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n inside JimMatMulMPI, calling MatMulMpi" "\n prbtype %d, nrows %d, ncols %d, nowned %d", *pprbtype, *pnrows, *pncols, nowned) ; fflush(bridge->msgFile) ; } MatMulMPI(&nowned, pncols, y, y, pprbtype, data) ; /* ------------------------------------------------- gather all the entries of y[] onto processor zero ------------------------------------------------- */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), y) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n mtx: %d rows x %d columns", mtx->nrow, mtx->ncol) ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n newmtx: %d rows x %d columns", newmtx->nrow, newmtx->ncol) ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { if ( mtx->nrow != neqns || mtx->ncol != ncols ) { fprintf(bridge->msgFile, "\n\n WHOA: mtx->nrows %d, mtx->ncols %d" ", neqns %d, ncols %d", mtx->nrow, mtx->ncol, neqns, ncols) ; exit(-1) ; } DVcopy(neqns*ncols, y, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ MPI_Bcast((void *) y, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n after the broadcast") ; fflush(bridge->msgFile) ; } } MPI_Barrier(bridge->comm) ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimMatMul += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(bridge->msgFile) ; #endif return ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* -------------------------------------------------------------------- this program tests the Graph_MPI_Bcast() method (1) process root generates a random Graph object and computes its checksum (2) process root broadcasts the Graph object to the other processors (3) each process computes the checksum of its Graph object (4) the checksums are compared on root created -- 98sep10, cca -------------------------------------------------------------------- */ { char *buffer ; double chksum, t1, t2 ; double *sums ; Drand drand ; int iproc, length, loc, msglvl, myid, nitem, nproc, nvtx, root, seed, size, type, v ; int *list ; FILE *msgFile ; Graph *graph ; /* --------------------------------------------------------------- find out the identity of this process and the number of process --------------------------------------------------------------- */ MPI_Init(&argc, &argv) ; MPI_Comm_rank(MPI_COMM_WORLD, &myid) ; MPI_Comm_size(MPI_COMM_WORLD, &nproc) ; fprintf(stdout, "\n process %d of %d, argc = %d", myid, nproc, argc) ; fflush(stdout) ; if ( argc != 8 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type nvtx nitem root seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- type of graph" "\n nvtx -- # of vertices" "\n nitem -- # of items used to generate graph" "\n root -- root processor for broadcast" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else { length = strlen(argv[2]) + 1 + 4 ; buffer = CVinit(length, '\0') ; sprintf(buffer, "%s.%d", argv[2], myid) ; if ( (msgFile = fopen(buffer, "w")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } CVfree(buffer) ; } type = atoi(argv[3]) ; nvtx = atoi(argv[4]) ; nitem = atoi(argv[5]) ; root = atoi(argv[6]) ; seed = atoi(argv[7]) ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n type -- %d" "\n nvtx -- %d" "\n nitem -- %d" "\n root -- %d" "\n seed -- %d" "\n", argv[0], msglvl, argv[2], type, nvtx, nitem, root, seed) ; fflush(msgFile) ; /* ----------------------- set up the Graph object ----------------------- */ MARKTIME(t1) ; graph = Graph_new() ; if ( myid == root ) { InpMtx *inpmtx ; int nedges, totewght, totvwght, v ; int *adj, *vwghts ; IVL *adjIVL, *ewghtIVL ; /* ----------------------- generate a random graph ----------------------- */ inpmtx = InpMtx_new() ; InpMtx_init(inpmtx, INPMTX_BY_ROWS, INPMTX_INDICES_ONLY, nitem, 0) ; Drand_setDefaultFields(&drand) ; Drand_setSeed(&drand, seed) ; Drand_setUniform(&drand, 0, nvtx) ; Drand_fillIvector(&drand, nitem, InpMtx_ivec1(inpmtx)) ; Drand_fillIvector(&drand, nitem, InpMtx_ivec2(inpmtx)) ; InpMtx_setNent(inpmtx, nitem) ; InpMtx_changeStorageMode(inpmtx, INPMTX_BY_VECTORS) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inpmtx mtx filled with raw entries") ; InpMtx_writeForHumanEye(inpmtx, msgFile) ; fflush(msgFile) ; } adjIVL = InpMtx_fullAdjacency(inpmtx) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n full adjacency structure") ; IVL_writeForHumanEye(adjIVL, msgFile) ; fflush(msgFile) ; } nedges = adjIVL->tsize ; if ( type == 1 || type == 3 ) { Drand_setUniform(&drand, 1, 10) ; vwghts = IVinit(nvtx, 0) ; Drand_fillIvector(&drand, nvtx, vwghts) ; totvwght = IVsum(nvtx, vwghts) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n vertex weights") ; IVfprintf(msgFile, nvtx, vwghts) ; fflush(msgFile) ; } } else { vwghts = NULL ; totvwght = nvtx ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n totvwght %d", totvwght) ; fflush(msgFile) ; } if ( type == 2 || type == 3 ) { ewghtIVL = IVL_new() ; IVL_init1(ewghtIVL, IVL_CHUNKED, nvtx) ; Drand_setUniform(&drand, 1, 100) ; totewght = 0 ; for ( v = 0 ; v < nvtx ; v++ ) { IVL_listAndSize(adjIVL, v, &size, &adj) ; IVL_setList(ewghtIVL, v, size, NULL) ; IVL_listAndSize(ewghtIVL, v, &size, &adj) ; Drand_fillIvector(&drand, size, adj) ; totewght += IVsum(size, adj) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ewghtIVL") ; IVL_writeForHumanEye(ewghtIVL, msgFile) ; fflush(msgFile) ; } } else { ewghtIVL = NULL ; totewght = nedges ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n totewght %d", totewght) ; fflush(msgFile) ; } Graph_init2(graph, type, nvtx, 0, nedges, totvwght, totewght, adjIVL, vwghts, ewghtIVL) ; InpMtx_free(inpmtx) ; } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : initialize the Graph object", t2 - t1) ; fflush(msgFile) ; if ( msglvl > 2 ) { Graph_writeForHumanEye(graph, msgFile) ; } else { Graph_writeStats(graph, msgFile) ; } fflush(msgFile) ; if ( myid == root ) { /* ---------------------------------------- compute the checksum of the Graph object ---------------------------------------- */ chksum = graph->type + graph->nvtx + graph->nvbnd + graph->nedges + graph->totvwght + graph->totewght ; for ( v = 0 ; v < nvtx ; v++ ) { IVL_listAndSize(graph->adjIVL, v, &size, &list) ; chksum += 1 + v + size + IVsum(size, list) ; } if ( graph->vwghts != NULL ) { chksum += IVsum(nvtx, graph->vwghts) ; } if ( graph->ewghtIVL != NULL ) { for ( v = 0 ; v < nvtx ; v++ ) { IVL_listAndSize(graph->ewghtIVL, v, &size, &list) ; chksum += 1 + v + size + IVsum(size, list) ; } } fprintf(msgFile, "\n\n local chksum = %12.4e", chksum) ; fflush(msgFile) ; } /* -------------------------- broadcast the Graph object -------------------------- */ MARKTIME(t1) ; graph = Graph_MPI_Bcast(graph, root, msglvl, msgFile, MPI_COMM_WORLD) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : broadcast the Graph object", t2 - t1) ; if ( msglvl > 2 ) { Graph_writeForHumanEye(graph, msgFile) ; } else { Graph_writeStats(graph, msgFile) ; } /* ---------------------------------------- compute the checksum of the Graph object ---------------------------------------- */ chksum = graph->type + graph->nvtx + graph->nvbnd + graph->nedges + graph->totvwght + graph->totewght ; for ( v = 0 ; v < nvtx ; v++ ) { IVL_listAndSize(graph->adjIVL, v, &size, &list) ; chksum += 1 + v + size + IVsum(size, list) ; } if ( graph->vwghts != NULL ) { chksum += IVsum(nvtx, graph->vwghts) ; } if ( graph->ewghtIVL != NULL ) { for ( v = 0 ; v < nvtx ; v++ ) { IVL_listAndSize(graph->ewghtIVL, v, &size, &list) ; chksum += 1 + v + size + IVsum(size, list) ; } } fprintf(msgFile, "\n\n local chksum = %12.4e", chksum) ; fflush(msgFile) ; /* --------------------------------------- gather the checksums from the processes --------------------------------------- */ sums = DVinit(nproc, 0.0) ; MPI_Gather((void *) &chksum, 1, MPI_DOUBLE, (void *) sums, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD) ; if ( myid == 0 ) { fprintf(msgFile, "\n\n sums") ; DVfprintf(msgFile, nproc, sums) ; for ( iproc = 0 ; iproc < nproc ; iproc++ ) { sums[iproc] -= chksum ; } fprintf(msgFile, "\n\n errors") ; DVfprintf(msgFile, nproc, sums) ; fprintf(msgFile, "\n\n maxerror = %12.4e", DVmax(nproc, sums, &loc)); } /* ---------------- free the objects ---------------- */ DVfree(sums) ; Graph_free(graph) ; /* ------------------------ exit the MPI environment ------------------------ */ MPI_Finalize() ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(0) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------- simple test program created -- 98apr15, cca ----------------------- */ { A2 *A ; double t1, t2, value ; FILE *msgFile ; int inc1, inc2, irow, jcol, msglvl, nrow, ncol, seed, type ; if ( argc != 9 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type nrow ncol inc1 inc2 seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- entries type" "\n 1 -- real" "\n 2 -- complex" "\n nrow -- # of rows " "\n ncol -- # of columns " "\n inc1 -- row increment " "\n inc2 -- column increment " "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; spoolesFatal(); } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; nrow = atoi(argv[4]) ; ncol = atoi(argv[5]) ; inc1 = atoi(argv[6]) ; inc2 = atoi(argv[7]) ; if ( type < 1 || type > 2 || nrow < 0 || ncol < 0 || inc1 < 1 || inc2 < 1 ) { fprintf(stderr, "\n fatal error, type %d, nrow %d, ncol %d, inc1 %d, inc2 %d", type, nrow, ncol, inc1, inc2) ; spoolesFatal(); } seed = atoi(argv[7]) ; fprintf(msgFile, "\n\n %% %s :" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% nrow = %d" "\n %% ncol = %d" "\n %% inc1 = %d" "\n %% inc2 = %d" "\n %% seed = %d" "\n", argv[0], msglvl, argv[2], type, nrow, ncol, inc1, inc2, seed) ; /* ----------------------------- initialize the matrix objects ----------------------------- */ MARKTIME(t1) ; A = A2_new() ; A2_init(A, type, nrow, ncol, inc1, inc2, NULL) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix object", t2 - t1) ; MARKTIME(t1) ; A2_fillRandomUniform(A, -1, 1, seed) ; seed++ ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to fill matrix with random numbers", t2 - t1) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n matrix A") ; A2_writeForHumanEye(A, msgFile) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n %% matrix A") ; A2_writeForMatlab(A, "A", msgFile) ; } /* ------------- get the norms ------------- */ value = A2_maxabs(A) ; fprintf(msgFile, "\n error_maxabs = abs(%20.12e - max(max(abs(A))))", value) ; value = A2_frobNorm(A) ; fprintf(msgFile, "\n error_frob = abs(%20.12e - norm(A, 'fro'))", value) ; value = A2_oneNorm(A) ; fprintf(msgFile, "\n error_one = abs(%20.12e - norm(A, 1))", value) ; value = A2_infinityNorm(A) ; fprintf(msgFile, "\n error_inf = abs(%20.12e - norm(A, inf))", value) ; for ( irow = 0 ; irow < nrow ; irow++ ) { value = A2_infinityNormOfRow(A, irow) ; fprintf(msgFile, "\n error_infNormsOfRows(%d) = abs(%20.12e - norm(A(%d,:), inf)) ;", irow+1, value, irow+1) ; value = A2_oneNormOfRow(A, irow) ; fprintf(msgFile, "\n error_oneNormsOfRows(%d) = abs(%20.12e - norm(A(%d,:), 1)) ;", irow+1, value, irow+1) ; value = A2_twoNormOfRow(A, irow) ; fprintf(msgFile, "\n error_twoNormsOfRows(%d) = abs(%20.12e - norm(A(%d,:), 2)) ;", irow+1, value, irow+1) ; } for ( jcol = 0 ; jcol < ncol ; jcol++ ) { value = A2_infinityNormOfColumn(A, jcol) ; fprintf(msgFile, "\n error_infNormsOfColumns(%d) = abs(%20.12e - norm(A(:,%d), inf)) ;", jcol+1, value, jcol+1) ; value = A2_oneNormOfColumn(A, jcol) ; fprintf(msgFile, "\n error_oneNormsOfColumns(%d) = abs(%20.12e - norm(A(:,%d), 1)) ;", jcol+1, value, jcol+1) ; value = A2_twoNormOfColumn(A, jcol) ; fprintf(msgFile, "\n error_twoNormsOfColumns(%d) = abs(%20.12e - norm(A(:,%d), 2)) ;", jcol+1, value, jcol+1) ; } fprintf(msgFile, "\n error_in_row_norms = [ max(error_infNormsOfRows) " "\n max(error_oneNormsOfRows) " "\n max(error_twoNormsOfRows) ]" "\n error_in_column_norms = [ max(error_infNormsOfColumns) " "\n max(error_oneNormsOfColumns) " "\n max(error_twoNormsOfColumns) ]") ; fprintf(msgFile, "\n") ; /* ------------------------ free the working storage ------------------------ */ A2_free(A) ; return(0) ; }