/* ---------------------------------------------------------------- purpose -- for each U_{J,bnd{J}} matrix, remove from hash table, split into their U_{J,K} submatrices and insert into the hash table. created -- 98may04, cca ---------------------------------------------------------------- */ void FrontMtx_splitUpperMatrices ( FrontMtx *frontmtx, int msglvl, FILE *msgFile ) { SubMtx *mtxUJ, *mtxUJJ, *mtxUJK ; SubMtxManager *manager ; double *entUJ, *entUJK ; int count, first, ii, inc1, inc2, jcol, jj, J, K, nbytes, ncolJ, ncolUJ, ncolUJK, nentUJ, nentUJK, neqns, nfront, nJ, nrowUJ, nrowUJK, offset, v ; int *colindJ, *colindUJ, *colindUJK, *colmap, *indicesUJ, *indicesUJK, *locmap, *rowindUJ, *rowindUJK, *sizesUJ, *sizesUJK ; I2Ohash *upperhash ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_splitUpperMatrices(%p,%d,%p)" "\n bad input\n", frontmtx, msglvl, msgFile) ; spoolesFatal(); } nfront = FrontMtx_nfront(frontmtx) ; neqns = FrontMtx_neqns(frontmtx) ; upperhash = frontmtx->upperhash ; manager = frontmtx->manager ; /* ----------------------------------- construct the column and local maps ----------------------------------- */ colmap = IVinit(neqns, -1) ; locmap = IVinit(neqns, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; if ( ncolJ > 0 && colindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { v = colindJ[ii] ; colmap[v] = J ; locmap[v] = ii ; } } } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n colmap[]") ; IVfprintf(msgFile, neqns, colmap) ; fprintf(msgFile, "\n\n locmap[]") ; IVfprintf(msgFile, neqns, locmap) ; fflush(msgFile) ; } /* --------------------------------------------- move the U_{J,J} matrices into the hash table --------------------------------------------- */ for ( J = 0 ; J < nfront ; J++ ) { if ( (mtxUJJ = FrontMtx_upperMtx(frontmtx, J, J)) != NULL ) { I2Ohash_insert(frontmtx->upperhash, J, J, mtxUJJ) ; } } /* ------------------------------------------------------------ now split the U_{J,bnd{J}} matrices into U_{J,K} matrices. note: columns of U_{J,bnd{J}} are assumed to be in ascending order with respect to the column ordering of the matrix. ------------------------------------------------------------ */ for ( J = 0 ; J < nfront ; J++ ) { mtxUJ = FrontMtx_upperMtx(frontmtx, J, nfront) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ### J = %d, mtxUJ = %p", J, mtxUJ) ; fflush(msgFile) ; } if ( mtxUJ != NULL ) { if ( msglvl > 2 ) { SubMtx_writeForHumanEye(mtxUJ, msgFile) ; fflush(msgFile) ; } SubMtx_columnIndices(mtxUJ, &ncolUJ, &colindUJ) ; SubMtx_rowIndices(mtxUJ, &nrowUJ, &rowindUJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n column indices for J") ; IVfprintf(msgFile, ncolUJ, colindUJ) ; fprintf(msgFile, "\n row indices for UJ") ; IVfprintf(msgFile, nrowUJ, rowindUJ) ; fflush(msgFile) ; } if ( (K = colmap[colindUJ[0]]) == colmap[colindUJ[ncolUJ-1]] ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d supports only %d", J, K) ; fflush(msgFile) ; } /* ------------------------------------------------- U_{J,bnd{J}} is one submatrix, bnd{J} \subseteq K set row and column indices and change column id ------------------------------------------------- */ IVramp(nrowUJ, rowindUJ, 0, 1) ; for ( ii = 0 ; ii < ncolUJ ; ii++ ) { colindUJ[ii] = locmap[colindUJ[ii]] ; } SubMtx_setFields(mtxUJ, mtxUJ->type, mtxUJ->mode, J, K, mtxUJ->nrow, mtxUJ->ncol, mtxUJ->nent) ; /* mtxUJ->colid = K ; */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting U(%d,%d) ", J, K) ; SubMtx_writeForHumanEye(mtxUJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(upperhash, J, K, (void *) mtxUJ) ; } else { /* ----------------------------------- split U_{J,bnd{J}} into submatrices ----------------------------------- */ nJ = FrontMtx_frontSize(frontmtx, J) ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { SubMtx_denseInfo(mtxUJ, &nrowUJ, &ncolUJ, &inc1, &inc2, &entUJ) ; } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { SubMtx_sparseColumnsInfo(mtxUJ, &ncolUJ, &nentUJ, &sizesUJ, &indicesUJ, &entUJ) ; offset = 0 ; count = sizesUJ[0] ; } first = 0 ; K = colmap[colindUJ[0]] ; for ( jcol = 1 ; jcol <= ncolUJ ; jcol++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n jcol = %d", jcol) ; if ( jcol < ncolUJ ) { fprintf(msgFile, ", colmap[%d] = %d", colindUJ[jcol], colmap[colindUJ[jcol]]); } fflush(msgFile) ; } if ( jcol == ncolUJ || K != colmap[colindUJ[jcol]] ) { ncolUJK = jcol - first ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { nentUJK = nJ*ncolUJK ; } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { if ( count == 0 ) { goto no_entries ; } nentUJK = count ; } nbytes = SubMtx_nbytesNeeded(mtxUJ->type, mtxUJ->mode, nJ, ncolUJK, nentUJK) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n ncolUJK %d, nentUJK %d, nbytes %d", ncolUJK, nentUJK, nbytes) ; fflush(msgFile) ; } mtxUJK = SubMtxManager_newObjectOfSizeNbytes(manager, nbytes) ; SubMtx_init(mtxUJK, mtxUJ->type, mtxUJ->mode, J, K, nJ, ncolUJK, nentUJK) ; if ( SUBMTX_IS_DENSE_COLUMNS(mtxUJ) ) { SubMtx_denseInfo(mtxUJK, &nrowUJK, &ncolUJK, &inc1, &inc2, &entUJK) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentUJK, entUJK, entUJ + first*nJ) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentUJK, entUJK, entUJ + 2*first*nJ) ; } } else if ( SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { SubMtx_sparseColumnsInfo(mtxUJK, &ncolUJK, &nentUJK, &sizesUJK, &indicesUJK, &entUJK) ; IVcopy(ncolUJK, sizesUJK, sizesUJ + first) ; IVcopy(nentUJK, indicesUJK, indicesUJ + offset) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentUJK, entUJK, entUJ + offset) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentUJK, entUJK, entUJ + 2*offset) ; } count = 0 ; offset += nentUJK ; } /* ------------------------------------- initialize the row and column indices ------------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n setting row and column indices"); fflush(msgFile) ; } SubMtx_rowIndices(mtxUJK, &nrowUJK, &rowindUJK) ; IVramp(nJ, rowindUJK, 0, 1) ; SubMtx_columnIndices(mtxUJK, &ncolUJK, &colindUJK) ; for ( ii = 0, jj = first ; ii < ncolUJK ; ii++, jj++ ) { colindUJK[ii] = locmap[colindUJ[jj]] ; } /* ---------------------------------- insert U_{J,K} into the hash table ---------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting U(%d,%d) ", J, K) ; SubMtx_writeForHumanEye(mtxUJK, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(upperhash, J, K, (void *) mtxUJK) ; /* ----------------------------------- we jump to here if there were no entries to be stored in the matrix. ----------------------------------- */ no_entries : /* ---------------------------------------------------- reset first and K to new first location and front id ---------------------------------------------------- */ first = jcol ; if ( jcol < ncolUJ ) { K = colmap[colindUJ[jcol]] ; } } if ( jcol < ncolUJ && SUBMTX_IS_SPARSE_COLUMNS(mtxUJ) ) { count += sizesUJ[jcol] ; } } /* -------------------------------------------- give U_{J,bnd{J}} back to the matrix manager -------------------------------------------- */ SubMtxManager_releaseObject(manager, mtxUJ) ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(colmap) ; IVfree(locmap) ; return ; }
/* -------------------------------------------------------------------- purpose -- merge the front tree allowing at most maxzeros zero entries inside a front return -- IV object that has the old front to new front map created -- 96jun23, cca modified -- 97dec18, cca bug fixed that incorrectly counted the number of zeros in a front -------------------------------------------------------------------- */ ETree * ETree_mergeFrontsAny ( ETree *etree, int maxzeros, IV *nzerosIV ) { ETree *etree2 ; int J, K, nfront, nvtx, nnew ; int *bndwghts, *cost, *fch, *map, *nodwghts, *nzeros, *par, *place, *rep, *sib, *temp ; IV *mapIV ; Tree *tree ; /* --------------- check the input --------------- */ if ( etree == NULL || (nfront = etree->nfront) <= 0 || (nvtx = etree->nvtx) <= 0 ) { fprintf(stderr, "\n fatal error in ETree_mergeFrontsAny(%p,%d)" "\n bad input\n", etree, maxzeros) ; spoolesFatal(); } if ( IV_size(nzerosIV) != nfront ) { fprintf(stderr, "\n fatal error in ETree_mergeFrontsAny(%p,%d,%p)" "\n size(nzerosIV) = %d, nfront = %d\n", etree, maxzeros, nzerosIV, IV_size(nzerosIV), nfront) ; spoolesFatal(); } nzeros = IV_entries(nzerosIV) ; tree = etree->tree ; nodwghts = IVinit(nfront, 0) ; bndwghts = IVinit(nfront, 0) ; par = IVinit(nfront, -1) ; fch = IVinit(nfront, -1) ; sib = IVinit(nfront, -1) ; IVcopy(nfront, par, tree->par) ; IVcopy(nfront, fch, tree->fch) ; IVcopy(nfront, sib, tree->sib) ; IVcopy(nfront, nodwghts, IV_entries(etree->nodwghtsIV)) ; IVcopy(nfront, bndwghts, IV_entries(etree->bndwghtsIV)) ; /* ---------------------- set up working storage ---------------------- */ rep = IVinit(nfront, -1) ; IVramp(nfront, rep, 0, 1) ; cost = IVinit(nfront, 0) ; /* ------------------------------------------ perform a post-order traversal of the tree ------------------------------------------ */ for ( J = Tree_postOTfirst(tree) ; J != -1 ; J = Tree_postOTnext(tree, J) ) { #if MYDEBUG > 0 fprintf(stdout, "\n\n ##### visiting front %d", J) ; fflush(stdout) ; #endif visitAny(J, par, fch, sib, nodwghts, bndwghts, rep, cost, nzeros, maxzeros) ; } #if MYDEBUG > 0 fprintf(stdout, "\n\n whoa, finished") ; fflush(stdout) ; #endif /* ------------------------------------------------- take the map from fronts to representative fronts and make the map from old fronts to new fronts ------------------------------------------------- */ mapIV = IV_new() ; IV_init(mapIV, nfront, NULL) ; map = IV_entries(mapIV) ; place = IVinit(nfront, -1) ; for ( J = 0, nnew = 0 ; J < nfront ; J++ ) { #if MYDEBUG > 0 fprintf(stdout, "\n rep[%d] = %d", J, rep[J]) ; fflush(stdout) ; #endif if ( rep[J] != J ) { K = J ; while ( rep[K] != K ) { #if MYDEBUG > 0 fprintf(stdout, "\n rep[%d] = %d", K, rep[K]) ; fflush(stdout) ; #endif K = rep[K] ; } rep[J] = K ; #if MYDEBUG > 0 fprintf(stdout, "\n setting rep[%d] = %d", J, rep[J]) ; fflush(stdout) ; #endif } else { place[J] = nnew++ ; } } for ( J = 0 ; J < nfront ; J++ ) { K = rep[J] ; map[J] = place[K] ; } /* ------------------------------- get the compressed ETree object ------------------------------- */ etree2 = ETree_compress(etree, mapIV) ; /* ------------------------- remap the nzeros[] vector ------------------------- */ temp = IVinit(nfront, 0) ; IVcopy(nfront, temp, nzeros) ; IV_setSize(nzerosIV, nnew) ; nzeros = IV_entries(nzerosIV) ; for ( J = 0 ; J < nfront ; J++ ) { if ( rep[J] == J ) { nzeros[map[J]] = temp[J] ; } } IVfree(temp) ; /* ------------------------ free the working storage ------------------------ */ IVfree(par) ; IVfree(fch) ; IVfree(sib) ; IVfree(nodwghts) ; IVfree(bndwghts) ; IVfree(rep) ; IVfree(cost) ; IVfree(place) ; IV_free(mapIV) ; return(etree2) ; }
/* ------------------------------------------------- expand an ETree object by splitting a large front into a chain of smaller fronts. created -- 96jun27, cca ------------------------------------------------- */ ETree * ETree_splitFronts ( ETree *etree, int vwghts[], int maxfrontsize, int seed ) { ETree *etree2 ; int count, front, ii, I, Inew, J, Jnew, nbnd, newsize, nint, nfront, nfront2, nsplit, nvtx, prev, size, sizeJ, v, vwght ; int *bndwghts, *fch, *head, *indices, *link, *newbndwghts, *newmap, *newnodwghts, *newpar, *nodwghts, *roots, *sib, *vtxToFront ; Tree *tree ; /* --------------- check the input --------------- */ if ( etree == NULL || (nfront = etree->nfront) <= 0 || (nvtx = etree->nvtx) <= 0 || maxfrontsize <= 0 ) { fprintf(stderr, "\n fatal error in ETree_splitFronts(%p,%p,%d,%d)" "\n bad input\n", etree, vwghts, maxfrontsize, seed) ; spoolesFatal(); } tree = etree->tree ; fch = tree->fch ; sib = tree->sib ; nodwghts = IV_entries(etree->nodwghtsIV) ; bndwghts = IV_entries(etree->bndwghtsIV) ; vtxToFront = IV_entries(etree->vtxToFrontIV) ; /* -------------------------- set up the working storage -------------------------- */ newpar = IVinit(nvtx, -1) ; roots = IVinit(nfront, -1) ; newmap = IVinit(nvtx, -1) ; newnodwghts = IVinit(nvtx, -1) ; newbndwghts = IVinit(nvtx, -1) ; head = IVinit(nfront, -1) ; link = IVinit(nvtx, -1) ; indices = IVinit(nvtx, -1) ; for ( v = 0 ; v < nvtx ; v++ ) { front = vtxToFront[v] ; link[v] = head[front] ; head[front] = v ; } /* ------------------------------------------------ execute a post-order traversal of the front tree ------------------------------------------------ */ nfront2 = 0 ; for ( J = Tree_postOTfirst(tree) ; J != -1 ; J = Tree_postOTnext(tree, J) ) { sizeJ = 0 ; for ( v = head[J], count = 0 ; v != -1 ; v = link[v] ) { indices[count++] = v ; vwght = (vwghts != NULL) ? vwghts[v] : 1 ; sizeJ += vwght ; } if ( sizeJ != nodwghts[J] ) { fprintf(stderr, "\n fatal error in ETree_splitFronts(%p,%p,%d,%d)" "\n J = %d, sizeJ = %d, nodwght = %d\n", etree, vwghts, maxfrontsize, seed, J, sizeJ, nodwghts[J]) ; spoolesFatal(); } #if MYDEBUG > 0 fprintf(stdout, "\n\n checking out front %d, size %d", J, sizeJ) ; #endif if ( sizeJ <= maxfrontsize || fch[J] == -1 ) { /* ------------------------------------------- this front is small enough (or is a domain) ------------------------------------------- */ Jnew = nfront2++ ; for ( ii = 0 ; ii < count ; ii++ ) { v = indices[ii] ; newmap[v] = Jnew ; #if MYDEBUG > 1 fprintf(stdout, "\n mapping vertex %d into new front %d", v, Jnew) ; #endif } for ( I = fch[J] ; I != -1 ; I = sib[I] ) { Inew = roots[I] ; newpar[Inew] = Jnew ; } newnodwghts[Jnew] = nodwghts[J] ; newbndwghts[Jnew] = bndwghts[J] ; roots[J] = Jnew ; #if MYDEBUG > 0 fprintf(stdout, "\n front is small enough, Jnew = %d", Jnew) ; #endif } else { /* ------------------------------------------ this front is too large, split into pieces whose size differs by one vertex ------------------------------------------ */ nsplit = (sizeJ + maxfrontsize - 1)/maxfrontsize ; newsize = sizeJ / nsplit ; if ( sizeJ % nsplit != 0 ) { newsize++ ; } #if MYDEBUG > 0 fprintf(stdout, "\n front is too large, %d target fronts, target size = %d", nsplit, newsize) ; #endif prev = -1 ; nint = nodwghts[J] ; nbnd = nint + bndwghts[J] ; if ( seed > 0 ) { IVshuffle(count, indices, seed) ; } ii = 0 ; while ( ii < count ) { Jnew = nfront2++ ; size = 0 ; while ( ii < count ) { v = indices[ii] ; vwght = (vwghts != NULL) ? vwghts[v] : 1 ; #if MYDEBUG > 0 fprintf(stdout, "\n ii = %d, v = %d, vwght = %d, size = %d", ii, v, vwght, size) ; #endif /* ----------------------------------------------- 97aug28, cca bug fix. front is created even if it is too big ----------------------------------------------- */ if ( newsize >= size + vwght || size == 0 ) { newmap[v] = Jnew ; size += vwght ; #if MYDEBUG > 0 fprintf(stdout, "\n mapping vertex %d into new front %d, size = %d", v, Jnew, size) ; #endif ii++ ; } else { break ; } } if ( prev == -1 ) { for ( I = fch[J] ; I != -1 ; I = sib[I] ) { Inew = roots[I] ; newpar[Inew] = Jnew ; } } else { newpar[prev] = Jnew ; } prev = Jnew ; newnodwghts[Jnew] = size ; nbnd = nbnd - size ; newbndwghts[Jnew] = nbnd ; #if MYDEBUG > 0 fprintf(stdout, "\n new front %d, size %d, bnd %d", Jnew, newnodwghts[Jnew], newbndwghts[Jnew]) ; #endif } roots[J] = Jnew ; } } /* --------------------------- create the new ETree object --------------------------- */ etree2 = ETree_new() ; ETree_init1(etree2, nfront2, nvtx) ; IVcopy(nfront2, etree2->tree->par, newpar) ; Tree_setFchSibRoot(etree2->tree) ; IVcopy(nvtx, IV_entries(etree2->vtxToFrontIV), newmap) ; IVcopy(nfront2, IV_entries(etree2->nodwghtsIV), newnodwghts) ; IVcopy(nfront2, IV_entries(etree2->bndwghtsIV), newbndwghts) ; /* ------------------------ free the working storage ------------------------ */ IVfree(newpar) ; IVfree(roots) ; IVfree(newmap) ; IVfree(newnodwghts) ; IVfree(newbndwghts) ; IVfree(head) ; IVfree(link) ; IVfree(indices) ; return(etree2) ; }
/* ------------------------------------------------------------- purpose --- to compute a matrix-vector multiply y[] = C * x[] where C is the identity, A or B (depending on *pprbtype). *pnrows -- # of rows in x[] *pncols -- # of columns in x[] *pprbtype -- problem type *pprbtype = 1 --> vibration problem, matrix is A *pprbtype = 2 --> buckling problem, matrix is B *pprbtype = 3 --> matrix is identity, y[] = x[] x[] -- vector to be multiplied NOTE: the x[] vector is global, not a portion y[] -- product vector NOTE: the y[] vector is global, not a portion created -- 98aug28, cca & jcp ------------------------------------------------------------- */ void JimMatMulMPI ( int *pnrows, int *pncols, double x[], double y[], int *pprbtype, void *data ) { BridgeMPI *bridge = (BridgeMPI *) data ; int ncols, nent, nrows ; #if MYDEBUG > 0 double t1, t2 ; count_JimMatMul++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() start", count_JimMatMul) ; fflush(bridge->msgFile) ; #endif nrows = *pnrows ; ncols = *pncols ; nent = nrows*ncols ; if ( *pprbtype == 3 ) { /* -------------------------- ... matrix is the identity -------------------------- */ DVcopy(nent, y, x) ; } else { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jcol, jj, kk, myid, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; /* --------------------------------------------- slide the owned rows of x[] down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { y[kk++] = x[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n inside JimMatMulMPI, calling MatMulMpi" "\n prbtype %d, nrows %d, ncols %d, nowned %d", *pprbtype, *pnrows, *pncols, nowned) ; fflush(bridge->msgFile) ; } MatMulMPI(&nowned, pncols, y, y, pprbtype, data) ; /* ------------------------------------------------- gather all the entries of y[] onto processor zero ------------------------------------------------- */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), y) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n mtx: %d rows x %d columns", mtx->nrow, mtx->ncol) ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n newmtx: %d rows x %d columns", newmtx->nrow, newmtx->ncol) ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { if ( mtx->nrow != neqns || mtx->ncol != ncols ) { fprintf(bridge->msgFile, "\n\n WHOA: mtx->nrows %d, mtx->ncols %d" ", neqns %d, ncols %d", mtx->nrow, mtx->ncol, neqns, ncols) ; exit(-1) ; } DVcopy(neqns*ncols, y, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ MPI_Bcast((void *) y, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 2 ) { fprintf(bridge->msgFile, "\n after the broadcast") ; fflush(bridge->msgFile) ; } } MPI_Barrier(bridge->comm) ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimMatMul += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimMatMulMPI() end", count_JimMatMul) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimMatMul) ; fflush(bridge->msgFile) ; #endif return ; }
/* ---------------------------------------------------- create a Graph object that holds the adjacency graph of the assembled elements. created -- 95nov03, cca ---------------------------------------------------- */ Graph * EGraph_mkAdjGraph ( EGraph *egraph ) { int elem, esize, i, nelem, nvtx, v, vsize, w ; int *eind, *head, *link, *marker, *offsets, *vind ; IVL *eadjIVL, *gadjIVL ; Graph *graph ; /* --------------- check the input --------------- */ if ( egraph == NULL || (eadjIVL = egraph->adjIVL) == NULL ) { fprintf(stderr, "\n fatal error in EGraph_mkAdjGraph(%p)" "\n bad input\n", egraph) ; spoolesFatal(); } nelem = egraph->nelem ; nvtx = egraph->nvtx ; /* -------------------------------- set up the linked list structure -------------------------------- */ head = IVinit(nvtx, -1) ; link = IVinit(nelem, -1) ; offsets = IVinit(nelem, 0) ; /* ----------------------------------------------------------- sort the vertices in each element list into ascending order and link them into their first vertex ----------------------------------------------------------- */ for ( elem = 0 ; elem < nelem ; elem++ ) { IVL_listAndSize(eadjIVL, elem, &esize, &eind) ; if ( esize > 0 ) { IVqsortUp(esize, eind) ; v = eind[0] ; link[elem] = head[v] ; head[v] = elem ; } } /* --------------------------- create the new Graph object --------------------------- */ graph = Graph_new() ; Graph_init1(graph, egraph->type, nvtx, 0, 0, IVL_CHUNKED, IVL_CHUNKED) ; gadjIVL = graph->adjIVL ; /* ---------------------- loop over the vertices ---------------------- */ vind = IVinit(nvtx, -1) ; marker = IVinit(nvtx, -1) ; for ( v = 0 ; v < nvtx ; v++ ) { /* --------------------------------- loop over the supporting elements --------------------------------- */ vsize = 0 ; vind[vsize++] = v ; marker[v] = v ; while ( (elem = head[v]) != -1 ) { /* fprintf(stdout, "\n checking out element %d :", jelem) ; */ head[v] = link[elem] ; IVL_listAndSize(eadjIVL, elem, &esize, &eind) ; for ( i = 0 ; i < esize ; i++ ) { w = eind[i] ; if ( marker[w] != v ) { marker[w] = v ; vind[vsize++] = w ; } } if ( (i = ++offsets[elem]) < esize ) { w = eind[i] ; link[elem] = head[w] ; head[w] = elem ; } } IVqsortUp(vsize, vind) ; IVL_setList(gadjIVL, v, vsize, vind) ; } graph->nedges = gadjIVL->tsize ; if ( egraph->type == 0 ) { graph->totvwght = nvtx ; } else if ( egraph->type == 1 ) { /* ------------------------------ fill the vertex weights vector ------------------------------ */ IVcopy(nvtx, graph->vwghts, egraph->vwghts) ; graph->totvwght = IVsum(nvtx, graph->vwghts) ; } graph->totewght = graph->nedges ; /* ------------------------ free the working storage ------------------------ */ IVfree(head) ; IVfree(link) ; IVfree(marker) ; IVfree(vind) ; IVfree(offsets) ; return(graph) ; }
/* ------------------------------------------------------------- purpose -- the IVL object ivl and IV object ownersIV are both found on each process. the ownersIV object is identical over all the processes, and owners[ii] tells which processes owns list ii of the ivl object. on return from this method, the ivl object is replicated over all the processes. each process sends the lists that it owns to all the other processes. created -- 98apr03, cca ------------------------------------------------------------- */ void IVL_MPI_allgather ( IVL *ivl, IV *ownersIV, int stats[], int msglvl, FILE *msgFile, int firsttag, MPI_Comm comm ) { int count, destination, ii, ilist, incount, jlist, jproc, left, maxcount, myid, nlist, nmylists, notherlists, nowners, nproc, offset, outcount, right, size, source, tag ; int *counts, *inbuffer, *list, *outbuffer, *owners ; MPI_Status status ; /* --------------- check the input --------------- */ if ( ivl == NULL || ownersIV == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n ivl = %p, ownersIV = %p\n", ivl, ownersIV) ; spoolesFatal(); } /* ---------------------------------------------- get id of self, # of processes and # of fronts ---------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; nlist = ivl->nlist ; IV_sizeAndEntries(ownersIV, &nowners, &owners) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n inside IVL_MPI_allgather()" "\n nproc = %d, myid = %d, nlist = %d, nowners = %d", nproc, myid, nlist, nowners) ; fflush(msgFile) ; } if ( nlist != nowners || owners == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n nlist = %d, nowners = %d, owners = %p\n", nlist, nowners, owners) ; spoolesFatal(); } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ivl") ; IVL_writeForHumanEye(ivl, msgFile) ; fprintf(msgFile, "\n\n ownersIV") ; IV_writeForHumanEye(ownersIV, msgFile) ; fflush(msgFile) ; } /* ----------------------------------------------- step 1 : determine the size of the message that this process will send to the others ----------------------------------------------- */ for ( ilist = 0, outcount = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] < 0 || owners[ilist] >= nproc ) { fprintf(stderr, "\n owners[%d] = %d", ilist, owners[ilist]) ; spoolesFatal(); } if ( owners[ilist] == myid ) { outcount += 2 ; IVL_listAndSize(ivl, ilist, &size, &list) ; outcount += size ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n outcount = %d", outcount) ; fflush(msgFile) ; } /* ---------------------------------------------------- do an all-to-all gather/scatter counts[jproc] = # of int's in the message from jproc ---------------------------------------------------- */ counts = IVinit(nproc, 0) ; counts[myid] = outcount ; MPI_Allgather((void *) &counts[myid], 1, MPI_INT, (void *) counts, 1, MPI_INT, comm) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } /* ----------------------------- set up the in and out buffers ----------------------------- */ if ( outcount > 0 ) { outbuffer = IVinit(outcount, -1) ; for ( ilist = nmylists = 0, ii = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] == myid ) { nmylists++ ; IVL_listAndSize(ivl, ilist, &size, &list) ; outbuffer[ii++] = ilist ; outbuffer[ii++] = size ; if ( size > 0 ) { IVcopy(size, &outbuffer[ii], list) ; ii += size ; } } } outbuffer[0] = nmylists ; if ( ii != outcount ) { fprintf(stderr, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; fprintf(msgFile, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; spoolesFatal(); } } else { outbuffer = NULL ; } maxcount = IVmax(nproc, counts, &jproc) ; if ( maxcount > 0 ) { inbuffer = IVinit(maxcount, -1) ; } else { inbuffer = NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n outbuffer %p, maxcount %d, inbuffer %p", outbuffer, maxcount, inbuffer) ; fflush(msgFile) ; } /* ------------------------------------- step 2: loop over the other processes send and receive information ------------------------------------- */ for ( offset = 1, tag = firsttag ; offset < nproc ; offset++, tag++ ) { right = (myid + offset) % nproc ; if ( offset <= myid ) { left = myid - offset ; } else { left = nproc + myid - offset ; } if ( outcount > 0 ) { destination = right ; stats[0]++ ; stats[2] += outcount*sizeof(int) ; } else { destination = MPI_PROC_NULL ; } incount = counts[left] ; if ( incount > 0 ) { source = left ; stats[1]++ ; stats[3] += incount*sizeof(int) ; } else { source = MPI_PROC_NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n offset %d, source %d, destination %d", offset, source, destination) ; fflush(msgFile) ; } /* ----------------- do a send/receive ----------------- */ MPI_Sendrecv(outbuffer, outcount, MPI_INT, destination, tag, inbuffer, incount, MPI_INT, source, tag, comm, &status) ; if ( source != MPI_PROC_NULL ) { MPI_Get_count(&status, MPI_INT, &count) ; if ( count != incount ) { fprintf(stderr, "\n 1. fatal error in IVL_MPI_allgather()" "\n proc %d : source = %d, count = %d, incount = %d\n", myid, source, count, incount) ; spoolesFatal(); } } /* ---------------------------- set the values in the vector ---------------------------- */ notherlists = inbuffer[0] ; for ( ilist = 0, ii = 1 ; ilist < notherlists ; ilist++ ) { jlist = inbuffer[ii++] ; size = inbuffer[ii++] ; if ( size > 0 ) { IVL_setList(ivl, jlist, size, &inbuffer[ii]) ; ii += size ; } } if ( ii != incount ) { fprintf(msgFile, "\n ii = %d, incount = %d", ii, incount) ; fprintf(stderr, "\n ii = %d, incount = %d", ii, incount) ; spoolesFatal(); } if ( msglvl > 2 ) { fprintf(msgFile, "\n after setting values") ; IVL_writeForHumanEye(ivl, msgFile) ; fflush(msgFile) ; } } /* ------------------------ free the working storage ------------------------ */ IVfree(counts) ; if ( outbuffer != NULL ) { IVfree(outbuffer) ; } if ( inbuffer != NULL ) { IVfree(inbuffer) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n leaving IVL_MPI_gatherall()") ; fflush(msgFile) ; } return ; }
/* ------------------------------------------------------------------ purpose -- basic initializer nlist -- number of lists to be held by this object counts -- vector that contains number of items expected for each list. counts == NULL --> unknown number of items expected counts != NULL --> known number of items expected lockflag -- flag to specify lock status lockflag = 0 --> mutex lock is not allocated or initialized lockflag = 1 --> mutex lock is allocated and it can synchronize only threads in this process. lockflag = 2 --> mutex lock is allocated and it can synchronize only threads in this and other processes. flags -- vector to specify whether to lock individual lists flags == NULL --> none or all lists must be locked, use lockflag to determine flags[ilist] = 'N' --> no need to lock list ilist flags[ilist] = 'Y' --> must lock list ilist created -- 98may02, cca ------------------------------------------------------------------ */ void SubMtxList_init ( SubMtxList *list, int nlist, int counts[], int lockflag, char flags[] ) { int ilist ; /* --------------- check the input --------------- */ if ( list == NULL || nlist <= 0 || lockflag < 0 || lockflag > 2 ) { fprintf(stderr, "\n fatal error in SubMtxList_init(%p,%d,%p,%d,%p)" "\n bad input\n", list, nlist, counts, lockflag, flags) ; exit(-1) ; } /* -------------- clear all data -------------- */ SubMtxList_clearData(list) ; /* ------------------------------------------------------- set the number of lists and allocate the heads[] vector ------------------------------------------------------- */ list->nlist = nlist ; ALLOCATE(list->heads, struct _SubMtx *, nlist) ; for ( ilist = 0 ; ilist < nlist ; ilist++ ) { list->heads[ilist] = NULL ; } if ( counts != NULL ) { /* ------------------------------------- allocate and fill the counts[] vector ------------------------------------- */ list->counts = IVinit(nlist, 0) ; IVcopy(nlist, list->counts, counts) ; } if ( lockflag > 0 ) { /* ----------------- allocate the lock ----------------- */ list->lock = Lock_new() ; Lock_init(list->lock, lockflag) ; } if ( flags != NULL ) { /* ------------------------------------ allocate and fill the flags[] vector ------------------------------------ */ list->flags = CVinit(nlist, 'N') ; CVcopy(nlist, list->flags, flags) ; } return ; }
/* ------------------------------------------------- purpose -- to create and return a Chv object that holds the update matrix for front J created -- 98may25, cca ------------------------------------------------- */ Chv * FrontMtx_QR_storeUpdate ( FrontMtx *frontmtx, int J, A2 *frontJ, ChvManager *chvmanager, int msglvl, FILE *msgFile ) { A2 tempJ ; Chv *chvJ ; double *updent ; int nbytes, ncolJ, ncolupd, nD, nent, nrowJ, nrowupd ; int *colindJ, *updind ; /* ----------------------------------------------- compute the number of rows in the update matrix ----------------------------------------------- */ nD = FrontMtx_frontSize(frontmtx, J) ; FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; nrowJ = A2_nrow(frontJ) ; nrowupd = ((nrowJ >= ncolJ) ? ncolJ : nrowJ) - nD ; ncolupd = ncolJ - nD ; if ( msglvl > 3 ) { fprintf(msgFile, "\n\n inside FrontMtx_QR_storeUpdate(%d)", J) ; fprintf(msgFile, "\n nD %d, nrowJ %d, nrowupd %d, ncolupd %d", nD, nrowJ, nrowupd, ncolupd) ; fflush(msgFile) ; } if ( nrowupd > 0 && ncolupd > 0 ) { if ( FRONTMTX_IS_REAL(frontmtx) ) { nbytes = Chv_nbytesNeeded(nrowupd, 0, ncolupd - nrowupd, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { nbytes = Chv_nbytesNeeded(nrowupd, 0, ncolupd - nrowupd, SPOOLES_COMPLEX, SPOOLES_HERMITIAN) ; } chvJ = ChvManager_newObjectOfSizeNbytes(chvmanager, nbytes) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { Chv_init(chvJ, J, nrowupd, 0, ncolupd - nrowupd, SPOOLES_REAL, SPOOLES_SYMMETRIC) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { Chv_init(chvJ, J, nrowupd, 0, ncolupd - nrowupd, SPOOLES_COMPLEX, SPOOLES_HERMITIAN) ; } Chv_columnIndices(chvJ, &ncolupd, &updind) ; IVcopy(ncolupd, updind, colindJ + nD) ; nent = Chv_nent(chvJ) ; updent = Chv_entries(chvJ) ; A2_setDefaultFields(&tempJ) ; A2_subA2(&tempJ, frontJ, nD, nrowJ - 1, nD, ncolJ - 1) ; A2_copyEntriesToVector(&tempJ, nent, updent, A2_UPPER, A2_BY_ROWS) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n update matrix %d", J) ; Chv_writeForHumanEye(chvJ, msgFile) ; fflush(msgFile) ; } } else { chvJ = NULL ; } return(chvJ) ; }
/* ---------------------------------- set the maximum size of the vector created -- 96dec08, cca ---------------------------------- */ void IV_setMaxsize ( IV *iv, int newmaxsize ) { /* --------------- check the input --------------- */ if ( iv == NULL || newmaxsize < 0 ) { fprintf(stderr, "\n fatal error in IV_setMaxsize(%p,%d)" "\n bad input\n", iv, newmaxsize) ; exit(-1) ; } if ( iv->maxsize > 0 && iv->owned == 0 ) { fprintf(stderr, "\n fatal error in IV_setMaxsize(%p,%d)" "\n iv->maxsize = %d, iv->owned = %d\n", iv, newmaxsize, iv->maxsize, iv->owned) ; exit(-1) ; } if ( iv->maxsize != newmaxsize ) { /* ----------------------------------- allocate new storage for the vector ----------------------------------- */ int *vec = IVinit(newmaxsize, -1) ; if ( iv->size > 0 ) { /* --------------------------------- copy old entries into new entries --------------------------------- */ if ( iv->vec == NULL ) { fprintf(stderr, "\n fatal error in IV_setMaxsize(%p,%d)" "\n iv->size = %d, iv->vec is NULL\n", iv, newmaxsize, iv->size) ; exit(-1) ; } if ( iv->size <= newmaxsize ) { /* ----------------------------------------- new maximum size is greater than old size ----------------------------------------- */ IVcopy(iv->size, vec, iv->vec) ; } else { /* ----------------------- note, data is truncated ----------------------- */ IVcopy(newmaxsize, vec, iv->vec) ; iv->size = newmaxsize ; } } if ( iv->vec != NULL ) { /* ---------------- free old entries ---------------- */ IVfree(iv->vec) ; } /* ---------- set fields ---------- */ iv->maxsize = newmaxsize ; iv->owned = 1 ; iv->vec = vec ; } /* fprintf(stdout, "\n %% leaving IV_setMaxsize, iv %p, size %d, maxsize %d, entries %p", iv, iv->size, iv->maxsize, iv->vec) ; fflush(stdout) ; */ return ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------- test the Chv_update{H,S,N}() methods. T := T - U^T * D * U T := T - U^H * D * U T := T - L * D * U created -- 98apr23, cca ------------------------------------- */ { Chv *chvT ; SubMtx *mtxD, *mtxL, *mtxU ; double imag, ops, real, t1, t2 ; Drand *drand ; DV *tempDV ; FILE *msgFile ; int irow, msglvl, ncolT, nDT, ncolU, nentT, nentU, nrowD, nrowL, nrowT, offset, seed, size, sparsityflag, symflag, type ; int *colindT, *colindU, *ivec, *rowindL, *rowindT ; if ( argc != 13 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile type symflag sparsityflag" "\n ncolT ncolU nrowD nentU offset seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n type -- entries type" "\n 1 -- real" "\n 2 -- complex" "\n symflag -- type of matrix U" "\n 0 -- symmetric" "\n 1 -- hermitian" "\n 2 -- nonsymmetric" "\n sparsityflag -- dense or sparse" "\n 0 -- dense" "\n 1 -- sparse" "\n ncolT -- # of rows and columns in matrix T" "\n nDT -- # of internal rows and columns in matrix T" "\n ncolU -- # of rows and columns in matrix U" "\n nrowD -- # of rows and columns in matrix D" "\n nentU -- # of entries in matrix U" "\n offset -- distance between D_I and T" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; spoolesFatal(); } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } type = atoi(argv[3]) ; symflag = atoi(argv[4]) ; sparsityflag = atoi(argv[5]) ; ncolT = atoi(argv[6]) ; nDT = atoi(argv[7]) ; ncolU = atoi(argv[8]) ; nrowD = atoi(argv[9]) ; nentU = atoi(argv[10]) ; offset = atoi(argv[11]) ; seed = atoi(argv[12]) ; fprintf(msgFile, "\n %% %s:" "\n %% msglvl = %d" "\n %% msgFile = %s" "\n %% type = %d" "\n %% symflag = %d" "\n %% sparsityflag = %d" "\n %% ncolT = %d" "\n %% nDT = %d" "\n %% ncolU = %d" "\n %% nrowD = %d" "\n %% nentU = %d" "\n %% offset = %d" "\n %% seed = %d", argv[0], msglvl, argv[2], type, symflag, sparsityflag, ncolT, nDT, ncolU, nrowD, nentU, offset, seed) ; /* ----------------------------- check for errors in the input ----------------------------- */ if ( (type != SPOOLES_REAL && type != SPOOLES_COMPLEX) || (symflag != SPOOLES_SYMMETRIC && symflag != SPOOLES_HERMITIAN && symflag != SPOOLES_NONSYMMETRIC) || (sparsityflag < 0 || sparsityflag > 1) || ncolT <= 0 || ncolU > (ncolT + offset) || nrowD <= 0 ) { fprintf(stderr, "\n invalid input\n") ; spoolesFatal(); } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, ++seed) ; Drand_setNormal(drand, 0.0, 1.0) ; /* ----------------------- get a vector of indices ----------------------- */ size = nrowD + offset + ncolT ; ivec = IVinit(size, -1) ; IVramp(size, ivec, 0, 1) ; /* ---------------------------- initialize the T Chv object ---------------------------- */ fprintf(msgFile, "\n\n %% symflag = %d", symflag) ; MARKTIME(t1) ; chvT = Chv_new() ; Chv_init(chvT, 0, nDT, ncolT - nDT, ncolT - nDT, type, symflag) ; nentT = Chv_nent(chvT) ; if ( CHV_IS_REAL(chvT) ) { Drand_fillDvector(drand, nentT, Chv_entries(chvT)) ; } else if ( CHV_IS_COMPLEX(chvT) ) { Drand_fillDvector(drand, 2*nentT, Chv_entries(chvT)) ; } Chv_columnIndices(chvT, &ncolT, &colindT) ; IVcopy(ncolT, colindT, ivec + nrowD + offset) ; if ( CHV_IS_NONSYMMETRIC(chvT) ) { Chv_rowIndices(chvT, &nrowT, &rowindT) ; IVcopy(nrowT, rowindT, colindT) ; } IVfree(ivec) ; if ( CHV_IS_HERMITIAN(chvT) ) { fprintf(msgFile, "\n\n %% hermitian\n") ; /* --------------------------------------------------------- hermitian example, set imaginary part of diagonal to zero --------------------------------------------------------- */ for ( irow = 0 ; irow < nDT ; irow++ ) { Chv_complexEntry(chvT, irow, irow, &real, &imag) ; Chv_setComplexEntry(chvT, irow, irow, real, 0.0) ; } } MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize chvT Chv object", t2 - t1) ; fprintf(msgFile, "\n T = zeros(%d,%d); ", size, size) ; Chv_writeForMatlab(chvT, "T", msgFile) ; /* --------------------------- initialize the D Mtx object --------------------------- */ MARKTIME(t1) ; mtxD = SubMtx_new() ; if ( CHV_IS_REAL(chvT) ) { if ( CHV_IS_SYMMETRIC(chvT) ) { SubMtx_initRandom(mtxD, SPOOLES_REAL, SUBMTX_BLOCK_DIAGONAL_SYM, 0, 0, nrowD, nrowD, nrowD*nrowD, ++seed) ; } else { SubMtx_initRandom(mtxD, SPOOLES_REAL, SUBMTX_DIAGONAL, 0, 0, nrowD, nrowD, nrowD*nrowD, ++seed) ; } } else if ( CHV_IS_COMPLEX(chvT) ) { if ( CHV_IS_HERMITIAN(chvT) ) { SubMtx_initRandom(mtxD,SPOOLES_COMPLEX,SUBMTX_BLOCK_DIAGONAL_HERM, 0, 0, nrowD, nrowD, nrowD*nrowD, ++seed) ; } else if ( CHV_IS_SYMMETRIC(chvT) ) { SubMtx_initRandom(mtxD,SPOOLES_COMPLEX, SUBMTX_BLOCK_DIAGONAL_SYM, 0, 0, nrowD, nrowD, nrowD*nrowD, ++seed) ; } else { SubMtx_initRandom(mtxD, SPOOLES_COMPLEX, SUBMTX_DIAGONAL, 0, 0, nrowD, nrowD, nrowD*nrowD, ++seed) ; } } MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize D SubMtx object", t2 - t1) ; fprintf(msgFile, "\n D = zeros(%d,%d) ;", nrowD, nrowD) ; SubMtx_writeForMatlab(mtxD, "D", msgFile) ; /* ---------------------------- initialize the U SubMtx object ---------------------------- */ MARKTIME(t1) ; mtxU = SubMtx_new() ; if ( CHV_IS_REAL(chvT) ) { if ( sparsityflag == 0 ) { SubMtx_initRandom(mtxU, SPOOLES_REAL, SUBMTX_DENSE_COLUMNS, 0, 0, nrowD, ncolU, nentU, ++seed) ; } else { SubMtx_initRandom(mtxU, SPOOLES_REAL, SUBMTX_SPARSE_COLUMNS, 0, 0, nrowD, ncolU, nentU, ++seed) ; } } else if ( CHV_IS_COMPLEX(chvT) ) { if ( sparsityflag == 0 ) { SubMtx_initRandom(mtxU, SPOOLES_COMPLEX, SUBMTX_DENSE_COLUMNS, 0, 0, nrowD, ncolU, nentU, ++seed) ; } else { SubMtx_initRandom(mtxU, SPOOLES_COMPLEX, SUBMTX_SPARSE_COLUMNS, 0, 0, nrowD, ncolU, nentU, ++seed) ; } } ivec = IVinit(offset + ncolT, -1) ; IVramp(offset + ncolT, ivec, nrowD, 1) ; IVshuffle(offset + ncolT, ivec, ++seed) ; SubMtx_columnIndices(mtxU, &ncolU, &colindU) ; IVcopy(ncolU, colindU, ivec) ; IVqsortUp(ncolU, colindU) ; IVfree(ivec) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize U SubMtx object", t2 - t1) ; fprintf(msgFile, "\n U = zeros(%d,%d) ;", nrowD, size) ; SubMtx_writeForMatlab(mtxU, "U", msgFile) ; if ( CHV_IS_NONSYMMETRIC(chvT) ) { /* ---------------------------- initialize the L SubMtx object ---------------------------- */ MARKTIME(t1) ; mtxL = SubMtx_new() ; if ( CHV_IS_REAL(chvT) ) { if ( sparsityflag == 0 ) { SubMtx_initRandom(mtxL, SPOOLES_REAL, SUBMTX_DENSE_ROWS, 0, 0, ncolU, nrowD, nentU, ++seed) ; } else { SubMtx_initRandom(mtxL, SPOOLES_REAL, SUBMTX_SPARSE_ROWS, 0, 0, ncolU, nrowD, nentU, ++seed) ; } } else if ( CHV_IS_COMPLEX(chvT) ) { if ( sparsityflag == 0 ) { SubMtx_initRandom(mtxL, SPOOLES_COMPLEX, SUBMTX_DENSE_ROWS, 0, 0, ncolU, nrowD, nentU, ++seed) ; } else { SubMtx_initRandom(mtxL, SPOOLES_COMPLEX, SUBMTX_SPARSE_ROWS, 0, 0, ncolU, nrowD, nentU, ++seed) ; } } SubMtx_rowIndices(mtxL, &nrowL, &rowindL) ; IVcopy(nrowL, rowindL, colindU) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize L SubMtx object", t2 - t1) ; fprintf(msgFile, "\n L = zeros(%d,%d) ;", size, nrowD) ; SubMtx_writeForMatlab(mtxL, "L", msgFile) ; } else { mtxL = NULL ; } /* -------------------------------- compute the matrix-matrix update -------------------------------- */ tempDV = DV_new() ; ops = 8*nrowD*nrowD*ncolU ; if ( CHV_IS_SYMMETRIC(chvT) ) { Chv_updateS(chvT, mtxD, mtxU, tempDV) ; } else if ( CHV_IS_HERMITIAN(chvT) ) { Chv_updateH(chvT, mtxD, mtxU, tempDV) ; } else if ( CHV_IS_NONSYMMETRIC(chvT) ) { Chv_updateN(chvT, mtxL, mtxD, mtxU, tempDV) ; } MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to compute m-m, %.3f mflops", t2 - t1, ops*1.e-6/(t2 - t1)) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n %% Z Chv object") ; fprintf(msgFile, "\n Z = zeros(%d,%d); ", size, size) ; Chv_writeForMatlab(chvT, "Z", msgFile) ; fflush(msgFile) ; } /* ----------------- check with matlab ----------------- */ if ( msglvl > 1 ) { if ( CHV_IS_HERMITIAN(chvT) ) { fprintf(msgFile, "\n\n B = ctranspose(U) * D * U ;") ; } else if ( CHV_IS_SYMMETRIC(chvT) ) { fprintf(msgFile, "\n\n B = transpose(U) * D * U ;") ; } else { fprintf(msgFile, "\n\n B = L * D * U ;") ; } fprintf(msgFile, "\n\n for irow = 1:%d" "\n for jcol = 1:%d" "\n if T(irow,jcol) ~= 0.0" "\n T(irow,jcol) = T(irow,jcol) - B(irow,jcol) ;" "\n end" "\n end" "\n end" "\n emtx = abs(Z - T) ;", size, size) ; fprintf(msgFile, "\n\n maxabs = max(max(emtx)) ") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ if ( mtxL != NULL ) { SubMtx_free(mtxL) ; } Chv_free(chvT) ; SubMtx_free(mtxD) ; SubMtx_free(mtxU) ; DV_free(tempDV) ; Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/* ---------------------------------------------------- store the factor entries of the reduced front matrix created -- 98may25, cca ---------------------------------------------------- */ void FrontMtx_QR_storeFront ( FrontMtx *frontmtx, int J, A2 *frontJ, int msglvl, FILE *msgFile ) { A2 tempA2 ; double fac, ifac, imag, real, rfac ; double *entDJJ, *entUJJ, *entUJN, *row ; int inc1, inc2, irow, jcol, ncol, ncolJ, nD, nentD, nentUJJ, nfront, nrow, nU ; int *colind, *colindJ, *firstlocs, *sizes ; SubMtx *mtx ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || frontJ == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_QR_storeFront()" "\n bad input\n") ; exit(-1) ; } nfront = FrontMtx_nfront(frontmtx) ; FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; nrow = A2_nrow(frontJ) ; ncol = A2_ncol(frontJ) ; A2_setDefaultFields(&tempA2) ; nD = FrontMtx_frontSize(frontmtx, J) ; nU = ncol - nD ; /* -------------------------------------- scale the rows and square the diagonal -------------------------------------- */ row = A2_entries(frontJ) ; if ( A2_IS_REAL(frontJ) ) { for ( irow = 0 ; irow < nD ; irow++ ) { if ( row[irow] != 0.0 ) { fac = 1./row[irow] ; for ( jcol = irow + 1 ; jcol < ncol ; jcol++ ) { row[jcol] *= fac ; } row[irow] = row[irow] * row[irow] ; } row += ncol ; } } else if ( A2_IS_COMPLEX(frontJ) ) { for ( irow = 0 ; irow < nD ; irow++ ) { real = row[2*irow] ; imag = row[2*irow+1] ; if ( real != 0.0 || imag != 0.0 ) { Zrecip(real, imag, &rfac, &ifac) ; ZVscale(ncol - irow - 1, & row[2*irow+2], rfac, ifac) ; row[2*irow] = real*real + imag*imag ; row[2*irow+1] = 0.0 ; } row += 2*ncol ; } } if ( msglvl > 3 ) { fprintf(msgFile, "\n after scaling rows of A") ; A2_writeForHumanEye(frontJ, msgFile) ; fflush(msgFile) ; } /* ------------------------- copy the diagonal entries ------------------------- */ mtx = FrontMtx_diagMtx(frontmtx, J) ; SubMtx_diagonalInfo(mtx, &nentD, &entDJJ) ; A2_subA2(&tempA2, frontJ, 0, nD-1, 0, nD-1) ; A2_copyEntriesToVector(&tempA2, nentD, entDJJ, A2_DIAGONAL, A2_BY_ROWS) ; SubMtx_columnIndices(mtx, &ncol, &colind) ; IVcopy(nD, colind, colindJ) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n diagonal factor matrix") ; SubMtx_writeForHumanEye(mtx, msgFile) ; fflush(msgFile) ; } if ( (mtx = FrontMtx_upperMtx(frontmtx, J, J)) != NULL ) { /* ------------------------ copy the U_{J,J} entries ------------------------ */ SubMtx_denseSubcolumnsInfo(mtx, &nD, &nentUJJ, &firstlocs, &sizes, &entUJJ) ; A2_copyEntriesToVector(&tempA2, nentUJJ, entUJJ, A2_STRICT_UPPER, A2_BY_COLUMNS) ; SubMtx_columnIndices(mtx, &ncol, &colind) ; IVcopy(nD, colind, colindJ) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n UJJ factor matrix") ; SubMtx_writeForHumanEye(mtx, msgFile) ; fflush(msgFile) ; } } if ( ncolJ > nD ) { /* ----------------------------- copy the U_{J,bnd{J}} entries ----------------------------- */ mtx = FrontMtx_upperMtx(frontmtx, J, nfront) ; SubMtx_denseInfo(mtx, &nD, &nU, &inc1, &inc2, &entUJN) ; A2_subA2(&tempA2, frontJ, 0, nD-1, nD, ncolJ-1) ; A2_copyEntriesToVector(&tempA2, nD*nU, entUJN, A2_ALL_ENTRIES, A2_BY_COLUMNS) ; SubMtx_columnIndices(mtx, &ncol, &colind) ; IVcopy(nU, colind, colindJ + nD) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n UJN factor matrix") ; SubMtx_writeForHumanEye(mtx, msgFile) ; fflush(msgFile) ; } } return ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------- test the Chv_addChevron() method. created -- 98apr18, cca --------------------------------------- */ { Chv *chv ; double alpha[2] ; double imag, real, t1, t2 ; double *chvent, *entries ; Drand *drand ; FILE *msgFile ; int chvsize, count, ichv, ierr, ii, iloc, irow, jcol, lastcol, msglvl, ncol, nD, nent, nL, nrow, nU, off, seed, symflag, type, upper ; int *chvind, *colind, *keys, *rowind, *temp ; if ( argc != 10 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile nD nU type symflag seed " "\n alphareal alphaimag" "\n msglvl -- message level" "\n msgFile -- message file" "\n nD -- # of rows and columns in the (1,1) block" "\n nU -- # of columns in the (1,2) block" "\n type -- entries type" "\n 1 --> real" "\n 2 --> complex" "\n symflag -- symmetry flag" "\n 0 --> symmetric" "\n 1 --> hermitian" "\n 2 --> nonsymmetric" "\n seed -- random number seed" "\n alpha -- scaling parameter" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } nD = atoi(argv[3]) ; nU = atoi(argv[4]) ; type = atoi(argv[5]) ; symflag = atoi(argv[6]) ; seed = atoi(argv[7]) ; alpha[0] = atof(argv[8]) ; alpha[1] = atof(argv[9]) ; if ( nD <= 0 || nU < 0 || symflag < 0 || symflag > 2 ) { fprintf(stderr, "\n invalid input" "\n nD = %d, nU = %d, symflag = %d\n", nD, nU, symflag) ; exit(-1) ; } fprintf(msgFile, "\n alpha = %12.4e + %12.4e*i ;", alpha[0], alpha[1]) ; nL = nU ; /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setUniform(drand, -1.0, 1.0) ; /* ---------------------------- initialize the Chv object ---------------------------- */ MARKTIME(t1) ; chv = Chv_new() ; Chv_init(chv, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize chv object", t2 - t1) ; fflush(msgFile) ; Chv_columnIndices(chv, &ncol, &colind) ; temp = IVinit(2*(nD+nU), -1) ; IVramp(2*(nD+nU), temp, 0, 1) ; IVshuffle(2*(nD+nU), temp, ++seed) ; IVcopy(ncol, colind, temp) ; IVqsortUp(ncol, colind) ; if ( CHV_IS_NONSYMMETRIC(chv) ) { Chv_rowIndices(chv, &nrow, &rowind) ; IVcopy(nrow, rowind, colind) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n %% column indices") ; IVfprintf(msgFile, ncol, colind) ; } lastcol = colind[ncol-1] ; nent = Chv_nent(chv) ; entries = Chv_entries(chv) ; if ( CHV_IS_REAL(chv) ) { Drand_fillDvector(drand, nent, entries) ; } else if ( CHV_IS_COMPLEX(chv) ) { Drand_fillDvector(drand, 2*nent, entries) ; } if ( CHV_IS_HERMITIAN(chv) ) { /* --------------------------------------------------------- hermitian example, set imaginary part of diagonal to zero --------------------------------------------------------- */ for ( irow = 0 ; irow < nD ; irow++ ) { Chv_complexEntry(chv, irow, irow, &real, &imag) ; Chv_setComplexEntry(chv, irow, irow, real, 0.0) ; } } if ( msglvl > 1 ) { fprintf(msgFile, "\n a = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; Chv_writeForMatlab(chv, "a", msgFile) ; } /* -------------------------------------------------- fill a chevron with random numbers and indices that are a subset of a front's, as in the assembly of original matrix entries. -------------------------------------------------- */ Drand_setUniform(drand, 0, nD) ; iloc = (int) Drand_value(drand) ; ichv = colind[iloc] ; if ( CHV_IS_SYMMETRIC(chv) || CHV_IS_HERMITIAN(chv) ) { upper = nD - iloc + nU ; } else { upper = 2*(nD - iloc) - 1 + nL + nU ; } Drand_setUniform(drand, 1, upper) ; chvsize = (int) Drand_value(drand) ; fprintf(msgFile, "\n %% iloc = %d, ichv = %d, chvsize = %d", iloc, ichv, chvsize) ; chvind = IVinit(chvsize, -1) ; chvent = DVinit(2*chvsize, 0.0) ; Drand_setNormal(drand, 0.0, 1.0) ; if ( CHV_IS_REAL(chv) ) { Drand_fillDvector(drand, chvsize, chvent) ; } else if ( CHV_IS_COMPLEX(chv) ) { Drand_fillDvector(drand, 2*chvsize, chvent) ; } keys = IVinit(upper+1, -1) ; keys[0] = 0 ; if ( CHV_IS_SYMMETRIC(chv) || CHV_IS_HERMITIAN(chv) ) { for ( ii = iloc + 1, count = 1 ; ii < nD + nU ; ii++ ) { keys[count++] = colind[ii] - ichv ; } } else { for ( ii = iloc + 1, count = 1 ; ii < nD + nU ; ii++ ) { keys[count++] = colind[ii] - ichv ; keys[count++] = - colind[ii] + ichv ; } } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% iloc = %d, ichv = %d", iloc, ichv) ; fprintf(msgFile, "\n %% upper = %d", upper) ; fprintf(msgFile, "\n %% chvsize = %d", chvsize) ; fprintf(msgFile, "\n %% initial keys") ; IVfprintf(msgFile, count, keys) ; } IVshuffle(count, keys, ++seed) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n %% shuffled keys") ; IVfp80(msgFile, count, keys, 80, &ierr) ; } IVcopy(chvsize, chvind, keys) ; if ( CHV_IS_REAL(chv) ) { IVDVqsortUp(chvsize, chvind, chvent) ; } else if ( CHV_IS_COMPLEX(chv) ) { IVZVqsortUp(chvsize, chvind, chvent) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chvind") ; IVfprintf(msgFile, chvsize, chvind) ; } if ( CHV_IS_HERMITIAN(chv) ) { for ( ii = 0 ; ii < chvsize ; ii++ ) { if ( chvind[ii] == 0 ) { chvent[2*ii+1] = 0.0 ; } } } if ( msglvl > 1 ) { fprintf(msgFile, "\n b = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; if ( CHV_IS_REAL(chv) ) { if ( CHV_IS_SYMMETRIC(chv) ) { for ( ii = 0 ; ii < chvsize ; ii++ ) { off = chvind[ii] ; fprintf(msgFile, "\n b(%d,%d) = %20.12e ;", colind[iloc]+1, colind[iloc]+off+1, chvent[ii]) ; fprintf(msgFile, "\n b(%d,%d) = %20.12e ;", colind[iloc]+off+1, colind[iloc]+1, chvent[ii]) ; } } else { for ( ii = 0 ; ii < chvsize ; ii++ ) { off = chvind[ii] ; if ( off > 0 ) { fprintf(msgFile, "\n b(%d,%d) = %20.12e ;", colind[iloc]+1, colind[iloc]+off+1, chvent[ii]) ; } else { fprintf(msgFile, "\n b(%d,%d) = %20.12e ;", colind[iloc]-off+1, colind[iloc]+1, chvent[ii]) ; } } } } else if ( CHV_IS_COMPLEX(chv) ) { if ( CHV_IS_SYMMETRIC(chv) || CHV_IS_HERMITIAN(chv) ) { for ( ii = 0 ; ii < chvsize ; ii++ ) { off = chvind[ii] ; fprintf(msgFile, "\n b(%d,%d) = %20.12e + %20.12e*i;", colind[iloc]+1, colind[iloc]+off+1, chvent[2*ii], chvent[2*ii+1]) ; if ( CHV_IS_HERMITIAN(chv) ) { fprintf(msgFile, "\n b(%d,%d) = %20.12e + %20.12e*i;", colind[iloc]+off+1, colind[iloc]+1, chvent[2*ii], -chvent[2*ii+1]) ; } else { fprintf(msgFile, "\n b(%d,%d) = %20.12e + %20.12e*i;", colind[iloc]+off+1, colind[iloc]+1, chvent[2*ii], chvent[2*ii+1]) ; } } } else { for ( ii = 0 ; ii < chvsize ; ii++ ) { off = chvind[ii] ; if ( off > 0 ) { fprintf(msgFile, "\n b(%d,%d) = %20.12e + %20.12e*i;", colind[iloc]+1, colind[iloc]+off+1, chvent[2*ii], chvent[2*ii+1]) ; } else { fprintf(msgFile, "\n b(%d,%d) = %20.12e + %20.12e*i;", colind[iloc]-off+1, colind[iloc]+1, chvent[2*ii], chvent[2*ii+1]) ; } } } } } /* ------------------------------------ add the chevron into the Chv object ------------------------------------ */ Chv_addChevron(chv, alpha, ichv, chvsize, chvind, chvent) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n %% after adding the chevron") ; fprintf(msgFile, "\n c = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; Chv_writeForMatlab(chv, "c", msgFile) ; } /* ----------------- compute the error ----------------- */ fprintf(msgFile, "\n max(max(abs(c - (a + alpha*b))))") ; /* ------------------------ free the working storage ------------------------ */ Chv_free(chv) ; Drand_free(drand) ; IVfree(temp) ; IVfree(chvind) ; DVfree(chvent) ; IVfree(keys) ; fprintf(msgFile, "\n") ; return(1) ; }
/* -------------------------------------------------------------- purpose -- to permute (if necessary) the original matrix, and to initialize, factor and postprocess the factor matrix return value --- 1 -- normal return, factorization complete 0 -- factorization did not complete, see error flag -1 -- bridge is NULL -2 -- mtxA is NULL -3 -- perror is NULL created -- 98sep18, cca -------------------------------------------------------------- */ int BridgeMT_factor ( BridgeMT *bridge, InpMtx *mtxA, int permuteflag, int *perror ) { Chv *rootchv ; ChvManager *chvmanager ; double cputotal, nfops, t0, t1, t2 ; double cpus[11] ; int msglvl, nzf ; int stats[16] ; FILE *msgFile ; FrontMtx *frontmtx ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ MARKTIME(t0) ; /* --------------- check the input --------------- */ if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n bridge is NULL\n") ; return(-1) ; } if ( mtxA == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n mtxA is NULL\n") ; return(-2) ; } if ( perror == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n perror is NULL\n") ; return(-3) ; } msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /*--------------------------------------------------------------------*/ MARKTIME(t1) ; if ( permuteflag == 1 ) { int *oldToNew = IV_entries(bridge->oldToNewIV) ; /* ------------------------------------------------ permute the input matrix and convert to chevrons ------------------------------------------------ */ InpMtx_permute(mtxA, oldToNew, oldToNew) ; if ( bridge->symmetryflag == SPOOLES_SYMMETRIC || bridge->symmetryflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangle(mtxA) ; } } if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) { InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; } if ( ! INPMTX_IS_BY_VECTORS(mtxA) ) { InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; } MARKTIME(t2) ; bridge->cpus[6] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %8.3f : permute and format A", t2 - t1) ; fflush(msgFile) ; } /* --------------------------- initialize the front matrix --------------------------- */ MARKTIME(t1) ; if ( (mtxmanager = bridge->mtxmanager) == NULL ) { mtxmanager = bridge->mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ; } if ( (frontmtx = bridge->frontmtx) == NULL ) { frontmtx = bridge->frontmtx = FrontMtx_new() ; } else { FrontMtx_clearData(frontmtx) ; } FrontMtx_init(frontmtx, bridge->frontETree, bridge->symbfacIVL, bridge->type, bridge->symmetryflag, bridge->sparsityflag, bridge->pivotingflag, LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ; frontmtx->patchinfo = bridge->patchinfo ; MARKTIME(t2) ; bridge->cpus[7] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %8.3f : initialize front matrix", t2 - t1) ; fflush(msgFile) ; } /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(bridge->frontETree, bridge->symmetryflag) ; nfops = ETree_nFactorOps(bridge->frontETree, bridge->type, bridge->symmetryflag) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, nfops, nfops/nzf) ; fflush(msgFile) ; } IVzero(16, stats) ; DVzero(11, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, bridge->tau, bridge->droptol, chvmanager, bridge->ownersIV, bridge->lookahead, perror, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; IVcopy(6, bridge->stats, stats) ; bridge->cpus[8] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*nfops/(t2-t1)) ; fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed vertices" "\n %d entries in D, %d entries in L, %d entries in U", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n submatrix mananger after factorization") ; SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; fprintf(msgFile, "\n\n chevron mananger after factorization") ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } ChvManager_free(chvmanager) ; if ( *perror >= 0 ) { return(0) ; } /* ----------------------------- post-process the front matrix ----------------------------- */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; bridge->cpus[9] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n submatrix mananger after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ MARKTIME(t2) ; bridge->cpus[10] += t2 - t0 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : total factor time", t2 - t0) ; fflush(msgFile) ; } return(1) ; }
/* -------------------------------------------- purpose -- to solve the linear system MPI version if permuteflag is 1 then rhs is permuted into new ordering solution is permuted into old ordering return value --- 1 -- normal return -1 -- bridge is NULL -2 -- X is NULL -3 -- Y is NULL -4 -- frontmtx is NULL -5 -- mtxmanager is NULL -6 -- oldToNewIV not available -7 -- newToOldIV not available created -- 98sep18, cca -------------------------------------------- */ int BridgeMPI_solve ( BridgeMPI *bridge, int permuteflag, DenseMtx *X, DenseMtx *Y ) { DenseMtx *Xloc, *Yloc ; double cputotal, t0, t1, t2 ; double cpus[6] ; FILE *msgFile ; FrontMtx *frontmtx ; int firsttag, msglvl, myid, nmycol, nrhs, nrow ; int *mycolind, *rowind ; int stats[4] ; IV *mapIV, *ownersIV ; MPI_Comm comm ; SubMtxManager *mtxmanager ; /* --------------- check the input --------------- */ MARKTIME(t0) ; if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n bridge is NULL\n") ; return(-1) ; } if ( (frontmtx = bridge->frontmtx) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n frontmtx is NULL\n") ; return(-4) ; } if ( (mtxmanager = bridge->mtxmanager) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n mtxmanager is NULL\n") ; return(-5) ; } myid = bridge->myid ; comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; frontmtx = bridge->frontmtx ; ownersIV = bridge->ownersIV ; Xloc = bridge->Xloc ; Yloc = bridge->Yloc ; if ( myid != 0 ) { X = Y = NULL ; } else { if ( X == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, X is NULL\n") ; return(-2) ; } if ( Y == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, Y is NULL\n") ; return(-3) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inside BridgeMPI_solve()") ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile , "\n\n Xloc") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fprintf(msgFile , "\n\n Yloc") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ if ( myid == 0 ) { /* -------------------------- optionally permute the rhs -------------------------- */ if ( permuteflag == 1 ) { int rc ; IV *oldToNewIV ; MARKTIME(t1) ; rc = BridgeMPI_oldToNewIV(bridge, &oldToNewIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_oldToNewIV()\n", rc) ; return(-6) ; } DenseMtx_permuteRows(Y, oldToNewIV) ; MARKTIME(t2) ; bridge->cpus[15] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile , "\n\n permuted Y") ; DenseMtx_writeForHumanEye(Y, msgFile) ; fflush(msgFile) ; } } } /*--------------------------------------------------------------------*/ /* ------------------------------------- distribute the right hand side matrix ------------------------------------- */ MARKTIME(t1) ; mapIV = bridge->rowmapIV ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n row map IV object") ; IV_writeForHumanEye(mapIV, msgFile) ; fflush(msgFile) ; } if ( myid == 0 ) { nrhs = Y->ncol ; } else { nrhs = 0 ; } MPI_Bcast((void *) &nrhs, 1, MPI_INT, 0, comm) ; firsttag = 0 ; IVfill(4, stats, 0) ; DenseMtx_MPI_splitFromGlobalByRows(Y, Yloc, mapIV, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[16] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix Y after the split") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------- initialize the local solution X object -------------------------------------- */ MARKTIME(t1) ; IV_sizeAndEntries(bridge->ownedColumnsIV, &nmycol, &mycolind) ; DenseMtx_init(Xloc, bridge->type, -1, -1, nmycol, nrhs, 1, nmycol) ; if ( nmycol > 0 ) { DenseMtx_rowIndices(Xloc, &nrow, &rowind) ; IVcopy(nmycol, rowind, mycolind) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix X") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[17] += t2 - t1 ; /*--------------------------------------------------------------------*/ /* ---------------- solve the system ---------------- */ MARKTIME(t1) ; DVzero(6, cpus) ; FrontMtx_MPI_solve(frontmtx, Xloc, Yloc, mtxmanager, bridge->solvemap, cpus, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[18] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : solve the system", t2 - t1) ; } cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cputotal) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(stdout) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------- gather the solution on processor zero ------------------------------------- */ MARKTIME(t1) ; DenseMtx_MPI_mergeToGlobalByRows(X, Xloc, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[19] += t2 - t1 ; if ( myid == 0 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in new ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } /*--------------------------------------------------------------------*/ /* ------------------------------- optionally permute the solution ------------------------------- */ if ( myid == 0 ) { if ( permuteflag == 1 ) { int rc ; IV *newToOldIV ; rc = BridgeMPI_newToOldIV(bridge, &newToOldIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_newToOldIV()\n", rc) ; return(-7) ; } DenseMtx_permuteRows(X, newToOldIV) ; } MARKTIME(t2) ; bridge->cpus[20] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in old ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[21] += t2 - t0 ; return(1) ; }
/* ---------------------------------------------------------------- purpose -- for each L_{bnd{J},J} matrix, remove from hash table, split into their L_{K,J} submatrices and insert into the hash table. created -- 98may04, cca ---------------------------------------------------------------- */ void FrontMtx_splitLowerMatrices ( FrontMtx *frontmtx, int msglvl, FILE *msgFile ) { SubMtx *mtxLJ, *mtxLJJ, *mtxLKJ ; SubMtxManager *manager ; double *entLJ, *entLKJ ; int count, first, ii, inc1, inc2, irow, jj, J, K, nbytes, ncolLJ, ncolLKJ, nentLJ, nentLKJ, neqns, nfront, nJ, nrowJ, nrowLJ, nrowLKJ, offset, v ; int *colindLJ, *colindLKJ, *rowmap, *indicesLJ, *indicesLKJ, *locmap, *rowindJ, *rowindLJ, *rowindLKJ, *sizesLJ, *sizesLKJ ; I2Ohash *lowerhash ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_splitLowerMatrices(%p,%d,%p)" "\n bad input\n", frontmtx, msglvl, msgFile) ; spoolesFatal(); } nfront = FrontMtx_nfront(frontmtx) ; neqns = FrontMtx_neqns(frontmtx) ; lowerhash = frontmtx->lowerhash ; manager = frontmtx->manager ; /* -------------------------------- construct the row and local maps -------------------------------- */ rowmap = IVinit(neqns, -1) ; locmap = IVinit(neqns, -1) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_rowIndices(frontmtx, J, &nrowJ, &rowindJ) ; if ( nrowJ > 0 && rowindJ != NULL ) { for ( ii = 0 ; ii < nJ ; ii++ ) { v = rowindJ[ii] ; rowmap[v] = J ; locmap[v] = ii ; } } } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rowmap[]") ; IVfprintf(msgFile, neqns, rowmap) ; fprintf(msgFile, "\n\n locmap[]") ; IVfprintf(msgFile, neqns, locmap) ; fflush(msgFile) ; } /* --------------------------------------------- move the L_{J,J} matrices into the hash table --------------------------------------------- */ for ( J = 0 ; J < nfront ; J++ ) { if ( (mtxLJJ = FrontMtx_lowerMtx(frontmtx, J, J)) != NULL ) { I2Ohash_insert(frontmtx->lowerhash, J, J, mtxLJJ) ; } } /* ------------------------------------------------------------ now split the L_{bnd{J},J} matrices into L_{K,J} matrices. note: columns of L_{bnd{J},J} are assumed to be in ascending order with respect to the column ordering of the matrix. ------------------------------------------------------------ */ for ( J = 0 ; J < nfront ; J++ ) { mtxLJ = FrontMtx_lowerMtx(frontmtx, nfront, J) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ### J = %d, mtxLJ = %p", J, mtxLJ) ; fflush(msgFile) ; } if ( mtxLJ != NULL ) { if ( msglvl > 2 ) { SubMtx_writeForHumanEye(mtxLJ, msgFile) ; fflush(msgFile) ; } SubMtx_columnIndices(mtxLJ, &ncolLJ, &colindLJ) ; SubMtx_rowIndices(mtxLJ, &nrowLJ, &rowindLJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n column indices for J") ; IVfprintf(msgFile, ncolLJ, colindLJ) ; fprintf(msgFile, "\n row indices for LJ") ; IVfprintf(msgFile, nrowLJ, rowindLJ) ; fflush(msgFile) ; } if ( (K = rowmap[rowindLJ[0]]) == rowmap[rowindLJ[nrowLJ-1]] ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d supports only %d", J, K) ; fflush(msgFile) ; } /* ------------------------------------------------- L_{bnd{J},J} is one submatrix, bnd{J} \subseteq K set row and column indices and change column id ------------------------------------------------- */ IVramp(ncolLJ, colindLJ, 0, 1) ; for ( ii = 0 ; ii < nrowLJ ; ii++ ) { rowindLJ[ii] = locmap[rowindLJ[ii]] ; } /* mtxLJ->rowid = K ; */ SubMtx_setFields(mtxLJ, mtxLJ->type, mtxLJ->mode, K, J, mtxLJ->nrow, mtxLJ->ncol, mtxLJ->nent) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting L(%d,%d) ", K, J) ; SubMtx_writeForHumanEye(mtxLJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(lowerhash, K, J, (void *) mtxLJ) ; } else { /* ----------------------------------- split L_{bnd{J},J} into submatrices ----------------------------------- */ nJ = FrontMtx_frontSize(frontmtx, J) ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { SubMtx_denseInfo(mtxLJ, &nrowLJ, &ncolLJ, &inc1, &inc2, &entLJ) ; } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { SubMtx_sparseRowsInfo(mtxLJ, &nrowLJ, &nentLJ, &sizesLJ, &indicesLJ, &entLJ) ; offset = 0 ; count = sizesLJ[0] ; } first = 0 ; K = rowmap[rowindLJ[0]] ; for ( irow = 1 ; irow <= nrowLJ ; irow++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n irow = %d", irow) ; if ( irow < nrowLJ ) { fprintf(msgFile, ", rowmap[%d] = %d", rowindLJ[irow], rowmap[rowindLJ[irow]]); } fflush(msgFile) ; } if ( irow == nrowLJ || K != rowmap[rowindLJ[irow]] ) { nrowLKJ = irow - first ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { nentLKJ = nJ*nrowLKJ ; } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { if ( count == 0 ) { goto no_entries ; } nentLKJ = count ; } nbytes = SubMtx_nbytesNeeded(mtxLJ->type, mtxLJ->mode, nrowLKJ, nJ, nentLKJ) ; mtxLKJ = SubMtxManager_newObjectOfSizeNbytes(manager, nbytes) ; SubMtx_init(mtxLKJ, mtxLJ->type, mtxLJ->mode, K, J, nrowLKJ, nJ, nentLKJ) ; if ( SUBMTX_IS_DENSE_ROWS(mtxLJ) ) { SubMtx_denseInfo(mtxLKJ, &nrowLKJ, &ncolLKJ, &inc1, &inc2, &entLKJ) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentLKJ, entLKJ, entLJ + first*nJ) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentLKJ, entLKJ, entLJ + 2*first*nJ) ; } } else if ( SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { SubMtx_sparseRowsInfo(mtxLKJ, &nrowLKJ, &nentLKJ, &sizesLKJ, &indicesLKJ, &entLKJ) ; IVcopy(nrowLKJ, sizesLKJ, sizesLJ + first) ; IVcopy(nentLKJ, indicesLKJ, indicesLJ + offset) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { DVcopy(nentLKJ, entLKJ, entLJ + offset) ; } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { DVcopy(2*nentLKJ, entLKJ, entLJ + 2*offset) ; } count = 0 ; offset += nentLKJ ; } /* ------------------------------------- initialize the row and column indices ------------------------------------- */ SubMtx_rowIndices(mtxLKJ, &nrowLKJ, &rowindLKJ) ; for ( ii = 0, jj = first ; ii < nrowLKJ ; ii++, jj++ ) { rowindLKJ[ii] = locmap[rowindLJ[jj]] ; } SubMtx_columnIndices(mtxLKJ, &ncolLKJ, &colindLKJ) ; IVramp(ncolLKJ, colindLKJ, 0, 1) ; /* ---------------------------------- insert L_{K,J} into the hash table ---------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ## inserting L(%d,%d) ", K, J) ; SubMtx_writeForHumanEye(mtxLKJ, msgFile) ; fflush(msgFile) ; } I2Ohash_insert(lowerhash, K, J, (void *) mtxLKJ) ; /* ----------------------------------- we jump to here if there were no entries to be stored in the matrix. ----------------------------------- */ no_entries : /* ---------------------------------------------------- reset first and K to new first location and front id ---------------------------------------------------- */ first = irow ; if ( irow < nrowLJ ) { K = rowmap[rowindLJ[irow]] ; } } if ( irow < nrowLJ && SUBMTX_IS_SPARSE_ROWS(mtxLJ) ) { count += sizesLJ[irow] ; } } /* -------------------------------------------- give L_{bnd{J},J} back to the matrix manager -------------------------------------------- */ SubMtxManager_releaseObject(manager, mtxLJ) ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(rowmap) ; IVfree(locmap) ; return ; }
/* ----------------------------------------------------------- purpose -- return the Y by Y graph where (y1,y2) is an edge if there exists a x in X such that (x,y1) and (x,y2) are edges in the bipartite graph. created -- 95dec07, cca ----------------------------------------------------------- */ Graph * BPG_makeGraphYbyY ( BPG *bpg ) { Graph *graph, *gYbyY ; int count, ii, jj, nX, nY, x, xsize, y, ysize, z ; int *list, *mark, *xadj, *yadj ; /* --------------- check the input --------------- */ if ( bpg == NULL ) { fprintf(stdout, "\n fatal error in BPG_makeGraphXbyX(%p)" "\n bad input\n", bpg) ; spoolesFatal(); } /* ---------------------- check for quick return ---------------------- */ if ( (graph = bpg->graph) == NULL || (nY = bpg->nY) <= 0 ) { return(NULL) ; } nX = bpg->nX ; /* -------------------- initialize the graph -------------------- */ gYbyY = Graph_new() ; Graph_init1(gYbyY, graph->type, nY, 0, 0, IVL_CHUNKED, IVL_CHUNKED) ; /* -------------- fill the graph -------------- */ mark = IVinit(nY, -1) ; list = IVinit(nY, -1) ; for ( y = 0 ; y < nY ; y++ ) { Graph_adjAndSize(graph, nX + y, &ysize, &yadj) ; mark[y] = y ; for ( ii = 0, count = 0 ; ii < ysize ; ii++ ) { x = yadj[ii] ; Graph_adjAndSize(graph, x, &xsize, &xadj) ; for ( jj = 0 ; jj < xsize ; jj++ ) { z = xadj[jj] ; if ( mark[z] != y ) { mark[z] = y ; list[count++] = z ; } } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(gYbyY->adjIVL, nX + y, count, list) ; } } IVfree(list) ; IVfree(mark) ; /* --------------------------------------- set vertex weight vector if appropriate --------------------------------------- */ if ( graph->type % 2 == 1 ) { IVcopy(nY, gYbyY->vwghts, graph->vwghts + nX) ; } return(gYbyY) ; }
/* -------------------------------------------------------------------- purpose -- to fill submtx with a submatrix of the front matrix. the fronts that form the submatrix are found in frontidsIV. all information in submtx is local, front #'s are from 0 to one less than the number of fronts in the submatrix, equation #'s are from 0 to one less than the number of rows and columns in the submatrix. the global row and column ids for the submatrix are stored in rowsIV and colsIV on return. return values --- 1 -- normal return -1 -- submtx is NULL -2 -- frontmtx is NULL -3 -- frontmtx is not in 2-D mode -4 -- frontidsIV is NULL -5 -- frontidsIV is invalid -6 -- rowsIV is NULL -7 -- colsIV is NULL -8 -- unable to create front tree -9 -- unable to create symbfacIVL -10 -- unable to create coladjIVL -11 -- unable to create rowadjIVL -12 -- unable to create upperblockIVL -13 -- unable to create lowerblockIVL created -- 98oct17, cca -------------------------------------------------------------------- */ int FrontMtx_initFromSubmatrix ( FrontMtx *submtx, FrontMtx *frontmtx, IV *frontidsIV, IV *rowsIV, IV *colsIV, int msglvl, FILE *msgFile ) { ETree *etreeSub ; int ii, J, Jsub, K, Ksub, ncol, nfront, nfrontSub, neqnSub, nJ, nrow, offset, rc, size, vSub ; int *bndwghts, *colind, *colmap, *cols, *frontSubIds, *list, *nodwghts, *rowind, *rowmap, *rows ; IV *frontsizesIVsub, *vtxIV ; IVL *coladjIVLsub, *lowerblockIVLsub, *rowadjIVLsub, *symbfacIVLsub, *upperblockIVLsub ; SubMtx *mtx ; /* --------------- check the input --------------- */ if ( submtx == NULL ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n submtx is NULL\n") ; return(-1) ; } if ( frontmtx == NULL ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n frontmtx is NULL\n") ; return(-2) ; } if ( ! FRONTMTX_IS_2D_MODE(frontmtx) ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n frontmtx mode is not 2D\n") ; return(-3) ; } if ( frontidsIV == NULL ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n frontidsIV is NULL\n") ; return(-4) ; } nfront = FrontMtx_nfront(frontmtx) ; IV_sizeAndEntries(frontidsIV, &nfrontSub, &frontSubIds) ; if ( nfrontSub < 0 || nfrontSub > nfront ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n invalid frontidsIV" "\n nfrontSub = %d, nfront %d\n", nfrontSub, nfront) ; return(-5) ; } for ( ii = 0 ; ii < nfrontSub ; ii++ ) { if ( (J = frontSubIds[ii]) < 0 || J >= nfront ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n invalid frontidsIV" "\n frontSubIds[%d] = %d, nfront = %d\n", ii, J, nfront) ; return(-5) ; } } if ( rowsIV == NULL ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n rowsIV is NULL\n") ; return(-6) ; } if ( colsIV == NULL ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n colsIV is NULL\n") ; return(-7) ; } /*--------------------------------------------------------------------*/ /* ----------------------------------------------------- clear the data for the submatrix and set the scalar values (some inherited from the global matrix) ----------------------------------------------------- */ FrontMtx_clearData(submtx) ; submtx->nfront = nfrontSub ; submtx->type = frontmtx->type ; submtx->symmetryflag = frontmtx->symmetryflag ; submtx->sparsityflag = frontmtx->sparsityflag ; submtx->pivotingflag = frontmtx->pivotingflag ; submtx->dataMode = FRONTMTX_2D_MODE ; /* --------------------------------------------------------------- initialize the front tree for the submatrix. note: on return, vtxIV is filled with the vertices originally in the submatrix, (pivoting may change this), needed to find symbolic factorization IVL object note: at return, the boundary weights are likely to be invalid, since we have no way of knowing what boundary indices for a front are really in the domain. this will be changed after we have the symbolic factorization. --------------------------------------------------------------- */ etreeSub = submtx->frontETree = ETree_new() ; vtxIV = IV_new() ; rc = ETree_initFromSubtree(etreeSub, frontidsIV, frontmtx->frontETree, vtxIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create submatrix's front ETree, rc = %d\n", rc) ; return(-8) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n submatrix ETree") ; ETree_writeForHumanEye(etreeSub, msgFile) ; fprintf(msgFile, "\n\n submatrix original equations") ; IV_writeForHumanEye(vtxIV, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------ set the # of equations (perhap temporarily if pivoting has delayed some rows and columns), and the tree. ------------------------------------------------------ */ submtx->neqns = neqnSub = IV_size(vtxIV) ; submtx->tree = etreeSub->tree ; /* ----------------------------------------------------- initialize the symbolic factorization for the subtree ----------------------------------------------------- */ symbfacIVLsub = submtx->symbfacIVL = IVL_new() ; rc = IVL_initFromSubIVL(symbfacIVLsub, frontmtx->symbfacIVL, frontidsIV, vtxIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create submatrix's symbfac, rc = %d\n", rc) ; return(-9) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n submatrix symbolic factorizatio") ; IVL_writeForHumanEye(symbfacIVLsub, msgFile) ; fflush(msgFile) ; } /* --------------------------------------------- adjust the boundary weights of the front tree --------------------------------------------- */ nodwghts = ETree_nodwghts(etreeSub) ; bndwghts = ETree_bndwghts(etreeSub) ; for ( J = 0 ; J < nfrontSub ; J++ ) { IVL_listAndSize(symbfacIVLsub, J, &size, &list) ; bndwghts[J] = size - nodwghts[J] ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n submatrix ETree after bndweight adjustment") ; ETree_writeForHumanEye(etreeSub, msgFile) ; fflush(msgFile) ; } /* ------------------------------------- set the front sizes for the submatrix ------------------------------------- */ frontsizesIVsub = submtx->frontsizesIV = IV_new() ; IV_init(frontsizesIVsub, nfrontSub, NULL) ; IVgather(nfrontSub, IV_entries(frontsizesIVsub), IV_entries(frontmtx->frontsizesIV), IV_entries(frontidsIV)) ; neqnSub = submtx->neqns = IV_sum(frontsizesIVsub) ; if ( msglvl > 4 ) { fprintf(msgFile, "\n\n %d equations in submatrix", neqnSub) ; fprintf(msgFile, "\n\n front sizes for submatrix") ; IV_writeForHumanEye(frontsizesIVsub, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------------------- fill rowsIV and colsIV with the row and column ids of the submatrix ------------------------------------------------------------------- */ IV_setSize(rowsIV, neqnSub) ; IV_setSize(colsIV, neqnSub) ; rows = IV_entries(rowsIV) ; cols = IV_entries(colsIV) ; for ( Jsub = offset = 0 ; Jsub < nfrontSub ; Jsub++ ) { if ( (nJ = FrontMtx_frontSize(submtx, Jsub)) > 0 ) { J = frontSubIds[Jsub] ; FrontMtx_columnIndices(frontmtx, J, &size, &list) ; IVcopy(nJ, cols + offset, list) ; FrontMtx_rowIndices(frontmtx, J, &size, &list) ; IVcopy(nJ, rows + offset, list) ; offset += nJ ; } } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n row ids for submatrix") ; IV_writeForHumanEye(rowsIV, msgFile) ; fprintf(msgFile, "\n\n column ids for submatrix") ; IV_writeForHumanEye(colsIV, msgFile) ; fflush(msgFile) ; } /* ---------------------------------- get the row and column adjacencies ---------------------------------- */ if ( FRONTMTX_IS_PIVOTING(frontmtx) ) { submtx->neqns = neqnSub ; coladjIVLsub = submtx->coladjIVL = IVL_new() ; rc = IVL_initFromSubIVL(coladjIVLsub, frontmtx->coladjIVL, frontidsIV, colsIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create submatrix's coladjIVL, rc = %d\n", rc) ; return(-10) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n submatrix col adjacency") ; IVL_writeForHumanEye(coladjIVLsub, msgFile) ; fflush(msgFile) ; } if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { rowadjIVLsub = submtx->rowadjIVL = IVL_new() ; rc = IVL_initFromSubIVL(rowadjIVLsub, frontmtx->rowadjIVL, frontidsIV, rowsIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create submatrix's rowadjIVL, rc = %d\n", rc) ; return(-11) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n submatrix row adjacency") ; IVL_writeForHumanEye(rowadjIVLsub, msgFile) ; fflush(msgFile) ; } } } IV_free(vtxIV) ; /* ---------------------------------------------- get the rowmap[] and colmap[] vectors, needed to translate indices in the submatrices ---------------------------------------------- */ colmap = IVinit(frontmtx->neqns, -1) ; for ( ii = 0 ; ii < neqnSub ; ii++ ) { colmap[cols[ii]] = ii ; } if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { rowmap = IVinit(frontmtx->neqns, -1) ; for ( ii = 0 ; ii < neqnSub ; ii++ ) { rowmap[rows[ii]] = ii ; } } else { rowmap = colmap ; } /* ----------------------------------------------------------- get the upper and lower block IVL objects for the submatrix ----------------------------------------------------------- */ upperblockIVLsub = submtx->upperblockIVL = IVL_new() ; rc = IVL_initFromSubIVL(upperblockIVLsub, frontmtx->upperblockIVL, frontidsIV, frontidsIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create upperblockIVL, rc = %d\n", rc) ; return(-12) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n upper block adjacency IVL object") ; IVL_writeForHumanEye(upperblockIVLsub, msgFile) ; fflush(msgFile) ; } if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { lowerblockIVLsub = submtx->lowerblockIVL = IVL_new() ; rc = IVL_initFromSubIVL(lowerblockIVLsub, frontmtx->lowerblockIVL, frontidsIV, frontidsIV) ; if ( rc != 1 ) { fprintf(stderr, "\n error in FrontMtx_initFromSubmatrix()" "\n unable to create lowerblockIVL, rc = %d\n", rc) ; return(-13) ; } if ( msglvl > 4 ) { fprintf(msgFile, "\n\n lower block adjacency IVL object") ; IVL_writeForHumanEye(lowerblockIVLsub, msgFile) ; fflush(msgFile) ; } } /* ---------------------------------------------------------------- allocate the vector and hash table(s) for the factor submatrices ---------------------------------------------------------------- */ ALLOCATE(submtx->p_mtxDJJ, struct _SubMtx *, nfrontSub) ; for ( J = 0 ; J < nfrontSub ; J++ ) { submtx->p_mtxDJJ[J] = NULL ; } submtx->upperhash = I2Ohash_new() ; I2Ohash_init(submtx->upperhash, nfrontSub, nfrontSub, nfrontSub) ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { submtx->lowerhash = I2Ohash_new() ; I2Ohash_init(submtx->lowerhash, nfrontSub, nfrontSub, nfrontSub) ; } /* ----------------------------------------------------------------- remove the diagonal submatrices from the factor matrix and insert into the submatrix object. note: front row and column ids must be changed to their local values, and the row and column indices must be mapped to local indices. ----------------------------------------------------------------- */ for ( Jsub = 0 ; Jsub < nfrontSub ; Jsub++ ) { J = frontSubIds[Jsub] ; if ( (mtx = frontmtx->p_mtxDJJ[J]) != NULL ) { SubMtx_setIds(mtx, Jsub, Jsub) ; SubMtx_columnIndices(mtx, &ncol, &colind) ; IVgather(ncol, colind, colmap, colind) ; SubMtx_rowIndices(mtx, &nrow, &rowind) ; IVgather(nrow, rowind, rowmap, rowind) ; submtx->p_mtxDJJ[Jsub] = mtx ; frontmtx->p_mtxDJJ[J] = NULL ; submtx->nentD += mtx->nent ; } } /* ---------------------------------------------------------------- remove the upper triangular submatrices from the factor matrix and insert into the submatrix object. note: front row and column ids must be changed to their local values. if the matrix is on the diagonal, i.e., U(J,J), its row and column indices must be mapped to local indices. ---------------------------------------------------------------- */ for ( Jsub = 0 ; Jsub < nfrontSub ; Jsub++ ) { J = frontSubIds[Jsub] ; FrontMtx_upperAdjFronts(submtx, Jsub, &size, &list) ; for ( ii = 0 ; ii < size ; ii++ ) { Ksub = list[ii] ; K = frontSubIds[Ksub] ; if ( 1 == I2Ohash_remove(frontmtx->upperhash, J, K, (void *) &mtx) ) { SubMtx_setIds(mtx, Jsub, Ksub) ; if ( K == J ) { SubMtx_columnIndices(mtx, &ncol, &colind) ; IVgather(ncol, colind, colmap, colind) ; SubMtx_rowIndices(mtx, &nrow, &rowind) ; IVgather(nrow, rowind, rowmap, rowind) ; } I2Ohash_insert(submtx->upperhash, Jsub, Ksub, (void *) mtx) ; submtx->nentU += mtx->nent ; } } } if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { /* ---------------------------------------------------------------- remove the lower triangular submatrices from the factor matrix and insert into the submatrix object. note: front row and column ids must be changed to their local values. if the matrix is on the diagonal, i.e., L(J,J), its row and column indices must be mapped to local indices. ---------------------------------------------------------------- */ for ( Jsub = 0 ; Jsub < nfrontSub ; Jsub++ ) { J = frontSubIds[Jsub] ; FrontMtx_lowerAdjFronts(submtx, Jsub, &size, &list) ; for ( ii = 0 ; ii < size ; ii++ ) { Ksub = list[ii] ; K = frontSubIds[Ksub] ; if ( 1 == I2Ohash_remove(frontmtx->lowerhash, K, J, (void *) &mtx) ) { SubMtx_setIds(mtx, Ksub, Jsub) ; if ( K == J ) { SubMtx_columnIndices(mtx, &ncol, &colind) ; IVgather(ncol, colind, colmap, colind) ; SubMtx_rowIndices(mtx, &nrow, &rowind) ; IVgather(nrow, rowind, rowmap, rowind) ; } I2Ohash_insert(submtx->lowerhash, Ksub, Jsub, (void *) mtx); submtx->nentL += mtx->nent ; } } } } /* ------------------------ free the working storage ------------------------ */ IVfree(colmap) ; if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) { IVfree(rowmap) ; } return(1) ; }
/* -------------------------------------------------- purpose -- to solve a linear system (A - sigma*B) sol[] = rhs[] data -- pointer to bridge data object *pnrows -- # of rows in x[] and y[] *pncols -- # of columns in x[] and y[] rhs[] -- vector that holds right hand sides NOTE: the rhs[] vector is global, not a portion sol[] -- vector to hold solutions NOTE: the sol[] vector is global, not a portion note: rhs[] and sol[] can be the same array. on return, *perror holds an error code. created -- 98aug28, cca & jcp -------------------------------------------------- */ void JimSolveMPI ( int *pnrows, int *pncols, double rhs[], double sol[], void *data, int *perror ) { BridgeMPI *bridge = (BridgeMPI *) data ; DenseMtx *mtx, *newmtx ; int irow, jj, jcol, kk, myid, ncols = *pncols, neqns, nowned, tag = 0 ; int *vtxmap ; int stats[4] ; IV *mapIV ; #if MYDEBUG > 0 double t1, t2 ; count_JimSolve++ ; MARKTIME(t1) ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() start", count_JimSolve) ; fflush(bridge->msgFile) ; #endif MPI_Barrier(bridge->comm) ; /* --------------------------------------------- slide the owned rows of rhs down in the array --------------------------------------------- */ vtxmap = IV_entries(bridge->vtxmapIV) ; neqns = bridge->neqns ; myid = bridge->myid ; nowned = IV_size(bridge->myownedIV) ; for ( jcol = jj = kk = 0 ; jcol < ncols ; jcol++ ) { for ( irow = 0 ; irow < neqns ; irow++, jj++ ) { if ( vtxmap[irow] == myid ) { sol[kk++] = rhs[jj] ; } } } if ( kk != nowned * ncols ) { fprintf(stderr, "\n proc %d : kk %d, nowned %d, ncols %d", myid, kk, nowned, ncols) ; exit(-1) ; } /* ---------------------------------------- call the method that assumes local input ---------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling SolveMPI()") ; fflush(bridge->msgFile) ; } SolveMPI(&nowned, pncols, sol, sol, data, perror) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from SolveMPI()") ; fflush(bridge->msgFile) ; } /* ------------------------------------------ gather all the entries onto processor zero ------------------------------------------ */ mtx = DenseMtx_new() ; DenseMtx_init(mtx, SPOOLES_REAL, 0, 0, nowned, ncols, 1, nowned) ; DVcopy (nowned*ncols, DenseMtx_entries(mtx), sol) ; IVcopy(nowned, mtx->rowind, IV_entries(bridge->myownedIV)) ; mapIV = IV_new() ; IV_init(mapIV, neqns, NULL) ; IV_fill(mapIV, 0) ; IVfill(4, stats, 0) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling DenseMtx_split()()") ; fflush(bridge->msgFile) ; } newmtx = DenseMtx_MPI_splitByRows(mtx, mapIV, stats, bridge->msglvl, bridge->msgFile, tag, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from DenseMtx_split()()") ; fflush(bridge->msgFile) ; } DenseMtx_free(mtx) ; mtx = newmtx ; IV_free(mapIV) ; if ( myid == 0 ) { DVcopy(neqns*ncols, sol, DenseMtx_entries(mtx)) ; } DenseMtx_free(mtx) ; /* --------------------------------------------- broadcast the entries to the other processors --------------------------------------------- */ if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n calling MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Bcast((void *) sol, neqns*ncols, MPI_DOUBLE, 0, bridge->comm) ; if ( bridge->msglvl > 1 ) { fprintf(bridge->msgFile, "\n return from MPI_Bcast()()") ; fflush(bridge->msgFile) ; } MPI_Barrier(bridge->comm) ; /* ------------------------------------------------------------------ set the error. (this is simple since when the spooles codes detect a fatal error, they print out a message to stderr and exit.) ------------------------------------------------------------------ */ *perror = 0 ; #if MYDEBUG > 0 MARKTIME(t2) ; time_JimSolve += t2 - t1 ; if ( bridge->myid == 0 ) { fprintf(stdout, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(stdout, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(stdout) ; } #endif #if MYDEBUG > 1 fprintf(bridge->msgFile, "\n (%d) JimSolve() end", count_JimSolve) ; fprintf(bridge->msgFile, ", %8.3f seconds, %8.3f total time", t2 - t1, time_JimSolve) ; fflush(bridge->msgFile) ; #endif return ; }
/* -------------------------------------------------------------- identify the wide separator return -- IV object that holds the nodes in the wide separator created -- 96oct21, cca -------------------------------------------------------------- */ IV * GPart_identifyWideSep ( GPart *gpart, int nlevel1, int nlevel2 ) { FILE *msgFile ; Graph *g ; int count, first, ierr, ii, ilevel, last, msglvl, nfirst, now, nsecond, nsep, nvtx, v, vsize, w ; int *compids, *list, *mark, *vadj ; IV *sepIV ; /* --------------- check the input --------------- */ if ( gpart == NULL || (g = gpart->g) == NULL || nlevel1 < 0 || nlevel2 < 0 ) { fprintf(stderr, "\n fatal error in GPart_identifyWideSep(%p,%d,%d)" "\n bad input\n", gpart, nlevel1, nlevel2) ; exit(-1) ; } g = gpart->g ; compids = IV_entries(&gpart->compidsIV) ; nvtx = g->nvtx ; mark = IVinit(nvtx, -1) ; list = IVinit(nvtx, -1) ; msglvl = gpart->msglvl ; msgFile = gpart->msgFile ; /* -------------------------------------- load the separator nodes into the list -------------------------------------- */ nsep = 0 ; for ( v = 0 ; v < nvtx ; v++ ) { if ( compids[v] == 0 ) { list[nsep++] = v ; mark[v] = 0 ; } } count = nsep ; if ( msglvl > 1 ) { fprintf(msgFile, "\n GPart_identifyWideSep : %d separator nodes loaded", count) ; fflush(msgFile) ; } if ( msglvl > 2 ) { IVfp80(msgFile, nsep, list, 80, &ierr) ; fflush(msgFile) ; } /* ---------------------------------------------- loop over the number of levels out that form the wide separator towards the first component ---------------------------------------------- */ if ( nlevel1 >= 1 ) { first = count ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n level = %d, first = %d", 1, first) ; fflush(msgFile) ; } for ( now = 0 ; now < nsep ; now++ ) { v = list[now] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d : ", v) ; IVfp80(msgFile, vsize, vadj, 80, &ierr) ; fflush(msgFile) ; } for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( w < nvtx && mark[w] == -1 && compids[w] == 1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n adding %d to list", w) ; fflush(msgFile) ; } list[count++] = w ; mark[w] = 1 ; } } } now = first ; for ( ilevel = 2 ; ilevel <= nlevel1 ; ilevel++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n\n level = %d, first = %d", ilevel, first); fflush(msgFile) ; } last = count - 1 ; while ( now <= last ) { v = list[now++] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d : ", v) ; IVfp80(msgFile, vsize, vadj, 80, &ierr) ; fflush(msgFile) ; } for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( w < nvtx && mark[w] == -1 && compids[w] == 1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n adding %d to list", w) ; fflush(msgFile) ; } mark[w] = 1 ; list[count++] = w ; } } } } } nfirst = count - nsep ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d nodes added from the first component", nfirst) ; fflush(msgFile) ; } if ( msglvl > 3 ) { IVfp80(msgFile, nfirst, &list[nsep], 80, &ierr) ; fflush(msgFile) ; } /* ---------------------------------------------- loop over the number of levels out that form the wide separator towards the second component ---------------------------------------------- */ if ( nlevel2 >= 1 ) { first = count ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n level = %d, first = %d", 1, first) ; fflush(msgFile) ; } for ( now = 0 ; now < nsep ; now++ ) { v = list[now] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d : ", v) ; IVfp80(msgFile, vsize, vadj, 80, &ierr) ; fflush(msgFile) ; } for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( w < nvtx && mark[w] == -1 && compids[w] == 2 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n adding %d to list", w) ; fflush(msgFile) ; } list[count++] = w ; mark[w] = 2 ; } } } now = first ; for ( ilevel = 2 ; ilevel <= nlevel2 ; ilevel++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n\n level = %d, first = %d", ilevel, first); fflush(msgFile) ; } last = count - 1 ; while ( now <= last ) { v = list[now++] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d : ", v) ; IVfp80(msgFile, vsize, vadj, 80, &ierr) ; fflush(msgFile) ; } for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( w < nvtx && mark[w] == -1 && compids[w] == 2 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n adding %d to list", w) ; fflush(msgFile) ; } mark[w] = 2 ; list[count++] = w ; } } } } } nsecond = count - nsep - nfirst ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %d nodes added from the second component", nsecond) ; fflush(msgFile) ; } if ( msglvl > 3 ) { IVfp80(msgFile, nsecond, &list[nsep + nfirst], 80, &ierr) ; fflush(msgFile) ; } IVqsortUp(count, list) ; /* -------------------- create the IV object -------------------- */ sepIV = IV_new() ; IV_init(sepIV, count, NULL) ; IVcopy(count, IV_entries(sepIV), list) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n separator has %d nodes", IV_size(sepIV)) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n sepIV") ; IV_writeForHumanEye(sepIV, msgFile) ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ IVfree(mark) ; IVfree(list) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n return from GPart_identifyWideSep") ; fflush(msgFile) ; } return(sepIV) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------ test the Chv_assembleChv() method. created -- 98apr18, cca ------------------------------------ */ { Chv *chvI, *chvJ ; double imag, real, t1, t2 ; double *entriesI, *entriesJ ; Drand *drand ; FILE *msgFile ; int ierr, ii, irow, jcol, lastcol, msglvl, ncolI, ncolJ, nDI, nDJ, nentI, nentJ, nrowI, nrowJ, nUI, nUJ, seed, symflag, type ; int *colindI, *colindJ, *rowindI, *rowindJ, *temp ; if ( argc != 10 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile nDJ nUJ nDI nUI type symflag seed " "\n msglvl -- message level" "\n msgFile -- message file" "\n nDJ -- # of rows and columns in the (1,1) block" "\n nUJ -- # of columns in the (1,2) block" "\n nDI -- # of rows and columns in the (1,1) block" "\n nUI -- # of columns in the (1,2) block" "\n type -- entries type" "\n 1 --> real" "\n 2 --> complex" "\n symflag -- symmetry flag" "\n 0 --> symmetric" "\n 1 --> hermitian" "\n 2 --> nonsymmetric" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } nDJ = atoi(argv[3]) ; nUJ = atoi(argv[4]) ; nDI = atoi(argv[5]) ; nUI = atoi(argv[6]) ; type = atoi(argv[7]) ; symflag = atoi(argv[8]) ; seed = atoi(argv[9]) ; if ( nDJ <= 0 || nUJ < 0 || nDI <= 0 || nUI < 0 || nDI >= nDJ || (nDI + nUI) >= (nDJ + nUJ) || nUI >= (nDJ + nUJ - nDI) || ( symflag != SPOOLES_SYMMETRIC && symflag != SPOOLES_HERMITIAN && symflag != SPOOLES_NONSYMMETRIC) ) { fprintf(stderr, "\n invalid input" "\n nDJ = %d, nUJ = %d, nDI = %d, nUI = %d, symflag = %d\n", nDJ, nUJ, nDI, nUI, symflag) ; exit(-1) ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setSeed(drand, seed) ; Drand_setUniform(drand, -1.0, 1.0) ; /* ---------------------------- initialize the ChvJ object ---------------------------- */ MARKTIME(t1) ; chvJ = Chv_new() ; Chv_init(chvJ, 0, nDJ, nUJ, nUJ, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize chv object", t2 - t1) ; fflush(msgFile) ; Chv_columnIndices(chvJ, &ncolJ, &colindJ) ; temp = IVinit(2*(nDJ+nUJ), -1) ; IVramp(2*(nDJ+nUJ), temp, 0, 1) ; IVshuffle(2*(nDJ+nUJ), temp, ++seed) ; IVcopy(ncolJ, colindJ, temp) ; IVfree(temp) ; IVqsortUp(ncolJ, colindJ) ; if ( CHV_IS_NONSYMMETRIC(chvJ) ) { Chv_rowIndices(chvJ, &nrowJ, &rowindJ) ; IVcopy(nrowJ, rowindJ, colindJ) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n %% column indices") ; IVfprintf(msgFile, ncolJ, colindJ) ; } lastcol = colindJ[ncolJ-1] ; nentJ = Chv_nent(chvJ) ; entriesJ = Chv_entries(chvJ) ; if ( CHV_IS_REAL(chvJ) ) { Drand_fillDvector(drand, nentJ, entriesJ) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { Drand_fillDvector(drand, 2*nentJ, entriesJ) ; } if ( CHV_IS_HERMITIAN(chvJ) ) { /* --------------------------------------------------------- hermitian example, set imaginary part of diagonal to zero --------------------------------------------------------- */ for ( irow = 0 ; irow < nDJ ; irow++ ) { Chv_complexEntry(chvJ, irow, irow, &real, &imag) ; Chv_setComplexEntry(chvJ, irow, irow, real, 0.0) ; } } /* --------------------------- initialize the ChvI object --------------------------- */ chvI = Chv_new() ; Chv_init(chvI, 0, nDI, nUI, nUI, type, symflag) ; Chv_columnIndices(chvI, &ncolI, &colindI) ; temp = IVinit(ncolJ, -1) ; IVramp(ncolJ, temp, 0, 1) ; while ( 1 ) { IVshuffle(ncolJ, temp, ++seed) ; IVqsortUp(ncolI, temp) ; if ( temp[0] < nDJ ) { break ; } } for ( ii = 0 ; ii < ncolI ; ii++ ) { colindI[ii] = colindJ[temp[ii]] ; } IVfree(temp) ; if ( CHV_IS_NONSYMMETRIC(chvI) ) { Chv_rowIndices(chvI, &nrowI, &rowindI) ; IVcopy(nrowI, rowindI, colindI) ; } nentI = Chv_nent(chvI) ; entriesI = Chv_entries(chvI) ; if ( CHV_IS_REAL(chvI) ) { Drand_fillDvector(drand, nentI, entriesI) ; } else if ( CHV_IS_COMPLEX(chvI) ) { Drand_fillDvector(drand, 2*nentI, entriesI) ; } if ( CHV_IS_HERMITIAN(chvI) ) { /* --------------------------------------------------------- hermitian example, set imaginary part of diagonal to zero --------------------------------------------------------- */ for ( irow = 0 ; irow < nDI ; irow++ ) { Chv_complexEntry(chvI, irow, irow, &real, &imag) ; Chv_setComplexEntry(chvI, irow, irow, real, 0.0) ; } } /* -------------------------------------------------- write out the two chevron objects to a matlab file -------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n a = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; Chv_writeForMatlab(chvJ, "a", msgFile) ; fprintf(msgFile, "\n b = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; Chv_writeForMatlab(chvI, "b", msgFile) ; } /* --------------------------------------------- assemble the chvI object into the chvJ object --------------------------------------------- */ Chv_assembleChv(chvJ, chvI) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n %% after assembly") ; fprintf(msgFile, "\n c = zeros(%d,%d) ;", lastcol+1, lastcol+1) ; Chv_writeForMatlab(chvJ, "c", msgFile) ; } /* ----------------- compute the error ----------------- */ fprintf(msgFile, "\n max(max(abs(c - (b + a))))") ; /* ------------------------ free the working storage ------------------------ */ Chv_free(chvJ) ; Chv_free(chvI) ; Drand_free(drand) ; fprintf(msgFile, "\n") ; return(1) ; }
/* ----------------------------------------- purpose -- eliminate vertex v 1) create v's boundary list 2) merge boundary list onto reach list 3) for each vertex in the boundary 3.1) add v to the subtree list created -- 96feb25, cca ----------------------------------------- */ void MSMD_eliminateVtx ( MSMD *msmd, MSMDvtx *v, MSMDinfo *info ) { int i, ierr, j, nadj, nbnd, nedge, uid, wid, wght ; int *adj, *bnd, *edges ; IP *ip, *ip2, *prev ; IV *reachIV ; MSMDvtx *u, *w ; /* --------------- check the input --------------- */ if ( msmd == NULL || v == NULL || info == NULL ) { fprintf(stderr, "\n fatal error in MSMD_eliminateVtx(%p,%p,%p)" "\n bad input\n", msmd, v, info) ; exit(-1) ; } adj = IV_entries(&msmd->ivtmpIV) ; reachIV = &msmd->reachIV ; /* ----------------------------- create the boundary set for v ----------------------------- */ v->mark = 'X' ; if ( v->subtrees == NULL ) { /* -------------------------------------------------------- v is a leaf, look at its uncovered edge list, move v and any indistinguishable vertices to the end of the list -------------------------------------------------------- */ if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n vertex %d is a leaf", v->id) ; fflush(info->msgFile) ; } v->status = 'L' ; nedge = v->nadj ; edges = v->adj ; i = 0 ; j = nedge - 1 ; while ( i <= j ) { wid = edges[i] ; w = msmd->vertices + wid ; if ( w == v || w->status == 'I' ) { edges[i] = edges[j] ; edges[j] = wid ; j-- ; } else { w->mark = 'X' ; i++ ; } } v->nadj = j + 1 ; } else { /* ---------------------------------------------------- v is not a leaf, merge its subtrees' boundaries with its uncovered edge list to get the new boundary ---------------------------------------------------- */ if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n vertex %d is not a leaf", v->id) ; fprintf(info->msgFile, "\n vertex %d, subtrees :", v->id) ; IP_fp80(info->msgFile, v->subtrees, 20) ; fflush(info->msgFile) ; } v->status = 'E' ; nadj = 0 ; while ( (ip = v->subtrees) != NULL ) { if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n subtree %d, ip(%p)<%d,%p>", ip->val, ip, ip->val, ip->next) ; fflush(info->msgFile) ; } uid = ip->val ; u = msmd->vertices + uid ; u->par = v ; nbnd = u->nadj ; bnd = u->adj ; if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n bnd of adj subtree %d :", u->id) ; IVfp80(info->msgFile, nbnd, bnd, 25, &ierr) ; fflush(info->msgFile) ; } for ( i = 0 ; i < nbnd ; i++ ) { wid = bnd[i] ; w = msmd->vertices + wid ; if ( w->mark == 'O' && w->status != 'I' ) { w->mark = 'X' ; adj[nadj++] = wid ; } } if ( u->status == 'E' ) { /* ------------------------------------------ u is not a leaf, free its boundary storage ------------------------------------------ */ IVfree(u->adj) ; info->nbytes -= u->nadj * sizeof(int) ; } u->adj = NULL ; u->nadj = 0 ; /* -------------------------------------- put this IP structure on the free list -------------------------------------- */ v->subtrees = ip->next ; ip->val = -1 ; ip->next = msmd->freeIP ; msmd->freeIP = ip ; if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n v->subtrees = %p, msmd->freeIP = %p", v->subtrees, msmd->freeIP) ; fflush(info->msgFile) ; } } /* ------------------------------------------------ merge all uncovered edges into the boundary list ------------------------------------------------ */ nedge = v->nadj ; edges = v->adj ; for ( i = 0 ; i < nedge ; i++ ) { wid = edges[i] ; w = msmd->vertices + wid ; if ( w->mark == 'O' && w->status != 'I' ) { w->mark = 'X' ; adj[nadj++] = wid ; } } /* ---------------------------------------------------------- if boundary is not empty, allocate new storage for entries ---------------------------------------------------------- */ v->nadj = nadj ; if ( nadj > 0 ) { v->adj = IVinit(nadj, -1) ; IVcopy(nadj, v->adj, adj) ; info->nbytes += nadj*sizeof(int) ; if ( info->maxnbytes < info->nbytes ) { info->maxnbytes = info->nbytes ; } } else { v->adj = NULL ; } } if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n bnd(%d) :", v->id) ; if ( v->nadj > 0 ) { IVfp80(info->msgFile, v->nadj, v->adj, 17, &ierr) ; } fflush(info->msgFile) ; } /* ---------------------------------------------- for each boundary vertex 1. add v to subtree list 2. put v on reach set if not already there 3. unmark and add weight to boundary weight ---------------------------------------------- */ nbnd = v->nadj ; bnd = v->adj ; if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n %d's bnd :", v->id) ; IVfp80(info->msgFile, nbnd, bnd, 12, &ierr) ; fflush(info->msgFile) ; } wght = 0 ; for ( i = 0 ; i < nbnd ; i++ ) { wid = bnd[i] ; w = msmd->vertices + wid ; if ( info->msglvl > 4 ) { fprintf(info->msgFile, "\n adjacent vertex %d", w->id) ; fflush(info->msgFile) ; } /* ------------------------------- add v to the subtree list for w ------------------------------- */ if ( (ip = msmd->freeIP) == NULL ) { if ( info->msglvl > 2 ) { fprintf(info->msgFile, "\n need to get more IP objects") ; fflush(info->msgFile) ; } /* ------------------------------------------------- no more free IP structures, allocate more storage ------------------------------------------------- */ if ( (ip = IP_init(msmd->incrIP, IP_FORWARD)) == NULL ) { fprintf(stderr, "\n fatal error in MSMD_eliminateVtx%p,%p,%p)" "\n unable to allocate more IP objects", msmd, v, info) ; exit(-1) ; } if ( info->msglvl > 4 ) { fprintf(info->msgFile, "\n old baseIP = %p", msmd->baseIP) ; fprintf(info->msgFile, "\n new baseIP = %p", ip) ; fflush(info->msgFile) ; } ip->next = msmd->baseIP ; msmd->baseIP = ip ; info->nbytes += msmd->incrIP*sizeof(struct _IP) ; if ( info->maxnbytes < info->nbytes ) { info->maxnbytes = info->nbytes ; } ip = msmd->freeIP = msmd->baseIP + 1 ; if ( info->msglvl > 2 ) { fprintf(info->msgFile, "\n all set") ; fflush(info->msgFile) ; } } msmd->freeIP = ip->next ; ip->val = v->id ; ip->next = NULL ; for ( ip2 = w->subtrees, prev = NULL ; ip2 != NULL && ip2->val > ip->val ; ip2 = ip2->next ) { prev = ip2 ; } if ( prev == NULL ) { w->subtrees = ip ; } else { prev->next = ip ; } ip->next = ip2 ; if ( info->msglvl > 3 ) { fprintf(info->msgFile, "\n %d's subtrees :", w->id) ; IP_fp80(info->msgFile, w->subtrees, 15) ; fflush(info->msgFile) ; } /* -------------------------------- add w to reach list if necessary -------------------------------- */ if ( info->msglvl > 4 ) { fprintf(info->msgFile, "\n status[%d] = %c", wid, w->status) ; fflush(info->msgFile) ; } switch ( w->status ) { case 'D' : if ( info->msglvl > 4 ) { fprintf(info->msgFile, ", remove from heap") ; fflush(info->msgFile) ; } IIheap_remove(msmd->heap, wid) ; case 'O' : case 'B' : if ( info->msglvl > 4 ) { fprintf(info->msgFile, ", add to reach set, nreach = %d", IV_size(reachIV) + 1) ; fflush(info->msgFile) ; } IV_push(reachIV, wid) ; w->status = 'R' ; case 'R' : break ; case 'I' : break ; default : fprintf(stderr, "\n error in MSMD_eliminateVtx(%p,%p,%p)" "\n status[%d] = '%c'\n", msmd, v, info, wid, w->status) ; fprintf(stderr, "\n msmd->nvtx = %d", msmd->nvtx) ; exit(-1) ; } /* -------------------------------- unmark the boundary vertices and store the weight of the boundary -------------------------------- */ w->mark = 'O' ; wght += w->wght ; } /* ------------------------------------ unmark v and set its boundary weight ------------------------------------ */ v->mark = 'O' ; v->bndwght = wght ; return ; }
/* ------------------------------------------------------- purpose -- merge the front tree allowing a parent to absorb all children when that creates at most maxzeros zero entries inside a front return -- IV object that has the old front to new front map created -- 98jan29, cca ------------------------------------------------------- */ ETree * ETree_mergeFrontsAll ( ETree *etree, int maxzeros, IV *nzerosIV ) { ETree *etree2 ; int cost, J, Jall, K, KandBnd, nfront, nvtx, nnew ; int *bndwghts, *fch, *map, *nodwghts, *nzeros, *rep, *sib, *temp ; IV *mapIV ; Tree *tree ; /* --------------- check the input --------------- */ if ( etree == NULL || nzerosIV == NULL || (nfront = etree->nfront) <= 0 || (nvtx = etree->nvtx) <= 0 ) { fprintf(stderr, "\n fatal error in ETree_mergeFrontsAll(%p,%d,%p)" "\n bad input\n", etree, maxzeros, nzerosIV) ; if ( etree != NULL ) { fprintf(stderr, "\n nfront = %d, nvtx = %d", etree->nfront, etree->nvtx) ; } spoolesFatal(); } if ( IV_size(nzerosIV) != nfront ) { fprintf(stderr, "\n fatal error in ETree_mergeFrontsAll(%p,%d,%p)" "\n size(nzerosIV) = %d, nfront = %d\n", etree, maxzeros, nzerosIV, IV_size(nzerosIV), nfront) ; spoolesFatal(); } nzeros = IV_entries(nzerosIV) ; /* ---------------------- set up working storage ---------------------- */ tree = etree->tree ; fch = ETree_fch(etree) ; sib = ETree_sib(etree) ; nodwghts = IVinit(nfront, 0) ; IVcopy(nfront, nodwghts, ETree_nodwghts(etree)) ; bndwghts = ETree_bndwghts(etree) ; rep = IVinit(nfront, -1) ; IVramp(nfront, rep, 0, 1) ; /* ------------------------------------------ perform a post-order traversal of the tree ------------------------------------------ */ for ( K = Tree_postOTfirst(tree) ; K != -1 ; K = Tree_postOTnext(tree, K) ) { #if MYDEBUG > 0 fprintf(stdout, "\n\n ##### visiting front %d", K) ; fflush(stdout) ; #endif if ( (J = fch[K]) != -1 ) { KandBnd = nodwghts[K] + bndwghts[K] ; Jall = 0 ; cost = 2*nzeros[K] ; for ( J = fch[K] ; J != -1 ; J = sib[J] ) { Jall += nodwghts[J] ; cost -= nodwghts[J]*nodwghts[J] ; cost += 2*nodwghts[J]*(KandBnd - bndwghts[J]) ; cost += 2*nzeros[J] ; } cost += Jall*Jall ; cost = cost/2 ; #if MYDEBUG > 0 fprintf(stdout, "\n cost = %d", cost) ; fflush(stdout) ; #endif if ( cost <= maxzeros ) { for ( J = fch[K] ; J != -1 ; J = sib[J] ) { #if MYDEBUG > 0 fprintf(stdout, "\n merging %d into %d", J, K) ; fflush(stdout) ; #endif rep[J] = K ; nodwghts[K] += nodwghts[J] ; } nzeros[K] = cost ; } } } #if MYDEBUG > 0 fprintf(stdout, "\n\n whoa, finished") ; fflush(stdout) ; #endif /* ------------------------------------------------- take the map from fronts to representative fronts and make the map from old fronts to new fronts ------------------------------------------------- */ mapIV = IV_new() ; IV_init(mapIV, nfront, NULL) ; map = IV_entries(mapIV) ; for ( J = 0, nnew = 0 ; J < nfront ; J++ ) { if ( rep[J] == J ) { map[J] = nnew++ ; } else { K = J ; while ( rep[K] != K ) { K = rep[K] ; } rep[J] = K ; } } for ( J = 0 ; J < nfront ; J++ ) { if ( (K = rep[J]) != J ) { map[J] = map[K] ; } } /* ------------------------------- get the compressed ETree object ------------------------------- */ etree2 = ETree_compress(etree, mapIV) ; /* ------------------------- remap the nzeros[] vector ------------------------- */ temp = IVinit(nfront, 0) ; IVcopy(nfront, temp, nzeros) ; IV_setSize(nzerosIV, nnew) ; nzeros = IV_entries(nzerosIV) ; for ( J = 0 ; J < nfront ; J++ ) { if ( rep[J] == J ) { nzeros[map[J]] = temp[J] ; } } IVfree(temp) ; /* ------------------------ free the working storage ------------------------ */ IVfree(nodwghts) ; IVfree(rep) ; IV_free(mapIV) ; return(etree2) ; }
PetscErrorCode MatFactorNumeric_SeqSpooles(Mat F,Mat A,const MatFactorInfo *info) { Mat_Spooles *lu = (Mat_Spooles*)(F)->spptr; ChvManager *chvmanager ; Chv *rootchv ; IVL *adjIVL; PetscErrorCode ierr; PetscInt nz,nrow=A->rmap->n,irow,nedges,neqns=A->cmap->n,*ai,*aj,i,*diag=0,fierr; PetscScalar *av; double cputotal,facops; #if defined(PETSC_USE_COMPLEX) PetscInt nz_row,*aj_tmp; PetscScalar *av_tmp; #else PetscInt *ivec1,*ivec2,j; double *dvec; #endif PetscBool isSeqAIJ,isMPIAIJ; PetscFunctionBegin; if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */ (F)->ops->solve = MatSolve_SeqSpooles; (F)->assembled = PETSC_TRUE; /* set Spooles options */ ierr = SetSpoolesOptions(A, &lu->options);CHKERRQ(ierr); lu->mtxA = InpMtx_new(); } /* copy A to Spooles' InpMtx object */ ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isMPIAIJ);CHKERRQ(ierr); if (isSeqAIJ){ Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; if (lu->options.symflag == SPOOLES_NONSYMMETRIC) { nz=mat->nz; } else { /* SPOOLES_SYMMETRIC || SPOOLES_HERMITIAN */ nz=(mat->nz + A->rmap->n)/2; diag=mat->diag; } } else { /* A is SBAIJ */ Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data; ai=mat->i; aj=mat->j; av=mat->a; nz=mat->nz; } InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0); #if defined(PETSC_USE_COMPLEX) for (irow=0; irow<nrow; irow++) { if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !(isSeqAIJ || isMPIAIJ)){ nz_row = ai[irow+1] - ai[irow]; aj_tmp = aj + ai[irow]; av_tmp = av + ai[irow]; } else { nz_row = ai[irow+1] - diag[irow]; aj_tmp = aj + diag[irow]; av_tmp = av + diag[irow]; } for (i=0; i<nz_row; i++){ InpMtx_inputComplexEntry(lu->mtxA, irow, *aj_tmp++,PetscRealPart(*av_tmp),PetscImaginaryPart(*av_tmp)); av_tmp++; } } #else ivec1 = InpMtx_ivec1(lu->mtxA); ivec2 = InpMtx_ivec2(lu->mtxA); dvec = InpMtx_dvec(lu->mtxA); if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !isSeqAIJ){ for (irow = 0; irow < nrow; irow++){ for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow; } IVcopy(nz, ivec2, aj); DVcopy(nz, dvec, av); } else { nz = 0; for (irow = 0; irow < nrow; irow++){ for (j = diag[irow]; j<ai[irow+1]; j++) { ivec1[nz] = irow; ivec2[nz] = aj[j]; dvec[nz] = av[j]; nz++; } } } InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec); #endif InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 0 ) { int err; printf("\n\n input matrix"); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */ /*--------------------------------------------------- find a low-fill ordering (1) create the Graph object (2) order the graph -------------------------------------------------------*/ if (lu->options.useQR){ adjIVL = InpMtx_adjForATA(lu->mtxA); } else { adjIVL = InpMtx_fullAdjacency(lu->mtxA); } nedges = IVL_tsize(adjIVL); lu->graph = Graph_new(); Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL); if ( lu->options.msglvl > 2 ) { int err; if (lu->options.useQR){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of A^T A");CHKERRQ(ierr); } else { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");CHKERRQ(ierr); } Graph_writeForHumanEye(lu->graph, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } switch (lu->options.ordering) { case 0: lu->frontETree = orderViaBestOfNDandMS(lu->graph, lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize, lu->options.seed, lu->options.msglvl, lu->options.msgFile); break; case 1: lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 2: lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; case 3: lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize, lu->options.seed,lu->options.msglvl,lu->options.msgFile); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering"); } if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } /* get the permutation, permute the front tree */ lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree); lu->oldToNew = IV_entries(lu->oldToNewIV); lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree); if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); /* permute the matrix */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC) { InpMtx_mapToUpperTriangle(lu->mtxA); } #if defined(PETSC_USE_COMPLEX) if ( lu->options.symflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangleH(lu->mtxA); } #endif InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); /* get symbolic factorization */ if (lu->options.useQR){ lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph); IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV); IVL_sortUp(lu->symbfacIVL); ETree_permuteVertices(lu->frontETree, lu->oldToNewIV); } else { lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA); } if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n old-to-new permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n new-to-old permutation vector");CHKERRQ(ierr); IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree after permutation");CHKERRQ(ierr); ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n symbolic factorization");CHKERRQ(ierr); IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->frontmtx = FrontMtx_new(); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); } else { /* new num factorization using previously computed symbolic factor */ if (lu->options.pivotingflag) { /* different FrontMtx is required */ FrontMtx_free(lu->frontmtx); lu->frontmtx = FrontMtx_new(); } else { FrontMtx_clearData (lu->frontmtx); } SubMtxManager_free(lu->mtxmanager); lu->mtxmanager = SubMtxManager_new(); SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0); /* permute mtxA */ if (lu->options.useQR){ InpMtx_permute(lu->mtxA, NULL, lu->oldToNew); } else { InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew); if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { InpMtx_mapToUpperTriangle(lu->mtxA); } InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS); } InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS); if ( lu->options.msglvl > 2 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");CHKERRQ(ierr); InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile); } } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */ if (lu->options.useQR){ FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS, SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } else { FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag, FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } else if ( lu->options.patchAndGoFlag == 2 ) { lu->frontmtx->patchinfo = PatchAndGoInfo_new(); PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge, lu->options.storeids, lu->options.storevalues); } } /* numerical factorization */ chvmanager = ChvManager_new(); ChvManager_init(chvmanager, NO_LOCK, 1); DVfill(10, lu->cpus, 0.0); if (lu->options.useQR){ facops = 0.0 ; FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager, lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 1 ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n facops = %9.2f", facops);CHKERRQ(ierr); } } else { IVfill(20, lu->stats, 0); rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0, chvmanager, &fierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile); if (rootchv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"\n matrix found to be singular"); if (fierr >= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n error encountered at front %D", fierr); if(lu->options.FrontMtxInfo){ ierr = PetscPrintf(PETSC_COMM_SELF,"\n %8d pivots, %8d pivot tests, %8d delayed rows and columns\n",lu->stats[0], lu->stats[1], lu->stats[2]);CHKERRQ(ierr); cputotal = lu->cpus[8] ; if ( cputotal > 0.0 ) { ierr = PetscPrintf(PETSC_COMM_SELF, "\n cpus cpus/totaltime" "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f \n", lu->cpus[0], 100.*lu->cpus[0]/cputotal, lu->cpus[1], 100.*lu->cpus[1]/cputotal, lu->cpus[2], 100.*lu->cpus[2]/cputotal, lu->cpus[3], 100.*lu->cpus[3]/cputotal, lu->cpus[4], 100.*lu->cpus[4]/cputotal, lu->cpus[5], 100.*lu->cpus[5]/cputotal, lu->cpus[6], 100.*lu->cpus[6]/cputotal, lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal);CHKERRQ(ierr); } } } ChvManager_free(chvmanager); if ( lu->options.msglvl > 0 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */ if ( lu->options.patchAndGoFlag == 1 ) { if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { if (lu->options.msglvl > 0 ){ ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } else if ( lu->options.patchAndGoFlag == 2 ) { if (lu->options.msglvl > 0 ){ if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");CHKERRQ(ierr); IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile); } if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) { ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");CHKERRQ(ierr); DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile); } } PatchAndGoInfo_free(lu->frontmtx->patchinfo); } } /* post-process the factorization */ FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile); if ( lu->options.msglvl > 2 ) { int err; ierr = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix after post-processing");CHKERRQ(ierr); FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile); err = fflush(lu->options.msgFile); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } lu->flg = SAME_NONZERO_PATTERN; lu->CleanUpSpooles = PETSC_TRUE; PetscFunctionReturn(0); }
/* ------------------------------------ resize the deque if the new size is large enough then copy the old data return 1 else error, return -1 endif created -- 96jun06, cca ------------------------------------ */ int Ideq_resize ( Ideq *deq, int newsize ) { int count, head, j, size, tail ; int *ivec, *tmp ; /* --------------- check the input --------------- */ if ( deq == NULL || newsize < 0 ) { fprintf(stderr, "\n fatal error in Ideq_resize(%p,%d)" "\n bad input\n", deq, newsize) ; exit(-1) ; } /* --------------------------------------- check that the new size is large enough --------------------------------------- */ if ( deq->tail >= deq->head ) { size = deq->tail - deq->head + 1 ; } else { size = deq->tail + deq->iv.size - deq->head + 1 ; } if ( size > newsize ) { return(-1) ; } /* --------------------- create the new vector --------------------- */ tmp = IVinit(size, -1) ; if ( (j = deq->head) != -1 ) { ivec = deq->iv.vec ; count = 0 ; while ( j != deq->tail ) { tmp[count++] = ivec[j] ; if ( j == size - 1 ) { j = 0 ; } else { j++ ; } } tmp[count++] = ivec[j] ; head = 0 ; tail = count - 1 ; } else { head = tail = -1 ; } Ideq_clearData(deq) ; IV_init(&deq->iv, newsize, NULL) ; if ( size > 0 ) { IVcopy(size, deq->iv.vec, tmp) ; } IVfree(tmp) ; deq->head = head ; deq->tail = tail ; deq->maxsize = newsize ; return(1) ; }