/* ------------------------------------------------------------- purpose -- the IVL object ivl and IV object ownersIV are both found on each process. the ownersIV object is identical over all the processes, and owners[ii] tells which processes owns list ii of the ivl object. on return from this method, the ivl object is replicated over all the processes. each process sends the lists that it owns to all the other processes. created -- 98apr03, cca ------------------------------------------------------------- */ void IVL_MPI_allgather ( IVL *ivl, IV *ownersIV, int stats[], int msglvl, FILE *msgFile, int firsttag, MPI_Comm comm ) { int count, destination, ii, ilist, incount, jlist, jproc, left, maxcount, myid, nlist, nmylists, notherlists, nowners, nproc, offset, outcount, right, size, source, tag ; int *counts, *inbuffer, *list, *outbuffer, *owners ; MPI_Status status ; /* --------------- check the input --------------- */ if ( ivl == NULL || ownersIV == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n ivl = %p, ownersIV = %p\n", ivl, ownersIV) ; exit(-1) ; } /* ---------------------------------------------- get id of self, # of processes and # of fronts ---------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; nlist = ivl->nlist ; IV_sizeAndEntries(ownersIV, &nowners, &owners) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n inside IVL_MPI_allgather()" "\n nproc = %d, myid = %d, nlist = %d, nowners = %d", nproc, myid, nlist, nowners) ; fflush(msgFile) ; } if ( nlist != nowners || owners == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n nlist = %d, nowners = %d, owners = %p\n", nlist, nowners, owners) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ivl") ; IVL_writeForHumanEye(ivl, msgFile) ; fprintf(msgFile, "\n\n ownersIV") ; IV_writeForHumanEye(ownersIV, msgFile) ; fflush(msgFile) ; } /* ----------------------------------------------- step 1 : determine the size of the message that this process will send to the others ----------------------------------------------- */ for ( ilist = 0, outcount = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] < 0 || owners[ilist] >= nproc ) { fprintf(stderr, "\n owners[%d] = %d", ilist, owners[ilist]) ; exit(-1) ; } if ( owners[ilist] == myid ) { outcount += 2 ; IVL_listAndSize(ivl, ilist, &size, &list) ; outcount += size ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n outcount = %d", outcount) ; fflush(msgFile) ; } /* ---------------------------------------------------- do an all-to-all gather/scatter counts[jproc] = # of int's in the message from jproc ---------------------------------------------------- */ counts = IVinit(nproc, 0) ; counts[myid] = outcount ; MPI_Allgather((void *) &counts[myid], 1, MPI_INT, (void *) counts, 1, MPI_INT, comm) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } /* ----------------------------- set up the in and out buffers ----------------------------- */ if ( outcount > 0 ) { outbuffer = IVinit(outcount, -1) ; for ( ilist = nmylists = 0, ii = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] == myid ) { nmylists++ ; IVL_listAndSize(ivl, ilist, &size, &list) ; outbuffer[ii++] = ilist ; outbuffer[ii++] = size ; if ( size > 0 ) { IVcopy(size, &outbuffer[ii], list) ; ii += size ; } } } outbuffer[0] = nmylists ; if ( ii != outcount ) { fprintf(stderr, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; fprintf(msgFile, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; exit(-1) ; } } else { outbuffer = NULL ; } maxcount = IVmax(nproc, counts, &jproc) ; if ( maxcount > 0 ) { inbuffer = IVinit(maxcount, -1) ; } else { inbuffer = NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n outbuffer %p, maxcount %d, inbuffer %p", outbuffer, maxcount, inbuffer) ; fflush(msgFile) ; } /* ------------------------------------- step 2: loop over the other processes send and receive information ------------------------------------- */ for ( offset = 1, tag = firsttag ; offset < nproc ; offset++, tag++ ) { right = (myid + offset) % nproc ; if ( offset <= myid ) { left = myid - offset ; } else { left = nproc + myid - offset ; } if ( outcount > 0 ) { destination = right ; stats[0]++ ; stats[2] += outcount*sizeof(int) ; } else { destination = MPI_PROC_NULL ; } incount = counts[left] ; if ( incount > 0 ) { source = left ; stats[1]++ ; stats[3] += incount*sizeof(int) ; } else { source = MPI_PROC_NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n offset %d, source %d, destination %d", offset, source, destination) ; fflush(msgFile) ; } /* ----------------- do a send/receive ----------------- */ MPI_Sendrecv(outbuffer, outcount, MPI_INT, destination, tag, inbuffer, incount, MPI_INT, source, tag, comm, &status) ; if ( source != MPI_PROC_NULL ) { MPI_Get_count(&status, MPI_INT, &count) ; if ( count != incount ) { fprintf(stderr, "\n 1. fatal error in IVL_MPI_allgather()" "\n proc %d : source = %d, count = %d, incount = %d\n", myid, source, count, incount) ; exit(-1) ; } } /* ---------------------------- set the values in the vector ---------------------------- */ notherlists = inbuffer[0] ; for ( ilist = 0, ii = 1 ; ilist < notherlists ; ilist++ ) { jlist = inbuffer[ii++] ; size = inbuffer[ii++] ; if ( size > 0 ) { IVL_setList(ivl, jlist, size, &inbuffer[ii]) ; ii += size ; } } if ( ii != incount ) { fprintf(msgFile, "\n ii = %d, incount = %d", ii, incount) ; fprintf(stderr, "\n ii = %d, incount = %d", ii, incount) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n after setting values") ; IVL_writeForHumanEye(ivl, msgFile) ; fflush(msgFile) ; } } /* ------------------------ free the working storage ------------------------ */ IVfree(counts) ; if ( outbuffer != NULL ) { IVfree(outbuffer) ; } if ( inbuffer != NULL ) { IVfree(inbuffer) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n leaving IVL_MPI_gatherall()") ; fflush(msgFile) ; } return ; }
/* ------------------------------------------------------------ compute an old-to-new ordering for local nested dissection in three dimensions n1 -- number of grid points in first direction n2 -- number of grid points in second direction n3 -- number of grid points in third direction p1 -- number of domains in first direction p2 -- number of domains in second direction p3 -- number of domains in third direction dsizes1 -- domain sizes in first direction, size p1 if NULL, then we construct our own dsizes2 -- domain sizes in second direction, size p2 if NULL, then we construct our own dsizes3 -- domain sizes in third direction, size p3 if NULL, then we construct our own oldToNew -- old-to-new permutation vector note : the following must hold n1 > 0, n2 >0, n3 > 0, n1 >= 2*p1 - 1, n2 >= 2*p2 - 1, n3 >= 2*p3 - 1, p3 > 1 sum(dsizes1) = n1 - p1 + 1, sum(dsizes2) = n2 - p2 + 1 sum(dsizes3) = n3 - p3 + 1 created -- 95nov16, cca ------------------------------------------------------------ */ void localND3D ( int n1, int n2, int n3, int p1, int p2, int p3, int dsizes1[], int dsizes2[], int dsizes3[], int oldToNew[] ) { int i, idom, ijk, isw, j, jdom, jsw, k, kdom, ksw, length1, length2, length3, m, m1, m2, m3, msize, now, nvtx ; int *length1s, *length2s, *length3s, *isws, *jsws, *ksws, *temp ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || n3 <= 0 || 2*p1 - 1 > n1 || 2*p2 - 1 > n2 || 2*p3 - 1 > n3 ) { fprintf(stderr, "\n error in input data") ; return ; } if ( p3 <= 1 ) { fprintf(stderr, "\n p3 must be > 1") ; return ; } if ( oldToNew == NULL ) { fprintf(stderr, "\n oldToNew = NULL") ; return ; } if ( dsizes1 != NULL && IVsum(p1, dsizes1) != n1 - p1 + 1 ) { fprintf(stderr, "\n IVsum(p1, dsizes1) != n1 - p1 + 1 ") ; return ; } if ( dsizes2 != NULL && IVsum(p2, dsizes2) != n2 - p2 + 1 ) { fprintf(stderr, "\n IVsum(p2, dsizes2) != n2 - p2 + 1 ") ; return ; } if ( dsizes3 != NULL && IVsum(p3, dsizes3) != n3 - p3 + 1 ) { fprintf(stderr, "\n IVsum(p3, dsizes3) != n3 - p3 + 1 ") ; return ; } if ( dsizes1 != NULL && IVmin(p1, dsizes1, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p1, dsizes1) must be > 0") ; return ; } if ( dsizes2 != NULL && IVmin(p2, dsizes2, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p2, dsizes2) must be > 0") ; return ; } if ( dsizes3 != NULL && IVmin(p3, dsizes3, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p3, dsizes3) must be > 0") ; return ; } nvtx = n1*n2*n3 ; /* ---------------------------------- construct the domain sizes vectors ---------------------------------- */ if ( dsizes1 == NULL ) { length1s = IVinit(p1, 0) ; length1 = (n1 - p1 + 1) / p1 ; m1 = (n1 - p1 + 1) % p1 ; for ( i = 0 ; i < m1 ; i++ ) { length1s[i] = length1 + 1 ; } for ( ; i < p1 ; i++ ) { length1s[i] = length1 ; } } else { length1s = dsizes1 ; } if ( dsizes2 == NULL ) { length2s = IVinit(p2, 0) ; length2 = (n2 - p2 + 1) / p2 ; m2 = (n2 - p2 + 1) % p2 ; for ( i = 0 ; i < m2 ; i++ ) { length2s[i] = length2 + 1 ; } for ( ; i < p2 ; i++ ) { length2s[i] = length2 ; } } else { length2s = dsizes2 ; } if ( dsizes3 == NULL ) { length3s = IVinit(p3, 0) ; length3 = (n3 - p3 + 1) / p3 ; m3 = (n3 - p3 + 1) % p3 ; for ( i = 0 ; i < m3 ; i++ ) { length3s[i] = length3 + 1 ; } for ( ; i < p3 ; i++ ) { length3s[i] = length3 ; } } else { length3s = dsizes3 ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside localND3D") ; fprintf(stdout, "\n n1 = %d, n2 = %d, n3 = %d, p1 = %d, p2 = %dm p3 = %d", n1, n2, n3, p1, p2, p3) ; fprintf(stdout, "\n length1s[%d] = ", p1) ; IVfp80(stdout, p1, length1s, 12) ; fprintf(stdout, "\n length2s[%d] = ", p2) ; IVfp80(stdout, p2, length2s, 12) ; fprintf(stdout, "\n length3s[%d] = ", p3) ; IVfp80(stdout, p3, length3s, 13) ; #endif /* --------------------------------------- determine the first and last domain ids and the array of southwest points --------------------------------------- */ isws = IVinit(p1, -1) ; for ( idom = 0, isw = 0 ; idom < p1 ; idom++ ) { isws[idom] = isw ; isw += length1s[idom] + 1 ; } jsws = IVinit(p2, -1) ; for ( jdom = 0, jsw = 0 ; jdom < p2 ; jdom++ ) { jsws[jdom] = jsw ; jsw += length2s[jdom] + 1 ; } ksws = IVinit(p3, -1) ; for ( kdom = 0, ksw = 0 ; kdom < p3 ; kdom++ ) { ksws[kdom] = ksw ; ksw += length3s[kdom] + 1 ; } #if MYDEBUG > 1 fprintf(stdout, "\n isws[%d] = ", p1) ; IVfp80(stdout, p1, isws, 12) ; fprintf(stdout, "\n jsws[%d] = ", p2) ; IVfp80(stdout, p2, jsws, 12) ; fprintf(stdout, "\n ksws[%d] = ", p3) ; IVfp80(stdout, p3, ksws, 12) ; #endif /* ---------------------------------------------------------------- create a temporary permutation vector for the domains' orderings ---------------------------------------------------------------- */ msize = IVmax(p1, length1s, &i) * IVmax(p2, length2s, &i) * IVmax(p3, length3s, &k) ; temp = IVinit(msize, -1) ; /* ------------------------ fill in the domain nodes ------------------------ */ now = 0 ; for ( kdom = 0 ; kdom < p3 ; kdom++ ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = 0 ; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; /* fprintf(stdout, "\n domain (%d,%d,%d), size %d x %d x %d", idom, jdom, kdom, length1, length2, length3) ; fprintf(stdout, "\n (isw, jsw, ksw) = (%d, %d, %d)", isw, jsw, ksw) ; */ mkNDperm(length1, length2, length3, temp, 0, length1-1, 0, length2-1, 0, length3-1) ; for ( m = 0 ; m < length1*length2*length3 ; m++ ) { ijk = temp[m] ; /* fprintf(stdout, "\n m = %d, ijk = %d", m, ijk) ; */ k = ksw + ijk / (length1*length2) ; ijk = ijk % (length1*length2) ; j = jsw + ijk / length1 ; i = isw + ijk % length1 ; /* fprintf(stdout, ", (i, j, k) = (%d, %d, %d)", i, j, k) ; */ ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n old-to-new after domains are numbered") ; fp3DGrid(n1, n2, n3, oldToNew, stdout) ; #endif /* --------------------------------- fill in the lower separator nodes --------------------------------- */ for ( kdom = 0 ; kdom < (p3/2) ; kdom++ ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = 0 ; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; /* ------- 3 faces ------- */ if ( isw > 0 ) { i = isw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( jsw > 0 ) { j = jsw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( ksw > 0 ) { k = ksw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } /* ----------- three edges ----------- */ if ( isw > 0 && jsw > 0 ) { i = isw - 1 ; j = jsw - 1 ; for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( isw > 0 && ksw > 0 ) { i = isw - 1 ; k = ksw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( jsw > 0 && ksw > 0 ) { j = jsw - 1 ; k = ksw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ---------------- one corner point ---------------- */ if ( isw > 0 && jsw > 0 && ksw > 0 ) { i = isw - 1 ; j = jsw - 1 ; k = ksw - 1 ; ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the lower separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the upper separator nodes --------------------------------- */ for ( kdom = p3 - 1 ; kdom >= (p3/2) ; kdom-- ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = p2 - 1 ; jdom >= 0 ; jdom-- ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = p1 - 1 ; idom >= 0 ; idom-- ) { isw = isws[idom] ; length1 = length1s[idom] ; /* ------- 3 faces ------- */ if ( isw + length1 < n1 ) { i = isw + length1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( jsw + length2 < n2 ) { j = jsw + length2 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( ksw + length3 < n3 ) { k = ksw + length3 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } /* ----------- three edges ----------- */ if ( isw + length1 < n1 && jsw + length2 < n2 ) { i = isw + length1 ; j = jsw + length2 ; for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( isw + length1 < n1 && ksw + length3 < n3 ) { i = isw + length1 ; k = ksw + length3 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( jsw + length2 < n2 && ksw + length3 < n3 ) { j = jsw + length2 ; k = ksw + length3 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ---------------- one corner point ---------------- */ if ( isw + length1 < n1 && jsw + length2 < n2 && ksw + length3 < n3 ) { i = isw + length1 ; j = jsw + length2 ; k = ksw + length3 ; ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the upper separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* ------------------------------- fill in the top level separator ------------------------------- */ m1 = p3 / 2 ; for ( kdom = 0, k = 0 ; kdom < m1 ; kdom++ ) { k += length3s[kdom] + 1 ; } k-- ; for ( j = 0 ; j < n2 ; j++ ) { for ( i = 0 ; i < n1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ------------------------ free the working storage ------------------------ */ if ( dsizes1 == NULL ) { IVfree(length1s) ; } if ( dsizes2 == NULL ) { IVfree(length2s) ; } if ( dsizes3 == NULL ) { IVfree(length3s) ; } IVfree(isws) ; IVfree(jsws) ; IVfree(ksws) ; IVfree(temp) ; return ; }
/* ------------------------------------------------------------- purpose -- after pivoting for a nonsymmetric factorization, some delayed columns may belong to a process other than its original owner. this method returns an IV object that maps columns to owning processes. created -- 98may22, cca ------------------------------------------------------------- */ IV * FrontMtx_MPI_colmapIV ( FrontMtx *frontmtx, IV *frontOwnersIV, int msglvl, FILE *msgFile, MPI_Comm comm ) { int buffersize, ii, iproc, J, myid, nDJ, neqns, nfront, nproc, ncolJ, nToSend, v ; int *buffer, *counts, *frontOwners, *inbuffer, *outbuffer, *colindJ, *colmap, *vtxToFront ; IV *colmapIV ; /* ------------------------------------------- get the process id and number of processors ------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; neqns = frontmtx->neqns ; vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ; IV_sizeAndEntries(frontOwnersIV, &nfront, &frontOwners) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inside FrontMtx_MPI_colmapIV()" "\n myid = %d, nproc = %d, nfront = %d, neqns = %d", myid, nproc, nfront, neqns) ; fflush(msgFile) ; } /* ---------------------------------------------------------- loop through the owned fronts and store each column in an owned front that was originally owned by another processor ---------------------------------------------------------- */ outbuffer = IVinit(neqns, -1) ; for ( J = nToSend = 0 ; J < nfront ; J++ ) { if ( frontOwners[J] == myid && (nDJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d owned, nDJ = %d, ncolJ = %d", J, nDJ, ncolJ) ; fflush(msgFile) ; } for ( ii = 0 ; ii < nDJ ; ii++ ) { v = colindJ[ii] ; if ( frontOwners[vtxToFront[v]] != myid ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n column %d originally owned by %d", v, frontOwners[vtxToFront[v]]) ; fflush(msgFile) ; } outbuffer[nToSend++] = v ; } } } } IVqsortUp(nToSend, outbuffer) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n shifted vertices") ; IVfprintf(msgFile, nToSend, outbuffer) ; fflush(msgFile) ; } counts = IVinit(nproc, 0) ; /* -------------------------------------------- use an all-gather call to get the number of moved columns that are owned by each process -------------------------------------------- */ MPI_Allgather((void *) &nToSend, 1, MPI_INT, counts, 1, MPI_INT, comm) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n after the all-gather operation, counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } buffersize = IVmax(nproc, counts, &iproc) ; inbuffer = IVinit(buffersize, -1) ; /* ----------------------------------- initialize the column map IV object ----------------------------------- */ colmapIV = IV_new() ; IV_init(colmapIV, neqns, NULL) ; colmap = IV_entries(colmapIV) ; IVgather(neqns, colmap, frontOwners, vtxToFront) ; /* -------------------------------------------------------------- loop over the other processes, receive vector of moved columns -------------------------------------------------------------- */ for ( iproc = 0 ; iproc < nproc ; iproc++ ) { if ( counts[iproc] > 0 ) { if ( iproc == myid ) { /* ------------------------------------- send buffer vector to other processes ------------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n sending outbuffer to all processes") ; IVfprintf(msgFile, nToSend, outbuffer) ; fflush(msgFile) ; } MPI_Bcast(outbuffer, nToSend, MPI_INT, iproc, comm) ; buffer = outbuffer ; } else { /* ----------------------------------------- receive the vector from the other process ----------------------------------------- */ MPI_Bcast(inbuffer, counts[iproc], MPI_INT, iproc, comm) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n received inbuffer from process %d", iproc) ; IVfprintf(msgFile, counts[iproc], inbuffer) ; fflush(msgFile) ; } buffer = inbuffer ; } /* ------------------------- set the column map values ------------------------- */ for ( ii = 0 ; ii < counts[iproc] ; ii++ ) { v = buffer[ii] ; colmap[v] = iproc ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(inbuffer) ; IVfree(outbuffer) ; IVfree(counts) ; return(colmapIV) ; }
/* -------------------------------------------------------------------- purpose -- this method is used to determine the support of this matrix for a matrix-vector multiply y[] = A * x[] when A is a symmetric matrix. supIV -- filled with row indices of y[] which will be updated and row indices of x[] which will be used. created -- 98aug01, cca -------------------------------------------------------------------- */ void InpMtx_supportSym ( InpMtx *A, IV *supIV ) { char *mark ; int chev, col, count, ii, loc, maxcol, maxrow, maxv, nent, off, row ; int *ivec1, *ivec2, *sup ; /* --------------- check the input --------------- */ if ( A == NULL || supIV == NULL ) { fprintf(stderr, "\n fatal error in InpMtx_supportSym(%p,%p)" "\n bad input\n", A, supIV) ; exit(-1) ; } if ( !INPMTX_IS_BY_ROWS(A) && !INPMTX_IS_BY_COLUMNS(A) && !INPMTX_IS_BY_CHEVRONS(A) ) { fprintf(stderr, "\n fatal error in InpMtx_supportSym(%p,%p)" "\n coordinate type\n", A, supIV) ; exit(-1) ; } ivec1 = InpMtx_ivec1(A) ; ivec2 = InpMtx_ivec2(A) ; nent = A->nent ; /* ----------------------------------------------------------------- (1) determine the maximum row and column numbers in these entries (2) allocate marking vectors for rows and columns (3) fill marking vectors for rows and columns (4) fill support vectors ----------------------------------------------------------------- */ if ( INPMTX_IS_BY_ROWS(A) ) { maxrow = IVmax(nent, ivec1, &loc) ; maxcol = IVmax(nent, ivec2, &loc) ; maxv = (maxrow >= maxcol) ? maxrow : maxcol ; mark = CVinit(1+maxv, 'O') ; count = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { row = ivec1[ii] ; col = ivec2[ii] ; if ( mark[row] == 'O' ) { count++ ; } mark[row] = 'X' ; if ( mark[col] == 'O' ) { count++ ; } mark[col] = 'X' ; } } else if ( INPMTX_IS_BY_COLUMNS(A) ) { maxrow = IVmax(nent, ivec2, &loc) ; maxcol = IVmax(nent, ivec1, &loc) ; maxv = (maxrow >= maxcol) ? maxrow : maxcol ; mark = CVinit(1+maxv, 'O') ; count = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { row = ivec2[ii] ; col = ivec1[ii] ; if ( mark[row] == 'O' ) { count++ ; } mark[row] = 'X' ; if ( mark[col] == 'O' ) { count++ ; } mark[col] = 'X' ; } } else if ( INPMTX_IS_BY_CHEVRONS(A) ) { maxv = -1 ; for ( ii = 0 ; ii < nent ; ii++ ) { chev = ivec1[ii] ; off = ivec2[ii] ; if ( off >= 0 ) { row = chev ; col = chev + off ; if ( maxv < col ) { maxv = col ; } } else { col = chev ; row = chev - off ; if ( maxv < row ) { maxv = row ; } } } mark = CVinit(1+maxv, 'O') ; count = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { chev = ivec1[ii] ; off = ivec2[ii] ; if ( off >= 0 ) { row = chev ; col = chev + off ; } else { col = chev ; row = chev - off ; } if ( mark[row] == 'O' ) { count++ ; } mark[row] = 'X' ; if ( mark[col] == 'O' ) { count++ ; } mark[col] = 'X' ; } } IV_setSize(supIV, count) ; sup = IV_entries(supIV) ; for ( row = count = 0 ; row <= maxv ; row++ ) { if ( mark[row] == 'X' ) { sup[count++] = row ; } } CVfree(mark) ; return ; }
/* ------------------------------------------------------------ compute an old-to-new ordering for local nested dissection in two dimensions n1 -- number of grid points in first direction n2 -- number of grid points in second direction p1 -- number of domains in first direction p2 -- number of domains in second direction dsizes1 -- domain sizes in first direction, size p1 if NULL, then we construct our own dsizes2 -- domain sizes in second direction, size p2 if NULL, then we construct our own oldToNew -- old-to-new permutation vector note : the following must hold n1 > 0, n2 >0, n1 >= 2*p1 - 1, n2 >= 2*p2 - 1, p2 > 1 sum(dsizes1) = n1 - p1 + 1 and sum(dsizes2) = n2 - p2 + 1 created -- 95nov16, cca ------------------------------------------------------------ */ void localND2D ( int n1, int n2, int p1, int p2, int dsizes1[], int dsizes2[], int oldToNew[] ) { int i, idom, ij, isw, j, jdom, jsw, length1, length2, m, m1, m2, msize, now, nvtx ; int *length1s, *length2s, *isws, *jsws, *temp ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || 2*p1 - 1 > n1 || 2*p2 - 1 > n2 || oldToNew == NULL ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n bad input\n", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew) ; exit(-1) ; } if ( p2 <= 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n p2 = %d, must be > 1", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, p2) ; exit(-1) ; } if ( dsizes1 != NULL && IVsum(p1, dsizes1) != n1 - p1 + 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVsum(p1, dsizes1) = %d != %d = n1 - p1 + 1 ", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVsum(p1, dsizes1), n1 - p1 + 1) ; return ; } if ( dsizes1 != NULL && IVmin(p1, dsizes1, &i) <= 0 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVmin(p1, dsizes1) = %d must be > 0", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVmin(p1, dsizes1, &i)) ; return ; } if ( dsizes2 != NULL && IVsum(p2, dsizes2) != n2 - p2 + 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVsum(p2, dsizes2) = %d != %d = n2 - p2 + 1 ", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVsum(p2, dsizes2), n2 - p2 + 1) ; return ; } if ( dsizes2 != NULL && IVmin(p2, dsizes2, &i) <= 0 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVmin(p2, dsizes2) = %d must be > 0", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVmin(p2, dsizes2, &i)) ; return ; } nvtx = n1*n2 ; /* ---------------------------------- construct the domain sizes vectors ---------------------------------- */ if ( dsizes1 == NULL ) { length1s = IVinit(p1, 0) ; length1 = (n1 - p1 + 1) / p1 ; m1 = (n1 - p1 + 1) % p1 ; for ( i = 0 ; i < m1 ; i++ ) { length1s[i] = length1 + 1 ; } for ( ; i < p1 ; i++ ) { length1s[i] = length1 ; } } else { length1s = dsizes1 ; } if ( dsizes2 == NULL ) { length2s = IVinit(p2, 0) ; length2 = (n2 - p2 + 1) / p2 ; m2 = (n2 - p2 + 1) % p2 ; for ( i = 0 ; i < m2 ; i++ ) { length2s[i] = length2 + 1 ; } for ( ; i < p2 ; i++ ) { length2s[i] = length2 ; } } else { length2s = dsizes2 ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside localND2D") ; fprintf(stdout, "\n n1 = %d, n2 = %d, p1 = %d, p2 = %d", n1, n2, p1, p2) ; fprintf(stdout, "\n length1s[%d] = ", p1) ; IVfp80(stdout, p1, length1s, 12) ; fprintf(stdout, "\n length2s[%d] = ", p2) ; IVfp80(stdout, p2, length2s, 12) ; #endif /* --------------------------------------- determine the first and last domain ids and the array of southwest points --------------------------------------- */ isws = IVinit(p1, -1) ; for ( idom = 0, isw = 0 ; idom < p1 ; idom++ ) { isws[idom] = isw ; isw += length1s[idom] + 1 ; } jsws = IVinit(p2, -1) ; for ( jdom = 0, jsw = 0 ; jdom < p2 ; jdom++ ) { jsws[jdom] = jsw ; jsw += length2s[jdom] + 1 ; } #if MYDEBUG > 1 fprintf(stdout, "\n isws[%d] = ", p1) ; IVfp80(stdout, p1, isws, 12) ; fprintf(stdout, "\n jsws[%d] = ", p2) ; IVfp80(stdout, p2, jsws, 12) ; #endif /* ---------------------------------------------------------------- create a temporary permutation vector for the domains' orderings ---------------------------------------------------------------- */ msize = IVmax(p1, length1s, &i) * IVmax(p2, length2s, &i) ; temp = IVinit(msize, -1) ; /* ------------------------ fill in the domain nodes ------------------------ */ now = 0 ; for ( jdom = 0; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { length1 = length1s[idom] ; isw = isws[idom] ; mkNDperm(length1, length2, 1, temp, 0, length1-1, 0, length2-1, 0, 0) ; for ( m = 0 ; m < length1*length2 ; m++ ) { ij = temp[m] ; i = isw + (ij % length1) ; j = jsw + (ij / length1) ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } #if MYDEBUG > 2 fprintf(stdout, "\n old-to-new after domains are numbered") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the lower separator nodes --------------------------------- */ for ( jdom = 0 ; jdom < (p2/2) ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; if ( isw > 0 ) { i = isw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } if ( isw > 0 && jsw > 0 ) { i = isw - 1 ; j = jsw - 1 ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } if ( jsw > 0 ) { j = jsw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the lower separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the upper separator nodes --------------------------------- */ for ( jdom = p2 - 1 ; jdom >= (p2/2) ; jdom-- ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = p1 - 1 ; idom >= 0 ; idom-- ) { isw = isws[idom] ; length1 = length1s[idom] ; if ( isw + length1 < n1 ) { i = isw + length1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } if ( isw + length1 < n1 && jsw + length2 < n2 ) { i = isw + length1 ; j = jsw + length2 ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } if ( jsw + length2 < n2 ) { j = jsw + length2 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the upper separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* ------------------------------- fill in the top level separator ------------------------------- */ m1 = p2 / 2 ; for ( jdom = 0, j = 0 ; jdom < m1 ; jdom++ ) { j += length2s[jdom] + 1 ; } j-- ; for ( i = 0 ; i < n1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } /* ------------------------ free the working storage ------------------------ */ if ( dsizes1 == NULL ) { IVfree(length1s) ; } if ( dsizes2 == NULL ) { IVfree(length2s) ; } IVfree(isws) ; IVfree(jsws) ; IVfree(temp) ; return ; }
/* ----------------------------------------------------------------- purpose -- the IV objects objIV and ownersIV are found on each process. the ownersIV object is identical over all the processes, and owners[ii] tells which processes owns location ii of the obj[] vector. on return from this entry, the obj[] vector is replicated over all the processes. each process sends the (ii,obj[ii]) pairs that it owns to all the other processes. created -- 98apr02, cca ----------------------------------------------------------------- */ void IV_MPI_allgather ( IV *objIV, IV *ownersIV, int stats[], int msglvl, FILE *msgFile, int firsttag, MPI_Comm comm ) { int count, destination, ii, incount, iproc, jj, lasttag, left, maxcount, myid, nowners, nproc, nvec, offset, outcount, right, source, tag, tagbound, value ; int *counts, *inbuffer, *outbuffer, *owners, *vec ; MPI_Status status ; /* --------------- check the input --------------- */ if ( objIV == NULL || ownersIV == NULL ) { fprintf(stderr, "\n fatal error in IV_MPI_allgather()" "\n objIV = %p, ownersIV = %p\n", objIV, ownersIV) ; spoolesFatal(); } /* ---------------------------------------------- get id of self, # of processes and # of fronts ---------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; IV_sizeAndEntries(objIV, &nvec, &vec) ; IV_sizeAndEntries(ownersIV, &nowners, &owners) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n inside IV_MPI_allgather" "\n nproc = %d, myid = %d, nvec = %d, nowners = %d", nproc, myid, nvec, nowners) ; fflush(msgFile) ; } if ( nvec != nowners || vec == NULL || owners == NULL ) { fprintf(stderr, "\n fatal error in IV_MPI_allgather()" "\n nvec = %d, nowners = %d, vec = %p, owners = %p\n", nvec, nowners, vec, owners) ; spoolesFatal(); } /* ------------------- check the tag range ------------------- */ lasttag = firsttag + nproc ; tagbound = maxTagMPI(comm) ; if ( firsttag < 0 || lasttag > tagbound ) { fprintf(stderr, "\n fatal error in IV_MPI_allgather()" "\n firsttag = %d, lasttag = %d, tagbound = %d\n", firsttag, lasttag, tagbound) ; spoolesFatal(); } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n objIV") ; IV_writeForHumanEye(objIV, msgFile) ; fprintf(msgFile, "\n\n ownersIV") ; IV_writeForHumanEye(ownersIV, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------------- step 1 : determine the number of entries owned by each vector ------------------------------------------------------------- */ counts = IVinit(nproc, 0) ; for ( ii = 0 ; ii < nvec ; ii++ ) { if ( owners[ii] < 0 || owners[ii] >= nproc ) { fprintf(stderr, "\n owners[%d] = %d", ii, owners[ii]) ; spoolesFatal(); } counts[owners[ii]]++ ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } /* ----------------------------- set up the in and out buffers ----------------------------- */ if ( counts[myid] > 0 ) { outbuffer = IVinit(2*counts[myid], -1) ; for ( ii = jj = 0 ; ii < nvec ; ii++ ) { if ( owners[ii] == myid ) { outbuffer[jj++] = ii ; outbuffer[jj++] = vec[ii] ; } } if ( jj != 2*counts[myid] ) { fprintf(msgFile, "\n jj = %d, 2*counts[%d] = %d", jj, myid, 2*counts[myid]) ; fprintf(stderr, "\n jj = %d, 2*counts[%d] = %d", jj, myid, 2*counts[myid]) ; spoolesFatal(); } } else { outbuffer = NULL ; } maxcount = IVmax(nproc, counts, &iproc) ; if ( maxcount > 0 ) { inbuffer = IVinit(2*maxcount, -1) ; } else { inbuffer = NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n outbuffer %p, maxcount %d, inbuffer %p", outbuffer, maxcount, inbuffer) ; fflush(msgFile) ; } /* ------------------------------------- step 2: loop over the other processes send and receive information ------------------------------------- */ outcount = 2*counts[myid] ; for ( offset = 1, tag = firsttag ; offset < nproc ; offset++, tag++ ) { right = (myid + offset) % nproc ; if ( offset <= myid ) { left = myid - offset ; } else { left = nproc + myid - offset ; } if ( outcount > 0 ) { destination = right ; stats[0]++ ; stats[2] += outcount*sizeof(int) ; } else { destination = MPI_PROC_NULL ; } incount = 2*counts[left] ; if ( incount > 0 ) { source = left ; stats[1]++ ; stats[3] += incount*sizeof(int) ; } else { source = MPI_PROC_NULL ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n offset %d, source %d, destination %d", offset, source, destination) ; fflush(msgFile) ; } /* ----------------- do a send/receive ----------------- */ MPI_Sendrecv(outbuffer, outcount, MPI_INT, destination, tag, inbuffer, incount, MPI_INT, source, tag, comm, &status) ; if ( source != MPI_PROC_NULL ) { MPI_Get_count(&status, MPI_INT, &count) ; if ( count != incount ) { fprintf(stderr, "\n 1. fatal error in IV_MPI_allgather()" "\n proc %d : source = %d, count = %d, incount = %d\n", myid, source, count, incount) ; spoolesFatal(); } } /* ---------------------------- set the values in the vector ---------------------------- */ for ( jj = 0 ; jj < incount ; jj += 2 ) { ii = inbuffer[jj] ; value = inbuffer[jj+1] ; vec[ii] = value ; } if ( jj != incount ) { fprintf(msgFile, "\n jj = %d, incount = %d", jj, incount) ; fprintf(stderr, "\n jj = %d, incount = %d", jj, incount) ; spoolesFatal(); } if ( msglvl > 2 ) { fprintf(msgFile, "\n after setting values") ; IVfprintf(msgFile, nvec, vec) ; fflush(msgFile) ; } } /* ------------------------ free the working storage ------------------------ */ IVfree(counts) ; if ( outbuffer != NULL ) { IVfree(outbuffer) ; } if ( inbuffer != NULL ) { IVfree(inbuffer) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n leaving IV_MPI_gatherall()") ; fflush(msgFile) ; } return ; }
/* -------------------------------------------------------------------- purpose -- this method is used to determine the support of this matrix for a matrix-vector multiply y[] = A * x[] when A is a nonsymmetric matrix. rowsupIV -- filled with row indices of y[] which will be updated. colsupIV -- filled with row indices of x[] which will be used. created -- 98aug01, cca -------------------------------------------------------------------- */ void InpMtx_supportNonsymT ( InpMtx *A, IV *rowsupIV, IV *colsupIV ) { char *colmark, *rowmark ; int chev, col, colcount, ii, loc, maxcol, maxrow, nent, off, row, rowcount ; int *colsup, *ivec1, *ivec2, *rowsup ; /* --------------- check the input --------------- */ if ( A == NULL || rowsupIV == NULL || colsupIV == NULL ) { fprintf(stderr, "\n fatal error in InpMtx_supportNonsymT(%p,%p,%p)" "\n bad input\n", A, rowsupIV, colsupIV) ; spoolesFatal(); } if ( !INPMTX_IS_BY_ROWS(A) && !INPMTX_IS_BY_COLUMNS(A) && !INPMTX_IS_BY_CHEVRONS(A) ) { fprintf(stderr, "\n fatal error in InpMtx_supportNonsymT(%p,%p,%p)" "\n coordinate type\n", A, rowsupIV, colsupIV) ; spoolesFatal(); } ivec1 = InpMtx_ivec1(A) ; ivec2 = InpMtx_ivec2(A) ; nent = A->nent ; /* ----------------------------------------------------------------- (1) determine the maximum row and column numbers in these entries (2) allocate marking vectors for rows and columns (3) fill marking vectors for rows and columns (4) fill support vectors ----------------------------------------------------------------- */ if ( INPMTX_IS_BY_ROWS(A) ) { maxrow = IVmax(nent, ivec1, &loc) ; maxcol = IVmax(nent, ivec2, &loc) ; rowmark = CVinit(1+maxcol, 'O') ; colmark = CVinit(1+maxrow, 'O') ; rowcount = colcount = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { row = ivec1[ii] ; col = ivec2[ii] ; if ( colmark[row] == 'O' ) { colcount++ ; } colmark[row] = 'X' ; if ( rowmark[col] == 'O' ) { rowcount++ ; } rowmark[col] = 'X' ; } } else if ( INPMTX_IS_BY_COLUMNS(A) ) { maxrow = IVmax(nent, ivec2, &loc) ; maxcol = IVmax(nent, ivec1, &loc) ; rowmark = CVinit(1+maxcol, 'O') ; colmark = CVinit(1+maxrow, 'O') ; rowcount = colcount = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { row = ivec2[ii] ; col = ivec1[ii] ; if ( colmark[row] == 'O' ) { colcount++ ; } colmark[row] = 'X' ; if ( rowmark[col] == 'O' ) { rowcount++ ; } rowmark[col] = 'X' ; } } else if ( INPMTX_IS_BY_CHEVRONS(A) ) { maxrow = maxcol = -1 ; for ( ii = 0 ; ii < nent ; ii++ ) { chev = ivec1[ii] ; off = ivec2[ii] ; if ( off >= 0 ) { row = chev ; col = chev + off ; } else { col = chev ; row = chev - off ; } if ( maxrow < row ) { maxrow = row ; } if ( maxcol < col ) { maxcol = col ; } } rowmark = CVinit(1+maxcol, 'O') ; colmark = CVinit(1+maxrow, 'O') ; rowcount = colcount = 0 ; for ( ii = 0 ; ii < nent ; ii++ ) { chev = ivec1[ii] ; off = ivec2[ii] ; if ( off >= 0 ) { row = chev ; col = chev + off ; } else { col = chev ; row = chev - off ; } if ( colmark[row] == 'O' ) { colcount++ ; } colmark[row] = 'X' ; if ( rowmark[col] == 'O' ) { rowcount++ ; } rowmark[col] = 'X' ; } } IV_setSize(rowsupIV, rowcount) ; rowsup = IV_entries(rowsupIV) ; for ( col = rowcount = 0 ; col <= maxcol ; col++ ) { if ( rowmark[col] == 'X' ) { rowsup[rowcount++] = col ; } } IV_setSize(colsupIV, colcount) ; colsup = IV_entries(colsupIV) ; for ( row = colcount = 0 ; row <= maxrow ; row++ ) { if ( colmark[row] == 'X' ) { colsup[colcount++] = row ; } } CVfree(colmark) ; CVfree(rowmark) ; return ; }
/* -------------------------------------------------------------------- fill *pndom with ndom, the number of domains. fill *pnseg with nseg, the number of segments. domains are numbered in [0, ndom), segments in [ndom,ndom+nseg). return -- an IV object that contains the map from vertices to segments created -- 99feb29, cca -------------------------------------------------------------------- */ IV * GPart_domSegMap ( GPart *gpart, int *pndom, int *pnseg ) { FILE *msgFile ; Graph *g ; int adjdom, count, d, first, ierr, ii, jj1, jj2, last, ndom, msglvl, nextphi, nPhi, nPsi, nV, phi, phi0, phi1, phi2, phi3, psi, sigma, size, size0, size1, size2, v, vsize, w ; int *adj, *adj0, *adj1, *adj2, *compids, *dmark, *dsmap, *head, *link, *list, *offsets, *PhiToPsi, *PhiToV, *PsiToSigma, *vadj, *VtoPhi ; IV *dsmapIV ; IVL *PhiByPhi, *PhiByPowD, *PsiByPowD ; /* -------------------- set the initial time -------------------- */ icputimes = 0 ; MARKTIME(cputimes[icputimes]) ; /* --------------- check the input --------------- */ if ( gpart == NULL || (g = gpart->g) == NULL || pndom == NULL || pnseg == NULL ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n bad input\n", gpart, pndom, pnseg) ; exit(-1) ; } compids = IV_entries(&gpart->compidsIV) ; msglvl = gpart->msglvl ; msgFile = gpart->msgFile ; /* ------------------------ create the map IV object ------------------------ */ nV = g->nvtx ; dsmapIV = IV_new() ; IV_init(dsmapIV, nV, NULL) ; dsmap = IV_entries(dsmapIV) ; /* ---------------------------------- check compids[] and get the number of domains and interface vertices ---------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; ndom = nPhi = 0 ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) < 0 ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n compids[%d] = %d\n", gpart, pndom, pnseg, v, compids[v]) ; exit(-1) ; } else if ( d == 0 ) { nPhi++ ; } else if ( ndom < d ) { ndom = d ; } } *pndom = ndom ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n Inside GPart_domSegMap") ; fprintf(msgFile, "\n %d domains, %d Phi vertices", ndom, nPhi) ; } if ( ndom == 1 ) { IVfill(nV, dsmap, 0) ; *pndom = 1 ; *pnseg = 0 ; return(dsmapIV) ; } /* -------------------------------- get the maps PhiToV : [0,nPhi) |---> [0,nV) VtoPhi : [0,nV) |---> [0,nPhi) -------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToV = IVinit(nPhi, -1) ; VtoPhi = IVinit(nV, -1) ; for ( v = 0, phi = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) == 0 ) { PhiToV[phi] = v ; VtoPhi[v] = phi++ ; } } if ( phi != nPhi ) { fprintf(stderr, "\n fatal error in GPart_domSegMap(%p,%p,%p)" "\n phi = %d != %d = nPhi\n", gpart, pndom, pnseg, phi, nPhi) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToV(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToV, 15, &ierr) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n VtoPhi(%d) :", nV) ; IVfp80(msgFile, nV, VtoPhi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------------------------- create an IVL object, PhiByPowD, to hold lists from the interface vertices to their adjacent domains --------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; dmark = IVinit(ndom+1, -1) ; if ( nPhi >= ndom ) { list = IVinit(nPhi, -1) ; } else { list = IVinit(ndom, -1) ; } PhiByPowD = IVL_new() ; IVL_init1(PhiByPowD, IVL_CHUNKED, nPhi) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; /* if ( phi == 0 ) { int ierr ; fprintf(msgFile, "\n adj(%d,%d) = ", v, phi) ; IVfp80(msgFile, vsize, vadj, 15, &ierr) ; fflush(msgFile) ; } */ count = 0 ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != phi ) { dmark[d] = phi ; list[count++] = d ; } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PhiByPowD, phi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPowD : interface x adjacent domains") ; IVL_writeForHumanEye(PhiByPowD, msgFile) ; fflush(msgFile) ; } /* ------------------------------------------------------- create an IVL object, PhiByPhi to hold lists from the interface vertices to interface vertices. (s,t) are in the list if (s,t) is an edge in the graph and s and t do not share an adjacent domain ------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiByPhi = IVL_new() ; IVL_init1(PhiByPhi, IVL_CHUNKED, nPhi) ; offsets = IVinit(nPhi, 0) ; head = IVinit(nPhi, -1) ; link = IVinit(nPhi, -1) ; for ( phi1 = 0 ; phi1 < nPhi ; phi1++ ) { count = 0 ; if ( msglvl > 2 ) { v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; fprintf(msgFile, "\n checking out phi = %d, v = %d", phi1, v) ; fprintf(msgFile, "\n adj(%d) : ", v) ; IVfp80(msgFile, vsize, vadj, 10, &ierr) ; } /* ------------------------------------------------------------- get (phi1, phi0) edges that were previously put into the list ------------------------------------------------------------- */ if ( msglvl > 3 ) { if ( head[phi1] == -1 ) { fprintf(msgFile, "\n no previous edges") ; } else { fprintf(msgFile, "\n previous edges :") ; } } for ( phi0 = head[phi1] ; phi0 != -1 ; phi0 = nextphi ) { if ( msglvl > 3 ) { fprintf(msgFile, " %d", phi0) ; } nextphi = link[phi0] ; list[count++] = phi0 ; IVL_listAndSize(PhiByPhi, phi0, &size0, &adj0) ; if ( (ii = ++offsets[phi0]) < size0 ) { /* ---------------------------- link phi0 into the next list ---------------------------- */ phi2 = adj0[ii] ; link[phi0] = head[phi2] ; head[phi2] = phi0 ; } } /* -------------------------- get new edges (phi1, phi2) -------------------------- */ IVL_listAndSize(PhiByPowD, phi1, &size1, &adj1) ; v = PhiToV[phi1] ; Graph_adjAndSize(g, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { if ( (w = vadj[ii]) < nV && compids[w] == 0 && (phi2 = VtoPhi[w]) > phi1 ) { if ( msglvl > 3 ) { fprintf(msgFile, "\n checking out phi2 = %d", phi2) ; } /* -------------------------------------------------- see if phi1 and phi2 have a common adjacent domain -------------------------------------------------- */ IVL_listAndSize(PhiByPowD, phi2, &size2, &adj2) ; adjdom = 0 ; jj1 = jj2 = 0 ; while ( jj1 < size1 && jj2 < size2 ) { if ( adj1[jj1] < adj2[jj2] ) { jj1++ ; } else if ( adj1[jj1] > adj2[jj2] ) { jj2++ ; } else { if ( msglvl > 3 ) { fprintf(msgFile, ", common adj domain %d", adj1[jj1]) ; } adjdom = 1 ; break ; } } if ( adjdom == 0 ) { if ( msglvl > 3 ) { fprintf(msgFile, ", no adjacent domain") ; } list[count++] = phi2 ; } } } if ( count > 0 ) { /* --------------------- set the list for phi1 --------------------- */ IVqsortUp(count, list) ; IVL_setList(PhiByPhi, phi1, count, list) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n edge list for %d :", phi1) ; IVfp80(msgFile, count, list, 15, &ierr) ; } for ( ii = 0, phi2 = -1 ; ii < count ; ii++ ) { if ( list[ii] > phi1 ) { offsets[phi1] = ii ; phi2 = list[ii] ; break ; } } if ( phi2 != -1 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n linking %d into list for %d", phi1, phi2) ; } link[phi1] = head[phi2] ; head[phi2] = phi1 ; } /* phi2 = list[0] ; link[phi1] = head[phi2] ; head[phi2] = phi1 ; */ } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiByPhi : interface x interface") ; IVL_writeForHumanEye(PhiByPhi, msgFile) ; fflush(msgFile) ; } /* -------------------- get the PhiToPsi map -------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PhiToPsi = IVinit(nPhi, -1) ; nPsi = 0 ; for ( phi = 0 ; phi < nPhi ; phi++ ) { if ( PhiToPsi[phi] == -1 ) { /* --------------------------- phi not yet mapped to a psi --------------------------- */ first = last = 0 ; list[0] = phi ; PhiToPsi[phi] = nPsi ; while ( first <= last ) { phi2 = list[first++] ; IVL_listAndSize(PhiByPhi, phi2, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { phi3 = adj[ii] ; if ( PhiToPsi[phi3] == -1 ) { PhiToPsi[phi3] = nPsi ; list[++last] = phi3 ; } } } nPsi++ ; } } if ( msglvl > 1 ) { fprintf(msgFile, "\n nPsi = %d", nPsi) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n PhiToPsi(%d) :", nPhi) ; IVfp80(msgFile, nPhi, PhiToPsi, 15, &ierr) ; fflush(msgFile) ; } /* --------------------------------- create an IVL object, Psi --> 2^D --------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVfill(nPsi, head, -1) ; IVfill(nPhi, link, -1) ; for ( phi = 0 ; phi < nPhi ; phi++ ) { psi = PhiToPsi[phi] ; link[phi] = head[psi] ; head[psi] = phi ; } PsiByPowD = IVL_new() ; IVL_init1(PsiByPowD, IVL_CHUNKED, nPsi) ; IVfill(ndom+1, dmark, -1) ; for ( psi = 0 ; psi < nPsi ; psi++ ) { count = 0 ; for ( phi = head[psi] ; phi != -1 ; phi = link[phi] ) { v = PhiToV[phi] ; Graph_adjAndSize(g, v, &size, &adj) ; for ( ii = 0 ; ii < size ; ii++ ) { if ( (w = adj[ii]) < nV && (d = compids[w]) > 0 && dmark[d] != psi ) { dmark[d] = psi ; list[count++] = d ; } } } if ( count > 0 ) { IVqsortUp(count, list) ; IVL_setList(PsiByPowD, psi, count, list) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n PsiByPowD(%d) :", nPhi) ; IVL_writeForHumanEye(PsiByPowD, msgFile) ; fflush(msgFile) ; } icputimes++ ; MARKTIME(cputimes[icputimes]) ; /* ------------------------------------- now get the map Psi |---> Sigma that is the equivalence map over PhiByPowD ------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; PsiToSigma = IVL_equivMap1(PsiByPowD) ; *pnseg = 1 + IVmax(nPsi, PsiToSigma, &ii) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n nSigma = %d", *pnseg) ; fprintf(msgFile, "\n PsiToSigma(%d) :", nPhi) ; IVfp80(msgFile, nPsi, PsiToSigma, 15, &ierr) ; fflush(msgFile) ; } /* -------------------------------------------------------------- now fill the map from the vertices to the domains and segments -------------------------------------------------------------- */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; for ( v = 0 ; v < nV ; v++ ) { if ( (d = compids[v]) > 0 ) { dsmap[v] = d - 1 ; } else { phi = VtoPhi[v] ; psi = PhiToPsi[phi] ; sigma = PsiToSigma[psi] ; dsmap[v] = ndom + sigma ; } } /* ------------------------ free the working storage ------------------------ */ icputimes++ ; MARKTIME(cputimes[icputimes]) ; IVL_free(PhiByPhi) ; IVL_free(PhiByPowD) ; IVL_free(PsiByPowD) ; IVfree(PhiToV) ; IVfree(VtoPhi) ; IVfree(dmark) ; IVfree(list) ; IVfree(PhiToPsi) ; IVfree(head) ; IVfree(link) ; IVfree(offsets) ; IVfree(PsiToSigma) ; icputimes++ ; MARKTIME(cputimes[icputimes]) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n domain/segment map timings split") ; fprintf(msgFile, "\n %9.5f : create the DSmap object" "\n %9.5f : get numbers of domain and interface vertices" "\n %9.5f : generate PhiToV and VtoPhi" "\n %9.5f : generate PhiByPowD" "\n %9.5f : generate PhiByPhi" "\n %9.5f : generate PhiToPsi" "\n %9.5f : generate PsiByPowD" "\n %9.5f : generate PsiToSigma" "\n %9.5f : generate dsmap" "\n %9.5f : free working storage" "\n %9.5f : total time", cputimes[1] - cputimes[0], cputimes[2] - cputimes[1], cputimes[3] - cputimes[2], cputimes[4] - cputimes[3], cputimes[5] - cputimes[4], cputimes[6] - cputimes[5], cputimes[7] - cputimes[6], cputimes[8] - cputimes[7], cputimes[9] - cputimes[8], cputimes[10] - cputimes[9], cputimes[11] - cputimes[0]) ; } return(dsmapIV) ; }
/* ---------------------------------- return an IVL object that contains the adjacency structure of A^TA. created -- 98jan28, cca ---------------------------------- */ IVL * InpMtx_adjForATA ( InpMtx *inpmtxA ) { InpMtx *inpmtxATA ; int firstcol, firstrow, irow, jvtx, lastcol, lastrow, loc, ncol, nent, nrow, size ; int *ind, *ivec1, *ivec2 ; IVL *adjIVL ; /* --------------- check the input --------------- */ if ( inpmtxA == NULL ) { fprintf(stderr, "\n fatal error in InpMtx_adjForATA(%p)" "\n NULL input\n", inpmtxA) ; exit(-1) ; } /* ---------------------------------------------------------- change the coordinate type and storage mode to row vectors ---------------------------------------------------------- */ InpMtx_changeCoordType(inpmtxA, INPMTX_BY_ROWS) ; InpMtx_changeStorageMode(inpmtxA, INPMTX_BY_VECTORS) ; nent = InpMtx_nent(inpmtxA) ; ivec1 = InpMtx_ivec1(inpmtxA) ; ivec2 = InpMtx_ivec2(inpmtxA) ; firstrow = IVmin(nent, ivec1, &loc) ; lastrow = IVmax(nent, ivec1, &loc) ; firstcol = IVmin(nent, ivec2, &loc) ; lastcol = IVmax(nent, ivec2, &loc) ; if ( firstrow < 0 || firstcol < 0 ) { fprintf(stderr, "\n fatal error" "\n firstrow = %d, firstcol = %d" "\n lastrow = %d, lastcol = %d", firstrow, firstcol, lastrow, lastcol) ; exit(-1) ; } nrow = 1 + lastrow ; ncol = 1 + lastcol ; /* ----------------------------------------------------------- create the new InpMtx object to hold the structure of A^TA ----------------------------------------------------------- */ inpmtxATA = InpMtx_new() ; InpMtx_init(inpmtxATA, INPMTX_BY_ROWS, INPMTX_INDICES_ONLY, 0, 0) ; for ( irow = 0 ; irow < nrow ; irow++ ) { InpMtx_vector(inpmtxA, irow, &size, &ind) ; InpMtx_inputMatrix(inpmtxATA, size, size, 1, size, ind, ind) ; } for ( jvtx = 0 ; jvtx < nrow ; jvtx++ ) { InpMtx_inputEntry(inpmtxATA, jvtx, jvtx) ; } InpMtx_changeStorageMode(inpmtxATA, INPMTX_BY_VECTORS) ; /* ------------------- fill the IVL object ------------------- */ adjIVL = IVL_new() ; IVL_init1(adjIVL, IVL_CHUNKED, nrow) ; for ( jvtx = 0 ; jvtx < ncol ; jvtx++ ) { InpMtx_vector(inpmtxATA, jvtx, &size, &ind) ; IVL_setList(adjIVL, jvtx, size, ind) ; } /* ------------------------------ free the working InpMtx object ------------------------------ */ InpMtx_free(inpmtxATA) ; return(adjIVL) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------------------------------------- test the factor method for a grid matrix (0) read in matrix from source file (1) conver data matrix to InpMtx object if necessary (2) create Graph and ETree object if necessary (3) read in/create an ETree object (4) create a solution matrix object (5) multiply the solution with the matrix to get a right hand side matrix object (6) factor the matrix (7) solve the system created -- 98dec30, jwu ----------------------------------------------------- */ { char etreeFileName[80], mtxFileName[80], *cpt, rhsFileName[80], srcFileName[80], ctemp[81], msgFileName[80], slnFileName[80] ; Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxQ, *mtxX, *mtxZ ; double one[2] = { 1.0, 0.0 } ; FrontMtx *frontmtx ; InpMtx *mtxA ; SubMtxManager *mtxmanager ; double cputotal, droptol, conv_tol, factorops ; double cpus[9] ; Drand drand ; double nops, tau, t1, t2 ; ETree *frontETree ; Graph *graph ; FILE *msgFile, *inFile ; int error, loc, msglvl, neqns, nzf, iformat, pivotingflag, rc, seed, sparsityflag, symmetryflag, method[METHODS], type, nrhs, etreeflag ; int stats[6] ; int nnzA, Ik, itermax, zversion, iterout ; IV *newToOldIV, *oldToNewIV ; IVL *symbfacIVL ; int i, j, k, m, n, imethod, maxdomainsize, maxzeros, maxsize; int nouter,ninner ; if ( argc != 2 ) { fprintf(stdout, "\n\n usage : %s inFile" "\n inFile -- input filename" "\n", argv[0]) ; return(-1) ; } /* read input file */ inFile = fopen(argv[1], "r"); if (inFile == (FILE *)NULL) { fprintf(stderr, "\n fatal error in %s: unable to open file %s\n", argv[0], argv[1]) ; return(-1) ; } for (i=0; i<METHODS; i++) method[i]=-1; imethod=0; k=0; while (1) { fgets(ctemp, 80, inFile); if (ctemp[0] != '*') { /*printf("l=%2d:%s\n", strlen(ctemp),ctemp);*/ if (strlen(ctemp)==80) { fprintf(stderr, "\n fatal error in %s: input line contains more than " "80 characters.\n",argv[0]); exit(-1); } if (k==0) { sscanf(ctemp, "%d", &iformat); if (iformat < 0 || iformat > 2) { fprintf(stderr, "\n fatal error in %s: " "invalid source matrix format\n",argv[0]) ; return(-1) ; } } else if (k==1) sscanf(ctemp, "%s", srcFileName); else if (k==2) sscanf(ctemp, "%s", mtxFileName); else if (k==3) { sscanf(ctemp, "%d", &etreeflag); if (etreeflag < 0 || etreeflag > 4) { fprintf(stderr, "\n fatal error in %s: " "invalid etree file status\n",argv[0]) ; return(-1) ; } } else if (k==4) sscanf(ctemp, "%s", etreeFileName); else if (k==5) sscanf(ctemp, "%s", rhsFileName); else if (k==6) sscanf(ctemp, "%s", slnFileName); else if (k==7){ sscanf(ctemp, "%s", msgFileName); if ( strcmp(msgFileName, "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(msgFileName, "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], ctemp) ; return(-1) ; } } else if (k==8) sscanf(ctemp, "%d %d %d %d %d %d", &msglvl, &seed, &nrhs, &Ik, &itermax, &iterout); else if (k==9) sscanf(ctemp, "%d %d %d", &symmetryflag, &sparsityflag, &pivotingflag); else if (k==10) sscanf(ctemp, "%lf %lf %lf", &tau, &droptol, &conv_tol); else if (k==11) { /* for (j=0; j<strlen(ctemp); j++) { printf("j=%2d:%s",j,ctemp+j); if (ctemp[j] == ' ' && ctemp[j+1] != ' ') { sscanf(ctemp+j, "%d", method+imethod); printf("method[%d]=%d\n",imethod,method[imethod]); if (method[imethod] < 0) break; imethod++; } } */ imethod = sscanf(ctemp,"%d %d %d %d %d %d %d %d %d %d", method, method+1, method+2, method+3, method+4, method+5, method+6, method+7, method+8, method+9); /*printf("imethod=%d\n",imethod);*/ for (j=0; j<imethod; j++) { /*printf("method[%d]=%d\n",j,method[j]);*/ if (method[j]<0) { imethod=j; break; } } if (imethod == 0) { fprintf(msgFile,"No method assigned in input file\n"); return(-1); } } k++; } if (k==12) break; } fclose(inFile); /* reset nrhs to 1 */ if (nrhs > 1) { fprintf(msgFile,"*** Multiple right-hand-side vectors is not allowed yet.\n"); fprintf(msgFile,"*** nrhs is reset to 1.\n"); nrhs =1; } fprintf(msgFile, "\n %s " "\n srcFileName -- %s" "\n mtxFileName -- %s" "\n etreeFileName -- %s" "\n rhsFileName -- %s" "\n msglvl -- %d" "\n seed -- %d" "\n symmetryflag -- %d" "\n sparsityflag -- %d" "\n pivotingflag -- %d" "\n tau -- %e" "\n droptol -- %e" "\n conv_tol -- %e" "\n method -- ", argv[0], srcFileName, mtxFileName, etreeFileName, rhsFileName, msglvl, seed, symmetryflag, sparsityflag, pivotingflag, tau, droptol, conv_tol) ; for (k=0; k<imethod; k++) fprintf(msgFile, "%d ", method[k]); fprintf(msgFile, "\n ", method[k]); fflush(msgFile) ; /* -------------------------------------- initialize the random number generator -------------------------------------- */ Drand_setDefaultFields(&drand) ; Drand_init(&drand) ; Drand_setSeed(&drand, seed) ; /*Drand_setUniform(&drand, 0.0, 1.0) ;*/ Drand_setNormal(&drand, 0.0, 1.0) ; /* ---------------------------------------------- read in or convert source to the InpMtx object ---------------------------------------------- */ rc = 1; if ( strcmp(srcFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(-1) ; } mtxA = InpMtx_new() ; MARKTIME(t1) ; if (iformat == 0) { /* InpMtx source format */ rc = InpMtx_readFromFile(mtxA, srcFileName) ; strcpy(mtxFileName, srcFileName); if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromFile(%p,%s)", rc, mtxA, srcFileName) ; } else if (iformat == 1) { /* HBF source format */ rc = InpMtx_readFromHBfile(mtxA, srcFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromHBfile(%p,%s)", rc, mtxA, srcFileName) ; } else { /* AIJ2 source format */ rc = InpMtx_readFromAIJ2file(mtxA, srcFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromAIJ2file(%p,%s)", rc, mtxA, srcFileName) ; } MARKTIME(t2) ; if (iformat>0 && strcmp(mtxFileName, "none") != 0 ) { rc = InpMtx_writeToFile(mtxA, mtxFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_writeToFile(%p,%s)", rc, mtxA, mtxFileName) ; } fprintf(msgFile, "\n CPU %8.3f : read in (+ convert to) mtxA from file %s", t2 - t1, mtxFileName) ; if (rc != 1) { goto end_read; } type = mtxA->inputMode ; neqns = 1 + IVmax(mtxA->nent, InpMtx_ivec1(mtxA), &loc) ; if ( INPMTX_IS_BY_ROWS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is rows") ; } else if ( INPMTX_IS_BY_COLUMNS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is columns") ; } else if ( INPMTX_IS_BY_CHEVRONS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is chevrons") ; } else { fprintf(msgFile, "\n\n, error, bad coordinate type") ; rc=-1; goto end_read; } if ( INPMTX_IS_RAW_DATA(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is raw data\n") ; } else if ( INPMTX_IS_SORTED(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is sorted\n") ; } else if ( INPMTX_IS_BY_VECTORS(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is by vectors\n") ; } else { fprintf(msgFile, "\n\n, error, bad storage mode") ; rc=-1; goto end_read; } if ( msglvl > 1 ) { fprintf(msgFile, "\n\n after reading InpMtx object from file %s", mtxFileName) ; if ( msglvl == 2 ) { InpMtx_writeStats(mtxA, msgFile) ; } else { InpMtx_writeForHumanEye(mtxA, msgFile) ; } fflush(msgFile) ; } /* Get the nonzeros in matrix A and print it */ nnzA = InpMtx_nent( mtxA ); fprintf(msgFile, "\n\n Input matrix size %d NNZ %d", neqns, nnzA) ; /* -------------------------------------------------------- generate the linear system 1. generate solution matrix and fill with random numbers 2. generate rhs matrix and fill with zeros 3. compute matrix-matrix multiply -------------------------------------------------------- */ MARKTIME(t1) ; mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, -1, neqns, nrhs, 1, neqns) ; mtxB = DenseMtx_new() ; if (strcmp(rhsFileName, "none")) { rc = DenseMtx_readFromFile(mtxB, rhsFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from DenseMtx_readFromFile(%p,%s)", rc, mtxB, rhsFileName) ; DenseMtx_zero(mtxX) ; } else { DenseMtx_init(mtxB, type, 1, -1, neqns, nrhs, 1, neqns) ; DenseMtx_fillRandomEntries(mtxX, &drand) ; DenseMtx_zero(mtxB) ; switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_mmm(mtxA, mtxB, one, mtxX) ; break ; case SPOOLES_HERMITIAN : InpMtx_herm_mmm(mtxA, mtxB, one, mtxX) ; break ; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_mmm(mtxA, mtxB, one, mtxX) ; break ; default : break ; } } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : set up the solution and rhs ", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n original mtxX") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fprintf(msgFile, "\n\n original mtxB") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } if (rc != 1) { InpMtx_free(mtxA); DenseMtx_free(mtxX); DenseMtx_free(mtxB); goto end_init; } /* ------------------------ read in/create the ETree object ------------------------ */ MARKTIME(t1) ; if (etreeflag == 0) { /* read in ETree from file */ if ( strcmp(etreeFileName, "none") == 0 ) fprintf(msgFile, "\n no file to read from") ; frontETree = ETree_new() ; rc = ETree_readFromFile(frontETree, etreeFileName) ; if (rc!=1) fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)", rc, frontETree, etreeFileName) ; } else { graph = Graph_new() ; rc = InpMtx_createGraph(mtxA, graph); if (rc!=1) { fprintf(msgFile, "\n return value %d from InpMtx_createGraph(%p,%p)", rc, mtxA, graph) ; Graph_free(graph); goto end_tree; } if (etreeflag == 1) { /* Via BestOfNDandMS */ maxdomainsize = 500; maxzeros = 1000; maxsize = 64 ; frontETree = orderViaBestOfNDandMS(graph, maxdomainsize, maxzeros, maxsize, seed, msglvl, msgFile) ; } else if (etreeflag == 2) { /* Via MMD */ frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; } else if (etreeflag == 3) { /* Via MS */ maxdomainsize = 500; frontETree = orderViaMS(graph, maxdomainsize, seed, msglvl, msgFile) ; } else if (etreeflag == 4) { /* Via ND */ maxdomainsize = 500; frontETree = orderViaND(graph, maxdomainsize, seed, msglvl, msgFile) ; } Graph_free(graph); /* optionally write out the ETree object */ if ( strcmp(etreeFileName, "none") != 0 ) { fprintf(msgFile, "\n\n writing out ETree to file %s", etreeFileName) ; ETree_writeToFile(frontETree, etreeFileName) ; } } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in/create frontETree from file %s", t2 - t1, etreeFileName) ; if ( rc != 1 ) { ETree_free(frontETree); goto end_tree; } ETree_leftJustify(frontETree) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n after reading ETree object from file %s", etreeFileName) ; if ( msglvl == 2 ) { ETree_writeStats(frontETree, msgFile) ; } else { ETree_writeForHumanEye(frontETree, msgFile) ; } } fflush(msgFile) ; /* -------------------------------------------------- get the permutations, permute the matrix and the front tree, and compute the symbolic factorization -------------------------------------------------- */ MARKTIME(t1) ; oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : get permutations", t2 - t1) ; MARKTIME(t1) ; ETree_permuteVertices(frontETree, oldToNewIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute front tree", t2 - t1) ; MARKTIME(t1) ; InpMtx_permute(mtxA, IV_entries(oldToNewIV), IV_entries(oldToNewIV)) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute mtxA", t2 - t1) ; if ( symmetryflag == SPOOLES_SYMMETRIC || symmetryflag == SPOOLES_HERMITIAN ) { MARKTIME(t1) ; InpMtx_mapToUpperTriangle(mtxA) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : map to upper triangle", t2 - t1) ; } if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) { MARKTIME(t1) ; InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : change coordinate type", t2 - t1) ; } if ( INPMTX_IS_RAW_DATA(mtxA) ) { MARKTIME(t1) ; InpMtx_changeStorageMode(mtxA, INPMTX_SORTED) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : sort entries ", t2 - t1) ; } if ( INPMTX_IS_SORTED(mtxA) ) { MARKTIME(t1) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : convert to vectors ", t2 - t1) ; } MARKTIME(t1) ; symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ; MARKTIME(t1) ; DenseMtx_permuteRows(mtxB, oldToNewIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute rhs", t2 - t1) ; /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, sparsityflag, pivotingflag, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n nendD = %d, nentL = %d, nentU = %d", frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ; SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n front matrix initialized") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(frontETree, symmetryflag) ; factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ; fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, factorops, factorops/nzf) ; IVzero(6, stats) ; DVzero(9, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, &error, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; if ( rootchv != NULL ) { fprintf(msgFile, "\n\n factorization did not complete") ; for ( chv = rootchv ; chv != NULL ; chv = chv->next ) { fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d", chv->id, chv->nD, chv->nL, chv->nU) ; } } if ( error >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d\n", error) ; rc=error ; goto end_front; } fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns", stats[0], stats[1], stats[2]) ; if ( frontmtx->rowadjIVL != NULL ) { fprintf(msgFile, "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ; } if ( frontmtx->coladjIVL != NULL ) { fprintf(msgFile, ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ; } if ( frontmtx->upperblockIVL != NULL ) { fprintf(msgFile, "\n %d fronts, %d entries in upperblockIVL", frontmtx->nfront, frontmtx->upperblockIVL->tsize) ; } if ( frontmtx->lowerblockIVL != NULL ) { fprintf(msgFile, ", %d entries in lowerblockIVL", frontmtx->lowerblockIVL->tsize) ; } fprintf(msgFile, "\n %d entries in D, %d entries in L, %d entries in U", stats[3], stats[4], stats[5]) ; fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ; if ( FRONTMTX_IS_SYMMETRIC(frontmtx) || FRONTMTX_IS_HERMITIAN(frontmtx) ) { int nneg, npos, nzero ; FrontMtx_inertia(frontmtx, &nneg, &nzero, &npos) ; fprintf(msgFile, "\n %d negative, %d zero and %d positive eigenvalues", nneg, nzero, npos) ; fflush(msgFile) ; } cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } if ( msglvl > 1 ) { SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; if ( msglvl > 1 ) SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ---------------- solve the system ---------------- */ neqns = mtxB->nrow ; mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; zversion=INPMTX_IS_COMPLEX_ENTRIES(mtxA); for (k=0; k<imethod; k++) { DenseMtx_zero(mtxZ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n itemax %d", itermax) ; DVzero(6, cpus) ; MARKTIME(t1) ; switch ( method[k] ) { case BiCGStabR : if (zversion) rc=zbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=bicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case BiCGStabL : if (zversion) rc=zbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=bicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case TFQMRR : if (zversion) rc=ztfqmrr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=tfqmrr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case TFQMRL : if (zversion) rc=ztfqmrl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=tfqmrl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case PCGR : if (zversion) rc=zpcgr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=pcgr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case PCGL : if (zversion) rc=zpcgl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=pcgl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case MLBiCGStabR : mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, Ik, 1, neqns) ; Drand_setUniform(&drand, 0.0, 1.0) ; DenseMtx_fillRandomEntries(mtxQ, &drand) ; if (zversion) rc=zmlbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=mlbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); DenseMtx_free(mtxQ) ; break; case MLBiCGStabL : mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, Ik, 1, neqns) ; Drand_setUniform(&drand, 0.0, 1.0) ; DenseMtx_fillRandomEntries(mtxQ, &drand) ; if (zversion) rc=zmlbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=mlbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); DenseMtx_free(mtxQ) ; break; case BGMRESR: if (zversion) fprintf(msgFile, "\n\n *** BGMRESR complex version is not available " "at this moment. ") ; else rc=bgmresr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, iterout, itermax, &nouter, &ninner, conv_tol, msglvl, msgFile); break; case BGMRESL: if (zversion) fprintf(msgFile, "\n\n *** BGMRESR complex version is not available " "at this moment. ") ; else rc=bgmresl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, iterout, itermax, &nouter, &ninner, conv_tol, msglvl, msgFile); break; default: fprintf(msgFile, "\n\n *** Invalid method number ") ; } MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : solve the system", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ------------------------------------------------------------- permute the computed solution back into the original ordering ------------------------------------------------------------- */ MARKTIME(t1) ; DenseMtx_permuteRows(mtxZ, newToOldIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n permuted solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ------------- save solution ------------- */ if ( strcmp(slnFileName, "none") != 0 ) { DenseMtx_writeToFile(mtxZ, slnFileName) ; } /* ----------------- compute the error ----------------- */ if (!strcmp(rhsFileName, "none")) { DenseMtx_sub(mtxZ, mtxX) ; if (method[k] <8) { mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, 1, 1, neqns) ; rc=DenseMtx_initAsSubmatrix (mtxQ, mtxZ, 0, neqns-1, 0, 0); fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxQ)) ; DenseMtx_free(mtxQ) ; } else fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } if ( msglvl > 1 ) SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; } fprintf(msgFile, "\n--------- End of Method %d -------\n",method[k]) ; } /* ------------------------ free the working storage ------------------------ */ DenseMtx_free(mtxZ) ; end_front: ChvManager_free(chvmanager) ; SubMtxManager_free(mtxmanager) ; FrontMtx_free(frontmtx) ; IVL_free(symbfacIVL) ; IV_free(oldToNewIV) ; IV_free(newToOldIV) ; end_tree: ETree_free(frontETree) ; end_init: DenseMtx_free(mtxB) ; DenseMtx_free(mtxX) ; end_read: InpMtx_free(mtxA) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(rc) ; }
/* -------------------------------------------------------------------- purpose -- to setup two data structures for a QR serial or multithreaded factorization rowsIVL[J] -- list of rows of A to be assembled into front J firstnz[irow] -- column with location of leading nonzero of row in A created -- 98may29, cca -------------------------------------------------------------------- */ void FrontMtx_QR_setup ( FrontMtx *frontmtx, InpMtx *mtxA, IVL **prowsIVL, int **pfirstnz, int msglvl, FILE *msgFile ) { int count, irow, jcol, J, loc, neqns, nfront, nrowA, rowsize ; int *firstnz, *head, *link, *list, *rowind, *vtxToFront ; IVL *rowsIVL ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || mtxA == NULL || prowsIVL == NULL || pfirstnz == NULL || (msglvl > 0 && msgFile == NULL) ) { fprintf(stderr, "\n fatal error in FrontMtx_QR_setup()" "\n bad input\n") ; exit(-1) ; } neqns = FrontMtx_neqns(frontmtx) ; nfront = FrontMtx_nfront(frontmtx) ; vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ; /* ---------------------------------------------------------------- create the rowsIVL object, list(J) = list of rows that are assembled in front J firstnz[irowA] = first column with nonzero element in A(irowA,*) ---------------------------------------------------------------- */ InpMtx_changeCoordType(mtxA, INPMTX_BY_ROWS) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; nrowA = 1 + IVmax(InpMtx_nent(mtxA), InpMtx_ivec1(mtxA), &loc) ; if ( msglvl > 3 ) { fprintf(msgFile, "\n nrowA = %d ", nrowA) ; fflush(msgFile) ; } firstnz = IVinit(nrowA, -1) ; head = IVinit(nfront, -1) ; link = IVinit(nrowA, -1) ; for ( irow = nrowA - 1 ; irow >= 0 ; irow-- ) { InpMtx_vector(mtxA, irow, &rowsize, &rowind) ; if ( rowsize > 0 ) { firstnz[irow] = jcol = rowind[0] ; J = vtxToFront[jcol] ; link[irow] = head[J] ; head[J] = irow ; } } rowsIVL = IVL_new() ; IVL_init2(rowsIVL, IVL_CHUNKED, nfront, nrowA) ; list = IVinit(neqns, -1) ; for ( J = 0 ; J < nfront ; J++ ) { count = 0 ; for ( irow = head[J] ; irow != -1 ; irow = link[irow] ) { list[count++] = irow ; } if ( count > 0 ) { IVL_setList(rowsIVL, J, count, list) ; } } IVfree(head) ; IVfree(link) ; IVfree(list) ; /* --------------------------- set the pointers for return --------------------------- */ *prowsIVL = rowsIVL ; *pfirstnz = firstnz ; return ; }
/* ------------------------------------------------ given a permutation and a vector to map vertices into compressed vertices, create and return a permutation object for the compressed vertices. created -- 96may02, cca ------------------------------------------------ */ Perm * Perm_compress ( Perm *perm, IV *eqmapIV ) { int n, N, v, vcomp, vnew ; int *eqmap, *head, *link, *newToOld, *oldToNew, *vals ; Perm *perm2 ; /* --------------- check the input --------------- */ if ( perm == NULL || (n = perm->size) <= 0 || eqmapIV == NULL || n != IV_size(eqmapIV) || (eqmap = IV_entries(eqmapIV)) == NULL ) { fprintf(stderr, "\n fatal error in Perm_compress(%p,%p)" "\n bad input\n", perm, eqmapIV) ; if ( perm != NULL ) { Perm_writeStats(perm, stderr) ; } if ( eqmapIV != NULL ) { IV_writeStats(eqmapIV, stderr) ; } spoolesFatal(); } n = perm->size ; if ( (oldToNew = perm->oldToNew) == NULL ) { Perm_fillOldToNew(perm) ; oldToNew = perm->oldToNew ; } if ( (newToOld = perm->newToOld) == NULL ) { Perm_fillNewToOld(perm) ; newToOld = perm->newToOld ; } /* --------------------------------- create the new permutation object --------------------------------- */ N = 1 + IVmax(n, eqmap, &v) ; perm2 = Perm_new() ; Perm_initWithTypeAndSize(perm2, 3, N) ; /* -------------------------------------------- get the head/link structure for the vertices -------------------------------------------- */ head = IVinit(N, -1) ; link = IVinit(n, -1) ; for ( v = 0 ; v < n ; v++ ) { vcomp = eqmap[v] ; link[v] = head[vcomp] ; head[vcomp] = v ; } /* --------------------------- get the two vectors to sort --------------------------- */ IVramp(N, perm2->newToOld, 0, 1) ; vals = IVinit(N, -1) ; for ( vcomp = 0 ; vcomp < N ; vcomp++ ) { v = head[vcomp] ; vnew = perm->oldToNew[v] ; for ( v = link[v] ; v != -1 ; v = link[v] ) { if ( vnew > perm->oldToNew[v] ) { vnew = perm->oldToNew[v] ; } } vals[vcomp] = vnew ; } IV2qsortUp(N, vals, perm2->newToOld) ; for ( vcomp = 0 ; vcomp < N ; vcomp++ ) { perm2->oldToNew[perm2->newToOld[vcomp]] = vcomp ; } /* --------------------- free the working data --------------------- */ IVfree(head) ; IVfree(link) ; IVfree(vals) ; return(perm2) ; }