/* ------------------------------------ if the new-to-old vector is present, release it and free its entries created -- 96mar16, cca ------------------------------------ */ void Perm_releaseNewToOld ( Perm *perm ) { int size ; /* --------------- check the input --------------- */ if ( perm == NULL || perm->isPresent < 1 || perm->isPresent > 3 || (size = perm->size) <= 0 ) { fprintf(stderr, "\n fatal error in Perm_fillOldToNew(%p)" "\n bad input\n", perm) ; spoolesFatal(); } switch ( perm->isPresent ) { case 1 : IVfree(perm->newToOld) ; perm->isPresent = 0 ; break ; case 2 : break ; case 3 : IVfree(perm->newToOld) ; perm->isPresent = 1 ; break ; default : break ; } return ; }
/* ---------------------------------------------------------- check that the permutation object does house a permutation return value -- 1 if a true permutation 0 otherwise ---------------------------------------------------------- */ int Perm_checkPerm ( Perm *perm ) { int inew, iold, rc, size ; int *counts, *newToOld, *oldToNew ; /* --------------- check the input --------------- */ if ( perm == NULL || perm->isPresent < 1 || perm->isPresent > 3 || (size = perm->size) <= 0 ) { fprintf(stderr, "\n fatal error in Perm_checkPerm(%p)" "\n bad input\n", perm) ; spoolesFatal(); } rc = 1 ; counts = IVinit(size, 0) ; if ( (newToOld = perm->newToOld) != NULL ) { for ( inew = 0 ; inew < size ; inew++ ) { if ( 0 <= (iold = newToOld[inew]) && iold < size ) { counts[iold]++ ; } else { IVfree(counts) ; return(0) ; } } for ( iold = 0 ; iold < size ; iold++ ) { if ( counts[iold] != 1 ) { IVfree(counts) ; return(0) ; } } } if ( (oldToNew = perm->oldToNew) != NULL ) { IVzero(size, counts) ; for ( iold = 0 ; iold < size ; iold++ ) { if ( 0 <= (inew = oldToNew[iold]) && inew < size ) { counts[inew]++ ; } else { IVfree(counts) ; return(0) ; } } for ( inew = 0 ; inew < size ; inew++ ) { if ( counts[inew] != 1 ) { IVfree(counts) ; return(0) ; } } } IVfree(counts) ; return(rc) ; }
/* -------------------------------- purpose -- clear the data fields created -- 95sep27, cca -------------------------------- */ void Graph_clearData ( Graph *g ) { #if MYTRACE > 0 fprintf(stdout, "\n just inside Graph_clearData(%p)", g) ; fflush(stdout) ; #endif if ( g == NULL ) { fprintf(stderr, "\n fatal error in Graph_clearData(%p)" "\n graph is NULL\n", g) ; spoolesFatal(); } if ( g->adjIVL != NULL ) { IVL_free(g->adjIVL) ; } if ( g->vwghts != NULL ) { IVfree(g->vwghts) ; } if ( g->ewghtIVL != NULL ) { IVL_free(g->ewghtIVL) ; } Graph_setDefaultFields(g) ; #if MYTRACE > 0 fprintf(stdout, "\n leaving Graph_clearData(%p)", g) ; fflush(stdout) ; #endif return ; }
/* --------------------------------------------------------- set the compids[] vector using a global map from vertices to domains and interface nodes. DDmapIV -- IV object that contains the map from vertices to domains and interface nodes created -- 96mar17, cca --------------------------------------------------------- */ void GPart_DDviaProjection ( GPart *gpart, IV *DDmapIV ) { int *compids, *domainMap, *map, *vtxMap ; int dom, domloc, ndom, ndomloc, nvtx, vglob, vloc ; /* --------------- check the input --------------- */ if ( gpart == NULL || DDmapIV == NULL ) { fprintf(stderr, "\n fatal error in GPart_DDviaProjection(%p,%p)" "\n bad input\n", gpart, DDmapIV) ; exit(-1) ; } nvtx = gpart->nvtx ; compids = IV_entries(&gpart->compidsIV) ; /* -------------------------- find the number of domains -------------------------- */ vtxMap = IV_entries(&gpart->vtxMapIV) ; map = IV_entries(DDmapIV) ; ndom = IV_max(DDmapIV) ; /* ------------------------ check for a quick return ------------------------ */ if ( gpart->par == NULL ) { IVcopy(nvtx, compids, map) ; gpart->ncomp = ndom ; return ; } /* ---------------------------------------- fill compids[] with the local domain ids ---------------------------------------- */ domainMap = IVinit(ndom+1, -1) ; ndomloc = 0 ; for ( vloc = 0 ; vloc < nvtx ; vloc++ ) { vglob = vtxMap[vloc] ; if ( (dom = map[vglob]) > 0 ) { if ( (domloc = domainMap[dom]) == -1 ) { domloc = domainMap[dom] = ++ndomloc ; } compids[vloc] = domloc ; } else { compids[vloc] = 0 ; } } gpart->ncomp = ndomloc ; IVfree(domainMap) ; return ; }
/* ------------------------------------ clear the data fields created -- 95oct07, cca modified -- 95dec07, cca memory leak (bkl->regwghts) fixed ------------------------------------ */ void BKL_clearData ( BKL *bkl ) { if ( bkl == NULL ) { fprintf(stderr, "\n fatal error in BKL_clearData(%p)" "\n bad input\n", bkl) ; exit(-1) ; } if ( bkl->colors != NULL ) { IVfree(bkl->colors) ; } if ( bkl->bpg != NULL && bkl->bpg->graph != NULL && bkl->bpg->graph->vwghts == NULL && bkl->regwghts != NULL ) { IVfree(bkl->regwghts) ; } BKL_setDefaultFields(bkl) ; return ; }
/* -------------------------------------------------- clear the data fields, releasing allocated storage created -- 98mar19, cca -------------------------------------------------- */ void SolveMap_clearData ( SolveMap *solvemap ) { /* --------------- check the input --------------- */ if ( solvemap == NULL ) { fprintf(stderr, "\n fatal error in SolveMap_clearData(%p)" "\n bad input\n", solvemap) ; exit(-1) ; } /* ----------------------------------------------- free any storage held in the int vector objects ----------------------------------------------- */ if ( solvemap->owners != NULL ) { IVfree(solvemap->owners) ; } if ( solvemap->rowidsUpper != NULL ) { IVfree(solvemap->rowidsUpper) ; } if ( solvemap->colidsUpper != NULL ) { IVfree(solvemap->colidsUpper) ; } if ( solvemap->mapUpper != NULL ) { IVfree(solvemap->mapUpper) ; } if ( solvemap->rowidsLower != NULL ) { IVfree(solvemap->rowidsLower) ; } if ( solvemap->colidsLower != NULL ) { IVfree(solvemap->colidsLower) ; } if ( solvemap->mapLower != NULL ) { IVfree(solvemap->mapLower) ; } /* ---------------------- set the default fields ---------------------- */ SolveMap_setDefaultFields(solvemap) ; return ; }
/* ------------------------------------- return an IV object filled with the weights of the component's boundaries created -- 96oct21, cca ------------------------------------- */ IV * GPart_bndWeightsIV ( GPart *gpart ) { Graph *graph ; int icomp, ii, ncomp, nvtx, v, vsize, vwght, w ; int *bnd, *compids, *cweights, *mark, *vadj, *vwghts ; IV *bndIV ; /* --------------- check the input --------------- */ if ( gpart == NULL || (graph = gpart->g) == NULL ) { fprintf(stderr, "\n fatal error in GPart_bndWeightsIV(%p)" "\n bad input\n", gpart) ; exit(-1) ; } nvtx = gpart->nvtx ; ncomp = gpart->ncomp ; compids = IV_entries(&gpart->compidsIV) ; cweights = IV_entries(&gpart->cweightsIV) ; vwghts = graph->vwghts ; bndIV = IV_new() ; IV_init(bndIV, 1 + ncomp, NULL) ; IV_fill(bndIV, 0) ; bnd = IV_entries(bndIV) ; mark = IVinit(ncomp+1, -1) ; for ( v = 0 ; v < nvtx ; v++ ) { if ( compids[v] == 0 ) { vwght = (vwghts == NULL) ? 1 : vwghts[v] ; Graph_adjAndSize(graph, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( (icomp = compids[w]) != 0 && mark[icomp] != v ) { mark[icomp] = v ; bnd[icomp] += vwght ; } } } } IVfree(mark) ; return(bndIV) ; }
/* ----------------------------------------- permute the rows of the matrix A(*,*) = A(index(*),*) this method calls A2_sortRowsUp but does not overwrite the index[] vector created -- 98apr15, cca ----------------------------------------- */ void A2_permuteRows ( A2 *mtx, int nrow, int index[] ) { int *rowids ; /* --------------- check the input --------------- */ if ( mtx == NULL || nrow < 0 || nrow > mtx->n1 || index == NULL ) { fprintf(stderr, "\n fatal error in A2_permuteRows(%p,%d,%p)" "\n bad input\n", mtx, nrow, index) ; exit(-1) ; } rowids = IVinit(nrow, -1) ; IVcopy(nrow, rowids, index) ; A2_sortRowsUp(mtx, nrow, rowids) ; IVfree(rowids) ; return ; }
/* ----------------------------------------- permute the columns of the matrix A(*,*) = A(*,index(*)) this method calls A2_sortColumnsUp but does not overwrite the index[] vector created -- 98apr15, cca ----------------------------------------- */ void A2_permuteColumns ( A2 *mtx, int ncol, int index[] ) { int *colids ; /* --------------- check the input --------------- */ if ( mtx == NULL || ncol < 0 || ncol > mtx->n2 || index == NULL ) { fprintf(stderr, "\n fatal error in A2_permuteColumns(%p,%d,%p)" "\n bad input\n", mtx, ncol, index) ; exit(-1) ; } colids = IVinit(ncol, -1) ; IVcopy(ncol, colids, index) ; A2_sortColumnsUp(mtx, ncol, colids) ; IVfree(colids) ; return ; }
/* ------------------------------ purpose -- to permute a vector y[*] := y[index[*]] created -- 95sep22, cca ------------------------------ */ void IVperm ( int size, int y[], int index[] ) { if ( size > 0 ) { if ( y == NULL || index == NULL ) { fprintf(stderr, "\n fatal error in IVperm, invalid data" "\n size = %d, y = %p, index = %p\n", size, y, index) ; exit(-1) ; } else { int *x ; int i ; x = IVinit2(size) ; IVcopy(size, x, y) ; for ( i = 0 ; i < size ; i++ ) { y[i] = x[index[i]] ; } IVfree(x) ; } } return ; }
/* --------------------------------------------- purpose -- to broadcast a front tree object from one process to all the others created -- 98may21, cca --------------------------------------------- */ ETree * ETree_MPI_Bcast ( ETree *etree, int root, int msglvl, FILE *msgFile, MPI_Comm comm ) { int myid, nvtx, nfront, nint ; int *buffer ; /* ------------- find identity ------------- */ MPI_Comm_rank(comm, &myid) ; if ( myid == root ) { /* -------------------------------------------- this process owns the front tree, allocate a continuous buffer and load the data into it. -------------------------------------------- */ nfront = ETree_nfront(etree) ; nvtx = ETree_nvtx(etree) ; nint = 3 + 5*nfront + nvtx ; buffer = IVinit(nint, -1) ; buffer[0] = nfront ; buffer[1] = nvtx ; buffer[2] = ETree_root(etree) ; IVcopy(nfront, buffer + 3, ETree_par(etree)) ; IVcopy(nfront, buffer + 3 + nfront, ETree_fch(etree)) ; IVcopy(nfront, buffer + 3 + 2*nfront, ETree_sib(etree)) ; IVcopy(nfront, buffer + 3 + 3*nfront, ETree_nodwghts(etree)) ; IVcopy(nfront, buffer + 3 + 4*nfront, ETree_bndwghts(etree)) ; IVcopy(nvtx, buffer + 3 + 5*nfront, ETree_vtxToFront(etree)) ; /* ------------------------------------ send the size of the buffer and then the buffer to the other processors ------------------------------------ */ MPI_Bcast(&nint, 1, MPI_INT, root, comm) ; MPI_Bcast(buffer, nint, MPI_INT, root, comm) ; } else { /* -------------------------------------------- this process will receive the front tree. clear its data, receive the number of int's, then receive the buffer -------------------------------------------- */ if ( etree != NULL ) { ETree_free(etree) ; } MPI_Bcast(&nint, 1, MPI_INT, root, comm) ; buffer = IVinit(nint, -1) ; MPI_Bcast(buffer, nint, MPI_INT, root, comm) ; /* ---------------------------------------- create an ETree object and fill its data ---------------------------------------- */ etree = ETree_new() ; nfront = buffer[0] ; nvtx = buffer[1] ; ETree_init1(etree, nfront, nvtx) ; etree->tree->n = nfront ; etree->tree->root = buffer[2] ; IVcopy(nfront, ETree_par(etree), buffer + 3) ; IVcopy(nfront, ETree_fch(etree), buffer + 3 + nfront) ; IVcopy(nfront, ETree_sib(etree), buffer + 3 + 2*nfront) ; IVcopy(nfront, ETree_nodwghts(etree), buffer + 3 + 3*nfront) ; IVcopy(nfront, ETree_bndwghts(etree), buffer + 3 + 4*nfront) ; IVcopy(nvtx, ETree_vtxToFront(etree), buffer + 3 + 5*nfront) ; } /* --------------- free the buffer --------------- */ IVfree(buffer) ; return(etree) ; }
/* ------------------------------------------------------------- purpose -- the IVL object ivl and IV object ownersIV are both found on each process. the ownersIV object is identical over all the processes, and owners[ii] tells which processes owns list ii of the ivl object. on return from this method, the ivl object is replicated over all the processes. each process sends the lists that it owns to all the other processes. created -- 98apr03, cca ------------------------------------------------------------- */ void IVL_MPI_allgather ( IVL *ivl, IV *ownersIV, int stats[], int msglvl, FILE *msgFile, int firsttag, MPI_Comm comm ) { int count, destination, ii, ilist, incount, jlist, jproc, left, maxcount, myid, nlist, nmylists, notherlists, nowners, nproc, offset, outcount, right, size, source, tag ; int *counts, *inbuffer, *list, *outbuffer, *owners ; MPI_Status status ; /* --------------- check the input --------------- */ if ( ivl == NULL || ownersIV == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n ivl = %p, ownersIV = %p\n", ivl, ownersIV) ; exit(-1) ; } /* ---------------------------------------------- get id of self, # of processes and # of fronts ---------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; nlist = ivl->nlist ; IV_sizeAndEntries(ownersIV, &nowners, &owners) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n inside IVL_MPI_allgather()" "\n nproc = %d, myid = %d, nlist = %d, nowners = %d", nproc, myid, nlist, nowners) ; fflush(msgFile) ; } if ( nlist != nowners || owners == NULL ) { fprintf(stderr, "\n fatal error in IVL_MPI_allgather()" "\n nlist = %d, nowners = %d, owners = %p\n", nlist, nowners, owners) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n ivl") ; IVL_writeForHumanEye(ivl, msgFile) ; fprintf(msgFile, "\n\n ownersIV") ; IV_writeForHumanEye(ownersIV, msgFile) ; fflush(msgFile) ; } /* ----------------------------------------------- step 1 : determine the size of the message that this process will send to the others ----------------------------------------------- */ for ( ilist = 0, outcount = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] < 0 || owners[ilist] >= nproc ) { fprintf(stderr, "\n owners[%d] = %d", ilist, owners[ilist]) ; exit(-1) ; } if ( owners[ilist] == myid ) { outcount += 2 ; IVL_listAndSize(ivl, ilist, &size, &list) ; outcount += size ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n outcount = %d", outcount) ; fflush(msgFile) ; } /* ---------------------------------------------------- do an all-to-all gather/scatter counts[jproc] = # of int's in the message from jproc ---------------------------------------------------- */ counts = IVinit(nproc, 0) ; counts[myid] = outcount ; MPI_Allgather((void *) &counts[myid], 1, MPI_INT, (void *) counts, 1, MPI_INT, comm) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } /* ----------------------------- set up the in and out buffers ----------------------------- */ if ( outcount > 0 ) { outbuffer = IVinit(outcount, -1) ; for ( ilist = nmylists = 0, ii = 1 ; ilist < nlist ; ilist++ ) { if ( owners[ilist] == myid ) { nmylists++ ; IVL_listAndSize(ivl, ilist, &size, &list) ; outbuffer[ii++] = ilist ; outbuffer[ii++] = size ; if ( size > 0 ) { IVcopy(size, &outbuffer[ii], list) ; ii += size ; } } } outbuffer[0] = nmylists ; if ( ii != outcount ) { fprintf(stderr, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; fprintf(msgFile, "\n myid = %d, ii = %d, outcount = %d", myid, ii, outcount) ; exit(-1) ; } } else { outbuffer = NULL ; } maxcount = IVmax(nproc, counts, &jproc) ; if ( maxcount > 0 ) { inbuffer = IVinit(maxcount, -1) ; } else { inbuffer = NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n outbuffer %p, maxcount %d, inbuffer %p", outbuffer, maxcount, inbuffer) ; fflush(msgFile) ; } /* ------------------------------------- step 2: loop over the other processes send and receive information ------------------------------------- */ for ( offset = 1, tag = firsttag ; offset < nproc ; offset++, tag++ ) { right = (myid + offset) % nproc ; if ( offset <= myid ) { left = myid - offset ; } else { left = nproc + myid - offset ; } if ( outcount > 0 ) { destination = right ; stats[0]++ ; stats[2] += outcount*sizeof(int) ; } else { destination = MPI_PROC_NULL ; } incount = counts[left] ; if ( incount > 0 ) { source = left ; stats[1]++ ; stats[3] += incount*sizeof(int) ; } else { source = MPI_PROC_NULL ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n offset %d, source %d, destination %d", offset, source, destination) ; fflush(msgFile) ; } /* ----------------- do a send/receive ----------------- */ MPI_Sendrecv(outbuffer, outcount, MPI_INT, destination, tag, inbuffer, incount, MPI_INT, source, tag, comm, &status) ; if ( source != MPI_PROC_NULL ) { MPI_Get_count(&status, MPI_INT, &count) ; if ( count != incount ) { fprintf(stderr, "\n 1. fatal error in IVL_MPI_allgather()" "\n proc %d : source = %d, count = %d, incount = %d\n", myid, source, count, incount) ; exit(-1) ; } } /* ---------------------------- set the values in the vector ---------------------------- */ notherlists = inbuffer[0] ; for ( ilist = 0, ii = 1 ; ilist < notherlists ; ilist++ ) { jlist = inbuffer[ii++] ; size = inbuffer[ii++] ; if ( size > 0 ) { IVL_setList(ivl, jlist, size, &inbuffer[ii]) ; ii += size ; } } if ( ii != incount ) { fprintf(msgFile, "\n ii = %d, incount = %d", ii, incount) ; fprintf(stderr, "\n ii = %d, incount = %d", ii, incount) ; exit(-1) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n after setting values") ; IVL_writeForHumanEye(ivl, msgFile) ; fflush(msgFile) ; } } /* ------------------------ free the working storage ------------------------ */ IVfree(counts) ; if ( outbuffer != NULL ) { IVfree(outbuffer) ; } if ( inbuffer != NULL ) { IVfree(inbuffer) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n leaving IVL_MPI_gatherall()") ; fflush(msgFile) ; } return ; }
/* --------------------------------------------------- purpose -- to read an IVL object from a binary file return value -- 1 if success, 0 if failure created -- 95sep29, cca --------------------------------------------------- */ int IVL_readFromBinaryFile ( IVL *ivl, FILE *fp ) { int nlist, rc, type ; int itemp[3] ; int *sizes ; /* --------------- check the input --------------- */ if ( ivl == NULL || fp == NULL ) { fprintf(stderr, "\n fatal error in IVL_readFromBinaryFile(%p,%p)" "\n bad input\n", ivl, fp) ; return(0) ; } switch ( ivl->type ) { case IVL_CHUNKED : case IVL_SOLO : break ; default : fprintf(stderr, "\n error in IVL_readBinaryFile(%p,%p)" "\n bad type = %d", ivl, fp, ivl->type) ; return(0) ; } /* ------------------------------------------- save the ivl type and clear the data fields ------------------------------------------- */ type = ivl->type ; IVL_clearData(ivl) ; /* ----------------------------------- read in the three scalar parameters type, # of lists, # of indices ----------------------------------- */ if ( (rc = fread((void *) itemp, sizeof(int), 3, fp)) != 3 ) { fprintf(stderr, "\n error in IVL_readFromBinaryFile(%p,%p)" "\n itemp(3) : %d items of %d read\n", ivl, fp, rc, 3) ; return(0) ; } nlist = itemp[1] ; /* -------------------------- read in the sizes[] vector -------------------------- */ sizes = IVinit(nlist, 0) ; if ( (rc = fread((void *) sizes, sizeof(int), nlist, fp)) != nlist ) { fprintf(stderr, "\n error in IVL_readFromBinaryFile(%p,%p)" "\n sizes(%d) : %d items of %d read\n", ivl, fp, nlist, rc, nlist) ; return(0) ; } /* --------------------- initialize the object --------------------- */ IVL_init3(ivl, type, nlist, sizes) ; IVfree(sizes) ; /* ------------------- read in the indices ------------------- */ switch ( type ) { case IVL_SOLO : { int ilist, size ; int *ind ; for ( ilist = 0 ; ilist < nlist ; ilist++ ) { IVL_listAndSize(ivl, ilist, &size, &ind) ; if ( (rc = fread((void *) ind, sizeof(int), size, fp)) != size ) { fprintf(stderr, "\n error in IVL_readFromBinaryFile(%p,%p)" "\n list %d, %d items of %d read\n", ivl, fp, ilist, rc, size) ; return(0) ; } } } break ; case IVL_CHUNKED : { /* -------------------------------------------------------- read in the indices into the contiguous block of storage -------------------------------------------------------- */ if ( (rc = fread((void *) ivl->chunk->base, sizeof(int), ivl->tsize, fp)) != ivl->tsize ) { fprintf(stderr, "\n error in IVL_readFromBinaryFile(%p,%p)" "\n indices(%d) : %d items of %d read\n", ivl, fp, ivl->tsize, rc, ivl->tsize) ; return(0) ; } } break ; } return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------------------------------------ generate a random matrix and test a matrix-matrix multiply method. the output is a matlab file to test correctness. created -- 98jan29, cca -------------------------------------------------------------------- */ { DenseMtx *X, *Y, *Y2 ; double alpha[2] ; double alphaImag, alphaReal, t1, t2 ; double *zvec ; Drand *drand ; int col, dataType, ii, msglvl, ncolA, nitem, nops, nrhs, nrowA, nrowX, nrowY, nthread, row, seed, storageMode, symflag, transposeflag ; int *colids, *rowids ; InpMtx *A ; FILE *msgFile ; if ( argc != 15 ) { fprintf(stdout, "\n\n %% usage : %s msglvl msgFile symflag storageMode " "\n %% nrow ncol nent nrhs seed alphaReal alphaImag nthread" "\n %% msglvl -- message level" "\n %% msgFile -- message file" "\n %% dataType -- type of matrix entries" "\n %% 1 -- real" "\n %% 2 -- complex" "\n %% symflag -- symmetry flag" "\n %% 0 -- symmetric" "\n %% 1 -- hermitian" "\n %% 2 -- nonsymmetric" "\n %% storageMode -- storage mode" "\n %% 1 -- by rows" "\n %% 2 -- by columns" "\n %% 3 -- by chevrons, (requires nrow = ncol)" "\n %% transpose -- transpose flag" "\n %% 0 -- Y := Y + alpha * A * X" "\n %% 1 -- Y := Y + alpha * A^H * X, nonsymmetric only" "\n %% 2 -- Y := Y + alpha * A^T * X, nonsymmetric only" "\n %% nrowA -- number of rows in A" "\n %% ncolA -- number of columns in A" "\n %% nitem -- number of items" "\n %% nrhs -- number of right hand sides" "\n %% seed -- random number seed" "\n %% alphaReal -- y := y + alpha*A*x" "\n %% alphaImag -- y := y + alpha*A*x" "\n %% nthread -- # of threads" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } dataType = atoi(argv[3]) ; symflag = atoi(argv[4]) ; storageMode = atoi(argv[5]) ; transposeflag = atoi(argv[6]) ; nrowA = atoi(argv[7]) ; ncolA = atoi(argv[8]) ; nitem = atoi(argv[9]) ; nrhs = atoi(argv[10]) ; seed = atoi(argv[11]) ; alphaReal = atof(argv[12]) ; alphaImag = atof(argv[13]) ; nthread = atoi(argv[14]) ; fprintf(msgFile, "\n %% %s " "\n %% msglvl -- %d" "\n %% msgFile -- %s" "\n %% dataType -- %d" "\n %% symflag -- %d" "\n %% storageMode -- %d" "\n %% transposeflag -- %d" "\n %% nrowA -- %d" "\n %% ncolA -- %d" "\n %% nitem -- %d" "\n %% nrhs -- %d" "\n %% seed -- %d" "\n %% alphaReal -- %e" "\n %% alphaImag -- %e" "\n %% nthread -- %d" "\n", argv[0], msglvl, argv[2], dataType, symflag, storageMode, transposeflag, nrowA, ncolA, nitem, nrhs, seed, alphaReal, alphaImag, nthread) ; fflush(msgFile) ; if ( dataType != 1 && dataType != 2 ) { fprintf(stderr, "\n invalid value %d for dataType\n", dataType) ; spoolesFatal(); } if ( symflag != 0 && symflag != 1 && symflag != 2 ) { fprintf(stderr, "\n invalid value %d for symflag\n", symflag) ; spoolesFatal(); } if ( storageMode != 1 && storageMode != 2 && storageMode != 3 ) { fprintf(stderr, "\n invalid value %d for storageMode\n", storageMode) ; spoolesFatal(); } if ( transposeflag < 0 || transposeflag > 2 ) { fprintf(stderr, "\n error, transposeflag = %d, must be 0, 1 or 2", transposeflag) ; spoolesFatal(); } if ( (transposeflag == 1 && symflag != 2) || (transposeflag == 2 && symflag != 2) ) { fprintf(stderr, "\n error, transposeflag = %d, symflag = %d", transposeflag, symflag) ; spoolesFatal(); } if ( transposeflag == 1 && dataType != 2 ) { fprintf(stderr, "\n error, transposeflag = %d, dataType = %d", transposeflag, dataType) ; spoolesFatal(); } if ( symflag == 1 && dataType != 2 ) { fprintf(stderr, "\n symflag = 1 (hermitian), dataType != 2 (complex)") ; spoolesFatal(); } if ( nrowA <= 0 || ncolA <= 0 || nitem <= 0 ) { fprintf(stderr, "\n invalid value: nrow = %d, ncol = %d, nitem = %d", nrowA, ncolA, nitem) ; spoolesFatal(); } if ( symflag < 2 && nrowA != ncolA ) { fprintf(stderr, "\n invalid data: symflag = %d, nrow = %d, ncol = %d", symflag, nrowA, ncolA) ; spoolesFatal(); } alpha[0] = alphaReal ; alpha[1] = alphaImag ; /* ---------------------------- initialize the matrix object ---------------------------- */ A = InpMtx_new() ; InpMtx_init(A, storageMode, dataType, 0, 0) ; drand = Drand_new() ; /* ---------------------------------- generate a vector of nitem triples ---------------------------------- */ rowids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, nrowA) ; Drand_fillIvector(drand, nitem, rowids) ; colids = IVinit(nitem, -1) ; Drand_setUniform(drand, 0, ncolA) ; Drand_fillIvector(drand, nitem, colids) ; Drand_setUniform(drand, 0.0, 1.0) ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { zvec = DVinit(nitem, 0.0) ; Drand_fillDvector(drand, nitem, zvec) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { zvec = ZVinit(nitem, 0.0, 0.0) ; Drand_fillDvector(drand, 2*nitem, zvec) ; } /* ----------------------------------- assemble the entries entry by entry ----------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n\n A = zeros(%d,%d) ;", nrowA, ncolA) ; } if ( symflag == 1 ) { /* ---------------- hermitian matrix ---------------- */ for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] == colids[ii] ) { zvec[2*ii+1] = 0.0 ; } if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } else if ( symflag == 0 ) { /* ---------------- symmetric matrix ---------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputRealEntry(A, row, col, zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { if ( rowids[ii] <= colids[ii] ) { row = rowids[ii] ; col = colids[ii] ; } else { row = colids[ii] ; col = rowids[ii] ; } InpMtx_inputComplexEntry(A, row, col, zvec[2*ii], zvec[2*ii+1]) ; } } } else { /* ------------------- nonsymmetric matrix ------------------- */ if ( INPMTX_IS_REAL_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputRealEntry(A, rowids[ii], colids[ii], zvec[ii]) ; } } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { for ( ii = 0 ; ii < nitem ; ii++ ) { InpMtx_inputComplexEntry(A, rowids[ii], colids[ii], zvec[2*ii], zvec[2*ii+1]) ; } } } InpMtx_changeStorageMode(A, INPMTX_BY_VECTORS) ; DVfree(zvec) ; if ( symflag == 0 || symflag == 1 ) { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 4*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 16*A->nent*nrhs ; } } else { if ( INPMTX_IS_REAL_ENTRIES(A) ) { nops = 2*A->nent*nrhs ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { nops = 8*A->nent*nrhs ; } } if ( msglvl > 1 ) { /* ------------------------------------------- write the assembled matrix to a matlab file ------------------------------------------- */ InpMtx_writeForMatlab(A, "A", msgFile) ; if ( symflag == 0 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = A(k,j) ;" "\n end" "\n end", nrowA, ncolA) ; } else if ( symflag == 1 ) { fprintf(msgFile, "\n for k = 1:%d" "\n for j = k+1:%d" "\n A(j,k) = ctranspose(A(k,j)) ;" "\n end" "\n end", nrowA, ncolA) ; } } /* ------------------------------- generate dense matrices X and Y ------------------------------- */ if ( transposeflag == 0 ) { nrowX = ncolA ; nrowY = nrowA ; } else { nrowX = nrowA ; nrowY = ncolA ; } X = DenseMtx_new() ; Y = DenseMtx_new() ; Y2 = DenseMtx_new() ; if ( INPMTX_IS_REAL_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_REAL, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_REAL, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } else if ( INPMTX_IS_COMPLEX_ENTRIES(A) ) { DenseMtx_init(X, SPOOLES_COMPLEX, 0, 0, nrowX, nrhs, 1, nrowX) ; Drand_fillDvector(drand, 2*nrowX*nrhs, DenseMtx_entries(X)) ; DenseMtx_init(Y, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; Drand_fillDvector(drand, 2*nrowY*nrhs, DenseMtx_entries(Y)) ; DenseMtx_init(Y2, SPOOLES_COMPLEX, 0, 0, nrowY, nrhs, 1, nrowY) ; DVcopy(2*nrowY*nrhs, DenseMtx_entries(Y2), DenseMtx_entries(Y)) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n X = zeros(%d,%d) ;", nrowX, nrhs) ; DenseMtx_writeForMatlab(X, "X", msgFile) ; fprintf(msgFile, "\n Y = zeros(%d,%d) ;", nrowY, nrhs) ; DenseMtx_writeForMatlab(Y, "Y", msgFile) ; } /* -------------------------------------------- perform the matrix-matrix multiply in serial -------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_sym_mmm(A, Y, alpha, X) ; } else if ( symflag == 1 ) { InpMtx_herm_mmm(A, Y, alpha, X) ; } else if ( symflag == 2 ) { InpMtx_nonsym_mmm(A, Y, alpha, X) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_H(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_nonsym_mmm_T(A, Y, alpha, X) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y, "Z", msgFile) ; fprintf(msgFile, "\n maxerr = max(Z - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f serial mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* -------------------------------------------------------- perform the matrix-matrix multiply in multithreaded mode -------------------------------------------------------- */ if ( msglvl > 1 ) { fprintf(msgFile, "\n alpha = %20.12e + %20.2e*i;", alpha[0], alpha[1]); fprintf(msgFile, "\n Z = zeros(%d,1) ;", nrowY) ; } if ( transposeflag == 0 ) { MARKTIME(t1) ; if ( symflag == 0 ) { InpMtx_MT_sym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 1 ) { InpMtx_MT_herm_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } else if ( symflag == 2 ) { InpMtx_MT_nonsym_mmm(A, Y2, alpha, X, nthread, msglvl, msgFile) ; } MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*A*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 1 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_H(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*ctranspose(A)*X) ") ; fprintf(msgFile, "\n") ; } } else if ( transposeflag == 2 ) { MARKTIME(t1) ; InpMtx_MT_nonsym_mmm_T(A, Y2, alpha, X, nthread, msglvl, msgFile) ; MARKTIME(t2) ; if ( msglvl > 1 ) { DenseMtx_writeForMatlab(Y2, "Z2", msgFile) ; fprintf(msgFile, "\n maxerr2 = max(Z2 - Y - alpha*transpose(A)*X) ") ; fprintf(msgFile, "\n") ; } } fprintf(msgFile, "\n %% %d ops, %.3f time, %.3f MT mflops", nops, t2 - t1, 1.e-6*nops/(t2 - t1)) ; /* ------------------------ free the working storage ------------------------ */ InpMtx_free(A) ; DenseMtx_free(X) ; DenseMtx_free(Y) ; DenseMtx_free(Y2) ; IVfree(rowids) ; IVfree(colids) ; Drand_free(drand) ; fclose(msgFile) ; return(1) ; }
/* ---------------------------------------------------- set the component weights from the compids[] vector created -- 95oct05, cca modified -- 95nov29, cca ---------------------------------------------------- */ void GPart_setCweights ( GPart *gpart ) { Graph *g ; int ierr, ii, last, ncomp, now, nvtx, u, usize, v, w ; int *compids, *cweights, *list, *uadj, *vwghts ; /* -------------- check the data -------------- */ if ( gpart == NULL ) { fprintf(stderr, "\n fatal error in GPart_setCweights(%p)" "\n bad input\n", gpart) ; exit(-1) ; } if ( (nvtx = gpart->nvtx) <= 0 || (g = gpart->g) == NULL ) { fprintf(stderr, "\n fatal error in GPart_setCweights(%p)" "\n bad Gpart object\n", gpart) ; exit(-1) ; } /* ---------------------------------------------------------- set the component id of all non-multisector vertices to -1 ---------------------------------------------------------- */ compids = IV_entries(&gpart->compidsIV) ; for ( v = 0 ; v < nvtx ; v++ ) { if ( compids[v] != 0 ) { compids[v] = -1 ; } } /* ---------------------------------------------------------- compute the number of components and set the component ids ---------------------------------------------------------- */ list = IVinit(nvtx, -1) ; ncomp = 0 ; for ( v = 0 ; v < nvtx ; v++ ) { if ( compids[v] == -1 ) { compids[v] = ++ncomp ; now = last = 0 ; list[now] = v ; while ( now <= last ) { u = list[now++] ; Graph_adjAndSize(g, u, &usize, &uadj) ; for ( ii = 0 ; ii < usize ; ii++ ) { if ( (w = uadj[ii]) < nvtx && compids[w] == -1 ) { compids[w] = ncomp ; list[++last] = w ; } } } } } /* ---------------------------- set the number of components ---------------------------- */ gpart->ncomp = ncomp ; /* ------------------------- set the component weights ------------------------- */ IV_setSize(&gpart->cweightsIV, 1 + ncomp) ; cweights = IV_entries(&gpart->cweightsIV) ; IVzero(1 + ncomp, cweights) ; if ( (vwghts = gpart->g->vwghts) != NULL ) { for ( v = 0 ; v < nvtx ; v++ ) { cweights[compids[v]] += vwghts[v] ; } } else { for ( v = 0 ; v < nvtx ; v++ ) { cweights[compids[v]]++ ; } } /* ------------------------ free the working storage ------------------------ */ IVfree(list) ; return ; }
/* ------------------------------------------------------------ compute an old-to-new ordering for local nested dissection in two dimensions n1 -- number of grid points in first direction n2 -- number of grid points in second direction p1 -- number of domains in first direction p2 -- number of domains in second direction dsizes1 -- domain sizes in first direction, size p1 if NULL, then we construct our own dsizes2 -- domain sizes in second direction, size p2 if NULL, then we construct our own oldToNew -- old-to-new permutation vector note : the following must hold n1 > 0, n2 >0, n1 >= 2*p1 - 1, n2 >= 2*p2 - 1, p2 > 1 sum(dsizes1) = n1 - p1 + 1 and sum(dsizes2) = n2 - p2 + 1 created -- 95nov16, cca ------------------------------------------------------------ */ void localND2D ( int n1, int n2, int p1, int p2, int dsizes1[], int dsizes2[], int oldToNew[] ) { int i, idom, ij, isw, j, jdom, jsw, length1, length2, m, m1, m2, msize, now, nvtx ; int *length1s, *length2s, *isws, *jsws, *temp ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || 2*p1 - 1 > n1 || 2*p2 - 1 > n2 || oldToNew == NULL ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n bad input\n", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew) ; exit(-1) ; } if ( p2 <= 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n p2 = %d, must be > 1", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, p2) ; exit(-1) ; } if ( dsizes1 != NULL && IVsum(p1, dsizes1) != n1 - p1 + 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVsum(p1, dsizes1) = %d != %d = n1 - p1 + 1 ", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVsum(p1, dsizes1), n1 - p1 + 1) ; return ; } if ( dsizes1 != NULL && IVmin(p1, dsizes1, &i) <= 0 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVmin(p1, dsizes1) = %d must be > 0", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVmin(p1, dsizes1, &i)) ; return ; } if ( dsizes2 != NULL && IVsum(p2, dsizes2) != n2 - p2 + 1 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVsum(p2, dsizes2) = %d != %d = n2 - p2 + 1 ", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVsum(p2, dsizes2), n2 - p2 + 1) ; return ; } if ( dsizes2 != NULL && IVmin(p2, dsizes2, &i) <= 0 ) { fprintf(stderr, "\n fatal error in localND2D(%d,%d,%d,%d,%p,%p,%p)" "\n IVmin(p2, dsizes2) = %d must be > 0", n1, n2, p1, p2, dsizes1, dsizes2, oldToNew, IVmin(p2, dsizes2, &i)) ; return ; } nvtx = n1*n2 ; /* ---------------------------------- construct the domain sizes vectors ---------------------------------- */ if ( dsizes1 == NULL ) { length1s = IVinit(p1, 0) ; length1 = (n1 - p1 + 1) / p1 ; m1 = (n1 - p1 + 1) % p1 ; for ( i = 0 ; i < m1 ; i++ ) { length1s[i] = length1 + 1 ; } for ( ; i < p1 ; i++ ) { length1s[i] = length1 ; } } else { length1s = dsizes1 ; } if ( dsizes2 == NULL ) { length2s = IVinit(p2, 0) ; length2 = (n2 - p2 + 1) / p2 ; m2 = (n2 - p2 + 1) % p2 ; for ( i = 0 ; i < m2 ; i++ ) { length2s[i] = length2 + 1 ; } for ( ; i < p2 ; i++ ) { length2s[i] = length2 ; } } else { length2s = dsizes2 ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside localND2D") ; fprintf(stdout, "\n n1 = %d, n2 = %d, p1 = %d, p2 = %d", n1, n2, p1, p2) ; fprintf(stdout, "\n length1s[%d] = ", p1) ; IVfp80(stdout, p1, length1s, 12) ; fprintf(stdout, "\n length2s[%d] = ", p2) ; IVfp80(stdout, p2, length2s, 12) ; #endif /* --------------------------------------- determine the first and last domain ids and the array of southwest points --------------------------------------- */ isws = IVinit(p1, -1) ; for ( idom = 0, isw = 0 ; idom < p1 ; idom++ ) { isws[idom] = isw ; isw += length1s[idom] + 1 ; } jsws = IVinit(p2, -1) ; for ( jdom = 0, jsw = 0 ; jdom < p2 ; jdom++ ) { jsws[jdom] = jsw ; jsw += length2s[jdom] + 1 ; } #if MYDEBUG > 1 fprintf(stdout, "\n isws[%d] = ", p1) ; IVfp80(stdout, p1, isws, 12) ; fprintf(stdout, "\n jsws[%d] = ", p2) ; IVfp80(stdout, p2, jsws, 12) ; #endif /* ---------------------------------------------------------------- create a temporary permutation vector for the domains' orderings ---------------------------------------------------------------- */ msize = IVmax(p1, length1s, &i) * IVmax(p2, length2s, &i) ; temp = IVinit(msize, -1) ; /* ------------------------ fill in the domain nodes ------------------------ */ now = 0 ; for ( jdom = 0; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { length1 = length1s[idom] ; isw = isws[idom] ; mkNDperm(length1, length2, 1, temp, 0, length1-1, 0, length2-1, 0, 0) ; for ( m = 0 ; m < length1*length2 ; m++ ) { ij = temp[m] ; i = isw + (ij % length1) ; j = jsw + (ij / length1) ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } #if MYDEBUG > 2 fprintf(stdout, "\n old-to-new after domains are numbered") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the lower separator nodes --------------------------------- */ for ( jdom = 0 ; jdom < (p2/2) ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; if ( isw > 0 ) { i = isw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } if ( isw > 0 && jsw > 0 ) { i = isw - 1 ; j = jsw - 1 ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } if ( jsw > 0 ) { j = jsw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the lower separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the upper separator nodes --------------------------------- */ for ( jdom = p2 - 1 ; jdom >= (p2/2) ; jdom-- ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = p1 - 1 ; idom >= 0 ; idom-- ) { isw = isws[idom] ; length1 = length1s[idom] ; if ( isw + length1 < n1 ) { i = isw + length1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } if ( isw + length1 < n1 && jsw + length2 < n2 ) { i = isw + length1 ; j = jsw + length2 ; ij = i + j*n1 ; oldToNew[ij] = now++ ; } if ( jsw + length2 < n2 ) { j = jsw + length2 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the upper separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* ------------------------------- fill in the top level separator ------------------------------- */ m1 = p2 / 2 ; for ( jdom = 0, j = 0 ; jdom < m1 ; jdom++ ) { j += length2s[jdom] + 1 ; } j-- ; for ( i = 0 ; i < n1 ; i++ ) { ij = i + j*n1 ; oldToNew[ij] = now++ ; } /* ------------------------ free the working storage ------------------------ */ if ( dsizes1 == NULL ) { IVfree(length1s) ; } if ( dsizes2 == NULL ) { IVfree(length2s) ; } IVfree(isws) ; IVfree(jsws) ; IVfree(temp) ; return ; }
/* ---------------------------------------------- sort the rows of the matrix in ascending order of the rowids[] vector. on return, rowids is in asending order. return value is the number of row swaps made. created -- 98apr15, cca ---------------------------------------------- */ int A2_sortRowsUp ( A2 *mtx, int nrow, int rowids[] ) { int ii, minrow, minrowid, nswap, target ; /* --------------- check the input --------------- */ if ( mtx == NULL || mtx->n1 < nrow || nrow < 0 || rowids == NULL ) { fprintf(stderr, "\n fatal error in A2_sortRowsUp(%p,%d,%p)" "\n bad input\n", mtx, nrow, rowids) ; if ( mtx != NULL ) { A2_writeStats(mtx, stderr) ; } exit(-1) ; } if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) { fprintf(stderr, "\n fatal error in A2_sortRowsUp(%p,%d,%p)" "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", mtx, nrow, rowids, mtx->type) ; exit(-1) ; } nswap = 0 ; if ( mtx->inc1 == 1 ) { double *dvtmp ; int jcol, ncol ; int *ivtmp ; /* --------------------------------------------------- matrix is stored by columns, so permute each column --------------------------------------------------- */ ivtmp = IVinit(nrow, -1) ; if ( A2_IS_REAL(mtx) ) { dvtmp = DVinit(nrow, 0.0) ; } else if ( A2_IS_COMPLEX(mtx) ) { dvtmp = DVinit(2*nrow, 0.0) ; } IVramp(nrow, ivtmp, 0, 1) ; IV2qsortUp(nrow, rowids, ivtmp) ; ncol = mtx->n2 ; for ( jcol = 0 ; jcol < ncol ; jcol++ ) { if ( A2_IS_REAL(mtx) ) { DVcopy(nrow, dvtmp, A2_column(mtx, jcol)) ; DVgather(nrow, A2_column(mtx, jcol), dvtmp, ivtmp) ; } else if ( A2_IS_COMPLEX(mtx) ) { ZVcopy(nrow, dvtmp, A2_column(mtx, jcol)) ; ZVgather(nrow, A2_column(mtx, jcol), dvtmp, ivtmp) ; } } IVfree(ivtmp) ; DVfree(dvtmp) ; } else { /* ---------------------------------------- use a simple insertion sort to swap rows ---------------------------------------- */ for ( target = 0 ; target < nrow ; target++ ) { minrow = target ; minrowid = rowids[target] ; for ( ii = target + 1 ; ii < nrow ; ii++ ) { if ( minrowid > rowids[ii] ) { minrow = ii ; minrowid = rowids[ii] ; } } if ( minrow != target ) { rowids[minrow] = rowids[target] ; rowids[target] = minrowid ; A2_swapRows(mtx, target, minrow) ; nswap++ ; } } } return(nswap) ; }
/* ------------------------------------------------- sort the columns of the matrix in ascending order of the colids[] vector. on return, colids is in asending order. return value is the number of column swaps made. created -- 98apr15, cca ------------------------------------------------- */ int A2_sortColumnsUp ( A2 *mtx, int ncol, int colids[] ) { int ii, mincol, mincolid, nswap, target ; /* --------------- check the input --------------- */ if ( mtx == NULL || mtx->n2 < ncol || ncol < 0 || colids == NULL ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad input\n", mtx, ncol, colids) ; if ( mtx != NULL ) { A2_writeStats(mtx, stderr) ; } exit(-1) ; } if ( ! (A2_IS_REAL(mtx) || A2_IS_COMPLEX(mtx)) ) { fprintf(stderr, "\n fatal error in A2_sortColumnsUp(%p,%d,%p)" "\n bad type %d, must be SPOOLES_REAL or SPOOLES_COMPLEX\n", mtx, ncol, colids, mtx->type) ; exit(-1) ; } nswap = 0 ; if ( mtx->inc2 == 1 ) { double *dvtmp ; int irow, nrow ; int *ivtmp ; /* --------------------------------------------------- matrix is stored by rows, so permute each row --------------------------------------------------- */ ivtmp = IVinit(ncol, -1) ; if ( A2_IS_REAL(mtx) ) { dvtmp = DVinit(ncol, 0.0) ; } else if ( A2_IS_COMPLEX(mtx) ) { dvtmp = DVinit(2*ncol, 0.0) ; } IVramp(ncol, ivtmp, 0, 1) ; IV2qsortUp(ncol, colids, ivtmp) ; nrow = mtx->n1 ; for ( irow = 0 ; irow < nrow ; irow++ ) { if ( A2_IS_REAL(mtx) ) { DVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; DVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } else if ( A2_IS_COMPLEX(mtx) ) { ZVcopy(ncol, dvtmp, A2_row(mtx, irow)) ; ZVgather(ncol, A2_row(mtx, irow), dvtmp, ivtmp) ; } } IVfree(ivtmp) ; DVfree(dvtmp) ; } else { /* ---------------------------------------- use a simple insertion sort to swap cols ---------------------------------------- */ for ( target = 0 ; target < ncol ; target++ ) { mincol = target ; mincolid = colids[target] ; for ( ii = target + 1 ; ii < ncol ; ii++ ) { if ( mincolid > colids[ii] ) { mincol = ii ; mincolid = colids[ii] ; } } if ( mincol != target ) { colids[mincol] = colids[target] ; colids[target] = mincolid ; A2_swapColumns(mtx, target, mincol) ; nswap++ ; } } } return(nswap) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ------------------------------------ test the copyEntriesToVector routine created -- 98may01, cca, ------------------------------------ */ { Chv *chvJ, *chvI ; double imag, real, t1, t2 ; double *dvec, *entries ; Drand *drand ; FILE *msgFile ; int count, first, ierr, ii, iilast, ipivot, irow, jcol, jj, jjlast, maxnent, mm, msglvl, ncol, nD, nent, nentD, nentL, nentL11, nentL21, nentU, nentU11, nentU12, nL, npivot, nrow, nU, pivotingflag, seed, storeflag, symflag, total, type ; int *colind, *pivotsizes, *rowind ; if ( argc != 10 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile nD nU type symflag " "\n pivotingflag storeflag seed" "\n msglvl -- message level" "\n msgFile -- message file" "\n nD -- # of rows and columns in the (1,1) block" "\n nU -- # of columns in the (1,2) block" "\n type -- entries type" "\n 1 --> real" "\n 2 --> complex" "\n symflag -- symmetry flag" "\n 0 --> symmetric" "\n 1 --> nonsymmetric" "\n pivotingflag -- pivoting flag" "\n if symflag = 1 and pivotingflag = 1 then" "\n construct pivotsizes[] vector" "\n endif" "\n storeflag -- flag to denote how to store entries" "\n 0 --> store by rows" "\n 1 --> store by columns" "\n seed -- random number seed" "\n", argv[0]) ; return(0) ; } if ( (msglvl = atoi(argv[1])) < 0 ) { fprintf(stderr, "\n message level must be positive\n") ; exit(-1) ; } if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n unable to open file %s\n", argv[2]) ; return(-1) ; } nD = atoi(argv[3]) ; nU = atoi(argv[4]) ; type = atoi(argv[5]) ; symflag = atoi(argv[6]) ; pivotingflag = atoi(argv[7]) ; storeflag = atoi(argv[8]) ; seed = atoi(argv[9]) ; if ( msglvl > 0 ) { switch ( storeflag ) { case 0 : fprintf(msgFile, "\n\n %% STORE BY ROWS") ; break ; case 1 : fprintf(msgFile, "\n\n %% STORE BY COLUMNS") ; break ; default : fprintf(stderr, "\n bad value %d for storeflag", storeflag) ; break ; } } nL = nU ; if ( symflag == SPOOLES_NONSYMMETRIC ) { pivotingflag = 0 ; } /* -------------------------------------- initialize the random number generator -------------------------------------- */ drand = Drand_new() ; Drand_init(drand) ; Drand_setNormal(drand, 0.0, 1.0) ; Drand_setSeed(drand, seed) ; /* -------------------------- initialize the chvJ object -------------------------- */ MARKTIME(t1) ; chvJ = Chv_new() ; Chv_init(chvJ, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix objects", t2 - t1) ; nent = Chv_nent(chvJ) ; entries = Chv_entries(chvJ) ; if ( CHV_IS_REAL(chvJ) ) { Drand_fillDvector(drand, nent, entries) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { Drand_fillDvector(drand, 2*nent, entries) ; } Chv_columnIndices(chvJ, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; if ( CHV_IS_NONSYMMETRIC(chvJ) ) { Chv_rowIndices(chvJ, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron a") ; Chv_writeForMatlab(chvJ, "a", msgFile) ; fflush(msgFile) ; } /* -------------------------- initialize the chvI object -------------------------- */ MARKTIME(t1) ; chvI = Chv_new() ; Chv_init(chvI, 0, nD, nL, nU, type, symflag) ; MARKTIME(t2) ; fprintf(msgFile, "\n %% CPU : %.3f to initialize matrix objects", t2 - t1) ; Chv_zero(chvI) ; Chv_columnIndices(chvI, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; if ( CHV_IS_NONSYMMETRIC(chvI) ) { Chv_rowIndices(chvI, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; } if ( symflag == 0 && pivotingflag == 1 ) { /* ------------------------------ create the pivotsizes[] vector ------------------------------ */ Drand_setUniform(drand, 1, 2.999) ; pivotsizes = IVinit(nD, 0) ; Drand_fillIvector(drand, nD, pivotsizes) ; /* fprintf(msgFile, "\n initial pivotsizes[] : ") ; IVfp80(msgFile, nD, pivotsizes, 80, &ierr) ; */ for ( npivot = count = 0 ; npivot < nD ; npivot++ ) { count += pivotsizes[npivot] ; if ( count > nD ) { pivotsizes[npivot]-- ; count-- ; } if ( count == nD ) { break ; } } npivot++ ; /* fprintf(msgFile, "\n final pivotsizes[] : ") ; IVfp80(msgFile, npivot, pivotsizes, 80, &ierr) ; */ } else { npivot = 0 ; pivotsizes = NULL ; } /* -------------------------------------------------- first test: copy lower, diagonal and upper entries -------------------------------------------------- */ if ( CHV_IS_NONSYMMETRIC(chvJ) ) { nentL = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_LOWER); } else { nentL = 0 ; } nentD = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_DIAGONAL) ; nentU = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_UPPER) ; maxnent = nentL ; if ( maxnent < nentD ) { maxnent = nentD ; } if ( maxnent < nentU ) { maxnent = nentU ; } if ( CHV_IS_REAL(chvJ) ) { dvec = DVinit(maxnent, 0.0) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { dvec = DVinit(2*maxnent, 0.0) ; } if ( CHV_IS_NONSYMMETRIC(chvJ) ) { /* -------------------------------------- copy the entries in the lower triangle, then move into the chvI object -------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_LOWER, storeflag) ; if ( nent != nentL ) { fprintf(stderr, "\n error: nentL = %d, nent = %d", nentL, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = 0, mm = 0 ; irow < nrow ; irow++ ) { jjlast = (irow < nD) ? irow - 1 : nD - 1 ; for ( jj = 0 ; jj <= jjlast ; jj++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jj, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jj, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = jcol + 1 ; irow < nrow ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; /* fprintf(msgFile, "\n %% mm = %d, a(%d,%d) = %20.12e + %20.12e*i", mm, irow, jcol, real, imag) ; */ Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } /* --------------------------------------- copy the entries in the diagonal matrix then move into the chvI object --------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_DIAGONAL, storeflag) ; if ( nent != nentD ) { fprintf(stderr, "\n error: nentD = %d, nent = %d", nentD, nent) ; exit(-1) ; } if ( pivotsizes == NULL ) { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, jcol, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, jcol, jcol, real, imag) ; } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; } mm++ ; irow++ ; } else { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow+1, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, irow+1, real) ; mm++ ; irow += 2 ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow+1, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, irow+1, real, imag) ; mm++ ; irow += 2 ; } } } } /* -------------------------------------- copy the entries in the upper triangle, then move into the chvI object -------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_UPPER, storeflag) ; if ( nent != nentU ) { fprintf(stderr, "\n error: nentU = %d, nent = %d", nentU, nent) ; exit(-1) ; } if ( storeflag == 1 ) { if ( pivotsizes == NULL ) { for ( jcol = mm = 0 ; jcol < ncol ; jcol++ ) { iilast = (jcol < nD) ? jcol - 1 : nD - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } } } else { for ( ipivot = jcol = mm = 0 ; ipivot < npivot ; ipivot++ ) { iilast = jcol - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; if ( pivotsizes[ipivot] == 2 ) { for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; } } for ( jcol = nD ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } else { if ( pivotsizes == NULL ) { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = irow + 1 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { for ( jcol = irow + 1 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } irow++ ; } else { for ( jcol = irow + 2 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } for ( jcol = irow + 2 ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, jcol, real, imag) ; } } irow += 2 ; } } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron b") ; Chv_writeForMatlab(chvI, "b", msgFile) ; fprintf(msgFile, "\n\n emtx1 = abs(a - b) ; enorm1 = max(max(emtx1))") ; fflush(msgFile) ; } DVfree(dvec) ; /* ----------------------------------------------------- second test: copy lower (1,1), lower (2,1), diagonal, upper(1,1) and upper(1,2) blocks ----------------------------------------------------- */ if ( CHV_IS_NONSYMMETRIC(chvJ) ) { nentL11 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_LOWER_11) ; nentL21 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_LOWER_21) ; } else { nentL11 = 0 ; nentL21 = 0 ; } nentD = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_DIAGONAL) ; nentU11 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_STRICT_UPPER_11) ; nentU12 = Chv_countEntries(chvJ, npivot, pivotsizes, CHV_UPPER_12) ; maxnent = nentL11 ; if ( maxnent < nentL21 ) { maxnent = nentL21 ; } if ( maxnent < nentD ) { maxnent = nentD ; } if ( maxnent < nentU11 ) { maxnent = nentU11 ; } if ( maxnent < nentU12 ) { maxnent = nentU12 ; } fprintf(msgFile, "\n %% nentL11 = %d, nentL21 = %d" "\n %% nentD = %d, nentU11 = %d, nentU12 = %d", nentL11, nentL21, nentD, nentU11, nentU12) ; if ( CHV_IS_REAL(chvJ) ) { dvec = DVinit(maxnent, 0.0) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { dvec = DVinit(2*maxnent, 0.0) ; } Chv_zero(chvI) ; if ( CHV_IS_NONSYMMETRIC(chvJ) ) { /* ------------------------------------------ copy the entries in the lower (1,1) block, then move into the chvI object ------------------------------------------ */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_LOWER_11, storeflag) ; if ( nent != nentL11 ) { fprintf(stderr, "\n error: nentL = %d, nent = %d", nentL, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = 0, mm = 0 ; irow < nD ; irow++ ) { jjlast = (irow < nD) ? irow - 1 : nD - 1 ; for ( jj = 0 ; jj <= jjlast ; jj++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jj, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jj, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = jcol + 1 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } /* ------------------------------------------ copy the entries in the lower (2,1) block, then move into the chvI object ------------------------------------------ */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_LOWER_21, storeflag); if ( nent != nentL21 ) { fprintf(stderr, "\n error: nentL21 = %d, nent = %d", nentL21, nent) ; exit(-1) ; } if ( storeflag == 0 ) { for ( irow = nD, mm = 0 ; irow < nrow ; irow++ ) { for ( jcol = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++ ) { for ( irow = nD ; irow < nrow ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } } /* --------------------------------------- copy the entries in the diagonal matrix then move into the chvI object --------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_DIAGONAL, storeflag) ; if ( nent != nentD ) { fprintf(stderr, "\n error: nentD = %d, nent = %d", nentD, nent) ; exit(-1) ; } if ( pivotsizes == NULL ) { for ( jcol = 0, mm = 0 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, jcol, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, jcol, jcol, real, imag) ; } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; } mm++ ; irow++ ; } else { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow, irow+1, real) ; mm++ ; real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, irow+1, real) ; mm++ ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, irow+1, real, imag) ; mm++ ; real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, irow+1, real, imag) ; mm++ ; } irow += 2 ; } } } /* ----------------------------------------- copy the entries in the upper (1,1) block then move into the chvI object ----------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_STRICT_UPPER_11, storeflag) ; if ( nent != nentU11 ) { fprintf(stderr, "\n error: nentU11 = %d, nent = %d", nentU11, nent) ; exit(-1) ; } if ( storeflag == 1 ) { if ( pivotsizes == NULL ) { for ( jcol = mm = 0 ; jcol < nD ; jcol++ ) { iilast = (jcol < nD) ? jcol - 1 : nD - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } } } else { for ( ipivot = jcol = mm = 0 ; ipivot < npivot ; ipivot++ ) { iilast = jcol - 1 ; for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; if ( pivotsizes[ipivot] == 2 ) { for ( ii = 0 ; ii <= iilast ; ii++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, ii, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, ii, jcol, real, imag) ; } } jcol++ ; } } } } else { if ( pivotsizes == NULL ) { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = irow + 1 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( ipivot = irow = mm = 0 ; ipivot < npivot ; ipivot++ ) { if ( pivotsizes[ipivot] == 1 ) { for ( jcol = irow + 1 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } irow++ ; } else { for ( jcol = irow + 2 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } for ( jcol = irow + 2 ; jcol < nD ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow+1, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow+1, jcol, real, imag) ; } } irow += 2 ; } } } } /* ----------------------------------------- copy the entries in the upper (1,2) block then move into the chvI object ----------------------------------------- */ nent = Chv_copyEntriesToVector(chvJ, npivot, pivotsizes, maxnent, dvec, CHV_UPPER_12, storeflag) ; if ( nent != nentU12 ) { fprintf(stderr, "\n error: nentU12 = %d, nent = %d", nentU12, nent) ; exit(-1) ; } if ( storeflag == 1 ) { for ( jcol = nD, mm = 0 ; jcol < ncol ; jcol++ ) { for ( irow = 0 ; irow < nD ; irow++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } else { for ( irow = mm = 0 ; irow < nD ; irow++ ) { for ( jcol = nD ; jcol < ncol ; jcol++, mm++ ) { if ( CHV_IS_REAL(chvJ) ) { real = dvec[mm] ; Chv_setRealEntry(chvI, irow, jcol, real) ; } else if ( CHV_IS_COMPLEX(chvJ) ) { real = dvec[2*mm] ; imag = dvec[2*mm+1] ; Chv_setComplexEntry(chvI, irow, jcol, real, imag) ; } } } } if ( msglvl > 3 ) { fprintf(msgFile, "\n %% chevron b") ; Chv_writeForMatlab(chvI, "b", msgFile) ; fprintf(msgFile, "\n\n emtx2 = abs(a - b) ; enorm2 = max(max(emtx2))") ; fprintf(msgFile, "\n\n [ enorm1 enorm2]") ; fflush(msgFile) ; } /* ------------------------ free the working storage ------------------------ */ if ( pivotsizes != NULL ) { IVfree(pivotsizes) ; } Chv_free(chvJ) ; Chv_free(chvI) ; Drand_free(drand) ; DVfree(dvec) ; fprintf(msgFile, "\n") ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* --------------------------------------------------------------- read BPG from file and get the Dulmage-Mendelsohn decomposition created -- 96mar08, cca --------------------------------------------------------------- */ { char *inBPGFileName ; double t1, t2 ; int ierr, msglvl, rc ; int *dmflags, *stats ; BPG *bpg ; FILE *msgFile ; if ( argc != 4 ) { fprintf(stdout, "\n\n usage : %s msglvl msgFile inFile " "\n msglvl -- message level" "\n msgFile -- message file" "\n inFile -- input file, must be *.bpgf or *.bpgb" "\n", argv[0]) ; return(0) ; } msglvl = atoi(argv[1]) ; if ( strcmp(argv[2], "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], argv[2]) ; return(-1) ; } inBPGFileName = argv[3] ; fprintf(msgFile, "\n %s " "\n msglvl -- %d" "\n msgFile -- %s" "\n inFile -- %s" "\n", argv[0], msglvl, argv[2], inBPGFileName) ; fflush(msgFile) ; /* ---------------------- read in the BPG object ---------------------- */ if ( strcmp(inBPGFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(0) ; } bpg = BPG_new() ; MARKTIME(t1) ; rc = BPG_readFromFile(bpg, inBPGFileName) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %9.5f : read in graph from file %s", t2 - t1, inBPGFileName) ; if ( rc != 1 ) { fprintf(msgFile, "\n return value %d from BPG_readFromFile(%p,%s)", rc, bpg, inBPGFileName) ; exit(-1) ; } fprintf(msgFile, "\n\n after reading BPG object from file %s", inBPGFileName) ; if ( msglvl > 2 ) { BPG_writeForHumanEye(bpg, msgFile) ; } else { BPG_writeStats(bpg, msgFile) ; } fflush(msgFile) ; /* -------------------------------------------- test out the max flow DMdecomposition method -------------------------------------------- */ dmflags = IVinit(bpg->nX + bpg->nY, -1) ; stats = IVinit(6, 0) ; MARKTIME(t1) ; BPG_DMviaMaxFlow(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via maxflow", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMviaMaxFlow" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ------------------------------------------ test out the matching DMcomposition method ------------------------------------------ */ IVfill(bpg->nX + bpg->nY, dmflags, -1) ; IVfill(6, stats, -1) ; MARKTIME(t1) ; BPG_DMdecomposition(bpg, dmflags, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %9.5f : find DM via matching", t2 - t1) ; if ( msglvl > 0 ) { fprintf(msgFile, "\n\n BPG_DMdecomposition" "\n |X_I| = %6d, |X_E| = %6d, |X_R| = %6d" "\n |Y_I| = %6d, |Y_E| = %6d, |Y_R| = %6d", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n dmflags") ; IVfp80(msgFile, bpg->nX + bpg->nY, dmflags, 80, &ierr) ; fflush(msgFile) ; } /* ---------------- free the storage ---------------- */ IVfree(dmflags) ; IVfree(stats) ; BPG_free(bpg) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(1) ; }
/* --------------------------------------------------- purpose -- move the solution from the individual SubMtx objects into the global solution SubMtx object created -- 98feb20 --------------------------------------------------- */ void FrontMtx_storeSolution ( FrontMtx *frontmtx, int owners[], int myid, SubMtxManager *manager, SubMtx *p_mtx[], DenseMtx *solmtx, int msglvl, FILE *msgFile ) { char localsol ; SubMtx *xmtxJ ; double *sol, *xJ ; int inc1, inc2, irow, jrhs, J, kk, ncolJ, neqns, nfront, nJ, nrhs, nrowInSol, nrowJ ; int *colindJ, *colmap, *rowind ; if ( (nrowInSol = solmtx->nrow) != (neqns = frontmtx->neqns) ) { /* -------------------------------------------------------------- the solution matrix is only part of the total solution matrix. (this happens in an MPI environment where the rhs is partitioned among the processors.) create a map from the global row indices to the indices local to this solution matrix. -------------------------------------------------------------- */ colmap = IVinit(neqns, -1) ; rowind = solmtx->rowind ; if ( msglvl > 1 ) { fprintf(msgFile, "\n solmtx->rowind") ; IVfprintf(msgFile, solmtx->nrow, rowind) ; fflush(msgFile) ; } for ( irow = 0 ; irow < nrowInSol ; irow++ ) { colmap[rowind[irow]] = irow ; } localsol = 'T' ; if ( msglvl > 1 ) { fprintf(msgFile, "\n colmap") ; IVfprintf(msgFile, neqns, colmap) ; fflush(msgFile) ; } } else { localsol = 'F' ; } DenseMtx_dimensions(solmtx, &neqns, &nrhs) ; nfront = FrontMtx_nfront(frontmtx) ; for ( J = 0 ; J < nfront ; J++ ) { if ( (owners == NULL || owners[J] == myid) && (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; xmtxJ = p_mtx[J] ; if ( xmtxJ == NULL ) { fprintf(stderr, "\n fatal error in storeSolution(%d)" "\n thread %d, xmtxJ = NULL", J, myid) ; exit(-1) ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n storing solution for front %d", J) ; SubMtx_writeForHumanEye(xmtxJ, msgFile) ; fflush(msgFile) ; } if ( localsol == 'T' ) { /* ------------------------------------------------------ map the global row indices into the local row indices ------------------------------------------------------ */ if ( msglvl > 1 ) { fprintf(msgFile, "\n global row indices") ; IVfprintf(msgFile, nJ, colindJ) ; fflush(msgFile) ; } for ( irow = 0 ; irow < nJ ; irow++ ) { colindJ[irow] = colmap[colindJ[irow]] ; } if ( msglvl > 1 ) { fprintf(msgFile, "\n local row indices") ; IVfprintf(msgFile, nJ, colindJ) ; fflush(msgFile) ; } } /* ---------------------------------- store x_{J,*} into solution matrix ---------------------------------- */ sol = DenseMtx_entries(solmtx) ; SubMtx_denseInfo(xmtxJ, &nrowJ, &ncolJ, &inc1, &inc2, &xJ) ; if ( FRONTMTX_IS_REAL(frontmtx) ) { for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { for ( irow = 0 ; irow < nJ ; irow++ ) { kk = colindJ[irow] ; sol[kk] = xJ[irow] ; } sol += neqns ; xJ += nJ ; } } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) { for ( jrhs = 0 ; jrhs < nrhs ; jrhs++ ) { for ( irow = 0 ; irow < nJ ; irow++ ) { kk = colindJ[irow] ; sol[2*kk] = xJ[2*irow] ; sol[2*kk+1] = xJ[2*irow+1] ; } sol += 2*neqns ; xJ += 2*nJ ; } } /* fprintf(msgFile, "\n solution for front %d stored", J) ; */ SubMtxManager_releaseObject(manager, xmtxJ) ; if ( localsol == 'T' ) { /* ----------------------------------------------------------- map the local row indices back into the global row indices ----------------------------------------------------------- */ for ( irow = 0 ; irow < nJ ; irow++ ) { colindJ[irow] = rowind[colindJ[irow]] ; } } } } if ( localsol == 'T' ) { IVfree(colmap) ; } /* fprintf(msgFile, "\n\n SOLUTION") ; DenseMtx_writeForHumanEye(solmtx, msgFile) ; */ return ; }
/* ------------------------------------------------------------ compute an old-to-new ordering for local nested dissection in three dimensions n1 -- number of grid points in first direction n2 -- number of grid points in second direction n3 -- number of grid points in third direction p1 -- number of domains in first direction p2 -- number of domains in second direction p3 -- number of domains in third direction dsizes1 -- domain sizes in first direction, size p1 if NULL, then we construct our own dsizes2 -- domain sizes in second direction, size p2 if NULL, then we construct our own dsizes3 -- domain sizes in third direction, size p3 if NULL, then we construct our own oldToNew -- old-to-new permutation vector note : the following must hold n1 > 0, n2 >0, n3 > 0, n1 >= 2*p1 - 1, n2 >= 2*p2 - 1, n3 >= 2*p3 - 1, p3 > 1 sum(dsizes1) = n1 - p1 + 1, sum(dsizes2) = n2 - p2 + 1 sum(dsizes3) = n3 - p3 + 1 created -- 95nov16, cca ------------------------------------------------------------ */ void localND3D ( int n1, int n2, int n3, int p1, int p2, int p3, int dsizes1[], int dsizes2[], int dsizes3[], int oldToNew[] ) { int i, idom, ijk, isw, j, jdom, jsw, k, kdom, ksw, length1, length2, length3, m, m1, m2, m3, msize, now, nvtx ; int *length1s, *length2s, *length3s, *isws, *jsws, *ksws, *temp ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || n3 <= 0 || 2*p1 - 1 > n1 || 2*p2 - 1 > n2 || 2*p3 - 1 > n3 ) { fprintf(stderr, "\n error in input data") ; return ; } if ( p3 <= 1 ) { fprintf(stderr, "\n p3 must be > 1") ; return ; } if ( oldToNew == NULL ) { fprintf(stderr, "\n oldToNew = NULL") ; return ; } if ( dsizes1 != NULL && IVsum(p1, dsizes1) != n1 - p1 + 1 ) { fprintf(stderr, "\n IVsum(p1, dsizes1) != n1 - p1 + 1 ") ; return ; } if ( dsizes2 != NULL && IVsum(p2, dsizes2) != n2 - p2 + 1 ) { fprintf(stderr, "\n IVsum(p2, dsizes2) != n2 - p2 + 1 ") ; return ; } if ( dsizes3 != NULL && IVsum(p3, dsizes3) != n3 - p3 + 1 ) { fprintf(stderr, "\n IVsum(p3, dsizes3) != n3 - p3 + 1 ") ; return ; } if ( dsizes1 != NULL && IVmin(p1, dsizes1, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p1, dsizes1) must be > 0") ; return ; } if ( dsizes2 != NULL && IVmin(p2, dsizes2, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p2, dsizes2) must be > 0") ; return ; } if ( dsizes3 != NULL && IVmin(p3, dsizes3, &i) <= 0 ) { fprintf(stderr, "\n IVmin(p3, dsizes3) must be > 0") ; return ; } nvtx = n1*n2*n3 ; /* ---------------------------------- construct the domain sizes vectors ---------------------------------- */ if ( dsizes1 == NULL ) { length1s = IVinit(p1, 0) ; length1 = (n1 - p1 + 1) / p1 ; m1 = (n1 - p1 + 1) % p1 ; for ( i = 0 ; i < m1 ; i++ ) { length1s[i] = length1 + 1 ; } for ( ; i < p1 ; i++ ) { length1s[i] = length1 ; } } else { length1s = dsizes1 ; } if ( dsizes2 == NULL ) { length2s = IVinit(p2, 0) ; length2 = (n2 - p2 + 1) / p2 ; m2 = (n2 - p2 + 1) % p2 ; for ( i = 0 ; i < m2 ; i++ ) { length2s[i] = length2 + 1 ; } for ( ; i < p2 ; i++ ) { length2s[i] = length2 ; } } else { length2s = dsizes2 ; } if ( dsizes3 == NULL ) { length3s = IVinit(p3, 0) ; length3 = (n3 - p3 + 1) / p3 ; m3 = (n3 - p3 + 1) % p3 ; for ( i = 0 ; i < m3 ; i++ ) { length3s[i] = length3 + 1 ; } for ( ; i < p3 ; i++ ) { length3s[i] = length3 ; } } else { length3s = dsizes3 ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside localND3D") ; fprintf(stdout, "\n n1 = %d, n2 = %d, n3 = %d, p1 = %d, p2 = %dm p3 = %d", n1, n2, n3, p1, p2, p3) ; fprintf(stdout, "\n length1s[%d] = ", p1) ; IVfp80(stdout, p1, length1s, 12) ; fprintf(stdout, "\n length2s[%d] = ", p2) ; IVfp80(stdout, p2, length2s, 12) ; fprintf(stdout, "\n length3s[%d] = ", p3) ; IVfp80(stdout, p3, length3s, 13) ; #endif /* --------------------------------------- determine the first and last domain ids and the array of southwest points --------------------------------------- */ isws = IVinit(p1, -1) ; for ( idom = 0, isw = 0 ; idom < p1 ; idom++ ) { isws[idom] = isw ; isw += length1s[idom] + 1 ; } jsws = IVinit(p2, -1) ; for ( jdom = 0, jsw = 0 ; jdom < p2 ; jdom++ ) { jsws[jdom] = jsw ; jsw += length2s[jdom] + 1 ; } ksws = IVinit(p3, -1) ; for ( kdom = 0, ksw = 0 ; kdom < p3 ; kdom++ ) { ksws[kdom] = ksw ; ksw += length3s[kdom] + 1 ; } #if MYDEBUG > 1 fprintf(stdout, "\n isws[%d] = ", p1) ; IVfp80(stdout, p1, isws, 12) ; fprintf(stdout, "\n jsws[%d] = ", p2) ; IVfp80(stdout, p2, jsws, 12) ; fprintf(stdout, "\n ksws[%d] = ", p3) ; IVfp80(stdout, p3, ksws, 12) ; #endif /* ---------------------------------------------------------------- create a temporary permutation vector for the domains' orderings ---------------------------------------------------------------- */ msize = IVmax(p1, length1s, &i) * IVmax(p2, length2s, &i) * IVmax(p3, length3s, &k) ; temp = IVinit(msize, -1) ; /* ------------------------ fill in the domain nodes ------------------------ */ now = 0 ; for ( kdom = 0 ; kdom < p3 ; kdom++ ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = 0 ; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; /* fprintf(stdout, "\n domain (%d,%d,%d), size %d x %d x %d", idom, jdom, kdom, length1, length2, length3) ; fprintf(stdout, "\n (isw, jsw, ksw) = (%d, %d, %d)", isw, jsw, ksw) ; */ mkNDperm(length1, length2, length3, temp, 0, length1-1, 0, length2-1, 0, length3-1) ; for ( m = 0 ; m < length1*length2*length3 ; m++ ) { ijk = temp[m] ; /* fprintf(stdout, "\n m = %d, ijk = %d", m, ijk) ; */ k = ksw + ijk / (length1*length2) ; ijk = ijk % (length1*length2) ; j = jsw + ijk / length1 ; i = isw + ijk % length1 ; /* fprintf(stdout, ", (i, j, k) = (%d, %d, %d)", i, j, k) ; */ ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n old-to-new after domains are numbered") ; fp3DGrid(n1, n2, n3, oldToNew, stdout) ; #endif /* --------------------------------- fill in the lower separator nodes --------------------------------- */ for ( kdom = 0 ; kdom < (p3/2) ; kdom++ ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = 0 ; jdom < p2 ; jdom++ ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = 0 ; idom < p1 ; idom++ ) { isw = isws[idom] ; length1 = length1s[idom] ; /* ------- 3 faces ------- */ if ( isw > 0 ) { i = isw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( jsw > 0 ) { j = jsw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( ksw > 0 ) { k = ksw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } /* ----------- three edges ----------- */ if ( isw > 0 && jsw > 0 ) { i = isw - 1 ; j = jsw - 1 ; for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( isw > 0 && ksw > 0 ) { i = isw - 1 ; k = ksw - 1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( jsw > 0 && ksw > 0 ) { j = jsw - 1 ; k = ksw - 1 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ---------------- one corner point ---------------- */ if ( isw > 0 && jsw > 0 && ksw > 0 ) { i = isw - 1 ; j = jsw - 1 ; k = ksw - 1 ; ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the lower separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* --------------------------------- fill in the upper separator nodes --------------------------------- */ for ( kdom = p3 - 1 ; kdom >= (p3/2) ; kdom-- ) { ksw = ksws[kdom] ; length3 = length3s[kdom] ; for ( jdom = p2 - 1 ; jdom >= 0 ; jdom-- ) { jsw = jsws[jdom] ; length2 = length2s[jdom] ; for ( idom = p1 - 1 ; idom >= 0 ; idom-- ) { isw = isws[idom] ; length1 = length1s[idom] ; /* ------- 3 faces ------- */ if ( isw + length1 < n1 ) { i = isw + length1 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( jsw + length2 < n2 ) { j = jsw + length2 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } if ( ksw + length3 < n3 ) { k = ksw + length3 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } /* ----------- three edges ----------- */ if ( isw + length1 < n1 && jsw + length2 < n2 ) { i = isw + length1 ; j = jsw + length2 ; for ( k = ksw ; k <= ksw + length3 - 1 ; k++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( isw + length1 < n1 && ksw + length3 < n3 ) { i = isw + length1 ; k = ksw + length3 ; for ( j = jsw ; j <= jsw + length2 - 1 ; j++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } if ( jsw + length2 < n2 && ksw + length3 < n3 ) { j = jsw + length2 ; k = ksw + length3 ; for ( i = isw ; i <= isw + length1 - 1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ---------------- one corner point ---------------- */ if ( isw + length1 < n1 && jsw + length2 < n2 && ksw + length3 < n3 ) { i = isw + length1 ; j = jsw + length2 ; k = ksw + length3 ; ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } } } #if MYDEBUG > 2 fprintf(stdout, "\n after the upper separators filled in") ; fp2DGrid(n1, n2, oldToNew, stdout) ; #endif /* ------------------------------- fill in the top level separator ------------------------------- */ m1 = p3 / 2 ; for ( kdom = 0, k = 0 ; kdom < m1 ; kdom++ ) { k += length3s[kdom] + 1 ; } k-- ; for ( j = 0 ; j < n2 ; j++ ) { for ( i = 0 ; i < n1 ; i++ ) { ijk = i + j*n1 + k*n1*n2 ; oldToNew[ijk] = now++ ; } } /* ------------------------ free the working storage ------------------------ */ if ( dsizes1 == NULL ) { IVfree(length1s) ; } if ( dsizes2 == NULL ) { IVfree(length2s) ; } if ( dsizes3 == NULL ) { IVfree(length3s) ; } IVfree(isws) ; IVfree(jsws) ; IVfree(ksws) ; IVfree(temp) ; return ; }
/* ------------------------------------------------------- purpose -- to read in a Graph object from a CHACO file input -- fn -- filename return value -- 1 if success, 0 if failure created -- 98sep20, jjs -------------------------------------------------------- */ int Graph_readFromChacoFile ( Graph *graph, char *fn ) { char *rc ; FILE *fp; int nvtx, nedges, format; char string[BUFLEN], *s1, *s2; int k, v, vsize, w, vwghts, ewghts; int *adjncy, *weights, *vwghtsINT; IVL *adjIVL, *ewghtIVL; /* --------------- check the input --------------- */ if ((graph == NULL) || (fn == NULL)) { fprintf(stderr, "\n error in Graph_readFromFile(%p,%s)" "\n bad input\n", graph, fn); return(0); } /* --------------------- clear the data fields --------------------- */ Graph_clearData(graph); /* ---------------------------------------------- open file and read in nvtx, nedges, and format ---------------------------------------------- */ if ((fp = fopen(fn, "r")) == (FILE*)NULL) { fprintf(stderr, "\n error in Graph_readFromChacoFile(%p,%s)" "\n unable to open file %s", graph, fn, fn); return(0); } /* ------------- skip comments ------------- */ do { rc = fgets(string, BUFLEN, fp) ; if ( rc == NULL ) { fprintf(stderr, "\n error in Graph_readFromChacoFile()" "\n error skipping comments in file %s\n", fn) ; return(0) ; } } while ( string[0] == '%'); /* ------------------------------------------------- read in # vertices, # edges and (optional) format ------------------------------------------------- */ format = 0; if (sscanf(string, "%d %d %d", &nvtx, &nedges, &format) < 2) { fprintf(stderr, "\n error in Graph_readFromChacoFile(%p,%s)" "\n unable to read header of file %s", graph, fn, fn); return(0); } ewghts = ((format % 10) > 0); vwghts = (((format / 10) % 10) > 0); if (format >= 100) { fprintf(stderr, "\n error in Graph_readFromChacoFile(%p,%s)" "\n unknown format", graph, fn); return(0); } /* ------------------------------------------------------------------ initialize vector(s) to hold adjacency and (optional) edge weights ------------------------------------------------------------------ */ adjncy = IVinit(nvtx, -1) ; if ( ewghts ) { weights = IVinit(nvtx, -1) ; } else { weights = NULL ; } /* --------------------------- initialize the Graph object --------------------------- */ nedges *= 2; nedges += nvtx; Graph_init1(graph, 2*ewghts+vwghts, nvtx, 0, nedges, IVL_CHUNKED, IVL_CHUNKED); adjIVL = graph->adjIVL; if (ewghts) { ewghtIVL = graph->ewghtIVL; weights[0] = 0; /* self loops have no weight */ } if (vwghts) vwghtsINT = graph->vwghts; /* --------------------------- read in all adjacency lists --------------------------- */ k = 0; for (v = 0; v < nvtx; v++) { /* ------------- skip comments ------------- */ do { rc = fgets(string, BUFLEN, fp); if ( rc == NULL ) { fprintf(stderr, "\n error in Graph_readFromChacoFile()" "\n error reading adjacency for vertex %d in file %s\n", v, fn) ; IVfree(adjncy) ; if ( weights != NULL ) { IVfree(weights) ; } return(0) ; } } while ( string[0] == '%'); /* ------------------------- check for buffer overflow ------------------------- */ if (strlen(string) == BUFLEN-1) { fprintf(stderr, "\n error in Graph_readFromChacoFile(%p,%s)" "\n unable to read adjacency lists of file %s (line " "buffer too small)\n", graph, fn, fn); IVfree(adjncy) ; if ( weights != NULL ) { IVfree(weights) ; } return(0); } /* ---------------------------------------------- read in (optional) vertex weight, adjacent vertices, and (optional) edge weights ---------------------------------------------- */ s1 = string; if (vwghts) vwghtsINT[v] = (int)strtol(string, &s1, 10); adjncy[0] = v; /* insert self loop needed by spooles */ if ( ewghts ) { weights[0] = 0; } vsize = 1; while ((w = (int)strtol(s1, &s2, 10)) > 0) { adjncy[vsize] = --w; /* node numbering starts with 0 */ s1 = s2; if (ewghts) { weights[vsize] = (int)strtol(s1, &s2, 10); s1 = s2; } vsize++; } /* --------------------------------- sort the lists in ascending order --------------------------------- */ if ( ewghts ) { IV2qsortUp(vsize, adjncy, weights) ; } else { IVqsortUp(vsize, adjncy) ; } /* -------------------------------- set the lists in the IVL objects -------------------------------- */ IVL_setList(adjIVL, v, vsize, adjncy); if (ewghts) IVL_setList(ewghtIVL, v, vsize, weights); k += vsize; } /* ----------------------------------- close the file and do a final check ----------------------------------- */ fclose(fp); /* ------------------------ free the working storage ------------------------ */ IVfree(adjncy) ; if ( weights != NULL ) { IVfree(weights) ; } /* ---------------- check for errors ---------------- */ if ((k != nedges) || (v != nvtx)) { fprintf(stderr, "\n error in Graph_readFromChacoFile()" "\n number of nodes/edges does not match with header of %s" "\n k %d, nedges %d, v %d, nvtx %d\n", fn, k, nedges, v, nvtx); return(0); } return(1); }
/* ------------------------------------------------------------------- make an element graph for a n1 x n2 x n3 grid with ncomp components created -- 95nov03, cca ------------------------------------------------------------------- */ EGraph * EGraph_make27P ( int n1, int n2, int n3, int ncomp ) { EGraph *egraph ; int eid, icomp, ijk, ielem, jelem, kelem, m, nelem, nvtx ; int *list ; /* --------------- check the input --------------- */ if ( n1 <= 0 || n2 <= 0 || n3 <= 0 || ncomp <= 0 ) { fprintf(stderr, "\n fatal error in EGraph_make27P(%d,%d,%d,%d)" "\n bad input\n", n1, n2, n3, ncomp) ; exit(-1) ; } #if MYDEBUG > 0 fprintf(stdout, "\n inside EGraph_make27P(%d,%d,%d,%d)", n1, n2, n3, ncomp) ; fflush(stdout) ; #endif /* ----------------- create the object ----------------- */ nelem = (n1 - 1)*(n2 - 1)*(n3 - 1) ; nvtx = n1*n2*n3*ncomp ; egraph = EGraph_new() ; if ( ncomp == 1 ) { EGraph_init(egraph, 0, nelem, nvtx, IVL_CHUNKED) ; } else { EGraph_init(egraph, 1, nelem, nvtx, IVL_CHUNKED) ; IVfill(nvtx, egraph->vwghts, ncomp) ; } /* ---------------------------- fill the adjacency structure ---------------------------- */ list = IVinit(8*ncomp, -1) ; for ( kelem = 0 ; kelem < n3 - 1 ; kelem++ ) { for ( jelem = 0 ; jelem < n2 - 1 ; jelem++ ) { for ( ielem = 0 ; ielem < n1 - 1 ; ielem++ ) { eid = ielem + jelem*(n1-1) + kelem*(n1-1)*(n2-1); m = 0 ; ijk = ncomp*(ielem + jelem*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + jelem*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + (jelem+1)*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + (jelem+1)*n1 + kelem*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + jelem*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + jelem*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + (jelem+1)*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } ijk = ncomp*(ielem + 1 + (jelem+1)*n1 + (kelem+1)*n1*n2) ; for ( icomp = 0 ; icomp < ncomp ; icomp++ ) { list[m++] = ijk++ ; } IVqsortUp(m, list) ; IVL_setList(egraph->adjIVL, eid, m, list) ; } } } IVfree(list) ; return(egraph) ; }
/* ------------------------------------------------------------ create and return an ETree object that holds the front tree. created -- 96jun23, cca ------------------------------------------------------------ */ ETree * MSMD_frontETree ( MSMD *msmd ) { ETree *etree ; int front, iv, nfront, nvtx, root ; int *bndwghts, *fch, *nodwghts, *par, *sib, *vtxToFront ; MSMDvtx *v, *w ; /* --------------- check the input --------------- */ if ( msmd == NULL ) { fprintf(stderr, "\n fatal error in MSMD_frontETree(%p)" "\n bad input\n", msmd) ; exit(-1) ; } nvtx = msmd->nvtx ; /* -------------------------- count the number of fronts -------------------------- */ nfront = 0 ; fch = IVinit(nvtx, -1) ; sib = IVinit(nvtx, -1) ; root = -1 ; for ( iv = 0, v = msmd->vertices ; iv < nvtx ; iv++, v++ ) { #if MYDEBUG > 0 fprintf(stdout, "\n vertex %d, status %c, wght %d", v->id, v->status, v->wght) ; /* MSMDvtx_print(v, stdout) ; */ fflush(stdout) ; #endif switch ( v->status ) { case 'L' : case 'E' : if ( (w = v->par) != NULL ) { sib[v->id] = fch[w->id] ; fch[w->id] = v->id ; } else { sib[v->id] = root ; root = v->id ; } #if MYDEBUG > 0 fprintf(stdout, ", new front %d", nfront) ; fflush(stdout) ; #endif nfront++ ; break ; default : break ; } } #if MYDEBUG > 0 fprintf(stdout, "\n %d fronts", nfront) ; fflush(stdout) ; #endif /* --------------------------- initialize the ETree object --------------------------- */ etree = ETree_new() ; ETree_init1(etree, nfront, nvtx) ; nodwghts = IV_entries(etree->nodwghtsIV) ; bndwghts = IV_entries(etree->bndwghtsIV) ; vtxToFront = IV_entries(etree->vtxToFrontIV) ; /* ---------------------------------------------- fill the vtxToFront[] vector so representative vertices are mapped in a post-order traversal ---------------------------------------------- */ nfront = 0 ; iv = root ; while ( iv != -1 ) { while ( fch[iv] != -1 ) { iv = fch[iv] ; } v = msmd->vertices + iv ; vtxToFront[iv] = nfront++ ; #if MYDEBUG > 0 fprintf(stdout, "\n v = %d, vwght = %d, vtxToFront[%d] = %d", v->id, v->wght, iv, vtxToFront[iv]) ; fflush(stdout) ; #endif while ( sib[iv] == -1 && v->par != NULL ) { v = v->par ; iv = v->id ; vtxToFront[iv] = nfront++ ; #if MYDEBUG > 0 fprintf(stdout, "\n v = %d, vwght = %d, vtxToFront[%d] = %d", v->id, v->wght, iv, vtxToFront[iv]) ; fflush(stdout) ; #endif } iv = sib[iv] ; } IVfree(fch) ; IVfree(sib) ; /* -------------------------------------------------------------- fill in the vertex-to-front map for indistinguishable vertices -------------------------------------------------------------- */ for ( iv = 0, v = msmd->vertices ; iv < nvtx ; iv++, v++ ) { #if MYDEBUG > 0 fprintf(stdout, "\n v %d, wght = %d, status %c", v->id, v->wght, v->status) ; fflush(stdout) ; #endif switch ( v->status ) { case 'I' : #if MYDEBUG > 0 fprintf(stdout, "\n I : v %d", v->id) ; fflush(stdout) ; #endif w = v ; while ( w->par != NULL && w->status == 'I' ) { w = w->par ; #if MYDEBUG > 0 /* fprintf(stdout, " --> %d", w->id) ; */ fprintf(stdout, " %d", w->id) ; fflush(stdout) ; #endif } #if MYDEBUG > 0 fprintf(stdout, ", w %d, status %c", w->id, w->status) ; fflush(stdout) ; #endif switch ( w->status ) { case 'L' : case 'E' : vtxToFront[v->id] = vtxToFront[w->id] ; #if MYDEBUG > 0 fprintf(stdout, "\n I: vtxToFront[%d] = %d", iv, vtxToFront[iv]) ; fflush(stdout) ; #endif break ; default : #if MYDEBUG > 0 fprintf(stdout, "\n wow, v->rootpar = %d, status %c", w->id, w->status) ; fflush(stdout) ; #endif break ; } } } /* ------------------------------------------------------------ now fill in the parent Tree field, node and boundary weights ------------------------------------------------------------ */ par = etree->tree->par ; for ( iv = 0, v = msmd->vertices ; iv < nvtx ; iv++, v++ ) { #if MYDEBUG > 0 fprintf(stdout, "\n v %d, status %c", v->id, v->status) ; fflush(stdout) ; #endif switch ( v->status ) { case 'L' : case 'E' : front = vtxToFront[iv] ; #if MYDEBUG > 0 fprintf(stdout, ", front %d", front) ; fflush(stdout) ; #endif if ( (w = v->par) != NULL ) { par[vtxToFront[v->id]] = vtxToFront[w->id] ; #if MYDEBUG > 0 fprintf(stdout, ", par[%d] = %d", front, par[front]) ; fflush(stdout) ; #endif } bndwghts[front] = v->bndwght ; nodwghts[front] = v->wght ; break ; default : break ; } } /* ------------------------- set the other tree fields ------------------------- */ Tree_setFchSibRoot(etree->tree) ; return(etree) ; }
/* ------------------------------------------------------------- purpose -- after pivoting for a nonsymmetric factorization, some delayed columns may belong to a process other than its original owner. this method returns an IV object that maps columns to owning processes. created -- 98may22, cca ------------------------------------------------------------- */ IV * FrontMtx_MPI_colmapIV ( FrontMtx *frontmtx, IV *frontOwnersIV, int msglvl, FILE *msgFile, MPI_Comm comm ) { int buffersize, ii, iproc, J, myid, nDJ, neqns, nfront, nproc, ncolJ, nToSend, v ; int *buffer, *counts, *frontOwners, *inbuffer, *outbuffer, *colindJ, *colmap, *vtxToFront ; IV *colmapIV ; /* ------------------------------------------- get the process id and number of processors ------------------------------------------- */ MPI_Comm_rank(comm, &myid) ; MPI_Comm_size(comm, &nproc) ; neqns = frontmtx->neqns ; vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ; IV_sizeAndEntries(frontOwnersIV, &nfront, &frontOwners) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inside FrontMtx_MPI_colmapIV()" "\n myid = %d, nproc = %d, nfront = %d, neqns = %d", myid, nproc, nfront, neqns) ; fflush(msgFile) ; } /* ---------------------------------------------------------- loop through the owned fronts and store each column in an owned front that was originally owned by another processor ---------------------------------------------------------- */ outbuffer = IVinit(neqns, -1) ; for ( J = nToSend = 0 ; J < nfront ; J++ ) { if ( frontOwners[J] == myid && (nDJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n front %d owned, nDJ = %d, ncolJ = %d", J, nDJ, ncolJ) ; fflush(msgFile) ; } for ( ii = 0 ; ii < nDJ ; ii++ ) { v = colindJ[ii] ; if ( frontOwners[vtxToFront[v]] != myid ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n column %d originally owned by %d", v, frontOwners[vtxToFront[v]]) ; fflush(msgFile) ; } outbuffer[nToSend++] = v ; } } } } IVqsortUp(nToSend, outbuffer) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n shifted vertices") ; IVfprintf(msgFile, nToSend, outbuffer) ; fflush(msgFile) ; } counts = IVinit(nproc, 0) ; /* -------------------------------------------- use an all-gather call to get the number of moved columns that are owned by each process -------------------------------------------- */ MPI_Allgather((void *) &nToSend, 1, MPI_INT, counts, 1, MPI_INT, comm) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n after the all-gather operation, counts") ; IVfprintf(msgFile, nproc, counts) ; fflush(msgFile) ; } buffersize = IVmax(nproc, counts, &iproc) ; inbuffer = IVinit(buffersize, -1) ; /* ----------------------------------- initialize the column map IV object ----------------------------------- */ colmapIV = IV_new() ; IV_init(colmapIV, neqns, NULL) ; colmap = IV_entries(colmapIV) ; IVgather(neqns, colmap, frontOwners, vtxToFront) ; /* -------------------------------------------------------------- loop over the other processes, receive vector of moved columns -------------------------------------------------------------- */ for ( iproc = 0 ; iproc < nproc ; iproc++ ) { if ( counts[iproc] > 0 ) { if ( iproc == myid ) { /* ------------------------------------- send buffer vector to other processes ------------------------------------- */ if ( msglvl > 2 ) { fprintf(msgFile, "\n sending outbuffer to all processes") ; IVfprintf(msgFile, nToSend, outbuffer) ; fflush(msgFile) ; } MPI_Bcast(outbuffer, nToSend, MPI_INT, iproc, comm) ; buffer = outbuffer ; } else { /* ----------------------------------------- receive the vector from the other process ----------------------------------------- */ MPI_Bcast(inbuffer, counts[iproc], MPI_INT, iproc, comm) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n received inbuffer from process %d", iproc) ; IVfprintf(msgFile, counts[iproc], inbuffer) ; fflush(msgFile) ; } buffer = inbuffer ; } /* ------------------------- set the column map values ------------------------- */ for ( ii = 0 ; ii < counts[iproc] ; ii++ ) { v = buffer[ii] ; colmap[v] = iproc ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(inbuffer) ; IVfree(outbuffer) ; IVfree(counts) ; return(colmapIV) ; }
/* ---------------------------------------------------------------- purpose -- to create and return an IVL object that holds the submatrix nonzero pattern for the lower triangular factor. NOTE: this method supercedes calling IVL_mapEntries() on the row adjacency structure. that gave problems when pivoting forced some fronts to have no eliminated columns. in some cases, solve aggregates were expected when none were forthcoming. created -- 98aug20, cca ---------------------------------------------------------------- */ IVL * FrontMtx_makeLowerBlockIVL ( FrontMtx *frontmtx, IV *rowmapIV ) { int count, ii, J, K, nrow, nfront, nJ ; int *rowmap, *rowind, *list, *mark ; IVL *lowerblockIVL ; /* --------------- check the input --------------- */ if ( frontmtx == NULL || rowmapIV == NULL ) { fprintf(stderr, "\n fatal error in FrontMtx_makeLowerBlockIVL()" "\n bad input\n") ; exit(-1) ; } nfront = FrontMtx_nfront(frontmtx) ; rowmap = IV_entries(rowmapIV) ; /* ----------------------------- set up the working storage and initialize the IVL object ----------------------------- */ mark = IVinit(nfront, -1) ; list = IVinit(nfront, -1) ; lowerblockIVL = IVL_new() ; IVL_init1(lowerblockIVL, IVL_CHUNKED, nfront) ; /* ------------------- fill the IVL object ------------------- */ for ( J = 0 ; J < nfront ; J++ ) { if ( (nJ = FrontMtx_frontSize(frontmtx, J)) > 0 ) { FrontMtx_rowIndices(frontmtx, J, &nrow, &rowind) ; if ( nrow > 0 ) { mark[J] = J ; count = 0 ; list[count++] = J ; for ( ii = nJ ; ii < nrow ; ii++ ) { K = rowmap[rowind[ii]] ; if ( mark[K] != J ) { mark[K] = J ; list[count++] = K ; } } IVL_setList(lowerblockIVL, J, count, list) ; } } } /* ------------------------ free the working storage ------------------------ */ IVfree(mark) ; IVfree(list) ; return(lowerblockIVL) ; }
/* ------------------------------------------------------ purpose -- to read an IVL object from a formatted file return value -- 1 if success, 0 if failure created -- 95sep29, cca ------------------------------------------------------ */ int IVL_readFromFormattedFile ( IVL *ivl, FILE *fp ) { int nlist, rc, type ; int itemp[3] ; int *sizes ; /* --------------- check the input --------------- */ if ( ivl == NULL || fp == NULL ) { fprintf(stderr, "\n error in IVL_readFromFormattedFile(%p,%p)" "\n bad input\n", ivl, fp) ; return(0) ; } switch ( ivl->type ) { case IVL_CHUNKED : case IVL_SOLO : break ; default : fprintf(stderr, "\n error in IVL_readFormattedFile(%p,%p)" "\n bad type = %d", ivl, fp, ivl->type) ; return(0) ; } /* ------------------------------------------- save the ivl type and clear the data fields ------------------------------------------- */ type = ivl->type ; IVL_clearData(ivl) ; /* ----------------------------------- read in the three scalar parameters type, # of lists, # of indices ----------------------------------- */ if ( (rc = IVfscanf(fp, 3, itemp)) != 3 ) { fprintf(stderr, "\n error in IVL_readFromFormattedFile(%p,%p)" "\n %d items of %d read\n", ivl, fp, rc, 3) ; return(0) ; } nlist = itemp[1] ; /* fprintf(stdout, "\n itemp = { %d %d %d } ", itemp[0], itemp[1], itemp[2]) ; */ /* -------------------------- read in the sizes[] vector -------------------------- */ sizes = IVinit(nlist, 0) ; if ( (rc = IVfscanf(fp, nlist, sizes)) != nlist ) { fprintf(stderr, "\n error in IVL_readFromFormattedFile(%p,%p)" "\n %d items of %d read\n", ivl, fp, rc, nlist) ; return(0) ; } /* --------------------- initialize the object --------------------- */ IVL_init3(ivl, type, nlist, sizes) ; IVfree(sizes) ; /* ----------------------- now read in the indices ----------------------- */ switch ( type ) { case IVL_SOLO : { int ilist, size ; int *ind ; for ( ilist = 0 ; ilist < nlist ; ilist++ ) { IVL_listAndSize(ivl, ilist, &size, &ind) ; if ( size > 0 ) { if ( (rc = IVfscanf(fp, size, ind)) != size ) { fprintf(stderr, "\n error in IVL_readFromFormattedFile(%p,%p)" "\n list %d, %d items of %d read\n", ivl, fp, ilist, rc, size) ; return(0) ; } } } } break ; case IVL_CHUNKED : { /* -------------------------------------------------------- read in the indices into the contiguous block of storage -------------------------------------------------------- */ if ( (rc = IVfscanf(fp, ivl->tsize, ivl->chunk->base)) != ivl->tsize ) { fprintf(stderr, "\n error in IVL_readFromFormattedFile(%p,%p)" "\n %d items of %d read\n", ivl, fp, rc, ivl->tsize) ; return(0) ; } } break ; } return(1) ; }
/* ---------------------------------------------------------------- purpose -- if the elimination has halted before all the stages have been eliminated, then create the schur complement graph and the map from the original vertices those in the schur complement graph. schurGraph -- Graph object to contain the schur complement graph VtoPhi -- IV object to contain the map from vertices in V to schur complement vertices in Phi created -- 97feb01, cca ---------------------------------------------------------------- */ void MSMD_makeSchurComplement ( MSMD *msmd, Graph *schurGraph, IV *VtoPhiIV ) { int nedge, nPhi, nvtx, totewght, totvwght ; int *mark, *rep, *VtoPhi, *vwghts ; int count, *list ; int ierr, ii, size, *adj ; int phi, psi, tag ; IP *ip ; IVL *adjIVL ; MSMDvtx *u, *v, *vertices, *vfirst, *vlast, *w ; /* --------------- check the input --------------- */ if ( msmd == NULL || schurGraph == NULL || VtoPhiIV == NULL ) { fprintf(stderr, "\n\n fatal error in MSMD_makeSchurComplement(%p,%p,%p)" "\n bad input\n", msmd, schurGraph, VtoPhiIV) ; exit(-1) ; } vertices = msmd->vertices ; nvtx = msmd->nvtx ; /* ------------------------------------- initialize the V-to-Phi map IV object ------------------------------------- */ IV_clearData(VtoPhiIV) ; IV_setSize(VtoPhiIV, nvtx) ; IV_fill(VtoPhiIV, -2) ; VtoPhi = IV_entries(VtoPhiIV) ; /* --------------------------------------------- count the number of Schur complement vertices --------------------------------------------- */ vfirst = vertices ; vlast = vfirst + nvtx - 1 ; nPhi = 0 ; for ( v = vfirst ; v <= vlast ; v++ ) { #if MYDEBUG > 0 fprintf(stdout, "\n v->id = %d, v->status = %c", v->id, v->status) ; fflush(stdout) ; #endif switch ( v->status ) { case 'L' : case 'E' : case 'I' : break ; case 'B' : VtoPhi[v->id] = nPhi++ ; #if MYDEBUG > 0 fprintf(stdout, ", VtoPhi[%d] = %d", v->id, VtoPhi[v->id]) ; fflush(stdout) ; #endif break ; default : break ; } } #if MYDEBUG > 0 fprintf(stdout, "\n\n nPhi = %d", nPhi) ; fflush(stdout) ; #endif /* ---------------------------------------------------- get the representative vertex id for each Phi vertex ---------------------------------------------------- */ rep = IVinit(nPhi, -1) ; for ( v = vfirst ; v <= vlast ; v++ ) { if ( (phi = VtoPhi[v->id]) >= 0 ) { #if MYDEBUG > 0 fprintf(stdout, "\n rep[%d] = %d", phi, v->id) ; fflush(stdout) ; #endif rep[phi] = v->id ; } } /* ------------------------------------------ set the map for indistinguishable vertices ------------------------------------------ */ for ( v = vfirst ; v <= vlast ; v++ ) { if ( v->status == 'I' ) { w = v ; while ( w->status == 'I' ) { w = w->par ; } #if MYDEBUG > 0 fprintf(stdout, "\n v = %d, status = %c, w = %d, status = %c", v->id, v->status, w->id, w->status) ; fflush(stdout) ; #endif VtoPhi[v->id] = VtoPhi[w->id] ; } } #if MYDEBUG > 0 fprintf(stdout, "\n\n VtoPhi") ; IV_writeForHumanEye(VtoPhiIV, stdout) ; fflush(stdout) ; #endif /* --------------------------- initialize the Graph object --------------------------- */ Graph_clearData(schurGraph) ; Graph_init1(schurGraph, 1, nPhi, 0, 0, IVL_CHUNKED, IVL_CHUNKED) ; adjIVL = schurGraph->adjIVL ; vwghts = schurGraph->vwghts ; #if MYDEBUG > 0 fprintf(stdout, "\n\n schurGraph initialized, nvtx = %d", schurGraph->nvtx) ; fflush(stdout) ; #endif /* ------------------------------- fill the vertex adjacency lists ------------------------------- */ mark = IVinit(nPhi, -1) ; list = IVinit(nPhi, -1) ; nedge = totvwght = totewght = 0 ; for ( phi = 0 ; phi < nPhi ; phi++ ) { /* ----------------------------- get the representative vertex ----------------------------- */ v = vfirst + rep[phi] ; #if MYDEBUG > 0 fprintf(stdout, "\n phi = %d, v = %d", phi, v->id) ; fflush(stdout) ; MSMDvtx_print(v, stdout) ; fflush(stdout) ; #endif count = 0 ; tag = v->id ; /* --------------------------- load self in adjacency list --------------------------- */ mark[phi] = tag ; totewght += v->wght * v->wght ; #if MYDEBUG > 0 fprintf(stdout, "\n mark[%d] = %d", phi, mark[phi]) ; fflush(stdout) ; #endif list[count++] = phi ; /* ---------------------------------------- load boundary lists of adjacent subtrees ---------------------------------------- */ for ( ip = v->subtrees ; ip != NULL ; ip = ip->next ) { u = vertices + ip->val ; size = u->nadj ; adj = u->adj ; #if MYDEBUG > 0 fprintf(stdout, "\n subtree %d :", u->id) ; IVfp80(stdout, size, adj, 15, &ierr) ; fflush(stdout) ; #endif for ( ii = 0 ; ii < size ; ii++ ) { w = vertices + adj[ii] ; #if MYDEBUG > 0 fprintf(stdout, "\n w %d, status %c, psi %d", w->id, w->status, VtoPhi[w->id]) ; fflush(stdout) ; #endif if ( (psi = VtoPhi[w->id]) != -2 && mark[psi] != tag ) { mark[psi] = tag ; #if MYDEBUG > 0 fprintf(stdout, ", mark[%d] = %d", psi, mark[psi]) ; fflush(stdout) ; #endif list[count++] = psi ; totewght += v->wght * w->wght ; } } } /* ---------------------- load adjacent vertices ---------------------- */ size = v->nadj ; adj = v->adj ; for ( ii = 0 ; ii < size ; ii++ ) { w = vertices + adj[ii] ; if ( (psi = VtoPhi[w->id]) != -2 && mark[psi] != tag ) { mark[psi] = tag ; list[count++] = psi ; totewght += v->wght * w->wght ; } } /* --------------------------------------------- sort the list and inform adjacency IVL object --------------------------------------------- */ IVqsortUp(count, list) ; IVL_setList(adjIVL, phi, count, list) ; /* -------------------------------------- set the vertex weight and increment the total vertex weight and edge count -------------------------------------- */ vwghts[phi] = v->wght ; totvwght += v->wght ; nedge += count ; } schurGraph->totvwght = totvwght ; schurGraph->nedges = nedge ; schurGraph->totewght = totewght ; /* ------------------------ free the working storage ------------------------ */ IVfree(list) ; IVfree(mark) ; IVfree(rep) ; return ; }
/* --------------------------------------------------------------------- purpose -- take a Graph object and a map to expand it, create and return a bigger unit weight Graph object. this is useful for expanding a compressed graph into a unit weight graph. created -- 96mar02, cca --------------------------------------------------------------------- */ Graph * Graph_expand ( Graph *g, int nvtxbig, int map[] ) { Graph *gbig ; int count, ii, nedge, nvtx, v, vbig, vsize, w ; int *head, *indices, *link, *mark, *vadj ; IVL *adjIVL, *adjbigIVL ; /* --------------- check the input --------------- */ if ( g == NULL || nvtxbig <= 0 || map == NULL ) { fprintf(stderr, "\n fatal error in Graph_expand(%p,%d,%p)" "\n bad input\n", g, nvtxbig, map) ; spoolesFatal(); } nvtx = g->nvtx ; adjIVL = g->adjIVL ; /* ---------------------------------------- set up the linked lists for the vertices ---------------------------------------- */ head = IVinit(nvtx, -1) ; link = IVinit(nvtxbig, -1) ; for ( vbig = 0 ; vbig < nvtxbig ; vbig++ ) { v = map[vbig] ; link[vbig] = head[v] ; head[v] = vbig ; } /* -------------------------------- create the expanded Graph object -------------------------------- */ gbig = Graph_new() ; Graph_init1(gbig, 0, nvtxbig, 0, 0, IVL_CHUNKED, IVL_CHUNKED) ; adjbigIVL = gbig->adjIVL ; /* ------------------------------------------- fill the lists in the expanded Graph object ------------------------------------------- */ indices = IVinit(nvtxbig, -1) ; mark = IVinit(nvtx, -1) ; nedge = 0 ; for ( v = 0 ; v < nvtx ; v++ ) { if ( head[v] != -1 ) { /* ------------------------------ load the indices that map to v ------------------------------ */ mark[v] = v ; count = 0 ; for ( vbig = head[v] ; vbig != -1 ; vbig = link[vbig] ) { indices[count++] = vbig ; } /* --------------------------------------------------- load the indices that map to vertices adjacent to v --------------------------------------------------- */ IVL_listAndSize(adjIVL, v, &vsize, &vadj) ; for ( ii = 0 ; ii < vsize ; ii++ ) { w = vadj[ii] ; if ( w < nvtx && mark[w] != v ) { mark[w] = v ; for ( vbig = head[w] ; vbig != -1 ; vbig = link[vbig] ) { indices[count++] = vbig ; } } } /* -------------------------------------- sort the index list in ascending order -------------------------------------- */ IVqsortUp(count, indices) ; /* ------------------------------------------------------- each vertex in the big IVL object has its own list. ------------------------------------------------------- */ for ( vbig = head[v] ; vbig != -1 ; vbig = link[vbig] ) { IVL_setList(adjbigIVL, vbig, count, indices) ; nedge += count ; } } } gbig->nedges = nedge ; /* ------------------------ free the working storage ------------------------ */ IVfree(head) ; IVfree(link) ; IVfree(indices) ; IVfree(mark) ; return(gbig) ; }