/* ----------------------- set the default fields return value --- 1 -- normal return -1 -- bridge is NULL created -- 98sep18, cca ----------------------- */ int BridgeMT_setDefaultFields ( BridgeMT *bridge ) { if ( bridge == NULL ) { fprintf(stderr, "\n fatal error in BridgeMT_setDefaultFields(%p)" "\n bad input\n", bridge) ; return(-1) ; } /* ---------------- graph statistics ---------------- */ bridge->neqns = 0 ; bridge->nedges = 0 ; bridge->Neqns = 0 ; bridge->Nedges = 0 ; /* ------------------- ordering parameters ------------------- */ bridge->compressCutoff = 0.0 ; bridge->maxdomainsize = -1 ; bridge->maxnzeros = -1 ; bridge->maxsize = -1 ; bridge->seed = -1 ; /* ------------------------------- matrix/factorization parameters ------------------------------- */ bridge->type = SPOOLES_REAL ; bridge->symmetryflag = SPOOLES_SYMMETRIC ; bridge->sparsityflag = FRONTMTX_DENSE_FRONTS ; bridge->pivotingflag = SPOOLES_NO_PIVOTING ; bridge->tau = 100.0 ; bridge->droptol = 1.e-3 ; bridge->lookahead = 0 ; bridge->patchinfo = NULL ; /* ------------------------ multithreaded parameters ------------------------ */ bridge->nthread = 0 ; bridge->ownersIV = NULL ; bridge->solvemap = NULL ; bridge->cumopsDV = NULL ; /* ------------------------------------ message info, statistics and timings ------------------------------------ */ IVzero(6, bridge->stats) ; DVzero(16, bridge->cpus) ; bridge->msglvl = 0 ; bridge->msgFile = stdout ; /* ------------------- pointers to objects ------------------- */ bridge->frontETree = NULL ; bridge->symbfacIVL = NULL ; bridge->mtxmanager = NULL ; bridge->frontmtx = NULL ; bridge->oldToNewIV = NULL ; bridge->newToOldIV = NULL ; return(1) ; }
/*--------------------------------------------------------------------*/ int main ( int argc, char *argv[] ) /* ----------------------------------------------------- test the factor method for a grid matrix (0) read in matrix from source file (1) conver data matrix to InpMtx object if necessary (2) create Graph and ETree object if necessary (3) read in/create an ETree object (4) create a solution matrix object (5) multiply the solution with the matrix to get a right hand side matrix object (6) factor the matrix (7) solve the system created -- 98dec30, jwu ----------------------------------------------------- */ { char etreeFileName[80], mtxFileName[80], *cpt, rhsFileName[80], srcFileName[80], ctemp[81], msgFileName[80], slnFileName[80] ; Chv *chv, *rootchv ; ChvManager *chvmanager ; DenseMtx *mtxB, *mtxQ, *mtxX, *mtxZ ; double one[2] = { 1.0, 0.0 } ; FrontMtx *frontmtx ; InpMtx *mtxA ; SubMtxManager *mtxmanager ; double cputotal, droptol, conv_tol, factorops ; double cpus[9] ; Drand drand ; double nops, tau, t1, t2 ; ETree *frontETree ; Graph *graph ; FILE *msgFile, *inFile ; int error, loc, msglvl, neqns, nzf, iformat, pivotingflag, rc, seed, sparsityflag, symmetryflag, method[METHODS], type, nrhs, etreeflag ; int stats[6] ; int nnzA, Ik, itermax, zversion, iterout ; IV *newToOldIV, *oldToNewIV ; IVL *symbfacIVL ; int i, j, k, m, n, imethod, maxdomainsize, maxzeros, maxsize; int nouter,ninner ; if ( argc != 2 ) { fprintf(stdout, "\n\n usage : %s inFile" "\n inFile -- input filename" "\n", argv[0]) ; return(-1) ; } /* read input file */ inFile = fopen(argv[1], "r"); if (inFile == (FILE *)NULL) { fprintf(stderr, "\n fatal error in %s: unable to open file %s\n", argv[0], argv[1]) ; return(-1) ; } for (i=0; i<METHODS; i++) method[i]=-1; imethod=0; k=0; while (1) { fgets(ctemp, 80, inFile); if (ctemp[0] != '*') { /*printf("l=%2d:%s\n", strlen(ctemp),ctemp);*/ if (strlen(ctemp)==80) { fprintf(stderr, "\n fatal error in %s: input line contains more than " "80 characters.\n",argv[0]); exit(-1); } if (k==0) { sscanf(ctemp, "%d", &iformat); if (iformat < 0 || iformat > 2) { fprintf(stderr, "\n fatal error in %s: " "invalid source matrix format\n",argv[0]) ; return(-1) ; } } else if (k==1) sscanf(ctemp, "%s", srcFileName); else if (k==2) sscanf(ctemp, "%s", mtxFileName); else if (k==3) { sscanf(ctemp, "%d", &etreeflag); if (etreeflag < 0 || etreeflag > 4) { fprintf(stderr, "\n fatal error in %s: " "invalid etree file status\n",argv[0]) ; return(-1) ; } } else if (k==4) sscanf(ctemp, "%s", etreeFileName); else if (k==5) sscanf(ctemp, "%s", rhsFileName); else if (k==6) sscanf(ctemp, "%s", slnFileName); else if (k==7){ sscanf(ctemp, "%s", msgFileName); if ( strcmp(msgFileName, "stdout") == 0 ) { msgFile = stdout ; } else if ( (msgFile = fopen(msgFileName, "a")) == NULL ) { fprintf(stderr, "\n fatal error in %s" "\n unable to open file %s\n", argv[0], ctemp) ; return(-1) ; } } else if (k==8) sscanf(ctemp, "%d %d %d %d %d %d", &msglvl, &seed, &nrhs, &Ik, &itermax, &iterout); else if (k==9) sscanf(ctemp, "%d %d %d", &symmetryflag, &sparsityflag, &pivotingflag); else if (k==10) sscanf(ctemp, "%lf %lf %lf", &tau, &droptol, &conv_tol); else if (k==11) { /* for (j=0; j<strlen(ctemp); j++) { printf("j=%2d:%s",j,ctemp+j); if (ctemp[j] == ' ' && ctemp[j+1] != ' ') { sscanf(ctemp+j, "%d", method+imethod); printf("method[%d]=%d\n",imethod,method[imethod]); if (method[imethod] < 0) break; imethod++; } } */ imethod = sscanf(ctemp,"%d %d %d %d %d %d %d %d %d %d", method, method+1, method+2, method+3, method+4, method+5, method+6, method+7, method+8, method+9); /*printf("imethod=%d\n",imethod);*/ for (j=0; j<imethod; j++) { /*printf("method[%d]=%d\n",j,method[j]);*/ if (method[j]<0) { imethod=j; break; } } if (imethod == 0) { fprintf(msgFile,"No method assigned in input file\n"); return(-1); } } k++; } if (k==12) break; } fclose(inFile); /* reset nrhs to 1 */ if (nrhs > 1) { fprintf(msgFile,"*** Multiple right-hand-side vectors is not allowed yet.\n"); fprintf(msgFile,"*** nrhs is reset to 1.\n"); nrhs =1; } fprintf(msgFile, "\n %s " "\n srcFileName -- %s" "\n mtxFileName -- %s" "\n etreeFileName -- %s" "\n rhsFileName -- %s" "\n msglvl -- %d" "\n seed -- %d" "\n symmetryflag -- %d" "\n sparsityflag -- %d" "\n pivotingflag -- %d" "\n tau -- %e" "\n droptol -- %e" "\n conv_tol -- %e" "\n method -- ", argv[0], srcFileName, mtxFileName, etreeFileName, rhsFileName, msglvl, seed, symmetryflag, sparsityflag, pivotingflag, tau, droptol, conv_tol) ; for (k=0; k<imethod; k++) fprintf(msgFile, "%d ", method[k]); fprintf(msgFile, "\n ", method[k]); fflush(msgFile) ; /* -------------------------------------- initialize the random number generator -------------------------------------- */ Drand_setDefaultFields(&drand) ; Drand_init(&drand) ; Drand_setSeed(&drand, seed) ; /*Drand_setUniform(&drand, 0.0, 1.0) ;*/ Drand_setNormal(&drand, 0.0, 1.0) ; /* ---------------------------------------------- read in or convert source to the InpMtx object ---------------------------------------------- */ rc = 1; if ( strcmp(srcFileName, "none") == 0 ) { fprintf(msgFile, "\n no file to read from") ; exit(-1) ; } mtxA = InpMtx_new() ; MARKTIME(t1) ; if (iformat == 0) { /* InpMtx source format */ rc = InpMtx_readFromFile(mtxA, srcFileName) ; strcpy(mtxFileName, srcFileName); if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromFile(%p,%s)", rc, mtxA, srcFileName) ; } else if (iformat == 1) { /* HBF source format */ rc = InpMtx_readFromHBfile(mtxA, srcFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromHBfile(%p,%s)", rc, mtxA, srcFileName) ; } else { /* AIJ2 source format */ rc = InpMtx_readFromAIJ2file(mtxA, srcFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_readFromAIJ2file(%p,%s)", rc, mtxA, srcFileName) ; } MARKTIME(t2) ; if (iformat>0 && strcmp(mtxFileName, "none") != 0 ) { rc = InpMtx_writeToFile(mtxA, mtxFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from InpMtx_writeToFile(%p,%s)", rc, mtxA, mtxFileName) ; } fprintf(msgFile, "\n CPU %8.3f : read in (+ convert to) mtxA from file %s", t2 - t1, mtxFileName) ; if (rc != 1) { goto end_read; } type = mtxA->inputMode ; neqns = 1 + IVmax(mtxA->nent, InpMtx_ivec1(mtxA), &loc) ; if ( INPMTX_IS_BY_ROWS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is rows") ; } else if ( INPMTX_IS_BY_COLUMNS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is columns") ; } else if ( INPMTX_IS_BY_CHEVRONS(mtxA) ) { fprintf(msgFile, "\n matrix coordinate type is chevrons") ; } else { fprintf(msgFile, "\n\n, error, bad coordinate type") ; rc=-1; goto end_read; } if ( INPMTX_IS_RAW_DATA(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is raw data\n") ; } else if ( INPMTX_IS_SORTED(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is sorted\n") ; } else if ( INPMTX_IS_BY_VECTORS(mtxA) ) { fprintf(msgFile, "\n matrix storage mode is by vectors\n") ; } else { fprintf(msgFile, "\n\n, error, bad storage mode") ; rc=-1; goto end_read; } if ( msglvl > 1 ) { fprintf(msgFile, "\n\n after reading InpMtx object from file %s", mtxFileName) ; if ( msglvl == 2 ) { InpMtx_writeStats(mtxA, msgFile) ; } else { InpMtx_writeForHumanEye(mtxA, msgFile) ; } fflush(msgFile) ; } /* Get the nonzeros in matrix A and print it */ nnzA = InpMtx_nent( mtxA ); fprintf(msgFile, "\n\n Input matrix size %d NNZ %d", neqns, nnzA) ; /* -------------------------------------------------------- generate the linear system 1. generate solution matrix and fill with random numbers 2. generate rhs matrix and fill with zeros 3. compute matrix-matrix multiply -------------------------------------------------------- */ MARKTIME(t1) ; mtxX = DenseMtx_new() ; DenseMtx_init(mtxX, type, 0, -1, neqns, nrhs, 1, neqns) ; mtxB = DenseMtx_new() ; if (strcmp(rhsFileName, "none")) { rc = DenseMtx_readFromFile(mtxB, rhsFileName) ; if ( rc != 1 ) fprintf(msgFile, "\n return value %d from DenseMtx_readFromFile(%p,%s)", rc, mtxB, rhsFileName) ; DenseMtx_zero(mtxX) ; } else { DenseMtx_init(mtxB, type, 1, -1, neqns, nrhs, 1, neqns) ; DenseMtx_fillRandomEntries(mtxX, &drand) ; DenseMtx_zero(mtxB) ; switch ( symmetryflag ) { case SPOOLES_SYMMETRIC : InpMtx_sym_mmm(mtxA, mtxB, one, mtxX) ; break ; case SPOOLES_HERMITIAN : InpMtx_herm_mmm(mtxA, mtxB, one, mtxX) ; break ; case SPOOLES_NONSYMMETRIC : InpMtx_nonsym_mmm(mtxA, mtxB, one, mtxX) ; break ; default : break ; } } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : set up the solution and rhs ", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n original mtxX") ; DenseMtx_writeForHumanEye(mtxX, msgFile) ; fprintf(msgFile, "\n\n original mtxB") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(msgFile) ; } if (rc != 1) { InpMtx_free(mtxA); DenseMtx_free(mtxX); DenseMtx_free(mtxB); goto end_init; } /* ------------------------ read in/create the ETree object ------------------------ */ MARKTIME(t1) ; if (etreeflag == 0) { /* read in ETree from file */ if ( strcmp(etreeFileName, "none") == 0 ) fprintf(msgFile, "\n no file to read from") ; frontETree = ETree_new() ; rc = ETree_readFromFile(frontETree, etreeFileName) ; if (rc!=1) fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)", rc, frontETree, etreeFileName) ; } else { graph = Graph_new() ; rc = InpMtx_createGraph(mtxA, graph); if (rc!=1) { fprintf(msgFile, "\n return value %d from InpMtx_createGraph(%p,%p)", rc, mtxA, graph) ; Graph_free(graph); goto end_tree; } if (etreeflag == 1) { /* Via BestOfNDandMS */ maxdomainsize = 500; maxzeros = 1000; maxsize = 64 ; frontETree = orderViaBestOfNDandMS(graph, maxdomainsize, maxzeros, maxsize, seed, msglvl, msgFile) ; } else if (etreeflag == 2) { /* Via MMD */ frontETree = orderViaMMD(graph, seed, msglvl, msgFile) ; } else if (etreeflag == 3) { /* Via MS */ maxdomainsize = 500; frontETree = orderViaMS(graph, maxdomainsize, seed, msglvl, msgFile) ; } else if (etreeflag == 4) { /* Via ND */ maxdomainsize = 500; frontETree = orderViaND(graph, maxdomainsize, seed, msglvl, msgFile) ; } Graph_free(graph); /* optionally write out the ETree object */ if ( strcmp(etreeFileName, "none") != 0 ) { fprintf(msgFile, "\n\n writing out ETree to file %s", etreeFileName) ; ETree_writeToFile(frontETree, etreeFileName) ; } } MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : read in/create frontETree from file %s", t2 - t1, etreeFileName) ; if ( rc != 1 ) { ETree_free(frontETree); goto end_tree; } ETree_leftJustify(frontETree) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n after reading ETree object from file %s", etreeFileName) ; if ( msglvl == 2 ) { ETree_writeStats(frontETree, msgFile) ; } else { ETree_writeForHumanEye(frontETree, msgFile) ; } } fflush(msgFile) ; /* -------------------------------------------------- get the permutations, permute the matrix and the front tree, and compute the symbolic factorization -------------------------------------------------- */ MARKTIME(t1) ; oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ; newToOldIV = ETree_newToOldVtxPerm(frontETree) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : get permutations", t2 - t1) ; MARKTIME(t1) ; ETree_permuteVertices(frontETree, oldToNewIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute front tree", t2 - t1) ; MARKTIME(t1) ; InpMtx_permute(mtxA, IV_entries(oldToNewIV), IV_entries(oldToNewIV)) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute mtxA", t2 - t1) ; if ( symmetryflag == SPOOLES_SYMMETRIC || symmetryflag == SPOOLES_HERMITIAN ) { MARKTIME(t1) ; InpMtx_mapToUpperTriangle(mtxA) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : map to upper triangle", t2 - t1) ; } if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) { MARKTIME(t1) ; InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : change coordinate type", t2 - t1) ; } if ( INPMTX_IS_RAW_DATA(mtxA) ) { MARKTIME(t1) ; InpMtx_changeStorageMode(mtxA, INPMTX_SORTED) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : sort entries ", t2 - t1) ; } if ( INPMTX_IS_SORTED(mtxA) ) { MARKTIME(t1) ; InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : convert to vectors ", t2 - t1) ; } MARKTIME(t1) ; symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ; MARKTIME(t1) ; DenseMtx_permuteRows(mtxB, oldToNewIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute rhs", t2 - t1) ; /* ------------------------------ initialize the FrontMtx object ------------------------------ */ MARKTIME(t1) ; frontmtx = FrontMtx_new() ; mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, NO_LOCK, 0) ; FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, sparsityflag, pivotingflag, NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix", t2 - t1) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n nendD = %d, nentL = %d, nentU = %d", frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ; SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n front matrix initialized") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(frontETree, symmetryflag) ; factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ; fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, factorops, factorops/nzf) ; IVzero(6, stats) ; DVzero(9, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, NO_LOCK, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol, chvmanager, &error, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*factorops/(t2-t1)) ; if ( rootchv != NULL ) { fprintf(msgFile, "\n\n factorization did not complete") ; for ( chv = rootchv ; chv != NULL ; chv = chv->next ) { fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d", chv->id, chv->nD, chv->nL, chv->nU) ; } } if ( error >= 0 ) { fprintf(msgFile, "\n\n error encountered at front %d\n", error) ; rc=error ; goto end_front; } fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns", stats[0], stats[1], stats[2]) ; if ( frontmtx->rowadjIVL != NULL ) { fprintf(msgFile, "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ; } if ( frontmtx->coladjIVL != NULL ) { fprintf(msgFile, ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ; } if ( frontmtx->upperblockIVL != NULL ) { fprintf(msgFile, "\n %d fronts, %d entries in upperblockIVL", frontmtx->nfront, frontmtx->upperblockIVL->tsize) ; } if ( frontmtx->lowerblockIVL != NULL ) { fprintf(msgFile, ", %d entries in lowerblockIVL", frontmtx->lowerblockIVL->tsize) ; } fprintf(msgFile, "\n %d entries in D, %d entries in L, %d entries in U", stats[3], stats[4], stats[5]) ; fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ; if ( FRONTMTX_IS_SYMMETRIC(frontmtx) || FRONTMTX_IS_HERMITIAN(frontmtx) ) { int nneg, npos, nzero ; FrontMtx_inertia(frontmtx, &nneg, &nzero, &npos) ; fprintf(msgFile, "\n %d negative, %d zero and %d positive eigenvalues", nneg, nzero, npos) ; fflush(msgFile) ; } cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } if ( msglvl > 1 ) { SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } /* ------------------------------ post-process the factor matrix ------------------------------ */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; } fprintf(msgFile, "\n\n after post-processing") ; if ( msglvl > 1 ) SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; /* ---------------- solve the system ---------------- */ neqns = mtxB->nrow ; mtxZ = DenseMtx_new() ; DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ; zversion=INPMTX_IS_COMPLEX_ENTRIES(mtxA); for (k=0; k<imethod; k++) { DenseMtx_zero(mtxZ) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n rhs") ; DenseMtx_writeForHumanEye(mtxB, msgFile) ; fflush(stdout) ; } fprintf(msgFile, "\n\n itemax %d", itermax) ; DVzero(6, cpus) ; MARKTIME(t1) ; switch ( method[k] ) { case BiCGStabR : if (zversion) rc=zbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=bicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case BiCGStabL : if (zversion) rc=zbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=bicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case TFQMRR : if (zversion) rc=ztfqmrr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=tfqmrr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case TFQMRL : if (zversion) rc=ztfqmrl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=tfqmrl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case PCGR : if (zversion) rc=zpcgr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=pcgr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case PCGL : if (zversion) rc=zpcgl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=pcgl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); break; case MLBiCGStabR : mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, Ik, 1, neqns) ; Drand_setUniform(&drand, 0.0, 1.0) ; DenseMtx_fillRandomEntries(mtxQ, &drand) ; if (zversion) rc=zmlbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=mlbicgstabr(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); DenseMtx_free(mtxQ) ; break; case MLBiCGStabL : mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, Ik, 1, neqns) ; Drand_setUniform(&drand, 0.0, 1.0) ; DenseMtx_fillRandomEntries(mtxQ, &drand) ; if (zversion) rc=zmlbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); else rc=mlbicgstabl(neqns, type, symmetryflag, mtxA, frontmtx, mtxQ, mtxZ, mtxB, itermax, conv_tol, msglvl, msgFile); DenseMtx_free(mtxQ) ; break; case BGMRESR: if (zversion) fprintf(msgFile, "\n\n *** BGMRESR complex version is not available " "at this moment. ") ; else rc=bgmresr(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, iterout, itermax, &nouter, &ninner, conv_tol, msglvl, msgFile); break; case BGMRESL: if (zversion) fprintf(msgFile, "\n\n *** BGMRESR complex version is not available " "at this moment. ") ; else rc=bgmresl(neqns, type, symmetryflag, mtxA, frontmtx, mtxZ, mtxB, iterout, itermax, &nouter, &ninner, conv_tol, msglvl, msgFile); break; default: fprintf(msgFile, "\n\n *** Invalid method number ") ; } MARKTIME(t2) ; fprintf(msgFile, "\n\n CPU %8.3f : solve the system", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ------------------------------------------------------------- permute the computed solution back into the original ordering ------------------------------------------------------------- */ MARKTIME(t1) ; DenseMtx_permuteRows(mtxZ, newToOldIV) ; MARKTIME(t2) ; fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n permuted solution") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } /* ------------- save solution ------------- */ if ( strcmp(slnFileName, "none") != 0 ) { DenseMtx_writeToFile(mtxZ, slnFileName) ; } /* ----------------- compute the error ----------------- */ if (!strcmp(rhsFileName, "none")) { DenseMtx_sub(mtxZ, mtxX) ; if (method[k] <8) { mtxQ = DenseMtx_new() ; DenseMtx_init(mtxQ, type, 0, -1, neqns, 1, 1, neqns) ; rc=DenseMtx_initAsSubmatrix (mtxQ, mtxZ, 0, neqns-1, 0, 0); fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxQ)) ; DenseMtx_free(mtxQ) ; } else fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n error") ; DenseMtx_writeForHumanEye(mtxZ, msgFile) ; fflush(stdout) ; } if ( msglvl > 1 ) SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; } fprintf(msgFile, "\n--------- End of Method %d -------\n",method[k]) ; } /* ------------------------ free the working storage ------------------------ */ DenseMtx_free(mtxZ) ; end_front: ChvManager_free(chvmanager) ; SubMtxManager_free(mtxmanager) ; FrontMtx_free(frontmtx) ; IVL_free(symbfacIVL) ; IV_free(oldToNewIV) ; IV_free(newToOldIV) ; end_tree: ETree_free(frontETree) ; end_init: DenseMtx_free(mtxB) ; DenseMtx_free(mtxX) ; end_read: InpMtx_free(mtxA) ; fprintf(msgFile, "\n") ; fclose(msgFile) ; return(rc) ; }
/* -------------------------------------------------------------- purpose -- to permute (if necessary) the original matrix, and to initialize, factor and postprocess the factor matrix return value --- 1 -- normal return, factorization complete 0 -- factorization did not complete, see error flag -1 -- bridge is NULL -2 -- mtxA is NULL -3 -- perror is NULL created -- 98sep18, cca -------------------------------------------------------------- */ int BridgeMT_factor ( BridgeMT *bridge, InpMtx *mtxA, int permuteflag, int *perror ) { Chv *rootchv ; ChvManager *chvmanager ; double cputotal, nfops, t0, t1, t2 ; double cpus[11] ; int msglvl, nzf ; int stats[16] ; FILE *msgFile ; FrontMtx *frontmtx ; SubMtxManager *mtxmanager ; /*--------------------------------------------------------------------*/ MARKTIME(t0) ; /* --------------- check the input --------------- */ if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n bridge is NULL\n") ; return(-1) ; } if ( mtxA == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n mtxA is NULL\n") ; return(-2) ; } if ( perror == NULL ) { fprintf(stderr, "\n error in BridgeMT_factor()" "\n perror is NULL\n") ; return(-3) ; } msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /*--------------------------------------------------------------------*/ MARKTIME(t1) ; if ( permuteflag == 1 ) { int *oldToNew = IV_entries(bridge->oldToNewIV) ; /* ------------------------------------------------ permute the input matrix and convert to chevrons ------------------------------------------------ */ InpMtx_permute(mtxA, oldToNew, oldToNew) ; if ( bridge->symmetryflag == SPOOLES_SYMMETRIC || bridge->symmetryflag == SPOOLES_HERMITIAN ) { InpMtx_mapToUpperTriangle(mtxA) ; } } if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) { InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ; } if ( ! INPMTX_IS_BY_VECTORS(mtxA) ) { InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ; } MARKTIME(t2) ; bridge->cpus[6] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %8.3f : permute and format A", t2 - t1) ; fflush(msgFile) ; } /* --------------------------- initialize the front matrix --------------------------- */ MARKTIME(t1) ; if ( (mtxmanager = bridge->mtxmanager) == NULL ) { mtxmanager = bridge->mtxmanager = SubMtxManager_new() ; SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ; } if ( (frontmtx = bridge->frontmtx) == NULL ) { frontmtx = bridge->frontmtx = FrontMtx_new() ; } else { FrontMtx_clearData(frontmtx) ; } FrontMtx_init(frontmtx, bridge->frontETree, bridge->symbfacIVL, bridge->type, bridge->symmetryflag, bridge->sparsityflag, bridge->pivotingflag, LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ; frontmtx->patchinfo = bridge->patchinfo ; MARKTIME(t2) ; bridge->cpus[7] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n CPU %8.3f : initialize front matrix", t2 - t1) ; fflush(msgFile) ; } /* ----------------- factor the matrix ----------------- */ nzf = ETree_nFactorEntries(bridge->frontETree, bridge->symmetryflag) ; nfops = ETree_nFactorOps(bridge->frontETree, bridge->type, bridge->symmetryflag) ; if ( msglvl > 1 ) { fprintf(msgFile, "\n %d factor entries, %.0f factor ops, %8.3f ratio", nzf, nfops, nfops/nzf) ; fflush(msgFile) ; } IVzero(16, stats) ; DVzero(11, cpus) ; chvmanager = ChvManager_new() ; ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ; MARKTIME(t1) ; rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, bridge->tau, bridge->droptol, chvmanager, bridge->ownersIV, bridge->lookahead, perror, cpus, stats, msglvl, msgFile) ; MARKTIME(t2) ; IVcopy(6, bridge->stats, stats) ; bridge->cpus[8] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops", t2 - t1, 1.e-6*nfops/(t2-t1)) ; fprintf(msgFile, "\n %8d pivots, %8d pivot tests, %8d delayed vertices" "\n %d entries in D, %d entries in L, %d entries in U", stats[0], stats[1], stats[2], stats[3], stats[4], stats[5]) ; cputotal = cpus[8] ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n initialize fronts %8.3f %6.2f" "\n load original entries %8.3f %6.2f" "\n update fronts %8.3f %6.2f" "\n assemble postponed data %8.3f %6.2f" "\n factor fronts %8.3f %6.2f" "\n extract postponed data %8.3f %6.2f" "\n store factor entries %8.3f %6.2f" "\n miscellaneous %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cpus[5], 100.*cpus[5]/cputotal, cpus[6], 100.*cpus[6]/cputotal, cpus[7], 100.*cpus[7]/cputotal, cputotal) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n submatrix mananger after factorization") ; SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ; fprintf(msgFile, "\n\n chevron mananger after factorization") ; ChvManager_writeForHumanEye(chvmanager, msgFile) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n front factor matrix") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } ChvManager_free(chvmanager) ; if ( *perror >= 0 ) { return(0) ; } /* ----------------------------- post-process the front matrix ----------------------------- */ MARKTIME(t1) ; FrontMtx_postProcess(frontmtx, msglvl, msgFile) ; MARKTIME(t2) ; bridge->cpus[9] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n submatrix mananger after post-processing") ; SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ; fflush(msgFile) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n front factor matrix after post-processing") ; FrontMtx_writeForHumanEye(frontmtx, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ MARKTIME(t2) ; bridge->cpus[10] += t2 - t0 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : total factor time", t2 - t0) ; fflush(msgFile) ; } return(1) ; }
/* -------------------------------------------- purpose -- to solve the linear system MPI version if permuteflag is 1 then rhs is permuted into new ordering solution is permuted into old ordering return value --- 1 -- normal return -1 -- bridge is NULL -2 -- X is NULL -3 -- Y is NULL -4 -- frontmtx is NULL -5 -- mtxmanager is NULL -6 -- oldToNewIV not available -7 -- newToOldIV not available created -- 98sep18, cca -------------------------------------------- */ int BridgeMPI_solve ( BridgeMPI *bridge, int permuteflag, DenseMtx *X, DenseMtx *Y ) { DenseMtx *Xloc, *Yloc ; double cputotal, t0, t1, t2 ; double cpus[6] ; FILE *msgFile ; FrontMtx *frontmtx ; int firsttag, msglvl, myid, nmycol, nrhs, nrow ; int *mycolind, *rowind ; int stats[4] ; IV *mapIV, *ownersIV ; MPI_Comm comm ; SubMtxManager *mtxmanager ; /* --------------- check the input --------------- */ MARKTIME(t0) ; if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n bridge is NULL\n") ; return(-1) ; } if ( (frontmtx = bridge->frontmtx) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n frontmtx is NULL\n") ; return(-4) ; } if ( (mtxmanager = bridge->mtxmanager) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n mtxmanager is NULL\n") ; return(-5) ; } myid = bridge->myid ; comm = bridge->comm ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; frontmtx = bridge->frontmtx ; ownersIV = bridge->ownersIV ; Xloc = bridge->Xloc ; Yloc = bridge->Yloc ; if ( myid != 0 ) { X = Y = NULL ; } else { if ( X == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, X is NULL\n") ; return(-2) ; } if ( Y == NULL ) { fprintf(stderr, "\n error in BridgeMPI_solve" "\n myid 0, Y is NULL\n") ; return(-3) ; } } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n inside BridgeMPI_solve()") ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile , "\n\n Xloc") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fprintf(msgFile , "\n\n Yloc") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ if ( myid == 0 ) { /* -------------------------- optionally permute the rhs -------------------------- */ if ( permuteflag == 1 ) { int rc ; IV *oldToNewIV ; MARKTIME(t1) ; rc = BridgeMPI_oldToNewIV(bridge, &oldToNewIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_oldToNewIV()\n", rc) ; return(-6) ; } DenseMtx_permuteRows(Y, oldToNewIV) ; MARKTIME(t2) ; bridge->cpus[15] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile , "\n\n permuted Y") ; DenseMtx_writeForHumanEye(Y, msgFile) ; fflush(msgFile) ; } } } /*--------------------------------------------------------------------*/ /* ------------------------------------- distribute the right hand side matrix ------------------------------------- */ MARKTIME(t1) ; mapIV = bridge->rowmapIV ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n row map IV object") ; IV_writeForHumanEye(mapIV, msgFile) ; fflush(msgFile) ; } if ( myid == 0 ) { nrhs = Y->ncol ; } else { nrhs = 0 ; } MPI_Bcast((void *) &nrhs, 1, MPI_INT, 0, comm) ; firsttag = 0 ; IVfill(4, stats, 0) ; DenseMtx_MPI_splitFromGlobalByRows(Y, Yloc, mapIV, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[16] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix Y after the split") ; DenseMtx_writeForHumanEye(Yloc, msgFile) ; fflush(msgFile) ; } /*--------------------------------------------------------------------*/ /* -------------------------------------- initialize the local solution X object -------------------------------------- */ MARKTIME(t1) ; IV_sizeAndEntries(bridge->ownedColumnsIV, &nmycol, &mycolind) ; DenseMtx_init(Xloc, bridge->type, -1, -1, nmycol, nrhs, 1, nmycol) ; if ( nmycol > 0 ) { DenseMtx_rowIndices(Xloc, &nrow, &rowind) ; IVcopy(nmycol, rowind, mycolind) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n local matrix X") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[17] += t2 - t1 ; /*--------------------------------------------------------------------*/ /* ---------------- solve the system ---------------- */ MARKTIME(t1) ; DVzero(6, cpus) ; FrontMtx_MPI_solve(frontmtx, Xloc, Yloc, mtxmanager, bridge->solvemap, cpus, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[18] += t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n CPU %8.3f : solve the system", t2 - t1) ; } cputotal = t2 - t1 ; if ( cputotal > 0.0 ) { fprintf(msgFile, "\n set up solves %8.3f %6.2f" "\n load rhs and store solution %8.3f %6.2f" "\n forward solve %8.3f %6.2f" "\n diagonal solve %8.3f %6.2f" "\n backward solve %8.3f %6.2f" "\n total time %8.3f", cpus[0], 100.*cpus[0]/cputotal, cpus[1], 100.*cpus[1]/cputotal, cpus[2], 100.*cpus[2]/cputotal, cpus[3], 100.*cpus[3]/cputotal, cpus[4], 100.*cpus[4]/cputotal, cputotal) ; } if ( msglvl > 3 ) { fprintf(msgFile, "\n\n computed solution") ; DenseMtx_writeForHumanEye(Xloc, msgFile) ; fflush(stdout) ; } /*--------------------------------------------------------------------*/ /* ------------------------------------- gather the solution on processor zero ------------------------------------- */ MARKTIME(t1) ; DenseMtx_MPI_mergeToGlobalByRows(X, Xloc, 0, stats, msglvl, msgFile, firsttag, comm) ; MARKTIME(t2) ; bridge->cpus[19] += t2 - t1 ; if ( myid == 0 ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in new ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } /*--------------------------------------------------------------------*/ /* ------------------------------- optionally permute the solution ------------------------------- */ if ( myid == 0 ) { if ( permuteflag == 1 ) { int rc ; IV *newToOldIV ; rc = BridgeMPI_newToOldIV(bridge, &newToOldIV) ; if (rc != 1) { fprintf(stderr, "\n error in BridgeMPI_solve()" "\n rc = %d from BridgeMPI_newToOldIV()\n", rc) ; return(-7) ; } DenseMtx_permuteRows(X, newToOldIV) ; } MARKTIME(t2) ; bridge->cpus[20] += t2 - t1 ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n global matrix X in old ordering") ; DenseMtx_writeForHumanEye(X, msgFile) ; fflush(msgFile) ; } } MARKTIME(t2) ; bridge->cpus[21] += t2 - t0 ; return(1) ; }
/* ---------------------------- purpose -- basic initializer created -- 98may01, cca ---------------------------- */ void SubMtx_init ( SubMtx *mtx, int type, int mode, int rowid, int colid, int nrow, int ncol, int nent ) { int nbytes ; int *colind, *rowind ; /* --------------- check the input --------------- */ if ( mtx == NULL ) { fprintf(stderr, "\n fatal error in SubMtx_init()" "\n mtx is NULL\n") ; exit(-1) ; } if ( nrow <= 0 ) { fprintf(stderr, "\n fatal error in SubMtx_init()" "\n nrow = %d <= 0\n", nrow) ; exit(-1) ; } if ( ncol <= 0 ) { fprintf(stderr, "\n fatal error in SubMtx_init()" "\n ncol = %d <= 0\n", ncol) ; exit(-1) ; } if ( nrow <= 0 ) { fprintf(stderr, "\n fatal error in SubMtx_init()" "\n nent = %d <= 0\n", nent) ; exit(-1) ; } switch ( type ) { case SPOOLES_REAL : case SPOOLES_COMPLEX : break ; default : fprintf(stderr, "\n fatal error in SubMtx_init()" "\n invalid type %d", type) ; exit(-1) ; } switch ( mode ) { case SUBMTX_DENSE_ROWS : case SUBMTX_DENSE_COLUMNS : case SUBMTX_DIAGONAL : case SUBMTX_SPARSE_ROWS : case SUBMTX_SPARSE_COLUMNS : case SUBMTX_SPARSE_TRIPLES : case SUBMTX_DENSE_SUBROWS : case SUBMTX_DENSE_SUBCOLUMNS : case SUBMTX_BLOCK_DIAGONAL_SYM : case SUBMTX_BLOCK_DIAGONAL_HERM : break ; default : fprintf(stderr, "\n fatal error in SubMtx_init()" "\n invalid mode %d", mode) ; exit(-1) ; } /* ------------------------------------------------------- get and set the number of bytes needed in the workspace ------------------------------------------------------- */ nbytes = SubMtx_nbytesNeeded(type, mode, nrow, ncol, nent) ; SubMtx_setNbytesInWorkspace(mtx, nbytes) ; DVzero(nbytes/sizeof(double), (double *) SubMtx_workspace(mtx)) ; /* -------------- set the fields -------------- */ SubMtx_setFields(mtx, type, mode, rowid, colid, nrow, ncol, nent) ; SubMtx_rowIndices(mtx, &nrow, &rowind) ; IVramp(nrow, rowind, 0, 1) ; SubMtx_columnIndices(mtx, &ncol, &colind) ; IVramp(ncol, colind, 0, 1) ; return ; }
/* ----------------------------------------------------------- A contains the following data from the A = QR factorization A(1:ncolA,1:ncolA) = R A(j+1:nrowA,j) is v_j, the j-th householder vector, where v_j[j] = 1.0 NOTE: A and Q must be column major created -- 98dec10, cca ----------------------------------------------------------- */ void A2_computeQ ( A2 *Q, A2 *A, DV *workDV, int msglvl, FILE *msgFile ) { double *betas ; int irowA, jcolA, ncolA, nrowA ; /* --------------- check the input --------------- */ if ( Q == NULL ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n Q is NULL\n") ; exit(-1) ; } if ( A == NULL ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n A is NULL\n") ; exit(-1) ; } if ( workDV == NULL ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n workDV is NULL\n") ; exit(-1) ; } if ( msglvl > 0 && msgFile == NULL ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n msglvl > 0 and msgFile is NULL\n") ; exit(-1) ; } nrowA = A2_nrow(A) ; ncolA = A2_ncol(A) ; if ( nrowA <= 0 ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n nrowA = %d\n", nrowA) ; exit(-1) ; } if ( ncolA <= 0 ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n ncolA = %d\n", nrowA) ; exit(-1) ; } if ( nrowA != A2_nrow(Q) ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n nrowA = %d, nrowQ = %d\n", nrowA, A2_nrow(Q)) ; exit(-1) ; } if ( ncolA != A2_ncol(Q) ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n ncolA = %d, ncolQ = %d\n", ncolA, A2_ncol(Q)) ; exit(-1) ; } switch ( A->type ) { case SPOOLES_REAL : case SPOOLES_COMPLEX : break ; default : fprintf(stderr, "\n fatal error in A2_computeQ()" "\n invalid type for A\n") ; exit(-1) ; } if ( A->type != Q->type ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n A->type = %d, Q->type = %d\n", A->type, Q->type) ; exit(-1) ; } if ( A2_inc1(A) != 1 ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n A->inc1 = %d \n", A2_inc1(A)) ; exit(-1) ; } if ( A2_inc1(Q) != 1 ) { fprintf(stderr, "\n fatal error in A2_computeQ()" "\n Q->inc1 = %d, \n", A2_inc1(Q)) ; exit(-1) ; } /* -------------------------------------------------- compute the beta values, beta_j = 2./(V_j^H * V_j) -------------------------------------------------- */ DV_setSize(workDV, ncolA) ; betas = DV_entries(workDV) ; if ( A2_IS_REAL(A) ) { int irowA, jcolA ; double sum ; double *colA ; for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) { sum = 1.0 ; colA = A2_column(A, jcolA) ; for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) { sum += colA[irowA] * colA[irowA] ; } betas[jcolA] = 2./sum ; } } else { double ival, rval, sum ; double *colA ; for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) { sum = 1.0 ; colA = A2_column(A, jcolA) ; for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) { rval = colA[2*irowA] ; ival = colA[2*irowA+1] ; sum += rval*rval + ival*ival ; } betas[jcolA] = 2./sum ; } } /* ------------------------------------------- loop over the number of householder vectors ------------------------------------------- */ for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) { double *V, *X ; int jcolV ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n %% jcolA = %d", jcolA) ; fflush(msgFile) ; } /* ------------------ set X[] to e_jcolA ------------------ */ X = A2_column(Q, jcolA) ; if ( A2_IS_REAL(Q) ) { DVzero(nrowA, X) ; X[jcolA] = 1.0 ; } else { DVzero(2*nrowA, X) ; X[2*jcolA] = 1.0 ; } for ( jcolV = jcolA ; jcolV >= 0 ; jcolV-- ) { double beta ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% jcolV = %d", jcolV) ; fflush(msgFile) ; } /* ----------------------------------------------------- update X = (I - beta_jcolV * V_jcolV * V_jcolV^T)X = X - beta_jcolV * V_jcolV * V_jcolV^T * X = X - (beta_jcolV * V_jcolV^T * X) * V_jcolV ----------------------------------------------------- */ V = A2_column(A, jcolV) ; beta = betas[jcolV] ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% beta = %12.4e", beta) ; fflush(msgFile) ; } if ( A2_IS_REAL(Q) ) { double fac, sum = X[jcolV] ; int irow ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { if ( msglvl > 2 ) { fprintf(msgFile, "\n %% V[%d] = %12.4e, X[%d] = %12.4e", irow, V[irow], irow, X[irow]) ; fflush(msgFile) ; } sum += V[irow] * X[irow] ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n %% sum = %12.4e", sum) ; fflush(msgFile) ; } fac = beta * sum ; if ( msglvl > 2 ) { fprintf(msgFile, "\n %% fac = %12.4e", fac) ; fflush(msgFile) ; } X[jcolV] -= fac ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { X[irow] -= fac * V[irow] ; } } else { double rfac, ifac, rsum = X[2*jcolV], isum = X[2*jcolV+1] ; int irow ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { double Vi, Vr, Xi, Xr ; Vr = V[2*irow] ; Vi = V[2*irow+1] ; Xr = X[2*irow] ; Xi = X[2*irow+1] ; rsum += Vr*Xr + Vi*Xi ; isum += Vr*Xi - Vi*Xr ; } rfac = beta * rsum ; ifac = beta * isum ; X[2*jcolV] -= rfac ; X[2*jcolV+1] -= ifac ; for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) { double Vi, Vr ; Vr = V[2*irow] ; Vi = V[2*irow+1] ; X[2*irow] -= rfac*Vr - ifac*Vi ; X[2*irow+1] -= rfac*Vi + ifac*Vr ; } } } } return ; }
/* ----------------------- compute W0 = v^H * A created -- 98may30, cca ----------------------- */ static double computeW1 ( A2 *mtxA, double H0[], double W0[], int msglvl, FILE *msgFile ) { double nops ; int inc1, inc2, ncolA, nrowA ; if ( msglvl > 5 ) { fprintf(msgFile, "\n %% inside computeW1, nrow %d, ncol %d", mtxA->n1, mtxA->n2) ; } nrowA = mtxA->n1 ; ncolA = mtxA->n2 ; inc1 = mtxA->inc1 ; inc2 = mtxA->inc2 ; if ( inc1 != 1 && inc2 != 1 ) { fprintf(stderr, "\n error in computeW1" "\n inc1 = %d, inc2 = %d\n", inc1, inc2) ; exit(-1) ; } nops = 0.0 ; if ( A2_IS_REAL(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double sums[3] ; double *colA0, *colA1, *colA2 ; /* ---------------------------- A is column major, compute W(j) = H0^T * A(*,j) ---------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; DVdot13(nrowA, H0, colA0, colA1, colA2, sums) ; W0[jcol] = sums[0] ; W0[jcol+1] = sums[1] ; W0[jcol+2] = sums[2] ; nops += 6*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; DVdot12(nrowA, H0, colA0, colA1, sums) ; W0[jcol] = sums[0] ; W0[jcol+1] = sums[1] ; nops += 4*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; DVdot11(nrowA, H0, colA0, sums) ; W0[jcol] = sums[0] ; nops += 2*nrowA ; } } else { double alpha[3] ; double *rowA0, *rowA1, *rowA2 ; /* ------------------------------- A is row major compute W := W + H0(j) * A(j,*) ------------------------------- */ DVzero(ncolA, W0) ; for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = H0[irow] ; alpha[1] = H0[irow+1] ; alpha[2] = H0[irow+2] ; DVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ; nops += 6*ncolA ; } if ( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = H0[irow] ; alpha[1] = H0[irow+1] ; DVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ; nops += 4*ncolA ; } else if ( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = H0[irow] ; DVaxpy11(ncolA, W0, alpha, rowA0) ; nops += 2*ncolA ; } } } else if ( A2_IS_COMPLEX(mtxA) ) { int irow, jcol ; if ( inc1 == 1 ) { double sums[6] ; double *colA0, *colA1, *colA2 ; /* ---------------------------- A is column major compute W(j) = H0^H * A(*,j) ---------------------------- */ for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; colA2 = A2_column(mtxA, jcol+2) ; ZVdotC13(nrowA, H0, colA0, colA1, colA2, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ; W0[2*(jcol+2)] = sums[4] ; W0[2*(jcol+2)+1] = sums[5] ; nops += 24*nrowA ; } if ( jcol == ncolA - 2 ) { colA0 = A2_column(mtxA, jcol) ; colA1 = A2_column(mtxA, jcol+1) ; ZVdotC12(nrowA, H0, colA0, colA1, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ; nops += 16*nrowA ; } else if ( jcol == ncolA - 1 ) { colA0 = A2_column(mtxA, jcol) ; ZVdotC11(nrowA, H0, colA0, sums) ; W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ; nops += 8*nrowA ; } } else { double alpha[6] ; double *rowA0, *rowA1, *rowA2 ; /* --------------------------------- A is row major compute W := W + H0(j)^H * A(j,*) --------------------------------- */ DVzero(2*ncolA, W0) ; for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; rowA2 = A2_row(mtxA, irow+2) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ; alpha[4] = H0[2*(irow+2)] ; alpha[5] = -H0[2*(irow+2)+1] ; ZVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ; nops += 24*ncolA ; } if ( irow == nrowA - 2 ) { rowA0 = A2_row(mtxA, irow) ; rowA1 = A2_row(mtxA, irow+1) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ; ZVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ; nops += 16*ncolA ; } else if ( irow == nrowA - 1 ) { rowA0 = A2_row(mtxA, irow) ; alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ; ZVaxpy11(ncolA, W0, alpha, rowA0) ; nops += 8*ncolA ; } } } return(nops) ; }