/* ---------------------------------------------------------- purpose -- to construct the map from fronts to processors, and compute operations for each processor. maptype -- type of map for parallel factorization maptype = 1 --> wrap map maptype = 2 --> balanced map maptype = 3 --> subtree-subset map maptype = 4 --> domain decomposition map cutoff -- used when maptype = 4 as upper bound on relative domain size return value -- 1 -- success -1 -- bridge is NULL -2 -- front tree is NULL created -- 98sep25, cca ---------------------------------------------------------- */ int BridgeMPI_factorSetup ( BridgeMPI *bridge, int maptype, double cutoff ) { double t1, t2 ; DV *cumopsDV ; ETree *frontETree ; FILE *msgFile ; int msglvl, nproc ; /* --------------- check the input --------------- */ MARKTIME(t1) ; if ( bridge == NULL ) { fprintf(stderr, "\n error in BridgeMPI_factorSetup()" "\n bridge is NULL") ; return(-1) ; } if ( (frontETree = bridge->frontETree) == NULL ) { fprintf(stderr, "\n error in BridgeMPI_factorSetup()" "\n frontETree is NULL") ; return(-2) ; } nproc = bridge->nproc ; msglvl = bridge->msglvl ; msgFile = bridge->msgFile ; /* ------------------------------------------- allocate and initialize the cumopsDV object ------------------------------------------- */ if ( (cumopsDV = bridge->cumopsDV) == NULL ) { cumopsDV = bridge->cumopsDV = DV_new() ; } DV_setSize(cumopsDV, nproc) ; DV_zero(cumopsDV) ; /* ---------------------------- create the owners map object ---------------------------- */ switch ( maptype ) { case 1 : bridge->ownersIV = ETree_wrapMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 2 : bridge->ownersIV = ETree_balancedMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 3 : bridge->ownersIV = ETree_subtreeSubsetMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV) ; break ; case 4 : bridge->ownersIV = ETree_ddMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV, cutoff) ; break 
; default : bridge->ownersIV = ETree_ddMap(frontETree, bridge->type, bridge->symmetryflag, cumopsDV, 1./(2*nproc)) ; break ; } MARKTIME(t2) ; bridge->cpus[7] = t2 - t1 ; if ( msglvl > 1 ) { fprintf(msgFile, "\n\n parallel factor setup") ; fprintf(msgFile, "\n type = %d, symmetryflag = %d", bridge->type, bridge->symmetryflag) ; fprintf(msgFile, "\n total factor operations = %.0f", DV_sum(cumopsDV)) ; fprintf(msgFile, "\n upper bound on speedup due to load balance = %.2f", DV_max(cumopsDV)/DV_sum(cumopsDV)) ; fprintf(msgFile, "\n operations distributions over threads") ; DV_writeForHumanEye(cumopsDV, msgFile) ; fflush(msgFile) ; } if ( msglvl > 2 ) { fprintf(msgFile, "\n\n owners map IV object") ; IV_writeForHumanEye(bridge->ownersIV, msgFile) ; fflush(msgFile) ; } /* ---------------------------- create the vertex map object ---------------------------- */ bridge->vtxmapIV = IV_new() ; IV_init(bridge->vtxmapIV, bridge->neqns, NULL) ; IVgather(bridge->neqns, IV_entries(bridge->vtxmapIV), IV_entries(bridge->ownersIV), ETree_vtxToFront(bridge->frontETree)) ; if ( msglvl > 2 ) { fprintf(msgFile, "\n\n vertex map IV object") ; IV_writeForHumanEye(bridge->vtxmapIV, msgFile) ; fflush(msgFile) ; } return(1) ; }
/**
 * Builds a multi-threaded SPOOLES solver for the sparse matrix A.
 *
 * The constructor converts A to a SPOOLES input matrix (only entries with
 * column index >= row index are passed on; the matrix is declared
 * SPOOLES_SYMMETRIC), runs the BridgeMT setup, parallel-factor setup and
 * factorization, and allocates the n x 1 dense right-hand-side and solution
 * matrices. Progress and statistics are written to the file
 * "SPOOLES.message" in the current working directory.
 *
 * @param A           matrix to factor
 * @param numThreads  thread count forwarded to BridgeMT_factorSetup
 * @param verbose     when >= 1, progress lines are printed to stdout
 *
 * @throws int (value 1) when the message file cannot be opened or when any
 *         BridgeMT stage returns a code other than 1. The message file is
 *         closed before throwing.
 *         NOTE(review): the SPOOLES objects created so far (mtxA, bridgeMT)
 *         are not released on these paths -- presumably a leak; confirm and
 *         free them with the matching SPOOLES *_free routines.
 */
SPOOLESSolverMT::SPOOLESSolverMT(const SparseMatrix * A, int numThreads, int verbose)
{
  n = A->Getn();
  this->verbose = verbose;

  msgFile = fopen("SPOOLES.message","w");
  if (msgFile == NULL)
  {
    // every later fprintf(msgFile, ...) would dereference a null stream
    printf("Error: could not open SPOOLES.message for writing.\n");
    throw 1;
  }

  // prepare SPOOLES input matrix
  if (verbose >= 1)
    printf("Converting matrix to SPOOLES format...\n");

  InpMtx * mtxA = InpMtx_new();
  InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, A->GetNumEntries(), n);

  for(int row=0; row<n; row++)
  {
    int rowLength = A->GetRowLength(row);
    for(int j=0; j< rowLength; j++)
    {
      // symmetric storage: keep the diagonal and the upper triangle only
      int column = A->GetColumnIndex(row, j);
      if (column >= row)
        InpMtx_inputRealEntry(mtxA, row, column, A->GetEntry(row, j));
    }
  }

  InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
  //InpMtx_writeForHumanEye(mtxA, msgFile);

  // compute the factorization
  if (verbose >= 1)
    printf("Factoring the %d x %d matrix...\n",n,n);

  BridgeMT * bridgeMT = BridgeMT_new();
  BridgeMT_setMatrixParams(bridgeMT, n, SPOOLES_REAL, SPOOLES_SYMMETRIC);
  BridgeMT_setMessageInfo(bridgeMT, 1, msgFile);

  int rc = BridgeMT_setup(bridgeMT, mtxA);
  if (rc != 1)
  {
    printf("Error: BridgeMT setup returned exit code %d.\n", rc);
    fclose(msgFile);
    throw 1;
  }

  int nfront, nfind, nfent;
  double nfactorops;
  rc = BridgeMT_factorStats(bridgeMT, SPOOLES_REAL, SPOOLES_SYMMETRIC,
                            &nfront, &nfind, &nfent, &nsolveops, &nfactorops);
  if ( rc != 1 )
  {
    printf("Error: BridgeMT_factorStats returned exit code %d.\n", rc);
    fclose(msgFile);
    throw 1;
  }

  fprintf(msgFile,
          "\n\n factor matrix statistics"
          "\n %d fronts, %d indices, %d entries"
          "\n %d solve operations, %12.4e factor operations",
          nfront, nfind, nfent, nsolveops, nfactorops) ;
  fflush(msgFile) ;

  // setup the parallel factorization
  // NOTE(review): the trailing 0 and 0.0 are presumably the map type and
  // cutoff, selecting the library's default map -- confirm against the
  // BridgeMT_factorSetup documentation.
  rc = BridgeMT_factorSetup(bridgeMT, numThreads, 0, 0.0) ;
  if ( rc != 1 )
  {
    // cumopsDV is read below; do not continue after a failed setup
    printf("Error: BridgeMT_factorSetup returned exit code %d.\n", rc);
    fclose(msgFile);
    throw 1;
  }

  fprintf(msgFile, "\n\n ----- PARALLEL FACTOR SETUP -----\n") ;
  fprintf(msgFile,
          "\n CPU %8.3f : time to setup parallel factorization",
          bridgeMT->cpus[5]) ;
  fprintf(msgFile, "\n total factor operations = %.0f",
          DV_sum(bridgeMT->cumopsDV)) ;
  fprintf(msgFile,
          "\n upper bound on speedup due to load balance = %.2f",
          DV_sum(bridgeMT->cumopsDV)/DV_max(bridgeMT->cumopsDV)) ;
  fprintf(msgFile, "\n operations distributions over threads") ;
  DV_writeForHumanEye(bridgeMT->cumopsDV, msgFile) ;
  fflush(msgFile) ;

  // factor the matrix
  int permuteflag = 1 ;
  int error;
  rc = BridgeMT_factor(bridgeMT, mtxA, permuteflag, &error);
  if ( rc == 1 )
  {
    fprintf(msgFile, "\n\n factorization completed successfully\n") ;
  }
  else
  {
    printf("Error: factorization returned exit code %d (error %d).\n", rc, error);
    fclose(msgFile);
    throw 1;
  }

  fprintf(msgFile, "\n\n ----- FACTORIZATION -----\n") ;
  fprintf(msgFile,
          "\n CPU %8.3f : time to permute original matrix"
          "\n CPU %8.3f : time to initialize factor matrix"
          "\n CPU %8.3f : time to compute factorization"
          "\n CPU %8.3f : time to post-process factorization"
          "\n CPU %8.3f : total factorization time\n",
          bridgeMT->cpus[6], bridgeMT->cpus[7], bridgeMT->cpus[8],
          bridgeMT->cpus[9], bridgeMT->cpus[10]) ;
  fprintf(msgFile,
          "\n\n factorization statistics"
          "\n %d pivots, %d pivot tests, %d delayed vertices"
          "\n %d entries in D, %d entries in L, %d entries in U",
          bridgeMT->stats[0], bridgeMT->stats[1], bridgeMT->stats[2],
          bridgeMT->stats[3], bridgeMT->stats[4], bridgeMT->stats[5]) ;
  fprintf(msgFile,
          "\n\n factorization: raw mflops %8.3f, overall mflops %8.3f",
          1.e-6*nfactorops/bridgeMT->cpus[8],
          1.e-6*nfactorops/bridgeMT->cpus[10]) ;
  fflush(msgFile) ;

  // construct dense SPOOLES matrix for rhs and x
  DenseMtx *mtx_rhs = DenseMtx_new();
  DenseMtx_init(mtx_rhs, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_rhsPointer = (void*) mtx_rhs;

  DenseMtx *mtx_x = DenseMtx_new();
  DenseMtx_init(mtx_x, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_xPointer = (void*) mtx_x;

  // keep the SPOOLES objects behind opaque pointers in the members
  bridgeMTPointer = (void*) bridgeMT;
  APointer = (void*) mtxA;

  if (verbose >= 1)
    printf("Factorization completed.\n");
}