Beispiel #1
0
// Constructs a multi-threaded SPOOLES solver for the n x n sparse matrix A:
// converts A to a SPOOLES InpMtx, sets up a BridgeMT object, computes the
// factorization, and allocates the dense right-hand-side / solution vectors.
//
//   A          : input matrix; only entries with column index >= row index are
//                copied, and the system is declared SPOOLES_SYMMETRIC
//   numThreads : thread count forwarded to BridgeMT_factorSetup
//   verbose    : if >= 1, progress messages are printed to stdout
//
// Diagnostics from SPOOLES are written to the file "SPOOLES.message" in the
// current working directory.
//
// Throws the int value 1 if the message file cannot be opened or if any SPOOLES
// stage reports a return code other than 1. Because the destructor does not run
// for an object whose constructor throws, everything acquired so far is
// released before the exception leaves the constructor.
SPOOLESSolverMT::SPOOLESSolverMT(const SparseMatrix * A, int numThreads, int verbose)
{
  n = A->Getn();
  this->verbose = verbose;

  msgFile = fopen("SPOOLES.message","w");
  if (msgFile == NULL)
  {
    // every later fprintf/fflush on msgFile would dereference a null stream
    printf("Error: could not open file SPOOLES.message for writing.\n");
    throw 1;
  }

  // declared outside the try block so that the handler can release them
  InpMtx * mtxA = NULL;
  BridgeMT * bridgeMT = NULL;

  try
  {
    // prepare SPOOLES input matrix
    if (verbose >= 1)
      printf("Converting matrix to SPOOLES format...\n");

    mtxA = InpMtx_new();
    InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, A->GetNumEntries(), n);

    for(int row=0; row<n; row++)
    {
      int rowLength = A->GetRowLength(row);

      for(int j=0; j< rowLength; j++)
      {
        // the matrix is treated as symmetric: pass the upper triangle only
        if (A->GetColumnIndex(row,j) >= row)
          InpMtx_inputRealEntry(mtxA, row, A->GetColumnIndex(row, j), A->GetEntry(row, j) );
      }
    }

    InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
    //InpMtx_writeForHumanEye(mtxA, msgFile);

    // compute the factorization
    if (verbose >= 1)
      printf("Factoring the %d x %d matrix...\n",n,n);

    bridgeMT = BridgeMT_new();
    BridgeMT_setMatrixParams(bridgeMT, n, SPOOLES_REAL, SPOOLES_SYMMETRIC);
    BridgeMT_setMessageInfo(bridgeMT, 1, msgFile);
    int rc = BridgeMT_setup(bridgeMT, mtxA);
    if (rc != 1)
    {
      printf("Error: BridgeMT setup returned exit code %d.\n", rc);
      throw 1;
    }

    int type = 1; // real entries
    int nfront, nfind, nfent;
    double nfactorops;
    rc = BridgeMT_factorStats(bridgeMT, type, SPOOLES_SYMMETRIC, &nfront,
                              &nfind, &nfent, &nsolveops, &nfactorops);
    if ( rc != 1 ) 
    {
      printf("Error: BridgeMT_factorStats returned exit code %d.\n", rc);
      throw 1;
    }

    fprintf(msgFile,
          "\n\n factor matrix statistics"
          "\n %d fronts, %d indices, %d entries"
          "\n %d solve operations, %12.4e factor operations",
          nfront, nfind, nfent, nsolveops, nfactorops) ;
    fflush(msgFile) ;


    // setup the parallel factorization

    rc = BridgeMT_factorSetup(bridgeMT, numThreads, 0, 0.0) ;
    if ( rc != 1 )
    {
      // bridgeMT->cumopsDV is read below; do not touch it after a failed setup
      printf("Error: BridgeMT_factorSetup returned exit code %d.\n", rc);
      throw 1;
    }

    fprintf(msgFile, "\n\n ----- PARALLEL FACTOR SETUP -----\n") ;
    fprintf(msgFile,
          "\n    CPU %8.3f : time to setup parallel factorization",
          bridgeMT->cpus[5]) ;
    fprintf(msgFile, "\n total factor operations = %.0f",
             DV_sum(bridgeMT->cumopsDV)) ;
    fprintf(msgFile,
             "\n upper bound on speedup due to load balance = %.2f",
             DV_sum(bridgeMT->cumopsDV)/DV_max(bridgeMT->cumopsDV)) ;
    fprintf(msgFile, "\n operations distributions over threads") ;
    DV_writeForHumanEye(bridgeMT->cumopsDV, msgFile) ;
    fflush(msgFile) ;

    // factor the matrix
    int permuteflag  = 1 ;
    int error = 0; // initialized so the error report below never prints garbage
    rc = BridgeMT_factor(bridgeMT, mtxA, permuteflag, &error);
    if ( rc == 1 ) 
    {
      fprintf(msgFile, "\n\n factorization completed successfully\n") ;
    } 
    else 
    {
      printf("Error: factorization returned exit code %d (error %d).\n", rc, error);
      throw 1;
    }

    fprintf(msgFile, "\n\n ----- FACTORIZATION -----\n") ;
    fprintf(msgFile,
          "\n    CPU %8.3f : time to permute original matrix"
          "\n    CPU %8.3f : time to initialize factor matrix"
          "\n    CPU %8.3f : time to compute factorization"
          "\n    CPU %8.3f : time to post-process factorization"
          "\n CPU %8.3f : total factorization time\n",
          bridgeMT->cpus[6],
          bridgeMT->cpus[7],
          bridgeMT->cpus[8],
          bridgeMT->cpus[9],
          bridgeMT->cpus[10]) ;
    fprintf(msgFile, "\n\n factorization statistics"
          "\n %d pivots, %d pivot tests, %d delayed vertices"
          "\n %d entries in D, %d entries in L, %d entries in U",
          bridgeMT->stats[0], bridgeMT->stats[1], bridgeMT->stats[2],
          bridgeMT->stats[3], bridgeMT->stats[4], bridgeMT->stats[5]) ;
    fprintf(msgFile,
          "\n\n factorization: raw mflops %8.3f, overall mflops %8.3f",
          1.e-6*nfactorops/bridgeMT->cpus[8],
          1.e-6*nfactorops/bridgeMT->cpus[10]) ;
    fflush(msgFile) ;
  }
  catch(...)
  {
    // release what was acquired so far, then propagate the original exception
    if (bridgeMT != NULL)
      BridgeMT_free(bridgeMT);
    if (mtxA != NULL)
      InpMtx_free(mtxA);
    fclose(msgFile);
    msgFile = NULL;
    throw;
  }

  // construct dense SPOOLES matrix for rhs and x
  DenseMtx *mtx_rhs = DenseMtx_new();
  DenseMtx_init(mtx_rhs, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_rhsPointer = (void*) mtx_rhs;

  DenseMtx *mtx_x = DenseMtx_new();
  DenseMtx_init(mtx_x, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_xPointer = (void*) mtx_x;

  // the SPOOLES objects are kept in the type-erased members
  bridgeMTPointer = (void*) bridgeMT;
  APointer = (void*) mtxA;

  if (verbose >= 1)
    printf("Factorization completed.\n");
}
/*
   ----------------------------------------------------------
   purpose -- to construct the map from fronts to processors,
      and compute operations for each processor.

   bridge  -- BridgeMPI object. this function reads its
      frontETree, nproc, msglvl, msgFile, type, symmetryflag
      and neqns fields, and writes its cumopsDV, ownersIV,
      vtxmapIV and cpus[7] fields.
   maptype -- type of map for parallel factorization
      maptype = 1 --> wrap map
      maptype = 2 --> balanced map
      maptype = 3 --> subtree-subset map
      maptype = 4 --> domain decomposition map
      any other value --> domain decomposition map with
         cutoff 1/(2*nproc); the cutoff argument is ignored
   cutoff -- used when maptype = 4 as upper bound on
      relative domain size

   notes --
      cumopsDV is created only when bridge->cumopsDV is NULL,
      then resized to nproc entries and zeroed.
      ownersIV and vtxmapIV are assigned fresh objects; the
      previous pointer values are overwritten without being
      freed here.
      cpus[7] receives the time spent from input checking
      through creation of the owners map.
      diagnostics go to msgFile when msglvl > 1 (summary)
      and msglvl > 2 (owners map and vertex map).

   return value --
      1 -- success
     -1 -- bridge is NULL
     -2 -- front tree is NULL

   created -- 98sep25, cca
   ----------------------------------------------------------
*/
int
BridgeMPI_factorSetup (
    BridgeMPI   *bridge,
    int         maptype,
    double      cutoff
) {
    double   t1, t2 ;
    DV       *cumopsDV ;
    ETree    *frontETree ;
    FILE     *msgFile ;
    int      msglvl, nproc ;
    /*
       ---------------
       check the input
       ---------------
    */
    MARKTIME(t1) ;
    if ( bridge == NULL ) {
        fprintf(stderr, "\n error in BridgeMPI_factorSetup()"
                "\n bridge is NULL") ;
        return(-1) ;
    }
    if ( (frontETree = bridge->frontETree) == NULL ) {
        fprintf(stderr, "\n error in BridgeMPI_factorSetup()"
                "\n frontETree is NULL") ;
        return(-2) ;
    }
    nproc   = bridge->nproc   ;
    msglvl  = bridge->msglvl  ;
    msgFile = bridge->msgFile ;
    /*
       -------------------------------------------
       allocate and initialize the cumopsDV object
       -------------------------------------------
    */
    if ( (cumopsDV = bridge->cumopsDV) == NULL ) {
        cumopsDV = bridge->cumopsDV = DV_new() ;
    }
    DV_setSize(cumopsDV, nproc) ;
    DV_zero(cumopsDV) ;
    /*
       ----------------------------
       create the owners map object
       ----------------------------
    */
    switch ( maptype ) {
    case 1 :
        bridge->ownersIV = ETree_wrapMap(frontETree, bridge->type,
                                         bridge->symmetryflag, cumopsDV) ;
        break ;
    case 2 :
        bridge->ownersIV = ETree_balancedMap(frontETree, bridge->type,
                                             bridge->symmetryflag, cumopsDV) ;
        break ;
    case 3 :
        bridge->ownersIV = ETree_subtreeSubsetMap(frontETree, bridge->type,
                           bridge->symmetryflag, cumopsDV) ;
        break ;
    case 4 :
        bridge->ownersIV = ETree_ddMap(frontETree, bridge->type,
                                       bridge->symmetryflag, cumopsDV, cutoff) ;
        break ;
    default :
        /*
           unrecognized maptype: fall back to the domain
           decomposition map with a cutoff derived from nproc
        */
        bridge->ownersIV = ETree_ddMap(frontETree, bridge->type,
                                       bridge->symmetryflag, cumopsDV, 1./(2*nproc)) ;
        break ;
    }
    MARKTIME(t2) ;
    bridge->cpus[7] = t2 - t1 ;
    if ( msglvl > 1 ) {
        fprintf(msgFile, "\n\n parallel factor setup") ;
        fprintf(msgFile, "\n type = %d, symmetryflag = %d",
                bridge->type, bridge->symmetryflag) ;
        fprintf(msgFile, "\n total factor operations = %.0f",
                DV_sum(cumopsDV)) ;
        /*
           the speedup bound is (total operations) divided by
           (operations of the most heavily loaded processor)
        */
        fprintf(msgFile,
                "\n upper bound on speedup due to load balance = %.2f",
                DV_sum(cumopsDV)/DV_max(cumopsDV)) ;
        fprintf(msgFile, "\n operations distributions over threads") ;
        DV_writeForHumanEye(cumopsDV, msgFile) ;
        fflush(msgFile) ;
    }
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n owners map IV object") ;
        IV_writeForHumanEye(bridge->ownersIV, msgFile) ;
        fflush(msgFile) ;
    }
    /*
       ----------------------------
       create the vertex map object
       ----------------------------
    */
    bridge->vtxmapIV = IV_new() ;
    IV_init(bridge->vtxmapIV, bridge->neqns, NULL) ;
    /*
       vtxmap[v] = owners[vtxToFront[v]] for each of the
       neqns vertices -- assumes IVgather(n, y, x, index)
       sets y[i] = x[index[i]]; confirm against the IV utilities
    */
    IVgather(bridge->neqns, IV_entries(bridge->vtxmapIV),
             IV_entries(bridge->ownersIV),
             ETree_vtxToFront(bridge->frontETree)) ;
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n vertex map IV object") ;
        IV_writeForHumanEye(bridge->vtxmapIV, msgFile) ;
        fflush(msgFile) ;
    }

    return(1) ;
}
Beispiel #3
0
/*
   ------------------------------------------------------------------
   purpose -- to fill xDV and yDV with a log10 profile of the
      magnitudes of the entries in the DV object.

   dv       -- vector whose entry magnitudes are profiled
   npts     -- number of buckets, must be positive
   xDV      -- resized to npts entries. x[ipt] is log10 of the mean
      magnitude of the entries in bucket ipt, or the log10 left
      edge of the bucket when the bucket is empty
   yDV      -- resized to npts entries. y[ipt] is the number of
      entries in bucket ipt
   tausmall -- lower cutoff, 0 <= tausmall <= taubig
   taubig   -- upper cutoff
   pnzero   -- address filled with the number of zero entries
   pnsmall  -- address filled with the number of nonzero entries
      whose magnitude is <= tausmall
   pnbig    -- address filled with the number of entries whose
      magnitude is >= taubig

   only entries with tausmall < |entry| < taubig are placed in
   buckets. the buckets span [log10(min), log10(max)] of those
   magnitudes in npts-1 equal steps. NaN entries satisfy none of
   the comparisons and are not counted anywhere.

   degenerate cases --
      no entry lies strictly between the cutoffs : xDV and yDV
         are returned zero-filled
      npts == 1 or all in-range magnitudes are equal : the bucket
         width is zero and every in-range entry goes to bucket 0

   on bad input a message is written to stderr and the program
   exits with status -1.

   created -- 97feb14, cca
   ------------------------------------------------------------------
*/
void
DV_log10profile (
   DV      *dv,
   int      npts,
   DV       *xDV,
   DV       *yDV,
   double   tausmall,
   double   taubig,
   int      *pnzero,
   int      *pnsmall,
   int      *pnbig
) {
double   deltaVal, maxval, minval, val ;
double   *dvec, *sums, *x, *y ;
int      ii, ipt, nbig, ninrange, nsmall, nzero, size ;
/*
   ---------------
   check the input
   ---------------
*/
if ( dv == NULL || npts <= 0 || xDV == NULL || yDV == NULL
   || tausmall < 0.0 || taubig < 0.0 || tausmall > taubig
   || pnzero == NULL || pnsmall == NULL || pnbig == NULL ) {
   fprintf(stderr, 
       "\n fatal error in DV_log10profile(%p,%d,%p,%p,%f,%f,%p,%p,%p)"
       "\n bad input\n",
       dv, npts, xDV, yDV, tausmall, taubig, pnzero, pnsmall, pnbig) ;
   exit(-1) ;
}
/*
   -------------------------------------
   find the largest and smallest entries 
   in the range (tausmall, taubig)
   -------------------------------------
*/
nbig = nsmall = nzero = ninrange = 0 ;
minval = maxval = 0.0 ;
DV_sizeAndEntries(dv, &size, &dvec) ;
for ( ii = 0 ; ii < size ; ii++ ) {
   val = fabs(dvec[ii]) ;
   if ( val == 0.0 ) {
      nzero++ ;
   } else if ( val <= tausmall ) {
      nsmall++ ;
   } else if ( val >= taubig ) {
      nbig++ ;
   } else if ( tausmall < val && val < taubig ) {
/*
      the explicit test keeps NaN magnitudes out of minval/maxval
*/
      if ( ninrange == 0 || minval > val ) {
         minval = val ;
      }
      if ( maxval < val ) {
         maxval = val ;
      }
      ninrange++ ;
   }
}
*pnzero  = nzero  ;
*pnsmall = nsmall ;
*pnbig   = nbig   ;
#if MYDEBUG > 0
fprintf(stdout, 
        "\n nzero = %d, minval = %e, nsmall = %d, maxval = %e, nbig = %d",
        nzero, minval, nsmall, maxval, nbig) ;
#endif
/*
   ------------------
   set up the buckets
   ------------------
*/
DV_setSize(xDV, npts) ;
DV_setSize(yDV, npts) ;
x = DV_entries(xDV) ;
y = DV_entries(yDV) ;
DVfill(npts, x, 0.0) ;
DVfill(npts, y, 0.0) ;
if ( ninrange == 0 ) {
/*
   nothing to profile: log10(minval) would be log10(0),
   so leave the zero-filled vectors as the result
*/
   return ;
}
sums = DVinit(npts, 0.0) ;
minval = log10(minval) ;
maxval = log10(maxval) ;
/*
minval = log10(tausmall) ;
maxval = log10(taubig) ;
*/
if ( npts > 1 ) {
   deltaVal = (maxval - minval)/(npts - 1) ;
} else {
/*
   a single bucket has no width; avoids dividing by npts - 1 == 0
*/
   deltaVal = 0.0 ;
}
/*
   --------------------------------
   fill the sums and counts vectors
   --------------------------------
*/
for ( ii = 0 ; ii < size ; ii++ ) {
   val = fabs(dvec[ii]) ;
   if ( tausmall < val && val < taubig ) {
      if ( deltaVal > 0.0 ) {
         ipt = (int) ((log10(val) - minval) / deltaVal) ;
/*
         guard against roundoff pushing the index off either end
*/
         if ( ipt < 0 ) {
            ipt = 0 ;
         } else if ( ipt >= npts ) {
            ipt = npts - 1 ;
         }
      } else {
/*
         zero bucket width: dividing would give NaN, and
         converting NaN to int is undefined
*/
         ipt = 0 ;
      }
      sums[ipt] += val ;
      y[ipt]++ ;
   }
}
#if MYDEBUG > 0
fprintf(stdout, "\n sum(y) = %.0f", DV_sum(yDV)) ;
#endif
/*
   ---------------------------
   set the x-coordinate vector
   ---------------------------
*/
for ( ipt = 0 ; ipt < npts ; ipt++ ) {
   if ( sums[ipt] == 0.0 ) {
      x[ipt] = minval + ipt*deltaVal ;
   } else {
      x[ipt] = log10(sums[ipt]/y[ipt]) ;
   }
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DVfree(sums) ;

return ; }