/*
   ----------------------------------------------------------
   purpose -- to construct the map from fronts to processors,
      and compute operations for each processor.

   on success the following fields of the bridge are written:
      cumopsDV -- created if NULL, resized to nproc, zeroed and
                  then handed to the map routine
      ownersIV -- the owners map returned by the map routine
      vtxmapIV -- a new IV of size neqns, filled by gathering
                  the ownersIV entries through the front tree's
                  vertex-to-front map
      cpus[7]  -- time spent building the owners map

   maptype -- type of map for parallel factorization
      maptype = 1 --> wrap map
      maptype = 2 --> balanced map
      maptype = 3 --> subtree-subset map
      maptype = 4 --> domain decomposition map
      any other value --> domain decomposition map with a
         cutoff of 1/(2*nproc); the cutoff argument is ignored
   cutoff -- used when maptype = 4 as upper bound on
      relative domain size

   return value --
      1 -- success
     -1 -- bridge is NULL
     -2 -- front tree is NULL

   NOTE(review): bridge->ownersIV and bridge->vtxmapIV are
      overwritten without releasing any previous objects;
      confirm that callers clear the bridge before calling
      this method a second time.

   created -- 98sep25, cca
   ----------------------------------------------------------
*/
int
BridgeMPI_factorSetup (
    BridgeMPI   *bridge,
    int         maptype,
    double      cutoff
) {
    double   t1, t2 ;
    DV       *cumopsDV ;
    ETree    *frontETree ;
    FILE     *msgFile ;
    int      msglvl, nproc ;
    /*
       ---------------
       check the input
       ---------------
    */
    MARKTIME(t1) ;
    if ( bridge == NULL ) {
        fprintf(stderr, "\n error in BridgeMPI_factorSetup()"
                "\n bridge is NULL") ;
        return(-1) ;
    }
    if ( (frontETree = bridge->frontETree) == NULL ) {
        fprintf(stderr, "\n error in BridgeMPI_factorSetup()"
                "\n frontETree is NULL") ;
        return(-2) ;
    }
    nproc   = bridge->nproc   ;
    msglvl  = bridge->msglvl  ;
    msgFile = bridge->msgFile ;
    /*
       -------------------------------------------
       allocate and initialize the cumopsDV object
       (an existing object is reused, not replaced)
       -------------------------------------------
    */
    if ( (cumopsDV = bridge->cumopsDV) == NULL ) {
        cumopsDV = bridge->cumopsDV = DV_new() ;
    }
    DV_setSize(cumopsDV, nproc) ;
    DV_zero(cumopsDV) ;
    /*
       ----------------------------
       create the owners map object
       ----------------------------
    */
    switch ( maptype ) {
    case 1 :
        bridge->ownersIV = ETree_wrapMap(frontETree, bridge->type,
                                         bridge->symmetryflag, cumopsDV) ;
        break ;
    case 2 :
        bridge->ownersIV = ETree_balancedMap(frontETree, bridge->type,
                                             bridge->symmetryflag, cumopsDV) ;
        break ;
    case 3 :
        bridge->ownersIV = ETree_subtreeSubsetMap(frontETree, bridge->type,
                           bridge->symmetryflag, cumopsDV) ;
        break ;
    case 4 :
        bridge->ownersIV = ETree_ddMap(frontETree, bridge->type,
                                       bridge->symmetryflag, cumopsDV, cutoff) ;
        break ;
    default :
        /*
           unknown map type: fall back to the domain decomposition
           map with a cutoff derived from the number of processors
        */
        bridge->ownersIV = ETree_ddMap(frontETree, bridge->type,
                                       bridge->symmetryflag, cumopsDV, 1./(2*nproc)) ;
        break ;
    }
    MARKTIME(t2) ;
    bridge->cpus[7] = t2 - t1 ;
    if ( msglvl > 1 ) {
        fprintf(msgFile, "\n\n parallel factor setup") ;
        fprintf(msgFile, "\n type = %d, symmetryflag = %d",
                bridge->type, bridge->symmetryflag) ;
        fprintf(msgFile, "\n total factor operations = %.0f",
                DV_sum(cumopsDV)) ;
        /*
           the best possible speedup is the total work divided by
           the work of the most heavily loaded processor
        */
        fprintf(msgFile,
                "\n upper bound on speedup due to load balance = %.2f",
                DV_sum(cumopsDV)/DV_max(cumopsDV)) ;
        fprintf(msgFile, "\n operations distributions over threads") ;
        DV_writeForHumanEye(cumopsDV, msgFile) ;
        fflush(msgFile) ;
    }
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n owners map IV object") ;
        IV_writeForHumanEye(bridge->ownersIV, msgFile) ;
        fflush(msgFile) ;
    }
    /*
       ----------------------------------------------------
       create the vertex map object: one entry per equation,
       gathered from the owners map via the vertex-to-front
       map of the front tree
       ----------------------------------------------------
    */
    bridge->vtxmapIV = IV_new() ;
    IV_init(bridge->vtxmapIV, bridge->neqns, NULL) ;
    IVgather(bridge->neqns, IV_entries(bridge->vtxmapIV),
             IV_entries(bridge->ownersIV),
             ETree_vtxToFront(bridge->frontETree)) ;
    if ( msglvl > 2 ) {
        fprintf(msgFile, "\n\n vertex map IV object") ;
        IV_writeForHumanEye(bridge->vtxmapIV, msgFile) ;
        fflush(msgFile) ;
    }

    return(1) ;
}
Beispiel #2
0
// Builds a multi-threaded SPOOLES solver for the matrix A.
//
// The constructor copies the upper triangle of A (entries with column >= row)
// into a SPOOLES InpMtx, runs the BridgeMT setup / factorStats / factorSetup /
// factor sequence, and allocates the dense n x 1 right-hand-side and solution
// matrices. Progress and timing statistics are written to the file
// "SPOOLES.message" in the current directory.
//
//   A          -- the matrix to factor; only its upper triangle is read
//   numThreads -- number of threads passed to BridgeMT_factorSetup
//   verbose    -- if >= 1, progress messages are printed to stdout
//
// Throws the int value 1 if A is NULL, if the message file cannot be opened,
// or if any SPOOLES stage returns a code other than 1. On such a failure the
// objects created so far (input matrix, bridge, message file) are released
// before the exception leaves the constructor, because the destructor does not
// run for a partially constructed object.
SPOOLESSolverMT::SPOOLESSolverMT(const SparseMatrix * A, int numThreads, int verbose)
{
  if (A == NULL)
  {
    printf("Error: SPOOLESSolverMT received a NULL matrix.\n");
    throw 1;
  }

  n = A->Getn();
  this->verbose = verbose;

  // declared ahead of the try block so that the handler can release them
  InpMtx * mtxA = NULL;
  BridgeMT * bridgeMT = NULL;

  msgFile = fopen("SPOOLES.message","w");
  if (msgFile == NULL)
  {
    // every later stage writes to msgFile, so there is no point continuing
    printf("Error: could not open SPOOLES.message for writing.\n");
    throw 1;
  }

  try
  {
    // prepare SPOOLES input matrix
    if (verbose >= 1)
      printf("Converting matrix to SPOOLES format...\n");

    mtxA = InpMtx_new();
    InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, A->GetNumEntries(), n);

    for(int row=0; row<n; row++)
    {
      int rowLength = A->GetRowLength(row);

      for(int j=0; j< rowLength; j++)
      {
        // the matrix is declared symmetric below, so only the upper triangle is passed on
        int column = A->GetColumnIndex(row, j);
        if (column >= row)
          InpMtx_inputRealEntry(mtxA, row, column, A->GetEntry(row, j) );
      }
    }

    InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
    //InpMtx_writeForHumanEye(mtxA, msgFile);

    // compute the factorization
    if (verbose >= 1)
      printf("Factoring the %d x %d matrix...\n",n,n);

    bridgeMT = BridgeMT_new();
    BridgeMT_setMatrixParams(bridgeMT, n, SPOOLES_REAL, SPOOLES_SYMMETRIC);
    BridgeMT_setMessageInfo(bridgeMT, 1, msgFile);
    int rc = BridgeMT_setup(bridgeMT, mtxA);
    if (rc != 1)
    {
      printf("Error: BridgeMT setup returned exit code %d.\n", rc);
      throw 1;
    }

    int type = 1; // real entries
    int nfront, nfind, nfent;
    double nfactorops;
    // NOTE(review): nsolveops is not declared in this constructor; presumably
    // an int member of the class (it is printed with %d below) -- confirm.
    rc = BridgeMT_factorStats(bridgeMT, type, SPOOLES_SYMMETRIC, &nfront,
                              &nfind, &nfent, &nsolveops, &nfactorops);
    if ( rc != 1 )
    {
      printf("Error: BridgeMT_factorStats returned exit code %d.\n", rc);
      throw 1;
    }

    fprintf(msgFile,
          "\n\n factor matrix statistics"
          "\n %d fronts, %d indices, %d entries"
          "\n %d solve operations, %12.4e factor operations",
          nfront, nfind, nfent, nsolveops, nfactorops) ;
    fflush(msgFile) ;


    // setup the parallel factorization

    rc = BridgeMT_factorSetup(bridgeMT, numThreads, 0, 0.0) ;
    if ( rc != 1 )
    {
      // the statistics printed below read bridgeMT->cumopsDV, which is only
      // meaningful after a successful setup
      printf("Error: BridgeMT_factorSetup returned exit code %d.\n", rc);
      throw 1;
    }

    fprintf(msgFile, "\n\n ----- PARALLEL FACTOR SETUP -----\n") ;
    fprintf(msgFile,
          "\n    CPU %8.3f : time to setup parallel factorization",
          bridgeMT->cpus[5]) ;
    fprintf(msgFile, "\n total factor operations = %.0f",
             DV_sum(bridgeMT->cumopsDV)) ;
    fprintf(msgFile,
             "\n upper bound on speedup due to load balance = %.2f",
             DV_sum(bridgeMT->cumopsDV)/DV_max(bridgeMT->cumopsDV)) ;
    fprintf(msgFile, "\n operations distributions over threads") ;
    DV_writeForHumanEye(bridgeMT->cumopsDV, msgFile) ;
    fflush(msgFile) ;

    // factor the matrix
    int permuteflag  = 1 ;
    int error;
    rc = BridgeMT_factor(bridgeMT, mtxA, permuteflag, &error);
    if ( rc == 1 )
    {
      fprintf(msgFile, "\n\n factorization completed successfully\n") ;
    }
    else
    {
      printf("Error: factorization returned exit code %d (error %d).\n", rc, error);
      throw 1;
    }

    fprintf(msgFile, "\n\n ----- FACTORIZATION -----\n") ;
    fprintf(msgFile,
          "\n    CPU %8.3f : time to permute original matrix"
          "\n    CPU %8.3f : time to initialize factor matrix"
          "\n    CPU %8.3f : time to compute factorization"
          "\n    CPU %8.3f : time to post-process factorization"
          "\n CPU %8.3f : total factorization time\n",
          bridgeMT->cpus[6],
          bridgeMT->cpus[7],
          bridgeMT->cpus[8],
          bridgeMT->cpus[9],
          bridgeMT->cpus[10]) ;
    fprintf(msgFile, "\n\n factorization statistics"
          "\n %d pivots, %d pivot tests, %d delayed vertices"
          "\n %d entries in D, %d entries in L, %d entries in U",
          bridgeMT->stats[0], bridgeMT->stats[1], bridgeMT->stats[2],
          bridgeMT->stats[3], bridgeMT->stats[4], bridgeMT->stats[5]) ;
    fprintf(msgFile,
          "\n\n factorization: raw mflops %8.3f, overall mflops %8.3f",
          1.e-6*nfactorops/bridgeMT->cpus[8],
          1.e-6*nfactorops/bridgeMT->cpus[10]) ;
    fflush(msgFile) ;
  }
  catch (...)
  {
    // release everything acquired so far, then let the caller see the
    // original exception
    if (bridgeMT != NULL)
      BridgeMT_free(bridgeMT);
    if (mtxA != NULL)
      InpMtx_free(mtxA);
    fclose(msgFile);
    msgFile = NULL;
    throw;
  }

  // construct dense SPOOLES matrix for rhs and x
  DenseMtx *mtx_rhs = DenseMtx_new();
  DenseMtx_init(mtx_rhs, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_rhsPointer = (void*) mtx_rhs;

  DenseMtx *mtx_x = DenseMtx_new();
  DenseMtx_init(mtx_x, SPOOLES_REAL, 0, 0, n, 1, 1, n);
  mtx_xPointer = (void*) mtx_x;

  // the SPOOLES objects are kept behind opaque pointers in the class
  bridgeMTPointer = (void*) bridgeMT;
  APointer = (void*) mtxA;

  if (verbose >= 1)
    printf("Factorization completed.\n");
}