Ejemplo n.º 1
0
void kass(int            levelk,
          int            rat,
          SymbolMatrix * symbptr,
          PASTIX_INT            baseval,
          PASTIX_INT            vertnbr,
          PASTIX_INT            edgenbr,
          PASTIX_INT          * verttab,
          PASTIX_INT          * edgetab,
          Order        * orderptr,
          MPI_Comm       pastix_comm)
{
  PASTIX_INT snodenbr;
  PASTIX_INT *snodetab   = NULL;
  PASTIX_INT *treetab    = NULL;
  PASTIX_INT *ia         = NULL;
  PASTIX_INT *ja         = NULL;
  PASTIX_INT i, j, n;
  PASTIX_INT ind;
  csptr mat;
  PASTIX_INT *tmpj       = NULL;
  PASTIX_INT *perm       = NULL;
  PASTIX_INT *iperm      = NULL;
  PASTIX_INT newcblknbr;
  PASTIX_INT *newrangtab = NULL;
  Dof dofstr;
  Clock timer1;
  double nnzS;
  int procnum;
  (void)edgenbr;

  MPI_Comm_rank(pastix_comm,&procnum);

#ifdef DEBUG_KASS
  print_one("--- kass begin ---\n");
#endif
/*   graphData (graphptr,  */
/*           (SCOTCH_Num * )&baseval,  */
/*           (SCOTCH_Num * )&vertnbr,  */
/*           (SCOTCH_Num **)&verttab,  */
/*           NULL, NULL, NULL,  */
/*           (SCOTCH_Num * )&edgenbr,  */
/*           (SCOTCH_Num **)&edgetab,  */
/*           NULL); */

  n = vertnbr;
  ia = verttab;
  ja = edgetab;
  perm = orderptr->permtab;
  iperm = orderptr->peritab;

  /*** Convert Fortran to C numbering ***/
  if(baseval == 1)
    {
      for(i=0;i<=n;i++)
          ia[i]--;
      for(i=0;i<n;i++)
        for(j=ia[i];j<ia[i+1];j++)
          ja[j]--;
      for(i=0;i<n;i++)
        orderptr->permtab[i]--;
      for(i=0;i<n;i++)
        orderptr->peritab[i]--;
    }

  MALLOC_INTERN(treetab, n, PASTIX_INT);
#ifndef SCOTCH_SNODE
  /*if(rat != -1 )*/
    {
      /***** FIND THE SUPERNODE PARTITION FROM SCRATCH ********/

      /*** Find the supernodes of the direct factorization  ***/
      MALLOC_INTERN(snodetab, n+1, PASTIX_INT);


      clockInit(&timer1);
      clockStart(&timer1);
      find_supernodes(n, ia, ja, perm, iperm, &snodenbr, snodetab, treetab);
      clockStop(&timer1);
      print_one("Time to find the supernode (direct) %.3g s \n", clockVal(&timer1));

      /*memfree(treetab);*/
      print_one("Number of supernode for direct factorization %ld \n", (long)snodenbr);
    }
#else
  /*else*/
    {
      /***** USE THE SUPERNODE PARTITION OF SCOTCH  ********/
      snodenbr = orderptr->cblknbr;
      MALLOC_INTERN(snodetab, n+1, PASTIX_INT);
      memCpy(snodetab, orderptr->rangtab, sizeof(PASTIX_INT)*(snodenbr+1));
      print_one("Number of column block found in scotch (direct) %ld \n", (long)snodenbr);

    }
#endif

  /****************************************/
  /*  Convert the graph                   */
  /****************************************/
    MALLOC_INTERN(mat, 1, struct SparRow);
  initCS(mat, n);
  MALLOC_INTERN(tmpj, n, PASTIX_INT);
  /**** Convert and permute the matrix in sparrow form  ****/
  /**** The diagonal is not present in the CSR matrix, we have to put it in the matrix ***/
  bzero(tmpj, sizeof(PASTIX_INT)*n);
  for(i=0;i<n;i++)
    {
      /*** THE GRAPH DOES NOT CONTAIN THE DIAGONAL WE ADD IT ***/
      tmpj[0] = i;
      ind = 1;
      for(j=ia[i];j<ia[i+1];j++)
        tmpj[ind++] = ja[j];

      mat->nnzrow[i] = ind;
      MALLOC_INTERN(mat->ja[i], ind, PASTIX_INT);
      memCpy(mat->ja[i], tmpj, sizeof(PASTIX_INT)*ind);
      mat->ma[i] = NULL;
    }
  CS_Perm(mat, perm);
  /*** Reorder the matrix ***/
  sort_row(mat);
  memFree(tmpj);


  /***** COMPUTE THE SYMBOL MATRIX OF ILU(K) WITH AMALGAMATION *****/
  kass_symbol(mat, levelk, (double)(rat)/100.0, perm,
              iperm, snodenbr, snodetab, treetab, &newcblknbr, &newrangtab,
              symbptr, pastix_comm);


  cleanCS(mat);
  memFree(mat);
  memFree(treetab);

  dofInit(&dofstr);
  dofConstant(&dofstr, 0, symbptr->nodenbr, 1);
  nnzS =  recursive_sum(0, symbptr->cblknbr-1, nnz, symbptr, &dofstr);
  print_one("Number of non zero in the non patched symbol matrix = %g, fillrate1 %.3g \n",
            nnzS+n, (nnzS+n)/(ia[n]/2.0 +n));
  dofExit(&dofstr);


  if(symbolCheck(symbptr) != 0)
    {
      errorPrint("SymbolCheck after kass_symbol.");
      ASSERT(0, MOD_KASS);
    }



  if(levelk != -1)
    {
      /********************************************************/
      /** ADD BLOCKS IN ORDER TO GET A REAL ELIMINATION TREE **/
      /********************************************************/
      Patch_SymbolMatrix(symbptr);
    }



  dofInit(&dofstr);
  dofConstant(&dofstr, 0, symbptr->nodenbr, 1);
  nnzS =  recursive_sum(0, symbptr->cblknbr-1, nnz, symbptr, &dofstr);

  dofExit(&dofstr);
  print_one("Number of block in final symbol matrix = %ld \n", (long)symbptr->bloknbr);
  print_one("Number of non zero in final symbol matrix = %g, fillrate2 %.3g \n",  nnzS+n, (nnzS+n)/(ia[n]/2.0 +n));
  if(symbolCheck(symbptr) != 0)
    {
      errorPrint("SymbolCheck after Patch_SymbolMatrix.");
      ASSERT(0, MOD_KASS);
    }
#ifdef DEBUG_KASS
  print_one("--- kass end ---\n");
#endif
  memFree(snodetab);
  orderptr->cblknbr = newcblknbr;
  memFree(orderptr->rangtab);
  orderptr->rangtab = newrangtab;

}
Ejemplo n.º 2
0
void kass_symbol(csptr mat, PASTIX_INT levelk, double rat, PASTIX_INT *perm, PASTIX_INT *iperm, PASTIX_INT snodenbr, PASTIX_INT *snodetab, PASTIX_INT *streetab, PASTIX_INT *cblknbr, PASTIX_INT **rangtab, SymbolMatrix *symbmtx, MPI_Comm pastix_comm)
{
  /**************************************************************************************/
  /* This function computes a symbolic factorization ILU(k) given a CSR matrix and an   */
  /* ordering. Then it computes a block partition of the factor to get BLAS3            */
  /* efficiency                                                                         */
  /* NOTE: the CSC matrix is given symmetrized and without the diagonal                 */
  /**************************************************************************************/

  PASTIX_INT i, j;
  PASTIX_INT nnzL;
  PASTIX_INT *iperm2  = NULL;
  PASTIX_INT *treetab = NULL;
  PASTIX_INT n;
  csptr P;
  Clock timer1;
  int procnum;

  MPI_Comm_rank(pastix_comm,&procnum);

  n = mat->n;
  MALLOC_INTERN(iperm2, n, PASTIX_INT);

  /*compact_graph(mat, NULL, NULL, NULL);*/

  /*** Compute the ILU(k) pattern of the quotient matrix ***/
  MALLOC_INTERN(P, 1, struct SparRow);
  initCS(P, n);
  print_one("Level of fill = %ld\nAmalgamation ratio = %d \n", (long)levelk, (int)(rat*100));
  clockInit(&timer1);
  clockStart(&timer1);

  if(levelk == -1)
    {

      /***** FACTORISATION DIRECT *******/
      /***** (Re)compute also the streetab (usefull when SCOTCH_SNODE
             is active) ***/
      SF_Direct(mat, snodenbr, snodetab, streetab, P);

      clockStop(&timer1);
      print_one("Time to compute scalar symbolic direct factorization  %.3g s \n", clockVal(&timer1));
#ifdef DEBUG_KASS
      print_one("non-zeros in P = %ld \n", (long)CSnnz(P));
#endif
      nnzL = 0;
      for(i=0;i<P->n;i++)
        {
          PASTIX_INT ncol;
          ncol = snodetab[i+1]-snodetab[i];
          nnzL += (ncol*(ncol+1))/2;
#ifdef DEBUG_KASS
          ASSERT(P->nnzrow[i] >= ncol, MOD_KASS);
          if(P->nnzrow[i] >= n)
            fprintf(stderr,"P->nnzrow[%ld] = %ld \n", (long)i, (long)P->nnzrow[i]);
          ASSERT(P->nnzrow[i] < n, MOD_KASS);
#endif
          nnzL += (P->nnzrow[i]-ncol)*ncol;
        }
#ifdef DEBUG_KASS
      print_one("NNZL = %ld \n", (long)nnzL);
#endif
    }
  else
    {
      /***** FACTORISATION INCOMPLETE *******/
      nnzL = SF_level(2, mat, levelk, P);

      clockStop(&timer1);
      print_one("Time to compute scalar symbolic factorization of ILU(%ld) %.3g s \n",
              (long)levelk, clockVal(&timer1));

    }
  print_one("Scalar nnza = %ld nnzlk = %ld, fillrate0 = %.3g \n",
            (long)( CSnnz(mat) + n)/2, (long)nnzL, (double)nnzL/(double)( (CSnnz(mat)+n)/2.0 ));



  /** Sort the rows of the symbolic matrix */
  sort_row(P);

  clockInit(&timer1);
  clockStart(&timer1);

  if(levelk != -1)
    {

      /********************************/
      /** Compute the "k-supernodes" **/
      /********************************/

#ifdef KS
      assert(levelk >= 0);
      KSupernodes(P, rat, snodenbr, snodetab, cblknbr, rangtab);
#else

#ifdef SCOTCH_SNODE
      if(rat == -1)
        assert(0); /** do not have treetab with this version of Scotch **/
#endif

      MALLOC_INTERN(treetab, P->n, PASTIX_INT);
      for(j=0;j<snodenbr;j++)
        {
          for(i=snodetab[j];i<snodetab[j+1]-1;i++)
            treetab[i] = i+1;

          /*** Version generale ****/
          if(streetab[j] == -1 || streetab[j] == j)
            treetab[i] = -1;
          else
            treetab[i]=snodetab[streetab[j]];
          /*** Version restricted inside the supernode (like KSupernodes) ***/
          /*treetab[snodetab[j+1]-1] = -1;*/  /** this should give the same results than
                                                  KSupernodes **/
        }

      /** NEW ILUK + DIRECT **/
      amalgamate(rat, P, -1, NULL, treetab, cblknbr, rangtab, iperm2, pastix_comm);

      memFree(treetab);
      for(i=0;i<n;i++)
        iperm2[i] = iperm[iperm2[i]];
      memcpy(iperm, iperm2, sizeof(PASTIX_INT)*n);
      for(i=0;i<n;i++)
        perm[iperm[i]] = i;
#endif
    }
  else{


    /*if(0)*/
      {
        amalgamate(rat, P, snodenbr, snodetab, streetab, cblknbr,
                   rangtab, iperm2, pastix_comm);

        /** iperm2 is the iperm vector of P **/
        for(i=0;i<n;i++)
          iperm2[i] = iperm[iperm2[i]];
        memcpy(iperm, iperm2, sizeof(PASTIX_INT)*n);
        for(i=0;i<n;i++)
          perm[iperm[i]] = i;
      }
      /*else
      {
        fprintf(stderr, "RAT = 0 SKIP amalgamation \n");
        *cblknbr = snodenbr;
        MALLOC_INTERN(*rangtab, snodenbr+1, PASTIX_INT);
        memcpy(*rangtab, snodetab, sizeof(PASTIX_INT)*(snodenbr+1));
        }*/
  }

  clockStop(&timer1);
  print_one("Time to compute the amalgamation of supernodes %.3g s\n", clockVal(&timer1));

  print_one("Number of cblk in the amalgamated symbol matrix = %ld \n", (long)*cblknbr);


  Build_SymbolMatrix(P, *cblknbr, *rangtab, symbmtx);


  print_one("Number of block in the non patched symbol matrix = %ld \n", (long)symbmtx->bloknbr);


  memFree(iperm2);
  cleanCS(P);
  memFree(P);

}
Ejemplo n.º 3
0
void Patch_SymbolMatrix(SymbolMatrix *symbmtx)
{
  PASTIX_INT i,j, k;
  PASTIX_INT vroot;
  PASTIX_INT        *father     = NULL; /** For the cblk of the symbol matrix **/
  SymbolBlok *newbloktab = NULL;
  SymbolCblk *cblktab    = NULL;
  SymbolBlok *bloktab    = NULL;
  csptr Q;


  cblktab = symbmtx->cblktab;
  bloktab = symbmtx->bloktab;

  MALLOC_INTERN(father, symbmtx->cblknbr, PASTIX_INT);
  MALLOC_INTERN(newbloktab, symbmtx->bloknbr + symbmtx->cblknbr, SymbolBlok);

  MALLOC_INTERN(Q, 1, struct SparRow);
  initCS(Q, symbmtx->cblknbr);

  /* Count how many extra-diagonal bloks are facing each diagonal blok
   */
  for(i=0;i<symbmtx->cblknbr;i++)
    for(j=cblktab[i].bloknum+1;j<cblktab[i+1].bloknum;j++)
      Q->nnzrow[bloktab[j].cblknum]++;

  /* Allocate nFacingBlok integer for each diagonal blok */
  for(i=0;i<symbmtx->cblknbr;i++)
    {
      MALLOC_INTERN(Q->ja[i], Q->nnzrow[i], PASTIX_INT);
      Q->ma[i] = NULL;
    }

  for(i=0;i<symbmtx->cblknbr;i++)
    Q->nnzrow[i] = 0;

  /* Q->ja[k] will contain, for each extra-diagonal facing blok
   * of the column blok k, its column blok.
   */
  for(i=0;i<symbmtx->cblknbr;i++)
    for(j=cblktab[i].bloknum+1;j<cblktab[i+1].bloknum;j++)
      {
        k = bloktab[j].cblknum;
        Q->ja[k][Q->nnzrow[k]++] = i;
      }

  for(i=0;i<Q->n;i++)
    father[i] = -1;

  for(i=0;i<Q->n;i++)
    {
      /* for each blok facing diagonal blok i,
       * belonging to column blok k.
       *
       */
      for(j=0;j<Q->nnzrow[i];j++)
        {
          k = Q->ja[i][j];
#ifdef DEBUG_KASS
          assert(k<i);
#endif
          vroot = k;
          while(father[vroot] != -1 && father[vroot] != i)
            vroot = father[vroot];
          father[vroot] = i;

        }
    }

  for(i=0;i<Q->n;i++)
    if(father[i] == -1)
      father[i]=i+1;

  cleanCS(Q);
  memFree(Q);



  k = 0;
  for(i=0;i<symbmtx->cblknbr-1;i++)
    {
      PASTIX_INT odb, fbloknum;

      fbloknum = cblktab[i].bloknum;
      memCpy(newbloktab+k, bloktab + fbloknum, sizeof(SymbolBlok));
      cblktab[i].bloknum = k;
      k++;
      odb = cblktab[i+1].bloknum-fbloknum;
      if(odb <= 1 || bloktab[fbloknum+1].cblknum != father[i])
        {
          /** Add a blok toward the father **/
          newbloktab[k].frownum = cblktab[ father[i] ].fcolnum;
          newbloktab[k].lrownum = cblktab[ father[i] ].fcolnum; /** OIMBE try lcolnum **/
          newbloktab[k].cblknum = father[i];
#ifdef DEBUG_KASS
          if(father[i] != i)
            assert(cblktab[father[i]].fcolnum > cblktab[i].lcolnum);
#endif

          newbloktab[k].levfval = 0;
          k++;
        }


      if( odb > 1)
        {
          memCpy(newbloktab +k, bloktab + fbloknum+1, sizeof(SymbolBlok)*(odb-1));
          k+=odb-1;
        }

    }
  /** Copy the last one **/
  memCpy(newbloktab+k, bloktab + symbmtx->cblktab[symbmtx->cblknbr-1].bloknum, sizeof(SymbolBlok));
  cblktab[symbmtx->cblknbr-1].bloknum = k;
  k++;
  /** Virtual cblk **/
  symbmtx->cblktab[symbmtx->cblknbr].bloknum = k;

#ifdef DEBUG_KASS
  assert(k >= symbmtx->bloknbr);
  assert(k < symbmtx->cblknbr+symbmtx->bloknbr);
#endif
  symbmtx->bloknbr = k;
  memFree(symbmtx->bloktab);
  MALLOC_INTERN(symbmtx->bloktab, k, SymbolBlok);
  memCpy( symbmtx->bloktab, newbloktab, sizeof(SymbolBlok)*symbmtx->bloknbr);
  /*  virtual cblk to avoid side effect in the loops on cblk bloks */
  cblktab[symbmtx->cblknbr].bloknum = k;

  memFree(father);
  memFree(newbloktab);
}
Ejemplo n.º 4
0
/*-------------------- end protos */
int arms2(csptr Amat, int *ipar, double *droptol, int *lfil, 
	  double tolind, arms PreMat, FILE *ft) 
{
/*---------------------------------------------------------------------
| MULTI-LEVEL BLOCK ILUT PRECONDITIONER.
| ealier  version:  June 23, 1999  BJS -- 
| version2: Dec. 07th, 2000, YS  [reorganized ]
| version 3 (latest) Aug. 2005.  [reorganized + includes ddpq]
+---------------------------------------------------------------------- 
| ON ENTRY:
| ========= 
| ( Amat ) = original matrix A stored in C-style Compressed Sparse
|            Row (CSR) format -- 
|            see LIB/heads.h for the formal definition of this format.
|
| ipar[0:17]  = integer array to store parameters for 
|       arms construction (arms2) 
|
|       ipar[0]:=nlev.  number of levels (reduction processes). 
|                       see also "on return" below. 
| 
|       ipar[1]:= level-reordering option to be used.  
|                 if ipar[1]==0 ARMS uses a block independent set ordering
|                  with a sort of reverse cutill Mc Kee ordering to build 
|                  the blocks. This yields a symmetric ordering. 
|                  in this case the reordering routine called is indsetC
|                 if ipar[1] == 1, then a nonsymmetric ordering is used.
|                  In this case, the B matrix is constructed to be as
|                  diagonally dominant as possible and as sparse as possble.
|                  in this case the reordering routine called is ddPQ.
|                 
|       ipar[2]:=bsize. for indset  Dimension of the blocks. 
|                  bsize is only a target block size. The size of 
|                  each block can vary and is >= bsize. 
|                  for ddPQ: this is only the smallest size of the 
|                  last level. arms will stop when either the number 
|                  of levels reaches nlev (ipar[0]) or the size of the
|                  next level (C block) is less than bsize.
|
|       ipar[3]:=iout   if (iout > 0) statistics on the run are 
|                       printed to FILE *ft
|
|       ipar[4-9] NOT used [reserved for later use] - set to zero.
| 
| The following set method options for arms2. Their default values can
| all be set to zero if desired. 
|
|       ipar[10-13] == meth[0:3] = method flags for interlevel blocks
|       ipar[14-17] == meth[0:3] = method flags for last level block - 
|       with the following meaning in both cases:
|            meth[0] nonsummetric permutations of  1: yes. affects rperm
|                    USED FOR LAST SCHUR COMPLEMENT 
|            meth[1] permutations of columns 0:no 1: yes. So far this is
|                    USED ONLY FOR LAST BLOCK [ILUTP instead of ILUT]. 
|                    (so ipar[11] does no matter - enter zero). If 
|                    ipar[15] is one then ILUTP will be used instead 
|                    of ILUT. Permutation data stored in: perm2. 
|            meth[2] diag. row scaling. 0:no 1:yes. Data: D1
|            meth[3] diag. column scaling. 0:no 1:yes. Data: D2
|       all transformations related to parametres in meth[*] (permutation, 
|       scaling,..) are applied to the matrix before processing it 
| 
| ft       =  file for printing statistics on run
|
| droptol  = Threshold parameters for dropping elements in ILU 
|            factorization.
|            droptol[0:4] = related to the multilevel  block factorization
|            droptol[5:6] = related to ILU factorization of last block.
|            This flexibility is more than is really needed. one can use
|            a single parameter for all. it is preferable to use one value
|            for droptol[0:4] and another (smaller) for droptol[5:6]
|            droptol[0] = threshold for dropping  in L [B]. See piluNEW.c:
|            droptol[1] = threshold for dropping  in U [B].
|            droptol[2] = threshold for dropping  in L^{-1} F 
|            droptol[3] = threshold for dropping  in E U^{-1} 
|            droptol[4] = threshold for dropping  in Schur complement
|            droptol[5] = threshold for dropping  in L in last block
|              [see ilutpC.c]
|            droptol[6] = threshold for dropping  in U in last block
|              [see ilutpC.c]
|             This provides a rich selection - though in practice only 4
|             parameters are needed [which can be set to be the same 
              actually] -- indeed it makes sense to take
|             droptol[0] = droptol[1],  droptol[2] = droptol[3], 
|             and droptol[4] = droptol[5]
|
| lfil     = lfil[0:6] is an array containing the fill-in parameters.
|            similar explanations as above, namely:
|            lfil[0] = amount of fill-in kept  in L [B]. 
|            lfil[1] = amount of fill-in kept  in U [B].
|            lfil[2] = amount of fill-in kept  in E L\inv 
|            lfil[3] = amount of fill-in kept  in U \inv F
|            lfil[4] = amount of fill-in kept  in S    .
|            lfil[5] = amount of fill-in kept  in L_S  .
|            lfil[6] = amount of fill-in kept  in U_S 
|             
| tolind   = tolerance parameter used by the indset function. 
|            a row is not accepted into the independent set if 
|            the *relative* diagonal tolerance is below tolind.
|            see indset function for details. Good values are 
|            between 0.05 and 0.5 -- larger values tend to be better
|            for harder problems.
| 
| ON RETURN:
|=============
|
| (PreMat)  = arms data structure which consists of two parts:
|             levmat and ilsch. 
|
| ++(levmat)= permuted and sorted matrices for each level of the block 
|             factorization stored in PerMat4 struct. Specifically
|             each level in levmat contains the 4 matrices in:
|
|
|            |\         |       |
|            |  \   U   |       |
|            |    \     |   F   |
|            |  L   \   |       |
|            |        \ |       |
|            |----------+-------|
|            |          |       |
|            |    E     |       |
|            |          |       |
|            
|            plus a few other things. See LIB/heads.h for details.
|
| ++(ilsch) = IluSpar struct. If the block of the last level is:
|
|                        |  B    F |
|                  A_l = |         | 
|                        |  E    C |
|
|             then IluSpar contains the block C and an ILU
|             factorization (matrices L and U) for the last 
|             Schur complement [S ~ C - E inv(B) F ]
|             (modulo dropping) see LIB/heads.h for details.
|
| ipar[0]   = number of levels found (may differ from input value) 
|
+---------------------------------------------------------------------*/
/*-------------------- function  prototyping  done in LIB/protos.h    */
/*-------------------- move above to protos.h */
  p4ptr levp, levc, levn, levmat = PreMat->levmat;
  csptr schur, B, F, E, C=NULL; 
  ilutptr ilsch = PreMat->ilus; 
/*-------------------- local variables  (initialized)   */
   double *dd1, *dd2;
   int nlev = ipar[0], bsize = ipar[2], iout = ipar[3], ierr = 0;
   int methL[4], methS[4];
/*--------------------  local variables  (not initialized)   */
   int nA, nB, nC, j, n, ilev, symperm;
/*--------------------    work arrays:    */
   int *iwork, *uwork; 
/*   timer arrays:  */ 
/*   double *symtime, *unstime, *factime, *tottime;*/
/*---------------------------BEGIN ARMS-------------------------------*/
/*   schur matrix starts being original A */ 

/*-------------------- begin                                         */
   schur  = (csptr) Malloc(sizeof(SparMat), "arms2:1" );
/*---------------------------------------------------------------------
| The matrix (a,ja,ia) plays role of Schur compl. from the 0th level.
+--------------------------------------------------------------------*/
   nC = nA = n = Amat->n;
   if (bsize >= n) bsize = n-1;
   levmat->n = n; levmat->nB = 0; 
   setupCS(schur, n,1);
   cscpy(Amat,schur); 
   levc = levmat;
/*--------------------------------------- */ 
   levc->prev = levc->next = levp = NULL; 
   levc->n = 0; 
   memcpy(methL, &ipar[10], 4*sizeof(int));
   memcpy(methS, &ipar[14], 4*sizeof(int));
/*---------------------------------------------------------------------
| The preconditioner construction is divided into two parts:
|   1st part: construct and store multi-level L and U factors;
|   2nd part: construct the ILUT factorization for the coarsest level
+--------------------------------------------------------------------*/
   if ( (iout > 0)  && (nlev > 0) ) {
     fprintf(ft,"  \n");
     fprintf(ft,"Level   Total Unknowns    B-block   Coarse set\n");
     fprintf(ft,"=====   ==============    =======   ==========\n");
   }
/*---------------------------------------------------------------------
| main loop to construct multi-level LU preconditioner. Loop is on the
| level ilev. nA is the dimension of matrix A_l at each level.
+--------------------------------------------------------------------*/
   for (ilev = 0; ilev < nlev; ilev++) {
/*-------------------- new nA is old nC -- from previous level */
     nA = nC;
     if ( nA <= bsize )  goto label1000;  
/*-------------------- allocate work space                        */ 
     iwork = (int *) Malloc(nA*sizeof(int), "arms2:2.5" );
     symperm = 0;    /* 0nly needed in cleanP4 */
     if (ipar[1] == 1) 
       uwork = (int *) Malloc(nA*sizeof(int), "arms2:2.5" );
     else{
       symperm = 1;    
       uwork = iwork; 
     }
/*-------------------- SCALING*/
     dd1 = NULL;
     dd2 = NULL;
     if (methL[2]) {
       dd1 = (double *) Malloc(nA*sizeof(double), "arms2:3" );
       j=roscalC(schur, dd1,1);
      if (j) printf("ERROR in roscalC -  row %d  is a zero row\n",j);
     }

     if (methL[3]) {
       dd2 = (double *) Malloc(nA*sizeof(double), "arms2:4" );
       j=coscalC(schur, dd2,1); 
       if (j) printf("ERROR in coscalC - column %d is a zero column\n",j);
     }
/*--------------------independent-sets-permutation-------------------
|  do reordering -- The matrix and its transpose are used.
+--------------------------------------------------------------------*/
/* if (SHIFTTOL > 0.0) shiftsD(schur,SHIFTTOL);    */
//     printf("  ipar1 = %d \n", ipar[1]);
     if (ipar[1] == 1) 
       PQperm(schur, bsize, uwork, iwork, &nB, tolind) ; 
     else
       indsetC (schur, bsize, iwork, &nB, tolind) ; 
/*---------------------------------------------------------------------
| nB is the total number of nodes in the independent set.
| nC : nA - nB = the size of the reduced system.
+--------------------------------------------------------------------*/
     nC = nA - nB;
/*   if the size of B or C is zero , exit the main loop  */
/*   printf ("  nB %d nC %d \n",nB, nC); */
     if ( nB == 0 || nC == 0 )  goto label1000; 
/*---------------------------------------------------------------------
| The matrix for the current level is in (schur).
| The permutations arrays are in iwork and uwork (row).
| The routines rpermC, cpermC permute the matrix in place.
*-----------------------------------------------------------------------*/
/*   DEBUG : SHOULD THIS GO BEFORE GOTO LABEL1000 ?? */
     rpermC(schur,uwork); 
     cpermC(schur,iwork);
/*   prtC(schur, ilev) ;   print matrix - debugging */
/*-----------------------------------------------------------------------
| If this is the first level, the permuted matrix is stored in 
| (levc) = (levmat).  Otherwise, the next level is created in (levc).
+--------------------------------------------------------------------*/
     if (ilev > 0) {
/*-   delete C matrix of any level except last one (no longer needed) */
       cleanCS(C); 
/*-------------------- create the next level */
       levn = (p4ptr) Malloc(sizeof(Per4Mat), "arms2:6" );
       /* levc->prev = levp; */
       levc->next = levn;
       levp = levc;
       levc = levn;
       levc->prev = levp; 
     }
/*-------------------- p4ptr struct for current schur complement */
      B = (csptr) Malloc(sizeof(SparMat), "arms2:7" );
      E = (csptr) Malloc(sizeof(SparMat), "arms2:8" );
      F = (csptr) Malloc(sizeof(SparMat), "arms2:9" );
      C = (csptr) Malloc(sizeof(SparMat), "arms2:10" );
      csSplit4(schur, nB, nC, B, F, E, C);
      setupP4(levc, nB, nC, F, E);
/*--------------------     copy a few pointers       ---- */      
      levc->perm  = iwork;
      levc->rperm = uwork; 
      levc->symperm = symperm;
      levc->D1=dd1;
      levc->D2=dd2; 
/*---------------------------------------------------------------------
| a copy of the matrix (schur) has been permuted. Now perform the 
| block factorization: 
|
| | B   F |       | L       0 |     | U  L^-1 F |
| |       |   =   |           |  X  |           | = L x U
| | E   C |       | E U^-1  I |     | 0    A1   |
|   
| The factors E U^-1 and L^-1 F are discarded after the factorization.
|
+--------------------------------------------------------------------*/ 
      if (iout > 0)
	fprintf(ft,"%3d %13d %13d %10d\n", ilev+1,nA,nB,nC);
/*---------------------------------------------------------------------
| PILUT constructs one level of the block ILU fact.  The permuted matrix
| is in (levc).  The L and U factors will be stored in the p4mat struct.
| destroy current Schur  complement - no longer needed  - and set-up new
| one for next level...
+--------------------------------------------------------------------*/
      cleanCS(schur);
      schur = (csptr) Malloc(sizeof(SparMat), "arms2:11" ); 
      setupCS(schur, nC,1);
/*----------------------------------------------------------------------
| calling PILU to construct this level block factorization
| ! core dump in extreme case of empty matrices.
+----------------------------------------------------------------------*/
      ierr = pilu(levc, B, C, droptol, lfil, schur) ;
      /* prtC(levc->L, ilev) ; */
      if (ierr) { 
	fprintf(ft," ERROR IN  PILU  -- IERR = %d\n", ierr);
	return(1);
      }
      cleanCS(B); 
   }
/*---------------------------------------------------------------------
|   done with the reduction. Record the number of levels in ipar[0] 
|**********************************************************************
+--------------------------------------------------------------------*/
label1000:
   /* printf (" nnz_Schur %d \n",cs_nnz (schur)); */
   levc->next = NULL;
   ipar[0] = ilev;
   PreMat->nlev = ilev;  
   PreMat->n = n; 
   nC = schur->n;
   setupILUT(ilsch,nC); 
/*--------------------------------------------------------------------*/
 /* define C-matrix (member of ilsch) to be last C matrix */ 
   if (ilev > 0) ilsch->C=C; 
/*-------------------- for ilut fact of schur matrix */
/*  SCALING  */

   ilsch->D1 = NULL;
   if (methS[2]) {
     ilsch->D1 = (double *) Malloc(nC*sizeof(double), "arms2:iluschD1" );
     j=roscalC(schur, ilsch->D1, 1); 
     if (j) printf("ERROR in roscalC - row %d is a zero row\n",j);
   }

   ilsch->D2  = NULL;
   if (methS[3]) {
     ilsch->D2 = (double *) Malloc(nC*sizeof(double), "arms2:iluschD1" );
     j =coscalC(schur, ilsch->D2, 1);  
     if (j) printf("ERROR in coscalC - column %d is a zero column\n",j);
   }
/*---------------------------------------------------------------------
|     get ILUT factorization for the last reduced system.
+--------------------------------------------------------------------*/
   uwork = NULL;
   iwork = NULL;
   if (methS[0]) { 
     iwork = (int *) Malloc(nC*sizeof(int), "arms2:3" );
     uwork = (int *) Malloc(nC*sizeof(int), "arms2:3.5" );
     tolind = 0.0; 
     PQperm(schur, bsize, uwork, iwork, &nB, tolind) ; 
     rpermC(schur,uwork); 
     cpermC(schur,iwork);
   }
   ilsch->rperm = uwork; 
   ilsch->perm  = iwork;

   /*   printf("  lf : %d  %d  %d  %d  %d  %d  %d  \n",lfil[0],  
	lfil[1], lfil[2], lfil[3], lfil[4], lfil[5], lfil[6]) ; */
   
   ilsch->perm2 = NULL; 

   if (methS[1] == 0)
     ierr = ilutD(schur, droptol, lfil, ilsch);
   else {
     ilsch->perm2 = (int *) Malloc(nC*sizeof(int), "arms2:ilutpC" );
     for (j=0; j<nC; j++)
       ilsch->perm2[j] = j;
     ierr = ilutpC(schur, droptol, lfil, PERMTOL, nC, ilsch);
   }
/*---------- OPTIMIZATION: NEED TO COMPOUND THE TWO
             RIGHT PERMUTATIONS -- CHANGES HERE AND IN 
             USCHUR SOLVE ==  compound permutations */     
   if (ierr) {
     fprintf(ft," ERROR IN  ILUT -- IERR = %d\n", ierr); 
     return(1); 
   }
/*-------------------- Last Schur complement no longer needed */
   cleanCS(schur);
   return 0; 
}