Пример #1
0
//==============================================================================
Epetra_MsrMatrix::Epetra_MsrMatrix(int * proc_config, AZ_MATRIX * a_mat)
  : Epetra_Object("Epetra::MsrMatrix"),
    Amat_(a_mat),
    proc_config_(proc_config),
    Values_(0),
    Indices_(0),
    MaxNumEntries_(-1),
    ImportVector_(0),
    NormInf_(-1.0),
    NormOne_(-1.0)
{
#ifdef AZTEC_MPI
  MPI_Comm * mpicomm = (MPI_Comm * ) AZ_get_comm(proc_config);
  Comm_ = new Epetra_MpiComm(*mpicomm);
#else
  Comm_ = new Epetra_SerialComm();
#endif  
  if (a_mat->data_org[AZ_matrix_type]!=AZ_MSR_MATRIX)
    throw Comm_->ReportError("AZ_matrix_type must be AZ_MSR_MATRIX", -1);
  int * bindx = a_mat->bindx;
  NumMyRows_ = a_mat->data_org[AZ_N_internal] + a_mat->data_org[AZ_N_border];
  int NumExternal = a_mat->data_org[AZ_N_external];
  NumMyCols_ = NumMyRows_ + NumExternal;
  NumMyNonzeros_ = bindx[NumMyRows_] - bindx[0] + NumMyRows_;
  //Comm_->SumAll(&NumMyNonzeros_, &NumGlobalNonzeros_, 1);
  long long NumMyNonzerosLL_ = (long long) NumMyNonzeros_;
  Comm_->SumAll(&NumMyNonzerosLL_, &NumGlobalNonzeros_, 1);

  int * MyGlobalElements = a_mat->update;
  if (MyGlobalElements==0) 
    throw Comm_->ReportError("Aztec matrix has no update list: Check if AZ_Transform was called.", -2);

  DomainMap_ = new Epetra_Map(-1, NumMyRows_, MyGlobalElements, 0, *Comm_);

  double * dbleColGIDs = new double[NumMyCols_];
  int * ColGIDs = new int[NumMyCols_];
  for (int i=0; i<NumMyRows_; i++) dbleColGIDs[i] = (double) MyGlobalElements[i];
  AZ_exchange_bdry(dbleColGIDs, a_mat->data_org, proc_config);
  {for (int i=0; i<NumMyCols_; i++) ColGIDs[i] = (int) dbleColGIDs[i];}

  ColMap_ = new Epetra_Map(-1, NumMyCols_, ColGIDs, 0, *Comm_);

  Importer_ = new Epetra_Import(*ColMap_, *DomainMap_);
  
  delete [] dbleColGIDs;
  delete [] ColGIDs;
}
Пример #2
0
void AZ_sym_gauss_seidel_sl(double val[],int bindx[],double x[],int data_org[],
			    int options[], struct context *context,
			    int proc_config[])

/*******************************************************************************

  Symmetric Gauss-Siedel preconditioner.

  Author:          John N. Shadid, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  val:             Array containing the nonzero entries of the matrix (see
                   Aztec User's Guide).

  indx,
  bindx,
  rpntr,
  cpntr,
  bpntr:           Arrays used for DMSR and DVBR sparse matrix storage (see
                   file Aztec User's Guide).

  x:               On input, contains the current solution to the linear system.
                   On output contains the Jacobi preconditioned solution.

  data_org:        Array containing information on the distribution of the
                   matrix to this processor as well as communication parameters
                   (see Aztec User's Guide).

  options:         Determines specific solution method and other parameters.

*******************************************************************************/

{

  /* local variables */

  register int    *bindx_ptr;
  register double sum, *ptr_val;
  int             i, bindx_row, j_last, N, step, ione = 1, j;
  double          *b, *ptr_b;
  char            tag[80];

  /**************************** execution begins ******************************/

  N = data_org[AZ_N_internal] + data_org[AZ_N_border];

  sprintf(tag,"b/sGS %s",context->tag);
  b = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i);

  DCOPY_F77(&N, x, &ione, b, &ione);
  ptr_val = val;

  for (i = 0; i < N; i++) {
    (*ptr_val) = 1.0 / (*ptr_val);
    x[i]     = 0.0;
    ptr_val++;
  }

  for (step = 0; step < options[AZ_poly_ord]; step++) {
    AZ_exchange_bdry(x, data_org, proc_config);

    bindx_row = bindx[0];
    bindx_ptr = &bindx[bindx_row];
    ptr_val   = &val[bindx_row];
    ptr_b   = b;

    for (i = 0; i < N; i++) {
      sum    = *ptr_b++;
      j_last = bindx[i+1] - bindx[i];

      for (j = 0; j < j_last; j++) {
        sum -= *ptr_val++ * x[*bindx_ptr++];
      }
      x[i] = sum * val[i];
    }

    bindx_row = bindx[N];
    bindx_ptr = &bindx[bindx_row-1];
    ptr_val   = &val[bindx_row-1];

    for (i = N - 1; i >= 0; i--) {
      sum = b[i];
      j_last  = bindx[i+1] - bindx[i];

      for (j = 0; j < j_last; j++) {
        sum -= *ptr_val-- * x[*bindx_ptr--];
      }
      x[i] = sum * val[i];
    }
  }

  for (i = 0; i < N; i++)
    val[i] = 1.0 / val[i];

} /* AZ_sym_gauss_seidel_sl */
Пример #3
0
//==============================================================================
Epetra_MsrMatrix::Epetra_MsrMatrix(int * proc_config, AZ_MATRIX * a_mat)
  : Epetra_Object("Epetra::MsrMatrix"),
    Amat_(a_mat),
    proc_config_(proc_config),
    Values_(0),
    Indices_(0),
    MaxNumEntries_(-1),
    ImportVector_(0),
    NormInf_(-1.0),
    NormOne_(-1.0)
{
#ifdef EPETRA_NO_32BIT_GLOBAL_INDICES
  // We throw rather than let the compiler error out so that the
  // rest of the library is available and all possible tests can run.
  const char* error = "Epetra_MsrMatrix::Epetra_MsrMatrix: Not available for 64-bit Maps.";
  std::cerr << error << std::endl;
  throw Comm_->ReportError(error, -2);
#endif

#ifdef AZTEC_MPI
  MPI_Comm * mpicomm = (MPI_Comm * ) AZ_get_comm(proc_config);
  Comm_ = new Epetra_MpiComm(*mpicomm);
#else
  Comm_ = new Epetra_SerialComm();
#endif  
  if (a_mat->data_org[AZ_matrix_type]!=AZ_MSR_MATRIX)
    throw Comm_->ReportError("AZ_matrix_type must be AZ_MSR_MATRIX", -1);
  int * bindx = a_mat->bindx;
  NumMyRows_ = a_mat->data_org[AZ_N_internal] + a_mat->data_org[AZ_N_border];
  int NumExternal = a_mat->data_org[AZ_N_external];
  NumMyCols_ = NumMyRows_ + NumExternal;
  NumMyNonzeros_ = bindx[NumMyRows_] - bindx[0] + NumMyRows_;

#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
  long long tmp_NumMyNonzeros = NumMyNonzeros_;
  Comm_->SumAll(&tmp_NumMyNonzeros, &NumGlobalNonzeros_, 1);
#else
    int tmp_NumGlobalNonzeros = 0;
    Comm_->SumAll(&NumMyNonzeros_, &tmp_NumGlobalNonzeros, 1);
	NumGlobalNonzeros_ = tmp_NumGlobalNonzeros;
#endif

  int * MyGlobalElements = a_mat->update;
  if (MyGlobalElements==0) 
    throw Comm_->ReportError("Aztec matrix has no update list: Check if AZ_Transform was called.", -2);

#ifdef EPETRA_NO_32BIT_GLOBAL_INDICES
  DomainMap_ = 0;
#else
  DomainMap_ = new Epetra_Map(-1, NumMyRows_, MyGlobalElements, 0, *Comm_);
#endif

  double * dbleColGIDs = new double[NumMyCols_];
  int * ColGIDs = new int[NumMyCols_];
  for (int i=0; i<NumMyRows_; i++) dbleColGIDs[i] = (double) MyGlobalElements[i];
  AZ_exchange_bdry(dbleColGIDs, a_mat->data_org, proc_config);
  {for (int i=0; i<NumMyCols_; i++) ColGIDs[i] = (int) dbleColGIDs[i];}

#ifdef EPETRA_NO_32BIT_GLOBAL_INDICES
  ColMap_ = 0;
#else
  ColMap_ = new Epetra_Map(-1, NumMyCols_, ColGIDs, 0, *Comm_);
#endif

  Importer_ = new Epetra_Import(*ColMap_, *DomainMap_);
  
  delete [] dbleColGIDs;
  delete [] ColGIDs;
}
Пример #4
0
void dvbr_sparax_basic(int m, double *val, int *bindx, int *rpntr,
                       int *cpntr, int *bpntr, double *b, double *c,
                       int exchange_flag, int *data_org, int *proc_config)

/******************************************************************************

  c = Ab:
  Sparse (square) matrix-vector multiply, using the variable block row (VBR)
  data structure (A = val).

  Author:          Scott A. Hutchinson, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  m:               Number of (block) rows in A.

  val:             Array containing the entries of the matrix. The matrix is
                   stored block-row-by-block-row. Each block entry is dense and
                   stored by columns (VBR).

  bindx,
  rpntr,
  cpntr,
  bpntr:           Arrays used for DMSR and DVBR sparse matrix storage (see
                   Aztec User's Guide).

  b:               Right hand side of linear system.

  c:               On output contains the solution to the linear system.

  exchange_flag:   Flag which controls call to AZ_exchange_bdry() (ignored in
                   serial implementation).

  data_org:        Array containing information on the distribution of the
                   matrix to this processor as well as communication parameters
                   (see Aztec User's Guide).

******************************************************************************/

{

  /* local variables */

  register double *x;
  register double *c_pntr;
  register int     iblk_row, j, jblk, iblk_size;
  int              m1, ib1, n1;
  int              bpoff, rpoff;
  int              ione = 1;
  int              irpntr, irpntr_next;
  int              ibpntr, ibpntr_next = 0;
  double           one = 1.0;
  double          *val_pntr;
  char            *N = "N";

  /**************************** execution begins *****************************/

  /* exchange boundary info */

  if (exchange_flag) AZ_exchange_bdry(b, data_org, proc_config);

  /* offset of the first block */

  bpoff = *bpntr;
  rpoff = *rpntr;

  /* zero the result vector */

  for (j = 0; j < rpntr[m] - rpoff; c[j++] = 0.0);

  val_pntr    = val;
  irpntr_next = *rpntr++;
  bpntr++;
  c          -= rpoff;

  /* loop over block rows */

  for (iblk_row = 0; iblk_row < m; iblk_row++) {

    irpntr      = irpntr_next;
    irpntr_next = *rpntr++;

    ibpntr      = ibpntr_next;
    ibpntr_next = *bpntr++ - bpoff;

    /* set result pointer */

    c_pntr      = c + irpntr;

    /* number of rows in the current row block */

    m1          = irpntr_next - irpntr;

    /* loop over each block in the current row block */

    for (j = ibpntr; j < ibpntr_next; j++) {
      jblk = *(bindx+j);

      /* the starting point column index of the current block */

      ib1 = *(cpntr+jblk);

      /* number of columns in the current block */

      n1 = cpntr[jblk + 1] - ib1;
      iblk_size = m1*n1;

      /****************** Dense matrix-vector multiplication *****************/

      /*
       * Get base addresses
       */

      x = b + ib1;


      /*
       * Special case the m1 = n1 = 1 case
       */

      if (iblk_size == 1)
        *c_pntr += *val_pntr * *x;

      else if (m1 == n1) {

        /*
         * Inline small amounts of work
         */

        switch (m1) {

        case 2:
          c_pntr[0] += val_pntr[0]*x[0] + val_pntr[2]*x[1];
          c_pntr[1] += val_pntr[1]*x[0] + val_pntr[3]*x[1];
          break;

        case 3:
          c_pntr[0] += val_pntr[0]*x[0] + val_pntr[3]*x[1] + val_pntr[6]*x[2];
          c_pntr[1] += val_pntr[1]*x[0] + val_pntr[4]*x[1] + val_pntr[7]*x[2];
          c_pntr[2] += val_pntr[2]*x[0] + val_pntr[5]*x[1] + val_pntr[8]*x[2];
          break;

        case 4:
          c_pntr[0] += val_pntr[0]*x[0] + val_pntr[4]*x[1] + val_pntr[8] *x[2]
            + val_pntr[12]*x[3];
          c_pntr[1] += val_pntr[1]*x[0] + val_pntr[5]*x[1] + val_pntr[9] *x[2]
            + val_pntr[13]*x[3];
          c_pntr[2] += val_pntr[2]*x[0] + val_pntr[6]*x[1] + val_pntr[10]*x[2]
            + val_pntr[14]*x[3];

          c_pntr[3] += val_pntr[3]*x[0] + val_pntr[7]*x[1] + val_pntr[11]*x[2]
            + val_pntr[15]*x[3];
          break;

        case 5:
          c_pntr[0] += val_pntr[0]*x[0] + val_pntr[5]*x[1] + val_pntr[10]*x[2]
            + val_pntr[15]*x[3] + val_pntr[20]*x[4];
          c_pntr[1] += val_pntr[1]*x[0] + val_pntr[6]*x[1] + val_pntr[11]*x[2]
            + val_pntr[16]*x[3] + val_pntr[21]*x[4];
          c_pntr[2] += val_pntr[2]*x[0] + val_pntr[7]*x[1] + val_pntr[12]*x[2]
            + val_pntr[17]*x[3] + val_pntr[22]*x[4];
          c_pntr[3] += val_pntr[3]*x[0] + val_pntr[8]*x[1] + val_pntr[13]*x[2]
            + val_pntr[18]*x[3] + val_pntr[23]*x[4];
          c_pntr[4] += val_pntr[4]*x[0] + val_pntr[9]*x[1] + val_pntr[14]*x[2]
            + val_pntr[19]*x[3] + val_pntr[24]*x[4];
          break;

        case 6:
          c_pntr[0] += val_pntr[0]*x[0] + val_pntr[6] *x[1] + val_pntr[12]*x[2]
            + val_pntr[18]*x[3] + val_pntr[24]*x[4] + val_pntr[30]*x[5];
          c_pntr[1] += val_pntr[1]*x[0] + val_pntr[7] *x[1] + val_pntr[13]*x[2]
            + val_pntr[19]*x[3] + val_pntr[25]*x[4] + val_pntr[31]*x[5];
          c_pntr[2] += val_pntr[2]*x[0] + val_pntr[8] *x[1] + val_pntr[14]*x[2]
            + val_pntr[20]*x[3] + val_pntr[26]*x[4] + val_pntr[32]*x[5];
          c_pntr[3] += val_pntr[3]*x[0] + val_pntr[9] *x[1] + val_pntr[15]*x[2]
            + val_pntr[21]*x[3] + val_pntr[27]*x[4] + val_pntr[33]*x[5];
          c_pntr[4] += val_pntr[4]*x[0] + val_pntr[10]*x[1] + val_pntr[16]*x[2]
            + val_pntr[22]*x[3] + val_pntr[28]*x[4] + val_pntr[34]*x[5];
          c_pntr[5] += val_pntr[5]*x[0] + val_pntr[11]*x[1] + val_pntr[17]*x[2]
            + val_pntr[23]*x[3] + val_pntr[29]*x[4] + val_pntr[35]*x[5];
          break;

        default:

          /*
           * For most computers, a really well-optimized assembly-coded level 2
           * blas for small blocks sizes doesn't exist.  It's better to
           * optimize your own version, and take out all the overhead from the
           * regular dgemv call.  For large block sizes, it's also a win to
           * check for a column of zeroes; this is what dgemv_ does.  The
           * routine dgemvnsqr_() is a fortran routine that contains optimized
           * code for the hp, created from the optimizing preprocessor. Every
           * workstation will probably have an entry here eventually, since
           * this is a key optimization location.
           */

/* #ifdef AZ_PA_RISC */
/*           dgemvnsqr_(&m1, val_pntr, x, c_pntr); */
/* #else */
          if (m1 < 10)
            AZ_dgemv2(m1, n1, val_pntr, x, c_pntr);
          else
            DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr,
                   &ione);
/* #endif */

        }
      }

      /* nonsquare cases */

      else {
        if (m1 < 10)
          AZ_dgemv2(m1, n1, val_pntr, x, c_pntr);
        else
          DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr,
                 &ione);
      }

      val_pntr += iblk_size;
    }
  }

} /* dvbr_sparax_basic */
Пример #5
0
void AZ_MSR_matvec_mult (double *b, double *c,AZ_MATRIX *Amat,int proc_config[])


/******************************************************************************
  c = Ab:
  Sparse (square) overlapped matrix-vector multiply, using the  MSR
  data structure .

  Author:          Lydie Prevost, SNL, 1422
  =======

  Return code:     void
  ============

  Parameter list:
  ===============


  b:               Contains the vector b.

  c:               Contains the result vector c.



  options:         Determines specific solution method and other parameters.

  params:          Drop tolerance and convergence tolerance info.

  Amat:            Structure used to represent the matrix (see file az_aztec.h
                   and Aztec User's Guide).

  proc_config:     Machine configuration.  proc_config[AZ_node] is the node
                   number.  proc_config[AZ_N_procs] is the number of processors.

******************************************************************************/



{
  double *val;
  int *data_org, *bindx;
  register int j, irow;
 int          N;
#ifndef AZ_DONT_UNROLL_LOOPS
 int          *bindx_ptr;
 double       sum;
#else
 int nzeros, bindx_row, k;
#endif

#if AZ_TIME_MATVEC
 AZ_START_TIMER( "AztecOO: MSR mat-vec total", totalID );
#endif

  val = Amat->val;
  bindx = Amat->bindx;
  data_org = Amat->data_org;


  N = data_org[AZ_N_internal] + data_org[AZ_N_border];

#if AZ_TIME_MATVEC
  AZ_START_TIMER( "AztecOO: MSR mat-vec import", importID );
#endif

  /* exchange boundary info */

  AZ_exchange_bdry(b, data_org, proc_config);

#if AZ_TIME_MATVEC
 AZ_STOP_TIMER( importID );
#endif

#if AZ_TIME_MATVEC
 AZ_START_TIMER( "AztecOO: MSR mat-vec local", localID );
#endif

  /* This is the default */
#ifndef AZ_DONT_UNROLL_LOOPS

  j = bindx[0];
  bindx_ptr = &bindx[j];
  for (irow = 0; irow < N; irow++) {
    sum =  val[irow]*b[irow];
    while (j+10 < bindx[irow+1]) {
      sum += val[j+9]*b[bindx_ptr[9]] +
	val[j+8]*b[bindx_ptr[8]] +
	val[j+7]*b[bindx_ptr[7]] +
	val[j+6]*b[bindx_ptr[6]] +
	val[j+5]*b[bindx_ptr[5]] +
	val[j+4]*b[bindx_ptr[4]] +
	val[j+3]*b[bindx_ptr[3]] +
	val[j+2]*b[bindx_ptr[2]] +
	val[j+1]*b[bindx_ptr[1]] +
	val[j]*b[*bindx_ptr];
      bindx_ptr += 10;
      j += 10;
    }
    while (j < bindx[irow+1]) {
      sum += val[j++] * b[*bindx_ptr++];
    }
    c[irow] = sum;
  }

  /* This is available for backward compatibility.  Turn on by specifying -DAZ_DONT_UNROLL_LOOPS to the compiler */
#else                                                                           

  for (irow = 0; irow < N; irow++) {
    /* compute diagonal contribution */
    *c = val[irow] * b[irow];
    /* nonzero off diagonal contribution */
    bindx_row = bindx[irow];
    nzeros    = bindx[irow+1] - bindx_row;
    for (j = 0; j < nzeros; j++) {
      k   = bindx_row + j;
      *c += val[k] * b[bindx[k]];
    }
    c++;
    
  }
  
#endif

#if AZ_TIME_MATVEC
 AZ_STOP_TIMER( localID );
#endif

#if AZ_TIME_MATVEC
 AZ_STOP_TIMER( totalID );
#endif

} /* AZ_MSR_matvec_mult */
Пример #6
0
void AZ_pad_matrix(struct context *context, int proc_config[], 
   int N_unpadded, int *N, int **map, int **padded_data_org, 
   int *N_nz, int estimated_requirements)
{
   static int New_N_rows;
   int *data_org;
   int overlap;
   int i;
int *bindx;
double *val;
int count, start, end, j;

   data_org = context->A_overlapped->data_org;
   overlap  = context->aztec_choices->options[AZ_overlap];
   bindx    = context->A_overlapped->bindx;
   val      = context->A_overlapped->val;
   *map     = NULL; 
   *padded_data_org = data_org;

   if (overlap > 0) {
           *padded_data_org = data_org;
           New_N_rows = data_org[AZ_N_internal] + data_org[AZ_N_border];

           AZ_setup_dd_olap_msr(N_unpadded, &New_N_rows, bindx, val, overlap,
                           proc_config, padded_data_org,map, *N_nz, 
                           data_org[AZ_name], data_org, estimated_requirements,
			   context);

           if (New_N_rows > *N) {
              AZ_printf_out("Incorrectly estimated the overlap space reqirements.\n");
              AZ_printf_out("N_unpadded = %d, N_external = %d, overlap = %d\n",
		     N_unpadded, data_org[AZ_N_external], overlap);
              AZ_printf_out("Guess = %d, actual number of padded rows = %d\n",
                     *N, New_N_rows);
              AZ_printf_out("\n\nTry less overlapping and maybe we'll get it right\n");

              AZ_exit(1);
           }

           *N = New_N_rows;
    }
    else if (overlap == 0) {
       *N    = data_org[AZ_N_internal] + data_org[AZ_N_border];
       /* remove entries corresponding to external variables */

       count = bindx[0];
       start = count;
       for (i = 0 ; i < *N ; i++ ) {
          end = bindx[i+1];
          for (j = start ; j < end ; j++) {
             if ( bindx[j] < *N ) {
                bindx[count] = bindx[j];
                val[count++] = val[j];
             }
          }
          bindx[i+1] = count;
          start      = end;
       }

    }
    else { /* diagonal overlapping */

       *N = data_org[AZ_N_internal] + data_org[AZ_N_border];

       if (*N_nz < *N + data_org[AZ_N_external]) {
          AZ_printf_err("Not enough memory for diagonal overlapping\n");
          AZ_exit(1);
       }

       /* make room */

       count = data_org[AZ_N_external];
       for (i = bindx[*N]-1 ; i >= bindx[0] ; i-- ) {
          bindx[i+count] = bindx[i];
          val[i+count]   = val[i];
       }
       for (i = 0 ; i <= *N; i++) bindx[i] += count;
       for (i = (*N)+1 ; i <= *N + data_org[AZ_N_external]; i++) 
          bindx[i] = bindx[i-1];

       /* communicate diagonal */

       AZ_exchange_bdry(val, data_org, proc_config);

       *N = data_org[AZ_N_internal] + data_org[AZ_N_border] + 
                data_org[AZ_N_external];
    }
}
Пример #7
0
void AZ_domain_decomp(double x[], AZ_MATRIX *Amat, int options[],
                   int proc_config[], double params[],
		   struct context *context)


/*******************************************************************************

  Precondition 'x' using an overlapping domain decomposition method where a 
  solver specified by options[AZ_subdomain_solve] is used on the subdomains. 
  Note: if a factorization needs to be computed on the first iteration, this
  will be done and stored for future iterations.

  Author:          Lydie Prevost, SNL, 9222
  =======          Revised by R. Tuminaro (4/97), SNL, 9222

  Return code:     void
  ============

  Parameter list:
  ===============

  N_unpadded:      On input, number of rows in linear system (unpadded matrix) 
                   that will be factored (after adding values for overlapping).

  Nb_unpadded:     On input, number of block rows in linear system (unpadded) 
                   that will be factored (after adding values for overlapping).

  N_nz_unpadded:   On input, number of nonzeros in linear system (unpadded)
                   that will be factored (after adding values for overlapping).
             
  x:               On output, x[] is preconditioned by performing the subdomain
                   solve indicated by options[AZ_subdomain_solve].

  val    indx       
  bindx  rpntr:    On input, arrays containing matrix nonzeros (see manual). 
  cpntr  bpntr            

  options:         Determines specific solution method and other parameters.  In
                   this routine, we are concerned with options[AZ_overlap]:

                      == AZ_none: nonoverlapping domain decomposition
                      == AZ_diag: use rows corresponding to external variables 
                                  but only keep the diagonal for these rows.
                      == k      : Obtain rows that are a distance k away from
                                  rows owned by this processor.
                                  
  data_org:        Contains information on matrix data distribution and 
                   communication parameters (see manual).

*******************************************************************************/
{
  int N_unpadded, Nb_unpadded, N_nz_unpadded;
  double *x_pad = NULL, *x_reord = NULL, *ext_vals = NULL;
  int N_nz, N_nz_padded, nz_used;
  int mem_orig, mem_overlapped, mem_factor;
  int name, i, bandwidth;
  int *ordering = NULL;
  double condest;
/*
  double start_t;
*/
  int estimated_requirements;
  char str[80];
int *garbage;

  int N;
  int *padded_data_org = NULL, *bindx, *data_org;
  double *val;
  int *inv_ordering = NULL;
  int *map = NULL;
  AZ_MATRIX *A_overlapped = NULL;
  struct aztec_choices aztec_choices;


  /**************************** execution begins ******************************/
  data_org = Amat->data_org;
  bindx    = Amat->bindx;
  val      = Amat->val;
  N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border];
  Nb_unpadded = data_org[AZ_N_int_blk]+data_org[AZ_N_bord_blk];
  if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) 
     N_nz_unpadded = bindx[N_unpadded];
  else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX)
     N_nz_unpadded = (Amat->indx)[(Amat->bpntr)[Nb_unpadded]];
  else {
     if (Amat->N_nz < 0) 
        AZ_matfree_Nnzs(Amat);
     N_nz_unpadded = Amat->N_nz;
  }

  
  aztec_choices.options  = options;
  aztec_choices.params   = params;
  context->aztec_choices = &aztec_choices;
  context->proc_config   = proc_config;
  name                   = data_org[AZ_name];


  if ((options[AZ_pre_calc] >= AZ_reuse) && (context->Pmat_computed)) {
     N               = context->N;
     N_nz            = context->N_nz;
     A_overlapped    = context->A_overlapped;
     A_overlapped->data_org  = data_org;
     A_overlapped->matvec = Amat->matvec;
  }
  else {
     sprintf(str,"A_over %s",context->tag);
     A_overlapped = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), 
                                                   AZ_ALLOC, name, str, &i);
     AZ_matrix_init(A_overlapped, 0);

     context->A_overlapped     = A_overlapped;
     A_overlapped->data_org    = data_org;
     A_overlapped->matvec      = Amat->matvec;
     A_overlapped->matrix_type = AZ_MSR_MATRIX;

     AZ_init_subdomain_solver(context);

     AZ_compute_matrix_size(Amat, options, N_nz_unpadded, N_unpadded, 
			 &N_nz_padded, data_org[AZ_N_external],
		 	 &(context->max_row), &N, &N_nz, params[AZ_ilut_fill], 
                         &(context->extra_fact_nz_per_row),
                         Nb_unpadded,&bandwidth);

     
        estimated_requirements = N_nz;
        if (N_nz*2 > N_nz) N_nz = N_nz*2;	/* check for overflow */
						/* Add extra memory to N_nz. */
                                                /* This extra memory is used */
                                                /* as temporary space during */
                                                /* overlapping calculation   */

        /* Readjust N_nz by allocating auxilliary arrays and allocate */
        /* the MSR matrix to check that there is enough space.        */

        /* block off some space for map and padded_data_org in dd_overlap */

        garbage = (int *) AZ_allocate((AZ_send_list + 2*(N-N_unpadded) +10)*
                                      sizeof(int));
        AZ_hold_space(context, N);

   
        if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
        if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
        if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
        if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */
        if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */

        N_nz = AZ_adjust_N_nz_to_fit_memory(N_nz,
                                 context->N_large_int_arrays,
                                 context->N_large_dbl_arrays);
        context->N_nz_factors = N_nz;

        if (N_nz <= N_nz_unpadded) {
           AZ_printf_out("Error: Not enough space for domain decomposition\n");
           AZ_exit(1);
        }


        if (estimated_requirements > N_nz ) estimated_requirements = N_nz;

        /* allocate matrix via AZ_manage_memory() */

        sprintf(str,"bindx %s",context->tag);
        A_overlapped->bindx =(int    *) AZ_manage_memory(N_nz*sizeof(int),
                                                AZ_ALLOC, name, str, &i);
        sprintf(str,"val %s",context->tag);
        A_overlapped->val =(double *) AZ_manage_memory(N_nz*sizeof(double),
                                                AZ_ALLOC, name, str, &i);
        context->N_nz_allocated = N_nz;
        AZ_free_space_holder(context);
        AZ_free(garbage);

        /* convert to MSR if necessary */

        if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX)
          AZ_vb2msr(Nb_unpadded,val,Amat->indx,bindx,Amat->rpntr,Amat->cpntr,
		    Amat->bpntr, A_overlapped->val, A_overlapped->bindx);
        else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) {
          for (i = 0 ; i < N_nz_unpadded; i++ ) {
             A_overlapped->bindx[i] = bindx[i];
             A_overlapped->val[i]   = val[i];
          }
        }
        else AZ_matfree_2_msr(Amat,A_overlapped->val,A_overlapped->bindx,N_nz);

        mem_orig = AZ_gsum_int(A_overlapped->bindx[N_unpadded],proc_config);

/*
        start_t = AZ_second();
*/
        AZ_pad_matrix(context, proc_config, N_unpadded, &N, 
                      &(context->map), &(context->padded_data_org), &N_nz, 
                      estimated_requirements);

/*
        if (proc_config[AZ_node] == 0)
           AZ_printf_out("matrix padding took %e seconds\n",AZ_second()-start_t);
*/


        mem_overlapped = AZ_gsum_int(A_overlapped->bindx[N],proc_config);
  
        if (options[AZ_reorder]) {
/*
           start_t = AZ_second();
*/
           AZ_find_MSR_ordering(A_overlapped->bindx,
                                &(context->ordering),N,
                                &(context->inv_ordering),name,context);
/*
           if (proc_config[AZ_node] == 0) 
              AZ_printf_out("took %e seconds to find ordering\n", AZ_second()-start_t);
*/
/*
           start_t = AZ_second();
*/
           AZ_mat_reorder(N,A_overlapped->bindx,A_overlapped->val,
                          context->ordering, context->inv_ordering);
/*
           if (proc_config[AZ_node] == 0) 
              AZ_printf_out("took %e seconds to reorder\n", AZ_second()-start_t);
*/
                /* NOTE: ordering is freed inside AZ_mat_reorder */
#ifdef AZ_COL_REORDER
           if (options[AZ_reorder]==2) {
             AZ_mat_colperm(N,A_overlapped->bindx,A_overlapped->val,
                        &(context->ordering), name, context );
           }
#endif

        }

        /* Do a factorization if needed.  */

/*
        start_t = AZ_second();
*/
        AZ_factor_subdomain(context, N, N_nz, &nz_used);

       if (options[AZ_output] > 0 && options[AZ_diagnostics]!=AZ_none) {
          AZ_printf_out("\n*********************************************************************\n");
	  condest = AZ_condest(N, context);
          AZ_printf_out("*****  Condition number estimate for subdomain preconditioner on PE %d = %.4e\n",
               proc_config[AZ_node], condest);
          AZ_printf_out("*********************************************************************\n");
        }


/*
        start_t        = AZ_second()-start_t;
        max_time = AZ_gmax_double(start_t,proc_config);
        min_time = AZ_gmin_double(start_t,proc_config);
        if (proc_config[AZ_node] == 0) 
           AZ_printf_out("time for subdomain solvers ranges from %e to %e\n",
                  min_time,max_time);
*/
  
        if ( A_overlapped->matrix_type == AZ_MSR_MATRIX)
           AZ_compress_msr(&(A_overlapped->bindx), &(A_overlapped->val),
                     context->N_nz_allocated, nz_used, name, context);


        context->N_nz = nz_used;
        context->N    = N;
        context->N_nz_allocated = nz_used;

        mem_factor = AZ_gsum_int(nz_used,proc_config);

        if (proc_config[AZ_node] == 0)
           AZ_print_header(options,mem_overlapped,mem_orig,mem_factor);

        if (options[AZ_overlap] >= 1) {
           sprintf(str,"x_pad %s",context->tag);
           context->x_pad  = (double *) AZ_manage_memory(N*sizeof(double),
                                                   AZ_ALLOC, name, str, &i);
           sprintf(str,"ext_vals %s",context->tag);
           context->ext_vals = (double *) AZ_manage_memory((N-N_unpadded)*
                                             sizeof(double), AZ_ALLOC, name, 
                                             str, &i);
        }
        if (options[AZ_reorder]) {
           sprintf(str,"x_reord %s",context->tag);
           context->x_reord = (double *) AZ_manage_memory(N*sizeof(double),
                                             AZ_ALLOC, name, str, &i);
        }

     }

  /* Solve L u = x where the solution u overwrites x */

    x_reord         = context->x_reord;
    inv_ordering    = context->inv_ordering;
    ordering        = context->ordering;
    x_pad           = context->x_pad;
    ext_vals        = context->ext_vals;
    padded_data_org = context->padded_data_org;
    map             = context->map;

   if (x_pad == NULL) x_pad = x;

   if (options[AZ_overlap] >= 1) {

      for (i = 0 ; i < N_unpadded ; i++) x_pad[i] = x[i];
      AZ_exchange_bdry(x_pad,padded_data_org, proc_config);
      for (i = 0 ; i < N-N_unpadded ; i++ ) 
         ext_vals[map[i]-N_unpadded] = x_pad[i+N_unpadded];
      for (i = 0 ; i < N-N_unpadded ; i++ ) x_pad[i + N_unpadded] = ext_vals[i];
   }
   else if (options[AZ_overlap] == AZ_diag) 
	AZ_exchange_bdry(x_pad,data_org, proc_config);

   if (x_reord == NULL) x_reord = x_pad;
   if (options[AZ_reorder]) {
      /* Apply row permutation to the right hand side */
      /* ((P'A P)Pi') Pi P'x = P'rhs,  b= P'rhs */
      for (i = 0 ; i < N ; i++ ) x_reord[inv_ordering[i]] = x_pad[i];
   }

   AZ_solve_subdomain(x_reord,N, context);

#ifdef AZ_COL_REORDER
   /* Apply column permutation to the solution   */
   if (options[AZ_reorder]==1){
      /* ((P'A P) P'sol = P'rhs   sol = P( P'sol)  */
      for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]];
   }
   if (options[AZ_reorder]==2){
      /*
       * ((P'A P)Pi') Pi P'sol = P'rhs  sol = P Pi'( Pi P'sol)
       * Version 1:
       * for (i = 0; i < N; i++) pi_sol[i] = x_reord[ordering[i]];
       * for (j = 0; j < N; j++) x_pad[j] = pi_sol[inv_ordering[j]];
       * Version 2:
       */
      for (i = 0; i < N; i++) x_pad[i] = x_reord[ ordering[inv_ordering[i]] ];
   }

#else
   if (options[AZ_reorder])
      for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]];
#endif

   AZ_combine_overlapped_values(options[AZ_type_overlap],padded_data_org, 
                             options, x_pad, map,ext_vals,name,proc_config);

   if (x_pad != x) 
     for (i = 0 ; i < N_unpadded ; i++ ) x[i] = x_pad[i];

} /* subdomain driver*/