//============================================================================== Epetra_MsrMatrix::Epetra_MsrMatrix(int * proc_config, AZ_MATRIX * a_mat) : Epetra_Object("Epetra::MsrMatrix"), Amat_(a_mat), proc_config_(proc_config), Values_(0), Indices_(0), MaxNumEntries_(-1), ImportVector_(0), NormInf_(-1.0), NormOne_(-1.0) { #ifdef AZTEC_MPI MPI_Comm * mpicomm = (MPI_Comm * ) AZ_get_comm(proc_config); Comm_ = new Epetra_MpiComm(*mpicomm); #else Comm_ = new Epetra_SerialComm(); #endif if (a_mat->data_org[AZ_matrix_type]!=AZ_MSR_MATRIX) throw Comm_->ReportError("AZ_matrix_type must be AZ_MSR_MATRIX", -1); int * bindx = a_mat->bindx; NumMyRows_ = a_mat->data_org[AZ_N_internal] + a_mat->data_org[AZ_N_border]; int NumExternal = a_mat->data_org[AZ_N_external]; NumMyCols_ = NumMyRows_ + NumExternal; NumMyNonzeros_ = bindx[NumMyRows_] - bindx[0] + NumMyRows_; //Comm_->SumAll(&NumMyNonzeros_, &NumGlobalNonzeros_, 1); long long NumMyNonzerosLL_ = (long long) NumMyNonzeros_; Comm_->SumAll(&NumMyNonzerosLL_, &NumGlobalNonzeros_, 1); int * MyGlobalElements = a_mat->update; if (MyGlobalElements==0) throw Comm_->ReportError("Aztec matrix has no update list: Check if AZ_Transform was called.", -2); DomainMap_ = new Epetra_Map(-1, NumMyRows_, MyGlobalElements, 0, *Comm_); double * dbleColGIDs = new double[NumMyCols_]; int * ColGIDs = new int[NumMyCols_]; for (int i=0; i<NumMyRows_; i++) dbleColGIDs[i] = (double) MyGlobalElements[i]; AZ_exchange_bdry(dbleColGIDs, a_mat->data_org, proc_config); {for (int i=0; i<NumMyCols_; i++) ColGIDs[i] = (int) dbleColGIDs[i];} ColMap_ = new Epetra_Map(-1, NumMyCols_, ColGIDs, 0, *Comm_); Importer_ = new Epetra_Import(*ColMap_, *DomainMap_); delete [] dbleColGIDs; delete [] ColGIDs; }
void AZ_sym_gauss_seidel_sl(double val[],int bindx[],double x[],int data_org[], int options[], struct context *context, int proc_config[]) /******************************************************************************* Symmetric Gauss-Siedel preconditioner. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== val: Array containing the nonzero entries of the matrix (see Aztec User's Guide). indx, bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). x: On input, contains the current solution to the linear system. On output contains the Jacobi preconditioned solution. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). options: Determines specific solution method and other parameters. *******************************************************************************/ { /* local variables */ register int *bindx_ptr; register double sum, *ptr_val; int i, bindx_row, j_last, N, step, ione = 1, j; double *b, *ptr_b; char tag[80]; /**************************** execution begins ******************************/ N = data_org[AZ_N_internal] + data_org[AZ_N_border]; sprintf(tag,"b/sGS %s",context->tag); b = AZ_manage_memory(N*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, tag, &i); DCOPY_F77(&N, x, &ione, b, &ione); ptr_val = val; for (i = 0; i < N; i++) { (*ptr_val) = 1.0 / (*ptr_val); x[i] = 0.0; ptr_val++; } for (step = 0; step < options[AZ_poly_ord]; step++) { AZ_exchange_bdry(x, data_org, proc_config); bindx_row = bindx[0]; bindx_ptr = &bindx[bindx_row]; ptr_val = &val[bindx_row]; ptr_b = b; for (i = 0; i < N; i++) { sum = *ptr_b++; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val++ * x[*bindx_ptr++]; } x[i] = sum * val[i]; } bindx_row = bindx[N]; bindx_ptr = &bindx[bindx_row-1]; ptr_val = &val[bindx_row-1]; for (i = N - 1; i >= 0; i--) { sum = b[i]; j_last = bindx[i+1] - bindx[i]; for (j = 0; j < j_last; j++) { sum -= *ptr_val-- * x[*bindx_ptr--]; } x[i] = sum * val[i]; } } for (i = 0; i < N; i++) val[i] = 1.0 / val[i]; } /* AZ_sym_gauss_seidel_sl */
//============================================================================== Epetra_MsrMatrix::Epetra_MsrMatrix(int * proc_config, AZ_MATRIX * a_mat) : Epetra_Object("Epetra::MsrMatrix"), Amat_(a_mat), proc_config_(proc_config), Values_(0), Indices_(0), MaxNumEntries_(-1), ImportVector_(0), NormInf_(-1.0), NormOne_(-1.0) { #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES // We throw rather than let the compiler error out so that the // rest of the library is available and all possible tests can run. const char* error = "Epetra_MsrMatrix::Epetra_MsrMatrix: Not available for 64-bit Maps."; std::cerr << error << std::endl; throw Comm_->ReportError(error, -2); #endif #ifdef AZTEC_MPI MPI_Comm * mpicomm = (MPI_Comm * ) AZ_get_comm(proc_config); Comm_ = new Epetra_MpiComm(*mpicomm); #else Comm_ = new Epetra_SerialComm(); #endif if (a_mat->data_org[AZ_matrix_type]!=AZ_MSR_MATRIX) throw Comm_->ReportError("AZ_matrix_type must be AZ_MSR_MATRIX", -1); int * bindx = a_mat->bindx; NumMyRows_ = a_mat->data_org[AZ_N_internal] + a_mat->data_org[AZ_N_border]; int NumExternal = a_mat->data_org[AZ_N_external]; NumMyCols_ = NumMyRows_ + NumExternal; NumMyNonzeros_ = bindx[NumMyRows_] - bindx[0] + NumMyRows_; #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES long long tmp_NumMyNonzeros = NumMyNonzeros_; Comm_->SumAll(&tmp_NumMyNonzeros, &NumGlobalNonzeros_, 1); #else int tmp_NumGlobalNonzeros = 0; Comm_->SumAll(&NumMyNonzeros_, &tmp_NumGlobalNonzeros, 1); NumGlobalNonzeros_ = tmp_NumGlobalNonzeros; #endif int * MyGlobalElements = a_mat->update; if (MyGlobalElements==0) throw Comm_->ReportError("Aztec matrix has no update list: Check if AZ_Transform was called.", -2); #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES DomainMap_ = 0; #else DomainMap_ = new Epetra_Map(-1, NumMyRows_, MyGlobalElements, 0, *Comm_); #endif double * dbleColGIDs = new double[NumMyCols_]; int * ColGIDs = new int[NumMyCols_]; for (int i=0; i<NumMyRows_; i++) dbleColGIDs[i] = (double) MyGlobalElements[i]; AZ_exchange_bdry(dbleColGIDs, a_mat->data_org, proc_config); {for (int i=0; i<NumMyCols_; i++) ColGIDs[i] = (int) dbleColGIDs[i];} #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES ColMap_ = 0; #else ColMap_ = new Epetra_Map(-1, NumMyCols_, ColGIDs, 0, *Comm_); #endif Importer_ = new Epetra_Import(*ColMap_, *DomainMap_); delete [] dbleColGIDs; delete [] ColGIDs; }
void dvbr_sparax_basic(int m, double *val, int *bindx, int *rpntr, int *cpntr, int *bpntr, double *b, double *c, int exchange_flag, int *data_org, int *proc_config) /****************************************************************************** c = Ab: Sparse (square) matrix-vector multiply, using the variable block row (VBR) data structure (A = val). Author: Scott A. Hutchinson, SNL, 1421 ======= Return code: void ============ Parameter list: =============== m: Number of (block) rows in A. val: Array containing the entries of the matrix. The matrix is stored block-row-by-block-row. Each block entry is dense and stored by columns (VBR). bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see Aztec User's Guide). b: Right hand side of linear system. c: On output contains the solution to the linear system. exchange_flag: Flag which controls call to AZ_exchange_bdry() (ignored in serial implementation). data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). ******************************************************************************/ { /* local variables */ register double *x; register double *c_pntr; register int iblk_row, j, jblk, iblk_size; int m1, ib1, n1; int bpoff, rpoff; int ione = 1; int irpntr, irpntr_next; int ibpntr, ibpntr_next = 0; double one = 1.0; double *val_pntr; char *N = "N"; /**************************** execution begins *****************************/ /* exchange boundary info */ if (exchange_flag) AZ_exchange_bdry(b, data_org, proc_config); /* offset of the first block */ bpoff = *bpntr; rpoff = *rpntr; /* zero the result vector */ for (j = 0; j < rpntr[m] - rpoff; c[j++] = 0.0); val_pntr = val; irpntr_next = *rpntr++; bpntr++; c -= rpoff; /* loop over block rows */ for (iblk_row = 0; iblk_row < m; iblk_row++) { irpntr = irpntr_next; irpntr_next = *rpntr++; ibpntr = ibpntr_next; ibpntr_next = *bpntr++ - bpoff; /* set result pointer */ c_pntr = c + irpntr; /* number of rows in the current row block */ m1 = irpntr_next - irpntr; /* loop over each block in the current row block */ for (j = ibpntr; j < ibpntr_next; j++) { jblk = *(bindx+j); /* the starting point column index of the current block */ ib1 = *(cpntr+jblk); /* number of columns in the current block */ n1 = cpntr[jblk + 1] - ib1; iblk_size = m1*n1; /****************** Dense matrix-vector multiplication *****************/ /* * Get base addresses */ x = b + ib1; /* * Special case the m1 = n1 = 1 case */ if (iblk_size == 1) *c_pntr += *val_pntr * *x; else if (m1 == n1) { /* * Inline small amounts of work */ switch (m1) { case 2: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[2]*x[1]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[3]*x[1]; break; case 3: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[3]*x[1] + val_pntr[6]*x[2]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[4]*x[1] + val_pntr[7]*x[2]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[5]*x[1] + val_pntr[8]*x[2]; break; case 4: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[4]*x[1] + val_pntr[8] *x[2] + val_pntr[12]*x[3]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[5]*x[1] + val_pntr[9] *x[2] + val_pntr[13]*x[3]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[6]*x[1] + val_pntr[10]*x[2] + val_pntr[14]*x[3]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[7]*x[1] + val_pntr[11]*x[2] + val_pntr[15]*x[3]; break; case 5: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[5]*x[1] + val_pntr[10]*x[2] + val_pntr[15]*x[3] + val_pntr[20]*x[4]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[6]*x[1] + val_pntr[11]*x[2] + val_pntr[16]*x[3] + val_pntr[21]*x[4]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[7]*x[1] + val_pntr[12]*x[2] + val_pntr[17]*x[3] + val_pntr[22]*x[4]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[8]*x[1] + val_pntr[13]*x[2] + val_pntr[18]*x[3] + val_pntr[23]*x[4]; c_pntr[4] += val_pntr[4]*x[0] + val_pntr[9]*x[1] + val_pntr[14]*x[2] + val_pntr[19]*x[3] + val_pntr[24]*x[4]; break; case 6: c_pntr[0] += val_pntr[0]*x[0] + val_pntr[6] *x[1] + val_pntr[12]*x[2] + val_pntr[18]*x[3] + val_pntr[24]*x[4] + val_pntr[30]*x[5]; c_pntr[1] += val_pntr[1]*x[0] + val_pntr[7] *x[1] + val_pntr[13]*x[2] + val_pntr[19]*x[3] + val_pntr[25]*x[4] + val_pntr[31]*x[5]; c_pntr[2] += val_pntr[2]*x[0] + val_pntr[8] *x[1] + val_pntr[14]*x[2] + val_pntr[20]*x[3] + val_pntr[26]*x[4] + val_pntr[32]*x[5]; c_pntr[3] += val_pntr[3]*x[0] + val_pntr[9] *x[1] + val_pntr[15]*x[2] + val_pntr[21]*x[3] + val_pntr[27]*x[4] + val_pntr[33]*x[5]; c_pntr[4] += val_pntr[4]*x[0] + val_pntr[10]*x[1] + val_pntr[16]*x[2] + val_pntr[22]*x[3] + val_pntr[28]*x[4] + val_pntr[34]*x[5]; c_pntr[5] += val_pntr[5]*x[0] + val_pntr[11]*x[1] + val_pntr[17]*x[2] + val_pntr[23]*x[3] + val_pntr[29]*x[4] + val_pntr[35]*x[5]; break; default: /* * For most computers, a really well-optimized assembly-coded level 2 * blas for small blocks sizes doesn't exist. It's better to * optimize your own version, and take out all the overhead from the * regular dgemv call. For large block sizes, it's also a win to * check for a column of zeroes; this is what dgemv_ does. The * routine dgemvnsqr_() is a fortran routine that contains optimized * code for the hp, created from the optimizing preprocessor. Every * workstation will probably have an entry here eventually, since * this is a key optimization location. */ /* #ifdef AZ_PA_RISC */ /* dgemvnsqr_(&m1, val_pntr, x, c_pntr); */ /* #else */ if (m1 < 10) AZ_dgemv2(m1, n1, val_pntr, x, c_pntr); else DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr, &ione); /* #endif */ } } /* nonsquare cases */ else { if (m1 < 10) AZ_dgemv2(m1, n1, val_pntr, x, c_pntr); else DGEMV_F77(CHAR_MACRO(N[0]), &m1, &n1, &one, val_pntr, &m1, x, &ione, &one, c_pntr, &ione); } val_pntr += iblk_size; } } } /* dvbr_sparax_basic */
void AZ_MSR_matvec_mult (double *b, double *c,AZ_MATRIX *Amat,int proc_config[]) /****************************************************************************** c = Ab: Sparse (square) overlapped matrix-vector multiply, using the MSR data structure . Author: Lydie Prevost, SNL, 1422 ======= Return code: void ============ Parameter list: =============== b: Contains the vector b. c: Contains the result vector c. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. ******************************************************************************/ { double *val; int *data_org, *bindx; register int j, irow; int N; #ifndef AZ_DONT_UNROLL_LOOPS int *bindx_ptr; double sum; #else int nzeros, bindx_row, k; #endif #if AZ_TIME_MATVEC AZ_START_TIMER( "AztecOO: MSR mat-vec total", totalID ); #endif val = Amat->val; bindx = Amat->bindx; data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; #if AZ_TIME_MATVEC AZ_START_TIMER( "AztecOO: MSR mat-vec import", importID ); #endif /* exchange boundary info */ AZ_exchange_bdry(b, data_org, proc_config); #if AZ_TIME_MATVEC AZ_STOP_TIMER( importID ); #endif #if AZ_TIME_MATVEC AZ_START_TIMER( "AztecOO: MSR mat-vec local", localID ); #endif /* This is the default */ #ifndef AZ_DONT_UNROLL_LOOPS j = bindx[0]; bindx_ptr = &bindx[j]; for (irow = 0; irow < N; irow++) { sum = val[irow]*b[irow]; while (j+10 < bindx[irow+1]) { sum += val[j+9]*b[bindx_ptr[9]] + val[j+8]*b[bindx_ptr[8]] + val[j+7]*b[bindx_ptr[7]] + val[j+6]*b[bindx_ptr[6]] + val[j+5]*b[bindx_ptr[5]] + val[j+4]*b[bindx_ptr[4]] + val[j+3]*b[bindx_ptr[3]] + val[j+2]*b[bindx_ptr[2]] + val[j+1]*b[bindx_ptr[1]] + val[j]*b[*bindx_ptr]; bindx_ptr += 10; j += 10; } while (j < bindx[irow+1]) { sum += val[j++] * b[*bindx_ptr++]; } c[irow] = sum; } /* This is available for backward compatibility. Turn on by specifying -DAZ_DONT_UNROLL_LOOPS to the compiler */ #else for (irow = 0; irow < N; irow++) { /* compute diagonal contribution */ *c = val[irow] * b[irow]; /* nonzero off diagonal contribution */ bindx_row = bindx[irow]; nzeros = bindx[irow+1] - bindx_row; for (j = 0; j < nzeros; j++) { k = bindx_row + j; *c += val[k] * b[bindx[k]]; } c++; } #endif #if AZ_TIME_MATVEC AZ_STOP_TIMER( localID ); #endif #if AZ_TIME_MATVEC AZ_STOP_TIMER( totalID ); #endif } /* AZ_MSR_matvec_mult */
void AZ_pad_matrix(struct context *context, int proc_config[], int N_unpadded, int *N, int **map, int **padded_data_org, int *N_nz, int estimated_requirements) { static int New_N_rows; int *data_org; int overlap; int i; int *bindx; double *val; int count, start, end, j; data_org = context->A_overlapped->data_org; overlap = context->aztec_choices->options[AZ_overlap]; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; *map = NULL; *padded_data_org = data_org; if (overlap > 0) { *padded_data_org = data_org; New_N_rows = data_org[AZ_N_internal] + data_org[AZ_N_border]; AZ_setup_dd_olap_msr(N_unpadded, &New_N_rows, bindx, val, overlap, proc_config, padded_data_org,map, *N_nz, data_org[AZ_name], data_org, estimated_requirements, context); if (New_N_rows > *N) { AZ_printf_out("Incorrectly estimated the overlap space reqirements.\n"); AZ_printf_out("N_unpadded = %d, N_external = %d, overlap = %d\n", N_unpadded, data_org[AZ_N_external], overlap); AZ_printf_out("Guess = %d, actual number of padded rows = %d\n", *N, New_N_rows); AZ_printf_out("\n\nTry less overlapping and maybe we'll get it right\n"); AZ_exit(1); } *N = New_N_rows; } else if (overlap == 0) { *N = data_org[AZ_N_internal] + data_org[AZ_N_border]; /* remove entries corresponding to external variables */ count = bindx[0]; start = count; for (i = 0 ; i < *N ; i++ ) { end = bindx[i+1]; for (j = start ; j < end ; j++) { if ( bindx[j] < *N ) { bindx[count] = bindx[j]; val[count++] = val[j]; } } bindx[i+1] = count; start = end; } } else { /* diagonal overlapping */ *N = data_org[AZ_N_internal] + data_org[AZ_N_border]; if (*N_nz < *N + data_org[AZ_N_external]) { AZ_printf_err("Not enough memory for diagonal overlapping\n"); AZ_exit(1); } /* make room */ count = data_org[AZ_N_external]; for (i = bindx[*N]-1 ; i >= bindx[0] ; i-- ) { bindx[i+count] = bindx[i]; val[i+count] = val[i]; } for (i = 0 ; i <= *N; i++) bindx[i] += count; for (i = (*N)+1 ; i <= *N + data_org[AZ_N_external]; i++) bindx[i] = bindx[i-1]; /* communicate diagonal */ AZ_exchange_bdry(val, data_org, proc_config); *N = data_org[AZ_N_internal] + data_org[AZ_N_border] + data_org[AZ_N_external]; } }
void AZ_domain_decomp(double x[], AZ_MATRIX *Amat, int options[], int proc_config[], double params[], struct context *context) /******************************************************************************* Precondition 'x' using an overlapping domain decomposition method where a solver specified by options[AZ_subdomain_solve] is used on the subdomains. Note: if a factorization needs to be computed on the first iteration, this will be done and stored for future iterations. Author: Lydie Prevost, SNL, 9222 ======= Revised by R. Tuminaro (4/97), SNL, 9222 Return code: void ============ Parameter list: =============== N_unpadded: On input, number of rows in linear system (unpadded matrix) that will be factored (after adding values for overlapping). Nb_unpadded: On input, number of block rows in linear system (unpadded) that will be factored (after adding values for overlapping). N_nz_unpadded: On input, number of nonzeros in linear system (unpadded) that will be factored (after adding values for overlapping). x: On output, x[] is preconditioned by performing the subdomain solve indicated by options[AZ_subdomain_solve]. val indx bindx rpntr: On input, arrays containing matrix nonzeros (see manual). cpntr bpntr options: Determines specific solution method and other parameters. In this routine, we are concerned with options[AZ_overlap]: == AZ_none: nonoverlapping domain decomposition == AZ_diag: use rows corresponding to external variables but only keep the diagonal for these rows. == k : Obtain rows that are a distance k away from rows owned by this processor. data_org: Contains information on matrix data distribution and communication parameters (see manual). *******************************************************************************/ { int N_unpadded, Nb_unpadded, N_nz_unpadded; double *x_pad = NULL, *x_reord = NULL, *ext_vals = NULL; int N_nz, N_nz_padded, nz_used; int mem_orig, mem_overlapped, mem_factor; int name, i, bandwidth; int *ordering = NULL; double condest; /* double start_t; */ int estimated_requirements; char str[80]; int *garbage; int N; int *padded_data_org = NULL, *bindx, *data_org; double *val; int *inv_ordering = NULL; int *map = NULL; AZ_MATRIX *A_overlapped = NULL; struct aztec_choices aztec_choices; /**************************** execution begins ******************************/ data_org = Amat->data_org; bindx = Amat->bindx; val = Amat->val; N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border]; Nb_unpadded = data_org[AZ_N_int_blk]+data_org[AZ_N_bord_blk]; if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) N_nz_unpadded = bindx[N_unpadded]; else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) N_nz_unpadded = (Amat->indx)[(Amat->bpntr)[Nb_unpadded]]; else { if (Amat->N_nz < 0) AZ_matfree_Nnzs(Amat); N_nz_unpadded = Amat->N_nz; } aztec_choices.options = options; aztec_choices.params = params; context->aztec_choices = &aztec_choices; context->proc_config = proc_config; name = data_org[AZ_name]; if ((options[AZ_pre_calc] >= AZ_reuse) && (context->Pmat_computed)) { N = context->N; N_nz = context->N_nz; A_overlapped = context->A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; } else { sprintf(str,"A_over %s",context->tag); A_overlapped = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC, name, str, &i); AZ_matrix_init(A_overlapped, 0); context->A_overlapped = A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; A_overlapped->matrix_type = AZ_MSR_MATRIX; AZ_init_subdomain_solver(context); AZ_compute_matrix_size(Amat, options, N_nz_unpadded, N_unpadded, &N_nz_padded, data_org[AZ_N_external], &(context->max_row), &N, &N_nz, params[AZ_ilut_fill], &(context->extra_fact_nz_per_row), Nb_unpadded,&bandwidth); estimated_requirements = N_nz; if (N_nz*2 > N_nz) N_nz = N_nz*2; /* check for overflow */ /* Add extra memory to N_nz. */ /* This extra memory is used */ /* as temporary space during */ /* overlapping calculation */ /* Readjust N_nz by allocating auxilliary arrays and allocate */ /* the MSR matrix to check that there is enough space. */ /* block off some space for map and padded_data_org in dd_overlap */ garbage = (int *) AZ_allocate((AZ_send_list + 2*(N-N_unpadded) +10)* sizeof(int)); AZ_hold_space(context, N); if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ N_nz = AZ_adjust_N_nz_to_fit_memory(N_nz, context->N_large_int_arrays, context->N_large_dbl_arrays); context->N_nz_factors = N_nz; if (N_nz <= N_nz_unpadded) { AZ_printf_out("Error: Not enough space for domain decomposition\n"); AZ_exit(1); } if (estimated_requirements > N_nz ) estimated_requirements = N_nz; /* allocate matrix via AZ_manage_memory() */ sprintf(str,"bindx %s",context->tag); A_overlapped->bindx =(int *) AZ_manage_memory(N_nz*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"val %s",context->tag); A_overlapped->val =(double *) AZ_manage_memory(N_nz*sizeof(double), AZ_ALLOC, name, str, &i); context->N_nz_allocated = N_nz; AZ_free_space_holder(context); AZ_free(garbage); /* convert to MSR if necessary */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_vb2msr(Nb_unpadded,val,Amat->indx,bindx,Amat->rpntr,Amat->cpntr, Amat->bpntr, A_overlapped->val, A_overlapped->bindx); else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { for (i = 0 ; i < N_nz_unpadded; i++ ) { A_overlapped->bindx[i] = bindx[i]; A_overlapped->val[i] = val[i]; } } else AZ_matfree_2_msr(Amat,A_overlapped->val,A_overlapped->bindx,N_nz); mem_orig = AZ_gsum_int(A_overlapped->bindx[N_unpadded],proc_config); /* start_t = AZ_second(); */ AZ_pad_matrix(context, proc_config, N_unpadded, &N, &(context->map), &(context->padded_data_org), &N_nz, estimated_requirements); /* if (proc_config[AZ_node] == 0) AZ_printf_out("matrix padding took %e seconds\n",AZ_second()-start_t); */ mem_overlapped = AZ_gsum_int(A_overlapped->bindx[N],proc_config); if (options[AZ_reorder]) { /* start_t = AZ_second(); */ AZ_find_MSR_ordering(A_overlapped->bindx, &(context->ordering),N, &(context->inv_ordering),name,context); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to find ordering\n", AZ_second()-start_t); */ /* start_t = AZ_second(); */ AZ_mat_reorder(N,A_overlapped->bindx,A_overlapped->val, context->ordering, context->inv_ordering); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to reorder\n", AZ_second()-start_t); */ /* NOTE: ordering is freed inside AZ_mat_reorder */ #ifdef AZ_COL_REORDER if (options[AZ_reorder]==2) { AZ_mat_colperm(N,A_overlapped->bindx,A_overlapped->val, &(context->ordering), name, context ); } #endif } /* Do a factorization if needed. */ /* start_t = AZ_second(); */ AZ_factor_subdomain(context, N, N_nz, &nz_used); if (options[AZ_output] > 0 && options[AZ_diagnostics]!=AZ_none) { AZ_printf_out("\n*********************************************************************\n"); condest = AZ_condest(N, context); AZ_printf_out("***** Condition number estimate for subdomain preconditioner on PE %d = %.4e\n", proc_config[AZ_node], condest); AZ_printf_out("*********************************************************************\n"); } /* start_t = AZ_second()-start_t; max_time = AZ_gmax_double(start_t,proc_config); min_time = AZ_gmin_double(start_t,proc_config); if (proc_config[AZ_node] == 0) AZ_printf_out("time for subdomain solvers ranges from %e to %e\n", min_time,max_time); */ if ( A_overlapped->matrix_type == AZ_MSR_MATRIX) AZ_compress_msr(&(A_overlapped->bindx), &(A_overlapped->val), context->N_nz_allocated, nz_used, name, context); context->N_nz = nz_used; context->N = N; context->N_nz_allocated = nz_used; mem_factor = AZ_gsum_int(nz_used,proc_config); if (proc_config[AZ_node] == 0) AZ_print_header(options,mem_overlapped,mem_orig,mem_factor); if (options[AZ_overlap] >= 1) { sprintf(str,"x_pad %s",context->tag); context->x_pad = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); sprintf(str,"ext_vals %s",context->tag); context->ext_vals = (double *) AZ_manage_memory((N-N_unpadded)* sizeof(double), AZ_ALLOC, name, str, &i); } if (options[AZ_reorder]) { sprintf(str,"x_reord %s",context->tag); context->x_reord = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); } } /* Solve L u = x where the solution u overwrites x */ x_reord = context->x_reord; inv_ordering = context->inv_ordering; ordering = context->ordering; x_pad = context->x_pad; ext_vals = context->ext_vals; padded_data_org = context->padded_data_org; map = context->map; if (x_pad == NULL) x_pad = x; if (options[AZ_overlap] >= 1) { for (i = 0 ; i < N_unpadded ; i++) x_pad[i] = x[i]; AZ_exchange_bdry(x_pad,padded_data_org, proc_config); for (i = 0 ; i < N-N_unpadded ; i++ ) ext_vals[map[i]-N_unpadded] = x_pad[i+N_unpadded]; for (i = 0 ; i < N-N_unpadded ; i++ ) x_pad[i + N_unpadded] = ext_vals[i]; } else if (options[AZ_overlap] == AZ_diag) AZ_exchange_bdry(x_pad,data_org, proc_config); if (x_reord == NULL) x_reord = x_pad; if (options[AZ_reorder]) { /* Apply row permutation to the right hand side */ /* ((P'A P)Pi') Pi P'x = P'rhs, b= P'rhs */ for (i = 0 ; i < N ; i++ ) x_reord[inv_ordering[i]] = x_pad[i]; } AZ_solve_subdomain(x_reord,N, context); #ifdef AZ_COL_REORDER /* Apply column permutation to the solution */ if (options[AZ_reorder]==1){ /* ((P'A P) P'sol = P'rhs sol = P( P'sol) */ for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; } if (options[AZ_reorder]==2){ /* * ((P'A P)Pi') Pi P'sol = P'rhs sol = P Pi'( Pi P'sol) * Version 1: * for (i = 0; i < N; i++) pi_sol[i] = x_reord[ordering[i]]; * for (j = 0; j < N; j++) x_pad[j] = pi_sol[inv_ordering[j]]; * Version 2: */ for (i = 0; i < N; i++) x_pad[i] = x_reord[ ordering[inv_ordering[i]] ]; } #else if (options[AZ_reorder]) for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; #endif AZ_combine_overlapped_values(options[AZ_type_overlap],padded_data_org, options, x_pad, map,ext_vals,name,proc_config); if (x_pad != x) for (i = 0 ; i < N_unpadded ; i++ ) x[i] = x_pad[i]; } /* subdomain driver*/