void AZK_create_vector_c2k(int *options, double *params, int *proc_config, AZ_MATRIX *Amat_komplex, double *vc, double **vk) { AZ_KOMPLEX *linsys_pass_data; int i; int N_equations, N_external; int *data_org, *rpntr, *update_index; /* First executable statement */ linsys_pass_data = (AZ_KOMPLEX *) Amat_komplex->aux_ptr; data_org = Amat_komplex->data_org; update_index = linsys_pass_data->update_index; rpntr = Amat_komplex->rpntr; N_equations = data_org[AZ_N_internal] + data_org[AZ_N_border]; N_external = data_org[AZ_N_external]; (*vk) = (double *) AZ_allocate((N_equations+N_external)*sizeof(double)); if ((*vk) == NULL) AZ_perror("AZK_create_vector_c2k: Out of memory"); for (i=0; i <N_equations; i++) (*vk)[i] = vc[i]; /* Check if we need to reorder vector */ if (linsys_pass_data->From_Global_Indices) AZ_reorder_vec((*vk), data_org, update_index, rpntr); return; }
int AZ_adjust_N_nz_to_fit_memory(int N,int N_int_arrays, int N_dbl_arrays) { /**************************************************************************** Find (and return) the largest value of k <= N such that we can successfully allocate N_int_arrays integer arrays of size k and N_dbl_arrays double arrays of size k. Author: Ray Tuminaro, SNL, 9222 Return code: int ============ Parameter list: =============== N: On input, the maximum number of integers and doubles that we wish to try and allocate. */ double **dptr; int **iptr; int i; iptr = (int **) AZ_allocate(N_int_arrays*sizeof(int *)); dptr = (double **) AZ_allocate(N_dbl_arrays*sizeof(double *)); if ( (dptr == 0) || (iptr == 0) ) AZ_perror("ERROR: not enough memory for preconditioner.\n"); for (i = 0 ; i < N_int_arrays ; i++ ) iptr[i] = (int *) AZ_allocate((N+20)*sizeof(int)); for (i = 0 ; i < N_dbl_arrays ; i++ ) dptr[i] = (double *) AZ_allocate((N+20)*sizeof(double)); /* add a little extra */ /* for manage memory */ /* Decrease memory until the problem fits */ while ( (dptr[N_dbl_arrays-1] == NULL) || (iptr[N_int_arrays-1] == NULL) ) { for (i = N_dbl_arrays-1 ; i >= 0; i-- ) if (dptr[i] != NULL) AZ_free(dptr[i]); for (i = N_int_arrays-1 ; i >= 0; i-- ) if (iptr[i] != NULL) AZ_free(iptr[i]); N = (int) ( ((double) N)*.91); if (N == 0) AZ_perror("ERROR: not enough memory for preconditioner.\n"); for (i = 0 ; i < N_int_arrays ; i++ ) iptr[i] = (int *) AZ_allocate((N+20)*sizeof(int)); for (i = 0 ; i < N_dbl_arrays ; i++ ) dptr[i] = (double *) AZ_allocate((N+20)*sizeof(double)); } for (i = N_dbl_arrays-1 ; i >= 0; i-- ) AZ_free(dptr[i]); for (i = N_int_arrays-1 ; i >= 0; i-- ) AZ_free(iptr[i]); AZ_free(dptr); AZ_free(iptr); return(N); }
void AZK_create_precon(int *options, double *params, int *proc_config,double *x, double *b, AZ_MATRIX *Amat, AZ_PRECOND **Prec) { AZ_KOMPLEX *pass_data, *Prec_pass_data; AZ_MATRIX *Pmat, *Amat_real, *Amat_imag; int N_equations, N_real; int *data_org_real, *data_org_imag; double *val; int i, is_VBR; /* Extract necessary data from pass_data */ pass_data = (AZ_KOMPLEX *) Amat->aux_ptr; if (pass_data->Form_of_Equations != AZK_Komplex_No_Copy) (*Prec) = AZ_precond_create(Amat,AZ_precondition,NULL); else { Amat_real = pass_data->Amat_real; /* Real operator in AZ_MATRIX struct */ Amat_imag = pass_data->Amat_imag; /* Imag operator in AZ_MATRIX struct */ data_org_real = Amat_real->data_org; data_org_imag = Amat_imag->data_org; N_real = data_org_real[AZ_N_internal] + data_org_real[AZ_N_border]; N_equations = 2 * N_real; if (data_org_real[AZ_matrix_type] == AZ_VBR_MATRIX && data_org_imag[AZ_matrix_type] == AZ_VBR_MATRIX ) { is_VBR = 1; } else if (data_org_real[AZ_matrix_type] == AZ_MSR_MATRIX && data_org_imag[AZ_matrix_type] == AZ_MSR_MATRIX) { is_VBR = 0; } else { printf("Unsupported Matrix types\n"); abort(); } /* set the preconditioning structure 'Prec'. */ Prec_pass_data = (AZ_KOMPLEX *) AZ_allocate(sizeof(AZ_KOMPLEX)); if (Prec_pass_data == NULL) AZ_perror("AZK_create_precon: Out of memory."); switch (options[AZ_precond]) { /* NO preconditioning. There is nothing to do */ /* We just need to give a valid matrix to the preconditioning */ case AZ_none: (*Prec) = AZ_precond_create(Amat,AZ_precondition,NULL); break; /* Polynomial preconditioning (least-squares or Neumann). */ /* Here we must give Aztec an upper bound for the norm of the */ /* matrix. In addition, we need to tell Aztec to use the */ /* USER's matrix-vector product when applying the polynomial */ /* preconditioner. */ case AZ_ls: case AZ_Neumann: Amat->matrix_norm = 8.0; (*Prec) = AZ_precond_create(Amat,AZ_precondition,NULL); break; /* Jacobi preconditioning. In this case, Aztec needs the */ /* diagonal of the matrix. This can be passed in as an MSR */ /* matrix. However, when using Jacobi, it is important to note*/ /* that the MSR 'bindx' array does not need to be specified. */ /* Only the diagonal needs to be placed in the 'val' array. */ case AZ_Jacobi: if (!is_VBR) { Pmat = AZ_create_matrix(N_equations, AZ_NO_EXTRA_SPACE, AZ_MSR_MATRIX, N_equations, AZ_NOT_USING_AZTEC_MATVEC); val = (double *) AZ_allocate(N_real * sizeof(double)); if (val == NULL) AZ_perror("AZK_create_precon: Out of memory"); for ( i = 0;i < N_real; i++) val[i] = 1.0/(Amat_real->val[i]*Amat_real->val[i] + Amat_imag->val[i]*Amat_imag->val[i]); Pmat->val = val; Pmat->bindx = NULL; Pmat->indx = NULL; Pmat->bpntr = NULL; Pmat->rpntr = NULL; Pmat->cpntr = NULL; (*Prec) = AZ_precond_create(Pmat,AZK_precon,NULL); options[AZ_precond] = AZ_user_precond; Prec_pass_data->AZK_precond = AZ_Jacobi; (*Prec)->Pmat->aux_ptr = (void *) Prec_pass_data; } else { AZ_perror("Jacobi scaling is only supported for MSR matrices"); } break; /* Domain decomposition preconditioning. In this case, Aztec */ /* needs the local matrix defined within the processor. This */ /* can be passed in as an MSR array. Note: since we do not */ /* overlap the subdomains in this specific example, only the */ /* local columns associated with local equations need to be */ /* kept. That is, we drop all references to external variables*/ case AZ_dom_decomp: AZ_perror("AZK_linsys_create_no_copy does not support dom_decomp"); break; default: AZ_perror("AZK_linsys_create_no_copy does not support this option"); } } if (Prec_pass_data) free(Prec_pass_data); Prec_pass_data = NULL; }
void AZ_calc_blk_diag_LU(double *val, int *indx, int *bindx, int *rpntr, int *cpntr, int *bpntr, double *d_inv, int *d_indx, int *d_bindx, int *d_rpntr, int *d_bpntr, int *data_org, int *ipvt) /******************************************************************************* Routine to calculate the LU factors of the block-diagonal portion of sparse matrix in 'val' and the associated integer pointer vectors. This is used for scaling. Author: Scott A. Hutchinson, SNL, 1421 ======= Return code: void ============ Parameter list: =============== val: Array containing the nonzero entries of the matrix (see Aztec User's Guide). indx, bindx, rpntr, cpntr, bpntr: Arrays used for DMSR and DVBR sparse matrix storage (see file Aztec User's Guide). d_inv: Vector containing the LU of the diagonal blocks. d_indx: The 'indx' array corresponding to the LU-block diagonals. d_bindx: The 'bindx' array corresponding to the LU-block diagonals. d_rpntr: The 'rpntr' array corresponding to the LU-block diagonals. d_bpntr: The 'bpntr' array corresponding to the LU-block diagonals. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i, j, iblk_row, jblk, icount = 0, iblk_count = 0, ival; int m1, n1, itemp; int m; int bpoff, idoff; int info; double *work; char *yo = "AZ_calc_blk_diag_inv: "; /**************************** execution begins ******************************/ m = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk]; if (m == 0) return; /* allocate vectors for lapack routines */ work = (double *) AZ_allocate(rpntr[m]*sizeof(double)); if (work == NULL) AZ_perror("Not enough space for Block Jacobi\n"); /* offset of the first block */ bpoff = *bpntr; idoff = *indx; /* loop over block rows */ for (iblk_row = 0; iblk_row < m; iblk_row++) { /* number of rows in the current row block */ m1 = rpntr[iblk_row+1] - rpntr[iblk_row]; /* starting index of current row block */ ival = indx[bpntr[iblk_row] - bpoff] - idoff; /* loop over column block numbers, looking for the diagonal block */ for (j = bpntr[iblk_row] - bpoff; j < bpntr[iblk_row+1] - bpoff; j++) { jblk = bindx[j]; /* determine the number of columns in this block */ n1 = cpntr[jblk+1] - cpntr[jblk]; itemp = m1*n1; if (jblk == iblk_row) { /* diagonal block */ /* error check */ if (n1 != m1) { (void) AZ_printf_err( "%sERROR: diagonal blocks are not square\n.", yo); exit(-1); } else { /* fill the vectors */ d_indx[iblk_count] = icount; d_rpntr[iblk_count] = rpntr[iblk_row]; d_bpntr[iblk_count] = iblk_row; d_bindx[iblk_count] = iblk_row; for (i = 0; i < itemp; i++) d_inv[icount++] = val[ival + i]; /* invert the dense matrix */ DGETRF_F77(&m1, &m1, &d_inv[d_indx[iblk_count]], &m1, &(ipvt[rpntr[iblk_row]]), &info); if (info < 0) { (void) AZ_printf_err( "%sERROR: argument %d is illegal.\n", yo, -info); exit(-1); } else if (info > 0) { (void) AZ_printf_err( "%sERROR: the factorization has produced a " "singular U with U[%d][%d] being exactly zero.\n", yo, info, info); exit(-1); } iblk_count++; } break; } else ival += itemp; } } d_indx[iblk_count] = icount; d_rpntr[iblk_count] = rpntr[iblk_row]; d_bpntr[iblk_count] = iblk_row; AZ_free((void *) work); } /* AZ_calc_blk_diag_inv */
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. */ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. */ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. */ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
void AZ_factor_subdomain(struct context *context, int N, int N_nz, int *nz_used) { /**************************************************************************** Given an overlapped subdomain matrix, factor it according to the chosen algorithm and store the result back in subdomain. Additionally, store the number of nonzeros used in the factorization in nz_used. Notes: 1) Matrix comes in as an MSR matrix. 2) context contains several fields which need to be appropriately set. These fields are specific to the individual solvers. 3) The factorization overwrites the matrix. However, different solvers will store the factorization in different formats. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== context On input, context contains the matrix to be factored in context.A_overlapped (MSR format), On output, context contains the factored matrix which is stored in a format specific to the solver and any additional parameters required by the backsolver. N On input, the size of the linear system to be solved. N_nz On input, the number of nonzero values in the matrix to be factored. nz_used On output, the number of nonzero values in the matrix representing the factorization. *******************************************************************************/ #ifdef HAVE_AZLU int ifail, N_nz_matrix, *rnr; double *fake_rhs, *aflag; #endif int i, j, *bindx, *bpntr, *iw; double *cr, *unorm, *a, *val; int *ind, *jnz, *ja, ifill; double dtemp = (context->aztec_choices->params)[AZ_omega]; int N_blk_rows, name = context->A_overlapped->data_org[AZ_name]; char str[80]; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK void *precon, *bmat; double rthresh, athresh; int N_int_blk, N_bord_blk, graph_fill; #endif /* End Aztec 2.1 mheroux mod */ bindx = context->A_overlapped->bindx; *nz_used = bindx[N]; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); /* Let IFPACK handle fillin */ graph_fill = (context->aztec_choices->options)[AZ_graph_fill]; (context->aztec_choices->options)[AZ_graph_fill] = 0; /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; /*ifp_initialize();*/ /* Create IFPACK encapsulation of Amat */ context->A_overlapped->rpntr = context->A_overlapped->cpntr; N_int_blk = context->A_overlapped->data_org[AZ_N_int_blk]; N_bord_blk = context->A_overlapped->data_org[AZ_N_bord_blk]; context->A_overlapped->data_org[AZ_N_int_blk] = N_blk_rows; context->A_overlapped->data_org[AZ_N_bord_blk] = 0; (context->aztec_choices->options)[AZ_graph_fill] = graph_fill; az2ifp_blockmatrix(&bmat, context->A_overlapped); context->A_overlapped->data_org[AZ_N_int_blk] = N_int_blk; context->A_overlapped->data_org[AZ_N_bord_blk] = N_bord_blk; rthresh = (context->aztec_choices->params)[AZ_rthresh]; athresh = (context->aztec_choices->params)[AZ_athresh]; ifill = (context->aztec_choices->options)[AZ_graph_fill]; ifp_preconditioner(&precon, bmat, IFP_BILUK, (double) ifill, 0.0, IFP_SVD, rthresh, athresh); if ((context->aztec_choices->options)[AZ_output]>0) { ifp_biluk_stats(precon); } context->precon = precon; break; /* End Aztec 2.1 mheroux mod */ #else AZ_perror("IFPACK not linked. Must compile with -DIFPACK"); #endif case AZ_bilu: if (N == 0) return; bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for bilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } /* recover some space so that there will */ /* be enough room to convert back to vbr */ i = AZ_compress_msr(&(context->A_overlapped->bindx), &(context->A_overlapped->val), context->N_nz_allocated, *nz_used, name, context); context->N_nz = *nz_used; context->N_nz_allocated = *nz_used; AZ_msr2vbr_mem_efficient(N, &(context->A_overlapped->bindx), &(context->A_overlapped->val), &(context->A_overlapped->cpntr), &(context->A_overlapped->bpntr), &(context->A_overlapped->indx), &N_blk_rows, (context->A_overlapped->data_org)[AZ_name], context->tag,i); context->A_overlapped->matrix_type = AZ_VBR_MATRIX; bindx = context->A_overlapped->bindx; bpntr = context->A_overlapped->bpntr; val = context->A_overlapped->val; sprintf(str,"ipvt %s",context->tag); context->ipvt = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"dblock %s",context->tag); context->dblock= (int *) AZ_manage_memory((N_blk_rows+1)* sizeof(int), AZ_ALLOC, name, str, &i); context->N_blk_rows = N_blk_rows; /* set dblock to point to the diagonal block in each block row */ for (i = 0 ; i < N_blk_rows ; i++ ) { for (j = bpntr[i] ; j < bpntr[i+1] ; j++ ) { if (bindx[j] == i) context->dblock[i] = j; } } AZ_fact_bilu(N_blk_rows, context->A_overlapped, context->dblock, context->ipvt); break; case AZ_ilut: cr = (double *) AZ_allocate((2*N+3+context->max_row)*sizeof(int)+ (2*N+2+context->max_row)*sizeof(double)); if (cr == NULL) AZ_perror("Out of space in ilut.\n"); unorm = &(cr[N+2]); a = &(unorm[N]); ind = (int *) &(a[context->max_row]); jnz = &(ind[N+3]); ja = &(jnz[N]); sprintf(str,"iu %s",context->tag); context->iu = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str, &i); AZ_fact_ilut(&N, context->A_overlapped, a, ja, (context->aztec_choices->params)[AZ_drop], context->extra_fact_nz_per_row, N_nz - bindx[N], context->iu,cr,unorm,ind, nz_used, jnz, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(cr); break; case AZ_ilu: dtemp = 0.0; case AZ_rilu: if (N == 0) return; sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) { *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); } context->iu= (int *) AZ_manage_memory((N+1)*sizeof(int),AZ_ALLOC, name, str, &i); iw = (int *) AZ_allocate((N+1)*sizeof(int)); if (iw == NULL) AZ_perror("Out of space in ilu.\n"); AZ_fact_rilu(N, nz_used, context->iu, iw, context->A_overlapped, dtemp, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); AZ_free(iw); break; case AZ_icc: sprintf(str,"iu %s",context->tag); bindx = context->A_overlapped->bindx; val = context->A_overlapped->val; /* for ilu(k) with k > 1 , figure out the new sparsity pattern */ AZ_sort_msr(bindx, val, N); ifill = (context->aztec_choices->options)[AZ_graph_fill]; if (ifill > 0) *nz_used = AZ_fill_sparsity_pattern(context, ifill, bindx, val, N); AZ_fact_chol(context->A_overlapped->bindx, context->A_overlapped->val,N, (context->aztec_choices->params)[AZ_rthresh], (context->aztec_choices->params)[AZ_athresh]); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; aflag = (double *) AZ_allocate(8*sizeof(double)); rnr = (int *) AZ_allocate(N_nz*sizeof(int)); if (rnr == NULL) AZ_perror("Out of space in lu.\n"); sprintf(str,"iflag %s",context->tag); context->iflag = (int *) AZ_manage_memory(10*sizeof(int), AZ_ALLOC, name, str ,&i); sprintf(str,"ha %s",context->tag); context->ha = (int *) AZ_manage_memory(11*(N+1)*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"pivot %s",context->tag); context->pivot = (double *) AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, name, str,&i); aflag[0] = 16.0; aflag[2] = 1.0e8; aflag[3] = 1.0e-12; aflag[1] = (context->aztec_choices->params)[AZ_drop]; /* set up flags for the sparse factorization solver */ context->iflag[0] = 1; context->iflag[1] = 2; context->iflag[2] = 1; context->iflag[3] = 0; context->iflag[4] = 2; /* Note: if matrix is pos def, iflag[2] = 2 is cheaper */ N_nz_matrix = bindx[N] - 1; AZ_msr2lu(N, context->A_overlapped, rnr); /* Mark bindx so we can see what was not used later */ for (i = N_nz_matrix ; i < N_nz ; i++) bindx[i] = -7; /* factor the matrix */ if (N == 1) { context->A_overlapped->val[0]=1./context->A_overlapped->val[0]; } else { context->N_nz_factors = N_nz; fake_rhs = (double *) AZ_allocate(N*sizeof(double)); if (fake_rhs == NULL) { AZ_printf_out("Not enough memory inside subdomain_solve\n"); } for (i = 0 ; i < N ; i++ ) fake_rhs[i] = 0.0; AZ_fact_lu(fake_rhs, context->A_overlapped,aflag, context->pivot, rnr, context->ha, context->iflag, &N_nz_matrix, &ifail, &(context->N_nz_factors), &N, &N); (context->iflag)[4] = 3; AZ_free(fake_rhs); /* find out what was not used by checking what was not touched */ *nz_used = N_nz; for (i = N_nz_matrix; i < N_nz ; i++ ) { if (bindx[i] != -7) *nz_used = i; } (*nz_used)++; context->N_nz_factors = *nz_used; } AZ_free(rnr); AZ_free(aflag); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_err("Unknown subdomain solver(%d)\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } } }
void AZ_find_MSR_ordering(int bindx2[],int **ordering,int N, int **inv_ordering, int name, struct context *context) /******************************************************************************* Use a reverse cuthill McKee algorithm to find an ordering for the matrix. Author: R. Tuminaro Return code: void ============ Parameter list: =============== bindx2: On input, the nonzero sparsity pattern of the matrix for which we will determine a new ordering. Note: bindx2 is changed in this routine, but then returned to its original values before exiting. ordering: On output, ordering[i] gives the new location of row i in the reordered system. inv_ordering: On output, inv_ordering[i] gives the location of row */ { int i; int *mask; int root, nlvl, ccsize; int total = 0; char str[80]; /* convert matrix to Fortran format */ if (N==0) return; for (i = N+1 ; i < bindx2[N]; i++ ) bindx2[i]++; for (i = 0 ; i <= N ; i++ ) bindx2[i] -= N; /* initialize arrays for fnroot() and rcm() */ sprintf(str,"inv_ordering %s",context->tag); *inv_ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); sprintf(str,"ordering %s",context->tag); *ordering = (int *) AZ_manage_memory((N+1)*sizeof(int), AZ_ALLOC, name, str,&i); mask = (int *) AZ_allocate((N+1)*sizeof(int)); if (mask == NULL) { AZ_printf_out("Not enough space for RCM reordering\n"); AZ_exit(1); } for (i = 0 ; i < N ; i++ ) mask[i] = 1; root = 1; while (total != N ) { AZ_FNROOT_F77(&root,bindx2,&(bindx2[N+1]),mask, &nlvl, &((*ordering)[total]), *inv_ordering); AZ_RCM_F77(&root,bindx2,&(bindx2[N+1]),mask,&((*ordering)[total]), &ccsize, *inv_ordering); if ( ccsize != N) { for (i = 0 ; i < ccsize ; i++) mask[(*ordering)[total+i]-1] = 0; for (i = 0 ; i < N ; i++ ) { if ( mask[i] == 1) break; } root = i+1; } total += ccsize; if (ccsize == 0) { AZ_printf_out("Error inside reordering\n"); AZ_exit(1); } } /* convert matrix back to C format */ for (i = 0 ; i <= N ; i++ ) bindx2[i] += N; for (i = N+1 ; i < bindx2[N]; i++ ) bindx2[i]--; /* convert ordering to C format */ for (i = 0 ; i < N ; i++ ) (*ordering)[i]--; /* produce the inverse order */ for (i = 0 ; i < N ; i++) (*inv_ordering)[(*ordering)[i]] = i; AZ_free(mask); }
void init_guess_and_rhs(int update_index[], int update[], double *x[],double *ax[],int data_org[], double val[], int indx[], int bindx[], int rpntr[], int cpntr[], int bpntr[], int proc_config[]) /* * Set the initial guess and the right hand side where the right hand side * is obtained by doing a matrix-vector multiplication. * * Author: Ray Tuminaro, Div 1422, SNL * Date : 3/15/95 * * Parameters * * update_index == On input, ordering of update and external * locally on this processor. For example * 'update_index[i]' gives the index location * of the block which has the global index * 'update[i]'. * update == On input, list of pts to be updated on this node * data_org == On input, indicates how data is set on this node. * For example, data_org[] contains information on * how many unknowns are internal, external and * border unknowns as well as which points need * to be communicated. See User's Guide for more * details. * val, indx, == On input, holds matrix nonzeros. See User's Guide * bindx, rpntr, for more details. * cpntr, bpntr * x == On output, 'x' is allocated and set to all zeros. * ax == On output, 'ax' is allocated and is set to the * result of a matrix-vector product. */ { int i,j; int temp,num; double sum = 0.0; AZ_MATRIX *Amat; temp = data_org[AZ_N_int_blk] + data_org[AZ_N_bord_blk]; num = data_org[AZ_N_internal] + data_org[AZ_N_border]; /* allocate vectors */ i = num + data_org[AZ_N_external]; *x = (double *) AZ_allocate((i+1)*sizeof(double)); *ax = (double *) AZ_allocate((i+1)*sizeof(double)); if (*ax == NULL) { (void) fprintf(stderr, "Not enough space in init_guess_and_rhs() for ax\n"); exit(1); } for (j = 0 ; j < i ; j++ ) (*x)[j] = 0.0; for (j = 0 ; j < i ; j++ ) (*ax)[j] = 0.0; /* initialize 'x' to a function which will be used in matrix-vector product */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) { for (i = 0; i < temp; i++) { for (j = rpntr[i]; j < rpntr[i+1]; j++) { (*x)[j] = (double) (update[i]) + (double)(j-rpntr[i]) / (double)(num_PDE_eqns); } } } else { for (i = 0; i < temp; i++) { (*x)[i] = (double) (update[i]) / (double) (num_PDE_eqns); } } /* Reorder 'x' so that it conforms to the transformed matrix */ AZ_reorder_vec(*x,data_org,update_index,rpntr); if (application == 2) { /* take out the constant vector. Used for the */ /* finite element problem because it is singular */ sum = AZ_gsum_double(sum, proc_config); i = AZ_gsum_int(num, proc_config); if (i != 0) sum = sum / ((double) i); for (i = 0; i < num; i++) (*x)[i] -= sum; } Amat = AZ_matrix_create(num); if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_set_VBR(Amat, rpntr,cpntr, bpntr, indx,bindx, val, data_org, 0, NULL,AZ_LOCAL); Amat->matvec(*x, *ax, Amat, proc_config); AZ_matrix_destroy( &Amat ); for (i = 0; i < num; i++) (*x)[i] = 0.0; } /* init_guess_and_rhs */
void AZ_pcg_f(double b[], double x[], double weight[], int options[], double params[], int proc_config[],double status[], AZ_MATRIX *Amat, AZ_PRECOND *precond, struct AZ_CONVERGE_STRUCT *convergence_info) /******************************************************************************* Conjugate Gradient algorithm to solve the symmetric matrix problem Ax = b. Author: John N. Shadid, SNL, 1421 ======= Return code: void ============ Parameter list: =============== b: Right hand side of linear system. x: On input, contains the initial guess. On output contains the solution to the linear system. weight: Vector of weights for convergence norm #4. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. status: On output, indicates termination status: 0: terminated normally. -1: maximum number of iterations taken without achieving convergence. -2: Breakdown. The algorithm can not proceed due to numerical difficulties (usually a divide by zero). -3: Internal residual differs from the computed residual due to a significant loss of precision. Amat: Structure used to represent the matrix (see file az_aztec.h and Aztec User's Guide). precond: Structure used to represent the preconditioner (see file az_aztec.h and Aztec User's Guide). *******************************************************************************/ { /* local variables */ register int i; int N, NN, one = 1, iter = 1, r_avail = AZ_TRUE, j; int precond_flag, print_freq, proc, brkdown_will_occur = AZ_FALSE; double alpha, beta = 0.0, nalpha, true_scaled_r=-1.0; double *r, *z, *p, *ap, actual_residual = -1.0; double r_z_dot, r_z_dot_old, p_ap_dot, rec_residual=-1.0; double scaled_r_norm=-1.0, brkdown_tol = DBL_EPSILON; int *data_org, str_leng, first_time = AZ_TRUE; char label[64],suffix[32], prefix[64]; double **saveme, *ptap; int *kvec_sizes = NULL, current_kept = 0; double *dots; double doubleone = 1., dzero = 0.; char *T = "T"; char *T2 = "N"; double *block; /**************************** execution begins ******************************/ sprintf(suffix," in cg%d",options[AZ_recursion_level]); /* set string that will be used */ /* for manage_memory label */ /* set prefix for printing */ str_leng = 0; for (i = 0; i < 16; i++) prefix[str_leng++] = ' '; for (i = 0 ; i < options[AZ_recursion_level]; i++ ) { prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; prefix[str_leng++] = ' '; } prefix[str_leng] = '\0'; /* pull needed values out of parameter arrays */ data_org = Amat->data_org; N = data_org[AZ_N_internal] + data_org[AZ_N_border]; precond_flag = options[AZ_precond]; proc = proc_config[AZ_node]; print_freq = options[AZ_print_freq]; /* Initialize some values in convergence info struct */ convergence_info->print_info = print_freq; convergence_info->iteration = 0; convergence_info->sol_updated = 1; /* CG always updates solution */ convergence_info->epsilon = params[AZ_tol]; /* Test against this */ /* allocate space for necessary vectors */ NN = N + data_org[AZ_N_external]; if (NN == 0) NN++; /* make sure everybody allocates something */ NN = NN + (NN%2); /* make sure things are aligned for assembly */ /* matvec on paragon. */ sprintf(label,"z%s",suffix); p = (double *) AZ_manage_memory(4*NN*sizeof(double),AZ_ALLOC, AZ_SYS+az_iterate_id, label, &j); r = &(p[1*NN]); z = &(p[2*NN]); ap = &(p[3*NN]); AZ_compute_residual(b, x, r, proc_config, Amat); if (options[AZ_apply_kvecs]) { AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,NULL, NULL, &r_z_dot, convergence_info); AZ_space_for_kvecs(AZ_OLD_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); dots = (double *) AZ_allocate(2*kvec_sizes[AZ_Nkept]*sizeof(double)); if (dots == NULL) { printf("Not space to apply vectors in CG\n"); exit(1); } DGEMV_F77(CHAR_MACRO(T[0]),&N,&(kvec_sizes[AZ_Nkept]),&doubleone,block,&N, r, &one, &dzero, dots, &one); AZ_gdot_vec(kvec_sizes[AZ_Nkept], dots, &(dots[kvec_sizes[AZ_Nkept]]), proc_config); for (i = 0; i < kvec_sizes[AZ_Nkept]; i++) dots[i] = dots[i]/ptap[i]; DGEMV_F77(CHAR_MACRO(T2[0]), &N, &(kvec_sizes[AZ_Nkept]), &doubleone, block, &N, dots, &one, &doubleone, x, &one); AZ_free(dots); AZ_compute_residual(b, x, r, proc_config, Amat); if ((options[AZ_output] != AZ_none) && (proc == 0)) printf("\t\tApplied Previous Krylov Vectors ... \n\n"); } if (options[AZ_keep_kvecs] > 0) AZ_space_for_kvecs(AZ_NEW_ADDRESS, &kvec_sizes, &saveme, &ptap, options, data_org, suffix, proc_config[AZ_node], &block); /* z = M r */ /* p = 0 */ DCOPY_F77(&N, r, &one, z, &one); status[AZ_first_precond] = AZ_second(); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); status[AZ_first_precond] = AZ_second() - status[AZ_first_precond]; for (i = 0; i < N; i++ ) p[i] = 0.0; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail,r, z, &r_z_dot, convergence_info); true_scaled_r = scaled_r_norm; if ((options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_last) && (options[AZ_output] != AZ_warnings) && (options[AZ_output] != AZ_summary) && (options[AZ_conv]!=AZTECOO_conv_test) && (proc == 0)) { (void) AZ_printf_out("%siter: 0 residual = %e\n", prefix,scaled_r_norm); AZ_flush_out(); } for (iter = 1; iter <= options[AZ_max_iter] && !(convergence_info->converged) && !(convergence_info->isnan); iter++ ) { convergence_info->iteration = iter; /* p = z + beta * p */ /* ap = A p */ for (i = 0; i < N; i++) p[i] = z[i] + beta * p[i]; Amat->matvec(p, ap, Amat, proc_config); if ((options[AZ_orth_kvecs]) && (kvec_sizes != NULL)) { for (i = 0; i < current_kept; i++) { alpha = -AZ_gdot(N, ap, saveme[i], proc_config)/ptap[i]; DAXPY_F77(&N, &alpha, saveme[i], &one, p, &one); } if (current_kept > 0) Amat->matvec(p, ap, Amat, proc_config); } p_ap_dot = AZ_gdot(N, p, ap, proc_config); if (p_ap_dot < brkdown_tol) { /* possible problem */ if (p_ap_dot < 0 || AZ_breakdown_f(N, p, ap, p_ap_dot, proc_config)) { /* something wrong */ AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = r_z_dot / p_ap_dot; nalpha = -alpha; /* x = x + alpha*p */ /* r = r - alpha*Ap */ /* z = M^-1 r */ DAXPY_F77(&N, &alpha, p, &one, x, &one); if (iter <= options[AZ_keep_kvecs]) { DCOPY_F77(&N, p, &one, saveme[iter-1], &one); ptap[iter-1] = p_ap_dot ; kvec_sizes[AZ_Nkept]++; current_kept = kvec_sizes[AZ_Nkept]; } /* else { i = (iter-1)%options[AZ_keep_kvecs]; DCOPY_F77(&N, p, &one, saveme[i], &one); ptap[i] = p_ap_dot ; } */ DAXPY_F77(&N, &nalpha, ap, &one, r, &one); DCOPY_F77(&N, r, &one, z, &one); if (precond_flag) precond->prec_function(z,options,proc_config,params,Amat,precond); r_z_dot_old = r_z_dot; /* compute a few global scalars: */ /* 1) ||r|| corresponding to options[AZ_conv] */ /* 2) scaled ||r|| corresponding to options[AZ_conv] */ /* 3) r_z_dot = <z, r> */ AZ_compute_global_scalars(Amat, x, b, r, weight, &rec_residual, &scaled_r_norm, options, data_org, proc_config, &r_avail, r, z, &r_z_dot, convergence_info); if (brkdown_will_occur) { AZ_scale_true_residual( x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat,convergence_info); AZ_terminate_status_print(AZ_breakdown, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } beta = r_z_dot / r_z_dot_old; if (fabs(r_z_dot) < brkdown_tol) { /* possible problem */ if (AZ_breakdown_f(N, r, z, r_z_dot, proc_config)) brkdown_will_occur = AZ_TRUE; else brkdown_tol = 0.1 * fabs(r_z_dot); } if ( (iter%print_freq == 0) && (options[AZ_conv]!=AZTECOO_conv_test) && proc == 0 ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* convergence tests */ if (options[AZ_check_update_size] & convergence_info->converged) convergence_info->converged = AZ_compare_update_vs_soln(N, -1.,alpha, p, x, params[AZ_update_reduction], options[AZ_output], proc_config, &first_time); if (convergence_info->converged) { AZ_scale_true_residual(x, b, ap, weight, &actual_residual, &true_scaled_r, options, data_org, proc_config, Amat, convergence_info); /* * Note: epsilon and params[AZ_tol] may not be equal due to a previous * call to AZ_get_new_eps(). */ if (!(convergence_info->converged) && options[AZ_conv]!=AZTECOO_conv_test) { if (AZ_get_new_eps(&(convergence_info->epsilon), scaled_r_norm, true_scaled_r, options, proc_config) == AZ_QUIT) { /* * Computed residual has converged, actual residual has not converged, * AZ_get_new_eps() has decided that it is time to quit. */ AZ_terminate_status_print(AZ_loss, iter, status, rec_residual, params, true_scaled_r, actual_residual, options, proc_config); return; } } } } iter--; if ( (iter%print_freq != 0) && (proc == 0) && (options[AZ_output] != AZ_none) && (options[AZ_output] != AZ_warnings) && (options[AZ_conv]!=AZTECOO_conv_test) ) { (void) AZ_printf_out("%siter: %4d residual = %e\n", prefix, iter, scaled_r_norm); AZ_flush_out(); } /* check if we exceeded maximum number of iterations */ if (convergence_info->converged) { i = AZ_normal; scaled_r_norm = true_scaled_r; } else if (convergence_info->isnan) i = AZ_breakdown; else i = AZ_maxits; AZ_terminate_status_print(i, iter, status, rec_residual, params, scaled_r_norm, actual_residual, options, proc_config); } /* AZ_pcg */
//============================================================================== AztecDVBR_Matrix::AztecDVBR_Matrix(const AztecDVBR_Matrix& src) : amap_(src.amap_), Amat_(NULL), N_update_(src.N_update_), external_(NULL), extern_index_(NULL), update_index_(NULL), data_org_(NULL), orderingUpdate_(NULL), isLoaded_(src.isLoaded_), isAllocated_(src.isAllocated_), localNNZ_(src.localNNZ_), nnzPerRow_(NULL), numRemoteBlocks_(0), remoteInds_(NULL), remoteBlockSizes_(NULL) { // //This copy constructor just takes a reference to src's amap_ (above), but //'deep'-copies everything else. i.e., all arrays are allocated here and copied //from src, etc. // //When this constructor completes, this matrix should be in the same state as //the 'src' matrix. i.e., if src is already allocated, then this matrix will //have its structure allocated. If src is already loaded (AZ_transform'd) then //this matrix will have all arrays resulting from AZ_transform allocated too. //The only thing this matrix won't get is the coefficient data from src. // nnzPerRow_ = new int[N_update_]; int i; for(i=0; i<N_update_; i++) { nnzPerRow_[i] = src.nnzPerRow_[i]; } Amat_ = AZ_matrix_create(N_update_); Amat_->matrix_type = AZ_VBR_MATRIX; Amat_->matvec = (void(*)(double*,double*,AZ_MATRIX_STRUCT*,int*))AZ_VBR_matvec_mult; Amat_->rpntr = NULL; calcRpntr(); if (isAllocated_) { Amat_->bpntr = new int[N_update_+1]; for(i=0; i<N_update_+1; i++) Amat_->bpntr[i] = src.Amat_->bpntr[i]; int totalNNZBlks = Amat_->bpntr[N_update_]; Amat_->bindx = new int[totalNNZBlks]; for(i=0; i<totalNNZBlks; i++) Amat_->bindx[i] = src.Amat_->bindx[i]; Amat_->indx = new int[totalNNZBlks+1]; for(i=0; i<totalNNZBlks+1; i++) Amat_->indx[i] = src.Amat_->indx[i]; Amat_->val = new double[localNNZ_]; for(i=0; i<localNNZ_; i++) Amat_->val[i] = 0.0; } if (isLoaded_) { int dataOrgLength = src.data_org_[AZ_total_send] + AZ_send_list; data_org_ = (int*) AZ_allocate(dataOrgLength * sizeof(int)); for(i=0; i<dataOrgLength; i++) data_org_[i] = src.data_org_[i]; Amat_->data_org = data_org_; int extLength = src.data_org_[AZ_N_ext_blk]; external_ = (int*) AZ_allocate(extLength * sizeof(int)); extern_index_ = (int*) AZ_allocate(extLength * sizeof(int)); for(i=0; i<extLength; i++) { external_[i] = src.external_[i]; extern_index_[i] = src.extern_index_[i]; } update_index_ = (int*) AZ_allocate(N_update_ * sizeof(int)); orderingUpdate_ = new int[N_update_]; for(i=0; i<N_update_; i++) { update_index_[i] = src.update_index_[i]; orderingUpdate_[i] = src.orderingUpdate_[i]; } int cpntrLength = N_update_ + src.data_org_[AZ_N_ext_blk] + 1; Amat_->cpntr = (int*) AZ_allocate(cpntrLength * sizeof(int)); for(i=0; i<cpntrLength; i++) Amat_->cpntr[i] = src.Amat_->cpntr[i]; } }
double AZK_residual_norm_no_copy(double *xr, double *xi, double *br, double *bi, int *options, double *params, int *proc_config, AZ_MATRIX *Amat_real, AZ_MATRIX *Amat_imag) /******************************************************************************* Author: Mike Heroux, SNL, 9222 ======= Return code: double ============ Parameter list: =============== xr,xi: On input, contains the initial guess, real part in xr and imaginary part in xi. br,bi: Right hand side of linear system. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. Amat_real, Amat_imag: The real and imaginary parts of the complex operator, each stored separately as AZ_MATRIX structures. Overview ======== AZK_residual_norm_no_copy computes the two norm of the residual ||r|| where r = b - A*x. Specifically, writing in terms of real and imaginary parts, we have (rr + i*ri) = (br + i*bi) - (Ar + i*Ai)*(xr + i*xi). The two-norm of the complex vector r is identical to the two-norm of the twice-length real vector formed by concatenating rr = real(r) and ri = imag(r). *******************************************************************************/ { AZ_MATRIX *Amat; /* Structure representing matrix to be solved. */ double *x, *b; /* Solution and right-hand side to linear system. */ int N_equations, i; double *y_tmp, result; /* Transform complex system into komplex system */ AZK_create_linsys_no_copy (xr, xi, br, bi, options, params, proc_config, Amat_real, Amat_imag, &x, &b, &Amat); /* Allocate temp vector y */ N_equations = Amat->data_org[AZ_N_internal] + Amat->data_org[AZ_N_border]; y_tmp = (double *) AZ_allocate(N_equations*sizeof(double)); if (y_tmp == NULL) AZ_perror("AZK_residual_norm_no_copy: Out of memory."); /* Compute y = A*x. */ Amat->matvec(x, y_tmp, Amat, proc_config); /* Compute r = b - A*x (put in y_tmp) */ /*daxpy_(&N_equations, &neg_one, b, &ione, y_tmp, &ione);*/ for (i=0; i<N_equations; i++) y_tmp[i] = y_tmp[i] - b[i]; /* Use Aztec function to compute norm */ result = AZ_gvector_norm(N_equations, 2, y_tmp, proc_config); /* Free memory space */ AZK_destroy_linsys (options, params, proc_config, &x, &b, &Amat); AZ_free((void *) y_tmp); result = sqrt(result); return(result); /* AZK_residual_norm */ }
double AZK_residual_norm(double *xk, double *bk, int *options, double *params, int *proc_config, AZ_MATRIX *Amat_komplex) /******************************************************************************* Author: Mike Heroux, SNL, 9222 ======= Return code: double ============ Parameter list: =============== xk: On input, contains the initial guess. bk: Right hand side of linear system. options: Determines specific solution method and other parameters. params: Drop tolerance and convergence tolerance info. proc_config: Machine configuration. proc_config[AZ_node] is the node number. proc_config[AZ_N_procs] is the number of processors. Amat_komplex: The komplex operator, stored as an AZ_MATRIX structure. Overview ======== AZK_residual_norm computes the two norm of the residual ||r|| where r = b - A*x. Specifically, writing in terms of real and imaginary parts, we have (rr + i*ri) = (br + i*bi) - (Ar + i*Ai)*(xr + i*xi). The two-norm of the complex vector r is identical to the two-norm of the twice-length real vector formed by concatenating rr = real(r) and ri = imag(r). *******************************************************************************/ { int N_equations, i; double *y_tmp, result; /* Allocate temp vector y */ N_equations = Amat_komplex->data_org[AZ_N_internal] + Amat_komplex->data_org[AZ_N_border]; y_tmp = (double *) AZ_allocate(N_equations*sizeof(double)); if (y_tmp == NULL) AZ_perror("AZK_residual_norm: Out of memory."); /* Compute y = A*x. */ Amat_komplex->matvec(xk, y_tmp, Amat_komplex, proc_config); /* Compute r = b - A*x (put in y_tmp) */ /*daxpy_(&N_equations, &neg_one, bk, &ione, y_tmp, &ione);*/ for (i=0; i<N_equations; i++) y_tmp[i] = y_tmp[i] - bk[i]; /* Use Aztec function to compute norm */ result = AZ_gvector_norm(N_equations, 2, y_tmp, proc_config); /* Free memory space */ AZ_free((void *) y_tmp); result = sqrt(result); return(result); /* AZK_residual_norm */ }
void AZ_ifpack_prec_create(double *x, double *b, int *options, double *params, int *proc_config, AZ_MATRIX *Amat, AZ_PRECOND **Prec) { AZ_IFPACK *Prec_pass_data; void *precon, *bmat ; int nr, nc, *data_org; double rthresh, athresh; Prec_pass_data = (AZ_IFPACK *) AZ_allocate(sizeof(AZ_IFPACK)); az2ifp_blockmatrix(&bmat, Amat); /* Create IFPACK encapsulation of Amat */ /* set the preconditioning structure 'Prec'. */ if (options[AZ_precond] == AZ_none) ifp_preconditioner(&precon, bmat, IFP_NONE, (double) options[AZ_graph_fill], 0.0, IFP_INVERSE, 0.0, 0.0); else if (options[AZ_precond] == AZ_Jacobi) { rthresh = params[AZ_rthresh]; athresh = params[AZ_athresh]; ifp_preconditioner(&precon, bmat, IFP_BJACOBI, 0.0, 0.0, IFP_SVD, rthresh, athresh); /*IFP_INVERSE, 0.0, 0.0); */ } else if (options[AZ_precond] == AZ_dom_decomp && options[AZ_subdomain_solve] == AZ_bilu_ifp) { rthresh = params[AZ_rthresh]; athresh = params[AZ_athresh]; ifp_preconditioner(&precon, bmat, IFP_BILUK, (double) options[AZ_graph_fill], 0.0, IFP_SVD, rthresh, athresh); /*IFP_INVERSE, 0.0, 0.0); */ } else { printf("Not a supported preconditioner in az_ifpack_prec_create\n"); abort(); } (*Prec) = AZ_precond_create(Amat,AZ_ifpack_precon,NULL); /* Store pointers to preconditioner and IFPACK encapsulation of Amat */ Prec_pass_data->precon = precon; Prec_pass_data->bmat = bmat; /* Construct auxiliary vector for use with apply function. NOTE: We are assuming only one RHS at this time !!! */ data_org = Amat->data_org; nr = data_org[AZ_N_internal] + data_org[AZ_N_border]; nc = 1; /*input_vector = (double *) malloc (nr * sizeof(double)); Prec_pass_data.input_vector = input_vector; */ Prec_pass_data->nr = nr; Prec_pass_data->nc = nc; (*Prec)->Pmat = Amat; Prec_pass_data->user_aux_ptr = (*Prec)->Pmat->aux_ptr; /* Save this to be able to restore*/ (*Prec)->Pmat->aux_ptr = (void *) Prec_pass_data; (*Prec)->prec_function = AZ_ifpack_precon; Prec_pass_data->user_precon = options[AZ_precond]; /* Save this to be able to restore*/ options[AZ_precond] = AZ_user_precond; }
void AZ_domain_decomp(double x[], AZ_MATRIX *Amat, int options[], int proc_config[], double params[], struct context *context) /******************************************************************************* Precondition 'x' using an overlapping domain decomposition method where a solver specified by options[AZ_subdomain_solve] is used on the subdomains. Note: if a factorization needs to be computed on the first iteration, this will be done and stored for future iterations. Author: Lydie Prevost, SNL, 9222 ======= Revised by R. Tuminaro (4/97), SNL, 9222 Return code: void ============ Parameter list: =============== N_unpadded: On input, number of rows in linear system (unpadded matrix) that will be factored (after adding values for overlapping). Nb_unpadded: On input, number of block rows in linear system (unpadded) that will be factored (after adding values for overlapping). N_nz_unpadded: On input, number of nonzeros in linear system (unpadded) that will be factored (after adding values for overlapping). x: On output, x[] is preconditioned by performing the subdomain solve indicated by options[AZ_subdomain_solve]. val indx bindx rpntr: On input, arrays containing matrix nonzeros (see manual). cpntr bpntr options: Determines specific solution method and other parameters. In this routine, we are concerned with options[AZ_overlap]: == AZ_none: nonoverlapping domain decomposition == AZ_diag: use rows corresponding to external variables but only keep the diagonal for these rows. == k : Obtain rows that are a distance k away from rows owned by this processor. data_org: Contains information on matrix data distribution and communication parameters (see manual). *******************************************************************************/ { int N_unpadded, Nb_unpadded, N_nz_unpadded; double *x_pad = NULL, *x_reord = NULL, *ext_vals = NULL; int N_nz, N_nz_padded, nz_used; int mem_orig, mem_overlapped, mem_factor; int name, i, bandwidth; int *ordering = NULL; double condest; /* double start_t; */ int estimated_requirements; char str[80]; int *garbage; int N; int *padded_data_org = NULL, *bindx, *data_org; double *val; int *inv_ordering = NULL; int *map = NULL; AZ_MATRIX *A_overlapped = NULL; struct aztec_choices aztec_choices; /**************************** execution begins ******************************/ data_org = Amat->data_org; bindx = Amat->bindx; val = Amat->val; N_unpadded = data_org[AZ_N_internal] + data_org[AZ_N_border]; Nb_unpadded = data_org[AZ_N_int_blk]+data_org[AZ_N_bord_blk]; if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) N_nz_unpadded = bindx[N_unpadded]; else if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) N_nz_unpadded = (Amat->indx)[(Amat->bpntr)[Nb_unpadded]]; else { if (Amat->N_nz < 0) AZ_matfree_Nnzs(Amat); N_nz_unpadded = Amat->N_nz; } aztec_choices.options = options; aztec_choices.params = params; context->aztec_choices = &aztec_choices; context->proc_config = proc_config; name = data_org[AZ_name]; if ((options[AZ_pre_calc] >= AZ_reuse) && (context->Pmat_computed)) { N = context->N; N_nz = context->N_nz; A_overlapped = context->A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; } else { sprintf(str,"A_over %s",context->tag); A_overlapped = (AZ_MATRIX *) AZ_manage_memory(sizeof(AZ_MATRIX), AZ_ALLOC, name, str, &i); AZ_matrix_init(A_overlapped, 0); context->A_overlapped = A_overlapped; A_overlapped->data_org = data_org; A_overlapped->matvec = Amat->matvec; A_overlapped->matrix_type = AZ_MSR_MATRIX; AZ_init_subdomain_solver(context); AZ_compute_matrix_size(Amat, options, N_nz_unpadded, N_unpadded, &N_nz_padded, data_org[AZ_N_external], &(context->max_row), &N, &N_nz, params[AZ_ilut_fill], &(context->extra_fact_nz_per_row), Nb_unpadded,&bandwidth); estimated_requirements = N_nz; if (N_nz*2 > N_nz) N_nz = N_nz*2; /* check for overflow */ /* Add extra memory to N_nz. */ /* This extra memory is used */ /* as temporary space during */ /* overlapping calculation */ /* Readjust N_nz by allocating auxilliary arrays and allocate */ /* the MSR matrix to check that there is enough space. */ /* block off some space for map and padded_data_org in dd_overlap */ garbage = (int *) AZ_allocate((AZ_send_list + 2*(N-N_unpadded) +10)* sizeof(int)); AZ_hold_space(context, N); if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ if (N_nz*((int) sizeof(double)) < N_nz) N_nz=N_nz/2; /* check for overflow */ N_nz = AZ_adjust_N_nz_to_fit_memory(N_nz, context->N_large_int_arrays, context->N_large_dbl_arrays); context->N_nz_factors = N_nz; if (N_nz <= N_nz_unpadded) { AZ_printf_out("Error: Not enough space for domain decomposition\n"); AZ_exit(1); } if (estimated_requirements > N_nz ) estimated_requirements = N_nz; /* allocate matrix via AZ_manage_memory() */ sprintf(str,"bindx %s",context->tag); A_overlapped->bindx =(int *) AZ_manage_memory(N_nz*sizeof(int), AZ_ALLOC, name, str, &i); sprintf(str,"val %s",context->tag); A_overlapped->val =(double *) AZ_manage_memory(N_nz*sizeof(double), AZ_ALLOC, name, str, &i); context->N_nz_allocated = N_nz; AZ_free_space_holder(context); AZ_free(garbage); /* convert to MSR if necessary */ if (data_org[AZ_matrix_type] == AZ_VBR_MATRIX) AZ_vb2msr(Nb_unpadded,val,Amat->indx,bindx,Amat->rpntr,Amat->cpntr, Amat->bpntr, A_overlapped->val, A_overlapped->bindx); else if (data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { for (i = 0 ; i < N_nz_unpadded; i++ ) { A_overlapped->bindx[i] = bindx[i]; A_overlapped->val[i] = val[i]; } } else AZ_matfree_2_msr(Amat,A_overlapped->val,A_overlapped->bindx,N_nz); mem_orig = AZ_gsum_int(A_overlapped->bindx[N_unpadded],proc_config); /* start_t = AZ_second(); */ AZ_pad_matrix(context, proc_config, N_unpadded, &N, &(context->map), &(context->padded_data_org), &N_nz, estimated_requirements); /* if (proc_config[AZ_node] == 0) AZ_printf_out("matrix padding took %e seconds\n",AZ_second()-start_t); */ mem_overlapped = AZ_gsum_int(A_overlapped->bindx[N],proc_config); if (options[AZ_reorder]) { /* start_t = AZ_second(); */ AZ_find_MSR_ordering(A_overlapped->bindx, &(context->ordering),N, &(context->inv_ordering),name,context); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to find ordering\n", AZ_second()-start_t); */ /* start_t = AZ_second(); */ AZ_mat_reorder(N,A_overlapped->bindx,A_overlapped->val, context->ordering, context->inv_ordering); /* if (proc_config[AZ_node] == 0) AZ_printf_out("took %e seconds to reorder\n", AZ_second()-start_t); */ /* NOTE: ordering is freed inside AZ_mat_reorder */ #ifdef AZ_COL_REORDER if (options[AZ_reorder]==2) { AZ_mat_colperm(N,A_overlapped->bindx,A_overlapped->val, &(context->ordering), name, context ); } #endif } /* Do a factorization if needed. */ /* start_t = AZ_second(); */ AZ_factor_subdomain(context, N, N_nz, &nz_used); if (options[AZ_output] > 0 && options[AZ_diagnostics]!=AZ_none) { AZ_printf_out("\n*********************************************************************\n"); condest = AZ_condest(N, context); AZ_printf_out("***** Condition number estimate for subdomain preconditioner on PE %d = %.4e\n", proc_config[AZ_node], condest); AZ_printf_out("*********************************************************************\n"); } /* start_t = AZ_second()-start_t; max_time = AZ_gmax_double(start_t,proc_config); min_time = AZ_gmin_double(start_t,proc_config); if (proc_config[AZ_node] == 0) AZ_printf_out("time for subdomain solvers ranges from %e to %e\n", min_time,max_time); */ if ( A_overlapped->matrix_type == AZ_MSR_MATRIX) AZ_compress_msr(&(A_overlapped->bindx), &(A_overlapped->val), context->N_nz_allocated, nz_used, name, context); context->N_nz = nz_used; context->N = N; context->N_nz_allocated = nz_used; mem_factor = AZ_gsum_int(nz_used,proc_config); if (proc_config[AZ_node] == 0) AZ_print_header(options,mem_overlapped,mem_orig,mem_factor); if (options[AZ_overlap] >= 1) { sprintf(str,"x_pad %s",context->tag); context->x_pad = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); sprintf(str,"ext_vals %s",context->tag); context->ext_vals = (double *) AZ_manage_memory((N-N_unpadded)* sizeof(double), AZ_ALLOC, name, str, &i); } if (options[AZ_reorder]) { sprintf(str,"x_reord %s",context->tag); context->x_reord = (double *) AZ_manage_memory(N*sizeof(double), AZ_ALLOC, name, str, &i); } } /* Solve L u = x where the solution u overwrites x */ x_reord = context->x_reord; inv_ordering = context->inv_ordering; ordering = context->ordering; x_pad = context->x_pad; ext_vals = context->ext_vals; padded_data_org = context->padded_data_org; map = context->map; if (x_pad == NULL) x_pad = x; if (options[AZ_overlap] >= 1) { for (i = 0 ; i < N_unpadded ; i++) x_pad[i] = x[i]; AZ_exchange_bdry(x_pad,padded_data_org, proc_config); for (i = 0 ; i < N-N_unpadded ; i++ ) ext_vals[map[i]-N_unpadded] = x_pad[i+N_unpadded]; for (i = 0 ; i < N-N_unpadded ; i++ ) x_pad[i + N_unpadded] = ext_vals[i]; } else if (options[AZ_overlap] == AZ_diag) AZ_exchange_bdry(x_pad,data_org, proc_config); if (x_reord == NULL) x_reord = x_pad; if (options[AZ_reorder]) { /* Apply row permutation to the right hand side */ /* ((P'A P)Pi') Pi P'x = P'rhs, b= P'rhs */ for (i = 0 ; i < N ; i++ ) x_reord[inv_ordering[i]] = x_pad[i]; } AZ_solve_subdomain(x_reord,N, context); #ifdef AZ_COL_REORDER /* Apply column permutation to the solution */ if (options[AZ_reorder]==1){ /* ((P'A P) P'sol = P'rhs sol = P( P'sol) */ for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; } if (options[AZ_reorder]==2){ /* * ((P'A P)Pi') Pi P'sol = P'rhs sol = P Pi'( Pi P'sol) * Version 1: * for (i = 0; i < N; i++) pi_sol[i] = x_reord[ordering[i]]; * for (j = 0; j < N; j++) x_pad[j] = pi_sol[inv_ordering[j]]; * Version 2: */ for (i = 0; i < N; i++) x_pad[i] = x_reord[ ordering[inv_ordering[i]] ]; } #else if (options[AZ_reorder]) for (i = 0; i < N; i++) x_pad[i] = x_reord[inv_ordering[i]]; #endif AZ_combine_overlapped_values(options[AZ_type_overlap],padded_data_org, options, x_pad, map,ext_vals,name,proc_config); if (x_pad != x) for (i = 0 ; i < N_unpadded ; i++ ) x[i] = x_pad[i]; } /* subdomain driver*/
void AZ_hold_space(struct context *context, int N) { /**************************************************************************** This routine is used in conjunction with AZ_free_space_holder(). Essentially, this routine allocates memory while AZ_free_space_holder() deallocates. The whole point of these two routines is to allocated all the space needed during the factorization process EXCLUDING all arrays whose size is related to the number of nonzeros. Once this is done, we can determine how much space there is left for the large arrays required for the factorization and split the remaining space amoung these large arrays. In this way LU routines where it is difficult to know the space requirements ahead of time can try to use as large an array as possible. Note: after factorization 'realloc' is used to reduce the array sizes. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== context On input, context->aztec_choices-> options[AZ_subdomain_solve] indicates the solver being used. On output, context->space_holder points to a block of memory which can hold all the 'nonlarge' arrays required by this solver. N On input, the size of the matrix to be allocated. *******************************************************************************/ switch(context->aztec_choices->options[AZ_subdomain_solve]) { case AZ_ilut: context->space_holder = (int *) AZ_allocate((2*N+2+context->max_row)* sizeof(double) + sizeof(int)* (3*N+8+context->max_row)); if (context->space_holder == NULL) AZ_perror("No space in ilut.\n"); /* Space for cr (N+2 doubles), unorm (N doubles), a (max_row doubles),*/ /* ind (N+3 ints), jnz (N ints), iu (N+1 ints), ja(max_row ints), */ /* + 4 ints for manage memory header */ break; #ifdef HAVE_AZLU case AZ_lu: context->space_holder = (int *) AZ_allocate((2*N+9)* sizeof(double) + (11*(N+1)+22)*sizeof(int) ); /* Space for aflag (8 doubles), ifail (10 ints), ha (11*(N+1) ints), */ /* pivot (N+1 doubles), fake_rhs (N doubles)+12 ints for manage */ /* memory header */ /* Note: Arrays of size N_nz (e.g. rnr) are not allocated by this */ /* routine. Instead the subdomain field N_int_arrays is set. */ if (context->space_holder == NULL) AZ_perror("Out of space in lu.\n"); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; case AZ_bilu: /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: /* End Aztec 2.1 mheroux mod */ context->space_holder= (int *) AZ_allocate((N+1)*sizeof(double)); if (context->space_holder == NULL) AZ_perror("No space for bilu.\n"); /* BILU is a little funny in that the maximum amount of memory */ /* required does not occur during the factorization. Instead */ /* it occurs when converting MSR to VBR. At this point, an */ /* additional array of length N is needed. */ break; case AZ_icc: context->space_holder= (int *) AZ_allocate((2*(N+1))*sizeof(int)+ (N+1)*sizeof(double)); if (context->space_holder == NULL) AZ_perror("Out of space in ilu.\n"); break; case AZ_ilu: case AZ_rilu: context->space_holder= (int *) AZ_allocate((2*(N+1)+4)*sizeof(int)); /* Space for iu (N+1 ints), iw (N+1 ints) + 4 ints for manage_memory */ if (context->space_holder == NULL) AZ_perror("Out of space in ilu.\n"); break; default: ; } }
void AZK_create_linsys_no_copy(double *xr, double *xi, double *br, double *bi, int *options, double *params, int *proc_config, AZ_MATRIX *Amat_real, AZ_MATRIX *Amat_imag, double **x, double **b, AZ_MATRIX **Amat) { /* Transforms a complex-valued system (Ar +i*Ai) * (xr + i*xi) = (br + i*bi) where double precision arrays hold the real and imaginary parts separately. Input arguments: ================ xr,xi: On input, contains the initial guess, real part in xr and imaginary part in xi. On output contains the solution to the linear system. br,bi: Right hand side of linear system. Output arguments: ================= x: Komplex version of initial guess and solution. b: Komplex version of RHS. Amat: Komplex version of matrix stored as an AZ_MATRIX structure. */ AZ_KOMPLEX *linsys_pass_data; int N_equations, N_blk_equations, N_real, N_external; int *data_org_real, *data_org_imag; int *komplex_to_real, *komplex_to_imag; int i; if (Amat_real->has_global_indices || Amat_imag->has_global_indices) AZ_perror("AZK_create_linsys_no_copy requires local indices"); linsys_pass_data = (AZ_KOMPLEX *) AZ_allocate(sizeof(AZ_KOMPLEX)); if (linsys_pass_data == NULL) AZ_perror("AZK_create_linsys_no_copy: Out of memory."); data_org_real = Amat_real->data_org; data_org_imag = Amat_imag->data_org; N_real = data_org_real[AZ_N_internal] + data_org_real[AZ_N_border]; N_equations = 2 * N_real; N_blk_equations = N_equations; N_external = AZ_MAX(data_org_real[AZ_N_external], data_org_imag[AZ_N_external]); if (Amat_real->matrix_type == AZ_MSR_MATRIX) { Amat_real->data_org[AZ_N_int_blk] = Amat_real->data_org[AZ_N_internal]; Amat_real->data_org[AZ_N_bord_blk] = Amat_real->data_org[AZ_N_border]; N_blk_equations = N_equations; } else if (Amat_real->matrix_type == AZ_VBR_MATRIX) { N_blk_equations = data_org_real[AZ_N_int_blk] + data_org_real[AZ_N_bord_blk]; } else if (Amat_real->matrix_type == AZ_USER_MATRIX) { Amat_real->data_org[AZ_N_int_blk] = Amat_real->data_org[AZ_N_internal]; Amat_real->data_org[AZ_N_bord_blk] = Amat_real->data_org[AZ_N_border]; N_blk_equations = N_equations; } else AZ_perror("AZK_create_linsys_no_copy: Unknown matrix type."); if (Amat_imag->matrix_type == AZ_MSR_MATRIX) { Amat_imag->data_org[AZ_N_int_blk] = Amat_imag->data_org[AZ_N_internal]; Amat_imag->data_org[AZ_N_bord_blk] = Amat_imag->data_org[AZ_N_border]; } else if (Amat_imag->matrix_type == AZ_USER_MATRIX) { Amat_imag->data_org[AZ_N_int_blk] = Amat_imag->data_org[AZ_N_internal]; Amat_imag->data_org[AZ_N_bord_blk] = Amat_imag->data_org[AZ_N_border]; } (*Amat) = AZ_create_matrix(N_equations, 0, AZ_USER_MATRIX, N_blk_equations, AZ_NOT_USING_AZTEC_MATVEC); /* Merge real and imaginary parts into K matrix order */ komplex_to_real = (int *) AZ_allocate (N_real*sizeof(int)); komplex_to_imag = (int *) AZ_allocate (N_real*sizeof(int)); (*x) = (double *) AZ_allocate((N_equations+N_external)*sizeof(double)); (*b) = (double *) AZ_allocate((N_equations+N_external)*sizeof(double)); if ((*b) == NULL) AZ_perror("AZK_create_linsys_no_copy: Out of memory."); for (i=0; i <N_real; i++) { komplex_to_real[i] = 2*i; komplex_to_imag[i] = 2*i+1; (*x)[komplex_to_real[i]] = xr[i]; (*x)[komplex_to_imag[i]] = xi[i]; (*b)[komplex_to_real[i]] = br[i]; (*b)[komplex_to_imag[i]] = bi[i]; } linsys_pass_data->Amat_real = Amat_real; linsys_pass_data->Amat_imag = Amat_imag; linsys_pass_data->komplex_to_real = komplex_to_real; linsys_pass_data->komplex_to_imag = komplex_to_imag; linsys_pass_data->c11 = 1.0; linsys_pass_data->c12 = 0.0; linsys_pass_data->c21 = 0.0; linsys_pass_data->c22 = 1.0; linsys_pass_data->Form_of_Equations = AZK_Komplex_No_Copy; linsys_pass_data->From_Global_Indices = 0; (*Amat)->matvec = AZK_matvec_no_copy; (*Amat)->aux_ptr = (void *) linsys_pass_data; return; }
void AZ_solve_subdomain(double x[],int N, struct context *context) { /**************************************************************************** Given a vector 'x' representing the right hand side, solve the system using whatever subdomain solver is indicated by 'context->which' and whatever factorization information has already been computed. Author: Ray Tuminaro, SNL, 9222 (3/98) Return code: void ============ Parameter list: =============== x On input, the right hand side of the subdomain system that is to be solved. On output, the solution of the subdomain system. N On input, the size of the linear system to be solved. bindx2,val2 On input, matrix or factorization information to be used by the solver. For most schemes, this information is in MSR format. However, the lu and bilu scheme would have this information in another format. Note: additional array information can be passed through context. context On input, the various fields are set to solver specific information corresponding to algorithm parameters as well as a previously done factorization. *******************************************************************************/ double *val2; int *bindx2; int N_blk_rows; #ifdef HAVE_AZLU int ifail; #endif int *sub_options, sub_proc_config[AZ_PROC_SIZE], *hold_data_org, *new_data_org; double *sub_params, *sub_status; AZ_MATRIX *sub_matrix; AZ_PRECOND *sub_precond; struct AZ_SCALING *sub_scaling; #ifdef AZTEC_MPI MPI_AZComm *tptr; #endif double *y; char label[80]; int t1, t2, t3, i, t4, t5 = 0; /* Begin Aztec 2.1 mheroux mod */ #ifdef IFPACK int ione = 1; void *precon; #endif /* End Aztec 2.1 mheroux mod */ val2 = context->A_overlapped->val; bindx2 = context->A_overlapped->bindx; switch(context->aztec_choices->options[AZ_subdomain_solve]) { /* Begin Aztec 2.1 mheroux mod */ case AZ_bilu_ifp: #ifdef IFPACK y = (double *) malloc (N * sizeof(double)); DCOPY_F77(&N, x, &ione, y, &ione); precon = context->precon; ifp_apply(precon, N, 1, y, N, x, N); free((void *) y); #endif break; /* End Aztec 2.1 mheroux mod */ case AZ_bilu: N_blk_rows = context->N_blk_rows; AZ_lower_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x); AZ_upper_triang_vbr_solve(N_blk_rows, context->A_overlapped->cpntr, context->A_overlapped->bpntr, context->A_overlapped->indx, bindx2, val2, x, context->ipvt, context->dblock); break; case AZ_ilut: case AZ_rilu: case AZ_ilu: AZ_lower_tsolve(x,N, val2, bindx2, context->iu, x ); AZ_upper_tsolve( x, N, val2, bindx2, context->iu); break; case AZ_icc: AZ_lower_icc(bindx2,val2,N,x); AZ_upper_icc(bindx2,val2,N,x); break; case AZ_lu: #ifdef HAVE_AZLU if (N == 0) return; else if (N== 1) { x[0] *= val2[0]; ifail = 0; } else AZ_backsolve(val2, context->pivot,x, bindx2, context->ha, context->iflag, &ifail, &(context->N_nz_factors), &N, &N); #else AZ_printf_err("AZ_lu unavailable: configure with --enable-aztecoo-azlu to make available\n"); exit(1); #endif break; default: if (context->aztec_choices->options[AZ_subdomain_solve] >= AZ_SOLVER_PARAMS) { AZ_printf_out("ERROR: Unknown subdomain solver %d\n", context->aztec_choices->options[AZ_subdomain_solve]); exit(1); } else { /* better to put most of this in the factorization */ AZ_recover_sol_params(context->aztec_choices->options[ AZ_subdomain_solve], &sub_options, &sub_params, &sub_status, &sub_matrix, &sub_precond, &sub_scaling); t1 = sub_options[AZ_recursion_level]; sub_options[AZ_recursion_level]++; t2 = sub_options[AZ_output]; if (context->proc_config[AZ_node] != 0 ) sub_options[AZ_output] = AZ_none; t3 = context->proc_config[AZ_MPI_Tag]; /* fix data_org */ hold_data_org = context->A_overlapped->data_org; new_data_org = (int *) AZ_allocate( sizeof(int) * AZ_send_list ); if (new_data_org == NULL) { AZ_printf_out("Error: Not enough space for subdomain matrix\n"); exit(1); } context->A_overlapped->data_org = new_data_org; context->A_overlapped->matvec = AZ_MSR_matvec_mult; new_data_org[AZ_matrix_type] = AZ_MSR_MATRIX; new_data_org[AZ_N_internal] = N; new_data_org[AZ_N_border ] = 0; new_data_org[AZ_N_external] = 0; new_data_org[AZ_N_int_blk ] = N; new_data_org[AZ_N_bord_blk] = 0; new_data_org[AZ_N_ext_blk ] = 0; new_data_org[AZ_N_neigh ] = 0; new_data_org[AZ_total_send] = 0; new_data_org[AZ_name ] = hold_data_org[AZ_name]; new_data_org[AZ_internal_use]= 0; new_data_org[AZ_N_rows ]= N; sub_precond->Pmat = context->A_overlapped; sub_precond->prec_function = AZ_precondition; sub_proc_config[AZ_node] = 0; sub_proc_config[AZ_N_procs] = 1; #ifdef AZTEC_MPI tptr = AZ_get_comm(context->proc_config); AZ_set_comm(sub_proc_config, *tptr); #endif sprintf(label,"y in ssolve%d", sub_options[AZ_recursion_level]); y = AZ_manage_memory((N+1)*sizeof(double), AZ_ALLOC, AZ_SYS+az_iterate_id, label, &i); for (i = 0 ; i < N ; i++ ) y[i] = x[i]; for (i = 0 ; i < N ; i++ ) x[i] = 0.0; t4 = sub_options[AZ_keep_info]; sub_options[AZ_keep_info] = 1; if (context->aztec_choices->options[AZ_pre_calc] >= AZ_reuse) { t5 = sub_options[AZ_pre_calc]; sub_options[AZ_pre_calc] = AZ_sys_reuse; } AZ_oldsolve(x, y,sub_options,sub_params, sub_status, sub_proc_config, context->A_overlapped, sub_precond, sub_scaling); sub_options[AZ_keep_info] = t4; if (context->aztec_choices->options[AZ_pre_calc] == AZ_sys_reuse) sub_options[AZ_pre_calc] = t5; sub_options[AZ_recursion_level] = t1; sub_options[AZ_output] = t2; context->A_overlapped->data_org = hold_data_org; AZ_free(new_data_org); context->proc_config[AZ_MPI_Tag] = t3; } } }
void init_matrix_vector_structures(int proc_config[], int *update_index[], int *update[], int *data_org[], int *external[], int *extern_index[], int input_option, double *val[], int *bindx[], int *indx[], int *bpntr[], int *rpntr[], int *cpntr[]) /* * Read in the points to be updated on this processor, create the global * distributed form of the application matrix, and then convert it to a * local distributed form for AZTEC kernels. Along the way, initialize the * following quantities: * update_index[], update[], data_org[], a[], bindx[], bpntr[], cpntr[], * rpntr[], indx[], external[], extern_index[]. * * Author: Ray Tuminaro, Div 1422, SNL * Date: 3/15/95 * * Parameters * * proc_config == On input, processor information: * proc_config[AZ_node] = name of this processor * proc_config[AZ_N_procs] = # of processors used * update == On output, list of pts to be updated on this node * val,bindx == On output, local distributed form of arrays * holding matrix values * external == On output, list of external vector elements * update_index == On output, ordering of update and external * extern_index == locally on this processor. For example * 'update_index[i]' gives the index location * of the block which has the global index * 'update[i]'. * data_org == On output, indicates how the data is set out on * this node. For example, data_org[] contains * information on how many unknowns are internal, * external, and border unknowns as well as which * points need to be communicated. See User's Guide * for more details. * input_option == Indicates how update[] will be initialized. * = 0, linear decomposition * = 1, points read from file 'update'. * = 2, box decomposition * See AZ_read_update() comments for more details. * * The default finite difference MSR problem corresponds to a setting up * a series of uncoupled 3D Poisson equations on a cube. * To solve other problems, the call 'add_row_3D(...)' in * 'create_msr_matrix()' can be changed to 'add_row_5pt()' or * 'add_row_9pt()'. */ { int N_update; /* Number of pts updated on this processor */ int MSRorVBR; int chunks; int blk_size, num_blk_cols,num_blk_rows,size,kk, convert_to_vbr = 0; double *val2; int *bindx2; MSRorVBR = AZ_MSR_MATRIX; if (application == 1) MSRorVBR = AZ_VBR_MATRIX; chunks = num_PDE_eqns; if (MSRorVBR == AZ_VBR_MATRIX) chunks = 1; /* initialize the list of global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ AZ_read_update(&N_update, update, proc_config, N_grid_pts, chunks, input_option); /* create the matrix: each processor creates only the */ /* rows appearing in update[] ... however this row is */ /* created as if it were on a serial machine (i.e. using */ /* the global column numbers) */ if (application == 1) create_vbr_matrix(*update, val, indx, N_update, rpntr, bpntr, bindx); else { *indx = NULL; *bpntr = NULL; *rpntr = NULL; *cpntr = NULL; if (application == 0) create_msr_matrix(*update, val, bindx, N_update); if (application == 2) create_fe_matrix(*update, proc_config[AZ_node], bindx, val, N_update); if (application == 3) { AZ_read_msr_matrix(*update, val, bindx, N_update, proc_config); } } /* convert matrix to a distributed parallel matrix */ AZ_transform(proc_config, external, *bindx, *val, *update, update_index, extern_index, data_org, N_update, *indx, *bpntr, *rpntr, cpntr, MSRorVBR); if ( (convert_to_vbr == 1) && (application == 3) ) { if (proc_config[AZ_node] == 0 ) { printf("enter the block size\n"); scanf("%d",&blk_size); } AZ_broadcast((char *) &blk_size, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); if ( N_update%blk_size != 0 ) { (void) fprintf(stderr," The block size must be a multiple of the number of rows per processor.\n"); exit(-1); } num_blk_rows = N_update/blk_size; num_blk_cols = ( (*data_org)[AZ_N_external] + N_update)/blk_size; *cpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); *rpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); *bpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int)); size = 20*(num_blk_cols+2); *indx = (int *) AZ_allocate(size*sizeof(int)); bindx2 = *bindx; val2 = *val; *bindx = (int *) AZ_allocate(size*sizeof(int)); *val = (double *) AZ_allocate(size*blk_size*blk_size*sizeof(double)); for (kk = 0 ; kk < num_blk_cols ; kk++ ) (*cpntr)[kk] = blk_size; AZ_msr2vbr(*val,*indx,*rpntr,*cpntr,*bpntr,*bindx,bindx2,val2, num_blk_rows,num_blk_cols,size,size*blk_size*blk_size,blk_size); MSRorVBR = AZ_VBR_MATRIX; N_update /= blk_size; num_PDE_eqns = blk_size; for (kk = 0 ; kk < N_update ; kk++ ) (*update)[kk] = (*update)[blk_size*kk]/blk_size; for (kk = 0 ; kk < (*data_org)[AZ_N_external] ; kk++ ) (*external)[kk] = (*external)[blk_size*kk]/blk_size; (*data_org)[AZ_matrix_type] = AZ_VBR_MATRIX; (*data_org)[AZ_N_int_blk ] /= blk_size; (*data_org)[AZ_N_bord_blk] /= blk_size; (*data_org)[AZ_N_ext_blk ] /= blk_size; AZ_free(bindx2); AZ_free(val2); } } /* init_matrix_vector_structures */
/*extern void mc64ad_(int *, int *, int *, int *, int *, double*, * int *, int *, int *, int *, int *, double*, * int *, int *); */ void AZ_mat_colperm(int n, int bindx[], double val[], int **invp, int name, struct context *context) /******************************************************************************* Use the mc64ad algorithm to permute the columns of a matrix. Unresolved issues: 1. Similar Aztec modules return invp and delete perm. 2. The goal of this module is to increase the number of diagonal nonzeros. This reduces the total number of nonzeros in MSR format. Some effort is required to make this consistent with Aztec format. Author: D. Day Return code: void ============ Parameter list: =============== bindx : On input, the nonzero sparsity pattern of the matrix for which we will determine a new ordering. Note: bindx is changed in this routine invp: On output, invp[i] gives the location of row i */ { int job,nnz,nzdiag,liw,ldw,i,p,row,ki,kf,k,nod; char str[80]; int *mcontrol, *info, *rowptr; /* double work; */ double *diag; if (n==0) return; nnz = bindx[n]-1; liw = 5*n; ldw = 2*n + nnz; /* If job=1, then ldw := n */ sprintf(str,"invp %s",context->tag); *invp = (int *) AZ_manage_memory((n+1)*sizeof(int), AZ_ALLOC, name, str,&i); mcontrol = (int *) AZ_allocate(10*sizeof(int)); info = (int *) AZ_allocate(10*sizeof(int)); rowptr = (int *) AZ_allocate(liw*sizeof(int)); diag = (double *) AZ_allocate(ldw*sizeof(double)); if (diag == NULL){ printf("AZ_col_perm: Error: memory insufficient. Try job=1\n"); AZ_exit(1); } /* Echo input matrix * printf("AZ_mat_colperm: bindx[%d] = %d\n", n, bindx[n]); * for (row=0;row<n;row++){ * printf("%d %d %22.14e \n", row+1, row+1, val[row]); * ki = bindx[row]; * kf = bindx[row+1]; * for (k=ki;k<kf;k++) * printf("%d %d %22.14e \n", row+1, bindx[k]+1, val[k]); * } */ /* msr2csr: retract the diagonal and delete zeros */ for (row=0;row<n;row++) diag[row] = val[row]; for (row=0;row<=n;row++) rowptr[row] = bindx[row]; p=0; ki = rowptr[0]; for (row=0;row<n;row++){ rowptr[row] += ( row - n - 1); kf = rowptr[row+1]; val[p] = diag[row]; diag[row] = 0.0; bindx[p] = row; ++p; for (k=ki;k<kf;k++){ val[p] = val[k]; bindx[p] = bindx[k]; ++p; } ki = kf; } --rowptr[n]; p=0; ki = rowptr[0]; for (row=0;row<n;row++){ rowptr[row] = p; kf = rowptr[row+1]; for (k=ki;k<kf;k++){ if( val[k] != 0.0 ){ val[p] = val[k]; bindx[p] = bindx[k]; ++p; } } ki = kf; } rowptr[n] = p; nnz = p; /* * Convert to standard sparse matrix format with Fortran indexing * bindx(1:n+1), bindx(n+2:nnz+n+2), val(1:nnz) * bindx[n+1:rowptr[n]+n] := bindx[0:rowptr[n]-1] and then * bindx[0:n] := rowptr[0:n] * mcontrol[0:2] := -1 turns off output */ for (k=p-1;k>=0;k--) bindx[k+n+1] = bindx[k]+1; for (k=0;k<=n;k++) bindx[k] = rowptr[k]+1; for (k=0;k<=n;k++) rowptr[k] = 0; job = 4; /* job = 1 may do less violence to symmetric form */ /* for (i=0; i<4; i++) mcontrol[i] = 6; */ for (i=0; i<3; i++) mcontrol[i] = -1; for (i=3; i<10; i++) mcontrol[i] = 0; for (i=0; i<10; i++) info[i] = 0; MC64AD_F77(&job,&n,&nnz,bindx,&(bindx[n+1]),val,&nzdiag,*invp,&liw,rowptr,&ldw,diag,mcontrol,info); /* nzdiag is the number of zero diagonals in the permuted matrix */ /* +1 structurally singular matrix (iffi nzdiag < n) +2 the returned scaling factors are large and may cause overflow when used to scale the matrix (for JOB = 5 entry only.) -1 JOB < 1 or JOB > 5. Value of JOB held in INFO(2). -2 N < 1. Value of N held in INFO(2). -3 NE < 1. Value of NE held in INFO(2). -4 the defined length LIW violates the restriction on LIW. Value of LIW required given by INFO(2). -5 the defined length LDW violates the restriction on LDW. Value of LDW required given by INFO(2). -6 entries are found whose row indices are out of range. INFO(2) contains the index of a column in which such an entry is found. -7 repeated entries are found. INFO(2) contains the index of a column in which such entries are found. */ if( info[0] >= 0 ){ /* convert permutation to C indexing and invert perm */ for (i = 0;i< n;i++) (*invp)[i]--; /* 1 2 3 0 */ /* csr2msr: diag = diag(A P) */ for (i = 0;i<= n;i++) bindx[i] += n; p = bindx[n]; for (i = n+1;i<p;i++) bindx[i]--; for (i = n+1;i<p;i++) bindx[i] = (*invp)[bindx[i]]; for (row=0;row<n;row++) diag[row] = 0.; p = n+1; for (row=0;row<n;row++){ ki = bindx[row]; bindx[row] = p; kf = bindx[row+1]; for (k=ki;k<kf;k++){ if( row != bindx[k]){ bindx[p] = bindx[k]; val[p-n-1] = val[k-n-1]; ++p; } else { diag[row] = val[k-n-1]; } } } bindx[n] = p; /* val[n+1: (n+1) + nod-1] := val[0:nod-1], nod = number off-diagonals */ nod = p-(n+1); /* printf("az_colperm: number of off diagonals is %d\n",nod); */ for (i=nod ; i>0 ; i-- ) val[n+i] = val[i-1]; val[n] = 0; for (i = 0 ; i < n ; i++ ) val[i] = diag[i]; /* Sort the colmns to ascend */ /* This appears unnecessary, though one never can be certain. for (row=0;row<n;row++){ ki = bindx[row]; kf = bindx[row+1]; for (p=ki+1;k<kf;k++){ k = p; while ( (k>ki) && (bindx[k-1]>bindx[k]) ){ work = val[k]; val[k] = val[k-1]; val[k-1] = work; i = bindx[k]; bindx[k] = bindx[k-1]; bindx[k-1] = i; --k; } } } */ if( info[0] == 1 ){ printf("AZ_col_perm: Error: Internal matrix is singular\n"); } }else{ /* Ideally an error flag would be returned here */ printf("az_colperm: Error: info = %d %d\n",info[0],info[1]); AZ_exit(1); } AZ_free(mcontrol); AZ_free(info); AZ_free(diag); AZ_free(rowptr); return; }