/*
 * Case-insensitive variant of strcmp().
 *
 * Upper-case ASCII letters ('A'..'Z') are folded to lower case before the
 * comparison.  Folding in each string stops at that string's first '\n'
 * (or at its terminating '\0'); any characters following a newline are
 * compared exactly as they are.
 *
 * Returns 0 when the folded strings are equal, a negative value when
 * `input` sorts before `string`, and a positive value otherwise (bytes are
 * compared as unsigned char, as strcmp() does).
 *
 * The comparison is done in place: no temporary copies are allocated, so
 * the routine cannot run into a failed (NULL) allocation.
 */
int ML_strcmp(char *input, char *string)
{
   const unsigned char *lhs = (const unsigned char *) input;
   const unsigned char *rhs = (const unsigned char *) string;
   int fold_lhs = 1;   /* nonzero while still before the first '\n' of input  */
   int fold_rhs = 1;   /* nonzero while still before the first '\n' of string */

   for (;;) {
      unsigned char a = *lhs++;
      unsigned char b = *rhs++;

      if (a == '\n')
         fold_lhs = 0;
      else if (fold_lhs && (a >= 'A') && (a <= 'Z'))
         a = (unsigned char) ('a' + (a - 'A'));

      if (b == '\n')
         fold_rhs = 0;
      else if (fold_rhs && (b >= 'A') && (b <= 'Z'))
         b = (unsigned char) ('a' + (b - 'A'));

      if (a != b)
         return (int) a - (int) b;
      if (a == '\0')       /* both strings ended at the same position */
         return 0;
   }
}
// ================================================ ====== ==== ==== == = // Copied from ml_agg_genP.c static void ML_Finalize_Aux(ML_Operator *A) { int i; A->getrow->func_ptr = A->aux_data->aux_func_ptr; A->aux_data->aux_func_ptr = 0; for (i = 0 ; i < A->aux_data->filter_size ; ++i) ML_free((A->aux_data->filter[i])); ML_free(A->aux_data->filter); }
/*
 * Sanity check of a Reitzinger hierarchy.
 *
 * For every level from ml->ML_finest_level down to (but excluding)
 * ml->ML_coarsest_level, a normalized random vector v is pushed through
 * Tmat_array[level] and then through the level matrix; the 2-norm of the
 * result is printed as "||S*T*v||" on processor 0 when the print level
 * exceeds 5.
 *
 * Returns 1 (without checking anything) when incr_or_decr is
 * ML_INCREASING, 0 otherwise.
 */
int ML_Reitzinger_Check_Hierarchy(ML *ml, ML_Operator **Tmat_array, int incr_or_decr)
{
  int          level, k;
  int          finest_level, coarsest_level;
  ML_Operator *Amat, *Tmat;
  double      *randvec, *result, *result1;
  double       dnorm;

  finest_level   = ml->ML_finest_level;
  coarsest_level = ml->ML_coarsest_level;

  /* only decreasing level numbering is handled */
  if (incr_or_decr == ML_INCREASING) {
    if (ml->comm->ML_mypid == 0) {
      printf("ML_Reitzinger_Check_Hierarchy: ML_INCREASING is not supported ");
      printf(" at this time. Not checking hierarchy.\n");
    }
    return 1;
  }

  if ( ML_Get_PrintLevel() > 5 ) {
    printf("ML_Reitzinger_Check_Hierarchy: Checking null space\n");
  }

  level = finest_level;
  while (level > coarsest_level) {
    Amat = ml->Amat+level;
    Tmat = Tmat_array[level];

    /* normalized random vector */
    randvec = (double *) ML_allocate(Tmat->invec_leng * sizeof(double) );
    ML_random_vec(randvec,Tmat->invec_leng, ml->comm);
    dnorm = sqrt( ML_gdot(Tmat->invec_leng, randvec, randvec, ml->comm) );
    for (k = 0; k < Tmat->invec_leng; k++) {
      randvec[k] /= dnorm;
    }

    /* result = T * v, result1 = A * result */
    result  = (double *) ML_allocate(Amat->invec_leng * sizeof(double) );
    result1 = (double *) ML_allocate(Amat->outvec_leng * sizeof(double) );

    ML_Operator_Apply(Tmat, Tmat->invec_leng, randvec,
                      Tmat->outvec_leng, result);
    ML_Operator_Apply(Amat, Amat->invec_leng, result,
                      Amat->outvec_leng, result1);

    dnorm = sqrt( ML_gdot(Amat->outvec_leng, result1, result1, ml->comm) );
    if ( (ML_Get_PrintLevel() > 5) && (ml->comm->ML_mypid == 0) ) {
      printf("Level %d: for random v, ||S*T*v|| = %15.10e\n",level,dnorm);
    }

    ML_free(randvec);
    ML_free(result);
    ML_free(result1);

    level--;
  }

  if ( (ML_Get_PrintLevel() > 5) && (ml->comm->ML_mypid == 0) ) {
    printf("\n");
  }

  return 0;
}
/*
 * One correction sweep used by ML_Smoother_Ifpack when x already holds a
 * (possibly nonzero) iterate:
 *
 *    r = A x - rhs        (ML_Operator_Apply followed by a BLAS daxpy)
 *    c = x                (BLAS dcopy)
 *    ML_Ifpack_Solve(handle, c, r)
 *    x = x - c            (BLAS daxpy)
 *
 * The two work vectors are allocated and released inside the sweep.
 */
static void ML_Smoother_Ifpack_Correct(ML_Smoother *sm, void *Ifpack_Handle,
                                       double x[], double rhs[])
{
  int     n = sm->my_level->Amat->invec_leng;
  int     one_int = 1;
  double  minus_one_double = -1.0;
  double *resid = NULL;
  double *corr  = NULL;

  assert (n == sm->my_level->Amat->outvec_leng);

  resid = (double*) ML_allocate(sizeof(double) * (n + 1));
  corr  = (double*) ML_allocate(sizeof(double) * (n + 1));

  ML_Operator_Apply(sm->my_level->Amat, n, x, n, resid);
  DCOPY_F77(&n, x, &one_int, corr, &one_int);
  DAXPY_F77(&n, &minus_one_double, rhs, &one_int, resid, &one_int);
  ML_Ifpack_Solve(Ifpack_Handle, corr, resid);
  DAXPY_F77(&n, &minus_one_double, corr, &one_int, x, &one_int);

  ML_free(resid);
  ML_free(corr);
}

/*
 * ML smoother callback wrapping an Ifpack preconditioner.
 *
 * The Ifpack handle is read from sm->smoother->data.  The first
 * application is a plain ML_Ifpack_Solve(handle, x, rhs) unless
 * sm->init_guess is ML_NONZERO, in which case a correction sweep is used.
 * Each of the remaining sm->ntimes - 1 applications is a correction sweep.
 *
 * inlen and outlen are not used.  Always returns 0.
 */
int ML_Smoother_Ifpack(ML_Smoother *sm,int inlen,double x[],int outlen,
                       double rhs[])
{
  void *Ifpack_Handle = sm->smoother->data;
  int   sweep;

  if (sm->init_guess == ML_NONZERO) {
    ML_Smoother_Ifpack_Correct(sm, Ifpack_Handle, x, rhs);
  }
  else {
    ML_Ifpack_Solve(Ifpack_Handle, x, rhs);
  }

  sweep = 1;
  while (sweep < sm->ntimes) {
    ML_Smoother_Ifpack_Correct(sm, Ifpack_Handle, x, rhs);
    sweep++;
  }

  return 0;
} /* ML_Smoother_Ifpack */
/*
 * Release the file-level xCDeadNodDof structure, if one exists.
 * Its xDeadNodDof member is freed first, then the structure itself, and
 * the global pointer is reset to NULL.  Always returns 0.
 */
int ML_qr_fix_Destroy(void)
{
  if (xCDeadNodDof != NULL) {
    /* release the xDeadNodDof field when it has been set */
    if (xCDeadNodDof->xDeadNodDof) {
      ML_free(xCDeadNodDof->xDeadNodDof);
    }

    /* free the structure itself */
    ML_free(xCDeadNodDof);
    xCDeadNodDof = NULL;
  }

  return(0);
}
/*
 * Free every logged allocation whose name matches `name`.
 *
 * At most the first three characters are compared; `inlen` gives the
 * number of characters to compare and is clamped to the range [0,3]
 * (so inlen <= 0 matches every active entry).  Entries whose logged
 * length is -1 are skipped; a matching entry has its logged address
 * passed to ML_free and its logged length reset to -1.
 *
 * Always returns 0.
 */
int ML_memory_clean( char *name, int inlen )
{
   int   slot, k, prefix_len, matches;
   void *mem_ptr;

   prefix_len = inlen;
   if ( inlen > 3 ) prefix_len = 3;
   if ( inlen < 0 ) prefix_len = 0;

   for ( slot = 0; slot < MAX_MALLOC_LOG; slot++ )
   {
      if ( malloc_leng_log[slot] == -1 ) continue;

      /* compare the leading characters, stopping at the first mismatch */
      matches = 1;
      k       = 0;
      while ( matches && (k < prefix_len) )
      {
         if ( malloc_name_log[slot][k] != name[k] ) matches = 0;
         k++;
      }
      if ( !matches ) continue;

      mem_ptr = (void *) malloc_addr_log[slot];
      ML_free( mem_ptr );
      malloc_leng_log[slot] = -1;
   }
   return 0;
}
// ====================================================================== MultiVector GetDiagonal(const Operator& A, const int offset) { // FIXME if (A.GetDomainSpace() != A.GetRangeSpace()) ML_THROW("Currently only square matrices are supported", -1); MultiVector D(A.GetDomainSpace()); D = 0.0; ML_Operator* matrix = A.GetML_Operator(); if (matrix->getrow == NULL) ML_THROW("getrow() not set!", -1); int row_length; int allocated = 128; int* bindx = (int *) ML_allocate(allocated*sizeof(int )); double* val = (double *) ML_allocate(allocated*sizeof(double)); for (int i = 0 ; i < matrix->getrow->Nrows; i++) { int GlobalRow = A.GetGRID(i); ML_get_matrix_row(matrix, 1, &i, &allocated, &bindx, &val, &row_length, 0); for (int j = 0; j < row_length; j++) { D(i) = 0.0; if (A.GetGCID(bindx[j]) == GlobalRow + offset) { D(i) = val[j]; break; } } } ML_free(val); ML_free(bindx); return (D); }
void sample1(struct data *Afine_data, struct data *Acoarse_data, struct data *Rmat_data, struct data *Pmat_data, double *sol, double *rhs ) { ML *my_ml; int i; int fine_grid, output_level = 10, N_grids = 2, grid0 = 0, grid1 = 1; int Nfine, Ncoarse; double *diagonal; Nfine = Rmat_data->from_size; Ncoarse = Rmat_data->to_size; diagonal = (double *) malloc(Nfine*sizeof(double)); for (i = 0; i < Nfine; i++) diagonal[i] = 2.; fine_grid = grid1; ML_Create (&my_ml, N_grids); ML_Set_OutputLevel( my_ml, output_level); ML_Init_Amatrix (my_ml, grid1, Nfine, Nfine,(void *) Afine_data); ML_Set_Amatrix_Getrow(my_ml, grid1, myAgetrow, my_comm, Nfine+1); ML_Set_Amatrix_Matvec(my_ml, grid1, mymatvec); ML_Set_Amatrix_Diag (my_ml, grid1, Nfine, diagonal); ML_Gen_Smoother_Jacobi(my_ml, grid1, ML_PRESMOOTHER, 2, ML_DEFAULT); ML_Init_Prolongator(my_ml, grid0, grid1, Ncoarse,Nfine,(void *)Pmat_data); ML_Set_Prolongator_Getrow(my_ml, grid0, myPgetrow, my_comm, Ncoarse+1); ML_Set_Prolongator_Matvec(my_ml, grid0, myinterp); ML_Init_Restrictor(my_ml, grid1, grid0, Nfine, Ncoarse,(void *)Rmat_data); ML_Set_Restrictor_Getrow(my_ml, grid1, myRgetrow, my_comm, Nfine+1); ML_Set_Restrictor_Matvec(my_ml, grid1, myrestrict); ML_Gen_AmatrixRAP(my_ml,grid1, grid0); #ifdef SUPERLU ML_Gen_CoarseSolverSuperLU(my_ml, grid0); #else ML_Gen_Smoother_Jacobi(my_ml, grid0, ML_PRESMOOTHER, 100, ML_DEFAULT); #endif /* ML_Gen_Smoother_Jacobi(my_ml, grid0, ML_PRESMOOTHER, 200, ML_DEFAULT); */ /* ML_Gen_Smoother_GaussSeidel(my_ml, grid0, ML_PRESMOOTHER, 200, 1.); */ ML_Gen_Solver (my_ml, 0, fine_grid, grid0); ML_Iterate(my_ml, sol, rhs); ML_Destroy(&my_ml); ML_free(diagonal); }
void sample3(struct data *Afine_data, struct data *Acoarse_data, struct data *Rmat_data, struct data *Pmat_data, double *sol, double *rhs ) { ML *my_ml; double *diagonal; int i, fine_grid, output_level = 10, N_grids = 2, grid0 = 1, grid1 = 0; int Nfine, Ncoarse; Nfine = Rmat_data->from_size; Ncoarse = Rmat_data->to_size; diagonal = (double *) malloc(Nfine*sizeof(double)); for (i = 0; i < Nfine; i++) diagonal[i] = 2.; fine_grid = grid1; ML_Create (&my_ml, N_grids); ML_Set_OutputLevel(my_ml, output_level); ML_Init_Amatrix (my_ml, grid1, Nfine, Nfine, (void *) Afine_data); ML_Set_Amatrix_Matvec(my_ml, grid1, mymatvec ); ML_Set_Amatrix_Diag (my_ml, grid1, Nfine, diagonal); ML_Gen_Smoother_Jacobi(my_ml, grid1, ML_PRESMOOTHER, 2, ML_DEFAULT); ML_Init_Amatrix (my_ml, grid0, Ncoarse, Ncoarse, (void *) Acoarse_data); ML_Set_Amatrix_Matvec(my_ml, grid0, mymatvec); ML_Set_Amatrix_Diag (my_ml, grid0, Ncoarse, diagonal); ML_Gen_Smoother_Jacobi(my_ml, grid0, ML_PRESMOOTHER, 200, ML_DEFAULT); ML_Init_Prolongator(my_ml, grid0, grid1, Ncoarse, Nfine, (void*)Pmat_data); ML_Set_Prolongator_Matvec(my_ml, grid0, myinterp); ML_Init_Restrictor(my_ml, grid1, grid0, Nfine, Ncoarse,(void *)Rmat_data); ML_Set_Restrictor_Matvec(my_ml, grid1, myrestrict); ML_Gen_Solver (my_ml, 0, fine_grid, grid0); ML_free(diagonal); ML_Iterate(my_ml, sol, rhs); ML_Destroy(&my_ml); }
/*
 * Consistency check for a triple matrix product.
 *
 * A deterministic test vector v of length iNvec is built on each processor,
 * then R*(A*(P*v)) is computed with ML_getrow_matvec and compared against
 * RAP*v computed with ML_restricted_MSR_mult.  When the 2-norm of the
 * difference exceeds 1e-10 * ||RAP v||_2, processor 0 prints both norms.
 *
 * Parameters
 *   ml           : supplies the communicator (ml->comm).
 *   RAP, R, A, P : operators to check; each must have
 *                  getrow->ML_id == ML_ID_MATRIX, otherwise the program
 *                  exits with status 1 (processor 0 prints which operator
 *                  was wrong together with that operator's id).
 *   iNvec        : local length of the test vector.
 *   oNvec        : local length of the result vectors.
 */
void ML_rap_check(ML *ml, ML_Operator *RAP, ML_Operator *R,
        ML_Operator *A, ML_Operator *P, int iNvec, int oNvec)
{
   int i,j;
   double *vec1, *vec2, *vec3, *vec4, *vec5;
   double norm1, norm2;

#ifdef DEBUG
   printf("ML_rap_check begins ...\n");
#endif

   if (RAP->getrow->ML_id != ML_ID_MATRIX) {
      if (ml->comm->ML_mypid == 0)
         printf("ML_rap_check: RAP is the wrong object (=%d). \n",
                RAP->getrow->ML_id);
      exit(1);
   }
   /* each message below reports the id of the operator that failed the
      test (previously RAP's id was printed for R, P and A as well) */
   if (R->getrow->ML_id != ML_ID_MATRIX) {
      if (ml->comm->ML_mypid == 0)
         printf("ML_rap_check: R is the wrong object (=%d). \n",
                R->getrow->ML_id);
      exit(1);
   }
   if (P->getrow->ML_id != ML_ID_MATRIX) {
      if (ml->comm->ML_mypid == 0)
         printf("ML_rap_check: P is the wrong object (=%d). \n",
                P->getrow->ML_id);
      exit(1);
   }
   if (A->getrow->ML_id != ML_ID_MATRIX) {
      if (ml->comm->ML_mypid == 0)
         printf("ML_rap_check: A is the wrong object (=%d). \n",
                A->getrow->ML_id);
      exit(1);
   }

   /* j is the number of external variables */

   j = 0;
   if (RAP->getrow->pre_comm != NULL) {
      for (i = 0; i < RAP->getrow->pre_comm->N_neighbors; i++)
         j += RAP->getrow->pre_comm->neighbors[i].N_rcv;
   }

   vec1 = (double *) ML_allocate((iNvec+ 1 + j)*sizeof(double));
   vec2 = (double *) ML_allocate((P->getrow->Nrows+1)*sizeof(double));
   vec3 = (double *) ML_allocate((A->getrow->Nrows+1)*sizeof(double));
   vec4 = (double *) ML_allocate((oNvec + 1)*sizeof(double));
   vec5 = (double *) ML_allocate((oNvec + 1)*sizeof(double));

   /* deterministic, processor-dependent test vector */
   for (i = 0; i < iNvec; i++)
      vec1[i] = (double) (ml->comm->ML_mypid*2301 + i*7 + 1);

   /* vec4 = R * (A * (P * vec1)) */
   j = P->getrow->Nrows;
   ML_getrow_matvec(P, vec1, iNvec, vec2,&j);
   i = A->getrow->Nrows;
   ML_getrow_matvec(A, vec2, j, vec3,&i);
   ML_getrow_matvec(R, vec3, i, vec4,&oNvec);

   /* j is the number of variables sent in communication */

   j = 0;
   if (RAP->getrow->pre_comm != NULL) {
      for (i = 0; i < RAP->getrow->pre_comm->N_neighbors; i++)
         j += RAP->getrow->pre_comm->neighbors[i].N_send;
   }

   /* vec5 = RAP * vec1 */
   ML_restricted_MSR_mult( RAP, oNvec, vec1, vec5, j);

   norm1 = sqrt(ML_gdot(oNvec, vec5, vec5, ml->comm));

   for (i = 0; i < oNvec; i++) vec5[i] -= vec4[i];

   norm2 = sqrt(ML_gdot(oNvec, vec5, vec5, ml->comm));

   if (norm2 > norm1*1e-10) {
      /* norm2 is reused to report || R (A (P v)) || */
      norm2 = sqrt(ML_gdot(oNvec, vec4, vec4, ml->comm));
      if (ml->comm->ML_mypid == 0) {
         printf("***************************************\n");
         printf("RAP seems inaccurate:\n");
         printf(" || RAP v ||_2 = %e\n\n", norm1);
         printf(" || R (A (P v)) ||_2 = %e\n",norm2);
         printf("***************************************\n");
      }
   }

   ML_free(vec5);
   ML_free(vec4);
   ML_free(vec3);
   ML_free(vec2);
   ML_free(vec1);

#ifdef DEBUG
   printf("ML_rap_check ends ...\n");
#endif
}
void ML_rap(ML_Operator *Rmat, ML_Operator *Amat, ML_Operator *Pmat, ML_Operator *Result, int matrix_type) { int max_per_proc, i, j, N_input_vector; ML_Operator *APmat, *RAPmat, *Pcomm, *RAPcomm, *APcomm, *AP2comm, *tptr; ML_CommInfoOP *getrow_comm; double *scales = NULL; # ifdef ML_TIMING double tpre,tmult,tpost,ttotal; # endif /* Check that N_input_vector is reasonable */ # ifdef ML_TIMING tpre = GetClock(); ttotal = GetClock(); # endif N_input_vector = Pmat->invec_leng; getrow_comm = Pmat->getrow->pre_comm; if ( getrow_comm != NULL) { for (i = 0; i < getrow_comm->N_neighbors; i++) { for (j = 0; j < getrow_comm->neighbors[i].N_send; j++) { if (getrow_comm->neighbors[i].send_list[j] >= N_input_vector) { printf("(%d) Error: N_input_vector (%d) argument to rap() is not \n", Amat->comm->ML_mypid,N_input_vector); printf("(%d) Error: larger than %dth element (%d) sent to node %d\n", Amat->comm->ML_mypid,j+1, getrow_comm->neighbors[i].send_list[j], getrow_comm->neighbors[i].ML_id); printf("(%d) Error: Amat(%d,%d) Rmat(%d,%d) Pmat(%d,%d)\n", Amat->comm->ML_mypid, Amat->outvec_leng,Amat->invec_leng, Rmat->outvec_leng,Rmat->invec_leng, Pmat->outvec_leng,Pmat->invec_leng); fflush(stdout); exit(1); } } } } ML_create_unique_col_id(N_input_vector, &(Pmat->getrow->loc_glob_map), getrow_comm, &max_per_proc, Pmat->comm); Pmat->getrow->use_loc_glob_map = ML_YES; if (Amat->getrow->pre_comm != NULL) ML_exchange_rows( Pmat, &Pcomm, Amat->getrow->pre_comm); else Pcomm = Pmat; #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : A * P begins...\n"); #endif # ifdef ML_TIMING tpre = GetClock() - tpre; tmult = GetClock(); # endif ML_matmat_mult(Amat, Pcomm , &APmat); # ifdef ML_TIMING tmult = GetClock() - tmult; tpost = GetClock(); # endif #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : A * P ends.\n"); #endif ML_free(Pmat->getrow->loc_glob_map); Pmat->getrow->loc_glob_map = NULL; Pmat->getrow->use_loc_glob_map = ML_NO; if (Amat->getrow->pre_comm != NULL) { tptr = 
Pcomm; while ( (tptr!= NULL) && (tptr->sub_matrix != Pmat)) tptr = tptr->sub_matrix; if (tptr != NULL) tptr->sub_matrix = NULL; ML_RECUR_CSR_MSRdata_Destroy(Pcomm); ML_Operator_Destroy(&Pcomm); } if (Amat->getrow->post_comm != NULL) { ML_exchange_rows(APmat, &APcomm, Amat->getrow->post_comm); } else APcomm = APmat; /* Take into account any scaling in Amat */ if (Rmat->from != NULL) ML_DVector_GetDataPtr(Rmat->from->Amat_Normalization,&scales); if (scales != NULL) ML_Scale_CSR(APcomm, scales, 0); if (Rmat->getrow->pre_comm != NULL) ML_exchange_rows( APcomm, &AP2comm, Rmat->getrow->pre_comm); else AP2comm = APcomm; # ifdef ML_TIMING tpost = GetClock() - tpost; if ( Pmat->comm->ML_mypid == 0 && ML_Get_PrintLevel() > 5) { int level=-1; if (Amat->from != NULL) level = Amat->from->levelnum-1; printf("Timing summary (in seconds) for product RAP on level %d\n", level); printf(" (level %d) RAP right: pre-multiply communication time = %3.2e\n", level, tpre); printf(" (level %d) RAP right: multiply time = %3.2e\n", level, tmult); printf(" (level %d) RAP right: post-multiply communication time = %3.2e\n", level, tpost); } # endif #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : R * AP begins...\n"); #endif # ifdef ML_TIMING tmult = GetClock(); # endif ML_matmat_mult(Rmat,AP2comm, &RAPmat); #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : R * AP ends.\n"); #endif ML_RECUR_CSR_MSRdata_Destroy(AP2comm); ML_Operator_Destroy(&AP2comm); # ifdef ML_TIMING tmult = GetClock()-tmult; tpost = GetClock(); # endif if (Rmat->getrow->post_comm != NULL) ML_exchange_rows( RAPmat, &RAPcomm, Rmat->getrow->post_comm); else RAPcomm = RAPmat; scales = NULL; if (Rmat->to != NULL) ML_DVector_GetDataPtr(Rmat->to->Amat_Normalization,&scales); if (scales != NULL) ML_Scale_CSR(RAPcomm, scales, 1); RAPcomm->num_PDEs = Amat->num_PDEs; RAPcomm->num_rigid = Amat->num_rigid; if (matrix_type == ML_MSR_MATRIX) ML_back_to_local(RAPcomm, Result, max_per_proc); else if (matrix_type == 
ML_CSR_MATRIX) ML_back_to_csrlocal(RAPcomm, Result, max_per_proc); else if (matrix_type == ML_EpetraCRS_MATRIX) #ifdef ML_WITH_EPETRA ML_back_to_epetraCrs(RAPcomm, Result, Rmat, Pmat); #else pr_error("ML_RAP: ML_EpetraCRS_MATRIX requires epetra to be compiled in.\n"); #endif else pr_error("ML_RAP: Unknown matrix type\n");
/*
 * Build the edge-to-node matrix T for the 2D model problem and return it
 * as an ML_Operator.
 *
 * One row is created per local edge.  Each row gets a -1 and a +1 entry in
 * the columns of the edge's two end nodes (and, when `periodic` is
 * defined, a third +1 entry).  The matrix is assembled in MSR form,
 * converted to CSR, turned into a local ML matrix with
 * AZ_Tmat_transform2ml, and finally given a clone of Kn_mat's
 * pre-communication structure.
 */
ML_Operator *user_T_build(struct user_partition *Edge_Partition,
                          struct user_partition *Node_Partition,
                          ML_Operator *Kn_mat)
{
  int nx, edge, ii, jj, horv, Ncols, Nexterns;
  int *Tmat_bindx;
  double *Tmat_val;
  ML_Operator *Tmat;
  struct ML_CSR_MSRdata *csr_data;
  ML_Comm *comm;
  struct aztec_context *aztec_context;
  int global_id;
  int Nlocal_nodes, Nlocal_edges;
  int nz_ptr;
  int minus_col, plus_col;
#ifdef periodic
  int extra_col;
#endif

  Nlocal_nodes = Node_Partition->Nlocal;
  Nlocal_edges = Edge_Partition->Nlocal;

  /* number of nodes per grid line */
#ifdef periodic
  nx = (int) sqrt( ((double) Node_Partition->Nglobal - 2) + .00001);
#else
  nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001);
#endif

  Tmat_bindx = (int    *) malloc((6*Nlocal_edges+5)*sizeof(int));
  Tmat_val   = (double *) malloc((6*Nlocal_edges+5)*sizeof(double));

  /* MSR layout: the off-diagonal entries start right after the pointers */
  Tmat_bindx[0] = Nlocal_edges + 1;

  for (edge = 0; edge < Nlocal_edges; edge++) {
    global_id = (Edge_Partition->my_global_ids)[edge];

#ifdef periodic
    Tmat_bindx[edge+1] = Tmat_bindx[edge] + 2 + 1;
#else
    Tmat_bindx[edge+1] = Tmat_bindx[edge] + 2;
#endif
    Tmat_val[edge] = 0.0;

    inv2dindex(global_id, &ii, &jj, nx, &horv);
    nz_ptr = Tmat_bindx[edge];

    /* pick the two end nodes according to the edge orientation */
    if (horv == HORIZONTAL) {
      minus_col = southwest2d(ii,jj,nx);
      plus_col  = southeast2d(ii,jj,nx);
#ifdef periodic
      extra_col = Nlocal_nodes-2;
#endif
    }
    else {
      minus_col = northwest2d(ii,jj,nx);
      plus_col  = southwest2d(ii,jj,nx);
#ifdef periodic
      extra_col = Nlocal_nodes - 1;
#endif
    }

    Tmat_bindx[nz_ptr] = minus_col;
    Tmat_val[nz_ptr++] = -1.;
    Tmat_bindx[nz_ptr] = plus_col;
    Tmat_val[nz_ptr++] = 1.;
#ifdef periodic
    Tmat_bindx[nz_ptr] = extra_col;
    Tmat_val[nz_ptr++] = 1.;
#endif
  }

  /* Convert the MSR matrix to a CSR matrix. Then use a modified Aztec  */
  /* routine to convert the global CSR matrix to a local ML matrix.     */
  /* Since this routine does not compute the communication structure,   */
  /* we assume that it is identical to Kn's and just clone it.          */

  csr_data = (struct ML_CSR_MSRdata *)
             ML_allocate(sizeof(struct ML_CSR_MSRdata));
  csr_data->columns = Tmat_bindx;
  csr_data->values  = Tmat_val;
  ML_MSR2CSR(csr_data, Nlocal_edges, &Ncols);

  aztec_context = (struct aztec_context *) Kn_mat->data;
  Nexterns = (aztec_context->Amat->data_org)[AZ_N_external];
  Nexterns = 0;   /* the value read from Kn_mat is overridden */

  ML_Comm_Create( &comm);

  AZ_Tmat_transform2ml(Nexterns, Node_Partition->needed_external_ids,
                       reordered_node_externs,
                       Tmat_bindx, Tmat_val, csr_data->rowptr, Nlocal_nodes,
                       Node_Partition->my_global_ids,
                       comm, Nlocal_edges, &Tmat);
  ML_free(csr_data);

  Tmat->data_destroy = ML_CSR_MSRdata_Destroy;
  ML_CommInfoOP_Clone(&(Tmat->getrow->pre_comm), Kn_mat->getrow->pre_comm);

  return(Tmat);
}
void ML_Amesos_Destroy(void *data) { #ifdef TFLOP if( Level__ != -1 ) { printf("Amesos (level %d) : Time for solve = %f (s)\n",Level__,TimeForSolve__); if( NumSolves__ ) printf("Amesos (level %d) : avg time for solve = %f (s) ( # solve = %d)\n",Level__,TimeForSolve__/NumSolves__,NumSolves__); else printf("Amesos (level %d) : no solve\n",Level__); } #else if( false && Level__ != -1 ) { // MS // I don't like this output any more std::cout << std::endl; std::cout << "Amesos (level " << Level__ << ") : Time for solve = " << TimeForSolve__ << " (s)" << std::endl; if( NumSolves__ ) std::cout << "Amesos (level " << Level__ << ") : avg time for solve = " << TimeForSolve__/NumSolves__ << " (s) ( # solves = " << NumSolves__ << ")" << std::endl; else std::cout << "Amesos (level " << Level__ << ") : no solve" << std::endl; #ifdef ML_AMESOS_DEBUG std::cout << "Amesos (level " << Level__ << ") : max (over solves) ||Ax - b|| = " << setiosflags(ios::scientific) << MaxError__ << std::endl; #endif std::cout << std::endl; } #endif Amesos_Handle_Type *Amesos_Handle = (Amesos_Handle_Type*) data; if (Amesos_Handle->A_Base == 0) { ML_free(Amesos_Handle); return; } Amesos_BaseSolver *A_Base = (Amesos_BaseSolver *) Amesos_Handle->A_Base; const Epetra_LinearProblem *Amesos_LinearProblem; Amesos_LinearProblem = A_Base->GetProblem(); # ifdef ML_MPI const Epetra_MpiComm *comm = dynamic_cast<const Epetra_MpiComm*>(&(Amesos_LinearProblem->GetOperator()->Comm())); if (comm == 0) { printf("ML_Amesos_Destroy: error getting MPI_Comm object\n"); exit(EXIT_FAILURE); } MPI_Comm subcomm = comm->GetMpiComm(); # endif delete A_Base ; delete Amesos_LinearProblem->GetOperator(); delete Amesos_LinearProblem ; # ifdef ML_MPI if (Amesos_Handle->freeMpiComm == 1) MPI_Comm_free(&subcomm); # endif ML_free(Amesos_Handle); } /*ML_Amesos_Destroy()*/
/*
 * Check the prolongator stored on `coarse_level` by interpolating a linear
 * function.
 *
 * The function f(x,y) = x + 3*y + 0.5 is sampled at the coarse vertices,
 * pushed through ml->SingleLevel[coarse_level].Pmat, and compared with the
 * same function sampled at the fine vertices.  The largest relative error
 * is accumulated in a local variable but is currently not reported.
 *
 * The routine exits with status 1 when grid data, grid functions or the
 * USR_grid_get_nvertices callback are missing on either level.
 *
 * ASSUMPTION (from the original code): for each grid point on this
 * processor there is a set of equations, and all equations at a grid point
 * are numbered consecutively.
 */
void ML_interp_check(ML *ml, int coarse_level, int fine_level)
{
   int    ii, jj, ncoarse, nfine;
   double *c_data, *f_data, coords[3], dtemp, d2, dlargest;
   ML_GridFunc *coarse_funs, *fine_funs;
   void   *coarse_data, *fine_data;
   int    nfine_eqn, ncoarse_eqn, stride = 1;

   /* check an interpolated linear function */

   coarse_data = ml->SingleLevel[coarse_level].Grid->Grid;
   fine_data   = ml->SingleLevel[  fine_level].Grid->Grid;
   coarse_funs = ml->SingleLevel[coarse_level].Grid->gridfcn;
   fine_funs   = ml->SingleLevel[  fine_level].Grid->gridfcn;

   if ( (coarse_data==NULL)||(fine_data==NULL)) {
      printf("ML_interp_check: grid data not found?\n");
      exit(1);
   }
   if ( (coarse_funs==NULL)||(fine_funs==NULL)) {
      printf("ML_interp_check: grid functions not found?\n");
      exit(1);
   }
   if ( (coarse_funs->USR_grid_get_nvertices == 0) ||
        (  fine_funs->USR_grid_get_nvertices == 0)) {
      printf("ML_interp_check: USR_grid_get_nvertices not found?\n");
      exit(1);
   }

   ncoarse     = coarse_funs->USR_grid_get_nvertices(coarse_data);
   nfine       =   fine_funs->USR_grid_get_nvertices(  fine_data);
   nfine_eqn   = ml->SingleLevel[coarse_level].Pmat->outvec_leng;
   ncoarse_eqn = ml->SingleLevel[coarse_level].Pmat->invec_leng;

   c_data  = (double *) ML_allocate(ncoarse_eqn*sizeof(double));
   f_data  = (double *) ML_allocate(nfine_eqn*sizeof(double));
   for (ii = 0; ii < ncoarse_eqn; ii++) c_data[ii] = 0.;
   for (ii = 0; ii < nfine_eqn; ii++) f_data[ii] = 0.;

   /* number of equations per grid point; keep the default of 1 when this
      processor owns no fine vertices so that we never divide by zero */
   if (nfine > 0) stride = nfine_eqn/nfine;

   for (ii = 0 ; ii < ncoarse ; ii++) {
      coarse_funs->USR_grid_get_vertex_coordinate(coarse_data,ii,coords);
      for (jj = 0; jj < stride; jj++) {
         c_data[ii*stride + jj] = coords[0] + 3.*coords[1] + .5;
      }
   }

   ML_Operator_Apply(ml->SingleLevel[coarse_level].Pmat,ncoarse_eqn,c_data,
                     nfine_eqn,f_data);

   dlargest = 0.0;
   for (ii = 0 ; ii < nfine; ii++) {
      fine_funs->USR_grid_get_vertex_coordinate(fine_data  , ii, coords);
      dtemp = coords[0] + 3.*coords[1] + .5;
      /* relative error of the first equation at this fine vertex */
      d2 = ML_dabs(dtemp - f_data[ii*stride])/(ML_dabs(dtemp)+1.e-9);
      /* Ray debugging
      if ( d2 > 1.e-8)
         printf("%d: f_data[%d] = %e  %e | %e %e\n",ml->comm->ML_mypid,
                ii,f_data[ii*stride],dtemp,coords[0],coords[1]);
      */
      if ( d2 > dlargest) {
         dlargest = d2;
      }
   }

   ML_free(f_data);
   ML_free(c_data);
}
/*
 * Getrow routine for CSR matrices (possibly chained through sub_matrix)
 * that fetches a single row.
 *
 * Parameters
 *   input_matrix     : operator whose data is a struct ML_CSR_MSRdata.
 *   N_requested_rows : must be 1 (only verified when DEBUG2 is defined).
 *   requested_rows   : requested_rows[0] is the local row to fetch.
 *   allocated_space  : in/out, capacity (in entries) of *columns/*values.
 *   columns, values  : in/out buffers.  The row is written starting at
 *                      position `index`.  When the buffers are too small
 *                      they are replaced by larger ones; the first `index`
 *                      entries are preserved and the old buffers are freed.
 *   row_lengths      : row_lengths[0] receives the number of entries.
 *   index            : offset in *columns/*values at which to store the row.
 *
 * When getrow->use_loc_glob_map is ML_YES the stored column indices are
 * translated through getrow->loc_glob_map.
 *
 * The program exits with status 1 when the enlarged buffers cannot be
 * allocated.
 */
void ML_get_row_CSR_norow_map(ML_Operator *input_matrix, int N_requested_rows,
        int requested_rows[], int *allocated_space, int **columns,
        double **values, int row_lengths[], int index)
{
   int    i, *mapper, *t1, row;
   ML_Operator *next;
   double *t2;
   struct ML_CSR_MSRdata *matrix;
   int    *rowptr, *bindx, *col_ptr, itemp, j;
   double *val, *val_ptr;

#ifdef DEBUG2
   if (N_requested_rows != 1) {
      printf("ML_get_matrix_row is currently implemented for only 1 row");
      printf(" at a time.\n");
      exit(1);
   }
#endif

   row = requested_rows[0];
#ifdef DEBUG2
   if ( (row >= input_matrix->getrow->Nrows) || (row < 0) ) {
      row_lengths[0] = 0;
      return;
   }
#endif

   /* walk down the sub_matrix chain to the piece that stores this row */
   next = input_matrix->sub_matrix;
   while ( (next != NULL) && (row < next->getrow->Nrows) ) {
      input_matrix = next;
      next         = next->sub_matrix;
   }
   if (next != NULL) row -= next->getrow->Nrows;

   matrix = (struct ML_CSR_MSRdata *) input_matrix->data;
   rowptr = matrix->rowptr;
   itemp  = rowptr[row];
   bindx  = &(matrix->columns[itemp]);
   val    = &(matrix->values[itemp]);

   *row_lengths = rowptr[row+1] - itemp;

   if (*row_lengths+index > *allocated_space) {
      /* grow geometrically, or to exactly what is needed (plus slack) */
      *allocated_space = 2*(*allocated_space) + 1;
      if (*row_lengths+index > *allocated_space)
         *allocated_space = *row_lengths + 5 + index;
      t1 = (int    *) ML_allocate(*allocated_space*sizeof(int   ));
      t2 = (double *) ML_allocate(*allocated_space*sizeof(double));
      /* both buffers are written below, so both must be valid */
      if ( (t1 == NULL) || (t2 == NULL) ) {
         printf("Not enough space to get a matrix row. A row length of \n");
         printf("%d was not sufficient\n",(*allocated_space-1)/2);
         fflush(stdout);
         ML_avoid_unused_param( (void *) &N_requested_rows);
         exit(1);
      }
      /* keep what the caller already stored in front of `index` */
      for (i = 0; i < index; i++) t1[i] = (*columns)[i];
      for (i = 0; i < index; i++) t2[i] = (*values)[i];
      ML_free(*columns);
      ML_free(*values);
      *columns = t1;
      *values  = t2;
   }

   col_ptr = &((*columns)[index]);
   val_ptr = &((*values)[index]);

   for (j = 0 ; j < *row_lengths; j++) {
      *col_ptr++ = *bindx++;
   }
   for (j = 0 ; j < *row_lengths; j++) {
      *val_ptr++  = *val++;
   }

   if ( (input_matrix->getrow->use_loc_glob_map == ML_YES)) {
      mapper = input_matrix->getrow->loc_glob_map;
      for (i = 0; i < row_lengths[0]; i++)
         (*columns)[i+index] = mapper[(*columns)[index+i]];
   }
}
/*----------------------------------------------------------------------* | (private) m.gee 04/05| | set the smoother on this nonlinear level | *----------------------------------------------------------------------*/ bool ML_NOX::ML_Nox_NonlinearLevel::Set_Smoother(ML* ml, ML_Aggregate* ag, int level, int nlevel, ML* thislevel_ml, ML_Aggregate* thislevel_ag, string smoothertype, int nsmooth) { if (smoothertype == "SGS") ML_Gen_Smoother_SymGaussSeidel(thislevel_ml,0,ML_BOTH,nsmooth,1.0); else if (smoothertype == "Jacobi") ML_Gen_Smoother_Jacobi(thislevel_ml,0,ML_BOTH,nsmooth,0.25); else if (smoothertype == "AmesosKLU") ML_Gen_Smoother_Amesos(thislevel_ml,0,ML_AMESOS_KLU,-1,0.0); else if ( (smoothertype == "MLS") || (smoothertype == "Cheby") ) ML_Gen_Smoother_Cheby(thislevel_ml,0,ML_BOTH,30.,nsmooth); else if (smoothertype == "BSGS") { int nblocks = 0; int* blocks = NULL; int* blockpde = NULL; bool needfree = false; // try to get nodal blocks from the VBMETIS aggregation scheme ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,nlevel, &nblocks,&blocks,&blockpde); if (nblocks && blocks) needfree=true; else ML_Gen_Blocks_Aggregates(ag,level,&nblocks,&blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(thislevel_ml,0,ML_BOTH,nsmooth,1., nblocks,blocks); if (needfree) { ML_free(blocks); ML_free(blockpde); } } else if (smoothertype == "Bcheby") { int nblocks = 0; int* blocks = NULL; int* blockpde = NULL; bool needfree = false; // try to get nodal blocks from the VBMETIS aggregation scheme ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,nlevel, &nblocks,&blocks,&blockpde); if (nblocks && blocks) needfree=true; else ML_Gen_Blocks_Aggregates(ag,level,&nblocks,&blocks); ML_Gen_Smoother_BlockDiagScaledCheby(thislevel_ml,0,ML_BOTH,30.,nsmooth, nblocks,blocks); if (needfree) { ML_free(blocks); ML_free(blockpde); } } else { cout << "**ERR**: ML_NOX::ML_Nox_NonlinearLevel::Setsmoother:\n" << "**ERR**: unknown type of smoother: " << smoothertype << "\n" << "**ERR**: 
file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1; } return true; }
int ML_memory_free(void ** var_ptr) { int ndouble=sizeof(double), index, index2, *int_ptr; char *char_ptr; /* ------------------------------------------------------------------ */ /* Extract head and tail information and check to see if they match. */ /* If not, flag error. */ /* ------------------------------------------------------------------ */ char_ptr = (char *) (*var_ptr); if (char_ptr != NULL) { int_ptr = (int *) ((ml_size_t) char_ptr - ndouble); index = (*int_ptr) - 1; if ( index >= 0 ) { if (index > MAX_MALLOC_LOG) { if ( global_comm != NULL ) printf("%d : ML_memory_free error : header invalid(%d).\n", global_comm->ML_mypid, index); else printf("ML_memory_free error : header invalid(%d).\n",index); exit(-1); } int_ptr = (int *) ((ml_size_t) char_ptr + malloc_leng_log[index] - 2 * ndouble); index2 = (*int_ptr); if (index != index2-1) { if ( global_comm == NULL ) printf("ML_memory_free warning : header/tail mismatch - %d\n", index); else printf("%d : ML_memory_free warning : header/tail mismatch - %d\n", global_comm->ML_mypid, index); printf(" (1) : header,tail indices = %d %d \n",index,index2); printf(" (2) : %.3s length = %ld \n", malloc_name_log[index], malloc_leng_log[index]); } /* ########## This check may be messed up by pointer exchanges if ( ((ml_size_t) var_ptr) != malloc_addr_log[index]) { printf("ML_memory_free warning : \n"); printf(" %.3s - header and log mismatch.\n", malloc_name_log[index]); printf("MEM LOG %d : %d \n", index, (int) malloc_addr_log[index]); } ############## */ malloc_leng_log[index] = -1; } /* else printf("ML_memory_free : variable not found.\n"); */ int_ptr = (int *) ((ml_size_t) char_ptr - ndouble); ML_free(int_ptr); } (*var_ptr) = NULL; return 0; }
/*
 * Wrap a square ML operator stored in MSR format as a PETSc MATMPIAIJ
 * matrix.
 *
 * Input:
 *   mlmat  - ML operator; its data is read as struct ML_CSR_MSRdata whose
 *            columns/values arrays are in MSR layout (entry i for i < m is
 *            the diagonal of row i, columns[i]..columns[i+1]-1 index the
 *            off-diagonal entries of row i).
 *   reuse  - when nonzero, *newmat is an existing matrix that is refilled;
 *            otherwise a new matrix is created and preallocated.
 * Output:
 *   newmat - the filled and assembled matrix.
 *
 * Rows and columns are renumbered with the global row ordering produced by
 * ML_build_global_numbering.  Errors are raised when mlmat->getrow is NULL
 * or when the local row and column counts differ.
 */
static PetscErrorCode MatWrapML_MPIAIJ(ML_Operator *mlmat,MatReuse reuse,Mat *newmat)
{
  struct ML_CSR_MSRdata *matdata = (struct ML_CSR_MSRdata *)mlmat->data;
  PetscInt              *ml_cols=matdata->columns,*aj;
  PetscScalar           *ml_vals=matdata->values,*aa;
  PetscErrorCode        ierr;
  PetscInt              i,j,k,*gordering;
  PetscInt              m=mlmat->outvec_leng,n,nz_max,row;
  Mat                   A;

  PetscFunctionBegin;
  if (!mlmat->getrow) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"mlmat->getrow = NULL");
  n = mlmat->invec_leng;
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"m %d must equal to n %d",m,n);

  if (reuse) {
    A = *newmat;
    /* longest row, counting the diagonal entry */
    for (nz_max=0,i=0; i<m; i++) nz_max = PetscMax(nz_max,ml_cols[i+1] - ml_cols[i] + 1);
  } else {
    PetscInt *nnzA,*nnzB,*nnz;
    ierr = MatCreate(mlmat->comm->USR_comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc3(m,PetscInt,&nnzA,m,PetscInt,&nnzB,m,PetscInt,&nnz);CHKERRQ(ierr);

    nz_max = 0;
    for (i=0; i<m; i++){
      nnz[i] = ml_cols[i+1] - ml_cols[i] + 1;
      if (nz_max < nnz[i]) nz_max = nnz[i];
      nnzA[i] = 1; /* diag */
      /* local column indices below m are counted in the diagonal block */
      for (j=ml_cols[i]; j<ml_cols[i+1]; j++){
        if (ml_cols[j] < m) nnzA[i]++;
      }
      nnzB[i] = nnz[i] - nnzA[i];
    }
    ierr = MatMPIAIJSetPreallocation(A,0,nnzA,0,nnzB);CHKERRQ(ierr);
    /* the error code of the free was previously assigned but not checked */
    ierr = PetscFree3(nnzA,nnzB,nnz);CHKERRQ(ierr);
  }

  /* insert mat values -- remap row and column indices */
  nz_max++;
  ierr = PetscMalloc2(nz_max,PetscScalar,&aa,nz_max,PetscInt,&aj);CHKERRQ(ierr);
  /* create global row numbering for a ML_Operator */
  ML_build_global_numbering(mlmat,&gordering,"rows");
  for (i=0; i<m; i++) {
    PetscInt ncols;
    row = gordering[i];
    k = 0;
    /* diagonal entry */
    aj[k] = row; aa[k++] = ml_vals[i];
    /* off diagonal entries */
    for (j=ml_cols[i]; j<ml_cols[i+1]; j++){
      aj[k] = gordering[ml_cols[j]]; aa[k++] = ml_vals[j];
    }
    ncols = ml_cols[i+1] - ml_cols[i] + 1;
    ierr = MatSetValues(A,1,&row,ncols,aj,aa,INSERT_VALUES);CHKERRQ(ierr);
  }
  ML_free(gordering);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;

  ierr = PetscFree2(aa,aj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* ************************************************************************* */
/* ML_Aggregate_CoarsenUser                                                  */
/*                                                                           */
/* Builds a tentative prolongator from aggregates returned by                */
/* ML_GetUserPartitions().                                                   */
/*                                                                           */
/*   ml_ag    aggregation object; the threshold, null space, level and       */
/*            complexity fields are read, and curr_threshold,                */
/*            operator/fine complexity, aggr_info[cur_level],                */
/*            aggr_count[cur_level] and the null space are updated.          */
/*   Amatrix  fine-level operator; it is amalgamated, partitioned and then   */
/*            un-amalgamated again.                                          */
/*   Pmatrix  (*Pmatrix) is filled with a CSR operator having                */
/*            nullspace_dim entries per aggregated row.                      */
/*   comm     communicator used for the global sums and for mypid.           */
/*                                                                           */
/* Returns Ncoarse*nullspace_dim, the local number of coarse unknowns.       */
/* Calls exit() on allocation failure or when Nrows is not a multiple of     */
/* num_PDE_eqns.                                                             */
/* ************************************************************************* */
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix,
                             ML_Operator **Pmatrix, ML_Comm *comm)
{
  unsigned int nbytes, length;
  int     i, j, k, Nrows, exp_Nrows;
  int     diff_level;
  int     aggr_count, index, mypid, num_PDE_eqns;
  int     *aggr_index = NULL, nullspace_dim;
  int     Ncoarse, count;
  int     *new_ia = NULL, *new_ja = NULL, new_Nrows;
  int     exp_Ncoarse;
  int     *aggr_cnt_array = NULL;
  int     level, index3, max_agg_size;
  int     **rows_in_aggs = NULL, lwork, info;
  double  *new_val = NULL, epsilon;
  double  *nullspace_vect = NULL, *qr_tmp = NULL;
  double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
  ML_SuperNode *aggr_head = NULL, *aggr_curr, *supernode;
  struct ML_CSR_MSRdata *csr_data;
  int     total_nz = 0;
  char    str[80];
  int     *graph_decomposition = NULL;
  ML_Aggregate_Viz_Stats *aggr_viz_and_stats;
  ML_Aggregate_Viz_Stats *grid_info;
  int     Nprocs;
  char    *unamalg_bdry = NULL;
  char    *label;
  int     N_dimensions;
  double  *x_coord = NULL;
  double  *y_coord = NULL;
  double  *z_coord = NULL;

  /* ------------------- execution begins --------------------------------- */

  label = ML_GetUserLabel();
  /* bounded: the label is user supplied and str holds only 80 bytes */
  snprintf(str, sizeof(str), "%s (level %d) :", label, ml_ag->cur_level);

  /* ============================================================= */
  /* get the machine information and matrix references             */
  /* ============================================================= */

  mypid          = comm->ML_mypid;
  Nprocs         = comm->ML_nprocs;
  epsilon        = ml_ag->threshold;
  num_PDE_eqns   = ml_ag->num_PDE_eqns;
  nullspace_dim  = ml_ag->nullspace_dim;
  nullspace_vect = ml_ag->nullspace_vect;
  Nrows          = Amatrix->outvec_leng;

  if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
    printf("%s num PDE eqns = %d\n", str, num_PDE_eqns);
  }

  /* ============================================================= */
  /* check the system size versus null dimension size              */
  /* ============================================================= */

  if ( Nrows % num_PDE_eqns != 0 ) {
    printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
    printf(" of num_PDE_eqns.\n");
    exit(EXIT_FAILURE);
  }
  diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
  if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

  /* ============================================================= */
  /* set up the threshold for weight-based coarsening              */
  /* ============================================================= */

  diff_level = ml_ag->begin_level - ml_ag->cur_level;
  if (diff_level == 0) ml_ag->curr_threshold = ml_ag->threshold;
  epsilon = ml_ag->curr_threshold;
  ml_ag->curr_threshold *= 0.5;   /* halved for the next call */

  if (mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s current eps = %e\n", str, epsilon);

  epsilon = epsilon * epsilon;

  ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
  Nrows /= num_PDE_eqns;          /* from here on Nrows counts block rows */
  exp_Nrows = Nrows;

  /* ********************************************************************** */
  /* allocate memory for aggr_index, which will contain the decomposition   */
  /* (sized for the un-amalgamated system, filled per block row first)      */
  /* ********************************************************************** */

  nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

  if ( nbytes > 0 ) {
    ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
    if( aggr_index == NULL ) {
      fprintf( stderr,
               "*ML*ERR* not enough memory for %d bytes\n"
               "*ML*ERR* (file %s, line %d)\n",
               nbytes,
               __FILE__,
               __LINE__ );
      exit( EXIT_FAILURE );
    }
  }
  else aggr_index = NULL;

  for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

  unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

  if( unamalg_bdry == NULL ) {
    fprintf( stderr,
             "*ML*ERR* on proc %d, not enough space for %d bytes\n"
             "*ML*ERR* (file %s, line %d)\n",
             mypid,
             (int)sizeof(char) * Nrows,
             __FILE__,
             __LINE__ );
    exit( EXIT_FAILURE );
  }

  N_dimensions = ml_ag->N_dimensions;
  grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
  /* The grid data may be absent (it is NULL-checked further down before the */
  /* viz/stats update), so do not dereference it blindly here.               */
  /* NOTE(review): Amatrix->to->Grid itself is assumed non-NULL, as before.  */
  x_coord = (grid_info != NULL) ? grid_info->x : NULL;

  if (N_dimensions > 1 && x_coord)
    y_coord = grid_info->y;
  else
    y_coord = 0;
  if (N_dimensions > 2 && x_coord)
    z_coord = grid_info->z;
  else
    z_coord = 0;

  aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                    epsilon,
                                    x_coord,y_coord,z_coord,
                                    aggr_index,&total_nz);

#ifdef ML_MPI
  MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
  MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
  i = Nrows;
  j = aggr_count;
#endif

  if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
    printf("%s Using %d (block) aggregates (globally)\n",
           str,
           j );
    printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
           str,
           100.0*j/i,
           j, i);
  }

  j = ML_gsum_int( aggr_count, comm );
  if (mypid == 0 && 7 < ML_Get_PrintLevel()) {
    printf("%s %d (block) aggregates (globally)\n",
           str, j );
  }

  /* ********************************************************************** */
  /* Keep a copy of aggr_index in the grid's viz/stats structure so that    */
  /* the aggregates can be analyzed and visualized after the levels have    */
  /* been built.  The copy is only made when the structure is present.      */
  /* ********************************************************************** */

  if (Amatrix->to->Grid->Grid != NULL) {

    graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
    if( graph_decomposition == NULL ) {
      fprintf( stderr,
               "*ML*ERR* Not enough memory for %d bytes\n"
               "*ML*ERR* (file %s, line %d)\n",
               (int)sizeof(int)*Nrows,
               __FILE__,
               __LINE__ );
      exit( EXIT_FAILURE );
    }

    for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

    aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
    aggr_viz_and_stats->graph_decomposition = graph_decomposition;
    aggr_viz_and_stats->Nlocal = Nrows;
    aggr_viz_and_stats->Naggregates = aggr_count;
    aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
    aggr_viz_and_stats->is_filled = ML_YES;
    aggr_viz_and_stats->Amatrix = Amatrix;
  }

  /* ********************************************************************** */
  /* take the decomposition returned by ML_GetUserPartitions and form the   */
  /* aggregates; also accumulate the operator complexity                    */
  /* ********************************************************************** */

  total_nz = ML_Comm_GsumInt( comm, total_nz);
  i = ML_Comm_GsumInt( comm, Nrows);

  if ( mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
           str,
           total_nz,1.0*total_nz/i);

  if ( ml_ag->operator_complexity == 0.0 ) {
    ml_ag->fine_complexity = total_nz;
    ml_ag->operator_complexity = total_nz;
  }
  else ml_ag->operator_complexity += total_nz;

  /* fix aggr_index for num_PDE_eqns > 1: expand the per-block-row entries  */
  /* in place, walking backwards so no entry is overwritten before use      */

  for (i = Nrows - 1; i >= 0; i-- ) {
    for (j = num_PDE_eqns-1; j >= 0; j--) {
      aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
    }
  }

  if ( mypid == 0 && 8 < ML_Get_PrintLevel()) {
    printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
    fflush(stdout);
  }

  ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

  Nrows     *= num_PDE_eqns;      /* back to scalar rows */
  exp_Nrows *= num_PDE_eqns;

  /* count the size of each aggregate */

  aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
  for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
  for (i = 0; i < exp_Nrows; i++) {
    if (aggr_index[i] >= 0) {
      if( aggr_index[i] >= aggr_count ) {
        fprintf( stderr,
                 "*ML*WRN* on process %d, something weird happened...\n"
                 "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                 "*ML*WRN* (file %s, line %d)\n",
                 comm->ML_mypid,
                 i,
                 aggr_index[i],
                 aggr_count,
                 __FILE__,
                 __LINE__ );
      } else {
        aggr_cnt_array[aggr_index[i]]++;
      }
    }
  }

  /* ============================================================= */
  /* Form tentative prolongator                                    */
  /* ============================================================= */

  Ncoarse = aggr_count;

  /* ============================================================= */
  /* check and copy aggr_index                                     */
  /* ------------------------------------------------------------- */

  level = ml_ag->cur_level;
  nbytes = (Nrows+1) * sizeof( int );
  ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
  count = aggr_count;
  for ( i = 0; i < Nrows; i+=num_PDE_eqns ) {
    if ( aggr_index[i] >= 0 ) {
      for ( j = 0; j < num_PDE_eqns; j++ )
        ml_ag->aggr_info[level][i+j] = aggr_index[i];
      if (aggr_index[i] >= count) count = aggr_index[i] + 1;
    }
    /* rows with a negative aggr_index are left untouched in aggr_info */
  }
  ml_ag->aggr_count[level] = count; /* for relaxing boundary points */

  /* ============================================================= */
  /* set up the new operator                                       */
  /* ------------------------------------------------------------- */

  new_Nrows = Nrows;
  exp_Ncoarse = Nrows;

  for ( i = 0; i < new_Nrows; i++ ) {
    if ( aggr_index[i] >= exp_Ncoarse ) {
      printf("*ML*WRN* index out of bound %d = %d(%d)\n",
             i, aggr_index[i],
             exp_Ncoarse);
    }
  }
  nbytes = ( new_Nrows+1 ) * sizeof(int);
  ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
  nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(int);
  ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
  nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
  for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* set up the space for storing the new null space               */
  /* ------------------------------------------------------------- */

  nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
  if( new_null == NULL ) {
    fprintf( stderr,
             "*ML*ERR* on process %d, not enough memory for %d bytes\n"
             "*ML*ERR* (file %s, line %d)\n",
             mypid,
             nbytes,
             __FILE__,
             __LINE__ );
    exit( EXIT_FAILURE );
  }

  for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++)
    new_null[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* initialize the row pointer for the CSR prolongation operator  */
  /* (each row will have at most nullspace_dim nonzero entries)    */
  /* ------------------------------------------------------------- */

  for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

  /* trying this when a Dirichlet row is taken out: rows whose      */
  /* aggr_index is -1 get an empty row, overriding the values above */
  j = 0;
  new_ia[0] = 0;
  for (i = 0; i < Nrows; i++) {
    if (aggr_index[i] != -1) j += nullspace_dim;
    new_ia[i+1] = j;
  }

  /* ------------------------------------------------------------- */
  /* generate an array to store which aggregate has which rows.Then*/
  /* loop through the rows of A checking which aggregate each row  */
  /* is in, and adding it to the appropriate spot in rows_in_aggs  */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
  for (i = 0; i < aggr_count; i++) {
    nbytes = aggr_cnt_array[i]+1;
    rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
    aggr_cnt_array[i] = 0;    /* reused below as the per-aggregate fill cursor */
    if (rows_in_aggs[i] == NULL) {
      printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
      exit(1);
    }
  }
  for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
    if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count) {
      for (j = 0; j < num_PDE_eqns; j++) {
        index = aggr_cnt_array[aggr_index[i]]++;
        rows_in_aggs[aggr_index[i]][index] = i + j;
      }
    }
  }

  /* ------------------------------------------------------------- */
  /* allocate work arrays for QR factorization                     */
  /* work and lwork are the workspace arguments of the LAPACK QR   */
  /* routines; they start at nullspace_dim entries and are resized */
  /* below from the size the routines report back in work[0]       */
  /* ------------------------------------------------------------- */

  max_agg_size = 0;
  for (i = 0; i < aggr_count; i++) {
    if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
  }
  nbytes = max_agg_size * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

  lwork  = nullspace_dim;
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&work, nbytes, "AGw");

  /* ------------------------------------------------------------- */
  /* perform block QR decomposition                                */
  /* ------------------------------------------------------------- */

  for (i = 0; i < aggr_count; i++) {

    /* ---------------------------------------------------------- */
    /* set up the matrix we want to decompose into Q and R        */
    /* (column-major, length rows by nullspace_dim columns);      */
    /* rows flagged 'T' in unamalg_bdry are zeroed                */
    /* ---------------------------------------------------------- */

    length = aggr_cnt_array[i];
    if (nullspace_vect == NULL) {
      /* default null space: one unit vector per PDE equation */
      for (j = 0; j < (int) length; j++) {
        index = rows_in_aggs[i][j];
        for (k = 0; k < nullspace_dim; k++) {
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else {
            if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
            else                           qr_tmp[k*length+j] = 0.0;
          }
        }
      }
    }
    else {
      for (k = 0; k < nullspace_dim; k++) {
        for (j = 0; j < (int) length; j++) {
          index = rows_in_aggs[i][j];
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else {
            if (index < Nrows) {
              qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
            }
            else {
              fprintf( stderr,
                       "*ML*ERR* in QR\n"
                       "*ML*ERR* (file %s, line %d)\n",
                       __FILE__,
                       __LINE__ );
              exit( EXIT_FAILURE );
            }
          }
        }
      }
    }

    /* ---------------------------------------------------------- */
    /* now calculate QR using an LAPACK routine                   */
    /* ---------------------------------------------------------- */

    if (aggr_cnt_array[i] >= nullspace_dim) {

      DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp,
                 &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0)
        pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                 aggr_cnt_array[i],i);

      if (work[0] > lwork) {
        lwork=(int) work[0];
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* the upper triangle of qr_tmp is now R, so copy that into   */
      /* the new nullspace                                          */
      /* ---------------------------------------------------------- */

      for (j = 0; j < nullspace_dim; j++)
        for (k = j; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
            qr_tmp[j+aggr_cnt_array[i]*k];

      /* ---------------------------------------------------------- */
      /* to get this block of P, need to run qr_tmp through another */
      /* LAPACK function:                                           */
      /* ---------------------------------------------------------- */

      /* this test cannot fire inside the enclosing branch; kept as is */
      if ( aggr_cnt_array[i] < nullspace_dim ){
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        printf("ERROR : performing QR on a MxN matrix where M<N.\n");
      }
      DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim,
                 qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0) {
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
      }

      if (work[0] > lwork) {
        lwork=(int) work[0];
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* now copy Q over into the appropriate part of P:            */
      /* The rows of P get calculated out of order, so Q is assumed */
      /* to be totally dense and new_ia (set up above) tells where  */
      /* in ja/val each row's nullspace_dim entries belong.         */
      /* ---------------------------------------------------------- */

      for (j = 0; j < aggr_cnt_array[i]; j++) {
        index = rows_in_aggs[i][j];

        if ( index < Nrows ) {
          index3 = new_ia[index];
          for (k = 0; k < nullspace_dim; k++) {
            new_ja [index3+k] = i * nullspace_dim + k;
            new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
          }
        }
        else {
          fprintf( stderr,
                   "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                   index,
                   Nrows );
        }
      }
    }
    else {
      /* We have a small aggregate such that the QR factorization can not */
      /* be performed. Instead let us copy the null space from the fine   */
      /* into the coarse grid nullspace and put the identity for the      */
      /* prolongator????                                                  */
      /* NOTE(review): qr_tmp holds only aggr_cnt_array[i]*nullspace_dim  */
      /* freshly written entries here and is sized by max_agg_size; the   */
      /* reads below with j up to nullspace_dim-1 look like they can go   */
      /* past both -- confirm before relying on this branch.              */
      for (j = 0; j < nullspace_dim; j++)
        for (k = 0; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
            qr_tmp[j+aggr_cnt_array[i]*k];
      for (j = 0; j < aggr_cnt_array[i]; j++) {
        index = rows_in_aggs[i][j];
        index3 = new_ia[index];
        for (k = 0; k < nullspace_dim; k++) {
          new_ja [index3+k] = i * nullspace_dim + k;
          if (k == j) new_val[index3+k] = 1.;
          else new_val[index3+k] = 0.;
        }
      }
    }
  }

  ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim,
                             new_null, Ncoarse*nullspace_dim);
  ML_memory_free( (void **) &new_null);

  /* ------------------------------------------------------------- */
  /* set up the csr_data data structure (takes over new_ia/ja/val) */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
  csr_data->rowptr  = new_ia;
  csr_data->columns = new_ja;
  csr_data->values  = new_val;

  ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows,
                                 csr_data, Nrows, NULL, 0);
  (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
  (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
  (*Pmatrix)->max_nz_per_row = 1;

  ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
  ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
  (*Pmatrix)->max_nz_per_row = 1;
  /* this must be set so that the hierarchy generation does not abort early
     in adaptive SA */
  (*Pmatrix)->num_PDEs = nullspace_dim;

  /* ------------------------------------------------------------- */
  /* clean up                                                      */
  /* ------------------------------------------------------------- */

  ML_free(unamalg_bdry);
  ML_memory_free((void**)&aggr_index);
  ML_free(aggr_cnt_array);
  for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
  ML_memory_free((void**)&rows_in_aggs);
  ML_memory_free((void**)&qr_tmp);
  ML_memory_free((void**)&tmp_vect);
  ML_memory_free((void**)&work);

  /* aggr_head is never assigned in this routine, so this loop does nothing */
  aggr_curr = aggr_head;
  while ( aggr_curr != NULL ) {
    supernode = aggr_curr;
    aggr_curr = aggr_curr->next;
    if ( supernode->length > 0 ) ML_free( supernode->list );
    ML_free( supernode );
  }

  return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of 
global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. 
*/ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); 
ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, 
&(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. 
*/ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. 
*/ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, 
ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... 
put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for 
(j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
// ====================================================================== int ML_Operator_Add2(ML_Operator *A, ML_Operator *B, ML_Operator *C, int matrix_type, double scalarA, double scalarB) { int A_allocated = 0, *A_bindx = NULL, B_allocated = 0, *B_bindx = NULL; double *A_val = NULL, *B_val = NULL, *hashed_vals; int i, A_length, B_length, *hashed_inds; int max_nz_per_row = 0, min_nz_per_row=1e6, j; int hash_val, index_length; int *columns, *rowptr, nz_ptr, hash_used, global_col; double *values; struct ML_CSR_MSRdata *temp; int *A_gids, *B_gids; int max_per_proc; #ifdef ML_WITH_EPETRA int count; #endif if (A->getrow == NULL) pr_error("ML_Operator_Add: A does not have a getrow function.\n"); if (B->getrow == NULL) pr_error("ML_Operator_Add: B does not have a getrow function.\n"); if (A->getrow->Nrows != B->getrow->Nrows) { printf("ML_Operator_Add: Can not add, two matrices do not have the same"); printf(" number of rows %d vs %d",A->getrow->Nrows,B->getrow->Nrows); exit(1); } if (A->invec_leng != B->invec_leng) { printf("ML_Operator_Add: Can not add, two matrices do not have the same"); printf(" number of columns %d vs %d",A->getrow->Nrows,B->getrow->Nrows); exit(1); } /* let's just count some things */ index_length = A->invec_leng + 1; if (A->getrow->pre_comm != NULL) { ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm); index_length += A->getrow->pre_comm->total_rcv_length; } if (B->getrow->pre_comm != NULL) { ML_CommInfoOP_Compute_TotalRcvLength(B->getrow->pre_comm); index_length += B->getrow->pre_comm->total_rcv_length; } ML_create_unique_col_id(A->invec_leng, &A_gids, A->getrow->pre_comm, &max_per_proc,A->comm); ML_create_unique_col_id(B->invec_leng, &B_gids, B->getrow->pre_comm, &max_per_proc,B->comm); hashed_inds = (int *) ML_allocate(sizeof(int)*index_length); hashed_vals = (double *) ML_allocate(sizeof(double)*index_length); for (i = 0; i < index_length; i++) hashed_inds[i] = -1; for (i = 0; i < index_length; i++) hashed_vals[i] = 0.; nz_ptr = 0; 
for (i = 0 ; i < A->getrow->Nrows; i++) { hash_used = 0; ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val, &A_length, 0); for (j = 0; j < A_length; j++) { global_col = A_gids[A_bindx[j]]; ML_hash_it(global_col, hashed_inds, index_length,&hash_used,&hash_val); hashed_inds[hash_val] = global_col; hashed_vals[hash_val] += scalarA * A_val[j]; A_bindx[j] = hash_val; } ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val, &B_length, 0); for (j = 0; j < B_length; j++) { global_col = B_gids[B_bindx[j]]; ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val); hashed_inds[hash_val] = global_col; hashed_vals[hash_val] += scalarB*B_val[j]; B_bindx[j] = hash_val; } for (j = 0; j < A_length; j++) { nz_ptr++; hashed_inds[A_bindx[j]] = -1; hashed_vals[A_bindx[j]] = 0.; } for (j = 0; j < B_length; j++) { if (hashed_inds[B_bindx[j]] != -1) { nz_ptr++; hashed_inds[B_bindx[j]] = -1; hashed_vals[B_bindx[j]] = 0.; } } } nz_ptr++; columns = 0; values = 0; rowptr = (int *) ML_allocate(sizeof(int)*(A->outvec_leng+1)); if (matrix_type == ML_CSR_MATRIX) { columns= (int *) ML_allocate(sizeof(int)*nz_ptr); values = (double *) ML_allocate(sizeof(double)*nz_ptr); } #ifdef ML_WITH_EPETRA else if (matrix_type == ML_EpetraCRS_MATRIX) { columns= (int *) ML_allocate(sizeof(int)*(index_length+1)); values = (double *) ML_allocate(sizeof(double)*(index_length+1)); } #endif else { pr_error("ML_Operator_Add: Unknown matrix type\n"); } nz_ptr = 0; rowptr[0] = 0; for (i = 0 ; i < A->getrow->Nrows; i++) { hash_used = 0; ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val, &A_length, 0); for (j = 0; j < A_length; j++) { global_col = A_gids[A_bindx[j]]; ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val); hashed_inds[hash_val] = global_col; hashed_vals[hash_val] += scalarA * A_val[j]; A_bindx[j] = hash_val; } ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val, &B_length, 0); for (j = 0; j < B_length; j++) { global_col = B_gids[B_bindx[j]]; 
ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val); hashed_inds[hash_val] = global_col; hashed_vals[hash_val] += scalarB*B_val[j]; B_bindx[j] = hash_val; } #ifdef ML_WITH_EPETRA if (matrix_type == ML_EpetraCRS_MATRIX) { for (j = 0; j < A_length; j++) { columns[j] = hashed_inds[A_bindx[j]]; values[j] = hashed_vals[A_bindx[j]]; nz_ptr++; hashed_inds[A_bindx[j]] = -1; hashed_vals[A_bindx[j]] = 0.; } count = A_length; for (j = 0; j < B_length; j++) { if (hashed_inds[B_bindx[j]] != -1) { columns[count] = hashed_inds[B_bindx[j]]; values[count++] = hashed_vals[B_bindx[j]]; nz_ptr++; hashed_inds[B_bindx[j]] = -1; hashed_vals[B_bindx[j]] = 0.; } } ML_Epetra_CRSinsert(C,i,columns,values,count); } else { #endif for (j = 0; j < A_length; j++) { columns[nz_ptr] = hashed_inds[A_bindx[j]]; values[nz_ptr] = hashed_vals[A_bindx[j]]; nz_ptr++; hashed_inds[A_bindx[j]] = -1; hashed_vals[A_bindx[j]] = 0.; } for (j = 0; j < B_length; j++) { if (hashed_inds[B_bindx[j]] != -1) { columns[nz_ptr] = hashed_inds[B_bindx[j]]; values[nz_ptr] = hashed_vals[B_bindx[j]]; nz_ptr++; hashed_inds[B_bindx[j]] = -1; hashed_vals[B_bindx[j]] = 0.; } } #ifdef ML_WITH_EPETRA } #endif rowptr[i+1] = nz_ptr; j = rowptr[i+1] - rowptr[i]; if (j > max_nz_per_row) max_nz_per_row = j; if (j < min_nz_per_row && j>0) min_nz_per_row = j; } if (matrix_type == ML_CSR_MATRIX) { temp = (struct ML_CSR_MSRdata *) ML_allocate(sizeof(struct ML_CSR_MSRdata)); if (temp == NULL) pr_error("ML_Operator_Add: no space for temp\n"); temp->columns = columns; temp->values = values; temp->rowptr = rowptr; ML_Operator_Set_ApplyFuncData(C, B->invec_leng, A->outvec_leng, temp,A->outvec_leng, NULL,0); ML_Operator_Set_Getrow(C, A->outvec_leng, CSR_getrow); ML_Operator_Set_ApplyFunc (C, CSR_matvec); ML_globalcsr2localcsr(C, max_per_proc); C->data_destroy = ML_CSR_MSRdata_Destroy; C->max_nz_per_row = max_nz_per_row; C->min_nz_per_row = min_nz_per_row; C->N_nonzeros = nz_ptr; } #ifdef ML_WITH_EPETRA else { ML_free(rowptr); 
ML_free(columns); ML_free(values); } #endif ML_free(A_gids); ML_free(B_gids); ML_free(hashed_vals); ML_free(hashed_inds); ML_free(A_val); ML_free(A_bindx); ML_free(B_val); ML_free(B_bindx); return 1; }
/*
 * Fetch row requested_rows[0] of input_matrix through the matrix's getrow
 * callback and store it at offset 'index' of *columns / *values.
 *
 *   allocated_space : in/out, number of entries *columns and *values can
 *                     hold.  Whenever the callback returns 0 the buffers
 *                     are replaced by ones of size 2*old+1 (the first
 *                     'index' entries are carried over) and the call is
 *                     retried.
 *   row_lengths[0]  : out, number of entries in the row; set to 0 when
 *                     the row map marks the row with -1.
 *
 * If the matrix has a row map the requested row is translated first.  The
 * chain of sub_matrix operators is then walked to pick the operator whose
 * getrow callback is used.  When use_loc_glob_map is ML_YES the returned
 * column indices are passed through loc_glob_map.
 *
 * Exits the program if a larger buffer cannot be allocated.
 */
void ML_get_matrix_row(ML_Operator *input_matrix, int N_requested_rows,
                       int requested_rows[], int *allocated_space,
                       int **columns, double **values, int row_lengths[],
                       int index)
{
   typedef int (*row_fetch_fn)(void *, int, int *, int, int *, double *, int *);

   ML_Operator  *child;
   row_fetch_fn  fetch;
   int          *bigger_cols, *loc_to_glob;
   double       *bigger_vals;
   int           target_row, k;

#ifdef DEBUG2
   if (N_requested_rows != 1) {
      printf("ML_get_matrix_row is currently implemented for only 1 row");
      printf(" at a time.\n");
      exit(1);
   }
#endif

   target_row = requested_rows[0];

#ifdef DEBUG2
   if ( (target_row < 0) || (target_row >= input_matrix->getrow->Nrows) ) {
      row_lengths[0] = 0;
      return;
   }
#endif

   /* translate through the row map; -1 marks a row with no entries */
   if (input_matrix->getrow->row_map != NULL) {
      if (input_matrix->getrow->row_map[target_row] == -1) {
         row_lengths[0] = 0;
         ML_avoid_unused_param( (void *) &N_requested_rows);
         return;
      }
      target_row = input_matrix->getrow->row_map[target_row];
   }

   /* descend the sub_matrix chain while the row lies inside the child */
   for (child = input_matrix->sub_matrix;
        (child != NULL) && (target_row < child->getrow->Nrows);
        child = child->sub_matrix) {
      input_matrix = child;
   }
   if (child != NULL) target_row -= child->getrow->Nrows;

   fetch = (row_fetch_fn) input_matrix->getrow->func_ptr;

   /* keep asking for the row, enlarging the buffers after each failure */
   for (;;) {
      if (fetch((void *) input_matrix, 1, &target_row,
                *allocated_space - index, &((*columns)[index]),
                &((*values)[index]), row_lengths) != 0)
         break;

      *allocated_space = 2*(*allocated_space) + 1;

      bigger_cols = (int *) ML_allocate(*allocated_space*sizeof(int));
      if (bigger_cols == NULL) {
         printf("Not enough space to get a matrix row. A row length of \n");
         printf("%d Was not sufficient\n",(*allocated_space-1)/2);
         fflush(stdout);
         exit(1);
      }
      for (k = 0; k < index; k++) bigger_cols[k] = (*columns)[k];
      if (*columns != NULL) ML_free(*columns);
      *columns = bigger_cols;

      bigger_vals = (double *) ML_allocate(*allocated_space*sizeof(double));
      if (bigger_vals == NULL) {
         printf("Not enough space to get a matrix row. A row length of \n");
         printf("%d Was not sufficient\n",(*allocated_space-1)/2);
         fflush(stdout);
         exit(1);
      }
      for (k = 0; k < index; k++) bigger_vals[k] = (*values)[k];
      if (*values != NULL) ML_free(*values);
      *values = bigger_vals;
   }

   /* optional local -> global renumbering of the fetched columns */
   if (input_matrix->getrow->use_loc_glob_map == ML_YES) {
      loc_to_glob = input_matrix->getrow->loc_glob_map;
      for (k = 0; k < row_lengths[0]; k++)
         (*columns)[index+k] = loc_to_glob[(*columns)[index+k]];
   }
}
/*
 * Matrix-vector product built from getrow machinery.
 *
 * The input vector 'vec' (length Nvec) is wrapped in a temporary operator
 * whose getrow is VECTOR_getrows, exchanged with matrix->getrow->pre_comm
 * when present, multiplied by 'matrix' with ML_matmat_mult, and exchanged
 * again with post_comm when present.  The rows of the resulting operator
 * are then read one at a time into ovec[row].
 *
 * On return *Novec holds the number of rows of the result; a warning is
 * printed when that differs from the value passed in.  All temporary
 * operators are destroyed before returning.
 *
 * Exits the program when the matrix has no getrow function or when the
 * column scratch buffer cannot be allocated.
 */
void ML_getrow_matvec(ML_Operator *matrix, double *vec, int Nvec,
                      double *ovec, int *Novec)
{
   ML_Operator *vec_op, *vec_in, *product, *result, *link;
   int         *col_buf;
   int          row, buf_len, n_entries, n_out;

   if (matrix->getrow->func_ptr == NULL) {
      printf("ML_getrow_matvec: empty object? \n");
      exit(1);
   }

   /* wrap the vector as an operator with one entry per row */
   vec_op = ML_Operator_Create(matrix->comm);
   ML_Operator_Set_1Levels(vec_op, matrix->from, matrix->from);
   ML_Operator_Set_ApplyFuncData(vec_op,1,Nvec,vec,Nvec,NULL,0);
   ML_Operator_Set_Getrow(vec_op,Nvec, VECTOR_getrows);
   vec_op->max_nz_per_row = 1;
   vec_op->N_nonzeros     = Nvec;

   if (matrix->getrow->pre_comm == NULL)
      vec_in = vec_op;
   else
      ML_exchange_rows(vec_op, &vec_in, matrix->getrow->pre_comm);

   ML_matmat_mult(matrix, vec_in, &product);

   if (matrix->getrow->post_comm == NULL)
      result = product;
   else
      ML_exchange_rows(product, &result, matrix->getrow->post_comm);

   n_out   = result->getrow->Nrows;
   buf_len = n_out + 1;
   col_buf = (int *) ML_allocate(buf_len*sizeof(int));
   if (col_buf == NULL) {
      printf("no space in ML_getrow_matvec()\n");
      exit(1);
   }

   /* row 'row' of the result is written to ovec[row] (offset == row) */
   row = 0;
   while (row < n_out) {
      ML_get_matrix_row(result, 1, &row, &buf_len, &col_buf, &ovec,
                        &n_entries, row);
      if (buf_len != n_out + 1)
         printf("memory problems ... we can't reallocate here\n");
      row++;
   }

   ML_free(col_buf);

   if (*Novec != n_out) {
      printf("Warning: The length of ML's output vector does not agree with\n");
      printf(" the user's length for the output vector (%d vs. %d).\n",
             *Novec, n_out);
      printf(" indicate a problem.\n");
   }
   *Novec = n_out;

   /* cut the link to the wrapped operator before tearing the chain down */
   if (matrix->getrow->pre_comm != NULL) {
      for (link = vec_in;
           (link != NULL) && (link->sub_matrix != vec_op);
           link = link->sub_matrix)
         ;
      if (link != NULL) link->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(vec_in);
      ML_Operator_Destroy(&vec_in);
   }

   if (matrix->getrow->post_comm != NULL) {
      for (link = result;
           (link != NULL) && (link->sub_matrix != product);
           link = link->sub_matrix)
         ;
      if (link != NULL) link->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(result);
      ML_Operator_Destroy(&result);
   }

   ML_Operator_Destroy(&vec_op);
   ML_RECUR_CSR_MSRdata_Destroy(product);
   ML_Operator_Destroy(&product);
}
char *ML_allocate(unsigned int isize) { /* * Allocate memory and record the event by placing an entry in the * ml_widget_head list. Also recored the size of this entry as well. * * Note: we actually allocate more memory than is requested (7 doubles more). * This additional memory is used to record the 'size' and to mark the * memory with a header and trailer which we can later check to see if * they were overwritten. * */ char *ptr, *header_start, *header_end; struct ml_widget *ml_widget; int *size_ptr, i, size; double *dptr; size = (int) isize; size = size + 7*sizeof(double); ml_widget = (struct ml_widget *) malloc(sizeof(struct ml_widget)); /* THIS MALLOC NEEDS TO STAY A */ /* MALLOC AND NOT AN ML_ALLOCATE */ if (ml_widget == NULL) return(NULL); ptr = (char *) malloc(size); /* THIS MALLOC NEEDS TO STAY A */ /* MALLOC AND NOT AN ML_ALLOCATE */ if (ptr == NULL) { ML_free(ml_widget); /* THIS FREE() NEEDS TO STAY A */ /* FREE AND NOT AN ML_FREE */ return(NULL); } ml_allo_count++; /* put trash in the space to make sure nobody is expecting zeros */ for (i = 0 ; i < size/sizeof(char) ; i++ ) ptr[i] = 'f'; /* record the entry */ ml_widget->order = ml_allo_count; if (size == -7*sizeof(double) ) { printf("allocating 0 space %u (%d)\n",ptr,size); i = 0; size = 1/i; ml_widget = NULL; } ml_widget->size = size - 7*sizeof(double); ml_widget->next = ml_widget_head; ml_widget_head = ml_widget; ml_widget->address = ptr; size_ptr = (int *) ptr; size_ptr[0] = size - 7*sizeof(double); dptr = (double *) ptr; /* mark the header */ header_start = (char *) &(dptr[1]); for (i = 0 ; i < 3*sizeof(double)/sizeof(char) ; i++ ) header_start[i] = 'x'; /* mark the trailer */ header_end = &(ptr[ (size/sizeof(char)) - 1]); header_start = (char *) &(dptr[4]); header_start = & (header_start[(size-7*sizeof(double))/sizeof(char)]); while (header_start <= header_end) { *header_start = 'x'; header_start++; } return( (char *) &(dptr[4]) ); }
/*
 * Resize a block previously handed out by ML_allocate().
 *
 * vptr must be the user-data pointer of a tracked block (it is looked up
 * in the ml_widget_head list by its raw address, 4 doubles before vptr).
 * A new raw block of new_size + 7 doubles is obtained with malloc, its
 * size field, header and trailer are written, min(old size, new_size)
 * bytes of user data are copied, the old raw block is released and the
 * list entry is updated to describe the new block.
 *
 * Returns the new user-data pointer, or NULL if malloc fails (the old
 * block is left untouched in that case).
 * Exits the program when vptr is NULL or is not found in the list.
 * A corrupted header triggers a deliberate fault (integer division by
 * zero) after printing a diagnostic.  The trailer is not checked.
 */
char *ML_myrealloc(void *vptr, unsigned int new_size)
{
   struct ml_widget *current;
   int i, size, *new_size_ptr;
   char *header_start, *ptr;
   char *data1, *data2, *new_ptr, *new_header_start, *new_header_end;
   int newmsize, smaller;
   double *dptr, *new_dptr;

   ptr = (char *) vptr;
   if (ptr == NULL) {
      printf("Trying to realloc a NULL ptr\n");
      exit(1);
   }

   /* step back over the size field and header to the raw block start */
   data1 = ptr;
   dptr  = (double *) ptr;
   dptr -= 4;
   ptr   = (char *) dptr;

   current = ml_widget_head;
   while ( (current != NULL) && (current->address != ptr) )
      current = current->next;

   if (current == NULL) {
      printf("the pointer %p was not found and thus can not be realloc.\n",
             (void *) ptr);
      exit(1);
   }

   /* check to see if the header is corrupted */
   header_start = (char *) &(dptr[1]);
   for (i = 0 ; i < 3*sizeof(double)/sizeof(char) ; i++ ) {
      if (header_start[i] != 'x') {
         printf("realloc header is corrupted for %p (%d,%d)\n",(void *) ptr,
                current->size,current->order);
         size = 0;
         size = 1/size;          /* deliberate fault, kept for debugging */
      }
      /* DO WE CHECK THE TRAILER ???? */
   }

   newmsize = new_size + 7*sizeof(double);
   new_ptr  = (char *) malloc(newmsize); /* THIS MALLOC NEEDS TO STAY A   */
                                         /* MALLOC AND NOT AN ML_ALLOCATE */
   if (new_ptr == NULL) return(NULL);

   new_size_ptr    = (int *) new_ptr;
   new_size_ptr[0] = new_size;
   new_dptr        = (double *) new_ptr;
   data2           = (char *) &(new_dptr[4]);

   /* mark the new header */
   new_header_start = (char *) &(new_dptr[1]);
   for (i = 0 ; i < 3*sizeof(double)/sizeof(char) ; i++ )
      new_header_start[i] = 'x';

   /* mark the new trailer */
   new_header_end   = &(new_ptr[ (newmsize/sizeof(char)) - 1]);
   new_header_start = (char *) &(new_dptr[4]);
   new_header_start = &(new_header_start[new_size/sizeof(char)]);
   while (new_header_start <= new_header_end) {
      *new_header_start = 'x';
      new_header_start++;
   }

   /* carry over as much user data as fits in the new block */
   smaller = current->size;
   if (smaller > new_size ) smaller = new_size;
   for (i = 0 ; i < smaller; i++) data2[i] = data1[i];

   /* dptr is the raw malloc() address of the old block (not a user     */
   /* pointer), so it has to be released with plain free().             */
   free(dptr);

   current->size    = new_size;
   current->address = (char *) new_dptr;
   return( (char *) &(new_dptr[4]));
}
// ================================================ ====== ==== ==== == = //! Build pi operator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007). int ML_Epetra::FaceMatrixFreePreconditioner::BuildNullspace(Epetra_MultiVector *& nullspace){ int Nf=FaceRangeMap_->NumMyElements(); /* Pull the coordinates from Teuchos */ double * xcoord=List_.get("x-coordinates",(double*)0); double * ycoord=List_.get("y-coordinates",(double*)0); double * zcoord=List_.get("z-coordinates",(double*)0); dim=(xcoord!=0) + (ycoord!=0) + (zcoord!=0); // Sanity check if(dim!=3){if(!Comm_->MyPID()) printf("ERROR: FaceMatrixFreePreconditioner only works in 3D"); ML_CHK_ERR(-1);} // Build the (unimported) coordinate multivector double **d_coords=new double* [dim]; d_coords[0]=xcoord; d_coords[1]=ycoord; if(dim==3) d_coords[2]=zcoord; Epetra_MultiVector n_coords_domain(View,*NodeDomainMap_,d_coords,dim); Epetra_MultiVector *n_coords; // Import coordinate info if(FaceNode_Matrix_->Importer()){ n_coords=new Epetra_MultiVector(FaceNode_Matrix_->ColMap(),dim); n_coords->PutScalar(0.0); n_coords->Import(n_coords_domain,*FaceNode_Matrix_->Importer(),Add); } else n_coords=&n_coords_domain; // Sanity HAQ - Only works on Hexes if(FaceNode_Matrix_->GlobalMaxNumEntries()!=4) {if(!Comm_->MyPID()) printf("ERROR: FaceMatrixFreePreconditioner only works on Hexes"); ML_CHK_ERR(-2);} // Allocate vector nullspace=new Epetra_MultiVector(*FaceDomainMap_,3); // Fill matrix - NTS this will *NOT* do periodic BCs correctly. 
double *a=new double[dim]; double *b=new double[dim]; double *c=new double[dim]; for(int i=0;i<Nf;i++){ int Ni,*indices; double *values; FaceNode_Matrix_->ExtractMyRowView(i,Ni,values,indices); if(Ni != 4){ printf("ERROR: Face %d has only %d nodes\n",i,Ni); ML_free(a); ML_free(b); ML_free(c); ML_CHK_ERR(-1); } a[0] = (*n_coords)[0][indices[1]] - (*n_coords)[0][indices[0]]; a[1] = (*n_coords)[1][indices[1]] - (*n_coords)[1][indices[0]]; a[2] = (*n_coords)[2][indices[1]] - (*n_coords)[2][indices[0]]; b[0] = (*n_coords)[0][indices[2]] - (*n_coords)[0][indices[0]]; b[1] = (*n_coords)[1][indices[2]] - (*n_coords)[1][indices[0]]; b[2] = (*n_coords)[2][indices[2]] - (*n_coords)[2][indices[0]]; cross_product(a,b,c); // HAQ - Hardwiring for hexes // HAQ - Absolute value, presuming all hexes are actually pointed the same way. This is a HAQ!!! (*nullspace)[0][i]=ABS(c[0])/6.0; (*nullspace)[1][i]=ABS(c[1])/6.0; (*nullspace)[2][i]=ABS(c[2])/6.0; } /* Cleanup */ if(FaceNode_Matrix_->Importer()) delete n_coords; delete [] a; delete [] b; delete [] c; delete [] d_coords; return 0; }
/*
 * Build the edge-by-node operator T for the structured example grid.
 *
 * For every local edge one row is assembled in MSR layout (diagonal slot
 * set to 0, off-diagonals appended after position Nlocal_edges+1): a
 * horizontal edge gets -1 at its "southwest" node (skipped when ii == -1
 * after the decrement) and +1 at its "southeast" node; any other edge
 * gets -1 at its "northwest" node and, when jj != 0, +1 at its
 * "southwest" node.  (Presumably this is the discrete gradient -- confirm
 * against the caller.)
 *
 * The MSR arrays are converted with ML_MSR2CSR and handed to
 * AZ_Tmat_transform2ml, which creates the returned operator.  The pre-
 * communication pattern of Kn_mat is cloned into the result.
 *
 * Kn_mat->data is interpreted as a struct aztec_context.
 * Exits the program if the work arrays cannot be allocated.
 */
ML_Operator *user_T_build(struct user_partition *Edge_Partition,
                          struct user_partition *Node_Partition,
                          ML_Operator *Kn_mat, ML_Comm *comm)
{
  int nx, i, ii, jj, horv, Ncols, Nexterns;
  int *Tmat_bindx;
  double *Tmat_val;
  ML_Operator *Tmat;
  struct ML_CSR_MSRdata *csr_data;
  struct aztec_context *aztec_context;
  int global_id;
  int Nlocal_nodes, Nlocal_edges;
  int nz_ptr;

  Nlocal_nodes = Node_Partition->Nlocal;
  Nlocal_edges = Edge_Partition->Nlocal;

  /* grid is nx by nx nodes; the small shift guards the sqrt truncation */
  nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001);

  /* at most two off-diagonals per edge plus the MSR pointer section */
  Tmat_bindx = (int    *) malloc((3*Nlocal_edges+1)*sizeof(int));
  Tmat_val   = (double *) malloc((3*Nlocal_edges+1)*sizeof(double));
  if ( (Tmat_bindx == NULL) || (Tmat_val == NULL) ) {
    printf("user_T_build: not enough space for T\n");
    exit(1);
  }

  Tmat_bindx[0] = Nlocal_edges + 1;
  for (i = 0; i < Nlocal_edges; i++) {
    global_id = (Edge_Partition->my_global_ids)[i];
    Tmat_val[i] = 0.0;

    invindex(global_id, &ii, &jj, nx, &horv);
    nz_ptr = Tmat_bindx[i];

    ii--;

    if (horv == HORIZONTAL) {
      if (ii != -1) {
        Tmat_bindx[nz_ptr] = southwest(ii,jj,nx);
        Tmat_val[nz_ptr++] = -1.;
      }
      Tmat_bindx[nz_ptr] = southeast(ii,jj,nx);
      Tmat_val[nz_ptr++] = 1.;
    }
    else {
      if (ii == -1) ii = nx-1;
      Tmat_bindx[nz_ptr] = northwest(ii,jj,nx);
      Tmat_val[nz_ptr++] = -1.;
      if (jj != 0) {
        Tmat_bindx[nz_ptr] = southwest(ii,jj,nx);
        Tmat_val[nz_ptr++] = 1.;
      }
    }
    Tmat_bindx[i+1] = nz_ptr;
  }

  csr_data = (struct ML_CSR_MSRdata *)
             ML_allocate(sizeof(struct ML_CSR_MSRdata));
  if (csr_data == NULL) {
    printf("user_T_build: not enough space for csr_data\n");
    exit(1);
  }
  csr_data->columns = Tmat_bindx;
  csr_data->values  = Tmat_val;
  ML_MSR2CSR(csr_data, Nlocal_edges, &Ncols);

  aztec_context = (struct aztec_context *) Kn_mat->data;
  Nexterns = (aztec_context->Amat->data_org)[AZ_N_external];

  AZ_Tmat_transform2ml(Nexterns, Node_Partition->needed_external_ids,
                       reordered_node_externs,
                       Tmat_bindx, Tmat_val, csr_data->rowptr, Nlocal_nodes,
                       Node_Partition->my_global_ids,
                       comm, Nlocal_edges, &Tmat);

  /* only the wrapper struct is released here; the arrays stay with Tmat */
  ML_free(csr_data);
  Tmat->data_destroy = ML_CSR_MSRdata_Destroy;
  ML_CommInfoOP_Clone(&(Tmat->getrow->pre_comm), Kn_mat->getrow->pre_comm);

  return(Tmat);
}
// ================================================ ====== ==== ==== == = // Copied from ml_agg_genP.c static void ML_Init_Aux(ML_Operator* A, Teuchos::ParameterList &List) { int i, j, n, count, num_PDEs, BlockRow, BlockCol; double threshold; int* columns; double* values; int allocated, entries = 0; int N_dimensions; int DiagID; double DiagValue; int** filter; double dist; double *LaplacianDiag; int Nghost; // Boundary exchange the coords double *x_coord=0, *y_coord=0, *z_coord=0; RefMaxwell_SetupCoordinates(A,List,x_coord,y_coord,z_coord); int dim=(x_coord!=0) + (y_coord!=0) + (z_coord!=0); /* Sanity Checks */ if(dim == 0 || ((!x_coord && (y_coord || z_coord)) || (x_coord && !y_coord && z_coord))){ std::cerr<<"Error: Coordinates not defined. This is necessary for aux aggregation (found "<<dim<<" coordinates).\n"; exit(-1); } num_PDEs = A->num_PDEs; N_dimensions = dim; threshold = A->aux_data->threshold; ML_Operator_AmalgamateAndDropWeak(A, num_PDEs, 0.0); n = A->invec_leng; Nghost = ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm); LaplacianDiag = (double *) ML_allocate((A->getrow->Nrows+Nghost+1)* sizeof(double)); filter = (int**) ML_allocate(sizeof(int*) * n); allocated = 128; columns = (int *) ML_allocate(allocated * sizeof(int)); values = (double *) ML_allocate(allocated * sizeof(double)); for (i = 0 ; i < n ; ++i) { BlockRow = i; DiagID = -1; DiagValue = 0.0; ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0); for (j = 0; j < entries; j++) { BlockCol = columns[j]; if (BlockRow != BlockCol) { dist = 0.0; switch (N_dimensions) { case 3: dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]); case 2: dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]); case 1: dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]); } if (dist == 0.0) { printf("node %d = %e ", i, x_coord[BlockRow]); if (N_dimensions > 1) printf(" %e ", 
y_coord[BlockRow]); if (N_dimensions > 2) printf(" %e ", z_coord[BlockRow]); printf("\n"); printf("node %d = %e ", j, x_coord[BlockCol]); if (N_dimensions > 1) printf(" %e ", y_coord[BlockCol]); if (N_dimensions > 2) printf(" %e ", z_coord[BlockCol]); printf("\n"); printf("Operator has inlen = %d and outlen = %d\n", A->invec_leng, A->outvec_leng); } dist = 1.0 / dist; DiagValue += dist; } else if (columns[j] == i) { DiagID = j; } } if (DiagID == -1) { fprintf(stderr, "ERROR: matrix has no diagonal!\n" "ERROR: (file %s, line %d)\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } LaplacianDiag[BlockRow] = DiagValue; } if ( A->getrow->pre_comm != NULL ) ML_exchange_bdry(LaplacianDiag,A->getrow->pre_comm,A->getrow->Nrows, A->comm, ML_OVERWRITE,NULL); for (i = 0 ; i < n ; ++i) { BlockRow = i; ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0); for (j = 0; j < entries; j++) { BlockCol = columns[j]; if (BlockRow != BlockCol) { dist = 0.0; switch (N_dimensions) { case 3: dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]); case 2: dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]); case 1: dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]); } dist = 1.0 / dist; values[j] = dist; } } count = 0; for (j = 0 ; j < entries ; ++j) { if ( (i != columns[j]) && (values[j]*values[j] < LaplacianDiag[BlockRow]*LaplacianDiag[columns[j]]*threshold*threshold)){ columns[count++] = columns[j]; } } /* insert the rows */ filter[BlockRow] = (int*) ML_allocate(sizeof(int) * (count + 1)); filter[BlockRow][0] = count; for (j = 0 ; j < count ; ++j) filter[BlockRow][j + 1] = columns[j]; } ML_free(columns); ML_free(values); ML_free(LaplacianDiag); ML_Operator_UnAmalgamateAndDropWeak(A, num_PDEs, 0.0); A->aux_data->aux_func_ptr = A->getrow->func_ptr; A->getrow->func_ptr = ML_Aux_Getrow; A->aux_data->filter = filter; A->aux_data->filter_size = n; // Cleanup ML_free(x_coord); 
ML_free(y_coord); ML_free(z_coord); }