/**
 * Piecewise Chebyshev evaluation of psi(xx) (presumably the digamma
 * function, judging by the name -- confirm against the coefficient tables).
 *
 * Three argument ranges are handled:
 *   - xx <  1 : series psi_cs (22 terms) at 2*xx-1, minus 1/xx
 *   - xx <  2 : series psi_cs (22 terms) at 2*(xx-1)-1
 *   - else    : series apsi_cs (15 terms) at 8/xx^2-1, plus log(xx) - 0.5/xx
 *
 * No special handling exists for xx <= 0; such values fall into the first
 * range.
 *
 * @param xx  evaluation point
 * @return    the series value for the range containing xx
 *
 * Any std exception is reported through m->errorOut and terminates the
 * process with exit(1).
 */
double qFinderDMM::psi(double xx){
    try {
        if (xx < 1.0000) {
            const double reciprocal = 1.0 / xx;
            const double series = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
            return -reciprocal + series;
        }

        if (xx < 2.0000) {
            const double shifted = xx - 1.0;
            return cheb_eval(psi_cs, 22, 2.0*shifted-1.0);
        }

        // Large-argument branch: series in 8/xx^2 - 1 plus the explicit terms.
        const double arg = 8.0/(xx*xx)-1.0;
        double result = cheb_eval(apsi_cs, 15, arg);
        result += log(xx) - 0.5/xx;
        return result;
    }
    catch(exception& e){
        m->errorOut(e, "qFinderDMM", "psi");
        exit(1);
    }
}
// Modified Bessel function of order zero. nr_double_t fspecial::i0 (nr_double_t x) { nr_double_t y = fabs (x); nr_double_t val; if (y < 2.0 * sqrt (NR_EPSI)) { val = 1.0; } else if (y <= 3.0) { val = 2.75 + cheb_eval (&bi0_cs, y * y / 4.5 - 1.0); } else if (y <= 8.0) { val = cheb_eval (&ai0_cs, (48.0 / y - 11.0) / 5.0); val = exp (y) * (0.375 + val) / sqrt (y); } else { val = cheb_eval (&ai02_cs, 16.0 / y - 1.0); val = exp (y) * (0.375 + val) / sqrt (y); } return val; }
// Complementary error function (as suggested by the name), evaluated
// piecewise on ax = |x| with one Chebyshev series per range:
//   ax <= 1  : series erfc_xlt1_cs at 2*ax - 1
//   ax <= 5  : exp(-x^2) * series erfc_x15_cs at (ax - 3)/2
//   ax < 10  : exp(-x^2)/ax * series erfc_x510_cs at (2*ax - 15)/5
//   otherwise: delegated to erfc8(ax)
// For negative x the result is reflected as 2 - value.
nr_double_t fspecial::erfc (nr_double_t x) {
  const nr_double_t ax = fabs (x);
  nr_double_t val;

  if (ax <= 1.0) {
    const nr_double_t arg = 2.0 * ax - 1.0;
    val = cheb_eval (&erfc_xlt1_cs, arg);
  }
  else if (ax <= 5.0) {
    const nr_double_t gauss = exp (-x * x);
    const nr_double_t arg = 0.5 * (ax - 3.0);
    val = gauss * cheb_eval (&erfc_x15_cs, arg);
  }
  else if (ax < 10.0) {
    const nr_double_t prefactor = exp (-x * x) / ax;
    const nr_double_t arg = (2.0 * ax - 15.0) / 5.0;
    val = prefactor * cheb_eval (&erfc_x510_cs, arg);
  }
  else {
    val = erfc8 (ax);
  }

  if (x < 0.0) {
    return 2.0 - val;
  }
  return val;
}
int eval_cheb_at_nodes(FMMData *fmm_data, Vec &val_vec){ FMM_Tree_t* tree=fmm_data->tree; const MPI_Comm* comm=tree->Comm(); int cheb_deg=fmm_data->fmm_mat->ChebDeg(); PetscInt m,n, M,N; PetscErrorCode ierr; // Cheb node points size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6; size_t n_nodes3=(cheb_deg+1)*(cheb_deg+1)*(cheb_deg+1); std::vector<double> cheb_node_coord1=pvfmm::cheb_nodes<double>(cheb_deg, 1); int omp_p=omp_get_max_threads(); //int omp_p=1; #pragma omp parallel for for(size_t i=0;i<cheb_node_coord1.size();i++){ cheb_node_coord1[i]=cheb_node_coord1[i]*2.0-1.0; } std::vector<FMMNode_t*> nlist; { // Get non-ghost, leaf nodes. std::vector<FMMNode_t*>& nlist_=tree->GetNodeList(); for(size_t i=0;i<nlist_.size();i++){ if(nlist_[i]->IsLeaf() && !nlist_[i]->IsGhost()){ nlist.push_back(nlist_[i]); } } } m=fmm_data->m; // local rows n=fmm_data->n; // local columns M=fmm_data->M; // global rows N=fmm_data->N; // global columns //create coeff vec Vec coeff_vec; VecCreateMPI(*comm,m,PETSC_DETERMINE,&coeff_vec); tree2vec(*fmm_data,coeff_vec); PetscInt coeff_vec_size; ierr = VecGetLocalSize(coeff_vec, &coeff_vec_size); int data_dof=coeff_vec_size/(n_coeff3*nlist.size()); assert(data_dof*n_coeff3*nlist.size()==coeff_vec_size); PetscScalar *coeff_vec_ptr; PetscScalar *val_vec_ptr; ierr = VecGetArray(coeff_vec, &coeff_vec_ptr); ierr = VecGetArray(val_vec, &val_vec_ptr); #pragma omp parallel for for(size_t tid=0;tid<omp_p;tid++){ size_t i_start=(nlist.size()* tid )/omp_p; size_t i_end =(nlist.size()*(tid+1))/omp_p; pvfmm::Vector<double> single_node_coeff_vec(n_coeff3*data_dof); pvfmm::Vector<double> single_node_val_vec(n_nodes3*data_dof); for(size_t i=i_start;i<i_end;i++){ double s=std::pow(2.0,COORD_DIM*nlist[i]->Depth()*0.5*SCAL_EXP); { // coeff_vec: Cheb coeff data for this node size_t coeff_vec_offset=i*n_coeff3*data_dof; for(size_t j=0;j<n_coeff3*data_dof;j++) single_node_coeff_vec[j]=PetscRealPart(coeff_vec_ptr[j+coeff_vec_offset])*s; } // val_vec: Evaluate 
coeff_vec at Chebyshev node points cheb_eval(single_node_coeff_vec, cheb_deg, cheb_node_coord1, cheb_node_coord1, cheb_node_coord1, single_node_val_vec); //std::cout << "here" << std::endl; { // val_vec: places the values into the vector size_t val_vec_offset=i*n_nodes3*data_dof; for(size_t j=0;j<n_nodes3*data_dof;j++){ //std::cout << single_node_val_vec[j] << std::endl; val_vec_ptr[j+val_vec_offset] = single_node_val_vec[j]; } } //std::cout << "not here though" << std::endl; } } ierr = VecRestoreArray(coeff_vec, &coeff_vec_ptr); ierr = VecRestoreArray(val_vec, &val_vec_ptr); return 1; }
/**
 * Shell-matrix multiply: Y = (FMM applied to phi_0 * U) + 1e-5*U + U.
 *
 * Steps, as implemented below:
 *  1. For every local non-ghost leaf, read that leaf's Chebyshev
 *     coefficients from U (scaled by 2^(COORD_DIM*depth*0.5*SCAL_EXP)),
 *     evaluate them at the Chebyshev node grid, multiply pointwise by the
 *     matching slice of fmm_data->phi_0_vec, and store the Chebyshev
 *     approximation of the product in the leaf's ChebData().
 *  2. Run the FMM on the tree.
 *  3. Copy each leaf's cheb_out (scaled by 0.5^(COORD_DIM*depth*0.5*SCAL_EXP))
 *     into Y.
 *  4. Add 1e-5*U (regularisation) and U to Y.
 *
 * @param M  Shell matrix whose context is an FMMData*.
 * @param U  Input vector of Chebyshev coefficients (read only here).
 * @param Y  Output vector, overwritten.
 * @return   0 on success, or a PETSc error code via CHKERRQ for the final
 *           VecAXPY / VecDestroy calls. Other PETSc return codes are stored
 *           in ierr but not checked, as before.
 */
int mult(Mat M, Vec U, Vec Y){
  PetscErrorCode ierr;
  FMMData* fmm_data=NULL;
  MatShellGetContext(M, &fmm_data);
  FMM_Tree_t* tree=fmm_data->tree;
  const MPI_Comm* comm=tree->Comm();
  int cheb_deg=fmm_data->fmm_mat->ChebDeg();
  Vec& phi_0_vec=fmm_data->phi_0_vec;
  int omp_p=omp_get_max_threads();
  pvfmm::Profile::Tic("FMM_Mul",comm,true);

  std::vector<FMMNode_t*> nlist;
  { // Get non-ghost, leaf nodes.
    std::vector<FMMNode_t*>& nlist_=tree->GetNodeList();
    for(size_t i=0;i<nlist_.size();i++){
      if(nlist_[i]->IsLeaf() && !nlist_[i]->IsGhost()){
        nlist.push_back(nlist_[i]);
      }
    }
  }
  assert(nlist.size()>0);

  // Cheb node points
  size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;
  size_t n_nodes3=(cheb_deg+1)*(cheb_deg+1)*(cheb_deg+1);
  std::vector<double> cheb_node_coord1=pvfmm::cheb_nodes<double>(cheb_deg, 1);
  #pragma omp parallel for
  for(size_t i=0;i<cheb_node_coord1.size();i++){
    cheb_node_coord1[i]=cheb_node_coord1[i]*2.0-1.0;
  }

  // Input Vector ( \phi_0_vec * U )
  pvfmm::Profile::Tic("FMM_Input",comm,true);
  {
    PetscInt U_size;
    PetscInt phi_0_size;
    ierr = VecGetLocalSize(U, &U_size);
    ierr = VecGetLocalSize(phi_0_vec, &phi_0_size);
    int data_dof=U_size/(n_coeff3*nlist.size());
    assert(data_dof*n_coeff3*nlist.size()==U_size);

    PetscScalar *U_ptr;
    PetscScalar* phi_0_ptr;
    ierr = VecGetArray(U, &U_ptr);
    ierr = VecGetArray(phi_0_vec, &phi_0_ptr);

    // Each iteration handles one contiguous slice of the leaf list and owns
    // its scratch buffers.
    #pragma omp parallel for
    for(size_t tid=0;tid<omp_p;tid++){
      size_t i_start=(nlist.size()* tid   )/omp_p;
      size_t i_end  =(nlist.size()*(tid+1))/omp_p;
      pvfmm::Vector<double> coeff_vec(n_coeff3*data_dof);
      pvfmm::Vector<double> val_vec(n_nodes3*data_dof);
      pvfmm::Vector<double> phi_0_part(n_nodes3*data_dof);
      for(size_t i=i_start;i<i_end;i++){
        double s=std::pow(2.0,COORD_DIM*nlist[i]->Depth()*0.5*SCAL_EXP);

        { // coeff_vec: Cheb coeff data for this node
          size_t U_offset=i*n_coeff3*data_dof;
          size_t phi_0_offset = i*n_nodes3*data_dof;
          for(size_t j=0;j<n_coeff3*data_dof;j++){
            coeff_vec[j]=PetscRealPart(U_ptr[j+U_offset])*s;
          }
          for(size_t j=0;j<n_nodes3*data_dof;j++){
            phi_0_part[j]=PetscRealPart(phi_0_ptr[j+phi_0_offset]);
          }
        }

        // val_vec: Evaluate coeff_vec at Chebyshev node points
        cheb_eval(coeff_vec, cheb_deg, cheb_node_coord1, cheb_node_coord1, cheb_node_coord1, val_vec);

        { // phi_0_part*val_vec
          for(size_t j0=0;j0<data_dof;j0++){
            double* vec=&val_vec[j0*n_nodes3];
            double* phi_0=&phi_0_part[j0*n_nodes3];
            for(size_t j1=0;j1<n_nodes3;j1++) vec[j1]*=phi_0[j1];
          }
        }

        { // Compute Chebyshev approx and store it on the tree node
          pvfmm::Vector<double>& node_coeff=nlist[i]->ChebData();
          if(node_coeff.Dim()!=(data_dof*(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6){
            node_coeff.ReInit((data_dof*(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6);
          }
          pvfmm::cheb_approx<double,double>(&val_vec[0], cheb_deg, data_dof, &node_coeff[0]);
          nlist[i]->DataDOF()=data_dof;
        }
      }
    }

    // Every VecGetArray must be paired with VecRestoreArray; these two were
    // previously left un-restored.
    ierr = VecRestoreArray(U, &U_ptr);
    ierr = VecRestoreArray(phi_0_vec, &phi_0_ptr);
  }
  pvfmm::Profile::Toc();

  // Run FMM ( Compute: G[ \eta * u ] )
  tree->ClearFMMData();
  tree->RunFMM();

  // Copy data from tree to Y
  pvfmm::Profile::Tic("tree2vec",comm,true);
  {
    PetscInt Y_size;
    ierr = VecGetLocalSize(Y, &Y_size);
    int data_dof=Y_size/(n_coeff3*nlist.size());

    PetscScalar *Y_ptr;
    ierr = VecGetArray(Y, &Y_ptr);

    #pragma omp parallel for
    for(size_t tid=0;tid<omp_p;tid++){
      size_t i_start=(nlist.size()* tid   )/omp_p;
      size_t i_end  =(nlist.size()*(tid+1))/omp_p;
      for(size_t i=i_start;i<i_end;i++){
        pvfmm::Vector<double>& coeff_vec=((FMM_Mat_t::FMMData*) nlist[i]->FMMData())->cheb_out;
        double s=std::pow(0.5,COORD_DIM*nlist[i]->Depth()*0.5*SCAL_EXP);

        size_t Y_offset=i*n_coeff3*data_dof;
        for(size_t j=0;j<n_coeff3*data_dof;j++) Y_ptr[j+Y_offset]=coeff_vec[j]*s;
      }
    }

    ierr = VecRestoreArray(Y, &Y_ptr);
  }
  pvfmm::Profile::Toc();

  // Regularize: Y += 1e-5 * U
  Vec alpha;
  PetscScalar sca = (PetscScalar).00001;
  VecDuplicate(Y,&alpha);
  VecSet(alpha,sca);
  ierr = VecPointwiseMult(alpha,alpha,U);
  ierr = VecAXPY(Y,1,alpha);

  // Output Vector ( Compute: U + G[ \eta * U ] )
  pvfmm::Profile::Tic("FMM_Output",comm,true);
  ierr = VecAXPY(Y,1,U);CHKERRQ(ierr);
  pvfmm::Profile::Toc();

  ierr = VecDestroy(&alpha); CHKERRQ(ierr);

  pvfmm::Profile::Toc();
  return 0;
}
int PtWiseTreeMult(FMMData &fmm_data, FMM_Tree_t &tree2){ FMM_Tree_t* tree1=fmm_data.tree; const MPI_Comm* comm=tree1->Comm(); //int omp_p=omp_get_max_threads(); int omp_p = 1; int cheb_deg=fmm_data.fmm_mat->ChebDeg(); std::vector<FMMNode_t*> nlist1; std::vector<FMMNode_t*> nlist2; { // Get non-ghost, leaf nodes for BOTH trees. They must have the same structure. std::vector<FMMNode_t*>& nlist1_=tree1->GetNodeList(); std::vector<FMMNode_t*>& nlist2_=tree2.GetNodeList(); for(size_t i=0;i<nlist1_.size();i++){ if(nlist1_[i]->IsLeaf() && !nlist1_[i]->IsGhost()){ nlist1.push_back(nlist1_[i]); nlist2.push_back(nlist2_[i]); } } } //assert(nlist1.size()>0); // Cheb node points size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6; size_t n_nodes3=(cheb_deg+1)*(cheb_deg+1)*(cheb_deg+1); std::cout << "n_nodes3: " << n_nodes3 << std::endl; std::vector<double> cheb_node_coord1=pvfmm::cheb_nodes<double>(cheb_deg, 1); #pragma omp parallel for for(size_t i=0;i<cheb_node_coord1.size();i++){ cheb_node_coord1[i]=cheb_node_coord1[i]*2.0-1.0; } // PtWise Mult pvfmm::Profile::Tic("FMM_PtWise_Mult",comm,true); { /* PetscInt U_size; PetscInt phi_0_size; ierr = VecGetLocalSize(U, &U_size); ierr = VecGetLocalSize(phi_0_vec, &phi_0_size); int data_dof=U_size/(n_coeff3*nlist.size()); assert(data_dof*n_coeff3*nlist.size()==U_size); */ int data_dof = fmm_data.kernel->ker_dim[0]; std::cout << "data_dof: " << data_dof << std::endl; /* PetscScalar *U_ptr; PetscScalar* phi_0_ptr; ierr = VecGetArray(U, &U_ptr); ierr = VecGetArray(phi_0_vec, &phi_0_ptr); */ #pragma omp parallel for for(size_t tid=0;tid<omp_p;tid++){ size_t i_start=(nlist1.size()* tid )/omp_p; size_t i_end =(nlist1.size()*(tid+1))/omp_p; pvfmm::Vector<double> coeff_vec1(n_coeff3*data_dof); pvfmm::Vector<double> coeff_vec2(n_coeff3*data_dof); pvfmm::Vector<double> val_vec1(n_nodes3*data_dof); pvfmm::Vector<double> val_vec2(n_nodes3*data_dof); std::cout << "val_vec2.Dim() " << val_vec2.Dim() << std::endl; for(size_t 
i=i_start;i<i_end;i++){ double s=std::pow(2.0,COORD_DIM*nlist1[i]->Depth()*0.5*SCAL_EXP); /* { // coeff_vec: Cheb coeff data for this node size_t U_offset=i*n_coeff3*data_dof; size_t phi_0_offset = i*n_nodes3*data_dof; for(size_t j=0;j<n_coeff3*data_dof;j++){ coeff_vec[j]=PetscRealPart(U_ptr[j+U_offset])*s; } for(size_t j=0;j<n_nodes3*data_dof;j++){ phi_0_part[j]=PetscRealPart(phi_0_ptr[j+phi_0_offset]); } } */ { // coeff_vecs: Cheb coeffs for this node for each tree! coeff_vec1 = nlist1[i]->ChebData(); coeff_vec2 = nlist2[i]->ChebData(); } // val_vec: Evaluate coeff_vec at Chebyshev node points cheb_eval(coeff_vec1, cheb_deg, cheb_node_coord1, cheb_node_coord1, cheb_node_coord1, val_vec1); cheb_eval(coeff_vec2, cheb_deg, cheb_node_coord1, cheb_node_coord1, cheb_node_coord1, val_vec2); std::cout << "dim :" << val_vec2.Dim() << std::endl; std::cout << "dim :" << val_vec1.Dim() << std::endl; /* {// phi_0_part*val_vec for(size_t j0=0;j0<data_dof;j0++){ double* vec1=&val_vec1[j0*n_nodes3]; std::cout << val_vec2.Dim() << " " << j0*n_nodes3 << std::endl; double* vec2=&val_vec2[j0*n_nodes3]; for(size_t j1=0;j1<n_nodes3;j1++){ vec1[j1]*=vec2[j1]; } std::cout << "after " << j0 << std::endl; } } */ { for(size_t j0=0;j0<n_nodes3;j0++){ for(size_t j1=0;j1<data_dof;j1++){ val_vec1[j1*n_nodes3+j0]*=val_vec2[j0]; } } } std::cout << "test1" << std::endl; { // Compute Chebyshev approx pvfmm::Vector<double>& coeff_vec=nlist1[i]->ChebData(); if(coeff_vec.Dim()!=(data_dof*(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6){ coeff_vec.ReInit((data_dof*(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3))/6); } pvfmm::cheb_approx<double,double>(&val_vec1[0], cheb_deg, data_dof, &coeff_vec[0]); nlist1[i]->DataDOF()=data_dof; } } } } pvfmm::Profile::Toc(); return 0; }
/*
 * Determine the degree of the acceptance polynomial Ptilde.
 *
 * Allocates *coefs (phmc_max_ptilde_degree doubles; NOT freed here, the
 * caller keeps it), fills it with Ptilde_cheb_coefs(), and then searches for
 * the smallest tried degree for which the sum of |coefs[j]|, j >= degree,
 * drops below acc. The search starts at 2*sloppy_degree and multiplies the
 * degree by 1.2 (truncated to int) per step, for at most 100 steps.
 *
 * If a tried degree exceeds phmc_max_ptilde_degree, or a work buffer cannot
 * be allocated, an error is printed to stderr and the process exits with
 * code -5 (after MPI_Finalize when built with MPI).
 *
 * With g_debug_level > 2 a consistency check is additionally run on random
 * spinor fields and at s = EVMin, and the results are printed.
 *
 *   _degree        out: chosen degree
 *   coefs          out: pointer to the newly allocated coefficient array
 *   EVMin, EVMax   interval passed to Ptilde_cheb_coefs
 *   sloppy_degree  the search starts at twice this value
 *   acc            target for the sum of the remaining |coefficients|
 *   Qsq            operator used in the debug check
 *   repro          forwarded to random_spinor_field_eo
 */
void degree_of_Ptilde(int * _degree, double ** coefs,
                      const double EVMin, const double EVMax,
                      const int sloppy_degree, const double acc,
                      matrix_mult_nd Qsq, const int repro) {
  int i, j;
  double temp, temp2;
  int degree;
  double sum=0.0;

  spinor *ss=NULL, *ss_=NULL, *sc=NULL, *sc_=NULL;
  spinor *auxs=NULL, *auxs_=NULL, *auxc=NULL, *auxc_=NULL;
  spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;

  *coefs = calloc(phmc_max_ptilde_degree, sizeof(double));

  ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
  auxs_ = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
  aux2s_= calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
  sc_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
  auxc_ = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
  aux2c_= calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));

  /* Fail loudly instead of dereferencing a NULL (or mis-aligned NULL) buffer. */
  if(*coefs == NULL || ss_ == NULL || auxs_ == NULL || aux2s_ == NULL ||
     sc_ == NULL || auxc_ == NULL || aux2c_ == NULL) {
    fprintf(stderr, "Error: could not allocate memory in degree_of_Ptilde\n");
#ifdef MPI
    MPI_Finalize();
#endif
    exit(-5);
  }

  /* Round each raw pointer up to the alignment implied by ALIGN_BASE; the */
  /* extra element allocated above leaves room for this shift.             */
  ss    = (spinor *)(((unsigned long int)(ss_)+ALIGN_BASE)&~ALIGN_BASE);
  auxs  = (spinor *)(((unsigned long int)(auxs_)+ALIGN_BASE)&~ALIGN_BASE);
  aux2s = (spinor *)(((unsigned long int)(aux2s_)+ALIGN_BASE)&~ALIGN_BASE);
  sc    = (spinor *)(((unsigned long int)(sc_)+ALIGN_BASE)&~ALIGN_BASE);
  auxc  = (spinor *)(((unsigned long int)(auxc_)+ALIGN_BASE)&~ALIGN_BASE);
  aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE);

  Ptilde_cheb_coefs(EVMin, EVMax, *coefs, phmc_max_ptilde_degree, -1.0);

  if(g_proc_id == g_stdio_proc && g_debug_level > 0){
    printf("# NDPOLY Acceptance Polynomial: EVmin = %f  EVmax = %f\n", EVMin, EVMax);
    printf("# NDPOLY ACceptance Polynomial: desired accuracy is %e \n", acc);
    fflush(stdout);
  }

  degree = 2*sloppy_degree;

  for(i = 0; i < 100 ; i++) {
    if (degree > phmc_max_ptilde_degree) {
      fprintf(stderr, "Error: n_cheby=%d > phmc_max_ptilde_degree=%d in ptilde\n",
              degree, phmc_max_ptilde_degree);
      fprintf(stderr, "Increase n_chebymax\n");
#ifdef MPI
      MPI_Finalize();
#endif
      exit(-5);
    }

    /* Sum of the magnitudes of all coefficients beyond the current degree. */
    sum=0;
    for(j=degree; j<phmc_max_ptilde_degree; j++){
      sum += fabs(coefs[0][j]);
    }

    if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)) {
      printf("# NDPOLY Acceptance Polynomial: Sum remaining | d_n | = %e for degree=%d\n", sum, degree);
      /* degree may equal phmc_max_ptilde_degree here, which is one past the */
      /* last valid index of *coefs; only print an element that exists.      */
      if(degree < phmc_max_ptilde_degree) {
        printf("# NDPOLY Acceptance Polynomial: coef[degree] = %e\n", (*coefs)[degree]);
      }
    }
    if(sum < acc) {
      break;
    }
    degree= (int)(degree*1.2);
  }

  if(g_debug_level > 2) {
    /* Ptilde P S P Ptilde X - X */
    /* for random spinor X       */
    random_spinor_field_eo(ss, repro, RN_GAUSS);
    random_spinor_field_eo(sc, repro, RN_GAUSS);

    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], Qsq);
    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
    Qsq(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0], Qsq);

    diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
    temp = square_norm(&aux2s[0], VOLUME/2, 1) / square_norm(&ss[0], VOLUME/2, 1) / 4.0;

    diff(&aux2c[0],&auxc[0], &sc[0], VOLUME/2);
    temp2 = square_norm(&aux2c[0], VOLUME/2, 1)/square_norm(&sc[0], VOLUME/2, 1) / 4.0;

    if(g_epsbar == 0){
      temp2 = 0.0;
    }
    /* || (Ptilde P S P Ptilde - 1)X ||^2 / || 2X ||^2 */
    if(g_proc_id == g_stdio_proc) {
      printf("# NDPOLY Acceptance Polynomial: relative squared accuracy in components:\n# UP=%e  DN=%e \n", temp, temp2);
    }

    /* Same product evaluated as a scalar at s = EVMin. */
    temp = chebtilde_eval(degree, *coefs, EVMin);
    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, EVMin);
    temp *= EVMin;
    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, EVMin);
    temp *= chebtilde_eval(degree, *coefs, EVMin);
    temp = 0.5*fabs(temp - 1);
    if(g_proc_id == g_stdio_proc) {
      printf("# NDPOLY Acceptance Polynomial: Delta_IR at s=%f: | Ptilde P s_low P Ptilde - 1 |/2 = %e \n", EVMin, temp);
    }
  }

  if(g_proc_id == g_stdio_proc) {
    printf("# NDPOLY Acceptance Polynomial degree set to %d\n\n", degree);
  }

  *_degree = degree;

  /* Free the raw (unaligned) allocations, not the aligned aliases. */
  free(ss_);
  free(auxs_);
  free(aux2s_);
  free(sc_);
  free(auxc_);
  free(aux2c_);
  return;
}