void ImprovedFastGaussTransform::Evaluate() { compute_C(); for(int j=0; j < M; j++) { pG[j]=0.0; int target_base=j*d; for(int k=0; k<K; k++){ int center_base=k*d; double target_center_distance_square=0.0; for(int i=0; i<d; i++){ dy[i]=py[target_base+i]-pcc[center_base+i]; target_center_distance_square += dy[i]*dy[i]; if (target_center_distance_square > ry_square[k]) break; } if (target_center_distance_square <= ry_square[k]){ compute_target_center_monomials(); double g=exp(-target_center_distance_square/h_square); for(int alpha=0; alpha<p_max_total; alpha++){ pG[j]+=(C[k*p_max_total+alpha]*g*target_center_monomials[alpha]); } } } } }
// Constructs a subarray from the dynamic parameters: copies the row/column
// counts and cell descriptions out of dp_, optionally widens the column counts
// for ECC bits, derives area.h / area.w for either the plain RAM layout or the
// CAM / fully-associative layout, and finally calls compute_C().
//
// NOTE(review): every ceil(x / y) below keeps the original operands. If both
// operands are integer types the quotient is truncated before ceil() sees it
// -- confirm the declared types of num_cols* and the *_stitching_ constants.
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
  dp(dp_),
  num_rows(dp.num_r_subarray),
  num_cols(dp.num_c_subarray),
  num_cols_fa_cam(dp.tag_num_c_subarray),
  num_cols_fa_ram(dp.data_num_c_subarray),
  cell(dp.cell),
  cam_cell(dp.cam_cell),
  is_fa(is_fa_)
{
  if (is_fa || dp.pure_cam)
  {
    // CAM or fully-associative array.
    // No dummy row is added to num_rows here since the dummy row does not
    // need a decoder.
    if (g_ip->add_ecc_b_)
    {
      num_cols_fa_cam += (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_);
    }

    if (is_fa)
    {
      // Fully associative cache: CAM columns plus RAM columns.
      if (g_ip->add_ecc_b_)
      {
        num_cols_fa_ram += (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_);
      }
      num_cols = num_cols_fa_cam + num_cols_fa_ram;
    }
    else
    {
      // Pure CAM: there is no RAM part.
      num_cols_fa_ram = 0;
      num_cols = num_cols_fa_cam;
    }

    // Height of the subarray is decided by the CAM array; blank space in the
    // SRAM array is filled with dummy cells.
    area.h = cam_cell.h * (num_rows + 1);
    area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
      + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_) * g_tp.ram_wl_stitching_overhead_
      + 16 * g_tp.wire_local.pitch    // overhead for the NAND gate connecting the two halves
      + 128 * g_tp.wire_local.pitch;  // overhead for the drivers from matchline to wordline of RAM
  }
  else
  {
    // Plain RAM array.
    if (g_ip->add_ecc_b_)
    {
      num_cols += (int)ceil(num_cols / num_bits_per_ecc_b_);  // ECC overhead
    }

    // Pick the wordline-stitching interval matching the RAM cell technology.
    uint32_t ram_num_cells_wl_stitching;
    if (dp.ram_cell_tech_type == lp_dram)
    {
      ram_num_cells_wl_stitching = dram_num_cells_wl_stitching_;
    }
    else if (dp.ram_cell_tech_type == comm_dram)
    {
      ram_num_cells_wl_stitching = comm_dram_num_cells_wl_stitching_;
    }
    else
    {
      ram_num_cells_wl_stitching = sram_num_cells_wl_stitching_;
    }

    area.h = cell.h * num_rows;
    area.w = cell.w * num_cols
      + ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_;  // stitching overhead
  }

  assert(area.h>0);
  assert(area.w>0);
  compute_C();
}
//Compute the Potential double SCFPotentialEval(double R,double Z, double phi, double t, struct potentialArg * potentialArgs) { double * args= potentialArgs->args; //Get args double a = *args++; int isNonAxi = (int)*args++; int N = *args++; int L = *args++; int M = *args++; double* Acos = args; double* Asin; if (isNonAxi==1) //LCOV_EXCL_START { Asin = args + N*L*M; } //LCOV_EXCL_STOP //convert R,Z to r, theta double r; double theta; cyl_to_spher(R, Z,&r, &theta); double xi; calculateXi(r, a, &xi); //Compute the gegenbauer polynomials and its derivative. double C[N*L]; compute_C(xi, N, L, &C[0]); //Compute phiTilde and its derivative double phiTilde[L*N]; compute_phiTilde(r, a, N, L, &C[0], &phiTilde[0]); //Compute Associated Legendre Polynomials int M_eff = M; int size = 0; if (isNonAxi==0) { M_eff = 1; size = L; } else{ //LCOV_EXCL_START size = L*L - L*(L-1)/2; } //LCOV_EXCL_STOP double P[size]; compute_P(cos(theta), L,M_eff, &P[0]); double potential; double (*PhiTilde_Pointer[1]) = {&phiTilde[0]}; double (*P_Pointer[1]) = {&P[0]}; double Constant[1] = {1.}; if (isNonAxi==1) //LCOV_EXCL_START { double (*Eq[1])(double, double, double, double, double, double, int) = {&computePhi}; equations e = {Eq,&PhiTilde_Pointer[0],&P_Pointer[0],&Constant[0]}; computeNonAxi(a, N, L, M,r, theta, phi, Acos, Asin, 1, e, &potential); } //LCOV_EXCL_STOP else { double (*Eq[1])(double, double, double) = {&computeAxiPhi}; axi_equations e = {Eq,&PhiTilde_Pointer[0],&P_Pointer[0],&Constant[0]}; compute(a, N, L, M,r, theta, phi, Acos, 1, e, &potential); } return potential; }
//Compute the Derivatives void computeDeriv(double R,double Z, double phi, double t, struct potentialArg * potentialArgs, double * F) { double * args= potentialArgs->args; //Get args double a = *args++; int isNonAxi = (int)*args++; int N = *args++; int L = *args++; int M = *args++; double* Acos = args; double * caching_i = (args + (isNonAxi + 1)*N*L*M); double *Asin; if (isNonAxi == 1) { Asin = args + N*L*M; } double *cached_type = caching_i; double * cached_coords = (caching_i+ 1); double * cached_values = (caching_i + 4); if ((int)*cached_type==DERIV) { if (*cached_coords == R && *(cached_coords + 1) == Z && *(cached_coords + 2) == phi) { *F = *cached_values; *(F + 1) = *(cached_values + 1); *(F + 2) = *(cached_values + 2); return; } } double r; double theta; cyl_to_spher(R, Z, &r, &theta); double xi; calculateXi(r, a, &xi); //Compute the gegenbauer polynomials and its derivative. double C[N*L]; double dC[N*L]; double d2C[N*L]; compute_C(xi, N, L, &C[0]); compute_dC(xi, N, L, &dC[0]); compute_d2C(xi, N, L, &d2C[0]); //Compute phiTilde and its derivative double phiTilde[L*N]; compute_phiTilde(r, a, N, L, &C[0], &phiTilde[0]); double dphiTilde[L*N]; compute_dphiTilde(r, a, N, L, &C[0], &dC[0], &dphiTilde[0]); double d2phiTilde[L*N]; compute_d2phiTilde(r, a, N, L, &C[0], &dC[0], &d2C[0], &d2phiTilde[0]); //Compute Associated Legendre Polynomials int M_eff = M; int size = 0; if (isNonAxi==0) { M_eff = 1; size = L; } else{ size = L*L - L*(L-1)/2; } double P[size]; compute_P(cos(theta), L,M_eff, &P[0]); double (*PhiTilde_Pointer[3])= {&d2phiTilde[0],&phiTilde[0],&dphiTilde[0]}; double (*P_Pointer[3]) = {&P[0], &P[0], &P[0]}; double Constant[3] = {1., 1., 1.}; if (isNonAxi==1) { double (*Eq[3])(double, double, double, double, double, double, int) = {&computeF_rr, &computeF_phiphi, &computeF_rphi}; equations e = {Eq,&PhiTilde_Pointer[0],&P_Pointer[0],&Constant[0]}; computeNonAxi(a, N, L, M,r, theta, phi, Acos, Asin, 3, e, F); } else { double (*Eq[3])(double, double, 
double) = {&computeAxiF_rr, &computeAxiF_phiphi, &computeAxiF_rphi}; axi_equations e = {Eq,&PhiTilde_Pointer[0],&P_Pointer[0],&Constant[0]}; compute(a, N, L, M,r, theta, phi, Acos, 3, e, F); } //Caching *cached_type = (double)DERIV; * cached_coords = R; * (cached_coords + 1) = Z; * (cached_coords + 2) = phi; * (cached_values) = *F; * (cached_values + 1) = *(F + 1); * (cached_values + 2) = *(F + 2); }
/* Repeatedly extracts splice forms from the set of spliced alignments in
   <csa>. In every round the still-unprocessed alignment whose L[] | R[] set
   has the most bits is selected, that set is reported through
   csa->process_splice_form (if set), and its members are removed from the
   unprocessed set. The loop ends when no unprocessed alignment is left. */
static void compute_csas(ConsensusSA *csa)
{
  unsigned long idx, best, best_size = 0, cand, cand_size;
  GtArray *splice_form;
  GtBittab **C, **left, **right, **L, **R, *remaining, *best_set, *cand_set;
#ifndef NDEBUG
  unsigned long remaining_size, prev_remaining_size;
  gt_assert(csa && csa->set_of_sas);
#endif

  /* init sets: five tables with one bittab per spliced alignment */
  C = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  left = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  right = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  L = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  R = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);

  idx = 0;
  while (idx < csa->number_of_sas) {
    C[idx] = gt_bittab_new(csa->number_of_sas);
    left[idx] = gt_bittab_new(csa->number_of_sas);
    right[idx] = gt_bittab_new(csa->number_of_sas);
    L[idx] = gt_bittab_new(csa->number_of_sas);
    R[idx] = gt_bittab_new(csa->number_of_sas);
    idx++;
  }

  remaining = gt_bittab_new(csa->number_of_sas);
  best_set = gt_bittab_new(csa->number_of_sas);
  cand_set = gt_bittab_new(csa->number_of_sas);
  splice_form = gt_array_new(sizeof (unsigned long));

  /* compute sets */
  compute_C(C, csa);
  compute_left(left, csa);
  compute_right(right, csa);
  compute_L(L, C, left, csa->number_of_sas);
  compute_R(R, C, right, csa->number_of_sas);

  /* U_0 = SA: initially every spliced alignment is unprocessed */
  for (idx = 0; idx < csa->number_of_sas; idx++) {
    gt_bittab_set_bit(remaining, idx);
  }

#ifndef NDEBUG
  /* preparation for the shrink assertion at the end of each round */
  prev_remaining_size = gt_bittab_count_set_bits(remaining);
#endif

  while (gt_bittab_is_true(remaining)) {
    /* pick the unprocessed alignment with the largest L | R set; the first
       one seen wins ties because only strictly larger candidates replace it */
    best = GT_UNDEF_ULONG;
    cand = gt_bittab_get_first_bitnum(remaining);
    while (cand != gt_bittab_get_last_bitnum(remaining)) {
      if (best != GT_UNDEF_ULONG) {
        gt_bittab_or(cand_set, L[cand], R[cand]);
        cand_size = gt_bittab_count_set_bits(cand_set);
        if (cand_size > best_size) {
          best = cand;
          best_size = cand_size;
          /* presumably copies cand_set into best_set -- confirm against the
             GtBittab API */
          gt_bittab_equal(best_set, cand_set);
        }
      }
      else {
        /* first candidate of this round */
        best = cand;
        gt_bittab_or(best_set, L[best], R[best]);
        best_size = gt_bittab_count_set_bits(best_set);
      }
      cand = gt_bittab_get_next_bitnum(remaining, cand);
    }

    /* make sure the computed splice form is maximal w.r.t. compatibility */
    gt_assert(splice_form_is_valid(best_set, csa));

    /* process splice form */
    if (csa->process_splice_form) {
      gt_array_reset(splice_form);
      gt_bittab_get_all_bitnums(best_set, splice_form);
      csa->process_splice_form(splice_form, csa->set_of_sas,
                               csa->number_of_sas, csa->size_of_sa,
                               csa->userdata);
    }

    /* U_i = U_i-1 \ SA_i */
    gt_bittab_nand(remaining, remaining, best_set);

#ifndef NDEBUG
    /* ensure that |U_i| < |U_i-1| */
    remaining_size = gt_bittab_count_set_bits(remaining);
    gt_assert(remaining_size < prev_remaining_size);
    prev_remaining_size = remaining_size;
#endif
  }

  /* free sets */
  idx = 0;
  while (idx < csa->number_of_sas) {
    gt_bittab_delete(C[idx]);
    gt_bittab_delete(left[idx]);
    gt_bittab_delete(right[idx]);
    gt_bittab_delete(L[idx]);
    gt_bittab_delete(R[idx]);
    idx++;
  }
  gt_free(C);
  gt_free(left);
  gt_free(right);
  gt_free(L);
  gt_free(R);
  gt_bittab_delete(remaining);
  gt_bittab_delete(best_set);
  gt_bittab_delete(cand_set);
  gt_array_delete(splice_form);
}