// Evaluates a cumulative distribution function at `x` for location `mu` and
// scale `sigma`.
//
// The argument is first standardised to z = (x - mu) / sigma.  For z >= 0 the
// result is 1 - PDF_0(z) * poly(u), where u = 1 / (1 + b0 * z) and poly is a
// fifth-degree polynomial in u with coefficients b1..b5.  Negative z is
// handled through the reflection CDF(z) = 1 - CDF(-z).
//
// NOTE(review): b0..b5 and PDF_0 are defined outside this block; this looks
// like a polynomial approximation of the normal CDF -- confirm against their
// definitions.
double CDF (double x, double mu, double sigma)
{
    const double z = (x - mu) / sigma;

    // Only the non-negative half is approximated directly; mirror the rest.
    if (z < 0)
    {
        return 1 - CDF (-z, 0, 1);
    }

    const double u = 1 / (1 + b0 * z);
    const double poly = b1 * u
                      + b2 * u * u
                      + b3 * u * u * u
                      + b4 * u * u * u * u
                      + b5 * u * u * u * u * u;

    return 1 - PDF_0 (z) * poly;
}
// Turns a list of N points `x` into N + 1 probabilities built from the CDF
// values P = CDF(x):
//
//   p[0]   = P[0]
//   p[i+1] = Q[i]            for i = 0 .. N-2, where Q = Util::diff(P)
//   p[N]   = 1 - P[N-1]
//
// NOTE(review): Util::diff is defined elsewhere; presumably Q[i] = P[i+1] - P[i],
// which would make the entries the masses of the bins delimited by `x` -- confirm.
//
// An empty `x` yields a single entry equal to 1 (no cut points, so all of the
// mass is in one bin) instead of indexing into an empty vector.
vector<double> Rico::Pr(vector<double> x) const
{
    vector<double> p;

    // Without this guard P[0] and P[N-1] below would read out of bounds.
    if (x.empty())
    {
        p.push_back(1.0);
        return p;
    }

    const size_t N = x.size();
    vector<double> P = CDF(x);
    vector<double> Q = Util::diff(P);

    p.reserve(N + 1);
    p.push_back(P[0]);                 // mass below the first point
    for (size_t i = 0; i + 1 < N; ++i) // N - 1 interior entries
    {
        p.push_back(Q[i]);
    }
    p.push_back(1 - P[N - 1]);         // mass above the last point

    return p;
}
// generate samples from a distribution using inverse CDF sampling // pdf input is expected to be an Nx2 matrix with col 1 = values and col 2 = probabilities of those values // step size scales pdf to equal 1, set equal Matrix invCDFsample(const int& samples, const double& step, const Matrix& pdf) { Matrix samp(samples,1); double tempSamp = 0.; int j = 0; // integrate to generate CDF Matrix CDF(pdf.rows(),1); CDF[0][0] = pdf[0][1]*step; for(int i=0;i<CDF.rows()-1;i++) { //cout << "CDF[i] = " << CDF[i][0] << " pdf[i] = " << pdf[i][1] << endl; CDF[i+1][0] = CDF[i][0] + pdf[i+1][1]; //cout << "CDF[i+1] = " << CDF[i+1][0] << endl; } // multiply by step size to scale integral correctly CDF = CDF*step; //CDF.print(); // choose sample from uniform generator according to CDF for(int i=0;i<samples;i++) { j = 0; tempSamp = rvStdUniform(); while( CDF[j][0] < tempSamp ) { j++; } if( j == 0 ) samp[i][0] = pdf[j][0]; else samp[i][0] = (pdf[j-1][0]+pdf[j][0])/2; } return samp; }
/**
 * Computes the Lomb-Scargle periodogram of the matrix "data". "data" should contain at least three
 * columns: time, measurement and measurement error. The periodogram is calculated at "samples"
 * frequencies, evenly spaced in frequency starting at 1/Pmax with a step of (1/Pmin - 1/Pmax)/samples
 * (so the sampled periods are not evenly spaced, and 1/Pmin itself is not sampled). Rows of the result
 * are stored in order of increasing period.
 *
 * The function returns a matrix of "samples" rows and several columns, including period, power (z) and
 * an estimation of the upper bound for the false alarm probability. The estimation is calculated using
 * the method of Baluev, 2008 (Baluev08). The column PS_Z_LS contains the unnormalized LS periodogram
 * (z = 1/2 * (Chi^2_0 - Chi^2_SC)), while the column PS_Z contains z_1 = 1/2 * N_H * z / Chi^2_0 (z_1 in Baluev08).
 * The FAP upper bound is estimated as ~ tau(z_1). (Another estimate of the FAP can be calculated by
 * estimating the indep. frequencies through your own algorithm, or using the ok_periodogram_boot routine.)
 *
 * @param data Input data containing the data; each row containing (t_i, x_i, sigma_i)
 * @param samples Number of frequencies sampled
 * @param Pmin Minimum period sampled
 * @param Pmax Maximum period sampled
 * @param method Method to compute periodogram (ignored)
 * @param timecol Time column (e.g. 0) in the matrix data
 * @param valcol Value column (e.g. 1) in the matrix data
 * @param sigcol Sigma column (e.g. 2) in the matrix data
 * @param p If not NULL, it is used to return additional info for the periodogram and reuse matrices to save space/speed:
 * p->per is reused as the result when it is samples x PS_SIZE, and p->buf is reused as scratch space when it is
 * (number of data rows) x 5; otherwise fresh matrices are allocated. On return p->per, p->buf and p->zmax are set, and
 * if p->calc_z_fap was set, p->z_fap_1, p->z_fap_2 and p->z_fap_3 are filled in and the flag is cleared. If you pass
 * a value different than NULL, you are responsible for deallocating the workspace and its fields.
 * NOTE(review): an earlier version of this comment described p->buf as an array of gsl_matrix* sized by
 * omp_get_max_threads(); this routine treats it as a single gsl_matrix* -- confirm against the workspace definition.
 * @return A matrix containing: {PS_TIME, PS_Z, PS_FAP, PS_Z_LS, PS_TAU, PS_WIN} (period, power, FAP upper limit,
 * unnormalized LS power, phase offset tau, and the same power statistic computed with unit values and unit weights).
 * You are responsible for deallocating it.
 */
gsl_matrix* ok_periodogram_ls(const gsl_matrix* data, const unsigned int samples, const double Pmin, const double Pmax, const int method,
        unsigned int timecol, unsigned int valcol, unsigned int sigcol, ok_periodogram_workspace* p) {

    gsl_matrix* ret = NULL;
    gsl_matrix* buf = NULL;
    gsl_vector* bufv = gsl_vector_alloc(data->size1);

    int ndata = data->size1;

    // If pre-allocated buffers of the right shape are passed through p, reuse
    // them; otherwise (or if p is null) allocate those buffers below.
    if (p != NULL) {
        if (p->per != NULL && MROWS(p->per) == samples && MCOLS(p->per) == PS_SIZE)
            ret = p->per;
        // The scratch buffer must be validated against its own dimensions and
        // assigned to buf (not ret); checking p->per here would also
        // dereference a NULL p->per.
        if (p->buf != NULL && MROWS(p->buf) == ndata && MCOLS(p->buf) == 5)
            buf = p->buf;
    }

    ret = (ret != NULL ? ret : gsl_matrix_alloc(samples, PS_SIZE));
    buf = (buf != NULL ? buf : gsl_matrix_alloc(ndata, 5));

    double fmin = 1. / Pmax;
    double fmax = 1. / Pmin;
    double df = (fmax - fmin) / (double) samples;

    gsl_matrix_get_col(bufv, data, timecol);
    double W = 2. * M_PI * gsl_stats_sd(bufv->data, 1, ndata) / Pmin;
    gsl_matrix_get_col(bufv, data, valcol);
    double avg = gsl_stats_mean(bufv->data, 1, ndata);
    double z1_max = 0.;
    double xa[ndata];

    // pre-calculate cdf, sdf: cos/sin of the per-step phase increment
    // (BUF_CDF, BUF_SDF), cos/sin at the starting frequency (BUF_C, BUF_S),
    // the inverse-variance weights (BUF_SIG) and the mean-subtracted values.
    for (int i = 0; i < ndata; i++) {
        double t = MGET(data, i, timecol) - MGET(data, 0, timecol);
        MSET(buf, i, BUF_CDF, cos(2 * M_PI * df * t));
        MSET(buf, i, BUF_SDF, sin(2 * M_PI * df * t));
        MSET(buf, i, BUF_C, cos(2 * M_PI * fmin * t));
        MSET(buf, i, BUF_S, sin(2 * M_PI * fmin * t));
        MSET(buf, i, BUF_SIG, 1. / (MGET(data, i, sigcol) * MGET(data, i, sigcol)));
        xa[i] = MGET(data, i, valcol) - avg;
    }

    // Calculate periodogram by looping over all angular frequencies
    for (int i = 0; i < samples; i++) {
        // Current frequency
        double f = fmin + df * i;
        double w = 2 * M_PI*f;

        // Calculate tau(w)
        double s_2wt = 0.;
        double c_2wt = 0.;

        for (int j = 0; j < ndata; j++) {
            double cos_wt = C(j);
            double sin_wt = S(j);
            c_2wt += (1. - 2. * sin_wt * sin_wt) * SIG(j);
            s_2wt += (2. * sin_wt * cos_wt) * SIG(j);
        }

        double tau = atan2(s_2wt, c_2wt) / (2. * w);
        double numa = 0.;
        double numb = 0.;
        double dena = 0.;
        double denb = 0.;
        double numa_w = 0.;
        double numb_w = 0.;
        double dena_w = 0.;
        double denb_w = 0.;

        double coswtau = cos(w * tau);
        double sinwtau = sin(w * tau);
        double chi2_h = 0.;
        double chi2_h_w = 0;

        for (int j = 0; j < ndata; j++) {
            double sig = SIG(j);

            const double cos_wt = C(j);
            const double sin_wt = S(j);

            double cos_wdf = CDF(j);
            double sin_wdf = SDF(j);

            // cos/sin of w * (t - tau) via the angle-difference formulas
            double c = cos_wt * coswtau + sin_wt * sinwtau;
            double s = sin_wt * coswtau - cos_wt * sinwtau;
            double x = xa[j];

            // Advance the stored cos/sin by one frequency step using the
            // angle-addition formulas, ready for the next iteration of i.
            MSET(buf, j, BUF_C, cos_wt * cos_wdf - sin_wt * sin_wdf);
            MSET(buf, j, BUF_S, sin_wt * cos_wdf + cos_wt * sin_wdf);

            numa += x * c * sig;
            numb += x * s * sig;
            dena += c * c * sig;
            denb += s * s * sig;
            chi2_h += x * x * sig;

            // Same sums with unit values and unit weights (stored as PS_WIN)
            numa_w += c;
            numb_w += s;
            dena_w += c*c;
            denb_w += s*s;
            chi2_h_w += 1;
        }

        double z = 0.5 * (numa * numa / dena + numb * numb / denb);
        double z_1 = z * ndata / chi2_h;
        double w_1 = 0.5 * (numa_w * numa_w / dena_w + numb_w * numb_w / denb_w) * ndata / chi2_h_w;

        double fap_single = pow(1. - 2. * z_1 / (double) ndata, 0.5 * (double) (ndata - 3.));
        double tau_z = W * fap_single * sqrt(z_1);

        // Frequencies increase with i, so fill rows back to front to store
        // the result in order of increasing period.
        MSET(ret, samples - i - 1, PS_TIME, 1. / f);
        MSET(ret, samples - i - 1, PS_Z, z_1);
        MSET(ret, samples - i - 1, PS_Z_LS, z);
        MSET(ret, samples - i - 1, PS_FAP, MIN(fap_single + tau_z, 1.));
        MSET(ret, samples - i - 1, PS_TAU, tau);
        MSET(ret, samples - i - 1, PS_WIN, w_1);

        z1_max = MAX(z1_max, z_1);
    }

    // Optionally solve for the power levels matching FAP = 1e-1, 1e-2, 1e-3.
    if (p != NULL && p->calc_z_fap) {
        gsl_root_fsolver * s = gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
        double pars[3];
        pars[0] = ndata;
        pars[1] = W;
        pars[2] = 0.;

        gsl_function F;
        F.function = _baluev_tau;
        F.params = pars;

        // Grow the upper bracket until the FAP there drops below 1e-3
        double zz = z1_max;
        while (_baluev_tau(zz, pars) > 1e-3)
            zz *= 2;

        p->z_fap_3 = _find_z(s, &F, 1e-3, 0.1, zz);
        p->z_fap_2 = _find_z(s, &F, 1e-2, 0.1, p->z_fap_3);
        p->z_fap_1 = _find_z(s, &F, 1e-1, 0.1, p->z_fap_2);

        gsl_root_fsolver_free(s);
        p->calc_z_fap = false;
    }

    // Without a workspace the scratch buffer is ours to release; with one,
    // hand both matrices back to the caller through p.
    if (p == NULL) {
        gsl_matrix_free(buf);
    } else {
        p->per = ret;
        p->buf = buf;
        p->zmax = z1_max;
    }

    gsl_vector_free(bufv);

    return ret;
}
// Returns PDF(t) / CDF(t), or 0 when CDF(t) compares equal to zero so that
// the division is never performed with a zero denominator.  PDF(t) is only
// evaluated when the denominator is non-zero.
float V(float t)
{
    const float denominator = CDF(t);

    if (denominator == 0)
    {
        return 0;
    }

    return PDF(t) / denominator;
}
vector<double> Rico::CDF(vector<double> X) const { vector<double> P; for(int i = 0; i < X.size(); i++) P.push_back(CDF(X[i])); return P; }