Example #1
0
static str
gsl_bat_chisqprob_cst(bat * retval, bat chi2, dbl datapoints) 
{
	BAT *b, *bn;
	BATiter bi;
	BUN p,q;
	dbl r;
	char *msg = NULL;

	if (datapoints == dbl_nil) {
		throw(MAL, "GSLbat_chisqprob_cst", "Parameter datapoints should not be nil");
	}
	if (datapoints < 0)
		throw(MAL, "gsl.chi2prob", "Wrong value for datapoints");

	if ((b = BATdescriptor(chi2)) == NULL) {
		throw(MAL, "chisqprob", "Cannot access descriptor");
	}
	bi = bat_iterator(b);
	bn = BATnew(TYPE_void, TYPE_dbl, BATcount(b), TRANSIENT);
	if (bn == NULL){
		BBPunfix(b->batCacheid);
		throw(MAL, "gsl.chisqprob", MAL_MALLOC_FAIL);
	}
	BATseqbase(bn, b->hseqbase);
	BATloop(b,p,q) {
		dbl d = *(dbl*)BUNtail(bi,p);
		if ((d == dbl_nil) || (d < 0))
			throw(MAL, "gsl.chi2prob", "Wrong value for chi2");
		r = gsl_cdf_chisq_Q(d, datapoints);
		BUNappend(bn, &r, FALSE);
	}
Example #2
0
/** Test the goodness-of-fit between two \ref apop_pmf models. 

If you send two histograms, I assume that the histograms are synced: for PMFs,
you've used \ref apop_data_to_bins to generate two histograms using the same binspec,
or you've used \ref apop_data_pmf_compress to guarantee that each observation value
appears exactly once in each data set.

In any case, you are confident that all values in the \c observed set appear in the \c
expected set with nonzero weight; otherwise this will return a \f$\chi^2\f$ statistic
of \c GSL_POSINF, indicating that it is impossible for the \c observed data to have
been drawn from the \c expected distribution.

\li If an observation row has weight zero, I skip it. if <tt>apop_opts.verbose >=1 </tt> I will show a warning.

  \ingroup histograms
*/
apop_data *apop_histograms_test_goodness_of_fit(apop_model *observed, apop_model *expected){
    int df = observed->data->weights->size;
    double diff = 0;
    for (int i=0; i< observed->data->weights->size; i++){
        Apop_data_row(observed->data, i, one_obs);
        double obs_val = gsl_vector_get(observed->data->weights, i);
        double exp_val = apop_p(one_obs, expected);
        if (exp_val == 0){
            diff = GSL_POSINF; 
            break;
        }
        if (obs_val==0){
            Apop_notify(1, "element %i of the observed data has weight zero. Skipping it.", i);
            df --;
        } else 
            diff += gsl_pow_2(obs_val - exp_val)/exp_val;
    }
    //Data gathered. Now output
    apop_data   *out    = apop_data_alloc();
    double      toptail = gsl_cdf_chisq_Q(diff, df-1);
    sprintf(out->names->title, "Goodness-of-fit test via Chi-squared statistic");
    apop_data_add_named_elmt(out, "Chi squared statistic", diff);
    apop_data_add_named_elmt(out, "df", df-1);
    apop_data_add_named_elmt(out, "p value",  toptail); 
    apop_data_add_named_elmt(out, "confidence", 1 - toptail);
    return out;
}
Example #3
0
int main(){
    apop_data *data = apop_data_falloc((2, 2),
                           30, 86,
                           24, 38 );
    double stat, chisq;
    stat = calc_chi_squared(data);
    chisq = gsl_cdf_chisq_Q(stat, (data->matrix->size1 - 1)* (data->matrix->size2 - 1));
    printf("chi squared statistic: %g; p, Chi-squared: %g\n", stat,chisq);
    apop_data_show(apop_test_anova_independence(data));
    apop_data_show(apop_test_fisher_exact(data));
}
Example #4
0
static str
gsl_chisqprob(dbl * retval, dbl chi2, dbl datapoints) 
{
	*retval = dbl_nil;
	if ((chi2 == dbl_nil) || (chi2 < 0))
		throw(MAL, "gsl.chi2prob", "Wrong value for chi2");
	if ((datapoints == dbl_nil) || (datapoints < 0))
		throw(MAL, "gsl.chi2prob", "Wrong value for datapoints");
	*retval = gsl_cdf_chisq_Q(chi2, datapoints);
	return MAL_SUCCEED;
}
Example #5
0
int ranex_test(seq_t *seq, double *pvalue, void *param)
{
	unsigned int i, J;
	unsigned int V[6] = { 0, 0, 0, 0, 0, 0 };
	int S, N, etat = 4;

	double pi[6] = {.8750000000,.01562500000,.01367187500,.01196289063,.01046752930,.0732727051};
	double X;

	if ( seq->n < RANEX_TEST_LENGTH ) {
	   fprintf(stderr, "Error[RanEx Test]: Sequence length too short\n");
	   *pvalue = 0.0;
	   return -1;
   	}

	for ( i = 0, J = 0, S = 0, N = 0; i < seq->n; i++ ) {

		S += 2*SEQ(seq, i) - 1;

		if ( S == 0 ) {
			if ( N >= 5 )
				V[5]++;
			else
				V[N]++;

			N = 0;
			J++;
		}
		else if ( S == etat )
			N++;
	}

	if ( S != 0 ) {
		J++;
		if ( N >= 5 )
			V[5]++;
		else
			V[N]++;
	}

	if ( J < 500 ) {
		fprintf(stderr, "Error[RanEx Test]: J=%d<500 Not enough excursions to compute test statistic\n", J);
		return -3;
	}

	for ( i = 0, X = 0.0; i < 6; i++ )
		X += pow(((double)V[i]) - ((double)J)*pi[i], 2.0)/(((double)J)*pi[i]);

	*pvalue = gsl_cdf_chisq_Q(X, 5);

	return 0;
}
Example #6
0
File: rng.c Project: Noughmad/Sola
double chisq ( gsl_histogram* values, gsl_histogram* expected )
{
  gsl_histogram*t  = gsl_histogram_clone(values);
  gsl_histogram_sub(t, expected);
  int j;
  double chi = 0;
  for (j=0; j<B; ++j)
  {
    chi += pow(gsl_histogram_get(t, j), 2)/gsl_histogram_get(expected, j);
  }
  
  return gsl_cdf_chisq_Q(chi, B-1);
}
Example #7
0
Vector & FirthRegression::GetAsyPvalue(){
  int numCov = B.Length();
  pValue.Dimension(B.Length());
  for (int i = 0; i < numCov; i ++){
    double Zstat = B[i] * B[i]/(covB[i][i]);
    // pValue[i] = ndist(Zstat);
    // if (pValue[i] >= 0.5){
    // 	pValue[i] = 2*(1-pValue[i]);
    // } else pValue[i] = 2*pValue[i];
    pValue[i] = gsl_cdf_chisq_Q(Zstat, 1.0);
  }
  return(pValue);
}
Example #8
0
double Pmetric(double chisq_reduction, double ndof, int ntrials, int fAdjusted)
{
    if (fAdjusted && chisq_reduction < MAX_CHISQ_USEFUL)
    {
        double p = gsl_cdf_chisq_Q(chisq_reduction, ndof) * ntrials;
        if (p > 0)
        {
            return p;
        }
        else
        {   /* underflow (chisquared of order 1000 or more) */
            return -chisq_reduction;
        }
    }
    else
    {
        return -chisq_reduction;
    }
}
Example #9
0
  int TestCovariate(Matrix& Xnull, Matrix& Y, Matrix& Xcol,
                    const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){
    Eigen::MatrixXf g;
    G_to_Eigen(Xcol, &g);

    // store U'*G for computing AF later.
    const Eigen::MatrixXf& U = kinshipU.mat;
    this->ug = U.transpose() * g;

    Eigen::RowVectorXf g_mean = g.colwise().mean();
    g = g.rowwise() - g_mean;

    double gTg = g.array().square().sum();
    double t_new = (g.array() * this->transformedY.array()).sum();
    t_new = t_new * t_new / gTg;
    double t_score = t_new / this->gamma;
    this->betaG = (g.transpose() * this->transformedY).sum() / gTg / this->gamma;
    this->betaGVar = this->ySigmaY / gTg / this->gamma;

    this->pvalue = gsl_cdf_chisq_Q(t_score, 1.0);
    return 0;
  }
int SaddlePointApproximation::calculatePvalue(const Eigen::MatrixXf& g_tilde,
                                              float* newPvalue) {
  // find root
  // 1. find boundary of the root
  //    first try [0, 5] or [0, -5]
  const float s = (g_tilde.array() * resid_.array()).sum();
  const float var_s =
      (g_tilde.array().square() * (mu_.array() * (1.0 - mu_.array())).abs())
          .sum();
  // if (fabs(s) < 2.0 * sqrt(var_s) &&
  // !std::getenv("BOLTLMM_FORCE_SADDLEPOINT")) {
  //   fprintf(stderr, "Skip saddle point approximation (far from mean, |%g| >
  //   2.0 * sqrt(%g) )!\n", s, var_s);
  //   return -2;
  // }
  float t_grid[2];
  float y_grid[2];
  if (s > 0) {  // \hat{t} > 0
    t_grid[0] = -0.01;
    t_grid[1] = std::min(s / K_prime2_function(0, g_tilde, mu_), (float)5.);

  } else {
    t_grid[0] = 0.01;
    t_grid[1] = std::max(s / K_prime2_function(0, g_tilde, mu_), (float)-5.);
  }
  y_grid[0] = CGF_equation(t_grid[0], g_tilde, mu_, y_);
  y_grid[1] = CGF_equation(t_grid[1], g_tilde, mu_, y_);

  int iter = 0;
  // extend boundary
  // e.g. [0, 5] => [5, 15] => [15, 60]...
  while (y_grid[0] * y_grid[1] > 0) {
    t_grid[0] = t_grid[1];
    y_grid[0] = y_grid[1];
    t_grid[1] *= 4;
    y_grid[1] = CGF_equation(t_grid[1], g_tilde, mu_, y_);

    ++iter;
    fprintf(stderr, "iter %d, %g -> %g \n", iter, t_grid[1], y_grid[1]);

    if (iter > 10) {
      fprintf(stderr,
              "after 10 iteration, still cannot find boundary conditions\n");
      dumpToFile(y_, "tmp.y");
      dumpToFile(g_tilde, "tmp.g_tilde");
      dumpToFile(mu_, "tmp.mu");
      dumpToFile(resid_, "tmp.resid");
      return 1;  // exit(1);
    }
  }
  if (t_grid[0] > t_grid[1]) {
    std::swap(t_grid[0], t_grid[1]);
    std::swap(y_grid[0], y_grid[1]);
  }
  assert(y_grid[0] < 0 && y_grid[1] > 0);  // TODO: make this more robust
  if (y_grid[0] * y_grid[1] > 0) {
    fprintf(stderr, "Wrong boundary conditions!\n");
    // dumpToFile(covEffect, "tmp.covEffect");
    // dumpToFile(xbeta, "tmp.xbeta");
    dumpToFile(g_tilde, "tmp.g_tilde");
    dumpToFile(mu_, "tmp.mu");
    dumpToFile(resid_, "tmp.resid");
    // exit(1);
    return 1;
  }
  float t_new = t_grid[0];
  float y_new;
  iter = 0;
  // stop condition:
  // 1. secant method narrows to a small enough region
  // 2. new propose point has the differnt sign of statistic s. Usually they
  // have the same sign. Unless numerical issue arises.
  while (fabs(t_grid[1] - t_grid[0]) > 1e-3 || t_new * s < 0) {
    t_new = t_grid[0] +
            (t_grid[1] - t_grid[0]) * (-y_grid[0]) / (y_grid[1] - y_grid[0]);

    // switch to bisect?
    const float dist = t_grid[1] - t_grid[0];
    if (t_grid[1] - t_new < 0.1 * dist) {
      t_new = t_grid[0] + (t_grid[1] - t_grid[0]) * 0.8;
    }
    if (t_new - t_grid[0] < 0.1 * dist) {
      t_new = t_grid[0] + (t_grid[1] - t_grid[0]) * 0.2;
    }

    y_new = CGF_equation(t_new, g_tilde, mu_, y_);
    if (y_new == 0) {
      break;
    } else if (y_new > 0) {
      t_grid[1] = t_new;
      y_grid[1] = y_new;
    } else if (y_new < 0) {
      t_grid[0] = t_new;
      y_grid[0] = y_new;
    }
    ++iter;
    fprintf(stderr, "%g -> %g, %g -> %g \n", t_grid[0], y_grid[0], t_grid[1],
            y_grid[1]);

    if (iter > 10) {
      fprintf(stderr,
              "after 10 iteration, secant method still cannot find solution\n");
      dumpToFile(y_, "tmp.y");
      dumpToFile(g_tilde, "tmp.g_tilde");
      dumpToFile(mu_, "tmp.mu");
      dumpToFile(resid_, "tmp.resid");
      break;
    }
  }
  if (fabs(t_new) < 1e-4 && !std::getenv("BOLTLMM_FORCE_SADDLEPOINT")) {
    fprintf(stderr, "Skip saddle point approximation (t is too small: %g)\n",
            t_new);
    return -3;
  }

  // calculate new p_value
  const float K = K_function(t_new, g_tilde, mu_);
  const float K_prime2 = K_prime2_function(t_new, g_tilde, mu_);
  float w = sqrt(2 * (t_new * s - K));
  if (t_new < 0) {
    w = -w;
  }
  const float v = t_new * sqrt(K_prime2);
  assert(v / w > 0);

  float stat = w + log(v / w) / w;
  stat = stat * stat;

  if (t_new * s < K) {
    fprintf(stderr, "Wrong sqrt operand!\n");
    fprintf(stderr,
            "K = %g, K_prime2 = %g, s = %g, t_new = %g, w = %g, v = %g, stat "
            "= %g\n",
            K, K_prime2, s, t_new, w, v, stat);

    dumpToFile(g_tilde, "tmp.g_tilde");
    dumpToFile(mu_, "tmp.mu");
    dumpToFile(resid_, "tmp.resid");
    return 1;
    // exit(1);
  }
  if (std::getenv("BOLTLMM_DUMP_SADDLEPOINT")) {
    fprintf(stderr, "%s:%d dump saddlepoint\n", __FILE__, __LINE__);
    dumpToFile(g_tilde, "tmp.g_tilde");
    dumpToFile(mu_, "tmp.mu");
    dumpToFile(resid_, "tmp.resid");
  }
  fprintf(stderr,
          "K = %g, K_prime2 = %g, s = %g, t = %g, w = %g, v = %g, stat = %g\n",
          K, K_prime2, s, t_new, w, v, stat);
  float& pvalue_ = *newPvalue;
  pvalue_ = gsl_cdf_chisq_Q(stat, 1.0);
  return 0;
}
bool MultipleTraitLinearRegressionScoreTest::TestCovariateBlock() {
  MultipleTraitLinearRegressionScoreTestInternal& w = *this->work;
  for (int i = 0; i < groupSize; ++i) {
    // delcare const variables
    EMat& G = w.G[i];
    EMat& Ugz = w.Ugz[i];
    EMat& Uyg = w.Uyg[i];
    const EMat& Z = w.groupedZ[i];
    const EMat& Y = w.groupedY[i];
    const EMat& Uyz = w.groupedUyz[i];
    const bool& hasCovariate = w.groupedHasCovariate[i];
    const EMat& ZZinv = w.groupedZZinv[i];
    const EMat& L = w.groupedL[i];

    // center and scale g
    scale(&G);

    // calculate Ugz, Uyg
    if (hasCovariate) {
      Ugz.noalias() = Z.transpose() * G;  // C by blockSize
    }
    Uyg.noalias() = G.transpose() * Y;  // blockSize by T

    // calculate Ustat, Vstat
    if (hasCovariate) {
      w.ustat[i].noalias() =
          (Uyg - Ugz.transpose() * ZZinv * Uyz);  // blockSize by T
      w.vstat[i].noalias() =
          ((G.array().square()).matrix().colwise().sum() -
           (L.transpose() * Ugz).array().square().matrix().colwise().sum())
              .transpose();  // blockSize by 1
    } else {                 // no covariate
      w.ustat[i].noalias() = Uyg;
      w.vstat[i].noalias() =
          G.array().square().matrix().colwise().sum().transpose();  // blockSize
                                                                    // by 1
    }
    // defer this, as this cannot be grouped
    // w.vstat[i] *= w.sigma2[group[i][0]];
  }
  // assign u, v; calculat p-values
  for (int j = 0; j < blockSize; ++j) {
    for (int i = 0; i < groupSize; ++i) {
      for (size_t k = 0; k != group[i].size(); ++k) {
        const int idx = group[i][k];
        const double u = w.ustat[i](j, k);
        const double v = w.vstat[i](j, 0) * w.sigma2[idx];
        this->ustat[j][idx] = u;
        this->vstat[j][idx] = v;

        if (v == 0.) {
          this->pvalue[j][idx] = NAN;
        } else {
          double stat = u * u / v;
          this->pvalue[j][idx] = gsl_cdf_chisq_Q(stat, 1.0);
        }
      }
    }
  }  // end for i
  return true;
}
Example #12
0
/*
 * Chi2 p-value given chi2_val value
 * ie returns integral(p(x)dx, x=chi2_val..infinity) with p chi2 distribution
 */
    inline double chi2_pvalue(const double chi2_val, const size_t ndof)
    {
	return gsl_cdf_chisq_Q(chi2_val,(double)ndof);
    }
Example #13
0
double pp_model::calculate_waiting_times_log_predictive_df( double increment, bool lower_tail, bool two_sided, bool increment_parameters ){
  double sum_log_pvals = 0;// sum_log_pvals2 = 0;
  unsigned long long int how_many = 0;
  unsigned long long int i2 = m_data_cont ? m_data_cont->find_data_index(m_current_t+increment,0,m_current_data_index) : (unsigned long long int)(m_current_t+increment);
  double current_t = m_current_t;
  if(i2>m_current_data_index){
    while(i2>m_current_data_index){
      double t = m_pp_time_scale ? m_pp_time_scale->cumulative_function( current_t, (*m_data_cont)[0][m_current_data_index] ) : (*m_data_cont)[0][m_current_data_index] - current_t;
      m_log_predictive_df = calculate_log_posterior_predictive_pdf(t,0);//upper tail
      if(t<=0){
	cerr << "Rounding errors in event times"<< endl;
	exit(1);
      }
      if(lower_tail||two_sided){
	m_log_predictive_df2 = log(1-exp(m_log_predictive_df));//lower tail
	if(lower_tail)
	  m_log_predictive_df = m_log_predictive_df2;
      }
      /*      if(two_sided){//two-sided p-values version two - combine at the start
	if(m_log_predictive_df > m_log_predictive_df2)
	  m_log_predictive_df = m_log_predictive_df2;
	  m_log_predictive_df += LOG_TWO;
      }*/
      sum_log_pvals += m_log_predictive_df;
      //      if(two_sided)//two-sided p-values version one - combine at the end
      //	sum_log_pvals2 += m_log_predictive_df2;
      m_r++;
      m_t += t;
      m_alpha_star++;
      m_beta_star += t;
      how_many++;
      current_t = (*m_data_cont)[0][m_current_data_index];
      m_current_data_index++;
    }
  }
  double t = (m_pp_time_scale&&m_data_cont) ? m_pp_time_scale->cumulative_function( current_t, m_current_t+increment ) : m_current_t+increment - current_t;
  if(t>0){
    m_log_predictive_df = calculate_log_posterior_predictive_pdf(t,0) - LOG_TWO;//upper tail
    if(lower_tail||two_sided){
      m_log_predictive_df2 = log(1-exp(m_log_predictive_df));//lower tail
      if(lower_tail)
	m_log_predictive_df = m_log_predictive_df2;
    }
    /*    if(two_sided){
      if(m_log_predictive_df > m_log_predictive_df2)
	m_log_predictive_df = m_log_predictive_df2;
      m_log_predictive_df += LOG_TWO;
      }*/
    sum_log_pvals += m_log_predictive_df;
    //    if(two_sided)
    //      sum_log_pvals2 += m_log_predictive_df2;
    m_t += t;
    m_beta_star += t;
    how_many++;
  }
  //  if(two_sided && sum_log_pvals2 < sum_log_pvals)
  //    sum_log_pvals = sum_log_pvals2;
  m_currently_observable = how_many > 0;
  if(m_currently_observable){
    m_log_predictive_df = how_many==1 ? sum_log_pvals : log(gsl_cdf_chisq_Q(-2*sum_log_pvals, 2*how_many));
    //    if(two_sided)
    //      m_log_predictive_df += LOG_TWO;
    if(two_sided){
    if(m_log_predictive_df>=0)//rounding erros
      m_log_predictive_df2 = log(gsl_cdf_chisq_P(-2*sum_log_pvals, 2*how_many));
    else
      m_log_predictive_df2 = log(1-exp(m_log_predictive_df));//lower tail
      if(m_log_predictive_df > m_log_predictive_df2)
	m_log_predictive_df = m_log_predictive_df2;
      m_log_predictive_df += LOG_TWO;
    }
  }
  else
    m_log_predictive_df = -LOG_TWO;
  m_pvalue_pair = make_pair(m_log_predictive_df,m_log_predictive_df);
  m_pvalue_pair_on_log_scale = true;
  return m_log_predictive_df;
}