/*
 * Purpose:
 *   Relaxed chi-square test for differences between observed and expected
 *   frequencies. Unlike the strict variant, this version keeps going even if
 *   expected frequencies (E) are < 5 in more than 20% of the categories.
 *   Categories with E <= 0 are skipped, and the degrees of freedom are
 *   reduced by one for each skipped category.
 *
 * Parameters:
 *   observed = array of k ints with event counts for each category in the sample
 *   expected = array of k ints with event counts for each category as expected
 *              from the known population
 *   k        = number of categories (must be > 1)
 *   df       = degrees of freedom (e.g. k-1); must be >= 1 and must stay >= 1
 *              after skipped categories have been subtracted
 *
 * Returns:
 *   gsl_ran_chisq_pdf (chi, df) for the computed chi-square statistic,
 *   documented as the probability of making an error when rejecting the
 *   NULL hypothesis.
 *   -1 if the data does not support a Chi^2 test (an error is reported via
 *   gstats_error() in that case).
 */
double gstats_rtest_XF (int *observed, int *expected, int k, int df)
{
    double chi;
    double diff;
    int i;
    long sum, sum2;

    if (df < 1) {
        gstats_error ("Chi^2 test: need at least one category with expected frequency > 0.");
        return (-1);
    }

    if (k < 2) {
        gstats_error ("Chi^2 test: number of categories must be > 1.");
        return (-1);
    }

    /* check if parameters given make sense */
    sum = 0;
    sum2 = 0;
    for (i = 0; i < k; i++) {
        sum = sum + observed[i];
        sum2 = sum2 + expected[i];
    }

    /* NOTE(review): the messages below say "> 2" but the checks only reject
       totals < 2 (a total of exactly 2 is accepted) - confirm which is intended. */
    if (sum < 2) {
        gstats_error ("Chi^2 test: sum of observed frequencies over all categories must be > 2.");
        return (-1);
    }

    if (sum2 < 2) {
        gstats_error ("Chi^2 test: sum of expected frequencies over all categories must be > 2.");
        return (-1);
    }

    /* finally, calculate Chi! */
    chi = 0;
    for (i = 0; i < k; i++) {
        if (expected[i] > 0) {
            /* Work in double: squaring the difference in int arithmetic can
               overflow for large counts, and a float cast drops precision. */
            diff = (double) observed[i] - (double) expected[i];
            chi = chi + (diff * diff) / (double) expected[i];
        }
        else {
            /* category carries no expectation: skip it and give up one
               degree of freedom */
            df--;
            if (df < 1) {
                gstats_error ("Chi^2 test: need at least one category with expected frequency > 0.");
                return (-1);
            }
        }
    }

    /* NOTE(review): gsl_ran_chisq_pdf() is the probability density at chi, not
       a tail probability. If a p-value is intended, gsl_cdf_chisq_Q()
       from <gsl/gsl_cdf.h> is probably what is wanted - confirm with callers. */
    return (gsl_ran_chisq_pdf (chi, (double) df));
}
/*
 * Callback-style wrapper around gsl_ran_chisq_pdf().
 *
 *   x = point at which the density is evaluated
 *   p = pointer to an array of doubles; element 0 is used as the degrees
 *       of freedom
 *
 * Returns gsl_ran_chisq_pdf (x, nu) with nu taken from p.
 */
double chisq (double x, void *p)
{
    const double *params = p;
    const double nu = *params;

    return gsl_ran_chisq_pdf (x, nu);
}
/*
 * Evaluates gsl_ran_chisq_pdf() at x with the degrees of freedom fixed at 13.
 */
double test_chisq_pdf (double x)
{
    const double nu = 13.0;

    return gsl_ran_chisq_pdf (x, nu);
}
/*
 * Natural logarithm of gsl_ran_chisq_pdf() evaluated at `in`.
 *
 *   in = point at which the density is evaluated
 *   df = pointer to a double holding the degrees of freedom
 */
static double one_chisq(double in, void *df)
{
    const double *nu = df;
    const double density = gsl_ran_chisq_pdf(in, *nu);

    return log(density);
}
/*
 * Purpose:
 *   Chi-square test for differences in frequencies between sample and
 *   population. This strict variant refuses to run if any category has an
 *   expected frequency < 1, or if more than 20% of the categories have an
 *   expected frequency < 5.
 *
 * Parameters:
 *   observed = array of k ints with event counts for each category in the sample
 *   expected = array of k ints with event counts for each category as expected
 *              from the known population
 *   k        = number of categories (must be > 1)
 *   df       = degrees of freedom (e.g. k-1); must be >= 1
 *
 * Returns:
 *   gsl_ran_chisq_pdf (chi, df) for the computed chi-square statistic,
 *   documented as the probability of making an error when rejecting the
 *   NULL hypothesis.
 *   -1 if the data does not support a Chi^2 test (an error is reported via
 *   gstats_error() in that case).
 */
double gstats_test_XF (int *observed, int *expected, int k, int df)
{
    double chi;
    double diff;
    int i;
    long sum, sum2;
    int lessThanFive;

    if (df < 1) {
        gstats_error ("Chi^2 test: need at least one category with expected frequency > 0.");
        return (-1);
    }

    if (k < 2) {
        gstats_error ("Chi^2 test: number of categories must be > 1.");
        return (-1);
    }

    lessThanFive = 0; /* keeps count of categories with expected frequency < 5 */

    /* check if parameters given make sense */
    sum = 0;
    sum2 = 0;
    for (i = 0; i < k; i++) {
        sum = sum + observed[i];
        sum2 = sum2 + expected[i];
    }

    /* NOTE(review): the messages below say "> 2" but the checks only reject
       totals < 2 (a total of exactly 2 is accepted) - confirm which is intended. */
    if (sum < 2) {
        gstats_error ("Chi^2 test: sum of observed frequencies over all categories must be > 2.");
        return (-1);
    }

    if (sum2 < 2) {
        gstats_error ("Chi^2 test: sum of expected frequencies over all categories must be > 2.");
        return (-1);
    }

    /* finally, calculate Chi! */
    chi = 0;
    for (i = 0; i < k; i++) {
        if (expected[i] > 0) {
            /* Work in double: squaring the difference in int arithmetic can
               overflow for large counts, and a float cast drops precision. */
            diff = (double) observed[i] - (double) expected[i];
            chi = chi + (diff * diff) / (double) expected[i];
            if (expected[i] < 5) {
                lessThanFive++;
            }
        }
        else {
            gstats_error ("Chi^2 test: expected frequency for category < 1.");
            return (-1);
        }
    }

    /* Check if more than 20% of categories have an expected frequency < 5.
       lessThanFive / k > 20/100 is evaluated exactly as lessThanFive * 5 > k,
       so a share of exactly 20% can never be rejected by floating-point
       rounding. */
    if ((long long) lessThanFive * 5 > (long long) k) {
        gstats_error ("Chi^2 test: more than 20% of categories have an expected frequency < 5.");
        return (-1);
    }

    /* NOTE(review): gsl_ran_chisq_pdf() is the probability density at chi, not
       a tail probability. If a p-value is intended, gsl_cdf_chisq_Q()
       from <gsl/gsl_cdf.h> is probably what is wanted - confirm with callers. */
    return (gsl_ran_chisq_pdf (chi, (double) df));
}