double
gsl_ran_multinomial_lnpdf (const size_t K, const double p[],
                           const unsigned int n[])
{
  size_t k;
  unsigned int N = 0;
  double log_pdf = 0.0;
  double norm = 0.0;

  for (k = 0; k < K; k++)
    {
      N += n[k];
    }

  for (k = 0; k < K; k++)
    {
      norm += p[k];
    }

  log_pdf = gsl_sf_lnfact (N);

  for (k = 0; k < K; k++)
    {
      log_pdf -= gsl_sf_lnfact (n[k]);
    }

  /* note: when p[k] == 0 and n[k] == 0 this evaluates log(0) * 0,
     which is NaN; the guarded version below avoids that */
  for (k = 0; k < K; k++)
    {
      log_pdf += log (p[k] / norm) * n[k];
    }

  return log_pdf;
}
void calc_sum_fact(struct detector *det, struct dataset *frames)
{
	int d, t ;
	struct dataset *curr = frames ;

	/* Per-frame sum of ln(count!) over unmasked pixels (mask < 1), i.e. the
	 * constant factorial term of a Poisson log-likelihood. */
	frames->sum_fact = calloc(frames->tot_num_data, sizeof(double)) ;

	while (curr != NULL) {
		if (curr->type == 0) {
			/* Sparse frames: loop over each frame's multi-photon pixel list */
			for (d = 0 ; d < curr->num_data ; ++d)
			for (t = 0 ; t < curr->multi[d] ; ++t)
			if (det->mask[curr->place_multi[curr->multi_accum[d] + t]] < 1)
				frames->sum_fact[curr->num_data_prev+d] += gsl_sf_lnfact(curr->count_multi[curr->multi_accum[d] + t]) ;
		}
		else if (curr->type == 1) {
			/* Dense integer frames: loop over all pixels */
			for (d = 0 ; d < curr->num_data ; ++d)
			for (t = 0 ; t < curr->num_pix ; ++t)
			if (det->mask[t] < 1)
				frames->sum_fact[curr->num_data_prev+d] += gsl_sf_lnfact(curr->int_frames[d*curr->num_pix + t]) ;
		}
		else if (curr->type == 2) {
			/* Non-integer frames: no factorial term */
			for (d = 0 ; d < curr->num_data ; ++d)
				frames->sum_fact[curr->num_data_prev+d] = 0. ;
		}

		curr = curr->next ;
	}
}
double
gsl_ran_multinomial_lnpdf (const size_t K, const double p[],
                           const unsigned int n[])
{
  size_t k;
  unsigned int N = 0;
  double log_pdf = 0.0;
  double norm = 0.0;

  for (k = 0; k < K; k++)
    {
      N += n[k];
    }

  for (k = 0; k < K; k++)
    {
      norm += p[k];
    }

  log_pdf = gsl_sf_lnfact (N);

  for (k = 0; k < K; k++)
    {
      /* Handle case where n[k]==0 and p[k]==0 */
      if (n[k] > 0)
        {
          log_pdf += log (p[k] / norm) * n[k] - gsl_sf_lnfact (n[k]);
        }
    }

  return log_pdf;
}
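The guard in the second version matters whenever a category has zero probability and zero observed count: the unguarded loop then evaluates log(0) * 0 = NaN. A minimal sketch (not from either original source, assuming only the GSL headers) demonstrating the case:

#include <stdio.h>
#include <gsl/gsl_randist.h>

int main (void)
{
  /* third category has zero probability and zero count */
  const double p[3] = { 0.5, 0.5, 0.0 };
  const unsigned int n[3] = { 3, 2, 0 };

  /* with the guarded implementation this is finite;
     the unguarded one returns nan */
  printf ("lnpdf = %g\n", gsl_ran_multinomial_lnpdf (3, p, n));
  return 0;
}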
/* Precompute a lookup table of ln(i!) for i = 0 .. size-1 */
double* genLogFactList(int size){
    double* logFactList = (double*) calloc(size, sizeof(double));
    for (int i = 0; i < size; i++)
        logFactList[i] = gsl_sf_lnfact(i);
    return logFactList;
}
double
gsl_ran_poisson_pdf (const unsigned int k, const double mu)
{
  double p;
  double lf = gsl_sf_lnfact (k);

  p = exp (log (mu) * k - lf - mu);
  return p;
}
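Working in the log domain is what makes this robust: k! overflows double precision already at k = 171, while gsl_sf_lnfact stays small. A hedged sketch (not from the original source) illustrating the point:

#include <stdio.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf_gamma.h>

int main (void)
{
  /* 500! would overflow a double, but its logarithm is modest ... */
  printf ("lnfact(500) = %g\n", gsl_sf_lnfact (500));

  /* ... so the pdf itself is still computed without overflow */
  printf ("P(k=500 | mu=480) = %g\n", gsl_ran_poisson_pdf (500, 480.0));
  return 0;
}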
rcount nulldist::rand()
{
    if( gsl_ran_bernoulli( rng, a ) ) return 0;

    /* This uses a rejection sampling scheme worked out by Charles Geyer,
     * detailed in his notes "Lower-Truncated Poisson and Negative Binomial
     * Distributions": propose x = NB(r + 1, p) + 1 and accept with
     * probability exp(lnfact(x-1) - lnfact(x)) = 1/x. */
    rcount x;
    double accp;
    while( true ) {
        x = gsl_ran_negative_binomial( rng, p, r + 1.0 ) + 1;
        accp = gsl_sf_lnfact( x - 1 ) - gsl_sf_lnfact( x );
        if( gsl_ran_bernoulli( rng, exp( accp ) ) ) break;
    }

    return x;
}
/*
  ---------------------------------------------------------------------
  Partition H
  ---------------------------------------------------------------------
*/
double PartitionHMB_OD(struct group *part, double linC, double *HarmonicList)
{
  struct group *g1 = part, *g2;
  int r, l;
  int ng = 0;    /* Number of non-empty groups */
  int nnod = 0;  /* Number of nodes */
  double H = 0.0;

  H += linC * NNonEmptyGroups(part);

  while ((g1 = g1->next) != NULL) {
    if (g1->size > 0) {
      ng++;
      nnod += g1->size;

      /* Within-group term: r possible links, l observed */
      r = g1->size * (g1->size - 1) / 2;
      l = g1->inlinks;
      H += log(r + 1) + LogChoose(r, l);
      /*H -= log(HarmonicList[r + 1] - HarmonicList[l]);*/

      /* Between-group terms for all remaining group pairs */
      g2 = g1;
      while ((g2 = g2->next) != NULL) {
        if (g2->size > 0) {
          r = g1->size * g2->size;
          l = NG2GLinks(g1, g2);
          H += log(r + 1) + LogChoose(r, l);
          /*H -= log(HarmonicList[r + 1] - HarmonicList[l]);*/
        }
      }
    }
  }

  H -= gsl_sf_lnfact(nnod - ng);
  H -= LogDegeneracy_OD(ng);

  return H;
}
/* Return ln(key!), served from the precomputed table when key is in range
 * and falling back to gsl_sf_lnfact() otherwise. Note the bound check must
 * reject key == size as well, since logFactList holds indices 0 .. size-1
 * (the original `size < key` test read one element past the end). */
double logFact(int key, int size, double* logFactList){
    if (key >= size)
        return gsl_sf_lnfact(key);
    else
        return logFactList[key];
}
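Taken together, the two helpers above implement a standard memoisation pattern for ln(i!). A hypothetical driver (not part of the original sources) showing the intended usage:

#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_sf_gamma.h>

int main (void)
{
  const int size = 1024;
  double *table = genLogFactList (size);

  /* in range: served from the precomputed table */
  printf ("ln(10!)   = %g\n", logFact (10, size, table));

  /* out of range: falls back to a direct gsl_sf_lnfact call */
  printf ("ln(2000!) = %g\n", logFact (2000, size, table));

  free (table);
  return 0;
}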
/**
 * \brief The log likelihood function
 *
 * This function calculates the natural logarithm of the likelihood of a signal model (specified by a given set of
 * parameters) given the data from a set of detectors.
 *
 * The likelihood is the joint likelihood of chunks of data over which the noise is assumed stationary and Gaussian. For
 * each chunk a Gaussian likelihood for the noise and data has been marginalised over the unknown noise standard
 * deviation using a Jeffreys prior on the standard deviation. Given the data consisting of independent real and
 * imaginary parts this gives a Students-t distribution for each chunk (of length \f$m\f$) with \f$m/2\f$ degrees of
 * freedom:
 * \f[
 * p(\mathbf{\theta}|\mathbf{B}) = \prod_{j=1}^M \frac{(m_j-1)!}{2\pi^{m_j}}
 * \left( \sum_{k=k_0}^{k_0+(m_j-1)} |B_k - y(\mathbf{\theta})_k|^2
 * \right)^{-m_j},
 * \f]
 * where \f$\mathbf{B}\f$ is a vector of the complex data, \f$y(\mathbf{\theta})\f$ is the model for a set of parameters
 * \f$\mathbf{\theta}\f$, \f$M\f$ is the total number of independent data chunks with lengths \f$m_j\f$ and \f$k_0 =
 * \sum_{i=1}^j 1 + m_{i-1}\f$ (with \f$m_0 = 0\f$) is the index of the first data point in each chunk. The product of
 * this for each detector will give the full joint likelihood. In the calculation here the unnecessary proportionality
 * factors are left out (this would affect the actual value of the marginal likelihood/evidence, but since we are only
 * interested in evidence ratios/Bayes factors these factors cancel out anyway). See \cite DupuisWoan2005 for a
 * more detailed description.
 *
 * In this function data in chunks smaller than a certain minimum length \c chunkMin are ignored.
 *
 * \param vars [in] The parameter values
 * \param data [in] The detector data and initial signal phase template
 * \param get_model [in] The signal template/model function
 *
 * \return The natural logarithm of the likelihood function
 */
REAL8 pulsar_log_likelihood( LALInferenceVariables *vars, LALInferenceIFOData *data,
                             LALInferenceTemplateFunction get_model ){
  REAL8 loglike = 0.; /* the log likelihood */
  UINT4 i = 0;

  REAL8Vector *freqFactors = *(REAL8Vector **)LALInferenceGetVariable( data->dataParams, "freqfactors" );

  LALInferenceIFOData *datatemp1 = data, *datatemp2 = data, *datatemp3 = data;

  /* copy model parameters to data parameters */
  while( datatemp1 ){
    LALInferenceCopyVariables( vars, datatemp1->modelParams );
    datatemp1 = datatemp1->next;
  }

  /* get pulsar model, advancing past the data stream for each frequency factor */
  while( datatemp2 ){
    get_model( datatemp2 );

    for( i = 0; i < freqFactors->length; i++ ) { datatemp2 = datatemp2->next; }
  }

  while ( datatemp3 ){
    UINT4 j = 0, count = 0, cl = 0;
    UINT4 length = 0, chunkMin;
    REAL8 chunkLength = 0.;
    REAL8 logliketmp = 0.;

    REAL8 sumModel = 0., sumDataModel = 0.;
    REAL8 chiSquare = 0.;
    COMPLEX16 B, M;

    REAL8Vector *sumDat = NULL;
    UINT4Vector *chunkLengths = NULL;

    sumDat = *(REAL8Vector **)LALInferenceGetVariable( datatemp3->dataParams, "sumData" );
    chunkLengths = *(UINT4Vector **)LALInferenceGetVariable( datatemp3->dataParams, "chunkLength" );
    chunkMin = *(INT4*)LALInferenceGetVariable( datatemp3->dataParams, "chunkMin" );

    length = datatemp3->compTimeData->data->length;

    for( i = 0 ; i < length ; i += chunkLength ){
      chunkLength = (REAL8)chunkLengths->data[count];

      /* skip section of data if its length is less than the minimum allowed chunk length */
      if( chunkLength < chunkMin ){
        count++;
        continue;
      }

      sumModel = 0.;
      sumDataModel = 0.;

      cl = i + (INT4)chunkLength;

      for( j = i ; j < cl ; j++ ){
        B = datatemp3->compTimeData->data->data[j];
        M = datatemp3->compModelData->data->data[j];

        /* sum over the model */
        sumModel += creal(M)*creal(M) + cimag(M)*cimag(M);

        /* sum over the data and model */
        sumDataModel += creal(B)*creal(M) + cimag(B)*cimag(M);
      }

      chiSquare = sumDat->data[count];
      chiSquare -= 2.*sumDataModel;
      chiSquare += sumModel;

      logliketmp -= chunkLength*log(chiSquare) + LAL_LN2 * (chunkLength-1.) +
        gsl_sf_lnfact( (unsigned int)chunkLength );

      count++;
    }

    loglike += logliketmp;
    datatemp3 = datatemp3->next;
  }
  return loglike;
}
/**
 * \brief Merge adjacent segments
 *
 * This function will attempt to remerge adjacent segments if statistically favourable (as calculated by the odds
 * ratio). For each pair of adjacent segments the joint likelihood of them being from two independent distributions is
 * compared to the likelihood that, combined, they are from one distribution. If the likelihood is highest for the
 * combined segments they are merged.
 *
 * \param data [in] A complex data vector
 * \param segments [in] A vector of split segment indexes
 */
void merge_data( COMPLEX16Vector *data, UINT4Vector **segments ){
  UINT4 j = 0;
  REAL8 threshold = 0.; /* may need to be passed to function in the future, or defined globally */
  UINT4Vector *segs = *segments;

  /* loop until stopping criterion is reached */
  while( 1 ){
    UINT4 ncells = segs->length;
    UINT4 mergepoint = 0;
    REAL8 logodds = 0., minl = -LAL_REAL8_MAX;

    for (j = 1; j < ncells; j++){
      REAL8 summerged = 0., sum1 = 0., sum2 = 0.;
      UINT4 i = 0, n1 = 0, n2 = 0, nm = 0;
      UINT4 cellstarts1 = 0, cellends1 = 0, cellstarts2 = 0, cellends2 = 0;
      REAL8 log_merged = 0., log_individual = 0.;

      /* get the evidence for merged adjacent cells */
      if( j == 1 ) { cellstarts1 = 0; }
      else { cellstarts1 = segs->data[j-2]; }

      cellends1 = segs->data[j-1];

      cellstarts2 = segs->data[j-1];
      cellends2 = segs->data[j];

      n1 = cellends1 - cellstarts1;
      n2 = cellends2 - cellstarts2;
      nm = cellends2 - cellstarts1;

      for( i = cellstarts1; i < cellends1; i++ ) { sum1 += SQUARE( cabs(data->data[i]) ); }
      for( i = cellstarts2; i < cellends2; i++ ) { sum2 += SQUARE( cabs(data->data[i]) ); }

      summerged = sum1 + sum2;

      /* calculate evidences */
      log_merged = -2 + gsl_sf_lnfact(nm-1) - (REAL8)nm * log( summerged );
      log_individual = -2 + gsl_sf_lnfact(n1-1) - (REAL8)n1 * log( sum1 );
      log_individual += -2 + gsl_sf_lnfact(n2-1) - (REAL8)n2 * log( sum2 );

      logodds = log_merged - log_individual;

      if ( logodds > minl ){
        mergepoint = j - 1;
        minl = logodds;
      }
    }

    /* set break criterion */
    if ( minl < threshold ) { break; }
    else{ /* merge cells */
      /* remove the cell end value between the two being merged and shift */
      for( UINT4 i = 0; i < ncells-(mergepoint+1); i++ ){
        segs->data[mergepoint+i] = segs->data[mergepoint+i+1];
      }

      segs = XLALResizeUINT4Vector( segs, ncells - 1 );
    }
  }
}
/**
 * \brief Find a change point in complex data
 *
 * This function is based on the Bayesian Blocks algorithm of \cite Scargle1998 that finds "change points" in data -
 * points at which the statistics of the data change. It is based on calculating evidence, or odds, ratios. The
 * function first computes the marginal likelihood (or evidence) that the whole of the data is described by a single
 * Gaussian (with mean of zero). This comes from taking a Gaussian likelihood function and analytically marginalising
 * over the standard deviation (using a prior on the standard deviation of \f$1/\sigma\f$), giving (see
 * [\cite DupuisWoan2005]) a Students-t distribution (see
 * <a href="https://wiki.ligo.org/foswiki/pub/CW/PulsarParameterEstimationNestedSampling/studentst.pdf">here</a>).
 * Following this the data are split into two segments (with lengths greater than, or equal to, the minimum chunk
 * length) for all possible combinations, and the joint evidence for each of the two segments consisting of independent
 * Gaussians (basically multiplying the above equation calculated for each segment separately) is calculated and the
 * split point recorded. However, the value required for comparing to that for the whole data set, to give the odds
 * ratio, is the evidence that having any split is better than having no split, so the individual split data evidences
 * need to be added incoherently to give the total evidence for a split. The index at which the evidence for a single
 * split is maximum (i.e. the most favoured split point) is that which is returned.
 *
 * \param data [in] a complex data vector
 * \param logodds [in] a pointer to return the natural logarithm of the odds ratio/Bayes factor
 * \param minlength [in] the minimum chunk length
 *
 * \return The position of the change point
 */
UINT4 find_change_point( gsl_vector_complex *data, REAL8 *logodds, UINT4 minlength ){
  UINT4 changepoint = 0, i = 0;
  UINT4 length = (UINT4)data->size, lsum = 0;

  REAL8 datasum = 0.;

  REAL8 logsingle = 0., logtot = -INFINITY;
  REAL8 logdouble = 0., logdouble_min = -INFINITY;
  REAL8 logratio = 0.;

  REAL8 sumforward = 0., sumback = 0.;
  gsl_complex dval;

  /* check that data is at least twice the minimum length, if not return an
   * odds ratio of zero (log odds = -inf [or close to that!]) */
  if ( length < (UINT4)(2*minlength) ){
    logratio = -INFINITY;
    memcpy(logodds, &logratio, sizeof(REAL8));
    return 0;
  }

  /* calculate the sum of the data squared */
  for (i = 0; i < length; i++) {
    dval = gsl_vector_complex_get( data, i );
    datasum += SQUARE( gsl_complex_abs( dval ) );
  }

  /* calculate the evidence that the data consist of Gaussian data with a
   * single standard deviation */
  logsingle = -LAL_LN2 - (REAL8)length*LAL_LNPI + gsl_sf_lnfact(length-1) -
    (REAL8)length * log( datasum );

  lsum = length - 2*minlength + 1;

  /* seed the running sums: the first minlength-1 points go in the forward
   * sum, the rest in the backward sum (the main loop below moves one point
   * across per iteration) */
  for ( i = 0; i < length; i++ ){
    dval = gsl_vector_complex_get( data, i );
    if ( i < minlength-1 ){ sumforward += SQUARE( gsl_complex_abs( dval ) ); }
    else{ sumback += SQUARE( gsl_complex_abs( dval ) ); }
  }

  /* go through each possible change point and calculate the evidence for the
   * data consisting of two independent Gaussians either side of the change
   * point. Also calculate the total evidence for any change point. Don't
   * allow single points, so start at the second data point. */
  for (i = 0; i < lsum; i++){
    UINT4 ln1 = i+minlength, ln2 = (length-i-minlength);

    REAL8 log_1 = 0., log_2 = 0.;

    dval = gsl_vector_complex_get( data, ln1-1 );
    REAL8 adval = SQUARE( gsl_complex_abs( dval ) );
    sumforward += adval;
    sumback -= adval;

    /* get log evidences for the individual segments */
    log_1 = -LAL_LN2 - (REAL8)ln1*LAL_LNPI + gsl_sf_lnfact(ln1-1) -
      (REAL8)ln1 * log( sumforward );
    log_2 = -LAL_LN2 - (REAL8)ln2*LAL_LNPI + gsl_sf_lnfact(ln2-1) -
      (REAL8)ln2 * log( sumback );

    /* get evidence for the two segments */
    logdouble = log_1 + log_2;

    /* add to total evidence for a change point */
    logtot = LOGPLUS(logtot, logdouble);

    /* find maximum value of logdouble and record that as the change point */
    if ( logdouble > logdouble_min ){
      changepoint = ln1;
      logdouble_min = logdouble;
    }
  }

  /* get the log odds ratio of segmented versus non-segmented model */
  logratio = logtot - logsingle;
  memcpy(logodds, &logratio, sizeof(REAL8));

  return changepoint;
}
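Reading the code back into the notation of the pulsar_log_likelihood docstring above: for a segment of \f$m\f$ complex points with summed squared modulus \f$S = \sum_k |B_k|^2\f$, the per-segment evidence used in find_change_point is
\f[
\ln Z = -\ln 2 - m\ln\pi + \ln\,(m-1)! - m\ln S,
\qquad
Z = \frac{(m-1)!}{2\pi^m S^m},
\f]
i.e. the single-chunk Students-t factor with a pure-noise model \f$y(\mathbf{\theta}) = 0\f$. Because a split conserves the total number of points (\f$m = n_1 + n_2\f$), the \f$-m\ln\pi\f$ terms cancel in any odds ratio between split and unsplit models, which is presumably why merge_data above omits them.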
/* Binomial coefficient n choose k, computed via log-factorials and rounded
 * back to the nearest integer. */
double mygsl_binomial_coef(unsigned int n, unsigned int k)
{
  return floor(0.5 + exp(gsl_sf_lnfact(n) - gsl_sf_lnfact(k) - gsl_sf_lnfact(n-k)));
}
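For reference, GSL itself ships gsl_sf_choose() and gsl_sf_lnchoose(), which compute the same quantity without the explicit round-to-nearest step. A short comparison sketch (hypothetical driver, assuming the definition above is in scope):

#include <stdio.h>
#include <gsl/gsl_sf_gamma.h>

int main (void)
{
  printf ("mygsl_binomial_coef(10, 3) = %g\n", mygsl_binomial_coef (10, 3)); /* 120 */
  printf ("gsl_sf_choose(10, 3)       = %g\n", gsl_sf_choose (10, 3));       /* 120 */
  return 0;
}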