/**
 * \brief Chop up the data into chunks smaller than the maximum allowed length
 *
 * This function chops any chunks that are greater than \c chunkMax into chunks smaller than, or equal to \c chunkMax,
 * and greater than \c chunkMin. On some occasions this might result in a segment smaller than \c chunkMin, but these
 * are ignored in the likelihood calculation anyway.
 *
 * \param chunkIndex [in] a vector of segment split positions
 * \param chunkMax [in] the maximum allowed segment/chunk length
 * \param chunkMin [in] the minimum allowed segment/chunk length
 */
void rechop_data( UINT4Vector *chunkIndex, INT4 chunkMax, INT4 chunkMin ){
  INT4 i = 0, j = 0, count = 0;
  INT4 length = chunkIndex->length;
  INT4 endIndex = (INT4)chunkIndex->data[length-1];
  UINT4 startindex = 0, chunklength = 0;

  UINT4Vector *newindex = NULL;
  /* Allocate for the worst case: each of the "length" input chunks yields at
     least one output entry, and splitting oversized chunks adds at most
     ceil(endIndex/chunkMax) more. The previous bound of ceil(endIndex/chunkMax)
     alone could be exceeded - and the buffer overrun - whenever the input
     contained many chunks already shorter than chunkMax. */
  newindex = XLALCreateUINT4Vector( length + (INT4)ceil((REAL8)endIndex / (REAL8)chunkMax) );

  /* chop any chunks that are greater than chunkMax into chunks smaller than,
     or equal to, chunkMax and greater than chunkMin */
  for ( i = 0; i < length; i++ ){
    if ( i == 0 ) { startindex = 0; }
    else { startindex = chunkIndex->data[i-1]+1; }

    chunklength = chunkIndex->data[i] - startindex;

    if ( chunklength > (UINT4)chunkMax ){
      INT4 remain = chunklength % chunkMax;

      /* cut segment into as many chunkMax-long chunks as possible */
      for ( j = 0; j < floor(chunklength / chunkMax); j++ ){
        newindex->data[count] = startindex + (j+1)*chunkMax;
        count++;
      }

      /* last chunk values */
      if ( remain != 0 ){
        /* set final value */
        newindex->data[count] = startindex + j*chunkMax + remain;

        if ( remain < chunkMin ){
          /* split the last two cells into one that is chunkMin long and one that is
             (chunkMax+remainder)-chunkMin long - this may leave a cell shorter than
             chunkMin, but we'll have to live with that! */
          INT4 n1 = (chunkMax + remain) - chunkMin;

          /* reset the second to last value */
          newindex->data[count-1] = newindex->data[count] - chunkMin;

          if ( n1 < chunkMin && verbose_output ){
            fprintf(stderr, "Non-fatal error... segment no. %d is %d long, which is less than chunkMin = %d.\n", count, n1, chunkMin);
          }
        }

        count++;
      }
    }
    else{
      newindex->data[count] = chunkIndex->data[i];
      count++;
    }
  }

  /* NOTE(review): the resize result is assigned to the by-value parameter, so
     the caller only stays consistent if XLALResizeUINT4Vector returns the same
     struct pointer (reallocating only the internal data array) - presumably it
     does, but confirm against the LAL implementation. */
  chunkIndex = XLALResizeUINT4Vector( chunkIndex, count );

  /* copy the rechopped split positions back into the caller's vector */
  for ( i = 0; i < count; i++ ) { chunkIndex->data[i] = newindex->data[i]; }

  XLALDestroyUINT4Vector( newindex );
}
/**
 * \brief Chop up the data into chunks smaller than the maximum allowed length
 *
 * This function chops any chunks that are greater than \c chunkMax into chunks smaller than, or equal to \c chunkMax,
 * and greater than \c chunkMin. On some occasions this might result in a segment smaller than \c chunkMin, but these
 * are ignored in the likelihood calculation anyway.
 *
 * \param chunkIndex [in] a vector of segment split positions (may be reallocated on return)
 * \param chunkMax [in] the maximum allowed segment/chunk length
 * \param chunkMin [in] the minimum allowed segment/chunk length
 */
void rechop_data( UINT4Vector **chunkIndex, UINT4 chunkMax, UINT4 chunkMin ){
  UINT4 i = 0, j = 0, count = 0;
  UINT4Vector *cip = *chunkIndex; /* pointer to chunkIndex */
  UINT4 length = cip->length;
  UINT4 endIndex = cip->data[length-1];
  UINT4 startindex = 0, chunklength = 0;
  /* scratch space sized for the maximum possible number of output chunks (all
     of length chunkMin). NOTE(review): this is a stack VLA; for long datasets
     with a small chunkMin it could be large - consider heap allocation if that
     is a realistic regime. */
  UINT4 newindex[(UINT4)ceil((REAL8)endIndex/(REAL8)chunkMin)];

  /* chop any chunks that are greater than chunkMax into chunks smaller than,
     or equal to, chunkMax and greater than chunkMin */
  for ( i = 0; i < length; i++ ){
    if ( i == 0 ) { startindex = 0; }
    else { startindex = cip->data[i-1]+1; }

    chunklength = cip->data[i] - startindex;

    if ( chunklength > chunkMax ){
      UINT4 remain = chunklength % chunkMax;

      /* cut segment into as many chunkMax-long chunks as possible */
      for ( j = 0; j < floor(chunklength / chunkMax); j++ ){
        newindex[count] = startindex + (j+1)*chunkMax;
        count++;
      }

      /* last chunk values */
      if ( remain != 0 ){
        /* set final value */
        newindex[count] = startindex + j*chunkMax + remain;

        if ( remain < chunkMin ){
          /* split the last two cells into one that is chunkMin long and one that is
             (chunkMax+remainder)-chunkMin long - this may leave a cell shorter than
             chunkMin, but we'll have to live with that! */
          UINT4 n1 = (chunkMax + remain) - chunkMin;

          /* reset the second to last value */
          newindex[count-1] = newindex[count] - chunkMin;

          if ( n1 < chunkMin ){
            XLAL_PRINT_WARNING("Non-fatal error... segment no. %d is %d long, which is less than chunkMin = %d.\n", count, n1, chunkMin);
          }
        }

        count++;
      }
    }
    else{
      newindex[count] = cip->data[i];
      count++;
    }
  }

  cip = XLALResizeUINT4Vector( cip, count );

  for ( i = 0; i < count; i++ ) { cip->data[i] = newindex[i]; }

  /* write the (possibly reallocated) vector back through the double pointer.
     Previously the resized pointer was stored only in the local cip, so if the
     resize moved the vector the caller would be left holding a stale pointer -
     which is the very situation the double-pointer interface exists to avoid. */
  *chunkIndex = cip;
}
/**
 * \brief Split the data into segments
 *
 * This function is deprecated to \c chop_n_merge, but gives the functionality of the old code.
 *
 * It cuts the data into as many contiguous segments of data as possible of length \c chunkMax. Where contiguous is
 * defined as containing consecutive point within 180 seconds of each other. The length of segments that do not fit into
 * a \c chunkMax length are also included.
 *
 * \param ifo [in] the LALInferenceIFOModel variable
 * \param chunkMax [in] the maximum length of a data chunk/segment
 *
 * \return A vector of chunk/segment lengths
 */
UINT4Vector *get_chunk_lengths( LALInferenceIFOModel *ifo, UINT4 chunkMax ){
  UINT4 i = 0, j = 0, count = 0;
  UINT4 length;

  REAL8 t1, t2;

  UINT4Vector *chunkLengths = NULL;

  length = ifo->times->length;

  chunkLengths = XLALCreateUINT4Vector( length );

  REAL8 dt = *(REAL8*)LALInferenceGetVariable( ifo->params, "dt" );

  /* create vector of data segment length */
  while( 1 ){
    count++; /* counter */

    /* break clause: stop once the penultimate sample has been processed.
       Written as "i + 2 > length" rather than "i > length - 2" because length
       is unsigned: for length < 2 the subtraction would wrap to a huge value,
       the break would never trigger on time, and times->data would be read
       out of bounds. */
    if( i + 2 > length ){
      /* set final value of chunkLength */
      chunkLengths->data[j] = count;
      j++;
      break;
    }

    i++;

    t1 = XLALGPSGetREAL8( &ifo->times->data[i-1] );
    t2 = XLALGPSGetREAL8( &ifo->times->data[i] );

    /* if consecutive points are within two sample times of each other count as in the same chunk */
    if( t2 - t1 > 2.*dt || count == chunkMax ){
      chunkLengths->data[j] = count;
      count = 0; /* reset counter */
      j++;
    }
  }

  chunkLengths = XLALResizeUINT4Vector( chunkLengths, j );

  return chunkLengths;
}
/**
 * \brief Chops the data into stationary segments based on Bayesian change point analysis
 *
 * This function splits data into two (and recursively runs on those two segments) if it is found that the odds ratio
 * for them being from two independent Gaussian distributions is greater than a certain threshold.
 *
 * The threshold for the natural logarithm of the odds ratio is empirically set to be
 * \f[
 * T = 4.07 + 1.33\log{}_{10}{N},
 * \f]
 * where \f$N\f$ is the length in samples of the dataset. This is based on Monte Carlo simulations of
 * many realisations of Gaussian noise for data of different lengths. The threshold comes from a linear
 * fit to the log odds ratios required to give a small chance of splitting Gaussian data (drawn from a
 * single distribution) for data of various lengths (NOTE(review): the surrounding documentation quotes
 * both 1% and 0.5% for this false-alarm probability - confirm which is correct). Note, however, that
 * this relation is not good for stretches of data with lengths of less than about 30 points, and in
 * fact is rather conservative for such short stretches of data, i.e. such short stretches of data will
 * require relatively larger odds ratios for splitting than longer stretches.
 *
 * \param data [in] A complex data vector
 * \param chunkMin [in] The minimum allowed segment length
 *
 * \return A vector of segment lengths
 *
 * \sa find_change_point
 */
UINT4Vector *chop_data( gsl_vector_complex *data, UINT4 chunkMin ){
  UINT4 npoints = (UINT4)data->size;
  REAL8 lratio = 0.;

  UINT4Vector *splits = XLALCreateUINT4Vector( 1 );

  /* find the most likely change point and its log odds ratio */
  UINT4 cp = find_change_point( data, &lratio, chunkMin );

  /* empirical threshold scaling with the data length (see doc comment) */
  REAL8 thresh = 4.07 + 1.33*log10((REAL8)npoints);

  if ( lratio <= thresh ){
    /* no statistically significant change point: keep the data as one segment */
    splits->data[0] = npoints;
    return splits;
  }

  /* significant change point found: recurse on the two halves */
  gsl_vector_complex_view left = gsl_vector_complex_subvector( data, 0, cp );
  gsl_vector_complex_view right = gsl_vector_complex_subvector( data, cp, npoints - cp );

  UINT4Vector *leftsplits = chop_data( &left.vector, chunkMin );
  UINT4Vector *rightsplits = chop_data( &right.vector, chunkMin );

  /* concatenate the split positions, offsetting the right-hand ones by the change point */
  splits = XLALResizeUINT4Vector( splits, leftsplits->length + rightsplits->length );

  UINT4 k = 0;
  for ( k = 0; k < leftsplits->length; k++ ){ splits->data[k] = leftsplits->data[k]; }
  for ( k = 0; k < rightsplits->length; k++ ){ splits->data[k + leftsplits->length] = rightsplits->data[k] + cp; }

  XLALDestroyUINT4Vector( leftsplits );
  XLALDestroyUINT4Vector( rightsplits );

  return splits;
}
/**
 * \brief Merge adjacent segments
 *
 * This function will attempt to remerge adjacent segments if statistically favourable (as calculated by the odds
 * ratio). For each pair of adjacent segments the joint likelihood of them being from two independent distributions is
 * compared to the likelihood that combined they are from one distribution. If the likelihood is highest for the
 * combined segments they are merged.
 *
 * \param data [in] A complex data vector
 * \param segments [in] A vector of split segment indexes (may be reallocated on return)
 */
void merge_data( COMPLEX16Vector *data, UINT4Vector **segments ){
  UINT4 j = 0;
  REAL8 threshold = 0.; /* may need to be passed to function in the future, or defined globally */
  UINT4Vector *segs = *segments;

  /* loop until stopping criterion is reached */
  while( 1 ){
    UINT4 ncells = segs->length;
    UINT4 mergepoint = 0;
    REAL8 logodds = 0., minl = -LAL_REAL8_MAX;

    /* find the pair of adjacent cells with the largest log odds ratio for merging */
    for (j = 1; j < ncells; j++){
      REAL8 summerged = 0., sum1 = 0., sum2 = 0.;
      UINT4 i = 0, n1 = 0, n2 = 0, nm = 0;
      UINT4 cellstarts1 = 0, cellends1 = 0, cellstarts2 = 0, cellends2 = 0;
      REAL8 log_merged = 0., log_individual = 0.;

      /* get the boundaries of the two adjacent cells */
      if( j == 1 ) { cellstarts1 = 0; }
      else { cellstarts1 = segs->data[j-2]; }

      cellends1 = segs->data[j-1];

      cellstarts2 = segs->data[j-1];
      cellends2 = segs->data[j];

      n1 = cellends1 - cellstarts1;
      n2 = cellends2 - cellstarts2;
      nm = cellends2 - cellstarts1;

      /* sums of the squared moduli over each cell */
      for( i = cellstarts1; i < cellends1; i++ ) { sum1 += SQUARE( cabs(data->data[i]) ); }
      for( i = cellstarts2; i < cellends2; i++ ) { sum2 += SQUARE( cabs(data->data[i]) ); }

      summerged = sum1 + sum2;

      /* calculated evidences */
      log_merged = -2 + gsl_sf_lnfact(nm-1) - (REAL8)nm * log( summerged );

      log_individual = -2 + gsl_sf_lnfact(n1-1) - (REAL8)n1 * log( sum1 );
      log_individual += -2 + gsl_sf_lnfact(n2-1) - (REAL8)n2 * log( sum2 );

      logodds = log_merged - log_individual;

      if ( logodds > minl ){
        mergepoint = j - 1;
        minl = logodds;
      }
    }

    /* set break criterion: stop once no pair favours merging above threshold */
    if ( minl < threshold ) { break; }
    else{
      /* merge cells */
      /* remove the cell end value between the two being merged and shift */
      for( UINT4 i=0; i < ncells-(mergepoint+1); i++ ){
        segs->data[mergepoint+i] = segs->data[mergepoint+i+1];
      }

      segs = XLALResizeUINT4Vector( segs, ncells - 1 );
    }
  }

  /* propagate the (possibly reallocated) vector back to the caller. Previously
     the resized pointer was kept only in the local segs, so repeated resizes
     could leave *segments pointing at stale memory - the double-pointer
     interface exists precisely so the caller sees the final vector. */
  *segments = segs;
}