/**
 * \brief Chops and remerges data into stationary segments
 *
 * This function finds segments of data that appear to be stationary (have the same standard deviation).
 *
 * The function first attempts to chop up the data into as many stationary segments as possible. The splitting may not
 * be optimal, so it then tries remerging consecutive segments to see if the merged segments show more evidence of
 * stationarity. <b>[NOTE: Remerging is currently turned off and will make very little difference to the algorithm]</b>.
 * It then, if necessary, chops the segments again to make sure there are none greater than the required \c chunkMax.
 * The default \c chunkMax is 0, so this rechopping will not normally happen.
 *
 * This is all performed on data that has had a running median subtracted, to try and remove any underlying trends in
 * the data (e.g. those caused by a strong signal), which might affect the calculations (which assume the data is
 * Gaussian with zero mean).
 *
 * If the \c verbose flag is set then a list of the segments will be output to a file called \c data_segment_list.txt,
 * with a prefix of the detector name.
 *
 * \param data [in] A data structure
 * \param chunkMin [in] The minimum length of a segment
 * \param chunkMax [in] The maximum length of a segment
 *
 * \return A vector of segment/chunk lengths (caller owns and must destroy it)
 *
 * \sa subtract_running_median
 * \sa chop_data
 * \sa merge_data
 * \sa rechop_data
 */
UINT4Vector *chop_n_merge( LALInferenceIFOData *data, INT4 chunkMin, INT4 chunkMax ){
  UINT4 j = 0;

  UINT4Vector *chunkLengths = NULL;
  UINT4Vector *chunkIndex = NULL;

  COMPLEX16Vector *meddata = NULL;

  /* subtract a running median value from the data to remove any underlying trends (e.g. caused by a strong signal)
   * that might affect the chunk calculations (which assume the data is Gaussian with zero mean). */
  meddata = subtract_running_median( data->compTimeData->data );

  /* pass chop_data a gsl_vector_view, so that internally it can use vector views rather than having to create new
   * vectors */
  gsl_vector_complex_view meddatagsl = gsl_vector_complex_view_array((double*)meddata->data, meddata->length);

  chunkIndex = chop_data( &meddatagsl.vector, chunkMin );

  /* DON'T BOTHER WITH THE MERGING AS IT WILL MAKE VERY LITTLE DIFFERENCE */
  /* merge_data( meddata, chunkIndex ); */

  /* the median-subtracted data is no longer needed once the segment indices have been found
   * (fix: this vector was previously leaked on every call) */
  XLALDestroyCOMPLEX16Vector( meddata );

  /* if a maximum chunk length is defined then rechop up the data, to segment any chunks longer than this value */
  if ( chunkMax > chunkMin ) { rechop_data( chunkIndex, chunkMax, chunkMin ); }

  chunkLengths = XLALCreateUINT4Vector( chunkIndex->length );

  /* go through segments and turn the cumulative end indices into a vector of chunk lengths */
  for ( j = 0; j < chunkIndex->length; j++ ){
    if ( j == 0 ) { chunkLengths->data[j] = chunkIndex->data[j]; }
    else { chunkLengths->data[j] = chunkIndex->data[j] - chunkIndex->data[j-1]; }
  }

  /* if verbose print out the segment end indices to a file */
  if ( verbose_output ){
    FILE *fpsegs = NULL;

    CHAR *outfile = NULL;

    /* set detector name as prefix */
    outfile = XLALStringDuplicate( data->detector->frDetector.prefix );

    outfile = XLALStringAppend( outfile, "data_segment_list.txt" );

    if ( (fpsegs = fopen(outfile, "w")) == NULL ){
      fprintf(stderr, "Non-fatal error opening file to output segment list.\n");
      /* fix: release resources before the early return (previously leaked) */
      XLALFree( outfile );
      XLALDestroyUINT4Vector( chunkIndex );
      return chunkLengths;
    }

    /* fix: the file name string was previously leaked */
    XLALFree( outfile );

    for ( j = 0; j < chunkIndex->length; j++ ) { fprintf(fpsegs, "%u\n", chunkIndex->data[j]); }

    /* add space at the end so that you can separate lists from different detector data streams */
    fprintf(fpsegs, "\n");

    fclose( fpsegs );
  }

  /* fix: the segment-index vector was previously leaked */
  XLALDestroyUINT4Vector( chunkIndex );

  return chunkLengths;
}
/** \brief Compute the noise variance for each data segment * * Once the data has been split into segments calculate the noise variance (using * both the real and imaginary parts) in each segment and fill in the associated * noise vector. To calculate the noise the running median should first be * subtracted from the data. * * \param data [in] the LALInferenceIFOData variable * \param model [in] the LALInferenceIFOModel variable */ void compute_variance( LALInferenceIFOData *data, LALInferenceIFOModel *model ){ REAL8 chunkLength = 0.; INT4 i = 0, j = 0, length = 0, cl = 0, counter = 0; COMPLEX16Vector *meddata = NULL; /* data with running median removed */ /* subtract a running median value from the data to remove any underlying trends (e.g. caused by a strong signal) */ meddata = subtract_running_median( data->compTimeData->data ); UINT4Vector *chunkLengths = NULL; chunkLengths = *(UINT4Vector **)LALInferenceGetVariable( model->params, "chunkLength" ); length = data->compTimeData->data->length; for ( i = 0, counter = 0; i < length; i+=chunkLength, counter++ ){ REAL8 vari = 0., meani = 0.; chunkLength = (REAL8)chunkLengths->data[counter]; cl = i + (INT4)chunkLength; /* get the mean (should be close to zero given the running median subtraction), but * probably worth doing anyway) */ for ( j = i ; j < cl ; j++ ){ meani += (creal(meddata->data[j]) + cimag(meddata->data[j])); } meani /= (2.*chunkLength); for ( j = i ; j < cl ; j++ ){ vari += SQUARE( (creal(meddata->data[j]) - meani) ); vari += SQUARE( (cimag(meddata->data[j]) - meani) ); } vari /= (2.*chunkLength - 1.); /* unbiased sample variance */ /* fill in variance vector */ for ( j = i ; j < cl ; j++ ){ data->varTimeData->data->data[j] = vari; } } }