Example #1
0
/**
 * \brief Chop up the data into chunks smaller than the maximum allowed length
 *
 * This function chops any chunks that are greater than \c chunkMax into chunks smaller than, or equal to \c chunkMax,
 * and greater than \c chunkMin. On some occasions this might result in a segment smaller than \c chunkMin, but these
 * are ignored in the likelihood calculation anyway.
 *
 * The split positions are rebuilt in a temporary vector and then copied back into \c chunkIndex, which is resized to
 * hold them. The function returns without changing \c chunkIndex if it is \c NULL or empty, if \c chunkMax is not
 * positive, or if the temporary vector cannot be allocated.
 *
 * \param chunkIndex [in,out] a vector of segment split positions
 * \param chunkMax [in] the maximum allowed segment/chunk length
 * \param chunkMin [in] the minimum allowed segment/chunk length
 */
void rechop_data( UINT4Vector *chunkIndex, INT4 chunkMax, INT4 chunkMin ){
  INT4 i = 0, j = 0, count = 0;
  INT4 length = 0;
  UINT4 startindex = 0, chunklength = 0;

  UINT4Vector *newindex = NULL;
  UINT4Vector *resized = NULL;

  /* an empty vector has no final split position to read, and a non-positive chunkMax would lead to a division by
   * zero (or a nonsensical unsigned comparison) below */
  if ( chunkIndex == NULL || chunkIndex->length == 0 || chunkMax <= 0 ){ return; }

  length = chunkIndex->length;

  /* Each input chunk produces at most floor(chunklength/chunkMax) + 1 output positions, and (assuming the split
   * positions are increasing) the chunk lengths sum to no more than the final split position, so this is an upper
   * bound on the number of output positions. */
  newindex = XLALCreateUINT4Vector( (UINT4)length + chunkIndex->data[length-1] / (UINT4)chunkMax );
  if ( newindex == NULL ){ return; }

  /* chop any chunks that are greater than chunkMax into chunks smaller than, or equal to chunkMax, and greater than chunkMin */
  for ( i = 0; i < length; i++ ){
    /* NOTE(review): every chunk after the first is taken to start one sample after the previous split position, so
     * its computed length is one less than the difference of the two split positions - confirm this is intended */
    if ( i == 0 ) { startindex = 0; }
    else { startindex = chunkIndex->data[i-1]+1; }

    chunklength = chunkIndex->data[i] - startindex;

    if ( chunklength > (UINT4)chunkMax ){
      INT4 nfull = (INT4)( chunklength / (UINT4)chunkMax );
      INT4 remain = chunklength % chunkMax;

      /* cut segment into as many chunkMax chunks as possible */
      for ( j = 0; j < nfull; j++ ){
        newindex->data[count] = startindex + (j+1)*chunkMax;
        count++;
      }

      /* last chunk values */
      if ( remain != 0 ){
        /* set final value */
        newindex->data[count] = startindex + j*chunkMax + remain;

        if ( remain < chunkMin ){
          /* split the last two cells into one that is chunkMin long and one that is (chunkMax+remainder)-chunkMin long
           * - this may leave a cell shorter than chunkMin, but we'll have to live with that! */
          INT4 n1 = (chunkMax + remain) - chunkMin;

          /* move the second to last split position so that the final cell is chunkMin long (count-1 is always valid
           * here as at least one full chunkMax cell was written above) */
          newindex->data[count-1] = newindex->data[count] - chunkMin;

          if ( n1 < chunkMin && verbose_output ){
            fprintf(stderr, "Non-fatal error... segment no. %d is %d long, which is less than chunkMin = %d.\n",
                    count, n1, chunkMin);
          }
        }

        count++;
      }
    }
    else{
      /* chunk is already short enough, so keep its split position */
      newindex->data[count] = chunkIndex->data[i];
      count++;
    }
  }

  /* copy the new split positions back only if the resize succeeded */
  resized = XLALResizeUINT4Vector( chunkIndex, count );

  if ( resized != NULL ){
    for ( i = 0; i < count; i++ ) { resized->data[i] = newindex->data[i]; }
  }

  XLALDestroyUINT4Vector( newindex );
}
Example #2
0
/**
 * \brief Chop up the data into chunks smaller than the maximum allowed length
 *
 * This function chops any chunks that are greater than \c chunkMax into chunks smaller than, or equal to \c chunkMax,
 * and greater than \c chunkMin. On some occasions this might result in a segment smaller than \c chunkMin, but these
 * are ignored in the likelihood calculation anyway.
 *
 * The split positions are rebuilt in a temporary vector and then copied back into the vector pointed to by
 * \c chunkIndex, which is resized to hold them. The function returns without changing the vector if the pointer (or
 * the vector it points to) is \c NULL, the vector is empty, \c chunkMax is zero, or the temporary vector cannot be
 * allocated.
 *
 * \param chunkIndex [in,out] pointer to a vector of segment split positions
 * \param chunkMax [in] the maximum allowed segment/chunk length
 * \param chunkMin [in] the minimum allowed segment/chunk length
 */
void rechop_data( UINT4Vector **chunkIndex, UINT4 chunkMax, UINT4 chunkMin ){
  UINT4 i = 0, j = 0, count = 0;
  UINT4Vector *cip = NULL;      /* the vector pointed to by chunkIndex */
  UINT4Vector *newindex = NULL; /* scratch space for the new split positions */
  UINT4Vector *resized = NULL;
  UINT4 length = 0, endIndex = 0;
  UINT4 startindex = 0, chunklength = 0;

  /* an empty vector has no final split position to read, and chunkMax = 0 would give a division by zero below */
  if ( chunkIndex == NULL || *chunkIndex == NULL || (*chunkIndex)->length == 0 || chunkMax == 0 ){ return; }

  cip = *chunkIndex;
  length = cip->length;
  endIndex = cip->data[length-1];

  /* Each input chunk produces at most floor(chunklength/chunkMax) + 1 output positions, and (assuming the split
   * positions are increasing) the chunk lengths sum to no more than the final split position, so this is an upper
   * bound on the number of output positions. The scratch space is taken from the heap rather than the stack as its
   * size depends on the data length. */
  newindex = XLALCreateUINT4Vector( length + endIndex/chunkMax );
  if ( newindex == NULL ){ return; }

  /* chop any chunks that are greater than chunkMax into chunks smaller than, or equal to chunkMax, and greater than chunkMin */
  for ( i = 0; i < length; i++ ){
    /* NOTE(review): every chunk after the first is taken to start one sample after the previous split position, so
     * its computed length is one less than the difference of the two split positions - confirm this is intended */
    if ( i == 0 ) { startindex = 0; }
    else { startindex = cip->data[i-1]+1; }

    chunklength = cip->data[i] - startindex;

    if ( chunklength > chunkMax ){
      UINT4 nfull = chunklength / chunkMax;
      UINT4 remain = chunklength % chunkMax;

      /* cut segment into as many chunkMax chunks as possible */
      for ( j = 0; j < nfull; j++ ){
        newindex->data[count] = startindex + (j+1)*chunkMax;
        count++;
      }

      /* last chunk values */
      if ( remain != 0 ){
        /* set final value */
        newindex->data[count] = startindex + j*chunkMax + remain;

        if ( remain < chunkMin ){
          /* split the last two cells into one that is chunkMin long and one that is (chunkMax+remainder)-chunkMin long
           * - this may leave a cell shorter than chunkMin, but we'll have to live with that! */
          UINT4 n1 = (chunkMax + remain) - chunkMin;

          /* move the second to last split position so that the final cell is chunkMin long (count-1 is always valid
           * here as at least one full chunkMax cell was written above) */
          newindex->data[count-1] = newindex->data[count] - chunkMin;

          if ( n1 < chunkMin ){
            XLAL_PRINT_WARNING("Non-fatal error... segment no. %u is %u long, which is less than chunkMin = %u.\n", count, n1, chunkMin);
          }
        }

        count++;
      }
    }
    else{
      /* chunk is already short enough, so keep its split position */
      newindex->data[count] = cip->data[i];
      count++;
    }
  }

  /* copy the new split positions back, and hand the resized vector to the caller, only if the resize succeeded */
  resized = XLALResizeUINT4Vector( cip, count );

  if ( resized != NULL ){
    for ( i = 0; i < count; i++ ) { resized->data[i] = newindex->data[i]; }
    *chunkIndex = resized;
  }

  XLALDestroyUINT4Vector( newindex );
}
Example #3
0
/**
 * \brief Split the data into segments
 *
 * This function is superseded by \c chop_n_merge, but gives the functionality of the old code.
 *
 * It cuts the data into as many contiguous segments of data as possible of length \c chunkMax. Where contiguous is
 * defined as containing consecutive points separated by no more than twice the value of the "dt" entry in
 * \c ifo->params. The lengths of segments that do not fit into a \c chunkMax length are also included.
 *
 * If there are no time stamps the vector is returned as created (with zero length), and if the vector cannot be
 * created \c NULL is returned.
 *
 * \param ifo [in] the LALInferenceIFOModel variable
 * \param chunkMax [in] the maximum length of a data chunk/segment
 *
 * \return A vector of chunk/segment lengths
 */
UINT4Vector *get_chunk_lengths( LALInferenceIFOModel *ifo, UINT4 chunkMax ){
  UINT4 i = 0, j = 0, count = 0;
  UINT4 length = ifo->times->length;

  REAL8 t1, t2, dt;

  UINT4Vector *chunkLengths = NULL;

  /* there can never be more chunks than there are data points */
  chunkLengths = XLALCreateUINT4Vector( length );

  /* allocation failed, or there are no data points to place in a chunk */
  if ( chunkLengths == NULL || length == 0 ){ return chunkLengths; }

  dt = *(REAL8*)LALInferenceGetVariable( ifo->params, "dt" );

  /* create vector of data segment length */
  while( 1 ){
    count++; /* number of points in the current chunk */

    /* break clause - written as i + 1 >= length so that the unsigned arithmetic cannot wrap around when there is
     * only a single data point */
    if( i + 1 >= length ){
      /* set final value of chunkLength */
      chunkLengths->data[j] = count;
      j++;
      break;
    }

    i++;

    t1 = XLALGPSGetREAL8( &ifo->times->data[i-1] );
    t2 = XLALGPSGetREAL8( &ifo->times->data[i] );

    /* if consecutive points are within two sample times of each other count as in the same chunk, unless the chunk
     * has already reached its maximum length */
    if( t2 - t1 > 2.*dt || count == chunkMax ){
      chunkLengths->data[j] = count;
      count = 0; /* reset counter */

      j++;
    }
  }

  /* trim the vector down to the number of chunks found */
  chunkLengths = XLALResizeUINT4Vector( chunkLengths, j );

  return chunkLengths;
}
Example #4
0
/**
 * \brief Chops the data into stationary segments based on Bayesian change point analysis
 *
 * The most favoured change point in the data is found with \c find_change_point. If the natural logarithm of the
 * odds ratio returned for that change point is greater than a threshold, the data is split in two at that point
 * and this function is run recursively on each of the two parts; otherwise the data is kept as a single segment.
 *
 * The threshold for the natural logarithm of the odds ratio is empirically set to be
 * \f[
 * T = 4.07 + 1.33\log{}_{10}{N},
 * \f]
 * where \f$N\f$ is the length in samples of the dataset. This is based on Monte Carlo simulations of
 * many realisations of Gaussian noise for data of different lengths. The threshold comes from a linear
 * fit to the log odds ratios required to give a 1% chance of splitting Gaussian data (drawn from a single
 * distribution) for data of various lengths.  Note, however, that this relation is not good for stretches of data
 * with lengths of less than about 30 points, and in fact is rather conservative for such short stretches
 * of data, i.e. such short stretches of data will require relatively larger odds ratios for splitting than
 * longer stretches.
 *
 * \param data [in] A complex data vector
 * \param chunkMin [in] The minimum allowed segment length
 *
 * \return A vector of segment end positions, given as sample indices relative to the start of \c data (the final
 * entry is the length of \c data)
 *
 * \sa find_change_point
 */
UINT4Vector *chop_data( gsl_vector_complex *data, UINT4 chunkMin ){
  const UINT4 npoints = (UINT4)data->size;

  REAL8 lnodds = 0.;

  UINT4Vector *splits = XLALCreateUINT4Vector( 1 );

  /* position of the most favoured change point, with its log odds ratio returned through lnodds */
  const UINT4 cpoint = find_change_point( data, &lnodds, chunkMin );

  /* length-dependent threshold on the log odds ratio (may need tuning or setting globally)
   * NOTE(review): this fit has been quoted both as a 1% and as a 0.5% false alarm probability of splitting Gaussian
   * data - confirm which is correct */
  const REAL8 lnoddsThreshold = 4.07 + 1.33*log10((REAL8)npoints);

  /* no significant change point: the data forms a single segment ending at its final sample */
  if ( !( lnodds > lnoddsThreshold ) ){
    splits->data[0] = npoints;
    return splits;
  }

  /* views of the data before, and from, the change point */
  gsl_vector_complex_view head = gsl_vector_complex_subvector( data, 0, cpoint );
  gsl_vector_complex_view tail = gsl_vector_complex_subvector( data, cpoint, npoints-cpoint );

  /* recursively chop each part */
  UINT4Vector *headSplits = chop_data( &head.vector, chunkMin );
  UINT4Vector *tailSplits = chop_data( &tail.vector, chunkMin );

  splits = XLALResizeUINT4Vector( splits, headSplits->length + tailSplits->length );

  /* combine new chunks: positions from the first part are used as they are, while those from the second part are
   * shifted by the change point so that they are relative to the start of the full data */
  UINT4 k = 0;

  for ( k = 0; k < headSplits->length; k++ ){
    splits->data[k] = headSplits->data[k];
  }

  for ( k = 0; k < tailSplits->length; k++ ){
    splits->data[headSplits->length + k] = tailSplits->data[k] + cpoint;
  }

  XLALDestroyUINT4Vector( headSplits );
  XLALDestroyUINT4Vector( tailSplits );

  return splits;
}
Example #5
0
/**
 * \brief Merge adjacent segments
 *
 * This function will attempt to remerge adjacent segments if statistically favourable (as calculated by the odds
 * ratio). For each pair of adjacent segments the joint likelihood of them being from two independent distributions is
 * compared to the likelihood that combined they are from one distribution. On each pass the pair of adjacent
 * segments with the largest log odds ratio in favour of merging is merged, and passes are repeated until the largest
 * log odds ratio falls below the threshold (currently zero).
 *
 * The vector pointed to by \c segments is modified and resized in place. Nothing is done if \c segments, or the
 * vector it points to, is \c NULL.
 *
 * \param data [in] A complex data vector
 * \param segments [in,out] Pointer to a vector of split segment indexes
 */
void merge_data( COMPLEX16Vector *data, UINT4Vector **segments ){
  UINT4 j = 0;
  REAL8 threshold = 0.; /* may need to be passed to function in the future, or defined globally */
  UINT4Vector *segs = NULL;

  if ( segments == NULL || *segments == NULL ){ return; }

  segs = *segments;

  /* loop until stopping criterion is reached */
  while( 1 ){
    UINT4 ncells = segs->length;

    UINT4 mergepoint = 0;
    REAL8 logodds = 0., maxl = -LAL_REAL8_MAX; /* maxl holds the largest log odds found on this pass */
    UINT4Vector *resized = NULL;

    for (j = 1; j < ncells; j++){
      REAL8 summerged = 0., sum1 = 0., sum2 = 0.;
      UINT4 i = 0, n1 = 0, n2 = 0, nm = 0;
      UINT4 cellstarts1 = 0, cellends1 = 0, cellstarts2 = 0, cellends2 = 0;
      REAL8 log_merged = 0., log_individual = 0.;

      /* get the evidence for merged adjacent cells - the first cell starts at zero, and every other cell starts
       * at the end value of the cell before it */
      if( j == 1 ) { cellstarts1 = 0; }
      else { cellstarts1 = segs->data[j-2]; }

      cellends1 = segs->data[j-1];

      cellstarts2 = segs->data[j-1];
      cellends2 = segs->data[j];

      n1 = cellends1 - cellstarts1;
      n2 = cellends2 - cellstarts2;
      nm = cellends2 - cellstarts1;

      /* sums of the squared absolute values of the data in each cell */
      for( i = cellstarts1; i < cellends1; i++ ) { sum1 += SQUARE( cabs(data->data[i]) ); }

      for( i = cellstarts2; i < cellends2; i++ ) { sum2 += SQUARE( cabs(data->data[i]) ); }

      summerged = sum1 + sum2;

      /* calculated evidences */
      log_merged = -2 + gsl_sf_lnfact(nm-1) - (REAL8)nm * log( summerged );

      log_individual = -2 + gsl_sf_lnfact(n1-1) - (REAL8)n1 * log( sum1 );
      log_individual += -2 + gsl_sf_lnfact(n2-1) - (REAL8)n2 * log( sum2 );

      logodds = log_merged - log_individual;

      /* keep track of the pair of cells that most favours being merged */
      if ( logodds > maxl ){
        mergepoint = j - 1;
        maxl = logodds;
      }
    }

    /* set break criterion - this is also reached when there are fewer than two cells, as maxl is then unchanged */
    if ( maxl < threshold ) { break; }

    /* merge cells: remove the cell end value between the two being merged and shift */
    for( UINT4 i=0; i < ncells-(mergepoint+1); i++ ){
      segs->data[mergepoint+i] = segs->data[mergepoint+i+1];
    }

    resized = XLALResizeUINT4Vector( segs, ncells - 1 );

    /* stop merging if the resize failed rather than dereferencing a NULL vector on the next pass
     * NOTE(review): presumably the resize routine reports the failure through the XLAL error state - confirm that
     * callers check it */
    if ( resized == NULL ) { break; }

    /* make sure the caller sees the resized vector */
    segs = resized;
    *segments = segs;
  }
}