Exemplo n.º 1
0
/** Given an ArgList containing name,[min,max,step,bins,col,N], set up a
  * histogram dimension with that name and parameters min, max, step, bins,
  * append it to dimensions_, and recompute the bin offsets of all dimensions.
  *
  * Only tokens 1-4 (min, max, step, bins) are interpreted here; any further
  * tokens are ignored by this routine. A token of '*' or a missing token
  * means the default is used: for min/max the class default if one was set,
  * otherwise the min/max of the data set; for step/bins the class defaults.
  *
  * \param arglist Dimension arguments; arglist[0] is used as the dimension label.
  * \param dset    Data set supplying min/max when no argument or default exists.
  * \param offset  Output; on success holds the total number of bins across all
  *                dimensions set up so far.
  * \return 1 if error occurs, 0 otherwise.
  */
int Analysis_Hist::setupDimension(ArgList &arglist, DataSet_1D const& dset, size_t& offset) {
  bool minArg = false;
  bool maxArg = false;
  bool stepArg = false;
  bool binsArg = false;

  if (debug_>1)
    arglist.PrintList();

  // Set up dimension name
  // NOTE: arglist[0] should be same as dset name from CheckDimension
  std::string const& dLabel = arglist[0];

  // Cycle through coordinate arguments. Any argument left blank will be
  // assigned a default value later.
  double dMin = 0.0;
  double dMax = 0.0;
  double dStep = 0.0;
  int dBins = -1;
  for (int i = 1; i < arglist.Nargs(); i++) {
    if (debug_>1) mprintf("DEBUG: setupCoord: Token %i (%s)\n", i, arglist[i].c_str());
    // '*' means default explicitly requested
    if (arglist[i] == "*") continue;
    // Token position determines meaning; positions > 4 are not handled here.
    switch (i) {
      case 1 : dMin  = convertToDouble( arglist[i]); minArg = true; break;
      case 2 : dMax  = convertToDouble( arglist[i]); maxArg = true; break;
      case 3 : dStep = convertToDouble( arglist[i]); stepArg = true; break;
      case 4 : dBins = convertToInteger(arglist[i]); binsArg = true; break;
    }
  }

  // If no min arg and no default min arg, get min from dataset
  if (!minArg) {
    if (!minArgSet_)
      dMin = dset.Min();
    else
      dMin = default_min_;
  }
  // If no max arg and no default max arg, get max from dataset
  if (!maxArg) {
    if (!maxArgSet_)
      dMax = dset.Max();
    else
      dMax = default_max_;
  }
  // If bins/step not specified, use default
  if (!binsArg)
    dBins = default_bins_;
  if (!stepArg)
    dStep = default_step_;

  // Calculate dimension from given args.
  HistBin dim;
  if (dim.CalcBinsOrStep( dMin, dMax, dStep, dBins, dLabel )) {
    mprinterr("Error: Could not set up histogram dimension '%s'\n", dLabel.c_str());
    return 1;
  }
  dim.PrintHistBin();
  dimensions_.push_back( dim );

  // Recalculate offsets for all dimensions starting at farthest coord. This
  // follows row major ordering.
  offset = 1UL;
  binOffsets_.resize( dimensions_.size() );
  OffType::iterator bOff = binOffsets_.begin();
  for (HdimType::const_iterator rd = dimensions_.begin();
                                rd != dimensions_.end(); ++rd, ++bOff)
  {
    if (debug_>0) mprintf("\tHistogram: %s offset is %zu\n", rd->label(), offset);
    *bOff = (long int)offset;
    size_t const prevOffset = offset;
    size_t const nBins = (size_t)rd->Bins();
    offset *= nBins;
    // Check for overflow. Unsigned multiplication wraps silently, and the
    // wrapped product is not guaranteed to be smaller than the previous
    // offset, so verify by dividing back. A bin count of zero is rejected
    // as well since it would make the total size zero.
    if ( nBins == 0 || offset / nBins != prevOffset ) {
      mprinterr("Error: Too many bins for histogram. Try reducing the number of bins and/or\n"
                "Error:   the number of dimensions.\n");
      return 1;
    }
  }
  // offset should now be equal to the total number of bins across all dimensions
  if (debug_>0) mprintf("\tHistogram: Total Bins = %zu\n",offset);

  return 0;
}
Exemplo n.º 2
0
/** Calculate a kernel density estimate for the given data set, determining
  * the histogram range, number of bins, and bandwidth automatically from the
  * data. Every data point is given an increment (weight) of 1.0.
  * \param Out   Output set; filled by the five-argument CalcKDE() overload.
  * \param Pdata Input data; at least 2 points are required.
  * \return 1 if there is not enough data or the histogram dimension cannot
  *         be set up; otherwise the return value of the five-argument
  *         CalcKDE() overload.
  */
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata) const {
  if (Pdata.Size() < 2) {
    mprinterr("Error: Not enough data for KDE.\n");
    return 1;
  }
  // Single pass over the data: track the range and accumulate the running
  // mean and sum of squared deviations (Welford's online update).
  double lowest = Pdata.Dval(0);
  double highest = lowest;
  double count = 0.0;
  double runningMean = 0.0;
  double sumSqDev = 0.0;
  for (unsigned int idx = 0; idx != Pdata.Size(); idx++) {
    double value = Pdata.Dval(idx);
    if (value < lowest)  lowest = value;
    if (highest < value) highest = value;
    count += 1.0;
    double devFromOldMean = value - runningMean;
    runningMean += devFromOldMean / count;
    // Uses the deviation from both the old and the updated mean.
    sumSqDev += devFromOldMean * (value - runningMean);
  }
  // Sample variance (N - 1 in the denominator) and standard deviation.
  sumSqDev /= (count - 1.0);
  double stdev = sqrt(sumSqDev);
  mprintf("DEBUG: mean= %g, stdev= %g\n", runningMean, stdev);

  // Histogram dimension: bin count is the truncated square root of the
  // number of points; the step is passed as zero.
  // NOTE(review): presumably a zero step makes CalcBinsOrStep derive the
  // step from min/max/bins - confirm against HistBin.
  double binStep = 0.0;
  int nBins = (int)sqrt((double)Pdata.Size());
  HistBin Xdim;
  if (Xdim.CalcBinsOrStep(lowest, highest, binStep, nBins, Pdata.Meta().Legend()))
    return 1;
  Xdim.PrintHistBin();

  // Automatically determine bandwidth from the standard deviation.
  double bandwidth = 1.06 * stdev * BandwidthFactor(Pdata.Size());
  mprintf("\tBandwidth: %f\n", bandwidth);

  // Unit weight for every data point.
  std::vector<double> Increments(Pdata.Size(), 1.0);

  return CalcKDE(Out, Pdata, Increments, Xdim, bandwidth);
}
Exemplo n.º 3
0
/** Analysis_KDE::Analyze()
  * Compute a kernel density estimate of the input data set (data_) on a
  * histogram grid and store it in output_. If a second data set (q_data_)
  * is present, the Kullback-Leibler divergence of P vs Q is additionally
  * computed as a function of frame and stored in kldiv_. Optionally the
  * result is converted to a free energy relative to its minimum.
  *
  * Side effects: default_min_, default_max_ and bandwidth_ are overwritten
  * with values determined from the data when they were not specified.
  *
  * \return Analysis::ERR if the histogram dimension cannot be set up or the
  *         AMD data set is smaller than the input set; Analysis::OK otherwise.
  */
Analysis::RetType Analysis_KDE::Analyze() {
  DataSet_1D const& Pdata = static_cast<DataSet_1D const&>( *data_ );
  int inSize = (int)Pdata.Size();
  // Set output set dimensions from input set if necessary.
  // When a Q data set is present, the larger of the two minima and the
  // smaller of the two maxima are used, i.e. the range common to both sets.
  if (!minArgSet_) {
    mprintf("\tNo minimum specified, determining from input data.\n");
    if (q_data_ != 0)
      default_min_ = std::max(((DataSet_1D*)q_data_)->Min(), Pdata.Min());
    else
      default_min_ = Pdata.Min();
  }
  if (!maxArgSet_) {
    mprintf("\tNo maximum specified, determining from input data.\n");
    if (q_data_ != 0)
      default_max_ = std::min(((DataSet_1D*)q_data_)->Max(), Pdata.Max());
    else
      default_max_ = Pdata.Max();
  }
  HistBin Xdim;
  if (Xdim.CalcBinsOrStep(default_min_, default_max_, default_step_,
                          default_bins_, Pdata.Meta().Legend()))
    return Analysis::ERR;
  Xdim.PrintHistBin();
  output_->SetDim( Dimension::X, Xdim );

  // Allocate output set
  DataSet_double& P_hist = static_cast<DataSet_double&>( *output_ );
  P_hist.Resize( Xdim.Bins() );
  int outSize = (int)P_hist.Size();

  // Estimate bandwidth from normal distribution approximation if necessary.
  // A negative bandwidth_ means "not specified": use 1.06 * stdev * N^(-1/5).
  if (bandwidth_ < 0.0) {
    double stdev;
    Pdata.Avg( stdev );
    double N_to_1_over_5 = pow( (double)inSize, (-1.0/5.0) );
    bandwidth_ = 1.06 * stdev * N_to_1_over_5;
    mprintf("\tDetermined bandwidth from normal distribution approximation: %f\n", bandwidth_);
  }

  // Set up increments (per-frame weights). Default weight is 1.0; if an AMD
  // data set is present each frame is weighted by exp() of its AMD value.
  std::vector<double> Increments(inSize, 1.0);
  if (amddata_ != 0) {
    DataSet_1D& AMD = static_cast<DataSet_1D&>( *amddata_ );
    if ((int)AMD.Size() != inSize) {
      if ((int)AMD.Size() < inSize) {
        // Not enough weights for every input frame: cannot continue.
        mprinterr("Error: Size of AMD data set %zu < input data set %i\n",
                  AMD.Size(), inSize);
        return Analysis::ERR;
      } else {
        // Extra AMD values are simply not used.
        mprintf("Warning: Size of AMD data set %zu > input data set %i\n",
                AMD.Size(), inSize);
      }
    }
    for (int i = 0; i < inSize; i++)
      Increments[i] = exp( AMD.Dval(i) );
  }
  int frame, bin;
  double increment;
  double total = 0.0; // Sum of all increments; used for normalization.
# ifdef _OPENMP
  // Determine how many threads a parallel region will use.
  int numthreads;
# pragma omp parallel
  {
#   pragma omp master
    {
      numthreads = omp_get_num_threads();
      mprintf("\tParallelizing calculation with %i threads\n", numthreads);
    }
  }
# endif
  if (q_data_ == 0) {
    double val;
    // Calculate KDE, loop over input data
#   ifdef _OPENMP
    int mythread;
    double **P_thread;
#   pragma omp parallel private(frame, bin, val, increment, mythread) reduction(+:total)
    {
      mythread = omp_get_thread_num();
      // Prevent race conditions by giving each thread its own histogram
#     pragma omp master
      {
        P_thread = new double*[ numthreads ];
        for (int nt = 0; nt < numthreads; nt++) {
          P_thread[nt] = new double[ outSize ];
          std::fill(P_thread[nt], P_thread[nt] + outSize, 0.0);
        }
      }
      // All threads must wait until the master has finished allocating.
#     pragma omp barrier
#     pragma omp for
#   endif
      for (frame = 0; frame < inSize; frame++) {
        val = Pdata.Dval(frame);
        increment = Increments[frame];
        total += increment;
        // Apply kernel across histogram
        for (bin = 0; bin < outSize; bin++)
#         ifdef _OPENMP
          P_thread[mythread][bin] +=
#         else
          P_hist[bin] +=
#         endif
            (increment * (this->*Kernel_)( (Xdim.Coord(bin) - val) / bandwidth_ ));
      }
#   ifdef _OPENMP
    } // END parallel block
    // Combine results from each thread histogram into P_hist
    for (int i = 0; i < numthreads; i++) {
      for (int j = 0; j < outSize; j++)
        P_hist[j] += P_thread[i][j];
      delete[] P_thread[i];
    }
    delete[] P_thread;
#   endif
  } else {
    // Calculate Kullback-Leibler divergence vs time
    DataSet_1D const& Qdata = static_cast<DataSet_1D const&>( *q_data_ );
    if (inSize != (int)Qdata.Size()) {
      mprintf("Warning: Size of %s (%zu) != size of %s (%zu)\n",
              Pdata.legend(), Pdata.Size(), Qdata.legend(), Qdata.Size());
      // Only frames present in both sets can be compared.
      inSize = std::min( inSize, (int)Qdata.Size() );
      mprintf("Warning:  Only using %i data points.\n", inSize);
    }
    DataSet_double& klOut = static_cast<DataSet_double&>( *kldiv_ );
    std::vector<double> Q_hist( Xdim.Bins(), 0.0 ); // Raw Q histogram.
    klOut.Resize( inSize ); // Hold KL div vs time
    double val_p, val_q, KL, xcrd, Pnorm, Qnorm, normP, normQ;
    bool Pzero, Qzero;
    // Loop over input P and Q data
    unsigned int nInvalid = 0, validPoint;
    for (frame = 0; frame < inSize; frame++) {
      //mprintf("DEBUG: Frame=%i Outsize=%i\n", frame, outSize);
      increment = Increments[frame];
      total += increment;
      // Apply kernel across P and Q, calculate KL divergence as we go.
      // P_hist and Q_hist accumulate over frames, so the divergence at a
      // given frame reflects all data up to and including that frame.
      val_p = Pdata.Dval(frame);
      val_q = Qdata.Dval(frame);
      normP = 0.0;
      normQ = 0.0;
      validPoint = 0; // 0 in this context means true
#     ifdef _OPENMP
#     pragma omp parallel private(bin, xcrd) reduction(+:normP, normQ)
      {
#       pragma omp for
#       endif
        for (bin = 0; bin < outSize; bin++) {
          xcrd = Xdim.Coord(bin);
          P_hist[bin] += (increment * (this->*Kernel_)( (xcrd - val_p) / bandwidth_ ));
          normP += P_hist[bin];
          Q_hist[bin] += (increment * (this->*Kernel_)( (xcrd - val_q) / bandwidth_ ));
          normQ += Q_hist[bin];
        }
#     ifdef _OPENMP
      } // End first parallel block
#     endif
      // Convert sums to normalization factors, guarding against division
      // by (effectively) zero.
      if (normP > std::numeric_limits<double>::min())
        normP = 1.0 / normP;
      if (normQ > std::numeric_limits<double>::min())
        normQ = 1.0 / normQ;
      KL = 0.0;
#     ifdef _OPENMP
#     pragma omp parallel private(bin, Pnorm, Qnorm, Pzero, Qzero) reduction(+:KL, validPoint)
      {
#       pragma omp for
#       endif
        for (bin = 0; bin < outSize; bin++) {
          // KL only defined when Q and P are non-zero, or both zero.
          if (validPoint == 0) {
            // Normalize for this frame
            Pnorm = P_hist[bin] * normP;
            Qnorm = Q_hist[bin] * normQ;
            //mprintf("Frame %8i Bin %8i P=%g Q=%g Pnorm=%g Qnorm=%g\n",frame,bin,P_hist[bin],Q_hist[bin],normP,normQ);
            Pzero = (Pnorm <= std::numeric_limits<double>::min());
            Qzero = (Qnorm <= std::numeric_limits<double>::min());
            if (!Pzero && !Qzero)
              KL += ( log( Pnorm / Qnorm ) * Pnorm );
            else if ( Pzero != Qzero )
              validPoint++; // Exactly one of P/Q is zero: undefined.
          }
        }
#       ifdef _OPENMP
      } // End second parallel block
#     endif
      if (validPoint == 0) {
        klOut[frame] = KL;
      } else {
        // Frame is left at whatever value Resize() gave it and is counted.
        //mprintf("Warning:\tKullback-Leibler divergence is undefined for frame %i\n", frame+1);
        nInvalid++;
      }
    } // END KL divergence calc loop over frames
    if (nInvalid > 0)
      mprintf("Warning:\tKullback-Leibler divergence was undefined for %u frames.\n", nInvalid);
  }

  // Normalize by total weight and bandwidth.
  for (unsigned int j = 0; j < P_hist.Size(); j++)
    P_hist[j] /= (total * bandwidth_);

  // Calc free E: -GASK_KCAL * Temp_ * ln(P), shifted so the minimum is zero.
  if (calcFreeE_) {
    double KT = (-Constants::GASK_KCAL * Temp_);
    double minFreeE = 0.0;
    for (unsigned int j = 0; j < P_hist.Size(); j++) {
      P_hist[j] = log( P_hist[j] ) * KT;
      if (j == 0)
        minFreeE = P_hist[j];
      else if (P_hist[j] < minFreeE)
        minFreeE = P_hist[j];
    }
    for (unsigned int j = 0; j < P_hist.Size(); j++)
      P_hist[j] -= minFreeE;
  }

  return Analysis::OK;
}
Exemplo n.º 4
0
/** Calculate a kernel density estimate of the given data on the given
  * histogram dimension.
  * \param Out        Output set; resized/zeroed to Xdim.Bins() elements and
  *                   given Xdim as its X dimension.
  * \param Pdata      Input data points.
  * \param Increments Per-point weights; must contain at least as many
  *                   elements as Pdata.
  * \param Xdim       Histogram dimension supplying bin count and coordinates.
  * \param bandwidth  Kernel bandwidth used to scale distances and normalize.
  * \return 1 if Increments has fewer elements than Pdata, 0 otherwise.
  */
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata,
                 std::vector<double> const& Increments,
                 HistBin const& Xdim, double bandwidth) const
{
  int inSize = (int)Pdata.Size();
  // Every data point reads Increments[frame]; refuse a short weight array
  // rather than reading past its end.
  if (Increments.size() < Pdata.Size()) {
    mprinterr("Error: Number of increments (%zu) is less than number of data points (%i).\n",
              Increments.size(), inSize);
    return 1;
  }
  // Allocate output set, set all to zero.
  Out.Zero( Xdim.Bins() );
  Out.SetDim( Dimension::X, Xdim );
  int outSize = (int)Out.Size();

  int frame, bin;
  double increment, val;
  double total = 0.0; // Sum of all increments; used for normalization.
# ifdef _OPENMP
  // Determine how many threads a parallel region currently uses.
  int original_num_threads;
# pragma omp parallel
  {
#   pragma omp master
    {
      original_num_threads = omp_get_num_threads();
    }
  }
  // Ensure we only execute with the desired number of threads
  if (numthreads_ < original_num_threads)
    omp_set_num_threads( numthreads_ );
# endif
  // Calculate KDE, loop over input data
# ifdef _OPENMP
  int mythread;
  double **P_thread;
# pragma omp parallel private(frame, bin, val, increment, mythread) reduction(+:total)
  {
    mythread = omp_get_thread_num();
    // Prevent race conditions by giving each thread its own histogram
#   pragma omp master
    {
      P_thread = new double*[ numthreads_ ];
      for (int nt = 0; nt < numthreads_; nt++) {
        P_thread[nt] = new double[ outSize ];
        std::fill(P_thread[nt], P_thread[nt] + outSize, 0.0);
      }
    }
    // All threads must wait until the master has finished allocating.
#   pragma omp barrier
#   pragma omp for
# endif
    for (frame = 0; frame < inSize; frame++) {
      val = Pdata.Dval(frame);
      increment = Increments[frame];
      total += increment;
      // Apply kernel across histogram
      for (bin = 0; bin < outSize; bin++)
#       ifdef _OPENMP
        P_thread[mythread][bin] +=
#       else
        Out[bin] +=
#       endif
          (increment * (this->*Kernel_)( (Xdim.Coord(bin) - val) / bandwidth ));
    }
# ifdef _OPENMP
  } // END parallel block
  // Combine results from each thread histogram into Out
  for (int i = 0; i < numthreads_; i++) {
    for (int j = 0; j < outSize; j++)
      Out[j] += P_thread[i][j];
    delete[] P_thread[i];
  }
  delete[] P_thread;
  // Restore original number of threads
  if (original_num_threads != numthreads_)
    omp_set_num_threads( original_num_threads );
# endif
  // Normalize by total weight and bandwidth.
  for (unsigned int j = 0; j < Out.Size(); j++)
    Out[j] /= (total * bandwidth);
  return 0;
}