Example 1
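Computes a kernel density estimate (KDE) of the input 1-D data set into a histogram, estimating the bandwidth from the data if none was given. If a second data set (q_data_) is present, the Kullback-Leibler divergence between the P and Q estimates is accumulated frame by frame; the final histogram can optionally be converted to a free-energy profile.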
// Analysis_KDE::Analyze()
Analysis::RetType Analysis_KDE::Analyze() {
  DataSet_1D const& Pdata = static_cast<DataSet_1D const&>( *data_ );
  int inSize = (int)Pdata.Size();
  // Set output set dimensions from input set if necessary.
  if (!minArgSet_) {
    mprintf("\tNo minimum specified, determining from input data.\n");
    if (q_data_ != 0)
      default_min_ = std::max(((DataSet_1D*)q_data_)->Min(), Pdata.Min());
    else
      default_min_ = Pdata.Min();
  }
  if (!maxArgSet_) {
    mprintf("\tNo maximum specified, determining from input data.\n");
    if (q_data_ != 0)
      default_max_ = std::min(((DataSet_1D*)q_data_)->Max(), Pdata.Max());
    else
      default_max_ = Pdata.Max();
  }
  HistBin Xdim;
  if (Xdim.CalcBinsOrStep(default_min_, default_max_, default_step_,
                          default_bins_, Pdata.Meta().Legend()))
    return Analysis::ERR;
  Xdim.PrintHistBin();
  output_->SetDim( Dimension::X, Xdim );

  // Allocate output set
  DataSet_double& P_hist = static_cast<DataSet_double&>( *output_ );
  P_hist.Resize( Xdim.Bins() );
  int outSize = (int)P_hist.Size();

  // Estimate bandwidth from normal distribution approximation if necessary.
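  //   (Silverman's rule of thumb: h = 1.06 * sigma * N^(-1/5).)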
  if (bandwidth_ < 0.0) {
    double stdev;
    Pdata.Avg( stdev );
    double N_to_1_over_5 = pow( (double)inSize, (-1.0/5.0) );
    bandwidth_ = 1.06 * stdev * N_to_1_over_5;
    mprintf("\tDetermined bandwidth from normal distribution approximation: %f\n", bandwidth_);
  }

  // Set up increments
  std::vector<double> Increments(inSize, 1.0);
  if (amddata_ != 0) {
    DataSet_1D& AMD = static_cast<DataSet_1D&>( *amddata_ );
    if ((int)AMD.Size() != inSize) {
      if ((int)AMD.Size() < inSize) {
        mprinterr("Error: Size of AMD data set %zu < input data set iu\n",
                  AMD.Size(), inSize);
        return Analysis::ERR;
      } else {
        mprintf("Warning: Size of AMD data set %zu > input data set %i\n",
                AMD.Size(), inSize);
      }
    }
    for (int i = 0; i < inSize; i++)
      Increments[i] = exp( AMD.Dval(i) );
  }
  int frame, bin;
  double increment;
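  // Running sum of all increments; used below to normalize the histogram.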
  double total = 0.0;
# ifdef _OPENMP
  int numthreads;
# pragma omp parallel
  {
#   pragma omp master
    {
      numthreads = omp_get_num_threads();
      mprintf("\tParallelizing calculation with %i threads\n", numthreads);
    }
  }
# endif
  if (q_data_ == 0) {
    double val;
    // Calculate KDE, loop over input data
#   ifdef _OPENMP
    int mythread;
    double **P_thread;
#   pragma omp parallel private(frame, bin, val, increment, mythread) reduction(+:total)
    {
      mythread = omp_get_thread_num();
      // Prevent race conditions by giving each thread its own histogram
#     pragma omp master
      {
        P_thread = new double*[ numthreads ];
        for (int nt = 0; nt < numthreads; nt++) {
          P_thread[nt] = new double[ outSize ];
          std::fill(P_thread[nt], P_thread[nt] + outSize, 0.0);
        }
      }
#     pragma omp barrier
#     pragma omp for
#   endif
      for (frame = 0; frame < inSize; frame++) {
        val = Pdata.Dval(frame);
        increment = Increments[frame];
        total += increment;
        // Apply kernel across histogram
        for (bin = 0; bin < outSize; bin++)
#         ifdef _OPENMP
          P_thread[mythread][bin] +=
#         else
          P_hist[bin] += 
#         endif
            (increment * (this->*Kernel_)( (Xdim.Coord(bin) - val) / bandwidth_ ));
      }
#   ifdef _OPENMP
    } // END parallel block
    // Combine results from each thread histogram into P_hist
    for (int i = 0; i < numthreads; i++) {
      for (int j = 0; j < outSize; j++)
        P_hist[j] += P_thread[i][j];
      delete[] P_thread[i];
    }
    delete[] P_thread;
#   endif
  } else {
    // Calculate Kullback-Leibler divergence vs time
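    //   D_KL(P||Q) = sum_bins P(bin) * log( P(bin) / Q(bin) ), evaluated each frame.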
    DataSet_1D const& Qdata = static_cast<DataSet_1D const&>( *q_data_ );
    if (inSize != (int)Qdata.Size()) {
      mprintf("Warning: Size of %s (%zu) != size of %s (%zu)\n",
                Pdata.legend(), Pdata.Size(), Qdata.legend(), Qdata.Size());
      inSize = std::min( inSize, (int)Qdata.Size() );
      mprintf("Warning:  Only using %i data points.\n", inSize);
    }
    DataSet_double& klOut = static_cast<DataSet_double&>( *kldiv_ );
    std::vector<double> Q_hist( Xdim.Bins(), 0.0 ); // Raw Q histogram.
    klOut.Resize( inSize ); // Hold KL div vs time
    double val_p, val_q, KL, xcrd, Pnorm, Qnorm, normP, normQ;
    bool Pzero, Qzero;
    // Loop over input P and Q data
    unsigned int nInvalid = 0, validPoint;
    for (frame = 0; frame < inSize; frame++) {
      //mprintf("DEBUG: Frame=%i Outsize=%i\n", frame, outSize);
      increment = Increments[frame];
      total += increment;
      // Apply kernel across P and Q, calculate KL divergence as we go. 
      val_p = Pdata.Dval(frame);
      val_q = Qdata.Dval(frame);
      normP = 0.0;
      normQ = 0.0;
      validPoint = 0; // 0 means valid; counts bins where exactly one of P/Q is zero (KL undefined)
#     ifdef _OPENMP
#     pragma omp parallel private(bin, xcrd) reduction(+:normP, normQ)
      {
#       pragma omp for
#       endif
        for (bin = 0; bin < outSize; bin++) {
          xcrd = Xdim.Coord(bin);
          P_hist[bin] += (increment * (this->*Kernel_)( (xcrd - val_p) / bandwidth_ ));
          normP += P_hist[bin];
          Q_hist[bin] += (increment * (this->*Kernel_)( (xcrd - val_q) / bandwidth_ ));
          normQ += Q_hist[bin];
        }
#     ifdef _OPENMP
      } // End first parallel block
#     endif
      if (normP > std::numeric_limits<double>::min())
        normP = 1.0 / normP;
      if (normQ > std::numeric_limits<double>::min())
        normQ = 1.0 / normQ;
      KL = 0.0;
#     ifdef _OPENMP
#     pragma omp parallel private(bin, Pnorm, Qnorm, Pzero, Qzero) reduction(+:KL, validPoint)
      {
#       pragma omp for
#       endif
        for (bin = 0; bin < outSize; bin++) {
          // KL only defined when Q and P are non-zero, or both zero.
          if (validPoint == 0) {
            // Normalize for this frame
            Pnorm = P_hist[bin] * normP;
            Qnorm = Q_hist[bin] * normQ;
            //mprintf("Frame %8i Bin %8i P=%g Q=%g Pnorm=%g Qnorm=%g\n",frame,bin,P_hist[bin],Q_hist[bin],normP,normQ);
            Pzero = (Pnorm <= std::numeric_limits<double>::min());
            Qzero = (Qnorm <= std::numeric_limits<double>::min());
            if (!Pzero && !Qzero)
              KL += ( log( Pnorm / Qnorm ) * Pnorm );
            else if ( Pzero != Qzero )
              validPoint++;
          }
        }
#       ifdef _OPENMP
      } // End second parallel block
#     endif
      if (validPoint == 0) {
        klOut[frame] = KL;
      } else {
        //mprintf("Warning:\tKullback-Leibler divergence is undefined for frame %i\n", frame+1);
        nInvalid++;
      }
    } // END KL divergence calc loop over frames
    if (nInvalid > 0)
      mprintf("Warning:\tKullback-Leibler divergence was undefined for %u frames.\n", nInvalid);
  }

  // Normalize
  for (unsigned int j = 0; j < P_hist.Size(); j++)
    P_hist[j] /= (total * bandwidth_);

  // Calc free E
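  //   G(x) = -kT * ln( P(x) ), shifted so the lowest value is zero.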
  if (calcFreeE_) {
    double KT = (-Constants::GASK_KCAL * Temp_);
    double minFreeE = 0.0;
    for (unsigned int j = 0; j < P_hist.Size(); j++) {
      P_hist[j] = log( P_hist[j] ) * KT;
      if (j == 0)
        minFreeE = P_hist[j];
      else if (P_hist[j] < minFreeE)
        minFreeE = P_hist[j];
    }
    for (unsigned int j = 0; j < P_hist.Size(); j++)
      P_hist[j] -= minFreeE;
  }

  return Analysis::OK;
}
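
For reference: with the default per-frame increments of 1.0, the final division by (total * bandwidth_) is the standard KDE normalization 1/(N*h), so the loop above implements f(x) = (1/(N*h)) * sum_i K((x - x_i)/h). Below is a minimal standalone sketch of that estimator, assuming a Gaussian kernel; gaussian_kernel and simple_kde are illustrative names, not part of cpptraj.

// Minimal standalone KDE sketch (Gaussian kernel assumed; names are illustrative).
#include <cmath>
#include <vector>

static double gaussian_kernel(double u) {
  static const double TWO_PI = 6.28318530717958648;
  return exp(-0.5 * u * u) / sqrt(TWO_PI);
}

// Evaluate f(x) = (1/(N*h)) * sum_i K((x - x_i)/h) on a uniform grid of nbins points.
std::vector<double> simple_kde(std::vector<double> const& data,
                               double min, double step, int nbins, double h)
{
  std::vector<double> density(nbins, 0.0);
  for (unsigned int i = 0; i < data.size(); i++)
    for (int bin = 0; bin < nbins; bin++) {
      double xcrd = min + bin * step;               // grid coordinate for this bin
      density[bin] += gaussian_kernel((xcrd - data[i]) / h);
    }
  for (int bin = 0; bin < nbins; bin++)             // normalize by N * h
    density[bin] /= ((double)data.size() * h);
  return density;
}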
Example 2
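The same KDE accumulation factored into a standalone routine, KDE::CalcKDE(), which takes the input set, per-frame increments, histogram dimension, and bandwidth, optionally caps the OpenMP thread count, and fills the normalized output histogram.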
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata,
                 std::vector<double> const& Increments,
                 HistBin const& Xdim, double bandwidth) const
{
  int inSize = (int)Pdata.Size();
  // Allocate output set, set all to zero.
  Out.Zero( Xdim.Bins() );
  Out.SetDim( Dimension::X, Xdim );
  int outSize = (int)Out.Size();

  int frame, bin;
  double increment, val;
  double total = 0.0;
# ifdef _OPENMP
  int original_num_threads;
# pragma omp parallel
  {
#   pragma omp master
    {
      original_num_threads = omp_get_num_threads();
    }
  }
  // Ensure we only execute with the desired number of threads
  if (numthreads_ < original_num_threads)
    omp_set_num_threads( numthreads_ );
# endif
  // Calculate KDE, loop over input data
# ifdef _OPENMP
  int mythread;
  double **P_thread;
# pragma omp parallel private(frame, bin, val, increment, mythread) reduction(+:total)
  {
    mythread = omp_get_thread_num();
    // Prevent race conditions by giving each thread its own histogram
#   pragma omp master
    {
      P_thread = new double*[ numthreads_ ];
      for (int nt = 0; nt < numthreads_; nt++) {
        P_thread[nt] = new double[ outSize ];
        std::fill(P_thread[nt], P_thread[nt] + outSize, 0.0);
      }
    }
#   pragma omp barrier
#   pragma omp for
# endif
    for (frame = 0; frame < inSize; frame++) {
      val = Pdata.Dval(frame);
      increment = Increments[frame];
      total += increment;
      // Apply kernel across histogram
      for (bin = 0; bin < outSize; bin++)
#       ifdef _OPENMP
        P_thread[mythread][bin] +=
#       else
        Out[bin] +=
#       endif
          (increment * (this->*Kernel_)( (Xdim.Coord(bin) - val) / bandwidth ));
    }
# ifdef _OPENMP
  } // END parallel block
  // Combine results from each thread histogram into Out
  for (int i = 0; i < numthreads_; i++) {
    for (int j = 0; j < outSize; j++)
      Out[j] += P_thread[i][j];
    delete[] P_thread[i];
  }
  delete[] P_thread;
  // Restore original number of threads
  if (original_num_threads != numthreads_)
    omp_set_num_threads( original_num_threads );
# endif
  // Normalize
  for (unsigned int j = 0; j < Out.Size(); j++)
    Out[j] /= (total * bandwidth);
  return 0;
}
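
Both examples handle the OpenMP case by giving each thread a private scratch histogram that is summed serially after the parallel region, so only total needs a reduction clause and no bin is ever written by two threads at once. A minimal sketch of the same pattern, using std::vector instead of raw new/delete (bin_data is an illustrative name, not part of cpptraj; compile with -fopenmp):

#include <vector>
#ifdef _OPENMP
# include <omp.h>
#endif

void bin_data(std::vector<double> const& data, double min, double step,
              std::vector<double>& hist)
{
  int nbins = (int)hist.size();
#ifdef _OPENMP
  int nthreads = omp_get_max_threads();
  // One private histogram per thread prevents races on shared bins.
  std::vector<std::vector<double> > local(nthreads, std::vector<double>(nbins, 0.0));
# pragma omp parallel
  {
    std::vector<double>& mine = local[omp_get_thread_num()];
#   pragma omp for
    for (int n = 0; n < (int)data.size(); n++) {
      int bin = (int)((data[n] - min) / step);
      if (bin >= 0 && bin < nbins) mine[bin] += 1.0;
    }
  }
  // Serial reduction of the per-thread histograms into the shared result.
  for (int t = 0; t < nthreads; t++)
    for (int b = 0; b < nbins; b++)
      hist[b] += local[t][b];
#else
  for (unsigned int n = 0; n < data.size(); n++) {
    int bin = (int)((data[n] - min) / step);
    if (bin >= 0 && bin < nbins) hist[bin] += 1.0;
  }
#endif
}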