// ----------------------------------------------------------------------------- int DataSet_Vector::CalcVectorCorr(DataSet_Vector const& V2, DataSet_1D& Ct, int lagmaxIn) const { if (Ct.Type() != DataSet::DOUBLE) return 1; unsigned int Nvecs = this->Size(); if (Nvecs != V2.Size()) return 1; if (Nvecs < 2) return 1; unsigned int lagmax; if (lagmaxIn == -1) lagmax = Nvecs; else if (lagmaxIn > (int)Nvecs) lagmax = Nvecs; else lagmax = (unsigned int)lagmaxIn; bool crosscorr = (&V2 != this); unsigned int arraySize = Nvecs * 3; // XYZ CorrF_FFT pubfft( arraySize ); ComplexArray data1 = pubfft.Array(); data1.PadWithZero( arraySize ); ComplexArray data2; if (crosscorr) data2 = data1; // Load up real components of data1 with vector XYZ unsigned int idx = 0; for (unsigned int v = 0; v != Nvecs; v++, idx += 6) { data1[idx ] = vectors_[v][0]; data1[idx+2] = vectors_[v][1]; data1[idx+4] = vectors_[v][2]; if (crosscorr) { data2[idx ] = V2.vectors_[v][0]; data2[idx+2] = V2.vectors_[v][1]; data2[idx+4] = V2.vectors_[v][2]; } } if (crosscorr) pubfft.CrossCorr(data1, data2); else pubfft.AutoCorr(data1); // Place desired components of correlation fn back in output and normalize double normV = (double)arraySize; double norm0 = 1.0 / (fabs(data1[0]) / normV); idx = 0; for (unsigned int iout = 0; iout != lagmax; iout++, normV -= 3.0, idx += 6) { double c_t = (data1[idx] / normV) * norm0; Ct.Add(iout, &c_t); //Ct[iout] = (data1[idx] / normV) * norm0; } return 0; }
/** Average a set of dihedral angles (in degrees) without ambiguity at the
  * periodic boundary. Each angle is mapped onto the unit circle with
  * x = cos(theta), y = sin(theta); the average angle then satisfies
  *   tan(avgtheta) = avgy / avgx = SUM[sin(theta)] / SUM[cos(theta)]
  * See Eq. 2 from Altis et al., J. Chem. Phys., 126 p. 244111 (2007).
  * \param dsIn Set holding the angles in degrees.
  * \param cframesIn Frames (indices into dsIn) to average over.
  * \param sumx Set to the sum of cos(theta) over the given frames.
  * \param sumy Set to the sum of sin(theta) over the given frames.
  * \return Average angle in degrees.
  */
static double AvgCalc_Dih( DataSet_1D const& dsIn, ClusterDist::Cframes const& cframesIn,
                           double& sumx, double& sumy )
{
  sumx = 0.0;
  sumy = 0.0;
  // TODO: Convert angles to radians prior to this call?
  ClusterDist::Cframes_it frm = cframesIn.begin();
  while (frm != cframesIn.end()) {
    const double radians = dsIn.Dval( *frm ) * Constants::DEGRAD;
    sumy += sin( radians );
    sumx += cos( radians );
    ++frm;
  }
  const double avgRadians = atan2(sumy, sumx);
  return avgRadians * Constants::RADDEG;
}
/** Given an ArgList containing name,[min,max,step,bins,col,N], set up a * coordinate with that name and parameters min, max, step, bins. * If '*' or not specified, a default value will be set. * \return 1 if error occurs, 0 otherwise. */ int Analysis_Hist::setupDimension(ArgList &arglist, DataSet_1D const& dset, size_t& offset) { bool minArg = false; bool maxArg = false; bool stepArg = false; bool binsArg = false; if (debug_>1) arglist.PrintList(); // Set up dimension name // NOTE: arglist[0] should be same as dset name from CheckDimension std::string const& dLabel = arglist[0]; // Cycle through coordinate arguments. Any argument left blank will be // assigned a default value later. double dMin = 0.0; double dMax = 0.0; double dStep = 0.0; int dBins = -1; for (int i = 1; i < arglist.Nargs(); i++) { if (debug_>1) mprintf("DEBUG: setupCoord: Token %i (%s)\n", i, arglist[i].c_str()); // '*' means default explicitly requested if (arglist[i] == "*") continue; switch (i) { case 1 : dMin = convertToDouble( arglist[i]); minArg = true; break; case 2 : dMax = convertToDouble( arglist[i]); maxArg = true; break; case 3 : dStep = convertToDouble( arglist[i]); stepArg = true; break; case 4 : dBins = convertToInteger(arglist[i]); binsArg = true; break; } } // If no min arg and no default min arg, get min from dataset if (!minArg) { if (!minArgSet_) dMin = dset.Min(); else dMin = default_min_; } // If no max arg and no default max arg, get max from dataset if (!maxArg) { if (!maxArgSet_) dMax = dset.Max(); else dMax = default_max_; } // If bins/step not specified, use default if (!binsArg) dBins = default_bins_; if (!stepArg) dStep = default_step_; // Calculate dimension from given args. HistBin dim; if (dim.CalcBinsOrStep( dMin, dMax, dStep, dBins, dLabel )) { mprinterr("Error: Could not set up histogram dimension '%s'\n", dLabel.c_str()); return 1; } dim.PrintHistBin(); dimensions_.push_back( dim ); // Recalculate offsets for all dimensions starting at farthest coord. 
This // follows row major ordering. size_t last_offset = 1UL; // For checking overflow. offset = 1UL; binOffsets_.resize( dimensions_.size() ); OffType::iterator bOff = binOffsets_.begin(); for (HdimType::const_iterator rd = dimensions_.begin(); rd != dimensions_.end(); ++rd, ++bOff) { if (debug_>0) mprintf("\tHistogram: %s offset is %zu\n", rd->label(), offset); *bOff = (long int)offset; offset *= rd->Bins(); // Check for overflow. if ( offset < last_offset ) { mprinterr("Error: Too many bins for histogram. Try reducing the number of bins and/or\n" "Error: the number of dimensions.\n"); return 1; } last_offset = offset; } // offset should now be equal to the total number of bins across all dimensions if (debug_>0) mprintf("\tHistogram: Total Bins = %zu\n",offset); return 0; }
/** Calculate the arithmetic mean of the values of a data set over the
  * given frames.
  * \param dsIn Set to read values from.
  * \param cframesIn Frames (indices into dsIn) to average over. No check is
  *                  made for an empty list; in that case the result is 0/0.
  * \return Sum of the selected values divided by the number of frames.
  */
static double AvgCalc_Std( DataSet_1D const& dsIn, ClusterDist::Cframes const& cframesIn )
{
  double total = 0.0;
  ClusterDist::Cframes_it frm = cframesIn.begin();
  while (frm != cframesIn.end()) {
    total += dsIn.Dval( *frm );
    ++frm;
  }
  const double nFrames = (double)cframesIn.size();
  return (total / nFrames);
}
/** Calculate a KDE of Pdata into Out, determining the histogram dimension
  * and the bandwidth automatically from the data:
  *  - min and max are the smallest and largest value in Pdata,
  *  - the number of bins is the integer part of sqrt(number of points),
  *  - the bandwidth is 1.06 * stdev * BandwidthFactor(number of points).
  * Every data point is given an increment of 1.0.
  * \return 1 if there are fewer than 2 points or the dimension could not be
  *         set up; otherwise the result of the full CalcKDE() routine.
  */
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata) const
{
  if (Pdata.Size() < 2) {
    mprinterr("Error: Not enough data for KDE.\n");
    return 1;
  }
  // Single pass over the data: track the extremes and accumulate a running
  // mean and sum of squared deviations from it.
  double count = 0.0;
  double runMean = 0.0;
  double sumSqDev = 0.0;
  double lo = Pdata.Dval(0);
  double hi = lo;
  for (unsigned int idx = 0; idx != Pdata.Size(); idx++) {
    const double x = Pdata.Dval(idx);
    if (x < lo) lo = x;
    if (hi < x) hi = x;
    count++;
    const double delta = x - runMean;
    runMean += delta / count;
    sumSqDev += delta * (x - runMean);
  }
  // Sample variance (N - 1 in the denominator) and standard deviation.
  const double variance = sumSqDev / (count - 1.0);
  const double stdev = sqrt(variance);
  // A step of zero is passed so that the dimension is defined by the number
  // of bins only.
  // NOTE: An earlier, now disabled, variant sorted the data and derived the
  //       step from the quartiles (step = 2 * (Q3 - Q1) / N^(1/3)).
  double step = 0.0;
  int bins = (int)sqrt((double)Pdata.Size());
  mprintf("DEBUG: mean= %g, stdev= %g\n", runMean, stdev);
  HistBin Xdim;
  if (Xdim.CalcBinsOrStep(lo, hi, step, bins, Pdata.Meta().Legend()))
    return 1;
  Xdim.PrintHistBin();

  // Automatically determine bandwidth
  double bandwidth = 1.06 * stdev * BandwidthFactor(Pdata.Size());
  mprintf("\tBandwidth: %f\n", bandwidth);

  // Uniform weighting: one increment of 1.0 per data point.
  std::vector<double> Increments(Pdata.Size(), 1.0);
  return CalcKDE(Out, Pdata, Increments, Xdim, bandwidth);
}
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata, std::vector<double> const& Increments, HistBin const& Xdim, double bandwidth) const { int inSize = (int)Pdata.Size(); // Allocate output set, set all to zero. Out.Zero( Xdim.Bins() ); Out.SetDim( Dimension::X, Xdim ); int outSize = (int)Out.Size(); int frame, bin; double increment, val; double total = 0.0; # ifdef _OPENMP int original_num_threads; # pragma omp parallel { # pragma omp master { original_num_threads = omp_get_num_threads(); } } // Ensure we only execute with the desired number of threads if (numthreads_ < original_num_threads) omp_set_num_threads( numthreads_ ); # endif // Calculate KDE, loop over input data # ifdef _OPENMP int mythread; double **P_thread; # pragma omp parallel private(frame, bin, val, increment, mythread) reduction(+:total) { mythread = omp_get_thread_num(); // Prevent race conditions by giving each thread its own histogram # pragma omp master { P_thread = new double*[ numthreads_ ]; for (int nt = 0; nt < numthreads_; nt++) { P_thread[nt] = new double[ outSize ]; std::fill(P_thread[nt], P_thread[nt] + outSize, 0.0); } } # pragma omp barrier # pragma omp for # endif for (frame = 0; frame < inSize; frame++) { val = Pdata.Dval(frame); increment = Increments[frame]; total += increment; // Apply kernel across histogram for (bin = 0; bin < outSize; bin++) # ifdef _OPENMP P_thread[mythread][bin] += # else Out[bin] += # endif (increment * (this->*Kernel_)( (Xdim.Coord(bin) - val) / bandwidth )); } # ifdef _OPENMP } // END parallel block // Combine results from each thread histogram into Out for (int i = 0; i < numthreads_; i++) { for (int j = 0; j < outSize; j++) Out[j] += P_thread[i][j]; delete[] P_thread[i]; } delete[] P_thread; // Restore original number of threads if (original_num_threads != numthreads_) omp_set_num_threads( original_num_threads ); # endif // Normalize for (unsigned int j = 0; j < Out.Size(); j++) Out[j] /= (total * bandwidth); return 0; }
/** Calculate a KDE of Pdata into Out over the bins of Xdim with the given
  * bandwidth, giving every data point the same increment of 1.0.
  * \return Result of the CalcKDE() overload that takes explicit increments.
  */
int KDE::CalcKDE(DataSet_double& Out, DataSet_1D const& Pdata,
                 HistBin const& Xdim, double bandwidth) const
{
  // Uniform weighting: one increment of 1.0 per input data point.
  const std::vector<double> unitWeights(Pdata.Size(), 1.0);
  return CalcKDE(Out, Pdata, unitWeights, Xdim, bandwidth);
}