/** Find the largest value stored in a DataSet.
  * \param ds DataSet to scan.
  * \return Largest value in ds; 0 if ds is empty or GoodCalcType()
  *         rejects it.
  */
double DS_Math::Max(DataSet& ds) {
  // An empty set has no maximum; report 0.
  if ( ds.Size() == 0 ) return 0;
  // Sets that are not a good calc type also report 0.
  if ( !GoodCalcType(ds) ) return 0;
  // Seed with the first element, then scan the remainder.
  double largest = ds.Dval( 0 );
  for (int idx = 1; idx < ds.Size(); ++idx) {
    double current = ds.Dval( idx );
    if (current > largest)
      largest = current;
  }
  return largest;
}
/** Find the smallest value stored in a DataSet.
  * \param ds DataSet to scan.
  * \return Smallest value in ds; 0 if ds is empty or GoodCalcType()
  *         rejects it.
  */
double DS_Math::Min(DataSet& ds) {
  // An empty set has no minimum; report 0.
  if ( ds.Size() == 0 ) return 0;
  // Sets that are not a good calc type also report 0.
  if ( !GoodCalcType(ds) ) return 0;
  // Seed with the first element, then scan the remainder.
  double smallest = ds.Dval( 0 );
  for (int idx = 1; idx < ds.Size(); ++idx) {
    double current = ds.Dval( idx );
    if (current < smallest)
      smallest = current;
  }
  return smallest;
}
/** Calculate Pearson product-moment correlation between DataSets. * \D1 DataSet to caclculate correlation for. * \D2 DataSet to caclulate correlation to. * \return Pearson product-moment correlation coefficient. */ double DS_Math::CorrCoeff( DataSet& D1, DataSet& D2 ) { // Check if D1 and D2 are valid types if ( !GoodCalcType(D1) ) return 0; if ( !GoodCalcType(D2) ) return 0; // Check that D1 and D2 have same # data points. int Nelements = D1.Size(); if (Nelements != D2.Size()) { mprinterr("Error: Corr: # elements in dataset %s (%i) not equal to\n", D1.Legend().c_str(), Nelements); mprinterr("Error: # elements in dataset %s (%i)\n", D2.Legend().c_str(), D2.Size()); return 0; } // Calculate averages double avg1 = Avg(D1); double avg2 = Avg(D2); // Calculate average deviations. double sumdiff1_2 = 0.0; double sumdiff2_2 = 0.0; double corr_coeff = 0.0; //mprinterr("DATASETS %s and %s\n", c_str(), D2.c_str()); for (int i = 0; i < Nelements; i++) { double diff1 = D1.Dval(i) - avg1; double diff2 = D2.Dval(i) - avg2; sumdiff1_2 += (diff1 * diff1); sumdiff2_2 += (diff2 * diff2); corr_coeff += (diff1 * diff2); } if (sumdiff1_2 == 0.0 || sumdiff2_2 == 0.0) { mprintf("Warning: Corr: %s to %s, Normalization is 0\n", D1.Legend().c_str(), D2.Legend().c_str()); return 0; } // Correlation coefficient corr_coeff /= ( sqrt( sumdiff1_2 ) * sqrt( sumdiff2_2 ) ); //mprintf(" CORRELATION COEFFICIENT %6s to %6s IS %10.4f\n", // D1_->c_str(), D2_->c_str(), corr_coeff ); return corr_coeff; }
/** Calculate the average over values in this set (and optionally the
  * standard deviation).
  * \param ds DataSet to average.
  * \param stdev Optional output. If not null it receives the standard
  *        deviation (sum of squared deviations divided by the number of
  *        values, then square-rooted). It is set to 0.0 when the set is
  *        empty or is not a good calc type. May be null, in which case no
  *        deviation is computed.
  * \return The average; 0.0 if the set is empty or GoodCalcType() rejects it.
  *
  * Sets for which IsTorsionArray() is true are averaged cyclically via
  * atan2 of the summed sines and cosines; all other sets use the plain
  * arithmetic mean.
  */
double DS_Math::Avg(DataSet& ds, double* stdev) {
  // Check # values
  int numvalues = ds.Size();
  // Give the optional output a defined value on every early-exit path.
  if (stdev != 0) *stdev = 0.0;
  if ( numvalues < 1 ) return 0.0;
  // Check if this set is a good type
  if ( !GoodCalcType(ds) ) return 0.0;

  double avg = 0;
  if (IsTorsionArray(ds)) {
    // Cyclic torsion average. Values are scaled by DEGRAD before sin/cos
    // and the result by RADDEG (presumably degree <-> radian conversion).
    double sumy = 0.0;
    double sumx = 0.0;
    for ( int i = 0; i < numvalues; ++i ) {
      double theta = ds.Dval( i ) * DEGRAD;
      sumy += sin( theta );
      sumx += cos( theta );
    }
    avg = atan2(sumy, sumx) * RADDEG;
    // Callers may omit stdev; never dereference a null pointer.
    if (stdev == 0) return avg;
    // Torsion Stdev: wrap each deviation so it never exceeds 180.
    sumy = 0;
    for ( int i = 0; i < numvalues; ++i) {
      double diff = fabs(avg - ds.Dval( i ));
      if (diff > 180.0)
        diff = 360.0 - diff;
      diff *= diff;
      sumy += diff;
    }
    sumy /= (double)numvalues;
    *stdev = sqrt(sumy);
  } else {
    // Non-cyclic, normal average
    double sum = 0;
    for ( int i = 0; i < numvalues; ++i )
      sum += ds.Dval( i );
    avg = sum / (double)numvalues;
    if (stdev == 0) return avg;
    // Stdev
    sum = 0;
    for ( int i = 0; i < numvalues; ++i ) {
      double diff = avg - ds.Dval( i );
      diff *= diff;
      sum += diff;
    }
    sum /= (double)numvalues;
    *stdev = sqrt(sum);
  }
  return avg;
}
/** Replace all variables in given ArgList with their values.
  * A variable is introduced by '$'. The '$' plus the alphanumeric characters
  * that follow it are first looked up in CurrentVars_. If no entry matches,
  * the name is extended over the characters [ ] : _ - % and alphanumerics
  * and looked up (without the '$') as a DataSet in DSL; the first value of
  * that set is then substituted.
  * \param argIn Arguments in which to replace variables; not modified.
  * \param DSL DataSetList searched when a variable is not in CurrentVars_.
  * \param debug If > 0, print messages describing each replacement.
  * \return Copy of argIn with variables replaced, or an empty ArgList if a
  *         variable cannot be resolved, the matching DataSet is neither
  *         STRING nor 1D scalar, or the matching DataSet is empty.
  * NOTE: After each replacement the argument is searched for '$' again from
  *       its start, so a replacement value that contains '$' is expanded
  *       in turn.
  */
ArgList VariableArray::ReplaceVariables(ArgList const& argIn, DataSetList const& DSL, int debug)
{
  if (debug > 0) mprintf("DEBUG: Before variable replacement: [%s]\n", argIn.ArgLine());
  ArgList modCmd = argIn;
  for (int n = 0; n < modCmd.Nargs(); n++) {
    size_t pos = modCmd[n].find("$");
    while (pos != std::string::npos) {
      // Argument is/contains a variable. Find first non-alphanumeric char.
      // len counts the '$' plus the alphanumeric characters after it.
      size_t len = 1;
      for (size_t pos1 = pos+1; pos1 < modCmd[n].size(); pos1++, len++)
        // Cast: passing a negative char to isalnum() is undefined behavior.
        if (!isalnum(static_cast<unsigned char>(modCmd[n][pos1]))) break;
      std::string var_in_arg = modCmd[n].substr(pos, len);
      // See if variable occurs in CurrentVars_
      Varray::const_iterator vp = CurrentVars_.begin();
      for (; vp != CurrentVars_.end(); ++vp)
        if (vp->first == var_in_arg) break;
      // If found replace with value from CurrentVars_
      if (vp != CurrentVars_.end()) {
        if (debug > 0)
          mprintf("DEBUG: Replaced variable '%s' with value '%s'\n",
                  var_in_arg.c_str(), vp->second.c_str());
        std::string arg = modCmd[n];
        arg.replace(pos, vp->first.size(), vp->second);
        modCmd.ChangeArg(n, arg);
      } else {
        // Not found in CurrentVars_; see if this is a DataSet.
        // Continue from where the first scan stopped, now also accepting
        // the extra characters allowed in a DataSet name.
        for (size_t pos1 = pos+len; pos1 < modCmd[n].size(); pos1++, len++) {
          const char c = modCmd[n][pos1];
          if (!isalnum(static_cast<unsigned char>(c)) &&
              c != '[' &&
              c != ':' &&
              c != ']' &&
              c != '_' &&
              c != '-' &&
              c != '%')
            break;
        }
        // Name to look up excludes the leading '$'.
        var_in_arg = modCmd[n].substr(pos+1, len-1);
        DataSet* ds = DSL.GetDataSet( var_in_arg );
        if (ds == 0) {
          mprinterr("Error: Unrecognized variable in command: %s\n", var_in_arg.c_str());
          return ArgList();
        } else {
          if (ds->Type() != DataSet::STRING && ds->Group() != DataSet::SCALAR_1D) {
            mprinterr("Error: Only 1D data sets supported.\n");
            return ArgList();
          }
          if (ds->Size() < 1) {
            mprinterr("Error: Set is empty.\n");
            return ArgList();
          }
          if (ds->Size() > 1)
            mprintf("Warning: Only using first value.\n");
          // Take element 0 either as a string or as a converted double.
          std::string value;
          if (ds->Type() == DataSet::STRING)
            value = (*((DataSet_string*)ds))[0];
          else
            value = doubleToString(((DataSet_1D*)ds)->Dval(0));
          if (debug > 0)
            mprintf("DEBUG: Replaced variable '$%s' with value '%s' from DataSet '%s'\n",
                    var_in_arg.c_str(), value.c_str(), ds->legend());
          std::string arg = modCmd[n];
          // +1 so the '$' is replaced along with the name.
          arg.replace(pos, var_in_arg.size()+1, value);
          modCmd.ChangeArg(n, arg);
        }
      }
      pos = modCmd[n].find("$");
    } // END loop over this argument
  }
  return modCmd;
}
/** Calculate time correlation between two DataSets. * \D1 DataSet to calculate correlation for. * \D2 DataSet to calculate correlation to. * \Ct DataSet to store time correlation fn, must be DOUBLE. * \lagmaxIn Max lag to calculate corr. -1 means use size of dataset. * \calccovar If true calculate covariance (devation from avg). * \return 0 on success, 1 on error. */ int DS_Math::CrossCorr( DataSet& D1, DataSet& D2, DataSet& Ct, int lagmaxIn, bool calccovar, bool usefft ) { int lagmax; double ct; // Check if D1 and D2 are valid types if ( !GoodCalcType(D1) ) return 1; if ( !GoodCalcType(D2) ) return 1; // Check that D1 and D2 have same # data points. int Nelements = D1.Size(); if (Nelements != D2.Size()) { mprinterr("Error: CrossCorr: # elements in dataset %s (%i) not equal to\n", D1.Legend().c_str(), Nelements); mprinterr("Error: # elements in dataset %s (%i)\n", D2.Legend().c_str(), D2.Size()); return 1; } if (Nelements < 2) { mprinterr("Error: CrossCorr: # elements is less than 2 (%i)\n", Nelements); return 1; } // Check return dataset type if ( Ct.Type() != DataSet::DOUBLE ) { mprinterr("Internal Error: CrossCorr: Ct must be of type DataSet::DOUBLE.\n"); return 1; } // Check if lagmaxIn makes sense. Set default lag to be Nelements // if not specified. 
if (lagmaxIn == -1) lagmax = Nelements; else if (lagmaxIn > Nelements) { mprintf("Warning: CrossCorr [%s][%s]: max lag (%i) > Nelements (%i), setting to Nelements.\n", D1.Legend().c_str(), D2.Legend().c_str(), lagmaxIn, Nelements); lagmax = Nelements; } else lagmax = lagmaxIn; // If calculating covariance calculate averages double avg1 = 0; double avg2 = 0; if ( calccovar ) { avg1 = Avg(D1); avg2 = Avg(D2); } // Calculate correlation double norm = 1.0; if ( usefft ) { // Calc using FFT CorrF_FFT pubfft1(Nelements); ComplexArray data1 = pubfft1.Array(); data1.PadWithZero(Nelements); for (int i = 0; i < Nelements; ++i) data1[i*2] = D1.Dval(i) - avg1; if (&D2 == &D1) pubfft1.AutoCorr(data1); else { // Populate second dataset if different ComplexArray data2 = pubfft1.Array(); data2.PadWithZero(Nelements); for (int i = 0; i < Nelements; ++i) data2[i*2] = D2.Dval(i) - avg2; pubfft1.CrossCorr(data1, data2); } // Put real components of data1 in output DataSet norm = 1.0 / fabs( data1[0] ); for (int i = 0; i < lagmax; ++i) { ct = data1[i*2] * norm; Ct.Add(i, &ct); } } else { // Direct calc for (int lag = 0; lag < lagmax; ++lag) { ct = 0; int jmax = Nelements - lag; for (int j = 0; j < jmax; ++j) ct += ((D1.Dval(j) - avg1) * (D2.Dval(j+lag) - avg2)); if (lag == 0) { if (ct != 0) norm = fabs( ct ); } ct /= norm; Ct.Add(lag, &ct); } } return 0; }
/** Syntax: dataset invert <set arg0> ... name <new name>
  * Optional keywords read here: 'legendset <set>' (a STRING set whose
  * entries become the legends of the new sets) and 'out <file>'.
  *
  * One DOUBLE output set is created per data point of the input sets;
  * output set i receives point i from every input set. An additional
  * STRING set holding the legend of each input set is also created.
  * \return CpptrajState::OK on success, CpptrajState::ERR on error.
  */
Exec::RetType Exec_DataSetCmd::InvertSets(CpptrajState& State, ArgList& argIn) {
  DataSetList& DSL = State.DSL();
  // Optional set providing legends for the inverted sets.
  DataSet* inputNames = 0;
  std::string dsname = argIn.GetStringKey("legendset");
  if (!dsname.empty()) {
    inputNames = DSL.GetDataSet( dsname );
    if (inputNames == 0) {
      mprinterr("Error: Name set '%s' not found.\n", dsname.c_str());
      return CpptrajState::ERR;
    }
    if (inputNames->Type() != DataSet::STRING) {
      mprinterr("Error: Set '%s' does not contain strings.\n", inputNames->legend());
      return CpptrajState::ERR;
    }
    mprintf("\tUsing names from set '%s' as legends for inverted sets.\n",
            inputNames->legend());
  }
  // Name for the new sets is mandatory.
  dsname = argIn.GetStringKey("name");
  if (dsname.empty()) {
    mprinterr("Error: 'invert' requires that 'name <new set name>' be specified.\n");
    return CpptrajState::ERR;
  }
  mprintf("\tNew sets will be named '%s'\n", dsname.c_str());
  // Optional output file.
  DataFile* outfile = State.DFL().AddDataFile( argIn.GetStringKey("out"), argIn );
  if (outfile != 0)
    mprintf("\tNew sets will be output to '%s'\n", outfile->DataFilename().full());
  // TODO determine type some other way
  DataSet::DataType outtype = DataSet::DOUBLE;

  // Collect the input sets named by the remaining arguments.
  std::vector<DataSet_1D*> input_sets;
  for (std::string setArg = argIn.GetStringNext();
       !setArg.empty();
       setArg = argIn.GetStringNext())
  {
    DataSetList selected = DSL.GetMultipleSets( setArg );
    for (DataSetList::const_iterator it = selected.begin(); it != selected.end(); ++it)
    {
      // Sets that are not 1D scalar are skipped with a warning.
      if ( (*it)->Group() != DataSet::SCALAR_1D ) {
        mprintf("Warning: '%s': Inversion only supported for 1D scalar data sets.\n",
                (*it)->legend());
        continue;
      }
      // Each accepted set must match the size of the one accepted before it.
      if (!input_sets.empty() && (*it)->Size() != input_sets.back()->Size()) {
        mprinterr("Error: Set '%s' has different size (%zu) than previous set (%zu)\n",
                  (*it)->legend(), (*it)->Size(), input_sets.back()->Size());
        return CpptrajState::ERR;
      }
      input_sets.push_back( (DataSet_1D*)*it );
    }
  }
  if (input_sets.empty()) {
    mprinterr("Error: No sets selected.\n");
    return CpptrajState::ERR;
  }
  // The legend set needs one entry per data point.
  if (inputNames != 0 && inputNames->Size() != input_sets.front()->Size()) {
    mprinterr("Error: Name set '%s' size (%zu) differs from # data points (%zu).\n",
              inputNames->legend(), inputNames->Size(), input_sets.front()->Size());
    return CpptrajState::ERR;
  }
  mprintf("\t%zu input sets; creating %zu output sets.\n",
          input_sets.size(), input_sets.front()->Size());

  // Need an output data set for each point in input sets.
  // Output set indices start at 1.
  std::vector<DataSet*> output_sets;
  int column = 1;
  for (int pt = 0; pt != (int)input_sets[0]->Size(); pt++) {
    DataSet* newSet = DSL.AddSet(outtype, MetaData(dsname, column));
    if (newSet == 0) return CpptrajState::ERR;
    if (inputNames != 0)
      newSet->SetLegend( (*((DataSet_string*)inputNames))[pt] );
    output_sets.push_back( newSet );
    if (outfile != 0)
      outfile->AddDataSet( newSet );
    column++;
  }
  // Create a data set containing names of each input data set; it takes
  // the index following the last output set.
  DataSet* nameset = DSL.AddSet(DataSet::STRING, MetaData(dsname, column));
  if (nameset == 0) return CpptrajState::ERR;
  if (inputNames != 0)
    nameset->SetLegend("Names");
  if (outfile != 0)
    outfile->AddDataSet( nameset );

  // Populate output data sets: input set j supplies element j of each output.
  for (int jdx = 0; jdx != (int)input_sets.size(); jdx++) {
    DataSet_1D const& source = *(input_sets[jdx]);
    nameset->Add( jdx, source.legend() );
    for (unsigned int pt = 0; pt != source.Size(); pt++) {
      double dval = source.Dval( pt );
      output_sets[pt]->Add( jdx, &dval );
    }
  }
  return CpptrajState::OK;
}
// Exec_DataSetCmd::ModifyPoints() Exec::RetType Exec_DataSetCmd::ModifyPoints(CpptrajState& State, ArgList& argIn, bool drop) { const char* mode; if (drop) mode = "Drop"; else mode = "Kee"; // Keywords std::string name = argIn.GetStringKey("name"); int start = argIn.getKeyInt("start", 0) - 1; int stop = argIn.getKeyInt("stop", -1); int offset = argIn.getKeyInt("offset", -1); Range points; if (start < 0 && stop < 0 && offset < 0) { std::string rangearg = argIn.GetStringKey("range"); if (rangearg.empty()) { mprinterr("Error: Must specify range or start/stop/offset.\n"); return CpptrajState::ERR; } points.SetRange( rangearg ); if (points.Empty()) { mprinterr("Error: Range '%s' is empty.\n", rangearg.c_str()); return CpptrajState::ERR; } mprintf("\t%sping points in range %s\n", mode, rangearg.c_str()); // User args start from 1 points.ShiftBy(-1); } // Get data set to drop/keep points from // Loop over all DataSet arguments std::string ds_arg = argIn.GetStringNext(); while (!ds_arg.empty()) { DataSetList dsl = State.DSL().GetMultipleSets( ds_arg ); for (DataSetList::const_iterator it = dsl.begin(); it != dsl.end(); ++it) { DataSet* DS = *it; if (DS->Size() < 1) { mprinterr("Error: Set '%s' is empty.\n", DS->legend()); return CpptrajState::ERR; } // Restrict to 1D sets for now TODO more types if (DS->Group() != DataSet::SCALAR_1D) { mprinterr("Error: Currently only works for 1D scalar data sets.\n"); return CpptrajState::ERR; } DataSet_1D* ds1 = (DataSet_1D*)DS; // Output data set DataSet* out = 0; if (name.empty()) { // Modifying this set. Create new temporary set. 
out = State.DSL().Allocate( ds1->Type() ); if (out == 0) return CpptrajState::ERR; *out = *ds1; mprintf("\tOverwriting set '%s'\n", ds1->legend()); } else { // Write to new set MetaData md = ds1->Meta(); md.SetName( name ); out = State.DSL().AddSet(ds1->Type(), md); if (out == 0) return CpptrajState::ERR; mprintf("\tNew set is '%s'\n", out->Meta().PrintName().c_str()); } out->Allocate(DataSet::SizeArray(1, ds1->Size())); if (points.Empty()) { // Drop by start/stop/offset. Set defaults if needed if (start < 0) start = 0; if (stop < 0) stop = ds1->Size(); if (offset < 0) offset = 1; mprintf("\t%sping points from %i to %i, step %i\n", mode, start+1, stop, offset); for (int idx = start; idx < stop; idx += offset) points.AddToRange( idx ); } // TODO check that range values are valid? if (State.Debug() > 0) mprintf("DEBUG: Keeping points:"); Range::const_iterator pt = points.begin(); int idx = 0; int odx = 0; if (drop) { // Drop points for (; idx < (int)ds1->Size(); idx++) { if (pt == points.end()) break; if (*pt != idx) { if (State.Debug() > 0) mprintf(" %i", idx + 1); KeepPoint(ds1, out, idx, odx); } else ++pt; } // Keep all remaining points for (; idx < (int)ds1->Size(); idx++) { if (State.Debug() > 0) mprintf(" %i", idx + 1); KeepPoint(ds1, out, idx, odx); } } else { // Keep points for (; pt != points.end(); pt++) { if (*pt >= (int)ds1->Size()) break; if (State.Debug() > 0) mprintf(" %i", *pt + 1); KeepPoint(ds1, out, *pt, odx); } } if (State.Debug() > 0) mprintf("\n"); if (name.empty()) { // Replace old set with new set State.DSL().RemoveSet( ds1 ); State.DSL().AddSet( out ); } } // END loop over sets ds_arg = argIn.GetStringNext(); } // END loop over set args return CpptrajState::OK; }