/** For per-residue RMSD only. Setup output * file options. Calculate averages if requested. */ void Action_Rmsd::Print() { if (!perres_ || ResidueRMS_.empty()) return; // Per-residue output file if (perresout_ != 0) { // Set output file to be inverted if requested if (perresinvert_) perresout_->ProcessArgs("invert"); mprintf(" RMSD: Per-residue: Writing data for %zu residues to %s\n", ResidueRMS_.size(), perresout_->DataFilename().full()); } // Average if (perresavg_ != 0) { // Use the per residue rmsd dataset list to add one more for averaging DataSet_Mesh* PerResAvg = (DataSet_Mesh*)masterDSL_->AddSet(DataSet::XYMESH, MetaData(rmsd_->Meta().Name(), "Avg")); PerResAvg->ModifyDim(Dimension::X).SetLabel("Residue"); // another for stdev DataSet_Mesh* PerResStdev = (DataSet_Mesh*)masterDSL_->AddSet(DataSet::XYMESH, MetaData(rmsd_->Meta().Name(), "Stdev")); PerResStdev->ModifyDim(Dimension::X).SetLabel("Residue"); // Add the average and stdev datasets to the master datafile list perresavg_->AddDataSet(PerResAvg); perresavg_->AddDataSet(PerResStdev); // For each residue, get the average rmsd double stdev = 0; double avg = 0; for (perResArray::const_iterator PerRes = ResidueRMS_.begin(); PerRes != ResidueRMS_.end(); ++PerRes) { avg = PerRes->data_->Avg( stdev ); double pridx = (double)PerRes->data_->Meta().Idx(); PerResAvg->AddXY(pridx, avg); PerResStdev->AddXY(pridx, stdev); } } }
int Cluster_DPeaks::ChoosePointsAutomatically() { // Right now all density values are discrete. Try to choose outliers at each // value for which there is density.; /* // For each point, calculate average distance (X,Y) to points in next and // previous density values. const double dens_cut = 3.0 * 3.0; const double dist_cut = 1.32 * 1.32; for (Carray::const_iterator point0 = Points_.begin(); point0 != Points_.end(); ++point0) { int Npts = 0; for (Carray::const_iterator point1 = Points_.begin(); point1 != Points_.end(); ++point1) { if (point0 != point1) { // Only do this for close densities double dX = (double)(point0->PointsWithinEps() - point1->PointsWithinEps()); double dX2 = dX * dX; double dY = (point0->Dist() - point1->Dist()); double dY2 = dY * dY; if (dX2 < dens_cut && dY2 < dist_cut) { Npts++; } } } mprintf("%i %i %i\n", point0->PointsWithinEps(), point0->Fnum()+1, Npts); } */ /* CpptrajFile tempOut; tempOut.OpenWrite("temp.dat"); int currentDensity = -1; double distAv = 0.0; double distSD = 0.0; double sumWts = 0.0; int nValues = 0; Carray::const_iterator lastPoint = Points_.end() + 1; for (Carray::const_iterator point = Points_.begin(); point != lastPoint; ++point) { if (point == Points_.end() || point->PointsWithinEps() != currentDensity) { if (nValues > 0) { distAv = distAv / sumWts; //(double)nValues; distSD = (distSD / sumWts) - (distAv * distAv); if (distSD > 0.0) distSD = sqrt(distSD); else distSD = 0.0; //mprintf("Density %i: %i values Avg= %g SD= %g SumWts= %g\n", currentDensity, // nValues, distAv, distSD, sumWts); tempOut.Printf("%i %g\n", currentDensity, distAv); } if (point == Points_.end()) break; currentDensity = point->PointsWithinEps(); distAv = 0.0; distSD = 0.0; sumWts = 0.0; nValues = 0; } double wt = exp(point->Dist()); double dval = point->Dist() * wt; sumWts += wt; distAv += dval; distSD += (dval * dval); nValues++; } tempOut.CloseFile(); */ // BEGIN CALCULATING WEIGHTED DISTANCE AVERAGE CpptrajFile tempOut; tempOut.OpenWrite("temp.dat"); DataSet_Mesh weightedAverage; Carray::const_iterator cp = Points_.begin(); // Skip local density of 0. //while (cp->PointsWithinEps() == 0 && cp != Points_.end()) ++cp; while (cp != Points_.end()) { int densityVal = cp->PointsWithinEps(); Carray densityArray; // Add all points of current density. while (cp->PointsWithinEps() == densityVal && cp != Points_.end()) densityArray.push_back( *(cp++) ); mprintf("Density value %i has %zu points.\n", densityVal, densityArray.size()); // Sort array by distance std::sort(densityArray.begin(), densityArray.end(), Cpoint::dist_sort()); // Take the average of the points weighted by their position. double wtDistAv = 0.0; double sumWts = 0.0; //std::vector<double> weights; //weights.reserve( densityArray.size() ); int maxPt = (int)densityArray.size() - 1; for (int ip = 0; ip != (int)densityArray.size(); ++ip) { double wt = exp( (double)(ip - maxPt) ); //mprintf("\t%10i %10u %10u %10g\n", densityVal, ip, maxPt, wt); wtDistAv += (densityArray[ip].Dist() * wt); sumWts += wt; //weights.push_back( wt ); } wtDistAv /= sumWts; // Calculate the weighted sample variance //double distSD = 0.0; //for (unsigned int ip = 0; ip != densityArray.size(); ++ip) { // double diff = densityArray[ip].Dist() - wtDistAv; // distSD += weights[ip] * (diff * diff); //} //distSD /= sumWts; weightedAverage.AddXY(densityVal, wtDistAv); //tempOut.Printf("%i %g %g %g\n", densityVal, wtDistAv, sqrt(distSD), sumWts); tempOut.Printf("%i %g %g\n", densityVal, wtDistAv, sumWts); /* // Find the median. double median, Q1, Q3; if (densityArray.size() == 1) { median = densityArray[0].Dist(); Q1 = median; Q3 = median; } else { unsigned int q3_beg; unsigned int med_idx = densityArray.size() / 2; // Always 0 <= Q1 < med_idx if ((densityArray.size() % 2) == 0) { median = (densityArray[med_idx].Dist() + densityArray[med_idx-1].Dist()) / 2.0; q3_beg = med_idx; } else { median = densityArray[med_idx].Dist(); q3_beg = med_idx + 1; } if (densityArray.size() == 2) { Q1 = densityArray[0].Dist(); Q3 = densityArray[1].Dist(); } else { // Find lower quartile unsigned int q1_idx = med_idx / 2; if ((med_idx % 2) == 0) Q1 = (densityArray[q1_idx].Dist() + densityArray[q1_idx-1].Dist()) / 2.0; else Q1 = densityArray[q1_idx].Dist(); // Find upper quartile unsigned int q3_size = densityArray.size() - q3_beg; unsigned int q3_idx = (q3_size / 2) + q3_beg; if ((q3_size %2) == 0) Q3 = (densityArray[q3_idx].Dist() + densityArray[q3_idx-1].Dist()) / 2.0; else Q3 = densityArray[q3_idx].Dist(); } } mprintf("\tMedian dist value is %g. Q1= %g Q3= %g\n", median, Q1, Q3); */ } tempOut.CloseFile(); // END CALCULATING WEIGHTED DISTANCE AVERAGE /* // TEST tempOut.OpenWrite("temp2.dat"); std::vector<double> Hist( Points_.back().PointsWithinEps()+1, 0.0 ); int gWidth = 3; double cval = 3.0; double two_c_squared = 2.0 * cval * cval; mprintf("DBG: cval= %g, Gaussian denominator is %g\n", cval, two_c_squared); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { int bval = weightedAverage.X(wtIdx); for (int xval = std::max(bval - gWidth, 0); xval != std::min(bval + gWidth + 1, (int)Hist.size()); xval++) { // a: height (weighted average) // b: center (density value) // c: width // x: density value in histogram //int xval = weightedAverage.X(idx); //double bval = weightedAverage.X(wtIdx); //double bval = (double)wtIdx; double diff = (double)(xval - bval); //Hist[xval] += (weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); Hist[xval] = std::max(Hist[xval], weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); } } for (unsigned int idx = 0; idx != Hist.size(); idx++) tempOut.Printf("%u %g\n", idx, Hist[idx]); tempOut.CloseFile(); // END TEST */ /* // TEST // Construct best-fit line segments tempOut.OpenWrite("temp2.dat"); double slope, intercept, correl; int segment_length = 3; DataSet_Mesh Segment; Segment.Allocate1D( segment_length ); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { Segment.Clear(); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) Segment.AddXY(weightedAverage.X(idx), weightedAverage.Y(idx)); Segment.LinearRegression(slope, intercept, correl, true); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) { double x = weightedAverage.X(idx); double y = slope * x + intercept; tempOut.Printf("%g %g %i\n", x, y, weightedAverage.X(wtIdx)); } } tempOut.CloseFile(); // END TEST */ // BEGIN WEIGHTED RUNNING AVG/SD OF DISTANCES // For each point, determine if it is greater than the average of the // weighted average distances of the previous, current, and next densities. int width = 2; int currentDensity = 0; int wtIdx = 0; double currentAvg = 0.0; double deltaSD = 0.0; double deltaAv = 0.0; int Ndelta = 0; CpptrajFile raOut; if (!rafile_.empty()) raOut.OpenWrite(rafile_); CpptrajFile raDelta; if (!radelta_.empty()) raDelta.OpenWrite(radelta_); std::vector<unsigned int> candidateIdxs; std::vector<double> candidateDeltas; cp = Points_.begin(); // Skip over points with zero density while (cp != Points_.end() && cp->PointsWithinEps() == 0) ++cp; while (weightedAverage.X(wtIdx) != cp->PointsWithinEps() && wtIdx < (int)Points_.size()) ++wtIdx; for (Carray::const_iterator point = cp; point != Points_.end(); ++point) { if (point->PointsWithinEps() != currentDensity) { //currentAvg = weightedAverage.Y(wtIdx); // New density value. Determine average. currentAvg = 0.0; // unsigned int Npt = 0; double currentWt = 0.0; for (int idx = std::max(wtIdx - width, 0); idx != std::min(wtIdx + width + 1, (int)weightedAverage.Size()); idx++) { //currentAvg += weightedAverage.Y(idx); //Npt++; double wt = weightedAverage.Y(idx); currentAvg += (weightedAverage.Y(idx) * wt); currentWt += wt; } //currentAvg /= (double)Npt; currentAvg /= currentWt; //smoothAv += currentAvg; //smoothSD += (currentAvg * currentAvg); //Nsmooth++; currentDensity = point->PointsWithinEps(); if (raOut.IsOpen()) raOut.Printf("%i %g %g\n", currentDensity, currentAvg, weightedAverage.Y(wtIdx)); wtIdx++; } double delta = (point->Dist() - currentAvg); if (delta > 0.0) { //delta *= log((double)currentDensity); if (raDelta.IsOpen()) raDelta.Printf("%8i %8.3f %8i %8.3f %8.3f\n", currentDensity, delta, point->Fnum()+1, point->Dist(), currentAvg); candidateIdxs.push_back( point - Points_.begin() ); candidateDeltas.push_back( delta ); deltaAv += delta; deltaSD += (delta * delta); Ndelta++; } } raOut.CloseFile(); deltaAv /= (double)Ndelta; deltaSD = (deltaSD / (double)Ndelta) - (deltaAv * deltaAv); if (deltaSD > 0.0) deltaSD = sqrt(deltaSD); else deltaSD = 0.0; if (raDelta.IsOpen()) raDelta.Printf("#DeltaAvg= %g DeltaSD= %g\n", deltaAv, deltaSD); raDelta.CloseFile(); int cnum = 0; for (unsigned int i = 0; i != candidateIdxs.size(); i++) { if (candidateDeltas[i] > (deltaSD)) { Points_[candidateIdxs[i]].SetCluster( cnum++ ); mprintf("\tPoint %u (frame %i, density %i) selected as candidate for cluster %i\n", candidateIdxs[i], Points_[candidateIdxs[i]].Fnum()+1, Points_[candidateIdxs[i]].PointsWithinEps(), cnum-1); } } // END WEIGHTED AVG/SD OF DISTANCES /* // Currently doing this by calculating the running average of density vs // distance, then choosing points with distance > twice the SD of the // running average. // NOTE: Store in a mesh data set for now in case we want to spline etc later. if (avg_factor_ < 1) avg_factor_ = 10; unsigned int window_size = Points_.size() / (unsigned int)avg_factor_; mprintf("\tRunning avg window size is %u\n", window_size); // FIXME: Handle case where window_size < frames DataSet_Mesh runavg; unsigned int ra_size = Points_.size() - window_size + 1; runavg.Allocate1D( ra_size ); double dwindow = (double)window_size; double sumx = 0.0; double sumy = 0.0; for (unsigned int i = 0; i < window_size; i++) { sumx += (double)Points_[i].PointsWithinEps(); sumy += Points_[i].Dist(); } runavg.AddXY( sumx / dwindow, sumy / dwindow ); for (unsigned int i = 1; i < ra_size; i++) { unsigned int nextwin = i + window_size - 1; unsigned int prevwin = i - 1; sumx = (double)Points_[nextwin].PointsWithinEps() - (double)Points_[prevwin].PointsWithinEps() + sumx; sumy = Points_[nextwin].Dist() - Points_[prevwin].Dist() + sumy; runavg.AddXY( sumx / dwindow, sumy / dwindow ); } // Write running average if (!rafile_.empty()) { CpptrajFile raOut; if (raOut.OpenWrite(rafile_)) mprinterr("Error: Could not open running avg file '%s' for write.\n", rafile_.c_str()); else { for (unsigned int i = 0; i != runavg.Size(); i++) raOut.Printf("%g %g\n", runavg.X(i), runavg.Y(i)); raOut.CloseFile(); } } double ra_sd; double ra_avg = runavg.Avg( ra_sd ); // Double stdev to use as cutoff for findning anomalously high peaks. ra_sd *= 2.0; mprintf("\tAvg of running avg set is %g, SD*2.0 (delta cutoff) is %g\n", ra_avg, ra_sd); // For each point in density vs distance plot, determine which running // average point is closest. If the difference between the point and the // running average point is > 2.0 the SD of the running average data, // consider it a 'peak'. CpptrajFile raDelta; if (!radelta_.empty()) raDelta.OpenWrite("radelta.dat"); if (raDelta.IsOpen()) raDelta.Printf("%-10s %10s %10s\n", "#Frame", "RnAvgPos", "Delta"); unsigned int ra_position = 0; // Position in the runavg DataSet unsigned int ra_end = runavg.Size() - 1; int cnum = 0; for (Carray::iterator point = Points_.begin(); point != Points_.end(); ++point) { if (ra_position != ra_end) { // Is the next running avgd point closer to this point? while (ra_position != ra_end) { double dens = (double)point->PointsWithinEps(); double diff0 = fabs( dens - runavg.X(ra_position ) ); double diff1 = fabs( dens - runavg.X(ra_position+1) ); if (diff1 < diff0) ++ra_position; // Next running avg position is closer for this point. else break; // This position is closer. } } double delta = point->Dist() - runavg.Y(ra_position); if (raDelta.IsOpen()) raDelta.Printf("%-10i %10u %10g", point->Fnum()+1, ra_position, delta); if (delta > ra_sd) { if (raDelta.IsOpen()) raDelta.Printf(" POTENTIAL CLUSTER %i", cnum); point->SetCluster(cnum++); } if (raDelta.IsOpen()) raDelta.Printf("\n"); } raDelta.CloseFile(); */ return cnum; }