/** For each point p, calculate function Kdist(p) which is the distance of
  * the Kth nearest point to p. */
void Cluster_DBSCAN::ComputeKdist( int Kval, std::vector<int> const& FramesToCluster ) const {
  std::vector<double> dists;
  std::vector<double> Kdist;
  dists.reserve( FramesToCluster.size() );
  Kdist.reserve( FramesToCluster.size() );
  std::string outfilename = k_prefix_ + "Kdist." + integerToString(Kval) + ".dat";
  mprintf("\tDBSCAN: Calculating Kdist(%i), output to %s\n", Kval, outfilename.c_str());
  for (std::vector<int>::const_iterator point = FramesToCluster.begin();
                                        point != FramesToCluster.end(); ++point)
  {
    // Store distances from this point
    dists.clear();
    for (std::vector<int>::const_iterator otherpoint = FramesToCluster.begin();
                                          otherpoint != FramesToCluster.end(); ++otherpoint)
      dists.push_back( FrameDistances_.GetFdist(*point, *otherpoint) );
    // Sort distances - first dist should always be 0
    std::sort(dists.begin(), dists.end());
    Kdist.push_back( dists[Kval] );
  }
  std::sort( Kdist.begin(), Kdist.end() );
  CpptrajFile Outfile;
  Outfile.OpenWrite(outfilename);
  Outfile.Printf("%-8s %1i%-11s\n", "#Point", Kval, "-dist");
  // Write out largest to smallest
  unsigned int ik = 0;
  for (std::vector<double>::reverse_iterator k = Kdist.rbegin(); k != Kdist.rend(); ++k, ++ik)
    Outfile.Printf("%8u %12.4f\n", ik, *k);
  Outfile.CloseFile();
}
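// [Illustrative sketch, not part of cpptraj] A standalone version of the Kdist idea above,
// written against a plain distance callback instead of FrameDistances_/CpptrajFile: for each
// point, gather distances to every point (the self-distance of 0 sorts to the front), take
// element Kval as the K-distance, then sort the K-distances. Plotted largest to smallest,
// the "knee" of this curve is commonly used as a guide when choosing the DBSCAN epsilon.
// ComputeKdistSketch and its arguments are hypothetical names used only for illustration.
#include <algorithm>
#include <functional>
#include <vector>

// dist(i, j) must return the distance between points i and j; Kval must be < npoints.
std::vector<double> ComputeKdistSketch(int Kval, int npoints,
                                       std::function<double(int,int)> dist)
{
  std::vector<double> Kdist;
  Kdist.reserve( npoints );
  std::vector<double> dists( npoints );
  for (int i = 0; i < npoints; i++) {
    for (int j = 0; j < npoints; j++)
      dists[j] = dist(i, j);                 // includes dist(i,i) == 0
    std::sort( dists.begin(), dists.end() ); // self-distance sorts to the front
    Kdist.push_back( dists[Kval] );          // distance to the Kth nearest neighbor
  }
  std::sort( Kdist.begin(), Kdist.end() );   // caller writes these out largest to smallest
  return Kdist;
}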
// DataIO_Std::WriteData() int DataIO_Std::WriteData(FileName const& fname, DataSetList const& SetList) { int err = 0; if (!SetList.empty()) { // Open output file. CpptrajFile file; if (file.OpenWrite( fname )) return 1; // Base write type off first data set dimension FIXME if (SetList[0]->Group() == DataSet::CLUSTERMATRIX) { // Special case of 2D - may have sieved frames. err = WriteCmatrix(file, SetList); } else if (SetList[0]->Ndim() == 1) { if (group_ == NO_TYPE) { if (isInverted_) err = WriteDataInverted(file, SetList); else err = WriteDataNormal(file, SetList); } else err = WriteByGroup(file, SetList, group_); } else if (SetList[0]->Ndim() == 2) err = WriteData2D(file, SetList); else if (SetList[0]->Ndim() == 3) err = WriteData3D(file, SetList); file.CloseFile(); } return err; }
// TODO: Accept const ArgList so arguments are not reset? CpptrajFile* DataFileList::AddCpptrajFile(FileName const& nameIn, std::string const& descrip, CFtype typeIn, bool allowStdout) { // If no filename and stdout not allowed, no output desired. if (nameIn.empty() && !allowStdout) return 0; FileName name; CpptrajFile* Current = 0; int currentIdx = -1; if (!nameIn.empty()) { name = nameIn; // Append ensemble number if set. if (ensembleNum_ != -1) name.Append( "." + integerToString(ensembleNum_) ); // Check if filename in use by DataFile. DataFile* df = GetDataFile(name); if (df != 0) { mprinterr("Error: Text output file name '%s' already in use by data file '%s'.\n", nameIn.full(), df->DataFilename().full()); return 0; } // Check if this filename already in use currentIdx = GetCpptrajFileIdx( name ); if (currentIdx != -1) Current = cfList_[currentIdx]; } // If no CpptrajFile associated with name, create new CpptrajFile if (Current==0) { switch (typeIn) { case TEXT: Current = new CpptrajFile(); break; case PDB: Current = (CpptrajFile*)(new PDBfile()); break; } Current->SetDebug(debug_); // Set up file for writing. //if (Current->SetupWrite( name, debug_ )) if (Current->OpenWrite( name )) { mprinterr("Error: Setting up text output file %s\n", name.full()); delete Current; return 0; } cfList_.push_back( Current ); cfData_.push_back( CFstruct(descrip, typeIn) ); } else { // If Current type does not match typeIn do not allow. if (typeIn != cfData_[currentIdx].Type()) { mprinterr("Error: Cannot change type of text output for '%s'.\n", Current->Filename().full()); return 0; } Current->SetDebug(debug_); // Update description if (!descrip.empty()) cfData_[currentIdx].UpdateDescrip( descrip ); } return Current; }
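// [Illustrative sketch, not part of cpptraj] The create-or-reuse pattern used by
// AddCpptrajFile() above, reduced to a map from file name to an already-open handle: the
// first request for a name opens the file, later requests with the same name are handed the
// existing handle, and a request that would change the file's type is refused. FileEntry and
// GetOrCreate are hypothetical names standing in for the CpptrajFile/CFstruct bookkeeping.
#include <cstdio>
#include <map>
#include <string>

struct FileEntry { std::FILE* fp; int type; };

std::FILE* GetOrCreate(std::map<std::string, FileEntry>& registry,
                       const std::string& name, int type)
{
  std::map<std::string, FileEntry>::iterator it = registry.find(name);
  if (it != registry.end()) {
    if (it->second.type != type) return 0; // refuse to change the type of an existing file
    return it->second.fp;                  // reuse the already-open handle
  }
  std::FILE* fp = std::fopen(name.c_str(), "w");
  if (fp == 0) return 0;
  FileEntry entry = { fp, type };
  registry[name] = entry;                  // remember the new file for later requests
  return fp;
}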
// DataIO_CCP4::WriteData()
int DataIO_CCP4::WriteData(FileName const& fname, DataSetList const& setList) {
  // Open output file
  CpptrajFile outfile;
  if (outfile.OpenWrite(fname)) {
    mprinterr("Error: Could not open CCP4 output file '%s'.\n", fname.full());
    return 1;
  }
  // Warn about writing multiple sets
  if (setList.size() > 1)
    mprintf("Warning: %s: Writing multiple 3D sets in CCP4 format not supported.\n"
            "Warning: Only writing first set.\n", fname.full());
  return WriteSet3D( setList.begin(), outfile );
}
// DataIO_OpenDx::WriteData()
int DataIO_OpenDx::WriteData(FileName const& fname, DataSetList const& setList) {
  // Open output file
  CpptrajFile outfile;
  if (outfile.OpenWrite(fname)) {
    mprinterr("Error: Could not open OpenDX output file.\n");
    return 1;
  }
  // Warn about writing multiple sets
  if (setList.size() > 1)
    mprintf("Warning: %s: Writing multiple 3D sets in OpenDX format may result in unexpected behavior\n",
            fname.full());
  int err = 0;
  for (DataSetList::const_iterator set = setList.begin(); set != setList.end(); ++set)
    err += WriteSet3D( *(*set), outfile );
  return err;
}
void Action_Pairwise::Print() { if (nframes_ < 1) return; // Divide matrices by # of frames double norm = 1.0 / (double)nframes_; for (unsigned int i = 0; i != vdwMat_->Size(); i++) { (*vdwMat_)[i] *= norm; (*eleMat_)[i] *= norm; } // Write out final results CpptrajFile AvgOut; if (AvgOut.OpenWrite( avgout_ )) return; if (nb_calcType_ == NORMAL) mprintf(" PAIRWISE: Writing all pairs with |<evdw>| > %.4f, |<eelec>| > %.4f\n", cut_evdw_, cut_eelec_); else if (nb_calcType_ == COMPARE_REF) mprintf(" PAIRWISE: Writing all pairs with |<dEvdw>| > %.4f, |<dEelec>| > %.4f\n", cut_evdw_, cut_eelec_); AvgOut.Printf("%-16s %5s -- %16s %5s : ENE\n","#Name1", "At1", "Name2", "At2"); for (AtomMask::const_iterator m1 = Mask0_.begin(); m1 != Mask0_.end(); ++m1) { for (AtomMask::const_iterator m2 = m1 + 1; m2 != Mask0_.end(); ++m2) { double EV = vdwMat_->GetElement(*m1, *m2); double EE = eleMat_->GetElement(*m1, *m2); bool outputv = ( fabs(EV) > cut_evdw_ ); bool outpute = ( fabs(EE) > cut_eelec_ ); if (outputv || outpute) { AvgOut.Printf("%16s %5i -- %16s %5i :", CurrentParm_->TruncResAtomName(*m1).c_str(), *m1 + 1, CurrentParm_->TruncResAtomName(*m2).c_str(), *m2 + 1); if (outputv) AvgOut.Printf(" EVDW= %12.5e", EV); if (outpute) AvgOut.Printf(" EELEC= %12.5e", EE); AvgOut.Printf("\n"); } } } }
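// [Illustrative sketch, not part of cpptraj] The normalization step that Print() above
// applies before reporting: energies accumulated over nframes frames are turned into
// per-frame averages with a single 1/nframes factor, and only entries whose magnitude
// exceeds a cutoff are printed (Print() keeps separate vdW and electrostatic cutoffs).
// PrintLargeEntries is a hypothetical stand-in for the matrix and report classes.
#include <cmath>
#include <cstdio>
#include <vector>

void PrintLargeEntries(std::vector<double>& accumulated, int nframes, double cutoff)
{
  if (nframes < 1) return;
  const double norm = 1.0 / (double)nframes;
  for (std::size_t i = 0; i != accumulated.size(); i++)
    accumulated[i] *= norm;                       // sum over frames -> per-frame average
  for (std::size_t i = 0; i != accumulated.size(); i++)
    if (std::fabs(accumulated[i]) > cutoff)       // report only pairs above the cutoff
      std::printf("element %zu: %12.5e\n", i, accumulated[i]);
}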
int Cluster_DPeaks::ChoosePointsAutomatically() { // Right now all density values are discrete. Try to choose outliers at each // value for which there is density.; /* // For each point, calculate average distance (X,Y) to points in next and // previous density values. const double dens_cut = 3.0 * 3.0; const double dist_cut = 1.32 * 1.32; for (Carray::const_iterator point0 = Points_.begin(); point0 != Points_.end(); ++point0) { int Npts = 0; for (Carray::const_iterator point1 = Points_.begin(); point1 != Points_.end(); ++point1) { if (point0 != point1) { // Only do this for close densities double dX = (double)(point0->PointsWithinEps() - point1->PointsWithinEps()); double dX2 = dX * dX; double dY = (point0->Dist() - point1->Dist()); double dY2 = dY * dY; if (dX2 < dens_cut && dY2 < dist_cut) { Npts++; } } } mprintf("%i %i %i\n", point0->PointsWithinEps(), point0->Fnum()+1, Npts); } */ /* CpptrajFile tempOut; tempOut.OpenWrite("temp.dat"); int currentDensity = -1; double distAv = 0.0; double distSD = 0.0; double sumWts = 0.0; int nValues = 0; Carray::const_iterator lastPoint = Points_.end() + 1; for (Carray::const_iterator point = Points_.begin(); point != lastPoint; ++point) { if (point == Points_.end() || point->PointsWithinEps() != currentDensity) { if (nValues > 0) { distAv = distAv / sumWts; //(double)nValues; distSD = (distSD / sumWts) - (distAv * distAv); if (distSD > 0.0) distSD = sqrt(distSD); else distSD = 0.0; //mprintf("Density %i: %i values Avg= %g SD= %g SumWts= %g\n", currentDensity, // nValues, distAv, distSD, sumWts); tempOut.Printf("%i %g\n", currentDensity, distAv); } if (point == Points_.end()) break; currentDensity = point->PointsWithinEps(); distAv = 0.0; distSD = 0.0; sumWts = 0.0; nValues = 0; } double wt = exp(point->Dist()); double dval = point->Dist() * wt; sumWts += wt; distAv += dval; distSD += (dval * dval); nValues++; } tempOut.CloseFile(); */ // BEGIN CALCULATING WEIGHTED DISTANCE AVERAGE CpptrajFile tempOut; tempOut.OpenWrite("temp.dat"); DataSet_Mesh weightedAverage; Carray::const_iterator cp = Points_.begin(); // Skip local density of 0. //while (cp->PointsWithinEps() == 0 && cp != Points_.end()) ++cp; while (cp != Points_.end()) { int densityVal = cp->PointsWithinEps(); Carray densityArray; // Add all points of current density. while (cp->PointsWithinEps() == densityVal && cp != Points_.end()) densityArray.push_back( *(cp++) ); mprintf("Density value %i has %zu points.\n", densityVal, densityArray.size()); // Sort array by distance std::sort(densityArray.begin(), densityArray.end(), Cpoint::dist_sort()); // Take the average of the points weighted by their position. double wtDistAv = 0.0; double sumWts = 0.0; //std::vector<double> weights; //weights.reserve( densityArray.size() ); int maxPt = (int)densityArray.size() - 1; for (int ip = 0; ip != (int)densityArray.size(); ++ip) { double wt = exp( (double)(ip - maxPt) ); //mprintf("\t%10i %10u %10u %10g\n", densityVal, ip, maxPt, wt); wtDistAv += (densityArray[ip].Dist() * wt); sumWts += wt; //weights.push_back( wt ); } wtDistAv /= sumWts; // Calculate the weighted sample variance //double distSD = 0.0; //for (unsigned int ip = 0; ip != densityArray.size(); ++ip) { // double diff = densityArray[ip].Dist() - wtDistAv; // distSD += weights[ip] * (diff * diff); //} //distSD /= sumWts; weightedAverage.AddXY(densityVal, wtDistAv); //tempOut.Printf("%i %g %g %g\n", densityVal, wtDistAv, sqrt(distSD), sumWts); tempOut.Printf("%i %g %g\n", densityVal, wtDistAv, sumWts); /* // Find the median. 
double median, Q1, Q3; if (densityArray.size() == 1) { median = densityArray[0].Dist(); Q1 = median; Q3 = median; } else { unsigned int q3_beg; unsigned int med_idx = densityArray.size() / 2; // Always 0 <= Q1 < med_idx if ((densityArray.size() % 2) == 0) { median = (densityArray[med_idx].Dist() + densityArray[med_idx-1].Dist()) / 2.0; q3_beg = med_idx; } else { median = densityArray[med_idx].Dist(); q3_beg = med_idx + 1; } if (densityArray.size() == 2) { Q1 = densityArray[0].Dist(); Q3 = densityArray[1].Dist(); } else { // Find lower quartile unsigned int q1_idx = med_idx / 2; if ((med_idx % 2) == 0) Q1 = (densityArray[q1_idx].Dist() + densityArray[q1_idx-1].Dist()) / 2.0; else Q1 = densityArray[q1_idx].Dist(); // Find upper quartile unsigned int q3_size = densityArray.size() - q3_beg; unsigned int q3_idx = (q3_size / 2) + q3_beg; if ((q3_size %2) == 0) Q3 = (densityArray[q3_idx].Dist() + densityArray[q3_idx-1].Dist()) / 2.0; else Q3 = densityArray[q3_idx].Dist(); } } mprintf("\tMedian dist value is %g. Q1= %g Q3= %g\n", median, Q1, Q3); */ } tempOut.CloseFile(); // END CALCULATING WEIGHTED DISTANCE AVERAGE /* // TEST tempOut.OpenWrite("temp2.dat"); std::vector<double> Hist( Points_.back().PointsWithinEps()+1, 0.0 ); int gWidth = 3; double cval = 3.0; double two_c_squared = 2.0 * cval * cval; mprintf("DBG: cval= %g, Gaussian denominator is %g\n", cval, two_c_squared); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { int bval = weightedAverage.X(wtIdx); for (int xval = std::max(bval - gWidth, 0); xval != std::min(bval + gWidth + 1, (int)Hist.size()); xval++) { // a: height (weighted average) // b: center (density value) // c: width // x: density value in histogram //int xval = weightedAverage.X(idx); //double bval = weightedAverage.X(wtIdx); //double bval = (double)wtIdx; double diff = (double)(xval - bval); //Hist[xval] += (weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); Hist[xval] = std::max(Hist[xval], weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); } } for (unsigned int idx = 0; idx != Hist.size(); idx++) tempOut.Printf("%u %g\n", idx, Hist[idx]); tempOut.CloseFile(); // END TEST */ /* // TEST // Construct best-fit line segments tempOut.OpenWrite("temp2.dat"); double slope, intercept, correl; int segment_length = 3; DataSet_Mesh Segment; Segment.Allocate1D( segment_length ); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { Segment.Clear(); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) Segment.AddXY(weightedAverage.X(idx), weightedAverage.Y(idx)); Segment.LinearRegression(slope, intercept, correl, true); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) { double x = weightedAverage.X(idx); double y = slope * x + intercept; tempOut.Printf("%g %g %i\n", x, y, weightedAverage.X(wtIdx)); } } tempOut.CloseFile(); // END TEST */ // BEGIN WEIGHTED RUNNING AVG/SD OF DISTANCES // For each point, determine if it is greater than the average of the // weighted average distances of the previous, current, and next densities. 
int width = 2; int currentDensity = 0; int wtIdx = 0; double currentAvg = 0.0; double deltaSD = 0.0; double deltaAv = 0.0; int Ndelta = 0; CpptrajFile raOut; if (!rafile_.empty()) raOut.OpenWrite(rafile_); CpptrajFile raDelta; if (!radelta_.empty()) raDelta.OpenWrite(radelta_); std::vector<unsigned int> candidateIdxs; std::vector<double> candidateDeltas; cp = Points_.begin(); // Skip over points with zero density while (cp != Points_.end() && cp->PointsWithinEps() == 0) ++cp; while (weightedAverage.X(wtIdx) != cp->PointsWithinEps() && wtIdx < (int)Points_.size()) ++wtIdx; for (Carray::const_iterator point = cp; point != Points_.end(); ++point) { if (point->PointsWithinEps() != currentDensity) { //currentAvg = weightedAverage.Y(wtIdx); // New density value. Determine average. currentAvg = 0.0; // unsigned int Npt = 0; double currentWt = 0.0; for (int idx = std::max(wtIdx - width, 0); idx != std::min(wtIdx + width + 1, (int)weightedAverage.Size()); idx++) { //currentAvg += weightedAverage.Y(idx); //Npt++; double wt = weightedAverage.Y(idx); currentAvg += (weightedAverage.Y(idx) * wt); currentWt += wt; } //currentAvg /= (double)Npt; currentAvg /= currentWt; //smoothAv += currentAvg; //smoothSD += (currentAvg * currentAvg); //Nsmooth++; currentDensity = point->PointsWithinEps(); if (raOut.IsOpen()) raOut.Printf("%i %g %g\n", currentDensity, currentAvg, weightedAverage.Y(wtIdx)); wtIdx++; } double delta = (point->Dist() - currentAvg); if (delta > 0.0) { //delta *= log((double)currentDensity); if (raDelta.IsOpen()) raDelta.Printf("%8i %8.3f %8i %8.3f %8.3f\n", currentDensity, delta, point->Fnum()+1, point->Dist(), currentAvg); candidateIdxs.push_back( point - Points_.begin() ); candidateDeltas.push_back( delta ); deltaAv += delta; deltaSD += (delta * delta); Ndelta++; } } raOut.CloseFile(); deltaAv /= (double)Ndelta; deltaSD = (deltaSD / (double)Ndelta) - (deltaAv * deltaAv); if (deltaSD > 0.0) deltaSD = sqrt(deltaSD); else deltaSD = 0.0; if (raDelta.IsOpen()) raDelta.Printf("#DeltaAvg= %g DeltaSD= %g\n", deltaAv, deltaSD); raDelta.CloseFile(); int cnum = 0; for (unsigned int i = 0; i != candidateIdxs.size(); i++) { if (candidateDeltas[i] > (deltaSD)) { Points_[candidateIdxs[i]].SetCluster( cnum++ ); mprintf("\tPoint %u (frame %i, density %i) selected as candidate for cluster %i\n", candidateIdxs[i], Points_[candidateIdxs[i]].Fnum()+1, Points_[candidateIdxs[i]].PointsWithinEps(), cnum-1); } } // END WEIGHTED AVG/SD OF DISTANCES /* // Currently doing this by calculating the running average of density vs // distance, then choosing points with distance > twice the SD of the // running average. // NOTE: Store in a mesh data set for now in case we want to spline etc later. 
if (avg_factor_ < 1) avg_factor_ = 10; unsigned int window_size = Points_.size() / (unsigned int)avg_factor_; mprintf("\tRunning avg window size is %u\n", window_size); // FIXME: Handle case where window_size < frames DataSet_Mesh runavg; unsigned int ra_size = Points_.size() - window_size + 1; runavg.Allocate1D( ra_size ); double dwindow = (double)window_size; double sumx = 0.0; double sumy = 0.0; for (unsigned int i = 0; i < window_size; i++) { sumx += (double)Points_[i].PointsWithinEps(); sumy += Points_[i].Dist(); } runavg.AddXY( sumx / dwindow, sumy / dwindow ); for (unsigned int i = 1; i < ra_size; i++) { unsigned int nextwin = i + window_size - 1; unsigned int prevwin = i - 1; sumx = (double)Points_[nextwin].PointsWithinEps() - (double)Points_[prevwin].PointsWithinEps() + sumx; sumy = Points_[nextwin].Dist() - Points_[prevwin].Dist() + sumy; runavg.AddXY( sumx / dwindow, sumy / dwindow ); } // Write running average if (!rafile_.empty()) { CpptrajFile raOut; if (raOut.OpenWrite(rafile_)) mprinterr("Error: Could not open running avg file '%s' for write.\n", rafile_.c_str()); else { for (unsigned int i = 0; i != runavg.Size(); i++) raOut.Printf("%g %g\n", runavg.X(i), runavg.Y(i)); raOut.CloseFile(); } } double ra_sd; double ra_avg = runavg.Avg( ra_sd ); // Double stdev to use as cutoff for findning anomalously high peaks. ra_sd *= 2.0; mprintf("\tAvg of running avg set is %g, SD*2.0 (delta cutoff) is %g\n", ra_avg, ra_sd); // For each point in density vs distance plot, determine which running // average point is closest. If the difference between the point and the // running average point is > 2.0 the SD of the running average data, // consider it a 'peak'. CpptrajFile raDelta; if (!radelta_.empty()) raDelta.OpenWrite("radelta.dat"); if (raDelta.IsOpen()) raDelta.Printf("%-10s %10s %10s\n", "#Frame", "RnAvgPos", "Delta"); unsigned int ra_position = 0; // Position in the runavg DataSet unsigned int ra_end = runavg.Size() - 1; int cnum = 0; for (Carray::iterator point = Points_.begin(); point != Points_.end(); ++point) { if (ra_position != ra_end) { // Is the next running avgd point closer to this point? while (ra_position != ra_end) { double dens = (double)point->PointsWithinEps(); double diff0 = fabs( dens - runavg.X(ra_position ) ); double diff1 = fabs( dens - runavg.X(ra_position+1) ); if (diff1 < diff0) ++ra_position; // Next running avg position is closer for this point. else break; // This position is closer. } } double delta = point->Dist() - runavg.Y(ra_position); if (raDelta.IsOpen()) raDelta.Printf("%-10i %10u %10g", point->Fnum()+1, ra_position, delta); if (delta > ra_sd) { if (raDelta.IsOpen()) raDelta.Printf(" POTENTIAL CLUSTER %i", cnum); point->SetCluster(cnum++); } if (raDelta.IsOpen()) raDelta.Printf("\n"); } raDelta.CloseFile(); */ return cnum; }
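// [Illustrative sketch, not part of cpptraj] The candidate-selection rule used by
// ChoosePointsAutomatically() above, reduced to its core: given each point's local density
// (rho), its distance to the nearest higher-density point (delta), and a smoothed "expected"
// delta for that density, flag points whose delta exceeds the expectation by more than one
// standard deviation of the positive deviations. SelectCandidates and the plain structs
// below are hypothetical names used only for illustration.
#include <cmath>
#include <cstddef>
#include <vector>

struct PointRD { int density; double delta; };

std::vector<std::size_t> SelectCandidates(const std::vector<PointRD>& points,
                                          const std::vector<double>& expectedDelta)
{
  // Collect positive deviations of delta from the expected value at each point's density.
  std::vector<double> dev(points.size(), 0.0);
  double avg = 0.0, sd = 0.0;
  int n = 0;
  for (std::size_t i = 0; i < points.size(); i++) {
    double d = points[i].delta - expectedDelta[i];
    if (d > 0.0) { dev[i] = d; avg += d; sd += d * d; n++; }
  }
  if (n == 0) return std::vector<std::size_t>();
  avg /= n;
  sd = sd / n - avg * avg;
  sd = (sd > 0.0) ? std::sqrt(sd) : 0.0;
  // Points whose deviation exceeds the SD become cluster-center candidates.
  std::vector<std::size_t> candidates;
  for (std::size_t i = 0; i < points.size(); i++)
    if (dev[i] > sd) candidates.push_back(i);
  return candidates;
}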
// ----------------------------------------------------------------------------- int Cluster_DPeaks::Cluster_DiscreteDensity() { mprintf("\tStarting DPeaks clustering, discrete density calculation.\n"); Points_.clear(); // First determine which frames are being clustered. for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame) if (!FrameDistances_.IgnoringRow( frame )) Points_.push_back( Cpoint(frame) ); // Sanity check. if (Points_.size() < 2) { mprinterr("Error: Only 1 frame in initial clustering.\n"); return 1; } // For each point, determine how many others are within epsilon. Also // determine maximum distance between any two points. mprintf("\tDetermining local density of each point.\n"); ProgressBar cluster_progress( Points_.size() ); double maxDist = -1.0; for (Carray::iterator point0 = Points_.begin(); point0 != Points_.end(); ++point0) { cluster_progress.Update(point0 - Points_.begin()); int density = 0; for (Carray::const_iterator point1 = Points_.begin(); point1 != Points_.end(); ++point1) { if (point0 != point1) { double dist = FrameDistances_.GetFdist(point0->Fnum(), point1->Fnum()); maxDist = std::max(maxDist, dist); if ( dist < epsilon_ ) density++; } } point0->SetPointsWithinEps( density ); } mprintf("DBG: Max dist= %g\n", maxDist); // DEBUG: Frame/Density CpptrajFile fdout; fdout.OpenWrite("fd.dat"); for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point) fdout.Printf("%i %i\n", point->Fnum()+1, point->PointsWithinEps()); fdout.CloseFile(); // Sort by density here. Otherwise array indices will be invalid later. std::sort( Points_.begin(), Points_.end(), Cpoint::pointsWithinEps_sort() ); // For each point, find the closest point that has higher density. Since // array is now sorted by density the last point has the highest density. Points_.back().SetDist( maxDist ); mprintf("\tFinding closest neighbor point with higher density for each point.\n"); unsigned int lastidx = Points_.size() - 1; cluster_progress.SetupProgress( lastidx ); for (unsigned int idx0 = 0; idx0 != lastidx; idx0++) { cluster_progress.Update( idx0 ); double min_dist = maxDist; int nearestIdx = -1; // Index of nearest neighbor with higher density Cpoint& point0 = Points_[idx0]; //mprintf("\nDBG:\tSearching for nearest neighbor to idx %u with higher density than %i.\n", // idx0, point0.PointsWithinEps()); // Since array is sorted by density we can start at the next point. for (unsigned int idx1 = idx0+1; idx1 != Points_.size(); idx1++) { Cpoint const& point1 = Points_[idx1]; double dist1_2 = FrameDistances_.GetFdist(point0.Fnum(), point1.Fnum()); if (point1.PointsWithinEps() > point0.PointsWithinEps()) { if (dist1_2 < min_dist) { min_dist = dist1_2; nearestIdx = (int)idx1; //mprintf("DBG:\t\tNeighbor idx %i is closer (density %i, distance %g)\n", // nearestIdx, point1.PointsWithinEps(), min_dist); } } } point0.SetDist( min_dist ); //mprintf("DBG:\tClosest point to %u with higher density is %i (distance %g)\n", // idx0, nearestIdx, min_dist); point0.SetNearestIdx( nearestIdx ); } // Plot density vs distance for each point. if (!dvdfile_.empty()) { CpptrajFile output; if (output.OpenWrite(dvdfile_)) mprinterr("Error: Could not open density vs distance plot '%s' for write.\n", dvdfile_.c_str()); // TODO: Make fatal? 
    else {
      output.Printf("%-10s %10s %s %10s %10s\n", "#Density", "Distance", "Frame", "Idx", "Neighbor");
      for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point)
        output.Printf("%-10i %10g \"%i\" %10u %10i\n", point->PointsWithinEps(), point->Dist(),
                      point->Fnum()+1, point - Points_.begin(), point->NearestIdx());
      output.CloseFile();
    }
  }
  return 0;
}
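// [Illustrative sketch, not part of cpptraj] The two quantities Cluster_DiscreteDensity()
// computes for every point, written against a plain distance callback instead of the
// ClusterMatrix class: rho(i) is the number of other points within epsilon, and delta(i) is
// the distance to the nearest point with a strictly higher rho (the maximum-density point
// keeps the largest observed distance). DensityAndDelta is a hypothetical name.
#include <algorithm>
#include <functional>
#include <vector>

void DensityAndDelta(int npoints, double epsilon,
                     std::function<double(int,int)> dist,
                     std::vector<int>& rho, std::vector<double>& delta)
{
  rho.assign(npoints, 0);
  double maxDist = -1.0;
  for (int i = 0; i < npoints; i++)
    for (int j = 0; j < npoints; j++)
      if (i != j) {
        double d = dist(i, j);
        maxDist = std::max(maxDist, d);
        if (d < epsilon) rho[i]++;           // local density: neighbors within epsilon
      }
  delta.assign(npoints, maxDist);            // densest point keeps the maximum distance
  for (int i = 0; i < npoints; i++)
    for (int j = 0; j < npoints; j++)
      if (rho[j] > rho[i])
        delta[i] = std::min(delta[i], dist(i, j)); // nearest higher-density neighbor
}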
// -----------------------------------------------------------------------------
int Cluster_DPeaks::Cluster_GaussianKernel() {
  mprintf("\tStarting DPeaks clustering. Using Gaussian kernel to calculate density.\n");
  // First determine which frames are being clustered.
  Points_.clear();
  int oidx = 0;
  for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame)
    if (!FrameDistances_.IgnoringRow( frame ))
      Points_.push_back( Cpoint(frame, oidx++) );
  // Sanity check.
  if (Points_.size() < 2) {
    mprinterr("Error: Only 1 frame in initial clustering.\n");
    return 1;
  }
  // Sort distances
  std::vector<float> Distances;
  for (ClusterMatrix::const_iterator mat = FrameDistances_.begin();
                                     mat != FrameDistances_.end(); ++mat)
    Distances.push_back( *mat );
  std::sort( Distances.begin(), Distances.end() );
  unsigned int idx = (unsigned int)((double)Distances.size() * 0.02);
  double bandwidth = (double)Distances[idx];
  mprintf("idx= %u, bandwidth= %g\n", idx, bandwidth);
  // Density via Gaussian kernel
  double maxDist = -1.0;
  for (unsigned int i = 0; i != Points_.size(); i++) {
    for (unsigned int j = i+1; j != Points_.size(); j++) {
      double dist = FrameDistances_.GetFdist(Points_[i].Fnum(), Points_[j].Fnum());
      maxDist = std::max( maxDist, dist );
      dist /= bandwidth;
      double gk = exp(-(dist * dist));
      Points_[i].AddDensity( gk );
      Points_[j].AddDensity( gk );
    }
  }
  mprintf("Max dist= %g\n", maxDist);
  CpptrajFile rhoOut;
  rhoOut.OpenWrite("rho.dat");
  for (unsigned int i = 0; i != Points_.size(); i++)
    rhoOut.Printf("%u %g\n", i+1, Points_[i].Density());
  rhoOut.CloseFile();
  // Sort by density, descending
  std::stable_sort( Points_.begin(), Points_.end(), Cpoint::density_sort_descend() );
  CpptrajFile ordrhoOut;
  ordrhoOut.OpenWrite("ordrho.dat");
  for (unsigned int i = 0; i != Points_.size(); i++)
    ordrhoOut.Printf("%u %g %i %i\n", i+1, Points_[i].Density(), Points_[i].Fnum()+1,
                     Points_[i].Oidx()+1);
  ordrhoOut.CloseFile();
  // Determine minimum distances
  int first_idx = Points_[0].Oidx();
  Points_[first_idx].SetDist( -1.0 );
  Points_[first_idx].SetNearestIdx(-1);
  for (unsigned int ii = 1; ii != Points_.size(); ii++) {
    int ord_i = Points_[ii].Oidx();
    Points_[ord_i].SetDist( maxDist );
    for (unsigned int jj = 0; jj != ii; jj++) {
      int ord_j = Points_[jj].Oidx();
      double dist = FrameDistances_.GetFdist(Points_[ord_i].Fnum(), Points_[ord_j].Fnum());
      if (dist < Points_[ord_i].Dist()) {
        Points_[ord_i].SetDist( dist );
        // Record ord_j as the nearest higher-density neighbor of ord_i.
        Points_[ord_i].SetNearestIdx( ord_j );
      }
    }
  }
  if (!dvdfile_.empty()) {
    CpptrajFile output;
    if (output.OpenWrite(dvdfile_)) return 1;
    for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point)
      output.Printf("%g %g %i\n", point->Density(), point->Dist(), point->NearestIdx()+1);
    output.CloseFile();
  }
  return 0;
}
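// [Illustrative sketch, not part of cpptraj] The Gaussian-kernel density used in
// Cluster_GaussianKernel() above, on a plain distance callback: the bandwidth is taken as
// roughly the 2nd percentile of all sorted pairwise distances, and each pair contributes
// exp(-(d/bandwidth)^2) to the density of both of its points. GaussianDensity is a
// hypothetical name used only for illustration.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <vector>

std::vector<double> GaussianDensity(int npoints, std::function<double(int,int)> dist)
{
  std::vector<double> rho(npoints, 0.0);
  if (npoints < 2) return rho;
  // Bandwidth: roughly the 2nd percentile of the sorted pairwise distances.
  std::vector<double> pairDists;
  for (int i = 0; i < npoints; i++)
    for (int j = i + 1; j < npoints; j++)
      pairDists.push_back(dist(i, j));
  std::sort(pairDists.begin(), pairDists.end());
  double bandwidth = pairDists[(std::size_t)((double)pairDists.size() * 0.02)];
  // Each pair raises the density of both of its points by a Gaussian kernel value.
  for (int i = 0; i < npoints; i++)
    for (int j = i + 1; j < npoints; j++) {
      double d = dist(i, j) / bandwidth;
      double gk = std::exp(-(d * d));
      rho[i] += gk;
      rho[j] += gk;
    }
  return rho;
}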
int Parm_CharmmPsf::WriteParm(FileName const& fname, Topology const& parm) { // TODO: CMAP etc info CpptrajFile outfile; if (outfile.OpenWrite(fname)) return 1; // Write PSF outfile.Printf("PSF\n\n"); // Write title std::string titleOut = parm.ParmName(); titleOut.resize(78); outfile.Printf("%8i !NTITLE\n* %-78s\n\n", 1, titleOut.c_str()); // Write NATOM section outfile.Printf("%8i !NATOM\n", parm.Natom()); unsigned int idx = 1; // Make fake segment ids for now. char segid[2]; segid[0] = 'A'; segid[1] = '\0'; mprintf("Warning: Assigning single letter segment IDs.\n"); int currentMol = 0; bool inSolvent = false; for (Topology::atom_iterator atom = parm.begin(); atom != parm.end(); ++atom, ++idx) { int resnum = atom->ResNum(); if (atom->MolNum() != currentMol) { if (!inSolvent) { inSolvent = parm.Mol(atom->MolNum()).IsSolvent(); currentMol = atom->MolNum(); segid[0]++; } else inSolvent = parm.Mol(atom->MolNum()).IsSolvent(); } // TODO: Print type name for xplor-like PSF int typeindex = atom->TypeIndex() + 1; // If type begins with digit, assume charmm numbers were read as // type. Currently Amber types all begin with letters. if (isdigit(atom->Type()[0])) typeindex = convertToInteger( *(atom->Type()) ); // ATOM# SEGID RES# RES ATNAME ATTYPE CHRG MASS (REST OF COLUMNS ARE LIKELY FOR CMAP AND CHEQ) outfile.Printf("%8i %-4s %-4i %-4s %-4s %4i %14.6G %9g %10i\n", idx, segid, parm.Res(resnum).OriginalResNum(), parm.Res(resnum).c_str(), atom->c_str(), typeindex, atom->Charge(), atom->Mass(), 0); } outfile.Printf("\n"); // Write NBOND section outfile.Printf("%8u !NBOND: bonds\n", parm.Bonds().size() + parm.BondsH().size()); idx = 1; for (BondArray::const_iterator bond = parm.BondsH().begin(); bond != parm.BondsH().end(); ++bond, ++idx) { outfile.Printf("%8i%8i", bond->A1()+1, bond->A2()+1); if ((idx % 4)==0) outfile.Printf("\n"); } for (BondArray::const_iterator bond = parm.Bonds().begin(); bond != parm.Bonds().end(); ++bond, ++idx) { outfile.Printf("%8i%8i", bond->A1()+1, bond->A2()+1); if ((idx % 4)==0) outfile.Printf("\n"); } if ((idx % 4)!=0) outfile.Printf("\n"); outfile.Printf("\n"); // Write NTHETA section outfile.Printf("%8u !NTHETA: angles\n", parm.Angles().size() + parm.AnglesH().size()); idx = 1; for (AngleArray::const_iterator ang = parm.AnglesH().begin(); ang != parm.AnglesH().end(); ++ang, ++idx) { outfile.Printf("%8i%8i%8i", ang->A1()+1, ang->A2()+1, ang->A3()+1); if ((idx % 3)==0) outfile.Printf("\n"); } for (AngleArray::const_iterator ang = parm.Angles().begin(); ang != parm.Angles().end(); ++ang, ++idx) { outfile.Printf("%8i%8i%8i", ang->A1()+1, ang->A2()+1, ang->A3()+1); if ((idx % 3)==0) outfile.Printf("\n"); } if ((idx % 3)==0) outfile.Printf("\n"); outfile.Printf("\n"); // Write out NPHI section outfile.Printf("%8u !NPHI: dihedrals\n", parm.Dihedrals().size() + parm.DihedralsH().size()); idx = 1; for (DihedralArray::const_iterator dih = parm.DihedralsH().begin(); dih != parm.DihedralsH().end(); ++dih, ++idx) { outfile.Printf("%8i%8i%8i%8i", dih->A1()+1, dih->A2()+1, dih->A3()+1, dih->A4()+1); if ((idx % 2)==0) outfile.Printf("\n"); } for (DihedralArray::const_iterator dih = parm.Dihedrals().begin(); dih != parm.Dihedrals().end(); ++dih, ++idx) { outfile.Printf("%8i%8i%8i%8i", dih->A1()+1, dih->A2()+1, dih->A3()+1, dih->A4()+1); if ((idx % 2)==0) outfile.Printf("\n"); } if ((idx % 2)==0) outfile.Printf("\n"); outfile.Printf("\n"); outfile.CloseFile(); return 0; }
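// [Illustrative sketch, not part of cpptraj] The wrapped fixed-width layout used for the
// !NBOND section in WriteParm() above: bond atom indices are written as 8-column integers,
// four pairs per line, the final line is terminated only if it was left partially filled,
// and a blank line closes the section. WriteBondSection is a hypothetical helper written
// against std::pair instead of cpptraj's BondArray, purely for illustration.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

void WriteBondSection(std::FILE* out, const std::vector< std::pair<int,int> >& bonds)
{
  std::fprintf(out, "%8zu !NBOND: bonds\n", bonds.size());
  for (std::size_t i = 0; i < bonds.size(); i++) {
    std::fprintf(out, "%8i%8i", bonds[i].first + 1, bonds[i].second + 1); // 1-based indices
    if (((i + 1) % 4) == 0) std::fprintf(out, "\n");   // four pairs per line
  }
  if ((bonds.size() % 4) != 0) std::fprintf(out, "\n"); // terminate a partially filled line
  std::fprintf(out, "\n");                              // blank line ends the section
}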
// Analysis_Wavelet::Analyze()
Analysis::RetType Analysis_Wavelet::Analyze() {
  // Step 1 - Create a matrix that is #atoms rows by #frames - 1 cols,
  //          where matrix(frame, atom) is the distance that atom has
  //          travelled from the previous frame.
  // TODO: Implement this in Action_Matrix()?
  mprintf(" WAVELET:\n");
  // First set up atom mask.
  if (coords_->Top().SetupIntegerMask( mask_ )) return Analysis::ERR;
  mask_.MaskInfo();
  int natoms = mask_.Nselected();
  int nframes = (int)coords_->Size();
  if (natoms < 1 || nframes < 2) {
    mprinterr("Error: Not enough frames (%i) or atoms (%i) in '%s'\n",
              nframes, natoms, coords_->legend());
    return Analysis::ERR;
  }
  Matrix<double> d_matrix;
  mprintf("\t%i frames, %i atoms, distance matrix will require %.2f MB\n",
          nframes, natoms,
          (double)d_matrix.sizeInBytes(nframes, natoms) / (1024.0*1024.0));
  d_matrix.resize(nframes, natoms);
  // Get initial frame.
  Frame currentFrame, lastFrame;
  currentFrame.SetupFrameFromMask( mask_, coords_->Top().Atoms() );
  lastFrame = currentFrame;
  coords_->GetFrame( 0, lastFrame, mask_ );
  // Iterate over frames
  for (int frm = 1; frm != nframes; frm++) {
    coords_->GetFrame( frm, currentFrame, mask_ );
    int idx = frm; // Position in distance matrix; start at column 'frame'
    for (int at = 0; at != natoms; at++, idx += nframes)
      // Distance of atom in currentFrame from its position in lastFrame.
      d_matrix[idx] = sqrt(DIST2_NoImage( currentFrame.XYZ(at), lastFrame.XYZ(at) ));
    //lastFrame = currentFrame; // TODO: Re-enable?
  }
# ifdef DEBUG_WAVELET
  // DEBUG: Write matrix to file.
  CpptrajFile dmatrixOut; // DEBUG
  dmatrixOut.OpenWrite("dmatrix.dat");
  Matrix<double>::iterator mval = d_matrix.begin();
  for (int row = 0; row != natoms; row++) {
    for (int col = 0; col != nframes; col++)
      dmatrixOut.Printf("%g ", *(mval++));
    dmatrixOut.Printf("\n");
  }
  dmatrixOut.CloseFile();
# endif
  // Precompute some factors for calculating scaled wavelets.
  const double one_over_sqrt_N = 1.0 / sqrt(static_cast<double>( nframes ));
  std::vector<int> arrayK( nframes );
  arrayK[0] = -1 * (nframes/2);
  for (int i = 1; i != nframes; i++) arrayK[i] = arrayK[i-1] + 1;
# ifdef DEBUG_WAVELET
  mprintf("DEBUG: K:");
  for (std::vector<int>::const_iterator kval = arrayK.begin(); kval != arrayK.end(); ++kval)
    mprintf(" %i", *kval);
  mprintf("\n");
# endif
  // Step 2 - Get FFT of wavelet for each scale.
  PubFFT pubfft;
  pubfft.SetupFFTforN( nframes );
  mprintf("\tMemory required for scaled wavelet array: %.2f MB\n",
          (double)(2 * nframes * nb_ * sizeof(double)) / (1024 * 1024));
  typedef std::vector<ComplexArray> WaveletArray;
  WaveletArray FFT_of_Scaled_Wavelets;
  FFT_of_Scaled_Wavelets.reserve( nb_ );
  typedef std::vector<double> Darray;
  Darray scaleVector;
  scaleVector.reserve( nb_ );
  Darray MIN( nb_ * 2 );
  for (int iscale = 0; iscale != nb_; iscale++) {
    // Calculate and store scale factor.
    scaleVector.push_back( S0_ * pow(2.0, iscale * ds_) );
    // Populate MIN array
    MIN[iscale    ] = (0.00647*pow((correction_*scaleVector.back()),1.41344)+19.7527)*chival_;
    MIN[iscale+nb_] = correction_*scaleVector.back();
    // Calculate scaled wavelet
    ComplexArray scaledWavelet;
    switch (wavelet_type_) {
      case W_MORLET: scaledWavelet = F_Morlet(arrayK, scaleVector.back()); break;
      case W_PAUL  : scaledWavelet = F_Paul(arrayK, scaleVector.back()); break;
      case W_NONE  : return Analysis::ERR; // Sanity check
    }
# ifdef DEBUG_WAVELET
    PrintComplex("wavelet_before_fft", scaledWavelet);
# endif
    // Perform FFT
    pubfft.Forward( scaledWavelet );
    // Normalize
    scaledWavelet.Normalize( one_over_sqrt_N );
# ifdef DEBUG_WAVELET
    PrintComplex("wavelet_after_fft", scaledWavelet);
# endif
    FFT_of_Scaled_Wavelets.push_back( scaledWavelet );
  }
# ifdef DEBUG_WAVELET
  mprintf("DEBUG: Scaling factors:");
  for (Darray::const_iterator sval = scaleVector.begin(); sval != scaleVector.end(); ++sval)
    mprintf(" %g", *sval);
  mprintf("\n");
  mprintf("DEBUG: MIN:");
  for (int i = 0; i != nb_; i++) mprintf(" %g", MIN[i]);
  mprintf("\n");
# endif
  // Step 3 - For each atom, calculate the convolution of scaled wavelets
  //          with rows (atom distance vs frame) via dot product of the
  //          frequency domains, i.e. Fourier-transformed, followed by an
  //          inverse FT.
  DataSet_MatrixFlt& OUT = static_cast<DataSet_MatrixFlt&>( *output_ );
  mprintf("\tMemory required for output matrix: %.2f MB\n",
          (double)Matrix<float>::sizeInBytes(nframes, natoms)/(1024.0*1024.0));
  OUT.Allocate2D( nframes, natoms ); // Should initialize to zero
  Matrix<double> MAX;
  mprintf("\tMemory required for Max array: %.2f MB\n",
          (double)MAX.sizeInBytes(nframes, natoms)/(1024.0*1024.0));
  MAX.resize( nframes, natoms );
  Darray magnitude( nframes ); // Scratch space for calculating magnitude across rows
  for (int at = 0; at != natoms; at++) {
    ComplexArray AtomSignal( nframes ); // Initializes to zero
    // Calculate the distance variance for this atom and populate the array.
    int midx = at * nframes; // Index into d_matrix
    int cidx = 0;            // Index into AtomSignal
    double d_avg = 0.0;
    double d_var = 0.0;
    for (int frm = 0; frm != nframes; frm++, cidx += 2, midx++) {
      d_avg += d_matrix[midx];
      d_var += (d_matrix[midx] * d_matrix[midx]);
      AtomSignal[cidx] = d_matrix[midx];
    }
    d_var = (d_var - ((d_avg * d_avg) / (double)nframes)) / ((double)(nframes - 1));
# ifdef DEBUG_WAVELET
    mprintf("VARIANCE: %g\n", d_var);
# endif
    double var_norm = 1.0 / d_var;
    // Calculate FT of atom signal
    pubfft.Forward( AtomSignal );
# ifdef DEBUG_WAVELET
    PrintComplex("AtomSignal", AtomSignal);
# endif
    // Normalize
    AtomSignal.Normalize( one_over_sqrt_N );
    // Calculate dot product of atom signal with each scaled FT wavelet
    for (int iscale = 0; iscale != nb_; iscale++) {
      ComplexArray dot = AtomSignal.TimesComplexConj( FFT_of_Scaled_Wavelets[iscale] );
      // Inverse FT of dot product
      pubfft.Back( dot );
# ifdef DEBUG_WAVELET
      PrintComplex("InverseFT_Dot", dot);
# endif
      // Chi-squared testing
      midx = at * nframes;
      cidx = 0;
      for (int frm = 0; frm != nframes; frm++, cidx += 2, midx++) {
        magnitude[frm] = (dot[cidx]*dot[cidx] + dot[cidx+1]*dot[cidx+1]) * var_norm;
        if (magnitude[frm] < MIN[iscale]) magnitude[frm] = 0.0;
        if (magnitude[frm] > MAX[midx]) {
          MAX[midx] = magnitude[frm];
          //Indices[midx] = iscale
          OUT[midx] = (float)(correction_ * scaleVector[iscale]);
        }
      }
# ifdef DEBUG_WAVELET
      mprintf("DEBUG: AbsoluteValue:");
      for (Darray::const_iterator dval = magnitude.begin(); dval != magnitude.end(); ++dval)
        mprintf(" %g", *dval);
      mprintf("\n");
# endif
    } // END loop over scales
  } // END loop over atoms
# ifdef DEBUG_WAVELET
  // DEBUG: Print MAX
  CpptrajFile maxmatrixOut; // DEBUG
  maxmatrixOut.OpenWrite("maxmatrix.dat");
  for (int col = 0; col != nframes; col++) {
    for (int row = 0; row != natoms; row++)
      maxmatrixOut.Printf("%g ", MAX.element(col, row));
    maxmatrixOut.Printf("\n");
  }
  maxmatrixOut.CloseFile();
# endif
  return Analysis::OK;
}
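// [Illustrative sketch, not part of cpptraj] The convolution-theorem step at the heart of
// Step 3 above: the cross-correlation of a signal with a (scaled) wavelet at every shift is
// obtained by multiplying the signal's forward transform by the complex conjugate of the
// wavelet's transform and transforming back. A naive O(N^2) DFT replaces cpptraj's PubFFT
// here purely to keep the sketch self-contained; Dft and CorrelateViaFFT are hypothetical.
#include <complex>
#include <cstddef>
#include <vector>

typedef std::vector< std::complex<double> > CArray;

static CArray Dft(const CArray& in, double sign) // sign = -1 forward, +1 inverse (unscaled)
{
  const double PI = 3.14159265358979323846;
  const std::size_t N = in.size();
  CArray out(N);
  for (std::size_t k = 0; k < N; k++)
    for (std::size_t n = 0; n < N; n++)
      out[k] += in[n] * std::polar(1.0, sign * 2.0 * PI * k * n / N);
  return out;
}

CArray CorrelateViaFFT(const CArray& signal, const CArray& wavelet)
{
  CArray S = Dft(signal, -1.0);
  CArray W = Dft(wavelet, -1.0);
  CArray prod(S.size());
  for (std::size_t i = 0; i < S.size(); i++)
    prod[i] = S[i] * std::conj(W[i]);   // dot product in the frequency domain
  CArray corr = Dft(prod, +1.0);
  for (std::size_t i = 0; i < corr.size(); i++)
    corr[i] /= (double)corr.size();     // 1/N factor for the inverse transform
  return corr;
}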
// Cluster_DBSCAN::ComputeKdistMap() void Cluster_DBSCAN::ComputeKdistMap( Range const& Kvals, std::vector<int> const& FramesToCluster ) const { int pt1_idx, pt2_idx, d_idx, point; mprintf("\tCalculating Kdist map for %s\n", Kvals.RangeArg()); double* kdist_array; // Store distance of pt1 to every other point. int nframes = (int)FramesToCluster.size(); // Ensure all Kdist points are within proper range Range::const_iterator kval; for (kval = Kvals.begin(); kval != Kvals.end(); ++kval) if (*kval < 1 || *kval >= nframes) { mprinterr("Error: Kdist value %i is out of range (1 <= Kdist < %i)\n", *kval, nframes); return; } int nvals = (int)Kvals.Size(); double** KMAP; // KMAP[i] has the ith nearest point for each point. KMAP = new double*[ nvals ]; for (int i = 0; i != nvals; i++) KMAP[i] = new double[ nframes ]; ParallelProgress progress( nframes ); # ifdef _OPENMP # pragma omp parallel private(pt1_idx, pt2_idx, d_idx, kval, point, kdist_array) firstprivate(progress) { progress.SetThread( omp_get_thread_num() ); #endif kdist_array = new double[ nframes ]; # ifdef _OPENMP # pragma omp for # endif for (pt1_idx = 0; pt1_idx < nframes; pt1_idx++) // X { progress.Update( pt1_idx ); point = FramesToCluster[pt1_idx]; d_idx = 0; // Store distances from pt1 to pt2 for (pt2_idx = 0; pt2_idx != nframes; pt2_idx++) kdist_array[d_idx++] = FrameDistances_.GetFdist(point, FramesToCluster[pt2_idx]); // Sort distances; will be smallest to largest std::sort( kdist_array, kdist_array + nframes ); // Save the distance of specified nearest neighbors to this point. d_idx = 0; for (kval = Kvals.begin(); kval != Kvals.end(); ++kval) // Y KMAP[d_idx++][pt1_idx] = kdist_array[ *kval ]; } delete[] kdist_array; # ifdef _OPENMP } // END omp parallel # endif progress.Finish(); // Sort all of the individual kdist plots, smallest to largest. for (int i = 0; i != nvals; i++) std::sort(KMAP[i], KMAP[i] + nframes); // Save in matrix, largest to smallest. DataSet_MatrixDbl kmatrix; kmatrix.Allocate2D( FramesToCluster.size(), Kvals.Size() ); for (int y = 0; y != nvals; y++) { for (int x = nframes - 1; x != -1; x--) kmatrix.AddElement( KMAP[y][x] ); delete[] KMAP[y]; } delete[] KMAP; // Write matrix to file DataFile outfile; ArgList outargs("usemap"); outfile.SetupDatafile(k_prefix_ + "Kmatrix.gnu", outargs, debug_); outfile.AddDataSet( (DataSet*)&kmatrix ); outfile.WriteDataOut(); // Write out the largest and smallest values for each K. // This means for each value of K the point with the furthest Kth-nearest // neighbor etc. CpptrajFile maxfile; if (maxfile.OpenWrite(k_prefix_ + "Kmatrix.max.dat")) return; maxfile.Printf("%-12s %12s %12s\n", "#Kval", "MaxD", "MinD"); d_idx = 0; for (kval = Kvals.begin(); kval != Kvals.end(); ++kval, d_idx++) maxfile.Printf("%12i %12g %12g\n", *kval, kmatrix.GetElement(0, d_idx), kmatrix.GetElement(nframes-1, d_idx)); maxfile.CloseFile(); }
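// [Illustrative sketch, not part of cpptraj] One common way to read the Kdist output that
// ComputeKdist()/ComputeKdistMap() produce: with the K-distances sorted, the "knee" of the
// curve is a reasonable starting guess for the DBSCAN epsilon. The crude heuristic below
// simply returns the value just before the largest jump between consecutive sorted
// K-distances; cpptraj itself leaves the choice to the user, so this is only an
// assumption-laden example, and GuessEpsilonFromKdist is a hypothetical name.
#include <cstddef>
#include <vector>

double GuessEpsilonFromKdist(const std::vector<double>& sortedKdist) // ascending order
{
  if (sortedKdist.size() < 2) return sortedKdist.empty() ? 0.0 : sortedKdist[0];
  std::size_t knee = 1;
  double maxJump = -1.0;
  for (std::size_t i = 1; i < sortedKdist.size(); i++) {
    double jump = sortedKdist[i] - sortedKdist[i-1];
    if (jump > maxJump) { maxJump = jump; knee = i; }
  }
  return sortedKdist[knee - 1]; // K-distance just before the largest jump
}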