bool SDFfile::ID_SDF(CpptrajFile& fileIn) {
  // NOTE: ASSUMES FILE IS ALREADY SETUP!
  if (fileIn.OpenFile()) return false;
  // Search for V2000 somewhere in line 4
  const char* ptr = 0;
  for (int i = 0; i < 4; i++)
    if ( (ptr = fileIn.NextLine()) == 0 ) {
      fileIn.CloseFile();
      return false;
    }
  fileIn.CloseFile();
  std::string line( ptr ); // Line 4, Connection table
  if ( line.find( "V2000" ) != std::string::npos ) return true;
  return false;
}
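// Illustrative only: the first four lines of a typical V2000 MOL/SDF entry.
// ID_SDF() above keys on the "V2000" tag in line 4 (the counts line); the
// molecule name and atom/bond counts shown here are made up.
//
//   benzene
//     example program/timestamp line
//
//     6  6  0  0  0  0  0  0  0  0999 V2000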
/** \return true if TRR/TRJ file. */
bool Traj_GmxTrX::ID_TrajFormat(CpptrajFile& infile) {
  // File must already be set up for read
  if (infile.OpenFile()) return false;
  bool istrx = IsTRX(infile);
  infile.CloseFile();
  return istrx;
}
// DataIO_Std::WriteData()
int DataIO_Std::WriteData(FileName const& fname, DataSetList const& SetList) {
  int err = 0;
  if (!SetList.empty()) {
    // Open output file.
    CpptrajFile file;
    if (file.OpenWrite( fname )) return 1;
    // Base write type off first data set dimension FIXME
    if (SetList[0]->Group() == DataSet::CLUSTERMATRIX) {
      // Special case of 2D - may have sieved frames.
      err = WriteCmatrix(file, SetList);
    } else if (SetList[0]->Ndim() == 1) {
      if (group_ == NO_TYPE) {
        if (isInverted_)
          err = WriteDataInverted(file, SetList);
        else
          err = WriteDataNormal(file, SetList);
      } else
        err = WriteByGroup(file, SetList, group_);
    } else if (SetList[0]->Ndim() == 2)
      err = WriteData2D(file, SetList);
    else if (SetList[0]->Ndim() == 3)
      err = WriteData3D(file, SetList);
    file.CloseFile();
  }
  return err;
}
// Action_ClusterDihedral::ReadDihedrals()
int Action_ClusterDihedral::ReadDihedrals(std::string const& fname) {
  CpptrajFile infile;
  char buffer[256];
  int a1, a2, a3, a4, bins;
  double min;
  if ( infile.OpenRead( fname ) ) return 1;
  mprintf("\tReading dihedral information from %s\n", fname.c_str());
  while (infile.Gets(buffer, 256)==0) {
    // Expected line format: At#1 At#2 At#3 At#4 Bins Min
    // ATOM NUMBERS SHOULD START FROM 1!
    int nvals = sscanf(buffer, "%i %i %i %i %i %lf", &a1, &a2, &a3, &a4, &bins, &min);
    if (nvals < 5) {
      mprinterr("Error: Dihedral file %s: Expected at least 5 values, got %i\n",
                fname.c_str(), nvals);
      mprinterr("Error: Problem line: [%s]\n", buffer);
      mprinterr("Error: Expected format: At#1 At#2 At#3 At#4 Bins [Min]\n");
      return 1; // This should automatically close infile through destructor.
    }
    if (nvals < 6) min = minimum_;
    DCmasks_.push_back( DCmask(a1-1, a2-1, a3-1, a4-1, bins, min) );
    mprintf("\t\t(%i)-(%i)-(%i)-(%i) Bins=%i Min=%.3f\n", a1, a2, a3, a4, bins, min);
  }
  mprintf("\tRead %zu dihedrals.\n", DCmasks_.size());
  infile.CloseFile();
  return 0;
}
/** For each point p, calculate function Kdist(p) which is the distance of
  * the Kth nearest point to p.
  */
void Cluster_DBSCAN::ComputeKdist( int Kval, std::vector<int> const& FramesToCluster ) const {
  std::vector<double> dists;
  std::vector<double> Kdist;
  dists.reserve( FramesToCluster.size() );
  Kdist.reserve( FramesToCluster.size() );
  std::string outfilename = k_prefix_ + "Kdist." + integerToString(Kval) + ".dat";
  mprintf("\tDBSCAN: Calculating Kdist(%i), output to %s\n", Kval, outfilename.c_str());
  for (std::vector<int>::const_iterator point = FramesToCluster.begin();
                                        point != FramesToCluster.end(); ++point)
  {
    // Store distances from this point
    dists.clear();
    for (std::vector<int>::const_iterator otherpoint = FramesToCluster.begin();
                                          otherpoint != FramesToCluster.end(); ++otherpoint)
      dists.push_back( FrameDistances_.GetFdist(*point, *otherpoint) );
    // Sort distances - first dist should always be 0
    std::sort(dists.begin(), dists.end());
    Kdist.push_back( dists[Kval] );
  }
  std::sort( Kdist.begin(), Kdist.end() );
  CpptrajFile Outfile;
  Outfile.OpenWrite(outfilename);
  Outfile.Printf("%-8s %1i%-11s\n", "#Point", Kval, "-dist");
  // Write out largest to smallest
  unsigned int ik = 0;
  for (std::vector<double>::reverse_iterator k = Kdist.rbegin();
                                             k != Kdist.rend(); ++k, ++ik)
    Outfile.Printf("%8u %12.4f\n", ik, *k);
  Outfile.CloseFile();
}
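// Note: the sorted Kdist curve written above is the usual diagnostic for
// choosing the DBSCAN 'epsilon' parameter; a common heuristic is to take
// epsilon near the "elbow" where the Kth-nearest-neighbor distance begins to
// rise sharply, with minpoints set to the same K value.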
bool Parm_CharmmPsf::ID_ParmFormat(CpptrajFile& fileIn) {
  // Assumes already set up
  if (fileIn.OpenFile()) return false;
  std::string nextLine = fileIn.GetLine();
  if (nextLine.empty()) {
    // Close the file before bailing out on an empty first line.
    fileIn.CloseFile();
    return false;
  }
  bool isPSF = ( nextLine.compare(0, 3, "PSF") == 0 );
  fileIn.CloseFile();
  return isPSF;
}
// PDBfile::ID_PDB()
bool PDBfile::ID_PDB(CpptrajFile& fileIn) {
  // NOTE: ASSUME FILE SET UP FOR READ
  if (fileIn.OpenFile()) return false;
  std::string line1 = fileIn.GetLine();
  std::string line2 = fileIn.GetLine();
  fileIn.CloseFile();
  if (!IsPDBkeyword( line1 )) return false;
  if (!IsPDBkeyword( line2 )) return false;
  return true;
}
bool DataIO_OpenDx::ID_DataFormat( CpptrajFile& infile ) {
  bool isDX = false;
  if (!infile.OpenFile()) {
    std::string firstLine = infile.GetLine();
    if (!firstLine.empty())
      isDX = (firstLine.compare(0, 28, "object 1 class gridpositions") == 0);
    infile.CloseFile();
  }
  return isDX;
}
// DataIO_CCP4::ID_DataFormat()
bool DataIO_CCP4::ID_DataFormat( CpptrajFile& infile ) {
  bool isCCP4 = false;
  if (!infile.OpenFile()) {
    unsigned char MAP[4];
    if (infile.Seek(52 * wSize) == 0) {
      infile.Read( MAP, wSize );
      isCCP4 = MapCharsValid( MAP );
    }
    infile.CloseFile();
  }
  return isCCP4;
}
bool DataIO_XVG::ID_DataFormat(CpptrajFile& infile) {
  if (infile.OpenFile()) return false;
  const char* ptr = infile.NextLine();
  while (ptr != 0 && ptr[0] == '#') {
    const char* cc = ptr;
    while (*cc != '\0') {
      if (*cc == 'G') {
        // Look for the spaced-out 'G R O M A C S' banner in the comment header.
        if ( cc[2] == 'R' && cc[4]  == 'O' && cc[6]  == 'M' &&
             cc[8] == 'A' && cc[10] == 'C' && cc[12] == 'S' )
        {
          infile.CloseFile();
          mprintf("DEBUG:\tFound G R O M A C S\n");
          return true;
        }
      }
      ++cc;
    }
    ptr = infile.NextLine();
  }
  infile.CloseFile();
  return false;
}
/** Determine if fileIn is a CIF file. Look for entries beginning with
  * an underscore (indicating data block), and a 'loop_' keyword or
  * '_entry.id' block.
  */
bool CIFfile::ID_CIF(CpptrajFile& fileIn) {
  // NOTE: ASSUME FILE SET UP FOR READ
  if (fileIn.OpenFile()) return false;
  int ndata = 0; // Number of '_XXX' entries seen
  bool foundLoop = false;
  bool foundEntryID = false;
  for (int i = 0; i < 10; i++) {
    std::string lineIn = fileIn.GetLine();
    if (lineIn[0] == '_') ndata++;
    if (lineIn.compare(0,5,"loop_")==0) foundLoop = true;
    if (lineIn.compare(0,9,"_entry.id")==0) foundEntryID = true;
  }
  fileIn.CloseFile();
  return ( ndata > 2 && (foundLoop || foundEntryID) );
}
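// Illustrative only: the opening lines of a typical mmCIF entry that ID_CIF()
// above would accept (several '_XXX' data items plus '_entry.id'/'loop_'
// keywords within the first ten lines). The entry ID is made up.
//
//   data_1ABC
//   #
//   _entry.id   1ABC
//   _audit_conform.dict_name       mmcif_pdbx.dic
//   _audit_conform.dict_version    5.279
//   loop_
//   _database_2.database_id
//   _database_2.database_code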
// DataIO_Mdout::ID_DataFormat()
bool DataIO_Mdout::ID_DataFormat(CpptrajFile& infile) {
  if (infile.OpenFile()) return false;
  bool isMdout = false;
  std::string line = infile.GetLine();
  if (line[0] == '\n') {
    // Amber mdout starts with a blank line, then 10 spaces + dashes,
    // then 10 spaces + 'Amber'; both comparisons use 15 characters.
    line = infile.GetLine();
    if (line.compare(0, 15, "          -----") == 0) {
      line = infile.GetLine();
      if (line.compare(0, 15, "          Amber") == 0)
        isMdout = true;
    }
  }
  infile.CloseFile();
  return isMdout;
}
bool Traj_CharmmCor::ID_TrajFormat(CpptrajFile& fileIn) {
  // File must already be set up for read.
  if (fileIn.OpenFile()) return false;
  bool isCor = false;
  const char* ptr = fileIn.NextLine();
  // Must be at least 1 title line denoted with '*'
  if (ptr != 0 && *ptr == '*') {
    // Scan past all title lines
    while (ptr != 0 && *ptr == '*') ptr = fileIn.NextLine();
    if (ptr != 0) {
      // Next line must be # atoms ONLY
      int ibuf[2];
      if (sscanf(ptr, "%i %i", ibuf, ibuf+1) == 1)
        // Make sure it was a valid integer
        isCor = (ibuf[0] > 0);
    }
  }
  fileIn.CloseFile();
  return isCor;
}
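// Illustrative only: the start of a CHARMM coordinate (COR) file of the form
// ID_TrajFormat() above accepts - one or more '*' title lines followed by a
// line containing only the atom count. The title text and count are made up.
//
//   * PRODUCTION COORDINATES
//   * GENERATED BY CHARMM
//   *
//      2922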
// Traj_AmberCoord::ID_TrajFormat()
bool Traj_AmberCoord::ID_TrajFormat(CpptrajFile& fileIn) {
  // File must already be set up for read
  if (fileIn.OpenFile()) return false;
  if (fileIn.NextLine()==0) return false; // Title
  std::string buffer2 = fileIn.GetLine(); // REMD header/coords
  fileIn.CloseFile();
  // Check if second line contains REMD/HREMD, Amber Traj with REMD header
  if ( IsRemdHeader( buffer2.c_str() ) ) {
    if (debug_>0) mprintf(" AMBER TRAJECTORY with (H)REMD header.\n");
    hasREMD_ = REMD_HEADER_SIZE + (size_t)fileIn.IsDos();
    return true;
  }
  // Check if we can read at least 3 coords of width 8, Amber trajectory
  float TrajCoord[3];
  if ( sscanf(buffer2.c_str(), "%8f%8f%8f", TrajCoord, TrajCoord+1, TrajCoord+2) == 3 ) {
    if (debug_>0) mprintf(" AMBER TRAJECTORY file\n");
    return true;
  }
  return false;
}
bool TinkerFile::ID_Tinker(CpptrajFile& fileIn) {
  // NOTE: ASSUME FILE SET UP FOR READ
  if (fileIn.OpenFile()) return false;
  ArgList firstLine( fileIn.NextLine() );
  ArgList secondLine( fileIn.NextLine() );
  ArgList thirdLine( fileIn.NextLine() );
  fileIn.CloseFile();
  // First line should have <natom> <title> only
  int natom = 0;
  std::string title;
  if ( SetNatomAndTitle(firstLine, natom, title) != 0 ) return false;
  //mprinterr("Past SetNatomAndTitle\n");
  if (secondLine.Nargs() == 6) {
    bool isBoxLine = true;
    for (int i = 0; i < 6; i++) {
      // It is a box line if all 6 tokens are doubles
      try {
        convertToDouble( secondLine.GetStringNext() );
      } catch (std::runtime_error const& e) {
        if (i != 1) return false;
        // We found a non-double in the second token -- it could be an atom
        // name. Check that the rest of the line matches an atom record.
        isBoxLine = false;
        break;
      }
    }
    if (!isBoxLine) {
      // The second line is not a box line, so make sure it is an atom line.
      return IsAtomLine(secondLine);
    } else {
      // The second line WAS a box; now check that the 3rd line is an atom line.
      return IsAtomLine(thirdLine);
    }
  }
  // There is no box, check that the second line is an atom line
  return IsAtomLine(secondLine);
}
bool Traj_Gro::ID_TrajFormat(CpptrajFile& infile) {
  // Title line, atoms line, then resnum, resname, atomname, atomnum, X, Y, Z
  if (infile.OpenFile()) return false;
  int nread = 0;
  if (infile.NextLine() != 0) { // Title
    const char* ptr = infile.NextLine(); // Natom
    if (ptr != 0) {
      // Ensure only a single value on # atoms line
      std::string natom_str( ptr );
      RemoveTrailingWhitespace( natom_str );
      if (validInteger(natom_str)) {
        ptr = infile.NextLine(); // First atom
        if (ptr != 0) {
          char resnum[6], resname[6], atname[6], atnum[6];
          float XYZ[3];
          nread = sscanf(ptr, "%5c%5c%5c%5c%f %f %f", resnum, resname, atname, atnum,
                         XYZ, XYZ+1, XYZ+2);
        }
      }
    }
  }
  infile.CloseFile();
  return (nread == 7);
}
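// Illustrative only: the first three lines of a GROMACS .gro file matching the
// checks above - a free-form title, an atom count on its own line, then a
// fixed-width atom record (5-char residue number, residue name, atom name and
// atom number fields followed by three coordinates in nm). Values are made up.
//
//   System in water
//    3000
//       1SOL     OW    1   1.326   2.045   0.897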
/** Check for an integer (I5) followed by 0-2 scientific floats (E15.7) */
bool Traj_AmberRestart::ID_TrajFormat(CpptrajFile& fileIn) {
  // Assume file set up for read
  if (fileIn.OpenFile()) return false;
  bool isRestart = false;
  if ( fileIn.NextLine() != 0 ) { // Title
    const char* ptr = fileIn.NextLine(); // Natom [time [temp]]
    if (ptr != 0) {
      int i0;
      double D[3];
      int nread = sscanf(ptr, "%5i%15lf%15lf%lf", &i0, D, D+1, D+2);
      if (nread > 0 && nread < 4) {
        // Read at least 3 12.7 coordinates from next line.
        ptr = fileIn.NextLine();
        if (ptr != 0) {
          nread = sscanf(ptr, "%12lf%12lf%12lf", D, D+1, D+2);
          if (nread == 3) isRestart = true;
        }
      }
    }
  }
  fileIn.CloseFile();
  return isRestart;
}
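// Illustrative only: the first three lines of an Amber ASCII restart file of
// the form detected above - a title, an I5 atom count optionally followed by
// the time (and temperature), then coordinates in 6F12.7 format. Numbers are
// made up.
//
//   ACE_ALA_NME restart
//      22  0.5000000E+02
//      3.8269043   4.3905697   3.5099872   4.5321823   4.8225493   3.2710347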
// Traj_AmberCoord::ID_TrajFormat()
bool Traj_AmberCoord::ID_TrajFormat(CpptrajFile& fileIn) {
  // File must already be set up for read
  if (fileIn.OpenFile()) return false;
  if (fileIn.NextLine()==0) return false; // Title
  std::string buffer2 = fileIn.GetLine(); // REMD header/coords
  fileIn.CloseFile();
  // Check if second line contains REMD/HREMD, Amber Traj with REMD header
  if ( IsRemdHeader( buffer2.c_str() ) ) {
    if (debug_>0) mprintf(" AMBER TRAJECTORY with (H)REMD header.\n");
    headerSize_ = REMD_HEADER_SIZE + (size_t)fileIn.IsDos();
    tStart_ = 33; // 42 - 8 - 1
    tEnd_   = 41; // 42 - 1
    return true;
  }
  // TODO: Read these in as indices instead of temperatures
  if ( IsRxsgldHeader( buffer2.c_str() ) ) {
    mprintf(" AMBER TRAJECTORY with RXSGLD header.\n");
    headerSize_ = RXSGLD_HEADER_SIZE + (size_t)fileIn.IsDos();
    tStart_ = 35; // 44 - 8 - 1
    tEnd_   = 43; // 44 - 1
    return true;
  }
  // Check if we can read 3, 6, 9, or 10 coords (corresponding to 1, 2, 3 or
  // > 3 atoms) of width 8; Amber trajectory.
  float TrajCoord[10];
  int nscan = sscanf(buffer2.c_str(), "%8f%8f%8f%8f%8f%8f%8f%8f%8f%8f",
                     TrajCoord,   TrajCoord+1, TrajCoord+2, TrajCoord+3, TrajCoord+4,
                     TrajCoord+5, TrajCoord+6, TrajCoord+7, TrajCoord+8, TrajCoord+9);
  if (nscan == 3 || nscan == 6 || nscan == 9 || nscan == 10) {
    if (debug_>0) mprintf(" AMBER TRAJECTORY file\n");
    return true;
  }
  return false;
}
// Action_Spam::init() Action::RetType Action_Spam::Init(ArgList& actionArgs, TopologyList* PFL, DataSetList* DSL, DataFileList* DFL, int debugIn) { // Always use imaged distances InitImaging(true); // This is needed everywhere in this function scope FileName filename; // See if we're doing pure water. If so, we don't need a peak file purewater_ = actionArgs.hasKey("purewater"); if (purewater_) { // We still need the cutoff double cut = actionArgs.getKeyDouble("cut", 12.0); cut2_ = cut * cut; doublecut_ = 2 * cut; onecut2_ = 1 / cut2_; // See if we write to a data file datafile_ = actionArgs.GetStringKey("out"); // Generate the data set name, and hold onto the master data set list std::string ds_name = actionArgs.GetStringKey("name"); if (ds_name.empty()) ds_name = myDSL_.GenerateDefaultName("SPAM"); // We only have one data set averaging over every water. Add it here myDSL_.AddSet(DataSet::DOUBLE, ds_name, NULL); solvname_ = actionArgs.GetStringKey("solv"); if (solvname_.empty()) solvname_ = std::string("WAT"); }else { // Get the file name with the peaks defined in it filename.SetFileName( actionArgs.GetStringNext() ); if (filename.empty() || !File::Exists(filename)) { mprinterr("Spam: Error: Peak file [%s] does not exist!\n", filename.full()); return Action::ERR; } // Get the remaining optional arguments solvname_ = actionArgs.GetStringKey("solv"); if (solvname_.empty()) solvname_ = std::string("WAT"); reorder_ = actionArgs.hasKey("reorder"); bulk_ = actionArgs.getKeyDouble("bulk", 0.0); double cut = actionArgs.getKeyDouble("cut", 12.0); cut2_ = cut * cut; doublecut_ = 2 * cut; onecut2_ = 1 / cut2_; std::string infoname = actionArgs.GetStringKey("info"); if (infoname.empty()) infoname = std::string("spam.info"); infofile_ = DFL->AddCpptrajFile(infoname, "SPAM info"); if (infofile_ == 0) return Action::ERR; // The default maskstr is the Oxygen atom of the solvent summaryfile_ = actionArgs.GetStringKey("summary"); // Divide site size by 2 to make it half the edge length (or radius) site_size_ = actionArgs.getKeyDouble("site_size", 2.5) / 2.0; sphere_ = actionArgs.hasKey("sphere"); // If it's a sphere, square the radius to compare with if (sphere_) site_size_ *= site_size_; datafile_ = actionArgs.GetStringKey("out"); std::string ds_name = actionArgs.GetStringKey("name"); if (ds_name.empty()) ds_name = myDSL_.GenerateDefaultName("SPAM"); // Parse through the peaks file and extract the peaks CpptrajFile peakfile; if (peakfile.OpenRead(filename)) { mprinterr("SPAM: Error: Could not open %s for reading!\n", filename.full()); return Action::ERR; } std::string line = peakfile.GetLine(); int npeaks = 0; while (!line.empty()) { if (sscanf(line.c_str(), "%d", &npeaks) != 1) { line = peakfile.GetLine(); continue; } line = peakfile.GetLine(); break; } while (!line.empty()) { double x, y, z, dens; if (sscanf(line.c_str(), "C %lg %lg %lg %lg", &x, &y, &z, &dens) != 4) { line = peakfile.GetLine(); continue; } line = peakfile.GetLine(); peaks_.push_back(Vec3(x, y, z)); } peakfile.CloseFile(); // Check that our initial number of peaks matches our parsed peaks. Warn // otherwise if (npeaks != (int)peaks_.size()) mprinterr("SPAM: Warning: %s claims to have %d peaks, but really has %d!\n", filename.full(), npeaks, peaks_.size()); // Now add all of the data sets MetaData md(ds_name); for (int i = 0; i < (int)peaks_.size(); i++) { md.SetAspect( integerToString(i+1) ); // TODO: Should this be Idx? 
if (myDSL_.AddSet(DataSet::DOUBLE, md) == 0) return Action::ERR; // Add a new list of integers to keep track of omitted frames std::vector<int> vec; peakFrameData_.push_back(vec); } } // Print info now if (purewater_) { mprintf("SPAM: Calculating bulk value for pure solvent\n"); if (!datafile_.empty()) mprintf("SPAM: Printing solvent energies to %s\n", datafile_.c_str()); mprintf("SPAM: Using a %.2f Angstrom non-bonded cutoff with shifted EEL.\n", sqrt(cut2_)); if (reorder_) mprintf("SPAM: Warning: Re-ordering makes no sense for pure solvent.\n"); if (!summaryfile_.empty()) mprintf("SPAM: Printing solvent SPAM summary to %s\n", summaryfile_.c_str()); }else { mprintf("SPAM: Solvent [%s] density peaks taken from %s.\n", solvname_.c_str(), filename.base()); mprintf("SPAM: %d density peaks will be analyzed from %s.\n", peaks_.size(), filename.base()); mprintf("SPAM: Occupation information printed to %s.\n", infofile_->Filename().full()); mprintf("SPAM: Sites are "); if (sphere_) mprintf("spheres with diameter %.3lf\n", site_size_); else mprintf("boxes with edge length %.3lf\n", site_size_); if (reorder_) { mprintf("SPAM: Re-ordering trajectory so each site always has "); mprintf("the same water molecule.\n"); } if (summaryfile_.empty() && datafile_.empty()) { if (!reorder_) { mprinterr("SPAM: Error: Not re-ordering trajectory or calculating energies. "); mprinterr("Nothing to do!\n"); return Action::ERR; } mprintf("SPAM: Not calculating any SPAM energies\n"); }else { mprintf("SPAM: Using a non-bonded cutoff of %.2lf Ang. with a EEL shifting function.\n", sqrt(cut2_)); mprintf("SPAM: Bulk solvent SPAM energy taken as %.3lf kcal/mol\n", bulk_); } } mprintf("#Citation: Cui, G.; Swails, J.M.; Manas, E.S.; \"SPAM: A Simple Approach\n" "# for Profiling Bound Water Molecules\"\n" "# J. Chem. Theory Comput., 2013, 9 (12), pp 5539–5549.\n"); return Action::OK; }
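// Illustrative only: a peak file of the form the SPAM parser above accepts.
// The first numeric line gives the expected number of peaks; each subsequent
// peak line is 'C <x> <y> <z> <density>'. All values here are made up.
//
//   3
//   C   12.450    8.031   -4.112    0.873
//   C    9.117   10.556    2.004    0.791
//   C   -1.830    5.442    7.625    0.745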
int Cluster_DPeaks::ChoosePointsAutomatically() {
  // Right now all density values are discrete. Try to choose outliers at each
  // value for which there is density.
/*
  // For each point, calculate average distance (X,Y) to points in next and
  // previous density values.
  const double dens_cut = 3.0 * 3.0;
  const double dist_cut = 1.32 * 1.32;
  for (Carray::const_iterator point0 = Points_.begin(); point0 != Points_.end(); ++point0)
  {
    int Npts = 0;
    for (Carray::const_iterator point1 = Points_.begin(); point1 != Points_.end(); ++point1)
    {
      if (point0 != point1) {
        // Only do this for close densities
        double dX = (double)(point0->PointsWithinEps() - point1->PointsWithinEps());
        double dX2 = dX * dX;
        double dY = (point0->Dist() - point1->Dist());
        double dY2 = dY * dY;
        if (dX2 < dens_cut && dY2 < dist_cut) {
          Npts++;
        }
      }
    }
    mprintf("%i %i %i\n", point0->PointsWithinEps(), point0->Fnum()+1, Npts);
  }
*/
/*
  CpptrajFile tempOut;
  tempOut.OpenWrite("temp.dat");
  int currentDensity = -1;
  double distAv = 0.0;
  double distSD = 0.0;
  double sumWts = 0.0;
  int nValues = 0;
  Carray::const_iterator lastPoint = Points_.end() + 1;
  for (Carray::const_iterator point = Points_.begin(); point != lastPoint; ++point)
  {
    if (point == Points_.end() || point->PointsWithinEps() != currentDensity) {
      if (nValues > 0) {
        distAv = distAv / sumWts; //(double)nValues;
        distSD = (distSD / sumWts) - (distAv * distAv);
        if (distSD > 0.0)
          distSD = sqrt(distSD);
        else
          distSD = 0.0;
        //mprintf("Density %i: %i values Avg= %g SD= %g SumWts= %g\n", currentDensity,
        //        nValues, distAv, distSD, sumWts);
        tempOut.Printf("%i %g\n", currentDensity, distAv);
      }
      if (point == Points_.end()) break;
      currentDensity = point->PointsWithinEps();
      distAv = 0.0;
      distSD = 0.0;
      sumWts = 0.0;
      nValues = 0;
    }
    double wt = exp(point->Dist());
    double dval = point->Dist() * wt;
    sumWts += wt;
    distAv += dval;
    distSD += (dval * dval);
    nValues++;
  }
  tempOut.CloseFile();
*/
  // BEGIN CALCULATING WEIGHTED DISTANCE AVERAGE
  CpptrajFile tempOut;
  tempOut.OpenWrite("temp.dat");
  DataSet_Mesh weightedAverage;
  Carray::const_iterator cp = Points_.begin();
  // Skip local density of 0.
  //while (cp->PointsWithinEps() == 0 && cp != Points_.end()) ++cp;
  while (cp != Points_.end()) {
    int densityVal = cp->PointsWithinEps();
    Carray densityArray;
    // Add all points of current density; check for the end of the array
    // before dereferencing the iterator.
    while (cp != Points_.end() && cp->PointsWithinEps() == densityVal)
      densityArray.push_back( *(cp++) );
    mprintf("Density value %i has %zu points.\n", densityVal, densityArray.size());
    // Sort array by distance
    std::sort(densityArray.begin(), densityArray.end(), Cpoint::dist_sort());
    // Take the average of the points weighted by their position.
    double wtDistAv = 0.0;
    double sumWts = 0.0;
    //std::vector<double> weights;
    //weights.reserve( densityArray.size() );
    int maxPt = (int)densityArray.size() - 1;
    for (int ip = 0; ip != (int)densityArray.size(); ++ip) {
      double wt = exp( (double)(ip - maxPt) );
      //mprintf("\t%10i %10u %10u %10g\n", densityVal, ip, maxPt, wt);
      wtDistAv += (densityArray[ip].Dist() * wt);
      sumWts += wt;
      //weights.push_back( wt );
    }
    wtDistAv /= sumWts;
    // Calculate the weighted sample variance
    //double distSD = 0.0;
    //for (unsigned int ip = 0; ip != densityArray.size(); ++ip) {
    //  double diff = densityArray[ip].Dist() - wtDistAv;
    //  distSD += weights[ip] * (diff * diff);
    //}
    //distSD /= sumWts;
    weightedAverage.AddXY(densityVal, wtDistAv);
    //tempOut.Printf("%i %g %g %g\n", densityVal, wtDistAv, sqrt(distSD), sumWts);
    tempOut.Printf("%i %g %g\n", densityVal, wtDistAv, sumWts);
/*
    // Find the median.
double median, Q1, Q3; if (densityArray.size() == 1) { median = densityArray[0].Dist(); Q1 = median; Q3 = median; } else { unsigned int q3_beg; unsigned int med_idx = densityArray.size() / 2; // Always 0 <= Q1 < med_idx if ((densityArray.size() % 2) == 0) { median = (densityArray[med_idx].Dist() + densityArray[med_idx-1].Dist()) / 2.0; q3_beg = med_idx; } else { median = densityArray[med_idx].Dist(); q3_beg = med_idx + 1; } if (densityArray.size() == 2) { Q1 = densityArray[0].Dist(); Q3 = densityArray[1].Dist(); } else { // Find lower quartile unsigned int q1_idx = med_idx / 2; if ((med_idx % 2) == 0) Q1 = (densityArray[q1_idx].Dist() + densityArray[q1_idx-1].Dist()) / 2.0; else Q1 = densityArray[q1_idx].Dist(); // Find upper quartile unsigned int q3_size = densityArray.size() - q3_beg; unsigned int q3_idx = (q3_size / 2) + q3_beg; if ((q3_size %2) == 0) Q3 = (densityArray[q3_idx].Dist() + densityArray[q3_idx-1].Dist()) / 2.0; else Q3 = densityArray[q3_idx].Dist(); } } mprintf("\tMedian dist value is %g. Q1= %g Q3= %g\n", median, Q1, Q3); */ } tempOut.CloseFile(); // END CALCULATING WEIGHTED DISTANCE AVERAGE /* // TEST tempOut.OpenWrite("temp2.dat"); std::vector<double> Hist( Points_.back().PointsWithinEps()+1, 0.0 ); int gWidth = 3; double cval = 3.0; double two_c_squared = 2.0 * cval * cval; mprintf("DBG: cval= %g, Gaussian denominator is %g\n", cval, two_c_squared); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { int bval = weightedAverage.X(wtIdx); for (int xval = std::max(bval - gWidth, 0); xval != std::min(bval + gWidth + 1, (int)Hist.size()); xval++) { // a: height (weighted average) // b: center (density value) // c: width // x: density value in histogram //int xval = weightedAverage.X(idx); //double bval = weightedAverage.X(wtIdx); //double bval = (double)wtIdx; double diff = (double)(xval - bval); //Hist[xval] += (weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); Hist[xval] = std::max(Hist[xval], weightedAverage.Y(wtIdx) * exp( -( (diff * diff) / two_c_squared ) )); } } for (unsigned int idx = 0; idx != Hist.size(); idx++) tempOut.Printf("%u %g\n", idx, Hist[idx]); tempOut.CloseFile(); // END TEST */ /* // TEST // Construct best-fit line segments tempOut.OpenWrite("temp2.dat"); double slope, intercept, correl; int segment_length = 3; DataSet_Mesh Segment; Segment.Allocate1D( segment_length ); for (int wtIdx = 0; wtIdx != (int)weightedAverage.Size(); wtIdx++) { Segment.Clear(); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) Segment.AddXY(weightedAverage.X(idx), weightedAverage.Y(idx)); Segment.LinearRegression(slope, intercept, correl, true); for (int idx = std::max(wtIdx - 1, 0); // TODO: use segment_length idx != std::min(wtIdx + 2, (int)weightedAverage.Size()); idx++) { double x = weightedAverage.X(idx); double y = slope * x + intercept; tempOut.Printf("%g %g %i\n", x, y, weightedAverage.X(wtIdx)); } } tempOut.CloseFile(); // END TEST */ // BEGIN WEIGHTED RUNNING AVG/SD OF DISTANCES // For each point, determine if it is greater than the average of the // weighted average distances of the previous, current, and next densities. 
  int width = 2;
  int currentDensity = 0;
  int wtIdx = 0;
  double currentAvg = 0.0;
  double deltaSD = 0.0;
  double deltaAv = 0.0;
  int Ndelta = 0;
  CpptrajFile raOut;
  if (!rafile_.empty()) raOut.OpenWrite(rafile_);
  CpptrajFile raDelta;
  if (!radelta_.empty()) raDelta.OpenWrite(radelta_);
  std::vector<unsigned int> candidateIdxs;
  std::vector<double> candidateDeltas;
  cp = Points_.begin();
  // Skip over points with zero density
  while (cp != Points_.end() && cp->PointsWithinEps() == 0) ++cp;
  // Advance wtIdx to the first non-skipped density value; bound the search by
  // the size of the weighted-average set so X() is never called out of range.
  while (cp != Points_.end() && wtIdx < (int)weightedAverage.Size() &&
         weightedAverage.X(wtIdx) != cp->PointsWithinEps())
    ++wtIdx;
  for (Carray::const_iterator point = cp; point != Points_.end(); ++point)
  {
    if (point->PointsWithinEps() != currentDensity) {
      //currentAvg = weightedAverage.Y(wtIdx);
      // New density value. Determine average.
      currentAvg = 0.0;
      // unsigned int Npt = 0;
      double currentWt = 0.0;
      for (int idx = std::max(wtIdx - width, 0);
           idx != std::min(wtIdx + width + 1, (int)weightedAverage.Size()); idx++)
      {
        //currentAvg += weightedAverage.Y(idx);
        //Npt++;
        double wt = weightedAverage.Y(idx);
        currentAvg += (weightedAverage.Y(idx) * wt);
        currentWt += wt;
      }
      //currentAvg /= (double)Npt;
      currentAvg /= currentWt;
      //smoothAv += currentAvg;
      //smoothSD += (currentAvg * currentAvg);
      //Nsmooth++;
      currentDensity = point->PointsWithinEps();
      if (raOut.IsOpen())
        raOut.Printf("%i %g %g\n", currentDensity, currentAvg, weightedAverage.Y(wtIdx));
      wtIdx++;
    }
    double delta = (point->Dist() - currentAvg);
    if (delta > 0.0) {
      //delta *= log((double)currentDensity);
      if (raDelta.IsOpen())
        raDelta.Printf("%8i %8.3f %8i %8.3f %8.3f\n",
                       currentDensity, delta, point->Fnum()+1, point->Dist(), currentAvg);
      candidateIdxs.push_back( point - Points_.begin() );
      candidateDeltas.push_back( delta );
      deltaAv += delta;
      deltaSD += (delta * delta);
      Ndelta++;
    }
  }
  raOut.CloseFile();
  deltaAv /= (double)Ndelta;
  deltaSD = (deltaSD / (double)Ndelta) - (deltaAv * deltaAv);
  if (deltaSD > 0.0)
    deltaSD = sqrt(deltaSD);
  else
    deltaSD = 0.0;
  if (raDelta.IsOpen())
    raDelta.Printf("#DeltaAvg= %g DeltaSD= %g\n", deltaAv, deltaSD);
  raDelta.CloseFile();
  int cnum = 0;
  for (unsigned int i = 0; i != candidateIdxs.size(); i++) {
    if (candidateDeltas[i] > (deltaSD)) {
      Points_[candidateIdxs[i]].SetCluster( cnum++ );
      mprintf("\tPoint %u (frame %i, density %i) selected as candidate for cluster %i\n",
              candidateIdxs[i], Points_[candidateIdxs[i]].Fnum()+1,
              Points_[candidateIdxs[i]].PointsWithinEps(), cnum-1);
    }
  }
  // END WEIGHTED AVG/SD OF DISTANCES
/*
  // Currently doing this by calculating the running average of density vs
  // distance, then choosing points with distance > twice the SD of the
  // running average.
  // NOTE: Store in a mesh data set for now in case we want to spline etc later.
if (avg_factor_ < 1) avg_factor_ = 10; unsigned int window_size = Points_.size() / (unsigned int)avg_factor_; mprintf("\tRunning avg window size is %u\n", window_size); // FIXME: Handle case where window_size < frames DataSet_Mesh runavg; unsigned int ra_size = Points_.size() - window_size + 1; runavg.Allocate1D( ra_size ); double dwindow = (double)window_size; double sumx = 0.0; double sumy = 0.0; for (unsigned int i = 0; i < window_size; i++) { sumx += (double)Points_[i].PointsWithinEps(); sumy += Points_[i].Dist(); } runavg.AddXY( sumx / dwindow, sumy / dwindow ); for (unsigned int i = 1; i < ra_size; i++) { unsigned int nextwin = i + window_size - 1; unsigned int prevwin = i - 1; sumx = (double)Points_[nextwin].PointsWithinEps() - (double)Points_[prevwin].PointsWithinEps() + sumx; sumy = Points_[nextwin].Dist() - Points_[prevwin].Dist() + sumy; runavg.AddXY( sumx / dwindow, sumy / dwindow ); } // Write running average if (!rafile_.empty()) { CpptrajFile raOut; if (raOut.OpenWrite(rafile_)) mprinterr("Error: Could not open running avg file '%s' for write.\n", rafile_.c_str()); else { for (unsigned int i = 0; i != runavg.Size(); i++) raOut.Printf("%g %g\n", runavg.X(i), runavg.Y(i)); raOut.CloseFile(); } } double ra_sd; double ra_avg = runavg.Avg( ra_sd ); // Double stdev to use as cutoff for findning anomalously high peaks. ra_sd *= 2.0; mprintf("\tAvg of running avg set is %g, SD*2.0 (delta cutoff) is %g\n", ra_avg, ra_sd); // For each point in density vs distance plot, determine which running // average point is closest. If the difference between the point and the // running average point is > 2.0 the SD of the running average data, // consider it a 'peak'. CpptrajFile raDelta; if (!radelta_.empty()) raDelta.OpenWrite("radelta.dat"); if (raDelta.IsOpen()) raDelta.Printf("%-10s %10s %10s\n", "#Frame", "RnAvgPos", "Delta"); unsigned int ra_position = 0; // Position in the runavg DataSet unsigned int ra_end = runavg.Size() - 1; int cnum = 0; for (Carray::iterator point = Points_.begin(); point != Points_.end(); ++point) { if (ra_position != ra_end) { // Is the next running avgd point closer to this point? while (ra_position != ra_end) { double dens = (double)point->PointsWithinEps(); double diff0 = fabs( dens - runavg.X(ra_position ) ); double diff1 = fabs( dens - runavg.X(ra_position+1) ); if (diff1 < diff0) ++ra_position; // Next running avg position is closer for this point. else break; // This position is closer. } } double delta = point->Dist() - runavg.Y(ra_position); if (raDelta.IsOpen()) raDelta.Printf("%-10i %10u %10g", point->Fnum()+1, ra_position, delta); if (delta > ra_sd) { if (raDelta.IsOpen()) raDelta.Printf(" POTENTIAL CLUSTER %i", cnum); point->SetCluster(cnum++); } if (raDelta.IsOpen()) raDelta.Printf("\n"); } raDelta.CloseFile(); */ return cnum; }
// ----------------------------------------------------------------------------- int Cluster_DPeaks::Cluster_DiscreteDensity() { mprintf("\tStarting DPeaks clustering, discrete density calculation.\n"); Points_.clear(); // First determine which frames are being clustered. for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame) if (!FrameDistances_.IgnoringRow( frame )) Points_.push_back( Cpoint(frame) ); // Sanity check. if (Points_.size() < 2) { mprinterr("Error: Only 1 frame in initial clustering.\n"); return 1; } // For each point, determine how many others are within epsilon. Also // determine maximum distance between any two points. mprintf("\tDetermining local density of each point.\n"); ProgressBar cluster_progress( Points_.size() ); double maxDist = -1.0; for (Carray::iterator point0 = Points_.begin(); point0 != Points_.end(); ++point0) { cluster_progress.Update(point0 - Points_.begin()); int density = 0; for (Carray::const_iterator point1 = Points_.begin(); point1 != Points_.end(); ++point1) { if (point0 != point1) { double dist = FrameDistances_.GetFdist(point0->Fnum(), point1->Fnum()); maxDist = std::max(maxDist, dist); if ( dist < epsilon_ ) density++; } } point0->SetPointsWithinEps( density ); } mprintf("DBG: Max dist= %g\n", maxDist); // DEBUG: Frame/Density CpptrajFile fdout; fdout.OpenWrite("fd.dat"); for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point) fdout.Printf("%i %i\n", point->Fnum()+1, point->PointsWithinEps()); fdout.CloseFile(); // Sort by density here. Otherwise array indices will be invalid later. std::sort( Points_.begin(), Points_.end(), Cpoint::pointsWithinEps_sort() ); // For each point, find the closest point that has higher density. Since // array is now sorted by density the last point has the highest density. Points_.back().SetDist( maxDist ); mprintf("\tFinding closest neighbor point with higher density for each point.\n"); unsigned int lastidx = Points_.size() - 1; cluster_progress.SetupProgress( lastidx ); for (unsigned int idx0 = 0; idx0 != lastidx; idx0++) { cluster_progress.Update( idx0 ); double min_dist = maxDist; int nearestIdx = -1; // Index of nearest neighbor with higher density Cpoint& point0 = Points_[idx0]; //mprintf("\nDBG:\tSearching for nearest neighbor to idx %u with higher density than %i.\n", // idx0, point0.PointsWithinEps()); // Since array is sorted by density we can start at the next point. for (unsigned int idx1 = idx0+1; idx1 != Points_.size(); idx1++) { Cpoint const& point1 = Points_[idx1]; double dist1_2 = FrameDistances_.GetFdist(point0.Fnum(), point1.Fnum()); if (point1.PointsWithinEps() > point0.PointsWithinEps()) { if (dist1_2 < min_dist) { min_dist = dist1_2; nearestIdx = (int)idx1; //mprintf("DBG:\t\tNeighbor idx %i is closer (density %i, distance %g)\n", // nearestIdx, point1.PointsWithinEps(), min_dist); } } } point0.SetDist( min_dist ); //mprintf("DBG:\tClosest point to %u with higher density is %i (distance %g)\n", // idx0, nearestIdx, min_dist); point0.SetNearestIdx( nearestIdx ); } // Plot density vs distance for each point. if (!dvdfile_.empty()) { CpptrajFile output; if (output.OpenWrite(dvdfile_)) mprinterr("Error: Could not open density vs distance plot '%s' for write.\n", dvdfile_.c_str()); // TODO: Make fatal? 
    else {
      output.Printf("%-10s %10s %s %10s %10s\n", "#Density", "Distance", "Frame", "Idx", "Neighbor");
      for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point)
        output.Printf("%-10i %10g \"%i\" %10u %10i\n", point->PointsWithinEps(), point->Dist(),
                      point->Fnum()+1, point - Points_.begin(), point->NearestIdx());
      output.CloseFile();
    }
  }
  return 0;
}
// -----------------------------------------------------------------------------
int Cluster_DPeaks::Cluster_GaussianKernel() {
  mprintf("\tStarting DPeaks clustering. Using Gaussian kernel to calculate density.\n");
  // First determine which frames are being clustered.
  Points_.clear();
  int oidx = 0;
  for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame)
    if (!FrameDistances_.IgnoringRow( frame ))
      Points_.push_back( Cpoint(frame, oidx++) );
  // Sanity check.
  if (Points_.size() < 2) {
    mprinterr("Error: Only 1 frame in initial clustering.\n");
    return 1;
  }
  // Sort distances
  std::vector<float> Distances;
  for (ClusterMatrix::const_iterator mat = FrameDistances_.begin();
                                     mat != FrameDistances_.end(); ++mat)
    Distances.push_back( *mat );
  std::sort( Distances.begin(), Distances.end() );
  unsigned int idx = (unsigned int)((double)Distances.size() * 0.02);
  double bandwidth = (double)Distances[idx];
  mprintf("idx= %u, bandwidth= %g\n", idx, bandwidth);
  // Density via Gaussian kernel
  double maxDist = -1.0;
  for (unsigned int i = 0; i != Points_.size(); i++) {
    for (unsigned int j = i+1; j != Points_.size(); j++) {
      double dist = FrameDistances_.GetFdist(Points_[i].Fnum(), Points_[j].Fnum());
      maxDist = std::max( maxDist, dist );
      dist /= bandwidth;
      double gk = exp(-(dist * dist));
      Points_[i].AddDensity( gk );
      Points_[j].AddDensity( gk );
    }
  }
  mprintf("Max dist= %g\n", maxDist);
  CpptrajFile rhoOut;
  rhoOut.OpenWrite("rho.dat");
  for (unsigned int i = 0; i != Points_.size(); i++)
    rhoOut.Printf("%u %g\n", i+1, Points_[i].Density());
  rhoOut.CloseFile();
  // Sort by density, descending
  std::stable_sort( Points_.begin(), Points_.end(), Cpoint::density_sort_descend() );
  CpptrajFile ordrhoOut;
  ordrhoOut.OpenWrite("ordrho.dat");
  for (unsigned int i = 0; i != Points_.size(); i++)
    ordrhoOut.Printf("%u %g %i %i\n", i+1, Points_[i].Density(),
                     Points_[i].Fnum()+1, Points_[i].Oidx()+1);
  ordrhoOut.CloseFile();
  // Determine minimum distances
  int first_idx = Points_[0].Oidx();
  Points_[first_idx].SetDist( -1.0 );
  Points_[first_idx].SetNearestIdx(-1);
  for (unsigned int ii = 1; ii != Points_.size(); ii++) {
    int ord_i = Points_[ii].Oidx();
    Points_[ord_i].SetDist( maxDist );
    for (unsigned int jj = 0; jj != ii; jj++) {
      int ord_j = Points_[jj].Oidx();
      double dist = FrameDistances_.GetFdist(Points_[ord_i].Fnum(), Points_[ord_j].Fnum());
      if (dist < Points_[ord_i].Dist()) {
        Points_[ord_i].SetDist( dist );
        // Record the closer higher-density neighbor for point ord_i.
        Points_[ord_i].SetNearestIdx( ord_j );
      }
    }
  }
  if (!dvdfile_.empty()) {
    CpptrajFile output;
    if (output.OpenWrite(dvdfile_)) return 1;
    for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point)
      output.Printf("%g %g %i\n", point->Density(), point->Dist(), point->NearestIdx()+1);
    output.CloseFile();
  }
  return 0;
}
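// The (density, distance-to-nearest-higher-density-point) pairs written to
// dvdfile_ above form a 'decision graph'; in the standard density-peaks
// heuristic, cluster centers are the points for which BOTH values are large.
// A minimal sketch of that selection, assuming the Cpoint accessors used
// above (Density(), Dist(), SetCluster()) and caller-chosen cutoffs; this is
// not the selection scheme implemented in ChoosePointsAutomatically().
static int MarkCentersByCutoff(std::vector<Cpoint>& points,
                               double densityCut, double deltaCut)
{
  int cnum = 0;
  for (std::vector<Cpoint>::iterator pt = points.begin(); pt != points.end(); ++pt)
    if (pt->Density() > densityCut && pt->Dist() > deltaCut)
      pt->SetCluster( cnum++ ); // Mark this point as a cluster center.
  return cnum; // Number of cluster centers found.
}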
/** Header is 256 4-byte words. Integer unless otherwise noted. First 56 words are: * 0-2: columns, rows, sections (fastest changing to slowest) * 3: mode: 0 = envelope stored as signed bytes (from -128 lowest to 127 highest) * 1 = Image stored as Integer*2 * 2 = Image stored as Reals * 3 = Transform stored as Complex Integer*2 * 4 = Transform stored as Complex Reals * 5 == 0 * 4-6: Column, row, and section offsets * 7-9: Intervals along X, Y, Z * 10-15: float; 3x cell lengths (Ang) and 3x cell angles (deg) * 16-18: Map of which axes correspond to cols, rows, sections (1,2,3 = x,y,z) * 19-21: float; Min, max, and mean density * 22-24: Space group, bytes used for storing symm ops, flag for skew transform * If skew flag != 0, skew transformation is from standard orthogonal * coordinate frame (as used for atoms) to orthogonal map frame, as: * Xo(map) = S * (Xo(atoms) - t) * 25-33: Skew matrix 'S' (in order S11, S12, S13, S21 etc) * 34-36: Skew translation 't' * 37-51: For future use and can be skipped. * 52: char; 'MAP ' * 53: char; machine stamp for determining endianness * 54: float; RMS deviation of map from mean * 55: Number of labels */ int DataIO_CCP4::ReadData(FileName const& fname, DataSetList& datasetlist, std::string const& dsname) { CpptrajFile infile; if (infile.OpenRead( fname )) return 1; // Read first 56 words of the header into a buffer. headerbyte buffer; if (infile.Read(buffer.i, 224*sizeof(unsigned char)) < 1) { mprinterr("Error: Could not buffer CCP4 header.\n"); return 1; } if (debug_ > 0) mprintf("DEBUG: MAP= '%c %c %c %c' MACHST= '%x %x %x %x'\n", buffer.c[208], buffer.c[209], buffer.c[210], buffer.c[211], buffer.c[212], buffer.c[213], buffer.c[214], buffer.c[215]); // SANITY CHECK if (!MapCharsValid(buffer.c + 208)) { mprinterr("Error: CCP4 file missing 'MAP ' string at word 53\n"); return 1; } // Check endianess bool isBigEndian = (buffer.c[212] == 0x11 && buffer.c[213] == 0x11 && buffer.c[214] == 0x00 && buffer.c[215] == 0x00); if (!isBigEndian) { if (debug_ > 0) mprintf("DEBUG: Little endian.\n"); // SANITY CHECK if ( !(buffer.c[212] == 0x44 && buffer.c[213] == 0x41 && buffer.c[214] == 0x00 && buffer.c[215] == 0x00) ) mprintf("Warning: Invalid machine stamp: %x %x %x %x : assuming little endian.\n", buffer.c[212], buffer.c[213], buffer.c[214], buffer.c[215]); } else { if (debug_ > 0) mprintf("DEBUG: Big endian.\n"); // Perform endian swapping on header if necessary endian_swap(buffer.i, 56); } // Print DEBUG info if (debug_ > 0) { mprintf("DEBUG: Columns=%i Rows=%i Sections=%i\n", buffer.i[0], buffer.i[1], buffer.i[2]); mprintf("DEBUG: Mode=%i\n", buffer.i[3]); mprintf("DEBUG: Offsets: C=%i R=%i S=%i\n", buffer.i[4], buffer.i[5], buffer.i[6]); mprintf("DEBUG: NXYZ={ %i %i %i }\n", buffer.i[7], buffer.i[8], buffer.i[9]); mprintf("DEBUG: Box XYZ={ %f %f %f } ABG={ %f %f %f }\n", buffer.f[10], buffer.f[11], buffer.f[12], buffer.f[13], buffer.f[14], buffer.f[15]); mprintf("DEBUG: Map: ColAxis=%i RowAxis=%i SecAxis=%i\n", buffer.i[16], buffer.i[17], buffer.i[18]); mprintf("DEBUG: SpaceGroup#=%i SymmOpBytes=%i SkewFlag=%i\n", buffer.i[22], buffer.i[23], buffer.i[24]); const int* MSKEW = buffer.i + 25; mprintf("DEBUG: Skew matrix: %i %i %i\n" " %i %i %i\n" " %i %i %i\n", MSKEW[0], MSKEW[1], MSKEW[2], MSKEW[3], MSKEW[4], MSKEW[5], MSKEW[6], MSKEW[7], MSKEW[8]); const int* TSKEW = buffer.i + 34; mprintf("DEBUG: Skew translation: %i %i %i\n", TSKEW[0], TSKEW[1], TSKEW[2]); mprintf("DEBUG: Nlabels=%i\n", buffer.i[55]); } // Check input data. Only support mode 2 for now. 
if (buffer.i[3] != 2) { mprinterr("Error: Mode %i; currently only mode 2 for CCP4 files is supported.\n", buffer.i[3]); return 1; } // Check offsets. if (buffer.i[4] != 0 || buffer.i[5] != 0 || buffer.i[6] != 0) mprintf("Warning: Non-zero offsets present. This is not yet supported and will be ignored.\n"); // Check that mapping is col=x row=y section=z if (buffer.i[16] != 1 || buffer.i[17] != 2 || buffer.i[18] != 3) { mprinterr("Error: Currently only support cols=X, rows=Y, sections=Z\n"); return 1; } if (buffer.i[24] != 0) { mprintf("Warning: Skew information present but not yet supported and will be ignored.\n"); return 1; } // Read 10 80 character text labels char Labels[801]; Labels[800] = '\0'; infile.Read( Labels, 200*wSize ); mprintf("\t%s\n", Labels); // Symmetry records: operators separated by * and grouped into 'lines' of 80 characters int NsymmRecords = buffer.i[23] / 80; if (NsymmRecords > 0) { char symBuffer[81]; mprintf("\t%i symmetry records.\n", NsymmRecords); for (int ib = 0; ib != NsymmRecords; ib++) { infile.Gets( symBuffer, 80 ); mprintf("\t%s\n", symBuffer); } } // Add grid data set. Default to float for now. DataSet* gridDS = datasetlist.AddSet( DataSet::GRID_FLT, dsname, "GRID" ); if (gridDS == 0) return 1; DataSet_GridFlt& grid = static_cast<DataSet_GridFlt&>( *gridDS ); // Allocate grid from dims and spacing. FIXME OK to assume zero origin? if (grid.Allocate_N_O_Box( buffer.i[7], buffer.i[8], buffer.i[9], Vec3(0.0), Box(buffer.f + 10) ) != 0) { mprinterr("Error: Could not allocate grid.\n"); return 1; } // FIXME: Grids are currently indexed so Z is fastest changing. // Should be able to change indexing in grid DataSet. size_t mapSize = buffer.i[7] * buffer.i[8] * buffer.i[9]; mprintf("\tCCP4 map has %zu elements\n", mapSize); mprintf("\tDensity: Min=%f Max=%f Mean=%f RMS=%f\n", buffer.f[19], buffer.f[20], buffer.f[21], buffer.f[54]); std::vector<float> mapbuffer( mapSize ); int mapBytes = mapSize * wSize; int numRead = infile.Read( &mapbuffer[0], mapBytes ); if (numRead < 1) { mprinterr("Error: Could not read CCP4 map data.\n"); return 1; } else if (numRead < mapBytes) mprintf("Warning: Expected %i bytes, read only %i bytes\n", mapBytes, numRead); if (isBigEndian) endian_swap(&mapbuffer[0], mapSize); // FIXME: Place data into grid DataSet with correct ordering. int gidx = 0; int NXY = buffer.i[7] * buffer.i[8]; for (int ix = 0; ix != buffer.i[7]; ix++) for (int iy = 0; iy != buffer.i[8]; iy++) for (int iz = 0; iz != buffer.i[9]; iz++) { int midx = (iz * NXY) + (iy * buffer.i[7]) + ix; grid[gidx++] = mapbuffer[midx]; } infile.CloseFile(); return 0; }
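// ReadData() above reads the first 56 header words into a 'headerbyte' and
// then addresses the same storage as ints (buffer.i), floats (buffer.f), and
// raw bytes (buffer.c[208..215] for the 'MAP ' tag and machine stamp). The
// type itself is not shown in this excerpt; a minimal sketch of what it
// presumably looks like (the name here is hypothetical):
union headerbyte_sketch {
  int           i[56];   // Integer header words 0-55
  float         f[56];   // Same words reinterpreted as floats (cell, min/max/mean, RMS)
  unsigned char c[224];  // Raw bytes; 56 words * 4 bytes, 'MAP ' tag at byte 208
};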
/** Open the Charmm PSF file specified by filename and set up topology data. * Mask selection requires natom, nres, names, resnames, resnums. */ int Parm_CharmmPsf::ReadParm(FileName const& fname, Topology &parmOut) { const size_t TAGSIZE = 10; char tag[TAGSIZE]; tag[0]='\0'; CpptrajFile infile; if (infile.OpenRead(fname)) return 1; mprintf(" Reading Charmm PSF file %s as topology file.\n",infile.Filename().base()); // Read the first line, should contain PSF... const char* buffer = 0; if ( (buffer=infile.NextLine()) == 0 ) return 1; // Advance to <ntitle> !NTITLE int ntitle = FindTag(tag, "!NTITLE", 7, infile); // Only read in 1st title. Skip any asterisks. std::string psftitle; if (ntitle > 0) { buffer = infile.NextLine(); const char* ptr = buffer; while (*ptr != '\0' && (*ptr == ' ' || *ptr == '*')) ++ptr; psftitle.assign( ptr ); } parmOut.SetParmName( NoTrailingWhitespace(psftitle), infile.Filename() ); // Advance to <natom> !NATOM int natom = FindTag(tag, "!NATOM", 6, infile); if (debug_>0) mprintf("\tPSF: !NATOM tag found, natom=%i\n", natom); // If no atoms, probably issue with PSF file if (natom < 1) { mprinterr("Error: No atoms in PSF file.\n"); return 1; } // Read the next natom lines int psfresnum = 0; char psfresname[6]; char psfname[6]; char psftype[6]; double psfcharge; double psfmass; for (int atom=0; atom < natom; atom++) { if ( (buffer=infile.NextLine()) == 0 ) { mprinterr("Error: ReadParmPSF(): Reading atom %i\n",atom+1); return 1; } // Read line // ATOM# SEGID RES# RES ATNAME ATTYPE CHRG MASS (REST OF COLUMNS ARE LIKELY FOR CMAP AND CHEQ) sscanf(buffer,"%*i %*s %i %s %s %s %lf %lf",&psfresnum, psfresname, psfname, psftype, &psfcharge, &psfmass); parmOut.AddTopAtom( Atom( psfname, psfcharge, psfmass, psftype), Residue( psfresname, psfresnum, ' ', ' '), 0 ); } // END loop over atoms // Advance to <nbond> !NBOND int bondatoms[9]; int nbond = FindTag(tag, "!NBOND", 6, infile); if (nbond > 0) { if (debug_>0) mprintf("\tPSF: !NBOND tag found, nbond=%i\n", nbond); int nlines = nbond / 4; if ( (nbond % 4) != 0) nlines++; for (int bondline=0; bondline < nlines; bondline++) { if ( (buffer=infile.NextLine()) == 0 ) { mprinterr("Error: ReadParmPSF(): Reading bond line %i\n",bondline+1); return 1; } // Each line has 4 pairs of atom numbers int nbondsread = sscanf(buffer,"%i %i %i %i %i %i %i %i",bondatoms,bondatoms+1, bondatoms+2,bondatoms+3, bondatoms+4,bondatoms+5, bondatoms+6,bondatoms+7); // NOTE: Charmm atom nums start from 1 for (int bondidx=0; bondidx < nbondsread; bondidx+=2) parmOut.AddBond(bondatoms[bondidx]-1, bondatoms[bondidx+1]-1); } } else mprintf("Warning: PSF has no bonds.\n"); // Advance to <nangles> !NTHETA int nangle = FindTag(tag, "!NTHETA", 7, infile); if (nangle > 0) { if (debug_>0) mprintf("\tPSF: !NTHETA tag found, nangle=%i\n", nangle); int nlines = nangle / 3; if ( (nangle % 3) != 0) nlines++; for (int angleline=0; angleline < nlines; angleline++) { if ( (buffer=infile.NextLine()) == 0) { mprinterr("Error: Reading angle line %i\n", angleline+1); return 1; } // Each line has 3 groups of 3 atom numbers int nanglesread = sscanf(buffer,"%i %i %i %i %i %i %i %i %i",bondatoms,bondatoms+1, bondatoms+2,bondatoms+3, bondatoms+4,bondatoms+5, bondatoms+6,bondatoms+7, bondatoms+8); for (int angleidx=0; angleidx < nanglesread; angleidx += 3) parmOut.AddAngle( bondatoms[angleidx ]-1, bondatoms[angleidx+1]-1, bondatoms[angleidx+2]-1 ); } } else mprintf("Warning: PSF has no angles.\n"); // Advance to <ndihedrals> !NPHI int ndihedral = FindTag(tag, "!NPHI", 5, infile); if 
(ndihedral > 0) { if (debug_>0) mprintf("\tPSF: !NPHI tag found, ndihedral=%i\n", ndihedral); int nlines = ndihedral / 2; if ( (ndihedral % 2) != 0) nlines++; for (int dihline = 0; dihline < nlines; dihline++) { if ( (buffer=infile.NextLine()) == 0) { mprinterr("Error: Reading dihedral line %i\n", dihline+1); return 1; } // Each line has 2 groups of 4 atom numbers int ndihread = sscanf(buffer,"%i %i %i %i %i %i %i %i",bondatoms,bondatoms+1, bondatoms+2,bondatoms+3, bondatoms+4,bondatoms+5, bondatoms+6,bondatoms+7); for (int dihidx=0; dihidx < ndihread; dihidx += 4) parmOut.AddDihedral( bondatoms[dihidx ]-1, bondatoms[dihidx+1]-1, bondatoms[dihidx+2]-1, bondatoms[dihidx+3]-1 ); } } else mprintf("Warning: PSF has no dihedrals.\n"); mprintf("\tPSF contains %i atoms, %i residues.\n", parmOut.Natom(), parmOut.Nres()); infile.CloseFile(); return 0; }
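// Illustrative only: a PSF !NATOM record of the kind the sscanf in ReadParm()
// parses (atom#, segment ID, residue#, residue name, atom name, type, charge,
// mass; any trailing columns are ignored). The values are made up.
//
//        1 PROA 1    ALA  N    NH3   -0.300000       14.00700           0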
int Parm_CharmmPsf::WriteParm(FileName const& fname, Topology const& parm) { // TODO: CMAP etc info CpptrajFile outfile; if (outfile.OpenWrite(fname)) return 1; // Write PSF outfile.Printf("PSF\n\n"); // Write title std::string titleOut = parm.ParmName(); titleOut.resize(78); outfile.Printf("%8i !NTITLE\n* %-78s\n\n", 1, titleOut.c_str()); // Write NATOM section outfile.Printf("%8i !NATOM\n", parm.Natom()); unsigned int idx = 1; // Make fake segment ids for now. char segid[2]; segid[0] = 'A'; segid[1] = '\0'; mprintf("Warning: Assigning single letter segment IDs.\n"); int currentMol = 0; bool inSolvent = false; for (Topology::atom_iterator atom = parm.begin(); atom != parm.end(); ++atom, ++idx) { int resnum = atom->ResNum(); if (atom->MolNum() != currentMol) { if (!inSolvent) { inSolvent = parm.Mol(atom->MolNum()).IsSolvent(); currentMol = atom->MolNum(); segid[0]++; } else inSolvent = parm.Mol(atom->MolNum()).IsSolvent(); } // TODO: Print type name for xplor-like PSF int typeindex = atom->TypeIndex() + 1; // If type begins with digit, assume charmm numbers were read as // type. Currently Amber types all begin with letters. if (isdigit(atom->Type()[0])) typeindex = convertToInteger( *(atom->Type()) ); // ATOM# SEGID RES# RES ATNAME ATTYPE CHRG MASS (REST OF COLUMNS ARE LIKELY FOR CMAP AND CHEQ) outfile.Printf("%8i %-4s %-4i %-4s %-4s %4i %14.6G %9g %10i\n", idx, segid, parm.Res(resnum).OriginalResNum(), parm.Res(resnum).c_str(), atom->c_str(), typeindex, atom->Charge(), atom->Mass(), 0); } outfile.Printf("\n"); // Write NBOND section outfile.Printf("%8u !NBOND: bonds\n", parm.Bonds().size() + parm.BondsH().size()); idx = 1; for (BondArray::const_iterator bond = parm.BondsH().begin(); bond != parm.BondsH().end(); ++bond, ++idx) { outfile.Printf("%8i%8i", bond->A1()+1, bond->A2()+1); if ((idx % 4)==0) outfile.Printf("\n"); } for (BondArray::const_iterator bond = parm.Bonds().begin(); bond != parm.Bonds().end(); ++bond, ++idx) { outfile.Printf("%8i%8i", bond->A1()+1, bond->A2()+1); if ((idx % 4)==0) outfile.Printf("\n"); } if ((idx % 4)!=0) outfile.Printf("\n"); outfile.Printf("\n"); // Write NTHETA section outfile.Printf("%8u !NTHETA: angles\n", parm.Angles().size() + parm.AnglesH().size()); idx = 1; for (AngleArray::const_iterator ang = parm.AnglesH().begin(); ang != parm.AnglesH().end(); ++ang, ++idx) { outfile.Printf("%8i%8i%8i", ang->A1()+1, ang->A2()+1, ang->A3()+1); if ((idx % 3)==0) outfile.Printf("\n"); } for (AngleArray::const_iterator ang = parm.Angles().begin(); ang != parm.Angles().end(); ++ang, ++idx) { outfile.Printf("%8i%8i%8i", ang->A1()+1, ang->A2()+1, ang->A3()+1); if ((idx % 3)==0) outfile.Printf("\n"); } if ((idx % 3)==0) outfile.Printf("\n"); outfile.Printf("\n"); // Write out NPHI section outfile.Printf("%8u !NPHI: dihedrals\n", parm.Dihedrals().size() + parm.DihedralsH().size()); idx = 1; for (DihedralArray::const_iterator dih = parm.DihedralsH().begin(); dih != parm.DihedralsH().end(); ++dih, ++idx) { outfile.Printf("%8i%8i%8i%8i", dih->A1()+1, dih->A2()+1, dih->A3()+1, dih->A4()+1); if ((idx % 2)==0) outfile.Printf("\n"); } for (DihedralArray::const_iterator dih = parm.Dihedrals().begin(); dih != parm.Dihedrals().end(); ++dih, ++idx) { outfile.Printf("%8i%8i%8i%8i", dih->A1()+1, dih->A2()+1, dih->A3()+1, dih->A4()+1); if ((idx % 2)==0) outfile.Printf("\n"); } if ((idx % 2)==0) outfile.Printf("\n"); outfile.Printf("\n"); outfile.CloseFile(); return 0; }
// Analysis_Wavelet::Analyze()
Analysis::RetType Analysis_Wavelet::Analyze() {
  // Step 1 - Create a matrix that is #atoms rows by #frames - 1 cols,
  //          where matrix(frame, atom) is the distance that atom has
  //          travelled from the previous frame.
  // TODO: Implement this in Action_Matrix()?
  mprintf(" WAVELET:\n");
  // First set up atom mask.
  if (coords_->Top().SetupIntegerMask( mask_ )) return Analysis::ERR;
  mask_.MaskInfo();
  int natoms = mask_.Nselected();
  int nframes = (int)coords_->Size();
  if (natoms < 1 || nframes < 2) {
    mprinterr("Error: Not enough frames (%i) or atoms (%i) in '%s'\n",
              nframes, natoms, coords_->legend());
    return Analysis::ERR;
  }
  Matrix<double> d_matrix;
  mprintf("\t%i frames, %i atoms, distance matrix will require %.2f MB\n",
          nframes, natoms,
          (double)d_matrix.sizeInBytes(nframes, natoms) / (1024.0*1024.0));
  d_matrix.resize(nframes, natoms);
  // Get initial frame.
  Frame currentFrame, lastFrame;
  currentFrame.SetupFrameFromMask( mask_, coords_->Top().Atoms() );
  lastFrame = currentFrame;
  coords_->GetFrame( 0, lastFrame, mask_ );
  // Iterate over frames
  for (int frm = 1; frm != nframes; frm++) {
    coords_->GetFrame( frm, currentFrame, mask_ );
    int idx = frm; // Position in distance matrix; start at column 'frame'
    for (int at = 0; at != natoms; at++, idx += nframes)
      // Distance of atom in currentFrame from its position in lastFrame.
      d_matrix[idx] = sqrt(DIST2_NoImage( currentFrame.XYZ(at), lastFrame.XYZ(at) ));
    //lastFrame = currentFrame; // TODO: Re-enable?
  }
# ifdef DEBUG_WAVELET
  // DEBUG: Write matrix to file.
  CpptrajFile dmatrixOut; // DEBUG
  dmatrixOut.OpenWrite("dmatrix.dat");
  Matrix<double>::iterator mval = d_matrix.begin();
  for (int row = 0; row != natoms; row++) {
    for (int col = 0; col != nframes; col++)
      dmatrixOut.Printf("%g ", *(mval++));
    dmatrixOut.Printf("\n");
  }
  dmatrixOut.CloseFile();
# endif
  // Precompute some factors for calculating scaled wavelets.
  const double one_over_sqrt_N = 1.0 / sqrt(static_cast<double>( nframes ));
  std::vector<int> arrayK( nframes );
  arrayK[0] = -1 * (nframes/2);
  for (int i = 1; i != nframes; i++)
    arrayK[i] = arrayK[i-1] + 1;
# ifdef DEBUG_WAVELET
  mprintf("DEBUG: K:");
  for (std::vector<int>::const_iterator kval = arrayK.begin(); kval != arrayK.end(); ++kval)
    mprintf(" %i", *kval);
  mprintf("\n");
# endif
  // Step 2 - Get FFT of wavelet for each scale.
  PubFFT pubfft;
  pubfft.SetupFFTforN( nframes );
  mprintf("\tMemory required for scaled wavelet array: %.2f MB\n",
          (double)(2 * nframes * nb_ * sizeof(double)) / (1024 * 1024));
  typedef std::vector<ComplexArray> WaveletArray;
  WaveletArray FFT_of_Scaled_Wavelets;
  FFT_of_Scaled_Wavelets.reserve( nb_ );
  typedef std::vector<double> Darray;
  Darray scaleVector;
  scaleVector.reserve( nb_ );
  Darray MIN( nb_ * 2 );
  for (int iscale = 0; iscale != nb_; iscale++) {
    // Calculate and store scale factor.
scaleVector.push_back( S0_ * pow(2.0, iscale * ds_) ); // Populate MIN array MIN[iscale ] = (0.00647*pow((correction_*scaleVector.back()),1.41344)+19.7527)*chival_; MIN[iscale+nb_] = correction_*scaleVector.back(); // Calculate scalved wavelet ComplexArray scaledWavelet; switch (wavelet_type_) { case W_MORLET: scaledWavelet = F_Morlet(arrayK, scaleVector.back()); break; case W_PAUL : scaledWavelet = F_Paul(arrayK, scaleVector.back()); break; case W_NONE : return Analysis::ERR; // Sanity check } # ifdef DEBUG_WAVELET PrintComplex("wavelet_before_fft", scaledWavelet); # endif // Perform FFT pubfft.Forward( scaledWavelet ); // Normalize scaledWavelet.Normalize( one_over_sqrt_N ); # ifdef DEBUG_WAVELET PrintComplex("wavelet_after_fft", scaledWavelet); # endif FFT_of_Scaled_Wavelets.push_back( scaledWavelet ); } # ifdef DEBUG_WAVELET mprintf("DEBUG: Scaling factors:"); for (Darray::const_iterator sval = scaleVector.begin(); sval != scaleVector.end(); ++sval) mprintf(" %g", *sval); mprintf("\n"); mprintf("DEBUG: MIN:"); for (int i = 0; i != nb_; i++) mprintf(" %g", MIN[i]); mprintf("\n"); # endif // Step 3 - For each atom, calculate the convolution of scaled wavelets // with rows (atom distance vs frame) via dot product of the // frequency domains, i.e. Fourier-transformed, followed by an // inverse FT. DataSet_MatrixFlt& OUT = static_cast<DataSet_MatrixFlt&>( *output_ ); mprintf("\tMemory required for output matrix: %.2f MB\n", (double)Matrix<float>::sizeInBytes(nframes, natoms)/(1024.0*1024.0)); OUT.Allocate2D( nframes, natoms ); // Should initialize to zero Matrix<double> MAX; mprintf("\tMemory required for Max array: %.2f MB\n", (double)MAX.sizeInBytes(nframes, natoms)/(1024.0*1024.0)); MAX.resize( nframes, natoms ); Darray magnitude( nframes ); // Scratch space for calculating magnitude across rows for (int at = 0; at != natoms; at++) { ComplexArray AtomSignal( nframes ); // Initializes to zero // Calculate the distance variance for this atom and populate the array. 
int midx = at * nframes; // Index into d_matrix int cidx = 0; // Index into AtomSignal double d_avg = 0.0; double d_var = 0.0; for (int frm = 0; frm != nframes; frm++, cidx += 2, midx++) { d_avg += d_matrix[midx]; d_var += (d_matrix[midx] * d_matrix[midx]); AtomSignal[cidx] = d_matrix[midx]; } d_var = (d_var - ((d_avg * d_avg) / (double)nframes)) / ((double)(nframes - 1)); # ifdef DEBUG_WAVELET mprintf("VARIANCE: %g\n", d_var); # endif double var_norm = 1.0 / d_var; // Calculate FT of atom signal pubfft.Forward( AtomSignal ); # ifdef DEBUG_WAVELET PrintComplex("AtomSignal", AtomSignal); # endif // Normalize AtomSignal.Normalize( one_over_sqrt_N ); // Calculate dot product of atom signal with each scaled FT wavelet for (int iscale = 0; iscale != nb_; iscale++) { ComplexArray dot = AtomSignal.TimesComplexConj( FFT_of_Scaled_Wavelets[iscale] ); // Inverse FT of dot product pubfft.Back( dot ); # ifdef DEBUG_WAVELET PrintComplex("InverseFT_Dot", dot); # endif // Chi-squared testing midx = at * nframes; cidx = 0; for (int frm = 0; frm != nframes; frm++, cidx += 2, midx++) { magnitude[frm] = (dot[cidx]*dot[cidx] + dot[cidx+1]*dot[cidx+1]) * var_norm; if (magnitude[frm] < MIN[iscale]) magnitude[frm] = 0.0; if (magnitude[frm] > MAX[midx]) { MAX[midx] = magnitude[frm]; //Indices[midx] = iscale OUT[midx] = (float)(correction_ * scaleVector[iscale]); } } # ifdef DEBUG_WAVELET mprintf("DEBUG: AbsoluteValue:"); for (Darray::const_iterator dval = magnitude.begin(); dval != magnitude.end(); ++dval) mprintf(" %g", *dval); mprintf("\n"); # endif } // END loop over scales } // END loop over atoms # ifdef DEBUG_WAVELET // DEBUG: Print MAX CpptrajFile maxmatrixOut; // DEBUG maxmatrixOut.OpenWrite("maxmatrix.dat"); for (int col = 0; col != nframes; col++) { for (int row = 0; row != natoms; row++) maxmatrixOut.Printf("%g ", MAX.element(col, row)); maxmatrixOut.Printf("\n"); } maxmatrixOut.CloseFile(); # endif return Analysis::OK; }
// DataIO_CCP4::WriteSet3D() int DataIO_CCP4::WriteSet3D( DataSetList::const_iterator const& setIn, CpptrajFile& outfile ) { if ((*setIn)->Size() < 1) return 1; // SANITY CHECK: No empty grid allowed if ((*setIn)->Ndim() != 3) { mprinterr("Internal Error: DataSet %s in DataFile %s has %zu dimensions, expected 3.\n", (*setIn)->legend(), outfile.Filename().full(), (*setIn)->Ndim()); return 1; } DataSet_3D const& grid = static_cast<DataSet_3D const&>( *(*setIn) ); // Check input grid Vec3 OXYZ = grid.GridOrigin(); if (OXYZ[0] < 0.0 || OXYZ[1] < 0.0 || OXYZ[2] < 0.0 || OXYZ[0] > 0.0 || OXYZ[1] > 0.0 || OXYZ[2] > 0.0) mprintf("Warning: Grid '%s' origin is not 0.0, 0.0, 0.0\n" "Warning: Origin other than 0.0 not yet supported for CCP4 write.\n"); // Set default title if none set if (title_.empty()) title_.assign("CPPTRAJ CCP4 map volumetric data, set '" + grid.Meta().Legend() + "'. Format revision A."); // Check that title is not too big if (title_.size() > 800) { mprintf("Warning: CCP4 title is too large, truncating.\n"); title_.resize( 800 ); } // Set up and write header headerbyte buffer; buffer.i[0] = (int)grid.NX(); buffer.i[1] = (int)grid.NY(); buffer.i[2] = (int)grid.NZ(); buffer.i[3] = 2; // Only mode 2 supported buffer.i[4] = 0; // No offsets buffer.i[5] = 0; buffer.i[6] = 0; buffer.i[7] = (int)grid.NX(); buffer.i[8] = (int)grid.NY(); buffer.i[9] = (int)grid.NZ(); Box box( grid.Ucell() ); buffer.f[10] = (float)box[0]; buffer.f[11] = (float)box[1]; buffer.f[12] = (float)box[2]; buffer.f[13] = (float)box[3]; buffer.f[14] = (float)box[4]; buffer.f[15] = (float)box[5]; buffer.i[16] = 1; // Cols = X buffer.i[17] = 2; // Rows = Y buffer.i[18] = 3; // Secs = Z // Determine min, max, and mean of data double mean = grid[0]; double gmin = grid[0]; double gmax = grid[0]; double rmsd = grid[0] * grid[0]; for (unsigned int i = 1; i < grid.Size(); i++) { gmin = std::min(grid[i], gmin); gmax = std::max(grid[i], gmax); mean += grid[i]; rmsd += grid[i] * grid[i]; } mean /= (double)grid.Size(); rmsd /= (double)grid.Size(); rmsd = rmsd - (mean * mean); if (rmsd > 0.0) rmsd = sqrt(rmsd); else rmsd = 0.0; mprintf("\t%s\n", title_.c_str()); mprintf("\tDensity: Min=%f Max=%f Mean=%f RMS=%f\n", gmin, gmax, mean, rmsd); buffer.f[19] = (float)gmin; buffer.f[20] = (float)gmax; buffer.f[21] = (float)mean; buffer.i[22] = 1; // Assume P1 buffer.i[23] = 0; // No bytes for symmetry ops buffer.i[24] = 0; // No skew transform // Skew matrix (S11, S12, S13, S21, ...) and translation; 12 total, followed // by 15 'future use'; zero all. std::fill( buffer.i+25, buffer.i+52, 0 ); // MAP and machine precision. FIXME determine endianness! buffer.c[208] = 'M'; buffer.c[209] = 'A'; buffer.c[210] = 'P'; buffer.c[211] = ' '; buffer.c[212] = 0x44; // little endian buffer.c[213] = 0x41; buffer.c[214] = 0x00; buffer.c[215] = 0x00; // Determine RMS deviation from mean. buffer.f[54] = (float)rmsd; // Determine number of labels being used buffer.i[55] = (int)title_.size() / 80; if ( ((int)title_.size() % 80) != 0) buffer.i[55]++; outfile.Write( buffer.c, 224*sizeof(unsigned char) ); // Write labels; 10 lines, 80 chars each outfile.Write( title_.c_str(), title_.size() ); // FIXME this seems wasteful. std::vector<char> remainder( 800 - title_.size(), 0 ); outfile.Write( &remainder[0], remainder.size() ); remainder.clear(); // No symmetry bytes // Store data in buffer, then write. X changes fastest. // SANITY CHECK; This will result in invalid files if size of float is not 4. 
  if (sizeof(float) != wSize)
    mprintf("Warning: Size of float on this system is %zu, not 4.\n"
            "Warning: Resulting CCP4 file data will not conform to standard.\n",
            sizeof(float));
  std::vector<float> mapbuffer( grid.Size() );
  std::vector<float>::iterator it = mapbuffer.begin();
  for (unsigned int iz = 0; iz != grid.NZ(); iz++)
    for (unsigned int iy = 0; iy != grid.NY(); iy++)
      for (unsigned int ix = 0; ix != grid.NX(); ix++)
        *(it++) = grid.GetElement( ix, iy, iz );
  outfile.Write( &mapbuffer[0], mapbuffer.size() * sizeof(float) );
  outfile.CloseFile();
  return 0;
}
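// ---------------------------------------------------------------------------
// Illustrative sketch (not part of cpptraj): the FIXME above notes that the
// machine stamp is hard-coded for little-endian output. One possible way to
// choose it at run time is to probe the byte order of an int, as sketched
// below. The little-endian stamp matches the bytes written above (0x44 0x41);
// 0x11 0x11 is the value commonly used for big-endian. Function name is
// hypothetical.
#include <cstring>

static void setMachineStamp(unsigned char* machst) {
  unsigned int probe = 1;
  unsigned char first;
  std::memcpy(&first, &probe, 1);  // First byte of the int reveals byte order
  if (first == 1) {                // Little-endian
    machst[0] = 0x44; machst[1] = 0x41;
  } else {                         // Big-endian
    machst[0] = 0x11; machst[1] = 0x11;
  }
  machst[2] = 0x00; machst[3] = 0x00;
}
// ---------------------------------------------------------------------------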
/** Load Karplus parameters from input file.
  * Expected format:
  * - {type}<+|-| ><a[4]><+|-| ><b[4]><+|-| ><c[4]><+|-| ><d[4]><A[6]><B[6]><C[6]>{<D[6]>}
  *   <reslabel[4]>*
  * \return 0 on success, 1 on error
  */
int Action_Jcoupling::loadKarplus(std::string filename) {
  char buffer[512], residue[5];
  char *end, *ptr;
  int i;
  CpptrajFile KarplusFile;
  karplusConstant KC;
  karplusConstantList* currentResList = 0;
  std::string CurrentRes;
  karplusConstantMap::iterator reslist;

  if (filename.empty()) {
    mprinterr("Error: jcoupling: Could not find Karplus parameter file.\n");
    return 1;
  }
  if (KarplusFile.OpenRead( filename )) {
    mprinterr("Error: jcoupling: Could not read Karplus parameter file %s\n", filename.c_str());
    mprinterr("Error: Ensure the file exists and is readable.\n");
    return 1;
  }
  // residue is only used for reading in 4-char residue names
  residue[4] = '\0';
  // Read through all lines of the file
  while (KarplusFile.Gets(buffer, 512) == 0) {
    // Skip blank lines and comments
    if (buffer[0] == '\n' || buffer[0] == '#') continue;
    ptr = buffer;
    // First char is optional type. If optional type is C, then the Karplus
    // function specified in Perez et al. JACS (2001) 123 will be used, and
    // A, B, and C will be taken as C0, C1, and C2.
    if (ptr[0] == 'C') {
      KC.type = 1;
      ptr++;
    } else {
      KC.type = 0;
    }
    // Read atom names with optional preceding character (+, -)
    for (i = 0; i < 4; i++) {
      if      (*ptr == '+') KC.offset[i] = 1;
      else if (*ptr == '-') KC.offset[i] = -1;
      else                  KC.offset[i] = 0;
      ++ptr;
      // Temporarily NUL-terminate the 4-char atom name field.
      char *endchar = ptr + 4;
      char savechar = *endchar;
      *endchar = '\0';
      KC.atomName[i] = ptr;
      *endchar = savechar;
      ptr += 4;
      //mprintf("DEBUG:\tAtomName %i [%s]\n", i, KC.atomName[i]);
    }
    // Read parameters
    // NOTE: Using sscanf here instead of atof since the 4th parameter is
    //       optional; behavior is undefined for accessing an uninitialized
    //       portion of buffer.
    i = sscanf(ptr, "%6lf%6lf%6lf%6lf", KC.C, KC.C+1, KC.C+2, KC.C+3);
    if (i < 3) {
      mprinterr("Error: jcoupling: Expected at least 3 Karplus parameters, got %i\n", i);
      mprinterr("Error: Line: [%s]\n", buffer);
      return 1;
    } else if (i == 3)
      KC.C[3] = 0.0;
    KC.C[3] *= Constants::DEGRAD;
    // The next line holds the 4-char names of residues these constants apply
    // to. Place the read-in Karplus constants in a map indexed by residue name
    // so that all Karplus constants for a given residue are in one place.
    if (KarplusFile.Gets(buffer, 512) != 0) {
      mprinterr("Error: jcoupling: Missing residue line in Karplus parameter file.\n");
      return 1;
    }
    // end will hold the end of the read-in buffer string
    end = buffer + strlen(buffer);
    for (ptr = buffer; ptr < end; ptr += 4) {
      if (*ptr == '\n') continue;
      residue[0] = ptr[0];
      residue[1] = ptr[1];
      residue[2] = ptr[2];
      residue[3] = ptr[3];
      CurrentRes.assign(residue);
      //mprintf("DEBUG:\t[%s]\n", CurrentRes.c_str());
      reslist = KarplusConstants_.find(CurrentRes);
      if (reslist == KarplusConstants_.end()) {
        // List does not exist for residue yet, create it.
        currentResList = new karplusConstantList;
        KarplusConstants_.insert( reslist,
          std::pair<std::string, karplusConstantList*>(CurrentRes, currentResList) );
      } else
        // Retrieve list for residue.
        currentResList = (*reslist).second;
      currentResList->push_back(KC);
      ++Nconstants_;
    } // END loop over residues in residue line
  } // END Gets over input file
  KarplusFile.CloseFile();
  // DEBUG - Print out all parameters
  if (debug_ > 0) {
    mprintf("    KARPLUS PARAMETERS:\n");
    for (reslist = KarplusConstants_.begin(); reslist != KarplusConstants_.end(); ++reslist) {
      mprintf("\t[%4s]\n", (*reslist).first.c_str());
      // Print the constants belonging to this residue (not just the last list read in).
      karplusConstantList* resList = (*reslist).second;
      for (karplusConstantList::iterator kc = resList->begin(); kc != resList->end(); ++kc) {
        mprintf("\t\t%1i", (*kc).type);
        mprintf(" %4s", *((*kc).atomName[0]));
        mprintf(" %4s", *((*kc).atomName[1]));
        mprintf(" %4s", *((*kc).atomName[2]));
        mprintf(" %4s", *((*kc).atomName[3]));
        mprintf(" %i %i %i %i", (*kc).offset[0], (*kc).offset[1], (*kc).offset[2], (*kc).offset[3]);
        mprintf(" %6.2lf %6.2lf %6.2lf %6.2lf\n", (*kc).C[0], (*kc).C[1], (*kc).C[2], (*kc).C[3]);
      }
    }
  }
  return 0;
}
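// ---------------------------------------------------------------------------
// Illustrative sketch (not part of cpptraj): loadKarplus() groups constants
// per residue in a map from residue name to a list, inserting a new list only
// when the residue has not been seen before. The minimal pattern is sketched
// below with plain std:: containers; the type and function names are
// hypothetical stand-ins for karplusConstantList / karplusConstantMap.
#include <map>
#include <string>
#include <vector>

typedef std::vector<int> ConstList;                 // Stand-in for karplusConstantList
typedef std::map<std::string, ConstList> ConstMap;  // Stand-in for karplusConstantMap

static void addConstant(ConstMap& cmap, std::string const& resName, int kc) {
  ConstMap::iterator it = cmap.find( resName );
  if (it == cmap.end())
    it = cmap.insert( std::pair<std::string, ConstList>(resName, ConstList()) ).first;
  it->second.push_back( kc );  // All constants for a residue stay together
}
// ---------------------------------------------------------------------------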
// Cluster_DBSCAN::ComputeKdistMap()
void Cluster_DBSCAN::ComputeKdistMap( Range const& Kvals,
                                      std::vector<int> const& FramesToCluster ) const
{
  int pt1_idx, pt2_idx, d_idx, point;
  mprintf("\tCalculating Kdist map for %s\n", Kvals.RangeArg());
  double* kdist_array; // Store distance of pt1 to every other point.
  int nframes = (int)FramesToCluster.size();
  // Ensure all Kdist points are within proper range
  Range::const_iterator kval;
  for (kval = Kvals.begin(); kval != Kvals.end(); ++kval)
    if (*kval < 1 || *kval >= nframes) {
      mprinterr("Error: Kdist value %i is out of range (1 <= Kdist < %i)\n",
                *kval, nframes);
      return;
    }
  int nvals = (int)Kvals.Size();
  double** KMAP; // KMAP[i] has the ith nearest point for each point.
  KMAP = new double*[ nvals ];
  for (int i = 0; i != nvals; i++)
    KMAP[i] = new double[ nframes ];
  ParallelProgress progress( nframes );
# ifdef _OPENMP
# pragma omp parallel private(pt1_idx, pt2_idx, d_idx, kval, point, kdist_array) firstprivate(progress)
  {
  progress.SetThread( omp_get_thread_num() );
# endif
  kdist_array = new double[ nframes ];
# ifdef _OPENMP
# pragma omp for
# endif
  for (pt1_idx = 0; pt1_idx < nframes; pt1_idx++) // X
  {
    progress.Update( pt1_idx );
    point = FramesToCluster[pt1_idx];
    d_idx = 0;
    // Store distances from pt1 to every other point
    for (pt2_idx = 0; pt2_idx != nframes; pt2_idx++)
      kdist_array[d_idx++] = FrameDistances_.GetFdist(point, FramesToCluster[pt2_idx]);
    // Sort distances; will be smallest to largest
    std::sort( kdist_array, kdist_array + nframes );
    // Save the distance of specified nearest neighbors to this point.
    d_idx = 0;
    for (kval = Kvals.begin(); kval != Kvals.end(); ++kval) // Y
      KMAP[d_idx++][pt1_idx] = kdist_array[ *kval ];
  }
  delete[] kdist_array;
# ifdef _OPENMP
  } // END omp parallel
# endif
  progress.Finish();
  // Sort each individual Kdist plot, smallest to largest.
  for (int i = 0; i != nvals; i++)
    std::sort(KMAP[i], KMAP[i] + nframes);
  // Save in matrix, largest to smallest.
  DataSet_MatrixDbl kmatrix;
  kmatrix.Allocate2D( FramesToCluster.size(), Kvals.Size() );
  for (int y = 0; y != nvals; y++) {
    for (int x = nframes - 1; x != -1; x--)
      kmatrix.AddElement( KMAP[y][x] );
    delete[] KMAP[y];
  }
  delete[] KMAP;
  // Write matrix to file
  DataFile outfile;
  ArgList outargs("usemap");
  outfile.SetupDatafile(k_prefix_ + "Kmatrix.gnu", outargs, debug_);
  outfile.AddDataSet( (DataSet*)&kmatrix );
  outfile.WriteDataOut();
  // Write out the largest and smallest values for each K.
  // This means for each value of K the point with the furthest Kth-nearest
  // neighbor, etc.
  CpptrajFile maxfile;
  if (maxfile.OpenWrite(k_prefix_ + "Kmatrix.max.dat")) return;
  maxfile.Printf("%-12s %12s %12s\n", "#Kval", "MaxD", "MinD");
  d_idx = 0;
  for (kval = Kvals.begin(); kval != Kvals.end(); ++kval, d_idx++)
    maxfile.Printf("%12i %12g %12g\n", *kval,
                   kmatrix.GetElement(0, d_idx), kmatrix.GetElement(nframes-1, d_idx));
  maxfile.CloseFile();
}
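// ---------------------------------------------------------------------------
// Illustrative sketch (not part of cpptraj): the core step of the Kdist map
// is, for one point, sorting its distances to all other points and taking the
// Kth smallest (index K, since index 0 is the zero self-distance). The
// hypothetical helper below isolates that step for a single precomputed
// distance row.
#include <algorithm>
#include <vector>

static double kthNearestDistance(std::vector<double> dists, int K) {
  // dists includes the self-distance (0.0); sort smallest to largest.
  std::sort( dists.begin(), dists.end() );
  return dists[K]; // K = 1 gives the nearest neighbor other than the point itself
}
// ---------------------------------------------------------------------------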