// Parm_CIF::ReadParm() int Parm_CIF::ReadParm(FileName const& fname, Topology &TopIn) { CIFfile infile; CIFfile::DataBlock::data_it line; if (infile.Read( fname, debug_ )) return 1; CIFfile::DataBlock const& block = infile.GetDataBlock("_atom_site"); if (block.empty()) { mprinterr("Error: CIF data block '_atom_site' not found.\n"); return 1; } // Does this CIF contain multiple models? int Nmodels = 0; int model_col = block.ColumnIndex("pdbx_PDB_model_num"); if (model_col != -1) { line = block.end(); --line; Nmodels = convertToInteger( (*line)[model_col] ); if (Nmodels > 1) mprintf("Warning: CIF '%s' contains %i models. Using first model for topology.\n", fname.full(), Nmodels); } // Get essential columns int COL[NENTRY]; for (int i = 0; i < (int)NENTRY; i++) { COL[i] = block.ColumnIndex(Entries[i]); if (COL[i] == -1) { mprinterr("Error: In CIF file '%s' could not find entry '%s' in block '%s'\n", fname.full(), Entries[i], block.Header().c_str()); return 1; } if (debug_>0) mprintf("DEBUG: '%s' column = %i\n", Entries[i], COL[i]); } // Get optional columns int occ_col = block.ColumnIndex("occupancy"); int bfac_col = block.ColumnIndex("B_iso_or_equiv"); int icode_col = block.ColumnIndex("pdbx_PDB_ins_code"); int altloc_col = block.ColumnIndex("label_alt_id"); std::vector<AtomExtra> extra; // Loop over all atom sites int current_res = 0; double XYZ[3]; double occupancy = 1.0; double bfactor = 0.0; char altloc = ' '; char icode; icode = ' '; Frame Coords; for (line = block.begin(); line != block.end(); ++line) { // If more than 1 model check if we are done. if (Nmodels > 1) { if ( convertToInteger( (*line)[model_col] ) > 1 ) break; } if (occ_col != -1) occupancy = convertToDouble( (*line)[ occ_col ] ); if (bfac_col != -1) bfactor = convertToDouble( (*line)[ bfac_col ] ); if (altloc_col != -1) altloc = (*line)[ altloc_col ][0]; // '.' altloc means blank? 
if (altloc == '.') altloc = ' '; extra.push_back( AtomExtra(occupancy, bfactor, altloc) ); if (icode_col != -1) { icode = (*line)[ icode_col ][0]; // '?' icode means blank if (icode == '?') icode = ' '; } XYZ[0] = convertToDouble( (*line)[ COL[X] ] ); XYZ[1] = convertToDouble( (*line)[ COL[Y] ] ); XYZ[2] = convertToDouble( (*line)[ COL[Z] ] ); NameType currentResName( (*line)[ COL[RNAME] ] ); // It seems that in some CIF files, there doesnt have to be a residue // number. Check if residue name has changed. if ( (*line)[ COL[RNUM] ][0] == '.' ) { Topology::res_iterator lastResidue = TopIn.ResEnd(); --lastResidue; if ( currentResName != (*lastResidue).Name() ) current_res = TopIn.Nres() + 1; } else current_res = convertToInteger( (*line)[ COL[RNUM] ] ); TopIn.AddTopAtom( Atom((*line)[ COL[ANAME] ], " "), Residue(currentResName, current_res, icode, (*line)[ COL[CHAINID] ][0]) ); Coords.AddXYZ( XYZ ); } if (TopIn.SetExtraAtomInfo( 0, extra )) return 1; // Search for bonds // FIXME nobondsearch? BondSearch( TopIn, Coords, Offset_, debug_ ); // Get title. CIFfile::DataBlock const& entryblock = infile.GetDataBlock("_entry"); std::string ciftitle; if (!entryblock.empty()) ciftitle = entryblock.Data("id"); TopIn.SetParmName( ciftitle, infile.CIFname() ); // Get unit cell parameters if present. CIFfile::DataBlock const& cellblock = infile.GetDataBlock("_cell"); if (!cellblock.empty()) { double cif_box[6]; cif_box[0] = convertToDouble( cellblock.Data("length_a") ); cif_box[1] = convertToDouble( cellblock.Data("length_b") ); cif_box[2] = convertToDouble( cellblock.Data("length_c") ); cif_box[3] = convertToDouble( cellblock.Data("angle_alpha") ); cif_box[4] = convertToDouble( cellblock.Data("angle_beta" ) ); cif_box[5] = convertToDouble( cellblock.Data("angle_gamma") ); mprintf("\tRead cell info from CIF: a=%g b=%g c=%g alpha=%g beta=%g gamma=%g\n", cif_box[0], cif_box[1], cif_box[2], cif_box[3], cif_box[4], cif_box[5]); TopIn.SetParmBox( Box(cif_box) ); } return 0; }
/** Build a "subject" structure from a BLAST alignment and a query reference.
  *
  * Reads Query/alignment/Sbjct line triplets from the file given by the
  * 'blastfile' keyword, maps each subject residue onto a query residue (or
  * onto nothing), and then builds a new topology/frame for the subject using
  * coordinates from the reference structure:
  *   - identical residues: all non-hydrogen atoms are copied;
  *   - differing residues: only atoms named N, CA, CB, C and O are copied;
  *   - subject residues with no query counterpart: a single placeholder CA,
  *     placed on a straight line between the atoms bordering the gap.
  * The result is written to the file given by the 'out' keyword (PDB unless
  * MOL2 is requested). The 'smaskoffset' and 'qmaskoffset' keywords shift
  * the residue numbers printed in the Smask/Qmask expressions.
  *
  * \param State Current state; supplies the reference frame and debug level.
  * \param argIn Command arguments.
  * \return 0 on success, 1 on error.
  */
int SequenceAlign(CpptrajState& State, ArgList& argIn) {
  std::string blastfile = argIn.GetStringKey("blastfile");
  if (blastfile.empty()) {
    mprinterr("Error: 'blastfile' must be specified.\n");
    return 1;
  }
  ReferenceFrame qref = State.DSL()->GetReferenceFrame(argIn);
  if (qref.error() || qref.empty()) {
    mprinterr("Error: Must specify reference structure for query.\n");
    return 1;
  }
  std::string outfilename = argIn.GetStringKey("out");
  if (outfilename.empty()) {
    mprinterr("Error: Must specify output file.\n");
    return 1;
  }
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::GetFormatFromArg(argIn);
  if (fmt != TrajectoryFile::PDBFILE && fmt != TrajectoryFile::MOL2FILE)
    fmt = TrajectoryFile::PDBFILE; // Default to PDB
  // Offsets include +1 so that internal 0-based indices print as 1-based.
  int smaskoffset = argIn.getKeyInt("smaskoffset", 0) + 1;
  int qmaskoffset = argIn.getKeyInt("qmaskoffset", 0) + 1;

  // Load blast file
  mprintf("\tReading BLAST alignment from '%s'\n", blastfile.c_str());
  BufferedLine infile;
  if (infile.OpenFileRead( blastfile )) return 1;
  // Seek down to first Query line.
  const char* ptr = infile.Line();
  bool atFirstQuery = false;
  while (ptr != 0) {
    if (*ptr == 'Q') {
      if ( strncmp(ptr, "Query", 5) == 0 ) {
        atFirstQuery = true;
        break;
      }
    }
    ptr = infile.Line();
  }
  if (!atFirstQuery) {
    mprinterr("Error: 'Query' not found.\n");
    return 1;
  }

  // Read alignment. Replacing query with subject.
  typedef std::vector<char> Carray;
  typedef std::vector<int> Iarray;
  Carray Query; // Query residues
  Carray Sbjct; // Sbjct residues
  Iarray Smap;  // Smap[Sbjct index] = Query index
  while (ptr != 0) {
    const char* qline = ptr;           // query line
    const char* aline = infile.Line(); // alignment line
    const char* sline = infile.Line(); // subject line
    if (aline == 0 || sline == 0) {
      mprinterr("Error: Missing alignment line or subject line after Query:\n");
      mprinterr("Error:  %s", qline);
      return 1;
    }
    // Sequence characters start at column 12 and run up to the next space.
    // Also stop at a string terminator so a query line lacking the trailing
    // space cannot be scanned indefinitely.
    for (int idx = 12; qline[idx] != ' ' && qline[idx] != '\0'; idx++) {
      if (qline[idx] == '-') {
        // Sbjct does not have corresponding res in Query
        Smap.push_back(-1);
        Sbjct.push_back( sline[idx] );
      } else if (sline[idx] == '-') {
        // Query does not have a corresponding res in Sbjct
        Query.push_back( qline[idx] );
      } else {
        // Direct Query to Sbjct map
        Smap.push_back( Query.size() );
        Sbjct.push_back( sline[idx] );
        Query.push_back( qline[idx] );
      }
    }
    // Scan to next Query
    ptr = infile.Line();
    while (ptr != 0) {
      if (*ptr == 'Q') {
        if ( strncmp(ptr, "Query", 5) == 0 ) break;
      }
      ptr = infile.Line();
    }
  }
  // DEBUG
  std::string SmaskExp, QmaskExp;
  if (State.Debug() > 0) mprintf("  Map of Sbjct to Query:\n");
  for (int sres = 0; sres != (int)Sbjct.size(); sres++) {
    if (State.Debug() > 0)
      mprintf("%-i %3s %i", sres+smaskoffset, Residue::ConvertResName(Sbjct[sres]),
              Smap[sres]+qmaskoffset);
    const char* qres = "";
    if (Smap[sres] != -1) {
      qres = Residue::ConvertResName(Query[Smap[sres]]);
      if (SmaskExp.empty())
        SmaskExp.assign( integerToString(sres+smaskoffset) );
      else
        SmaskExp.append( "," + integerToString(sres+smaskoffset) );
      if (QmaskExp.empty())
        QmaskExp.assign( integerToString(Smap[sres]+qmaskoffset) );
      else
        QmaskExp.append( "," + integerToString(Smap[sres]+qmaskoffset) );
    }
    if (State.Debug() > 0) mprintf(" %3s\n", qres);
  }
  mprintf("Smask: %s\n", SmaskExp.c_str());
  mprintf("Qmask: %s\n", QmaskExp.c_str());
  // Check that query residues match reference.
  // Mapped query indices are used directly as residue indices into the
  // reference topology below, so make sure they are in range first.
  const int refNres = qref.Parm().Nres();
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    if (qres != -1) {
      if (qres >= refNres) {
        mprinterr("Error: Query residue %i is out of range for reference (%i residues).\n",
                  qres + 1, refNres);
        return 1;
      }
      if (Query[qres] != qref.Parm().Res(qres).SingleCharName()) {
        mprintf("Warning: Potential residue mismatch: Query %s reference %s\n",
                Residue::ConvertResName(Query[qres]), qref.Parm().Res(qres).c_str());
      }
    }
  }
  // Build subject using coordinate from reference.
  Topology sTop;
  Frame sFrame;
  Iarray placeHolder; // Atom indices of placeholder residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    NameType SresName( Residue::ConvertResName(Sbjct[sres]) );
    if (qres != -1) {
      Residue const& QR = qref.Parm().Res(qres);
      Residue SR(SresName, sres+1, ' ', QR.ChainID());
      if (Query[qres] == Sbjct[sres]) { // Exact match. All non-H atoms.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          // Topology atom and coordinates must be added together so that
          // sFrame stays in step with sTop; hydrogens are skipped in both.
          if (qref.Parm()[qat].Element() != Atom::HYDROGEN) {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      } else { // Partial match. Copy only backbone and CB.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          if ( qref.Parm()[qat].Name().Match("N" ) ||
               qref.Parm()[qat].Name().Match("CA") ||
               qref.Parm()[qat].Name().Match("CB") ||
               qref.Parm()[qat].Name().Match("C" ) ||
               qref.Parm()[qat].Name().Match("O" ) )
          {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      }
    } else {
      // Residue in query does not exist for subject. Just put placeholder CA for now.
      Vec3 Zero(0.0);
      placeHolder.push_back( sTop.Natom() );
      sTop.AddTopAtom( Atom("CA", "C "), Residue(SresName, sres+1, ' ', ' ') );
      sFrame.AddXYZ( Zero.Dptr() );
    }
  }
  //sTop.PrintAtomInfo("*");
  mprintf("\tPlaceholder residue indices:");
  for (Iarray::const_iterator p = placeHolder.begin(); p != placeHolder.end(); ++p)
    mprintf(" %i", *p + 1);
  mprintf("\n");
  // Try to give placeholders more reasonable coordinates.
  // Placeholders are grouped into runs of consecutive atom indices; each run
  // is spread evenly along the line between the atoms just before and just
  // after it.
  unsigned int pidx = 0;
  while (pidx < placeHolder.size()) {
    Iarray current_indices;
    current_indices.push_back( placeHolder[pidx++] );
    // Search for the end of this segment
    for (; pidx != placeHolder.size(); pidx++) {
      if (placeHolder[pidx] - current_indices.back() > 1) break;
      current_indices.push_back( placeHolder[pidx] );
    }
    // DEBUG
    mprintf("\tSegment:");
    for (Iarray::const_iterator it = current_indices.begin();
                                it != current_indices.end(); ++it)
      mprintf(" %i", *it + 1);
    // Get coordinates of residues bordering segment.
    const int firstAt = current_indices.front();
    const int lastAt  = current_indices.back();
    int prev_res = sTop[firstAt].ResNum() - 1;
    int next_res = sTop[lastAt ].ResNum() + 1;
    mprintf(" (prev_res=%i, next_res=%i)\n", prev_res+1, next_res+1);
    // A segment at the very start or end of the subject has no atom on one
    // side to interpolate from; reading it would index outside the frame.
    if (firstAt < 1 || lastAt + 1 >= sTop.Natom()) {
      mprintf("Warning: Placeholder segment is not bordered on both sides;"
              " leaving its coordinates at the origin.\n");
      continue;
    }
    Vec3 prev_crd(sFrame.XYZ(firstAt - 1));
    Vec3 next_crd(sFrame.XYZ(lastAt + 1));
    prev_crd.Print("prev_crd");
    next_crd.Print("next_crd");
    Vec3 crd_step = (next_crd - prev_crd) / static_cast<double>(current_indices.size()+1);
    crd_step.Print("crd_step");
    double* xyz = sFrame.xAddress() + (firstAt * 3);
    for (unsigned int i = 0; i != current_indices.size(); i++, xyz += 3) {
      prev_crd += crd_step;
      xyz[0] = prev_crd[0];
      xyz[1] = prev_crd[1];
      xyz[2] = prev_crd[2];
    }
  }
  // Write output traj
  Trajout_Single trajout;
  if (trajout.PrepareTrajWrite(outfilename, argIn, &sTop, CoordinateInfo(), 1, fmt)) return 1;
  if (trajout.WriteSingle(0, sFrame)) return 1;
  trajout.EndTraj();
  return 0;
}