Ejemplo n.º 1
0
  bool MOL2Format::ReadMolecule(OBBase* pOb, OBConversion* pConv)
  {

    OBMol* pmol = pOb->CastAndClear<OBMol>();
    if(pmol==NULL)
      return false;

    //Define some references so we can use the old parameter names
    istream &ifs = *pConv->GetInStream();
    OBMol &mol = *pmol;

    //Old code follows...
    bool foundAtomLine = false;
    char buffer[BUFF_SIZE];
    char *comment = NULL;
    string str,str1;
    vector<string> vstr;
    int len;

    mol.BeginModify();

    for (;;)
      {
        if (!ifs.getline(buffer,BUFF_SIZE))
          return(false);
        if (EQn(buffer,"@<TRIPOS>MOLECULE",17))
          break;
      }

    int lcount;
    int natoms,nbonds;
    for (lcount=0;;lcount++)
      {
        if (!ifs.getline(buffer,BUFF_SIZE))
          return(false);
        if (EQn(buffer,"@<TRIPOS>ATOM",13))
          {
            foundAtomLine = true;
            break;
          }

        if (lcount == 0)
          {
            tokenize(vstr,buffer);
            if (!vstr.empty())
              mol.SetTitle(buffer);
          }
        else if (lcount == 1)
          sscanf(buffer,"%d%d",&natoms,&nbonds);
        else if (lcount == 4) //energy
          {
            tokenize(vstr,buffer);
            if (!vstr.empty() && vstr.size() == 3)
              if (vstr[0] == "Energy")
                mol.SetEnergy(atof(vstr[2].c_str()));
          }
        else if (lcount == 5) //comment
          {
            if ( buffer[0] )
              {
                len = (int) strlen(buffer)+1;
                // TODO allow better multi-line comments
                // which don't allow ill-formed data to consume memory
                // Thanks to Andrew Dalke for the pointer
                if (comment != NULL)
                  delete [] comment;
                comment = new char [len];
                memcpy(comment,buffer,len);
              }
          }
      }

    if (!foundAtomLine)
      {
        mol.EndModify();
        mol.Clear();
        obErrorLog.ThrowError(__FUNCTION__, "Unable to read Mol2 format file. No atoms found.", obWarning);
        return(false);
      }

    mol.ReserveAtoms(natoms);

    int i;
    vector3 v;
    OBAtom atom;
    bool hasPartialCharges=false;
    double x,y,z,pcharge;
    char temp_type[BUFF_SIZE], resname[BUFF_SIZE], atmid[BUFF_SIZE];
    int elemno, resnum = -1;

    ttab.SetFromType("SYB");
    for (i = 0;i < natoms;i++)
      {
        if (!ifs.getline(buffer,BUFF_SIZE))
          return(false);
        sscanf(buffer," %*s %1024s %lf %lf %lf %1024s %d %1024s %lf",
               atmid, &x,&y,&z, temp_type, &resnum, resname, &pcharge);

        atom.SetVector(x, y, z);

        // Handle "CL" and "BR" and other mis-typed atoms
        str = temp_type;
        if (strncmp(temp_type, "CL", 2) == 0) {
          str = "Cl";
        } else  if (strncmp(temp_type,"BR",2) == 0) {
          str = "Br";
        } else if (strncmp(temp_type,"S.o2", 4) == 02) {
          str = "S.O2";
        } else if (strncmp(temp_type,"S.o", 3) == 0) {
          str = "S.O";
        } else if (strncmp(temp_type,"SI", 2) == 0) {
          str = "Si";
        // The following cases are entries which are not in openbabel/data/types.txt
        // and should probably be added there
        } else if (strncmp(temp_type,"S.1", 3) == 0) {
          str = "S.2"; // no idea what the best type might be here
        } else if (strncmp(temp_type,"P.", 2) == 0) {
          str = "P.3";
        } else if (strncasecmp(temp_type,"Ti.", 3) == 0) { // e.g. Ti.th
          str = "Ti";
        } else if (strncasecmp(temp_type,"Ru.", 3) == 0) { // e.g. Ru.oh
          str = "Ru";
        }

        ttab.SetToType("ATN");
        ttab.Translate(str1,str);
        elemno = atoi(str1.c_str());
        ttab.SetToType("IDX");

        // We might have missed some SI or FE type things above, so here's
        // another check
        if( !elemno && isupper(temp_type[1]) )
          {
            temp_type[1] = (char)tolower(temp_type[1]);
            str = temp_type;
            ttab.Translate(str1,str);
            elemno = atoi(str1.c_str());
          }
        // One last check if there isn't a period in the type,
        // it's a malformed atom type, but it may be the element symbol
        // GaussView does this (PR#1739905)
        if ( !elemno ) {
          obErrorLog.ThrowError(__FUNCTION__, "This Mol2 file is non-standard. Cannot interpret atom types correctly, instead attempting to interpret as elements instead.", obWarning);

          string::size_type dotPos = str.find('.');
          if (dotPos == string::npos) {
            elemno = etab.GetAtomicNum(str.c_str());
          }
        }

        atom.SetAtomicNum(elemno);
        ttab.SetToType("INT");
        ttab.Translate(str1,str);
        atom.SetType(str1);
        atom.SetPartialCharge(pcharge);
        if (!mol.AddAtom(atom))
          return(false);
        if (!IsNearZero(pcharge))
          hasPartialCharges = true;

        // Add residue information if it exists
        if (resnum != -1 && resnum != 0 &&
            strlen(resname) != 0 && strncmp(resname,"<1>", 3) != 0)
          {
            OBResidue *res  = (mol.NumResidues() > 0) ?
              mol.GetResidue(mol.NumResidues()-1) : NULL;
            if (res == NULL || res->GetName() != resname ||
                static_cast<int>(res->GetNum()) != resnum)
              {
                vector<OBResidue*>::iterator ri;
                for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
                  if (res->GetName() == resname &&
                      static_cast<int>(res->GetNum())
                      == resnum)
                    break;

                if (res == NULL)
                  {
                    res = mol.NewResidue();
                    res->SetName(resname);
                    res->SetNum(resnum);
                  }
              }
            OBAtom *atomPtr = mol.GetAtom(mol.NumAtoms());
            res->AddAtom(atomPtr);
            res->SetAtomID(atomPtr, atmid);
          } // end adding residue info
      }

    for (;;)
      {
        if (!ifs.getline(buffer,BUFF_SIZE))
          return(false);
        str = buffer;
        if (!strncmp(buffer,"@<TRIPOS>BOND",13))
          break;
      }

    int start,end,order;
    for (i = 0; i < nbonds; i++)
      {
        if (!ifs.getline(buffer,BUFF_SIZE))
          return(false);

        sscanf(buffer,"%*d %d %d %1024s",&start,&end,temp_type);
        str = temp_type;
        order = 1;
        if (str == "ar" || str == "AR" || str == "Ar")
          order = 5;
        else if (str == "AM" || str == "am" || str == "Am")
          order = 1;
        else
          order = atoi(str.c_str());

        mol.AddBond(start,end,order);
      }

 
    // update neighbour bonds information for each atom.
    vector<OBAtom*>::iterator apos;
    vector<OBBond*>::iterator bpos;
    OBAtom* patom;
    OBBond* pbond;
    
    for (patom = mol.BeginAtom(apos); patom; patom = mol.NextAtom(apos))
      {
        patom->ClearBond();
        for (pbond = mol.BeginBond(bpos); pbond; pbond = mol.NextBond(bpos))
          {
            if (patom == pbond->GetBeginAtom() || patom == pbond->GetEndAtom())
              {
                patom->AddBond(pbond);
              }
          }
      }

    // Suggestion by Liu Zhiguo 2008-01-26
    // Mol2 files define atom types -- there is no need to re-perceive
    mol.SetAtomTypesPerceived();
    mol.EndModify();

    //must add generic data after end modify - otherwise it will be blown away
    if (comment)
      {
        OBCommentData *cd = new OBCommentData;
        cd->SetData(comment);
        cd->SetOrigin(fileformatInput);
        mol.SetData(cd);
        delete [] comment;
        comment = NULL;
      }
    if (hasPartialCharges)
      mol.SetPartialChargesPerceived();

    // continue untill EOF or untill next molecule record
    streampos pos;
    for(;;)
      {
        pos = ifs.tellg();
        if (!ifs.getline(buffer,BUFF_SIZE))
          break;
        if (EQn(buffer,"@<TRIPOS>MOLECULE",17))
          break;
      }

    ifs.seekg(pos); // go back to the end of the molecule
    return(true);
  }
Ejemplo n.º 2
0
  static bool parseAtomRecord(char *buffer, OBMol &mol,int /*chainNum*/)
  /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,a2,a2)" */
  {
    string sbuf = &buffer[6];
    if (sbuf.size() < 48)
      return(false);

    bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;
    bool elementFound = false; // true if correct element found in col 77-78

    /* serial number */
    string serno = sbuf.substr(0,5);

    /* atom name */
    string atmid = sbuf.substr(6,4);

    /* chain */
    char chain = sbuf.substr(15,1)[0];

    /* element */
    string element = "  ";
    if (sbuf.size() > 71)
      {
        element = sbuf.substr(70,2);
        if (isalpha(element[1]))
          {
            if (element[0] == ' ')
              {
                element.erase(0, 1);
                elementFound = true;
              }
            else if (isalpha(element[0]))
              {
                elementFound = true;
              }
          }
      }

    if (!elementFound)
      {
        stringstream errorMsg;
        errorMsg << "WARNING: Problems reading a PDB file\n"
                 << "  Problems reading a HETATM or ATOM record.\n"
                 << "  According to the PDB specification,\n"
                 << "  columns 77-78 should contain the element symbol of an atom.\n"
                 << "  but OpenBabel found '" << element << "' (atom " << mol.NumAtoms()+1 << ")";
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
      }

    // charge - optional
    string scharge;
    if (sbuf.size() > 73)
      {
        scharge = sbuf.substr(72,2);
      }

    //trim spaces on the right and left sides
    while (!atmid.empty() && atmid[0] == ' ')
      atmid = atmid.erase(0, 1);

    while (!atmid.empty() && atmid[atmid.size()-1] == ' ')
      atmid = atmid.substr(0,atmid.size()-1);

    /* residue name */
    string resname = sbuf.substr(11,3);
    if (resname == "   ")
      resname = "UNK";
    else
      {
        while (!resname.empty() && resname[0] == ' ')
          resname = resname.substr(1,resname.size()-1);

        while (!resname.empty() && resname[resname.size()-1] == ' ')
          resname = resname.substr(0,resname.size()-1);
      }

    string type;
    if (!elementFound) {
      // OK, we have to fall back to determining the element from the atom type
      // This is unreliable, but there's no other choice
      if (EQn(buffer,"ATOM",4)) {
        type = atmid.substr(0,2);
        if (isdigit(type[0])) {
          // sometimes non-standard files have, e.g 11HH
          if (!isdigit(type[1])) type = atmid.substr(1,1);
          else type = atmid.substr(2,1);
        } else if ((sbuf[6] == ' ' &&
                   strncasecmp(type.c_str(), "Zn", 2) != 0 &&
                   strncasecmp(type.c_str(), "Fe", 2) != 0) ||
                   isdigit(type[1]))	//type[1] is digit in Platon
          type = atmid.substr(0,1);     // one-character element


        if (resname.substr(0,2) == "AS" || resname[0] == 'N') {
          if (atmid == "AD1")
            type = "O";
          if (atmid == "AD2")
            type = "N";
        }
        if (resname.substr(0,3) == "HIS" || resname[0] == 'H') {
          if (atmid == "AD1" || atmid == "AE2")
            type = "N";
          if (atmid == "AE1" || atmid == "AD2")
            type = "C";
        }
        if (resname.substr(0,2) == "GL" || resname[0] == 'Q') {
          if (atmid == "AE1")
            type = "O";
          if (atmid == "AE2")
            type = "N";
        }
        // fix: #2002557
        if (atmid[0] == 'H' &&
            (atmid[1] == 'D' || atmid[1] == 'E' ||
             atmid[1] == 'G' || atmid[1] == 'H')) // HD, HE, HG, HH, ..
          type = "H";
      } else { //must be hetatm record
        if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) {
          if (isalpha(element[0]))
            type = element.substr(0,2);
          else
            type = element.substr(1,1);

          if (type.size() == 2)
            type[1] = tolower(type[1]);
        } else { // no element column to use
          if (isalpha(atmid[0])) {
            if (atmid.size() > 2 && (atmid[2] == '\0' || atmid[2] == ' '))
              type = atmid.substr(0,2);
            else if (atmid[0] == 'A') // alpha prefix
              type = atmid.substr(1, atmid.size() - 1);
            else
              type = atmid.substr(0,1);
          } else if (atmid[0] == ' ')
            type = atmid.substr(1,1); // one char element
          else
            type = atmid.substr(1,2);

          // Some cleanup steps
          if (atmid == resname) {
            type = atmid;
            if (type.size() == 2)
              type[1] = tolower(type[1]);
          } else
            if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
                resname == "GPG" || resname == "NAD" || resname == "NAL" ||
                resname == "NDP" || resname == "ABA") {
              if (type.size() > 1)
                type = type.substr(0,1);
              //type.erase(1,type.size()-1);
            } else // other residues
              if (isdigit(type[0])){
                type = type.substr(1,1);
              }
              else
                if (type.size() > 1 && isdigit(type[1]))
                  type = type.substr(0,1);
                else
                  if (type.size() > 1 && isalpha(type[1])) {
                    if (type[0] == 'O' && type[1] == 'H')
                      type = type.substr(0,1); // no "Oh" element (e.g. 1MBN)
                    else if(isupper(type[1])) {
                      type[1] = tolower(type[1]);
                    }
                  }
        }

      } // HETATM records
    } // no element column to use

    OBAtom atom;
    /* X, Y, Z */
    string xstr = sbuf.substr(24,8);
    string ystr = sbuf.substr(32,8);
    string zstr = sbuf.substr(40,8);
    vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
    atom.SetVector(v);
    atom.ForceImplH();

    // useful for debugging unknown atom types (e.g., PR#1577238)
    //    cout << mol.NumAtoms() + 1  << " : '" << element << "'" << " " << etab.GetAtomicNum(element.c_str()) << endl;
    if (elementFound)
      atom.SetAtomicNum(etab.GetAtomicNum(element.c_str()));
    else // use our old-style guess from athe atom type
      atom.SetAtomicNum(etab.GetAtomicNum(type.c_str()));

    if ( (! scharge.empty()) && "  " != scharge )
      {
        if ( isdigit(scharge[0]) && ('+' == scharge[1] || '-' == scharge[1]) )
          {
            const char reorderCharge[3] = { scharge[1], scharge[0], '\0' };
            const int charge = atoi(reorderCharge);
            atom.SetFormalCharge(charge);
          }
        else
          {
            stringstream errorMsg;
            errorMsg << "WARNING: Problems reading a PDB file\n"
                     << "  Problems reading a HETATM or ATOM record.\n"
                     << "  According to the PDB specification,\n"
                     << "  columns 79-80 should contain charge of the atom\n"
                     << "  but OpenBabel found '" << scharge << "' (atom " << mol.NumAtoms()+1 << ").";
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
          }
      }
    else {
      atom.SetFormalCharge(0);
    }

    /* residue sequence number */
    string resnum = sbuf.substr(16,4);
    OBResidue *res  = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
    if (res == NULL
        || res->GetName() != resname
        || res->GetNumString() != resnum
        || res->GetChain() != chain)
      {
        vector<OBResidue*>::iterator ri;
        for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
          if (res->GetName() == resname
              && res->GetNumString() == resnum
              && static_cast<int>(res->GetChain()) == chain)
            break;

        if (res == NULL) {
          res = mol.NewResidue();
          res->SetChain(chain);
          res->SetName(resname);
          res->SetNum(resnum);
        }
      }

    if (!mol.AddAtom(atom))
      return(false);
    else {
      OBAtom *atom = mol.GetAtom(mol.NumAtoms());

      res->AddAtom(atom);
      res->SetSerialNum(atom, atoi(serno.c_str()));
      res->SetAtomID(atom, sbuf.substr(6,4));
      res->SetHetAtom(atom, hetatm);

      return(true);
    }
  } // end reading atom records