Exemple #1
0
void CheckValidDipeptide(OBConversion &conv,
                         const string &test,
                         unsigned int testCount)
{
  OBMol mol;
  OBResidue *res;
  ostringstream os;

  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);
  if (mol.NumResidues() != 2) {
    os << "not ok " << testCount << " # expected 2 residues, but found "
         << mol.NumResidues() << '\n';
    os << "# ";
    FOR_RESIDUES_OF_MOL(res, mol)
      os << res->GetName() << " ";
    os << endl;
    BOOST_CHECK_MESSAGE( 0, os.str().c_str() );
  } else {
    res = mol.GetResidue(0);
    BOOST_CHECK_MESSAGE( res, "Get first AA from dipeptide" );
    res = mol.GetResidue(1);
    BOOST_CHECK_MESSAGE( res, "Get second AA from dipeptide" );
  }
}
void CheckInvalidResidue(OBConversion &conv,
                         const string &test,
                         unsigned int testCount)
{
  OBMol mol;
  
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);
  if (mol.NumResidues() != 0) {
    OBResidue *res = mol.GetResidue(0);
    if (res->GetName() == "LIG") { // ligand, not residue
      cout << "ok " << testCount << " # found ligand, not residue "
           << test << '\n';
    } else {
      cout << "not ok " << testCount << " # expected 0 residues, found "
           << mol.NumResidues() << '\n';
      cout << "# " << res->GetName() << endl;
    }
  } else
    cout << "ok " << testCount << " # correctly rejected " << test << '\n';
}
Exemple #3
0
void CheckInvalidResidue(OBConversion &conv,
                         const string &test,
                         unsigned int testCount)
{
  OBMol mol;
  ostringstream os;
  
  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);
  if (mol.NumResidues() != 0) {
    OBResidue *res = mol.GetResidue(0);
    if (res->GetName() == "LIG") { // ligand, not residue
      BOOST_CHECK( 1 );
    } else {
      os << "not ok " << testCount << " # expected 0 residues, found "
           << mol.NumResidues() << '\n';
      os << "# " << res->GetName() << endl;
      BOOST_CHECK_MESSAGE( 0, os.str().c_str() );
    }
  } else
    BOOST_CHECK( 1 );
}
void CheckValidDipeptide(OBConversion &conv,
                         const string &test,
                         unsigned int testCount)
{
  OBMol mol;

  mol.Clear();
  conv.ReadString(&mol, test);
  chainsparser.PerceiveChains(mol);
  if (mol.NumResidues() != 2) {
    cout << "not ok " << testCount << " # expected 2 residues, but found "
         << mol.NumResidues() << '\n';
    cout << "# ";
    FOR_RESIDUES_OF_MOL(res, mol)
      cout << res->GetName() << " ";
    cout << endl;
  } else {
    OBResidue *res;
    res = mol.GetResidue(0);
    cout << "ok " << testCount << " # " << res->GetName();
    res = mol.GetResidue(1);
    cout << " " << res->GetName() << '\n';
  }
}
Exemple #5
0
  static bool parseAtomRecord(char *buffer, OBMol &mol,int /*chainNum*/)
  /* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,a2,a2)" */
  {
    string sbuf = &buffer[6];
    if (sbuf.size() < 48)
      return(false);

    bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;
    bool elementFound = false; // true if correct element found in col 77-78

    /* serial number */
    string serno = sbuf.substr(0,5);

    /* atom name */
    string atmid = sbuf.substr(6,4);

    /* chain */
    char chain = sbuf.substr(15,1)[0];

    /* element */
    string element = "  ";
    if (sbuf.size() > 71)
      {
        element = sbuf.substr(70,2);
        if (isalpha(element[1]))
          {
            if (element[0] == ' ')
              {
                element.erase(0, 1);
                elementFound = true;
              }
            else if (isalpha(element[0]))
              {
                elementFound = true;
              }
          }
      }

    if (!elementFound)
      {
        stringstream errorMsg;
        errorMsg << "WARNING: Problems reading a PDB file\n"
                 << "  Problems reading a HETATM or ATOM record.\n"
                 << "  According to the PDB specification,\n"
                 << "  columns 77-78 should contain the element symbol of an atom.\n"
                 << "  but OpenBabel found '" << element << "' (atom " << mol.NumAtoms()+1 << ")";
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
      }

    // charge - optional
    string scharge;
    if (sbuf.size() > 73)
      {
        scharge = sbuf.substr(72,2);
      }

    //trim spaces on the right and left sides
    while (!atmid.empty() && atmid[0] == ' ')
      atmid = atmid.erase(0, 1);

    while (!atmid.empty() && atmid[atmid.size()-1] == ' ')
      atmid = atmid.substr(0,atmid.size()-1);

    /* residue name */
    string resname = sbuf.substr(11,3);
    if (resname == "   ")
      resname = "UNK";
    else
      {
        while (!resname.empty() && resname[0] == ' ')
          resname = resname.substr(1,resname.size()-1);

        while (!resname.empty() && resname[resname.size()-1] == ' ')
          resname = resname.substr(0,resname.size()-1);
      }

    string type;
    if (!elementFound) {
      // OK, we have to fall back to determining the element from the atom type
      // This is unreliable, but there's no other choice
      if (EQn(buffer,"ATOM",4)) {
        type = atmid.substr(0,2);
        if (isdigit(type[0])) {
          // sometimes non-standard files have, e.g 11HH
          if (!isdigit(type[1])) type = atmid.substr(1,1);
          else type = atmid.substr(2,1);
        } else if ((sbuf[6] == ' ' &&
                   strncasecmp(type.c_str(), "Zn", 2) != 0 &&
                   strncasecmp(type.c_str(), "Fe", 2) != 0) ||
                   isdigit(type[1]))	//type[1] is digit in Platon
          type = atmid.substr(0,1);     // one-character element


        if (resname.substr(0,2) == "AS" || resname[0] == 'N') {
          if (atmid == "AD1")
            type = "O";
          if (atmid == "AD2")
            type = "N";
        }
        if (resname.substr(0,3) == "HIS" || resname[0] == 'H') {
          if (atmid == "AD1" || atmid == "AE2")
            type = "N";
          if (atmid == "AE1" || atmid == "AD2")
            type = "C";
        }
        if (resname.substr(0,2) == "GL" || resname[0] == 'Q') {
          if (atmid == "AE1")
            type = "O";
          if (atmid == "AE2")
            type = "N";
        }
        // fix: #2002557
        if (atmid[0] == 'H' &&
            (atmid[1] == 'D' || atmid[1] == 'E' ||
             atmid[1] == 'G' || atmid[1] == 'H')) // HD, HE, HG, HH, ..
          type = "H";
      } else { //must be hetatm record
        if (isalpha(element[1]) && (isalpha(element[0]) || (element[0] == ' '))) {
          if (isalpha(element[0]))
            type = element.substr(0,2);
          else
            type = element.substr(1,1);

          if (type.size() == 2)
            type[1] = tolower(type[1]);
        } else { // no element column to use
          if (isalpha(atmid[0])) {
            if (atmid.size() > 2 && (atmid[2] == '\0' || atmid[2] == ' '))
              type = atmid.substr(0,2);
            else if (atmid[0] == 'A') // alpha prefix
              type = atmid.substr(1, atmid.size() - 1);
            else
              type = atmid.substr(0,1);
          } else if (atmid[0] == ' ')
            type = atmid.substr(1,1); // one char element
          else
            type = atmid.substr(1,2);

          // Some cleanup steps
          if (atmid == resname) {
            type = atmid;
            if (type.size() == 2)
              type[1] = tolower(type[1]);
          } else
            if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
                resname == "GPG" || resname == "NAD" || resname == "NAL" ||
                resname == "NDP" || resname == "ABA") {
              if (type.size() > 1)
                type = type.substr(0,1);
              //type.erase(1,type.size()-1);
            } else // other residues
              if (isdigit(type[0])){
                type = type.substr(1,1);
              }
              else
                if (type.size() > 1 && isdigit(type[1]))
                  type = type.substr(0,1);
                else
                  if (type.size() > 1 && isalpha(type[1])) {
                    if (type[0] == 'O' && type[1] == 'H')
                      type = type.substr(0,1); // no "Oh" element (e.g. 1MBN)
                    else if(isupper(type[1])) {
                      type[1] = tolower(type[1]);
                    }
                  }
        }

      } // HETATM records
    } // no element column to use

    OBAtom atom;
    /* X, Y, Z */
    string xstr = sbuf.substr(24,8);
    string ystr = sbuf.substr(32,8);
    string zstr = sbuf.substr(40,8);
    vector3 v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
    atom.SetVector(v);
    atom.ForceImplH();

    // useful for debugging unknown atom types (e.g., PR#1577238)
    //    cout << mol.NumAtoms() + 1  << " : '" << element << "'" << " " << etab.GetAtomicNum(element.c_str()) << endl;
    if (elementFound)
      atom.SetAtomicNum(etab.GetAtomicNum(element.c_str()));
    else // use our old-style guess from athe atom type
      atom.SetAtomicNum(etab.GetAtomicNum(type.c_str()));

    if ( (! scharge.empty()) && "  " != scharge )
      {
        if ( isdigit(scharge[0]) && ('+' == scharge[1] || '-' == scharge[1]) )
          {
            const char reorderCharge[3] = { scharge[1], scharge[0], '\0' };
            const int charge = atoi(reorderCharge);
            atom.SetFormalCharge(charge);
          }
        else
          {
            stringstream errorMsg;
            errorMsg << "WARNING: Problems reading a PDB file\n"
                     << "  Problems reading a HETATM or ATOM record.\n"
                     << "  According to the PDB specification,\n"
                     << "  columns 79-80 should contain charge of the atom\n"
                     << "  but OpenBabel found '" << scharge << "' (atom " << mol.NumAtoms()+1 << ").";
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
          }
      }
    else {
      atom.SetFormalCharge(0);
    }

    /* residue sequence number */
    string resnum = sbuf.substr(16,4);
    OBResidue *res  = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
    if (res == NULL
        || res->GetName() != resname
        || res->GetNumString() != resnum
        || res->GetChain() != chain)
      {
        vector<OBResidue*>::iterator ri;
        for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
          if (res->GetName() == resname
              && res->GetNumString() == resnum
              && static_cast<int>(res->GetChain()) == chain)
            break;

        if (res == NULL) {
          res = mol.NewResidue();
          res->SetChain(chain);
          res->SetName(resname);
          res->SetNum(resnum);
        }
      }

    if (!mol.AddAtom(atom))
      return(false);
    else {
      OBAtom *atom = mol.GetAtom(mol.NumAtoms());

      res->AddAtom(atom);
      res->SetSerialNum(atom, atoi(serno.c_str()));
      res->SetAtomID(atom, sbuf.substr(6,4));
      res->SetHetAtom(atom, hetatm);

      return(true);
    }
  } // end reading atom records
Exemple #6
0
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;
  char *FileIn = NULL;

  if (argc != 2)
    {
      string err = "Usage: ";
      err += program_name;
      err += " <filename>\n"
      "Output format:\n"
        "name NAME\n"
        "formula  FORMULA\n"
        "mol_weight MOLECULAR_WEIGHT\n"
        "exact_mass ISOTOPIC MASS\n"
        "canonical_SMILES STRING\n"
        "InChI  STRING\n"
        "num_atoms  NUM\n"
        "num_bonds  NUM\n"
        "num_residues  NUM\n"
	"num_rotors NUM\n"
        "sequence RESIDUE_SEQUENCE\n"
        "num_rings NUMBER_OF_RING_(SSSR)\n"
        "logP   NUM\n"
        "PSA    POLAR_SURFACE_AREA\n"
        "MR     MOLAR REFRACTIVITY";
      err += "$$$$";
//      ThrowError(err); wasn't being output because error level too low
      cerr << err; //Why not do directly
      exit(-1);
    }
  else
    {
      FileIn  = argv[1];
    }

  // Find Input filetype
  OBConversion conv;
  OBFormat *format = conv.FormatFromExt(FileIn);
    
  if (!format || !conv.SetInFormat(format))
    {
      cerr << program_name << ": cannot read input format!" << endl;
      exit (-1);
    }

  ifstream ifs;

  // Read the file
  ifs.open(FileIn);
  if (!ifs)
    {
      cerr << program_name << ": cannot read input file!" << endl;
      exit (-1);
    }
  
  OBMol mol;
  OBFormat *canSMIFormat = conv.FindFormat("can");
  OBFormat *inchiFormat = conv.FindFormat("inchi");


  ////////////////////////////////////////////////////////////////////////////
  // List of properties
  // Name
  // Molecular weight (Standard molar mass given by IUPAC atomic masses)
  // Number of rings : the size of the smallest set of smallest rings (SSSR)
  
  //.....ADD YOURS HERE.....
  
  for (c = 1;; ++c)
    {
      mol.Clear();
      conv.Read(&mol, &ifs);
      if (mol.Empty())
        break;
      
      if (!mol.HasHydrogensAdded())
        mol.AddHydrogens();
      // Print the properties
      if (strlen(mol.GetTitle()) != 0)
        cout << "name             " << mol.GetTitle() << endl;
      else 
        cout << "name             " << FileIn << " " << c << endl;

      cout << "formula          " << mol.GetFormula() << endl;
      cout << "mol_weight       " << mol.GetMolWt() << endl;
      cout << "exact_mass       " << mol.GetExactMass() << endl;

      string smilesString = "-";
      if (canSMIFormat) {
        conv.SetOutFormat(canSMIFormat);
        smilesString = conv.WriteString(&mol);
        if ( smilesString.length() == 0 )
        {
          smilesString = "-";
        }
      }
      cout << "canonical_SMILES " << smilesString << endl;

      string inchiString = "-";
      if (inchiFormat) {
        conv.SetOutFormat(inchiFormat);
        inchiString = conv.WriteString(&mol);
        if ( inchiString.length() == 0 )
        {
          inchiString = "-";
        }
      }
      cout << "InChI            " << inchiString << endl;

      cout << "num_atoms        " << mol.NumAtoms() << endl;
      cout << "num_bonds        " << mol.NumBonds() << endl;
      cout << "num_residues     " << mol.NumResidues() << endl;
      cout << "num_rotors       " << mol.NumRotors() << endl;
      if (mol.NumResidues() > 0)
        cout << "sequence         " << sequence(mol) << endl;
      else
        cout << "sequence         " << "-" << endl;

      cout << "num_rings        " << nrings(mol) << endl;

      OBDescriptor* pDesc;
      pDesc= OBDescriptor::FindType("logP");
      if(pDesc)
        cout << "logP             " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("TPSA");
      if(pDesc)
        cout << "PSA              " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("MR");
      if(pDesc)
        cout << "MR               " << pDesc->Predict(&mol) << endl;

      cout << "$$$$" << endl; // SDF like end of compound descriptor list
      
      //Other OBDescriptors could be output here, even ones that were rarely
      // used. Since these are plugin classes, they may not be loaded, but
      // then with code like the above they are just ignored.
    } // end for loop
  
  return(0);
}