Beispiel #1
0
bool FastSearchFormat::ObtainTarget(OBConversion* pConv, OBMol& patternMol, const string& indexname)
{
    //Obtains an OBMol
    //   either from the SMARTS string in the -s option
    //   or by converting the file in the -S option
    //or, if neither option is provided, displays information on the index file.

    stringstream smiles(stringstream::out);
    ifstream patternstream;
    OBConversion PatternConv(&patternstream,&smiles);

    const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS);
    string txt;
    if(p)
    {
        // Use the -s option
        txt=p;
        stringstream smarts(txt, stringstream::in);
        OBConversion Convsm(&smarts);
        if(!Convsm.SetInFormat("smi")) return false;
        Convsm.Read(&patternMol);

        //erase -s option in GeneralOptions since it will be rewritten
        pConv->RemoveOption("s",OBConversion::GENOPTIONS);
        if(patternMol.Empty())
        {
            obErrorLog.ThrowError(__FUNCTION__,
                                  "Could not make a molecule from " + smarts.str()
                                  + "\nThis needs to be valid SMILES when using fastsearch."
                                  "You can use the more versatile SMARTS in a normal substructure search." , obError);
            return false;
        }
    }
    else
    {
        // or Make OBMol from file in -S option or -aS option
        p = pConv->IsOption("S",OBConversion::GENOPTIONS);
        if(!p)
            p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly
    }

    if(!p)
    {
        //neither -s or -S options provided. Output info rather than doing search
        const FptIndexHeader& header = fs.GetIndexHeader();
        string id(header.fpid);
        if(id.empty())
            id = "default";
        clog << indexname << " is an index of\n " << header.datafilename
             << ".\n It contains " << header.nEntries
             << " molecules. The fingerprint type is " << id << " with "
             << OBFingerprint::Getbitsperint() * header.words << " bits.\n"
             << "Typical usage for a substructure search:\n"
             << "babel indexfile.fs -osmi -sSMILES" << endl;
        return false;
    }

    if(p && patternMol.Empty())
    {
        txt=p;
        string::size_type pos = txt.find_last_of('.');
        if(pos==string::npos)
        {
            obErrorLog.ThrowError(__FUNCTION__, "Filename of pattern molecule in -S option must have an extension", obError);
            return false;
        }
        patternstream.open(txt.c_str());
        if(!patternstream)
        {
            stringstream errorMsg;

            errorMsg << "Cannot open " << txt << endl;
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
            return false;
        }

        PatternConv.SetOneObjectOnly();
        if(PatternConv.SetInFormat(txt.substr(pos+1).c_str()))
            PatternConv.Read(&patternMol);
    }

    if(patternMol.Empty())
    {
        obErrorLog.ThrowError(__FUNCTION__, "Cannot derive a molecule from the -s or -S options", obWarning);
        return false;
    }
    patternMol.ConvertDativeBonds();//use standard form for dative bonds

    //Convert to SMILES and generate a -s option for use in the final filtering
    if(!PatternConv.SetOutFormat("smi"))
        return false;
    PatternConv.Write(&patternMol);
    //remove name to leave smiles string
    string smilesstr(smiles.str());
    string::size_type pos = smilesstr.find_first_of(" \t\r\n");
    if(pos!=string::npos)
        smilesstr = smilesstr.substr(0,pos);
    pConv->AddOption("s", OBConversion::GENOPTIONS, smilesstr.c_str());

    return true;
}
Beispiel #2
0
bool fragmentMol(const ROMol& mol,
                 std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res,
                 unsigned int minCuts,
                 unsigned int maxCuts,
                 unsigned int maxCutBonds,
                 const std::string& pattern) {
#ifdef _DEBUG
  for (size_t i = 0; i < mol.getNumAtoms(); i++) {
    std::string symbol = mol.getAtomWithIdx(i)->getSymbol();
    int label = 0;
    mol.getAtomWithIdx(i)->getPropIfPresent(common_properties::molAtomMapNumber,
                                            label);
    char a1[32];
    if (0 == label)
      sprintf(a1, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
    std::cout << "Atom " << i << ": " << a1;  //<<" Bonds:";
    std::cout << "\n";
  }
#endif

  res.clear();
  std::auto_ptr<const ROMol> smarts((const ROMol*)SmartsToMol(pattern));
  std::vector<MatchVectType>
      matching_atoms;  // one bond per match ! with default pattern
  unsigned int total = SubstructMatch(mol, *smarts, matching_atoms);
#ifdef _DEBUG
  std::cout << "total substructs =" << total
            << "\nmatching bonds (atom1, atom2):\n";
#endif
  if (0 == total)  // Not found.  Return empty set of molecules
    return false;
#ifdef _DEBUG
  for (size_t i = 0; i < matching_atoms.size(); i++) {
    std::string symbol =
        mol.getAtomWithIdx(matching_atoms[i][0].second)->getSymbol();
    int label = 0;
    mol.getAtomWithIdx(matching_atoms[i][0].second)
        ->getPropIfPresent(common_properties::molAtomMapNumber, label);
    char a1[32];
    if (0 == label)
      sprintf(a1, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
    symbol = mol.getAtomWithIdx(matching_atoms[i][1].second)->getSymbol();
    label = 0;
    mol.getAtomWithIdx(matching_atoms[i][1].second)
        ->getPropIfPresent(common_properties::molAtomMapNumber, label);
    char a2[32];
    if (0 == label)
      sprintf(a2, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a2, "\'%s:%u\'", symbol.c_str(), label);

    std::cout << i << ": (" << matching_atoms[i][0].second << a1 << ","
              << matching_atoms[i][1].second << a2 << ") \n";
  }
#endif

  std::vector<BondVector_t> matching_bonds;  // List of matched query's bonds
  convertMatchingToBondVect(matching_bonds, matching_atoms, mol);
  if (matching_bonds.size() > maxCutBonds) return false;
#ifdef _DEBUG
  std::cout << "total matching_bonds = " << matching_bonds.size() << "\n";
#endif

  // loop to generate every cut in the molecule
  BondVector_t bonds_selected;
  processCuts(0, minCuts, maxCuts, bonds_selected, matching_bonds, mol, res);
  return true;
}