Example #1
0
  void OBPhModel::ParseLine(const char *buffer)
  {
    vector<string> vs;
    OBSmartsPattern *sp;

    if (buffer[0] == '#')
      return;

    if (EQn(buffer,"TRANSFORM",7))
      {
        tokenize(vs,buffer);
        if (vs.size() < 5)
          {
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        OBChemTsfm *tsfm = new OBChemTsfm;
        if (!tsfm->Init(vs[1],vs[3]))
          {
            delete tsfm;
            tsfm = NULL;
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        _vtsfm.push_back(tsfm);
        _vpKa.push_back(atof(vs[4].c_str()));
      }
    else if (EQn(buffer,"SEEDCHARGE",10))
      {
        tokenize(vs,buffer);
        if (vs.size() < 2)
          {
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        sp = new OBSmartsPattern;
        if (!sp->Init(vs[1]) || (vs.size()-2) != sp->NumAtoms())
          {
            delete sp;
            sp = NULL;
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        vector<double> vf;
        vector<string>::iterator i;
        for (i = vs.begin()+2;i != vs.end();++i)
          vf.push_back(atof((char*)i->c_str()));

        _vschrg.push_back(pair<OBSmartsPattern*,vector<double> > (sp,vf));
      }
  }
Example #2
0
///////////////////////////////////////////////////////////////////////////////
//! \brief Find the molecule(s) with or without a given SMART pattern
int main(int argc,char **argv)
{
  char c;
  unsigned int ntimes=0; // number of times SMARTS matches in a molecule
  unsigned int numMatching = 0; // number of matching molecules (for -c flag)
  bool pattern_matched=false, ntimes_matched=true;
  bool count=false, invert=false, full=false, name_only=false;
  char *FileIn = NULL, *Pattern = NULL;
  char *program_name = argv[0];
  char *iext;
  bool useInFile = true;

  OBConversion conv(&cin,&cout);
  OBFormat *pFormat = conv.FindFormat("smi"); // default format is SMILES
    
  // Parse options
  while ((c = getopt(argc, argv, "t:nvcfi:-")) != -1)
    {
#ifdef _WIN32
	    char optopt = c;
#endif
      switch (c)
        {
        case 't': // request ntimes unique matches

          c = sscanf(optarg, "%d", &ntimes);
          if (c != 1 )
            {
              cerr << program_name << ": unable to parse -t option" << endl;
              exit (-1);
            }
          break;

        case 'i':
          iext = optarg;

          //The ID provided by the OBFormat class is used as 
          // the identifying file extension. This is a slight
          // reduction in flexibility (which is not currently used)
          pFormat = conv.FindFormat(iext);
            
          if(pFormat==NULL)
            {
              cerr << program_name << ": cannot read input format!" << endl;
              exit(-1);
            }

          break;
        case 'n': // print the molecule name only
          name_only = true;
          break;
        case 'c': // count the number of match
          count = true;
          break;
        case 'v': // match only the molecules without the pattern
          invert = true;
          break;

        case 'f':
          full = true;
          break;

        case '-':
          useInFile = false;
          break;

        case '?':
          if (isprint (optopt))
            fprintf (stderr, "Unknown option `-%c'.\n", optopt);
          else
            fprintf (stderr,
                     "Unknown option character `\\x%x'.\n",
                     optopt);
          return 1;
        }
    }
  int index = optind;

  if (argc-index != 2 && argc-index != 1)
    {
      string err = "Usage: ";
      err += program_name;
      err += " [options] \"PATTERN\" <filename>\n";
      err += "If no filename is supplied, then obgrep will use stdin instead.\n";
      err += "Options:\n";
      err += "   -v      Invert the matching, print non-matching molecules\n";
      err += "   -c      Print the number of matched molecules\n";
      err += "   -i <format> Specify the input and output format\n";
      err += "   -f      Full match, print matching-molecules when the number\n";
      err += "           of heavy atoms is equal to the number of PATTERN atoms\n";
      err += "   -n      Only print the name of the molecules\n";
      err += "   -t NUM  Print a molecule only if the PATTERN occurs NUM times inside the molecule\n";
      cerr << err << ends;
      exit(-1);
    }
  else
    {
      Pattern = argv[index++];
      if (argc - index == 1)
        FileIn  = argv[index];
    }

  ifstream ifs;
  if (useInFile && FileIn != NULL)
    {
      // Read the file
      ifs.open(FileIn);
      if (!ifs)
        {
          cerr << program_name << ": cannot read input file!" << endl;
          exit (-1);
        }
      conv.SetInStream(&ifs);
	
	
      // Find Input filetype
      if (pFormat == NULL) {
          pFormat = conv.FormatFromExt(FileIn);
          if (pFormat == NULL)
            {
              cerr << program_name << ": cannot read input format!" << endl;
              return (-1);
            }
      }
    }

  if (! conv.SetInAndOutFormats(pFormat, pFormat))
    {
      cerr << program_name << ": cannot read or write to this file format" << endl;
      return (-1);
    }

  // Match the SMART
  OBSmartsPattern sp;
  vector< vector <int> > maplist;      // list of matched atoms
  sp.Init(Pattern);

  OBMol mol;

  bool impossible_match;

  // Search for pattern
  for (c=0;;)
    {
      mol.Clear();
      conv.Read(&mol);
      if (mol.Empty())
        break;


      ////////////////////////////////////////////////////////////////
      // Do not loose time trying to match the pattern if the matching
      // is impossible.
      // It is impossible to make a full match if the number of atoms is
      // different
      if (full )
        impossible_match = (sp.NumAtoms() == mol.NumHvyAtoms()) ? false : true;
      else
        impossible_match = false;

      if (impossible_match)
        { // -> avoid useless SMART matching attempt
          if (invert)
            {
              if (!count)
                {
                  if ( name_only )
                    cout << mol.GetTitle() << endl;
                  else
                    conv.Write(&mol, &cout);
                }
              numMatching++;
            }
          continue;
        }


      ////////////////////////////////////////////////////////////////
      // perform SMART matching

      pattern_matched = sp.Match(mol);

      // the number of times the match occured may matter
      if ( ntimes )
        { // ntimes is a positive integer of requested matches
          // Here, a match mean a unique match (same set of atoms)
          // so we need to get the unique match list size

          maplist = sp.GetUMapList();

          if( maplist.size() == ntimes )
            ntimes_matched = true;
          else
            ntimes_matched = false;
        }
      else
        {  // ntimes == 0, we don't care about the number of matches
          ntimes_matched = true;
        }


      ////////////////////////////////////////////////////////////////
      // perform a set of tests to guess what to print out

      if ( pattern_matched == true && ntimes_matched == true)
        {
          if (!invert)
            {      // do something only when invert flag is off
              if (!count)
                {
                  if ( name_only )
                    cout << mol.GetTitle() << endl;
                  else
                    conv.Write(&mol, &cout);
                }
              numMatching++;
            }

        }

      else
        { // The SMART pattern do not occur as many times as requested
          if (invert)
            {       // do something only if invert flag is on
              if (!count)
                {
                  if ( name_only )
                    cout << mol.GetTitle() << endl;
                  else
                    conv.Write(&mol, &cout);
                }
              numMatching++;
            }
        }
    } // end for loop


  ////////////////////////////////////////////////////////////////
  // Only print the number of matched molecules as requested
  if (count)
    {
      cout << numMatching << endl;
    }

  return(1);
}