Example #1
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);
  
  cout << "# Testing SMARTS Parsing...  \n";

  std::ifstream ifs;
  if (!SafeOpen(ifs, smarts_file.c_str()))
    {
      cout << "Bail out! Cannot read " << smarts_file << endl;
      return -1; // test failed
    }

  //read in the SMARTS test patterns
  char buffer[BUFF_SIZE];
  OBSmartsPattern sp;
  unsigned int patterns = 0;
  for (;ifs.getline(buffer,BUFF_SIZE);)
    {
      if (buffer[0] == '#') // skip comment line
        continue;
      
      if (sp.Init(buffer))
        cout << "ok " << ++patterns << endl;
      else
        cout << "not ok " << ++patterns << " failed on " << buffer << endl;
    }
  ifs.close();
  // output the number of tests run
  cout << "1.." << patterns << endl;

  // Passed Test
  return 0;
}
Example #2
0
  void OBAromaticTyper::ParseLine(const char *buffer)
  {
    OBSmartsPattern *sp;
    char temp_buffer[BUFF_SIZE];

    if (buffer[0] == '#' || !*buffer) //comment and empty lines
      return;
    vector<string> vs;
    tokenize(vs,buffer);
    if (vs.empty())
      return;

    if (vs.size() == 3)
      {
        strncpy(temp_buffer,vs[0].c_str(), BUFF_SIZE - 1);
        temp_buffer[BUFF_SIZE - 1] = '\0';
        sp = new OBSmartsPattern();
        if (sp->Init(temp_buffer))
          {
            _vsp.push_back(sp);
            _verange.push_back(pair<int,int>
                               (atoi((char*)vs[1].c_str()),
                                atoi((char*)vs[2].c_str())));
          }
        else
          {
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in aromatic typer from aromatic.txt", obInfo);
            delete sp;
            sp = NULL;
            return;
          }
      }
    else
      obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in aromatic typer from aromatic.txt", obInfo);
  }
Example #3
0
int main() {  
   
     OBAtom a, b, c;
      a.SetAtomicNum(8);
      b.SetAtomicNum(6);
      c.SetAtomicNum(8);

     OBMol mol;
     mol.AddAtom(a);
     mol.AddAtom(b);
     mol.AddAtom(c);
     
     mol.AddBond(1,2,2);
     mol.AddBond(2,3,2);

      OBConversion conv;
      conv.SetOutFormat("SMI");
      cout << conv.WriteString(&mol,1) << endl;
      
     OBSmartsPattern sp;
     
     sp.Init ("C~*");
     
     sp.Match (mol,false);
     
       cout << sp.NumMatches() << endl;
       
        cout << sp.GetUMapList().size() << endl;
      
      return EXIT_SUCCESS;
  }    
  // Helper function -- handle SMARTS selections
  // Called by performAction()
  //
  // Asks the user for a SMARTS pattern, matches it against a copy of the
  // current molecule and, if anything matched, replaces the widget's selection
  // with the atoms of all unique matches. Nothing is changed when the dialog
  // is cancelled, the pattern is empty or cannot be parsed, or no atom matches.
  void SelectExtension::selectSMARTS(GLWidget *widget)
  {
    bool ok;
    QString pattern = QInputDialog::getText(qobject_cast<QWidget*>(parent()),
        tr("SMARTS Selection"),
        tr("SMARTS pattern to select"),
        QLineEdit::Normal,
        "", &ok);
    if (!ok || pattern.isEmpty())
      return;

    // The pattern is free-form user input: do not try to match with it
    // unless it actually parsed.
    OBSmartsPattern smarts;
    if (!smarts.Init(pattern.toStdString()))
      return;

    OpenBabel::OBMol obmol = m_molecule->OBMol();
    smarts.Match(obmol);

    // Leave the current selection alone when there is nothing to select.
    if (smarts.NumMatches() == 0)
      return;

    QList<Primitive *> matchedAtoms;

    // Bind by reference: the match list is only read, no need to copy it.
    const vector< vector <int> > &mapList = smarts.GetUMapList();
    vector< vector <int> >::const_iterator i; // a set of matching atoms
    vector<int>::const_iterator j; // atom ids in each match
    for (i = mapList.begin(); i != mapList.end(); ++i) {
      for (j = i->begin(); j != i->end(); ++j) {
        // Map entries are looked up with GetAtom(); m_molecule->atom() is
        // given that atom's GetIdx() minus one.
        matchedAtoms.append(m_molecule->atom(obmol.GetAtom(*j)->GetIdx()-1));
      }
    }

    widget->clearSelected();
    widget->setSelected(matchedAtoms, true);
    widget->update();
  }
Example #5
0
  //! Build the typing tables from in-memory rule lines.
  //! Each rule that does not start with '#' is split on " \t\n"; when it has
  //! at least two tokens, the first is compiled as a SMARTS pattern (appended
  //! to _sp and, as text, to smarts) and the second is appended to typ.
  //! \param rules one rule per element; longer than BUFF_SIZE-1 characters
  //!              are truncated
  void patty::assign_rules(std::vector<std::string> &rules)
  {
    vector<string> vs;
    char buffer[BUFF_SIZE];
    char tmp_str[BUFF_SIZE];
    OBSmartsPattern *sp;

    for (unsigned int i = 0; i < rules.size(); ++i)
      {
        // strncpy() does not terminate the destination when the source fills
        // it, so terminate explicitly; otherwise an over-long rule would leave
        // buffer without a '\0' and tokenize() would read past its end.
        strncpy(buffer, rules[i].c_str(), BUFF_SIZE - 1);
        buffer[BUFF_SIZE - 1] = '\0';

        if (buffer[0] == '#') // comment line
          continue;

        tokenize(vs,buffer," \t\n");
        if (vs.size() < 2)
          continue;

        strncpy(tmp_str,vs[0].c_str(), sizeof(tmp_str) - 1);
        tmp_str[sizeof(tmp_str) - 1] = '\0';

        // NOTE(review): the result of Init() is not checked; the pattern is
        // stored regardless so that _sp, smarts and typ stay index-aligned.
        sp = new OBSmartsPattern;
        sp->Init(tmp_str);
        _sp.push_back(sp);
        smarts.push_back(vs[0]);
        typ.push_back(vs[1]);
      }
  }
Example #6
0
  //! Read typing rules from a file.
  //! The file is first opened as given; if that fails, it is looked up as
  //! $BABEL_DATADIR + FILE_SEP_CHAR + infile (a warning is logged when the
  //! variable is not set, and the lookup then uses FILE_SEP_CHAR + infile).
  //! If neither can be opened a warning is logged and no rules are read.
  //! Every line that does not start with '#' and has at least two tokens
  //! (split on " \t\n") contributes a SMARTS pattern to _sp / smarts and a
  //! type string to typ.
  //! \param infile name of the rules file
  void patty::read_rules(const string &infile)
  {
    ifstream ifs, ifs1, *ifsP;
    vector<string> vs;
    char buffer[BUFF_SIZE];
    char tmp_str[BUFF_SIZE];
    string patty_dir;
    OBSmartsPattern *sp;

    ifs.open(infile.c_str());
    ifsP= &ifs;
    if (!ifs)
      {
        // Fall back to the data directory named by the environment.
        const char *datadir = getenv("BABEL_DATADIR");
        if (datadir == NULL)
          {
            stringstream errorMsg;
            errorMsg << "The BABEL_DATADIR environment variable is not defined" << endl;
            errorMsg << "Please define it so the program can find " << infile << endl;
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
          }
        else
          patty_dir = datadir;
        patty_dir += FILE_SEP_CHAR;
        patty_dir += infile;
        ifs1.open(patty_dir.c_str());
        ifsP= &ifs1;
      }

    // Test the stream itself: ifsP always points at one of the two local
    // streams, so checking the pointer could never detect a failed open.
    if (!*ifsP)
      {
        stringstream errorMsg;
        errorMsg << "Could not open " << patty_dir << endl;
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
        return;
      }

    while (ifsP->getline(buffer,BUFF_SIZE))
      {
        if (buffer[0] == '#') // comment line
          continue;

        tokenize(vs,buffer," \t\n");
        if (vs.size() < 2)
          continue;

        strncpy(tmp_str,vs[0].c_str(), sizeof(tmp_str) - 1);
        tmp_str[sizeof(tmp_str) - 1] = '\0';

        // NOTE(review): the result of Init() is not checked; the pattern is
        // stored regardless so that _sp, smarts and typ stay index-aligned.
        sp = new OBSmartsPattern;
        sp->Init(tmp_str);
        _sp.push_back(sp);
        smarts.push_back(vs[0]);
        typ.push_back(vs[1]);
      }
  }
Example #7
0
  void OBPhModel::ParseLine(const char *buffer)
  {
    vector<string> vs;
    OBSmartsPattern *sp;

    if (buffer[0] == '#')
      return;

    if (EQn(buffer,"TRANSFORM",7))
      {
        tokenize(vs,buffer);
        if (vs.size() < 5)
          {
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        OBChemTsfm *tsfm = new OBChemTsfm;
        if (!tsfm->Init(vs[1],vs[3]))
          {
            delete tsfm;
            tsfm = NULL;
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        _vtsfm.push_back(tsfm);
        _vpKa.push_back(atof(vs[4].c_str()));
      }
    else if (EQn(buffer,"SEEDCHARGE",10))
      {
        tokenize(vs,buffer);
        if (vs.size() < 2)
          {
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        sp = new OBSmartsPattern;
        if (!sp->Init(vs[1]) || (vs.size()-2) != sp->NumAtoms())
          {
            delete sp;
            sp = NULL;
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse line in phmodel table from phmodel.txt", obInfo);
            return;
          }

        vector<double> vf;
        vector<string>::iterator i;
        for (i = vs.begin()+2;i != vs.end();++i)
          vf.push_back(atof((char*)i->c_str()));

        _vschrg.push_back(pair<OBSmartsPattern*,vector<double> > (sp,vf));
      }
  }
    //! Count the unique matches of the stored SMARTS (_smarts) in a molecule.
    //! \param pOb   object to examine; must be an OBMol
    //! \param param unused here
    //! \return size of the unique match list, or 0 when pOb is not an OBMol,
    //!         the SMARTS cannot be initialised, or nothing matches
    double Predict(OBBase* pOb, string* param=NULL)
    {
      OBMol* mol = dynamic_cast<OBMol*>(pOb);
      if (mol == NULL)
        return 0;

      OBSmartsPattern pattern;
      if (!pattern.Init(_smarts))
        return 0.0;
      if (!pattern.Match(*mol))
        return 0.0;

      return pattern.GetUMapList().size();
    }
Example #9
0
/** The descriptor name can be s or smarts and is case independent
    The operator to return true for a match can be:
    one or more spaces, =, ==,  or nothing if the SMARTS string
    starts with a letter.
    To return true for a mismatch the operator is !=
    A space or tab should follow the SMARTS string.

    \param pOb        object to test; must be an OBMol
    \param optionText stream positioned at the operator/SMARTS text; it is
                      consumed even when noEval is set
    \param noEval     when true, only parse the option text and return false
    \return the (possibly negated) result of a single SMARTS match; false
            when pOb is not an OBMol or the SMARTS cannot be parsed
 **/      
bool SmartsFilter::Compare(OBBase* pOb, istream& optionText, bool noEval)
{
  OBMol* pmol = dynamic_cast<OBMol*> (pOb);
  if(!pmol)
    return false;
  
  string smarts;
  bool matchornegate = ReadStringFromFilter(optionText, smarts);
  if(noEval)
    return false;

  // An unparsable SMARTS must not be treated as "no match": with the !=
  // operator that would be negated into a pass for every molecule.
  OBSmartsPattern sp;
  if(!sp.Init(smarts))
    return false;

  bool ret = sp.Match(*pmol,true);//single match  
  if(!matchornegate)
    ret = !ret;
  return ret;
}
Example #10
0
  //! Build a SMARTS-count fingerprint for a molecule.
  //! Pattern number n owns the 8 consecutive bits starting at bit n*8; the
  //! first min(8, number of unique matches) of those bits are set.
  //! The fingerprint is sized to 16 words, so patterns whose bits would not
  //! fit into that space are not encoded.
  //! \param pOb   object to fingerprint; must be an OBMol
  //! \param fp    receives the fingerprint words (resized to 16)
  //! \param nbits if non-zero, fp is passed to Fold() with this value
  //! \return false when pOb is not an OBMol, true otherwise
  bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits) 
  {
    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
    if(!pmol)
      return false;

    //Read patterns file if it has not been done already
    if(smartsStrings.empty())
      ReadPatternFile(_patternsfile, smartsStrings);

    // Fixed-size fingerprint (the size is not derived from the pattern count).
    fp.resize(16);

    const unsigned int bitsPerPattern = 8;
    const unsigned int capacity =
      static_cast<unsigned int>(fp.size() * sizeof(unsigned int) * 8);

    for(unsigned int n=0;n<smartsStrings.size();++n)
      {
        // Stop before writing past the end of fp: SetBit() is handed a raw
        // bit offset and fp only holds `capacity` bits.
        const unsigned int offset = n*bitsPerPattern;
        if(offset + bitsPerPattern > capacity)
          break;

        OBSmartsPattern sp;
        if(!sp.Init(smartsStrings[n]) || !sp.Match(*pmol))
          continue;

        // One bit per unique match, saturating at bitsPerPattern.
        const unsigned int matches =
          static_cast<unsigned int>(sp.GetUMapList().size());
        for(unsigned int i=0;i<bitsPerPattern && i<matches;++i)
          SetBit(fp, offset+i);
      }

    if(nbits)
      Fold(fp, nbits);
    return true;
  }
Example #11
0
bool OBGroupContrib::ParseFile(const char *filename)
{
    OBSmartsPattern *sp;

    // open data file
    ifstream ifs;

    if (OpenDatafile(ifs, filename).length() == 0) {
        obErrorLog.ThrowError(__FUNCTION__, " Could not find contribution data file.", obError);
        return false;
    }

    vector<string> vs;
    bool heavy = false;

    char buffer[80];
    while (ifs.getline(buffer, 80)) {
        if (EQn(buffer, "#", 1)) continue;
        if (EQn(buffer, ";heavy", 6))
            heavy = true;
        else if (EQn(buffer, ";", 1)) continue;


        tokenize(vs, buffer);
        if (vs.size() < 2)
            continue;

        sp = new OBSmartsPattern;
        if (sp->Init(vs[0])) {
            if (heavy)
                _contribsHeavy.push_back(pair<OBSmartsPattern*, double> (sp, atof(vs[1].c_str())));
            else
                _contribsHydrogen.push_back(pair<OBSmartsPattern*, double> (sp, atof(vs[1].c_str())));
        } else {
            delete sp;
            sp = NULL;
            obErrorLog.ThrowError(__FUNCTION__, " Could not parse SMARTS from contribution data file", obInfo);
            return false;
        }
    }

    return true;
}
Example #12
0
// For every SMARTS in `data`, prints the pattern followed by the ids of all
// molecules in `data` that match it (single-match mode), in the form
//   <smarts>\t[ id id ... ]
void CheckSmarts::checkSmarts() {

	OBSmartsPattern smartsPattern;

	for (unsigned int i = 0; i < data->num_smarts(); i++) {

		cout << *data->get_smarts(i) << "\t[ ";

		// Compile the pattern once per SMARTS rather than once per molecule;
		// a pattern that fails to parse simply matches nothing.
		const bool parsed = smartsPattern.Init(*data->get_smarts(i));

		for (unsigned int j = 0; j < data->num_smiles(); j++) {

			if (parsed && smartsPattern.Match(*data->get_mol(j), true))
				cout << data->get_id(j) << " ";
		}

		cout << "]\n";
	}
}
Example #13
0
/** SMARTS filter comparison.
    The descriptor name is "s" or "smarts" (case independent).
    A match yields true when the operator is one or more spaces, "=", "==",
    or absent (allowed when the SMARTS string starts with a letter).
    A mismatch yields true when the operator is "!=".
    The SMARTS string should be followed by a space or tab.

    Returns false when pOb is not an OBMol, when noEval is set (the option
    text is still consumed), or when the SMARTS cannot be initialised.
 **/
bool SmartsFilter::Compare(OBBase *pOb, istream &optionText, bool noEval,
                           std::string *) {
  OBMol *mol = dynamic_cast<OBMol *>(pOb);
  if (!mol)
    return false;

  // Always consume the option text, even if no evaluation is requested.
  string pattern;
  const bool wantMatch = ReadStringFromFilter(optionText, pattern);
  if (noEval)
    return false;

  OBSmartsPattern matcher;
  if (!matcher.Init(pattern))
    return false; // unusable SMARTS: fail gracefully

  const bool matched = matcher.Match(*mol, true); // single match
  return wantMatch ? matched : !matched;
}
Example #14
0
int smartsparse(int argc, char* argv[])
{
  int defaultchoice = 1;
  
  int choice = defaultchoice;

  if (argc > 1) {
    if(sscanf(argv[1], "%d", &choice) != 1) {
      printf("Couldn't parse that input as a number\n");
      return -1;
    }
  }
  
  cout << "# Testing SMARTS Parsing...  \n";

  std::ifstream ifs;
  if (!SafeOpen(ifs, nsmarts_file.c_str()))
    {
      cout << "Bail out! Cannot read " << nsmarts_file << endl;
      return -1; // test failed
    }

  //read in the SMARTS test patterns
  char buffer[BUFF_SIZE];
  OBSmartsPattern sp;
  unsigned int patterns = 0;
  for (;ifs.getline(buffer,BUFF_SIZE);)
    {
      if (buffer[0] == '#') // skip comment line
        continue;
      
      if (sp.Init(buffer))
        cout << "ok " << ++patterns << endl;
      else
        cout << "not ok " << ++patterns << " failed on " << buffer << endl;
    }
  ifs.close();
  // output the number of tests run
  cout << "1.." << patterns << endl;

  // Passed Test
  return 0;
}
  void OBBondTyper::ParseLine(const char *buffer)
  {
    vector<string> vs;
    vector<int>    bovector;
    OBSmartsPattern *sp;

    if (buffer[0] != '#')
      {
        tokenize(vs,buffer);
        // Make sure we actually have a SMARTS pattern plus at least one triple
        // and make sure we have the correct number of integers
        if (vs.empty() || vs.size() < 4)
          return; // just ignore empty (or short lines)
        else if (!vs.empty() && vs.size() >= 4 && (vs.size() % 3 != 1))
          {
            stringstream errorMsg;
            errorMsg << " Error in OBBondTyper. Pattern is incorrect, found "
                     << vs.size() << " tokens." << endl;
            errorMsg << " Buffer is: " << buffer << endl;
            obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
            return;
          }

        sp = new OBSmartsPattern;
        if (sp->Init(vs[0]))
          {
            for (unsigned int i = 1; i < vs.size() ; ++i)
              {
                bovector.push_back( atoi((char *)vs[i].c_str()) );
              }
            _fgbonds.push_back(pair<OBSmartsPattern*,vector<int> >
                               (sp, bovector));
          }
        else
          {
            delete sp;
            sp = NULL;
          }
      }
  }
Example #16
0
  void OBRingTyper::ParseLine(const char *buffer)
  {
    vector<string> vs;
    OBSmartsPattern *sp;

    if (EQn(buffer,"RINGTYP",7)) {
      tokenize(vs,buffer);
      if (vs.empty() || vs.size() < 3) {
        obErrorLog.ThrowError(__FUNCTION__, " Could not parse RING line in ring type table from ringtyp.txt", obInfo);
        return;
      }
      sp = new OBSmartsPattern;
      if (sp->Init(vs[2]))
        _ringtyp.push_back(pair<OBSmartsPattern*,string> (sp,vs[1]));
      else {
        delete sp;
        sp = NULL;
        obErrorLog.ThrowError(__FUNCTION__, " Could not parse RING line in ring type table from ringtyp.txt", obInfo);
        return;
      }
    }
  }
Example #17
0
///////////////////////////////////////////////////////////////////////////////
//! \brief Find the molecule(s) with or without a given SMART pattern
//!
//! Command line: [options] "PATTERN" [filename]
//!   -v      print the molecules that do NOT match
//!   -c      print only the number of selected molecules
//!   -i fmt  input (and output) format; defaults to "smi"
//!   -f      full match: only molecules whose heavy-atom count equals the
//!           number of pattern atoms can match
//!   -n      print only the molecule titles
//!   -t NUM  require exactly NUM unique matches
//!   --      read from standard input even if a filename is given
//! Molecules are read from the file (or standard input) one at a time until
//! an empty molecule is returned, and the selected ones are written to
//! standard output.
int main(int argc,char **argv)
{
  // getopt() returns an int. Keeping it in a plain char makes the "!= -1"
  // end-of-options test always true on platforms where char is unsigned.
  int c;
  unsigned int ntimes=0; // number of times SMARTS matches in a molecule
  unsigned int numMatching = 0; // number of matching molecules (for -c flag)
  bool count=false, invert=false, full=false, name_only=false;
  char *FileIn = NULL, *Pattern = NULL;
  char *program_name = argv[0];
  bool useInFile = true;

  OBConversion conv(&cin,&cout);
  OBFormat *pFormat = conv.FindFormat("smi"); // default format is SMILES
    
  // Parse options
  while ((c = getopt(argc, argv, "t:nvcfi:-")) != -1)
    {
#ifdef _WIN32
      char optopt = static_cast<char>(c);
#endif
      switch (c)
        {
        case 't': // request ntimes unique matches
          // ntimes is unsigned, so scan it with the matching conversion.
          if (sscanf(optarg, "%u", &ntimes) != 1)
            {
              cerr << program_name << ": unable to parse -t option" << endl;
              exit (-1);
            }
          break;

        case 'i':
          //The ID provided by the OBFormat class is used as 
          // the identifying file extension. This is a slight
          // reduction in flexibility (which is not currently used)
          pFormat = conv.FindFormat(optarg);
            
          if(pFormat==NULL)
            {
              cerr << program_name << ": cannot read input format!" << endl;
              exit(-1);
            }
          break;

        case 'n': // print the molecule name only
          name_only = true;
          break;

        case 'c': // count the number of match
          count = true;
          break;

        case 'v': // match only the molecules without the pattern
          invert = true;
          break;

        case 'f':
          full = true;
          break;

        case '-':
          useInFile = false;
          break;

        case '?':
          if (isprint (optopt))
            fprintf (stderr, "Unknown option `-%c'.\n", optopt);
          else
            fprintf (stderr,
                     "Unknown option character `\\x%x'.\n",
                     optopt);
          return 1;
        }
    }
  int index = optind;

  // Exactly one (pattern) or two (pattern + file) positional arguments.
  if (argc-index != 2 && argc-index != 1)
    {
      string err = "Usage: ";
      err += program_name;
      err += " [options] \"PATTERN\" <filename>\n";
      err += "If no filename is supplied, then obgrep will use stdin instead.\n";
      err += "Options:\n";
      err += "   -v      Invert the matching, print non-matching molecules\n";
      err += "   -c      Print the number of matched molecules\n";
      err += "   -i <format> Specify the input and output format\n";
      err += "   -f      Full match, print matching-molecules when the number\n";
      err += "           of heavy atoms is equal to the number of PATTERN atoms\n";
      err += "   -n      Only print the name of the molecules\n";
      err += "   -t NUM  Print a molecule only if the PATTERN occurs NUM times inside the molecule\n";
      cerr << err; // no std::ends: that would write a stray NUL to stderr
      exit(-1);
    }

  Pattern = argv[index++];
  if (argc - index == 1)
    FileIn  = argv[index];

  ifstream ifs;
  if (useInFile && FileIn != NULL)
    {
      // Read the file
      ifs.open(FileIn);
      if (!ifs)
        {
          cerr << program_name << ": cannot read input file!" << endl;
          exit (-1);
        }
      conv.SetInStream(&ifs);

      // Find Input filetype
      if (pFormat == NULL) {
          pFormat = conv.FormatFromExt(FileIn);
          if (pFormat == NULL)
            {
              cerr << program_name << ": cannot read input format!" << endl;
              return (-1);
            }
      }
    }

  if (! conv.SetInAndOutFormats(pFormat, pFormat))
    {
      cerr << program_name << ": cannot read or write to this file format" << endl;
      return (-1);
    }

  // Compile the SMARTS once; refuse to run with a pattern that does not
  // parse, since every Match() would then just report "no match".
  OBSmartsPattern sp;
  if (!sp.Init(Pattern))
    {
      cerr << program_name << ": cannot parse the SMARTS pattern!" << endl;
      return (-1);
    }

  OBMol mol;

  // Search for pattern
  for (;;)
    {
      mol.Clear();
      conv.Read(&mol);
      if (mol.Empty())
        break;

      ////////////////////////////////////////////////////////////////
      // Do not lose time trying to match the pattern if the matching
      // is impossible: a full match cannot succeed when the number of
      // heavy atoms differs from the number of pattern atoms.
      const bool impossible_match =
        full && (sp.NumAtoms() != mol.NumHvyAtoms());

      bool matched = false;
      if (!impossible_match)
        {
          matched = sp.Match(mol);

          // With -t, a match additionally requires exactly ntimes unique
          // matches (same set of atoms counts once).
          if (ntimes && sp.GetUMapList().size() != ntimes)
            matched = false;
        }

      ////////////////////////////////////////////////////////////////
      // A molecule is selected when it matched and -v is off, or when it
      // did not match and -v is on.
      if (matched != invert)
        {
          if (!count)
            {
              if ( name_only )
                cout << mol.GetTitle() << endl;
              else
                conv.Write(&mol, &cout);
            }
          numMatching++;
        }
    } // end for loop

  ////////////////////////////////////////////////////////////////
  // Only print the number of matched molecules as requested
  if (count)
    {
      cout << numMatching << endl;
    }

  // NOTE(review): the normal path has always returned 1; kept unchanged in
  // case callers depend on this exit status.
  return(1);
}
Example #18
0
int main(int argc, char* argv[])
{
	// Check the required number of command line arguments.
	if (argc != 5)
	{
		cout << "usr host user pwd jobs_path" << endl;
		return 0;
	}

	// Fetch command line arguments.
	const auto host = argv[1];
	const auto user = argv[2];
	const auto pwd = argv[3];
	const path jobs_path = argv[4];

	// Connect to host and authenticate user.
	DBClientConnection conn;
	{
		cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl;
		string errmsg;
		if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg)))
		{
			cerr << local_time() << errmsg << endl;
			return 1;
		}
	}

	// Initialize constants.
	cout << local_time() << "Initializing" << endl;
	const auto collection = "istar.usr";
	const auto epoch = date(1970, 1, 1);
	const size_t num_usrs = 2;
	constexpr array<size_t, num_usrs> qn{{ 12, 60 }};
	constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }};
	const size_t num_references = 4;
	const size_t num_subsets = 5;
	const array<string, num_subsets> SubsetSMARTS
	{{
		"[!#1]", // heavy
		"[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic
		"[a]", // aromatic
		"[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor
		"[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor
	}};

	// Initialize variables.
	array<array<double, qn.back()>, 1> qw;
	array<array<double, qn.back()>, 1> lw;
	auto q = qw[0];
	auto l = lw[0];

	// Read ZINC ID file.
	const string_array<size_t> zincids("16_zincid.txt");
	const auto num_ligands = zincids.size();

	// Read SMILES file.
	const string_array<size_t> smileses("16_smiles.txt");
	assert(smileses.size() == num_ligands);

	// Read supplier file.
	const string_array<size_t> suppliers("16_supplier.txt");
	assert(suppliers.size() == num_ligands);

	// Read property files of floating point types and integer types.
	const auto zfproperties = read<array<float, 4>>("16_zfprop.f32");
	assert(zfproperties.size() == num_ligands);
	const auto ziproperties = read<array<int16_t, 5>>("16_ziprop.i16");
	assert(ziproperties.size() == num_ligands);

	// Open files for subsequent reading.
	std::ifstream usrcat_bin("16_usrcat.f64");
	stream_array<size_t> ligands("16_ligand.pdbqt");
	assert(ligands.size() == num_ligands);
	array<vector<double>, 2> scores
	{{
		vector<double>(num_ligands, 0),
		vector<double>(num_ligands, 0)
	}};
	const auto& u0scores = scores[0];
	const auto& u1scores = scores[1];
	vector<size_t> scase(num_ligands);

	// Enter event loop.
	cout << local_time() << "Entering event loop" << endl;
	bool sleeping = false;
	while (true)
	{
		// Fetch an incompleted job in a first-come-first-served manner.
		if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl;
		BSONObj info;
		conn.runCommand("istar", BSON("findandmodify" << "usr" << "query" << BSON("done" << BSON("$exists" << false) << "started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << Date_t(duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count())))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0
		const auto value = info["value"];
		if (value.isNull())
		{
			// No incompleted jobs. Sleep for a while.
			if (!sleeping) cout << local_time() << "Sleeping" << endl;
			sleeping = true;
			this_thread::sleep_for(chrono::seconds(10));
			continue;
		}
		sleeping = false;
		const auto job = value.Obj();

		// Obtain job properties.
		const auto _id = job["_id"].OID();
		cout << local_time() << "Executing job " << _id.str() << endl;
		const auto job_path = jobs_path / _id.str();
		const auto format = job["format"].String();
		const auto email = job["email"].String();

		// Parse the user-supplied ligand.
		OBMol obMol;
		OBConversion obConversion;
		obConversion.SetInFormat(format.c_str());
		obConversion.ReadFile(&obMol, (job_path / ("ligand." + format)).string());
		const auto num_atoms = obMol.NumAtoms();
//		obMol.AddHydrogens(); // Adding hydrogens does not seem to affect SMARTS matching.

		// Classify subset atoms.
		array<vector<int>, num_subsets> subsets;
		for (size_t k = 0; k < num_subsets; ++k)
		{
			auto& subset = subsets[k];
			subset.reserve(num_atoms);
			OBSmartsPattern smarts;
			smarts.Init(SubsetSMARTS[k]);
			smarts.Match(obMol);
			for (const auto& map : smarts.GetMapList())
			{
				subset.push_back(map.front());
			}
		}
		const auto& subset0 = subsets.front();

		// Check user-provided ligand validity.
		if (subset0.empty())
		{
			// Record job completion time stamp.
			const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count();
			conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch))));

			// Send error notification email.
			cout << local_time() << "Sending an error notification email to " << email << endl;
			MailMessage message;
			message.setSender("usr <*****@*****.**>");
			message.setSubject("Your usr job has failed");
			message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nFailed: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nReason: failed to parse the provided ligand.");
			message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email));
			SMTPClientSession session("137.189.91.190");
			session.login();
			session.sendMessage(message);
			session.close();
			continue;
		}

		// Calculate the four reference points.
		const auto n = subset0.size();
		const auto v = 1.0 / n;
		array<vector3, num_references> references{};
		auto& ctd = references[0];
		auto& cst = references[1];
		auto& fct = references[2];
		auto& ftf = references[3];
		for (const auto i : subset0)
		{
			ctd += obMol.GetAtom(i)->GetVector();
		}
		ctd *= v;
		double cst_dist = numeric_limits<double>::max();
		double fct_dist = numeric_limits<double>::lowest();
		double ftf_dist = numeric_limits<double>::lowest();
		for (const auto i : subset0)
		{
			const auto& a = obMol.GetAtom(i)->GetVector();
			const auto this_dist = a.distSq(ctd);
			if (this_dist < cst_dist)
			{
				cst = a;
				cst_dist = this_dist;
			}
			if (this_dist > fct_dist)
			{
				fct = a;
				fct_dist = this_dist;
			}
		}
		for (const auto i : subset0)
		{
			const auto& a = obMol.GetAtom(i)->GetVector();
			const auto this_dist = a.distSq(fct);
			if (this_dist > ftf_dist)
			{
				ftf = a;
				ftf_dist = this_dist;
			}
		}

		// Precalculate the distances between each atom and each reference point.
		array<vector<double>, num_references> dista;
		for (size_t k = 0; k < num_references; ++k)
		{
			const auto& reference = references[k];
			auto& dists = dista[k];
			dists.resize(1 + num_atoms); // OpenBabel atom index starts from 1. dists[0] is dummy.
			for (size_t i = 0; i < n; ++i)
			{
				dists[subset0[i]] = sqrt(obMol.GetAtom(subset0[i])->GetVector().distSq(reference));
			}
		}

		// Calculate USR and USRCAT features of the input ligand.
		size_t qo = 0;
		for (const auto& subset : subsets)
		{
			const auto n = subset.size();
			for (size_t k = 0; k < num_references; ++k)
			{
				const auto& distp = dista[k];
				vector<double> dists(n);
				for (size_t i = 0; i < n; ++i)
				{
					dists[i] = distp[subset[i]];
				}
				array<double, 3> m{};
				if (n > 2)
				{
					const auto v = 1.0 / n;
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i];
						m[0] += d;
					}
					m[0] *= v;
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[1] += d * d;
					}
					m[1] = sqrt(m[1] * v);
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[2] += d * d * d;
					}
					m[2] = cbrt(m[2] * v);
				}
				else if (n == 2)
				{
					m[0] = 0.5 *     (dists[0] + dists[1]);
					m[1] = 0.5 * fabs(dists[0] - dists[1]);
				}
				else if (n == 1)
				{
					m[0] = dists[0];
				}
				#pragma unroll
				for (const auto e : m)
				{
					q[qo++] = e;
				}
			}
		}
		assert(qo == qn.back());

		// Compute USR and USRCAT scores.
		usrcat_bin.seekg(0);
		for (size_t k = 0; k < num_ligands; ++k)
		{
			usrcat_bin.read(reinterpret_cast<char*>(l.data()), sizeof(l));
			double s = 0;
			#pragma unroll
			for (size_t i = 0, u = 0; u < num_usrs; ++u)
			{
				#pragma unroll
				for (const auto qnu = qn[u]; i < qnu; ++i)
				{
					s += fabs(q[i] - l[i]);
				}
				scores[u][k] = s;
			}
		}
		assert(usrcat_bin.tellg() == sizeof(l) * num_ligands);

		// Sort ligands by USRCAT score and then by USR score and then by ZINC ID.
		iota(scase.begin(), scase.end(), 0);
		sort(scase.begin(), scase.end(), [&](const size_t val0, const size_t val1)
		{
			const auto u1score0 = u1scores[val0];
			const auto u1score1 = u1scores[val1];
			if (u1score0 == u1score1)
			{
				const auto u0score0 = u0scores[val0];
				const auto u0score1 = u0scores[val1];
				if (u0score0 == u0score1)
				{
					return zincids[val0] < zincids[val1];
				}
				return u0score0 < u0score1;
			}
			return u1score0 < u1score1;
		});

		// Write results.
		filtering_ostream log_csv_gz;
		log_csv_gz.push(gzip_compressor());
		log_csv_gz.push(file_sink((job_path / "log.csv.gz").string()));
		log_csv_gz.setf(ios::fixed, ios::floatfield);
		log_csv_gz << "ZINC ID,USR score,USRCAT score\n" << setprecision(8);
		filtering_ostream ligands_pdbqt_gz;
		ligands_pdbqt_gz.push(gzip_compressor());
		ligands_pdbqt_gz.push(file_sink((job_path / "ligands.pdbqt.gz").string()));
		ligands_pdbqt_gz.setf(ios::fixed, ios::floatfield);
		for (size_t t = 0; t < 10000; ++t)
		{
			const size_t k = scase[t];
			const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline.
			const auto u0score = 1 / (1 + scores[0][k] * qv[0]);
			const auto u1score = 1 / (1 + scores[1][k] * qv[1]);
			log_csv_gz << zincid << ',' << u0score << ',' << u1score << '\n';

			// Only write conformations of the top ligands to ligands.pdbqt.gz.
			if (t >= 1000) continue;

			const auto zfp = zfproperties[k];
			const auto zip = ziproperties[k];
			ligands_pdbqt_gz
				<< "MODEL " << '\n'
				<< "REMARK 911 " << zincid
				<< setprecision(3)
				<< ' ' << setw(8) << zfp[0]
				<< ' ' << setw(8) << zfp[1]
				<< ' ' << setw(8) << zfp[2]
				<< ' ' << setw(8) << zfp[3]
				<< ' ' << setw(3) << zip[0]
				<< ' ' << setw(3) << zip[1]
				<< ' ' << setw(3) << zip[2]
				<< ' ' << setw(3) << zip[3]
				<< ' ' << setw(3) << zip[4]
				<< '\n'
				<< "REMARK 912 " << smileses[k]  // A newline is already included in smileses[k].
				<< "REMARK 913 " << suppliers[k] // A newline is already included in suppliers[k].
				<< setprecision(8)
				<< "REMARK 951    USR SCORE: " << setw(10) << u0score << '\n'
				<< "REMARK 952 USRCAT SCORE: " << setw(10) << u1score << '\n'
			;
			const auto lig = ligands[k];
			ligands_pdbqt_gz.write(lig.data(), lig.size());
			ligands_pdbqt_gz << "ENDMDL\n";
		}

		// Update progress.
		cout << local_time() << "Setting done time" << endl;
		const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count();
		conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch))));

		// Send completion notification email.
		cout << local_time() << "Sending a completion notification email to " << email << endl;
		MailMessage message;
		message.setSender("istar <*****@*****.**>");
		message.setSubject("Your usr job has completed");
		message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nCompleted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nResult: http://istar.cse.cuhk.edu.hk/usr/iview/?" + _id.str());
		message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email));
		SMTPClientSession session("137.189.91.190");
		session.login();
		session.sendMessage(message);
		session.close();
	}
}
  void OBBondTyper::AssignFunctionalGroupBonds(OBMol &mol)
  {
    if (!_init)
      Init();

    OBSmartsPattern *currentPattern;
    OBBond *b1, *b2;
    OBAtom *a1,*a2, *a3;
    double angle, dist1, dist2;
    vector<int> assignments;
    vector<vector<int> > mlist;
    vector<vector<int> >::iterator matches, l;
    vector<pair<OBSmartsPattern*, vector<int> > >::iterator i;
    unsigned int j;

    // Loop through for all the functional groups and assign bond orders
    for (i = _fgbonds.begin();i != _fgbonds.end();++i)
      {
        currentPattern = i->first;
        assignments = i->second;

        if (currentPattern && currentPattern->Match(mol))
          {
            mlist = currentPattern->GetUMapList();
            for (matches = mlist.begin(); matches != mlist.end(); ++matches)
              {
                // Now loop through the bonds to assign from _fgbonds
                for (j = 0; j < assignments.size(); j += 3)
                  {
                    // along the assignments vector: atomID1 atomID2 bondOrder
                    a1 = mol.GetAtom((*matches)[ assignments[j] ]);
                    a2 = mol.GetAtom((*matches)[ assignments[j+1 ] ]);
                    if (!a1 || !a2) continue;

                    b1 = a1->GetBond(a2);

                    if (!b1) continue;
                    b1->SetBO(assignments[j+2]);
                  } // bond order assignments
              } // each match
          } // current pattern matches

      } // for(functional groups)

    // FG with distance and/or bond criteria
    // Carbonyl oxygen C=O
    OBSmartsPattern carbo; carbo.Init("[#8D1][#6](*)(*)");

    if (carbo.Match(mol))
      {
        mlist = carbo.GetUMapList();
        for (l = mlist.begin(); l != mlist.end(); ++l)
          {
            a1 = mol.GetAtom((*l)[0]);
            a2 = mol.GetAtom((*l)[1]);

            angle = a2->AverageBondAngle();
            dist1 = a1->GetDistance(a2);

            // carbonyl geometries ?
            if (angle > 115 && angle < 150 && dist1 < 1.28) {

              if ( !a1->HasDoubleBond() ) {// no double bond already assigned
                b1 = a1->GetBond(a2);

                if (!b1 ) continue;
                b1->SetBO(2);
              }
            }
          }
      } // Carbonyl oxygen

    // thione C=S
    OBSmartsPattern thione; thione.Init("[#16D1][#6](*)(*)");

    if (thione.Match(mol))
      {
        mlist = thione.GetUMapList();
        for (l = mlist.begin(); l != mlist.end(); ++l)
          {
            a1 = mol.GetAtom((*l)[0]);
            a2 = mol.GetAtom((*l)[1]);

            angle = a2->AverageBondAngle();
            dist1 = a1->GetDistance(a2);

            // thione geometries ?
            if (angle > 115 && angle < 150 && dist1 < 1.72) {

              if ( !a1->HasDoubleBond() ) {// no double bond already assigned
                b1 = a1->GetBond(a2);

                if (!b1 ) continue;
                b1->SetBO(2);
              }
            }
          }
      } // thione

    // Isocyanate N=C=O or Isothiocyanate
    bool dist1OK;
    OBSmartsPattern isocyanate; isocyanate.Init("[#8,#16;D1][#6D2][#7D2]");
    if (isocyanate.Match(mol))
      {
        mlist = isocyanate.GetUMapList();
        for (l = mlist.begin(); l != mlist.end(); ++l)
          {
            a1 = mol.GetAtom((*l)[0]);
            a2 = mol.GetAtom((*l)[1]);
            a3 = mol.GetAtom((*l)[2]);

            angle = a2->AverageBondAngle();
            dist1 = a1->GetDistance(a2);
            dist2 = a2->GetDistance(a3);

            // isocyanate geometry or Isotiocyanate geometry ?
            if (a1->IsOxygen())
              dist1OK =  dist1 < 1.28;
            else
              dist1OK =  dist1 < 1.72;

            if (angle > 150 && dist1OK && dist2 < 1.34) {

              b1 = a1->GetBond(a2);
              b2 = a2->GetBond(a3);
              if (!b1 || !b2) continue;
              b1->SetBO(2);
              b2->SetBO(2);

            }

          }
      } // Isocyanate

    // oxime C=S
    OBSmartsPattern oxime; oxime.Init("[#6D3][#7D2][#8D2]");

    if (oxime.Match(mol))
      {
        mlist = oxime.GetUMapList();
        for (l = mlist.begin(); l != mlist.end(); ++l)
          {
            a1 = mol.GetAtom((*l)[0]);
            a2 = mol.GetAtom((*l)[1]);

            angle = a2->AverageBondAngle();
            dist1 = a1->GetDistance(a2);

            // thione geometries ?
            if (angle > 110 && angle < 150 && dist1 < 1.4) {

              if ( !a1->HasDoubleBond() ) {// no double bond already assigned
                b1 = a1->GetBond(a2);

                if (!b1 ) continue;
                b1->SetBO(2);
              }
            }
          }
      } // oxime

  }
Example #20
0
// Filters a molecule by substructure: the option text holds either a SMARTS
// string or the name of a file with pattern molecules, optionally followed by
// "exact", "extract" or one or more colour names. A leading '~' (or the "v"
// option ID) inverts the test.
//
// Returns true when the molecule should be kept. When it is rejected because
// the pattern did not match (or matched with an inverted test), or because the
// option text could not be interpreted, the molecule is deleted and false is
// returned.
//
// The parsed option is kept in function-local statics so that it is parsed
// only once per conversion.
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the static variables vec, inv. The parameter is cleared so that:
  // (a) the original -s option in transform.cpp is inactive, and
  // (b) the parsing does not have to be done again for multi-molecule files

  // Look the option text up defensively: a missing map or missing entry is
  // treated like an empty option instead of dereferencing end().
  string txt; // ID can be "s" or "v"
  if(pmap)
  {
    OpMap::const_iterator optIter = pmap->find(GetID());
    if(optIter != pmap->end())
      txt = optIter->second;
  }

  static vector<string> vec;
  static bool inv;
  static int nPatternAtoms; //non-zero for exact matches
  static vector<OBQuery*> queries;
  vector<OBQuery*>::iterator qiter;

  const bool firstCall = !txt.empty();
  if(firstCall)
    tokenize(vec, txt);

  // Nothing usable was ever supplied (e.g. empty or whitespace-only option
  // text): vec[0] must not be touched.
  if(vec.empty())
  {
    obErrorLog.ThrowError(__FUNCTION__,
      "No SMARTS pattern or pattern file name was supplied.", obError, onceOnly);
    delete pmol;
    if(pConv)
      pConv->SetOneObjectOnly(); //stop conversion
    return false;
  }

  if(firstCall)
  {
    //Set up on first call

    // Queries left over from an earlier run (the clean-up at the end of this
    // function is skipped by the early returns below) must not be mixed with
    // the new ones.
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();

    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms);

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          if(pConv)
            pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;

    //disable old versions
    if(pConv)
      pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match = false;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;
  OBSmartsPattern sp;

  // NOTE(review): unlike the other rejection paths this one does not delete
  // the molecule - confirm whether that is intended.
  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol)
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1); // +1: stored indices are 1-based
          vecatomvec.push_back(atomvec);
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {
    if(!sp.Init(vec[0]))
    {
      string msg = vec[0] + " cannot be interpreted as either valid SMARTS "
        "or the name of a file with an extension known to OpenBabel "
        "that contains one or more pattern molecules.";
      obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
      delete pmol;
      pmol = NULL;
      if(pConv)
        pConv->SetOneObjectOnly(); //stop conversion
      return false;
    }

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
      pMappedAtoms = &sp.GetMapList();
  }

  if((!match && !inv) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  // From here on: !inv implies match, so pMappedAtoms is set and non-empty.
  if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if(vec[1]=="extract")
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(static_cast<size_t>(imol)>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  // Release the pattern queries once the last object has been processed.
  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}
Example #21
0
// Reads concatenated PDBQT ligands from stdin and, for each ligand, writes its
// USRCAT feature vector to stdout as raw doubles: for each of the 5 atom
// subsets and each of the 4 reference points, three moments of the
// atom-to-reference distance distribution (mean, root of the second central
// moment, cube root of the third central moment).
//
// A ligand ends at its "TORSDOF" record. The program stops at the first
// ligand that yields no ATOM/HETATM record (normally end of input).
int main(int argc, char* argv[])
{
	const array<string, 5> SmartsPatterns =
	{
		"[!#1]", // heavy
		"[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic
		"[a]", // aromatic
		"[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor
		"[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor
	};
	OBConversion obConversion;
	obConversion.SetInFormat("pdbqt");
	while (true)
	{
		// Collect one ligand: keep its text for Open Babel and its coordinates
		// (fixed PDB columns 31-38, 39-46, 47-54) for the distance calculations.
		vector<array<double, 3>> atoms;
		atoms.reserve(80);
		stringstream ss;
		for (string line; getline(cin, line);)
		{
			ss << line << '\n'; // no flush needed on an in-memory stream
			const auto record = line.substr(0, 6);
			if (record == "TORSDO") break;
			if (record != "ATOM  " && record != "HETATM") continue;
			atoms.push_back({ stod(line.substr(30, 8)), stod(line.substr(38, 8)), stod(line.substr(46, 8)) });
		}
		if (atoms.empty()) break;

		// Classify atoms into subsets by SMARTS. Open Babel atom indices start
		// from 1, hence the -1 to index into `atoms`.
		OBMol obMol;
		obConversion.Read(&obMol, &ss);
		array<vector<int>, 5> subsets;
		for (size_t k = 0; k < 5; ++k)
		{
			auto& subset = subsets[k];
			subset.reserve(atoms.size());
			OBSmartsPattern smarts;
			smarts.Init(SmartsPatterns[k]);
			smarts.Match(obMol);
			for (const auto& map : smarts.GetMapList())
			{
				subset.push_back(map.front() - 1);
			}
		}

		// Reference points, all derived from the heavy-atom subset:
		// ctd = centroid, cst = atom closest to ctd, fct = atom farthest from
		// ctd, ftf = atom farthest from fct.
		const auto& subset0 = subsets.front();
		const auto n = subset0.size();
		const auto v = n ? 1.0 / n : 0.0; // avoid 0 * inf = NaN when there are no heavy atoms
		array<double, 3> ctd{};
		array<double, 3> cst{};
		array<double, 3> fct{};
		array<double, 3> ftf{};
		for (size_t k = 0; k < 3; ++k)
		{
			for (const auto i : subset0)
			{
				const auto& a = atoms[i];
				ctd[k] += a[k];
			}
			ctd[k] *= v;
		}
		double cst_dist = numeric_limits<double>::max();
		double fct_dist = numeric_limits<double>::lowest();
		double ftf_dist = numeric_limits<double>::lowest();
		for (const auto i : subset0)
		{
			const auto& a = atoms[i];
			const auto this_dist = dist2(a, ctd);
			if (this_dist < cst_dist)
			{
				cst = a;
				cst_dist = this_dist;
			}
			if (this_dist > fct_dist)
			{
				fct = a;
				fct_dist = this_dist;
			}
		}
		for (const auto i : subset0)
		{
			const auto& a = atoms[i];
			const auto this_dist = dist2(a, fct);
			if (this_dist > ftf_dist)
			{
				ftf = a;
				ftf_dist = this_dist;
			}
		}

		// Emit the three moments per (subset, reference point).
		for (const auto& subset : subsets)
		{
			const auto n = subset.size();
			for (const auto& rpt : { ctd, cst, fct, ftf })
			{
				// An empty subset keeps all-zero moments. Without this guard
				// 1.0 / n is infinite and 0 * inf writes NaN into the output.
				array<double, 3> m{};
				if (n)
				{
					const auto v = 1.0 / n;
					vector<double> dists(n);
					for (size_t i = 0; i < n; ++i)
					{
						dists[i] = sqrt(dist2(atoms[subset[i]], rpt));
					}
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i];
						m[0] += d;
					}
					m[0] *= v;
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[1] += d * d;
					}
					m[1] = sqrt(m[1] * v);
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[2] += d * d * d;
					}
					m[2] = cbrt(m[2] * v);
				}
				cout.write(reinterpret_cast<char*>(m.data()), sizeof(m));
			}
		}
	}
}
Example #22
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  // Define location of file formats for testing
  #ifdef FORMATDIR
    char env[BUFF_SIZE];
    snprintf(env, BUFF_SIZE, "BABEL_LIBDIR=%s", FORMATDIR);
    putenv(env);
  #endif

  if (argc != 1)
    {
      if (strncmp(argv[1], "-g", 2))
        {
          cout << "Usage: smartstest\n";
          cout << "   Tests Open Babel SMILES/SMARTS pattern matching." << endl;
          return 0;
        }
      else
        {
          GenerateSmartsReference();
          return 0;
        }
    }
  
  cout << endl << "# Testing SMARTS...  \n";

  std::ifstream ifs;
  if (!SafeOpen(ifs, smarts_file.c_str()))
    {
      cout << "Bail out! Cannot read " << smarts_file << endl;
      return -1; // test failed
    }

  //read in the SMARTS test patterns
  char buffer[BUFF_SIZE];
  vector<OBSmartsPattern*> vsp;
  for (;ifs.getline(buffer,BUFF_SIZE);)
    {
      if (buffer[0] == '#') // skip comment line
        continue;

      OBSmartsPattern *sp = new OBSmartsPattern;

      if (sp->Init(buffer))
        vsp.push_back(sp);
      else
        delete sp;
    }
  ifs.close();

  std::ifstream rifs;
  if (!SafeOpen(rifs, results_file.c_str()))
    {
      cout << "Bail out! Cannot read in results file " << results_file << endl;
      return -1; // test failed
    }
  unsigned int npats;
  rifs.getline(buffer,BUFF_SIZE);
  sscanf(buffer,"%d %*s",&npats);

  //make sure the number of SMARTS patterns is the same as in the
  //reference data
  if (npats != vsp.size())
    {
      cout << "Bail out! Correct number of patterns not read in" <<
        "Read in " << vsp.size() << " expected " << npats << endl;
      return -1; // test failed
    }

  std::ifstream mifs;
  if (!SafeOpen(mifs, smilestypes_file.c_str()))
    {
      cout << "Bail out! Cannot read atom types " << smilestypes_file << endl;
      return -1; // test failed
    }

  unsigned int k;
  unsigned int res_line = 0;
  OBMol mol;
  vector<string> vs;
  vector<OBSmartsPattern*>::iterator i;
  vector<vector<int> > mlist;
  unsigned int currentMol = 0; // each molecule is a separate test
  bool molPassed = true;

  OBConversion conv(&mifs, &cout);
  if (! conv.SetInAndOutFormats("SMI","SMI"))
    {
      cout << "Bail out! SMILES format is not loaded" << endl;
      return -1;
    }

  //read in molecules, match SMARTS, and compare results to reference data
  for (;mifs;)
    {
      mol.Clear();
      conv.Read(&mol);
      if (mol.Empty())
        continue;

      currentMol++;
      molPassed = true;

      for (i = vsp.begin();i != vsp.end();i++)
        {
          if (!rifs.getline(buffer,BUFF_SIZE))
            {
              cout << "Bail out! Error reading reference data" << endl;
              return -1; // test failed
            }
          res_line++;

          tokenize(vs,buffer);
          (*i)->Match(mol);
          mlist = (*i)->GetMapList();
          if (mlist.size() != vs.size())
            {
              cout << "not ok " << currentMol
                   << " # number of matches different than reference\n";
              cout << "# Expected " << vs.size() << " matches, found "
                   << mlist.size() << "\n";
              cout << "# Error with molecule " << mol.GetTitle();
              cout << "#  on pattern " << (*i)->GetSMARTS() << "\n";
              if (mlist.size())
                cout << "# First match: atom #" << mlist[0][0] << "\n";
              molPassed = false;
              break;
            }

          if (mlist.size())
            {
              for (k = 0;k < vs.size();k++)
                {
                  if (atoi(vs[k].c_str()) != mlist[k][0])
                    {
                      cout << "not ok " << currentMol
                           << "# matching atom numbers different than reference\n";
                      cout << "# Expected " << vs[k] << " but found "
                           << mlist[k][0] << "\n";
                      cout << "# Molecule: " << mol.GetTitle() << "\n";
                      cout << "# Pattern: " << (*i)->GetSMARTS() << "\n";
                      molPassed = false;
                      break;
                    }
                }
              if (k != vs.size())
                {
                  molPassed = false;
                  break;
                }
            }
        }
      if (molPassed)
        cout << "ok " << currentMol << " # molecule passed tests\n";
    }

  // output the number of tests run
  cout << "1.." << currentMol << endl;

  // Passed Test
  return 0;
}
Example #23
0
void GenerateSmartsReference()
{
  std::ifstream ifs;
  if (!SafeOpen(ifs,smarts_file.c_str()))
    return;

  char buffer[BUFF_SIZE];
  vector<OBSmartsPattern*> vsp;
  for (;ifs.getline(buffer,BUFF_SIZE);)
    {
      if (buffer[0] == '#') // skip comment line
        continue;

      OBSmartsPattern *sp = new OBSmartsPattern;

      if (sp->Init(buffer))
        vsp.push_back(sp);
      else
        delete sp;
    }

  std::ofstream ofs;
  if (!SafeOpen(ofs, results_file.c_str()))
    return;

  ofs << vsp.size() << " patterns" << endl;
  std::ifstream mifs;
  if (!SafeOpen(mifs, smilestypes_file.c_str()))
    return;

  vector<int> vm;
  vector<vector<int> > mlist;
  vector<vector<int> >::iterator j;
  vector<OBSmartsPattern*>::iterator i;
  OBMol mol;
  OBConversion conv(&mifs, &cout);

  if(! conv.SetInAndOutFormats("SMI","SMI"))
    {
      cerr << "SMILES format is not loaded" << endl;
      return;
    }

  for (;mifs;)
    {
      mol.Clear();
      conv.Read(&mol);

      if (mol.Empty())
        continue;
      for (i = vsp.begin();i != vsp.end();i++)
        {
          (*i)->Match(mol);
          mlist = (*i)->GetMapList();
          for (j = mlist.begin();j != mlist.end();j++)
            {
              sprintf(buffer,"%3d",*(j->begin()));
              ofs << buffer;
            }
          ofs << endl;
        }
    }


  cerr << " SMARTS test results written successfully" << endl;
  return;
}
Example #24
0
///////////////////////////////////////////////////////////////////////////////
//! \brief Set a torsional bond to a given angle
int main(int argc,char **argv)
{
  const char *Pattern=NULL;
  unsigned int i, t, errflg = 0;
  int c;
  char flags[255];
  string err;
  bool graphOutput=false;

  // parse the command line -- optional -a flag to change all matching torsions
  if (argc < 3 || argc > 4) {
    errflg++;
  } else {
    FileIn = argv[1];
    Pattern = "[!$(*#*)&!D1]-!@[!$(*#*)&!D1]";
    // Read the atom position
    c = sscanf(argv[2], "%d", &angleSum);
	angle = 360./angleSum;
    if (argc == 4)
	{
    		c = sscanf(argv[3], "%s", flags);
		int flagid=1;
    		while (flags[flagid]!=0)
			switch (flags[flagid++])
			{
			case 'g':
				graphOutput=true;
			case 'e':
				forceField=OBForceField::FindForceField("MMFF94");
				isEnergyCalcing=true;
				break;
    			}
 	}
  }
  if (errflg) {
    cerr << "Usage: rkrotate <filename> <angle> [options]" << endl;
    exit(-1);
  }

  // create pattern
  OBSmartsPattern sp;
  sp.Init(Pattern);

  OBFormat* format = conv.FormatFromExt(FileIn);
  if(!(format && conv.SetInAndOutFormats(format, format))) { //in and out formats same
    cerr << "obrotate: cannot read and/or write this file format!" << endl;
    exit (-1);
  } //...NF

  //Open the molecule file
  ifstream ifs;

  // Read the file
  ifs.open(FileIn);
  if (!ifs) {
    cerr << "obrotate: cannot read input file!" << endl;
    exit (-1);
  }

  OBMol mol;
  vector< vector <int> > maplist;      // list of matched atoms
//  vector< vector <int> >::iterator m;  // and its iterators
  //   int tindex;
  
  // Set the angles
  for (;;) {
    mol.Clear();
    //NF      ifs >> mol;                   // Read molecule
    conv.Read(&mol,&ifs); //NF
    if (mol.Empty())
      break;

    if (sp.Match(mol)) {          
      // if match perform rotation
      maplist = sp.GetUMapList(); // get unique matches
      
      if (maplist.size() > 1)
        cerr << "obrotate: Found " << maplist.size() << " matches." << endl;

	energySheet=new MultiVector<double>(degrees=maplist.size(),angleSum);
	indexSheet=new int[maplist.size()];

      for (int EXO=0;EXO<maplist.size();++EXO)
	totalSum*=angleSum+EXO;
      // look at all the mapping atom but save only the last one.
	turnMol(mol,maplist,maplist.size()-1);
	
      if (graphOutput)
      {
	ofstream ofs("energyGraph.mlog");
	int ind[degrees];
	for (int i=0;i<degrees;++i)
		ind[i]=0;
	do
	{
		for (int i=0;i<degrees;++i)
			ofs<<ind[i]<<'\t';
		ofs<<energySheet->getVectorValue(ind)<<endl;
	}
	while(energySheet->incressIndex(ind));
      }

	if (isEnergyCalcing)
	{
		std::vector<int*> lowEnergySheet;
		totalSum=energySheet->getMinValues(lowEnergySheet);
		if (totalSum)
			outputMol(lowEnergySheet,mol,maplist,maplist.size()-1);
		else
			cerr << "rkrotate: No low energy conformation found." << endl;
	}

	cout << sum;
    } else {
      cerr << "obrotate: Found 0 matches for the SMARTS pattern." << endl;
      exit(-1);
    }
    //NF      cout << mol;
  }

  return(0);
}
Example #25
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  if (argc != 1)
    {
      cout << "Usage: smilesmatch\n";
      cout << "   Tests Open Babel SMILES/SMARTS pattern matching." << endl;
      return 0;
    }
  
  cout << endl << "# Testing SMILES self-matching using SMARTS...  \n";
  
  std::ifstream mifs;
  if (!SafeOpen(mifs, smilestypes_file.c_str()))
    {
      cout << "Bail out! Cannot read test data " << smilestypes_file << endl;
      return -1; // test failed
    }

  OBConversion conv(&mifs, &cout);
  if (! conv.SetInAndOutFormats("SMI","SMI"))
    {
      cout << "Bail out! SMILES format is not loaded" << endl;
      return -1;
    }

  unsigned int currentMol = 0;
  OBSmartsPattern smarts;
  OBMol mol;
  string buffer;

  //read in molecules and see if their SMARTS matches themselves
  while (getline(mifs, buffer))
    {
      mol.Clear();
      conv.ReadString(&mol, buffer);
      if (mol.Empty())
        continue;

      // trim off any title, etc.
      string::size_type pos = buffer.find_first_of(" \t\n\r");
      if (pos != string::npos)
        buffer.erase(pos);

      pos = buffer.find_first_of('.');
      if (pos != string::npos)
        continue;

      smarts.Init(buffer);
      if (smarts.Match(mol))
        cout << "ok " << ++currentMol << " # SMARTS matched the"
             << " SMILES molecule\n";
      else
        cout << "not ok " << ++currentMol << " # SMARTS did not match"
             << " for molecule " << buffer << "\n";
    }

  // output the number of tests run
  cout << "1.." << currentMol << endl;

  // Passed Test
  return 0;
}
Example #26
0
  // Perform any requested general transformations and filters on this OBMol.
  //
  // pOptions: map with the option letter or name as the key and any
  //           associated text as the value.
  // pConv:    passed through to OBOp::DoOps for the plugin options.
  //
  // For normal (non-filter) transforms: returns a pointer to the OBMol (this)
  // if ok, or NULL if one of the options failed.
  // For filters: returns a pointer to the OBMol (this) if there is a match and
  // NULL when there is not.
  // Whenever NULL is returned because of a failed filter or a failed option
  // handled in this function, the OBMol has been deleted (delete this), so the
  // caller must not use it afterwards.
  //
  // This is a virtual function, declared in mol.h. The OBBase version just
  // returns the object pointer.
  //
  // The filter options s and v allow an obgrep facility.
  // Used together they must both be true to allow a molecule through.
  OBBase* OBMol::DoTransformations(const std::map<std::string, std::string>* pOptions, OBConversion* pConv)
  {
    //Parse GeneralOptions
    if(pOptions->empty())
      return this;

    // DoOps calls Do() for each of the plugin options in the map
    // It normally returns true, even if there are no options but
    // can return false if one of the options decides that the
    // molecule should not be output. If it is a filtering op, it
    // should delete the molecule itself (unlike the -s, --filter options,
    // which delete it in this function).
    if(!OBOp::DoOps(this, pOptions, pConv))
      return (OBBase *)NULL;

    // Becomes false as soon as any transformation below reports failure.
    // It must never be set back to true, otherwise the failure is lost.
    bool ret=true;

    map<string,string>::const_iterator itr, itr2;

    if(pOptions->find("b")!=pOptions->end())
      if(!ConvertDativeBonds())
        ret=false;

    if(pOptions->find("d")!=pOptions->end())
      if(!DeleteHydrogens())
        ret=false;

    if(pOptions->find("h")!=pOptions->end())
      if(!AddHydrogens(false, false))
        ret=false;

    // The result of StripSalts() is not examined. This branch used to assign
    // ret = true, which silently discarded a failure recorded by the b, d or h
    // options above; ret is now left untouched.
    if(pOptions->find("r")!=pOptions->end())
      StripSalts();

    itr = pOptions->find("p");
    if(itr!=pOptions->end()) {
      //option text is the pH value
      double pH = strtod(itr->second.c_str(), 0);
      if(!AddHydrogens(false, true, pH))
        ret=false;
    }

    if(pOptions->find("c")!=pOptions->end())
      Center();

    itr = pOptions->find("title"); //Replaces title
    if(itr!=pOptions->end())
      SetTitle(itr->second.c_str());

    itr = pOptions->find("addtotitle"); //Appends text to title
    if(itr!=pOptions->end())
      {
        string title(GetTitle());
        title += itr->second;
        SetTitle(title.c_str());
      }

/*    itr = pOptions->find("addformula"); //Appends tab + formula to title
    if(itr!=pOptions->end())
      {
        string title(GetTitle());
        title += '\t' + GetSpacedFormula(1,"");//actually unspaced
        SetTitle(title.c_str());
      }
*/
    //Add an extra property to the molecule.
    //Parameter has attribute and value separated by a space
    itr = pOptions->find("property");
    if(itr!=pOptions->end())
      {
        string txt(itr->second);
        string::size_type pos = txt.find(' ');
        if(pos==string::npos)
          {
            obErrorLog.ThrowError(__FUNCTION__, "Missing property value", obError);
            ret=false;
          }
        else
          {
            string attr(txt.substr(0,pos)), val(txt.substr(pos+1));
            //Update value if it already exists
            OBPairData* dp = dynamic_cast<OBPairData*>(GetData(attr));
            if(dp) {
              dp->SetValue(val);
              dp->SetOrigin(userInput);
            }
            else {
              // Pair did not exist; make new one and hand it to SetData()
              dp = new OBPairData;
              dp->SetAttribute(attr);
              dp->SetValue(val);
              dp->SetOrigin(userInput);
              SetData(dp);
            }
          }
      }

    itr = pOptions->find("add");  //adds new properties from descriptors in list
    if(itr!=pOptions->end())
      OBDescriptor::AddProperties(this, itr->second);

    itr = pOptions->find("delete"); //deletes the specified properties
    if(itr!=pOptions->end())
      OBDescriptor::DeleteProperties(this, itr->second);

    itr = pOptions->find("append"); //Appends values of descriptors or properties to title
    if(itr!=pOptions->end())
      {
        string title(GetTitle());
        title += OBDescriptor::GetValues(this, itr->second);
        //a leading punct char is used only as a separator, not at start.
        //ispunct() requires a value representable as unsigned char (or EOF),
        //so cast first: a plain char may be negative for non-ASCII text.
        if(!title.empty() && ispunct(static_cast<unsigned char>(title[0])))
          title[0]=' ';
        SetTitle(Trim(title).c_str());
      }

    //Filter using OBDescriptor comparison and (older) SMARTS tests
    //Continue only if previous test was true.
    bool fmatch = true;
    itr = pOptions->find("filter");
    if(itr!=pOptions->end())
      {
        std::istringstream optionText(itr->second);
        fmatch = OBDescriptor::FilterCompare(this, optionText, false);
      }

    if(fmatch)
      {
        itr = pOptions->find("v");
        if(itr!=pOptions->end() && !itr->second.empty())
          {
            //inverse match quoted SMARTS string which follows
            //NOTE(review): the result of Init() is not checked here -- confirm
            //how an unparsable pattern should be treated.
            OBSmartsPattern sp;
            sp.Init(itr->second);
            fmatch = !sp.Match(*this); //(*pmol) ;
          }
      }
    if(fmatch)
      {
        itr = pOptions->find("s");
        if(itr!=pOptions->end() && !itr->second.empty())
          {
            //SMARTS filter
            //If exactmatch option set (probably in fastsearchformat) the
            //number of atoms in the pattern (passed as a string in the option text)
            //has to be the same as in the molecule.
            itr2 = pOptions->find("exactmatch");
            if(itr2!=pOptions->end() && NumHvyAtoms()!=atoi(itr2->second.c_str()))
              fmatch=false;
            else
              {
                //match quoted SMARTS string which follows
                OBSmartsPattern sp;
                sp.Init(itr->second.c_str());
                fmatch = sp.Match(*this);
              }
          }
      }

    if(!fmatch)
      {
        //filter failed: delete OBMol and return NULL
        delete this;
        return NULL;
      }

    if(!ret)
      {
        //a transformation failed: report it, delete OBMol and return NULL
        obErrorLog.ThrowError(__FUNCTION__, "Error executing an option", obError);
        delete this; //added 9March2006
        return NULL;
      }

    return this;
  }