Пример #1
0
/*
 * Decodes a target molecule from its binary (CMF) representation and runs
 * the substructure match against it.
 *
 * scanner     - source of the CMF-encoded molecule
 * xyz_scanner - optional source of the coordinates; may be null, in which
 *               case no coordinates are loaded
 *
 * Returns the result of matchLoadedTarget().
 */
bool MangoSubstructure::matchBinary (Scanner &scanner, Scanner *xyz_scanner)
{
    _validateQueryExtraData();

    profTimerStart(load_timer, "match.cmf");

    // The loader is re-created for each target
    cmf_loader.free();
    cmf_loader.create(_context.cmf_dict, scanner);

    // Stereo information is skipped when the query does not carry any
    const bool query_has_cistrans = _query_has_stereocare_bonds;
    const bool query_has_centers = _query_has_stereocenters;

    if (!query_has_cistrans)
    {
        cmf_loader->skip_cistrans = true;
    }
    if (!query_has_centers)
    {
        cmf_loader->skip_stereocenters = true;
    }

    cmf_loader->loadMolecule(_target);

    if (xyz_scanner)
    {
        cmf_loader->loadXyz(*xyz_scanner);
    }

    profTimerStop(load_timer);

    profTimerStart(init_timer, "match.init_target");
    _initTarget(true);
    profTimerStop(init_timer);

    return matchLoadedTarget();
}
Пример #2
0
/*
 * Loads the target molecule from its CMF form and runs the tautomer
 * matcher for the current query against it.
 *
 * Returns true when the matcher finds an embedding. The time spent in the
 * search is additionally accounted under a "found" or "not found" counter.
 */
bool MangoTautomer::matchBinary (Scanner &scanner)
{
   CmfLoader cmf_reader(_context.cmf_dict, scanner);

   cmf_reader.loadMolecule(_target);
   _initTarget(true);

   MoleculeTautomerMatcher tau_matcher(_target, _params.substructure);
   TautomerMethod method = RSMARTS;

   tau_matcher.setRulesList(&_context.tautomer_rules);
   tau_matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, method);
   tau_matcher.setQuery(_query.ref());

   profTimerStart(search_timer, "match.embedding");
   const bool found = tau_matcher.find();
   profTimerStop(search_timer);

   // Each outcome has its own profiling counter
   if (!found)
   {
      profIncTimer("match.embedding_not_found", profTimerGetTime(search_timer));
   }
   else
   {
      profIncTimer("match.embedding_found", profTimerGetTime(search_timer));
   }

   return found;
}
Пример #3
0
/*
 * Matches the stored reaction with the given storage index against the
 * current substructure query.
 *
 * Records whose first byte is non-zero are treated as deleted and skipped
 * without touching the matched/unmatched counters. On success the rowid
 * of the record is appended to `matched`.
 */
void RingoFastIndex::_match (OracleEnv &env, int idx)
{
   _last_id = idx;

   QS_DEF(Array<char>, record);
   BingoStorage &storage = this->_context.context().context().storage;

   storage.get(idx, record);

   // A non-zero deletion mark means the reaction was removed from the index
   if (record[0] != 0)
      return;

   BufferScanner reader(record);

   // Record layout: deletion mark, length-prefixed compressed rowid, payload
   reader.skip(1);
   int rowid_length = reader.readByte();
   reader.skip(rowid_length);

   profTimerStart(match_timer, "match");
   const bool hit = _context.substructure.matchBinary(reader);
   profTimerStop(match_timer);

   if (!hit)
   {
      profIncTimer("match.not_found", profTimerGetTime(match_timer));
      _unmatched++;
   }
   else
   {
      OraRowidText &rowid = matched.at(matched.add());

      _decompressRowid(record, rowid);
      profIncTimer("match.found", profTimerGetTime(match_timer));
      _matched++;
   }
}
Пример #4
0
bool MangoSubstructure::matchLoadedTarget ()
{
    MoleculeSubstructureMatcher matcher(_target);

    matcher.match_3d = match_3d;
    matcher.rms_threshold = rms_threshold;
    matcher.highlight = true;
    matcher.use_pi_systems_matcher = _use_pi_systems_matcher;
    matcher.setNeiCounters(&_nei_query_counters, &_nei_target_counters);
    matcher.fmcache = &_fmcache;

    _fmcache.clear();

    matcher.setQuery(_query);

    profTimerStart(temb, "match.embedding");
    bool res = matcher.find();
    profTimerStop(temb);

    if (res)
    {
        profIncTimer("match.embedding_found", profTimerGetTime(temb));
    }
    else
    {
        profIncTimer("match.embedding_not_found", profTimerGetTime(temb));
    }
    return res;
}
Пример #5
0
/*
 * Makes the section with the given index the current one and returns it.
 *
 * - If that section is already current, it is returned as is.
 * - If the index is beyond the existing sections, the reading strategy
 *   raises an error, while any other strategy keeps appending new sections
 *   until the index exists (presumably _initializeNewSection() makes the
 *   new section current - confirm against its definition).
 * - Otherwise the section is read from the offset found in the mapping.
 */
BingoPgSection& BingoPgIndex::_jumpToSection(int section_idx) {

   const bool already_current = (_currentSectionIdx == section_idx);
   if (already_current)
      return _currentSection.ref();

   if (section_idx >= getSectionNumber()) {
      /*
       * Reading can not go past the last section
       */
      if (_strategy == READING_STRATEGY)
         throw Error("could not get the buffer: section %d is out of bounds %d", section_idx, getSectionNumber());

      /*
       * Writing or updating: grow the index up to the requested section
       */
      while (section_idx >= getSectionNumber())
         _initializeNewSection();

      return _currentSection.ref();
   }

   profTimerStart(read_timer, "bingo_pg.read_section");

   _currentSectionIdx = section_idx;

   /*
    * Locate the section through the offset mapping
    */
   profTimerStart(offset_timer, "bingo_pg.get_offset");
   int section_offset = _getSectionOffset(section_idx);
   profTimerStop(offset_timer);

   _currentSection.reset(new BingoPgSection(*this, _strategy, section_offset));

   return _currentSection.ref();

}
Пример #6
0
/*
 * External-procedure entry point that imports an SDF file into a table.
 *
 * Each string argument is accompanied by an indicator value; an argument
 * counts as present only when its indicator equals OCI_IND_NOTNULL.
 *
 * table_name      - destination table (required)
 * clob_col_name   - column receiving the record text (required)
 * other_col_names - optional list of additional columns; a NULL value is
 *                   passed on to _importSDF() as a null pointer
 * file_name       - path of the SDF file (required)
 *
 * Throws BingoError when a required argument is NULL.
 */
ORAEXT void oraImportSDF (OCIExtProcContext *ctx,
                          const char *table_name, short table_name_ind,
                          const char *clob_col_name, short clob_col_ind,
                          const char *other_col_names, short other_col_ind,
                          const char *file_name, short file_name_ind)
{
   logger.initIfClosed(log_filename);

   ORABLOCK_BEGIN
   {
      profTimersReset();
      profTimerStart(ttotal, "total");

      OracleEnv env(ctx, logger);

      if (table_name_ind != OCI_IND_NOTNULL)
         throw BingoError("null table name");

      if (clob_col_ind != OCI_IND_NOTNULL)
         throw BingoError("null CLOB column name");

      // The file name has its own indicator; it must be checked here so
      // that a NULL file name is rejected before the file is opened
      if (file_name_ind != OCI_IND_NOTNULL)
         throw BingoError("null file name");

      // The additional column list is optional
      if (other_col_ind != OCI_IND_NOTNULL)
         other_col_names = 0;

      _importSDF(env, table_name, clob_col_name, other_col_names, file_name);

      profTimerStop(ttotal);
      bingoProfilingPrintStatistics(false);
   }
   ORABLOCK_END
}
Пример #7
0
/*
 * Registers one prepared molecule in the index. Three steps are performed
 * in order, each under its own profiling timer:
 *   1. the prepared data is added to the storage, which reports the block
 *      number and offset of the new record;
 *   2. the fingerprint of the molecule is added to the fingerprint store;
 *   3. a row is added to the shadow table, referring to the storage
 *      position (the block number is passed incremented by one).
 */
void mangoRegisterMolecule (OracleEnv &env, const char *rowid,
                             MangoOracleContext &context,
                             const MangoIndex &index,
                             BingoFingerprints &fingerprints,
                             const Array<char> &prepared_data,
                             bool append)
{
   profTimerStart(whole_timer, "moleculeIndex.register");

   // Filled in by the storage
   int storage_block;
   int storage_offset;

   profTimerStart(storage_timer, "moleculeIndex.register_storage");
   context.context().storage.add(env, prepared_data, storage_block, storage_offset);
   profTimerStop(storage_timer);

   profTimerStart(fp_timer, "moleculeIndex.register_fingerprint");
   fingerprints.addFingerprint(env, index.getFingerprint());
   profTimerStop(fp_timer);

   profTimerStart(shadow_timer, "moleculeIndex.register_shadow");
   context.shadow_table.addMolecule(env, index, rowid, storage_block + 1, storage_offset, append);
   profTimerStop(shadow_timer);
}
Пример #8
0
/*
 * Fetches rows through the prepared shadow-table statement and collects
 * the rowids of those that satisfy the current search into `matched`.
 *
 * env     - environment used for debug output
 * maxrows - upper bound on the number of matches collected by this call
 *
 * `matched` is cleared first. Nothing is fetched when there is no
 * statement, when maxrows < 1, or when the end of the result set has
 * already been reached. Every fetched row increments _processed_rows,
 * whether it matched or not.
 */
void MangoShadowFetch::fetch (OracleEnv &env, int maxrows)
{
   matched.clear();

   if (_statement.get() == 0)
      return;

   if (maxrows < 1 || _end)
      return;

   env.dbgPrintf("fetching up to %d rows using shadowtable... ", maxrows);

   while (matched.size() < maxrows)
   {
      bool fetch_res;

      // Bind variables are (re)bound on every iteration, not only before
      // the first execution
      if (_fetch_type == _MASS)
      {
         _statement->bindFloatByName(":mass_min", &_context.mass.bottom);
         _statement->bindFloatByName(":mass_max", &_context.mass.top);
      }
      else if (_fetch_type == _TAUTOMER && _right_part == 1)
      {
         const char *gross = _context.tautomer.getQueryGross();
         _statement->bindStringByName(":gross", gross, strlen(gross) + 1);
         // ":grossh" is the same gross formula with " H%" appended
         QS_DEF(Array<char>, grossh);
         grossh.readString(gross, false);
         grossh.appendString(" H%", true);
         _statement->bindStringByName(":grossh", grossh.ptr(), grossh.size());
      }
      
      // The first pass executes the statement; later passes fetch the
      // next row
      if (!_executed)
      {
         fetch_res = _statement->executeAllowNoData();
         _executed = true;
      }
      else
         fetch_res = _statement->fetch();

      // No more rows: remember that, so that later calls return at once
      if (!fetch_res)
      {
         _end = true;
         break;
      }

      bool have_match = false;
      
      // NOTE(review): TRY_READ_TARGET_MOL / CATCH_READ_TARGET_MOL are
      // defined elsewhere; presumably they catch errors raised while
      // reading the target and run the given statement - confirm
      TRY_READ_TARGET_MOL
      {

         // "Non"-substructure search: the row matches when the
         // substructure match fails
         if (_fetch_type == _NON_SUBSTRUCTURE)
         {
            MangoSubstructure &instance = _context.substructure;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (_need_xyz)
            {
               if (_statement->gotNull(3)) // xyz == NULL?
                  have_match = true;
               else
               {
                  QS_DEF(Array<char>, xyz);

                  _lob_xyz->readAll(xyz, false);
                  if (!instance.matchBinary(cmf, &xyz))
                     have_match = true;
               }
            }
            else if (!instance.matchBinary(cmf, 0))
               have_match = true;
         }
         // "Non"-tautomer-substructure search: the row matches when the
         // tautomer match fails
         else if (_fetch_type == _NON_TAUTOMER_SUBSTRUCTURE)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (!instance.matchBinary(cmf))
               have_match = true;
         }
         // Tautomer search: the row matches when the match result agrees
         // with (_right_part == 1)
         else if (_fetch_type == _TAUTOMER)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (instance.matchBinary(cmf) == (_right_part == 1))
               have_match = true;
         }
         // Exact search: same agreement rule as for the tautomer search
         else if (_fetch_type == _EXACT)
         {
            MangoExact &instance = _context.exact;
            QS_DEF(Array<char>, cmf);

            profTimerStart(tlobread, "exact.lobread");
            _lob_cmf->readAll(cmf, false);
            profTimerStop(tlobread);

            if (_need_xyz)
            {
               // Without coordinates the row matches only when
               // _right_part is 0
               if (_statement->gotNull(3)) // xyz == NULL?
                  have_match = (_right_part == 0);
               else
               {
                  QS_DEF(Array<char>, xyz);

                  profTimerStart(txyzlobread, "exact.xyzlobread");
                  _lob_xyz->readAll(xyz, false);
                  profTimerStop(txyzlobread);

                  // This timer has no matching profTimerStop in this
                  // function
                  profTimerStart(tmatch, "exact.match");
                  if (instance.matchBinary(cmf, &xyz) == (_right_part == 1))
                     have_match = true;
               }
            }
            else
            {
               // This timer has no matching profTimerStop in this function
               profTimerStart(tmatch, "exact.match");
               if (instance.matchBinary(cmf, 0) == (_right_part == 1))
                  have_match = true;
            }
         }
         else if (_fetch_type == _GROSS)
         {
            MangoGross &instance = _context.gross;

            if (instance.checkGross(_gross) == (_right_part == 1))
               have_match = true;
         }
         // Mass search: every fetched row is accepted; the mass bounds
         // are bound into the statement above
         else if (_fetch_type == _MASS)
         {
            have_match = true;
         }
         else
            throw Error("unexpected fetch type %d", _fetch_type);
      }
      CATCH_READ_TARGET_MOL(have_match = false)

      if (have_match)
         matched.add(_rowid);
      _processed_rows++;
   } 

   env.dbgPrintf("fetched %d\n", matched.size());

   return;
}
Пример #9
0
/*
 * Reads one molecule from `molfile` and computes everything the index
 * keeps for it: hash, fingerprints, CMF (and XYZ) data, gross formula,
 * counted-element counters and molecular mass.
 *
 * molfile - source of the molecule
 * output  - receives the similarity bit count (only when fingerprints are
 *           calculated) followed by the CMF data
 * lock_for_exclusive_access - lock held while the CMF data is saved;
 *           handed to OsLockerNullable
 */
void MangoIndex::prepare (Scanner &molfile, Output &output, 
                          OsLock *lock_for_exclusive_access)
{
   QS_DEF(Molecule, target);
   QS_DEF(Array<int>, elem_counts);

   MoleculeAutoLoader mol_loader(molfile);

   mol_loader.ignore_closing_bond_direction_mismatch =
           _context->ignore_closing_bond_direction_mismatch;
   mol_loader.treat_x_as_pseudoatom = _context->treat_x_as_pseudoatom;
   mol_loader.loadMolecule(target);

   // SGroups are dropped before anything is calculated
   target.clearSGroups();

   Molecule::checkForConsistency(target);

   // Everything below is calculated for the aromatized molecule
   MoleculeAromatizer::aromatizeBonds(target, AromaticityOptions::BASIC);

   MangoExact::calculateHash(target, _hash);

   if (!skip_calculate_fp)
   {
      MoleculeFingerprintBuilder fp_builder(target, _context->fp_parameters);
      profTimerStart(fp_timer, "moleculeIndex.createFingerprint");
      fp_builder.process();
      profTimerStop(fp_timer);

      _fp.copy(fp_builder.get(), _context->fp_parameters.fingerprintSize());
      _fp_sim_bits_count = fp_builder.countBits_Sim();
      output.writeBinaryWord((word)_fp_sim_bits_count);

      // The similarity part is also kept as a zero-terminated
      // upper-case hex string
      const byte *sim_bytes = fp_builder.getSim();
      const int sim_length = _context->fp_parameters.fingerprintSizeSim();

      ArrayOutput sim_hex(_fp_sim_str);

      for (const byte *cur = sim_bytes; cur < sim_bytes + sim_length; cur++)
         sim_hex.printf("%02X", *cur);

      sim_hex.writeChar(0);
   }

   ArrayOutput cmf_stream(_cmf);
   {
      // CmfSaver modifies _context->cmf_dict, so the dictionary has to be
      // accessed exclusively while the molecule is being saved
      OsLockerNullable guard(lock_for_exclusive_access);

      CmfSaver cmf_saver(_context->cmf_dict, cmf_stream);

      cmf_saver.saveMolecule(target);

      if (!target.have_xyz)
         _xyz.clear();
      else
      {
         ArrayOutput xyz_stream(_xyz);
         cmf_saver.saveXyz(xyz_stream);
      }
   }

   output.writeArray(_cmf);

   // Gross formula
   GrossFormula::collect(target, elem_counts);
   GrossFormula::toString(elem_counts, _gross_str);

   // Counters of the selected elements, both as numbers and as a
   // zero-terminated ", N, N, ..." string
   _counted_elems_str.clear();
   _counted_elem_counters.clear();

   ArrayOutput counters_text(_counted_elems_str);

   const int n_counted = (int)NELEM(counted_elements);

   for (int k = 0; k < n_counted; k++)
   {
      int count = elem_counts[counted_elements[k]];

      _counted_elem_counters.push(count);
      counters_text.printf(", %d", count);
   }

   counters_text.writeByte(0);

   // Molecular mass
   MoleculeMass weigher;
   weigher.relative_atomic_mass_map = &_context->relative_atomic_mass_map;
   _molecular_mass = weigher.molecularWeight(target);
}
Пример #10
0
/*
 * Matches the stored molecule with the given storage index against the
 * current query, using the matcher selected by _fetch_type.
 *
 * Records whose first byte is non-zero are treated as deleted and skipped
 * without touching the matched/unmatched counters. On success the rowid
 * of the record is appended to `matched`.
 */
void MangoFastIndex::_match (OracleEnv &env, int idx)
{
   _last_id = idx;

   QS_DEF(Array<char>, record);
   BingoStorage &storage = this->_context.context().context().storage;

   storage.get(idx, record);

   // A non-zero deletion mark means the molecule was removed from the index
   if (record[0] != 0)
      return;

   BufferScanner reader(record);

   // Record layout: deletion mark, length-prefixed compressed rowid,
   // two bytes of 'ord' bits count, payload
   reader.skip(1);
   int rowid_length = reader.readByte();
   reader.skip(rowid_length);
   reader.skip(2);

   bool hit = false;

   profTimerStart(match_timer, "match");
   if (_fetch_type == _SUBSTRUCTURE)
   {
      QS_DEF(Array<char>, coords);

      if (!_context.substructure.needCoords())
         hit = _context.substructure.matchBinary(reader, 0);
      else
      {
         OraRowidText coords_rowid;

         _decompressRowid(record, coords_rowid);

         // A molecule without XYZ data can not match and keeps hit == false
         if (_loadCoords(env, coords_rowid.ptr(), coords))
         {
            BufferScanner coords_reader(coords);

            hit = _context.substructure.matchBinary(reader, &coords_reader);
         }
      }
   }
   else if (_fetch_type == _TAUTOMER_SUBSTRUCTURE)
   {
      hit = _context.tautomer.matchBinary(reader);
   }
   else
   {
      // Any other fetch type is handled as a similarity search
      hit = _context.similarity.matchBinary(reader);
   }

   profTimerStop(match_timer);

   if (!hit)
   {
      profIncTimer("match.not_found", profTimerGetTime(match_timer));
      _unmatched++;
   }
   else
   {
      OraRowidText &rowid = matched.at(matched.add());

      _decompressRowid(record, rowid);

      profIncTimer("match.found", profTimerGetTime(match_timer));
      _matched++;
   }
}
Пример #11
0
/*
 * Similarity search over the fingerprint blocks.
 *
 * Blocks are processed one at a time until `matched` holds at least
 * max_matches entries or the screening runs out of blocks. For each
 * block the query bits are read step by step while the list of
 * candidates is narrowed down; the block is then finished either by
 * comparing the final bit counters or by a full match of the remaining
 * candidates.
 *
 * env         - environment used for debug output and database access
 * max_matches - number of matches after which no further block is started
 */
void MangoFastIndex::_fetchSimilarity (OracleEnv &env, int max_matches)
{
   BingoFingerprints &fingerprints = _context.context().fingerprints;
   int i;

   if (!fingerprints.ableToScreen(_screening))
   {
      env.dbgPrintfTS("no bits in query fingerprint, can not do similarity search\n");
      return;
   }

   profTimerStart(tsimfetch, "sim.fetch");
   while (matched.size() < max_matches)
   {
      // Start the next block; a false result ends the search
      if (!fingerprints.countOnes_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      BingoStorage &storage = _context.context().context().storage;

      // Per-target arrays, indexed by the position within the block
      QS_DEF(Array<int>, max_common_ones);
      QS_DEF(Array<int>, min_common_ones);
      QS_DEF(Array<int>, target_ones);
      QS_DEF(Array<char>, stored);

      max_common_ones.clear_resize(_screening.block->used);
      min_common_ones.clear_resize(_screening.block->used);
      target_ones.clear_resize(_screening.block->used);

      // Read the bit count of every target in the block from its stored
      // record and derive the bounds on the number of common bits
      for (i = 0; i < _screening.block->used; i++)
      {
         storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored);

         BufferScanner scanner(stored);

         scanner.skip(1); // skip the deletion mark
         scanner.skip(scanner.readByte()); // skip the compessed rowid
         target_ones[i] = scanner.readBinaryWord();
         max_common_ones[i] = _context.similarity.getUpperBound(target_ones[i]);
         min_common_ones[i] = _context.similarity.getLowerBound(target_ones[i]);
      }

      bool first = true;
      bool entire = false; // set when all the query bits have been read

      _screening.passed.clear();

      while (true)
      {
         if (!fingerprints.countOnes_Next(env, _screening))
         {
            env.dbgPrintf("read all %d bits, writing %d results... ",
               _screening.query_ones.size(), _screening.passed.size());

            entire = true;
            break;
         }

         // A target stays a candidate while the range of its possible
         // final counter value - from the current counter up to the
         // current counter plus the number of query bits not yet read -
         // intersects [min_common_ones, max_common_ones]
         if (first)
         {
            // First step: build the candidate list
            first = false;
            for (i = 0; i < _screening.block->used; i++)
            {
               int min_possible_ones = _screening.one_counters[i];
               int max_possible_ones = _screening.one_counters[i] +
                            _screening.query_ones.size() - _screening.query_bit_idx;

               if (min_possible_ones <= max_common_ones[i] &&
                   max_possible_ones >= min_common_ones[i])
                  _screening.passed.add(i);
            }
         }
         else
         {
            // Later steps: drop the candidates that can no longer fit
            int j;

            for (j = _screening.passed.begin(); j != _screening.passed.end(); )
            {
               i = _screening.passed[j];

               int min_possible_ones = _screening.one_counters[i];
               int max_possible_ones = _screening.one_counters[i] +
                            _screening.query_ones.size() - _screening.query_bit_idx;

               // The successor is taken before a possible removal of j
               int next_j = _screening.passed.next(j);

               if (min_possible_ones > max_common_ones[i] ||
                   max_possible_ones < min_common_ones[i])
                  _screening.passed.remove(j);

               j = next_j;
            }
         }

         // Few enough candidates are left: stop reading bits; all the
         // targets outside the candidate list are counted as unmatched
         if (_screening.passed.size() <= _context.context().context().sim_screening_pass_mark)
         { 
            env.dbgPrintfTS("stopping reading fingerprints on bit %d/%d; have %d molecules to check...  ",
               _screening.query_bit_idx, _screening.query_ones.size(), _screening.passed.size());
            _unmatched += _screening.block->used - _screening.passed.size();
            break;
         }
      }

      if (entire)
      {
         // All the query bits were read, so the counters are final and
         // every target of the block is decided from them directly
         for (i = 0; i < _screening.block->used; i++)
         {
            if (_context.similarity.match(target_ones[i], _screening.one_counters[i]))
            {
               OraRowidText &rid = matched.at(matched.add());

               storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored);
               _decompressRowid(stored, rid);
              _matched++;
            }
            else
               _unmatched++;
         }
      }
      else if (_screening.passed.size() > 0)
      {
         // Reading stopped early: each remaining candidate goes through
         // the full check in _match()
         profTimerStart(tfine, "sim.fetch.fine");
         for (i = _screening.passed.begin(); i != _screening.passed.end(); i = _screening.passed.next(i))
            _match(env, fingerprints.getStorageIndex_NoMap(_screening, _screening.passed[i]));
         profTimerStop(tfine);
      }
      env.dbgPrintf("done\n");

      fingerprints.countOnes_End(env, _screening);
   }
   profTimerStop(tsimfetch);
}
Пример #12
0
/*
 * Reads one molecule from `molfile` and computes everything the index
 * keeps for it: hash, fingerprints, CMF (and XYZ) data, gross formula,
 * counted-element counters and molecular mass.
 *
 * molfile - source of the molecule
 * output  - receives the similarity bit count (only when fingerprints are
 *           calculated) followed by the CMF data
 * lock_for_exclusive_access - lock held while the CMF data is saved;
 *           handed to OsLockerNullable
 */
void MangoIndex::prepare (Scanner &molfile, Output &output,
                          OsLock *lock_for_exclusive_access)
{
    QS_DEF(Molecule, target);
    QS_DEF(Array<int>, elem_counts);

    MoleculeAutoLoader mol_loader(molfile);
    _context->setLoaderSettings(mol_loader);
    mol_loader.loadMolecule(target);

    // SGroups are dropped before anything is calculated
    target.clearSGroups();

    if (_context->allow_non_unique_dearomatization)
    {
        MoleculeDearomatizer::restoreHydrogens(target, false);
    }

    if (_context->zero_unknown_aromatic_hydrogens)
    {
        target.restoreAromaticHydrogens();

        // Aromatic atoms whose implicit hydrogen count is still unknown
        // get zero hydrogens; R-sites and pseudoatoms are left alone
        for (int atom : target.vertices())
        {
            const bool ordinary_atom = !target.isRSite(atom) && !target.isPseudoAtom(atom);

            if (ordinary_atom && target.getAtomAromaticity(atom) == ATOM_AROMATIC &&
                target.getImplicitH_NoThrow(atom, -1) == -1)
            {
                target.setImplicitH(atom, 0);
            }
        }
    }

    Molecule::checkForConsistency(target);

    // Everything below is calculated for the aromatized molecule
    MoleculeAromatizer::aromatizeBonds(target, AromaticityOptions::BASIC);

    MangoExact::calculateHash(target, _hash);

    if (!skip_calculate_fp)
    {
        MoleculeFingerprintBuilder fp_builder(target, _context->fp_parameters);
        profTimerStart(fp_timer, "moleculeIndex.createFingerprint");
        fp_builder.process();
        profTimerStop(fp_timer);

        _fp.copy(fp_builder.get(), _context->fp_parameters.fingerprintSize());
        _fp_sim_bits_count = fp_builder.countBits_Sim();
        output.writeBinaryWord((word)_fp_sim_bits_count);

        // The similarity part is also kept as a zero-terminated
        // upper-case hex string
        const byte *sim_bytes = fp_builder.getSim();
        const int sim_length = _context->fp_parameters.fingerprintSizeSim();

        ArrayOutput sim_hex(_fp_sim_str);

        for (const byte *cur = sim_bytes; cur < sim_bytes + sim_length; cur++)
            sim_hex.printf("%02X", *cur);

        sim_hex.writeChar(0);
    }

    ArrayOutput cmf_stream(_cmf);
    {
        // CmfSaver modifies _context->cmf_dict, so the dictionary has to
        // be accessed exclusively while the molecule is being saved
        OsLockerNullable guard(lock_for_exclusive_access);

        CmfSaver cmf_saver(_context->cmf_dict, cmf_stream);

        cmf_saver.saveMolecule(target);

        if (!target.have_xyz)
            _xyz.clear();
        else
        {
            ArrayOutput xyz_stream(_xyz);
            cmf_saver.saveXyz(xyz_stream);
        }
    }

    output.writeArray(_cmf);

    // Gross formula
    GrossFormula::collect(target, elem_counts);
    GrossFormula::toString(elem_counts, _gross_str);

    // Counters of the selected elements, both as numbers and as a
    // zero-terminated ", N, N, ..." string
    _counted_elems_str.clear();
    _counted_elem_counters.clear();

    ArrayOutput counters_text(_counted_elems_str);

    const int n_counted = (int)NELEM(counted_elements);

    for (int k = 0; k < n_counted; k++)
    {
        int count = elem_counts[counted_elements[k]];

        _counted_elem_counters.push(count);
        counters_text.printf(", %d", count);
    }

    counters_text.writeByte(0);

    // Molecular mass
    MoleculeMass weigher;
    weigher.relative_atomic_mass_map = &_context->relative_atomic_mass_map;
    _molecular_mass = weigher.molecularWeight(target);
}
Пример #13
0
/*
 * Imports every record of an SDF file into a table, one INSERT per record.
 *
 * env        - environment used for statements and debug output
 * table      - destination table
 * clob_col   - column receiving the whole record text as a CLOB
 * other_cols - field list handed to _parseFieldList(), which fills the
 *              parallel `props` (SDF property names) and `columns`
 *              (table column names) pools
 * file_name  - path of the SDF file
 *
 * For every additional column the value of the corresponding property is
 * bound when the record has that property; otherwise NULL is inserted.
 * A COMMIT is issued after every 1000 records and once more at the end
 * for the remainder.
 *
 * NOTE(review): table and column names are inserted into the SQL text as
 * is; they can not be passed as bind variables, so the callers must supply
 * trusted identifiers.
 */
void _importSDF (OracleEnv &env, const char *table, const char *clob_col, 
                 const char *other_cols, const char *file_name)
{
   FileScanner scanner(file_name);
   int i, nwritten = 0;
   QS_DEF(Array<char>, word);
   QS_DEF(StringPool, props);
   QS_DEF(StringPool, columns);

   env.dbgPrintfTS("importing into table %s\n", table);

   SdfLoader loader(scanner);

   _parseFieldList(other_cols, props, columns);
 
   while (!loader.isEOF())
   {
      profTimerStart(tread, "import.read_next");
      loader.readNext();
      profTimerStop(tread);

      OracleStatement statement(env);
      OracleLOB lob(env);

      lob.createTemporaryCLOB();
      lob.write(0, loader.data);

      statement.append("INSERT INTO %s(%s", table, clob_col);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
         statement.append(", %s", columns.at(i));

      statement.append(") VALUES(:clobdata");

      // The statement text depends on the record: a placeholder is emitted
      // only for the properties that the record actually has, and NULL is
      // emitted for the missing ones. `props` and `columns` are indexed in
      // parallel (presumably guaranteed by _parseFieldList - confirm).
      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         if (loader.properties.contains(props.at(i)))
            statement.append(",:%s", columns.at(i));
         else
            statement.append(", NULL");
      }

      statement.append(")");
      statement.prepare();

      statement.bindClobByName(":clobdata", lob);

      // Bind exactly the placeholders emitted above; a missing property
      // has no placeholder and must not be looked up
      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         if (!loader.properties.contains(props.at(i)))
            continue;
         
         ArrayOutput out(word);

         out.printf(":%s", columns.at(i));
         out.writeChar(0);

         const char* val = loader.properties.at(props.at(i));

         statement.bindStringByName(word.ptr(), val, strlen(val) + 1);
      }

      profTimerStart(tinsert, "import.sql_insert");
      statement.execute();
      profTimerStop(tinsert);

      nwritten++;
      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }
   // Commit the records written since the last periodic commit
   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}