/*
 * Loads a target molecule from its binary (CMF) form and runs the
 * substructure match against it.
 *
 * scanner     -- source of the CMF-encoded target
 * xyz_scanner -- optional source of the target coordinates; when it is null
 *                no coordinates are loaded
 *
 * Returns the result of matchLoadedTarget() for the loaded target.
 */
bool MangoSubstructure::matchBinary (Scanner &scanner, Scanner *xyz_scanner)
{
   _validateQueryExtraData();

   profTimerStart(tload, "match.cmf");

   /*
    * Recreate the loader on top of the supplied scanner
    */
   cmf_loader.free();
   cmf_loader.create(_context.cmf_dict, scanner);

   /*
    * Stereo data is skipped when the query does not carry
    * the corresponding features
    */
   if (!_query_has_stereocare_bonds)
      cmf_loader->skip_cistrans = true;

   if (!_query_has_stereocenters)
      cmf_loader->skip_stereocenters = true;

   cmf_loader->loadMolecule(_target);

   if (xyz_scanner)
      cmf_loader->loadXyz(*xyz_scanner);

   profTimerStop(tload);

   profTimerStart(tprepare, "match.init_target");
   _initTarget(true);
   profTimerStop(tprepare);

   return matchLoadedTarget();
}
/*
 * Loads a target molecule from its binary (CMF) form and runs the tautomer
 * matcher on it with the stored parameters and query.
 *
 * Returns true when the matcher finds an embedding. The time spent in the
 * search is additionally accumulated under "match.embedding_found" or
 * "match.embedding_not_found" depending on the outcome.
 */
bool MangoTautomer::matchBinary (Scanner &scanner)
{
   CmfLoader cmf_loader(_context.cmf_dict, scanner);

   cmf_loader.loadMolecule(_target);
   _initTarget(true);

   MoleculeTautomerMatcher matcher(_target, _params.substructure);
   TautomerMethod method = RSMARTS;

   matcher.setRulesList(&_context.tautomer_rules);
   matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, method);
   matcher.setQuery(_query.ref());

   profTimerStart(temb, "match.embedding");
   bool found = matcher.find();
   profTimerStop(temb);

   if (!found)
   {
      profIncTimer("match.embedding_not_found", profTimerGetTime(temb));
   }
   else
   {
      profIncTimer("match.embedding_found", profTimerGetTime(temb));
   }

   return found;
}
/*
 * Matches the stored record with the given storage index against the
 * current substructure query.
 *
 * A record whose first byte is non-zero is treated as removed and ignored.
 * On a hit the decompressed rowid is appended to 'matched'; the
 * _matched/_unmatched counters are updated accordingly.
 */
void RingoFastIndex::_match (OracleEnv &env, int idx)
{
   _last_id = idx;

   BingoStorage &storage = this->_context.context().context().storage;
   QS_DEF(Array<char>, stored);

   storage.get(idx, stored);

   // reaction was removed from index
   if (stored[0] != 0)
      return;

   BufferScanner scanner(stored);

   // skip the deletion mark
   scanner.skip(1);
   // skip the compressed rowid (its length is stored in the leading byte)
   scanner.skip(scanner.readByte());

   profTimerStart(tall, "match");
   bool found = _context.substructure.matchBinary(scanner);
   profTimerStop(tall);

   if (!found)
   {
      profIncTimer("match.not_found", profTimerGetTime(tall));
      _unmatched++;
   }
   else
   {
      OraRowidText &rowid = matched.at(matched.add());

      _decompressRowid(stored, rowid);

      profIncTimer("match.found", profTimerGetTime(tall));
      _matched++;
   }
}
bool MangoSubstructure::matchLoadedTarget () { MoleculeSubstructureMatcher matcher(_target); matcher.match_3d = match_3d; matcher.rms_threshold = rms_threshold; matcher.highlight = true; matcher.use_pi_systems_matcher = _use_pi_systems_matcher; matcher.setNeiCounters(&_nei_query_counters, &_nei_target_counters); matcher.fmcache = &_fmcache; _fmcache.clear(); matcher.setQuery(_query); profTimerStart(temb, "match.embedding"); bool res = matcher.find(); profTimerStop(temb); if (res) { profIncTimer("match.embedding_found", profTimerGetTime(temb)); } else { profIncTimer("match.embedding_not_found", profTimerGetTime(temb)); } return res; }
/*
 * Makes the section with the given index current and returns it.
 *
 * If the index is beyond the existing sections, the reading strategy
 * raises an Error, while any other strategy appends new sections until the
 * index is covered.
 */
BingoPgSection& BingoPgIndex::_jumpToSection(int section_idx)
{
   /*
    * Nothing to do if the requested section is the current one
    */
   if (_currentSectionIdx == section_idx)
      return _currentSection.ref();

   if (section_idx >= getSectionNumber())
   {
      if (_strategy == READING_STRATEGY)
         throw Error("could not get the buffer: section %d is out of bounds %d", section_idx, getSectionNumber());

      /*
       * If strategy is writing or updating then append new sections
       */
      while (section_idx >= getSectionNumber())
         _initializeNewSection();

      return _currentSection.ref();
   }

   profTimerStart(t0, "bingo_pg.read_section");

   /*
    * Read the section using offset mapping
    */
   _currentSectionIdx = section_idx;

   profTimerStart(t1, "bingo_pg.get_offset");
   int section_offset = _getSectionOffset(section_idx);
   profTimerStop(t1);

   _currentSection.reset(new BingoPgSection(*this, _strategy, section_offset));

   return _currentSection.ref();
}
/*
 * Exported entry point that imports an SDF file into a table by delegating
 * to _importSDF().
 *
 * Every string argument is accompanied by an indicator value:
 *   table_name / table_name_ind          -- destination table; must not be null
 *   clob_col_name / clob_col_ind         -- CLOB column receiving the record
 *                                           text; must not be null
 *   other_col_names / other_col_ind      -- optional list of extra columns;
 *                                           a null indicator means "none" and
 *                                           a null pointer is passed on
 *   file_name / file_name_ind            -- path of the SDF file; must not
 *                                           be null
 *
 * A null mandatory argument raises BingoError inside the ORABLOCK section.
 * Profiling timers are reset on entry and the statistics are printed after
 * a successful import.
 */
ORAEXT void oraImportSDF (OCIExtProcContext *ctx,
                          const char *table_name,      short table_name_ind,
                          const char *clob_col_name,   short clob_col_ind,
                          const char *other_col_names, short other_col_ind,
                          const char *file_name,       short file_name_ind)
{
   logger.initIfClosed(log_filename);

   ORABLOCK_BEGIN
   {
      profTimersReset();
      profTimerStart(ttotal, "total");

      OracleEnv env(ctx, logger);

      if (table_name_ind != OCI_IND_NOTNULL)
         throw BingoError("null table name");

      // Each mandatory argument is validated against its own indicator and
      // reported with its own message
      if (clob_col_ind != OCI_IND_NOTNULL)
         throw BingoError("null CLOB column name");

      if (file_name_ind != OCI_IND_NOTNULL)
         throw BingoError("null file name");

      // The list of additional columns is optional
      if (other_col_ind != OCI_IND_NOTNULL)
         other_col_names = 0;

      _importSDF(env, table_name, clob_col_name, other_col_names, file_name);

      profTimerStop(ttotal);
      bingoProfilingPrintStatistics(false);
   }
   ORABLOCK_END
}
void mangoRegisterMolecule (OracleEnv &env, const char *rowid, MangoOracleContext &context, const MangoIndex &index, BingoFingerprints &fingerprints, const Array<char> &prepared_data, bool append) { profTimerStart(tall, "moleculeIndex.register"); int blockno, offset; profTimerStart(tstor, "moleculeIndex.register_storage"); context.context().storage.add(env, prepared_data, blockno, offset); profTimerStop(tstor); profTimerStart(tfing, "moleculeIndex.register_fingerprint"); fingerprints.addFingerprint(env, index.getFingerprint()); profTimerStop(tfing); profTimerStart(tshad, "moleculeIndex.register_shadow"); context.shadow_table.addMolecule(env, index, rowid, blockno + 1, offset, append); profTimerStop(tshad); }
/*
 * Fetches rows through the prepared shadow-table statement and collects the
 * rowids of those that satisfy the current fetch type into 'matched'.
 *
 * 'matched' is cleared first. Nothing is fetched when there is no
 * statement, when maxrows is less than one, or when the end of data was
 * already reached. The loop stops once 'matched' holds maxrows entries or
 * the statement yields no more rows (in which case _end is set).
 *
 * Throws Error for an unexpected fetch type.
 */
void MangoShadowFetch::fetch (OracleEnv &env, int maxrows)
{
   matched.clear();

   if (_statement.get() == 0)
      return;

   if (maxrows < 1 || _end)
      return;

   env.dbgPrintf("fetching up to %d rows using shadowtable... ", maxrows);

   while (matched.size() < maxrows)
   {
      // Bind the parameters that depend on the fetch type
      if (_fetch_type == _MASS)
      {
         _statement->bindFloatByName(":mass_min", &_context.mass.bottom);
         _statement->bindFloatByName(":mass_max", &_context.mass.top);
      }
      else if (_fetch_type == _TAUTOMER && _right_part == 1)
      {
         const char *query_gross = _context.tautomer.getQueryGross();

         _statement->bindStringByName(":gross", query_gross, strlen(query_gross) + 1);

         QS_DEF(Array<char>, grossh);

         grossh.readString(query_gross, false);
         grossh.appendString(" H%", true);
         _statement->bindStringByName(":grossh", grossh.ptr(), grossh.size());
      }

      // The first pass executes the statement, later passes fetch from it
      bool got_row;

      if (_executed)
         got_row = _statement->fetch();
      else
      {
         got_row = _statement->executeAllowNoData();
         _executed = true;
      }

      if (!got_row)
      {
         _end = true;
         break;
      }

      bool have_match = false;

      TRY_READ_TARGET_MOL
      {
         if (_fetch_type == _NON_SUBSTRUCTURE)
         {
            MangoSubstructure &instance = _context.substructure;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (!_need_xyz)
               have_match = !instance.matchBinary(cmf, 0);
            else if (_statement->gotNull(3)) // xyz == NULL?
               have_match = true;
            else
            {
               QS_DEF(Array<char>, xyz);

               _lob_xyz->readAll(xyz, false);
               have_match = !instance.matchBinary(cmf, &xyz);
            }
         }
         else if (_fetch_type == _NON_TAUTOMER_SUBSTRUCTURE)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);
            have_match = !instance.matchBinary(cmf);
         }
         else if (_fetch_type == _TAUTOMER)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);
            have_match = (instance.matchBinary(cmf) == (_right_part == 1));
         }
         else if (_fetch_type == _EXACT)
         {
            MangoExact &instance = _context.exact;
            QS_DEF(Array<char>, cmf);

            profTimerStart(tlobread, "exact.lobread");
            _lob_cmf->readAll(cmf, false);
            profTimerStop(tlobread);

            if (!_need_xyz)
            {
               profTimerStart(tmatch, "exact.match");
               have_match = (instance.matchBinary(cmf, 0) == (_right_part == 1));
            }
            else if (_statement->gotNull(3)) // xyz == NULL?
               have_match = (_right_part == 0);
            else
            {
               QS_DEF(Array<char>, xyz);

               profTimerStart(txyzlobread, "exact.xyzlobread");
               _lob_xyz->readAll(xyz, false);
               profTimerStop(txyzlobread);

               profTimerStart(tmatch, "exact.match");
               have_match = (instance.matchBinary(cmf, &xyz) == (_right_part == 1));
            }
         }
         else if (_fetch_type == _GROSS)
         {
            MangoGross &instance = _context.gross;

            have_match = (instance.checkGross(_gross) == (_right_part == 1));
         }
         else if (_fetch_type == _MASS)
         {
            // every row returned for this fetch type is accepted
            have_match = true;
         }
         else
            throw Error("unexpected fetch type %d", _fetch_type);
      }
      CATCH_READ_TARGET_MOL(have_match = false)

      if (have_match)
         matched.add(_rowid);

      _processed_rows++;
   }

   env.dbgPrintf("fetched %d\n", matched.size());
}
void MangoIndex::prepare (Scanner &molfile, Output &output, OsLock *lock_for_exclusive_access) { QS_DEF(Molecule, mol); QS_DEF(Array<int>, gross); MoleculeAutoLoader loader(molfile); loader.treat_x_as_pseudoatom = _context->treat_x_as_pseudoatom; loader.ignore_closing_bond_direction_mismatch = _context->ignore_closing_bond_direction_mismatch; loader.loadMolecule(mol); //Skip all SGroups mol.clearSGroups(); Molecule::checkForConsistency(mol); // Make aromatic molecule MoleculeAromatizer::aromatizeBonds(mol, AromaticityOptions::BASIC); MangoExact::calculateHash(mol, _hash); if (!skip_calculate_fp) { MoleculeFingerprintBuilder builder(mol, _context->fp_parameters); profTimerStart(tfing, "moleculeIndex.createFingerprint"); builder.process(); profTimerStop(tfing); _fp.copy(builder.get(), _context->fp_parameters.fingerprintSize()); _fp_sim_bits_count = builder.countBits_Sim(); output.writeBinaryWord((word)_fp_sim_bits_count); const byte *fp_sim_ptr = builder.getSim(); int fp_sim_size = _context->fp_parameters.fingerprintSizeSim(); ArrayOutput fp_sim_output(_fp_sim_str); for (int i = 0; i < fp_sim_size; i++) fp_sim_output.printf("%02X", fp_sim_ptr[i]); fp_sim_output.writeChar(0); } ArrayOutput output_cmf(_cmf); { // CmfSaver modifies _context->cmf_dict and // requires exclusive access for this OsLockerNullable locker(lock_for_exclusive_access); CmfSaver saver(_context->cmf_dict, output_cmf); saver.saveMolecule(mol); if (mol.have_xyz) { ArrayOutput output_xyz(_xyz); saver.saveXyz(output_xyz); } else _xyz.clear(); } output.writeArray(_cmf); // Save gross formula GrossFormula::collect(mol, gross); GrossFormula::toString(gross, _gross_str); _counted_elems_str.clear(); _counted_elem_counters.clear(); ArrayOutput ce_output(_counted_elems_str); for (int i = 0; i < (int)NELEM(counted_elements); i++) { _counted_elem_counters.push(gross[counted_elements[i]]); ce_output.printf(", %d", gross[counted_elements[i]]); } ce_output.writeByte(0); // Calculate molecular mass MoleculeMass 
mass_calulator; mass_calulator.relative_atomic_mass_map = &_context->relative_atomic_mass_map; _molecular_mass = mass_calulator.molecularWeight(mol); }
/*
 * Matches the stored record with the given storage index against the
 * current query, using the matcher selected by the fetch type.
 *
 * A record whose first byte is non-zero is treated as removed and ignored.
 * For a substructure search that needs coordinates, a record whose
 * coordinates can not be loaded is counted as not matched.
 * On a hit the decompressed rowid is appended to 'matched'; the
 * _matched/_unmatched counters are updated accordingly.
 */
void MangoFastIndex::_match (OracleEnv &env, int idx)
{
   _last_id = idx;

   BingoStorage &storage = this->_context.context().context().storage;
   QS_DEF(Array<char>, stored);

   storage.get(idx, stored);

   // molecule was removed from index
   if (stored[0] != 0)
      return;

   BufferScanner scanner(stored);

   // skip the deletion mark
   scanner.skip(1);
   // skip the compressed rowid (its length is stored in the leading byte)
   scanner.skip(scanner.readByte());
   // skip 'ord' bits count
   scanner.skip(2);

   bool found = false;

   profTimerStart(tall, "match");

   if (_fetch_type == _SUBSTRUCTURE)
   {
      QS_DEF(Array<char>, xyz_buf);

      if (!_context.substructure.needCoords())
         found = _context.substructure.matchBinary(scanner, 0);
      else
      {
         OraRowidText coords_rowid;

         _decompressRowid(stored, coords_rowid);

         if (_loadCoords(env, coords_rowid.ptr(), xyz_buf))
         {
            BufferScanner xyz_scanner(xyz_buf);

            found = _context.substructure.matchBinary(scanner, &xyz_scanner);
         }
         else
         {
            // no XYZ --> skip the molecule
            found = false;
         }
      }
   }
   else if (_fetch_type == _TAUTOMER_SUBSTRUCTURE)
      found = _context.tautomer.matchBinary(scanner);
   else // _fetch_type == _SIMILARITY
      found = _context.similarity.matchBinary(scanner);

   profTimerStop(tall);

   if (!found)
   {
      profIncTimer("match.not_found", profTimerGetTime(tall));
      _unmatched++;
   }
   else
   {
      OraRowidText &rowid = matched.at(matched.add());

      _decompressRowid(stored, rowid);

      profIncTimer("match.found", profTimerGetTime(tall));
      _matched++;
   }
}
void MangoFastIndex::_fetchSimilarity (OracleEnv &env, int max_matches) { BingoFingerprints &fingerprints = _context.context().fingerprints; int i; if (!fingerprints.ableToScreen(_screening)) { env.dbgPrintfTS("no bits in query fingerprint, can not do similarity search\n"); return; } profTimerStart(tsimfetch, "sim.fetch"); while (matched.size() < max_matches) { if (!fingerprints.countOnes_Init(env, _screening)) { env.dbgPrintfTS("screening ended\n"); break; } BingoStorage &storage = _context.context().context().storage; QS_DEF(Array<int>, max_common_ones); QS_DEF(Array<int>, min_common_ones); QS_DEF(Array<int>, target_ones); QS_DEF(Array<char>, stored); max_common_ones.clear_resize(_screening.block->used); min_common_ones.clear_resize(_screening.block->used); target_ones.clear_resize(_screening.block->used); for (i = 0; i < _screening.block->used; i++) { storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); BufferScanner scanner(stored); scanner.skip(1); // skip the deletion mark scanner.skip(scanner.readByte()); // skip the compessed rowid target_ones[i] = scanner.readBinaryWord(); max_common_ones[i] = _context.similarity.getUpperBound(target_ones[i]); min_common_ones[i] = _context.similarity.getLowerBound(target_ones[i]); } bool first = true; bool entire = false; _screening.passed.clear(); while (true) { if (!fingerprints.countOnes_Next(env, _screening)) { env.dbgPrintf("read all %d bits, writing %d results... 
", _screening.query_ones.size(), _screening.passed.size()); entire = true; break; } if (first) { first = false; for (i = 0; i < _screening.block->used; i++) { int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; if (min_possible_ones <= max_common_ones[i] && max_possible_ones >= min_common_ones[i]) _screening.passed.add(i); } } else { int j; for (j = _screening.passed.begin(); j != _screening.passed.end(); ) { i = _screening.passed[j]; int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; int next_j = _screening.passed.next(j); if (min_possible_ones > max_common_ones[i] || max_possible_ones < min_common_ones[i]) _screening.passed.remove(j); j = next_j; } } if (_screening.passed.size() <= _context.context().context().sim_screening_pass_mark) { env.dbgPrintfTS("stopping reading fingerprints on bit %d/%d; have %d molecules to check... ", _screening.query_bit_idx, _screening.query_ones.size(), _screening.passed.size()); _unmatched += _screening.block->used - _screening.passed.size(); break; } } if (entire) { for (i = 0; i < _screening.block->used; i++) { if (_context.similarity.match(target_ones[i], _screening.one_counters[i])) { OraRowidText &rid = matched.at(matched.add()); storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); _decompressRowid(stored, rid); _matched++; } else _unmatched++; } } else if (_screening.passed.size() > 0) { profTimerStart(tfine, "sim.fetch.fine"); for (i = _screening.passed.begin(); i != _screening.passed.end(); i = _screening.passed.next(i)) _match(env, fingerprints.getStorageIndex_NoMap(_screening, _screening.passed[i])); profTimerStop(tfine); } env.dbgPrintf("done\n"); fingerprints.countOnes_End(env, _screening); } profTimerStop(tsimfetch); }
void MangoIndex::prepare (Scanner &molfile, Output &output, OsLock *lock_for_exclusive_access) { QS_DEF(Molecule, mol); QS_DEF(Array<int>, gross); MoleculeAutoLoader loader(molfile); _context->setLoaderSettings(loader); loader.loadMolecule(mol); // Skip all SGroups mol.clearSGroups(); if (_context->allow_non_unique_dearomatization) MoleculeDearomatizer::restoreHydrogens(mol, false); if (_context->zero_unknown_aromatic_hydrogens) { mol.restoreAromaticHydrogens(); for (int i : mol.vertices()) { if (mol.isRSite(i) || mol.isPseudoAtom(i)) continue; if (mol.getAtomAromaticity(i) == ATOM_AROMATIC && mol.getImplicitH_NoThrow(i, -1) == -1) mol.setImplicitH(i, 0); } } Molecule::checkForConsistency(mol); // Make aromatic molecule MoleculeAromatizer::aromatizeBonds(mol, AromaticityOptions::BASIC); MangoExact::calculateHash(mol, _hash); if (!skip_calculate_fp) { MoleculeFingerprintBuilder builder(mol, _context->fp_parameters); profTimerStart(tfing, "moleculeIndex.createFingerprint"); builder.process(); profTimerStop(tfing); _fp.copy(builder.get(), _context->fp_parameters.fingerprintSize()); _fp_sim_bits_count = builder.countBits_Sim(); output.writeBinaryWord((word)_fp_sim_bits_count); const byte *fp_sim_ptr = builder.getSim(); int fp_sim_size = _context->fp_parameters.fingerprintSizeSim(); ArrayOutput fp_sim_output(_fp_sim_str); for (int i = 0; i < fp_sim_size; i++) fp_sim_output.printf("%02X", fp_sim_ptr[i]); fp_sim_output.writeChar(0); } ArrayOutput output_cmf(_cmf); { // CmfSaver modifies _context->cmf_dict and // requires exclusive access for this OsLockerNullable locker(lock_for_exclusive_access); CmfSaver saver(_context->cmf_dict, output_cmf); saver.saveMolecule(mol); if (mol.have_xyz) { ArrayOutput output_xyz(_xyz); saver.saveXyz(output_xyz); } else _xyz.clear(); } output.writeArray(_cmf); // Save gross formula GrossFormula::collect(mol, gross); GrossFormula::toString(gross, _gross_str); _counted_elems_str.clear(); _counted_elem_counters.clear(); ArrayOutput 
ce_output(_counted_elems_str); for (int i = 0; i < (int)NELEM(counted_elements); i++) { _counted_elem_counters.push(gross[counted_elements[i]]); ce_output.printf(", %d", gross[counted_elements[i]]); } ce_output.writeByte(0); // Calculate molecular mass MoleculeMass mass_calulator; mass_calulator.relative_atomic_mass_map = &_context->relative_atomic_mass_map; _molecular_mass = mass_calulator.molecularWeight(mol); }
/*
 * Imports every record of an SDF file into a table, one INSERT per record.
 *
 * env        -- Oracle environment used for statements, LOBs and logging
 * table      -- destination table name
 * clob_col   -- column that receives the record text as a temporary CLOB
 * other_cols -- field list handed to _parseFieldList(), which fills the
 *               'props' (SDF property names) and 'columns' (table column
 *               names) pools
 * file_name  -- path of the SDF file
 *
 * For every listed column the value of the corresponding SDF property is
 * bound as a string when the record has that property, and NULL is
 * inserted otherwise. A COMMIT is issued after every 1000 inserted records
 * and once more at the end for the remaining ones.
 *
 * NOTE(review): the table and column names are pasted into the SQL text
 * as is; they are identifiers and can not be bound, so the callers must
 * pass trusted names only.
 */
void _importSDF (OracleEnv &env, const char *table, const char *clob_col,
                 const char *other_cols, const char *file_name)
{
   FileScanner scanner(file_name);
   int i, nwritten = 0;
   QS_DEF(Array<char>, word);
   QS_DEF(StringPool, props);
   QS_DEF(StringPool, columns);

   env.dbgPrintfTS("importing into table %s\n", table);

   SdfLoader loader(scanner);

   // NOTE(review): 'props' is indexed below with the indices of 'columns',
   // i.e. the two pools are assumed to be filled in parallel -- confirm
   // against _parseFieldList()
   _parseFieldList(other_cols, props, columns);

   while (!loader.isEOF())
   {
      profTimerStart(tread, "import.read_next");
      loader.readNext();
      profTimerStop(tread);

      OracleStatement statement(env);
      OracleLOB lob(env);

      lob.createTemporaryCLOB();
      lob.write(0, loader.data);

      statement.append("INSERT INTO %s(%s", table, clob_col);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
         statement.append(", %s", columns.at(i));

      statement.append(") VALUES(:clobdata");

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // A placeholder is emitted only for the properties the record
         // really has; the missing ones are inserted as NULL
         if (loader.properties.contains(props.at(i)))
            statement.append(",:%s", columns.at(i));
         else
            statement.append(", NULL");
      }

      statement.append(")");
      statement.prepare();
      statement.bindClobByName(":clobdata", lob);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // No placeholder was emitted for a missing property,
         // so there is nothing to bind (and no value to read)
         if (!loader.properties.contains(props.at(i)))
            continue;

         ArrayOutput out(word);

         out.printf(":%s", columns.at(i));
         out.writeChar(0);

         const char *val = loader.properties.at(props.at(i));

         // the terminating zero is included in the bound length
         statement.bindStringByName(word.ptr(), val, strlen(val) + 1);
      }

      profTimerStart(tinsert, "import.sql_insert");
      statement.execute();
      profTimerStop(tinsert);

      nwritten++;

      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // Commit the tail that did not make up a full batch
   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}