// Prepares the shadow-table query used for molecular mass matching.
//
// Creates a private OracleEnv/OracleStatement pair, builds a SELECT over the
// shadow table restricted by the :mass_min / :mass_max bind variables and
// stores the matching COUNT(*) query text in _counting_select.
// The bind variables themselves are not bound here.
void MangoShadowFetch::prepareMass (OracleEnv &env)
{
   env.dbgPrintf("preparing shadow table for molecular mass match\n");

   _fetch_type = _MASS;

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));

   _statement->append("SELECT mol_rowid FROM %s WHERE mass >= :mass_min AND mass <= :mass_max",
                      _table_name.ptr());
   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));

   // The counting query uses the same predicate as the fetch query.
   // (It previously contained a duplicated WHERE keyword, which is invalid SQL.)
   ArrayOutput output(_counting_select);
   output.printf("SELECT COUNT(*) FROM %s WHERE mass >= :mass_min AND mass <= :mass_max",
                 _table_name.ptr());
}
// Prepares the shadow-table query for tautomer matching.
//
// right_part == 1 selects the positive form: rows are pre-filtered by gross
// formula (:gross / :grossh) and a counting query is stored. Any other value
// selects the negated form: every row is fetched and the counting query is
// left empty.
void MangoShadowFetch::prepareTautomer (OracleEnv &env, int right_part)
{
   const bool positive = (right_part == 1);

   env.dbgPrintf(positive ? "preparing shadow table for tautomer match\n"
                          : "preparing shadow table for non-tautomer match\n");

   _fetch_type = _TAUTOMER;
   _right_part = right_part;

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));
   _lob_cmf.reset(new OracleLOB(_env.ref()));

   _statement->append("SELECT mol_rowid, cmf FROM %s", _table_name.ptr());
   if (positive)
      _statement->append(" WHERE gross = :gross OR gross LIKE :grossh");

   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   _statement->defineBlobByPos(2, _lob_cmf.ref());

   if (!positive)
   {
      // No counting query for the negated form
      _counting_select.clear();
      return;
   }

   ArrayOutput counting(_counting_select);
   counting.printf("SELECT COUNT(*) FROM %s WHERE gross = :gross", _table_name.ptr());
}
// Prepares the shadow-table query for exact reaction matching.
//
// With right_part == 1 the rows are restricted to those whose hash equals
// the query hash (bound here as :hash); otherwise all rows are selected.
// The counting query stored in _counting_select mirrors the same filter.
void RingoShadowFetch::prepareExact (OracleEnv &env, int right_part)
{
   const bool by_hash = (right_part == 1);
   RingoExact &exact = _context.exact;

   env.dbgPrintf(by_hash ? "preparing shadow table for exact\n"
                         : "preparing shadow table for non-exact\n");

   _fetch_type = _EXACT;
   _right_part = right_part;

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));
   _lob_crf.reset(new OracleLOB(_env.ref()));

   _statement->append("SELECT sh.rid, sh.crf FROM %s sh", _table_name.ptr());
   if (by_hash)
      _statement->append(" WHERE hash = :hash");

   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   _statement->defineBlobByPos(2, _lob_crf.ref());

   if (by_hash)
   {
      // The bound length includes the terminating zero
      const char *query_hash = exact.getQueryHashStr();
      _statement->bindStringByName(":hash", query_hash, strlen(query_hash) + 1);
   }

   ArrayOutput counting(_counting_select);
   counting.printf("SELECT COUNT(*) FROM %s sh", _table_name.ptr());
   if (by_hash)
      counting.printf(" WHERE hash = :hash");
}
// Prepares a full scan of the shadow table for negated substructure matching.
//
// The xyz column is selected (and a LOB defined for it) only when the
// substructure query reports that it needs coordinates. No counting query
// is produced for this fetch type.
void MangoShadowFetch::prepareNonSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing shadow table for non-substructure match\n");

   _fetch_type = _NON_SUBSTRUCTURE;
   _need_xyz = _context.substructure.needCoords();

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));

   // LOB locators: cmf is always needed, xyz only on demand
   _lob_cmf.reset(new OracleLOB(_env.ref()));
   if (_need_xyz)
      _lob_xyz.reset(new OracleLOB(_env.ref()));

   if (_need_xyz)
      _statement->append("SELECT mol_rowid, cmf, xyz FROM %s", _table_name.ptr());
   else
      _statement->append("SELECT mol_rowid, cmf FROM %s", _table_name.ptr());

   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   _statement->defineBlobByPos(2, _lob_cmf.ref());
   if (_need_xyz)
      _statement->defineBlobByPos(3, _lob_xyz.ref());

   _counting_select.clear();
}
// Collects up to max_matches hits into `matched`.
//
// Only the substructure fetch type is supported; anything else raises Error.
// When the fingerprints can screen the query, candidates are taken from the
// screening results block by block; otherwise storage entries are matched
// sequentially starting at _cur_idx.
void RingoFastIndex::fetch (OracleEnv &env, int max_matches)
{
   env.dbgPrintf("requested %d hits\n", max_matches);

   matched.clear();

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   if (_fetch_type != _SUBSTRUCTURE)
      throw Error("unexpected fetch type: %d", _fetch_type);

   if (!fingerprints.ableToScreen(_screening))
   {
      // No screening possible: walk the storage sequentially
      while (matched.size() < max_matches &&
             _cur_idx < _context.context().context().storage.count())
         _match(env, _cur_idx++);

      env.dbgPrintfTS("%d reactions matched\n", matched.size());
      return;
   }

   while (matched.size() < max_matches)
   {
      // First drain the candidates left from the last screened block
      if (_screening.passed.size() > 0)
      {
         int head = _screening.passed.begin();

         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
         continue;
      }

      if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      while (fingerprints.screenPart_Next(env, _screening))
         ;
      fingerprints.screenPart_End(env, _screening);

      _unmatched += _screening.block->used - _screening.passed.size();
      _screening.items_passed += _screening.passed.size();

      env.dbgPrintfTS("%d reactions passed screening\n", _screening.passed.size());
   }
}
// Prepares the fast index for a substructure search: validates the storage
// and the fingerprints, initializes screening with the substructure query
// fingerprint and resets the iteration position and the match counters.
void MangoFastIndex::prepareSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing fastindex for substructure search\n");

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   _context.context().context().storage.validate(env);
   fingerprints.validate(env);
   fingerprints.screenInit(_context.substructure.getQueryFingerprint(), _screening);

   env.dbgPrintfTS("Have %d bits in query fingerprint\n", _screening.query_ones.size());

   _fetch_type = _SUBSTRUCTURE;
   _cur_idx = 0;
   _matched = 0;
   _unmatched = 0;
}
bool _ringoRegisterReaction (OracleEnv &env, const char *rowid, const Array<char> &reaction_buf, RingoOracleContext &context, RingoIndex &index, BingoFingerprints &fingerprints) { QS_DEF(Array<char>, data); QS_DEF(Array<char>, compressed_rowid); ArrayOutput output(data); output.writeChar(0); // 0 -- present, 1 -- removed from index ArrayOutput rid_output(compressed_rowid); RowIDSaver rid_saver(context.context().rid_dict, rid_output); rid_saver.saveRowID(rowid); output.writeByte((byte)compressed_rowid.size()); output.writeArray(compressed_rowid); TRY_READ_TARGET_RXN { BufferScanner scanner(reaction_buf); try { index.prepare(scanner, output, NULL); } catch (CmfSaver::Error &e) { if (context.context().reject_invalid_structures) throw; // Rethrow this exception further env.dbgPrintf(bad_reaction_warning_rowid, rowid, e.message()); return false; } catch (CrfSaver::Error &e) { if (context.context().reject_invalid_structures) throw; // Rethrow this exception further env.dbgPrintf(bad_reaction_warning_rowid, rowid, e.message()); return false; } } CATCH_READ_TARGET_RXN_ROWID(rowid, { if (context.context().reject_invalid_structures) throw; // Rethrow this exception further return false; });
// Collects up to max_matches substructure hits into `matched`.
//
// With screening available, each fingerprint block is screened bit by bit;
// bit reading stops early once the number of pre-passed candidates drops to
// sub_screening_pass_mark or the bit index reaches sub_screening_max_bits.
// Without screening, storage entries are matched sequentially from _cur_idx.
void MangoFastIndex::_fetchSubstructure (OracleEnv &env, int max_matches)
{
   BingoFingerprints &fingerprints = _context.context().fingerprints;

   if (!fingerprints.ableToScreen(_screening))
   {
      while (matched.size() < max_matches &&
             _cur_idx < _context.context().context().storage.count())
         _match(env, _cur_idx++);

      env.dbgPrintfTS("%d molecules matched of tested %d\n", matched.size(), _cur_idx);
      return;
   }

   while (matched.size() < max_matches)
   {
      // Candidates of the last screened block are checked one at a time
      if (_screening.passed.size() > 0)
      {
         int head = _screening.passed.begin();

         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
         continue;
      }

      if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      while (fingerprints.screenPart_Next(env, _screening))
      {
         const bool few_candidates = _screening.passed_pre.size() <=
                                     _context.context().context().sub_screening_pass_mark;
         const bool reached_max_bits = _screening.query_bit_idx >=
                                       _context.context().context().sub_screening_max_bits;

         if (few_candidates || reached_max_bits)
         {
            env.dbgPrintfTS("stopping at bit #%d; ", _screening.query_bit_idx);
            break;
         }
      }
      fingerprints.screenPart_End(env, _screening);

      _unmatched += _screening.block->used - _screening.passed.size();
      _screening.items_passed += _screening.passed.size();

      env.dbgPrintf("%d molecules passed screening\n", _screening.passed.size());
   }
}
// Prepares a full scan of the shadow table (rowid + cmf) for negated
// tautomer-substructure matching. No counting query is produced.
void MangoShadowFetch::prepareNonTautomerSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing shadow table for non-tautomer-substructure match\n");

   _fetch_type = _NON_TAUTOMER_SUBSTRUCTURE;

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));
   _lob_cmf.reset(new OracleLOB(_env.ref()));

   OracleStatement *stmt = _statement.get();

   stmt->append("SELECT mol_rowid, cmf FROM %s", _table_name.ptr());
   stmt->prepare();

   // Output columns: 1 -- rowid text, 2 -- cmf blob
   stmt->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   stmt->defineBlobByPos(2, _lob_cmf.ref());

   _counting_select.clear();
}
// Writes the pending index data to LOB number 0 at the pending mark, then
// advances the mark by the amount written and clears the pending buffer.
//
// The LOB object obtained from _getLob() is deleted here; it is also
// deleted when the write throws, so the object does not leak on failure.
void BingoStorage::_finishIndexLob (OracleEnv &env)
{
   env.dbgPrintf("flushing index LOB\n");

   OracleLOB *index_lob = _getLob(env, 0);

   try
   {
      index_lob->write(_index_lob_pending_mark, _index_lob_pending_data);
   }
   catch (...)
   {
      delete index_lob;
      throw;
   }

   delete index_lob;

   _index_lob_pending_mark += _index_lob_pending_data.size();
   _index_lob_pending_data.clear();
}
// Prepares the shadow-table query for exact molecule matching.
//
// The SELECT returns rowid and cmf, plus xyz when the exact matcher needs
// coordinates. The extra FROM entries and the WHERE clause come from
// _prepareExactQueryStrings(); the same two pieces are reused for the
// counting query stored in _counting_select.
void MangoShadowFetch::prepareExact (OracleEnv &env, int right_part)
{
   const MangoExact &instance = _context.exact;

   if (right_part == 1)
      env.dbgPrintf("preparing shadow table for exact\n");
   else
      env.dbgPrintf("preparing shadow table for non-exact\n");

   _fetch_type = _EXACT;
   _right_part = right_part;
   _need_xyz = instance.needCoords();

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));
   _lob_cmf.reset(new OracleLOB(_env.ref()));

   _statement->append("SELECT sh.mol_rowid, sh.cmf");
   if (_need_xyz)
      // The format has no conversion, so no extra argument is passed
      _statement->append(", sh.xyz");
   _statement->append(" FROM %s sh", _table_name.ptr());

   QS_DEF(Array<char>, table_copies);
   QS_DEF(Array<char>, where_clause);

   _prepareExactQueryStrings(table_copies, where_clause);

   _statement->append(table_copies.ptr());
   _statement->append(where_clause.ptr());

   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   _statement->defineBlobByPos(2, _lob_cmf.ref());
   if (_need_xyz)
   {
      _lob_xyz.reset(new OracleLOB(_env.ref()));
      _statement->defineBlobByPos(3, _lob_xyz.ref());
   }

   ArrayOutput output_cnt(_counting_select);
   output_cnt.printf("SELECT COUNT(*) FROM %s sh", _table_name.ptr());
   output_cnt.printf("%s", table_copies.ptr());
   output_cnt.printf("%s", where_clause.ptr());
}
// Writes the pending storage data to the LOB numbered _blocks.size() at the
// pending mark, then advances the mark by the amount written and clears the
// pending buffer.
//
// The LOB object obtained from _getLob() is deleted here; it is also
// deleted when logging or the write throws, so it does not leak on failure.
void BingoStorage::_finishTopLob (OracleEnv &env)
{
   OracleLOB *top_lob = _getLob(env, _blocks.size());

   try
   {
      env.dbgPrintf("flushing storage LOB\n");
      top_lob->write(_top_lob_pending_mark, _top_lob_pending_data);
   }
   catch (...)
   {
      delete top_lob;
      throw;
   }

   delete top_lob;

   _top_lob_pending_mark += _top_lob_pending_data.size();
   _top_lob_pending_data.clear();
}
// Prepares the fast index for a tautomer substructure search: validates the
// storage and the fingerprints, initializes screening with the tautomer
// query fingerprint and resets the iteration position and match counters.
void MangoFastIndex::prepareTautomerSubstructure (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for tautomer substructure search\n");

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   _context.context().context().storage.validate(env);
   fingerprints.validate(env);
   fingerprints.screenInit(_context.tautomer.getQueryFingerprint(), _screening);

   _fetch_type = _TAUTOMER_SUBSTRUCTURE;
   _cur_idx = 0;
   _matched = 0;
   _unmatched = 0;
}
// Prepares the fast index for a similarity search: validates the storage
// and the fingerprints, initializes screening with the similarity query
// fingerprint and resets the iteration position and match counters.
void MangoFastIndex::prepareSimilarity (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for similarity search\n");

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   _context.context().context().storage.validate(env);
   fingerprints.validate(env);
   fingerprints.screenInit(_context.similarity.getQueryFingerprint(), _screening);

   _fetch_type = _SIMILARITY;
   _cur_idx = 0;
   _matched = 0;
   _unmatched = 0;
}
// Clears `matched` and dispatches to the fetch routine that corresponds to
// the prepared fetch type. Raises Error for any other fetch type.
void MangoFastIndex::fetch (OracleEnv &env, int max_matches)
{
   env.dbgPrintf("requested %d hits\n", max_matches);

   matched.clear();

   switch (_fetch_type)
   {
      case _SUBSTRUCTURE:
      case _TAUTOMER_SUBSTRUCTURE:
         _fetchSubstructure(env, max_matches);
         break;

      case _SIMILARITY:
         _fetchSimilarity(env, max_matches);
         break;

      default:
         throw Error("unexpected fetch type: %d", _fetch_type);
   }
}
// Prepares the shadow-table query for gross formula matching.
//
// The fetch query is restricted by the gross conditions only when they are
// non-empty and right_part == 1. The counting query stored in
// _counting_select is restricted by the conditions whenever they are
// non-empty; with empty conditions it counts all rows instead of producing
// a dangling WHERE keyword.
void MangoShadowFetch::prepareGross (OracleEnv &env, int right_part)
{
   MangoGross &instance = _context.gross;

   env.dbgPrintf("preparing shadow table for gross formula match\n");

   _fetch_type = _GROSS;
   _right_part = right_part;

   _env.reset(new OracleEnv(env.ctx(), env.logger()));
   _statement.reset(new OracleStatement(_env.ref()));

   const char *conditions = instance.getConditions();
   const bool have_conditions = (*conditions != 0);

   _statement->append("SELECT mol_rowid, gross FROM %s ", _table_name.ptr());
   if (have_conditions && right_part == 1)
      _statement->append("WHERE %s", conditions);

   _statement->prepare();
   _statement->defineStringByPos(1, _rowid.ptr(), sizeof(_rowid));
   _statement->defineStringByPos(2, _gross, sizeof(_gross));

   ArrayOutput output(_counting_select);
   output.printf("SELECT COUNT(*) FROM %s", _table_name.ptr());
   if (have_conditions)
      output.printf(" WHERE %s", conditions);
}
// Reads the `blocks` value recorded in user_tables for the shadow table.
// Returns 0 when the query does not yield a value.
int MangoShadowFetch::countOracleBlocks (OracleEnv &env)
{
   env.dbgPrintf("countOracleBlocks\n");

   int blocks;
   const bool found = OracleStatement::executeSingleInt(blocks, env,
      "select blocks from user_tables where table_name = upper('%s')",
      _table_name.ptr());

   return found ? blocks : 0;
}
// Prepares the fast index for a reaction substructure search: validates the
// storage and the fingerprints, initializes screening with the query
// fingerprint and resets the iteration position and the match counters.
void RingoFastIndex::prepareSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing fastindex for reaction substructure search\n");

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   _context.context().context().storage.validate(env);
   fingerprints.validate(env);
   fingerprints.screenInit(_context.substructure.getQueryFingerprint(), _screening);

   _fetch_type = _SUBSTRUCTURE;
   _cur_idx = 0;
   _matched = 0;
   _unmatched = 0;
}
bool mangoPrepareMolecule (OracleEnv &env, const char *rowid, const Array<char> &molfile_buf, MangoOracleContext &context, MangoIndex &index, Array<char> &data, OsLock *lock_for_exclusive_access) { profTimerStart(tall, "moleculeIndex.prepare"); ArrayOutput output(data); output.writeChar(0); // 0 -- present, 1 -- removed from index QS_DEF(Array<char>, compressed_rowid); ArrayOutput rid_output(compressed_rowid); { // RowIDSaver modifies context.context().rid_dict and // requires exclusive access for this OsLockerNullable locker(lock_for_exclusive_access); RowIDSaver rid_saver(context.context().rid_dict, rid_output); rid_saver.saveRowID(rowid); } output.writeByte((byte)compressed_rowid.size()); output.writeArray(compressed_rowid); TRY_READ_TARGET_MOL { BufferScanner scanner(molfile_buf); try { index.prepare(scanner, output, lock_for_exclusive_access); } catch (CmfSaver::Error &e) { OsLockerNullable locker(lock_for_exclusive_access); env.dbgPrintf(bad_molecule_warning_rowid, rowid, e.message()); return false; } } CATCH_READ_TARGET_MOL_ROWID(rowid, return false); // some magic: round it up to avoid ora-22282 if (data.size() % 2 == 1) output.writeChar(0); return true; }
bool _ringoRegisterReaction (OracleEnv &env, const char *rowid, const Array<char> &reaction_buf, RingoOracleContext &context, RingoIndex &index, BingoFingerprints &fingerprints) { QS_DEF(Array<char>, data); QS_DEF(Array<char>, compressed_rowid); ArrayOutput output(data); output.writeChar(0); // 0 -- present, 1 -- removed from index ArrayOutput rid_output(compressed_rowid); RowIDSaver rid_saver(context.context().rid_dict, rid_output); rid_saver.saveRowID(rowid); output.writeByte((byte)compressed_rowid.size()); output.writeArray(compressed_rowid); TRY_READ_TARGET_RXN { BufferScanner scanner(reaction_buf); try { index.prepare(scanner, output, NULL); } catch (CmfSaver::Error &e) { env.dbgPrintf(bad_reaction_warning_rowid, rowid, e.message()); return false;} catch (CrfSaver::Error &e) { env.dbgPrintf(bad_reaction_warning_rowid, rowid, e.message()); return false;} } CATCH_READ_TARGET_RXN_ROWID(rowid, return false); // some magic: round it up to avoid ora-22282 if (data.size() % 2 == 1) output.writeChar(0); int blockno, offset; context.context().storage.add(env, data, blockno, offset); fingerprints.addFingerprint(env, index.getFingerprint()); context.shadow_table.addReaction(env, index, rowid, blockno + 1, offset); return true; }
// Moves rowid strings from the head of `matched` into the OCI collection
// `array`, removing each one from the list after it has been appended.
// Stops after maxrows entries or when the list is empty.
// Returns the number of entries moved.
int bingoPopRowidsToArray (OracleEnv &env, List<OraRowidText> &matched,
                           int maxrows, OCIArray *array)
{
   OCIString *rid_string = 0;
   int popped = 0;

   for (; maxrows > 0 && matched.size() > 0; maxrows--, popped++)
   {
      const int head = matched.begin();
      const char *rid_text = matched.at(head).ptr();

      env.callOCI(OCIStringAssignText(env.envhp(), env.errhp(),
                  (OraText *)rid_text, (ub4)strlen(rid_text), &rid_string));
      env.callOCI(OCICollAppend(env.envhp(), env.errhp(), rid_string, 0, array));

      matched.remove(head);
   }

   return popped;
}
void mangoRegisterTable (OracleEnv &env, MangoOracleContext &context, const char *source_table, const char *source_column, const char *target_datatype) { profTimerStart(tall, "total"); QS_DEF(Array<char>, molfile_buf); OracleStatement statement(env); AutoPtr<OracleLOB> molfile_lob; OraRowidText rowid; char varchar2_text[4001]; // Oracle's BLOB and CLOB types always come uppercase bool blob = (strcmp(target_datatype, "BLOB") == 0); bool clob = (strcmp(target_datatype, "CLOB") == 0); int total_count = 0; OracleStatement::executeSingleInt(total_count, env, "SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_table, source_column, source_column); context.context().longOpInit(env, total_count, "Building molecule index", source_table, "molecules"); statement.append("SELECT %s, RowidToChar(rowid) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_column, source_table, source_column, source_column); //"ORDER BY dbms_rowid.rowid_block_number(rowid), dbms_rowid.rowid_row_number(rowid)", statement.prepare(); if (blob) { molfile_lob.reset(new OracleLOB(env)); statement.defineBlobByPos(1, molfile_lob.ref()); } else if (clob) { molfile_lob.reset(new OracleLOB(env)); statement.defineClobByPos(1, molfile_lob.ref()); } else statement.defineStringByPos(1, varchar2_text, sizeof(varchar2_text)); statement.defineStringByPos(2, rowid.ptr(), sizeof(rowid)); BingoFingerprints &fingerprints = context.fingerprints; fingerprints.validateForUpdate(env); if (context.context().nthreads == 1) { int n = 0; QS_DEF(MangoIndex, index); index.init(context.context()); if (statement.executeAllowNoData()) do { env.dbgPrintf("inserting molecule #%d with rowid %s\n", n, rowid.ptr()); if (blob || clob) molfile_lob->readAll(molfile_buf, false); else molfile_buf.readString(varchar2_text, false); try { mangoPrepareAndRegisterMolecule(env, rowid.ptr(), molfile_buf, context, index, fingerprints, true); } catch (Exception &ex) { char buf[4096]; snprintf(buf, NELEM(buf), "Failed on 
record with rowid=%s. Error message is '%s'", rowid.ptr(), ex.message()); throw Exception(buf); } n++; if ((n % 50) == 0) context.context().longOpUpdate(env, n); if ((n % 1000) == 0) { env.dbgPrintf("done %d molecules; flushing\n", n); context.context().storage.flush(env); } } while (statement.fetch()); } else { if (statement.executeAllowNoData()) { MangoRegisterDispatcher dispatcher(context, env, rowid.ptr()); dispatcher.setup(&statement, molfile_lob.get(), varchar2_text, blob || clob); int nthreads = context.context().nthreads; if (nthreads <= 0) dispatcher.run(); else dispatcher.run(nthreads); } } fingerprints.flush(env); context.shadow_table.flush(env); }
// Makes sure the storage blocks are mapped and consistent with the shared
// state.
//
// If the shared state is READY and its age matches both the shared
// `age_loaded` and the local `_age_loaded`, nothing is done. Otherwise all
// blocks are re-read from the storage table: each row is mapped to a
// SharedMemory block named "<shmem id>_<block id>_<age>", filled from the
// LOB unless the state is READY (loaded by another process), and block 0 is
// copied into `_index`.
//
// Throws Error when the shared state cannot be obtained, when a non-index
// block is empty, when a block cannot be mapped during loading, or when the
// index LOB size is not a multiple of sizeof(_Addr).
void BingoStorage::validate (OracleEnv &env)
{
   env.dbgPrintfTS("validating storage... ");

   // Drop the cached shared state if it belongs to a different ID
   if (_shmem_state != 0 && strcmp(_shmem_state->getID(), _shmem_id.ptr()) != 0)
   {
      delete _shmem_state;
      _shmem_state = 0;
      _age_loaded = -1;
   }

   _State *state = _getState(true);

   // The same check is done on every retry below; without it the first
   // dereference of `state` would crash when the state is unavailable.
   if (state == 0)
      throw Error("can't get shared info");

   // TODO: implement a semaphore
   while (state->state == _STATE_LOADING)
   {
      delete _shmem_state;
      _shmem_state = 0;
      _age_loaded = -1;

      state = _getState(true);

      if (state == 0)
         throw Error("can't get shared info");

      env.dbgPrintf(".");
   }

   if (state->state == _STATE_READY)
   {
      if (state->age_loaded == state->age)
      {
         if (_age_loaded == state->age)
         {
            env.dbgPrintf("up to date\n");
            return;
         }
         else
            env.dbgPrintf("loaded by the other process\n");
      }
      else
      {
         env.dbgPrintf("has changed, reloading\n");
         state->state = _STATE_LOADING;
      }
   }
   else
   {
      state->state = _STATE_LOADING;
      env.dbgPrintf("loading ... \n");
   }

   _shmem_array.clear();
   _blocks.clear();

   OracleStatement statement(env);

   int id, length;
   OracleLOB lob(env);
   QS_DEF(Array<char>, block_name);

   statement.append("SELECT id, length(bindata), bindata FROM %s ORDER BY id",
      _table_name.ptr());

   statement.prepare();
   statement.defineIntByPos(1, &id);
   statement.defineIntByPos(2, &length);
   statement.defineBlobByPos(3, lob);
   statement.execute();

   do
   {
      ArrayOutput output(block_name);

      output.printf("%s_%d_%d", _shmem_id.ptr(), id, state->age);
      output.writeByte(0);

      if (length < 1)
      {
         // An empty index block means an empty storage
         if (id == 0)
         {
            _index.clear();
            break;
         }
         throw Error("cannot validate block #%d: length=%d", id, length);
      }

      _shmem_array.add(new SharedMemory(block_name.ptr(), length,
                                        state->state == _STATE_READY));

      void *ptr = _shmem_array.top()->ptr();

      if (ptr == 0)
      {
         if (state->state == _STATE_READY)
         {
            // That's rare case, but possible.
            // Reload the storage.
            env.dbgPrintf("shared memory is gone, resetting... \n");
            state->state = _STATE_EMPTY;
            validate(env);
            return;
         }
         else
            throw Error("can't map block #%d", id);
      }

      if (state->state != _STATE_READY)
         lob.read(0, (char *)ptr, length);

      if (id == 0)
      {
         // sizeof yields size_t; cast it so that it matches the %d conversion
         if ((length % sizeof(_Addr)) != 0)
            throw Error("LOB size %d (expected a multiple of %d)", length, (int)sizeof(_Addr));
         if (length > 0)
            _index.copy((_Addr *)_shmem_array[0]->ptr(), length / sizeof(_Addr));
      }

      _Block &block = _blocks.push();

      block.size = length;
   } while (statement.fetch());

   state->state = _STATE_READY;
   state->age_loaded = state->age;
   _age_loaded = state->age;
}
// Imports the records of an RDF file into `table`.
//
// Each record's data goes into clob_col through a temporary CLOB. The
// other_cols list is parsed into parallel property/column name pools; for
// every listed column the record's property value is bound when the record
// has that property, and NULL is inserted when it does not.
// A COMMIT is issued after every 1000 inserted rows and once more at the
// end for the remainder.
void _importRDF (OracleEnv &env, const char *table, const char *clob_col,
                 const char *other_cols, const char *file_name)
{
   FileScanner scanner(file_name);
   int i, nwritten = 0;
   QS_DEF(Array<char>, word);
   QS_DEF(StringPool, props);
   QS_DEF(StringPool, columns);

   env.dbgPrintfTS("importing into table %s\n", table);

   _parseFieldList(other_cols, props, columns);

   RdfLoader loader(scanner);

   while (!loader.isEOF())
   {
      loader.readNext();

      OracleStatement statement(env);
      OracleLOB lob(env);

      lob.createTemporaryCLOB();
      lob.write(0, loader.data);

      statement.append("INSERT INTO %s(%s", table, clob_col);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
         statement.append(", %s", columns.at(i));

      statement.append(") VALUES(:clobdata");

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // A placeholder only for properties the record actually has;
         // missing ones become NULL
         if (loader.properties.contains(props.at(i)))
            statement.append(",:%s", columns.at(i));
         else
            statement.append(", NULL");
      }

      statement.append(")");
      statement.prepare();
      statement.bindClobByName(":clobdata", lob);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // Nothing to bind for a missing property (NULL was written above);
         // this also keeps at() from being called with an absent key
         if (!loader.properties.contains(props.at(i)))
            continue;

         ArrayOutput out(word);

         out.printf(":%s", columns.at(i));
         out.writeChar(0);

         const char *val = loader.properties.at(props.at(i));

         statement.bindStringByName(word.ptr(), val, strlen(val) + 1);
      }

      statement.execute();
      nwritten++;

      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}
void ringoRegisterTable (OracleEnv &env, RingoOracleContext &context, const char *source_table, const char *source_column, const char *target_datatype) { QS_DEF(Array<char>, reaction_buf); OracleStatement statement(env); AutoPtr<OracleLOB> reaction_lob; OraRowidText rowid; char varchar2_text[4001]; // Oracle's BLOB and CLOB types always come uppercase bool blob = (strcmp(target_datatype, "BLOB") == 0); bool clob = (strcmp(target_datatype, "CLOB") == 0); int total_count = 0; OracleStatement::executeSingleInt(total_count, env, "SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_table, source_column, source_column); context.context().longOpInit(env, total_count, "Building reaction index", source_table, "reactions"); statement.append("SELECT %s, RowidToChar(rowid) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_column, source_table, source_column, source_column); statement.prepare(); if (blob) { reaction_lob.reset(new OracleLOB(env)); statement.defineBlobByPos(1, reaction_lob.ref()); } else if (clob) { reaction_lob.reset(new OracleLOB(env)); statement.defineClobByPos(1, reaction_lob.ref()); } else statement.defineStringByPos(1, varchar2_text, sizeof(varchar2_text)); statement.defineStringByPos(2, rowid.ptr(), sizeof(rowid)); BingoFingerprints &fingerprints = context.fingerprints; int nthreads = 0; fingerprints.validateForUpdate(env); context.context().configGetInt(env, "NTHREADS", nthreads); nthreads = 1; //if (nthreads == 1) { int n = 0; QS_DEF(RingoIndex, index); index.init(context.context()); if (statement.executeAllowNoData()) do { env.dbgPrintf("inserting reaction #%d with rowid %s\n", n, rowid.ptr()); if (blob || clob) reaction_lob->readAll(reaction_buf, false); else reaction_buf.readString(varchar2_text, false); try { if (_ringoRegisterReaction(env, rowid.ptr(), reaction_buf, context, index, fingerprints)) n++; } catch (Exception &ex) { char buf[4096]; snprintf(buf, NELEM(buf), "Failed on record with rowid=%s. 
Error message is '%s'", rowid.ptr(), ex.message()); throw Exception(buf); } if ((n % 50) == 0) context.context().longOpUpdate(env, n); if ((n % 1000) == 0) { env.dbgPrintfTS("done %d reactions ; flushing\n", n); context.context().storage.flush(env); } } while (statement.fetch()); fingerprints.flush(env); } }
void MangoFastIndex::_fetchSimilarity (OracleEnv &env, int max_matches) { BingoFingerprints &fingerprints = _context.context().fingerprints; int i; if (!fingerprints.ableToScreen(_screening)) { env.dbgPrintfTS("no bits in query fingerprint, can not do similarity search\n"); return; } profTimerStart(tsimfetch, "sim.fetch"); while (matched.size() < max_matches) { if (!fingerprints.countOnes_Init(env, _screening)) { env.dbgPrintfTS("screening ended\n"); break; } BingoStorage &storage = _context.context().context().storage; QS_DEF(Array<int>, max_common_ones); QS_DEF(Array<int>, min_common_ones); QS_DEF(Array<int>, target_ones); QS_DEF(Array<char>, stored); max_common_ones.clear_resize(_screening.block->used); min_common_ones.clear_resize(_screening.block->used); target_ones.clear_resize(_screening.block->used); for (i = 0; i < _screening.block->used; i++) { storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); BufferScanner scanner(stored); scanner.skip(1); // skip the deletion mark scanner.skip(scanner.readByte()); // skip the compessed rowid target_ones[i] = scanner.readBinaryWord(); max_common_ones[i] = _context.similarity.getUpperBound(target_ones[i]); min_common_ones[i] = _context.similarity.getLowerBound(target_ones[i]); } bool first = true; bool entire = false; _screening.passed.clear(); while (true) { if (!fingerprints.countOnes_Next(env, _screening)) { env.dbgPrintf("read all %d bits, writing %d results... 
", _screening.query_ones.size(), _screening.passed.size()); entire = true; break; } if (first) { first = false; for (i = 0; i < _screening.block->used; i++) { int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; if (min_possible_ones <= max_common_ones[i] && max_possible_ones >= min_common_ones[i]) _screening.passed.add(i); } } else { int j; for (j = _screening.passed.begin(); j != _screening.passed.end(); ) { i = _screening.passed[j]; int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; int next_j = _screening.passed.next(j); if (min_possible_ones > max_common_ones[i] || max_possible_ones < min_common_ones[i]) _screening.passed.remove(j); j = next_j; } } if (_screening.passed.size() <= _context.context().context().sim_screening_pass_mark) { env.dbgPrintfTS("stopping reading fingerprints on bit %d/%d; have %d molecules to check... ", _screening.query_bit_idx, _screening.query_ones.size(), _screening.passed.size()); _unmatched += _screening.block->used - _screening.passed.size(); break; } } if (entire) { for (i = 0; i < _screening.block->used; i++) { if (_context.similarity.match(target_ones[i], _screening.one_counters[i])) { OraRowidText &rid = matched.at(matched.add()); storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); _decompressRowid(stored, rid); _matched++; } else _unmatched++; } } else if (_screening.passed.size() > 0) { profTimerStart(tfine, "sim.fetch.fine"); for (i = _screening.passed.begin(); i != _screening.passed.end(); i = _screening.passed.next(i)) _match(env, fingerprints.getStorageIndex_NoMap(_screening, _screening.passed[i])); profTimerStop(tfine); } env.dbgPrintf("done\n"); fingerprints.countOnes_End(env, _screening); } profTimerStop(tsimfetch); }
// Fetches rows from the prepared shadow-table statement until `matched`
// holds maxrows rowids or the statement is exhausted (which sets _end).
//
// Does nothing when no statement is prepared, when maxrows < 1 or when the
// end has already been reached. Every fetched row is tested according to
// the prepared fetch type; for the types that have a negated form,
// _right_part == 1 selects the positive form. A row that fails inside the
// TRY_READ_TARGET_MOL section is treated as a non-match.
void MangoShadowFetch::fetch (OracleEnv &env, int maxrows)
{
   matched.clear();

   if (_statement.get() == 0)
      return;

   if (maxrows < 1 || _end)
      return;

   env.dbgPrintf("fetching up to %d rows using shadowtable... ", maxrows);

   const bool positive = (_right_part == 1);

   while (matched.size() < maxrows)
   {
      bool fetch_res;

      if (_fetch_type == _MASS)
      {
         _statement->bindFloatByName(":mass_min", &_context.mass.bottom);
         _statement->bindFloatByName(":mass_max", &_context.mass.top);
      }
      else if (_fetch_type == _TAUTOMER && positive)
      {
         const char *gross = _context.tautomer.getQueryGross();

         _statement->bindStringByName(":gross", gross, strlen(gross) + 1);

         // LIKE pattern: the query gross followed by " H%"
         QS_DEF(Array<char>, grossh);

         grossh.readString(gross, false);
         grossh.appendString(" H%", true);
         _statement->bindStringByName(":grossh", grossh.ptr(), grossh.size());
      }

      // The first call executes the statement, later calls fetch from it
      if (_executed)
         fetch_res = _statement->fetch();
      else
      {
         fetch_res = _statement->executeAllowNoData();
         _executed = true;
      }

      if (!fetch_res)
      {
         _end = true;
         break;
      }

      bool have_match = false;

      TRY_READ_TARGET_MOL
      {
         if (_fetch_type == _NON_SUBSTRUCTURE)
         {
            MangoSubstructure &instance = _context.substructure;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (_need_xyz)
            {
               if (_statement->gotNull(3)) // xyz == NULL?
                  have_match = true;
               else
               {
                  QS_DEF(Array<char>, xyz);

                  _lob_xyz->readAll(xyz, false);
                  if (!instance.matchBinary(cmf, &xyz))
                     have_match = true;
               }
            }
            else if (!instance.matchBinary(cmf, 0))
               have_match = true;
         }
         else if (_fetch_type == _NON_TAUTOMER_SUBSTRUCTURE)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (!instance.matchBinary(cmf))
               have_match = true;
         }
         else if (_fetch_type == _TAUTOMER)
         {
            MangoTautomer &instance = _context.tautomer;
            QS_DEF(Array<char>, cmf);

            _lob_cmf->readAll(cmf, false);

            if (instance.matchBinary(cmf) == (_right_part == 1))
               have_match = true;
         }
         else if (_fetch_type == _EXACT)
         {
            MangoExact &instance = _context.exact;
            QS_DEF(Array<char>, cmf);

            profTimerStart(tlobread, "exact.lobread");
            _lob_cmf->readAll(cmf, false);
            profTimerStop(tlobread);

            if (_need_xyz)
            {
               if (_statement->gotNull(3)) // xyz == NULL?
                  have_match = (_right_part == 0);
               else
               {
                  QS_DEF(Array<char>, xyz);

                  profTimerStart(txyzlobread, "exact.xyzlobread");
                  _lob_xyz->readAll(xyz, false);
                  profTimerStop(txyzlobread);

                  profTimerStart(tmatch, "exact.match");
                  if (instance.matchBinary(cmf, &xyz) == (_right_part == 1))
                     have_match = true;
               }
            }
            else
            {
               profTimerStart(tmatch, "exact.match");
               if (instance.matchBinary(cmf, 0) == (_right_part == 1))
                  have_match = true;
            }
         }
         else if (_fetch_type == _GROSS)
         {
            MangoGross &instance = _context.gross;

            if (instance.checkGross(_gross) == (_right_part == 1))
               have_match = true;
         }
         else if (_fetch_type == _MASS)
         {
            // The SQL predicate already did the filtering
            have_match = true;
         }
         else
            throw Error("unexpected fetch type %d", _fetch_type);
      }
      CATCH_READ_TARGET_MOL(have_match = false)

      if (have_match)
         matched.add(_rowid);

      _processed_rows++;
   }

   env.dbgPrintf("fetched %d\n", matched.size());
}
// Runs ANALYZE TABLE ... ESTIMATE STATISTICS on the shadow table.
void RingoShadowTable::analyze (OracleEnv &env)
{
   env.dbgPrintf("analyzing shadow table\n");

   const char *table = _table_name.ptr();

   OracleStatement::executeSingle(env, "ANALYZE TABLE %s ESTIMATE STATISTICS", table);
}
// Fetches rows from the prepared shadow-table statement until `matched`
// holds maxrows rowids or the statement is exhausted (which sets _end).
//
// Does nothing when no statement is prepared, when maxrows < 1 or when the
// end has already been reached. Rows are tested against the substructure
// query (negated) or the exact query (positive when _right_part == 1);
// any other fetch type raises Error.
void RingoShadowFetch::fetch (OracleEnv &env, int maxrows)
{
   matched.clear();

   if (_statement.get() == 0 || maxrows < 1 || _end)
      return;

   env.dbgPrintf("fetching up to %d rows using shadowtable... ", maxrows);

   while (matched.size() < maxrows)
   {
      bool fetch_res;

      // The first call executes the statement, later calls fetch from it
      if (_executed)
         fetch_res = _statement->fetch();
      else
      {
         fetch_res = _statement->executeAllowNoData();
         _executed = true;
      }

      if (!fetch_res)
      {
         _end = true;
         break;
      }

      bool have_match = false;

      if (_fetch_type == _NON_SUBSTRUCTURE)
      {
         RingoSubstructure &instance = _context.substructure;
         QS_DEF(Array<char>, crf);

         _lob_crf->readAll(crf, false);

         have_match = !instance.matchBinary(crf);
      }
      else if (_fetch_type == _EXACT)
      {
         RingoExact &instance = _context.exact;
         QS_DEF(Array<char>, crf);

         _lob_crf->readAll(crf, false);

         have_match = (instance.matchBinary(crf) == (_right_part == 1));
      }
      else
         throw Error("unexpected fetch type %d", _fetch_type);

      if (have_match)
         matched.add(_rowid);

      _processed_rows++;
   }

   env.dbgPrintf("fetched %d\n", matched.size());
}
// Imports a SMILES file, one line per row, into `table`.
//
// The file is read through GZipScanner when it starts with the bytes
// 0x1f 0x8b, and directly otherwise. Each whole line is bound as :smiles.
// When id_col is given, the text that follows the first token (and an
// optional |...| section) is bound as :id, or NULL is inserted when there
// is no such text.
// A COMMIT is issued after every 1000 inserted rows and once more at the
// end for the remainder.
void _importSMILES (OracleEnv &env, const char *table, const char *smiles_col,
                    const char *id_col, const char *file_name)
{
   FileScanner fscanner(file_name);
   AutoPtr<GZipScanner> gzscanner;
   Scanner *scanner;

   int nwritten = 0;
   QS_DEF(Array<char>, id);
   QS_DEF(Array<char>, str);

   env.dbgPrintfTS("importing into table %s\n", table);

   // detect if input is gzipped
   byte magic[2];
   int pos = fscanner.tell();

   fscanner.readCharsFix(2, (char *)magic);
   fscanner.seek(pos, SEEK_SET);

   const bool gzipped = (magic[0] == 0x1f && magic[1] == 0x8b);

   if (gzipped)
   {
      gzscanner.reset(new GZipScanner(fscanner));
      scanner = gzscanner.get();
   }
   else
      scanner = &fscanner;

   while (!scanner->isEOF())
   {
      id.clear();
      scanner->readLine(str, false);

      BufferScanner strscan(str);

      // Skip the first token
      strscan.skipSpace();
      while (!strscan.isEOF() && !isspace(strscan.readChar()))
         ;
      strscan.skipSpace();

      // Skip a |...| section if one follows
      if (strscan.lookNext() == '|')
      {
         strscan.readChar();
         while (!strscan.isEOF() && strscan.readChar() != '|')
            ;
         strscan.skipSpace();
      }

      // The rest of the line is the identifier
      if (!strscan.isEOF() && id_col != 0)
         strscan.readLine(id, true);

      OracleStatement statement(env);

      statement.append("INSERT INTO %s(%s", table, smiles_col);
      if (id_col != 0)
         statement.append(", %s", id_col);

      statement.append(") VALUES(:smiles");
      if (id_col != 0)
         statement.append(id.size() > 1 ? ", :id" : ", NULL");
      statement.append(")");

      statement.prepare();

      str.push(0);
      statement.bindStringByName(":smiles", str.ptr(), str.size());
      if (id.size() > 1)
         statement.bindStringByName(":id", id.ptr(), id.size());

      statement.execute();
      nwritten++;

      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}