// Collects up to max_matches hits of the prepared reaction substructure query
// into `matched`, which is emptied first.
//
// When the fingerprints are able to screen the query, candidates that passed
// screening are matched one at a time, and another screening part
// (screenPart_Init / screenPart_Next / screenPart_End) is run each time the
// list of passed candidates is empty. Otherwise the storage items are matched
// sequentially, starting from _cur_idx.
//
// Throws Error when _fetch_type is not _SUBSTRUCTURE.
void RingoFastIndex::fetch (OracleEnv &env, int max_matches)
{
   env.dbgPrintf("requested %d hits\n", max_matches);
   matched.clear();

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   // this index supports only one kind of search
   if (_fetch_type != _SUBSTRUCTURE)
      throw Error("unexpected fetch type: %d", _fetch_type);

   if (!fingerprints.ableToScreen(_screening))
   {
      // no screening: test the stored items one after another
      while (matched.size() < max_matches &&
             _cur_idx < _context.context().context().storage.count())
         _match(env, _cur_idx++);

      env.dbgPrintfTS("%d reactions matched\n", matched.size());
      return;
   }

   while (matched.size() < max_matches)
   {
      // consume the candidates left from the previous screening part first
      if (_screening.passed.size() > 0)
      {
         const int head = _screening.passed.begin();

         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
         continue;
      }

      if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      // run the screening of this part to completion
      while (fingerprints.screenPart_Next(env, _screening))
      {
      }
      fingerprints.screenPart_End(env, _screening);

      // everything in the block that did not pass is counted as unmatched
      _unmatched += _screening.block->used - _screening.passed.size();
      _screening.items_passed += _screening.passed.size();

      env.dbgPrintfTS("%d reactions passed screening\n", _screening.passed.size());
   }
}
// Collects hits of the prepared molecule substructure query until `matched`
// holds at least max_matches entries or the candidates run out.
//
// When the fingerprints are able to screen the query, candidates that passed
// screening are matched one at a time; whenever none are left, another
// screening part is run. Reading of a part stops early once the number of
// pre-passed candidates is not above sub_screening_pass_mark, or the query
// bit index has reached sub_screening_max_bits. Without screening, storage
// items are matched sequentially starting from _cur_idx.
void MangoFastIndex::_fetchSubstructure (OracleEnv &env, int max_matches)
{
   BingoFingerprints &fingerprints = _context.context().fingerprints;

   if (!fingerprints.ableToScreen(_screening))
   {
      // no screening: test the stored items one after another
      while (matched.size() < max_matches &&
             _cur_idx < _context.context().context().storage.count())
         _match(env, _cur_idx++);

      env.dbgPrintfTS("%d molecules matched of tested %d\n", matched.size(), _cur_idx);
      return;
   }

   while (matched.size() < max_matches)
   {
      // consume the candidates left from the previous screening part first
      if (_screening.passed.size() > 0)
      {
         const int head = _screening.passed.begin();

         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
         continue;
      }

      if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      while (fingerprints.screenPart_Next(env, _screening))
      {
         // keep reading bits while there are still many candidates
         // and the bit limit has not been reached
         const bool many_candidates =
            _screening.passed_pre.size() > _context.context().context().sub_screening_pass_mark;

         if (many_candidates &&
             _screening.query_bit_idx < _context.context().context().sub_screening_max_bits)
            continue;

         env.dbgPrintfTS("stopping at bit #%d; ", _screening.query_bit_idx);
         break;
      }
      fingerprints.screenPart_End(env, _screening);

      // everything in the block that did not pass is counted as unmatched
      _unmatched += _screening.block->used - _screening.passed.size();
      _screening.items_passed += _screening.passed.size();

      env.dbgPrintf("%d molecules passed screening\n", _screening.passed.size());
   }
}
// Sets the index up for a tautomer substructure search: validates the storage
// and the fingerprints, initializes screening with the tautomer query
// fingerprint, and resets the scan position and the match counters.
void MangoFastIndex::prepareTautomerSubstructure (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for tautomer substructure search\n");

   _context.context().context().storage.validate(env);

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   fingerprints.validate(env);
   fingerprints.screenInit(_context.tautomer.getQueryFingerprint(), _screening);

   // start a fresh scan
   _unmatched = 0;
   _matched = 0;
   _cur_idx = 0;
   _fetch_type = _TAUTOMER_SUBSTRUCTURE;
}
// Sets the index up for a similarity search: validates the storage and the
// fingerprints, initializes screening with the similarity query fingerprint,
// and resets the scan position and the match counters.
void MangoFastIndex::prepareSimilarity (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for similarity search\n");

   _context.context().context().storage.validate(env);

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   fingerprints.validate(env);
   fingerprints.screenInit(_context.similarity.getQueryFingerprint(), _screening);

   // start a fresh scan
   _unmatched = 0;
   _matched = 0;
   _cur_idx = 0;
   _fetch_type = _SIMILARITY;
}
// Sets the index up for a substructure search: validates the storage and the
// fingerprints, initializes screening with the substructure query
// fingerprint, and resets the scan position and the match counters.
void MangoFastIndex::prepareSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing fastindex for substructure search\n");

   _context.context().context().storage.validate(env);

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   fingerprints.validate(env);
   fingerprints.screenInit(_context.substructure.getQueryFingerprint(), _screening);

   env.dbgPrintfTS("Have %d bits in query fingerprint\n", _screening.query_ones.size());

   // start a fresh scan
   _unmatched = 0;
   _matched = 0;
   _cur_idx = 0;
   _fetch_type = _SUBSTRUCTURE;
}
void ringoRegisterTable (OracleEnv &env, RingoOracleContext &context, const char *source_table, const char *source_column, const char *target_datatype) { QS_DEF(Array<char>, reaction_buf); OracleStatement statement(env); AutoPtr<OracleLOB> reaction_lob; OraRowidText rowid; char varchar2_text[4001]; // Oracle's BLOB and CLOB types always come uppercase bool blob = (strcmp(target_datatype, "BLOB") == 0); bool clob = (strcmp(target_datatype, "CLOB") == 0); int total_count = 0; OracleStatement::executeSingleInt(total_count, env, "SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_table, source_column, source_column); context.context().longOpInit(env, total_count, "Building reaction index", source_table, "reactions"); statement.append("SELECT %s, RowidToChar(rowid) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0", source_column, source_table, source_column, source_column); statement.prepare(); if (blob) { reaction_lob.reset(new OracleLOB(env)); statement.defineBlobByPos(1, reaction_lob.ref()); } else if (clob) { reaction_lob.reset(new OracleLOB(env)); statement.defineClobByPos(1, reaction_lob.ref()); } else statement.defineStringByPos(1, varchar2_text, sizeof(varchar2_text)); statement.defineStringByPos(2, rowid.ptr(), sizeof(rowid)); BingoFingerprints &fingerprints = context.fingerprints; int nthreads = 0; fingerprints.validateForUpdate(env); context.context().configGetInt(env, "NTHREADS", nthreads); nthreads = 1; //if (nthreads == 1) { int n = 0; QS_DEF(RingoIndex, index); index.init(context.context()); if (statement.executeAllowNoData()) do { env.dbgPrintf("inserting reaction #%d with rowid %s\n", n, rowid.ptr()); if (blob || clob) reaction_lob->readAll(reaction_buf, false); else reaction_buf.readString(varchar2_text, false); try { if (_ringoRegisterReaction(env, rowid.ptr(), reaction_buf, context, index, fingerprints)) n++; } catch (Exception &ex) { char buf[4096]; snprintf(buf, NELEM(buf), "Failed on record with rowid=%s. 
Error message is '%s'", rowid.ptr(), ex.message()); throw Exception(buf); } if ((n % 50) == 0) context.context().longOpUpdate(env, n); if ((n % 1000) == 0) { env.dbgPrintfTS("done %d reactions ; flushing\n", n); context.context().storage.flush(env); } } while (statement.fetch()); fingerprints.flush(env); } }
// Brings this object's view of the storage in line with the shared state.
//
// Steps, as implemented below:
//  - the attached shared-state object is dropped if its ID differs from
//    _shmem_id;
//  - while the shared state is _STATE_LOADING, the shared-state object is
//    dropped and re-obtained in a polling loop (no semaphore yet);
//  - if the state is _STATE_READY and both the shared age_loaded and this
//    object's _age_loaded equal the shared age, nothing is done;
//  - otherwise all blocks are re-read from _table_name ordered by id, one
//    SharedMemory object per block; the LOB contents are copied into the
//    block only when the state is not _STATE_READY (i.e. this call is the
//    one loading the data);
//  - block #0 is copied into _index and must be a whole number of _Addr
//    records.
//
// Throws Error if the shared state cannot be obtained, if a block other than
// #0 has length < 1, if a block cannot be mapped, or if block #0 has a size
// that is not a multiple of sizeof(_Addr).
void BingoStorage::validate (OracleEnv &env)
{
   env.dbgPrintfTS("validating storage... ");

   // the attached shared state was obtained for a different ID: drop it
   if (_shmem_state != 0 && strcmp(_shmem_state->getID(), _shmem_id.ptr()) != 0)
   {
      delete _shmem_state;
      _shmem_state = 0;
      _age_loaded = -1;
   }

   _State *state = _getState(true);

   // _getState() may fail (the polling loop below checks for it too);
   // it must not be dereferenced in that case
   if (state == 0)
      throw Error("can't get shared info");

   // TODO: implement a semaphore
   while (state->state == _STATE_LOADING)
   {
      delete _shmem_state;
      _shmem_state = 0;
      _age_loaded = -1;

      state = _getState(true);

      if (state == 0)
         throw Error("can't get shared info");

      env.dbgPrintf(".");
   }

   if (state->state == _STATE_READY)
   {
      if (state->age_loaded == state->age)
      {
         if (_age_loaded == state->age)
         {
            env.dbgPrintf("up to date\n");
            return;
         }
         else
            // the state stays _STATE_READY: blocks are attached below
            // without reading the LOBs again
            env.dbgPrintf("loaded by the other process\n");
      }
      else
      {
         env.dbgPrintf("has changed, reloading\n");
         state->state = _STATE_LOADING;
      }
   }
   else
   {
      state->state = _STATE_LOADING;
      env.dbgPrintf("loading ... \n");
   }

   _shmem_array.clear();
   _blocks.clear();

   OracleStatement statement(env);

   int id, length;
   OracleLOB lob(env);
   QS_DEF(Array<char>, block_name);

   statement.append("SELECT id, length(bindata), bindata FROM %s ORDER BY id",
      _table_name.ptr());

   statement.prepare();
   statement.defineIntByPos(1, &id);
   statement.defineIntByPos(2, &length);
   statement.defineBlobByPos(3, lob);
   statement.execute();

   do
   {
      // the block name is built from the shared ID, the block id and the age
      ArrayOutput output(block_name);
      output.printf("%s_%d_%d", _shmem_id.ptr(), id, state->age);
      output.writeByte(0);

      if (length < 1)
      {
         // an empty block #0 is accepted and leaves the index empty
         if (id == 0)
         {
            _index.clear();
            break;
         }
         throw Error("cannot validate block #%d: length=%d", id, length);
      }

      _shmem_array.add(new SharedMemory(block_name.ptr(), length, state->state == _STATE_READY));

      void *ptr = _shmem_array.top()->ptr();

      if (ptr == 0)
      {
         if (state->state == _STATE_READY)
         {
            // That's rare case, but possible.
            // Reload the storage.
            env.dbgPrintf("shared memory is gone, resetting... \n");
            state->state = _STATE_EMPTY;
            validate(env);
            return;
         }
         else
            throw Error("can't map block #%d", id);
      }

      // this call is the loader: copy the LOB contents into the block
      if (state->state != _STATE_READY)
         lob.read(0, (char *)ptr, length);

      if (id == 0)
      {
         // sizeof() yields size_t; it is cast to int to match the %d
         // conversion of the variadic Error constructor
         if ((length % sizeof(_Addr)) != 0)
            throw Error("LOB size %d (expected a multiple of %d)", length, (int)sizeof(_Addr));
         if (length > 0)
            _index.copy((_Addr *)_shmem_array[0]->ptr(), length / sizeof(_Addr));
      }

      _Block &block = _blocks.push();

      block.size = length;
   } while (statement.fetch());

   state->state = _STATE_READY;
   state->age_loaded = state->age;
   _age_loaded = state->age;
}
void MangoFastIndex::_fetchSimilarity (OracleEnv &env, int max_matches) { BingoFingerprints &fingerprints = _context.context().fingerprints; int i; if (!fingerprints.ableToScreen(_screening)) { env.dbgPrintfTS("no bits in query fingerprint, can not do similarity search\n"); return; } profTimerStart(tsimfetch, "sim.fetch"); while (matched.size() < max_matches) { if (!fingerprints.countOnes_Init(env, _screening)) { env.dbgPrintfTS("screening ended\n"); break; } BingoStorage &storage = _context.context().context().storage; QS_DEF(Array<int>, max_common_ones); QS_DEF(Array<int>, min_common_ones); QS_DEF(Array<int>, target_ones); QS_DEF(Array<char>, stored); max_common_ones.clear_resize(_screening.block->used); min_common_ones.clear_resize(_screening.block->used); target_ones.clear_resize(_screening.block->used); for (i = 0; i < _screening.block->used; i++) { storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); BufferScanner scanner(stored); scanner.skip(1); // skip the deletion mark scanner.skip(scanner.readByte()); // skip the compessed rowid target_ones[i] = scanner.readBinaryWord(); max_common_ones[i] = _context.similarity.getUpperBound(target_ones[i]); min_common_ones[i] = _context.similarity.getLowerBound(target_ones[i]); } bool first = true; bool entire = false; _screening.passed.clear(); while (true) { if (!fingerprints.countOnes_Next(env, _screening)) { env.dbgPrintf("read all %d bits, writing %d results... 
", _screening.query_ones.size(), _screening.passed.size()); entire = true; break; } if (first) { first = false; for (i = 0; i < _screening.block->used; i++) { int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; if (min_possible_ones <= max_common_ones[i] && max_possible_ones >= min_common_ones[i]) _screening.passed.add(i); } } else { int j; for (j = _screening.passed.begin(); j != _screening.passed.end(); ) { i = _screening.passed[j]; int min_possible_ones = _screening.one_counters[i]; int max_possible_ones = _screening.one_counters[i] + _screening.query_ones.size() - _screening.query_bit_idx; int next_j = _screening.passed.next(j); if (min_possible_ones > max_common_ones[i] || max_possible_ones < min_common_ones[i]) _screening.passed.remove(j); j = next_j; } } if (_screening.passed.size() <= _context.context().context().sim_screening_pass_mark) { env.dbgPrintfTS("stopping reading fingerprints on bit %d/%d; have %d molecules to check... ", _screening.query_bit_idx, _screening.query_ones.size(), _screening.passed.size()); _unmatched += _screening.block->used - _screening.passed.size(); break; } } if (entire) { for (i = 0; i < _screening.block->used; i++) { if (_context.similarity.match(target_ones[i], _screening.one_counters[i])) { OraRowidText &rid = matched.at(matched.add()); storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored); _decompressRowid(stored, rid); _matched++; } else _unmatched++; } } else if (_screening.passed.size() > 0) { profTimerStart(tfine, "sim.fetch.fine"); for (i = _screening.passed.begin(); i != _screening.passed.end(); i = _screening.passed.next(i)) _match(env, fingerprints.getStorageIndex_NoMap(_screening, _screening.passed[i])); profTimerStop(tfine); } env.dbgPrintf("done\n"); fingerprints.countOnes_End(env, _screening); } profTimerStop(tsimfetch); }
// Imports the records of an RDF file into `table`, one INSERT per record.
//
//   table      - name of the target table
//   clob_col   - column that receives the record data (bound as a temporary CLOB)
//   other_cols - field list parsed by _parseFieldList() into the parallel
//                pools `props` (record property names) and `columns`
//                (target column names)
//   file_name  - path of the RDF file
//
// For every listed column the value of the corresponding record property is
// bound as a string; if the record does not have that property, NULL is
// inserted instead. A COMMIT is issued after every 1000 inserted records and
// once more at the end if records remain uncommitted.
void _importRDF (OracleEnv &env, const char *table, const char *clob_col,
                 const char *other_cols, const char *file_name)
{
   FileScanner scanner(file_name);
   int i, nwritten = 0;
   QS_DEF(Array<char>, word);
   QS_DEF(StringPool, props);
   QS_DEF(StringPool, columns);

   env.dbgPrintfTS("importing into table %s\n", table);

   _parseFieldList(other_cols, props, columns);

   RdfLoader loader(scanner);

   while (!loader.isEOF())
   {
      loader.readNext();

      OracleStatement statement(env);
      OracleLOB lob(env);

      lob.createTemporaryCLOB();
      lob.write(0, loader.data);

      statement.append("INSERT INTO %s(%s", table, clob_col);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
         statement.append(", %s", columns.at(i));

      statement.append(") VALUES(:clobdata");

      // NOTE: `props` is indexed with the indices of `columns`; this assumes
      // _parseFieldList() fills the two pools in parallel -- TODO confirm
      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // a property missing from this record becomes NULL;
         // a present one gets a bind placeholder named after its column
         if (!loader.properties.contains(props.at(i)))
            statement.append(", NULL");
         else
            statement.append(",:%s", columns.at(i));
      }

      statement.append(")");
      statement.prepare();
      statement.bindClobByName(":clobdata", lob);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // no placeholder was generated for a missing property,
         // and there is no value to look up for it
         if (!loader.properties.contains(props.at(i)))
            continue;

         ArrayOutput out(word);

         out.printf(":%s", columns.at(i));
         out.writeChar(0);

         const char *val = loader.properties.at(props.at(i));

         // the length includes the terminating zero
         statement.bindStringByName(word.ptr(), val, strlen(val) + 1);
      }

      statement.execute();
      nwritten++;

      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // commit the tail that did not fill a whole batch of 1000
   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}
// Imports a SMILES file into `table`, one INSERT per input line.
//
//   table      - name of the target table
//   smiles_col - column that receives the line (bound as :smiles)
//   id_col     - column that receives the identifier, or 0 to import no
//                identifiers
//   file_name  - path of the input file; if it starts with the bytes
//                0x1f 0x8b it is read through a GZipScanner
//
// Each line is scanned as: leading whitespace, a first token that ends at
// whitespace, an optional section enclosed in '|' characters, and then the
// rest of the line, which is taken as the identifier when id_col is given.
// If no identifier was read, NULL is inserted into id_col. Note that the
// whole line, not only the first token, is bound to :smiles.
//
// A COMMIT is issued after every 1000 inserted lines and once more at the
// end if lines remain uncommitted.
void _importSMILES (OracleEnv &env, const char *table, const char *smiles_col,
                    const char *id_col, const char *file_name)
{
   FileScanner fscanner(file_name);
   AutoPtr<GZipScanner> gzscanner;
   Scanner *scanner;
   int nwritten = 0;
   QS_DEF(Array<char>, id);
   QS_DEF(Array<char>, str);

   env.dbgPrintfTS("importing into table %s\n", table);

   // detect if input is gzipped
   byte magic[2];
   int pos = fscanner.tell();

   fscanner.readCharsFix(2, (char *)magic);
   fscanner.seek(pos, SEEK_SET); // rewind: the two bytes were only peeked at

   if (magic[0] == 0x1f && magic[1] == 0x8b)
   {
      gzscanner.reset(new GZipScanner(fscanner));
      scanner = gzscanner.get();
   }
   else
      scanner = &fscanner;

   while (!scanner->isEOF())
   {
      id.clear();
      scanner->readLine(str, false);

      BufferScanner strscan(str);

      strscan.skipSpace();

      // skip the first token; the character is converted to unsigned char
      // because isspace() is undefined for negative values other than EOF
      while (!strscan.isEOF() && !isspace((unsigned char)strscan.readChar()))
         ;
      strscan.skipSpace();

      // skip an optional section enclosed in '|' characters
      if (strscan.lookNext() == '|')
      {
         strscan.readChar();
         while (!strscan.isEOF() && strscan.readChar() != '|')
            ;
         strscan.skipSpace();
      }

      // the remainder of the line is the identifier
      if (!strscan.isEOF() && id_col != 0)
         strscan.readLine(id, true);

      OracleStatement statement(env);

      statement.append("INSERT INTO %s(%s", table, smiles_col);
      if (id_col != 0)
         statement.append(", %s", id_col);
      statement.append(") VALUES(:smiles");

      // NOTE(review): the `> 1` checks presumably account for a terminating
      // zero appended by readLine(id, true) -- confirm
      if (id_col != 0)
      {
         if (id.size() > 1)
            statement.append(", :id");
         else
            statement.append(", NULL");
      }
      statement.append(")");
      statement.prepare();

      // zero-terminate the line before binding it
      str.push(0);
      statement.bindStringByName(":smiles", str.ptr(), str.size());
      if (id.size() > 1)
         statement.bindStringByName(":id", id.ptr(), id.size());

      statement.execute();
      nwritten++;

      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // commit the tail that did not fill a whole batch of 1000
   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}