Beispiel #1
0
// Collects up to max_matches hits into `matched` for a prepared search.
//
// Only the substructure fetch type is supported; anything else raises Error.
// When the fingerprints can screen the query, candidates are taken from the
// screening results block by block; otherwise every stored record is tested
// in order starting from _cur_idx.
void RingoFastIndex::fetch (OracleEnv &env, int max_matches)
{
   env.dbgPrintf("requested %d hits\n", max_matches);
   matched.clear();

   BingoFingerprints &fingerprints = _context.context().fingerprints;

   if (_fetch_type != _SUBSTRUCTURE)
      throw Error("unexpected fetch type: %d", _fetch_type);

   if (!fingerprints.ableToScreen(_screening))
   {
      // No screening possible: walk the storage sequentially.
      for (;;)
      {
         if (matched.size() >= max_matches)
            break;
         if (_cur_idx >= _context.context().context().storage.count())
            break;
         _match(env, _cur_idx++);
      }

      env.dbgPrintfTS("%d reactions matched\n", matched.size());
      return;
   }

   while (matched.size() < max_matches)
   {
      if (_screening.passed.size() > 0)
      {
         // Consume one already-screened candidate per iteration.
         const int head = _screening.passed.begin();
         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
      }
      else if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }
      else
      {
         // Screen the next block over all query bits.
         while (fingerprints.screenPart_Next(env, _screening))
         {
         }
         fingerprints.screenPart_End(env, _screening);

         // Everything in the block that did not pass screening is unmatched.
         _unmatched += _screening.block->used - _screening.passed.size();
         _screening.items_passed += _screening.passed.size();

         env.dbgPrintfTS("%d reactions passed screening\n", _screening.passed.size());
      }
   }
}
Beispiel #2
0
// Substructure fetch: keeps matching candidates until `matched` holds at
// least max_matches entries or the data is exhausted.
//
// With screening available, candidates come from the fingerprint screening
// of successive blocks; screening of a block stops early once few enough
// candidates remain or enough query bits were examined. Without screening,
// stored records are tested one by one starting from _cur_idx.
void MangoFastIndex::_fetchSubstructure (OracleEnv &env, int max_matches)
{
   BingoFingerprints &fingerprints = _context.context().fingerprints;

   if (!fingerprints.ableToScreen(_screening))
   {
      for (;;)
      {
         if (matched.size() >= max_matches)
            break;
         if (_cur_idx >= _context.context().context().storage.count())
            break;
         _match(env, _cur_idx++);
      }

      env.dbgPrintfTS("%d molecules matched of tested %d\n", matched.size(), _cur_idx);
      return;
   }

   while (matched.size() < max_matches)
   {
      if (_screening.passed.size() > 0)
      {
         // Consume one already-screened candidate per iteration.
         const int head = _screening.passed.begin();
         _match(env, _screening.passed.at(head));
         _screening.passed.remove(head);
      }
      else if (!fingerprints.screenPart_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }
      else
      {
         while (fingerprints.screenPart_Next(env, _screening))
         {
            // Stop reading bits when the candidate set is small enough...
            const bool few_candidates =
               _screening.passed_pre.size() <= _context.context().context().sub_screening_pass_mark;

            // ...or when the configured number of query bits has been used.
            if (few_candidates ||
                _screening.query_bit_idx >= _context.context().context().sub_screening_max_bits)
            {
               env.dbgPrintfTS("stopping at bit #%d; ", _screening.query_bit_idx);
               break;
            }
         }
         fingerprints.screenPart_End(env, _screening);

         // Everything in the block that did not pass screening is unmatched.
         _unmatched += _screening.block->used - _screening.passed.size();
         _screening.items_passed += _screening.passed.size();

         env.dbgPrintf("%d molecules passed screening\n", _screening.passed.size());
      }
   }
}
Beispiel #3
0
// Prepares the fast index for a tautomer substructure search: validates the
// storage and the fingerprints, initializes screening from the tautomer
// query fingerprint, and resets the fetch position and counters.
void MangoFastIndex::prepareTautomerSubstructure (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for tautomer substructure search\n");

   _context.context().context().storage.validate(env);
   _context.context().fingerprints.validate(env);

   _context.context().fingerprints.screenInit(
      _context.tautomer.getQueryFingerprint(), _screening);

   // Start a fresh fetch sequence.
   _fetch_type = _TAUTOMER_SUBSTRUCTURE;
   _cur_idx = _matched = _unmatched = 0;
}
Beispiel #4
0
// Prepares the fast index for a similarity search: validates the storage and
// the fingerprints, initializes screening from the similarity query
// fingerprint, and resets the fetch position and counters.
void MangoFastIndex::prepareSimilarity (OracleEnv &env)
{
   env.dbgPrintfTS("preparing fastindex for similarity search\n");

   _context.context().context().storage.validate(env);
   _context.context().fingerprints.validate(env);

   _context.context().fingerprints.screenInit(
      _context.similarity.getQueryFingerprint(), _screening);

   // Start a fresh fetch sequence.
   _fetch_type = _SIMILARITY;
   _cur_idx = _matched = _unmatched = 0;
}
Beispiel #5
0
// Prepares the fast index for a substructure search: validates the storage
// and the fingerprints, initializes screening from the substructure query
// fingerprint, and resets the fetch position and counters.
void MangoFastIndex::prepareSubstructure (OracleEnv &env)
{
   env.dbgPrintf("preparing fastindex for substructure search\n");

   _context.context().context().storage.validate(env);
   _context.context().fingerprints.validate(env);

   _context.context().fingerprints.screenInit(
      _context.substructure.getQueryFingerprint(), _screening);

   env.dbgPrintfTS("Have %d bits in query fingerprint\n", _screening.query_ones.size());

   // Start a fresh fetch sequence.
   _fetch_type = _SUBSTRUCTURE;
   _cur_idx = _matched = _unmatched = 0;
}
Beispiel #6
0
// Builds the reaction index for every non-empty value of source_column in
// source_table.
//
// env             - Oracle environment used for statements and debug output
// context         - Ringo context providing fingerprints, storage and the
//                   long-operation progress reporting
// source_table    - table to read reactions from
// source_column   - column holding the reaction data
// target_datatype - Oracle datatype of the column; "BLOB" and "CLOB" are read
//                   through a LOB locator, anything else as a string of up to
//                   4000 characters
//
// Throws Exception naming the offending rowid when registering a record
// fails with an Exception.
//
// NOTE(review): table and column names are spliced into the SQL text; the
// caller is presumably responsible for passing trusted identifiers - confirm.
void ringoRegisterTable (OracleEnv &env, RingoOracleContext &context,
                         const char *source_table, const char *source_column,
                         const char *target_datatype)
{
   QS_DEF(Array<char>, reaction_buf);
   OracleStatement statement(env);
   AutoPtr<OracleLOB> reaction_lob;
   OraRowidText rowid;
   char varchar2_text[4001];

      // Oracle's BLOB and CLOB types always come uppercase
   bool blob = (strcmp(target_datatype, "BLOB") == 0);
   bool clob = (strcmp(target_datatype, "CLOB") == 0);
   
   int total_count = 0;

   // The row count is only used to size the long-operation progress report.
   OracleStatement::executeSingleInt(total_count, env,
           "SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0",
           source_table, source_column, source_column);

   context.context().longOpInit(env, total_count, "Building reaction index",
      source_table, "reactions");

   statement.append("SELECT %s, RowidToChar(rowid) FROM %s WHERE %s IS NOT NULL AND LENGTH(%s) > 0",
                    source_column, source_table, source_column, source_column);

   statement.prepare();

   // Column 1 is the reaction itself; how it is defined depends on its type.
   if (blob)
   {
      reaction_lob.reset(new OracleLOB(env));
      statement.defineBlobByPos(1, reaction_lob.ref());
   }
   else if (clob)
   {
      reaction_lob.reset(new OracleLOB(env));
      statement.defineClobByPos(1, reaction_lob.ref());
   }
   else
      statement.defineStringByPos(1, varchar2_text, sizeof(varchar2_text));

   // Column 2 is the textual rowid.
   statement.defineStringByPos(2, rowid.ptr(), sizeof(rowid));

   BingoFingerprints &fingerprints = context.fingerprints;
   int nthreads = 0;

   fingerprints.validateForUpdate(env);
   context.context().configGetInt(env, "NTHREADS", nthreads);
   
   // The configured value is read but deliberately overridden: only the
   // single-threaded path below exists.
   nthreads = 1;

   //if (nthreads == 1)
   {
      int n = 0; // number of successfully registered reactions

      QS_DEF(RingoIndex, index);
      index.init(context.context());

      if (statement.executeAllowNoData()) do
      {
         env.dbgPrintf("inserting reaction #%d with rowid %s\n", n, rowid.ptr());

         if (blob || clob)
            reaction_lob->readAll(reaction_buf, false);
         else
            reaction_buf.readString(varchar2_text, false);

         try
         {
            if (_ringoRegisterReaction(env, rowid.ptr(), reaction_buf, context, index, fingerprints))
               n++;
         }
         catch (Exception &ex)
         {
            char buf[4096];
            snprintf(buf, NELEM(buf), "Failed on record with rowid=%s. Error message is '%s'",
               rowid.ptr(), ex.message());

            // buf is already fully formatted and may contain '%' coming from
            // the nested error message, so it must not be used as a format
            // string itself.
            throw Exception("%s", buf);
         }

         // NOTE(review): n advances only on successful registration, so both
         // checks below fire again for every skipped row while n stays on a
         // multiple of 50 / 1000 (including n == 0).
         if ((n % 50) == 0)
            context.context().longOpUpdate(env, n);
         
         if ((n % 1000) == 0)
         {
            env.dbgPrintfTS("done %d reactions ; flushing\n", n);
            context.context().storage.flush(env);
            
         }
      } while (statement.fetch());
      
      fingerprints.flush(env);
   }
}
Beispiel #7
0
// Makes sure this process has an up-to-date view of the storage blocks.
//
// The shared state record tells whether the blocks are empty, being loaded,
// or ready, and which "age" of the data is loaded. If the shared state is
// ready and this process already holds the same age, nothing is done.
// Otherwise the block list is rebuilt from the rows of the storage table:
// one SharedMemory object per row, filled from the row's LOB unless the
// shared state says the data is already loaded. Block #0 additionally
// provides the address index (_index).
//
// Throws Error when the shared state cannot be obtained, when a block has an
// invalid length, or when a block cannot be mapped.
//
// NOTE(review): if an exception is thrown after the shared state has been
// switched to _STATE_LOADING, nothing visible here resets it; other callers
// would then keep spinning in the wait loop below - confirm how this is
// recovered.
void BingoStorage::validate (OracleEnv &env)
{
    env.dbgPrintfTS("validating storage... ");

    // The attached shared state belongs to another id: drop it.
    if (_shmem_state != 0 && strcmp(_shmem_state->getID(), _shmem_id.ptr()) != 0)
    {
        delete _shmem_state;
        _shmem_state = 0;
        _age_loaded = -1;
    }

    _State *state = _getState(true);

    // Same check that is applied to every later _getState() call; without it
    // the loop condition below would dereference a null pointer.
    if (state == 0)
        throw Error("can't get shared info");

    // TODO: implement a semaphore
    // Somebody else is loading: re-attach and poll until the state changes.
    while (state->state == _STATE_LOADING)
    {
        delete _shmem_state;
        _shmem_state = 0;
        _age_loaded = -1;

        state = _getState(true);

        if (state == 0)
            throw Error("can't get shared info");

        env.dbgPrintf(".");
    }

    if (state->state == _STATE_READY)
    {
        if (state->age_loaded == state->age)
        {
            if (_age_loaded == state->age)
            {
                env.dbgPrintf("up to date\n");
                return;
            }
            else
                env.dbgPrintf("loaded by the other process\n");
        }
        else
        {
            env.dbgPrintf("has changed, reloading\n");
            state->state = _STATE_LOADING;
        }
    }
    else
    {
        state->state = _STATE_LOADING;
        env.dbgPrintf("loading ... \n");
    }

    _shmem_array.clear();
    _blocks.clear();

    OracleStatement statement(env);

    int id, length;
    OracleLOB lob(env);
    QS_DEF(Array<char>, block_name);

    statement.append("SELECT id, length(bindata), bindata FROM %s ORDER BY id",
                     _table_name.ptr());

    statement.prepare();
    statement.defineIntByPos(1, &id);
    statement.defineIntByPos(2, &length);
    statement.defineBlobByPos(3, lob);
    statement.execute();

    do
    {
        // Block name is derived from the storage id, block id and data age.
        ArrayOutput output(block_name);
        output.printf("%s_%d_%d", _shmem_id.ptr(), id, state->age);
        output.writeByte(0);

        if (length < 1)
        {
            // An empty block #0 means an empty storage; any other empty
            // block is an error.
            if (id == 0)
            {
                _index.clear();
                break;
            }
            throw Error("cannot validate block #%d: length=%d", id, length);
        }

        // NOTE(review): the third argument is true when the data is already
        // loaded - presumably "attach only, do not create"; confirm against
        // SharedMemory.
        _shmem_array.add(new SharedMemory(block_name.ptr(), length, state->state == _STATE_READY));

        void *ptr = _shmem_array.top()->ptr();

        if (ptr == 0)
        {
            if (state->state == _STATE_READY)
            {
                // That's rare case, but possible.
                // Reload the storage.
                env.dbgPrintf("shared memory is gone, resetting... \n");
                state->state = _STATE_EMPTY;
                validate(env);
                return;
            }
            else
                throw Error("can't map block #%d", id);
        }

        // Only the loading process copies the LOB contents into the block.
        if (state->state != _STATE_READY)
            lob.read(0, (char *)ptr, length);

        if (id == 0)
        {
            // sizeof yields size_t; cast so the value matches the %d
            // specifier when passed through the variadic argument list.
            if ((length % sizeof(_Addr)) != 0)
                throw Error("LOB size %d (expected a multiple of %d)", length, (int)sizeof(_Addr));

            // length >= 1 is guaranteed by the check above.
            _index.copy((_Addr *)_shmem_array[0]->ptr(), length / sizeof(_Addr));
        }

        _Block &block = _blocks.push();

        block.size = length;
    } while (statement.fetch());

    state->state = _STATE_READY;
    state->age_loaded = state->age;
    _age_loaded = state->age;
}
Beispiel #8
0
// Similarity fetch: processes fingerprint blocks one at a time until
// `matched` holds at least max_matches entries or no blocks are left.
//
// For every block, the number of set bits of each target is read from the
// storage and turned into lower/upper bounds on the number of bits it may
// share with the query. The query bits are then counted incrementally; after
// each step the candidates whose possible range of common bits no longer
// intersects the allowed bounds are discarded. If the candidate list becomes
// small enough, counting stops and the remaining candidates are checked with
// _match(); if all query bits were read, the exact counters are compared
// directly and the rowids of the hits are appended to `matched`.
//
// Does nothing when the query fingerprint cannot be used for screening.
void MangoFastIndex::_fetchSimilarity (OracleEnv &env, int max_matches)
{
   BingoFingerprints &fingerprints = _context.context().fingerprints;
   int i;

   if (!fingerprints.ableToScreen(_screening))
   {
      env.dbgPrintfTS("no bits in query fingerprint, can not do similarity search\n");
      return;
   }

   profTimerStart(tsimfetch, "sim.fetch");
   while (matched.size() < max_matches)
   {
      // Move on to the next block; false means there is nothing left.
      if (!fingerprints.countOnes_Init(env, _screening))
      {
         env.dbgPrintfTS("screening ended\n");
         break;
      }

      BingoStorage &storage = _context.context().context().storage;

      // Per-target data, indexed by position within the current block.
      QS_DEF(Array<int>, max_common_ones);
      QS_DEF(Array<int>, min_common_ones);
      QS_DEF(Array<int>, target_ones);
      QS_DEF(Array<char>, stored);

      max_common_ones.clear_resize(_screening.block->used);
      min_common_ones.clear_resize(_screening.block->used);
      target_ones.clear_resize(_screening.block->used);

      // Read the bit count of every target in the block and derive the
      // bounds on the number of common bits from it.
      for (i = 0; i < _screening.block->used; i++)
      {
         storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored);

         BufferScanner scanner(stored);

         scanner.skip(1); // skip the deletion mark
         scanner.skip(scanner.readByte()); // skip the compressed rowid
         target_ones[i] = scanner.readBinaryWord();
         max_common_ones[i] = _context.similarity.getUpperBound(target_ones[i]);
         min_common_ones[i] = _context.similarity.getLowerBound(target_ones[i]);
      }

      bool first = true;   // the candidate list has not been built yet
      bool entire = false; // set when all query bits have been counted

      _screening.passed.clear();

      while (true)
      {
         if (!fingerprints.countOnes_Next(env, _screening))
         {
            env.dbgPrintf("read all %d bits, writing %d results... ",
               _screening.query_ones.size(), _screening.passed.size());

            entire = true;
            break;
         }

         if (first)
         {
            // First step: build the candidate list from all targets.
            first = false;
            for (i = 0; i < _screening.block->used; i++)
            {
               // Common bits counted so far, and the most that could still
               // be reached if every remaining query bit were common.
               int min_possible_ones = _screening.one_counters[i];
               int max_possible_ones = _screening.one_counters[i] +
                            _screening.query_ones.size() - _screening.query_bit_idx;

               if (min_possible_ones <= max_common_ones[i] &&
                   max_possible_ones >= min_common_ones[i])
                  _screening.passed.add(i);
            }
         }
         else
         {
            int j;

            // Later steps: prune candidates that can no longer match.
            for (j = _screening.passed.begin(); j != _screening.passed.end(); )
            {
               i = _screening.passed[j];

               int min_possible_ones = _screening.one_counters[i];
               int max_possible_ones = _screening.one_counters[i] +
                            _screening.query_ones.size() - _screening.query_bit_idx;

               // Fetch the successor before a possible removal of j.
               int next_j = _screening.passed.next(j);

               if (min_possible_ones > max_common_ones[i] ||
                   max_possible_ones < min_common_ones[i])
                  _screening.passed.remove(j);

               j = next_j;
            }
         }

         // Few enough candidates left: stop counting and check them directly.
         if (_screening.passed.size() <= _context.context().context().sim_screening_pass_mark)
         { 
            env.dbgPrintfTS("stopping reading fingerprints on bit %d/%d; have %d molecules to check...  ",
               _screening.query_bit_idx, _screening.query_ones.size(), _screening.passed.size());
            _unmatched += _screening.block->used - _screening.passed.size();
            break;
         }
      }

      if (entire)
      {
         // All bits were counted, so the counters are exact: decide every
         // target of the block from the counts alone.
         for (i = 0; i < _screening.block->used; i++)
         {
            if (_context.similarity.match(target_ones[i], _screening.one_counters[i]))
            {
               OraRowidText &rid = matched.at(matched.add());

               storage.get(fingerprints.getStorageIndex_NoMap(_screening, i), stored);
               _decompressRowid(stored, rid);
              _matched++;
            }
            else
               _unmatched++;
         }
      }
      else if (_screening.passed.size() > 0)
      {
         // Counting stopped early: run the full check on the survivors.
         profTimerStart(tfine, "sim.fetch.fine");
         for (i = _screening.passed.begin(); i != _screening.passed.end(); i = _screening.passed.next(i))
            _match(env, fingerprints.getStorageIndex_NoMap(_screening, _screening.passed[i]));
         profTimerStop(tfine);
      }
      env.dbgPrintf("done\n");

      fingerprints.countOnes_End(env, _screening);
   }
   profTimerStop(tsimfetch);
}
Beispiel #9
0
// Imports the records of an RDF file into a table, one INSERT per record.
//
// env        - Oracle environment used for statements and debug output
// table      - target table name
// clob_col   - column receiving the record data as a temporary CLOB
// other_cols - field list parsed by _parseFieldList() into property names
//              and the matching column names
// file_name  - path of the RDF file
//
// For every extra column, the value of the corresponding record property is
// bound as a string when the record has that property; otherwise NULL is
// inserted. A COMMIT is issued after every 1000 records and once more at the
// end for the remainder.
//
// NOTE(review): `props` and `columns` are addressed with the same index, so
// _parseFieldList() presumably fills them in parallel - confirm.
// NOTE(review): table and column names are spliced into the SQL text; the
// caller is presumably responsible for passing trusted identifiers - confirm.
void _importRDF (OracleEnv &env, const char *table, const char *clob_col,
                 const char *other_cols, const char *file_name)
{
   FileScanner scanner(file_name);
   int i, nwritten = 0;
   QS_DEF(Array<char>, word);
   QS_DEF(StringPool, props);
   QS_DEF(StringPool, columns);

   env.dbgPrintfTS("importing into table %s\n", table);

   _parseFieldList(other_cols, props, columns);

   RdfLoader loader(scanner);

   while (!loader.isEOF())
   {
      loader.readNext();

      OracleStatement statement(env);
      OracleLOB lob(env);

      lob.createTemporaryCLOB();
      lob.write(0, loader.data);

      statement.append("INSERT INTO %s(%s", table, clob_col);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
         statement.append(", %s", columns.at(i));

      statement.append(") VALUES(:clobdata");

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // A placeholder is emitted only for properties the record actually
         // has; missing ones become NULL.
         if (loader.properties.contains(props.at(i)))
            statement.append(",:%s", columns.at(i));
         else
            statement.append(", NULL");
      }

      statement.append(")");
      statement.prepare();

      statement.bindClobByName(":clobdata", lob);

      for (i = columns.begin(); i != columns.end(); i = columns.next(i))
      {
         // Must mirror the placeholder decision above: nothing to bind for
         // a property the record does not have.
         if (!loader.properties.contains(props.at(i)))
            continue;

         ArrayOutput out(word);

         out.printf(":%s", columns.at(i));
         out.writeChar(0);

         const char* val = loader.properties.at(props.at(i));

         // The length includes the terminating zero.
         statement.bindStringByName(word.ptr(), val, strlen(val) + 1);
      }

      statement.execute();
      nwritten++;
      if (nwritten % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }
   if (nwritten % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", nwritten);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}
Beispiel #10
0
// Imports a SMILES file, optionally gzip-compressed, into a table with one
// INSERT per input line.
//
// env        - Oracle environment used for statements and debug output
// table      - target table name
// smiles_col - column receiving the whole input line
// id_col     - column receiving the identifier found after the SMILES (and
//              after an optional |...| section); may be 0 to skip it
// file_name  - path of the input file
//
// A COMMIT is issued after every 1000 lines and once more at the end for the
// remainder.
void _importSMILES (OracleEnv &env, const char *table, const char *smiles_col,
                    const char *id_col, const char *file_name)
{
   FileScanner file_scanner(file_name);
   AutoPtr<GZipScanner> gz_holder;
   Scanner *input = &file_scanner;

   int imported = 0;
   QS_DEF(Array<char>, id_buf);
   QS_DEF(Array<char>, line_buf);

   env.dbgPrintfTS("importing into table %s\n", table);

   // Peek at the first two bytes to recognize the gzip signature, then
   // rewind so that the data is read from the beginning.
   byte signature[2];
   int start = file_scanner.tell();

   file_scanner.readCharsFix(2, (char *)signature);
   file_scanner.seek(start, SEEK_SET);

   const bool gzipped = (signature[0] == 0x1f && signature[1] == 0x8b);

   if (gzipped)
   {
      gz_holder.reset(new GZipScanner(file_scanner));
      input = gz_holder.get();
   }

   while (!input->isEOF())
   {
      id_buf.clear();
      input->readLine(line_buf, false);
      BufferScanner line(line_buf);

      // Skip the first whitespace-delimited token of the line.
      line.skipSpace();
      while (!line.isEOF())
      {
         if (isspace(line.readChar()))
            break;
      }
      line.skipSpace();

      // Skip an optional section enclosed in '|' characters.
      if (line.lookNext() == '|')
      {
         line.readChar();
         while (!line.isEOF())
         {
            if (line.readChar() == '|')
               break;
         }
         line.skipSpace();
      }

      // Whatever is left on the line is the identifier.
      if (!line.isEOF() && id_col != 0)
         line.readLine(id_buf, true);

      // More than one element means a non-empty, zero-terminated identifier.
      const bool have_id = (id_buf.size() > 1);

      OracleStatement statement(env);

      statement.append("INSERT INTO %s(%s", table, smiles_col);

      if (id_col != 0)
         statement.append(", %s", id_col);

      statement.append(") VALUES(:smiles");

      if (id_col != 0)
         statement.append(have_id ? ", :id" : ", NULL");

      statement.append(")");
      statement.prepare();

      // Zero-terminate the line before binding it as a string.
      line_buf.push(0);
      statement.bindStringByName(":smiles", line_buf.ptr(), line_buf.size());
      if (have_id)
         statement.bindStringByName(":id", id_buf.ptr(), id_buf.size());

      statement.execute();
      imported++;
      if (imported % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", imported);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // Commit the tail that did not fill a whole batch.
   if (imported % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", imported);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}