Exemplo n.º 1
0
/**
 * GetColumnSamples - Collect the stored samples of one column.
 *
 * Resolves the samples table for (database_id, table_id) inside the database
 * named SAMPLES_DB_NAME, runs a sequential scan restricted to column_id within
 * a transaction that is committed right after the scan, and appends every
 * value of the first result tile to column_samples.
 *
 * column_samples is left untouched when the scan produces no tiles.
 */
void TupleSamplesStorage::GetColumnSamples(
    oid_t database_id, oid_t table_id, oid_t column_id,
    std::vector<type::Value> &column_samples) {
  auto catalog = catalog::Catalog::GetInstance();
  std::string samples_table_name =
      GenerateSamplesTableName(database_id, table_id);

  // Table lookup and scan share a single transaction.
  auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
  auto txn = txn_manager.BeginTransaction();
  auto samples_table = catalog->GetTableWithName(
      std::string(SAMPLES_DB_NAME), std::string(DEFAULT_SCHEMA_NAME),
      samples_table_name, txn);

  std::vector<oid_t> scan_columns{column_id};
  auto scanned_tiles = GetTuplesWithSeqScan(samples_table, scan_columns, txn);
  txn_manager.CommitTransaction(txn);

  LOG_DEBUG("Result tiles count: %lu", scanned_tiles->size());
  if (scanned_tiles->size() == 0) {
    return;
  }

  // Only the first tile is consumed; any further tiles are ignored.
  auto first_tile = (*scanned_tiles)[0].get();
  LOG_DEBUG("Tuple count: %lu", first_tile->GetTupleCount());

  // The scan projected a single column, so the value sits at column offset 0.
  size_t row = 0;
  while (row < first_tile->GetTupleCount()) {
    column_samples.push_back(first_tile->GetValue(row, 0));
    ++row;
  }
}
Exemplo n.º 2
0
/**
 * @brief Create a physical tile
 * @param
 * @return Physical tile
 */
std::unique_ptr<storage::Tile> LogicalTile::Materialize() {
  // Create new schema according underlying physical tile
  std::unique_ptr<catalog::Schema> source_tile_schema(GetPhysicalSchema());

  // Get the number of tuples within this logical tiles
  const int num_tuples = GetTupleCount();

  //const catalog::Schema *output_schema;
  std::unordered_map<oid_t, oid_t> old_to_new_cols;
  oid_t column_count = source_tile_schema->GetColumnCount();
  for (oid_t col = 0; col < column_count; col++) {
    old_to_new_cols[col] = col;
  }

  // Generate mappings.
  std::unordered_map<storage::Tile *, std::vector<oid_t>> tile_to_cols;
  GenerateTileToColMap(old_to_new_cols, tile_to_cols);

  // Create new physical tile.
  std::unique_ptr<storage::Tile> dest_tile(
      storage::TileFactory::GetTempTile(*source_tile_schema, num_tuples));

  // Proceed to materialize logical tile by physical tile at a time.
  MaterializeByTiles(old_to_new_cols, tile_to_cols,
                     dest_tile.get());

  // Wrap physical tile in logical tile.
  return std::move(dest_tile);
}
/**
 * Fills a TempTable with TESTS_TUPLES_PER_TILEGROUP tuples and checks that
 * every one of its tile groups can be wrapped in a non-empty LogicalTile.
 */
TEST_F(LogicalTileTests, TempTableTest) {
  const int tuple_count = TESTS_TUPLES_PER_TILEGROUP;
  auto pool = TestingHarness::GetInstance().GetTestingPool();

  // NOTE(review): the schema is never deleted here; presumably the `true`
  // flag passed to TempTable below hands ownership to the table -- confirm.
  catalog::Schema *schema = new catalog::Schema(
      {ExecutorTestsUtil::GetColumnInfo(0), ExecutorTestsUtil::GetColumnInfo(1),
       ExecutorTestsUtil::GetColumnInfo(2)});

  // Create our TempTable
  storage::TempTable table(INVALID_OID, schema, true);
  EXPECT_EQ(0, table.GetTupleCount());

  // Then shove some tuples in it
  for (int i = 0; i < tuple_count; i++) {
    // Automatic storage: the tuple is destroyed at the end of each iteration,
    // including when one of the calls below throws.
    storage::Tuple tuple(table.GetSchema(), true);
    auto val1 = type::ValueFactory::GetIntegerValue(
        ExecutorTestsUtil::PopulatedValue(i, 0));
    auto val2 = type::ValueFactory::GetIntegerValue(
        ExecutorTestsUtil::PopulatedValue(i, 1));
    auto val3 = type::ValueFactory::GetDoubleValue(
        ExecutorTestsUtil::PopulatedValue(i, 2));
    tuple.SetValue(0, val1, pool);
    tuple.SetValue(1, val2, pool);
    tuple.SetValue(2, val3, pool);
    table.InsertTuple(&tuple);
  }
  LOG_INFO("%s", table.GetInfo().c_str());
  LOG_INFO("%s", GETINFO_SINGLE_LINE.c_str());

  // Check to see whether we can wrap a LogicalTile around it
  auto tile_group_count = table.GetTileGroupCount();
  std::vector<executor::LogicalTile *> logicalTiles;
  for (oid_t tile_group_itr = 0; tile_group_itr < tile_group_count;
       tile_group_itr++) {
    auto tile_group = table.GetTileGroup(tile_group_itr);
    EXPECT_NE(nullptr, tile_group);
    if (tile_group == nullptr) {
      // EXPECT_* is non-fatal: skip instead of dereferencing a null pointer.
      continue;
    }

    auto logical_tile = executor::LogicalTileFactory::WrapTileGroup(tile_group);
    EXPECT_NE(nullptr, logical_tile);
    if (logical_tile == nullptr) {
      // The failure is already recorded above; avoid crashing the test run.
      continue;
    }
    logicalTiles.push_back(logical_tile);

    // Make sure that we can iterate over the LogicalTile and get
    // at our TempTable tuples
    EXPECT_NE(0, logical_tile->GetTupleCount());

    LOG_INFO("GetActiveTupleCount() = %d",
             (int)tile_group->GetActiveTupleCount());
    LOG_INFO("%s", tile_group->GetInfo().c_str());
    LOG_INFO("*****************************************");
    LOG_INFO("%s", logical_tile->GetInfo().c_str());
  }
  EXPECT_FALSE(logicalTiles.empty());

  // The wrapped tiles are raw pointers held by this test, so release them
  // explicitly.
  for (executor::LogicalTile *lt : logicalTiles) {
    delete lt;
  }
}
/**
 * Scores a match of `length` bases of `seq` that begins at `startPos`, using
 * the tuple counts in `ct`, and writes the score to `pMatch`.
 *
 * pMatch starts at 1. For every step i in [1, length - tm.tupleSize) the value
 * log(nextTupleCount / rightMarginalCount), as reported by
 * SumRightShiftMarginalTupleCounts() for the tuple at startPos + i, is added
 * to it. When `length` is shorter than tm.tupleSize no score is computed and
 * pMatch is set to 0.
 *
 * @return 1 when pMatch was fully evaluated (including the short-match case);
 *         0 when evaluation was abandoned: the seed tuple could not be counted
 *         or has count 0, a tuple could not be built from the sequence, the
 *         nucleotide check found a code above 3, or no counts exist for the
 *         current context. On a 0 return pMatch holds whatever had been
 *         accumulated so far.
 */
int PMatch(TSequence &seq, DNALength startPos, DNALength length, TupleMetrics &tm,
           TupleCountTable<TSequence, T_Tuple> &ct, float &pMatch)
{
    int seedCount;
    T_Tuple window, previous;

    //
    // The score models a match of length 'length' with a k-th order Markov
    // model of the genome; apart from that, no spatial constraint is placed
    // on the match.
    //
    pMatch = 1;

    // The seed tuple must be countable and must actually occur.
    if (!GetTupleCount(seq, startPos, tm, ct, seedCount) || seedCount == 0) {
        return 0;
    }

    previous.FromStringLR(&seq.seq[startPos], tm);

    if (length < static_cast<DNALength>(tm.tupleSize)) {
        //
        // The match is shorter than the tuples used to model the sequence
        // composition, so no p-value is computed for it: a match of this
        // length is assumed to always be found.
        //
        pMatch = 0;
        return 1;
    }

    for (size_t step = 1; step < length - tm.tupleSize; ++step) {
        const size_t windowStart = step + startPos;

        // Build the tuple that starts at this offset; give up if that fails.
        if (window.FromStringLR(&seq.seq[windowStart], tm) == 0) {
            return 0;
        }

        // Counts for the current tuple and for its right-shift marginals.
        int nextTupleCount = 0;
        const int rightMarginalCount = SumRightShiftMarginalTupleCounts(
            tm, ct, window, TwoBit[seq.GetNuc(windowStart + tm.tupleSize - 1)], nextTupleCount);

        // Tuple counts are not defined for N's.
        if (TwoBit[seq.GetNuc(windowStart + tm.tupleSize)] > 3) {
            return 0;
        }

        // Without a background distribution for this sequence context there
        // is no way to evaluate a p-value.
        if (nextTupleCount == 0) {
            return 0;
        }

        // Add the log frequency of this transition.
        pMatch += log(nextTupleCount / static_cast<double>(rightMarginalCount));
        previous.tuple = window.tuple;
    }

    // Every transition was scored.
    return 1;
}