/**
 * Build the temp table that receives this executor's output and attach it to
 * the plan node. Called from p_init by executors that need a multi-column
 * temp output table.
 *
 * @param executorVector consulted via isLargeQuery() to choose which factory
 *                       builds the table, and via limits() on the regular path
 * @param tempTableName  name handed to the table factory
 */
void AbstractExecutor::setTempOutputTable(const ExecutorVector& executorVector,
                                          const string tempTableName) {
    TupleSchema* schema = m_abstractNode->generateTupleSchema();
    const int columnCount = schema->columnCount();
    assert(columnCount >= 1);

    // Column names are taken from the node's output schema, position by position.
    const std::vector<SchemaColumn*>& outputColumns = m_abstractNode->getOutputSchema();
    std::vector<std::string> columnNames;
    columnNames.reserve(columnCount);
    for (int idx = 0; idx < columnCount; ++idx) {
        columnNames.push_back(outputColumns[idx]->getColumnName());
    }

    if (!executorVector.isLargeQuery()) {
        // Regular path: the temp table is built with the executor vector's limits.
        m_tmpOutputTable = TableFactory::buildTempTable(tempTableName,
                                                        schema,
                                                        columnNames,
                                                        executorVector.limits());
    }
    else {
        // Large-query path: this factory call takes no limits argument.
        m_tmpOutputTable = TableFactory::buildLargeTempTable(tempTableName,
                                                             schema,
                                                             columnNames);
    }

    m_abstractNode->setOutputTable(m_tmpOutputTable);
}
Exemple #2
0
/**
 * Allocate a TupleSchema in one raw block and fill in its column metadata.
 *
 * The three vectors are parallel: entry i of columnTypes, columnSizes and
 * allowNull describes column i. No size check is performed here, so callers
 * must pass vectors of equal length.
 *
 * @param columnTypes         value type of each column
 * @param columnSizes         declared length of each column
 * @param allowNull           nullability of each column
 * @param allowInlinedObjects stored in the schema and forwarded to
 *                            countUninlineableObjectColumns
 * @return the new schema, backed by a `new char[]` allocation
 */
TupleSchema* TupleSchema::createTupleSchema(const std::vector<ValueType> columnTypes,
                                            const std::vector<int32_t> columnSizes,
                                            const std::vector<bool> allowNull,
                                            bool allowInlinedObjects)
{
    const uint16_t numColumns = static_cast<uint16_t>(columnTypes.size());
    const uint16_t numUninlined =
        TupleSchema::countUninlineableObjectColumns(columnTypes, columnSizes, allowInlinedObjects);

    // The block holds the TupleSchema data members, numColumns + 1 ColumnInfo
    // entries (one extra because a column's length is obtained by subtracting
    // adjacent offsets), and one int16_t per uninlineable object column so the
    // indices of those columns can be stored up front to aid iteration.
    const int byteCount = static_cast<int>(sizeof(TupleSchema)
                                           + (sizeof(ColumnInfo) * (numColumns + 1))
                                           + (numUninlined * sizeof(int16_t)));

    // Zero the whole block first so every offset starts out cleared.
    char* rawStorage = new char[byteCount];
    memset(rawStorage, 0, byteCount);

    TupleSchema* schema = reinterpret_cast<TupleSchema*>(rawStorage);
    schema->m_allowInlinedObjects = allowInlinedObjects;
    schema->m_columnCount = numColumns;
    schema->m_uninlinedObjectColumnCount = numUninlined;

    // Running index handed to setColumnMetaData for every column.
    uint16_t uninlinedCursor = 0;
    for (uint16_t col = 0; col < numColumns; col++) {
        const ValueType colType = columnTypes[col];
        const uint32_t colLength = columnSizes[col];
        const bool colNullable = allowNull[col];
        schema->setColumnMetaData(col, colType, colLength, colNullable, uninlinedCursor);
    }

    return schema;
}
Exemple #3
0
/**
 * Allocate a TupleSchema that carries both visible and hidden columns.
 *
 * Each group of four vectors is parallel: entry i of the types, sizes,
 * allowNull and inBytes vectors describes column i of that group. Hidden
 * columns are placed after the visible ones and must be inlineable.
 *
 * @param columnTypes         value type of each visible column
 * @param columnSizes         declared length of each visible column
 * @param allowNull           nullability of each visible column
 * @param columnInBytes       in-bytes flag of each visible column
 * @param hiddenColumnTypes   value type of each hidden column
 * @param hiddenColumnSizes   declared length of each hidden column
 * @param hiddenAllowNull     nullability of each hidden column
 * @param hiddenColumnInBytes in-bytes flag of each hidden column
 * @return the new schema, backed by a `new char[]` allocation
 */
TupleSchema* TupleSchema::createTupleSchema(const std::vector<ValueType>& columnTypes,
                                            const std::vector<int32_t>&   columnSizes,
                                            const std::vector<bool>&      allowNull,
                                            const std::vector<bool>&      columnInBytes,
                                            const std::vector<ValueType>& hiddenColumnTypes,
                                            const std::vector<int32_t>&   hiddenColumnSizes,
                                            const std::vector<bool>&      hiddenAllowNull,
                                            const std::vector<bool>&      hiddenColumnInBytes)
{
    const uint16_t visibleCount = static_cast<uint16_t>(columnTypes.size());
    const uint16_t hiddenCount = static_cast<uint16_t>(hiddenColumnTypes.size());
    const uint16_t uninlinedCount =
        TupleSchema::countUninlineableObjectColumns(columnTypes, columnSizes, columnInBytes);

    const int byteCount = memSizeForTupleSchema(visibleCount, uninlinedCount, hiddenCount);

    // One raw block holds the whole schema; zero it so all offsets start cleared.
    char* rawStorage = new char[byteCount];
    memset(rawStorage, 0, byteCount);

    TupleSchema* schema = reinterpret_cast<TupleSchema*>(rawStorage);
    schema->m_columnCount = visibleCount;
    schema->m_uninlinedObjectColumnCount = uninlinedCount;
    schema->m_hiddenColumnCount = hiddenCount;
    schema->m_isHeaderless = false;

    // Running index handed to setColumnMetaData for every column.
    uint16_t uninlinedCursor = 0;

    // Visible columns occupy slots [0, visibleCount).
    for (uint16_t col = 0; col < visibleCount; col++) {
        const ValueType colType = columnTypes[col];
        const uint32_t colLength = columnSizes[col];
        const bool colNullable = allowNull[col];
        const bool colInBytes = columnInBytes[col];
        schema->setColumnMetaData(col, colType, colLength, colNullable, uninlinedCursor, colInBytes);
    }

    // Hidden columns follow, in slots [visibleCount, visibleCount + hiddenCount).
    for (uint16_t hidden = 0; hidden < hiddenCount; ++hidden) {
        const ValueType colType = hiddenColumnTypes[hidden];
        const uint32_t colLength = hiddenColumnSizes[hidden];
        const bool colNullable = hiddenAllowNull[hidden];
        const bool colInBytes = hiddenColumnInBytes[hidden];

        // Uninlineable data is not supported in hidden columns yet.
        if (! isInlineable(colType, colLength, colInBytes)) {
            throwFatalLogicErrorStreamed("Attempt to create uninlineable hidden column");
        }

        schema->setColumnMetaData(static_cast<uint16_t>(visibleCount + hidden),
                                  colType,
                                  colLength,
                                  colNullable,
                                  uninlinedCursor,
                                  colInBytes);
    }

    return schema;
}
// Checks the shape of the schema returned by createEvictedTupleSchema():
// exactly two columns, a SMALLINT followed by an INTEGER.
TEST_F(TupleSchemaTest, CreateEvictedTupleSchema) {
    initTable(true);

    // Build the TupleSchema used for the evicted tuple tables.
    TupleSchema *evicted = TupleSchema::createEvictedTupleSchema();
    // Uncomment to dump the schema while debugging:
    // fprintf(stdout, "\nEVICTED TABLE SCHEMA\n%s\n", evicted->debug().c_str());

    ASSERT_EQ(2, evicted->columnCount());
    ASSERT_EQ(VALUE_TYPE_SMALLINT, evicted->columnType(0));
    ASSERT_EQ(VALUE_TYPE_INTEGER, evicted->columnType(1));

    TupleSchema::freeTupleSchema(evicted);
}
Exemple #5
0
/**
 * Build a new schema from selected columns of `first`, followed by selected
 * columns of `second`.
 *
 * Output column i (for i < firstSet.size()) is a copy of column firstSet[i]
 * of `first`; output column firstSet.size() + j is a copy of column
 * secondSet[j] of `second`. Type, length, nullability, the in-bytes flag and
 * the inlined flag are all carried over from the source column.
 *
 * The per-column flags are stored by OUTPUT position. Indexing them by the
 * source column index only works when the selection is the identity
 * 0..n-1; any other selection would attach flags to the wrong column or run
 * past the end of the flag vectors.
 *
 * @param first     schema the first group of columns is taken from; must not be NULL
 * @param firstSet  column indices into `first`, in output order
 * @param second    optional second schema; when NULL, secondSet is ignored
 * @param secondSet column indices into `second`, in output order
 * @return the newly created schema
 */
TupleSchema*
TupleSchema::createTupleSchema(const TupleSchema *first,
                               const std::vector<uint16_t> firstSet,
                               const TupleSchema *second,
                               const std::vector<uint16_t> secondSet) {
    assert(first);

    typedef std::vector<uint16_t>::size_type SizeType;
    const SizeType firstCount = firstSet.size();
    const SizeType secondCount = second ? secondSet.size() : 0;
    const SizeType combinedColumnCount = firstCount + secondCount;

    // The four vectors are filled in lockstep so that entry i of each one
    // always describes output column i.
    std::vector<ValueType> columnTypes;
    std::vector<int32_t> columnLengths;
    std::vector<bool> columnAllowNull;
    std::vector<bool> columnInBytes;
    columnTypes.reserve(combinedColumnCount);
    columnLengths.reserve(combinedColumnCount);
    columnAllowNull.reserve(combinedColumnCount);
    columnInBytes.reserve(combinedColumnCount);

    for (SizeType pos = 0; pos < firstCount; ++pos) {
        const TupleSchema::ColumnInfo *columnInfo = first->getColumnInfo(firstSet[pos]);
        columnTypes.push_back(columnInfo->getVoltType());
        columnLengths.push_back(columnInfo->length);
        columnAllowNull.push_back(columnInfo->allowNull);
        columnInBytes.push_back(columnInfo->inBytes);
    }
    for (SizeType pos = 0; pos < secondCount; ++pos) {
        const TupleSchema::ColumnInfo *columnInfo = second->getColumnInfo(secondSet[pos]);
        columnTypes.push_back(columnInfo->getVoltType());
        columnLengths.push_back(columnInfo->length);
        columnAllowNull.push_back(columnInfo->allowNull);
        columnInBytes.push_back(columnInfo->inBytes);
    }

    TupleSchema *schema = TupleSchema::createTupleSchema(columnTypes,
                                                         columnLengths,
                                                         columnAllowNull,
                                                         columnInBytes);

    // Carry the inlined flag of each source column over to the output
    // position it was copied to.
    for (SizeType pos = 0; pos < firstCount; ++pos) {
        ColumnInfo *info = schema->getColumnInfo(static_cast<int>(pos));
        info->inlined = first->getColumnInfo(firstSet[pos])->inlined;
    }
    for (SizeType pos = 0; pos < secondCount; ++pos) {
        ColumnInfo *info = schema->getColumnInfo(static_cast<int>(firstCount + pos));
        info->inlined = second->getColumnInfo(secondSet[pos])->inlined;
    }

    return schema;
}
// Builds the key schema for an index over scheme.columnIndices (the types and
// lengths are copied from the table's tuple schema, every key column is
// nullable) and returns the index instance chosen by TableIndexPicker.
TableIndex *TableIndexFactory::getInstance(const TableIndexScheme &scheme) {
    TupleSchema *tableSchema = scheme.tupleSchema;
    assert(tableSchema);

    const int keyWidth = static_cast<int>(scheme.columnIndices.size());

    // Copy the type and length of each indexed column, in index order.
    std::vector<ValueType> keyTypes;
    std::vector<int32_t> keyLengths;
    for (int pos = 0; pos < keyWidth; ++pos) {
        keyTypes.push_back(tableSchema->columnType(scheme.columnIndices[pos]));
        keyLengths.push_back(tableSchema->columnLength(scheme.columnIndices[pos]));
    }
    std::vector<bool> keyNullable(keyWidth, true);

    TupleSchema *keySchema = TupleSchema::createTupleSchema(keyTypes, keyLengths, keyNullable, true);
    assert(keySchema);
    VOLT_TRACE("Creating index for %s.\n%s", scheme.name.c_str(), keySchema->debug().c_str());

    TableIndexPicker picker(keySchema, scheme);
    return picker.getInstance();
}
/**
 * Build the temp table that receives this executor's output and attach it to
 * the plan node. Called from p_init by executors that need a multi-column
 * temp output table.
 *
 * @param limits        temp table limits handed to the table factory; must not be NULL
 * @param tempTableName name handed to the table factory
 */
void AbstractExecutor::setTempOutputTable(TempTableLimits* limits, const string tempTableName) {
    assert(limits);

    TupleSchema* schema = m_abstractNode->generateTupleSchema();
    const int columnCount = schema->columnCount();
    assert(columnCount >= 1);

    // Column names are taken from the node's output schema, position by position.
    const std::vector<SchemaColumn*>& outputColumns = m_abstractNode->getOutputSchema();
    std::vector<std::string> columnNames;
    columnNames.reserve(columnCount);
    for (int idx = 0; idx < columnCount; ++idx) {
        columnNames.push_back(outputColumns[idx]->getColumnName());
    }

    m_tmpOutputTable = TableFactory::getTempTable(m_abstractNode->databaseId(),
                                                  tempTableName,
                                                  schema,
                                                  columnNames,
                                                  limits);
    m_abstractNode->setOutputTable(m_tmpOutputTable);
}
// helper to make a schema, a tuple and calculate EL size
size_t
TableTupleExportTest::maxElSize(std::vector<uint16_t> &keep_offsets,
                             bool useNullStrings)
{
    TableTuple *tt;
    TupleSchema *ts;
    char buf[1024]; // tuple data

    ts = TupleSchema::createTupleSchema(m_schema, keep_offsets);
    tt = new TableTuple(buf, ts);

    // if the tuple includes strings, add some content
    // assuming all Export tuples were allocated for persistent
    // storage and choosing set* api accordingly here.
    if (ts->columnCount() > 6) {
        NValue nv = ValueFactory::getStringValue("ABCDEabcde"); // 10 char
        if (useNullStrings)
        {
            nv.free(); nv.setNull();
        }
        tt->setNValueAllocateForObjectCopies(6, nv, NULL);
        nv.free();
    }
    if (ts->columnCount() > 7) {
        NValue nv = ValueFactory::getStringValue("abcdeabcdeabcdeabcde"); // 20 char
        if (useNullStrings)
        {
            nv.free(); nv.setNull();
        }
        tt->setNValueAllocateForObjectCopies(7, nv, NULL);
        nv.free();
    }

    // The function under test!
    size_t sz = tt->maxExportSerializationSize();

    // and cleanup
    tt->freeObjectColumns();
    delete tt;
    TupleSchema::freeTupleSchema(ts);

    return sz;
}
    // Create a table with the schema described above, where the
    // caller may have specified a number of extra columns.  Also add
    // two indexes: one integer primary key and one geospatial.
    //
    // numExtraCols is forwarded to createTupleSchemaWithExtraCols.
    // Columns are named "col_0", "col_1", ... in schema order.
    static unique_ptr<PersistentTable> createTable(int numExtraCols = 0) {
        TupleSchema* schema = createTupleSchemaWithExtraCols(numExtraCols);
        // Zero-filled so the table factory never receives indeterminate
        // bytes; the test does not depend on any particular signature value.
        // NOTE(review): presumably the factory copies all 20 bytes - confirm.
        char signature[20] = {};
        CatalogId databaseId = 1000;
        std::vector<std::string> columnNames;
        for (int i = 0; i < schema->columnCount(); ++i) {
            std::ostringstream oss;
            oss << "col_" << i;
            columnNames.push_back(oss.str());
        }
        auto table = unique_ptr<PersistentTable>(
                         static_cast<PersistentTable*>(TableFactory::getPersistentTable(databaseId,
                                                                                        "test_table",
                                                                                        schema,
                                                                                        columnNames,
                                                                                        signature)));
        table->addIndex(createGeospatialIndex(table->schema()));

        // The primary key index is both added to the table and registered
        // as its primary key.
        TableIndex* pkIndex = createPrimaryKeyIndex(table->schema());
        table->addIndex(pkIndex);
        table->setPrimaryKeyIndex(pkIndex);

        return table;
    }
Exemple #10
0
// Checks that the evicted-tuple schema built from the primary key index
// schema repeats every primary key column (type, length, nullability) and
// then appends exactly one non-nullable SMALLINT column.
TEST_F(TupleSchemaTest, CreateEvictedTupleSchema) {
    initTable(true);

    // Build the TupleSchema used for the evicted tuple tables.
    TupleSchema *evicted = TupleSchema::createEvictedTupleSchema(m_primaryKeyIndexSchema);
    // Uncomment to dump the schema while debugging:
    // fprintf(stdout, "\nEVICTED TABLE SCHEMA\n%s\n", evicted->debug().c_str());

    // The leading columns mirror the primary key index columns one for one.
    ASSERT_EQ(m_numPrimaryKeyCols+1, evicted->columnCount());
    for (int col = 0; col < m_numPrimaryKeyCols; ++col) {
        ASSERT_EQ(m_primaryKeyIndexSchema->columnType(col), evicted->columnType(col));
        ASSERT_EQ(m_primaryKeyIndexSchema->columnLength(col), evicted->columnLength(col));
        ASSERT_EQ(m_primaryKeyIndexSchema->columnAllowNull(col), evicted->columnAllowNull(col));
    }

    // The single trailing column holds the 16-bit block ids.
    ASSERT_EQ(VALUE_TYPE_SMALLINT, evicted->columnType(m_numPrimaryKeyCols));
    ASSERT_FALSE(evicted->columnAllowNull(m_numPrimaryKeyCols));

    TupleSchema::freeTupleSchema(evicted);
}
Exemple #11
0
// Builds the key schema for an index and returns the index instance chosen
// by TableIndexPicker.
//
// The key columns come from scheme.indexedExpressions when that list is
// non-empty, otherwise from scheme.columnIndices. Two facts are gathered
// along the way and handed to the picker: whether every key value has an
// integral type, and whether any indexed expression is VARCHAR/VARBINARY.
// Every key column is nullable.
TableIndex *TableIndexFactory::getInstance(const TableIndexScheme &scheme) {
    const TupleSchema *tableSchema = scheme.tupleSchema;
    assert(tableSchema);

    bool intsOnly = true;
    bool inlinesOrColumnsOnly = true;
    std::vector<ValueType> keyTypes;
    std::vector<int32_t> keyLengths;
    size_t keyWidth = 0;

    const size_t expressionCount = scheme.indexedExpressions.size();
    if (expressionCount == 0) {
        // Plain column index: take type and length from the table's schema.
        keyWidth = scheme.columnIndices.size();
        for (size_t pos = 0; pos < keyWidth; ++pos) {
            ValueType columnType = tableSchema->columnType(scheme.columnIndices[pos]);
            if ( ! isIntegralType(columnType)) {
                intsOnly = false;
            }
            keyTypes.push_back(columnType);
            keyLengths.push_back(tableSchema->columnLength(scheme.columnIndices[pos]));
        }
    } else {
        // Expression index: one key column per indexed expression.
        keyWidth = expressionCount;
        // TODO: Some extra runtime and space efficiency could be gained here by
        // marking which indexed expressions happen to be non-inlined column
        // expressions. That case matters because it would let
        // GenericPersistentKey index keys avoid a persistent allocation and
        // copy of an already persistent value. One possible implementation is
        // a bool attribute of TupleSchema::ColumnInfo that is set only in this
        // special case and universally disables deep copying of that
        // "tuple column"'s referenced object.
        for (size_t pos = 0; pos < keyWidth; ++pos) {
            ValueType exprType = scheme.indexedExpressions[pos]->getValueType();
            if ( ! isIntegralType(exprType)) {
                intsOnly = false;
            }
            uint32_t declaredLength;
            if (exprType != VALUE_TYPE_VARCHAR && exprType != VALUE_TYPE_VARBINARY) {
                declaredLength = NValue::getTupleStorageSize(exprType);
            } else {
                // Using COLUMN_MAX_VALUE_LENGTH applies to indexed expression
                // values the same maximum length that is applied to column
                // values. In theory they could have an independent limit, up
                // to any length that can be allocated via ThreadLocalPool.
                // Today all of these cases share one limit, which is also the
                // default/maximum size for variable columns defined in schema
                // (controlled in java by VoltType.MAX_VALUE_LENGTH).
                // It is not clear whether
                // scheme.indexedExpressions[pos]->getValueSize() can or should
                // be called for a more useful answer; there is probably little
                // to gain, since expressions usually do not carry enough
                // information to reliably show that the result is always small
                // enough to "inline".
                declaredLength = TupleSchema::COLUMN_MAX_VALUE_LENGTH;
                inlinesOrColumnsOnly = false;
            }
            keyTypes.push_back(exprType);
            keyLengths.push_back(declaredLength);
        }
    }

    std::vector<bool> keyNullable(keyWidth, true);
    TupleSchema *keySchema = TupleSchema::createTupleSchema(keyTypes, keyLengths, keyNullable, true);
    assert(keySchema);
    VOLT_TRACE("Creating index for '%s' with key schema '%s'", scheme.name.c_str(), keySchema->debug().c_str());

    TableIndexPicker picker(keySchema, intsOnly, inlinesOrColumnsOnly, scheme);
    return picker.getInstance();
}
// helper to make a schema, a tuple and serialize to a buffer
size_t
TableTupleExportTest::serElSize(std::vector<uint16_t> &keep_offsets,
                             uint8_t *nullArray, char *dataPtr, bool nulls)
{
    TableTuple *tt;
    TupleSchema *ts;
    char buf[1024]; // tuple data

    ts = TupleSchema::createTupleSchema(m_schema, keep_offsets);
    tt = new TableTuple(buf, ts);

    // assuming all Export tuples were allocated for persistent
    // storage and choosing set* api accordingly here.

    switch (ts->columnCount()) {
        // note my sophisticated and clever use of fall through
      case 8:
      {
          NValue nv = ValueFactory::getStringValue("abcdeabcdeabcdeabcde"); // 20 char
          if (nulls) { nv.free(); nv.setNull(); }
          tt->setNValueAllocateForObjectCopies(7, nv, NULL);
          nv.free();
      }
      case 7:
      {
          NValue nv = ValueFactory::getStringValue("ABCDEabcde"); // 10 char
          if (nulls) { nv.free(); nv.setNull(); }
          tt->setNValueAllocateForObjectCopies(6, nv, NULL);
          nv.free();
      }
      case 6:
      {
          NValue nv = ValueFactory::getDecimalValueFromString("-12.34");
          if (nulls) { nv.free(); nv.setNull(); }
          tt->setNValueAllocateForObjectCopies(5, nv, NULL);
          nv.free();
      }
      case 5:
      {
          NValue nv = ValueFactory::getTimestampValue(9999);
          if (nulls) nv.setNull();
          tt->setNValueAllocateForObjectCopies(4, nv, NULL);
          nv.free();
      }
      case 4:
      {
          NValue nv = ValueFactory::getBigIntValue(1024);
          if (nulls) nv.setNull();
          tt->setNValueAllocateForObjectCopies(3, nv, NULL);
          nv.free();
      }
      case 3:
      {
          NValue nv = ValueFactory::getIntegerValue(512);
          if (nulls) nv.setNull();
          tt->setNValueAllocateForObjectCopies(2, nv, NULL);
          nv.free();
      }
      case 2:
      {
          NValue nv = ValueFactory::getSmallIntValue(256);
          if (nulls) nv.setNull();
          tt->setNValueAllocateForObjectCopies(1, nv, NULL);
          nv.free();
      }
      case 1:
      {
          NValue nv = ValueFactory::getTinyIntValue(120);
          if (nulls) nv.setNull();
          tt->setNValueAllocateForObjectCopies(0, nv, NULL);
          nv.free();
      }
      break;

      default:
        // this is an error in the test fixture.
        EXPECT_EQ(0,1);
        break;
    }

    // The function under test!
    ExportSerializeOutput io(dataPtr, 2048);
    tt->serializeToExport(io, 0, nullArray);

    // and cleanup
    tt->freeObjectColumns();
    delete tt;
    TupleSchema::freeTupleSchema(ts);
    return io.position();
}
/*
 * Show that the hash range expression correctly selects (or doesn't) rows in ranges.
 *
 * A hash-range expression over column 1 is built from three ranges,
 * round-tripped through its JSON form, and then evaluated against 100000
 * random integer values. For each value the result must be true exactly when
 * the value's murmurHash3 falls inside one of the three ranges.
 */
TEST_F(ExpressionTest, HashRange) {
    queue<AE*> e; // not used by this test

    const int32_t int32Min = numeric_limits<int32_t>::min();
    const int32_t int32Max = numeric_limits<int32_t>::max();

    const int32_t range1Max = -(int32Max / 2);
    const int32_t range1Min = int32Min - (range1Max / 2);
    const int32_t range2Min = 0;
    const int32_t range2Max = int32Max / 2;
    const int32_t range3Min = range2Max + (range2Max / 2);
    const int32_t range3Max = int32Max;

    int32_t ranges[][2] = {
            { range1Min, range1Max},
            { range2Min, range2Max},
            { range3Min, range3Max}
    };

    // Serialize the expression to JSON and rebuild it from that text, so the
    // tree under test is the one produced by buildExpressionTree.
    auto_ptr<AE> hashRangeSource(new HR(1, ranges, 3));
    Json::Value json = hashRangeSource->serializeValue();
    Json::FastWriter writer;
    std::string jsonText = writer.write(json);
    PlannerDomRoot domRoot(jsonText.c_str());
    auto_ptr<AbstractExpression> hashRangeExpr(AbstractExpression::buildExpressionTree(domRoot.rootObject()));

    // Two-column schema: (BIGINT, nullable), (INTEGER, not nullable).
    vector<std::string> columnNames; // filled in but not used below
    columnNames.push_back("foo");
    columnNames.push_back("bar");

    vector<voltdb::ValueType> types;
    types.push_back(voltdb::VALUE_TYPE_BIGINT);
    types.push_back(voltdb::VALUE_TYPE_INTEGER);

    vector<int32_t> columnSizes;
    columnSizes.push_back(8);
    columnSizes.push_back(4);

    vector<bool> allowNull;
    allowNull.push_back(true);
    allowNull.push_back(false);

    TupleSchema *schema = TupleSchema::createTupleSchemaForTest(types,columnSizes,allowNull);

    boost::scoped_array<char> tupleStorage(new char[schema->tupleLength() + TUPLE_HEADER_SIZE]);
    TableTuple tuple(tupleStorage.get(), schema);

    // Print the seed so a failing run can be reproduced.
    const time_t seed = time(NULL);
    std::cout << "Seed " << seed << std::endl;
    srand(static_cast<unsigned int>(seed));

    for (int iteration = 0; iteration < 100000; iteration++) {
        NValue val = ValueFactory::getIntegerValue(rand());
        const int32_t hash = val.murmurHash3();
        tuple.setNValue(1, val);
        NValue inrange = hashRangeExpr->eval( &tuple );

        // We no longer allow wrapping, so range 3 is checked like the others
        // rather than as (hash >= range3Min || hash < range3Max).
        const bool inRange1 = (hash >= range1Min && hash <= range1Max);
        const bool inRange2 = (hash >= range2Min && hash <= range2Max);
        const bool inRange3 = (hash >= range3Min && hash <= range3Max);
        if (inRange1 || inRange2 || inRange3) {
            ASSERT_TRUE(inrange.isTrue());
        } else {
            ASSERT_FALSE(inrange.isTrue());
        }
    }
    TupleSchema::freeTupleSchema(schema);
}