/** * Set up a multi-column temp output table for those executors that require one. * Called from p_init. */ void AbstractExecutor::setTempOutputTable(const ExecutorVector& executorVector, const string tempTableName) { TupleSchema* schema = m_abstractNode->generateTupleSchema(); int column_count = schema->columnCount(); std::vector<std::string> column_names(column_count); assert(column_count >= 1); const std::vector<SchemaColumn*>& outputSchema = m_abstractNode->getOutputSchema(); for (int ctr = 0; ctr < column_count; ctr++) { column_names[ctr] = outputSchema[ctr]->getColumnName(); } if (executorVector.isLargeQuery()) { m_tmpOutputTable = TableFactory::buildLargeTempTable(tempTableName, schema, column_names); } else { m_tmpOutputTable = TableFactory::buildTempTable(tempTableName, schema, column_names, executorVector.limits()); } m_abstractNode->setOutputTable(m_tmpOutputTable); }
/**
 * Allocate and populate a TupleSchema for the given column descriptions.
 *
 * @param columnTypes          value type of each column
 * @param columnSizes          declared length of each column
 * @param allowNull            per-column nullability flag
 * @param allowInlinedObjects  stored on the schema and forwarded to
 *                             countUninlineableObjectColumns()
 * @return a schema allocated with new char[]
 */
TupleSchema* TupleSchema::createTupleSchema(const std::vector<ValueType> columnTypes,
                                            const std::vector<int32_t> columnSizes,
                                            const std::vector<bool> allowNull,
                                            bool allowInlinedObjects) {
    const uint16_t numColumns = static_cast<uint16_t>(columnTypes.size());
    const uint16_t numUninlined =
        TupleSchema::countUninlineableObjectColumns(columnTypes, columnSizes, allowInlinedObjects);

    // The allocation is laid out as:
    //   - the TupleSchema data members themselves,
    //   - numColumns + 1 ColumnInfo entries (one extra because a column's
    //     length is obtained by subtracting adjacent offsets),
    //   - one int16_t per uninlineable object column, so the indices of those
    //     columns can be stored up front to aid iteration.
    const size_t headerBytes = sizeof(TupleSchema);
    const size_t columnInfoBytes = sizeof(ColumnInfo) * (numColumns + 1);
    const size_t uninlinedIndexBytes = numUninlined * sizeof(int16_t);
    int memSize = static_cast<int>(headerBytes + columnInfoBytes + uninlinedIndexBytes);

    // Grab the raw storage, view it as a schema and zero every offset.
    TupleSchema *result = reinterpret_cast<TupleSchema*>(new char[memSize]);
    memset(result, 0, memSize);

    result->m_allowInlinedObjects = allowInlinedObjects;
    result->m_columnCount = columnCount;
    result->m_uninlinedObjectColumnCount = numUninlined;

    // Running counter handed to every setColumnMetaData() call.
    // NOTE(review): presumably advanced by the callee for uninlined columns -- confirm.
    uint16_t uninlinedObjectColumnIndex = 0;
    for (uint16_t col = 0; col < numColumns; ++col) {
        const ValueType colType = columnTypes[col];
        const uint32_t colLength = columnSizes[col];
        const bool colNullable = allowNull[col];
        result->setColumnMetaData(col, colType, colLength, colNullable, uninlinedObjectColumnIndex);
    }

    return result;
}
/**
 * Allocate and populate a TupleSchema with both visible and hidden columns.
 *
 * Visible columns occupy indices [0, columnTypes.size()); hidden columns are
 * placed immediately after them. Hidden columns must be inlineable; a fatal
 * logic error is thrown otherwise.
 *
 * @return a schema allocated with new char[], marked as not headerless
 */
TupleSchema* TupleSchema::createTupleSchema(const std::vector<ValueType>& columnTypes,
                                            const std::vector<int32_t>& columnSizes,
                                            const std::vector<bool>& allowNull,
                                            const std::vector<bool>& columnInBytes,
                                            const std::vector<ValueType>& hiddenColumnTypes,
                                            const std::vector<int32_t>& hiddenColumnSizes,
                                            const std::vector<bool>& hiddenAllowNull,
                                            const std::vector<bool>& hiddenColumnInBytes) {
    const uint16_t numVisible = static_cast<uint16_t>(columnTypes.size());
    const uint16_t numHidden = static_cast<uint16_t>(hiddenColumnTypes.size());
    const uint16_t numUninlined =
        TupleSchema::countUninlineableObjectColumns(columnTypes, columnSizes, columnInBytes);

    int memSize = memSizeForTupleSchema(numVisible, numUninlined, numHidden);

    // Grab the raw storage, view it as a schema and zero every offset.
    TupleSchema *result = reinterpret_cast<TupleSchema*>(new char[memSize]);
    memset(result, 0, memSize);

    result->m_columnCount = numVisible;
    result->m_uninlinedObjectColumnCount = numUninlined;
    result->m_hiddenColumnCount = numHidden;
    result->m_isHeaderless = false;

    // Running counter handed to every setColumnMetaData() call.
    uint16_t uninlinedObjectColumnIndex = 0;

    // Visible columns first.
    for (uint16_t col = 0; col < numVisible; ++col) {
        const ValueType colType = columnTypes[col];
        const uint32_t colLength = columnSizes[col];
        const bool colNullable = allowNull[col];
        const bool colInBytes = columnInBytes[col];
        result->setColumnMetaData(col, colType, colLength, colNullable,
                                  uninlinedObjectColumnIndex, colInBytes);
    }

    // Hidden columns follow the visible ones.
    for (uint16_t hidden = 0; hidden < numHidden; ++hidden) {
        const ValueType colType = hiddenColumnTypes[hidden];
        const uint32_t colLength = hiddenColumnSizes[hidden];
        const bool colNullable = hiddenAllowNull[hidden];
        const bool colInBytes = hiddenColumnInBytes[hidden];

        // Uninlineable data is not supported in hidden columns yet.
        const bool inlineable = isInlineable(colType, colLength, colInBytes);
        if (!inlineable) {
            throwFatalLogicErrorStreamed("Attempt to create uninlineable hidden column");
        }

        result->setColumnMetaData(static_cast<uint16_t>(numVisible + hidden),
                                  colType, colLength, colNullable,
                                  uninlinedObjectColumnIndex, colInBytes);
    }

    return result;
}
// Checks the shape of the schema returned by createEvictedTupleSchema():
// exactly two columns, a SMALLINT followed by an INTEGER.
TEST_F(TupleSchemaTest, CreateEvictedTupleSchema) {
    initTable(true);

    // Build the schema used for the evicted tuple tables.
    TupleSchema *schemaUnderTest = TupleSchema::createEvictedTupleSchema();
    // Uncomment to dump the schema while debugging:
    // fprintf(stdout, "\nEVICTED TABLE SCHEMA\n%s\n", schemaUnderTest->debug().c_str());

    ASSERT_EQ(2, schemaUnderTest->columnCount());
    ASSERT_EQ(VALUE_TYPE_SMALLINT, schemaUnderTest->columnType(0));
    ASSERT_EQ(VALUE_TYPE_INTEGER, schemaUnderTest->columnType(1));

    TupleSchema::freeTupleSchema(schemaUnderTest);
}
/**
 * Build a new schema out of selected columns of one or two existing schemas.
 *
 * Output column k, for k < firstSet.size(), copies column firstSet[k] of
 * `first`. When `second` is non-null, output column firstSet.size() + k copies
 * column secondSet[k] of `second`. When `second` is null, secondSet is ignored.
 *
 * Type, length, nullability, in-bytes flag and inlined flag are all taken from
 * the source column. Per-column attributes are written at the OUTPUT position,
 * not at the source column index, so that arbitrary (non-identity) selections
 * such as {5, 6} stay in bounds and land on the right column.
 *
 * @param first      schema supplying the leading columns; must not be null
 * @param firstSet   indices into `first` of the columns to copy, in output order
 * @param second     optional schema supplying the trailing columns
 * @param secondSet  indices into `second` of the columns to copy, in output order
 * @return the newly created schema
 */
TupleSchema* TupleSchema::createTupleSchema(const TupleSchema *first,
                                            const std::vector<uint16_t> firstSet,
                                            const TupleSchema *second,
                                            const std::vector<uint16_t> secondSet) {
    assert(first);

    typedef std::vector<uint16_t>::size_type size_type;
    const size_type offset = firstSet.size();
    const size_type combinedColumnCount = firstSet.size() + secondSet.size();

    std::vector<ValueType> columnTypes;
    std::vector<int32_t> columnLengths;
    columnTypes.reserve(combinedColumnCount);
    columnLengths.reserve(combinedColumnCount);
    // Slots that are never filled (second == NULL) keep these defaults.
    std::vector<bool> columnAllowNull(combinedColumnCount, true);
    std::vector<bool> columnInBytes(combinedColumnCount, false);

    for (size_type pos = 0; pos < firstSet.size(); ++pos) {
        const TupleSchema::ColumnInfo *columnInfo = first->getColumnInfo(firstSet[pos]);
        columnTypes.push_back(columnInfo->getVoltType());
        columnLengths.push_back(columnInfo->length);
        columnAllowNull[pos] = columnInfo->allowNull;
        columnInBytes[pos] = columnInfo->inBytes;
    }

    if (second) {
        for (size_type pos = 0; pos < secondSet.size(); ++pos) {
            const TupleSchema::ColumnInfo *columnInfo = second->getColumnInfo(secondSet[pos]);
            columnTypes.push_back(columnInfo->getVoltType());
            columnLengths.push_back(columnInfo->length);
            columnAllowNull[offset + pos] = columnInfo->allowNull;
            columnInBytes[offset + pos] = columnInfo->inBytes;
        }
    }

    TupleSchema *schema = TupleSchema::createTupleSchema(columnTypes,
                                                         columnLengths,
                                                         columnAllowNull,
                                                         columnInBytes);

    // Carry over the inlined flag of each source column to its output column.
    for (size_type pos = 0; pos < firstSet.size(); ++pos) {
        ColumnInfo *info = schema->getColumnInfo(static_cast<int>(pos));
        info->inlined = first->getColumnInfo(firstSet[pos])->inlined;
    }

    if (second) {
        for (size_type pos = 0; pos < secondSet.size(); ++pos) {
            ColumnInfo *info = schema->getColumnInfo(static_cast<int>(offset + pos));
            info->inlined = second->getColumnInfo(secondSet[pos])->inlined;
        }
    }

    return schema;
}
/**
 * Build a table index for the given scheme.
 *
 * A key schema is derived from the indexed columns of scheme.tupleSchema
 * (type and length copied per column, every key column nullable) and handed
 * to a TableIndexPicker, which supplies the index instance that is returned.
 */
TableIndex *TableIndexFactory::getInstance(const TableIndexScheme &scheme) {
    int numKeyColumns = (int)scheme.columnIndices.size();
    TupleSchema *sourceSchema = scheme.tupleSchema;
    assert(sourceSchema);

    std::vector<ValueType> types;
    std::vector<int32_t> lengths;
    std::vector<bool> nullable(numKeyColumns, true);

    for (int keyPos = 0; keyPos != numKeyColumns; ++keyPos) {
        types.push_back(sourceSchema->columnType(scheme.columnIndices[keyPos]));
        lengths.push_back(sourceSchema->columnLength(scheme.columnIndices[keyPos]));
    }

    TupleSchema *keySchema = TupleSchema::createTupleSchema(types, lengths, nullable, true);
    assert(keySchema);
    VOLT_TRACE("Creating index for %s.\n%s", scheme.name.c_str(), keySchema->debug().c_str());

    TableIndexPicker picker(keySchema, scheme);
    return picker.getInstance();
}
/**
 * Create the temp table that holds this executor's multi-column output and
 * attach it to the plan node. Called from p_init.
 *
 * @param limits         temp table limits passed through to the factory; must not be null
 * @param tempTableName  name given to the new temp table
 */
void AbstractExecutor::setTempOutputTable(TempTableLimits* limits, const string tempTableName) {
    assert(limits);

    TupleSchema* outputTupleSchema = m_abstractNode->generateTupleSchema();
    const int numColumns = outputTupleSchema->columnCount();
    std::vector<std::string> names(numColumns);
    assert(numColumns >= 1);

    // Column names come from the plan node's output schema, in column order.
    const std::vector<SchemaColumn*>& nodeColumns = m_abstractNode->getOutputSchema();
    for (int col = 0; col != numColumns; ++col) {
        names[col] = nodeColumns[col]->getColumnName();
    }

    m_tmpOutputTable = TableFactory::getTempTable(m_abstractNode->databaseId(),
                                                  tempTableName,
                                                  outputTupleSchema,
                                                  names,
                                                  limits);
    m_abstractNode->setOutputTable(m_tmpOutputTable);
}
// helper to make a schema, a tuple and calculate EL size size_t TableTupleExportTest::maxElSize(std::vector<uint16_t> &keep_offsets, bool useNullStrings) { TableTuple *tt; TupleSchema *ts; char buf[1024]; // tuple data ts = TupleSchema::createTupleSchema(m_schema, keep_offsets); tt = new TableTuple(buf, ts); // if the tuple includes strings, add some content // assuming all Export tuples were allocated for persistent // storage and choosing set* api accordingly here. if (ts->columnCount() > 6) { NValue nv = ValueFactory::getStringValue("ABCDEabcde"); // 10 char if (useNullStrings) { nv.free(); nv.setNull(); } tt->setNValueAllocateForObjectCopies(6, nv, NULL); nv.free(); } if (ts->columnCount() > 7) { NValue nv = ValueFactory::getStringValue("abcdeabcdeabcdeabcde"); // 20 char if (useNullStrings) { nv.free(); nv.setNull(); } tt->setNValueAllocateForObjectCopies(7, nv, NULL); nv.free(); } // The function under test! size_t sz = tt->maxExportSerializationSize(); // and cleanup tt->freeObjectColumns(); delete tt; TupleSchema::freeTupleSchema(ts); return sz; }
// Create a table with the schema described above, where the // caller may have specified a number of extra columns. Also add // two indexes: one integer primary key and one geospatial. static unique_ptr<PersistentTable> createTable(int numExtraCols = 0) { TupleSchema* schema = createTupleSchemaWithExtraCols(numExtraCols); char signature[20]; CatalogId databaseId = 1000; std::vector<std::string> columnNames; for (int i = 0; i < schema->columnCount(); ++i) { std::ostringstream oss; oss << "col_" << i; columnNames.push_back(oss.str()); } auto table = unique_ptr<PersistentTable>( static_cast<PersistentTable*>(TableFactory::getPersistentTable(databaseId, "test_table", schema, columnNames, signature))); table->addIndex(createGeospatialIndex(table->schema())); TableIndex* pkIndex = createPrimaryKeyIndex(table->schema()); table->addIndex(pkIndex); table->setPrimaryKeyIndex(pkIndex); return table; }
// Checks that createEvictedTupleSchema(pkSchema) yields the primary key
// columns (same type, length and nullability) followed by exactly one
// non-nullable SMALLINT column.
TEST_F(TupleSchemaTest, CreateEvictedTupleSchema) {
    initTable(true);

    // Build the schema used for the evicted tuple tables from the primary
    // key index schema.
    TupleSchema *schemaUnderTest = TupleSchema::createEvictedTupleSchema(m_primaryKeyIndexSchema);
    // Uncomment to dump the schema while debugging:
    // fprintf(stdout, "\nEVICTED TABLE SCHEMA\n%s\n", schemaUnderTest->debug().c_str());

    ASSERT_EQ(m_numPrimaryKeyCols+1, schemaUnderTest->columnCount());

    // Leading columns must mirror the primary key index schema.
    for (int col = 0; col != m_numPrimaryKeyCols; ++col) {
        ASSERT_EQ(m_primaryKeyIndexSchema->columnType(col),
                  schemaUnderTest->columnType(col));
        ASSERT_EQ(m_primaryKeyIndexSchema->columnLength(col),
                  schemaUnderTest->columnLength(col));
        ASSERT_EQ(m_primaryKeyIndexSchema->columnAllowNull(col),
                  schemaUnderTest->columnAllowNull(col));
    }

    // The single trailing column carries the 16-bit block ids.
    ASSERT_EQ(VALUE_TYPE_SMALLINT, schemaUnderTest->columnType(m_numPrimaryKeyCols));
    ASSERT_FALSE(schemaUnderTest->columnAllowNull(m_numPrimaryKeyCols));

    TupleSchema::freeTupleSchema(schemaUnderTest);
}
/**
 * Build a table index for the given scheme.
 *
 * The key schema is derived either from scheme.indexedExpressions (when any
 * are present) or from the indexed columns of scheme.tupleSchema. While
 * building it, two properties are tracked and handed to TableIndexPicker:
 *   - whether every key component has an integral type, and
 *   - whether the key is free of VARCHAR/VARBINARY expression components.
 * Every key column is created nullable.
 */
TableIndex *TableIndexFactory::getInstance(const TableIndexScheme &scheme) {
    const TupleSchema *sourceSchema = scheme.tupleSchema;
    assert(sourceSchema);

    bool allIntegral = true;
    bool inlinesOrColumnsOnly = true;
    std::vector<ValueType> types;
    std::vector<int32_t> lengths;
    size_t numKeyValues = 0;
    size_t numExpressions = scheme.indexedExpressions.size();

    if (numExpressions == 0) {
        // Plain column index: copy type and length straight from the table schema.
        numKeyValues = scheme.columnIndices.size();
        for (size_t pos = 0; pos < numKeyValues; ++pos) {
            ValueType columnType = sourceSchema->columnType(scheme.columnIndices[pos]);
            const bool integral = isIntegralType(columnType);
            allIntegral = allIntegral && integral;
            types.push_back(columnType);
            lengths.push_back(sourceSchema->columnLength(scheme.columnIndices[pos]));
        }
    } else {
        numKeyValues = numExpressions;
        // TODO: Extra runtime and space efficiency could be gained here by
        // marking which indexed expressions are really non-inlined column
        // expressions. That would let GenericPersistentKey index keys skip a
        // persistent allocation and copy of an already persistent value. One
        // option is a bool attribute on TupleSchema::ColumnInfo, set only in
        // this special case, that disables deep copying of the object
        // referenced by that particular "tuple column".
        for (size_t pos = 0; pos < numKeyValues; ++pos) {
            ValueType exprType = scheme.indexedExpressions[pos]->getValueType();
            const bool integral = isIntegralType(exprType);
            allIntegral = allIntegral && integral;

            const bool variableLength =
                (exprType == VALUE_TYPE_VARCHAR) || (exprType == VALUE_TYPE_VARBINARY);
            uint32_t declaredLength;
            if (!variableLength) {
                declaredLength = NValue::getTupleStorageSize(exprType);
            } else {
                // Using TUPLE_SCHEMA_COLUMN_MAX_VALUE_LENGTH as the column
                // length applies to indexed expression values the same cap
                // that column values get. In theory expression values could
                // have an independent limit, up to whatever ThreadLocalPool
                // can allocate, but today everything shares this one limit,
                // which is also the default/maximum size of variable columns
                // defined in schema (VoltType.MAX_VALUE_LENGTH on the java
                // side). It is unclear whether
                // scheme.indexedExpressions[pos]->getValueSize() could or
                // should be consulted for a better answer; there is probably
                // little to gain, since expressions rarely carry enough
                // information to guarantee the result is small enough to
                // "inline".
                declaredLength = TupleSchema::COLUMN_MAX_VALUE_LENGTH;
                inlinesOrColumnsOnly = false;
            }

            types.push_back(exprType);
            lengths.push_back(declaredLength);
        }
    }

    std::vector<bool> nullable(numKeyValues, true);
    TupleSchema *keySchema = TupleSchema::createTupleSchema(types, lengths, nullable, true);
    assert(keySchema);
    VOLT_TRACE("Creating index for '%s' with key schema '%s'", scheme.name.c_str(), keySchema->debug().c_str());

    TableIndexPicker picker(keySchema, allIntegral, inlinesOrColumnsOnly, scheme);
    return picker.getInstance();
}
// helper to make a schema, a tuple and serialize to a buffer size_t TableTupleExportTest::serElSize(std::vector<uint16_t> &keep_offsets, uint8_t *nullArray, char *dataPtr, bool nulls) { TableTuple *tt; TupleSchema *ts; char buf[1024]; // tuple data ts = TupleSchema::createTupleSchema(m_schema, keep_offsets); tt = new TableTuple(buf, ts); // assuming all Export tuples were allocated for persistent // storage and choosing set* api accordingly here. switch (ts->columnCount()) { // note my sophisticated and clever use of fall through case 8: { NValue nv = ValueFactory::getStringValue("abcdeabcdeabcdeabcde"); // 20 char if (nulls) { nv.free(); nv.setNull(); } tt->setNValueAllocateForObjectCopies(7, nv, NULL); nv.free(); } case 7: { NValue nv = ValueFactory::getStringValue("ABCDEabcde"); // 10 char if (nulls) { nv.free(); nv.setNull(); } tt->setNValueAllocateForObjectCopies(6, nv, NULL); nv.free(); } case 6: { NValue nv = ValueFactory::getDecimalValueFromString("-12.34"); if (nulls) { nv.free(); nv.setNull(); } tt->setNValueAllocateForObjectCopies(5, nv, NULL); nv.free(); } case 5: { NValue nv = ValueFactory::getTimestampValue(9999); if (nulls) nv.setNull(); tt->setNValueAllocateForObjectCopies(4, nv, NULL); nv.free(); } case 4: { NValue nv = ValueFactory::getBigIntValue(1024); if (nulls) nv.setNull(); tt->setNValueAllocateForObjectCopies(3, nv, NULL); nv.free(); } case 3: { NValue nv = ValueFactory::getIntegerValue(512); if (nulls) nv.setNull(); tt->setNValueAllocateForObjectCopies(2, nv, NULL); nv.free(); } case 2: { NValue nv = ValueFactory::getSmallIntValue(256); if (nulls) nv.setNull(); tt->setNValueAllocateForObjectCopies(1, nv, NULL); nv.free(); } case 1: { NValue nv = ValueFactory::getTinyIntValue(120); if (nulls) nv.setNull(); tt->setNValueAllocateForObjectCopies(0, nv, NULL); nv.free(); } break; default: // this is an error in the test fixture. EXPECT_EQ(0,1); break; } // The function under test! 
ExportSerializeOutput io(dataPtr, 2048); tt->serializeToExport(io, 0, nullArray); // and cleanup tt->freeObjectColumns(); delete tt; TupleSchema::freeTupleSchema(ts); return io.position(); }
/*
 * Verify that a hash range expression accepts exactly those rows whose
 * hashed column value falls inside one of the configured ranges.
 */
TEST_F(ExpressionTest, HashRange) {
    queue<AE*> e; // not referenced below

    // Three disjoint, non-wrapping ranges over the int32 hash space.
    const int32_t range1Max = -(numeric_limits<int32_t>::max() / 2);
    const int32_t range1Min = numeric_limits<int32_t>::min() - (range1Max / 2);
    const int32_t range2Min = 0;
    const int32_t range2Max = numeric_limits<int32_t>::max() / 2;
    const int32_t range3Min = range2Max + (range2Max / 2);
    const int32_t range3Max = numeric_limits<int32_t>::max();
    int32_t ranges[][2] = {
        { range1Min, range1Max},
        { range2Min, range2Max},
        { range3Min, range3Max}
    };

    // Round-trip the expression through JSON to get the tree under test.
    auto_ptr<AE> ae(new HR(1, ranges, 3));
    Json::Value json = ae->serializeValue();
    Json::FastWriter writer;
    std::string jsonText = writer.write(json);
    PlannerDomRoot domRoot(jsonText.c_str());
    auto_ptr<AbstractExpression> e1(AbstractExpression::buildExpressionTree(domRoot.rootObject()));

    // Two-column schema: (BIGINT nullable, INTEGER not null).
    vector<std::string> columnNames; // filled but not referenced below
    vector<int32_t> columnSizes;
    vector<bool> allowNull;
    vector<voltdb::ValueType> types;

    columnNames.push_back("foo");
    columnSizes.push_back(8);
    allowNull.push_back(true);
    types.push_back(voltdb::VALUE_TYPE_BIGINT);

    columnNames.push_back("bar");
    columnSizes.push_back(4);
    allowNull.push_back(false);
    types.push_back(voltdb::VALUE_TYPE_INTEGER);

    TupleSchema *schema = TupleSchema::createTupleSchemaForTest(types,columnSizes,allowNull);

    boost::scoped_array<char> tupleStorage(new char[schema->tupleLength() + TUPLE_HEADER_SIZE]);
    TableTuple t(tupleStorage.get(), schema);

    // Print the seed so a failing run can be reproduced.
    const time_t seed = time(NULL);
    std::cout << "Seed " << seed << std::endl;
    srand(static_cast<unsigned int>(seed));

    for (int iteration = 0; iteration < 100000; iteration++) {
        NValue val = ValueFactory::getIntegerValue(rand());
        const int32_t hash = val.murmurHash3();
        t.setNValue(1, val);
        NValue inrange = e1->eval( &t );

        const bool inRange1 = (hash >= range1Min && hash <= range1Max);
        const bool inRange2 = (hash >= range2Min && hash <= range2Max);
        // Wrapping ranges are no longer allowed, so range 3 is checked as a
        // plain closed interval rather than
        // (hash >= range3Min || hash < range3Max).
        const bool inRange3 = (hash >= range3Min && hash <= range3Max);

        if (inRange1 || inRange2 || inRange3) {
            ASSERT_TRUE(inrange.isTrue());
        } else {
            ASSERT_FALSE(inrange.isTrue());
        }
    }

    TupleSchema::freeTupleSchema(schema);
}