Example #1
0
bool InsertExecutor::executePurgeFragmentIfNeeded(PersistentTable** ptrToTable) {
    PersistentTable* table = *ptrToTable;
    int tupleLimit = table->tupleLimit();
    int numTuples = table->visibleTupleCount();

    // Note that the number of tuples may be larger than the limit.
    // This can happen when data is redistributed after an elastic
    // rejoin, for example.
    if (numTuples >= tupleLimit) {
        // Next insert will fail: run the purge fragment
        // before trying to insert.
        m_engine->executePurgeFragment(table);

        // If the purge fragment did a truncate table, then the old
        // table is still around for undo purposes, but there is now a
        // new empty table we can insert into.  Update the caller's table
        // pointer to use it.
        //
        // The plan node will go through the table catalog delegate to get
        // the correct instance of PersistentTable.
        *ptrToTable = static_cast<PersistentTable*>(m_node->getTargetTable());
    }

    return true;
}
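A minimal caller sketch, not part of the original sources: it mirrors the pattern later shown in Example #27, where the executor refreshes its PersistentTable pointer after the purge fragment may have truncated the table. The helper name insertWithPurgeCheck and its packaging as a member function are hypothetical; only executePurgeFragmentIfNeeded(), m_hasPurgeFragment and insertTuple() come from the examples in this collection.
bool InsertExecutor::insertWithPurgeCheck(PersistentTable* persistentTable, TableTuple& templateTuple) {
    Table* targetTable = persistentTable;
    if (m_hasPurgeFragment) {
        if (!executePurgeFragmentIfNeeded(&persistentTable)) {
            return false;
        }
        // The purge fragment may have swapped in a new empty table; use the refreshed pointer.
        targetTable = persistentTable;
    }
    return targetTable->insertTuple(templateTuple);
}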
Example #2
0
bool UpdateExecutor::p_execute(const NValueArray &params) {
    assert(m_inputTable);

    // target table should be persistenttable
    PersistentTable* targetTable = dynamic_cast<PersistentTable*>(m_node->getTargetTable());
    assert(targetTable);
    TableTuple targetTuple = TableTuple(targetTable->schema());

    VOLT_TRACE("INPUT TABLE: %s\n", m_inputTable->debug().c_str());
    VOLT_TRACE("TARGET TABLE - BEFORE: %s\n", targetTable->debug().c_str());

    // determine which indices are updated by this executor
    // iterate through all target table indices and see if they contain
    // columns mutated by this executor
    std::vector<TableIndex*> indexesToUpdate;
    const std::vector<TableIndex*>& allIndexes = targetTable->allIndexes();
    BOOST_FOREACH(TableIndex *index, allIndexes) {
        bool indexKeyUpdated = false;
        BOOST_FOREACH(int colIndex, index->getColumnIndices()) {
            std::pair<int, int> updateColInfo; // declared outside BOOST_FOREACH: the macro cannot parse the comma in the pair type
            BOOST_FOREACH(updateColInfo, m_inputTargetMap) {
                if (updateColInfo.second == colIndex) {
                    indexKeyUpdated = true;
                    break;
                }
            }
            if (indexKeyUpdated) break;
        }
        if (indexKeyUpdated) {
            indexesToUpdate.push_back(index);
        }
    }
Example #3
0
CopyOnWriteContext::CopyOnWriteContext(
        PersistentTable &table,
        TupleSerializer &serializer,
        int32_t partitionId,
        const std::vector<std::string> &predicateStrings,
        int64_t totalTuples) :
             TableStreamerContext(table, predicateStrings),
             m_backedUpTuples(TableFactory::getCopiedTempTable(table.databaseId(),
                                                               "COW of " + table.name(),
                                                               &table, NULL)),
             m_serializer(serializer),
             m_pool(2097152, 320),
             m_blocks(getTable().m_data),
             m_iterator(new CopyOnWriteIterator(&table, m_blocks.begin(), m_blocks.end())),
             m_maxTupleLength(serializer.getMaxSerializedTupleSize(table.schema())),
             m_tuple(table.schema()),
             m_finishedTableScan(false),
             m_partitionId(partitionId),
             m_totalTuples(totalTuples),
             m_tuplesRemaining(totalTuples),
             m_blocksCompacted(0),
             m_serializationBatches(0),
             m_inserts(0),
             m_updates(0)
{}
bool EvictionIterator::next(TableTuple &tuple)
{    
    PersistentTable* ptable = static_cast<PersistentTable*>(table);

    if(current_tuple_id == ptable->getNewestTupleID()) // we've already returned the last tuple in the chain
    {
        VOLT_DEBUG("No more tuples in the chain.");
        return false; 
    }

    if(current_tuple_id == -1) // this is the first call to next
    {
        VOLT_DEBUG("This is the first tuple in the chain.");

        if(ptable->getNumTuplesInEvictionChain() == 0)  // there are no tuples in the chain
        {
            VOLT_DEBUG("There are no tuples in the eviction chain.");
            return false; 
        }
        
        current_tuple_id = ptable->getOldestTupleID(); 
    }
    else  // advance the iterator to the next tuple in the chain
    {        
        current_tuple_id = current_tuple->getNextTupleInChain();
    }

    current_tuple->move(ptable->dataPtrForTuple(current_tuple_id)); 
    tuple.move(current_tuple->address()); 
    
    VOLT_DEBUG("current_tuple_id = %d", current_tuple_id);
    
    return true; 
}
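A hedged driver sketch, assumed rather than taken from the original sources: it walks the eviction chain using only the EvictionIterator::hasNext()/next() API shown in these examples; how the iterator itself is constructed is deliberately left out.
static size_t countEvictionChain(EvictionIterator& it, PersistentTable* table) {
    TableTuple tuple(table->schema());
    size_t count = 0;
    // next() repositions the iterator's internal tuple and points `tuple` at its storage.
    while (it.hasNext() && it.next(tuple)) {
        ++count;  // a real caller would evict or inspect the tuple here
    }
    return count;
}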
Example #5
0
bool
VoltDBEngine::loadTable(int32_t tableId,
                        ReferenceSerializeInput &serializeIn,
                        int64_t txnId, int64_t lastCommittedTxnId)
{
    m_executorContext->setupForPlanFragments(getCurrentUndoQuantum(),
                                             txnId,
                                             lastCommittedTxnId);

    Table* ret = getTable(tableId);
    if (ret == NULL) {
        VOLT_ERROR("Table ID %d doesn't exist. Could not load data",
                   (int) tableId);
        return false;
    }

    PersistentTable* table = dynamic_cast<PersistentTable*>(ret);
    if (table == NULL) {
        VOLT_ERROR("Table ID %d(name '%s') is not a persistent table."
                   " Could not load data",
                   (int) tableId, ret->name().c_str());
        return false;
    }

    try {
        table->loadTuplesFrom(serializeIn);
    } catch (const SerializableEEException &e) {
        throwFatalException("%s", e.message().c_str());
    }
    return true;
}
CopyOnWriteContext::CopyOnWriteContext(
        PersistentTable &table,
        TupleSerializer &serializer,
        int32_t partitionId,
        const std::vector<std::string> &predicateStrings,
        int64_t totalTuples,
        bool doDelete) :
             m_table(table),
             m_backedUpTuples(TableFactory::getCopiedTempTable(table.databaseId(),
                                                               "COW of " + table.name(),
                                                               &table, NULL)),
             m_serializer(serializer),
             m_pool(2097152, 320),
             m_blocks(m_table.m_data),
             m_iterator(new CopyOnWriteIterator(&table, m_blocks.begin(), m_blocks.end())),
             m_maxTupleLength(serializer.getMaxSerializedTupleSize(table.schema())),
             m_tuple(table.schema()),
             m_finishedTableScan(false),
             m_partitionId(partitionId),
             m_totalTuples(totalTuples),
             m_tuplesRemaining(totalTuples),
             m_blocksCompacted(0),
             m_serializationBatches(0),
             m_inserts(0),
             m_updates(0),
             m_doDelete(doDelete)
{
    // Parse predicate strings. The factory type determines the kind of
    // predicates that get generated.
    // Throws an exception to be handled by caller on errors.
    std::ostringstream errmsg;
    if (!m_predicates.parseStrings(predicateStrings, errmsg)) {
        throwFatalException("CopyOnWriteContext() failed to parse predicate strings.");
    }
}
Example #7
0
bool InsertExecutor::p_init(AbstractPlanNode* abstractNode,
                            TempTableLimits* limits)
{
    VOLT_TRACE("init Insert Executor");

    m_node = dynamic_cast<InsertPlanNode*>(abstractNode);
    assert(m_node);
    assert(m_node->getTargetTable());
    assert(m_node->getInputTableCount() == 1);

    Table* targetTable = m_node->getTargetTable();
    m_isUpsert = m_node->isUpsert();

    setDMLCountOutputTable(limits);

    m_inputTable = dynamic_cast<TempTable*>(m_node->getInputTable()); //input table should be temptable
    assert(m_inputTable);

    // Target table can be StreamedTable or PersistentTable and must not be NULL
    PersistentTable *persistentTarget = dynamic_cast<PersistentTable*>(targetTable);
    m_partitionColumn = -1;
    m_isStreamed = (persistentTarget == NULL);

    if (m_isUpsert) {
        VOLT_TRACE("init Upsert Executor actually");
        if (m_isStreamed) {
            VOLT_ERROR("UPSERT is not supported for Stream table %s", targetTable->name().c_str());
        }
        // look up whether the tuple exists already
        if (targetTable->primaryKeyIndex() == NULL) {
            VOLT_ERROR("No primary keys were found in our target table '%s'",
                    targetTable->name().c_str());
        }
    }

    if (persistentTarget) {
        m_partitionColumn = persistentTarget->partitionColumn();
    }

    m_multiPartition = m_node->isMultiPartition();

    m_sourceIsPartitioned = m_node->sourceIsPartitioned();

    // allocate memory for template tuple, set defaults for all columns
    m_templateTuple.init(targetTable->schema());


    TableTuple tuple = m_templateTuple.tuple();

    std::set<int> fieldsExplicitlySet(m_node->getFieldMap().begin(), m_node->getFieldMap().end());
    m_node->initTupleWithDefaultValues(m_engine,
                                       &m_memoryPool,
                                       fieldsExplicitlySet,
                                       tuple,
                                       m_nowFields);
    m_hasPurgeFragment = persistentTarget ? persistentTarget->hasPurgeFragment() : false;

    return true;
}
Table *TableCatalogDelegate::getTable() const {
    // If a persistent table has an active delta table, return the delta table instead of the whole table.
    PersistentTable *persistentTable = dynamic_cast<PersistentTable*>(m_table);
    if (persistentTable && persistentTable->isDeltaTableActive()) {
        return persistentTable->deltaTable();
    }
    return m_table;
}
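A small sketch under stated assumptions: re-resolving a PersistentTable pointer through its catalog delegate after an operation such as a truncate may have replaced the underlying table, as the comment in Example #1 describes. The engine->getTableDelegate() call follows its use in Examples #10 and #16 below; the helper name refreshTarget is hypothetical.
static PersistentTable* refreshTarget(VoltDBEngine* engine, const std::string& name) {
    TableCatalogDelegate* tcd = engine->getTableDelegate(name);
    // getTable() may hand back an active delta table instead of the base table (see above).
    return dynamic_cast<PersistentTable*>(tcd->getTable());
}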
Example #9
0
Table* TableFactory::getPersistentTable(
            voltdb::CatalogId databaseId,
            const std::string &name,
            TupleSchema* schema,
            const std::vector<std::string> &columnNames,
            char *signature,
            bool tableIsMaterialized,
            int partitionColumn,
            bool exportEnabled,
            bool exportOnly,
            int tableAllocationTargetSize,
            int tupleLimit,
            int32_t compactionThreshold,
            bool drEnabled)
{
    Table *table = NULL;
    StreamedTable *streamedTable = NULL;
    PersistentTable *persistentTable = NULL;

    if (exportOnly) {
        table = streamedTable = new StreamedTable(partitionColumn);
    }
    else {
        table = persistentTable = new PersistentTable(partitionColumn,
                                                      signature,
                                                      tableIsMaterialized,
                                                      tableAllocationTargetSize,
                                                      tupleLimit,
                                                      drEnabled);
    }

    initCommon(databaseId,
               table,
               name,
               schema,
               columnNames,
               true,  // table will take ownership of TupleSchema object
               compactionThreshold);

    TableStats *stats;
    if (exportOnly) {
        stats = streamedTable->getTableStats();
    }
    else {
        stats = persistentTable->getTableStats();
        // Allocate and assign the tuple storage block to the persistent table ahead of time,
        // instead of at the time of the first tuple insertion, to avoid paying the allocation
        // cost on the insert path.
        TBPtr block = persistentTable->allocateNextBlock();
        assert(block->hasFreeTuples());
        persistentTable->m_blocksWithSpace.insert(block);
    }

    // initialize stats for the table
    configureStats(name, stats);

    return table;
}
Example #10
0
void AbstractPlanNode::setOutputTable(Table* table)
{
    PersistentTable* persistentTable = dynamic_cast<PersistentTable*>(table);
    if (persistentTable) {
        VoltDBEngine* engine = ExecutorContext::getEngine();
        TableCatalogDelegate* tcd = engine->getTableDelegate(persistentTable->name());
        m_outputTable.setTable(tcd);
    } else {
        TempTable* tempTable = dynamic_cast<TempTable*>(table);
        assert(tempTable);
        m_outputTable.setTable(tempTable);
    }
}
void MaterializedViewMetadata::setTargetTable(PersistentTable * target)
{
    PersistentTable * oldTarget = m_target;

    m_target = target;
    target->incrementRefcount();

    // Re-initialize dependencies on the target table, allowing for widened columns
    m_index = m_target->primaryKeyIndex();

    freeBackedTuples();
    allocateBackedTuples();

    oldTarget->decrementRefcount();
}
bool EvictionIterator::hasNext()
{        
    PersistentTable* ptable = static_cast<PersistentTable*>(table);
    
    if(current_tuple_id == ptable->getNewestTupleID())
        return false;
    if(ptable->usedTupleCount() == 0)
        return false; 
    if(ptable->getNumTuplesInEvictionChain() == 0) { // there are no tuples in the chain
        VOLT_DEBUG("There are no tuples in the eviction chain.");
        return false; 
    }
    
    return true; 
}
Example #13
0
 void setTable(TableIndexScheme *pkey = NULL) {
     assert (columnNames.size() == columnTypes.size());
     assert (columnTypes.size() == columnSizes.size());
     assert (columnSizes.size() == columnNullables.size());
     TupleSchema *schema = TupleSchema::createTupleSchemaForTest(columnTypes, columnSizes, columnNullables);
     if (pkey != NULL) {
         pkey->tupleSchema = schema;
     }
     table = static_cast<PersistentTable*>(TableFactory::getPersistentTable(database_id, "test_table", schema, columnNames, signature));
     if (pkey) {
         TableIndex *pkeyIndex = TableIndexFactory::getInstance(*pkey);
         assert(pkeyIndex);
         table->addIndex(pkeyIndex);
         table->setPrimaryKeyIndex(pkeyIndex);
     }
 };
Example #14
0
 MultiStreamTestTool(PersistentTable& table, size_t nparts) :
 table(table),
 nparts(nparts),
 iteration(-1),
 nerrors(0),
 showTuples(TUPLE_COUNT <= MAX_DETAIL_COUNT)
 {
     strcpy(stage, "Initialize");
     TableTuple tuple(table.schema());
     size_t i = 0;
     voltdb::TableIterator& iterator = table.iterator();
     while (iterator.next(tuple)) {
         int64_t value = *reinterpret_cast<int64_t*>(tuple.address() + 1);
         values.push_back(value);
         valueSet.insert(std::pair<int64_t,size_t>(value, i++));
     }
 }
Example #15
0
Table* TableFactory::getPersistentTable(
            voltdb::CatalogId databaseId,
            const std::string &name,
            TupleSchema* schema,
            const std::vector<std::string> &columnNames,
            char *signature,
            bool tableIsMaterialized,
            int partitionColumn,
            bool exportEnabled,
            bool exportOnly,
            int tableAllocationTargetSize,
            int tupleLimit,
            int32_t compactionThreshold,
            bool drEnabled)
{
    Table *table = NULL;

    if (exportOnly) {
        table = new StreamedTable(exportEnabled);
    }
    else {
        table = new PersistentTable(partitionColumn, signature, tableIsMaterialized, tableAllocationTargetSize, tupleLimit, drEnabled);
    }

    initCommon(databaseId,
               table,
               name,
               schema,
               columnNames,
               true,  // table will take ownership of TupleSchema object
               compactionThreshold);

    // initialize stats for the table
    configureStats(databaseId, name, table);

    if(!exportOnly) {
        // Allocate the tuple storage block for the persistent table ahead of time
        // instead of waiting until the first tuple insertion. The intent of allocating the
        // block ahead of time is to improve performance on the first tuple insertion.
        PersistentTable *persistentTable = static_cast<PersistentTable*>(table);
        TBPtr block = persistentTable->allocateNextBlock();
        assert(block->hasFreeTuples());
        persistentTable->m_blocksWithSpace.insert(block);
    }
    return table;
}
Example #16
0
void AbstractPlanNode::setInputTables(const vector<Table*>& val)
{
    size_t ii = val.size();
    m_inputTables.resize(ii);
    while (ii--) {
        PersistentTable* persistentTable = dynamic_cast<PersistentTable*>(val[ii]);
        if (persistentTable) {
            VoltDBEngine* engine = ExecutorContext::getEngine();
            assert(engine);
            TableCatalogDelegate* tcd = engine->getTableDelegate(persistentTable->name());
            m_inputTables[ii].setTable(tcd);
        } else {
            TempTable* tempTable = dynamic_cast<TempTable*>(val[ii]);
            assert(tempTable);
            m_inputTables[ii].setTable(tempTable);
        }
    }
}
Example #17
0
bool EvictionIterator::next(TableTuple &tuple)
{    
#ifndef ANTICACHE_TIMESTAMPS
    PersistentTable* ptable = static_cast<PersistentTable*>(table);

    if(current_tuple_id == ptable->getNewestTupleID()) // we've already returned the last tuple in the chain
    {
        VOLT_DEBUG("No more tuples in the chain.");
        return false; 
    }

    if(is_first) // this is the first call to next
    {
        is_first = false; 
        VOLT_DEBUG("This is the first tuple in the chain.");

        if(ptable->getNumTuplesInEvictionChain() == 0)  // there are no tuples in the chain
        {
            VOLT_DEBUG("There are no tuples in the eviction chain.");
            return false; 
        }

        current_tuple_id = ptable->getOldestTupleID(); 
    }
    else  // advance the iterator to the next tuple in the chain
    {        
        current_tuple_id = current_tuple->getNextTupleInChain();
    }

    current_tuple->move(ptable->dataPtrForTuple(current_tuple_id)); 
    tuple.move(current_tuple->address()); 

    VOLT_DEBUG("current_tuple_id = %d", current_tuple_id);
#else
    tuple.move(candidates[current_tuple_id].m_addr);
    current_tuple_id++;
    while (candidates[current_tuple_id].m_addr == candidates[current_tuple_id - 1].m_addr) {
        current_tuple_id++;
        if (current_tuple_id == m_size) break;
    }
#endif

    return true; 
}
/**
 * Constructor without predicates.
 */
TableStreamerContext::TableStreamerContext(
        PersistentTable &table,
        PersistentTableSurgeon &surgeon,
        int32_t partitionId,
        TupleSerializer &serializer) :
    m_surgeon(surgeon),
    m_table(table),
    m_maxTupleLength(serializer.getMaxSerializedTupleSize(table.schema())),
    m_serializer(serializer),
    m_partitionId(partitionId)
{}
Example #19
0
TEST_F(PersistentTableTest, TruncateTableTest) {
    VoltDBEngine* engine = getEngine();
    engine->loadCatalog(0, catalogPayload());
    PersistentTable *table = dynamic_cast<PersistentTable*>(
        engine->getTable("T"));
    ASSERT_NE(NULL, table);

    const int tuplesToInsert = 10;
    (void) tuplesToInsert;  // to make compiler happy
    ASSERT_EQ(1, table->allocatedBlockCount());
    bool addTuples = tableutil::addRandomTuples(table, tuplesToInsert);
    if(!addTuples) {
        assert(!"Failed adding random tuples");
    }
    size_t blockCount = table->allocatedBlockCount();

    table = dynamic_cast<PersistentTable*>(engine->getTable("T"));
    ASSERT_NE(NULL, table);
    ASSERT_EQ(blockCount, table->allocatedBlockCount());
    addTuples = tableutil::addRandomTuples(table, tuplesToInsert);
    if(!addTuples) {
        assert(!"Failed adding random tuples");
    }
    table->truncateTable(engine);

    // Refresh the table pointer by fetching the table from the catalog: on truncate, the
    // old table gets replaced with a new, cloned empty table.
    table = dynamic_cast<PersistentTable*>(engine->getTable("T"));
    ASSERT_NE(NULL, table);
    ASSERT_EQ(1, table->allocatedBlockCount());
}
Example #20
0
/*
 * Iterate catalog tables looking for tables that are materialized
 * view sources.  When found, construct a materialized view metadata
 * object that connects the source and destination tables, and assign
 * that object to the source table.
 *
 * Assumes all tables (sources and destinations) have been constructed.
 * @param addAll Pass true to add all views. Pass false to only add new views.
 */
bool VoltDBEngine::initMaterializedViews(bool addAll) {
    map<string, catalog::Table*>::const_iterator tableIterator;
    // walk tables
    for (tableIterator = m_database->tables().begin(); tableIterator != m_database->tables().end(); tableIterator++) {
        catalog::Table *srcCatalogTable = tableIterator->second;
        PersistentTable *srcTable = dynamic_cast<PersistentTable*>(m_tables[srcCatalogTable->relativeIndex()]);
        // walk views
        map<string, catalog::MaterializedViewInfo*>::const_iterator matviewIterator;
        for (matviewIterator = srcCatalogTable->views().begin(); matviewIterator != srcCatalogTable->views().end(); matviewIterator++) {
            catalog::MaterializedViewInfo *catalogView = matviewIterator->second;
            // connect source and destination tables
            if (addAll || catalogView->wasAdded()) {
                const catalog::Table *destCatalogTable = catalogView->dest();
                PersistentTable *destTable = dynamic_cast<PersistentTable*>(m_tables[destCatalogTable->relativeIndex()]);
                MaterializedViewMetadata *mvmd = new MaterializedViewMetadata(srcTable, destTable, catalogView);
                srcTable->addMaterializedView(mvmd);
            }
        }
    }

    return true;
}
/**
 * Constructor with predicates.
 */
TableStreamerContext::TableStreamerContext(
        PersistentTable &table,
        PersistentTableSurgeon &surgeon,
        int32_t partitionId,
        TupleSerializer &serializer,
        const std::vector<std::string> &predicateStrings) :
    m_surgeon(surgeon),
    m_table(table),
    m_maxTupleLength(serializer.getMaxSerializedTupleSize(table.schema())),
    m_serializer(serializer),
    m_partitionId(partitionId)
{
    updatePredicates(predicateStrings);
}
Example #22
0
bool TableStreamer::activateStream(PersistentTable &table, CatalogId tableId)
{
    if (m_context == NULL) {
        // This is the only place that can create a streaming context based on
        // the stream type. Other places shouldn't need to know about the
        // context sub-types.
        try {
            switch (m_streamType) {
                case TABLE_STREAM_SNAPSHOT: {
                    // Constructor can throw exception when it parses the predicates.
                    CopyOnWriteContext *newContext =
                        new CopyOnWriteContext(table, m_tupleSerializer, m_partitionId,
                                               m_predicateStrings, table.activeTupleCount());
                    m_context.reset(newContext);
                    break;
                }

                case TABLE_STREAM_RECOVERY:
                    m_context.reset(new RecoveryContext(table, tableId));
                    break;

                case TABLE_STREAM_ELASTIC:
                    m_context.reset(new ElasticContext(table, m_predicateStrings));
                    break;

                default:
                    assert(false);
            }
        }
        catch(SerializableEEException &e) {
            // m_context will be NULL if we get an exception.
        }
    }

    return (m_context != NULL);
}
Example #23
0
bool EvictionIterator::hasNext()
{        
    VOLT_TRACE("Size: %lu\n", (long unsigned int)m_size);
    PersistentTable* ptable = static_cast<PersistentTable*>(table);

    VOLT_TRACE("Count: %lu %lu\n", ptable->usedTupleCount(), ptable->activeTupleCount());

    if(ptable->usedTupleCount() == 0)
        return false; 

#ifndef ANTICACHE_TIMESTAMPS
    if(current_tuple_id == ptable->getNewestTupleID())
        return false;
    if(ptable->getNumTuplesInEvictionChain() == 0) { // there are no tuples in the chain
        VOLT_DEBUG("There are no tuples in the eviction chain.");
        return false; 
    }
#else
    if (current_tuple_id == m_size)
        return false;
#endif

    return true; 
}
Example #24
0
bool IndexScanExecutor::p_init(AbstractPlanNode *abstractNode,
        TempTableLimits* limits)
{
    VOLT_TRACE("init IndexScan Executor");

    m_projectionNode = NULL;

    m_node = dynamic_cast<IndexScanPlanNode*>(abstractNode);
    assert(m_node);
    assert(m_node->getTargetTable());

    // Create output table based on output schema from the plan
    setTempOutputTable(limits, m_node->getTargetTable()->name());

    //
    // INLINE PROJECTION
    //
    if (m_node->getInlinePlanNode(PLAN_NODE_TYPE_PROJECTION) != NULL) {
        m_projectionNode = static_cast<ProjectionPlanNode*>
            (m_node->getInlinePlanNode(PLAN_NODE_TYPE_PROJECTION));

        m_projector = OptimizedProjector(m_projectionNode->getOutputColumnExpressions());
        m_projector.optimize(m_projectionNode->getOutputTable()->schema(),
                             m_node->getTargetTable()->schema());
    }

    // Inline aggregation can be serial, partial or hash
    m_aggExec = voltdb::getInlineAggregateExecutor(m_abstractNode);

    //
    // Make sure that we have search keys and that they're not null
    //
    m_numOfSearchkeys = (int)m_node->getSearchKeyExpressions().size();
    m_searchKeyArrayPtr =
            boost::shared_array<AbstractExpression*>
    (new AbstractExpression*[m_numOfSearchkeys]);
    m_searchKeyArray = m_searchKeyArrayPtr.get();

    for (int ctr = 0; ctr < m_numOfSearchkeys; ctr++)
    {
        if (m_node->getSearchKeyExpressions()[ctr] == NULL)
        {
            VOLT_ERROR("The search key expression at position '%d' is NULL for"
                    " PlanNode '%s'", ctr, m_node->debug().c_str());
            return false;
        }
        m_searchKeyArrayPtr[ctr] =
                m_node->getSearchKeyExpressions()[ctr];
    }

    //output table should be temptable
    m_outputTable = static_cast<TempTable*>(m_node->getOutputTable());

    // The target table should be a persistent table.
    PersistentTable* targetTable = dynamic_cast<PersistentTable*>(m_node->getTargetTable());
    assert(targetTable);

    TableIndex *tableIndex = targetTable->index(m_node->getTargetIndexName());
    m_searchKeyBackingStore = new char[tableIndex->getKeySchema()->tupleLength()];

    // Grab the Index from our inner table
    // We'll throw an error if the index is missing
    VOLT_TRACE("Index key schema: '%s'", tableIndex->getKeySchema()->debug().c_str());
    //
    // Miscellaneous Information
    //
    m_lookupType = m_node->getLookupType();
    m_sortDirection = m_node->getSortDirection();

    VOLT_DEBUG("IndexScan: %s.%s\n", targetTable->name().c_str(), tableIndex->getName().c_str());

    return true;
}
Example #25
0
bool InsertExecutor::p_init(AbstractPlanNode* abstractNode,
                            const ExecutorVector& executorVector)
{
    VOLT_TRACE("init Insert Executor");

    m_node = dynamic_cast<InsertPlanNode*>(abstractNode);
    assert(m_node);
    assert(m_node->getTargetTable());
    assert(m_node->getInputTableCount() == (m_node->isInline() ? 0 : 1));

    Table* targetTable = m_node->getTargetTable();
    m_isUpsert = m_node->isUpsert();

    //
    // The insert node's input schema is fixed.  But
    // if this is an inline node we don't set it here.
    // We let the parent node set it in p_execute_init.
    //
    // Also, we don't want to set the input table for inline
    // insert nodes.
    //
    if ( ! m_node->isInline()) {
        setDMLCountOutputTable(executorVector.limits());
        m_inputTable = dynamic_cast<AbstractTempTable*>(m_node->getInputTable()); //input table should be temptable
        assert(m_inputTable);
    } else {
        m_inputTable = NULL;
    }

    // Target table can be StreamedTable or PersistentTable and must not be NULL
    PersistentTable *persistentTarget = dynamic_cast<PersistentTable*>(targetTable);
    m_partitionColumn = -1;
    StreamedTable *streamTarget = dynamic_cast<StreamedTable*>(targetTable);
    m_hasStreamView = false;
    if (streamTarget != NULL) {
        m_isStreamed = true;
        //See if we have any views.
        m_hasStreamView = streamTarget->hasViews();
        m_partitionColumn = streamTarget->partitionColumn();
    }
    if (m_isUpsert) {
        VOLT_TRACE("init Upsert Executor actually");
        assert( ! m_node->isInline() );
        if (m_isStreamed) {
            VOLT_ERROR("UPSERT is not supported for Stream table %s", targetTable->name().c_str());
        }
        // look up whether the tuple exists already
        if (persistentTarget->primaryKeyIndex() == NULL) {
            VOLT_ERROR("No primary keys were found in our target table '%s'",
                    targetTable->name().c_str());
        }
    }

    if (persistentTarget) {
        m_partitionColumn = persistentTarget->partitionColumn();
        m_replicatedTableOperation = persistentTarget->isCatalogTableReplicated();
    }

    m_multiPartition = m_node->isMultiPartition();

    m_sourceIsPartitioned = m_node->sourceIsPartitioned();

    // allocate memory for template tuple, set defaults for all columns
    m_templateTupleStorage.init(targetTable->schema());


    TableTuple tuple = m_templateTupleStorage.tuple();

    std::set<int> fieldsExplicitlySet(m_node->getFieldMap().begin(), m_node->getFieldMap().end());
    // These default values are used for an INSERT including the INSERT sub-case of an UPSERT.
    // The defaults are purposely ignored in favor of existing column values
    // for the UPDATE subcase of an UPSERT.
    m_node->initTupleWithDefaultValues(m_engine,
                                       &m_memoryPool,
                                       fieldsExplicitlySet,
                                       tuple,
                                       m_nowFields);
    m_hasPurgeFragment = persistentTarget ? persistentTarget->hasPurgeFragment() : false;

    return true;
}
Example #26
0
bool UpdateExecutor::p_execute(const NValueArray &params, ReadWriteTracker *tracker) {
    assert(m_inputTable);
    assert(m_targetTable);

    VOLT_TRACE("INPUT TABLE: %s\n", m_inputTable->debug().c_str());
    VOLT_TRACE("TARGET TABLE - BEFORE: %s\n", m_targetTable->debug().c_str());

    assert(m_inputTuple.sizeInValues() == m_inputTable->columnCount());
    assert(m_targetTuple.sizeInValues() == m_targetTable->columnCount());
    TableIterator input_iterator(m_inputTable);
    while (input_iterator.next(m_inputTuple)) {
        //
        // OPTIMIZATION: Single-Sited Query Plans
        // If our beloved UpdatePlanNode is a part of a single-site query plan,
        // then the first column in the input table will be the address of a
        // tuple on the target table that we will want to update. This saves us
        // the trouble of having to do an index lookup
        //
        void *target_address = m_inputTuple.getNValue(0).castAsAddress();
        m_targetTuple.move(target_address);
        
        // Read/Write Set Tracking
        if (tracker != NULL) {
            tracker->markTupleWritten(m_targetTable, &m_targetTuple);
        }

        // Loop through INPUT_COL_IDX->TARGET_COL_IDX mapping and only update
        // the values that we need to. The key thing to note here is that we
        // grab a temp tuple that is a copy of the target tuple (i.e., the tuple
        // we want to update). This ensures that if the input tuple is somehow
        // bringing garbage with it, we're only going to copy what we really
        // need to into the target tuple.
        //
        TableTuple &tempTuple = m_targetTable->getTempTupleInlined(m_targetTuple);
        for (int map_ctr = 0; map_ctr < m_inputTargetMapSize; map_ctr++) {
            tempTuple.setNValue(m_inputTargetMap[map_ctr].second,
                                m_inputTuple.getNValue(m_inputTargetMap[map_ctr].first));
        }

        // if there is a partition column for the target table
        if (m_partitionColumn != -1) {
            // check for partition problems
            // get the value for the partition column
            NValue value = tempTuple.getNValue(m_partitionColumn);
            bool isLocal = m_engine->isLocalSite(value);

            // if it doesn't map to this site
            if (!isLocal) {
                VOLT_ERROR("Mispartitioned tuple in single-partition plan for"
                           " table '%s'", m_targetTable->name().c_str());
                return false;
            }
        }

        #ifdef ARIES
        if(m_engine->isARIESEnabled()){

            // add persistency check:
            PersistentTable* table = dynamic_cast<PersistentTable*>(m_targetTable);

            // only log if we are writing to a persistent table.
            if (table != NULL) {
                // before image -- target is old val with no updates
                // XXX: what about uninlined fields?
                // should we not be doing
                // m_targetTable->getTempTupleInlined(m_targetTuple); instead?
                TableTuple *beforeImage = &m_targetTuple;

                // after image -- temp is NEW, created using target and input
                TableTuple *afterImage = &tempTuple;

                TableTuple *keyTuple = NULL;
                char *keydata = NULL;
                std::vector<int32_t> modifiedCols;

                int32_t numCols = -1;

                // See if we can do better by using an index instead
                TableIndex *index = table->primaryKeyIndex();

                if (index != NULL) {
                    // First construct tuple for primary key
                    keydata = new char[index->getKeySchema()->tupleLength()];
                    keyTuple = new TableTuple(keydata, index->getKeySchema());

                    for (int i = 0; i < index->getKeySchema()->columnCount(); i++) {
                        keyTuple->setNValue(i, beforeImage->getNValue(index->getColumnIndices()[i]));
                    }

                    // no before image need be recorded, just the primary key
                    beforeImage = NULL;
                }

                // Set the modified column list
                numCols = m_inputTargetMapSize;

                modifiedCols.resize(m_inputTargetMapSize, -1);

                for (int map_ctr = 0; map_ctr < m_inputTargetMapSize; map_ctr++) {
                    // can't use column-id directly, otherwise we would go over vector bounds
                    int pos = m_inputTargetMap[map_ctr].first - 1;

                    modifiedCols.at(pos)
                    = m_inputTargetMap[map_ctr].second;
                }

                // Next, let the input tuple be the diff after image
                afterImage = &m_inputTuple;

                LogRecord *logrecord = new LogRecord(computeTimeStamp(),
                        LogRecord::T_UPDATE,// this is an update record
                        LogRecord::T_FORWARD,// the system is running normally
                        -1,// XXX: prevLSN must be fetched from table!
                        m_engine->getExecutorContext()->currentTxnId() ,// txn id
                        m_engine->getSiteId(),// which execution site
                        m_targetTable->name(),// the table affected
                        keyTuple,// primary key
                        numCols,
                        (numCols > 0) ? &modifiedCols : NULL,
                        beforeImage,
                        afterImage
                );

                size_t logrecordLength = logrecord->getEstimatedLength();
                char *logrecordBuffer = new char[logrecordLength];

                FallbackSerializeOutput output;
                output.initializeWithPosition(logrecordBuffer, logrecordLength, 0);

                logrecord->serializeTo(output);

                LogManager* m_logManager = this->m_engine->getLogManager();
                Logger m_ariesLogger = m_logManager->getAriesLogger();
                //VOLT_WARN("m_logManager : %p AriesLogger : %p",&m_logManager, &m_ariesLogger);
                const Logger *logger = m_logManager->getThreadLogger(LOGGERID_MM_ARIES);

                logger->log(LOGLEVEL_INFO, output.data(), output.position());

                delete[] logrecordBuffer;
                logrecordBuffer = NULL;

                delete logrecord;
                logrecord = NULL;

                if (keydata != NULL) {
                    delete[] keydata;
                    keydata = NULL;
                }

                if (keyTuple != NULL) {
                    delete keyTuple;
                    keyTuple = NULL;
                }
            }

        }
        #endif

        if (!m_targetTable->updateTuple(tempTuple, m_targetTuple,
                                        m_updatesIndexes)) {
            VOLT_INFO("Failed to update tuple from table '%s'",
                      m_targetTable->name().c_str());
            return false;
        }
    }

    VOLT_TRACE("TARGET TABLE - AFTER: %s\n", m_targetTable->debug().c_str());
    // TODO: let's output the result table here, not in the result executor. Same for
    // delete/insert.

    // add to the planfragments count of modified tuples
    m_engine->m_tuplesModified += m_inputTable->activeTupleCount();

    return true;
}
Example #27
0
bool InsertExecutor::p_execute(const NValueArray &params) {
    assert(m_node == dynamic_cast<InsertPlanNode*>(m_abstractNode));
    assert(m_node);
    assert(m_inputTable == dynamic_cast<TempTable*>(m_node->getInputTable()));
    assert(m_inputTable);

    // Target table can be StreamedTable or PersistentTable and must not be NULL
    // Update target table reference from table delegate
    Table* targetTable = m_node->getTargetTable();
    assert(targetTable);
    assert((targetTable == dynamic_cast<PersistentTable*>(targetTable)) ||
            (targetTable == dynamic_cast<StreamedTable*>(targetTable)));

    PersistentTable* persistentTable = m_isStreamed ?
        NULL : static_cast<PersistentTable*>(targetTable);
    TableTuple upsertTuple = TableTuple(targetTable->schema());

    VOLT_TRACE("INPUT TABLE: %s\n", m_inputTable->debug().c_str());

    // count the number of successful inserts
    int modifiedTuples = 0;

    Table* outputTable = m_node->getOutputTable();
    assert(outputTable);

    TableTuple templateTuple = m_templateTuple.tuple();

    std::vector<int>::iterator it;
    for (it = m_nowFields.begin(); it != m_nowFields.end(); ++it) {
        templateTuple.setNValue(*it, NValue::callConstant<FUNC_CURRENT_TIMESTAMP>());
    }

    VOLT_DEBUG("This is a %s-row insert on partition with id %d",
               m_node->getChildren()[0]->getPlanNodeType() == PLAN_NODE_TYPE_MATERIALIZE ?
               "single" : "multi", m_engine->getPartitionId());
    VOLT_DEBUG("Offset of partition column is %d", m_partitionColumn);

    //
    // An insert is quite simple really. We just loop through our m_inputTable
    // and insert any tuple that we find into our targetTable. It doesn't get any easier than that!
    //
    TableTuple inputTuple(m_inputTable->schema());
    assert (inputTuple.sizeInValues() == m_inputTable->columnCount());
    TableIterator iterator = m_inputTable->iterator();
    while (iterator.next(inputTuple)) {

        for (int i = 0; i < m_node->getFieldMap().size(); ++i) {
            // Most executors will just call setNValue instead of
            // setNValueAllocateForObjectCopies.
            //
            // However, we need to call
            // setNValueAllocateForObjectCopies here.  Sometimes the
            // input table's schema has an inlined string field, and
            // it's being assigned to the target table's non-inlined
            // string field.  In this case we need to tell the NValue
            // where to allocate the string data.
            templateTuple.setNValueAllocateForObjectCopies(m_node->getFieldMap()[i],
                                                           inputTuple.getNValue(i),
                                                           ExecutorContext::getTempStringPool());
        }

        VOLT_TRACE("Inserting tuple '%s' into target table '%s' with table schema: %s",
                   templateTuple.debug(targetTable->name()).c_str(), targetTable->name().c_str(),
                   targetTable->schema()->debug().c_str());

        // if there is a partition column for the target table
        if (m_partitionColumn != -1) {

            // get the value for the partition column
            NValue value = templateTuple.getNValue(m_partitionColumn);
            bool isLocal = m_engine->isLocalSite(value);

            // if it doesn't map to this site
            if (!isLocal) {
                if (!m_multiPartition) {
                    throw ConstraintFailureException(
                            dynamic_cast<PersistentTable*>(targetTable),
                            templateTuple,
                            "Mispartitioned tuple in single-partition insert statement.");
                }

                // don't insert
                continue;
            }
        }

        // for multi partition export tables, only insert into one
        // place (the partition with hash(0)), if the data is from a
        // replicated source.  If the data is coming from a subquery
        // with partitioned tables, we need to perform the insert on
        // every partition.
        if (m_isStreamed && m_multiPartition && !m_sourceIsPartitioned) {
            bool isLocal = m_engine->isLocalSite(ValueFactory::getBigIntValue(0));
            if (!isLocal) continue;
        }


        if (! m_isUpsert) {
            // try to put the tuple into the target table

            if (m_hasPurgeFragment) {
                if (!executePurgeFragmentIfNeeded(&persistentTable))
                    return false;
                // purge fragment might have truncated the table, and
                // refreshed the persistent table pointer.  Make sure to
                // use it when doing the insert below.
                targetTable = persistentTable;
            }

            if (!targetTable->insertTuple(templateTuple)) {
                VOLT_ERROR("Failed to insert tuple from input table '%s' into"
                           " target table '%s'",
                           m_inputTable->name().c_str(),
                           targetTable->name().c_str());
                return false;
            }

        } else {
            // upsert execution logic
            assert(persistentTable->primaryKeyIndex() != NULL);
            TableTuple existsTuple = persistentTable->lookupTupleByValues(templateTuple);

            if (existsTuple.isNullTuple()) {
                // try to put the tuple into the target table

                if (m_hasPurgeFragment) {
                    if (!executePurgeFragmentIfNeeded(&persistentTable))
                        return false;
                }

                if (!persistentTable->insertTuple(templateTuple)) {
                    VOLT_ERROR("Failed to insert tuple from input table '%s' into"
                               " target table '%s'",
                               m_inputTable->name().c_str(),
                               persistentTable->name().c_str());
                    return false;
                }
            } else {
                // tuple exists already, try to update the tuple instead
                upsertTuple.move(templateTuple.address());
                TableTuple &tempTuple = persistentTable->getTempTupleInlined(upsertTuple);

                if (!persistentTable->updateTupleWithSpecificIndexes(existsTuple, tempTuple,
                        persistentTable->allIndexes())) {
                    VOLT_INFO("Failed to update existsTuple from table '%s'",
                            persistentTable->name().c_str());
                    return false;
                }
            }
        }

        // successfully inserted or updated
        modifiedTuples++;
    }

    TableTuple& count_tuple = outputTable->tempTuple();
    count_tuple.setNValue(0, ValueFactory::getBigIntValue(modifiedTuples));
    // try to put the tuple into the output table
    if (!outputTable->insertTuple(count_tuple)) {
        VOLT_ERROR("Failed to insert tuple count (%d) into"
                   " output table '%s'",
                   modifiedTuples,
                   outputTable->name().c_str());
        return false;
    }

    // add to the planfragments count of modified tuples
    m_engine->addToTuplesModified(modifiedTuples);
    VOLT_DEBUG("Finished inserting %d tuples", modifiedTuples);
    return true;
}
Example #28
0
bool DeleteExecutor::p_execute(const NValueArray &params) {
    // target table should be persistenttable
    // update target table reference from table delegate
    PersistentTable* targetTable = dynamic_cast<PersistentTable*>(m_node->getTargetTable());
    assert(targetTable);
    TableTuple targetTuple(targetTable->schema());

    int64_t modified_tuples = 0;

    if (m_truncate) {
        VOLT_TRACE("truncating table %s...", targetTable->name().c_str());
        // count the truncated tuples as deleted
        modified_tuples = targetTable->visibleTupleCount();

        VOLT_TRACE("Delete all rows from table : %s with %d active, %d visible, %d allocated",
                   targetTable->name().c_str(),
                   (int)targetTable->activeTupleCount(),
                   (int)targetTable->visibleTupleCount(),
                   (int)targetTable->allocatedTupleCount());

        // empty the table either by table swap or iteratively deleting tuple-by-tuple
        targetTable->truncateTable(m_engine);
    }
    else {
        assert(m_inputTable);
        assert(m_inputTuple.sizeInValues() == m_inputTable->columnCount());
        assert(targetTuple.sizeInValues() == targetTable->columnCount());
        TableIterator inputIterator = m_inputTable->iterator();
        while (inputIterator.next(m_inputTuple)) {
            //
            // OPTIMIZATION: Single-Sited Query Plans
            // If our beloved DeletePlanNode is a part of a single-site query plan,
            // then the first column in the input table will be the address of a
            // tuple on the target table that we will want to blow away. This saves
            // us the trouble of having to do an index lookup
            //
            void *targetAddress = m_inputTuple.getNValue(0).castAsAddress();
            targetTuple.move(targetAddress);

            // Delete from target table
            if (!targetTable->deleteTuple(targetTuple, true)) {
                VOLT_ERROR("Failed to delete tuple from table '%s'",
                           targetTable->name().c_str());
                return false;
            }
        }
        modified_tuples = m_inputTable->tempTableTupleCount();
        VOLT_TRACE("Deleted %d rows from table : %s with %d active, %d visible, %d allocated",
                   (int)modified_tuples,
                   targetTable->name().c_str(),
                   (int)targetTable->activeTupleCount(),
                   (int)targetTable->visibleTupleCount(),
                   (int)targetTable->allocatedTupleCount());

    }

    TableTuple& count_tuple = m_node->getOutputTable()->tempTuple();
    count_tuple.setNValue(0, ValueFactory::getBigIntValue(modified_tuples));
    // try to put the tuple into the output table
    if (!m_node->getOutputTable()->insertTuple(count_tuple)) {
        VOLT_ERROR("Failed to insert tuple count (%ld) into"
                   " output table '%s'",
                   static_cast<long int>(modified_tuples),
                   m_node->getOutputTable()->name().c_str());
        return false;
    }
    m_engine->addToTuplesModified(modified_tuples);

    return true;
}
Example #29
0
/**
 * Generate hash value for key.
 */
ElasticHash ElasticIndex::generateHash(const PersistentTable &table, const TableTuple &tuple)
{
    return tuple.getNValue(table.partitionColumn()).murmurHash3();
}
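An illustrative sketch only, assuming ElasticHash is an integral type; the modulo bucketing below is not VoltDB's actual elastic range logic, it merely shows how the partition-column hash from generateHash() could drive a routing decision. The helper name pickBucket is hypothetical.
static int pickBucket(const PersistentTable& table, const TableTuple& tuple, int nBuckets) {
    ElasticHash h = ElasticIndex::generateHash(table, tuple);
    // Normalize to a non-negative bucket index regardless of the hash sign (illustrative only).
    return static_cast<int>(((h % nBuckets) + nBuckets) % nBuckets);
}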
Example #30
0
bool IndexScanExecutor::p_execute(const NValueArray &params)
{
    assert(m_node);
    assert(m_node == dynamic_cast<IndexScanPlanNode*>(m_abstractNode));

    // update local target table with its most recent reference
    // The target table should be a persistent table.
    assert(dynamic_cast<PersistentTable*>(m_node->getTargetTable()));
    PersistentTable* targetTable = static_cast<PersistentTable*>(m_node->getTargetTable());

    TableIndex *tableIndex = targetTable->index(m_node->getTargetIndexName());
    IndexCursor indexCursor(tableIndex->getTupleSchema());

    TableTuple searchKey(tableIndex->getKeySchema());
    searchKey.moveNoHeader(m_searchKeyBackingStore);

    assert(m_lookupType != INDEX_LOOKUP_TYPE_EQ ||
            searchKey.getSchema()->columnCount() == m_numOfSearchkeys);

    int activeNumOfSearchKeys = m_numOfSearchkeys;
    IndexLookupType localLookupType = m_lookupType;
    SortDirectionType localSortDirection = m_sortDirection;

    //
    // INLINE LIMIT
    //
    LimitPlanNode* limit_node = dynamic_cast<LimitPlanNode*>(m_abstractNode->getInlinePlanNode(PLAN_NODE_TYPE_LIMIT));
    int limit = CountingPostfilter::NO_LIMIT;
    int offset = CountingPostfilter::NO_OFFSET;
    if (limit_node != NULL) {
        limit_node->getLimitAndOffsetByReference(params, limit, offset);
    }

    //
    // POST EXPRESSION
    //
    AbstractExpression* post_expression = m_node->getPredicate();
    if (post_expression != NULL) {
        VOLT_DEBUG("Post Expression:\n%s", post_expression->debug(true).c_str());
    }

    // Initialize the postfilter
    CountingPostfilter postfilter(m_outputTable, post_expression, limit, offset);

    TableTuple temp_tuple;
    ProgressMonitorProxy pmp(m_engine->getExecutorContext(), this);
    if (m_aggExec != NULL) {
        const TupleSchema * inputSchema = tableIndex->getTupleSchema();
        if (m_projectionNode != NULL) {
            inputSchema = m_projectionNode->getOutputTable()->schema();
        }
        temp_tuple = m_aggExec->p_execute_init(params, &pmp, inputSchema, m_outputTable, &postfilter);
    } else {
        temp_tuple = m_outputTable->tempTuple();
    }

    // Short-circuit an empty scan
    if (m_node->isEmptyScan()) {
        VOLT_DEBUG ("Empty Index Scan :\n %s", m_outputTable->debug().c_str());
        if (m_aggExec != NULL) {
            m_aggExec->p_execute_finish();
        }
        return true;
    }

    //
    // SEARCH KEY
    //
    bool earlyReturnForSearchKeyOutOfRange = false;

    searchKey.setAllNulls();
    VOLT_TRACE("Initial (all null) search key: '%s'", searchKey.debugNoHeader().c_str());

    for (int ctr = 0; ctr < activeNumOfSearchKeys; ctr++) {
        NValue candidateValue = m_searchKeyArray[ctr]->eval(NULL, NULL);
        if (candidateValue.isNull()) {
            // When any part of the search key is NULL, comparing it to anything yields false,
            // so return early; the index comparator may not handle NULL comparisons correctly.
            earlyReturnForSearchKeyOutOfRange = true;
            break;
        }

        try {
            searchKey.setNValue(ctr, candidateValue);
        }
        catch (const SQLException &e) {
            // This next bit of logic handles underflow, overflow and search key length
            // exceeding the variable-length column size (variable length mismatch) when
            // setting up the search keys.
            // e.g. TINYINT > 200 or INT <= 6000000000
            // VarChar(3 bytes) < "abcd" or VarChar(3) > "abbd"

            // re-throw if not an overflow, underflow or variable length mismatch
            // currently, it's expected to always be an overflow or underflow
            if ((e.getInternalFlags() & (SQLException::TYPE_OVERFLOW | SQLException::TYPE_UNDERFLOW | SQLException::TYPE_VAR_LENGTH_MISMATCH)) == 0) {
                throw e;
            }

            // Handle the case where this is a comparison rather than an equality match;
            // a comparison is the only case where the executor might still return matching tuples,
            // e.g. TINYINT < 1000 should return all values
            if ((localLookupType != INDEX_LOOKUP_TYPE_EQ) &&
                    (ctr == (activeNumOfSearchKeys - 1))) {

                if (e.getInternalFlags() & SQLException::TYPE_OVERFLOW) {
                    if ((localLookupType == INDEX_LOOKUP_TYPE_GT) ||
                            (localLookupType == INDEX_LOOKUP_TYPE_GTE)) {

                        // gt or gte when key overflows returns nothing except inline agg
                        earlyReturnForSearchKeyOutOfRange = true;
                        break;
                    }
                    else {
                        // for overflow on reverse scan, we need to
                        // do a forward scan to find the correct start
                        // point, which is exactly what LTE would do.
                        // so, set the lookupType to LTE and the missing
                        // searchkey will be handled by extra post filters
                        localLookupType = INDEX_LOOKUP_TYPE_LTE;
                    }
                }
                if (e.getInternalFlags() & SQLException::TYPE_UNDERFLOW) {
                    if ((localLookupType == INDEX_LOOKUP_TYPE_LT) ||
                            (localLookupType == INDEX_LOOKUP_TYPE_LTE)) {

                        // lt or lte when key underflows returns nothing except inline agg
                        earlyReturnForSearchKeyOutOfRange = true;
                        break;
                    }
                    else {
                        // don't allow GTE because it breaks null handling
                        localLookupType = INDEX_LOOKUP_TYPE_GT;
                    }
                }
                if (e.getInternalFlags() & SQLException::TYPE_VAR_LENGTH_MISMATCH) {
                    // Shrink the search key and add the updated key to the search-key tuple.
                    searchKey.shrinkAndSetNValue(ctr, candidateValue);
                    // The search will be performed on the shrunken key, so update the lookup
                    // operation to account for it.
                    switch (localLookupType) {
                        case INDEX_LOOKUP_TYPE_LT:
                        case INDEX_LOOKUP_TYPE_LTE:
                            localLookupType = INDEX_LOOKUP_TYPE_LTE;
                            break;
                        case INDEX_LOOKUP_TYPE_GT:
                        case INDEX_LOOKUP_TYPE_GTE:
                            localLookupType = INDEX_LOOKUP_TYPE_GT;
                            break;
                        default:
                            assert(!"IndexScanExecutor::p_execute - can't index on not equals");
                            return false;
                    }
                }

                // If we get here, all tuples matching the previous search-key
                // columns need to be scanned (if there is only one column, then
                // all tuples will be scanned). The only exception is when the
                // search key was adjusted above because its length exceeded the
                // variable-length column size.
                if (!(e.getInternalFlags() & SQLException::TYPE_VAR_LENGTH_MISMATCH)) {
                    // For a variable-length mismatch, the search key needed to perform the
                    // search has already been generated and added to the search tuple, so
                    // there is no need to decrement activeNumOfSearchKeys.
                    activeNumOfSearchKeys--;
                }
                if (localSortDirection == SORT_DIRECTION_TYPE_INVALID) {
                    localSortDirection = SORT_DIRECTION_TYPE_ASC;
                }
            }
            // if an EQ comparison is out of range, then return no tuples
            else {
                earlyReturnForSearchKeyOutOfRange = true;
                break;
            }
            break;
        }
    }

    if (earlyReturnForSearchKeyOutOfRange) {
        if (m_aggExec != NULL) {
            m_aggExec->p_execute_finish();
        }
        return true;
    }

    assert((activeNumOfSearchKeys == 0) || (searchKey.getSchema()->columnCount() > 0));
    VOLT_TRACE("Search key after substitutions: '%s', # of active search keys: %d", searchKey.debugNoHeader().c_str(), activeNumOfSearchKeys);

    //
    // END EXPRESSION
    //
    AbstractExpression* end_expression = m_node->getEndExpression();
    if (end_expression != NULL) {
        VOLT_DEBUG("End Expression:\n%s", end_expression->debug(true).c_str());
    }

    // INITIAL EXPRESSION
    AbstractExpression* initial_expression = m_node->getInitialExpression();
    if (initial_expression != NULL) {
        VOLT_DEBUG("Initial Expression:\n%s", initial_expression->debug(true).c_str());
    }

    //
    // SKIP NULL EXPRESSION
    //
    AbstractExpression* skipNullExpr = m_node->getSkipNullPredicate();
    // Handles NULL values in the reverse-scan edge case and the forward-scan underflow case.
    if (skipNullExpr != NULL) {
        VOLT_DEBUG("COUNT NULL Expression:\n%s", skipNullExpr->debug(true).c_str());
    }

    //
    // An index scan has three parts:
    //  (1) Lookup tuples using the search key
    //  (2) For each tuple that comes back, check whether the
    //  end_expression is false.
    //  If it is, then we stop scanning. Otherwise...
    //  (3) Check whether the tuple satisfies the post expression.
    //      If it does, then add it to the output table
    //
    // Use our search key to prime the index iterator
    // Now loop through each tuple given to us by the iterator
    //

    TableTuple tuple;
    if (activeNumOfSearchKeys > 0) {
        VOLT_TRACE("INDEX_LOOKUP_TYPE(%d) m_numSearchkeys(%d) key:%s",
                localLookupType, activeNumOfSearchKeys, searchKey.debugNoHeader().c_str());

        if (localLookupType == INDEX_LOOKUP_TYPE_EQ) {
            tableIndex->moveToKey(&searchKey, indexCursor);
        }
        else if (localLookupType == INDEX_LOOKUP_TYPE_GT) {
            tableIndex->moveToGreaterThanKey(&searchKey, indexCursor);
        }
        else if (localLookupType == INDEX_LOOKUP_TYPE_GTE) {
            tableIndex->moveToKeyOrGreater(&searchKey, indexCursor);
        }
        else if (localLookupType == INDEX_LOOKUP_TYPE_LT) {
            tableIndex->moveToLessThanKey(&searchKey, indexCursor);
        }
        else if (localLookupType == INDEX_LOOKUP_TYPE_LTE) {
            // Find the entry whose key is greater than the search key, then do a
            // forward scan using initialExpr to find the correct start point for
            // the reverse scan.
            bool isEnd = tableIndex->moveToGreaterThanKey(&searchKey, indexCursor);
            if (isEnd) {
                tableIndex->moveToEnd(false, indexCursor);
            }
            else {
                while (!(tuple = tableIndex->nextValue(indexCursor)).isNullTuple()) {
                    pmp.countdownProgress();
                    if (initial_expression != NULL && !initial_expression->eval(&tuple, NULL).isTrue()) {
                        // just passed the first failed entry, so move two entries backward
                        tableIndex->moveToBeforePriorEntry(indexCursor);
                        break;
                    }
                }
                if (tuple.isNullTuple()) {
                    tableIndex->moveToEnd(false, indexCursor);
                }
            }
        }
        else if (localLookupType == INDEX_LOOKUP_TYPE_GEO_CONTAINS) {
            tableIndex->moveToCoveringCell(&searchKey, indexCursor);
        }
        else {
            return false;
        }
    }
    else {
        bool toStartActually = (localSortDirection != SORT_DIRECTION_TYPE_DESC);
        tableIndex->moveToEnd(toStartActually, indexCursor);
    }

    //
    // We have two different nextValue() methods for different lookup types
    //
    while (postfilter.isUnderLimit() &&
           getNextTuple(localLookupType,
                        &tuple,
                        tableIndex,
                        &indexCursor,
                        activeNumOfSearchKeys)) {
        if (tuple.isPendingDelete()) {
            continue;
        }
        VOLT_TRACE("LOOPING in indexscan: tuple: '%s'\n", tuple.debug("tablename").c_str());

        pmp.countdownProgress();
        //
        // First check to eliminate the null index rows for UNDERFLOW case only
        //
        if (skipNullExpr != NULL) {
            if (skipNullExpr->eval(&tuple, NULL).isTrue()) {
                VOLT_DEBUG("Index scan: find out null rows or columns.");
                continue;
            } else {
                skipNullExpr = NULL;
            }
        }
        //
        // Then check whether the end_expression is now false
        //
        if (end_expression != NULL && !end_expression->eval(&tuple, NULL).isTrue()) {
            VOLT_TRACE("End Expression evaluated to false, stopping scan");
            break;
        }
        //
        // Then apply our post-predicate and LIMIT/OFFSET to do further filtering
        //
        if (postfilter.eval(&tuple, NULL)) {

            if (m_projector.numSteps() > 0) {
                m_projector.exec(temp_tuple, tuple);
                outputTuple(postfilter, temp_tuple);
            }
            else {
                outputTuple(postfilter, tuple);
            }
            pmp.countdownProgress();
        }
    }

    if (m_aggExec != NULL) {
        m_aggExec->p_execute_finish();
    }


    VOLT_DEBUG ("Index Scanned :\n %s", m_outputTable->debug().c_str());
    return true;
}