TEST_F(TableTupleFilterTest, tableTupleFilterTest)
{
    // Marker value used to tag the selected tuples inside the filter.
    static const int MARKER = 33;

    TempTable* table = getTempTable();
    TableTupleFilter tableFilter;
    tableFilter.init(table);

    // The table must span more than one block for this test to be meaningful.
    int tuplesPerBlock = table->getTuplesPerBlock();
    ASSERT_TRUE(NUM_OF_TUPLES / tuplesPerBlock > 1);

    TableTuple tuple = table->tempTuple();
    TableIterator iterator = table->iterator();

    // Walk the table and mark every 5th tuple, remembering its value
    // (column 1) so the filter iteration below can be validated against it.
    std::multiset<int64_t> expectedValues;
    int visited = 0;
    while (iterator.next(tuple)) {
        if (++visited % 5 == 0) {
            NValue nvalue = tuple.getNValue(1);
            expectedValues.insert(ValuePeeker::peekBigInt(nvalue));
            tableFilter.updateTuple(tuple, MARKER);
        }
    }

    // Every marked tuple must come back from the filter iterator exactly once.
    TableTupleFilter_iter<MARKER> endItr = tableFilter.end<MARKER>();
    for (TableTupleFilter_iter<MARKER> itr = tableFilter.begin<MARKER>();
         itr != endItr; ++itr) {
        uint64_t tupleAddr = tableFilter.getTupleAddress(*itr);
        tuple.move((char *)tupleAddr);
        ASSERT_TRUE(tuple.isActive());
        NValue nvalue = tuple.getNValue(1);
        int64_t value = ValuePeeker::peekBigInt(nvalue);
        ASSERT_FALSE(expectedValues.empty());
        auto match = expectedValues.find(value);
        ASSERT_NE(match, expectedValues.end());
        expectedValues.erase(match);
    }
    // Nothing may be left over: the iterator must not have skipped a marked tuple.
    ASSERT_TRUE(expectedValues.empty());
}
bool NestLoopIndexExecutor::p_execute(const NValueArray ¶ms) { assert(dynamic_cast<NestLoopIndexPlanNode*>(m_abstractNode)); NestLoopIndexPlanNode* node = static_cast<NestLoopIndexPlanNode*>(m_abstractNode); // output table must be a temp table assert(m_tmpOutputTable); // target table is a persistent table assert(dynamic_cast<PersistentTable*>(m_indexNode->getTargetTable())); PersistentTable* inner_table = static_cast<PersistentTable*>(m_indexNode->getTargetTable()); TableIndex* index = inner_table->index(m_indexNode->getTargetIndexName()); assert(index); IndexCursor indexCursor(index->getTupleSchema()); //outer_table is the input table that have tuples to be iterated assert(node->getInputTableCount() == 1); Table* outer_table = node->getInputTable(); assert(outer_table); VOLT_TRACE("executing NestLoopIndex with outer table: %s, inner table: %s", outer_table->debug().c_str(), inner_table->debug().c_str()); // // Substitute parameter to SEARCH KEY Note that the expressions // will include TupleValueExpression even after this substitution // int num_of_searchkeys = static_cast <int> (m_indexNode->getSearchKeyExpressions().size()); for (int ctr = 0; ctr < num_of_searchkeys; ctr++) { VOLT_TRACE("Search Key[%d]:\n%s", ctr, m_indexNode->getSearchKeyExpressions()[ctr]->debug(true).c_str()); } // end expression AbstractExpression* end_expression = m_indexNode->getEndExpression(); if (end_expression) { VOLT_TRACE("End Expression:\n%s", end_expression->debug(true).c_str()); } // post expression AbstractExpression* post_expression = m_indexNode->getPredicate(); if (post_expression != NULL) { VOLT_TRACE("Post Expression:\n%s", post_expression->debug(true).c_str()); } // initial expression AbstractExpression* initial_expression = m_indexNode->getInitialExpression(); if (initial_expression != NULL) { VOLT_TRACE("Initial Expression:\n%s", initial_expression->debug(true).c_str()); } // SKIP NULL EXPRESSION AbstractExpression* skipNullExpr = m_indexNode->getSkipNullPredicate(); // 
For reverse scan edge case NULL values and forward scan underflow case. if (skipNullExpr != NULL) { VOLT_DEBUG("Skip NULL Expression:\n%s", skipNullExpr->debug(true).c_str()); } // pre join expression AbstractExpression* prejoin_expression = node->getPreJoinPredicate(); if (prejoin_expression != NULL) { VOLT_TRACE("Prejoin Expression:\n%s", prejoin_expression->debug(true).c_str()); } // where expression AbstractExpression* where_expression = node->getWherePredicate(); if (where_expression != NULL) { VOLT_TRACE("Where Expression:\n%s", where_expression->debug(true).c_str()); } LimitPlanNode* limit_node = dynamic_cast<LimitPlanNode*>(node->getInlinePlanNode(PLAN_NODE_TYPE_LIMIT)); int limit = CountingPostfilter::NO_LIMIT; int offset = CountingPostfilter::NO_OFFSET; if (limit_node) { limit_node->getLimitAndOffsetByReference(params, limit, offset); } // Init the postfilter CountingPostfilter postfilter(m_tmpOutputTable, where_expression, limit, offset); // // OUTER TABLE ITERATION // TableTuple outer_tuple(outer_table->schema()); TableTuple inner_tuple(inner_table->schema()); TableIterator outer_iterator = outer_table->iteratorDeletingAsWeGo(); int num_of_outer_cols = outer_table->columnCount(); assert (outer_tuple.sizeInValues() == outer_table->columnCount()); assert (inner_tuple.sizeInValues() == inner_table->columnCount()); const TableTuple &null_inner_tuple = m_null_inner_tuple.tuple(); ProgressMonitorProxy pmp(m_engine->getExecutorContext(), this); // The table filter to keep track of inner tuples that don't match any of outer tuples for FULL joins TableTupleFilter innerTableFilter; if (m_joinType == JOIN_TYPE_FULL) { // Prepopulate the set with all inner tuples innerTableFilter.init(inner_table); } TableTuple join_tuple; // It's not immediately obvious here, so there's some subtlety to // note with respect to the schema of the join_tuple. 
// // The inner_tuple is used to represent the values from the inner // table in the case of the join predicate passing, and for left // outer joins, the null_tuple is used if there is no match. Both // of these tuples include the complete schema of the table being // scanned. The inner table is being scanned via an inlined scan // node, so there is no temp table corresponding to it. // // Predicates that are evaluated against the inner table should // therefore use the complete schema of the table being scanned. // // The join_tuple is the tuple that contains the values that we // actually want to put in the output of the join (or to aggregate // if there is an inlined agg plan node). This tuple needs to // omit the unused columns from the inner table. The inlined // index scan itself has an inlined project node that defines the // columns that should be output by the join, and omits those that // are not needed. So the join_tuple contains the columns we're // using from the outer table, followed by the "projected" schema // for the inlined scan of the inner table. if (m_aggExec != NULL) { VOLT_TRACE("Init inline aggregate..."); const TupleSchema * aggInputSchema = node->getTupleSchemaPreAgg(); join_tuple = m_aggExec->p_execute_init(params, &pmp, aggInputSchema, m_tmpOutputTable, &postfilter); } else { join_tuple = m_tmpOutputTable->tempTuple(); } VOLT_TRACE("<num_of_outer_cols>: %d\n", num_of_outer_cols); while (postfilter.isUnderLimit() && outer_iterator.next(outer_tuple)) { VOLT_TRACE("outer_tuple:%s", outer_tuple.debug(outer_table->name()).c_str()); pmp.countdownProgress(); // Set the join tuple columns that originate solely from the outer tuple. // Must be outside the inner loop in case of the empty inner table. join_tuple.setNValues(0, outer_tuple, 0, num_of_outer_cols); // did this loop body find at least one match for this tuple? 
bool outerMatch = false; // For outer joins if outer tuple fails pre-join predicate // (join expression based on the outer table only) // it can't match any of inner tuples if (prejoin_expression == NULL || prejoin_expression->eval(&outer_tuple, NULL).isTrue()) { int activeNumOfSearchKeys = num_of_searchkeys; VOLT_TRACE ("<Nested Loop Index exec, WHILE-LOOP...> Number of searchKeys: %d \n", num_of_searchkeys); IndexLookupType localLookupType = m_lookupType; SortDirectionType localSortDirection = m_sortDirection; VOLT_TRACE("Lookup type: %d\n", m_lookupType); VOLT_TRACE("SortDirectionType: %d\n", m_sortDirection); // did setting the search key fail (usually due to overflow) bool keyException = false; // // Now use the outer table tuple to construct the search key // against the inner table // const TableTuple& index_values = m_indexValues.tuple(); index_values.setAllNulls(); for (int ctr = 0; ctr < activeNumOfSearchKeys; ctr++) { // in a normal index scan, params would be substituted here, // but this scan fills in params outside the loop NValue candidateValue = m_indexNode->getSearchKeyExpressions()[ctr]->eval(&outer_tuple, NULL); if (candidateValue.isNull()) { // when any part of the search key is NULL, the result is false when it compares to anything. // do early return optimization, our index comparator may not handle null comparison correctly. keyException = true; break; } try { index_values.setNValue(ctr, candidateValue); } catch (const SQLException &e) { // This next bit of logic handles underflow and overflow while // setting up the search keys. // e.g. 
TINYINT > 200 or INT <= 6000000000 // re-throw if not an overflow or underflow // currently, it's expected to always be an overflow or underflow if ((e.getInternalFlags() & (SQLException::TYPE_OVERFLOW | SQLException::TYPE_UNDERFLOW | SQLException::TYPE_VAR_LENGTH_MISMATCH)) == 0) { throw e; } // handle the case where this is a comparison, rather than equality match // comparison is the only place where the executor might return matching tuples // e.g. TINYINT < 1000 should return all values if ((localLookupType != INDEX_LOOKUP_TYPE_EQ) && (ctr == (activeNumOfSearchKeys - 1))) { if (e.getInternalFlags() & SQLException::TYPE_OVERFLOW) { if ((localLookupType == INDEX_LOOKUP_TYPE_GT) || (localLookupType == INDEX_LOOKUP_TYPE_GTE)) { // gt or gte when key overflows breaks out // and only returns for left-outer keyException = true; break; // the outer while loop } else { // overflow of LT or LTE should be treated as LTE // to issue an "initial" forward scan localLookupType = INDEX_LOOKUP_TYPE_LTE; } } if (e.getInternalFlags() & SQLException::TYPE_UNDERFLOW) { if ((localLookupType == INDEX_LOOKUP_TYPE_LT) || (localLookupType == INDEX_LOOKUP_TYPE_LTE)) { // overflow of LT or LTE should be treated as LTE // to issue an "initial" forward scans localLookupType = INDEX_LOOKUP_TYPE_LTE; } else { // don't allow GTE because it breaks null handling localLookupType = INDEX_LOOKUP_TYPE_GT; } } if (e.getInternalFlags() & SQLException::TYPE_VAR_LENGTH_MISMATCH) { // shrink the search key and add the updated key to search key table tuple index_values.shrinkAndSetNValue(ctr, candidateValue); // search will be performed on shrinked key, so update lookup operation // to account for it switch (localLookupType) { case INDEX_LOOKUP_TYPE_LT: case INDEX_LOOKUP_TYPE_LTE: localLookupType = INDEX_LOOKUP_TYPE_LTE; break; case INDEX_LOOKUP_TYPE_GT: case INDEX_LOOKUP_TYPE_GTE: localLookupType = INDEX_LOOKUP_TYPE_GT; break; default: assert(!"IndexScanExecutor::p_execute - can't index on not equals"); 
return false; } } // if here, means all tuples with the previous searchkey // columns need to be scaned. activeNumOfSearchKeys--; if (localSortDirection == SORT_DIRECTION_TYPE_INVALID) { localSortDirection = SORT_DIRECTION_TYPE_ASC; } } // if a EQ comparison is out of range, then the tuple from // the outer loop returns no matches (except left-outer) else { keyException = true; } break; } // End catch block for under- or overflow when setting index key } // End for each active search key VOLT_TRACE("Searching %s", index_values.debug("").c_str()); // if a search value didn't fit into the targeted index key, skip this key if (!keyException) { // // Our index scan on the inner table is going to have three parts: // (1) Lookup tuples using the search key // // (2) For each tuple that comes back, check whether the // end_expression is false. If it is, then we stop // scanning. Otherwise... // // (3) Check whether the tuple satisfies the post expression. // If it does, then add it to the output table // // Use our search key to prime the index iterator // The loop through each tuple given to us by the iterator // // Essentially cut and pasted this if ladder from // index scan executor if (num_of_searchkeys > 0) { if (localLookupType == INDEX_LOOKUP_TYPE_EQ) { index->moveToKey(&index_values, indexCursor); } else if (localLookupType == INDEX_LOOKUP_TYPE_GT) { index->moveToGreaterThanKey(&index_values, indexCursor); } else if (localLookupType == INDEX_LOOKUP_TYPE_GTE) { index->moveToKeyOrGreater(&index_values, indexCursor); } else if (localLookupType == INDEX_LOOKUP_TYPE_LT) { index->moveToLessThanKey(&index_values, indexCursor); } else if (localLookupType == INDEX_LOOKUP_TYPE_LTE) { // find the entry whose key is greater than search key, // do a forward scan using initialExpr to find the correct // start point to do reverse scan bool isEnd = index->moveToGreaterThanKey(&index_values, indexCursor); if (isEnd) { index->moveToEnd(false, indexCursor); } else { while 
(!(inner_tuple = index->nextValue(indexCursor)).isNullTuple()) { pmp.countdownProgress(); if (initial_expression != NULL && !initial_expression->eval(&outer_tuple, &inner_tuple).isTrue()) { // just passed the first failed entry, so move 2 backward index->moveToBeforePriorEntry(indexCursor); break; } } if (inner_tuple.isNullTuple()) { index->moveToEnd(false, indexCursor); } } } else if (localLookupType == INDEX_LOOKUP_TYPE_GEO_CONTAINS) { index->moveToCoveringCell(&index_values, indexCursor); } else { return false; } } else { bool toStartActually = (localSortDirection != SORT_DIRECTION_TYPE_DESC); index->moveToEnd(toStartActually, indexCursor); } AbstractExpression* skipNullExprIteration = skipNullExpr; while (postfilter.isUnderLimit() && IndexScanExecutor::getNextTuple(localLookupType, &inner_tuple, index, &indexCursor, num_of_searchkeys)) { if (inner_tuple.isPendingDelete()) { continue; } VOLT_TRACE("inner_tuple:%s", inner_tuple.debug(inner_table->name()).c_str()); pmp.countdownProgress(); // // First check to eliminate the null index rows for UNDERFLOW case only // if (skipNullExprIteration != NULL) { if (skipNullExprIteration->eval(&outer_tuple, &inner_tuple).isTrue()) { VOLT_DEBUG("Index scan: find out null rows or columns."); continue; } skipNullExprIteration = NULL; } // // First check whether the end_expression is now false // if (end_expression != NULL && !end_expression->eval(&outer_tuple, &inner_tuple).isTrue()) { VOLT_TRACE("End Expression evaluated to false, stopping scan\n"); break; } // // Then apply our post-predicate to do further filtering // if (post_expression == NULL || post_expression->eval(&outer_tuple, &inner_tuple).isTrue()) { outerMatch = true; // The inner tuple passed the join conditions if (m_joinType == JOIN_TYPE_FULL) { // Mark inner tuple as matched innerTableFilter.updateTuple(inner_tuple, MATCHED_TUPLE); } // Still need to pass where filtering if (postfilter.eval(&outer_tuple, &inner_tuple)) { // // Try to put the tuple into our 
output table // Append the inner values to the end of our join tuple // for (int col_ctr = num_of_outer_cols; col_ctr < join_tuple.sizeInValues(); ++col_ctr) { join_tuple.setNValue(col_ctr, m_outputExpressions[col_ctr]->eval(&outer_tuple, &inner_tuple)); } VOLT_TRACE("join_tuple tuple: %s", join_tuple.debug(m_tmpOutputTable->name()).c_str()); VOLT_TRACE("MATCH: %s", join_tuple.debug(m_tmpOutputTable->name()).c_str()); outputTuple(postfilter, join_tuple, pmp); } } } // END INNER WHILE LOOP } // END IF INDEX KEY EXCEPTION CONDITION } // END IF PRE JOIN CONDITION // // Left/Full Outer Join // if (m_joinType != JOIN_TYPE_INNER && !outerMatch && postfilter.isUnderLimit()) { // Still needs to pass the filter if (postfilter.eval(&outer_tuple, &null_inner_tuple)) { // Matched! Complete the joined tuple with null inner column values. for (int col_ctr = num_of_outer_cols; col_ctr < join_tuple.sizeInValues(); ++col_ctr) { join_tuple.setNValue(col_ctr, m_outputExpressions[col_ctr]->eval(&outer_tuple, &null_inner_tuple)); } outputTuple(postfilter, join_tuple, pmp); } } } // END OUTER WHILE LOOP // // FULL Outer Join. Iterate over the unmatched inner tuples // if (m_joinType == JOIN_TYPE_FULL && postfilter.isUnderLimit()) { // Preset outer columns to null const TableTuple& null_outer_tuple = m_null_outer_tuple.tuple(); join_tuple.setNValues(0, null_outer_tuple, 0, num_of_outer_cols); TableTupleFilter_iter<UNMATCHED_TUPLE> endItr = innerTableFilter.end<UNMATCHED_TUPLE>(); for (TableTupleFilter_iter<UNMATCHED_TUPLE> itr = innerTableFilter.begin<UNMATCHED_TUPLE>(); itr != endItr && postfilter.isUnderLimit(); ++itr) { // Restore the tuple value uint64_t tupleAddr = innerTableFilter.getTupleAddress(*itr); inner_tuple.move((char *)tupleAddr); // Still needs to pass the filter assert(inner_tuple.isActive()); if (postfilter.eval(&null_outer_tuple, &inner_tuple)) { // Passed! Complete the joined tuple with the inner column values. 
for (int col_ctr = num_of_outer_cols; col_ctr < join_tuple.sizeInValues(); ++col_ctr) { join_tuple.setNValue(col_ctr, m_outputExpressions[col_ctr]->eval(&null_outer_tuple, &inner_tuple)); } outputTuple(postfilter, join_tuple, pmp); } } } if (m_aggExec != NULL) { m_aggExec->p_execute_finish(); } VOLT_TRACE ("result table:\n %s", m_tmpOutputTable->debug().c_str()); VOLT_TRACE("Finished NestLoopIndex"); cleanupInputTempTable(inner_table); cleanupInputTempTable(outer_table); return (true); }