Example #1
0
bool AggregateExecutorBase::p_init(AbstractPlanNode*, TempTableLimits* limits)
{
    AggregatePlanNode* node = dynamic_cast<AggregatePlanNode*>(m_abstractNode);
    assert(node);

    m_inputExpressions = node->getAggregateInputExpressions();
    for (int i = 0; i < m_inputExpressions.size(); i++) {
        VOLT_DEBUG("\nAGG INPUT EXPRESSION: %s\n",
                   m_inputExpressions[i] ? m_inputExpressions[i]->debug().c_str() : "null");
    }

    /*
     * Find the difference between the set of aggregate output columns
     * (output columns resulting from an aggregate) and output columns.
     * Columns that are not the result of aggregates are being passed
     * through from the input table. Do this extra work here rather then
     * serialize yet more data.
     */
    std::vector<bool> outputColumnsResultingFromAggregates(node->getOutputSchema().size(), false);
    m_aggregateOutputColumns = node->getAggregateOutputColumns();
    BOOST_FOREACH(int aOC, m_aggregateOutputColumns) {
        outputColumnsResultingFromAggregates[aOC] = true;
    }
    for (int ii = 0; ii < outputColumnsResultingFromAggregates.size(); ii++) {
        if (outputColumnsResultingFromAggregates[ii] == false) {
            m_passThroughColumns.push_back(ii);
        }
    }

    if (!node->isInline()) {
        setTempOutputTable(limits);
    }
    m_partialSerialGroupByColumns = node->getPartialGroupByColumns();

    m_aggTypes = node->getAggregates();
    m_distinctAggs = node->getDistinctAggregates();
    m_groupByExpressions = node->getGroupByExpressions();
    node->collectOutputExpressions(m_outputColumnExpressions);

    // m_passThroughColumns.size() == m_groupByExpressions.size() is not true,
    // Because group by unique column may be able to select other columns
    m_prePredicate = node->getPrePredicate();
    m_postPredicate = node->getPostPredicate();

    m_groupByKeySchema = constructGroupBySchema(false);
    m_groupByKeyPartialHashSchema = NULL;
    if (m_partialSerialGroupByColumns.size() > 0) {
        for (int ii = 0; ii < m_groupByExpressions.size(); ii++) {
            if (std::find(m_partialSerialGroupByColumns.begin(),
                          m_partialSerialGroupByColumns.end(), ii)
                       == m_partialSerialGroupByColumns.end() )
            {
                // Find the partial hash group by columns
                m_partialHashGroupByColumns.push_back(ii);;
            }
        }
        m_groupByKeyPartialHashSchema = constructGroupBySchema(true);
    }

    return true;
}
Example #2
0
bool AggregateExecutorBase::p_init(AbstractPlanNode*, TempTableLimits* limits)
{
    AggregatePlanNode* node = dynamic_cast<AggregatePlanNode*>(m_abstractNode);
    assert(node);
    assert(node->getChildren().size() == 1);
    assert(node->getChildren()[0] != NULL);

    m_inputExpressions = node->getAggregateInputExpressions();
    for (int i = 0; i < m_inputExpressions.size(); i++) {
        VOLT_DEBUG("\nAGG INPUT EXPRESSION: %s\n",
                   m_inputExpressions[i] ? m_inputExpressions[i]->debug().c_str() : "null");
    }

    /*
     * Find the difference between the set of aggregate output columns
     * (output columns resulting from an aggregate) and output columns.
     * Columns that are not the result of aggregates are being passed
     * through from the input table. Do this extra work here rather then
     * serialize yet more data.
     */
    std::vector<bool> outputColumnsResultingFromAggregates(node->getOutputSchema().size(), false);
    std::vector<int> aggregateOutputColumns = node->getAggregateOutputColumns();
    BOOST_FOREACH(int aOC, aggregateOutputColumns) {
        outputColumnsResultingFromAggregates[aOC] = true;
    }

    /*
     * Now collect the indices in the output table of the pass
     * through columns.
     */
    for (int ii = 0; ii < outputColumnsResultingFromAggregates.size(); ii++) {
        if (outputColumnsResultingFromAggregates[ii] == false) {
            m_passThroughColumns.push_back(ii);
        }
    }

    setTempOutputTable(limits);

    m_aggTypes = node->getAggregates();
    m_distinctAggs = node->getDistinctAggregates();
    m_groupByExpressions = node->getGroupByExpressions();
    node->collectOutputExpressions(m_outputColumnExpressions);
    m_aggregateOutputColumns = node->getAggregateOutputColumns();
    m_prePredicate = node->getPrePredicate();
    m_postPredicate = node->getPostPredicate();

    std::vector<ValueType> groupByColumnTypes;
    std::vector<int32_t> groupByColumnSizes;
    std::vector<bool> groupByColumnAllowNull;
    for (int ii = 0; ii < m_groupByExpressions.size(); ii++) {
        AbstractExpression* expr = m_groupByExpressions[ii];
        groupByColumnTypes.push_back(expr->getValueType());
        groupByColumnSizes.push_back(expr->getValueSize());
        groupByColumnAllowNull.push_back(true);
    }
    m_groupByKeySchema = TupleSchema::createTupleSchema(groupByColumnTypes,
                                                        groupByColumnSizes,
                                                        groupByColumnAllowNull,
                                                        true);
    return true;
}