/** * \brief Reduce down the values from another aggregate UDF instance, and copy in to this aggregate instance */ void reduce(const AggregateUDF* otherAgg, Reporter& reporter) { try { // loop through other's values and add to our equivalent value (if exists) // if doesn't exist in our values, add to our values const GroupAndAggregateUDF* other = (const GroupAndAggregateUDF*)otherAgg; for(auto ivi=other->interimValues.begin(); ivi!=other->interimValues.end(); ++ivi) { std::string groupBy(ivi->first); auto it = interimValues.find(groupBy); if (it == interimValues.end()) { // new IInterimResult* newInterim = resultTypeRef(); newInterim->groupByValue = groupBy; newInterim->merge(ivi->second); interimValues.insert(std::pair<std::string,IInterimResult*>(groupBy,newInterim)); } else { IInterimResult* interim = interimValues.at(groupBy); // merge interim->merge(ivi->second); } } // end for } catch (std::exception& ex) { reporter.error(("Exception in reduce(): " + std::string(ex.what())).c_str()); } } // end reduce
/**
 * \brief Get the values we need for the aggregate and cache them
 *
 * Assume the first column (index 0) is the group by field (with LOW cardinality)
 *
 * Each tuple is routed to the interim result stored under its group-by value.
 * When no interim result exists for that value yet, one is created through
 * resultTypeRef, tagged with the value and stored before the tuple is added.
 *
 * \param values   Iterator over the tuples to aggregate; advanced until done()
 * \param reporter Receives the message of any std::exception raised while mapping
 */
void map(TupleIterator& values, Reporter& reporter) {
  try {
    for (; !values.done(); values.next()) {
      String tempString;
      values.value(0, tempString);
      const std::string groupBy = std::string(tempString);

      auto existing = interimValues.find(groupBy);
      if (existing != interimValues.end()) {
        // known group: reuse the iterator from find() rather than looking the key up again
        existing->second->add(values);
      } else {
        // first tuple for this group
        IInterimResult* created = resultTypeRef();
        created->groupByValue = groupBy;
        created->add(values);
        interimValues.insert(std::pair<std::string,IInterimResult*>(groupBy, created));
      }
    }
  } catch (const std::exception& ex) {
    const std::string message = "Exception in map(): " + std::string(ex.what());
    reporter.error(message.c_str());
  }
}
/**
 * \brief Return the final result from the remaining aggregate UDF instance
 *
 * Asks every stored interim result to write itself to the output sequence,
 * visiting them in the iteration order of interimValues.
 *
 * \param os       Output sequence handed to each interim result's writeToMap()
 * \param reporter Receives the message of any std::exception raised while writing
 */
void finish(OutputSequence& os, Reporter& reporter) {
  try {
    for (const auto& entry : interimValues) {
      entry.second->writeToMap(os);
    }
  } catch (const std::exception& ex) {
    const std::string message = "Exception in finish(): " + std::string(ex.what());
    reporter.error(message.c_str());
  }
}
/**
 * \brief Decode from XDQP stream
 *
 * Reads the number of interim results, then decodes that many results, each
 * created through resultTypeRef. Every decoded result is stored in
 * interimValues under its groupByValue. Previously the decoded objects were
 * never stored, so the decoded state was discarded and each object leaked.
 *
 * \param d        Decoder to read the count and the interim results from
 * \param reporter Passed to each interim result's decode(); also receives the
 *                 message of any std::exception raised while decoding
 */
void decode(Decoder& d, Reporter& reporter) {
  try {
    int size = 0;
    d.decode(size);
    for (int i = 0; i < size; ++i) {
      IInterimResult* interim = resultTypeRef();
      interim->decode(d, reporter);

      // NOTE(review): assumes IInterimResult::decode() restores groupByValue,
      // i.e. that the matching encode() writes it - confirm in the result types.
      auto existing = interimValues.find(interim->groupByValue);
      if (existing == interimValues.end()) {
        interimValues.insert(std::pair<std::string,IInterimResult*>(interim->groupByValue, interim));
      } else {
        // Key already present: fold the decoded data into the stored result,
        // the same way reduce() handles a group both sides know about.
        existing->second->merge(interim);
        // NOTE(review): assumes results from resultTypeRef() are allocated with
        // new and that IInterimResult has a virtual destructor - confirm.
        delete interim;
      }
    }
  } catch (const std::exception& ex) {
    const std::string message = "Exception in decode(): " + std::string(ex.what());
    reporter.error(message.c_str());
  }
}
/**
 * \brief Encode to XDQP stream
 *
 * Writes the number of stored interim results as an int, followed by each
 * interim result in the iteration order of interimValues.
 *
 * \param e        Encoder to write the count and the interim results to
 * \param reporter Passed to each interim result's encode(); also receives the
 *                 message of any std::exception raised while encoding
 */
void encode(Encoder& e, Reporter& reporter) {
  try {
    // TODO consider gzip compression at this top level, and using a std::ostringstream
    //      to collate values (optional setting at aggregate level)

    // resultTypeRef is not encoded: that's handled in the start() or clone() function.
    // The count is needed though, so the reader knows how many results follow.
    e.encode(static_cast<int>(interimValues.size()));

    for (const auto& entry : interimValues) {
      entry.second->encode(e, reporter);
    }
  } catch (const std::exception& ex) {
    const std::string message = "Exception in encode(): " + std::string(ex.what());
    reporter.error(message.c_str());
  }
}
/**
 * \brief Configure the aggregate from its arguments
 *
 * Selects the factory used to create interim results. The mean-average
 * factory is the default; an argument equal to "aggregate=sum" switches to
 * the sum factory. Any other argument is currently ignored.
 *
 * \param args     Argument sequence; consumed until done()
 * \param reporter Receives the message of any std::exception raised while reading arguments
 */
void start(Sequence& args, Reporter& reporter) {
  try {
    // default to mean average (takes the address of the factory, does not call it)
    resultTypeRef = &MeanInterimResult::create;

    while (!args.done()) {
      String argValue;
      args.value(argValue);
      const std::string arg = std::string(argValue);

      if (arg == "aggregate=sum") {
        resultTypeRef = &SumInterimResult::create;
      }
      // TODO other parameters
      // TODO check for a "compression=gzip" setting, and set our configuration accordingly

      args.next();
    }
  } catch (const std::exception& ex) {
    const std::string message = "Exception in start(): " + std::string(ex.what());
    reporter.error(message.c_str());
  }
}