// Compile-time hook that tells the optimizer which input columns this
// UDF needs and where each predicate of the query should be evaluated.
//
// Parameter 0 of the UDF names the id column and parameter 1 names the
// timestamp column of the input table. Output column 0 is SESSION_ID
// and output column 1 is SEQUENCE_NO.
void Sessionize::describeDataflowAndPredicates(UDRInvocationInfo &info)
{
  // Begin with what the base class does for a reducer: predicates on
  // the key/id column get pushed down.
  UDR::describeDataflowAndPredicates(info);

  // Drop passthru columns nobody asked for, so the child does not have
  // to produce them. NOTE: this may renumber the input columns,
  // including the id and timestamp columns.
  info.setUnusedPassthruColumns();

  // The call above may also have flagged the id or timestamp column as
  // unused. Look both up again (their numbers may have changed) and
  // force them back to USED, since the UDF cannot work without them.
  InternalColumns requiredCols(
       info.in().getColNum(info.par().getString(0)),
       info.in().getColNum(info.par().getString(1)));

  info.setChildColumnUsage(0, requiredCols.getIdColumn(), ColumnInfo::USED);
  info.setChildColumnUsage(0, requiredCols.getTsColumn(), ColumnInfo::USED);

  // True when the query references neither of the two generated
  // columns (session_id, sequence_no).
  const bool sessionColsUnused =
       info.out().getColumn(0).getUsage() != ColumnInfo::USED &&
       info.out().getColumn(1).getUsage() != ColumnInfo::USED;

  // Decide, predicate by predicate, where it should be evaluated.
  for (int predNum = 0; predNum < info.getNumPredicates(); ++predNum)
    {
      if (sessionColsUnused)
        {
          // Nothing in the query depends on session_id/sequence_no,
          // so every predicate can be handed to the child.
          info.setPredicateEvaluationCode(predNum,
                                          PredicateInfo::EVALUATE_IN_CHILD);
          continue;
        }

      if (!info.isAComparisonPredicate(predNum))
        continue;

      // For demo purposes, the UDF itself takes care of predicates
      // of the form "session_id < const".
      const ComparisonPredicateInfo &cmp = info.getComparisonPredicate(predNum);
      const bool isSessionIdUpperBound =
           cmp.getColumnNumber() == 0 /* SESSION_ID */ &&
           cmp.getOperator() == PredicateInfo::LESS &&
           cmp.hasAConstantValue();

      if (isSessionIdUpperBound)
        info.setPredicateEvaluationCode(predNum,
                                        PredicateInfo::EVALUATE_IN_UDF);
    }
}
// Compile-time hook that gives the optimizer a cardinality estimate
// for the UDF result.
//
// The function type was set to REDUCER earlier, and for reducers the
// Trafodion compiler assumes one output row per input partition unless
// the UDF says otherwise. Sessionize returns one output row per input
// row (minus rows removed by predicates it evaluates itself), so a
// better estimate is derived from the input row count.
void Sessionize::describeStatistics(UDRInvocationInfo &info)
{
  const long inputRowCount = info.in().getEstimatedNumRows();

  // NOTE(review): the UDR interface is believed to report "no
  // estimate available" as a negative row count (-1) - confirm
  // against sqludr.h. Scaling such a value would truncate to 0 and
  // tell the optimizer that the UDF returns no rows at all, so in
  // that case no estimate is set and the compiler default stays.
  if (inputRowCount < 0)
    return;

  // Crude estimate: assume each predicate evaluated by the UDF
  // reduces the number of output rows by 50%. At this point, only
  // predicates that are evaluated by the UDF are left in the list.
  const double selectivity = pow(2.0, -info.getNumPredicates());

  info.out().setEstimatedNumRows(
       static_cast<long>(inputRowCount * selectivity));
}
// Run-time hook: reads the input rows and emits each one with a
// session id and a sequence number prepended.
//
// UDF parameters:
//   0 - name of the user id column in the input table
//   1 - name of the timestamp column in the input table
//   2 - timeout; a gap between two consecutive timestamps of the same
//       user that is larger than this value starts a new session
//
// Output column 0 is the session id and output column 1 the sequence
// number within the session; both start at 1 for every user. The
// remaining output columns are copied from the input row.
//
// A new user is detected by comparing the user id with that of the
// previous row, so this relies on rows of one user arriving together.
//
// Throws UDRException (38001) if the timestamp of a row is lower than
// that of the preceding row of the same user (or negative on the first
// row of a user), and UDRException (38002) if the constant of a
// SESSION_ID predicate cannot be read as an integer.
void Sessionize::processData(UDRInvocationInfo &info,
                             UDRPlanInfo &plan)
{
  // read the three parameters and convert the first two into column numbers
  int userIdColNum    = info.in(0).getColNum(info.par().getString(0));
  int timeStampColNum = info.in(0).getColNum(info.par().getString(1));
  long timeout        = info.par().getLong(2);

  // Upper bound (exclusive) on the session ids to emit, if any.
  // Based on describeDataflowAndPredicates(), every predicate left
  // for this method is of the form SESSION_ID < const. There can be
  // more than one, so all of them are honored by keeping the
  // smallest constant.
  bool hasSessionIdLimit = false;
  long maxSessionId = 0;

  for (int p = 0; p < info.getNumPredicates(); p++)
    {
      std::string constValue = info.getComparisonPredicate(p).getConstValue();
      long limit = 0;

      // Parse as long to match the type of the session id. A constant
      // that cannot be read must not be ignored silently, since that
      // would return rows the predicate should have filtered out.
      if (sscanf(constValue.c_str(), "%ld", &limit) != 1)
        throw UDRException(
             38002,
             "Unable to interpret constant '%s' of SESSION_ID predicate as an integer",
             constValue.c_str());

      if (!hasSessionIdLimit || limit < maxSessionId)
        {
          maxSessionId = limit;
          hasSessionIdLimit = true;
        }
    }

  // variables needed for computing the session id
  long lastTimeStamp = 0;   // 0 means: no previous row for this user
  std::string lastUserId;
  long currSessionId = 1;
  long currSequenceNo = 1;

  // loop over input rows
  while (getNextRow(info))
    {
      long timeStamp = info.in(0).getLong(timeStampColNum);
      std::string userId = info.in(0).getString(userIdColNum);

      if (lastUserId != userId)
        {
          // new user: reset timestamp check and start over with
          // session id 1 and sequence number 1
          lastTimeStamp = 0;
          currSessionId = 1;
          currSequenceNo = 1;
          lastUserId = userId;
        }

      long tsDiff = timeStamp - lastTimeStamp;

      if (tsDiff > timeout && lastTimeStamp > 0)
        {
          // gap exceeds the timeout: start the next session
          currSessionId++;
          currSequenceNo = 1;
        }
      else if (tsDiff < 0)
        throw UDRException(
             38001,
             "Got negative or descending timestamps %ld, %ld",
             lastTimeStamp, timeStamp);

      lastTimeStamp = timeStamp;

      // this evaluates the SQL predicate(s) on SESSION_ID
      if (!hasSessionIdLimit || currSessionId < maxSessionId)
        {
          // produce session_id and sequence_no output columns
          info.out().setLong(0, currSessionId);
          info.out().setLong(1, currSequenceNo);

          // produce the remaining columns and emit the row
          info.copyPassThruData();
          emitRow(info);
          currSequenceNo++;
        }
    }
}