Example #1
0
void Sessionize::describeDataflowAndPredicates(UDRInvocationInfo &info)
{
  // Compile-time hook: decide which input columns are needed and where
  // each predicate of the query gets evaluated.

  // Let the base class apply its default reducer behavior first
  // (pushing down predicates on the key/id column).
  UDR::describeDataflowAndPredicates(info);

  // Drop passthru columns of the child/input table that nobody needs.
  // NOTE: this may renumber the id and timestamp columns, and it may
  // also flag those two columns themselves as unused.
  info.setUnusedPassthruColumns();

  // Look up the (possibly changed) positions of the id and timestamp
  // columns by name, using the first two parameters ...
  InternalColumns keyCols(
         info.in().getColNum(info.par().getString(0)),
         info.in().getColNum(info.par().getString(1)));

  // ... and force both of them to be marked as used again.
  info.setChildColumnUsage(0, keyCols.getIdColumn(), ColumnInfo::USED);
  info.setChildColumnUsage(0, keyCols.getTsColumn(), ColumnInfo::USED);

  // True when neither generated output column (0 = session_id,
  // 1 = sequence_no) is referenced by the query.
  bool generatedColsUnused =
    (info.out().getColumn(0).getUsage() != ColumnInfo::USED &&
     info.out().getColumn(1).getUsage() != ColumnInfo::USED);

  // Visit every predicate and pick an evaluation site for it; predicates
  // that match none of the cases below keep whatever code they have.
  int predNum = 0;

  while (predNum < info.getNumPredicates())
    {
      if (generatedColsUnused)
        {
          // Without session_id/sequence_no in the query, every
          // predicate can be handed to the children.
          info.setPredicateEvaluationCode(predNum,
                                          PredicateInfo::EVALUATE_IN_CHILD);
        }
      else if (info.isAComparisonPredicate(predNum))
        {
          // For demo purposes, predicates shaped like
          // "session_id < const" are evaluated inside the UDF.
          const ComparisonPredicateInfo &cmp =
            info.getComparisonPredicate(predNum);

          bool isSessionIdUpperBound =
            cmp.getColumnNumber() == 0 /* SESSION_ID */ &&
            cmp.getOperator() == PredicateInfo::LESS &&
            cmp.hasAConstantValue();

          if (isSessionIdUpperBound)
            info.setPredicateEvaluationCode(predNum,
                                            PredicateInfo::EVALUATE_IN_UDF);
        }

      predNum++;
    }
}
Example #2
0
// Runtime entry point: reads the input rows, assigns a session id and a
// sequence number within the session to each row, and emits the rows
// that satisfy the SESSION_ID predicate(s) evaluated inside the UDF.
//
// Parameters of the UDF (read through info.par()):
//   0: name of the user id column in the input table
//   1: name of the timestamp column in the input table
//   2: timeout; a timestamp gap larger than this starts a new session
//
// A new user id restarts session id and sequence number at 1.
//
// Throws UDRException 38001 if a timestamp is lower than the previous
// one for the same user (or negative on a user's first row), and
// UDRException 38002 if the constant of a predicate cannot be read as
// an integer.
void Sessionize::processData(UDRInvocationInfo &info,
                             UDRPlanInfo &plan)
{
  // read the three parameters and convert the first two into column numbers
  int userIdColNum    = info.in(0).getColNum(info.par().getString(0));
  int timeStampColNum = info.in(0).getColNum(info.par().getString(1));
  long timeout        = info.par().getLong(2);

  // variables needed for computing the session id
  long lastTimeStamp = 0;
  std::string lastUserId;
  long currSessionId = 1;
  long currSequenceNo = 1;

  // Upper bound on SESSION_ID (exclusive); only meaningful when
  // haveSessionLimit is true. A flag is used instead of a magic
  // "very large" value so that no rows are dropped without a predicate.
  bool haveSessionLimit = false;
  long maxSessionId = 0;

  // Based on the describeDataflowAndPredicates() method, every predicate
  // we see here must be of the form SESSION_ID < const and must be
  // evaluated inside this method. There can be more than one of them,
  // so keep the most restrictive (smallest) constant.
  // NOTE(review): this relies on the predicate list at runtime holding
  // only the predicates marked EVALUATE_IN_UDF - confirm.
  for (int p=0; p<info.getNumPredicates(); p++)
    {
      std::string constValue = info.getComparisonPredicate(p).getConstValue();
      long limit = 0;

      // NOTE(review): a non-integer constant such as 10.5 is truncated
      // to its integer prefix here - confirm whether that can occur.
      if (sscanf(constValue.c_str(), "%ld", &limit) != 1)
        throw UDRException(
             38002,
             "Unable to interpret constant %s in SESSION_ID predicate",
             constValue.c_str());

      if (!haveSessionLimit || limit < maxSessionId)
        {
          maxSessionId = limit;
          haveSessionLimit = true;
        }
    }

  // loop over input rows
  while (getNextRow(info))
    {
      long timeStamp = info.in(0).getLong(timeStampColNum);
      std::string userId = info.in(0).getString(userIdColNum);

      if (lastUserId != userId)
        {
          // reset timestamp check and start over with session id 1
          lastTimeStamp = 0;
          currSessionId = 1;
          currSequenceNo = 1;
          lastUserId = userId;
        }

      long tsDiff = timeStamp - lastTimeStamp;

      if (tsDiff > timeout && lastTimeStamp > 0)
        {
          // gap exceeds the timeout: begin a new session for this user
          // (lastTimeStamp > 0 skips this check after a reset)
          currSessionId++;
          currSequenceNo = 1;
        }
      else if (tsDiff < 0)
        throw UDRException(
             38001,
             "Got negative or descending timestamps %ld, %ld",
             lastTimeStamp, timeStamp);

      lastTimeStamp = timeStamp;

      // this evaluates the SQL predicate(s) on SESSION_ID
      if (!haveSessionLimit || currSessionId < maxSessionId)
        {
          // produce session_id and sequence_no output columns
          info.out().setLong(0, currSessionId);
          info.out().setLong(1, currSequenceNo);

          // produce the remaining columns and emit the row
          info.copyPassThruData();
          emitRow(info);
          currSequenceNo++;
        }
    }
}