// Builds the internal description of a TIMESERIES UDF invocation.
//
// During the initial compile-time call (COMPILER_INITIAL_CALL) the
// ORDER BY of the table-valued input and the scalar parameters are
// validated. In every call phase the constructor then records the
// ordering (timestamp) column, the number of partitioning columns and
// the time slice width, and creates one TimeSeriesAggregate for each
// (column name, instructions) parameter pair, starting with scalar
// parameter 2.
//
// Instructions consist of 2 or 3 characters:
//   1st character: F or L - first or last value
//   2nd character: C or L - constant or linear interpolation
//   3rd character: I      - optional, sets the "ignore nulls" flag
//
// Later call phases do not repeat the validation, they rely on the
// checks made in the initial compile-time call.
//
// Throws a UDRException (SQLSTATE 38010 through 38130) if one of the
// checks fails. Exceptions raised by the called UDR interface methods
// are passed on to the caller.
InternalColumns::InternalColumns(const UDRInvocationInfo &info) :
     info_(info)
{
  // expect a single table-valued input
  if (info.getNumTableInputs() != 1)
    throw UDRException(
         38010,
         "TIMESERIES UDF: Expecting one table-valued input");

  const OrderInfo &ord = info.in().getQueryOrdering();
  const PartitionInfo &part = info.in().getQueryPartitioning();
  const bool isInitialCompileCall =
    (info.getCallPhase() == UDRInvocationInfo::COMPILER_INITIAL_CALL);

  // perform some basic tests in the first call at compile time
  if (isInitialCompileCall)
    {
      // expect an order by on a time or timestamp expression
      if (ord.getNumEntries() != 1 ||
          ord.getOrderType(0) == OrderInfo::DESCENDING)
        throw UDRException(
             38020,
             "TIMESERIES UDF: Must use ORDER BY with one column for its input table and the order must be ascending");
      TypeInfo::SQLTypeCode typeCode =
        info.in().getColumn(ord.getColumnNum(0)).getType().getSQLType();
      if (typeCode != TypeInfo::TIME &&
          typeCode != TypeInfo::TIMESTAMP)
        throw UDRException(
             38030,
             "TIMESERIES UDF: Must use ORDER BY a TIME or TIMESTAMP column for the input table");

      // we need at least two parameters, time column name and width
      // of time slice
      if (info.par().getNumColumns() < 2)
        throw UDRException(
             38040,
             "TIMESERIES UDF: UDF needs to be called with at least 2 scalar parameters");

      // input parameter 0 (defined in the DDL) is the
      // name of the column containing the time values
      if (!info.par().isAvailable(0) ||
          info.par().getColumn(0).getType().getSQLTypeClass() !=
          TypeInfo::CHARACTER_TYPE)
        throw UDRException(
             38050,
             "TIMESERIES UDF: Expecting a character constant (timestamp alias) as first parameter");

      // check type and value of the time slice width, specified
      // as parameter 1
      if (!info.par().isAvailable(1))
        throw UDRException(
             38060,
             "TIMESERIES UDF: Expecting a constant for the time slice width as second parameter");

      // time slice width must be a day-second interval
      if (info.par().getColumn(1).getType().getSQLTypeSubClass() !=
          TypeInfo::DAY_SECOND_INTERVAL_TYPE)
        throw UDRException(
             38070,
             "TIMESERIES UDF: Second scalar parameter for time slice width must be an interval constant in the day to second range");

      // make sure parameters come in pairs
      if (info.par().getNumColumns() % 2 != 0)
        throw UDRException(
             38080,
             "TIMESERIES UDF: Parameters need to be specified in pairs of column name and instructions");

      // make sure all parameters are specified at compile time
      for (int p=2; p<info.par().getNumColumns(); p++)
        if (!info.par().isAvailable(p))
          throw UDRException(
               38090,
               "TIMESERIES UDF: All parameters must be specified as literals");
    } // initial compile-time checks

  tsInColNum_     = ord.getColumnNum(0);
  numTSCols_      = 1; // always a single timestamp column for now
  numPartCols_    = part.getNumEntries();

  timeSliceWidth_  = info.par().getTime(1);

  // initialize vectors, one (empty, NULL) entry per partitioning column
  for (int p=0; p<numPartCols_; p++)
    {
      currPartKey_.push_back("");
      currPartKeyNulls_.push_back(true);
    }

  // The aggregates are allocated with new and held as raw pointers in
  // columns_. When a constructor exits with an exception, the
  // destructor of the object is not executed, so the aggregates created
  // up to that point must be released here before passing the
  // exception on.
  try
    {
      // walk through the (column name, instructions) pairs
      for (int ip=2; ip<info.par().getNumColumns(); ip+=2)
        {
          const std::string colName = info.par().getString(ip);
          const std::string instr = info.par().getString(ip+1);

          // some checks done only during the first compile time call
          if (isInitialCompileCall)
            {
              if (instr.size() < 2 || instr.size() > 3)
                throw UDRException(
                     38100,
                     "TIMESERIES UDF: Expecting instructions with 2 or 3 characters: %s",
                     instr.c_str());

              // validate first character of instructions
              switch (instr[0])
                {
                case 'f':
                case 'F':
                case 'l':
                case 'L':
                  break;

                default:
                  // ip+2 is the 1-based position of the instructions
                  // parameter (0-based index ip+1)
                  throw UDRException(
                       38110,
                       "TIMESERIES UDF: Parameter %d should start with F or L for first or last value",
                       ip+2);
                }

              // validate second character of instructions
              switch (instr[1])
                {
                case 'c':
                case 'C':
                case 'l':
                case 'L':
                  break;

                default:
                  throw UDRException(
                       38120,
                       "TIMESERIES UDF: Parameter %d should have C or L as its second character, for constant or linear interpolation",
                       ip+2);
                }

              if (instr.size() == 3 &&
                  instr[2] != 'i' &&
                  instr[2] != 'I')
                throw UDRException(
                     38130,
                     "TIMESERIES UDF: Unexpected trailing characters in aggregate instructions: %s",
                     instr.c_str());
            } // compile-time checks

          const bool isFirstVal = (instr[0] == 'F' || instr[0] == 'f');
          const bool isConstInterpol = (instr[1] == 'C' || instr[1] == 'c');
          const bool isIgnoreNulls = (instr.size() > 2);

          // the output column number of the new aggregate is its
          // position in columns_, offset by getFirstAggrCol()
          columns_.push_back(new TimeSeriesAggregate(
                                  info.in(),
                                  info.out(),
                                  info.in().getColNum(colName),
                                  getFirstAggrCol() + columns_.size(),
                                  isFirstVal,
                                  isConstInterpol,
                                  isIgnoreNulls));
        }
    }
  catch (...)
    {
      // release the aggregates of a partially constructed object
      while (!columns_.empty())
        {
          delete columns_.back();
          columns_.pop_back();
        }
      throw;
    }
}
// Example #2
// Compile-time description of the Sessionize UDF.
//
// Validates the table-valued input and the first two scalar parameters
// (name of the id column, name of the timestamp column), requests a
// PARTITION BY on the id column and an ORDER BY on the timestamp column
// for the child, and defines the output columns: SESSION_ID,
// SEQUENCE_NO and all columns of the input table.
//
// Throws a UDRException if the number of table-valued inputs is not
// one, if a parameter is not available at compile time, if the
// timestamp column is not exact numeric, or if the query specifies a
// conflicting PARTITION BY or ORDER BY clause.
void Sessionize::describeParamsAndColumns(UDRInvocationInfo &info)
{
  // The UDF works on exactly one table-valued input, anything
  // else is a compile error
  if (info.getNumTableInputs() != 1)
    throw UDRException(38000,
                       "%s must be called with one table-valued input",
                       info.getUDRName().data());

  // ---- Scalar parameter 0: name of the id column ----

  if (!info.par().isAvailable(0))
    throw UDRException(38001,"First scalar parameter must be a string constant");

  const PartitionInfo &requestedPartitioning =
    info.in().getQueryPartitioning();
  PartitionInfo requiredPartitioning;

  // getColNum() raises an error if the input table has no
  // column with the name given in the first parameter
  const int idColNum = info.in().getColNum(info.par().getString(0));

  // A PARTITION BY clause in the query is acceptable only
  // if it consists of the id column alone
  if (requestedPartitioning.getType() == PartitionInfo::PARTITION)
    {
      const bool partitionedByIdColOnly =
        requestedPartitioning.getNumEntries() == 1 &&
        requestedPartitioning.getColumnNum(0) == idColNum;

      if (!partitionedByIdColOnly)
        throw UDRException(38001,
                           "Query PARTITION BY not compatible with id column %s",
                           info.par().getString(0).c_str());
    }

  // Require the child to be partitioned by the id column
  requiredPartitioning.setType(PartitionInfo::PARTITION);
  requiredPartitioning.addEntry(idColNum);
  info.setChildPartitioning(0, requiredPartitioning);

  // ---- Scalar parameter 1: name of the timestamp column ----

  if (!info.par().isAvailable(1))
    throw UDRException(38003,"Second scalar parameter must be a string constant");

  // getColNum() raises an error if the input table has no
  // column with the name given in the second parameter
  const int tsColNum = info.in().getColNum(info.par().getString(1));
  const TypeInfo &tsColType = info.in().getColumn(tsColNum).getType();
  const OrderInfo &requestedOrdering = info.in().getQueryOrdering();
  OrderInfo requiredOrdering;

  // The timestamp column has to be an exact numeric
  if (tsColType.getSQLTypeSubClass() != TypeInfo::EXACT_NUMERIC_TYPE)
    throw UDRException(38002, "Second parameter must be the name of an exact numeric column");

  // An ORDER BY clause in the query is acceptable only if its
  // first entry is the timestamp column and is not descending
  if (requestedOrdering.getNumEntries() > 0)
    {
      const bool leadsWithAscendingTsCol =
        requestedOrdering.getColumnNum(0) == tsColNum &&
        requestedOrdering.getOrderType(0) != OrderInfo::DESCENDING;

      if (!leadsWithAscendingTsCol)
        throw UDRException(
             38900,
             "Query ORDER BY conflicts with specified timestamp column %s",
             info.par().getString(1).c_str());
    }

  // Require the child to be ordered by just the timestamp column
  requiredOrdering.addEntry(tsColNum);
  info.setChildOrdering(0, requiredOrdering);

  // The id and timestamp column numbers could be handed to later
  // compiler phases as UDR Writer data in the UDRInvocationInfo
  // object, which would avoid looking them up each time.
  /* TBD: uncomment when this is allowed
  info.setUDRWriterCompileTimeData(new InternalColumns(idColNum, tsColNum));
  */

  // ---- Output columns ----

  // Session id, followed by a sequence number that is
  // unique within the session
  info.out().addLongColumn("SESSION_ID");  // output column 0
  info.out().addLongColumn("SEQUENCE_NO"); // output column 1

  // All columns of the input table are also produced as output
  // ("pass-through" columns). Called with its default parameters,
  // this method adds all the columns of the first input table.
  info.addPassThruColumns();

  // Sessionize behaves like a reducer in MapReduce: session ids
  // are local within rows that share the same id column value
  info.setFuncType(UDRInvocationInfo::REDUCER);
}