/**
 * Builds the internal column description for one TIMESERIES UDF invocation.
 *
 * During the first compile-time call (COMPILER_INITIAL_CALL) the query
 * shape and the scalar parameters are validated:
 *   - the input table must have an ascending ORDER BY on exactly one
 *     TIME or TIMESTAMP column,
 *   - parameter 0 must be an available character value,
 *   - parameter 1 must be an available day-to-second interval,
 *   - all remaining parameters must be available and come in
 *     (column name, instructions) pairs.
 *
 * In every call phase the constructor then records the ordering column,
 * the number of partitioning columns and the time slice width, and
 * creates one TimeSeriesAggregate per (column name, instructions) pair.
 *
 * Instructions are 2 or 3 characters long:
 *   char 0: F/f or L/l - first or last value
 *   char 1: C/c or L/l - constant or linear interpolation
 *   char 2: I/i (optional) - ignore nulls
 *
 * @param info invocation info describing inputs, outputs and parameters
 * @throws UDRException (SQLSTATE 38010 - 38130) if validation fails; any
 *         exception raised by the column lookups is propagated as well.
 */
InternalColumns::InternalColumns(const UDRInvocationInfo &info) :
     info_(info)
{
  // expect a single table-valued input
  if (info.getNumTableInputs() != 1)
    throw UDRException(
         38010,
         "TIMESERIES UDF: Expecting one table-valued input");

  const OrderInfo &ord = info.in().getQueryOrdering();
  const PartitionInfo &part = info.in().getQueryPartitioning();

  // perform some basic tests in the first call at compile time
  if (info.getCallPhase() == UDRInvocationInfo::COMPILER_INITIAL_CALL)
    {
      // expect an order by on a time or timestamp expression
      if (ord.getNumEntries() != 1 ||
          ord.getOrderType(0) == OrderInfo::DESCENDING)
        throw UDRException(
             38020,
             "TIMESERIES UDF: Must use ORDER BY with one column for its input table and the order must be ascending");

      TypeInfo::SQLTypeCode typeCode =
        info.in().getColumn(ord.getColumnNum(0)).getType().getSQLType();

      if (typeCode != TypeInfo::TIME &&
          typeCode != TypeInfo::TIMESTAMP)
        throw UDRException(
             38030,
             "TIMESERIES UDF: Must use ORDER BY a TIME or TIMESTAMP column for the input table");

      // we need at least two parameters, time column name and width
      // of time slice
      if (info.par().getNumColumns() < 2)
        throw UDRException(
             38040,
             "TIMESERIES UDF: UDF needs to be called with at least 2 scalar parameters");

      // input parameter 0 (defined in the DDL) is the
      // name of the column containing the time values
      if (!info.par().isAvailable(0) ||
          info.par().getColumn(0).getType().getSQLTypeClass() !=
            TypeInfo::CHARACTER_TYPE)
        throw UDRException(
             38050,
             "TIMESERIES UDF: Expecting a character constant (timestamp alias) as first parameter");

      // check type and value of the time slice width, specified
      // as parameter 1
      if (!info.par().isAvailable(1))
        throw UDRException(
             38060,
             "TIMESERIES UDF: Expecting a constant for the time slice width as second parameter");

      // time slice width must be a day-second interval
      if (info.par().getColumn(1).getType().getSQLTypeSubClass() !=
            TypeInfo::DAY_SECOND_INTERVAL_TYPE)
        throw UDRException(
             38070,
             "TIMESERIES UDF: Second scalar parameter for time slice width must be an interval constant in the day to second range");

      // make sure parameters come in pairs
      if (info.par().getNumColumns() % 2 != 0)
        throw UDRException(
             38080,
             "TIMESERIES UDF: Parameters need to be specified in pairs of column name and instructions");

      // make sure all parameters are specified at compile time
      for (int p=2; p<info.par().getNumColumns(); p++)
        if (!info.par().isAvailable(p))
          throw UDRException(
               38090,
               "TIMESERIES UDF: All parameters must be specified as literals");
    } // initial compile-time checks

  tsInColNum_     = ord.getColumnNum(0);
  numTSCols_      = 1; // always a single timestamp column for now
  numPartCols_    = part.getNumEntries();
  // NOTE(review): a zero or negative width is not rejected here - confirm
  // that the code consuming timeSliceWidth_ copes with such a value.
  timeSliceWidth_ = info.par().getTime(1);

  // initialize vectors: one (empty, NULL) key slot per partitioning column
  for (int p=0; p<numPartCols_; p++)
    {
      currPartKey_.push_back("");
      currPartKeyNulls_.push_back(true);
    }

  // Each aggregate below is heap-allocated and stored as a raw pointer.
  // A later parameter pair can still fail (validation error or column
  // lookup), in which case this constructor exits by exception and the
  // destructor of this object is never run. Release what was allocated
  // so far before propagating the error, so nothing leaks.
  // NOTE(review): assumes columns_ owns the pointers it stores (i.e. they
  // are otherwise deleted by the destructor) - confirm.
  try
    {
      // parameters 2, 3, ... are (column name, instructions) pairs
      for (int ip = 2; ip < info.par().getNumColumns(); ip += 2)
        {
          const std::string colName = info.par().getString(ip);
          const std::string instr   = info.par().getString(ip+1);

          // some checks done only during the first compile time call
          if (info.getCallPhase() == UDRInvocationInfo::COMPILER_INITIAL_CALL)
            {
              if (instr.size() < 2 || instr.size() > 3)
                throw UDRException(
                     38100,
                     "TIMESERIES UDF: Expecting instructions with 2 or 3 characters: %s",
                     instr.c_str());

              // validate first character of instructions
              // (ip+2 is the 1-based position of the instructions parameter)
              switch (instr[0])
                {
                case 'f':
                case 'F':
                case 'l':
                case 'L':
                  break;

                default:
                  throw UDRException(
                       38110,
                       "TIMESERIES UDF: Parameter %d should start with F or L for first or last value",
                       ip+2);
                }

              // validate second character of instructions
              switch (instr[1])
                {
                case 'c':
                case 'C':
                case 'l':
                case 'L':
                  break;

                default:
                  throw UDRException(
                       38120,
                       "TIMESERIES UDF: Parameter %d should have C or L as its second character, for constant or linear interpolation",
                       ip+2);
                }

              if (instr.size() == 3 && instr[2] != 'i' && instr[2] != 'I')
                throw UDRException(
                     38130,
                     "TIMESERIES UDF: Unexpected trailing characters in aggregate instructions: %s",
                     instr.c_str());
            } // compile-time checks

          // decode the instructions
          const bool isFirstVal      = (instr[0] == 'F' || instr[0] == 'f');
          const bool isConstInterpol = (instr[1] == 'C' || instr[1] == 'c');
          const bool isIgnoreNulls   = (instr.size() > 2);

          // output columns for the aggregates follow getFirstAggrCol(),
          // in the order in which the pairs were specified
          columns_.push_back(new TimeSeriesAggregate(
                                  info.in(),
                                  info.out(),
                                  info.in().getColNum(colName),
                                  getFirstAggrCol() + columns_.size(),
                                  isFirstVal,
                                  isConstInterpol,
                                  isIgnoreNulls));
        }
    }
  catch (...)
    {
      while (!columns_.empty())
        {
          delete columns_.back();
          columns_.pop_back();
        }
      throw;
    }
}
/**
 * Compile-time description of the sessionize UDF's interface.
 *
 * Step 1 validates the invocation: exactly one table-valued input, scalar
 * parameter 0 naming the id column and scalar parameter 1 naming an exact
 * numeric timestamp column. The id column becomes the required
 * PARTITION BY column of the input table and the timestamp column its
 * required ORDER BY column; a conflicting PARTITION BY / ORDER BY in the
 * query is rejected.
 *
 * Step 2 defines the result: SESSION_ID and SEQUENCE_NO, followed by all
 * input columns as pass-through columns. The function is declared to be
 * a REDUCER.
 *
 * @param info invocation info to validate and to update
 * @throws UDRException with SQLSTATE 38000, 38001, 38002, 38003 or 38900
 *         for the validation failures below; errors raised by the column
 *         name lookups are propagated unchanged.
 */
void Sessionize::describeParamsAndColumns(UDRInvocationInfo &info)
{
  // input column numbers of the id and timestamp columns,
  // -1 until they have been resolved from the parameters
  int idCol = -1;
  int tsCol = -1;

  // ------------------------------------------------------------------
  // Step 1: validate parameters, set PARTITION BY and ORDER BY columns
  // ------------------------------------------------------------------

  // anything other than exactly one table-valued input is a compile error
  if (info.getNumTableInputs() != 1)
    throw UDRException(38000,
                       "%s must be called with one table-valued input",
                       info.getUDRName().data());

  // --- parameter 0: name of the id column (any type) ---
  if (!info.par().isAvailable(0))
    throw UDRException(38001,"First scalar parameter must be a string constant");

  {
    const PartitionInfo &requestedPart = info.in().getQueryPartitioning();
    const std::string idColName = info.par().getString(0);

    // the lookup raises an error if no input column has this name
    idCol = info.in().getColNum(idColName);

    // a PARTITION BY given in the query is acceptable only if it
    // consists of the id column and nothing else
    if (requestedPart.getType() == PartitionInfo::PARTITION)
      {
        const bool partitionsOnIdColOnly =
          requestedPart.getNumEntries() == 1 &&
          requestedPart.getColumnNum(0) == idCol;

        if (!partitionsOnIdColOnly)
          throw UDRException(38001,
                             "Query PARTITION BY not compatible with id column %s",
                             idColName.c_str());
      }

    // require the input table to be partitioned by the id column
    PartitionInfo requiredPart;

    requiredPart.setType(PartitionInfo::PARTITION);
    requiredPart.addEntry(idCol);
    info.setChildPartitioning(0, requiredPart);
  }

  // --- parameter 1: name of the timestamp column (exact numeric) ---
  if (!info.par().isAvailable(1))
    throw UDRException(38003,"Second scalar parameter must be a string constant");

  {
    const std::string tsColName = info.par().getString(1);

    // the lookup raises an error if no input column has this name
    tsCol = info.in().getColNum(tsColName);

    const TypeInfo &tsType = info.in().getColumn(tsCol).getType();
    const OrderInfo &requestedOrder = info.in().getQueryOrdering();

    if (tsType.getSQLTypeSubClass() != TypeInfo::EXACT_NUMERIC_TYPE)
      throw UDRException(38002,
                         "Second parameter must be the name of an exact numeric column");

    // an ORDER BY given in the query must lead with the timestamp
    // column, and must not sort it in descending order
    if (requestedOrder.getNumEntries() > 0)
      {
        const bool leadsWithTsCol = (requestedOrder.getColumnNum(0) == tsCol);

        if (!leadsWithTsCol ||
            requestedOrder.getOrderType(0) == OrderInfo::DESCENDING)
          throw UDRException(38900,
                             "Query ORDER BY conflicts with specified timestamp column %s",
                             tsColName.c_str());
      }

    // require the input table to be ordered by just the timestamp column
    OrderInfo requiredOrder;

    requiredOrder.addEntry(tsCol);
    info.setChildOrdering(0, requiredOrder);
  }

  // To demonstrate state that gets passed between compiler phases and
  // to avoid looking up the id column and timestamp column each time,
  // store those as UDR Writer data in the UDRInvocationInfo object
  /* TBD: uncomment when this is allowed
  info.setUDRWriterCompileTimeData(new InternalColumns(idCol, tsCol));
  */

  // ------------------------------------------------------------------
  // Step 2: define the output columns
  // ------------------------------------------------------------------

  // session id, plus a sequence number that is unique within the session
  info.out().addLongColumn("SESSION_ID");  // column number 0
  info.out().addLongColumn("SEQUENCE_NO"); // column number 1

  // "Pass-through" columns: with its default arguments this call makes
  // every column of the first input table an output column as well.
  info.addPassThruColumns();

  // Sessionize behaves like a reducer in MapReduce: session ids are
  // local within the rows that share the same id column value.
  info.setFuncType(UDRInvocationInfo::REDUCER);
}