// ----------------------------------------------------------------------- // Given a column list providing identifiers for columns of this table, // this method returns a list of VEG expressions and/or base columns that // show the equivalence of base columns with index columns. // ----------------------------------------------------------------------- void TableDesc::getEquivVEGCols (const ValueIdList& columnList, ValueIdList &VEGColumnList) const { for (CollIndex i=0; i < columnList.entries(); i++) { ItemExpr *ie = columnList[i].getItemExpr(); BaseColumn *bc = NULL; switch (ie->getOperatorType()) { case ITM_BASECOLUMN: bc = (BaseColumn *) ie; break; case ITM_INDEXCOLUMN: bc = (BaseColumn *) ((IndexColumn *) ie)->getDefinition(). getItemExpr(); CMPASSERT(bc->getOperatorType() == ITM_BASECOLUMN); break; default: ABORT("Invalid argument to TableDesc::getEquivVEGCols()\n"); } CMPASSERT(bc->getTableDesc() == this); VEGColumnList.insert(getColumnVEGList()[bc->getColNumber()]); } }
// -----------------------------------------------------------------------
// IndexDesc::isUniqueIndex()
//
// Returns the uniqueness attribute recorded in this index's NAFileSet
// (getNAFileSet()->uniqueIndex()).
//
// The statements that used to follow the return (a heuristic based on
// the presence of non-key columns) could never execute and have been
// removed; the result is unchanged.
// -----------------------------------------------------------------------
NABoolean IndexDesc::isUniqueIndex() const
{
  return getNAFileSet()->uniqueIndex();
}
short ProbeCache::codeGen(Generator *generator) { ExpGenerator * exp_gen = generator->getExpGenerator(); Space * space = generator->getSpace(); MapTable * last_map_table = generator->getLastMapTable(); ex_cri_desc * given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc * returned_desc = new(space) ex_cri_desc(given_desc->noTuples() + 1, space); // cri descriptor for work atp has 5 entries: // entry #0 for const // entry #1 for temp // entry #2 for hash value of probe input data in Probe Cache Manager // entry #3 for encoded probe input data in Probe Cache Manager // enrry #4 for inner table row data in this operator's cache buffer Int32 work_atp = 1; ex_cri_desc * work_cri_desc = new(space) ex_cri_desc(5, space); unsigned short hashValIdx = 2; unsigned short encodedProbeDataIdx = 3; unsigned short innerRowDataIdx = 4; // generate code for child tree, and get its tdb and explain tuple. child(0)->codeGen(generator); ComTdb * child_tdb = (ComTdb *)(generator->getGenObj()); ExplainTuple *childExplainTuple = generator->getExplainTuple(); ////////////////////////////////////////////////////// // Generate up to 4 runtime expressions. ////////////////////////////////////////////////////// // Will use child's char. inputs (+ execution count) for the next // two runtime expressions. ValueIdList inputsToUse = child(0).getGroupAttr()->getCharacteristicInputs(); inputsToUse.insert(generator->getOrAddStatementExecutionCount()); // Expression #1 gets the hash value of the probe input data ValueIdList hvAsList; // Executor has hard-coded assumption that the result is long, // so add a Cast node to convert result to a long. 
ItemExpr *probeHashAsIe = new (generator->wHeap()) HashDistPartHash(inputsToUse.rebuildExprTree(ITM_ITEM_LIST)); probeHashAsIe->bindNode(generator->getBindWA()); NumericType &nTyp = (NumericType &)probeHashAsIe->getValueId().getType(); GenAssert(nTyp.isSigned() == FALSE, "Unexpected signed HashDistPartHash."); GenAssert(probeHashAsIe->getValueId().getType().supportsSQLnullLogical() == FALSE, "Unexpected nullable HashDistPartHash."); ItemExpr *hvAsIe = new (generator->wHeap()) Cast( probeHashAsIe, new (generator->wHeap()) SQLInt(FALSE, // false == unsigned. FALSE // false == not nullable. )); hvAsIe->bindNode(generator->getBindWA()); hvAsList.insert(hvAsIe->getValueId()); ex_expr *hvExpr = NULL; ULng32 hvLength; exp_gen->generateContiguousMoveExpr( hvAsList, 0, // don't add convert node work_atp, hashValIdx, ExpTupleDesc::SQLARK_EXPLODED_FORMAT, hvLength, &hvExpr); GenAssert(hvLength == sizeof(Lng32), "Unexpected length of result of hash function."); // Expression #2 encodes the probe input data for storage in // the ProbeCacheManager. ValueIdList encodeInputAsList; CollIndex inputListIndex; for (inputListIndex = 0; inputListIndex < inputsToUse.entries(); inputListIndex++) { ItemExpr *inputIe = (inputsToUse[inputListIndex].getValueDesc())->getItemExpr(); if (inputIe->getValueId().getType().getVarLenHdrSize() > 0) { // This logic copied from Sort::codeGen(). // Explode varchars by moving them to a fixed field // whose length is equal to the max length of varchar. 
// 5/8/98: add support for VARNCHAR const CharType& char_type = (CharType&)(inputIe->getValueId().getType()); if (!CollationInfo::isSystemCollation(char_type.getCollation())) { inputIe = new(generator->wHeap()) Cast (inputIe, (new(generator->wHeap()) SQLChar( CharLenInfo(char_type.getStrCharLimit(), char_type.getDataStorageSize()), char_type.supportsSQLnull(), FALSE, FALSE, FALSE, char_type.getCharSet(), char_type.getCollation(), char_type.getCoercibility() ) ) ); } } CompEncode * enode = new(generator->wHeap()) CompEncode(inputIe, FALSE /* ascend/descend doesn't matter*/); enode->bindNode(generator->getBindWA()); encodeInputAsList.insert(enode->getValueId()); } ex_expr *encodeInputExpr = NULL; ULng32 encodedInputLength; exp_gen->generateContiguousMoveExpr(encodeInputAsList, 0, //don't add conv nodes work_atp, encodedProbeDataIdx, ExpTupleDesc::SQLARK_EXPLODED_FORMAT, encodedInputLength, &encodeInputExpr); // Expression #3 moves the inner table data into a buffer pool. // This is also the tuple returned to ProbeCache's parent. ex_expr * innerRecExpr = NULL; ValueIdList innerTableAsList = getGroupAttr()->getCharacteristicOutputs(); ////////////////////////////////////////////////////// // Describe the returned row and add the returned // values to the map table. 
////////////////////////////////////////////////////// // determine internal format NABoolean useCif = FALSE; ExpTupleDesc::TupleDataFormat tupleFormat = generator->getInternalFormat(); //tupleFormat = determineInternalFormat( innerTableAsList, this, useCif,generator); ULng32 innerRecLength = 0; ExpTupleDesc * innerRecTupleDesc = 0; MapTable * returnedMapTable = NULL; exp_gen->generateContiguousMoveExpr(innerTableAsList, -1, // do add conv nodes work_atp, innerRowDataIdx, tupleFormat, innerRecLength, &innerRecExpr, &innerRecTupleDesc, ExpTupleDesc::SHORT_FORMAT, &returnedMapTable); returned_desc->setTupleDescriptor(returned_desc->noTuples() - 1, innerRecTupleDesc); // remove all appended map tables and return the returnedMapTable generator->removeAll(last_map_table); generator->appendAtEnd(returnedMapTable); // This returnedMapTable will contain the value ids that are being returned // (the inner table probed). // Massage the atp and atp_index of the innerTableAsList. for (CollIndex i = 0; i < innerTableAsList.entries(); i++) { ValueId innerValId = innerTableAsList[i]; Attributes *attrib = generator->getMapInfo(innerValId)->getAttr(); // All reference to the returned values from this point on // will be at atp = 0, atp_index = last entry in returned desc. attrib->setAtp(0); attrib->setAtpIndex(returned_desc->noTuples() - 1); } // Expression #4 is a selection predicate, to be applied // before returning rows to the parent ex_expr * selectPred = NULL; if (!selectionPred().isEmpty()) { ItemExpr * selPredTree = selectionPred().rebuildExprTree(ITM_AND,TRUE,TRUE); exp_gen->generateExpr(selPredTree->getValueId(), ex_expr::exp_SCAN_PRED, &selectPred); } ////////////////////////////////////////////////////// // Prepare params for ComTdbProbeCache. 
////////////////////////////////////////////////////// queue_index pDownSize = (queue_index)getDefault(GEN_PROBE_CACHE_SIZE_DOWN); queue_index pUpSize = (queue_index)getDefault(GEN_PROBE_CACHE_SIZE_UP); // Make sure that the ProbeCache queues can support the childs queues. if(pDownSize < child_tdb->getInitialQueueSizeDown()) { pDownSize = child_tdb->getInitialQueueSizeDown(); pDownSize = MINOF(pDownSize, 32768); } if(pUpSize < child_tdb->getInitialQueueSizeUp()) { pUpSize = child_tdb->getInitialQueueSizeUp(); pUpSize = MINOF(pUpSize, 32768); } ULng32 pcNumEntries = numCachedProbes_; // Number of entries in the probe cache cannot be less than // max parent down queue size. Before testing and adjusting the // max queue size, it is necessary to make sure it is a power of // two, rounding up if necessary. This is to match the logic in // ex_queue::resize. queue_index pdq2 = 1; queue_index bits = pDownSize; while (bits && pdq2 < pDownSize) { bits = bits >> 1; pdq2 = pdq2 << 1; } if (pcNumEntries < pdq2) pcNumEntries = pdq2; numInnerTuples_ = getDefault(GEN_PROBE_CACHE_NUM_INNER); if (innerRecExpr == NULL) { // For semi-join and anti-semi-join, executor need not allocate // a buffer. Set the tdb's buffer size to 0 to be consistent. numInnerTuples_ = 0; } else if (numInnerTuples_ == 0) { // Handle special value, 0, which tells code gen to // decided on buffer size: i.e., large enough to accomodate // all parent up queue entries and all probe cache entries // having a different inner table row. // As we did for the down queue, make sure the up queue size // specified is a power of two. queue_index puq2 = 1; queue_index bits = pUpSize; while (bits && puq2 < pUpSize) { bits = bits >> 1; puq2 = puq2 << 1; } numInnerTuples_ = puq2 + pcNumEntries; }
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; 
ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; //CreateCastExpr(inputExpr, *inputExpr.getValueId().getType().newCopy(), cmpContext); res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); lmExpr->bindNode(generator->getBindWA()); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? 
workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, hdfsBufSize, replication ); 
tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
// -----------------------------------------------------------------------
// ItemExprList::insertTree()
//
// Flattens an expression tree into this list.
//
//   - A node whose operator type equals backBoneType is not inserted
//     itself; its non-NULL children are processed recursively.
//   - An ITM_ONE_ROW node is expanded into its children only once its
//     isOneRowTransformed_ flag is set; otherwise nothing is inserted.
//   - A subquery (when flattenSBQ is set) or a user-defined function
//     (when flattenUDF is set) that has not been transformed yet and
//     produces more than one output is replaced by one ValueIdProxy per
//     output column; the first proxy is marked to transform its child.
//   - Everything else is inserted as is.
//
// tree          root of the (sub)tree to insert; must not be NULL
// backBoneType  operator type that forms the list backbone
// flattenSBQ    expand subqueries with more than one select list entry
// flattenUDF    expand UDFs with more than one output column
// -----------------------------------------------------------------------
void ItemExprList::insertTree(ItemExpr *tree,
                              OperatorTypeEnum backBoneType,
                              NABoolean flattenSBQ,
                              NABoolean flattenUDF)
{
  if (tree->getOperatorType() == backBoneType)
    {
      for (Int32 i = 0; i < tree->getArity(); i++)
        {
          // Check for NULL list for right linear trees. That is, arity may
          // be two, but second child is NULL.
          ItemExpr *child = tree->child(i);
          if (child)
            insertTree(child, backBoneType, flattenSBQ, flattenUDF);
        }
    }
  else if (tree->getOperatorType() == ITM_ONE_ROW)
    {
      Aggregate *agr = (Aggregate *)tree;

      if (agr->isOneRowTransformed_)
        {
          for (Int32 i = 0; i < tree->getArity(); i++)
            insertTree(tree->child(i), backBoneType, flattenSBQ, flattenUDF);
        }
      else
        {
          // do nothing, postpone this processing until OneRow
          // transformation is done
        }
    }
  else if (((flattenSBQ AND tree->isASubquery()) OR
            (flattenUDF AND
             (tree->getOperatorType() == ITM_USER_DEF_FUNCTION))) AND
           (NOT tree->nodeIsTransformed()))
    // The transformation check guards against flattening a subquery/MVF
    // a second time around while we deal with ValueIdProxies. It has to
    // apply to BOTH alternatives, hence the explicit parentheses around
    // the OR: without them AND binds tighter than OR and the check would
    // only cover the UDF case.
    // The ValueIdProxy->needToTransformChild() flag should be
    // sufficient, but it never hurts to be safe.
    {
      ValueIdList cols;
      NABoolean haveRDesc(FALSE);

      if (tree->isASubquery())
        {
          // flatten the subquery select list
          RETDesc *retDesc = ((Subquery*)tree)->getSubquery()->getRETDesc();
          if (retDesc)
            {
              retDesc->getColumnList()->getValueIdList(cols);
              if (cols.entries() > 1)
                {
                  haveRDesc = TRUE;
                }
            }
        }
      else if (tree->getOperatorType() == ITM_USER_DEF_FUNCTION)
        {
          // flatten the UDF by adding the additional outputs to the tree
          const RoutineDesc *rDesc = ((UDFunction *)tree)->getRoutineDesc();
          if (rDesc && rDesc->getOutputColumnList().entries() > 1)
            {
              cols = rDesc->getOutputColumnList();
              haveRDesc = TRUE;
            }
        }

      if (haveRDesc == TRUE)
        {
          for (CollIndex i = 0; i < cols.entries(); i++)
            {
              ValueId proxyId;
              proxyId = cols[i];

              // We create a ValueIdProxy for each element in the
              // subquery's select list or for each output parameter of a
              // MVF. The first one of these will be marked to be
              // transformed. This allows us to get the correct degree of
              // statements containing MVFs or subquery with degree > 1 at
              // bind time.
              ValueIdProxy *proxyOutput =
                new (CmpCommon::statementHeap())
                  ValueIdProxy( tree->getValueId(), proxyId, i);

              proxyOutput->synthTypeAndValueId();

              // Make sure we transform the subquery or MVF
              if (i == 0)
                proxyOutput->setTransformChild(TRUE);

              insert(proxyOutput);
            }
        }
      else
        insert(tree); // we are processing a valueId of a UDFunction
                      // or subquery before we have bound it. Just insert
                      // its valueId and we'll have to deal with it later..
    }
  else
    insert(tree);
}
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; 
ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); const NATable *hiveNATable = NULL; const NAColumnArray *hiveNAColArray = NULL; // hiveInsertErrMode: // if 0, do not do error checks. // if 1, do error check and return error. // if 2, do error check and ignore row, if error // if 3, insert null if an error occurs Lng32 hiveInsertErrMode = 0; if ((fastExtract) && (fastExtract->isHiveInsert()) && (fastExtract->getHiveTableDesc()) && (fastExtract->getHiveTableDesc()->getNATable()) && ((hiveInsertErrMode = CmpCommon::getDefaultNumeric(HIVE_INSERT_ERROR_MODE)) > 0)) { hiveNATable = fastExtract->getHiveTableDesc()->getNATable(); hiveNAColArray = &hiveNATable->getNAColumnArray(); } for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; lmExpr = lmExpr->bindNode(generator->getBindWA()); if (!lmExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "lmExpr->bindNode failed"); } // Hive insert converts child data into string format and inserts // it into target table. // If child type can into an error during conversion, then // add a Cast to convert from child type to target type before // converting to string format to be inserted. 
if (hiveNAColArray) { const NAColumn *hiveNACol = (*hiveNAColArray)[i]; const NAType *hiveNAType = hiveNACol->getType(); // if tgt type was a hive 'string', do not return a conversion err if ((lmExpr->getValueId().getType().errorsCanOccur(*hiveNAType)) && (NOT ((DFS2REC::isSQLVarChar(hiveNAType->getFSDatatype())) && (((SQLVarChar*)hiveNAType)->wasHiveString())))) { ItemExpr *newExpr = new(generator->wHeap()) Cast(lmExpr, hiveNAType); newExpr = newExpr->bindNode(generator->getBindWA()); if (!newExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "newExpr->bindNode failed"); } if (hiveInsertErrMode == 3) ((Cast*)newExpr)->setConvertNullWhenError(TRUE); lmExpr = newExpr; } } res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { UInt16 pcm = exp_gen->getPCodeMode(); if ((hiveNAColArray) && (hiveInsertErrMode == 3)) { // if error mode is 3 (mode null when error), disable pcode. // this feature is currently not being handled by pcode. // (added as part of JIRA 1920 in FileScan::codeGenForHive). exp_gen->setPCodeMode(ex_expr::PCODE_NONE); } exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? 
workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->setPCodeMode(pcm); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, 
hdfsBufSize, replication ); tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); if ((hiveNAColArray) && (hiveInsertErrMode == 2)) { tdb->setContinueOnError(TRUE); } generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
short ExpGenerator::buildKeyInfo(keyRangeGen ** keyInfo, // out -- generated object Generator * generator, const NAColumnArray & keyColumns, const ValueIdList & listOfKeyColumns, const ValueIdList & beginKeyPred, const ValueIdList & endKeyPred, const SearchKey * searchKey, const MdamKey * mdamKeyPtr, const NABoolean reverseScan, unsigned short keytag, const ExpTupleDesc::TupleDataFormat tf, // the next few parameters are here // as part of a horrible kludge for // the PartitionAccess::codeGen() // method, which lacks a SearchKey // object and therefore exposes // things like the exclusion // expressions; with luck, later work // in the Optimizer will result in a // much cleaner interface const NABoolean useTheHorribleKludge, ItemExpr * beginKeyExclusionExpr, ItemExpr * endKeyExclusionExpr, ex_expr_lean ** unique_key_expr, ULng32 *uniqueKeyLen, NABoolean doKeyEncodeOpt, Lng32 * firstKeyColOffset, Int32 in_key_atp_index ) { Space * space = generator->getSpace(); const Int32 work_atp = 1; const Int32 key_atp_index = (in_key_atp_index <= 0 ? 
2 : in_key_atp_index); const Int32 exclude_flag_atp_index = 3; const Int32 data_conv_error_atp_index = 4; const Int32 key_column_atp_index = 5; // used only for Mdam const Int32 key_column2_atp_index = 6; // used only for Mdam MDAM_BETWEEN pred; // code in BiLogic::mdamPredGenSubrange // and MdamColumn::buildDisjunct // requires this to be 1 more than // key_column_atp_index ULng32 keyLen; // add an entry to the map table for work Atp MapTable *keyBufferPartMapTable = generator->appendAtEnd(); // generate a temporary variable, which will be used for handling // data conversion errors during key building ValueIdList temp_varb_list; ItemExpr * dataConversionErrorFlag = new(generator->wHeap()) HostVar("_sys_dataConversionErrorFlag", new(generator->wHeap()) SQLInt(TRUE,FALSE), // int not null TRUE); ULng32 temp_varb_tupp_len; dataConversionErrorFlag->bindNode(generator->getBindWA()); temp_varb_list.insert(dataConversionErrorFlag->getValueId()); processValIdList(temp_varb_list, ExpTupleDesc::SQLARK_EXPLODED_FORMAT, temp_varb_tupp_len, // out work_atp, data_conv_error_atp_index); NABoolean doEquiKeyPredOpt = FALSE; #ifdef _DEBUG if (getenv("DO_EQUI_KEY_PRED_OPT")) doEquiKeyPredOpt = (searchKey ? searchKey->areAllChosenPredsEqualPreds() : FALSE); #endif if (mdamKeyPtr == NULL) { // check to see if there is a begin key expression; if there // isn't, don't generate a key object if (beginKeyPred.entries() == 0) *keyInfo = 0; else { // For subset and range operators, generate the begin key // expression, end key expression, begin key exclusion expression // and end key exclusion expression. For unique operators, // generate only the begin key exppression. 
ex_expr *bk_expr = 0; ex_expr *ek_expr = 0; ex_expr *bk_excluded_expr = 0; ex_expr *ek_excluded_expr = 0; short bkey_excluded = 0; short ekey_excluded = 0; generateKeyExpr(keyColumns, beginKeyPred, work_atp, key_atp_index, dataConversionErrorFlag, tf, keyLen, // out &bk_expr, // out doKeyEncodeOpt, firstKeyColOffset, doEquiKeyPredOpt); if (&endKeyPred) generateKeyExpr(keyColumns, endKeyPred, work_atp, key_atp_index, dataConversionErrorFlag, tf, keyLen, // out -- should be the same as above &ek_expr, // out doKeyEncodeOpt, firstKeyColOffset, doEquiKeyPredOpt); if (reverseScan) { // reverse scan - swap the begin and end key predicates // Note: evidently, the Optimizer has already switched // the key predicates in this case, so what we are // really doing is switching them back. ex_expr *temp = bk_expr; bk_expr = ek_expr; ek_expr = temp; } if (searchKey) { generateExclusionExpr(searchKey->getBeginKeyExclusionExpr(), work_atp, exclude_flag_atp_index, &bk_excluded_expr); // out bkey_excluded = (short) searchKey->isBeginKeyExclusive(); generateExclusionExpr(searchKey->getEndKeyExclusionExpr(), work_atp, exclude_flag_atp_index, &ek_excluded_expr); // out ekey_excluded = (short) searchKey->isEndKeyExclusive(); if (reverseScan) { NABoolean x = bkey_excluded; bkey_excluded = ekey_excluded; #pragma nowarn(1506) // warning elimination ekey_excluded = x; #pragma warn(1506) // warning elimination ex_expr* temp = bk_excluded_expr; bk_excluded_expr = ek_excluded_expr; bk_excluded_expr = temp; } } // if searchKey else if (useTheHorribleKludge) { generateExclusionExpr(beginKeyExclusionExpr, work_atp, exclude_flag_atp_index, &bk_excluded_expr); // out generateExclusionExpr(endKeyExclusionExpr, work_atp, exclude_flag_atp_index, &ek_excluded_expr); // out // note that the old PartitionAccess::codeGen() code didn't // set values for bkey_excluded and ekey_excluded, so the // safest choice is to choose inclusion, i.e. let the flags // retain their initial value of 0. 
} // Build key info if (keytag > 0) keyLen += sizeof(short); if ((unique_key_expr == NULL) || (NOT generator->genLeanExpr())) { // the work cri desc is used to build key values (entry 2) and // to compute the exclusion flag (entry 3) to monitor for data // conversion errors (entry 4) and to compute values on a column // basis (entry 5 - Mdam only) ex_cri_desc * work_cri_desc = new(space) ex_cri_desc(6, space); *keyInfo = new(space) keySingleSubsetGen( keyLen, work_cri_desc, key_atp_index, exclude_flag_atp_index, data_conv_error_atp_index, bk_expr, ek_expr, bk_excluded_expr, ek_excluded_expr, // static exclude flags (if exprs are NULL) bkey_excluded, ekey_excluded); if (unique_key_expr) *unique_key_expr = NULL; } else { if (keyInfo) *keyInfo = NULL; *unique_key_expr = (ex_expr_lean*)bk_expr; *uniqueKeyLen = keyLen; } } } // end of non-mdam case else // Mdam case { // the work cri desc is used to build key values (entry 2) and // to compute the exclusion flag (entry 3) to monitor for data // conversion errors (entry 4) and to compute values on a column // basis (entry 5 - Mdam only, and entry 6 - Mdam only, and only // for MDAM_BETWEEN predtype) ex_cri_desc * work_cri_desc = new(space) ex_cri_desc(7, space); // compute the format of the key buffer -- We need this // so that Mdam will know, for each column, where in the buffer // to move a value and how many bytes that value takes. The // next few lines of code result in this information being stored // in the attrs array. // Some words on the technique: We create expressions whose // result datatype matches the key buffer datatypes for each key // column. Then we use the datatypes of these expressions to // compute buffer format. The expressions themselves are not // used any further; they do not result in compiled expressions // in the plan. At run time we use string moves to move key // values instead. 
const CollIndex keyCount = listOfKeyColumns.entries(); CollIndex i; // assert at least one column GenAssert(keyCount > 0,"MDAM: at least one key column required."); Attributes ** attrs = new(generator->wHeap()) Attributes * [keyCount]; for (i = 0; i < keyCount; i++) { ItemExpr * col_node = listOfKeyColumns[i].getItemExpr(); ItemExpr *enode = col_node; if ((tf == ExpTupleDesc::SQLMX_KEY_FORMAT) && (enode->getValueId().getType().getVarLenHdrSize() > 0)) { // varchar keys in SQL/MP tables are converted to // fixed length chars in key buffers const CharType& char_type = (CharType&)(enode->getValueId().getType()); if (!CollationInfo::isSystemCollation(char_type.getCollation())) { enode = new(generator->wHeap()) Cast(enode, (new (generator->wHeap()) SQLChar( CharLenInfo(char_type.getStrCharLimit(), char_type.getDataStorageSize()), char_type.supportsSQLnull(), FALSE, FALSE, FALSE, char_type.getCharSet(), char_type.getCollation(), char_type.getCoercibility()))); } } NABoolean desc_flag; if (keyColumns.isAscending(i)) desc_flag = reverseScan; else desc_flag = !reverseScan; #pragma nowarn(1506) // warning elimination enode = new(generator->wHeap()) CompEncode(enode,desc_flag); #pragma warn(1506) // warning elimination enode->bindNode(generator->getBindWA()); attrs[i] = (generator-> addMapInfoToThis(keyBufferPartMapTable, enode->getValueId(), 0))->getAttr(); } // for, over keyCount // Compute offsets, lengths, etc. and assign them to the right // atp and atp index processAttributes((ULng32)keyCount, attrs, tf, keyLen, work_atp, key_atp_index); // Now we have key column offsets and lengths stored in attrs. // Next, for each column, generate expressions to compute hi, // lo, non-null hi and non-null lo values, and create // MdamColumnGen structures. // Notes: In the Mdam network itself, all key values are // encoded. Hence, we generate CompEncode nodes in all of the // expressions, regardless of tuple format. 
In the Simulator // case, we must at run-time decode the encoded values when // moving them to the key buffer. $$$ We need an expression to // do this. This decoding work has not yet been done, so the // simulator only works correctly for columns that happen to be // correctly aligned and whose encoding function does not change // the value. $$$ MdamColumnGen * first = 0; MdamColumnGen * last = 0; LIST(NAType *) keyTypeList(generator->wHeap());//to keep the type of the keys for later for (i = 0; i < keyCount; i++) { // generate expressions to compute hi, lo, non-null hi, non-null lo NAType * targetType = (keyColumns[i]->getType())->newCopy(generator->wHeap()); // Genesis case 10-971031-9814 fix: desc_flag must take into account // both the ASC/DESC attribute of the key column and the reverseScan // attribute. Before this fix, it only took into account the first of // these. NABoolean desc_flag; if (keyColumns.isAscending(i)) desc_flag = reverseScan; else desc_flag = !reverseScan; // End Genesis case 10-971031-9814 fix. 
if ((tf == ExpTupleDesc::SQLMX_KEY_FORMAT) && (targetType->getVarLenHdrSize() > 0)) { // 5/9/98: add support for VARNCHAR const CharType* char_type = (CharType*)(targetType); if (!CollationInfo::isSystemCollation(char_type->getCollation())) { targetType = new(generator->wHeap()) SQLChar( CharLenInfo(char_type->getStrCharLimit(), char_type->getDataStorageSize()), char_type -> supportsSQLnull(), FALSE, FALSE, FALSE, char_type -> getCharSet(), char_type -> getCollation(), char_type -> getCoercibility()); /* targetType->getNominalSize(), targetType->supportsSQLnull() */ } } keyTypeList.insert(targetType); // save in ith position for later // don't need to make copy of targetType in next call ItemExpr * lo = new(generator->wHeap()) ConstValue(targetType, !desc_flag, TRUE /* allow NULL */); #pragma nowarn(1506) // warning elimination lo = new(generator->wHeap()) CompEncode(lo,desc_flag); #pragma warn(1506) // warning elimination lo->bindNode(generator->getBindWA()); ValueIdList loList; loList.insert(lo->getValueId()); ex_expr *loExpr = 0; ULng32 dataLen = 0; generateContiguousMoveExpr(loList, 0, // don't add convert nodes work_atp, key_column_atp_index, tf, dataLen, &loExpr); ItemExpr * hi = new(generator->wHeap()) ConstValue(targetType->newCopy(generator->wHeap()), desc_flag, TRUE /* allow NULL */); #pragma nowarn(1506) // warning elimination hi = new(generator->wHeap()) CompEncode(hi,desc_flag); #pragma warn(1506) // warning elimination hi->bindNode(generator->getBindWA()); ValueIdList hiList; hiList.insert(hi->getValueId()); ex_expr *hiExpr = 0; generateContiguousMoveExpr(hiList, 0, // don't add convert nodes work_atp, key_column_atp_index, tf, dataLen, &hiExpr); ex_expr *nonNullLoExpr = loExpr; ex_expr *nonNullHiExpr = hiExpr; if (targetType->supportsSQLnull()) { if (desc_flag) { ItemExpr * nonNullLo = new(generator->wHeap()) ConstValue(targetType->newCopy(generator->wHeap()), !desc_flag, FALSE /* don't allow NULL */); #pragma nowarn(1506) // warning elimination 
nonNullLo = new(generator->wHeap()) CompEncode(nonNullLo,desc_flag); #pragma warn(1506) // warning elimination nonNullLo->bindNode(generator->getBindWA()); ValueIdList nonNullLoList; nonNullLoList.insert(nonNullLo->getValueId()); nonNullLoExpr = 0; // so we will get an expression back generateContiguousMoveExpr(nonNullLoList, 0, // don't add convert nodes work_atp, key_column_atp_index, tf, dataLen, &nonNullLoExpr); } else { ItemExpr * nonNullHi = new(generator->wHeap()) ConstValue(targetType->newCopy(generator->wHeap()), desc_flag, FALSE /* don't allow NULL */); #pragma nowarn(1506) // warning elimination nonNullHi = new(generator->wHeap()) CompEncode(nonNullHi,desc_flag); #pragma warn(1506) // warning elimination nonNullHi->bindNode(generator->getBindWA()); ValueIdList nonNullHiList; nonNullHiList.insert(nonNullHi->getValueId()); nonNullHiExpr = 0; // so we will get an expression back generateContiguousMoveExpr(nonNullHiList, 0, // don't add convert nodes work_atp, key_column_atp_index, tf, dataLen, &nonNullHiExpr); } } NABoolean useSparseProbes = mdamKeyPtr->isColumnSparse(i); // calculate offset to the beginning of the column value // (including the null indicator and the varchar length // indicator if present) ULng32 column_offset = attrs[i]->getOffset(); if (attrs[i]->getNullFlag()) column_offset = attrs[i]->getNullIndOffset(); else if (attrs[i]->getVCIndicatorLength() > 0) column_offset = attrs[i]->getVCLenIndOffset(); last = new(space) MdamColumnGen(last, dataLen, column_offset, useSparseProbes, loExpr, hiExpr, nonNullLoExpr, nonNullHiExpr); if (first == 0) first = last; } // for over keyCount // generate MdamPred's and attach to MdamColumnGen's const ColumnOrderListPtrArray &columnOrderListPtrArray = mdamKeyPtr->getColumnOrderListPtrArray(); #ifdef _DEBUG // Debug print stataments below depend on this // variable: char *ev = getenv("MDAM_PRINT"); const NABoolean mdamPrintOn = (ev != NULL AND strcmp(ev,"ON")==0); #endif #ifdef _DEBUG if (mdamPrintOn) { 
fprintf(stdout, "\n\n***Generating the MDAM key for table with index" " columns: "); listOfKeyColumns.display(); } #endif for (CollIndex n = 0; n < columnOrderListPtrArray.entries(); n++) { // get the list of key predicates associated with the n disjunct: const ColumnOrderList &columnOrderList = *columnOrderListPtrArray[n]; #ifdef _DEBUG if (mdamPrintOn) { fprintf(stdout,"\nDisjunct[%d]:----------------\n",n); columnOrderList.print(); } #endif MdamColumnGen * cc = first; CMPASSERT(keyCount == columnOrderList.entries()); const ValueIdSet *predsPtr = NULL; for (i = 0; i < keyCount; i++) { #ifdef _DEBUG if (mdamPrintOn) { fprintf(stdout, "Column(%d) using: ", i); if ( mdamKeyPtr->isColumnSparse(i) ) fprintf(stdout,"SPARSE probes\n"); else fprintf(stdout, "DENSE probes\n"); } #endif // get predicates for column order i: predsPtr = columnOrderList[i]; NAType * keyType = keyTypeList[i]; NABoolean descending; if (keyColumns.isAscending(i)) descending = reverseScan; else descending = !reverseScan; ValueId keyColumn = listOfKeyColumns[i]; MdamCodeGenHelper mdamHelper( n, keyType, descending, work_atp, key_column_atp_index, tf, dataConversionErrorFlag, keyColumn); MdamPred * lastPred = cc->getLastPred(); if (predsPtr != NULL) { for (ValueId predId = predsPtr->init(); predsPtr->next(predId); predsPtr->advance(predId)) { MdamPred * head = 0; // head of generated MdamPred's MdamPred * tail = 0; ItemExpr * orGroup = predId.getItemExpr(); orGroup->mdamPredGen(generator,&head,&tail,mdamHelper,NULL); if (lastPred) { if ( CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) { MdamPred* curr = lastPred; while(curr->getNext() != NULL) curr=curr->getNext(); curr->setNext(head); } else lastPred->setNext(head); } cc->setLastPred(tail); lastPred = tail; //@ZXmdam if 1st pred has head != tail, head is lost } // for over preds } // if (predsPtr != NULL) cc = cc->getNext(); } // for every order... 
} // for every column order list in the array (of disjuncts) // build the Mdam key info if (keytag > 0) keyLen += sizeof(short); *keyInfo = new(space) keyMdamGen(keyLen, work_cri_desc, key_atp_index, exclude_flag_atp_index, data_conv_error_atp_index, key_column_atp_index, first, last, reverseScan, generator->wHeap()); } // end of mdam case if (*keyInfo) (*keyInfo)->setKeytag(keytag); // reset map table to forget about the key object's work Atp // aside: this logic is more bloody than it should be because the // map table implementation doesn't accurately reflect the map table // abstraction generator->removeAll(keyBufferPartMapTable); // deletes anything that might have been // added after keyBufferPartMapTable (at // this writing we don't expect there to // be anything, but we want to be safe) // at this point keyBufferPartMapTable should be the last map table in the // global map table chain generator->removeLast(); // unlinks keyBufferPartMapTable and deletes it return 0; };