//---------------------------------------------------------------------------- // Check if the query for this MAV is without a GROUP BY clause. // The RelExpr tree for a query with no GROUP BY clause, does not have // a GroupBy node in it. void MavRelRootBuilder::checkForMavWithoutGroupBy(RelExpr *mvSelectTree) { // Start from the top of the RelExpe tree. RelExpr *node = mvSelectTree; // Look for a GroupBy node. while (node->child(0) != NULL && node->getOperatorType() != REL_GROUPBY) node = node->child(0); if (node->getOperatorType() != REL_GROUPBY) isMavWithoutGroupBy_ = TRUE; else { // Check if the child of our child is another GroupBy node. // This can only be the result of multi-delta optimization on a // no-groupby MAV. if ( (node->child(0)->getOperatorType() == REL_ROOT) && (node->child(0)->child(0)->getOperatorType() == REL_GROUPBY) ) { // Found another GroupBy node below. // Now verify that the top one has no grouping columns. GroupByAgg *groupByNode = (GroupByAgg *)node; if (groupByNode->getGroupExprTree() == NULL) isMavWithoutGroupBy_ = TRUE; } } } // MavRelRootBuilder::checkForMavWithoutGroupBy()
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; //CreateCastExpr(inputExpr, *inputExpr.getValueId().getType().newCopy(), cmpContext); res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); lmExpr->bindNode(generator->getBindWA()); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, hdfsBufSize, replication ); tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); const NATable *hiveNATable = NULL; const NAColumnArray *hiveNAColArray = NULL; // hiveInsertErrMode: // if 0, do not do error checks. // if 1, do error check and return error. // if 2, do error check and ignore row, if error // if 3, insert null if an error occurs Lng32 hiveInsertErrMode = 0; if ((fastExtract) && (fastExtract->isHiveInsert()) && (fastExtract->getHiveTableDesc()) && (fastExtract->getHiveTableDesc()->getNATable()) && ((hiveInsertErrMode = CmpCommon::getDefaultNumeric(HIVE_INSERT_ERROR_MODE)) > 0)) { hiveNATable = fastExtract->getHiveTableDesc()->getNATable(); hiveNAColArray = &hiveNATable->getNAColumnArray(); } for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; lmExpr = lmExpr->bindNode(generator->getBindWA()); if (!lmExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "lmExpr->bindNode failed"); } // Hive insert converts child data into string format and inserts // it into target table. // If child type can into an error during conversion, then // add a Cast to convert from child type to target type before // converting to string format to be inserted. if (hiveNAColArray) { const NAColumn *hiveNACol = (*hiveNAColArray)[i]; const NAType *hiveNAType = hiveNACol->getType(); // if tgt type was a hive 'string', do not return a conversion err if ((lmExpr->getValueId().getType().errorsCanOccur(*hiveNAType)) && (NOT ((DFS2REC::isSQLVarChar(hiveNAType->getFSDatatype())) && (((SQLVarChar*)hiveNAType)->wasHiveString())))) { ItemExpr *newExpr = new(generator->wHeap()) Cast(lmExpr, hiveNAType); newExpr = newExpr->bindNode(generator->getBindWA()); if (!newExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "newExpr->bindNode failed"); } if (hiveInsertErrMode == 3) ((Cast*)newExpr)->setConvertNullWhenError(TRUE); lmExpr = newExpr; } } res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { UInt16 pcm = exp_gen->getPCodeMode(); if ((hiveNAColArray) && (hiveInsertErrMode == 3)) { // if error mode is 3 (mode null when error), disable pcode. // this feature is currently not being handled by pcode. // (added as part of JIRA 1920 in FileScan::codeGenForHive). exp_gen->setPCodeMode(ex_expr::PCODE_NONE); } exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->setPCodeMode(pcm); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, hdfsBufSize, replication ); tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); if ((hiveNAColArray) && (hiveInsertErrMode == 2)) { tdb->setContinueOnError(TRUE); } generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
// AppliedStatMan::setupASMCacheForJBB method will be called from // Query::Analyze after connectivity analysis has been done and // empty logical properties have been set. void AppliedStatMan::setupASMCacheForJBB(JBB & jbb) { EstLogPropSharedPtr myEstLogProp; // get all JBBCs of JBB const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs(); CANodeId jbbcId; // for all jbbcs for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { if (NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis()) { // Evaluate local predicates only if it is a table. RelExpr * jbbcExpr = jbbcNode->getOriginalExpr(); if ((jbbcNode->getTableAnalysis() != NULL) && (jbbcExpr->getOperatorType() == REL_SCAN)) { // get the original expression of the jbbc Scan * scanExpr = (Scan *) jbbcExpr; ValueIdSet localPreds = scanExpr->getSelectionPredicates(); // if local predicates have already been computed, then skip if ((localPreds.entries() > 0) || !(lookup(jbbcId))) { // check to see this GA has already been associated with // a logExpr for synthesis. If not, then synthesize // log. expression, and then apply local predicates to it if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis()) scanExpr->synthLogProp(); myEstLogProp = getStatsForCANodeId(jbbcId); } } } } // Now do a second traversal of the JBB looking for join reducers for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { // now look for all two way joins for this child if (jbbcId.getNodeAnalysis()) { // get all JBBCs connected to this JBBC, and do a two-way // join with all of them CANodeIdSet connectedNodes = jbbcId.getNodeAnalysis()->\ getJBBC()->getJoinedJBBCs(); for (CANodeId connectedTable = connectedNodes.init(); connectedNodes.next(connectedTable); connectedNodes.advance(connectedTable)) { if (connectedTable.getNodeAnalysis()) { // ASM does not concern itself with the order of the tables, // hence it is possible that the join has already been computed CANodeIdSet tableSet = jbbcId; tableSet.insert(connectedTable); if ((myEstLogProp = getCachedStatistics(&tableSet)) == NULL) { CANodeIdSet setForjbbcId(jbbcId); CANodeIdSet setForConnectedTable(connectedTable); myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable); } } } } } } // AppliedStatMan::setupASMCacheForJBB