void MultiJoin::synthLogPropWithMJReuse(NormWA * normWAPtr) { // Check to see whether this GA has already been associated // with a logExpr for synthesis. If so, no need to resynthesize // for this equivalent log. expression. if (getGroupAttr()->existsLogExprForSynthesis()) { Join * joinExprForSynth = (Join *) getGroupAttr()->getLogExprForSynthesis(); if(joinExprForSynth->isJoinFromMJSynthLogProp()) return; } NABoolean reUseMJ = TRUE; CMPASSERT ( (jbbSubset_.getGB() == NULL_CA_ID)); const CANodeIdSet & jbbcs = jbbSubset_.getJBBCs(); // Instead of always picking the first JBBC as the right child // pick the one with minimum JBBC connections. This will avoid // all unnecessary crossproducts CANodeId jbbcRight; jbbcRight = jbbcs.getJBBCwithMinConnectionsToThisJBBSubset(); CANodeIdSet right(jbbcRight); CANodeIdSet left(jbbcs); left -= jbbcRight; Join* join = splitSubset(*(left.jbbcsToJBBSubset()), *(right.jbbcsToJBBSubset()), reUseMJ); //if the left is a MultiJoin, synthesize it using reUse //this has to be done before join->synthLogProp to avoid //calling MultiJoin::synthLogProp on the left MultiJoin //because that does not reUse if(left.entries() > 1) { RelExpr * leftRelExpr = join->child(0)->castToRelExpr(); if(leftRelExpr && leftRelExpr->getOperator() == REL_MULTI_JOIN) ((MultiJoin *) leftRelExpr)->synthLogPropWithMJReuse(normWAPtr); } join->synthLogProp(normWAPtr); join->setJoinFromMJSynthLogProp(); getGroupAttr()->setLogExprForSynthesis(join); jbbSubset_.setSubsetMJ(this); CMPASSERT ( getGroupAttr()->getNumJoinedTables() >= getArity()); }
////////////////////////////////////////////////////////////////////////////// // This is where all the action begins. // The simple RelRoot-Refresh tree constructed by the parser, is transformed // to a complex RelExpr tree that implements the MV refresh algorithm. // The method: build a MvRefreshBuilder object, have it build the MV refresh // tree, and then bind the resulting tree. The bind may cause additional // levels of pipelined refresh to be constructed and bound as well. ////////////////////////////////////////////////////////////////////////////// RelExpr *Refresh::bindNode(BindWA *bindWA) { CollHeap *heap = bindWA->wHeap(); // for DEFAULT_SCHEMA_ACCESS_ONLY, there is no need to check here // because this fucntion is only called internally mvName_.applyDefaults(bindWA->getDefaultSchema()); if (deltaDefList_ != NULL) deltaDefList_->applyDefaultSchemaToAll(bindWA); if (pipelineClause_ != NULL) pipelineClause_->applyDefaultSchemaToAll(bindWA); CorrName mvCorrName(mvName_); MVInfoForDML *mvInfo = getMvInfo(bindWA, mvCorrName); if (mvInfo == NULL) return NULL; // User maintained MVs can only be recomputed (for initialization). if (mvInfo->getRefreshType() == COM_BY_USER && refreshType_ != RECOMPUTE ) return NULL; if (verifyDeltaDefinition(bindWA, mvInfo)) return NULL; // From now on, the binder needs to know we are binding a refresh statement. bindWA->setBindingMvRefresh(); bindWA->setPropagateOpAndSyskeyColumns(); RelExpr *refreshTree = buildCompleteRefreshTree(bindWA, mvInfo); if (bindWA->errStatus() || refreshTree==NULL) return this; // Bind the refresh tree. refreshTree = refreshTree->bindNode(bindWA); if (bindWA->errStatus()) return this; // If this is a multi-delta refresh, and we are not done yet - we need // to be called again with another phase. // Signal to the refresh utility using warning. if (getAdditionalPhaseNeeded()) { // 12304 An additional phase is needed to complete the refresh. *CmpCommon::diags() << DgSqlCode(12304); } // Delete all data members before saying goodbye. cleanupBeforeSelfDestruct(); // Return the bound refresh tree to the root. The Refresh node itself has // done its job, and is not part of the tree anymore. return refreshTree; }
NABoolean MultiJoin::duplicateMatch(const RelExpr & other) const { // We need to compare the arity before RelExpr::duplicateMatch(...) // otherwise we may get out of array boundary exception when compaing // this multijoin to a RelExpr with different arity. This is different // than other RelExprs because the check for operator type does not // secure the same arity in this case. if (getArity() != other.getArity()) return FALSE; ((RelExpr*)this)->selectionPred().clear(); if (!RelExpr::duplicateMatch(other)) return FALSE; MultiJoin &o = (MultiJoin &) other; if (jbbSubset_ != o.jbbSubset_) return FALSE; // At this point we will consider MJs with different LSRs to be different // This can change in the future, but would require updating the duplicate // MJ in the cascades memo to add the scheduledLSRs from the inserted one. if (scheduledLSRs_ != o.scheduledLSRs_) return FALSE; return TRUE; }
//---------------------------------------------------------------------------- // Check if the query for this MAV is without a GROUP BY clause. // The RelExpr tree for a query with no GROUP BY clause, does not have // a GroupBy node in it. void MavRelRootBuilder::checkForMavWithoutGroupBy(RelExpr *mvSelectTree) { // Start from the top of the RelExpe tree. RelExpr *node = mvSelectTree; // Look for a GroupBy node. while (node->child(0) != NULL && node->getOperatorType() != REL_GROUPBY) node = node->child(0); if (node->getOperatorType() != REL_GROUPBY) isMavWithoutGroupBy_ = TRUE; else { // Check if the child of our child is another GroupBy node. // This can only be the result of multi-delta optimization on a // no-groupby MAV. if ( (node->child(0)->getOperatorType() == REL_ROOT) && (node->child(0)->child(0)->getOperatorType() == REL_GROUPBY) ) { // Found another GroupBy node below. // Now verify that the top one has no grouping columns. GroupByAgg *groupByNode = (GroupByAgg *)node; if (groupByNode->getGroupExprTree() == NULL) isMavWithoutGroupBy_ = TRUE; } } } // MavRelRootBuilder::checkForMavWithoutGroupBy()
short Join::generateShape(CollHeap * c, char * buf, NAString * shapeStr) { Space * space = (Space *)c; char mybuf[100]; switch (getOperatorType()) { case REL_NESTED_JOIN: case REL_LEFT_NESTED_JOIN: case REL_NESTED_SEMIJOIN: case REL_NESTED_ANTI_SEMIJOIN: case REL_NESTED_JOIN_FLOW: sprintf(mybuf, "nested_join("); break; case REL_MERGE_JOIN: case REL_LEFT_MERGE_JOIN: case REL_MERGE_SEMIJOIN: case REL_MERGE_ANTI_SEMIJOIN: sprintf(mybuf, "merge_join("); break; case REL_HASH_SEMIJOIN: case REL_HASH_ANTI_SEMIJOIN: sprintf(mybuf, "hash_join("); break; case REL_LEFT_HYBRID_HASH_JOIN: case REL_HYBRID_HASH_SEMIJOIN: case REL_HYBRID_HASH_ANTI_SEMIJOIN: case REL_FULL_HYBRID_HASH_JOIN: sprintf(mybuf, "hybrid_hash_join("); break; case REL_LEFT_ORDERED_HASH_JOIN: case REL_ORDERED_HASH_JOIN: case REL_ORDERED_HASH_SEMIJOIN: case REL_ORDERED_HASH_ANTI_SEMIJOIN: sprintf(mybuf, "ordered_hash_join("); break; case REL_HYBRID_HASH_JOIN: if (((HashJoin *)this)->isOrderedCrossProduct()) sprintf(mybuf, "ordered_cross_product("); else sprintf(mybuf, "hybrid_hash_join("); break; default: sprintf(mybuf, "add_to_Join::generateShape("); } outputBuffer(space, buf, mybuf, shapeStr); child(0)->generateShape(space, buf, shapeStr); sprintf(mybuf, ","); outputBuffer(space, buf, mybuf, shapeStr); child(1)->generateShape(space, buf, shapeStr); // is it IndexJoin? if both children are scans and number of base tables is // 1, it is index join if (getGroupAttr()->getNumBaseTables() == 1) { NABoolean child0isScan = FALSE; NABoolean child1isScan = FALSE; RelExpr *lChild = child(0)->castToRelExpr(); while (lChild->getArity() == 1) { lChild=lChild->child(0)->castToRelExpr(); } switch( lChild->castToRelExpr()->getOperatorType()) { case REL_SCAN: case REL_FILE_SCAN: case REL_HBASE_ACCESS: case REL_HDFS_SCAN: child0isScan = TRUE; } RelExpr *rChild = child(1)->castToRelExpr(); while (rChild->getArity() == 1) { rChild = rChild->child(0)->castToRelExpr(); } switch (rChild->castToRelExpr()->getOperatorType()) { case REL_SCAN: case REL_FILE_SCAN: case REL_HBASE_ACCESS: case REL_HDFS_SCAN: child1isScan = TRUE; } if (child0isScan && child1isScan) { sprintf(mybuf, ",INDEXJOIN)"); } else { sprintf(mybuf, ")"); } } else { sprintf(mybuf, ")"); } outputBuffer(space, buf, mybuf, shapeStr); return 0; }
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; //CreateCastExpr(inputExpr, *inputExpr.getValueId().getType().newCopy(), cmpContext); res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); lmExpr->bindNode(generator->getBindWA()); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, hdfsBufSize, replication ); tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
/** * Replace the query with its query descriptor text. * The MVQR_LOG_QUERY_DESCRIPTORS CQD was set to 'DUMP', so instead of running * this query, we are going to replace it with something that, when executed, * will produce the XML text of the query descriptor as its output. This * 'something' is a TupleList node that has the text lines of the descriptor * as its tuples. * * @param rootExpr The RelExpr tree of the existing query * @param xmlText The XML text of the query descriptor */ RelRoot* MvQueryRewriteHandler::handleAnalyzeOnlyQuery(RelRoot* rootExpr, NAString* xmlText) { static NAString empty("No descriptor generated."); if (xmlText==NULL) xmlText = ∅ CollHeap *heap = CmpCommon::statementHeap(); Int32 maxLen=xmlText->length(); Int32 pos=0; Int32 lastPos=-1; ItemExpr* tupleExpr = NULL; while (pos<maxLen-1) { // Find the next CR character pos = xmlText->index("\n", 1, pos+1, NAString::exact); if (pos==-1) // If this is the last line with no CR. pos=maxLen; // The next line if from the last CR to this one. NASubString line=(*xmlText)(lastPos+1, pos-lastPos-1); lastPos=pos; // Make a constant character string from it. ItemExpr* tuple = new (heap) Convert(new(heap) SystemLiteral(line)); // Collect the tuples in a list if (tupleExpr==NULL) tupleExpr = tuple; else tupleExpr = new(heap) ItemList(tuple, tupleExpr); } // Construct the TupleList node TupleList* tupleListNode = new(heap) TupleList(tupleExpr->reverseTree(), heap); RelRoot* newRoot = new(heap) RelRoot(tupleListNode); // A RenameTable above it to give a name to the column of text. ItemExpr *renExpr = new(heap) RenameCol(NULL, new(heap) ColRefName("Query Descriptor", heap)); NAString tableName("Descriptor table"); RelExpr* renameNode = new(heap) RenameTable(TRUE, newRoot, tableName, renExpr, heap); // And a final RelRoot on top with a SELECT *. ItemExpr *starExpr = new(heap) ColReference(new(heap) ColRefName(1)); RelRoot* topRoot = new(heap) RelRoot(renameNode, REL_ROOT, starExpr); topRoot->setRootFlag(TRUE); // Now bind the new query tree BindWA* bindWA = rootExpr->getRETDesc()->getBindWA(); RelExpr* boundRoot = topRoot->bindNode(bindWA); // Transform it. This will eliminate the Rename node and the extraneous root. NormWA normWA(CmpCommon::context()); ExprGroupId eg(boundRoot); boundRoot->transformNode(normWA, eg); // And Normalize it. RelRoot *normRoot = (RelRoot *)eg.getPtr(); normRoot->normalizeNode(normWA); return normRoot; }
short CmpStatement::getDDLExprAndNode(char * sqlStr, Lng32 inputCS, DDLExpr* &ddlExpr, ExprNode* &ddlNode) { ddlNode = NULL; ddlExpr = NULL; if (! sqlStr) return 0; // C control character embedded in sqlStr is not handled. Now replace // control characters tabs, line feeds, spaces with spaces. (no longer // substitute for \n so we can recognized embedded comments) for (Int32 i = 0; sqlStr[i]; i++) if (sqlStr[i] != '\n' && isSpace8859_1((unsigned char)sqlStr[i])) sqlStr[i] = ' '; // skip leading blanks NAString ns(sqlStr); ns = ns.strip(NAString::leading, ' '); ReturnStatus status = CmpStatement_SUCCESS; CmpMain::ReturnStatus rs = CmpMain::SUCCESS; QueryText qText(sqlStr, inputCS); // CmpMessageReplyCode // *bound = new(outHeap_) CmpMessageReplyCode(outHeap_, statement.id(), 0, 0, outHeap_); Set_SqlParser_Flags(DELAYED_RESET); // sqlcompCleanup resets for us Parser parser(CmpCommon::context()); BindWA bindWA(ActiveSchemaDB(), CmpCommon::context(), TRUE); ExprNode *boundDDL = NULL; RelExpr * rRoot = NULL; // save parser flags Int32 savedParserFlags = Get_SqlParser_Flags (0xFFFFFFFF); ExprNode * exprNode = NULL; if (parser.parseDML(qText, &exprNode, NULL)) { error(arkcmpErrorNoDiags, sqlStr); sqlTextStr_=NULL; goto label_error; } if (exprNode->getOperatorType() EQU STM_QUERY) { rRoot = (RelRoot*)exprNode->getChild(0); } else if (exprNode->getOperatorType() EQU REL_ROOT) { rRoot = (RelRoot*)exprNode; } CMPASSERT(rRoot); boundDDL = rRoot->bindNode(&bindWA); CMPASSERT(boundDDL); if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_)) { goto label_error; } ddlExpr = (DDLExpr*)rRoot->getChild(0); ddlNode = ddlExpr->getDDLNode(); if (ddlNode) { boundDDL = ddlNode->castToStmtDDLNode()->bindNode(&bindWA); CMPASSERT(boundDDL); if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_)) { goto label_error; } ddlNode = boundDDL; } Set_SqlParser_Flags (savedParserFlags); return 0; label_error: // reset saved flags Set_SqlParser_Flags (savedParserFlags); return CmpStatement_ERROR; }
CmpStatement::ReturnStatus CmpStatement::process (const CmpMessageDDL& statement) { CmpMain cmpmain; CMPASSERT(statement.getCmpCompileInfo()); char * sqlStr = NULL; Int32 sqlStrLen = 0; Lng32 inputCS = 0; NAString currCatName; NAString currSchName; char * recompControlInfo = NULL; NABoolean isSchNameRecvd; NABoolean nametypeNsk; NABoolean odbcProcess; NABoolean noTextCache; NABoolean aqrPrepare; NABoolean standaloneQuery; isDDL_ = TRUE; if (processRecvdCmpCompileInfo(this, statement, statement.getCmpCompileInfo(), context_, sqlStr, sqlStrLen, // out - long & inputCS, isSchNameRecvd, currCatName, currSchName, recompControlInfo, nametypeNsk, odbcProcess, noTextCache, aqrPrepare, standaloneQuery)) return CmpStatement_ERROR; CmpCommon::context()->sqlSession()->setParentQid( statement.getParentQid()); // process recompControlInfo, if received if (recompControlInfo) setupRecompControlInfo(recompControlInfo, &cmpmain); cmpmain.setSqlParserFlags(statement.getFlags()); // set the current catalog and schema names. InitSchemaDB(); // C control character embedded in sqlStr is not handled. Now replace // control characters tabs, line feeds, spaces with spaces. (no longer // substitute for \n so we can recognized embedded comments) for (Int32 i = 0; sqlStr[i]; i++) if (sqlStr[i] != '\n' && isSpace8859_1((unsigned char)sqlStr[i])) sqlStr[i] = ' '; // skip leading blanks NAString ns(sqlStr); ns = ns.strip(NAString::leading, ' '); // if this is an "update statistics..." request, // then do not send it catalog manager. Int32 foundUpdStat = 0; // check if the first token is UPDATE size_t position = ns.index("UPDATE", 0, NAString::ignoreCase); if (position == 0) { // found UPDATE. See if the next token is STATISTICS. ns = ns(6, ns.length()-6); // skip over UPDATE ns = ns.strip(NAString::leading, ' '); position = ns.index("STATISTICS", 0, NAString::ignoreCase); if (position == 0) foundUpdStat = -1; } if (foundUpdStat) { // TODO, should be removed later // A pointer to user SQL query is stored in CmpStatement; if an exception // is thrown the user query is copied from here. It is reset upon return // from the UpdateStats() method. char *userStr= new (heap()) char[2000]; #pragma nowarn(1506) // warning elimination Int32 len=strlen(sqlStr); #pragma warn(1506) // warning elimination if (len > 1999) len=1999; strncpy(userStr, sqlStr, len); userStr[len]='\0'; sqlTextStr_ = userStr; sqlTextLen_ = len; if (UpdateStats(sqlStr)) { sqlTextStr_ = NULL; sqlTextLen_ = 0; if (recompControlInfo) restoreRecompControlInfo(recompControlInfo); return CmpStatement_ERROR; } sqlTextStr_ = NULL; sqlTextLen_ = 0; if (recompControlInfo) restoreRecompControlInfo(recompControlInfo); return CmpStatement_SUCCESS; } ReturnStatus status = CmpStatement_SUCCESS; if (statement.getCmpCompileInfo()->isHbaseDDL()) { CmpMain::ReturnStatus rs = CmpMain::SUCCESS; QueryText qText(sqlStr, inputCS); CmpMessageReplyCode *bound = new(outHeap_) CmpMessageReplyCode(outHeap_, statement.id(), 0, 0, outHeap_); // CmpMain cmpmain; Set_SqlParser_Flags(DELAYED_RESET); // sqlcompCleanup resets for us Parser parser(CmpCommon::context()); BindWA bindWA(ActiveSchemaDB(), CmpCommon::context(), TRUE); // save parser flags Int32 savedParserFlags = Get_SqlParser_Flags (0xFFFFFFFF); ExprNode * exprNode = NULL; if (parser.parseDML(qText, &exprNode, NULL)) { error(arkcmpErrorNoDiags, statement.data()); sqlTextStr_=NULL; return CmpStatement_ERROR; } RelExpr * rRoot = NULL; if (exprNode->getOperatorType() EQU STM_QUERY) { rRoot = (RelRoot*)exprNode->getChild(0); } else if (exprNode->getOperatorType() EQU REL_ROOT) { rRoot = (RelRoot*)exprNode; } CMPASSERT(rRoot); ExprNode *boundDDL = rRoot->bindNode(&bindWA); CMPASSERT(boundDDL); if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_)) { return CmpStatement_ERROR; } ExprNode * ddlNode = NULL; DDLExpr * ddlExpr = NULL; ddlExpr = (DDLExpr*)rRoot->getChild(0); ddlNode = ddlExpr->getDDLNode(); if (ddlNode) { boundDDL = ddlNode->castToStmtDDLNode()->bindNode(&bindWA); CMPASSERT(boundDDL); if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_)) { return CmpStatement_ERROR; } ddlNode = boundDDL; } // reset saved flags Set_SqlParser_Flags (savedParserFlags); CmpSeabaseDDL cmpSBD(heap_); if (cmpSBD.executeSeabaseDDL(ddlExpr, ddlNode, currCatName, currSchName)) { Set_SqlParser_Flags(0); return CmpStatement_ERROR; } Set_SqlParser_Flags (0); // TEMPTEMP. // Until support for metadata invalidation is in, clear up query cache for // this process. That way statements issued later from this session will // not see stale definitions. // This also helps in running tests where tables are modified and accessed from // the same session. // This does not solve the issue of stale definition seen by other processes, // that will be fixed once we have metadata invalidation. CURRENTQCACHE->makeEmpty(); return CmpStatement_SUCCESS; } // hbaseDDL // This is a normal DDL request, call Catalog manager *diags() << DgSqlCode(-4222) << DgString0("SQL Compiler DDL"); return CmpStatement_ERROR; }
static short ft_codegen(Generator *generator, RelExpr &relExpr, ComTdbFastExtract *&newTdb, Cardinality estimatedRowCount, char * targetName, char * hdfsHost, Int32 hdfsPort, char * hiveTableName, char * delimiter, char * header, char * nullString, char * recordSeparator, ULng32 downQueueMaxSize, ULng32 upQueueMaxSize, ULng32 outputBufferSize, ULng32 requestBufferSize, ULng32 replyBufferSize, ULng32 numOutputBuffers, ComTdb * childTdb, NABoolean isSequenceFile) { CmpContext *cmpContext = generator->currentCmpContext(); Space *space = generator->getSpace(); ExpGenerator *exp_gen = generator->getExpGenerator(); MapTable *map_table = generator->getMapTable(); MapTable *last_map_table = generator->getLastMapTable(); ex_expr *input_expr = NULL; ex_expr *output_expr = NULL; ex_expr * childData_expr = NULL ; ex_expr * cnvChildData_expr = NULL ; ULng32 i; ULng32 requestRowLen = 0; ULng32 outputRowLen = 0; ULng32 childDataRowLen = 0; ULng32 cnvChildDataRowLen = 0; ExpTupleDesc *requestTupleDesc = NULL; ExpTupleDesc *outputTupleDesc = NULL; ExpTupleDesc *childDataTupleDesc = NULL; ExpTupleDesc *cnvChildDataTupleDesc = NULL; newTdb = NULL; OperatorTypeEnum relExprType = relExpr.getOperatorType(); GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen") FastExtract * fastExtract = (FastExtract *) &relExpr; const Int32 workAtpNumber = 1; ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN); ex_cri_desc *returned_desc = NULL; ex_cri_desc *work_cri_desc = NULL; returned_desc = given_desc; // Setup local variables related to the work ATP unsigned short numWorkTupps = 0; unsigned short childDataTuppIndex = 0; unsigned short cnvChildDataTuppIndex = 0; numWorkTupps = 3; childDataTuppIndex = numWorkTupps - 1 ; numWorkTupps ++; cnvChildDataTuppIndex = numWorkTupps - 1; work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space); ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT; ValueIdList childDataVids; ValueIdList cnvChildDataVids; const ValueIdList& childVals = fastExtract->getSelectList(); const NATable *hiveNATable = NULL; const NAColumnArray *hiveNAColArray = NULL; // hiveInsertErrMode: // if 0, do not do error checks. // if 1, do error check and return error. // if 2, do error check and ignore row, if error // if 3, insert null if an error occurs Lng32 hiveInsertErrMode = 0; if ((fastExtract) && (fastExtract->isHiveInsert()) && (fastExtract->getHiveTableDesc()) && (fastExtract->getHiveTableDesc()->getNATable()) && ((hiveInsertErrMode = CmpCommon::getDefaultNumeric(HIVE_INSERT_ERROR_MODE)) > 0)) { hiveNATable = fastExtract->getHiveTableDesc()->getNATable(); hiveNAColArray = &hiveNATable->getNAColumnArray(); } for (i = 0; i < childVals.entries(); i++) { ItemExpr &inputExpr = *(childVals[i].getItemExpr()); const NAType &formalType = childVals[i].getType(); ItemExpr *lmExpr = NULL; ItemExpr *lmExpr2 = NULL; int res; lmExpr = &inputExpr; lmExpr = lmExpr->bindNode(generator->getBindWA()); if (!lmExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "lmExpr->bindNode failed"); } // Hive insert converts child data into string format and inserts // it into target table. // If child type can into an error during conversion, then // add a Cast to convert from child type to target type before // converting to string format to be inserted. if (hiveNAColArray) { const NAColumn *hiveNACol = (*hiveNAColArray)[i]; const NAType *hiveNAType = hiveNACol->getType(); // if tgt type was a hive 'string', do not return a conversion err if ((lmExpr->getValueId().getType().errorsCanOccur(*hiveNAType)) && (NOT ((DFS2REC::isSQLVarChar(hiveNAType->getFSDatatype())) && (((SQLVarChar*)hiveNAType)->wasHiveString())))) { ItemExpr *newExpr = new(generator->wHeap()) Cast(lmExpr, hiveNAType); newExpr = newExpr->bindNode(generator->getBindWA()); if (!newExpr || generator->getBindWA()->errStatus()) { GenAssert(0, "newExpr->bindNode failed"); } if (hiveInsertErrMode == 3) ((Cast*)newExpr)->setConvertNullWhenError(TRUE); lmExpr = newExpr; } } res = CreateAllCharsExpr(formalType, // [IN] Child output type *lmExpr, // [IN] Actual input value cmpContext, // [IN] Compilation context lmExpr2 // [OUT] Returned expression ); GenAssert(res == 0 && lmExpr != NULL, "Error building expression tree for LM child Input value"); childDataVids.insert(lmExpr->getValueId()); if (lmExpr2) { lmExpr2->bindNode(generator->getBindWA()); cnvChildDataVids.insert(lmExpr2->getValueId()); } } // for (i = 0; i < childVals.entries(); i++) if (childDataVids.entries() > 0 && cnvChildDataVids.entries()>0) //-- convertedChildDataVids { UInt16 pcm = exp_gen->getPCodeMode(); if ((hiveNAColArray) && (hiveInsertErrMode == 3)) { // if error mode is 3 (mode null when error), disable pcode. // this feature is currently not being handled by pcode. // (added as part of JIRA 1920 in FileScan::codeGenForHive). exp_gen->setPCodeMode(ex_expr::PCODE_NONE); } exp_gen->generateContiguousMoveExpr ( childDataVids, //childDataVids// [IN] source ValueIds TRUE, // [IN] add convert nodes? workAtpNumber, // [IN] target atp number (0 or 1) childDataTuppIndex, // [IN] target tupp index childReqFormat, // [IN] target tuple data format childDataRowLen, // [OUT] target tuple length &childData_expr, // [OUT] move expression &childDataTupleDesc, // [optional OUT] target tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] target desc format ); exp_gen->setPCodeMode(pcm); exp_gen->processValIdList ( cnvChildDataVids, // [IN] ValueIdList ExpTupleDesc::SQLARK_EXPLODED_FORMAT, // [IN] tuple data format cnvChildDataRowLen, // [OUT] tuple length workAtpNumber, // [IN] atp number cnvChildDataTuppIndex, // [IN] index into atp &cnvChildDataTupleDesc, // [optional OUT] tuple desc ExpTupleDesc::LONG_FORMAT // [optional IN] tuple desc format ); } // // Add the tuple descriptor for request values to the work ATP // work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc); work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc); // We can now remove all appended map tables generator->removeAll(last_map_table); ComSInt32 maxrs = 0; UInt32 flags = 0; UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS); UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC); Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE); hdfsBufSize = hdfsBufSize * 1024; // convert to bytes Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION); // Create a TDB ComTdbFastExtract *tdb = new (space) ComTdbFastExtract ( flags, estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName, delimiter, header, nullString, recordSeparator, given_desc, returned_desc, work_cri_desc, downQueueMaxSize, upQueueMaxSize, (Lng32) numOutputBuffers, outputBufferSize, numIoBuffers, ioTimeout, input_expr, output_expr, requestRowLen, outputRowLen, childData_expr, childTdb, space, childDataTuppIndex, cnvChildDataTuppIndex, childDataRowLen, hdfsBufSize, replication ); tdb->setSequenceFile(isSequenceFile); tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0); tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON); tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON); if ((hiveNAColArray) && (hiveInsertErrMode == 2)) { tdb->setContinueOnError(TRUE); } generator->initTdbFields(tdb); // Generate EXPLAIN info. if (!generator->explainDisabled()) { generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator)); } // Tell the generator about our in/out rows and the new TDB generator->setCriDesc(given_desc, Generator::DOWN); generator->setCriDesc(returned_desc, Generator::UP); generator->setGenObj(&relExpr, tdb); // Return a TDB pointer to the caller newTdb = tdb; return 0; } // ft_codegen()
// AppliedStatMan::setupASMCacheForJBB method will be called from // Query::Analyze after connectivity analysis has been done and // empty logical properties have been set. void AppliedStatMan::setupASMCacheForJBB(JBB & jbb) { EstLogPropSharedPtr myEstLogProp; // get all JBBCs of JBB const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs(); CANodeId jbbcId; // for all jbbcs for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { if (NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis()) { // Evaluate local predicates only if it is a table. RelExpr * jbbcExpr = jbbcNode->getOriginalExpr(); if ((jbbcNode->getTableAnalysis() != NULL) && (jbbcExpr->getOperatorType() == REL_SCAN)) { // get the original expression of the jbbc Scan * scanExpr = (Scan *) jbbcExpr; ValueIdSet localPreds = scanExpr->getSelectionPredicates(); // if local predicates have already been computed, then skip if ((localPreds.entries() > 0) || !(lookup(jbbcId))) { // check to see this GA has already been associated with // a logExpr for synthesis. If not, then synthesize // log. expression, and then apply local predicates to it if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis()) scanExpr->synthLogProp(); myEstLogProp = getStatsForCANodeId(jbbcId); } } } } // Now do a second traversal of the JBB looking for join reducers for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { // now look for all two way joins for this child if (jbbcId.getNodeAnalysis()) { // get all JBBCs connected to this JBBC, and do a two-way // join with all of them CANodeIdSet connectedNodes = jbbcId.getNodeAnalysis()->\ getJBBC()->getJoinedJBBCs(); for (CANodeId connectedTable = connectedNodes.init(); connectedNodes.next(connectedTable); connectedNodes.advance(connectedTable)) { if (connectedTable.getNodeAnalysis()) { // ASM does not concern itself with the order of the tables, // hence it is possible that the join has already been computed CANodeIdSet tableSet = jbbcId; tableSet.insert(connectedTable); if ((myEstLogProp = getCachedStatistics(&tableSet)) == NULL) { CANodeIdSet setForjbbcId(jbbcId); CANodeIdSet setForConnectedTable(connectedTable); myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable); } } } } } } // AppliedStatMan::setupASMCacheForJBB
// This method forms the join expression for join on JBBC specified by jbbcId // inputEstLogProp should not be cacheable Join * AppliedStatMan::formJoinExprForJoinOnJBBC( CANodeIdSet jbbSubset, CANodeId jbbcId, const ValueIdSet * jbbcLocalPreds, const ValueIdSet * joinPreds, const EstLogPropSharedPtr& inputEstLogProp, const NABoolean cacheable) { NABoolean origInputIsCacheable = inputEstLogProp->isCacheable(); if(origInputIsCacheable) { inputEstLogProp->setCacheableFlag(FALSE); CCMPASSERT("Expecting Non Cacheable Input"); } RelExpr * jbbcExpr = getExprForCANodeId(jbbcId, inputEstLogProp, jbbcLocalPreds); jbbcExpr->getGroupAttr()->outputLogProp(inputEstLogProp); RelExpr * jbbSubsetExpr = jbbSubset.jbbcsToJBBSubset()->getPreferredJoin(); if(!jbbSubsetExpr) if(jbbSubset.entries()==1) if(!inputEstLogProp->isCacheable()) { inputEstLogProp->setCacheableFlag(TRUE); jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); inputEstLogProp->setCacheableFlag(FALSE); } else jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); else { CCMPASSERT("No Subset expression, need at least one entry in set"); } RelExpr * leftChildExpr = jbbSubsetExpr; RelExpr * rightChildExpr = jbbcExpr; GroupAttributes * galeft = jbbSubsetExpr->getGroupAttr(); GroupAttributes * garight = jbbcExpr->getGroupAttr(); // xxx JBBC * jbbc = jbbcId.getNodeAnalysis()->getJBBC(); Join * jbbcParentJoin = jbbc->getOriginalParentJoin(); ValueIdSet leftOuterJoinFilterPreds; Join * joinExpr = NULL; if(jbbcParentJoin) { if(jbbcParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(jbbcParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(jbbcParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += jbbc->getLeftJoinFilterPreds(); } if(joinExpr) { joinExpr->setJoinPred(jbbc->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(jbbc->nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_JOIN, NULL); } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*joinPreds); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; requiredOutputs.addSet(getPotentialOutputs(jbbSubset)); requiredOutputs.addSet(getPotentialOutputs(jbbcId)); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. // Else JBBSubset is NULL CANodeIdSet combinedSet = jbbSubset; combinedSet += jbbcId; if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedSet.jbbcsToJBBSubset()); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); joinExpr->synthLogProp(); inputEstLogProp->setCacheableFlag(origInputIsCacheable); return joinExpr; } // AppliedStatMan::formJoinExprForJoinOnJBBC
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId( CANodeId jbbc, const EstLogPropSharedPtr &inLP, const ValueIdSet * predIdSet) { EstLogPropSharedPtr inputLP = inLP; if(inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP)) inputLP = jbbc.getJBBInput(); EstLogPropSharedPtr outputEstLogProp = NULL; // 1. Try to find Logical Properties from cache if cacheable. // The estimate Logical Properties can be cacheable if all local // predicates are to be applied and if inNodeSet is provided, // or the inLP are cacheable if ((inputLP->isCacheable()) && (predIdSet == NULL) ) { CANodeIdSet combinedSet = jbbc; // get the nodeIdSet of the outer child, if not already given. This // along with the present jbbc is used as a key in the cache CANodeIdSet * inputNodeSet; inputNodeSet = inputLP->getNodeSet(); // if inLP are cacheable these should have a nodeSet attached CCMPASSERT(inputNodeSet != NULL); if (inputNodeSet) { combinedSet.insert(*inputNodeSet); // if estLogProp for all local predicates is required, // then it might already exist in the cache outputEstLogProp = getCachedStatistics(&combinedSet); } } if (outputEstLogProp == NULL) { // 2. properties do not exist in the cache, so synthesize them. //if specified by the user apply those predicates, // else apply predicates in the original expr NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis(); TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis(); if (tableAnalysis && predIdSet) { TableDesc * tableDesc = tableAnalysis->getTableDesc(); const QualifiedName& qualName = tableDesc->getNATable()->getTableName(); CorrName name(qualName, STMTHEAP); Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP); Cardinality rc = tableDesc->getNATable()->getEstRowCount(); const CardinalityHint* cardHint = tableDesc->getCardinalityHint(); if ( cardHint ) rc = (cardHint->getScanCardinality()).getValue(); if ( !cardHint && tableDesc->getNATable()->isHbaseTable() ) { NATable* nt = (NATable*)(tableDesc->getNATable()); StatsList* statsList = nt->getColStats(); if ( statsList && statsList->entries() > 0 ) { ColStatsSharedPtr cStatsPtr = statsList->getSingleColumnColStats(0); if ( cStatsPtr ) rc = (cStatsPtr->getRowcount()).getValue(); } } scanExpr->setBaseCardinality(MIN_ONE (rc)); GroupAttributes * gaExpr = new STMTHEAP GroupAttributes(); scanExpr->setSelectionPredicates(*predIdSet); ValueIdSet requiredOutputs = jbbc.getNodeAnalysis()->\ getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs(); gaExpr->setCharacteristicOutputs(requiredOutputs); scanExpr->setGroupAttr(gaExpr); gaExpr->setLogExprForSynthesis(scanExpr); EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*inputLP)); nonCacheableInLP->setCacheableFlag(FALSE); scanExpr->synthLogProp(); outputEstLogProp = scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP); } else { NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis(); RelExpr * relExpr = nodeAnalysis->getModifiedExpr(); if (relExpr == NULL) relExpr = nodeAnalysis->getOriginalExpr(); // synthesize and cache estLogProp for the given inLP. outputEstLogProp = relExpr->getGroupAttr()->outputLogProp(inputLP); } } return outputEstLogProp; } // getStatsForCANodeId