// append an ascii-version of ItemExpr into cachewa.qryText_ void BaseColumn::generateCacheKey(CacheWA& cwa) const { if (getTableDesc()->getCorrNameObj().getPrototype() == NULL) { // return the table in the format "table.col" ColRefName name (getTableDesc()->getNATable()->getNAColumnArray()[colNumber_]-> getColName(), getTableDesc()->getCorrNameObj(), CmpCommon::statementHeap()); cwa += name.getColRefAsAnsiString(); } else { ColRefName name (getTableDesc()->getNATable()->getNAColumnArray()[colNumber_]-> getColName(), CmpCommon::statementHeap()); cwa += name.getColRefAsAnsiString(); } }
// append an ascii-version of Insert into cachewa.qryText_ void Insert::generateCacheKey(CacheWA &cwa) const { GenericUpdate::generateCacheKey(cwa); if (insertColTree_) { cwa += " insCol:"; insertColTree_->generateCacheKey(cwa); } // order by clause is important ItemExpr *orderBy = orderByTree_ ? orderByTree_ : reqdOrder_.rebuildExprTree(); if (orderBy) { cwa += " order:"; orderBy->generateCacheKey(cwa); } const NATable *tbl; if (cwa.getPhase() >= CmpMain::BIND && getTableDesc() && (tbl=getTableDesc()->getNATable()) != NULL) { // If PARTITION clause has been used we must reflect that in the key. if (tbl->isPartitionNameSpecified()) { cwa += " partition:"; cwa += tbl->getClusteringIndex()->getFileSetName().getQualifiedNameAsString().data(); } // If PARTITION range has been used we must reflect that in the key. else if (tbl->isPartitionRangeSpecified()) { cwa += " partition:"; char str[100]; sprintf(str, " from %d to %d", tbl->getExtendedQualName().getPartnClause().getBeginPartitionNumber() , tbl->getExtendedQualName().getPartnClause().getEndPartitionNumber()); cwa += str; } } if (isUpsert()) { cwa += " upsert:"; } }
// Ratio of the scan row count without hint (taken from this node's output
// logical properties for the empty input) to the row count recorded in the
// first column statistics entry of the table.
CostScalar Scan::computeBaseSelectivity() const
{
  const double rowsAfterLocalPreds =
    getGroupAttr()->outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))
                  ->getColStats().getScanRowCountWithoutHint().getValue();

  const double totalRows =
    getTableDesc()->tableColStats()[0]->getColStats()->getRowcount().getValue();

  // No check for a zero divisor here: per the original author's note, the
  // called routines ensure both counts are at least 1.
  return rowsAfterLocalPreds / totalRows;
}
// Generate the ComTdbStoredProc for this internal stored procedure node.
//
// Steps, in the order performed below:
//   1. build the cri descriptors (given, returned, work),
//   2. build a Cast per procedure parameter and generate the contiguous-move
//      expression that creates the input row,
//   3. add the table descriptor's columns to the map table and generate the
//      contiguous-move expression that returns the output row to the parent,
//   4. generate the ExSPInputOutput extract/move expressions,
//   5. drop the map tables appended here and append the one describing the
//      returned columns,
//   6. generate the optional selection predicate,
//   7. allocate the TDB and register it, the cri descriptors and the
//      transaction requirements with the generator.
//
// Always returns 0.
short RelInternalSP::codeGen(Generator * generator)
{
  Space * space = generator->getSpace();
  ExpGenerator * exp_gen = generator->getExpGenerator();
  // remembered so that map tables appended below can be removed again
  MapTable * last_map_table = generator->getLastMapTable();

  ex_expr * input_expr = NULL;
  ex_expr * output_expr = NULL;

  ////////////////////////////////////////////////////////////////////////////
  //
  // Returned atp layout:
  //
  // |--------------------------------|
  // | input data  | stored proc row  |
  // | ( I tupps ) | ( 1 tupp )       |
  // |--------------------------------|
  // <-- returned row to parent ---->
  //
  // input data:      the atp input to this node by its parent.
  // stored proc row: tupp where the row read from SP is moved.
  //
  ////////////////////////////////////////////////////////////////////////////

  ex_cri_desc * given_desc = generator->getCriDesc(Generator::DOWN);

  // one more tupp than the given descriptor: the stored proc row
  ex_cri_desc * returned_desc = new(space) ex_cri_desc(given_desc->noTuples() + 1, space);

  // cri descriptor for work atp has 3 entries:
  // -- the first two entries for consts and temps.
  // -- Entry 3(index #2) is where the input and output rows will be created.
  ex_cri_desc * work_cri_desc = new(space) ex_cri_desc(3, space);
  const Int32 work_atp = 1;
  const Int32 work_atp_index = 2;

  ExpTupleDesc * input_tuple_desc = NULL;
  ExpTupleDesc * output_tuple_desc = NULL;

  // Generate expression to create the input row that will be
  // given to the stored proc.
  // The input value is in sp->getProcAllParams()
  // and has to be converted to sp->procType().
  // Generate Cast node to convert procParam to ProcType.
  // If procType is a varchar, explode it. This is done
  // so that values could be extracted correctly.
  ValueIdList procVIDList;
  for (CollIndex i = 0; i < procTypes().entries(); i++)
    {
      Cast * cn;

      // a non-zero varlen header size is what identifies a varchar here
      if ((procTypes())[i].getType().getVarLenHdrSize() > 0)
        {
          // 5/9/98: add support for VARNCHAR
          const CharType& char_type = (CharType&)((procTypes())[i].getType());

          // Explode varchars by moving them to a fixed field
          // whose length is equal to the max length of varchar.
          cn = new(generator->wHeap())
            Cast ((getProcAllParamsVids())[i].getItemExpr(),
                  (new(generator->wHeap())
                   SQLChar(generator->wHeap(), CharLenInfo(char_type.getStrCharLimit(), char_type.getDataStorageSize()),
                           char_type.supportsSQLnull(),
                           FALSE, FALSE, FALSE,
                           char_type.getCharSet(),
                           char_type.getCollation(),
                           char_type.getCoercibility()
                           /*
                           (procTypes())[i].getType().getNominalSize(),
                           (procTypes())[i].getType().supportsSQLnull()
                           */
                           )
                   )
                  );

          // Move the exploded field to a varchar field since
          // procType is varchar.
          // Can optimize by adding an option to convert node to
          // blankpad. TBD.
          //
          cn = new(generator->wHeap())
            Cast(cn, &((procTypes())[i].getType()));
        }
      else
        cn = new(generator->wHeap()) Cast((getProcAllParamsVids())[i].getItemExpr(),
                                          &((procTypes())[i].getType()));

      cn->bindNode(generator->getBindWA());
      procVIDList.insert(cn->getValueId());
    }

  // input row is built in the work atp at work_atp_index
  ULng32 inputRowlen_ = 0;
  exp_gen->generateContiguousMoveExpr(procVIDList, -1, /*add conv nodes*/
                                      work_atp, work_atp_index,
                                      ExpTupleDesc::SQLARK_EXPLODED_FORMAT,
                                      inputRowlen_,
                                      &input_expr,
                                      &input_tuple_desc,
                                      ExpTupleDesc::LONG_FORMAT);

  // add all columns from this SP to the map table.
  // tupleLength is handed to processValIdList and not read again in this
  // function.
  ULng32 tupleLength;
  exp_gen->processValIdList(getTableDesc()->getColumnList(),
                            ExpTupleDesc::SQLARK_EXPLODED_FORMAT,
                            tupleLength,
                            work_atp,
                            work_atp_index);

  // Generate expression to move the output row returned by the
  // stored proc back to parent.
  // The target is atp 0, last tupp of the returned descriptor.
  ULng32 outputRowlen_ = 0;
  MapTable * returnedMapTable = 0;
  exp_gen->generateContiguousMoveExpr(getTableDesc()->getColumnList(),
                                      -1 /*add conv nodes*/,
                                      0, returned_desc->noTuples() - 1,
                                      ExpTupleDesc::SQLARK_EXPLODED_FORMAT,
                                      outputRowlen_,
                                      &output_expr,
                                      &output_tuple_desc,
                                      ExpTupleDesc::LONG_FORMAT,
                                      &returnedMapTable);

  // Now generate expressions used to extract or move input or
  // output values. See class ExSPInputOutput.
  ExSPInputOutput * extractInputExpr = NULL;
  ExSPInputOutput * moveOutputExpr = NULL;

  generateSPIOExpr(this, generator,
                   extractInputExpr,
                   moveOutputExpr);

  // done with expressions at this operator. Remove the appended map tables.
  generator->removeAll(last_map_table);

  // append the map table containing the returned columns
  generator->appendAtEnd(returnedMapTable);

  // copy the procedure name into the generator's space
  NAString procNameAsNAString(procName_);
  char * sp_name =
    space->allocateAndCopyToAlignedSpace(procNameAsNAString,
                                         procNameAsNAString.length(), 0);

  // obtained from the same generator->getExpGenerator() call as exp_gen above
  ExpGenerator *expGen = generator->getExpGenerator();

  // expression to conditionally return 0 or more rows.
  ex_expr *predExpr = NULL;

  // generate tuple selection expression, if present
  if(NOT selectionPred().isEmpty())
  {
    ItemExpr* pred = selectionPred().rebuildExprTree(ITM_AND,TRUE,TRUE);
    expGen->generateExpr(pred->getValueId(),ex_expr::exp_SCAN_PRED,&predExpr);
  }

  // NOTE(review): the literals 2, 1024, 5 and 64000 are presumably queue
  // sizes and buffer count/size -- confirm against the ComTdbStoredProc
  // constructor.
  ComTdbStoredProc * sp_tdb = new(space)
    ComTdbStoredProc(sp_name,
                     input_expr,
                     inputRowlen_,
                     output_expr,
                     outputRowlen_,
                     work_cri_desc,
                     work_atp_index,
                     given_desc,
                     returned_desc,
                     extractInputExpr,
                     moveOutputExpr,
                     2,
                     1024,
                     (Cardinality) getGroupAttr()->
                     getOutputLogPropList()[0]->
                     getResultCardinality().value(),
                     5,
                     64000, //10240
                     predExpr,
                     (UInt16) arkcmpInfo_);

  generator->initTdbFields(sp_tdb);

  if(!generator->explainDisabled())
    {
      generator->setExplainTuple(
        addExplainInfo(sp_tdb, 0, 0, generator));
    }

  // Do not infer that any transaction started can
  // be in READ ONLY mode if ISPs are present.
  generator->setNeedsReadWriteTransaction(TRUE);

  generator->setCriDesc(given_desc, Generator::DOWN);
  generator->setCriDesc(returned_desc, Generator::UP);
  generator->setGenObj(this, sp_tdb);

  // Some built-in functions require a TMF transaction
  // because they get their information from catman
  generator->setTransactionFlag(getRequiresTMFTransaction());

  return 0;
}
// append an ascii-version of GenericUpdate into cachewa.qryText_ void GenericUpdate::generateCacheKey(CacheWA& cwa) const // NB: This comment applies to all generateCacheKey methods. // generateCacheKey is used to generate a string representation s of the // "parameterized" query. Since this string s is used by QCache::lookUp // to determine if a query is in the cache, it is essential that: // (1) two different queries have different string representations // (2) two queries that differ only in their query literals should // have the same string representations // One possible implementation of generateCacheKey is to use the query's // original query text. But, original query text does not satisfy (2). // To get (2), we call generateCacheKey() from RelRoot::normalizeForCache // which, by definition, replaced query literals with constant parameters. // However, generateCacheKey must also satisfy (1). generateCacheKey must // generate two different strings for two logically different queries. // // To satisfy requirements (1) and (2), generateCacheKey and // normalizeForCache must be in sync -- every user-specified expr that // generateCacheKey emits into cwa.qryText_ must be examined by // normalizeForCache for possible replacement of any literal there into // a constant parameter. // // In order for the literal-into-constantparameter replacement to be safe, // isCacheableExpr must visit all user-specified exprs to make sure that // only constants that can be safely cast into the query's target types // are considered cacheable. For example, given this update query // update t set a = 'xyz' where pk = 1; // isCacheableeExpr, normalizeForCache, and generateCacheKey must cooperate // so that: // 1) isCacheableExpr rejects the query as noncacheble if 'xyz' cannot be // safely cast into a's target type, eg, 'xyz' may be too long if a's // type is char(1). // 2) normalizeForCache must visit and replace both 'xyz' and 1 with // appropriate constant parameters. 
// 3) generateCacheKey must emit some string representation of the // parameterized query, eg, "update t set a = % where pk = %". // generateCacheKey can emit more stuff, eg, internally specified // begin/end-key predicates, but it must emit a string representation // of all user-specified parts of the query. { // append to cwa.qryText_ GenericUpdate's "essential" data members RelExpr::generateCacheKey(cwa); // An extension of the fix to 10-010618-3505, 10-010619-3515: // for "after bind" Insert/Update/Delete queries, include table's // RedefTime into cwa.qryText_ to make sure we get a cache hit only on // query that reference table(s) that have not changed since the query's // addition to the cache. The queries that reference altered table(s) // will never be hit again and will eventually age out of the cache. // This is not strictly necessary, but it speeds up the processing // of insert/update/delete queries on altered tables. const NATable *tbl; if (cwa.getPhase() >= CmpMain::BIND && getTableDesc() && (tbl=getTableDesc()->getNATable()) != NULL) { char redefTime[40]; convertInt64ToAscii(tbl->getRedefTime(), redefTime); cwa += " redef:"; cwa += redefTime; } ItemExpr *newExpr = newRecExprTree_ ? newRecExprTree_ : newRecExpr_.rebuildExprTree(ITM_ITEM_LIST); if (newExpr) { cwa += " newRecExpr:"; newExpr->generateCacheKey(cwa); } // make sure cache key can distinguish these 2 queries: // prepare s from select * from (update t042qT8 set b=7 where a=2) as t; // prepare s from select * from (update t042qT8 set b=7 set on rollback c=2 // where a=2) as t; ItemExpr *setOnRollback; if (newRecBeforeExpr_.entries() > 0 && (setOnRollback=newRecBeforeExpr_.rebuildExprTree(ITM_ITEM_LIST))) { cwa += " setOnRollback:"; setOnRollback->generateCacheKey(cwa); } ItemExpr *execPred = executorPredTree_ ? executorPredTree_ : executorPred_.rebuildExprTree(); if (execPred) { cwa += " execPred:"; execPred->generateCacheKey(cwa); } // MVs -- // The NOLOG parameter is essential. 
if (isNoLogOperation()) { cwa += " NOLOG"; } // "current of cursor/hostvar" is essential if (currOfCursorName_) { currOfCursorName_->generateCacheKey(cwa); } // not sure if the following are essential, but better to be safe & // slightly inefficient than to deliver a false hit (ie, wrong plan) cwa += mtsStatement_ ? "m1" : "m0"; cwa += noFlow_ ? "n1" : "n0"; cwa += noRollback_ ? "o1" : "o0"; cwa += noCheck_ ? "nc" : "dc"; // not sure if the following are essential, but we don't know how // to quickly & cheaply include them into our cachekey: // updatedTableName_, tabId_, updateToSelectMap_, indexDesc_, // newRecExprArray_, usedColumns_, newRecBeforeExpr_, // newRecBeforeExprArray_, usedBeforeColumns_, potentialOutputs_ // indexNumberArray_, scanIndexDesc_, rowsAffected_, stoi_, // oldToNewMap_ // The following data members are not "essential" to generateCacheKey // (at least "after bind") because they are either covered by other // data members (eg, beginKeyPred and endKeyPred_ are covered by the // selection pred in RelExpr) or they are not yet defined until later // (eg, after the optimize phase): // indexNewRecExprArrays_, beginKeyPred_, endKeyPred_, // pathKeys_, partKeys_, indexBeginKeyPredArray_, // indexEndKeyPredArray_, checkConstraints_ }
// Normalize this Scan for query caching.
//
// Returns this node unchanged when it is already normalized. When the
// QUERY_CACHE_TABLENAME default is OFF, only the descendants' literals are
// parameterized (via RelExpr::normalizeForCache). Otherwise, if the table's
// correlation name has no prototype yet, the table name is replaced with a
// prototyped host variable and the name is registered with cwa as a constant
// parameter before the descendants are normalized.
RelExpr* Scan::normalizeForCache(CacheWA& cwa, BindWA& bindWA)
{
  if (nodeIsNormalizedForCache()) {
    return this;
  }

  if (CmpCommon::getDefault(QUERY_CACHE_TABLENAME) == DF_OFF) {
    // replace descendants' literals into ConstantParameters
    return RelExpr::normalizeForCache(cwa, bindWA);
  }

  // replace tablename with a prototyped tablename.
  TableDesc * td = getTableDesc();
  CorrName &origName = td->getCorrNameObj();
  if (origName.getPrototype() == NULL)
  {
    // size, in bytes, of the character type and buffer used for the name
    const Lng32 CACHED_MAX_ANSI_NAME_EXTERNAL_LEN = 128;

    NAString hvName("dummy_name");
    HostVar * hv = new(bindWA.wHeap())
      HostVar(hvName,
              new(bindWA.wHeap()) SQLChar(CACHED_MAX_ANSI_NAME_EXTERNAL_LEN));
    hv->setPrototypeValue(origName.getQualifiedNameAsString());
    hv->synthTypeAndValueId();
    hv->setIsCachedParam(TRUE);

    CorrName cn("HostVar$", bindWA.wHeap(),
                hv->getName(), // debugging ease
                "$bogus");
    cn.setPrototype(hv);
    NAString *tmpName = new (bindWA.wHeap())
      NAString(hv->getPrototypeValue(), bindWA.wHeap());
    cn.setUgivenName(*tmpName);
    cn.applyDefaults(&bindWA, bindWA.getDefaultSchema());
    td->setCorrName(cn);
    setTableName(cn);

    // NOTE(review): origName is a reference to td's CorrName object; after
    // td->setCorrName(cn) above it may already observe the replacement
    // name rather than the user's table name -- confirm which is intended.
    char * strval = new(bindWA.wHeap())
      char[CACHED_MAX_ANSI_NAME_EXTERNAL_LEN];

    // Bounded copy: a qualified name longer than the buffer must not write
    // past its end. strncpy also zero-fills the unused tail, so the full
    // CACHED_MAX_ANSI_NAME_EXTERNAL_LEN bytes handed to ConstValue below are
    // initialized.
    strncpy(strval, origName.getQualifiedNameAsString().data(),
            CACHED_MAX_ANSI_NAME_EXTERNAL_LEN - 1);
    strval[CACHED_MAX_ANSI_NAME_EXTERNAL_LEN - 1] = '\0';

    CharType * typ = new(bindWA.wHeap())
      SQLChar(CACHED_MAX_ANSI_NAME_EXTERNAL_LEN, FALSE);
    ConstValue * cv = new(bindWA.wHeap())
      ConstValue(typ, strval, CACHED_MAX_ANSI_NAME_EXTERNAL_LEN);
    ConstantParameter* result = new(bindWA.wHeap()) ConstantParameter
      (*cv, bindWA.wHeap(), cwa.getPhase() == CmpMain::PARSE);
    result->synthTypeAndValueId();
    cwa.addConstParam(result, bindWA);

    // the index passed is the constant parameter count after the add above
    hv->setPMOrdPosAndIndex(COM_UNKNOWN_DIRECTION, -1,
                            (Int32)cwa.getConstParams().entries());
  }

  // replace descendants' literals into ConstantParameters
  return RelExpr::normalizeForCache(cwa, bindWA);
}
// append an ascii-version of Scan into cachewa.qryText_ void Scan::generateCacheKey(CacheWA &cwa) const { RelExpr::generateCacheKey(cwa); // Fix to 10-010618-3505, 10-010619-3515: include this Scan table's // RedefTime into cwa.qryText_ to make sure we get a cache hit only on // query that reference table(s) that have not changed since the query's // addition to the cache. The queries that reference altered table(s) // will never be hit again and will eventually age out of the cache. const NATable *tbl; if (cwa.getPhase() >= CmpMain::BIND && getTableDesc() && (tbl=getTableDesc()->getNATable()) != NULL) { char redefTime[40]; convertInt64ToAscii(tbl->getRedefTime(), redefTime); cwa += " redef:"; cwa += redefTime; if (tbl->isHiveTable()) { char lastModTime[40]; Int64 mTime = tbl->getClusteringIndex()->getHHDFSTableStats()->getModificationTS(); convertInt64ToAscii(mTime, lastModTime); cwa += " lastMod:"; cwa += lastModTime; cwa += " numFiles:"; char numFiles[20]; Int64 numberOfFiles = tbl->getClusteringIndex()->getHHDFSTableStats()->getNumFiles(); sprintf(numFiles, " %ld", numberOfFiles); cwa += numFiles ; } // save pointer to this table. later, QueryCache::addEntry will use // this pointer to get to this table's histograms's timestamp cwa.addTable( (NATable*)tbl ); // If PARTITION clause has been used we must reflect that in the key. if (tbl->isPartitionNameSpecified()) { cwa += " partition:"; cwa += tbl->getClusteringIndex()->getFileSetName().getQualifiedNameAsString().data(); } // If PARTITION range has been used we must reflect that in the key. else if (tbl->isPartitionRangeSpecified()) { cwa += " partition:"; char str[100]; sprintf(str, " from %d to %d", tbl->getExtendedQualName().getPartnClause().getBeginPartitionNumber() , tbl->getExtendedQualName().getPartnClause().getEndPartitionNumber()); cwa += str; } } // We must reflect userTableName_.location into cache key. 
// Otherwise, two queries which differ only in location such as // table table (table T058a, location $system.zsd12345.x1234500); // table table (table T058a, location $data .zsd12345.x1234500); // can confuse our query caching code to return a false hit and // cause fullstack/test058 to fail. cwa += userTableName_.getLocationName().data(); // Same with stream_ because queries like // "select * from t" and "select * from stream(t)" can // confuse query caching into a false hit causing test079 to fail. if (stream_) { cwa += " stream "; } // mark mpalias queries so they can be decached upon user request if (getTableDesc()->getNATable()->isAnMPTableWithAnsiName()) { cwa += AM_AN_MPALIAS_QUERY; } if (getHbaseAccessOptions()) { cwa += " hbaseVersions: "; char numVersions[20]; sprintf(numVersions, " %d", getHbaseAccessOptions()->getHbaseVersions()); cwa += numVersions ; } }