////////////////////////////////////////////////////////////////////////////// // Does this base table have a supporting index on the MAV GroupBy columns? // (Supporting index means that the MAV GroupBy columns must be a prefix // of the index columns). ////////////////////////////////////////////////////////////////////////////// NABoolean Refresh::doesBaseTableHaveSupportingIndex(BindWA *bindWA, MVInfoForDML *mvInfo) const { CollIndex i; LIST (MVUsedObjectInfo*)& UsedObjList = mvInfo->getUsedObjectsList(); MVUsedObjectInfo* pUsedTable = UsedObjList[0]; // Extract GroupBy columns const MVColumns& pMvInfoColumnList = mvInfo->getMVColumns(); LIST (MVColumnInfo *) mvGroupByColumns(bindWA->wHeap()); for ( i = 0 ; i < pMvInfoColumnList.entries() ; i++) { #pragma nowarn(1506) // warning elimination MVColumnInfo *currentMvColInfo = pMvInfoColumnList[i]; #pragma warn(1506) // warning elimination if (COM_MVCOL_GROUPBY == currentMvColInfo->getColType()) { mvGroupByColumns.insert(currentMvColInfo); } } // If the MAV does not have any group by columns - than there is // no supporting index. if (mvGroupByColumns.entries() == 0) return FALSE; // Get the NATable QualifiedName underlyingTableName = pUsedTable->getObjectName(); CorrName corrTableName(underlyingTableName); NATable * pNaTable = bindWA->getNATable(corrTableName, FALSE); // Construct table GroupBy NAColumnArray tableGroupByArray; const NAColumnArray & columnArray = pNaTable->getNAColumnArray(); for ( i = 0 ; i < mvGroupByColumns.entries() ; i++) { Int32 tableColNum = (mvGroupByColumns)[i]->getOrigColNumber(); NAColumn * pColumn = columnArray.getColumn(tableColNum); tableGroupByArray.insert(pColumn); } // Check the clustering Index. const NAFileSet *pClusteringIndexFileSet = pNaTable->getClusteringIndex(); const NAColumnArray& ciColumns = pClusteringIndexFileSet->getIndexKeyColumns(); if (TRUE == areGroupByColsAnIndexPrefix(tableGroupByArray, ciColumns)) { return TRUE; } // Check any secondary indices. const NAFileSetList & indexFileSetList = pNaTable->getIndexList(); for ( i = 0 ; i < indexFileSetList.entries() ; i++) { const NAFileSet *pSecondaryIndexFileSet = indexFileSetList[i]; const NAColumnArray& siColumns = pSecondaryIndexFileSet->getIndexKeyColumns(); if (TRUE == areGroupByColsAnIndexPrefix(tableGroupByArray, siColumns)) { return TRUE; } } return FALSE; }
Lng32 AddKeyGroups() { HSGlobalsClass *hs_globals = GetHSContext(); if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName())) { // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented. *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive"); return -1; } Lng32 retcode = 0; Lng32 numColsInGroup = 0; HSColumnStruct col; NAString tempColList = ""; NAString tempCol; NAString autoGroup; ULng32 numKeys; ULng32 i, j; NATable* naTbl = hs_globals->objDef->getNATable(); HSLogMan *LM = HSLogMan::Instance(); // ---------------------------------------------------------- // Generate histograms for KEY // ---------------------------------------------------------- // The clustering index is included in the list of indices returned by // NATable::getIndexList(), so we store its pointer so we can skip it // when the other indexes are processed below. NAFileSet* clusteringIndex = naTbl->getClusteringIndex(); const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns(); Lng32 colPos; numKeys = keyCols.entries(); if (numKeys == 1) // SINGLE-COLUMN KEY { colPos = keyCols[0]->getPosition(); if (LM->LogNeeded()) { sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos)); LM->Log(LM->msg); } if (ColumnExists(colPos)) // avoid duplicates { LM->Log("\t\t** duplicate column group has been ignored."); } else // add to single-column group list { retcode = AddSingleColumn(colPos); } } else if (numKeys > 1) // MULTI-COLUMN KEY { // Create multiple MC group(s) if numkeys > 1. Subset MC groups will // also be created if numkeys > 2, E.g. If numkeys = 5, then // MC groups with 5, 4, 3, and 2 columns will be created using // the key columns. Note that if numkeys is larger than CQD // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number // of groups created will be limited by this value. So, e.g. if // numkeys = 10, then MC groups with 5, 4, 3, and 2 columns will // be created (that is, 5 groups will be created - incl the single). ULng32 minMCGroupSz = 2; ULng32 maxMCGroups = (ULng32) CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); // Generate no MCs with more cols than specified by the cqd. if (numKeys > maxMCGroups) numKeys = maxMCGroups; // For salted table, generate only the longest MC for the key (subject // to max cols determined above) unless a cqd is set to gen all MCs of // allowable sizes. if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF && hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0) minMCGroupSz = numKeys; while (numKeys >= minMCGroupSz) // Create only MC groups not single cols { HSColSet colSet; autoGroup = "("; for (j = 0; j < numKeys; j++) { colPos = keyCols[j]->getPosition(); col = hs_globals->objDef->getColInfo(colPos); col.colnum = colPos; colSet.insert(col); autoGroup += col.colname->data(); autoGroup += ","; } if (LM->LogNeeded()) { autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data()); LM->Log(LM->msg); } if (retcode = AddColumnSet(colSet)) { HSHandleError(retcode); } numKeys--; } } // ---------------------------------------------------------- // Generate histograms for all INDEXES // ---------------------------------------------------------- const NAFileSetList& indexes = naTbl->getIndexList(); NAFileSet* index; for (i = 0; i < indexes.entries(); i++ ) { index = indexes[i]; if (index == clusteringIndex) continue; // clustering index processed above already const NAColumnArray& keyCols = index->getIndexKeyColumns(); numKeys = keyCols.entries(); if (numKeys == 1) // SINGLE-COLUMN INDEX { colPos = keyCols[0]->getPosition(); if (LM->LogNeeded()) { sprintf(LM->msg, "\t\tINDEX[%d]\t(%s)", i, hs_globals->objDef->getColName(colPos)); LM->Log(LM->msg); } if (ColumnExists(colPos)) // avoid duplicates { LM->Log("\t\t*** duplicate column group has been ignored."); } else // add to single-column group list { retcode = AddSingleColumn(colPos); } } else // MULTI-COLUMN INDEX { // Create multiple MC group(s) if numkeys > 1. Subset MC groups will // also be created if numkeys > 2, E.g. If numkeys = 5, then // MC groups with 5, 4, 3, and 2 columns will be created using // the key columns. Note that if numkeys is larger than CQD // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number // of groups created will be limited by this value. So, e.g. if // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will // be created (that is, 5 groups will be created). ULng32 minMCGroupSz = 2; ULng32 maxMCGroups = (ULng32) CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); if (numKeys > maxMCGroups) minMCGroupSz = numKeys - maxMCGroups + 1; while (numKeys >= minMCGroupSz) // MinMCGroupSz is greater than 1. { HSColSet colSet; tempColList = ""; autoGroup = "("; for (j = 0; j < numKeys; j++) { colPos = keyCols[j]->getPosition(); tempCol = "."; tempCol += LongToNAString(colPos); tempCol += "."; // Eliminate duplicate columns in the index; // They may have been introduced by appending the key to the specified index. if (!tempColList.contains(tempCol)) { col = hs_globals->objDef->getColInfo(colPos); col.colnum = colPos; colSet.insert((const struct HSColumnStruct) col); tempColList += tempCol.data(); numColsInGroup++; autoGroup += col.colname->data(); autoGroup += ","; } } if (colSet.entries()) { if (numColsInGroup > 1) { if (LM->LogNeeded()) { autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis sprintf(LM->msg, "\t\tINDEX[%d]\t%s", i, autoGroup.data()); LM->Log(LM->msg); } if (retcode = AddColumnSet(colSet)) { HSHandleError(retcode); } } numColsInGroup = 0; } numKeys--; } } } return retcode; }