// -----------------------------------------------------------------------
// AddKeyGroups
//
// Adds histogram column groups derived from the table's clustering key and
// from each of its other indexes:
//   - a key/index with exactly one column is added as a single-column
//     group (unless ColumnExists() reports it is already present);
//   - a key/index with several columns is added as one or more
//     multi-column (MC) groups made of leading prefixes of the key
//     columns, via AddColumnSet().
//
// Returns -1 (after adding UERR_NO_ONEVERYKEY to the diagnostics area) when
// the object lives in a hive catalog. Otherwise returns the result of the
// last AddSingleColumn()/AddColumnSet() call that was made, or 0 if no
// group was added. Non-zero results are passed to HSHandleError().
// NOTE(review): HSHandleError is a macro defined outside this view; it
// presumably returns from this function on a real error -- confirm.
// -----------------------------------------------------------------------
Lng32 AddKeyGroups()
{
  HSGlobalsClass *hs_globals = GetHSContext();
  if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName()))
  {
    // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented.
    *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY)
                        << DgString0("hive");
    return -1;
  }

  Lng32 retcode = 0;
  Lng32 numColsInGroup = 0;
  HSColumnStruct col;
  NAString tempColList = "";
  NAString tempCol;
  NAString autoGroup;
  ULng32 numKeys;
  ULng32 i, j;
  NATable* naTbl = hs_globals->objDef->getNATable();
  HSLogMan *LM = HSLogMan::Instance();

  // ----------------------------------------------------------
  // Generate histograms for KEY
  // ----------------------------------------------------------

  // The clustering index is included in the list of indices returned by
  // NATable::getIndexList(), so we store its pointer so we can skip it
  // when the other indexes are processed below.
  NAFileSet* clusteringIndex = naTbl->getClusteringIndex();
  const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns();
  Lng32 colPos;
  numKeys = keyCols.entries();

  if (numKeys == 1)                            // SINGLE-COLUMN KEY
  {
    colPos = keyCols[0]->getPosition();
    if (LM->LogNeeded())
    {
      sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos));
      LM->Log(LM->msg);
    }

    if (ColumnExists(colPos))                  // avoid duplicates
    {
      LM->Log("\t\t** duplicate column group has been ignored.");
    }
    else                                       // add to single-column group list
    {
      // Check the result here; otherwise a failure would be silently
      // overwritten by the index processing further down.
      retcode = AddSingleColumn(colPos);
      if (retcode)
      {
        HSHandleError(retcode);
      }
    }
  }
  else if (numKeys > 1)                        // MULTI-COLUMN KEY
  {
    // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
    // also be created if numkeys > 2,  E.g. If numkeys = 5, then
    // MC groups with 5, 4, 3, and 2 columns will be created using
    // the key columns.  Note that if numkeys is larger than CQD
    // USTAT_NUM_MC_GROUPS_FOR_KEYS, then numkeys is first capped at that
    // value.  So, e.g. if numkeys = 10 and the CQD is 5, then MC groups
    // with 5, 4, 3, and 2 columns will be created (4 MC groups; no
    // single-column group is created by this loop).
    ULng32 minMCGroupSz = 2;
    ULng32 maxMCGroups = (ULng32)
      CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);

    // Generate no MCs with more cols than specified by the cqd.
    if (numKeys > maxMCGroups)
      numKeys = maxMCGroups;

    // For salted table, generate only the longest MC for the key (subject
    // to max cols determined above) unless a cqd is set to gen all MCs of
    // allowable sizes.
    if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF &&
        hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0)
      minMCGroupSz = numKeys;

    // The "numKeys > 0" test matters when the cqd is 0 on a salted table:
    // minMCGroupSz is then 0, "numKeys >= minMCGroupSz" is always true for
    // an unsigned value, and numKeys-- would wrap around to a huge number,
    // indexing keyCols far out of range.
    while (numKeys > 0 && numKeys >= minMCGroupSz)
    {
      HSColSet colSet;

      autoGroup = "(";
      for (j = 0; j < numKeys; j++)
      {
        colPos = keyCols[j]->getPosition();
        col = hs_globals->objDef->getColInfo(colPos);
        col.colnum = colPos;
        colSet.insert(col);
        autoGroup += col.colname->data();
        autoGroup += ",";
      }

      if (LM->LogNeeded())
      {
        autoGroup.replace(autoGroup.length()-1,1,")");  // replace comma with close parenthesis
        sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data());
        LM->Log(LM->msg);
      }

      retcode = AddColumnSet(colSet);
      if (retcode)
      {
        HSHandleError(retcode);
      }
      numKeys--;
    }
  }

  // ----------------------------------------------------------
  // Generate histograms for all INDEXES
  // ----------------------------------------------------------
  const NAFileSetList& indexes = naTbl->getIndexList();
  NAFileSet* index;
  for (i = 0; i < indexes.entries(); i++)
  {
    index = indexes[i];
    if (index == clusteringIndex)
      continue;                                // clustering index processed above already

    const NAColumnArray& indexKeyCols = index->getIndexKeyColumns();
    numKeys = indexKeyCols.entries();

    if (numKeys == 1)                          // SINGLE-COLUMN INDEX
    {
      colPos = indexKeyCols[0]->getPosition();
      if (LM->LogNeeded())
      {
        // i is unsigned, so print it with %u rather than %d.
        sprintf(LM->msg, "\t\tINDEX[%u]\t(%s)", static_cast<unsigned int>(i),
                hs_globals->objDef->getColName(colPos));
        LM->Log(LM->msg);
      }

      if (ColumnExists(colPos))                // avoid duplicates
      {
        LM->Log("\t\t*** duplicate column group has been ignored.");
      }
      else                                     // add to single-column group list
      {
        // Do not let a later iteration overwrite a failure.
        retcode = AddSingleColumn(colPos);
        if (retcode)
        {
          HSHandleError(retcode);
        }
      }
    }
    else                                       // MULTI-COLUMN INDEX
    {
      // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
      // also be created if numkeys > 2,  E.g. If numkeys = 5, then
      // MC groups with 5, 4, 3, and 2 columns will be created using
      // the key columns.  Note that if numkeys is larger than CQD
      // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number
      // of groups created will be limited by this value.  So, e.g. if
      // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will
      // be created (that is, 5 groups will be created).
      ULng32 minMCGroupSz = 2;
      ULng32 maxMCGroups = (ULng32)
        CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);
      if (numKeys > maxMCGroups)
        minMCGroupSz = numKeys - maxMCGroups + 1;

      // minMCGroupSz is always at least 2 here, so numKeys cannot wrap.
      while (numKeys >= minMCGroupSz)
      {
        HSColSet colSet;

        tempColList = "";
        autoGroup = "(";
        for (j = 0; j < numKeys; j++)
        {
          colPos = indexKeyCols[j]->getPosition();
          tempCol = ".";
          tempCol += LongToNAString(colPos);
          tempCol += ".";

          // Eliminate duplicate columns in the index;
          // They may have been introduced by appending the key to the specified index.
          if (!tempColList.contains(tempCol))
          {
            col = hs_globals->objDef->getColInfo(colPos);
            col.colnum = colPos;
            colSet.insert((const struct HSColumnStruct) col);

            tempColList += tempCol.data();
            numColsInGroup++;
            autoGroup += col.colname->data();
            autoGroup += ",";
          }
        }

        if (colSet.entries())
        {
          // After duplicate elimination the group may have shrunk to a
          // single column; only real multi-column groups are added.
          if (numColsInGroup > 1)
          {
            if (LM->LogNeeded())
            {
              autoGroup.replace(autoGroup.length()-1,1,")");  // replace comma with close parenthesis
              sprintf(LM->msg, "\t\tINDEX[%u]\t%s", static_cast<unsigned int>(i),
                      autoGroup.data());
              LM->Log(LM->msg);
            }

            retcode = AddColumnSet(colSet);
            if (retcode)
            {
              HSHandleError(retcode);
            }
          }
          numColsInGroup = 0;
        }
        numKeys--;
      }
    }
  }

  return retcode;
}
// -----------------------------------------------------------------------
// AddColumnSet
//
// Registers colSet as a new multi-column group at the head of
// hs_globals->multiGroup and increments hs_globals->groupCount.
//
// colSet: the columns of the group. Each entry's colnum is refreshed from
//         the object definition (looked up by column name) and its
//         position is set to its ordinal within the group.
//
// Returns:
//   0           the group was added;
//   HS_WARNING  the set has fewer than 2 columns, or GroupExists() reports
//               an identical group; nothing is added;
//   -1          a column name is repeated within the set (diagnostic
//               UERR_COLUMNLIST_NOT_UNIQUE is merged) or a column name
//               cannot be resolved; reported through HSHandleError().
// -----------------------------------------------------------------------
Lng32 AddColumnSet(HSColSet &colSet)
{
  HSGlobalsClass *hs_globals = GetHSContext();
  Lng32 retcode = 0;
  HSColGroupStruct *newGroup = NULL;
  Lng32 colCount = 0;
  NABoolean badColList = FALSE;
  NAString colNames = "";
  NAString temp;
  HSLogMan *LM = HSLogMan::Instance();
  Int32 numCols = colSet.entries();
  Int32 i;

  if (numCols < 2) // Must have at least 2 columns in multi-col set.
  {
    if (LM->LogNeeded())
    {
      if (numCols == 1)
      {
        sprintf(LM->msg, "\t\tIgnoring Column Group with single unique entry (%s)",
                colSet[0].colname->data());
        LM->Log(LM->msg);
      }
      else
      {
        // An empty set has no colSet[0]; do not dereference it.
        LM->Log("\t\tIgnoring empty Column Group");
      }
    }
    return HS_WARNING;
  }

  for (i = 0; i < numCols; i++) // update column numbers, position & NO DUPLICATES
  {
    HSColumnStruct &col = colSet[i];
    temp = " ";
    temp += ToAnsiIdentifier(col.colname->data());
    // Note: ToAnsiIdentifier() determines whether a name needs to be delimited
    // with quotes.  This function works for shift-JIS but may not work for other
    // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ...
    temp += ",";

    // Each name is stored as " name," so that a substring search only
    // matches whole names already in the list.
    if (colNames.contains(temp))
      badColList = TRUE;
    else
    {
      col.colnum = hs_globals->objDef->getColNum((char*)col.colname->data());
      if (col.colnum < 0)
      {
        retcode = -1;
        HSHandleError(retcode);
      }
      col.position = colCount;
      colCount++;
    }
    colNames += temp;
  }
  colNames.remove(0,1);                     // remove first blank
  colNames.remove(colNames.length() - 1);   // remove last comma

  if (badColList) // column list contains repeating columns
  {
    if (LM->LogNeeded())
    {
      sprintf(LM->msg, "\t\tNon-Unique Column Group (%s)", colNames.data());
      LM->Log(LM->msg);
    }
    HSFuncMergeDiags(- UERR_COLUMNLIST_NOT_UNIQUE, colNames.data());
    retcode = -1;
    HSHandleError(retcode);
  }
  else
  {
    if (GroupExists(colSet))
    {
      if (LM->LogNeeded())
      {
        sprintf(LM->msg, "\t\tDuplicate Column Group (%s) has been ignored.", colNames.data());
        LM->Log(LM->msg);
      }
      retcode = HS_WARNING;
    }
    else
    {
      newGroup = new(STMTHEAP) HSColGroupStruct;
      newGroup->colSet = colSet;
      newGroup->colCount = colCount;
      *newGroup->colNames = colNames.data();

      if (hs_globals->multiGroup == NULL) // first group entry
      {
        hs_globals->multiGroup = newGroup;
      }
      else // prepend: the new group becomes the head of the list
      {
        newGroup->next = hs_globals->multiGroup;
        hs_globals->multiGroup->prev = newGroup;
        hs_globals->multiGroup = newGroup;
      }

      hs_globals->groupCount++;
    }
  }

  return retcode;
}