ExplainTuple *PhysicalFastExtract::addSpecificExplainInfo(ExplainTupleMaster *explainTuple, ComTdb *tdb, Generator *generator) { NAString description = "Target_type: "; if (getTargetType() == FILE) { if (isHiveInsert()) description += "hive table"; else description += "file"; } else if (getTargetType() == SOCKET) description += "socket"; else description += "none"; if (isHiveInsert()) { NAString str = getTargetName(); size_t colonIndex = str.index(":", 1,0,NAString::ignoreCase); while (colonIndex != NA_NPOS) { str = str.replace(colonIndex, 1, "_", 1); colonIndex = str.index(":", 1,0,NAString::ignoreCase); } description += " location: "; description += str; } if (isHiveInsert()) { description += " table_name: "; description += getHiveTableName(); } else { description += " target_name: "; description += getTargetName(); } description += " delimiter: "; description += getDelimiter(); if (isAppend()) description += " append: yes"; if ( !isHiveInsert() && includeHeader()) { description += " header: "; description += getHeader(); } if (getCompressionType() != NONE) { description += " compression_type: "; if (getCompressionType() == LZO) description += "LZO"; else description += "error"; } description += " null_string: "; description += getNullString(); description += " record_separator: "; description += getRecordSeparator(); explainTuple->setDescription(description); if (isHiveInsert()) explainTuple->setTableName(getHiveTableName()); return explainTuple; }
Lng32 AddKeyGroups() { HSGlobalsClass *hs_globals = GetHSContext(); if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName())) { // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented. *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive"); return -1; } Lng32 retcode = 0; Lng32 numColsInGroup = 0; HSColumnStruct col; NAString tempColList = ""; NAString tempCol; NAString autoGroup; ULng32 numKeys; ULng32 i, j; NATable* naTbl = hs_globals->objDef->getNATable(); HSLogMan *LM = HSLogMan::Instance(); // ---------------------------------------------------------- // Generate histograms for KEY // ---------------------------------------------------------- // The clustering index is included in the list of indices returned by // NATable::getIndexList(), so we store its pointer so we can skip it // when the other indexes are processed below. NAFileSet* clusteringIndex = naTbl->getClusteringIndex(); const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns(); Lng32 colPos; numKeys = keyCols.entries(); if (numKeys == 1) // SINGLE-COLUMN KEY { colPos = keyCols[0]->getPosition(); if (LM->LogNeeded()) { sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos)); LM->Log(LM->msg); } if (ColumnExists(colPos)) // avoid duplicates { LM->Log("\t\t** duplicate column group has been ignored."); } else // add to single-column group list { retcode = AddSingleColumn(colPos); } } else if (numKeys > 1) // MULTI-COLUMN KEY { // Create multiple MC group(s) if numkeys > 1. Subset MC groups will // also be created if numkeys > 2, E.g. If numkeys = 5, then // MC groups with 5, 4, 3, and 2 columns will be created using // the key columns. Note that if numkeys is larger than CQD // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number // of groups created will be limited by this value. So, e.g. if // numkeys = 10, then MC groups with 5, 4, 3, and 2 columns will // be created (that is, 5 groups will be created - incl the single). ULng32 minMCGroupSz = 2; ULng32 maxMCGroups = (ULng32) CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); // Generate no MCs with more cols than specified by the cqd. if (numKeys > maxMCGroups) numKeys = maxMCGroups; // For salted table, generate only the longest MC for the key (subject // to max cols determined above) unless a cqd is set to gen all MCs of // allowable sizes. if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF && hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0) minMCGroupSz = numKeys; while (numKeys >= minMCGroupSz) // Create only MC groups not single cols { HSColSet colSet; autoGroup = "("; for (j = 0; j < numKeys; j++) { colPos = keyCols[j]->getPosition(); col = hs_globals->objDef->getColInfo(colPos); col.colnum = colPos; colSet.insert(col); autoGroup += col.colname->data(); autoGroup += ","; } if (LM->LogNeeded()) { autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data()); LM->Log(LM->msg); } if (retcode = AddColumnSet(colSet)) { HSHandleError(retcode); } numKeys--; } } // ---------------------------------------------------------- // Generate histograms for all INDEXES // ---------------------------------------------------------- const NAFileSetList& indexes = naTbl->getIndexList(); NAFileSet* index; for (i = 0; i < indexes.entries(); i++ ) { index = indexes[i]; if (index == clusteringIndex) continue; // clustering index processed above already const NAColumnArray& keyCols = index->getIndexKeyColumns(); numKeys = keyCols.entries(); if (numKeys == 1) // SINGLE-COLUMN INDEX { colPos = keyCols[0]->getPosition(); if (LM->LogNeeded()) { sprintf(LM->msg, "\t\tINDEX[%d]\t(%s)", i, hs_globals->objDef->getColName(colPos)); LM->Log(LM->msg); } if (ColumnExists(colPos)) // avoid duplicates { LM->Log("\t\t*** duplicate column group has been ignored."); } else // add to single-column group list { retcode = AddSingleColumn(colPos); } } else // MULTI-COLUMN INDEX { // Create multiple MC group(s) if numkeys > 1. Subset MC groups will // also be created if numkeys > 2, E.g. If numkeys = 5, then // MC groups with 5, 4, 3, and 2 columns will be created using // the key columns. Note that if numkeys is larger than CQD // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number // of groups created will be limited by this value. So, e.g. if // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will // be created (that is, 5 groups will be created). ULng32 minMCGroupSz = 2; ULng32 maxMCGroups = (ULng32) CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); if (numKeys > maxMCGroups) minMCGroupSz = numKeys - maxMCGroups + 1; while (numKeys >= minMCGroupSz) // MinMCGroupSz is greater than 1. { HSColSet colSet; tempColList = ""; autoGroup = "("; for (j = 0; j < numKeys; j++) { colPos = keyCols[j]->getPosition(); tempCol = "."; tempCol += LongToNAString(colPos); tempCol += "."; // Eliminate duplicate columns in the index; // They may have been introduced by appending the key to the specified index. if (!tempColList.contains(tempCol)) { col = hs_globals->objDef->getColInfo(colPos); col.colnum = colPos; colSet.insert((const struct HSColumnStruct) col); tempColList += tempCol.data(); numColsInGroup++; autoGroup += col.colname->data(); autoGroup += ","; } } if (colSet.entries()) { if (numColsInGroup > 1) { if (LM->LogNeeded()) { autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis sprintf(LM->msg, "\t\tINDEX[%d]\t%s", i, autoGroup.data()); LM->Log(LM->msg); } if (retcode = AddColumnSet(colSet)) { HSHandleError(retcode); } } numColsInGroup = 0; } numKeys--; } } } return retcode; }