コード例 #1
0
Lng32 AddKeyGroups()
  {
  HSGlobalsClass *hs_globals = GetHSContext();
    if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName()))
      {
        // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented.
        *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive");
        return -1;
      }

    Lng32 retcode = 0;
    Lng32 numColsInGroup = 0;
    HSColumnStruct col;
    NAString tempColList = "";
    NAString tempCol;
    NAString autoGroup;
    ULng32 numKeys;
    ULng32 i, j;
    NATable* naTbl = hs_globals->objDef->getNATable();
    HSLogMan *LM = HSLogMan::Instance();

    // ----------------------------------------------------------
    // Generate histograms for KEY
    // ----------------------------------------------------------
    // The clustering index is included in the list of indices returned by
    // NATable::getIndexList(), so we store its pointer so we can skip it
    // when the other indexes are processed below.
    NAFileSet* clusteringIndex = naTbl->getClusteringIndex();
    const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns();
    Lng32 colPos;
    numKeys = keyCols.entries();

    if (numKeys == 1)     // SINGLE-COLUMN KEY
      {
        colPos = keyCols[0]->getPosition();
        if (LM->LogNeeded())
          {
            sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos));
            LM->Log(LM->msg);
          }

        if (ColumnExists(colPos)) // avoid duplicates
          {
            LM->Log("\t\t** duplicate column group has been ignored.");
          }
        else                                 // add to single-column group list
          {
            retcode = AddSingleColumn(colPos);
          }
      }
    else if (numKeys > 1) // MULTI-COLUMN KEY
      {  
        // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
        // also be created if numkeys > 2,  E.g. If numkeys = 5, then
        // MC groups with 5, 4, 3, and 2 columns will be created using
        // the key columns.  Note that if numkeys is larger than CQD 
        // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number
        // of groups created will be limited by this value.  So, e.g. if
        // numkeys = 10, then MC groups with 5, 4, 3, and 2 columns will
        // be created (that is, 5 groups will be created - incl the single).

        ULng32 minMCGroupSz = 2;
        ULng32 maxMCGroups  = (ULng32)
          CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);

        // Generate no MCs with more cols than specified by the cqd.
        if (numKeys > maxMCGroups)
          numKeys = maxMCGroups;

        // For salted table, generate only the longest MC for the key (subject
        // to max cols determined above) unless a cqd is set to gen all MCs of
        // allowable sizes.
        if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF &&
            hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0)
          minMCGroupSz = numKeys;

        while (numKeys >= minMCGroupSz)  // Create only MC groups not single cols
          {
            HSColSet colSet;

            autoGroup = "(";
            for (j = 0; j < numKeys; j++)
              {
                colPos = keyCols[j]->getPosition();
                col = hs_globals->objDef->getColInfo(colPos);
                col.colnum = colPos;
                colSet.insert(col);
                autoGroup += col.colname->data();
                autoGroup += ",";
              }

            if (LM->LogNeeded())
              {
                autoGroup.replace(autoGroup.length()-1,1,")");    // replace comma with close parenthesis
                sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data());
                LM->Log(LM->msg);
              }

            if (retcode = AddColumnSet(colSet))
              {
                HSHandleError(retcode);
              }
            numKeys--;
          }
      }
  
    // ----------------------------------------------------------
    // Generate histograms for all INDEXES
    // ----------------------------------------------------------
    const NAFileSetList& indexes = naTbl->getIndexList();
    NAFileSet* index;
    for (i = 0; i < indexes.entries(); i++ )
      {
        index = indexes[i];
        if (index == clusteringIndex)
          continue;  // clustering index processed above already
        const NAColumnArray& keyCols = index->getIndexKeyColumns();
        numKeys = keyCols.entries();
        if (numKeys == 1)                            // SINGLE-COLUMN INDEX
          {
            colPos = keyCols[0]->getPosition();
            if (LM->LogNeeded())
              {
                sprintf(LM->msg, "\t\tINDEX[%d]\t(%s)", i, 
                        hs_globals->objDef->getColName(colPos));
                LM->Log(LM->msg);
              }
            if (ColumnExists(colPos)) // avoid duplicates
              {
                LM->Log("\t\t*** duplicate column group has been ignored.");
              }
            else                                 // add to single-column group list
              {
                retcode = AddSingleColumn(colPos);
              }
          }
        else // MULTI-COLUMN INDEX
          {  
            // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
            // also be created if numkeys > 2,  E.g. If numkeys = 5, then
            // MC groups with 5, 4, 3, and 2 columns will be created using
            // the key columns.  Note that if numkeys is larger than CQD 
            // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number
            // of groups created will be limited by this value.  So, e.g. if
            // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will
            // be created (that is, 5 groups will be created).

            ULng32 minMCGroupSz = 2;
            ULng32 maxMCGroups  = (ULng32)
              CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);
            if (numKeys > maxMCGroups) 
              minMCGroupSz = numKeys - maxMCGroups + 1;
            while (numKeys >= minMCGroupSz)  // MinMCGroupSz is greater than 1.
              {
              HSColSet colSet;

              tempColList = "";
              autoGroup = "(";
              for (j = 0; j < numKeys; j++)
                {
                  colPos = keyCols[j]->getPosition();
                  tempCol = ".";
                  tempCol += LongToNAString(colPos);
                  tempCol += ".";

                  // Eliminate duplicate columns in the index;
                  // They may have been introduced by appending the key to the specified index.
                  if (!tempColList.contains(tempCol))
                    {
                      col = hs_globals->objDef->getColInfo(colPos);
                      col.colnum = colPos;
                      colSet.insert((const struct HSColumnStruct) col);

                      tempColList += tempCol.data();
                      numColsInGroup++;
                      autoGroup += col.colname->data();
                      autoGroup += ",";
                    }
                }

              if (colSet.entries())
                {
                  if (numColsInGroup > 1)
                    {
                      if (LM->LogNeeded())
                        {
                          autoGroup.replace(autoGroup.length()-1,1,")");    // replace comma with close parenthesis
                          sprintf(LM->msg, "\t\tINDEX[%d]\t%s", i, autoGroup.data());
                          LM->Log(LM->msg);
                        }

                      if (retcode = AddColumnSet(colSet))
                        {
                          HSHandleError(retcode);
                        }
                    }
                  numColsInGroup = 0;
                }
              numKeys--;
              }
          }
      }

    return retcode;
  }
コード例 #2
0
Lng32 AddColumnSet(HSColSet &colSet)
  {
    HSGlobalsClass *hs_globals = GetHSContext();
    Lng32 retcode = 0;
    HSColGroupStruct *newGroup  = NULL;
    Lng32 colCount = 0;
    NABoolean badColList = FALSE;
    NAString colNames = "";
    NAString temp;
    HSLogMan *LM = HSLogMan::Instance();
    Int32 numCols = colSet.entries();
    Int32 i;

    if (numCols < 2)          // Must have at least 2 columns in multi-col set.
      {
        if (LM->LogNeeded())
          {
            sprintf(LM->msg, "\t\tIgnoring Column Group with single unique entry (%s)", 
                             colSet[0].colname->data());
            LM->Log(LM->msg);
          }
        return HS_WARNING;
      }

    for (i=0; i<numCols; i++)          // update column numbers, position & NO DUPLICATES
      {
        HSColumnStruct &col = colSet[i];
        temp = " ";
        temp += ToAnsiIdentifier(col.colname->data());
          // Note: ToAnsiIdentifier() determines whether a name needs to be delimited
          // with quotes.  This function works for shift-JIS but may not work for other
          // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ...
        temp += ",";

        if (colNames.contains(temp))
          badColList = TRUE;
        else
          {
            col.colnum  = hs_globals->objDef->getColNum((char*)col.colname->data());
            if (col.colnum < 0)
              {
                retcode = -1;
                HSHandleError(retcode);
              }
            col.position = colCount;
            colCount++;
          }
        colNames += temp;
      }
    colNames.remove(0,1);    // remove first blank
    colNames.remove(colNames.length() - 1);    // remove last comma

    if (badColList)          // column list contains repeating columns
      {
        if (LM->LogNeeded())
          {
            sprintf(LM->msg, "\t\tNon-Unique Column Group (%s)", colNames.data());
            LM->Log(LM->msg);
          }
        HSFuncMergeDiags(- UERR_COLUMNLIST_NOT_UNIQUE, colNames.data());
        retcode = -1;
        HSHandleError(retcode);
      }
    else
      {
        if (GroupExists(colSet))
          {
            if (LM->LogNeeded())
              {
                sprintf(LM->msg, "\t\tDuplicate Column Group (%s) has been ignored.", colNames.data());
                LM->Log(LM->msg);
              }
            retcode = HS_WARNING;
          }
        else
          {
            newGroup  = new(STMTHEAP) HSColGroupStruct;
            newGroup->colSet = colSet;
            newGroup->colCount = colCount;
            *newGroup->colNames = colNames.data();

            if (hs_globals->multiGroup == NULL)    // first group entry
              {
                hs_globals->multiGroup = newGroup;
              }
            else                  // append to front of list
              {
                newGroup->next = hs_globals->multiGroup;
                hs_globals->multiGroup->prev = newGroup;
                hs_globals->multiGroup = newGroup;
              }

            hs_globals->groupCount++;
          }
      }

    return retcode;
  }