Exemple #1
0
ExplainTuple *PhysicalFastExtract::addSpecificExplainInfo(ExplainTupleMaster *explainTuple, ComTdb *tdb,
    Generator *generator)
{

  NAString description = "Target_type: ";
  if (getTargetType() == FILE)
  {
    if (isHiveInsert())
      description += "hive table";
    else
      description += "file";
  }
  else if (getTargetType() == SOCKET)
    description += "socket";
  else
    description += "none";
  if (isHiveInsert())
  {

    NAString str = getTargetName();
    size_t colonIndex = str.index(":", 1,0,NAString::ignoreCase);
    while (colonIndex !=  NA_NPOS)
    {
      str = str.replace(colonIndex, 1, "_", 1);
      colonIndex = str.index(":", 1,0,NAString::ignoreCase);
    }

    description += " location: ";
    description += str;
  }


  if (isHiveInsert())
  {
    description += " table_name: ";
    description += getHiveTableName();
  }
  else
  {
    description += " target_name: ";
    description += getTargetName();
  }
  description += " delimiter: ";
  description += getDelimiter();

  if (isAppend())
    description += " append: yes";
  if ( !isHiveInsert() && includeHeader())
  {
    description += " header: ";
    description += getHeader();
  }
  if (getCompressionType() != NONE)
  {
    description += " compression_type: ";
    if (getCompressionType() == LZO)
      description += "LZO";
    else
      description += "error";
  }

  description += " null_string: ";
  description += getNullString();

  description += " record_separator: ";
  description += getRecordSeparator();


  explainTuple->setDescription(description);
  if (isHiveInsert())
    explainTuple->setTableName(getHiveTableName());

  return explainTuple;
}
Lng32 AddKeyGroups()
  {
  HSGlobalsClass *hs_globals = GetHSContext();
    if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName()))
      {
        // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented.
        *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive");
        return -1;
      }

    Lng32 retcode = 0;
    Lng32 numColsInGroup = 0;
    HSColumnStruct col;
    NAString tempColList = "";
    NAString tempCol;
    NAString autoGroup;
    ULng32 numKeys;
    ULng32 i, j;
    NATable* naTbl = hs_globals->objDef->getNATable();
    HSLogMan *LM = HSLogMan::Instance();

    // ----------------------------------------------------------
    // Generate histograms for KEY
    // ----------------------------------------------------------
    // The clustering index is included in the list of indices returned by
    // NATable::getIndexList(), so we store its pointer so we can skip it
    // when the other indexes are processed below.
    NAFileSet* clusteringIndex = naTbl->getClusteringIndex();
    const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns();
    Lng32 colPos;
    numKeys = keyCols.entries();

    if (numKeys == 1)     // SINGLE-COLUMN KEY
      {
        colPos = keyCols[0]->getPosition();
        if (LM->LogNeeded())
          {
            sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos));
            LM->Log(LM->msg);
          }

        if (ColumnExists(colPos)) // avoid duplicates
          {
            LM->Log("\t\t** duplicate column group has been ignored.");
          }
        else                                 // add to single-column group list
          {
            retcode = AddSingleColumn(colPos);
          }
      }
    else if (numKeys > 1) // MULTI-COLUMN KEY
      {  
        // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
        // also be created if numkeys > 2,  E.g. If numkeys = 5, then
        // MC groups with 5, 4, 3, and 2 columns will be created using
        // the key columns.  Note that if numkeys is larger than CQD 
        // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number
        // of groups created will be limited by this value.  So, e.g. if
        // numkeys = 10, then MC groups with 5, 4, 3, and 2 columns will
        // be created (that is, 5 groups will be created - incl the single).

        ULng32 minMCGroupSz = 2;
        ULng32 maxMCGroups  = (ULng32)
          CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);

        // Generate no MCs with more cols than specified by the cqd.
        if (numKeys > maxMCGroups)
          numKeys = maxMCGroups;

        // For salted table, generate only the longest MC for the key (subject
        // to max cols determined above) unless a cqd is set to gen all MCs of
        // allowable sizes.
        if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF &&
            hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0)
          minMCGroupSz = numKeys;

        while (numKeys >= minMCGroupSz)  // Create only MC groups not single cols
          {
            HSColSet colSet;

            autoGroup = "(";
            for (j = 0; j < numKeys; j++)
              {
                colPos = keyCols[j]->getPosition();
                col = hs_globals->objDef->getColInfo(colPos);
                col.colnum = colPos;
                colSet.insert(col);
                autoGroup += col.colname->data();
                autoGroup += ",";
              }

            if (LM->LogNeeded())
              {
                autoGroup.replace(autoGroup.length()-1,1,")");    // replace comma with close parenthesis
                sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data());
                LM->Log(LM->msg);
              }

            if (retcode = AddColumnSet(colSet))
              {
                HSHandleError(retcode);
              }
            numKeys--;
          }
      }
  
    // ----------------------------------------------------------
    // Generate histograms for all INDEXES
    // ----------------------------------------------------------
    const NAFileSetList& indexes = naTbl->getIndexList();
    NAFileSet* index;
    for (i = 0; i < indexes.entries(); i++ )
      {
        index = indexes[i];
        if (index == clusteringIndex)
          continue;  // clustering index processed above already
        const NAColumnArray& keyCols = index->getIndexKeyColumns();
        numKeys = keyCols.entries();
        if (numKeys == 1)                            // SINGLE-COLUMN INDEX
          {
            colPos = keyCols[0]->getPosition();
            if (LM->LogNeeded())
              {
                sprintf(LM->msg, "\t\tINDEX[%d]\t(%s)", i, 
                        hs_globals->objDef->getColName(colPos));
                LM->Log(LM->msg);
              }
            if (ColumnExists(colPos)) // avoid duplicates
              {
                LM->Log("\t\t*** duplicate column group has been ignored.");
              }
            else                                 // add to single-column group list
              {
                retcode = AddSingleColumn(colPos);
              }
          }
        else // MULTI-COLUMN INDEX
          {  
            // Create multiple MC group(s) if numkeys > 1.  Subset MC groups will
            // also be created if numkeys > 2,  E.g. If numkeys = 5, then
            // MC groups with 5, 4, 3, and 2 columns will be created using
            // the key columns.  Note that if numkeys is larger than CQD 
            // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number
            // of groups created will be limited by this value.  So, e.g. if
            // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will
            // be created (that is, 5 groups will be created).

            ULng32 minMCGroupSz = 2;
            ULng32 maxMCGroups  = (ULng32)
              CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);
            if (numKeys > maxMCGroups) 
              minMCGroupSz = numKeys - maxMCGroups + 1;
            while (numKeys >= minMCGroupSz)  // MinMCGroupSz is greater than 1.
              {
              HSColSet colSet;

              tempColList = "";
              autoGroup = "(";
              for (j = 0; j < numKeys; j++)
                {
                  colPos = keyCols[j]->getPosition();
                  tempCol = ".";
                  tempCol += LongToNAString(colPos);
                  tempCol += ".";

                  // Eliminate duplicate columns in the index;
                  // They may have been introduced by appending the key to the specified index.
                  if (!tempColList.contains(tempCol))
                    {
                      col = hs_globals->objDef->getColInfo(colPos);
                      col.colnum = colPos;
                      colSet.insert((const struct HSColumnStruct) col);

                      tempColList += tempCol.data();
                      numColsInGroup++;
                      autoGroup += col.colname->data();
                      autoGroup += ",";
                    }
                }

              if (colSet.entries())
                {
                  if (numColsInGroup > 1)
                    {
                      if (LM->LogNeeded())
                        {
                          autoGroup.replace(autoGroup.length()-1,1,")");    // replace comma with close parenthesis
                          sprintf(LM->msg, "\t\tINDEX[%d]\t%s", i, autoGroup.data());
                          LM->Log(LM->msg);
                        }

                      if (retcode = AddColumnSet(colSet))
                        {
                          HSHandleError(retcode);
                        }
                    }
                  numColsInGroup = 0;
                }
              numKeys--;
              }
          }
      }

    return retcode;
  }