//----------------------------------------------------------------------------
// Decide whether the query behind this MAV was written without a GROUP BY
// clause, and set isMavWithoutGroupBy_ to TRUE if so. The flag is only ever
// set here, never cleared.
//
// A query with no GROUP BY clause yields a RelExpr tree that has no GroupBy
// node in it. Only the child(0) chain below mvSelectTree is inspected.
void MavRelRootBuilder::checkForMavWithoutGroupBy(RelExpr *mvSelectTree) 
{
  // Walk down from the top of the RelExpr tree until a GroupBy node is
  // reached or there is no further child(0) to follow.
  RelExpr *current = mvSelectTree;
  for ( ;
       current->child(0) != NULL &&
       current->getOperatorType() != REL_GROUPBY;
       current = current->child(0))
  {
  }

  // No GroupBy node anywhere on the path.
  if (current->getOperatorType() != REL_GROUPBY)
  {
    isMavWithoutGroupBy_ = TRUE;
    return;
  }

  // A GroupBy was found. The only remaining no-groupby case is a GroupBy
  // sitting on top of a Root whose child is another GroupBy; this can only
  // be the result of multi-delta optimization on a no-groupby MAV.
  if (current->child(0)->getOperatorType() != REL_ROOT ||
      current->child(0)->child(0)->getOperatorType() != REL_GROUPBY)
    return;

  // Found another GroupBy below: the MAV has no GROUP BY only if the top
  // GroupBy has no grouping columns.
  GroupByAgg *topGroupBy = (GroupByAgg *)current;
  if (topGroupBy->getGroupExprTree() == NULL)
    isMavWithoutGroupBy_ = TRUE;
}  // MavRelRootBuilder::checkForMavWithoutGroupBy()
// Example #2
// 0
// Generate the ComTdbFastExtract TDB for a FastExtract operator.
//
// The function
//   - creates a work cri descriptor with four tupps, using tupp 2 for the
//     child data row and tupp 3 for the converted (all-chars) child data,
//   - binds every select-list value of the FastExtract node and builds its
//     character-converted counterpart through CreateAllCharsExpr,
//   - generates the contiguous move expression for the child data and the
//     tuple layout for the converted values,
//   - constructs the TDB, applies the unload-related defaults and registers
//     the TDB with the generator.
//
// Parameters:
//   generator          supplies the space, expression generator, bind work
//                      area and cri descriptors used below
//   relExpr            must be a REL_FAST_EXTRACT node (asserted)
//   newTdb             [OUT] set to NULL on entry, to the new TDB on success
//   estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName,
//   delimiter, header, nullString, recordSeparator, downQueueMaxSize,
//   upQueueMaxSize, outputBufferSize, numOutputBuffers, childTdb
//                      passed through to the ComTdbFastExtract constructor
//   requestBufferSize, replyBufferSize
//                      not referenced by this function
//   isSequenceFile     forwarded to ComTdbFastExtract::setSequenceFile()
//
// Returns 0. Error conditions are raised through GenAssert.
static short ft_codegen(Generator *generator,
                        RelExpr &relExpr,
                        ComTdbFastExtract *&newTdb,
                        Cardinality estimatedRowCount,
                        char * targetName,
                        char * hdfsHost,
                        Int32 hdfsPort,
                        char * hiveTableName,
                        char * delimiter,
                        char * header,
                        char * nullString,
                        char * recordSeparator,
                        ULng32 downQueueMaxSize,
                        ULng32 upQueueMaxSize,
                        ULng32 outputBufferSize,
                        ULng32 requestBufferSize,
                        ULng32 replyBufferSize,
                        ULng32 numOutputBuffers,
                        ComTdb * childTdb,
                        NABoolean isSequenceFile)
{
  CmpContext *cmpContext = generator->currentCmpContext();
  Space *space = generator->getSpace();
  ExpGenerator *exp_gen = generator->getExpGenerator();
  // map_table is not referenced below; the call is kept as in the original.
  MapTable *map_table = generator->getMapTable();
  // Remembered so that the map tables appended while generating the
  // expressions can be removed again further down.
  MapTable *last_map_table = generator->getLastMapTable();

  // No input/output expressions are generated here; they are handed to the
  // TDB as NULL with zero row lengths.
  ex_expr *input_expr = NULL;
  ex_expr *output_expr = NULL;
  ex_expr * childData_expr = NULL ;
  ULng32 i;
  ULng32 requestRowLen = 0;
  ULng32 outputRowLen = 0;
  ULng32 childDataRowLen = 0;
  ULng32 cnvChildDataRowLen = 0;

  ExpTupleDesc *childDataTupleDesc = NULL;
  ExpTupleDesc *cnvChildDataTupleDesc = NULL;
  newTdb = NULL;

  OperatorTypeEnum relExprType = relExpr.getOperatorType();
  GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen");
  FastExtract * fastExtract = (FastExtract *) &relExpr;

  const Int32 workAtpNumber = 1;
  ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN);
  ex_cri_desc *returned_desc = NULL;
  ex_cri_desc *work_cri_desc = NULL;

  // The rows returned to the parent use the same descriptor as the rows
  // given to this operator.
  returned_desc = given_desc;

  // Setup local variables related to the work ATP:
  // tupp 2 holds the child data, tupp 3 the converted child data.
  unsigned short numWorkTupps = 0;
  unsigned short childDataTuppIndex = 0;
  unsigned short cnvChildDataTuppIndex = 0;

  numWorkTupps = 3;
  childDataTuppIndex = numWorkTupps - 1 ;
  numWorkTupps ++;
  cnvChildDataTuppIndex = numWorkTupps - 1;
  work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space);

  ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;

  ValueIdList childDataVids;
  ValueIdList cnvChildDataVids;
  const ValueIdList& childVals = fastExtract->getSelectList();

  for (i = 0; i < childVals.entries(); i++)
  {
    const NAType &formalType = childVals[i].getType();
    ItemExpr *lmExpr = childVals[i].getItemExpr();
    ItemExpr *lmExpr2 = NULL;
    int res;

    // Bind the input value first and keep the node returned by bindNode,
    // so that the all-chars expression below is built on the bound node
    // and a failed bind is reported instead of being silently ignored.
    lmExpr = lmExpr->bindNode(generator->getBindWA());
    if (!lmExpr || generator->getBindWA()->errStatus())
      {
        GenAssert(0, "lmExpr->bindNode failed");
      }

    res = CreateAllCharsExpr(formalType, // [IN] Child output type
        *lmExpr, // [IN] Actual input value
        cmpContext, // [IN] Compilation context
        lmExpr2 // [OUT] Returned expression
        );

    GenAssert(res == 0,
        "Error building expression tree for LM child Input value");

    childDataVids.insert(lmExpr->getValueId());
    if (lmExpr2)
    {
      lmExpr2->bindNode(generator->getBindWA());
      cnvChildDataVids.insert(lmExpr2->getValueId());
    }
  } // for (i = 0; i < childVals.entries(); i++)

  if (childDataVids.entries() > 0 &&
    cnvChildDataVids.entries()>0)  //-- convertedChildDataVids
  {
    exp_gen->generateContiguousMoveExpr (
      childDataVids,                        // [IN] source ValueIds
      TRUE,                                 // [IN] add convert nodes?
      workAtpNumber,                        // [IN] target atp number (0 or 1)
      childDataTuppIndex,                   // [IN] target tupp index
      childReqFormat,                       // [IN] target tuple data format
      childDataRowLen,                      // [OUT] target tuple length
      &childData_expr,                      // [OUT] move expression
      &childDataTupleDesc,                  // [optional OUT] target tuple desc
      ExpTupleDesc::LONG_FORMAT             // [optional IN] target desc format
      );

    exp_gen->processValIdList (
       cnvChildDataVids,                      // [IN] ValueIdList
       ExpTupleDesc::SQLARK_EXPLODED_FORMAT,  // [IN] tuple data format
       cnvChildDataRowLen,                    // [OUT] tuple length
       workAtpNumber,                         // [IN] atp number
       cnvChildDataTuppIndex,                 // [IN] index into atp
       &cnvChildDataTupleDesc,                // [optional OUT] tuple desc
       ExpTupleDesc::LONG_FORMAT              // [optional IN] tuple desc format
       );
  }
  //
  // Add the tuple descriptors for the child data and the converted child
  // data to the work ATP. Both stay NULL when nothing was generated above.
  //
  work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc);
  work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc);

  // We can now remove all appended map tables
  generator->removeAll(last_map_table);

  UInt32 flags = 0;
  UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS);
  UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC);

  Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE);
  hdfsBufSize = hdfsBufSize * 1024; // convert to bytes
  Int16 replication =  (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION);

  // Create a TDB
  ComTdbFastExtract *tdb = new (space) ComTdbFastExtract (
    flags,
    estimatedRowCount,
    targetName,
    hdfsHost,
    hdfsPort,
    hiveTableName,
    delimiter,
    header,
    nullString,
    recordSeparator,
    given_desc,
    returned_desc,
    work_cri_desc,
    downQueueMaxSize,
    upQueueMaxSize,
    (Lng32) numOutputBuffers,
    outputBufferSize,
    numIoBuffers,
    ioTimeout,
    input_expr,
    output_expr,
    requestRowLen,
    outputRowLen,
    childData_expr,
    childTdb,
    space,
    childDataTuppIndex,
    cnvChildDataTuppIndex,
    childDataRowLen,
    hdfsBufSize,
    replication
    );

  tdb->setSequenceFile(isSequenceFile);
  tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0);

  tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON);
  tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON);
  generator->initTdbFields(tdb);

  // Generate EXPLAIN info.
  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator));
  }

  // Tell the generator about our in/out rows and the new TDB
  generator->setCriDesc(given_desc, Generator::DOWN);
  generator->setCriDesc(returned_desc, Generator::UP);
  generator->setGenObj(&relExpr, tdb);

  // Return a TDB pointer to the caller
  newTdb = tdb;

  return 0;

} // ft_codegen()
// Build the ComTdbFastExtract TDB for a FastExtract operator, including the
// optional error handling for inserts into a Hive table.
//
// Outline:
//   1. Set up a four-tupp work cri descriptor (tupp 2: child data row,
//      tupp 3: converted child data).
//   2. Read HIVE_INSERT_ERROR_MODE when the node is a Hive insert that has
//      a table descriptor with an NATable.
//   3. For each select-list value: bind it, optionally wrap it in a Cast to
//      the Hive column type, and build its all-chars expression.
//   4. Generate the move expression / tuple layouts, create the TDB, apply
//      the unload defaults and register everything with the generator.
//
// newTdb is set to NULL on entry and to the new TDB before returning.
// requestBufferSize and replyBufferSize are not referenced here.
// Returns 0; failures are raised through GenAssert.
static short ft_codegen(Generator *generator,
                        RelExpr &relExpr,
                        ComTdbFastExtract *&newTdb,
                        Cardinality estimatedRowCount,
                        char * targetName,
                        char * hdfsHost,
                        Int32 hdfsPort,
                        char * hiveTableName,
                        char * delimiter,
                        char * header,
                        char * nullString,
                        char * recordSeparator,
                        ULng32 downQueueMaxSize,
                        ULng32 upQueueMaxSize,
                        ULng32 outputBufferSize,
                        ULng32 requestBufferSize,
                        ULng32 replyBufferSize,
                        ULng32 numOutputBuffers,
                        ComTdb * childTdb,
                        NABoolean isSequenceFile)
{
  CmpContext *compileCtx = generator->currentCmpContext();
  Space *tdbSpace = generator->getSpace();
  ExpGenerator *expGen = generator->getExpGenerator();
  // Not referenced below; the call is kept as in the original.
  MapTable *map_table = generator->getMapTable();
  // Marker used later to drop the map tables appended during codegen.
  MapTable *lastMapTable = generator->getLastMapTable();

  newTdb = NULL;

  OperatorTypeEnum opType = relExpr.getOperatorType();
  GenAssert(opType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen");
  FastExtract * fastExtract = (FastExtract *) &relExpr;

  // Rows going up use the same descriptor as rows coming down.
  ex_cri_desc *downCriDesc = generator->getCriDesc(Generator::DOWN);
  ex_cri_desc *upCriDesc = downCriDesc;

  // Work ATP layout: three leading tupps, then the child data tupp,
  // then the converted child data tupp.
  const Int32 workAtpNumber = 1;
  unsigned short workTuppCount = 3;
  unsigned short childDataTuppIndex = workTuppCount - 1;
  workTuppCount++;
  unsigned short cnvChildDataTuppIndex = workTuppCount - 1;
  ex_cri_desc *workCriDesc = new (tdbSpace) ex_cri_desc(workTuppCount, tdbSpace);

  ExpTupleDesc::TupleDataFormat childRowFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;

  ValueIdList childDataVids;
  ValueIdList cnvChildDataVids;
  const ValueIdList& selectList = fastExtract->getSelectList();

  // hiveInsertErrMode: 
  //    if 0, do not do error checks.
  //    if 1, do error check and return error.
  //    if 2, do error check and ignore row, if error
  //    if 3, insert null if an error occurs
  // The default is only read for a Hive insert whose table descriptor has
  // an NATable; the column array is only picked up for a mode above 0.
  const NATable *hiveNATable = NULL;
  const NAColumnArray *hiveNAColArray = NULL;
  Lng32 hiveInsertErrMode = 0;
  if ((fastExtract) && (fastExtract->isHiveInsert()) &&
      (fastExtract->getHiveTableDesc()) &&
      (fastExtract->getHiveTableDesc()->getNATable()))
    {
      hiveInsertErrMode = CmpCommon::getDefaultNumeric(HIVE_INSERT_ERROR_MODE);
      if (hiveInsertErrMode > 0)
        {
          hiveNATable = fastExtract->getHiveTableDesc()->getNATable();
          hiveNAColArray = &hiveNATable->getNAColumnArray();
        }
    }

  for (ULng32 colIdx = 0; colIdx < selectList.entries(); colIdx++)
  {
    const NAType &formalType = selectList[colIdx].getType();

    // Bind the select-list value and continue with the bound node.
    ItemExpr *srcExpr = selectList[colIdx].getItemExpr();
    srcExpr = srcExpr->bindNode(generator->getBindWA());
    if (!srcExpr || generator->getBindWA()->errStatus())
      {
        GenAssert(0, "lmExpr->bindNode failed");
      }

    // Hive insert converts child data into string format and inserts
    // it into the target table. If converting the child type to the
    // target column type can raise an error, put a Cast to the target
    // type underneath the string conversion so the error surfaces there.
    if (hiveNAColArray)
      {
        const NAColumn *hiveNACol = (*hiveNAColArray)[colIdx];
        const NAType *hiveNAType = hiveNACol->getType();

        if (srcExpr->getValueId().getType().errorsCanOccur(*hiveNAType))
          {
            // if tgt type was a hive 'string', do not return a conversion err
            const NABoolean tgtWasHiveString =
              (DFS2REC::isSQLVarChar(hiveNAType->getFSDatatype())) &&
              (((SQLVarChar*)hiveNAType)->wasHiveString());

            if (NOT tgtWasHiveString)
              {
                ItemExpr *castExpr =
                  new(generator->wHeap()) Cast(srcExpr, hiveNAType);
                castExpr = castExpr->bindNode(generator->getBindWA());
                if (!castExpr || generator->getBindWA()->errStatus())
                  {
                    GenAssert(0, "newExpr->bindNode failed");
                  }

                if (hiveInsertErrMode == 3)
                  ((Cast*)castExpr)->setConvertNullWhenError(TRUE);

                srcExpr = castExpr;
              }
          }
      }

    ItemExpr *allCharsExpr = NULL;
    int res = CreateAllCharsExpr(formalType,   // [IN] Child output type
                                 *srcExpr,     // [IN] Actual input value
                                 compileCtx,   // [IN] Compilation context
                                 allCharsExpr  // [OUT] Returned expression
                                 );

    GenAssert(res == 0 && srcExpr != NULL,
        "Error building expression tree for LM child Input value");

    childDataVids.insert(srcExpr->getValueId());
    if (allCharsExpr)
    {
      allCharsExpr->bindNode(generator->getBindWA());
      cnvChildDataVids.insert(allCharsExpr->getValueId());
    }
  } // for each select-list entry

  ex_expr *childDataMoveExpr = NULL;
  ULng32 childDataRowLen = 0;
  ULng32 cnvChildDataRowLen = 0;
  ExpTupleDesc *childDataTupleDesc = NULL;
  ExpTupleDesc *cnvChildDataTupleDesc = NULL;

  if (childDataVids.entries() > 0 &&
      cnvChildDataVids.entries() > 0)
  {
    // Save the current pcode mode; it is restored right after the move
    // expression has been generated.
    UInt16 savedPCodeMode = expGen->getPCodeMode();
    if ((hiveNAColArray) &&
        (hiveInsertErrMode == 3))
      {
        // if error mode is 3 (mode null when error), disable pcode.
        // this feature is currently not being handled by pcode.
        // (added as part of JIRA 1920 in FileScan::codeGenForHive).
        expGen->setPCodeMode(ex_expr::PCODE_NONE);
      }

    expGen->generateContiguousMoveExpr (
      childDataVids,               // [IN] source ValueIds
      TRUE,                        // [IN] add convert nodes?
      workAtpNumber,               // [IN] target atp number (0 or 1)
      childDataTuppIndex,          // [IN] target tupp index
      childRowFormat,              // [IN] target tuple data format
      childDataRowLen,             // [OUT] target tuple length
      &childDataMoveExpr,          // [OUT] move expression
      &childDataTupleDesc,         // [optional OUT] target tuple desc
      ExpTupleDesc::LONG_FORMAT    // [optional IN] target desc format
      );

    expGen->setPCodeMode(savedPCodeMode);

    expGen->processValIdList (
      cnvChildDataVids,                      // [IN] ValueIdList
      ExpTupleDesc::SQLARK_EXPLODED_FORMAT,  // [IN] tuple data format
      cnvChildDataRowLen,                    // [OUT] tuple length
      workAtpNumber,                         // [IN] atp number
      cnvChildDataTuppIndex,                 // [IN] index into atp
      &cnvChildDataTupleDesc,                // [optional OUT] tuple desc
      ExpTupleDesc::LONG_FORMAT              // [optional IN] tuple desc format
      );
  }

  // Attach the (possibly NULL) tuple descriptors to the work ATP.
  workCriDesc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc);
  workCriDesc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc);

  // We can now remove all appended map tables
  generator->removeAll(lastMapTable);

  // Values read from the defaults for the TDB.
  UInt32 tdbFlags = 0;
  UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS);
  UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC);

  Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE);
  hdfsBufSize = hdfsBufSize * 1024; // convert to bytes
  Int16 replication = (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION);

  // No input/output expressions are generated by this function; the TDB
  // receives NULL expressions and zero row lengths for them.
  ex_expr *inputExpr = NULL;
  ex_expr *outputExpr = NULL;
  ULng32 requestRowLen = 0;
  ULng32 outputRowLen = 0;

  // Create a TDB
  ComTdbFastExtract *fastExtractTdb = new (tdbSpace) ComTdbFastExtract (
    tdbFlags,
    estimatedRowCount,
    targetName,
    hdfsHost,
    hdfsPort,
    hiveTableName,
    delimiter,
    header,
    nullString,
    recordSeparator,
    downCriDesc,
    upCriDesc,
    workCriDesc,
    downQueueMaxSize,
    upQueueMaxSize,
    (Lng32) numOutputBuffers,
    outputBufferSize,
    numIoBuffers,
    ioTimeout,
    inputExpr,
    outputExpr,
    requestRowLen,
    outputRowLen,
    childDataMoveExpr,
    childTdb,
    tdbSpace,
    childDataTuppIndex,
    cnvChildDataTuppIndex,
    childDataRowLen,
    hdfsBufSize,
    replication
    );

  fastExtractTdb->setSequenceFile(isSequenceFile);
  fastExtractTdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0);

  fastExtractTdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON);
  fastExtractTdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON);

  // Error mode 2 on a Hive insert: flag the TDB to continue on error.
  if ((hiveNAColArray) &&
      (hiveInsertErrMode == 2))
    {
      fastExtractTdb->setContinueOnError(TRUE);
    }

  generator->initTdbFields(fastExtractTdb);

  // Generate EXPLAIN info.
  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(relExpr.addExplainInfo(fastExtractTdb, 0, 0, generator));
  }

  // Tell the generator about our in/out rows and the new TDB
  generator->setCriDesc(downCriDesc, Generator::DOWN);
  generator->setCriDesc(upCriDesc, Generator::UP);
  generator->setGenObj(&relExpr, fastExtractTdb);

  // Return a TDB pointer to the caller
  newTdb = fastExtractTdb;

  return 0;

} // ft_codegen()
// AppliedStatMan::setupASMCacheForJBB is called from Query::Analyze once
// connectivity analysis has been done and empty logical properties have
// been set.
//
// It makes two passes over the JBBCs of the given JBB:
//   1. for every JBBC that is a table scan, request its statistics via
//      getStatsForCANodeId();
//   2. for every pair of joined JBBCs, request the two-way join statistics
//      via joinJBBChildren() unless getCachedStatistics() already has them.
// The returned properties are not used here beyond the assignment.
void AppliedStatMan::setupASMCacheForJBB(JBB & jbb)
{
  EstLogPropSharedPtr myEstLogProp;

  // All JBBCs of this JBB.
  const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs();
  CANodeId jbbcId;

  // Pass 1: single-table statistics.
  for (jbbcId = jbbcNodeIdSet.init();
       jbbcNodeIdSet.next(jbbcId);
       jbbcNodeIdSet.advance(jbbcId))
  {
    NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis();
    if (!jbbcNode)
      continue;

    // Evaluate local predicates only if it is a table.
    RelExpr * jbbcExpr = jbbcNode->getOriginalExpr();
    if ((jbbcNode->getTableAnalysis() == NULL) ||
        (jbbcExpr->getOperatorType() != REL_SCAN))
      continue;

    // The original expression of the jbbc is a Scan.
    Scan * scanExpr = (Scan *) jbbcExpr;
    ValueIdSet localPreds = scanExpr->getSelectionPredicates();

    // Proceed when the scan has selection predicates or lookup() finds
    // nothing for this JBBC; otherwise skip it.
    if ((localPreds.entries() > 0) || !(lookup(jbbcId)))
    {
      // If this GA has not yet been associated with a logExpr for
      // synthesis, synthesize the logical properties first; then get
      // the statistics for the JBBC.
      if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis())
        scanExpr->synthLogProp();

      myEstLogProp = getStatsForCANodeId(jbbcId);
    }
  }

  // Pass 2: traverse the JBB again looking for join reducers.
  for (jbbcId = jbbcNodeIdSet.init();
       jbbcNodeIdSet.next(jbbcId);
       jbbcNodeIdSet.advance(jbbcId))
  {
    NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis();
    if (!jbbcNode)
      continue;

    // Every JBBC joined to this one gets a two-way join with it.
    CANodeIdSet connectedNodes = jbbcNode->getJBBC()->getJoinedJBBCs();

    for (CANodeId connectedTable = connectedNodes.init();
         connectedNodes.next(connectedTable);
         connectedNodes.advance(connectedTable))
    {
      if (!connectedTable.getNodeAnalysis())
        continue;

      // ASM does not concern itself with the order of the tables,
      // hence it is possible that the join has already been computed.
      CANodeIdSet tableSet = jbbcId;
      tableSet.insert(connectedTable);

      myEstLogProp = getCachedStatistics(&tableSet);
      if (myEstLogProp == NULL)
      {
        CANodeIdSet setForjbbcId(jbbcId);
        CANodeIdSet setForConnectedTable(connectedTable);
        myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable);
      }
    }
  }
} // AppliedStatMan::setupASMCacheForJBB