Exemple #1
0
// Build the ComTdbFastExtract TDB for a FastExtract operator.
//
// For each entry of the FastExtract select list, the child value and (when
// CreateAllCharsExpr() hands one back) its companion expression are bound
// and their value ids collected. If both lists are non-empty, a contiguous
// move expression is generated that targets work-ATP tupp childDataTuppIndex
// in aligned format, and the companion value ids are processed for work-ATP
// tupp cnvChildDataTuppIndex in exploded format.
//
// generator           [IN]  supplies space, expression generator, bind work
//                           area and CRI descriptors
// relExpr             [IN]  must have operator type REL_FAST_EXTRACT
// newTdb              [OUT] reset to NULL on entry, set to the new TDB at
//                           the end
// estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName,
// delimiter, header, nullString, recordSeparator, downQueueMaxSize,
// upQueueMaxSize, outputBufferSize, numOutputBuffers, childTdb
//                     [IN]  forwarded to the ComTdbFastExtract constructor
// requestBufferSize, replyBufferSize
//                     [IN]  not used by this function
// isSequenceFile      [IN]  forwarded to tdb->setSequenceFile()
//
// Returns 0 whenever it returns; consistency checks go through GenAssert.
static short ft_codegen(Generator *generator,
                        RelExpr &relExpr,
                        ComTdbFastExtract *&newTdb,
                        Cardinality estimatedRowCount,
                        char * targetName,
                        char * hdfsHost,
                        Int32 hdfsPort,
                        char * hiveTableName,
                        char * delimiter,
                        char * header,
                        char * nullString,
                        char * recordSeparator,
                        ULng32 downQueueMaxSize,
                        ULng32 upQueueMaxSize,
                        ULng32 outputBufferSize,
                        ULng32 requestBufferSize,
                        ULng32 replyBufferSize,
                        ULng32 numOutputBuffers,
                        ComTdb * childTdb,
                        NABoolean isSequenceFile)
{
  CmpContext *cmpContext = generator->currentCmpContext();
  Space *space = generator->getSpace();
  ExpGenerator *exp_gen = generator->getExpGenerator();
  // Remembered so that map tables appended below can be removed afterwards.
  MapTable *last_map_table = generator->getLastMapTable();
  // No input/output expressions are generated here; they are handed to the
  // TDB constructor as NULL together with zero row lengths.
  ex_expr *input_expr = NULL;
  ex_expr *output_expr = NULL;
  ex_expr * childData_expr = NULL ;
  ULng32 i;
  ULng32 requestRowLen = 0;
  ULng32 outputRowLen = 0;
  ULng32 childDataRowLen = 0;
  ULng32 cnvChildDataRowLen = 0;
  ExpTupleDesc *childDataTupleDesc = NULL;
  ExpTupleDesc *cnvChildDataTupleDesc = NULL;
  newTdb = NULL;

  OperatorTypeEnum relExprType = relExpr.getOperatorType();
  GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen")
  FastExtract * fastExtract = (FastExtract *) &relExpr;

  const Int32 workAtpNumber = 1;
  ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN);
  ex_cri_desc *returned_desc = NULL;
  ex_cri_desc *work_cri_desc = NULL;

  // The same descriptor is used for the down and the up direction.
  returned_desc = given_desc;

  // Setup local variables related to the work ATP:
  // four tupps in total, child data at index 2, converted data at index 3.
  unsigned short numWorkTupps = 0;
  unsigned short childDataTuppIndex = 0;
  unsigned short cnvChildDataTuppIndex = 0;

  numWorkTupps = 3;
  childDataTuppIndex = numWorkTupps - 1 ;
  numWorkTupps ++;
  cnvChildDataTuppIndex = numWorkTupps - 1;
  work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space);

  ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;

  ValueIdList childDataVids;
  ValueIdList cnvChildDataVids;
  const ValueIdList& childVals = fastExtract->getSelectList();

  for (i = 0; i < childVals.entries(); i++)
  {
    ItemExpr &inputExpr = *(childVals[i].getItemExpr());
    const NAType &formalType = childVals[i].getType();
    ItemExpr *lmExpr = &inputExpr;
    ItemExpr *lmExpr2 = NULL;
    int res;

    res = CreateAllCharsExpr(formalType, // [IN] Child output type
        *lmExpr, // [IN] Actual input value
        cmpContext, // [IN] Compilation context
        lmExpr2 // [OUT] Returned expression
        );

    GenAssert(res == 0 && lmExpr != NULL,
        "Error building expression tree for LM child Input value");

    // Use the node returned by bindNode() rather than the one passed in,
    // and stop immediately when binding reports an error.
    lmExpr = lmExpr->bindNode(generator->getBindWA());
    if (!lmExpr || generator->getBindWA()->errStatus())
      {
        GenAssert(0, "lmExpr->bindNode failed");
      }
    childDataVids.insert(lmExpr->getValueId());

    if (lmExpr2)
    {
      lmExpr2 = lmExpr2->bindNode(generator->getBindWA());
      if (!lmExpr2 || generator->getBindWA()->errStatus())
        {
          GenAssert(0, "lmExpr2->bindNode failed");
        }
      cnvChildDataVids.insert(lmExpr2->getValueId());
    }
  } // for (i = 0; i < childVals.entries(); i++)

  if (childDataVids.entries() > 0 &&
    cnvChildDataVids.entries()>0)  //-- convertedChildDataVids
  {
    exp_gen->generateContiguousMoveExpr (
      childDataVids,                         //childDataVids// [IN] source ValueIds
      TRUE,                                 // [IN] add convert nodes?
      workAtpNumber,                        // [IN] target atp number (0 or 1)
      childDataTuppIndex,                   // [IN] target tupp index
      childReqFormat,                       // [IN] target tuple data format
      childDataRowLen,                      // [OUT] target tuple length
      &childData_expr,                  // [OUT] move expression
      &childDataTupleDesc,                  // [optional OUT] target tuple desc
      ExpTupleDesc::LONG_FORMAT             // [optional IN] target desc format
      );

    exp_gen->processValIdList (
       cnvChildDataVids,                              // [IN] ValueIdList
       ExpTupleDesc::SQLARK_EXPLODED_FORMAT,  // [IN] tuple data format
       cnvChildDataRowLen,                          // [OUT] tuple length
       workAtpNumber,                                     // [IN] atp number
       cnvChildDataTuppIndex,         // [IN] index into atp
       &cnvChildDataTupleDesc,                      // [optional OUT] tuple desc
       ExpTupleDesc::LONG_FORMAT              // [optional IN] tuple desc format
       );
  }
  //
  // Add the tuple descriptors for the child data and the converted child
  // data to the work ATP (both stay NULL when the lists above were empty)
  //
  work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc);
  work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc);

  // We can now remove all appended map tables
  generator->removeAll(last_map_table);

  UInt32 flags = 0;
  UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS);
  UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC);

  Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE);
  hdfsBufSize = hdfsBufSize * 1024; // convert to bytes
  Int16 replication =  (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION);

  // Create a TDB
  ComTdbFastExtract *tdb = new (space) ComTdbFastExtract (
    flags,
    estimatedRowCount,
    targetName,
    hdfsHost,
    hdfsPort,
    hiveTableName,
    delimiter,
    header,
    nullString,
    recordSeparator,
    given_desc,
    returned_desc,
    work_cri_desc,
    downQueueMaxSize,
    upQueueMaxSize,
    (Lng32) numOutputBuffers,
    outputBufferSize,
    numIoBuffers,
    ioTimeout,
    input_expr,
    output_expr,
    requestRowLen,
    outputRowLen,
    childData_expr,
    childTdb,
    space,
    childDataTuppIndex,
    cnvChildDataTuppIndex,
    childDataRowLen,
    hdfsBufSize,
    replication
    );

  tdb->setSequenceFile(isSequenceFile);
  tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0);

  tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON);
  tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON);
  generator->initTdbFields(tdb);

  // Generate EXPLAIN info.
  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator));
  }

  // Tell the generator about our in/out rows and the new TDB
  generator->setCriDesc(given_desc, Generator::DOWN);
  generator->setCriDesc(returned_desc, Generator::UP);
  generator->setGenObj(&relExpr, tdb);

  // Return a TDB pointer to the caller
  newTdb = tdb;

  return 0;

} // ft_codegen()
// Build the ComTdbFastExtract TDB for a FastExtract operator, including the
// optional conversion checks used for Hive inserts.
//
// For each entry of the FastExtract select list, the child value is bound
// and, for Hive inserts with error checking enabled, wrapped in a Cast to
// the target column type when a conversion error is possible. The value and
// (when CreateAllCharsExpr() hands one back) its companion expression are
// then collected. If both lists are non-empty, a contiguous move expression
// is generated that targets work-ATP tupp childDataTuppIndex in aligned
// format, and the companion value ids are processed for work-ATP tupp
// cnvChildDataTuppIndex in exploded format.
//
// generator           [IN]  supplies space, heap, expression generator, bind
//                           work area and CRI descriptors
// relExpr             [IN]  must have operator type REL_FAST_EXTRACT
// newTdb              [OUT] reset to NULL on entry, set to the new TDB at
//                           the end
// estimatedRowCount, targetName, hdfsHost, hdfsPort, hiveTableName,
// delimiter, header, nullString, recordSeparator, downQueueMaxSize,
// upQueueMaxSize, outputBufferSize, numOutputBuffers, childTdb
//                     [IN]  forwarded to the ComTdbFastExtract constructor
// requestBufferSize, replyBufferSize
//                     [IN]  not used by this function
// isSequenceFile      [IN]  forwarded to tdb->setSequenceFile()
//
// Returns 0 whenever it returns; consistency checks go through GenAssert.
static short ft_codegen(Generator *generator,
                        RelExpr &relExpr,
                        ComTdbFastExtract *&newTdb,
                        Cardinality estimatedRowCount,
                        char * targetName,
                        char * hdfsHost,
                        Int32 hdfsPort,
                        char * hiveTableName,
                        char * delimiter,
                        char * header,
                        char * nullString,
                        char * recordSeparator,
                        ULng32 downQueueMaxSize,
                        ULng32 upQueueMaxSize,
                        ULng32 outputBufferSize,
                        ULng32 requestBufferSize,
                        ULng32 replyBufferSize,
                        ULng32 numOutputBuffers,
                        ComTdb * childTdb,
                        NABoolean isSequenceFile)
{
  CmpContext *cmpContext = generator->currentCmpContext();
  Space *space = generator->getSpace();
  ExpGenerator *exp_gen = generator->getExpGenerator();
  // Remembered so that map tables appended below can be removed afterwards.
  MapTable *last_map_table = generator->getLastMapTable();
  // No input/output expressions are generated here; they are handed to the
  // TDB constructor as NULL together with zero row lengths.
  ex_expr *input_expr = NULL;
  ex_expr *output_expr = NULL;
  ex_expr * childData_expr = NULL ;
  ULng32 i;
  ULng32 requestRowLen = 0;
  ULng32 outputRowLen = 0;
  ULng32 childDataRowLen = 0;
  ULng32 cnvChildDataRowLen = 0;
  ExpTupleDesc *childDataTupleDesc = NULL;
  ExpTupleDesc *cnvChildDataTupleDesc = NULL;
  newTdb = NULL;

  OperatorTypeEnum relExprType = relExpr.getOperatorType();
  GenAssert(relExprType == REL_FAST_EXTRACT, "Unexpected RelExpr at FastExtract codegen")
  FastExtract * fastExtract = (FastExtract *) &relExpr;

  const Int32 workAtpNumber = 1;
  ex_cri_desc *given_desc = generator->getCriDesc(Generator::DOWN);
  ex_cri_desc *returned_desc = NULL;
  ex_cri_desc *work_cri_desc = NULL;

  // The same descriptor is used for the down and the up direction.
  returned_desc = given_desc;

  // Setup local variables related to the work ATP:
  // four tupps in total, child data at index 2, converted data at index 3.
  unsigned short numWorkTupps = 0;
  unsigned short childDataTuppIndex = 0;
  unsigned short cnvChildDataTuppIndex = 0;

  numWorkTupps = 3;
  childDataTuppIndex = numWorkTupps - 1 ;
  numWorkTupps ++;
  cnvChildDataTuppIndex = numWorkTupps - 1;
  work_cri_desc = new (space) ex_cri_desc(numWorkTupps, space);

  ExpTupleDesc::TupleDataFormat childReqFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;

  ValueIdList childDataVids;
  ValueIdList cnvChildDataVids;
  const ValueIdList& childVals = fastExtract->getSelectList();

  const NATable *hiveNATable = NULL;
  const NAColumnArray *hiveNAColArray = NULL;

  // hiveInsertErrMode:
  //    if 0, do not do error checks.
  //    if 1, do error check and return error.
  //    if 2, do error check and ignore row, if error
  //    if 3, insert null if an error occurs
  //
  // The default is only consulted for a Hive insert whose table descriptor
  // and NATable are available. hiveNAColArray stays NULL (no error checks)
  // unless the resulting mode is greater than zero.
  Lng32 hiveInsertErrMode = 0;
  if ((fastExtract->isHiveInsert()) &&
      (fastExtract->getHiveTableDesc()) &&
      (fastExtract->getHiveTableDesc()->getNATable()))
    {
      hiveInsertErrMode = CmpCommon::getDefaultNumeric(HIVE_INSERT_ERROR_MODE);
      if (hiveInsertErrMode > 0)
        {
          hiveNATable = fastExtract->getHiveTableDesc()->getNATable();
          hiveNAColArray = &hiveNATable->getNAColumnArray();
        }
    }

  for (i = 0; i < childVals.entries(); i++)
  {
    ItemExpr &inputExpr = *(childVals[i].getItemExpr());
    const NAType &formalType = childVals[i].getType();
    ItemExpr *lmExpr = NULL;
    ItemExpr *lmExpr2 = NULL;
    int res;

    lmExpr = &inputExpr;
    lmExpr = lmExpr->bindNode(generator->getBindWA());
    if (!lmExpr || generator->getBindWA()->errStatus())
      {
        GenAssert(0, "lmExpr->bindNode failed");
      }

    // Hive insert converts child data into string format and inserts
    // it into target table.
    // If the child type can run into an error during conversion, then
    // add a Cast to convert from child type to target type before
    // converting to string format to be inserted.
    if (hiveNAColArray)
      {
        // NOTE(review): the select-list position i is used directly as the
        // Hive column position; confirm both lists always line up.
        const NAColumn *hiveNACol = (*hiveNAColArray)[i];
        const NAType *hiveNAType = hiveNACol->getType();
        // if tgt type was a hive 'string', do not return a conversion err
        if ((lmExpr->getValueId().getType().errorsCanOccur(*hiveNAType)) &&
            (NOT ((DFS2REC::isSQLVarChar(hiveNAType->getFSDatatype())) &&
                  (((SQLVarChar*)hiveNAType)->wasHiveString()))))
          {
            ItemExpr *newExpr =
              new(generator->wHeap()) Cast(lmExpr, hiveNAType);
            newExpr = newExpr->bindNode(generator->getBindWA());
            if (!newExpr || generator->getBindWA()->errStatus())
              {
                GenAssert(0, "newExpr->bindNode failed");
              }

            // NOTE(review): this downcast assumes bindNode() returned the
            // Cast node created above - confirm it never substitutes
            // another node type.
            if (hiveInsertErrMode == 3)
              ((Cast*)newExpr)->setConvertNullWhenError(TRUE);

            lmExpr = newExpr;
          }
      }

    res = CreateAllCharsExpr(formalType, // [IN] Child output type
        *lmExpr, // [IN] Actual input value
        cmpContext, // [IN] Compilation context
        lmExpr2 // [OUT] Returned expression
        );

    GenAssert(res == 0 && lmExpr != NULL,
        "Error building expression tree for LM child Input value");

    childDataVids.insert(lmExpr->getValueId());
    if (lmExpr2)
    {
      // Same treatment as for lmExpr above: keep the node returned by
      // bindNode() and stop immediately when binding reports an error.
      lmExpr2 = lmExpr2->bindNode(generator->getBindWA());
      if (!lmExpr2 || generator->getBindWA()->errStatus())
        {
          GenAssert(0, "lmExpr2->bindNode failed");
        }
      cnvChildDataVids.insert(lmExpr2->getValueId());
    }
  } // for (i = 0; i < childVals.entries(); i++)

  if (childDataVids.entries() > 0 &&
    cnvChildDataVids.entries()>0)  //-- convertedChildDataVids
  {
    // Saved so the previous pcode mode can be put back after the move
    // expression has been generated.
    UInt16 pcm = exp_gen->getPCodeMode();
    if ((hiveNAColArray) &&
        (hiveInsertErrMode == 3))
      {
        // if error mode is 3 (mode null when error), disable pcode.
        // this feature is currently not being handled by pcode.
        // (added as part of JIRA 1920 in FileScan::codeGenForHive).
        exp_gen->setPCodeMode(ex_expr::PCODE_NONE);
      }

    exp_gen->generateContiguousMoveExpr (
      childDataVids,                         //childDataVids// [IN] source ValueIds
      TRUE,                                 // [IN] add convert nodes?
      workAtpNumber,                        // [IN] target atp number (0 or 1)
      childDataTuppIndex,                   // [IN] target tupp index
      childReqFormat,                       // [IN] target tuple data format
      childDataRowLen,                      // [OUT] target tuple length
      &childData_expr,                  // [OUT] move expression
      &childDataTupleDesc,                  // [optional OUT] target tuple desc
      ExpTupleDesc::LONG_FORMAT             // [optional IN] target desc format
      );

    exp_gen->setPCodeMode(pcm);

    exp_gen->processValIdList (
       cnvChildDataVids,                              // [IN] ValueIdList
       ExpTupleDesc::SQLARK_EXPLODED_FORMAT,  // [IN] tuple data format
       cnvChildDataRowLen,                          // [OUT] tuple length
       workAtpNumber,                                     // [IN] atp number
       cnvChildDataTuppIndex,         // [IN] index into atp
       &cnvChildDataTupleDesc,                      // [optional OUT] tuple desc
       ExpTupleDesc::LONG_FORMAT              // [optional IN] tuple desc format
       );
  }
  //
  // Add the tuple descriptors for the child data and the converted child
  // data to the work ATP (both stay NULL when the lists above were empty)
  //
  work_cri_desc->setTupleDescriptor(childDataTuppIndex, childDataTupleDesc);
  work_cri_desc->setTupleDescriptor(cnvChildDataTuppIndex, cnvChildDataTupleDesc);

  // We can now remove all appended map tables
  generator->removeAll(last_map_table);

  UInt32 flags = 0;
  UInt16 numIoBuffers = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_BUFFERS);
  UInt16 ioTimeout = (UInt16)(ActiveSchemaDB()->getDefaults()).getAsLong(FAST_EXTRACT_IO_TIMEOUT_SEC);

  Int64 hdfsBufSize = (Int64)CmpCommon::getDefaultNumeric(HDFS_IO_BUFFERSIZE);
  hdfsBufSize = hdfsBufSize * 1024; // convert to bytes
  Int16 replication =  (Int16)CmpCommon::getDefaultNumeric(HDFS_REPLICATION);

  // Create a TDB
  ComTdbFastExtract *tdb = new (space) ComTdbFastExtract (
    flags,
    estimatedRowCount,
    targetName,
    hdfsHost,
    hdfsPort,
    hiveTableName,
    delimiter,
    header,
    nullString,
    recordSeparator,
    given_desc,
    returned_desc,
    work_cri_desc,
    downQueueMaxSize,
    upQueueMaxSize,
    (Lng32) numOutputBuffers,
    outputBufferSize,
    numIoBuffers,
    ioTimeout,
    input_expr,
    output_expr,
    requestRowLen,
    outputRowLen,
    childData_expr,
    childTdb,
    space,
    childDataTuppIndex,
    cnvChildDataTuppIndex,
    childDataRowLen,
    hdfsBufSize,
    replication
    );

  tdb->setSequenceFile(isSequenceFile);
  tdb->setHdfsCompressed(CmpCommon::getDefaultNumeric(TRAF_UNLOAD_HDFS_COMPRESS)!=0);

  tdb->setSkipWritingToFiles(CmpCommon::getDefault(TRAF_UNLOAD_SKIP_WRITING_TO_FILES) == DF_ON);
  tdb->setBypassLibhdfs(CmpCommon::getDefault(TRAF_UNLOAD_BYPASS_LIBHDFS) == DF_ON);

  // Error mode 2 (ignore the row on error) is passed on to the TDB.
  if ((hiveNAColArray) &&
      (hiveInsertErrMode == 2))
    {
      tdb->setContinueOnError(TRUE);
    }

  generator->initTdbFields(tdb);

  // Generate EXPLAIN info.
  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(relExpr.addExplainInfo(tdb, 0, 0, generator));
  }

  // Tell the generator about our in/out rows and the new TDB
  generator->setCriDesc(given_desc, Generator::DOWN);
  generator->setCriDesc(returned_desc, Generator::UP);
  generator->setGenObj(&relExpr, tdb);

  // Return a TDB pointer to the caller
  newTdb = tdb;

  return 0;

} // ft_codegen()