Exemplo n.º 1
0
short
PhysicalFastExtract::codeGen(Generator *generator)
{
  short result = 0;
  Space *space = generator->getSpace();
  CmpContext *cmpContext = generator->currentCmpContext();

  const ULng32 downQueueMaxSize = getDefault(GEN_FE_SIZE_DOWN);
  const ULng32 upQueueMaxSize = getDefault(GEN_FE_SIZE_UP);


  const ULng32 defaultBufferSize = getDefault(GEN_FE_BUFFER_SIZE);
  const ULng32 outputBufferSize = defaultBufferSize;
  const ULng32 requestBufferSize = defaultBufferSize;
  const ULng32 replyBufferSize = defaultBufferSize;
  const ULng32 numOutputBuffers = getDefault(GEN_FE_NUM_BUFFERS);

  // used in runtime stats
  Cardinality estimatedRowCount = (Cardinality)
                       (getInputCardinality() * getEstRowsUsed()).getValue();

  Int32 numChildren = getArity();
  ex_cri_desc * givenDesc = generator->getCriDesc(Generator::DOWN);
  ComTdb * childTdb = (ComTdb*) new (space) ComTdb();
  ExplainTuple *firstExplainTuple = 0;

  // Allocate a new map table for this child.
  //
  MapTable *localMapTable = generator->appendAtEnd();
  generator->setCriDesc(givenDesc, Generator::DOWN);
  child(0)->codeGen(generator);
  childTdb = (ComTdb *)(generator->getGenObj());
  firstExplainTuple = generator->getExplainTuple();

  ComTdbFastExtract *newTdb = NULL;
  char * targetName = NULL;
  char * hiveTableName = NULL;
  char * delimiter = NULL;
  char * header = NULL;
  char * nullString = NULL;
  char * recordSeparator = NULL;
  char * hdfsHostName = NULL;
  Int32 hdfsPortNum = getHdfsPort();

  char * newDelimiter = (char *)getDelimiter().data();
  char specChar = '0';
  if (!isHiveInsert() && isSpecialChar(newDelimiter, specChar))
  {
    newDelimiter = new (cmpContext->statementHeap()) char[2];
    newDelimiter[0] = specChar;
    newDelimiter[1] = '\0';
  }

  char * newRecordSep = (char *)getRecordSeparator().data();
  specChar = '0';
  if (!isHiveInsert() && isSpecialChar(newRecordSep, specChar))
  {
    newRecordSep = new (cmpContext->statementHeap()) char[2];
    newRecordSep[0] = specChar;
    newRecordSep[1] = '\0';
  }

  targetName = AllocStringInSpace(*space, (char *)getTargetName().data());
  hdfsHostName = AllocStringInSpace(*space, (char *)getHdfsHostName().data());
  hiveTableName = AllocStringInSpace(*space, (char *)getHiveTableName().data());
  delimiter = AllocStringInSpace(*space,  newDelimiter);
  header = AllocStringInSpace(*space, (char *)getHeader().data());
  nullString = AllocStringInSpace(*space, (char *)getNullString().data());
  recordSeparator = AllocStringInSpace(*space, newRecordSep);

   result = ft_codegen(generator,
                       *this,              // RelExpr &relExpr
                       newTdb,             // ComTdbUdr *&newTdb
                       estimatedRowCount,
                       targetName,
                       hdfsHostName,
                       hdfsPortNum,
                       hiveTableName,
                       delimiter,
                       header,
                       nullString,
                       recordSeparator,
                       downQueueMaxSize,
                       upQueueMaxSize,
                       outputBufferSize,
                       requestBufferSize,
                       replyBufferSize,
                       numOutputBuffers,
                       childTdb,
                       isSequenceFile());

  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(addExplainInfo(newTdb, firstExplainTuple, 0, generator));
  }

  if (getTargetType() == FILE)
    newTdb->setTargetFile(1);
  else if (getTargetType() == SOCKET)
    newTdb->setTargetSocket(1);
  else
  GenAssert(0, "Unexpected Fast Extract target type")

  if (isAppend())
    newTdb->setIsAppend(1);
  if (this->includeHeader())
    newTdb->setIncludeHeader(1);

  if (isHiveInsert())
  {
    newTdb->setIsHiveInsert(1);
    newTdb->setIncludeHeader(0);
    setOverwriteHiveTable( getOverwriteHiveTable());
  }
  else
  {
    if (includeHeader())
      newTdb->setIncludeHeader(1);
  }
  if (getCompressionType() != NONE)
  {
    if (getCompressionType() == LZO)
      newTdb->setCompressLZO(1);
    else
    GenAssert(0, "Unexpected Fast Extract compression type")
  }
     if((ActiveSchemaDB()->getDefaults()).getToken(FAST_EXTRACT_DIAGS) == DF_ON)
    	 newTdb->setPrintDiags(1);

  return result;
}
ExWorkProcRetcode ExHdfsFastExtractTcb::work()
{
#ifdef __EID 
  // This class should not be instantiated in EID.
  return WORK_BAD_ERROR;
#else
  Lng32 retcode = 0;
  SFW_RetCode sfwRetCode = SFW_OK;
  ULng32 recSepLen = strlen(myTdb().getRecordSeparator());
  ULng32 delimLen = strlen(myTdb().getDelimiter());
  ULng32 nullLen = 
    (myTdb().getNullString() ? strlen(myTdb().getNullString()) : 0);
  if (myTdb().getIsHiveInsert())
  {
    recSepLen = 1;
    delimLen = 1;
  }
  if (getEmptyNullString()) //covers hive null case also
    nullLen = 0;

  ExOperStats *stats = NULL;
  ExFastExtractStats *feStats = getFastExtractStats();

  while (TRUE)
  {
    // if no parent request, return
    if (qParent_.down->isEmpty())
      return WORK_OK;

    ex_queue_entry *pentry_down = qParent_.down->getHeadEntry();
    const ex_queue::down_request request = pentry_down->downState.request;
    const Lng32 value = pentry_down->downState.requestValue;
    ExFastExtractPrivateState &pstate = *((ExFastExtractPrivateState *) pentry_down->pstate);
    switch (pstate.step_)
    {
    case EXTRACT_NOT_STARTED:
    {
      pstate.step_= EXTRACT_CHECK_MOD_TS;
    }
    break;

    case EXTRACT_CHECK_MOD_TS:
    {
      if ((! myTdb().getTargetFile()) ||
          (myTdb().getModTSforDir() == -1))
        {
          pstate.step_ = EXTRACT_INITIALIZE;
          break;
        }

      numBuffers_ = 0;

      memset (hdfsHost_, '\0', sizeof(hdfsHost_));
      strncpy(hdfsHost_, myTdb().getHdfsHostName(), sizeof(hdfsHost_));
      hdfsPort_ = myTdb().getHdfsPortNum();
      memset (fileName_, '\0', sizeof(fileName_));
      memset (targetLocation_, '\0', sizeof(targetLocation_));
      snprintf(targetLocation_,999, "%s", myTdb().getTargetName());

      retcode = lobInterfaceDataModCheck();
      if (retcode < 0)
      {
        Lng32 cliError = 0;
        
        Lng32 intParam1 = -retcode;
        ComDiagsArea * diagsArea = NULL;
        ExRaiseSqlError(getHeap(), &diagsArea, 
                        (ExeErrorCode)(EXE_ERROR_FROM_LOB_INTERFACE),
                        NULL, &intParam1, 
                        &cliError, 
                        NULL, 
                        "HDFS",
                        (char*)"ExpLOBInterfaceDataModCheck",
                        getLobErrStr(intParam1));
        pentry_down->setDiagsArea(diagsArea);
        pstate.step_ = EXTRACT_ERROR;
        break;
      }
      
      if (retcode == 1) // check failed
      {
        ComDiagsArea * diagsArea = NULL;
        ExRaiseSqlError(getHeap(), &diagsArea, 
                        (ExeErrorCode)(EXE_HIVE_DATA_MOD_CHECK_ERROR));
        pentry_down->setDiagsArea(diagsArea);
        pstate.step_ = EXTRACT_ERROR;
        break;
      }
      
      pstate.step_= EXTRACT_INITIALIZE;
    }
    break;
    
    case EXTRACT_INITIALIZE:
    {
      pstate.processingStarted_ = FALSE;
      errorOccurred_ = FALSE;

      //Allocate writeBuffers.
      numBuffers_ = 1;
      for (Int16 i = 0; i < numBuffers_; i++)
      {
        bool done = false;
        Int64 input_datalen = myTdb().getHdfsIoBufferSize();
        char * buf_addr = 0;
        while ((!done) && input_datalen >= 32 * 1024)
        {
          buf_addr = 0;
          buf_addr = (char *)((NAHeap *)heap_)->allocateAlignedHeapMemory((UInt32)input_datalen, 512, FALSE);
          if (buf_addr)
          {
            done = true;
            bufferPool_[i] = new (heap_) IOBuffer((char*) buf_addr, (Int32)input_datalen);
          }
          else
          {
            bufferAllocFailuresCount_++;
            input_datalen = input_datalen / 2;
          }
        }
        if (!done)
        {
          numBuffers_ = i;
          break ; // if too few buffers have been allocated we will raise
        }         // an error later
      }

      if (feStats)
      {
        feStats->setBufferAllocFailuresCount(bufferAllocFailuresCount_);
        feStats->setBuffersCount(numBuffers_);
      }


      ComDiagsArea *da = NULL;

      if (!myTdb().getSkipWritingToFiles())
        if (myTdb().getTargetFile() )
        {
          Lng32 fileNum = getGlobals()->castToExExeStmtGlobals()->getMyInstanceNumber();
          memset (hdfsHost_, '\0', sizeof(hdfsHost_));
          strncpy(hdfsHost_, myTdb().getHdfsHostName(), sizeof(hdfsHost_));
          hdfsPort_ = myTdb().getHdfsPortNum();
          memset (fileName_, '\0', sizeof(fileName_));
          memset (targetLocation_, '\0', sizeof(targetLocation_));

          time_t t;
          time(&t);
          char pt[30];
          struct tm * curgmtime = gmtime(&t);
          strftime(pt, 30, "%Y%m%d%H%M%S", curgmtime);
          srand(getpid());
          snprintf(targetLocation_,999, "%s", myTdb().getTargetName());

          if (myTdb().getIsHiveInsert())
            snprintf(fileName_,999, "%s%d-%s-%d", myTdb().getHiveTableName(), fileNum, pt,rand() % 1000);
          else
            snprintf(fileName_,999, "%s%d-%s-%d", "file", fileNum, pt,rand() % 1000);

          if ((isSequenceFile() || myTdb().getBypassLibhdfs()) &&
              !sequenceFileWriter_)
          {
            sequenceFileWriter_ = new(getHeap())
                                     SequenceFileWriter((NAHeap *)getHeap());
            sfwRetCode = sequenceFileWriter_->init();
            if (sfwRetCode != SFW_OK)
            {
              createSequenceFileError(sfwRetCode);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }

          if (isSequenceFile()  ||  myTdb().getBypassLibhdfs())
          {
            strcat(targetLocation_, "//");
            strcat(targetLocation_, fileName_);
            if (isSequenceFile())
              sfwRetCode = sequenceFileWriter_->open(targetLocation_, SFW_COMP_NONE);
            else
              sfwRetCode = sequenceFileWriter_->hdfsCreate(targetLocation_, isHdfsCompressed());
            if (sfwRetCode != SFW_OK)
            {
              createSequenceFileError(sfwRetCode);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
          else
          {
            retcode = 0;
            retcode = lobInterfaceCreate();
            if (retcode < 0)
            {
              Lng32 cliError = 0;

              Lng32 intParam1 = -retcode;
              ComDiagsArea * diagsArea = NULL;
              ExRaiseSqlError(getHeap(), &diagsArea,
                  (ExeErrorCode)(8442), NULL, &intParam1,
                  &cliError, NULL, (char*)"ExpLOBinterfaceCreate",
                  getLobErrStr(intParam1));
              pentry_down->setDiagsArea(diagsArea);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
            
          if (feStats)
          {
            feStats->setPartitionNumber(fileNum);
          }
        }
        else
        {
          updateWorkATPDiagsArea(__FILE__,__LINE__,"sockets are not supported");
          pstate.step_ = EXTRACT_ERROR;
          break;
        }

      for (UInt32 i = 0; i < myTdb().getChildTuple()->numAttrs(); i++)
      {
        Attributes * attr = myTdb().getChildTableAttr(i);
        Attributes * attr2 = myTdb().getChildTableAttr2(i);

        ex_conv_clause tempClause;
        int convIndex = 0;
        sourceFieldsConvIndex_[i] =
            tempClause.find_case_index(
                attr->getDatatype(),
                0,
                attr2->getDatatype(),
                0,
                0);

      }

      pstate.step_= EXTRACT_PASS_REQUEST_TO_CHILD;
    }
    break;

    case EXTRACT_PASS_REQUEST_TO_CHILD:
    {
      // pass the parent request to the child downqueue
      if (!qChild_.down->isFull())
      {
        ex_queue_entry * centry = qChild_.down->getTailEntry();

        if (request == ex_queue::GET_N)
          centry->downState.request = ex_queue::GET_ALL;
        else
          centry->downState.request = request;

        centry->downState.requestValue = pentry_down->downState.requestValue;
        centry->downState.parentIndex = qParent_.down->getHeadIndex();
        // set the child's input atp
        centry->passAtp(pentry_down->getAtp());
        qChild_.down->insert();
        pstate.processingStarted_ = TRUE;
      }
      else
        // couldn't pass request to child, return
        return WORK_OK;

      pstate.step_ = EXTRACT_RETURN_ROWS_FROM_CHILD;
    }
    break;
    case EXTRACT_RETURN_ROWS_FROM_CHILD:
    {
      if ((qChild_.up->isEmpty()))
      {
        return WORK_OK;
      }

      if (currBuffer_ == NULL)
      {
        currBuffer_ = bufferPool_[0];
        memset(currBuffer_->data_, '\0',currBuffer_->bufSize_);
        currBuffer_->bytesLeft_ = currBuffer_->bufSize_;
      }

      ex_queue_entry * centry = qChild_.up->getHeadEntry();
      ComDiagsArea *cda = NULL;
      ex_queue::up_status child_status = centry->upState.status;

      switch (child_status)
      {
      case ex_queue::Q_OK_MMORE:
      {
        // for the very first row retruned from child
        // include the header row if necessary
        if ((pstate.matchCount_ == 0) && myTdb().getIncludeHeader())
        {
          if (!myTdb().getIsAppend())
          {
            Int32 headerLength = strlen(myTdb().getHeader());
            char * target = currBuffer_->data_;
            if (headerLength + 1 < currBuffer_->bufSize_)
            {
              strncpy(target, myTdb().getHeader(),headerLength);
              target[headerLength] = '\n' ;
              currBuffer_->bytesLeft_ -= headerLength+1 ;
            }
            else
            {
              updateWorkATPDiagsArea(__FILE__,__LINE__,"header does not fit in buffer");
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
        }

        tupp_descriptor *dataDesc = childOutputTD_;
        ex_expr::exp_return_type expStatus = ex_expr::EXPR_OK;
        if (myTdb().getChildDataExpr())
        {
          UInt32 childTuppIndex = myTdb().childDataTuppIndex_;

          workAtp_->getTupp(childTuppIndex) = dataDesc;

          // Evaluate the child data expression. If diags are generated they
          // will be left in the down entry ATP.
          expStatus = myTdb().getChildDataExpr()->eval(centry->getAtp(), workAtp_);
          workAtp_->getTupp(childTuppIndex).release();

          if (expStatus == ex_expr::EXPR_ERROR)
          {
            updateWorkATPDiagsArea(centry);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        } // if (myTdb().getChildDataExpr())
        ///////////////////////
        char * targetData = currBuffer_->data_ + currBuffer_->bufSize_ - currBuffer_->bytesLeft_;
        if (targetData == NULL)
        {
          updateWorkATPDiagsArea(__FILE__,__LINE__,"targetData is NULL");
          pstate.step_ = EXTRACT_ERROR;
          break;
        }
        NABoolean convError = FALSE;
        convertSQRowToString(nullLen, recSepLen, delimLen, dataDesc,
            targetData, convError);
        ///////////////////////////////
        pstate.matchCount_++;
        if (!convError)
        {
          if (feStats)
          {
            feStats->incProcessedRowsCount();
          }
          pstate.successRowCount_ ++;
        }
        else
        {
          if (feStats)
          {
            feStats->incErrorRowsCount();
          }
          pstate.errorRowCount_ ++;
        }
        if (currBuffer_->bytesLeft_ < (Int32) maxExtractRowLength_)
        {
          pstate.step_ = EXTRACT_DATA_READY_TO_SEND;
        }
      }
      break;

      case ex_queue::Q_NO_DATA:
      {
        pstate.step_ = EXTRACT_DATA_READY_TO_SEND;
        endOfData_ = TRUE;
        pstate.processingStarted_ = FALSE ; // so that cancel does not
        //wait for this Q_NO_DATA
      }
      break;
      case ex_queue::Q_SQLERROR:
      {
        pstate.step_ = EXTRACT_ERROR;
      }
      break;
      case ex_queue::Q_INVALID:
      {
        updateWorkATPDiagsArea(__FILE__,__LINE__,
            "ExFastExtractTcb::work() Invalid state returned by child");
        pstate.step_ = EXTRACT_ERROR;
      }
      break;

      } // switch
      qChild_.up->removeHead();
    }
    break;

    case EXTRACT_DATA_READY_TO_SEND:
    {
      ssize_t bytesToWrite = currBuffer_->bufSize_ - currBuffer_->bytesLeft_;

      if (!myTdb().getSkipWritingToFiles())
        if (isSequenceFile())
        {
          sfwRetCode = sequenceFileWriter_->writeBuffer(currBuffer_->data_, bytesToWrite, myTdb().getRecordSeparator());
          if (sfwRetCode != SFW_OK)
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else  if (myTdb().getBypassLibhdfs())
        {
          sfwRetCode = sequenceFileWriter_->hdfsWrite(currBuffer_->data_, bytesToWrite);
          if (sfwRetCode != SFW_OK)
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else
        {
          retcode = 0;
          retcode = lobInterfaceInsert(bytesToWrite);
          if (retcode < 0)
          {
            Lng32 cliError = 0;

            Lng32 intParam1 = -retcode;
            ComDiagsArea * diagsArea = NULL;
            ExRaiseSqlError(getHeap(), &diagsArea,
                (ExeErrorCode)(8442), NULL, &intParam1,
                &cliError, NULL, (char*)"ExpLOBInterfaceInsert",
                getLobErrStr(intParam1));
            pentry_down->setDiagsArea(diagsArea);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
      if (feStats)
      {
        feStats->incReadyToSendBuffersCount();
        feStats->incReadyToSendBytes(currBuffer_->bufSize_ - currBuffer_->bytesLeft_);
      }
      currBuffer_ = NULL;

      if (endOfData_)
      {
        pstate.step_ = EXTRACT_DONE;
      }
      else
      {
        pstate.step_ = EXTRACT_RETURN_ROWS_FROM_CHILD;
      }
    }
    break;

    case EXTRACT_ERROR:
    {
      // If there is no room in the parent queue for the reply,
      // try again later.
      //Later we may split this state into 2 one for cancel and one for query
      if (qParent_.up->isFull())
        return WORK_OK;
      // Cancel the child request - there must be a child request in
      // progress to get to the ERROR state.
      if (pstate.processingStarted_)
      {
        qChild_.down->cancelRequestWithParentIndex(qParent_.down->getHeadIndex());
        //pstate.processingStarted_ = FALSE;
      }
      while (pstate.processingStarted_ && pstate.step_ == EXTRACT_ERROR)
      {
        if (qChild_.up->isEmpty())
          return WORK_OK;
        ex_queue_entry * childEntry = qChild_.up->getHeadEntry();
        ex_queue::up_status childStatus = childEntry->upState.status;

        if (childStatus == ex_queue::Q_NO_DATA)
        {
          pstate.step_ = EXTRACT_DONE;
          pstate.processingStarted_ = FALSE;
        }
        qChild_.up->removeHead();
      }
      ex_queue_entry *pentry_up = qParent_.up->getTailEntry();
      pentry_up->copyAtp(pentry_down);
      // Construct and return the error row.
      //
      if (workAtp_->getDiagsArea())
      {
        ComDiagsArea *diagsArea = pentry_up->getDiagsArea();
        if (diagsArea == NULL)
        {
          diagsArea = ComDiagsArea::allocate(getGlobals()->getDefaultHeap());
          pentry_up->setDiagsArea(diagsArea);
        }
        pentry_up->getDiagsArea()->mergeAfter(*workAtp_->getDiagsArea());
        workAtp_->setDiagsArea(NULL);
      }
      pentry_up->upState.status = ex_queue::Q_SQLERROR;
      pentry_up->upState.parentIndex
      = pentry_down->downState.parentIndex;
      pentry_up->upState.downIndex = qParent_.down->getHeadIndex();
      pentry_up->upState.setMatchNo(pstate.matchCount_);
      qParent_.up->insert();
      //
      errorOccurred_ = TRUE;
      pstate.step_ = EXTRACT_DONE;
    }
    break;

    case EXTRACT_DONE:
    {
      // If there is no room in the parent queue for the reply,
      // try again later.
      //
      if (qParent_.up->isFull())
        return WORK_OK;

      if (!myTdb().getSkipWritingToFiles())
        if (isSequenceFile())
        {
          sfwRetCode = sequenceFileWriter_->close();
          if (!errorOccurred_ && sfwRetCode != SFW_OK )
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else  if (myTdb().getBypassLibhdfs())
        {
          if (sequenceFileWriter_)
            {
              sfwRetCode = sequenceFileWriter_->hdfsClose();
              if (!errorOccurred_ && sfwRetCode != SFW_OK )
                {
                  createSequenceFileError(sfwRetCode);
                  pstate.step_ = EXTRACT_ERROR;
                  break;
                }
            }
        }
        else
        {
          retcode = lobInterfaceClose();
          if (! errorOccurred_ && retcode < 0)
          {
            Lng32 cliError = 0;

            Lng32 intParam1 = -retcode;
            ComDiagsArea * diagsArea = NULL;
            ExRaiseSqlError(getHeap(), &diagsArea,
                (ExeErrorCode)(8442), NULL, &intParam1,
                &cliError, NULL,
                (char*)"ExpLOBinterfaceCloseFile",
                getLobErrStr(intParam1));
            pentry_down->setDiagsArea(diagsArea);

            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
      //insertUpQueueEntry will insert Q_NO_DATA into the up queue and
      //remove the head of the down queue
      insertUpQueueEntry(ex_queue::Q_NO_DATA, NULL, TRUE);
      errorOccurred_ = FALSE;

      endOfData_ = FALSE;

      //we need to set the next state so that the query can get re-executed
      //and we start from the beginning again. Not sure if pstate will be
      //valid anymore because insertUpQueueEntry() might have cleared it
      //already.
      pstate.step_ = EXTRACT_NOT_STARTED;

      //exit out now and not break.
      return WORK_OK;
    }
    break;

    default:
    {
      ex_assert(FALSE, "Invalid state in  ExHdfsFastExtractTcb ");
    }

    break;

    } // switch(pstate.step_)
  } // while

  return WORK_OK;
#endif
}//ExHdfsFastExtractTcb::work()