// Generate the executor plan fragment for a fast-extract operator:
// run codeGen on the single child, then build a ComTdbFastExtract on top of
// the child's TDB via ft_codegen() and decorate it with target/format flags.
//
// Returns 0 on success (the value returned by ft_codegen()); the generated
// TDB is handed back to the generator via setExplainTuple()/getGenObj().
short PhysicalFastExtract::codeGen(Generator *generator)
{
  short result = 0;
  Space *space = generator->getSpace();
  CmpContext *cmpContext = generator->currentCmpContext();

  // Queue and I/O buffer sizing all comes from generator defaults (CQDs).
  const ULng32 downQueueMaxSize = getDefault(GEN_FE_SIZE_DOWN);
  const ULng32 upQueueMaxSize = getDefault(GEN_FE_SIZE_UP);
  const ULng32 defaultBufferSize = getDefault(GEN_FE_BUFFER_SIZE);
  const ULng32 outputBufferSize = defaultBufferSize;
  const ULng32 requestBufferSize = defaultBufferSize;
  const ULng32 replyBufferSize = defaultBufferSize;
  const ULng32 numOutputBuffers = getDefault(GEN_FE_NUM_BUFFERS);

  // used in runtime stats
  Cardinality estimatedRowCount = (Cardinality)
    (getInputCardinality() * getEstRowsUsed()).getValue();

  ex_cri_desc * givenDesc = generator->getCriDesc(Generator::DOWN);

  // childTdb is obtained from the generator after the child's codeGen.
  // (The previous code pre-allocated a throw-away ComTdb in 'space' here;
  // that object was dead -- the pointer was unconditionally overwritten
  // below before any use -- so the wasted allocation has been removed.)
  ComTdb * childTdb = NULL;
  ExplainTuple *firstExplainTuple = 0;

  // Allocate a new map table for this child.
  // MapTable *localMapTable = generator->appendAtEnd();
  generator->setCriDesc(givenDesc, Generator::DOWN);
  child(0)->codeGen(generator);
  childTdb = (ComTdb *)(generator->getGenObj());
  firstExplainTuple = generator->getExplainTuple();

  ComTdbFastExtract *newTdb = NULL;
  char * targetName = NULL;
  char * hiveTableName = NULL;
  char * delimiter = NULL;
  char * header = NULL;
  char * nullString = NULL;
  char * recordSeparator = NULL;
  char * hdfsHostName = NULL;
  Int32 hdfsPortNum = getHdfsPort();

  // If the delimiter was given as an escape for a special character
  // (e.g. a tab or newline), replace it with a 2-byte string holding the
  // single real character.  Hive inserts keep the delimiter as given.
  char * newDelimiter = (char *)getDelimiter().data();
  char specChar = '0';
  if (!isHiveInsert() && isSpecialChar(newDelimiter, specChar))
  {
    newDelimiter = new (cmpContext->statementHeap()) char[2];
    newDelimiter[0] = specChar;
    newDelimiter[1] = '\0';
  }

  // Same treatment for the record separator.
  char * newRecordSep = (char *)getRecordSeparator().data();
  specChar = '0';
  if (!isHiveInsert() && isSpecialChar(newRecordSep, specChar))
  {
    newRecordSep = new (cmpContext->statementHeap()) char[2];
    newRecordSep[0] = specChar;
    newRecordSep[1] = '\0';
  }

  // Copy all strings into the generator space so they live with the plan.
  targetName = AllocStringInSpace(*space, (char *)getTargetName().data());
  hdfsHostName = AllocStringInSpace(*space, (char *)getHdfsHostName().data());
  hiveTableName = AllocStringInSpace(*space, (char *)getHiveTableName().data());
  delimiter = AllocStringInSpace(*space, newDelimiter);
  header = AllocStringInSpace(*space, (char *)getHeader().data());
  nullString = AllocStringInSpace(*space, (char *)getNullString().data());
  recordSeparator = AllocStringInSpace(*space, newRecordSep);

  result = ft_codegen(generator,
                      *this,              // RelExpr &relExpr
                      newTdb,             // ComTdbUdr *&newTdb
                      estimatedRowCount,
                      targetName,
                      hdfsHostName,
                      hdfsPortNum,
                      hiveTableName,
                      delimiter,
                      header,
                      nullString,
                      recordSeparator,
                      downQueueMaxSize,
                      upQueueMaxSize,
                      outputBufferSize,
                      requestBufferSize,
                      replyBufferSize,
                      numOutputBuffers,
                      childTdb,
                      isSequenceFile());

  if (!generator->explainDisabled())
  {
    generator->setExplainTuple(addExplainInfo(newTdb, firstExplainTuple, 0, generator));
  }

  if (getTargetType() == FILE)
    newTdb->setTargetFile(1);
  else if (getTargetType() == SOCKET)
    newTdb->setTargetSocket(1);
  else
    GenAssert(0, "Unexpected Fast Extract target type")

  if (isAppend())
    newTdb->setIsAppend(1);

  // NOTE: a redundant "if (includeHeader()) newTdb->setIncludeHeader(1);"
  // that used to precede this if/else was removed: the hive branch forces
  // the flag to 0 and the non-hive branch repeats the identical set, so
  // behavior is unchanged.
  if (isHiveInsert())
  {
    newTdb->setIsHiveInsert(1);
    newTdb->setIncludeHeader(0);  // hive target tables never get a header row
    // NOTE(review): this is a self-assignment on the RelExpr and therefore a
    // no-op; it looks like it was meant to copy the flag into the TDB
    // (newTdb->setOverwriteHiveTable(...)) -- confirm against
    // ComTdbFastExtract before changing.
    setOverwriteHiveTable( getOverwriteHiveTable());
  }
  else
  {
    if (includeHeader())
      newTdb->setIncludeHeader(1);
  }

  if (getCompressionType() != NONE)
  {
    if (getCompressionType() == LZO)
      newTdb->setCompressLZO(1);
    else
      GenAssert(0, "Unexpected Fast Extract compression type")
  }

  // Optional diagnostics printing, controlled by the FAST_EXTRACT_DIAGS CQD.
  if ((ActiveSchemaDB()->getDefaults()).getToken(FAST_EXTRACT_DIAGS) == DF_ON)
    newTdb->setPrintDiags(1);

  return result;
}
// Work method for the HDFS fast-extract TCB.  Implements a per-request state
// machine (pstate.step_): optionally verify the HDFS directory's modification
// timestamp, allocate write buffers and open the target file, pass the parent
// request down to the child, format child rows into the current buffer, flush
// full buffers to HDFS (sequence file / direct hdfs / LOB interface), and
// finally reply Q_NO_DATA (or Q_SQLERROR) to the parent.
// Returns WORK_OK when there is nothing further to do right now.
ExWorkProcRetcode ExHdfsFastExtractTcb::work()
{
#ifdef __EID
  // This class should not be instantiated in EID.
  return WORK_BAD_ERROR;
#else
  Lng32 retcode = 0;
  SFW_RetCode sfwRetCode = SFW_OK;
  // Field/record formatting lengths, taken from the TDB.
  ULng32 recSepLen = strlen(myTdb().getRecordSeparator());
  ULng32 delimLen = strlen(myTdb().getDelimiter());
  ULng32 nullLen =
    (myTdb().getNullString() ? strlen(myTdb().getNullString()) : 0);
  if (myTdb().getIsHiveInsert())
  {
    // Hive text format uses single-character separators.
    recSepLen = 1;
    delimLen = 1;
  }
  if (getEmptyNullString()) //covers hive null case also
    nullLen = 0;

  ExOperStats *stats = NULL;
  ExFastExtractStats *feStats = getFastExtractStats();

  while (TRUE)
  {
    // if no parent request, return
    if (qParent_.down->isEmpty())
      return WORK_OK;

    ex_queue_entry *pentry_down = qParent_.down->getHeadEntry();
    const ex_queue::down_request request = pentry_down->downState.request;
    const Lng32 value = pentry_down->downState.requestValue;
    ExFastExtractPrivateState &pstate =
      *((ExFastExtractPrivateState *) pentry_down->pstate);

    switch (pstate.step_)
    {
    case EXTRACT_NOT_STARTED:
    {
      pstate.step_= EXTRACT_CHECK_MOD_TS;
    }
    break;

    case EXTRACT_CHECK_MOD_TS:
    {
      // Verify the target directory has not been modified since compile time.
      // Skipped for non-file targets or when no compile-time timestamp (-1)
      // was recorded.
      if ((! myTdb().getTargetFile()) ||
          (myTdb().getModTSforDir() == -1))
      {
        pstate.step_ = EXTRACT_INITIALIZE;
        break;
      }

      numBuffers_ = 0;
      memset (hdfsHost_, '\0', sizeof(hdfsHost_));
      // NOTE(review): strncpy with count == sizeof(dest) leaves hdfsHost_
      // without a NUL terminator if the source fills the buffer exactly --
      // the preceding memset only helps for shorter names.  Confirm the
      // host name is guaranteed shorter than hdfsHost_.
      strncpy(hdfsHost_, myTdb().getHdfsHostName(), sizeof(hdfsHost_));
      hdfsPort_ = myTdb().getHdfsPortNum();
      memset (fileName_, '\0', sizeof(fileName_));
      memset (targetLocation_, '\0', sizeof(targetLocation_));
      // NOTE(review): magic bound 999 -- presumably sizeof(targetLocation_)
      // is at least 1000; verify against the member declaration.
      snprintf(targetLocation_,999, "%s", myTdb().getTargetName());

      retcode = lobInterfaceDataModCheck();
      if (retcode < 0)
      {
        // LOB interface error: raise a diags entry and go to the error state.
        Lng32 cliError = 0;
        Lng32 intParam1 = -retcode;
        ComDiagsArea * diagsArea = NULL;
        ExRaiseSqlError(getHeap(), &diagsArea,
                        (ExeErrorCode)(EXE_ERROR_FROM_LOB_INTERFACE), NULL,
                        &intParam1, &cliError, NULL, "HDFS",
                        (char*)"ExpLOBInterfaceDataModCheck",
                        getLobErrStr(intParam1));
        pentry_down->setDiagsArea(diagsArea);
        pstate.step_ = EXTRACT_ERROR;
        break;
      }

      if (retcode == 1) // check failed
      {
        // Directory was modified after compile time: data-mod-check error.
        ComDiagsArea * diagsArea = NULL;
        ExRaiseSqlError(getHeap(), &diagsArea,
                        (ExeErrorCode)(EXE_HIVE_DATA_MOD_CHECK_ERROR));
        pentry_down->setDiagsArea(diagsArea);
        pstate.step_ = EXTRACT_ERROR;
        break;
      }

      pstate.step_= EXTRACT_INITIALIZE;
    }
    break;

    case EXTRACT_INITIALIZE:
    {
      pstate.processingStarted_ = FALSE;
      errorOccurred_ = FALSE;

      //Allocate writeBuffers.
      numBuffers_ = 1;
      for (Int16 i = 0; i < numBuffers_; i++)
      {
        bool done = false;
        Int64 input_datalen = myTdb().getHdfsIoBufferSize();
        char * buf_addr = 0;
        // Try progressively smaller (halved) buffers down to 32KB until an
        // aligned allocation succeeds.
        while ((!done) && input_datalen >= 32 * 1024)
        {
          buf_addr = 0;
          buf_addr = (char *)((NAHeap *)heap_)->
            allocateAlignedHeapMemory((UInt32)input_datalen, 512, FALSE);
          if (buf_addr)
          {
            done = true;
            bufferPool_[i] = new (heap_)
              IOBuffer((char*) buf_addr, (Int32)input_datalen);
          }
          else
          {
            bufferAllocFailuresCount_++;
            input_datalen = input_datalen / 2;
          }
        }
        if (!done)
        {
          numBuffers_ = i;
          break ; // if too few buffers have been allocated we will raise
        }         // an error later
      }

      if (feStats)
      {
        feStats->setBufferAllocFailuresCount(bufferAllocFailuresCount_);
        feStats->setBuffersCount(numBuffers_);
      }

      ComDiagsArea *da = NULL;

      if (!myTdb().getSkipWritingToFiles())
        if (myTdb().getTargetFile() )
        {
          // Build a per-instance, timestamped, randomized file name so
          // parallel instances never collide.
          Lng32 fileNum = getGlobals()->castToExExeStmtGlobals()->
            getMyInstanceNumber();
          memset (hdfsHost_, '\0', sizeof(hdfsHost_));
          // NOTE(review): same potential non-termination on exact-fit
          // strncpy as in EXTRACT_CHECK_MOD_TS above.
          strncpy(hdfsHost_, myTdb().getHdfsHostName(), sizeof(hdfsHost_));
          hdfsPort_ = myTdb().getHdfsPortNum();
          memset (fileName_, '\0', sizeof(fileName_));
          memset (targetLocation_, '\0', sizeof(targetLocation_));

          time_t t;
          time(&t);
          char pt[30];
          // NOTE(review): gmtime() returns a pointer to static storage and
          // is not thread-safe; fine only if work() never runs concurrently
          // with other gmtime() callers in this process.
          struct tm * curgmtime = gmtime(&t);
          strftime(pt, 30, "%Y%m%d%H%M%S", curgmtime);
          srand(getpid());

          snprintf(targetLocation_,999, "%s", myTdb().getTargetName());

          if (myTdb().getIsHiveInsert())
            snprintf(fileName_,999, "%s%d-%s-%d",
                     myTdb().getHiveTableName(), fileNum, pt,rand() % 1000);
          else
            snprintf(fileName_,999, "%s%d-%s-%d",
                     "file", fileNum, pt,rand() % 1000);

          // Lazily create (once) the SequenceFileWriter used both for
          // sequence files and for the libhdfs-bypass path.
          if ((isSequenceFile() || myTdb().getBypassLibhdfs()) &&
              !sequenceFileWriter_)
          {
            sequenceFileWriter_ = new(getHeap())
              SequenceFileWriter((NAHeap *)getHeap());
            sfwRetCode = sequenceFileWriter_->init();
            if (sfwRetCode != SFW_OK)
            {
              createSequenceFileError(sfwRetCode);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }

          if (isSequenceFile() || myTdb().getBypassLibhdfs())
          {
            // Open/create the target file through the writer.
            strcat(targetLocation_, "//");
            strcat(targetLocation_, fileName_);
            if (isSequenceFile())
              sfwRetCode = sequenceFileWriter_->open(targetLocation_,
                                                     SFW_COMP_NONE);
            else
              sfwRetCode = sequenceFileWriter_->hdfsCreate(targetLocation_,
                                                           isHdfsCompressed());
            if (sfwRetCode != SFW_OK)
            {
              createSequenceFileError(sfwRetCode);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
          else
          {
            // Create the target through the LOB interface.
            retcode = 0;
            retcode = lobInterfaceCreate();
            if (retcode < 0)
            {
              Lng32 cliError = 0;
              Lng32 intParam1 = -retcode;
              ComDiagsArea * diagsArea = NULL;
              ExRaiseSqlError(getHeap(), &diagsArea,
                              (ExeErrorCode)(8442), NULL, &intParam1,
                              &cliError, NULL,
                              (char*)"ExpLOBinterfaceCreate",
                              getLobErrStr(intParam1));
              pentry_down->setDiagsArea(diagsArea);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
          if (feStats)
          {
            feStats->setPartitionNumber(fileNum);
          }
        }
        else
        {
          // Only file targets are implemented at runtime.
          updateWorkATPDiagsArea(__FILE__,__LINE__,
                                 "sockets are not supported");
          pstate.step_ = EXTRACT_ERROR;
          break;
        }

      // Precompute the convert-clause case index for each child column,
      // used later when formatting rows into text.
      for (UInt32 i = 0; i < myTdb().getChildTuple()->numAttrs(); i++)
      {
        Attributes * attr = myTdb().getChildTableAttr(i);
        Attributes * attr2 = myTdb().getChildTableAttr2(i);

        ex_conv_clause tempClause;
        int convIndex = 0;
        sourceFieldsConvIndex_[i] =
          tempClause.find_case_index(attr->getDatatype(), 0,
                                     attr2->getDatatype(), 0, 0);
      }

      pstate.step_= EXTRACT_PASS_REQUEST_TO_CHILD;
    }
    break;

    case EXTRACT_PASS_REQUEST_TO_CHILD:
    {
      // pass the parent request to the child downqueue
      if (!qChild_.down->isFull())
      {
        ex_queue_entry * centry = qChild_.down->getTailEntry();

        // GET_N is widened to GET_ALL: the extract must consume every child
        // row to produce a complete file.
        if (request == ex_queue::GET_N)
          centry->downState.request = ex_queue::GET_ALL;
        else
          centry->downState.request = request;

        centry->downState.requestValue = pentry_down->downState.requestValue;
        centry->downState.parentIndex = qParent_.down->getHeadIndex();
        // set the child's input atp
        centry->passAtp(pentry_down->getAtp());
        qChild_.down->insert();
        pstate.processingStarted_ = TRUE;
      }
      else
        // couldn't pass request to child, return
        return WORK_OK;

      pstate.step_ = EXTRACT_RETURN_ROWS_FROM_CHILD;
    }
    break;

    case EXTRACT_RETURN_ROWS_FROM_CHILD:
    {
      if ((qChild_.up->isEmpty()))
      {
        return WORK_OK;
      }

      // Lazily (re)acquire the single write buffer and reset it.
      if (currBuffer_ == NULL)
      {
        currBuffer_ = bufferPool_[0];
        memset(currBuffer_->data_, '\0',currBuffer_->bufSize_);
        currBuffer_->bytesLeft_ = currBuffer_->bufSize_;
      }

      ex_queue_entry * centry = qChild_.up->getHeadEntry();
      ComDiagsArea *cda = NULL;
      ex_queue::up_status child_status = centry->upState.status;

      switch (child_status)
      {
      case ex_queue::Q_OK_MMORE:
      {
        // for the very first row returned from child
        // include the header row if necessary (only for non-append targets)
        if ((pstate.matchCount_ == 0) && myTdb().getIncludeHeader())
        {
          if (!myTdb().getIsAppend())
          {
            Int32 headerLength = strlen(myTdb().getHeader());
            char * target = currBuffer_->data_;
            if (headerLength + 1 < currBuffer_->bufSize_)
            {
              strncpy(target, myTdb().getHeader(),headerLength);
              target[headerLength] = '\n' ;
              currBuffer_->bytesLeft_ -= headerLength+1 ;
            }
            else
            {
              updateWorkATPDiagsArea(__FILE__,__LINE__,
                                     "header does not fit in buffer");
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
        }

        tupp_descriptor *dataDesc = childOutputTD_;
        ex_expr::exp_return_type expStatus = ex_expr::EXPR_OK;
        if (myTdb().getChildDataExpr())
        {
          UInt32 childTuppIndex = myTdb().childDataTuppIndex_;

          workAtp_->getTupp(childTuppIndex) = dataDesc;

          // Evaluate the child data expression. If diags are generated they
          // will be left in the down entry ATP.
          expStatus = myTdb().getChildDataExpr()->eval(centry->getAtp(),
                                                       workAtp_);
          workAtp_->getTupp(childTuppIndex).release();

          if (expStatus == ex_expr::EXPR_ERROR)
          {
            updateWorkATPDiagsArea(centry);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        } // if (myTdb().getChildDataExpr())

        ///////////////////////
        // Append the formatted row at the current write position.
        char * targetData = currBuffer_->data_ +
          currBuffer_->bufSize_ - currBuffer_->bytesLeft_;
        if (targetData == NULL)
        {
          updateWorkATPDiagsArea(__FILE__,__LINE__,"targetData is NULL");
          pstate.step_ = EXTRACT_ERROR;
          break;
        }
        NABoolean convError = FALSE;
        convertSQRowToString(nullLen, recSepLen, delimLen, dataDesc,
                             targetData, convError);
        ///////////////////////////////

        pstate.matchCount_++;
        if (!convError)
        {
          if (feStats)
          {
            feStats->incProcessedRowsCount();
          }
          pstate.successRowCount_ ++;
        }
        else
        {
          if (feStats)
          {
            feStats->incErrorRowsCount();
          }
          pstate.errorRowCount_ ++;
        }

        // Flush when the remaining space may not hold another max-size row.
        if (currBuffer_->bytesLeft_ < (Int32) maxExtractRowLength_)
        {
          pstate.step_ = EXTRACT_DATA_READY_TO_SEND;
        }
      }
      break;

      case ex_queue::Q_NO_DATA:
      {
        // Child is done: flush whatever is buffered, then finish.
        pstate.step_ = EXTRACT_DATA_READY_TO_SEND;
        endOfData_ = TRUE;
        pstate.processingStarted_ = FALSE ; // so that cancel does not
                                            //wait for this Q_NO_DATA
      }
      break;

      case ex_queue::Q_SQLERROR:
      {
        pstate.step_ = EXTRACT_ERROR;
      }
      break;

      case ex_queue::Q_INVALID:
      {
        updateWorkATPDiagsArea(__FILE__,__LINE__,
          "ExFastExtractTcb::work() Invalid state returned by child");
        pstate.step_ = EXTRACT_ERROR;
      }
      break;
      } // switch

      // Child entry is consumed in every case, including errors.
      qChild_.up->removeHead();
    }
    break;

    case EXTRACT_DATA_READY_TO_SEND:
    {
      // Flush the filled portion of the current buffer to the target.
      ssize_t bytesToWrite = currBuffer_->bufSize_ - currBuffer_->bytesLeft_;

      if (!myTdb().getSkipWritingToFiles())
        if (isSequenceFile())
        {
          sfwRetCode = sequenceFileWriter_->writeBuffer(currBuffer_->data_,
                                                        bytesToWrite,
                                                        myTdb().getRecordSeparator());
          if (sfwRetCode != SFW_OK)
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else if (myTdb().getBypassLibhdfs())
        {
          sfwRetCode = sequenceFileWriter_->hdfsWrite(currBuffer_->data_,
                                                      bytesToWrite);
          if (sfwRetCode != SFW_OK)
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else
        {
          retcode = 0;
          retcode = lobInterfaceInsert(bytesToWrite);
          if (retcode < 0)
          {
            Lng32 cliError = 0;
            Lng32 intParam1 = -retcode;
            ComDiagsArea * diagsArea = NULL;
            ExRaiseSqlError(getHeap(), &diagsArea,
                            (ExeErrorCode)(8442), NULL, &intParam1,
                            &cliError, NULL,
                            (char*)"ExpLOBInterfaceInsert",
                            getLobErrStr(intParam1));
            pentry_down->setDiagsArea(diagsArea);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }

      if (feStats)
      {
        feStats->incReadyToSendBuffersCount();
        feStats->incReadyToSendBytes(currBuffer_->bufSize_ -
                                     currBuffer_->bytesLeft_);
      }
      // Drop the buffer reference; it is re-initialized on next use.
      currBuffer_ = NULL;

      if (endOfData_)
      {
        pstate.step_ = EXTRACT_DONE;
      }
      else
      {
        pstate.step_ = EXTRACT_RETURN_ROWS_FROM_CHILD;
      }
    }
    break;

    case EXTRACT_ERROR:
    {
      // If there is no room in the parent queue for the reply,
      // try again later.
      //Later we may split this state into 2 one for cancel and one for query
      if (qParent_.up->isFull())
        return WORK_OK;

      // Cancel the child request - there must be a child request in
      // progress to get to the ERROR state.
      if (pstate.processingStarted_)
      {
        qChild_.down->cancelRequestWithParentIndex(qParent_.down->getHeadIndex());
        //pstate.processingStarted_ = FALSE;
      }

      // Drain the child's up queue until its Q_NO_DATA arrives for the
      // cancelled request.
      while (pstate.processingStarted_ && pstate.step_ == EXTRACT_ERROR)
      {
        if (qChild_.up->isEmpty())
          return WORK_OK;

        ex_queue_entry * childEntry = qChild_.up->getHeadEntry();
        ex_queue::up_status childStatus = childEntry->upState.status;

        if (childStatus == ex_queue::Q_NO_DATA)
        {
          pstate.step_ = EXTRACT_DONE;
          pstate.processingStarted_ = FALSE;
        }
        qChild_.up->removeHead();
      }

      ex_queue_entry *pentry_up = qParent_.up->getTailEntry();
      pentry_up->copyAtp(pentry_down);
      // Construct and return the error row.
      //
      // Merge any diags accumulated on the work ATP into the parent reply.
      if (workAtp_->getDiagsArea())
      {
        ComDiagsArea *diagsArea = pentry_up->getDiagsArea();
        if (diagsArea == NULL)
        {
          diagsArea = ComDiagsArea::allocate(getGlobals()->getDefaultHeap());
          pentry_up->setDiagsArea(diagsArea);
        }
        pentry_up->getDiagsArea()->mergeAfter(*workAtp_->getDiagsArea());
        workAtp_->setDiagsArea(NULL);
      }

      pentry_up->upState.status = ex_queue::Q_SQLERROR;
      pentry_up->upState.parentIndex = pentry_down->downState.parentIndex;
      pentry_up->upState.downIndex = qParent_.down->getHeadIndex();
      pentry_up->upState.setMatchNo(pstate.matchCount_);
      qParent_.up->insert();
      // errorOccurred_ = TRUE;
      pstate.step_ = EXTRACT_DONE;
    }
    break;

    case EXTRACT_DONE:
    {
      // If there is no room in the parent queue for the reply,
      // try again later.
      //
      if (qParent_.up->isFull())
        return WORK_OK;

      // Close the target; a close failure is only reported if no earlier
      // error is already being returned.
      if (!myTdb().getSkipWritingToFiles())
        if (isSequenceFile())
        {
          sfwRetCode = sequenceFileWriter_->close();
          if (!errorOccurred_ && sfwRetCode != SFW_OK )
          {
            createSequenceFileError(sfwRetCode);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }
        else if (myTdb().getBypassLibhdfs())
        {
          if (sequenceFileWriter_)
          {
            sfwRetCode = sequenceFileWriter_->hdfsClose();
            if (!errorOccurred_ && sfwRetCode != SFW_OK )
            {
              createSequenceFileError(sfwRetCode);
              pstate.step_ = EXTRACT_ERROR;
              break;
            }
          }
        }
        else
        {
          retcode = lobInterfaceClose();
          if (! errorOccurred_ && retcode < 0)
          {
            Lng32 cliError = 0;
            Lng32 intParam1 = -retcode;
            ComDiagsArea * diagsArea = NULL;
            ExRaiseSqlError(getHeap(), &diagsArea,
                            (ExeErrorCode)(8442), NULL, &intParam1,
                            &cliError, NULL,
                            (char*)"ExpLOBinterfaceCloseFile",
                            getLobErrStr(intParam1));
            pentry_down->setDiagsArea(diagsArea);
            pstate.step_ = EXTRACT_ERROR;
            break;
          }
        }

      //insertUpQueueEntry will insert Q_NO_DATA into the up queue and
      //remove the head of the down queue
      insertUpQueueEntry(ex_queue::Q_NO_DATA, NULL, TRUE);
      errorOccurred_ = FALSE;
      endOfData_ = FALSE;

      //we need to set the next state so that the query can get re-executed
      //and we start from the beginning again. Not sure if pstate will be
      //valid anymore because insertUpQueueEntry() might have cleared it
      //already.
      pstate.step_ = EXTRACT_NOT_STARTED;

      //exit out now and not break.
      return WORK_OK;
    }
    break;

    default:
    {
      ex_assert(FALSE, "Invalid state in ExHdfsFastExtractTcb ");
    }
    break;

    } // switch(pstate.step_)
  } // while

  return WORK_OK;
#endif
}//ExHdfsFastExtractTcb::work()