Exemplo n.º 1
0
/**
 * Reads every row group from input `which`, converts each row to the output
 * row layout with normalize(), and hands the result to `output`.
 *
 * Two paths exist, selected by distinctFlags[which]:
 *  - non-distinct: rows are normalized straight into a function-local output
 *    row group and passed to addToOutput();
 *  - distinct: rows are first normalized into normalizedData[which], then,
 *    while holding uniquerMutex, inserted into `uniquer`; only rows whose
 *    insertion succeeds are copied to the shared output.
 *
 * On the distinct path the memory growth of each batch is requested from `rm`;
 * if that request is refused the step records ERR_UNION_TOO_BIG (unless an
 * error is already recorded) and calls abort().  Any exception thrown inside
 * the read loop is caught, recorded as unionStepErr (again only if no error is
 * recorded yet) and followed by abort().
 *
 * Before returning, the remaining input is drained, pending output is
 * inserted, and the runner whose increment makes runnersDone equal to
 * fInputJobStepAssociation.outSize() signals end of input and posts the
 * summary telemetry / trace log.
 *
 * @param which index into inputs, inputRGs, distinctFlags and normalizedData
 *              selecting the input this call processes.
 */
void TupleUnion::readInput(uint32_t which)
{
	/* The handling of the output got a little kludgey with the string table enhancement.
	 * When there is no distinct check, the outputs are all generated independently of
	 * each other locally in this fcn.  When there is a distinct check, threads
	 * share the output, which is built in the 'rowMemory' vector rather than in
	 * thread-local memory.  Building the result in a common space allows us to
	 * store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers.
	 */

	RowGroupDL *dl = NULL;
	bool more = true;
	RGData inRGData, outRGData, *tmpRGData;
	// max() doubles as "no iterator obtained yet"; the drain step below checks for it.
	uint32_t it = numeric_limits<uint32_t>::max();
	RowGroup l_inputRG, l_outputRG, l_tmpRG;
	Row inRow, outRow, tmpRow;
	bool distinct;
	uint64_t memUsageBefore, memUsageAfter, memDiff;
	// Telemetry record reused for both the start and the summary message.
	StepTeleStats sts;
	sts.query_uuid = fQueryUuid;
	sts.step_uuid = fStepUuid;

	// Work on local copies of the row-group descriptors for this input.
	l_outputRG = outputRG;
	dl = inputs[which];
	l_inputRG = inputRGs[which];
	l_inputRG.initRow(&inRow);
	l_outputRG.initRow(&outRow);
	distinct = distinctFlags[which];

	if (distinct) {
		// Distinct path: normalized rows are staged in normalizedData[which],
		// viewed through l_tmpRG / tmpRow (output layout).
		l_tmpRG = outputRG;
		tmpRGData = &normalizedData[which];
		l_tmpRG.initRow(&tmpRow);
		l_tmpRG.setData(tmpRGData);
		l_tmpRG.resetRowGroup(0);
		l_tmpRG.getRow(0, &tmpRow);
	}
	else {
		// Non-distinct path: build the output in a buffer local to this call.
		outRGData = RGData(l_outputRG);
		l_outputRG.setData(&outRGData);
		l_outputRG.resetRowGroup(0);
		l_outputRG.getRow(0, &outRow);
	}

	try {

		it = dl->getIterator();
		more = dl->next(it, &inRGData);

		// Record the first-read timestamp only if it has not been set yet.
		if (dlTimes.FirstReadTime().tv_sec==0)
            dlTimes.setFirstReadTime();

		// NOTE(review): fStartTime == -1 presumably means the start telemetry has
		// not been posted yet - confirm where fStartTime is updated.
		if (fStartTime == -1)
		{
			sts.msg_type = StepTeleStats::ST_START;
			sts.total_units_of_work = 1;
			postStepStartTele(sts);
		}

		while (more && !cancelled()) {
			/*
				normalize each row
				  if distinct flag is set
					copy the row into the output and test for uniqueness
					  if unique, increment the row count
				  else
				    copy the row into the output & inc row count
			*/
			l_inputRG.setData(&inRGData);
			l_inputRG.getRow(0, &inRow);
			if (distinct) {
				memDiff = 0;
				// Stage 1 (no lock): normalize the whole input batch into the
				// per-input staging row group.
				l_tmpRG.resetRowGroup(0);
				l_tmpRG.getRow(0, &tmpRow);
				l_tmpRG.setRowCount(l_inputRG.getRowCount());
				for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(),
				  tmpRow.nextRow())
					normalize(inRow, &tmpRow);

				// Rewind to the first staged row for the uniqueness pass.
				l_tmpRG.getRow(0, &tmpRow);
				{
					// Stage 2: uniquer, the shared output and the allocator reading
					// are all accessed while uniquerMutex is held.
					mutex::scoped_lock lk(uniquerMutex);
					getOutput(&l_outputRG, &outRow, &outRGData);
					memUsageBefore = allocator.getMemUsage();
					for (uint32_t i = 0; i < l_tmpRG.getRowCount(); i++, tmpRow.nextRow()) {
						pair<Uniquer_t::iterator, bool> inserted;
						// Probe with a temporary position built from `which`,
						// normalizedFlag and the staged row index.
						// NOTE(review): presumably the set's hasher/comparator resolves
						// such positions against normalizedData[which] - confirm.
						inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i));
						if (inserted.second) {
							copyRow(tmpRow, &outRow);
							// Overwrite the stored key in place with a position built from
							// the last rowMemory index and the current output row count.
							// The const_cast is needed because the iterator yields a const
							// element.  NOTE(review): presumably this is where
							// addToOutput() places the row - confirm.
							const_cast<RowPosition &>(*(inserted.first)) = RowPosition(rowMemory.size()-1, l_outputRG.getRowCount());
							memDiff += outRow.getRealSize();
							addToOutput(&outRow, &l_outputRG, true, outRGData);
						}
					}
					// Add whatever the allocator grew by during this batch.
					memUsageAfter = allocator.getMemUsage();
					memDiff += (memUsageAfter - memUsageBefore);
					memUsage += memDiff;
				}
				// The memory request is made after uniquerMutex has been released.
				if (!rm.getMemory(memDiff, sessionMemLimit)) {
					fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG);
					if (status() == 0) // preserve existing error code
					{
						errorMessage(logging::IDBErrorInfo::instance()->errorMsg(
							logging::ERR_UNION_TOO_BIG));
						status(logging::ERR_UNION_TOO_BIG);
					}
					abort();
				}
			}
			else {
				// Non-distinct: normalize directly into the output row and append it.
				for (uint32_t i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow()) {
					normalize(inRow, &outRow);
					addToOutput(&outRow, &l_outputRG, false, outRGData);
				}
			}
			more = dl->next(it, &inRGData);
		}
	}
	catch(...)
	{
		// Record a generic union-step error unless an error is already recorded,
		// then abort the step.
		if (status() == 0)
		{
			errorMessage("Union step caught an unknown exception.");
			status(logging::unionStepErr);
			fLogger->logMessage(logging::LOG_TYPE_CRITICAL, "Union step caught an unknown exception.");
		}
		abort();
	}

	/* make sure that the input was drained before exiting.  This can happen if the
	query was aborted */
	// Skipped when no iterator was ever obtained (`it` still holds the sentinel).
	if (dl && it != numeric_limits<uint32_t>::max())
		while (more)
			more = dl->next(it, &inRGData);

	{
		// Both mutexes are held for the remainder of the function.
		mutex::scoped_lock lock1(uniquerMutex);
		mutex::scoped_lock lock2(sMutex);
		// Non-distinct: flush this call's local output buffer if it holds rows.
		if (!distinct && l_outputRG.getRowCount() > 0)
			output->insert(outRGData);
		if (distinct) {
			// Distinct: the shared output is inserted only by the call whose
			// increment makes distinctDone equal to distinctCount.
			getOutput(&l_outputRG, &outRow, &outRGData);
			if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0)
				output->insert(outRGData);
		}
		// The call that brings runnersDone up to the number of inputs closes the
		// output and reports the summary.
		if (++runnersDone == fInputJobStepAssociation.outSize())
		{
			output->endOfInput();

			sts.msg_type = StepTeleStats::ST_SUMMARY;
			sts.total_units_of_work = sts.units_of_work_completed = 1;
			sts.rows = fRowsReturned;
			postStepSummaryTele(sts);

			if (traceOn())
			{
				dlTimes.setLastReadTime();
				dlTimes.setEndOfInputTime();

				time_t t = time (0);
				char timeString[50];
				ctime_r (&t, timeString);
				// Overwrite the last character of the ctime_r() text (its trailing newline).
				timeString[strlen (timeString )-1] = '\0';
				ostringstream logStr;
				logStr  << "ses:" << fSessionId << " st: " << fStepId << " finished at "
						<< timeString << "; total rows returned-" << fRowsReturned << endl
						<< "\t1st read " << dlTimes.FirstReadTimeString()
						<< "; EOI " << dlTimes.EndOfInputTimeString() << "; runtime-"
						<< JSTimeStamp::tsdiffstr(dlTimes.EndOfInputTime(),dlTimes.FirstReadTime())
						<< "s;\n\tUUID " << uuids::to_string(fStepUuid) << endl
						<< "\tJob completion status " << status() << endl;
				logEnd(logStr.str().c_str());
				fExtendedInfo += logStr.str();
				formatMiniStats();
			}
		}
	}
}
Exemplo n.º 2
0
/**
 * Reads every row group from input `which`, converts each row to the output
 * row layout with normalize(), and hands the result to `output`.
 *
 * Two paths exist, selected by distinctFlags[which]:
 *  - non-distinct: rows are normalized straight into a function-local output
 *    row group and passed to addToOutput();
 *  - distinct: rows are first normalized into normalizedData[which], then,
 *    while holding uniquerMutex, inserted into `uniquer`; only rows whose
 *    insertion succeeds are copied to the shared output.
 *
 * On the distinct path the memory growth of each batch is requested from `rm`.
 * If the request is refused, ERR_UNION_TOO_BIG is logged and stored as the
 * step status unless an error is already recorded, and abort() is called.
 * Any exception thrown inside the read loop is caught, recorded as
 * unionStepErr (again only if no error is recorded yet) and followed by
 * abort().
 *
 * Before returning, the remaining input is drained, pending output is
 * inserted, and the call whose increment makes runnersDone equal to
 * fInputJobStepAssociation.outSize() signals end of input.
 *
 * @param which index into inputs, inputRGs, distinctFlags and normalizedData
 *              selecting the input this call processes.
 */
void TupleUnion::readInput(uint which)
{
	/* The handling of the output got a little kludgey with the string table enhancement.
	 * When there is no distinct check, the outputs are all generated independently of
	 * each other locally in this fcn.  When there is a distinct check, threads
	 * share the output, which is built in the 'rowMemory' vector rather than in
	 * thread-local memory.  Building the result in a common space allows us to
	 * store 8-byte offsets in rowMemory rather than 16-bytes for absolute pointers.
	 */

	RowGroupDL *dl = NULL;
	bool more = true;
	RGData inRGData, outRGData;
	RGData *tmpRGData = NULL;	// only bound on the distinct path
	// max() doubles as "no iterator obtained yet"; the drain step below checks for it.
	uint it = numeric_limits<uint>::max();
	RowGroup l_inputRG, l_outputRG, l_tmpRG;
	Row inRow, outRow, tmpRow;
	bool distinct = false;
	uint64_t memUsageBefore = 0, memUsageAfter = 0, memDiff = 0;

	// Work on local copies of the row-group descriptors for this input.
	l_outputRG = outputRG;
	dl = inputs[which];
	l_inputRG = inputRGs[which];
	l_inputRG.initRow(&inRow);
	l_outputRG.initRow(&outRow);
	distinct = distinctFlags[which];

	if (distinct) {
		// Distinct path: normalized rows are staged in normalizedData[which],
		// viewed through l_tmpRG / tmpRow (output layout).
		l_tmpRG = outputRG;
		tmpRGData = &normalizedData[which];
		l_tmpRG.initRow(&tmpRow);
		l_tmpRG.setData(tmpRGData);
		l_tmpRG.resetRowGroup(0);
		l_tmpRG.getRow(0, &tmpRow);
	}
	else {
		// Non-distinct path: build the output in a buffer local to this call.
		outRGData = RGData(l_outputRG);
		l_outputRG.setData(&outRGData);
		l_outputRG.resetRowGroup(0);
		l_outputRG.getRow(0, &outRow);
	}

	try {

		it = dl->getIterator();
		more = dl->next(it, &inRGData);

		while (more && !cancelled()) {
			/*
				normalize each row
				  if distinct flag is set
					copy the row into the output and test for uniqueness
					  if unique, increment the row count
				  else
				    copy the row into the output & inc row count
			*/
			l_inputRG.setData(&inRGData);
			l_inputRG.getRow(0, &inRow);
			if (distinct) {
				memDiff = 0;
				// Stage 1 (no lock): normalize the whole input batch into the
				// per-input staging row group.
				l_tmpRG.resetRowGroup(0);
				l_tmpRG.getRow(0, &tmpRow);
				l_tmpRG.setRowCount(l_inputRG.getRowCount());
				for (uint i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow(),
				  tmpRow.nextRow())
					normalize(inRow, &tmpRow);

				// Rewind to the first staged row for the uniqueness pass.
				l_tmpRG.getRow(0, &tmpRow);
				{
					// Stage 2: uniquer, the shared output and the allocator reading
					// are all accessed while uniquerMutex is held.
					mutex::scoped_lock lk(uniquerMutex);
					getOutput(&l_outputRG, &outRow, &outRGData);
					memUsageBefore = allocator.getMemUsage();
					for (uint i = 0; i < l_tmpRG.getRowCount(); i++, tmpRow.nextRow()) {
						pair<Uniquer_t::iterator, bool> inserted;
						// Probe with a temporary position built from `which`,
						// normalizedFlag and the staged row index.
						// NOTE(review): presumably the set's hasher/comparator resolves
						// such positions against normalizedData[which] - confirm.
						inserted = uniquer->insert(RowPosition(which | RowPosition::normalizedFlag, i));
						if (inserted.second) {
							copyRow(tmpRow, &outRow);
							// Overwrite the stored key in place with a position built from
							// the last rowMemory index and the current output row count.
							// The const_cast is needed because the iterator yields a const
							// element.  NOTE(review): presumably this is where
							// addToOutput() places the row - confirm.
							const_cast<RowPosition &>(*(inserted.first)) = RowPosition(rowMemory.size()-1, l_outputRG.getRowCount());
							memDiff += outRow.getRealSize();
							addToOutput(&outRow, &l_outputRG, true, outRGData);
						}
					}
					// Add whatever the allocator grew by during this batch.
					memUsageAfter = allocator.getMemUsage();
					memDiff += (memUsageAfter - memUsageBefore);
					memUsage += memDiff;
				}
				// The memory request is made after uniquerMutex has been released.
				if (!rm.getMemory(memDiff)) {
					fLogger->logMessage(logging::LOG_TYPE_INFO, logging::ERR_UNION_TOO_BIG);
					// Preserve an error code that was recorded earlier, matching the
					// guard used by the catch handler below.
					if (status() == 0)
						status(logging::ERR_UNION_TOO_BIG);
					abort();
				}
			}
			else {
				// Non-distinct: normalize directly into the output row and append it.
				for (uint i = 0; i < l_inputRG.getRowCount(); i++, inRow.nextRow()) {
					normalize(inRow, &outRow);
					addToOutput(&outRow, &l_outputRG, false, outRGData);
				}
			}
			more = dl->next(it, &inRGData);
		}
	}
	catch(...)
	{
		// Record a generic union-step error unless an error is already recorded,
		// then abort the step.
		if (status() == 0)
		{
			status(logging::unionStepErr);
			fLogger->logMessage(logging::LOG_TYPE_CRITICAL, "Union step caught an unknown exception.");
		}
		abort();
	}

	/* make sure that the input was drained before exiting.  This can happen if the
	query was aborted */
	// Skipped when no iterator was ever obtained (`it` still holds the sentinel).
	if (dl && it != numeric_limits<uint>::max())
		while (more)
			more = dl->next(it, &inRGData);

	{
		// Both mutexes are held for the remainder of the function.
		mutex::scoped_lock lock1(uniquerMutex);
		mutex::scoped_lock lock2(sMutex);
		// Non-distinct: flush this call's local output buffer if it holds rows.
		if (!distinct && l_outputRG.getRowCount() > 0)
			output->insert(outRGData);
		if (distinct) {
			// Distinct: the shared output is inserted only by the call whose
			// increment makes distinctDone equal to distinctCount.
			getOutput(&l_outputRG, &outRow, &outRGData);
			if (++distinctDone == distinctCount && l_outputRG.getRowCount() > 0)
				output->insert(outRGData);
		}
		// The call that brings runnersDone up to the number of inputs closes the output.
		if (++runnersDone == fInputJobStepAssociation.outSize())
			output->endOfInput();
	}
}