// Handles a RED_DATA_CONT message: writes one chunk of file data to
// fNewFilePtr and acknowledges it to the peer.
//
// The payload left in sbs starts with a size_t byte count followed by exactly
// that many data bytes.  On success the byte count is added to `size` and is
// sent back as the ack value.  On any failure size_t(-1) is sent back instead
// (NACK) and `size` is left untouched.
//
// sbs  - the received message, already advanced past the message header by
//        handleData(); it is reset before the ack is written.
// size - running count of data bytes written for the current file.
void RedistributeWorkerThread::handleDataCont(SBS& sbs, size_t& size)
{
	size_t ack = 0;

	try
	{
		size_t bytesRcvd = 0;
		*sbs >> bytesRcvd;

		// once the count is extracted, the remainder must be exactly the data.
		if (bytesRcvd != sbs->length())
		{
			ostringstream oss;
			oss << "Incorrect data length: " << sbs->length() << ", expecting " << bytesRcvd;
			fErrorMsg = oss.str();
			fErrorCode = RED_EC_BS_TOO_SHORT;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		// handleDataStart() leaves fNewFilePtr NULL when fopen fails; calling
		// fwrite on a NULL stream is undefined, so NACK instead.
		if (fNewFilePtr == NULL)
		{
			fErrorMsg = "Fail to write file: no file is open";
			fErrorCode = RED_EC_FWRITE_FAIL;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		errno = 0;
		size_t n = fwrite(sbs->buf(), 1, bytesRcvd, fNewFilePtr);
		if (n != bytesRcvd)
		{
			int e = errno;
			ostringstream oss;
			oss << "Fail to write file: " << strerror(e) << " (" << e << ")";
			fErrorMsg = oss.str();
			fErrorCode = RED_EC_FWRITE_FAIL;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		ack = bytesRcvd;
		size += ack;
	}
	catch (const std::exception&)
	{
		// NACK -- it is the ack value that goes back to the peer, so that is
		// what must carry the failure marker (not the running size).
		ack = static_cast<size_t>(-1);
	}
	catch (...)
	{
		// NACK
		ack = static_cast<size_t>(-1);
	}

	// ack received data
	sbs.reset();
	fMsgHeader.messageId = RED_DATA_ACK;
	fBs.restart();
	fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;  // dummy, keep for now.
	fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
	fBs << ack;
	fIOSocket.write(fBs);
}
// Handles a RED_DATA_START message: prepares the local file that the
// following data messages will fill.
//
// Steps, in order: copy the RedistributeDataControl record out of sbs, resolve
// the file name from oid/dbroot/partition/segment, (re)open fNewFilePtr for
// writing, attach fWriteBuffer as its stdio buffer, register the file via
// addToDirSet(), pre-write dc.size / PRE_ALLOC_SIZE blocks and rewind.
//
// The value of `size` at that point (dc.size on success, size_t(-1) after any
// exception) is written back to the peer as the ack.  Before returning,
// `size` is zeroed and sbs is reset.
//
// sbs  - the received message, already advanced past the message header.
// size - out: expected file size while acking, 0 on return.
void RedistributeWorkerThread::handleDataStart(SBS& sbs, size_t& size)
{
	char fileName[WriteEngine::FILE_NAME_SIZE];

	try
	{
		RedistributeDataControl dc;

		// the payload has to hold one complete control record.
		if (sbs->length() < sizeof(RedistributeDataControl))
		{
			ostringstream err;
			err << "Short message, length=" << sbs->length();
			fErrorMsg = err.str();
			fErrorCode = RED_EC_WKR_MSG_SHORT;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		memcpy(&dc, sbs->buf(), sizeof(RedistributeDataControl));
		sbs->advance(sizeof(RedistributeDataControl));
		size = dc.size;

		// FileOp is used for the oid -> file name mapping only.
		WriteEngine::FileOp nameResolver;
		const int nameRc = nameResolver.oid2FileName(
			dc.oid, fileName, true, dc.dbroot, dc.partition, dc.segment);

		if (nameRc != WriteEngine::NO_ERROR)
		{
			fErrorCode = RED_EC_OID_TO_FILENAME;
			ostringstream err;
			err << "Failed to get file name: oid=" << dc.oid << ", dbroot=" << dc.dbroot
				<< ", partition=" << dc.partition << ", segment=" << dc.segment;
			fErrorMsg = err.str();
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		{
			ostringstream info;
			info << "=>redistributing: " << fileName << ", oid=" << dc.oid << ", db=" << dc.dbroot
				<< ", part=" << dc.partition << ", seg=" << dc.segment << " from db="
				<< fMsgHeader.destination;  // fMsgHeader has swapped source and destination.
			logMessage(info.str(), __LINE__);
		}

		// drop whatever file is still open before opening the new one.
		if (fNewFilePtr != NULL)
			closeFile(fNewFilePtr);

		errno = 0;
		fNewFilePtr = fopen(fileName, "wb");

		if (fNewFilePtr == NULL)
		{
			const int openErr = errno;
			fErrorCode = RED_EC_OPEN_FILE_FAIL;
			ostringstream err;
			err << "Failed to open " << fileName << ", oid=" << dc.oid << ", dbroot="
				<< dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment
				<< ". " << strerror(openErr) << " (" << openErr << ")";
			fErrorMsg = err.str();
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		{
			ostringstream info;
			info << "open " << fileName << ", oid=" << dc.oid << ", dbroot="
				<< dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment
				<< ". " << fNewFilePtr;
			logMessage(info.str(), __LINE__);
		}

		// fully buffered output through our own buffer; a failure here is
		// only logged, the transfer carries on with the default buffering.
		errno = 0;

		if (setvbuf(fNewFilePtr, fWriteBuffer.get(), _IOFBF, CHUNK_SIZE) != 0)
		{
			const int bufErr = errno;
			ostringstream err;
			err << "Failed to set i/o buffer: " << strerror(bufErr) << " (" << bufErr << ")";
			fErrorMsg = err.str();
			logMessage(fErrorMsg, __LINE__);
		}

		// remember the file so that it can be removed after an abort.
		addToDirSet(fileName, false);

		// Seeking to the end would only report the right size without
		// allocating the blocks, so write PRE_ALLOC_SIZE-byte blocks instead.
		// Only whole blocks are written; a trailing partial block is not.
		char filler[PRE_ALLOC_SIZE] = {1};

		for (size_t blocksLeft = size / PRE_ALLOC_SIZE; blocksLeft > 0; --blocksLeft)
		{
			errno = 0;
			const size_t written = fwrite(filler, PRE_ALLOC_SIZE, 1, fNewFilePtr);

			if (written != 1)
			{
				const int writeErr = errno;
				ostringstream err;
				err << "Fail to preallocate file: " << strerror(writeErr) << " (" << writeErr << ")";
				fErrorMsg = err.str();
				fErrorCode = RED_EC_FWRITE_FAIL;
				logMessage(fErrorMsg, __LINE__);
				throw runtime_error(fErrorMsg);
			}
		}

		// back to the beginning, where the real data will be written.
		fflush(fNewFilePtr);
		rewind(fNewFilePtr);
	}
	catch (const std::exception& ex)
	{
		// NACK
		size = -1;
		logMessage(ex.what(), __LINE__);
	}
	catch (...)
	{
		// NACK
		size = -1;
	}

	// ack with the file size (or the NACK marker)
	fMsgHeader.messageId = RED_DATA_ACK;
	fBs.restart();
	fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;  // dummy, keep for now.
	fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
	fBs << size;
	fIOSocket.write(fBs);

	// from here on `size` counts the data bytes received
	size = 0;
	sbs.reset();
}
void RedistributeWorkerThread::handleData()
{
	bool done = false;
	bool noExcept = true;
	SBS sbs;
	size_t size = 0;

	try
	{
		do
		{
			switch (fMsgHeader.messageId)
			{
				case RED_DATA_INIT:
					handleDataInit();
					break;

				case RED_DATA_START:
					handleDataStart(sbs, size);
					break;

				case RED_DATA_CONT:
					handleDataCont(sbs, size);
					break;

				case RED_DATA_FINISH:
					handleDataFinish(sbs, size);
					break;

				case RED_DATA_COMMIT:
					handleDataCommit(sbs, size);
					done = true;
					break;

				case RED_DATA_ABORT:
					handleDataAbort(sbs, size);
					done = true;
					break;

				default:
					handleUnknowDataMsg();
					done = true;
					break;
			}

			if (!done)
			{
				// get next message
				sbs = fIOSocket.read();
				ByteStream::byte wesMsgId;
				*sbs >> wesMsgId;
				memcpy(&fMsgHeader, sbs->buf(), sizeof(RedistributeMsgHeader));
				sbs->advance(sizeof(RedistributeMsgHeader));
			}
		}
		while (!done);  // will break after commit/abort or catch an exception
	}
	catch (const std::exception& ex)
	{
		noExcept = false;
		logMessage(ex.what(), __LINE__);
	}
	catch (...)
	{
		noExcept = false;
	}

	if (noExcept == false)
	{
		// send NACK to peer
		fBs.restart();
		fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;  // dummy, keep for now.
		fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
		fBs << ((size_t) -1);
		fIOSocket.write(fBs);
	}

	fBs.reset();
	fIOSocket.close();
}