Example #1
0
// main()
//
// Sequential read benchmark for a single file identified by OID.
//
// usage: testread <oid> [<buffer size in blocks>] [<any 3rd arg>]
//   <oid>                    object id; resolved to a file name through
//                            WriteEngine::FileOp::getFileName().
//   <buffer size in blocks>  number of 8192-byte blocks per pread(); defaults
//                            to 8 when missing or not a positive number.
//   <any 3rd arg>            when present, the file is opened without O_DIRECT.
//
// The file is read front to back with pread(), the totals are printed, and the
// user is asked whether to repeat the scan.
//
// Returns 0 on normal completion, -1 on a usage error. Throws
// std::runtime_error when the file name cannot be resolved or the file cannot
// be opened.
int main(int argc, char **argv)
{
	const uint64_t blockSize=8192;
	const unsigned pageSize = 4096; //getpagesize();
	const uint64_t defaultReadBlocks=8;

	uint64_t acc=0;                          // file offset == bytes read so far in this scan
	uint64_t readBlocks=defaultReadBlocks;   // read size in blocks
	uint64_t readSize=0;                     // read size in bytes
	uint64_t readBufferSz=0;                 // allocation size: readSize plus alignment slack
	char* alignedbuff=0;
	boost::scoped_array<char> realbuff;
	WriteEngine::FileOp fFileOp;
	BRM::OID_t oid;
	char fname[256];
	// Per-read timing is disabled (see the commented-out clock_gettime calls in
	// the read loop), so tm/tm2 are zero-initialised to keep the error traces
	// below from printing indeterminate values.
	struct timespec tm = {0, 0};
	struct timespec tm2 = {0, 0};
	struct timespec tm3 = {0, 0};
	struct timespec starttm;
	struct timespec endtm;
	struct timespec tottm;
	bool odirect=true;
	int fd = -1;
	char response='Y';

	if (argc <= 1) {
		cerr << "usage: testread <oid> <buffer size in blocks>" << endl;
		return -1;
	}

	oid=atoi(argv[1]);
	if (oid <=0)
		exit(-1);

	// argv[2] only exists when argc >= 3; reading it with argc == 2 would hand
	// a null pointer to atoi().
	if (argc >= 3) {
		const int requestedBlocks = atoi(argv[2]);
		if (requestedBlocks > 0)
			readBlocks = requestedBlocks;
	}

	if (argc >=4) {
		odirect=false;
	}

	readSize=readBlocks*blockSize;
	readBufferSz=readSize+pageSize;

	realbuff.reset(new char[readBufferSz]);
	if (realbuff.get() == 0) {
		cerr << "thr_popper: Can't allocate space for a whole extent in memory" << endl;
		return 0;
	}

	if (fFileOp.getFileName(oid, fname) != WriteEngine::NO_ERROR) {
		fname[0]=0;
		throw std::runtime_error("fileOp.getFileName failed");
	}
	else {
		cout << "Reading oid: " << oid << " od: " << odirect << " file: " << fname << endl;
	}

	// Round the buffer start UP to the next page boundary. The offset is then
	// always in [0, pageSize), which is what the first assertion requires even
	// when the allocation happens to be page aligned already. pageSize must be
	// a power of two for the mask to be valid.
	const uintptr_t pageMask = static_cast<uintptr_t>(pageSize) - 1;
	alignedbuff = reinterpret_cast<char*>(
		(reinterpret_cast<uintptr_t>(realbuff.get()) + pageMask) & ~pageMask);
	erydbassert(((ptrdiff_t)alignedbuff - (ptrdiff_t)realbuff.get()) < (ptrdiff_t)pageSize);
	erydbassert(((ptrdiff_t)alignedbuff % pageSize) == 0);

	if (odirect)
		fd=open(fname, O_RDONLY|O_DIRECT|O_LARGEFILE|O_NOATIME);
	else
		fd=open(fname, O_RDONLY|O_LARGEFILE|O_NOATIME);

	if (fd<0) {
		cerr << "Open failed" << endl;
		perror("open");
		throw runtime_error("Error opening file");
	}

	while (toupper(response) != 'N') {
		uint64_t rCnt=0;   // number of successful, non-empty reads in this scan
		acc=0;

		clock_gettime(CLOCK_REALTIME, &starttm);
		for (;;) {
			//clock_gettime(CLOCK_REALTIME, &tm);
			// pread() returns ssize_t: -1 on error, 0 at end of file. Keeping
			// the result signed is what makes the error branches reachable.
			const ssize_t n = pread(fd, alignedbuff, readSize, acc);
			const int err = errno;   // saved before any stream output can change it
			//clock_gettime(CLOCK_REALTIME, &tm2);

			if (n < 0) {
				timespec_sub(tm, tm2, tm3);
				if (err == EINTR) {
					// interrupted before any data was transferred: retry at the same offset
					cout << "* "
						<< n << " "
						<< right << setw(2) << setfill(' ') << tm3.tv_sec << "."
						<< right << setw(9) << setfill('0') << tm3.tv_nsec
						<< endl;
					continue;
				}

				cout << "* i: "
					<< n << " sz: " << readSize << " acc: " << acc
					<< right << setw(2) << setfill(' ') << tm3.tv_sec << " "
					<< right << tm3.tv_nsec
					<< endl;
				errno = err;
				perror("pread");
				break;   // unrecoverable read error ends this scan
			}

			const uint64_t got = static_cast<uint64_t>(n);
			erydbassert(got==0||got==readSize);
			erydbassert(got%pageSize==0);
			erydbassert(acc%pageSize==0);

			if (got == 0)
				break;   // end of file

			acc += got;
			rCnt++;

			//timespec_sub(tm, tm2, tm3);
			//cout
			//	<< n << " "
			//	<< right << setw(2) << setfill(' ') << tm3.tv_sec << " "
			//	<< right << tm3.tv_nsec
			//	<< endl;
		} // read loop

		clock_gettime(CLOCK_REALTIME, &endtm);
		timespec_sub(starttm, endtm, tottm);

		cout << "Total reads: " << rCnt
			<< " sz: " << acc/(1024*1024) << "MB"
			<< " tm: " << tottm.tv_sec << "secs "
			<< tottm.tv_nsec << "ns"
			<< endl;

		cout << "Repeat the last scan[Y,N]?" << endl;
		// On EOF or a stream error 'response' would stay unchanged and the scan
		// would repeat forever, so treat a failed read as "no".
		if (!(cin >> response))
			break;
	} // while response...

	close(fd);
	return 0;

} //main
// Handles the "data start" message for one segment file.
//
// Reads a RedistributeDataControl record from the front of 'sbs', resolves the
// target file name from it, opens that file for writing through fNewFilePtr,
// and pre-writes the file in PRE_ALLOC_SIZE blocks up to the announced size.
// An acknowledgement carrying 'size' is then written to fIOSocket; when any
// step fails, 'size' is set to -1 first so the acknowledgement acts as a NACK.
//
// sbs   in/out  message buffer; consumed here and reset before returning.
// size  out     receives the announced file size while processing and is
//               always reset to 0 before returning, so the caller can use it
//               to count the data received.
//
// No exception leaves this function: failures are recorded in
// fErrorCode / fErrorMsg, logged, and reported to the peer.
void RedistributeWorkerThread::handleDataStart(SBS& sbs, size_t& size)
{
	char segFileName[WriteEngine::FILE_NAME_SIZE];

	try
	{
		// Pull the control record for the segment file off the message.
		RedistributeDataControl ctl;
		if (sbs->length() < sizeof(RedistributeDataControl))
		{
			ostringstream shortMsg;
			shortMsg << "Short message, length=" << sbs->length();
			fErrorMsg = shortMsg.str();
			fErrorCode = RED_EC_WKR_MSG_SHORT;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		memcpy(&ctl, sbs->buf(), sizeof(RedistributeDataControl));
		sbs->advance(sizeof(RedistributeDataControl));
		size = ctl.size;

		// FileOp is used here only to translate the ids into a file name.
		WriteEngine::FileOp nameResolver;
		const int nameRc = nameResolver.oid2FileName(
			ctl.oid, segFileName, true, ctl.dbroot, ctl.partition, ctl.segment);
		if (nameRc != WriteEngine::NO_ERROR)
		{
			fErrorCode = RED_EC_OID_TO_FILENAME;
			ostringstream nameErr;
			nameErr << "Failed to get file name: oid=" << ctl.oid << ", dbroot=" << ctl.dbroot
				<< ", partition=" << ctl.partition << ", segment=" << ctl.segment;
			fErrorMsg = nameErr.str();
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		{
			ostringstream startInfo;
			startInfo << "=>redistributing: " << segFileName << ", oid=" << ctl.oid << ", db=" << ctl.dbroot
				<< ", part=" << ctl.partition << ", seg=" << ctl.segment << " from db="
				<< fMsgHeader.destination;  // source and destination are swapped in fMsgHeader.
			logMessage(startInfo.str(), __LINE__);
		}

		// Drop any file left open by a previous transfer before opening the new one.
		if (fNewFilePtr != NULL)
			closeFile(fNewFilePtr);

		errno = 0;
		fNewFilePtr = fopen(segFileName, "wb");
		if (fNewFilePtr == NULL)
		{
			const int openErrno = errno;
			fErrorCode = RED_EC_OPEN_FILE_FAIL;
			ostringstream openErr;
			openErr << "Failed to open " << segFileName << ", oid=" << ctl.oid << ", dbroot="
				<< ctl.dbroot << ", partition=" << ctl.partition << ", segment=" << ctl.segment
				<< ". " << strerror(openErrno) << " (" << openErrno << ")";
			fErrorMsg = openErr.str();
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		{
			ostringstream openInfo;
			openInfo << "open " << segFileName << ", oid=" << ctl.oid << ", dbroot="
				<< ctl.dbroot << ", partition=" << ctl.partition << ", segment=" << ctl.segment
				<< ". " << fNewFilePtr;
			logMessage(openInfo.str(), __LINE__);
		}

		// Install the write buffer; a failure is logged but deliberately not fatal.
		errno = 0;
		if (setvbuf(fNewFilePtr, fWriteBuffer.get(), _IOFBF, CHUNK_SIZE) != 0)
		{
			const int bufErrno = errno;
			ostringstream bufErr;
			bufErr << "Failed to set i/o buffer: " << strerror(bufErrno) << " (" << bufErrno << ")";
			fErrorMsg = bufErr.str();
			logMessage(fErrorMsg, __LINE__);
		}

		// Remember the file so it can be removed after an abort.
		addToDirSet(segFileName, false);

		// Seeking to the end would report the right size without allocating the
		// blocks, so whole PRE_ALLOC_SIZE blocks are written out instead.
		char filler[PRE_ALLOC_SIZE] = {1};
		const size_t blockCount = size / PRE_ALLOC_SIZE;
		for (size_t blockNo = 0; blockNo < blockCount; ++blockNo)
		{
			errno = 0;
			const size_t itemsWritten = fwrite(filler, PRE_ALLOC_SIZE, 1, fNewFilePtr);
			if (itemsWritten == 1)
				continue;

			const int writeErrno = errno;
			ostringstream writeErr;
			writeErr << "Fail to preallocate file: " << strerror(writeErrno) << " (" << writeErrno << ")";
			fErrorMsg = writeErr.str();
			fErrorCode = RED_EC_FWRITE_FAIL;
			logMessage(fErrorMsg, __LINE__);
			throw runtime_error(fErrorMsg);
		}

		// Go back to the start of the file; the real data is written from there.
		fflush(fNewFilePtr);
		rewind(fNewFilePtr);
	}
	catch (const std::exception& ex)
	{
		// NACK
		size = -1;
		logMessage(ex.what(), __LINE__);
	}
	catch (...)
	{
		// NACK
		size = -1;
	}

	// Acknowledge with the file size (or the NACK value set above).
	fMsgHeader.messageId = RED_DATA_ACK;
	fBs.restart();
	fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;  // dummy, keep for now.
	fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
	fBs << size;
	fIOSocket.write(fBs);

	// From here on 'size' counts the data received.
	size = 0;
	sbs.reset();
}
// Sends (or copies) every segment file of the current plan entry to the
// destination dbroot.
//
// Three paths, chosen from the peer id and the file system in use:
//   * remote PM, non-HDFS: each file is streamed over fMsgQueueClient as
//     RED_DATA_INIT, then per file RED_DATA_START / RED_DATA_CONT... /
//     RED_DATA_FINISH, waiting for an acknowledgement after every message.
//   * HDFS: source and destination paths are built with buildFullHdfsPath()
//     and the file is copied with IDBFileSystem::copyFile().
//   * local: the file is copied with boost filesystem copy_file().
//
// Returns 0 on success, RED_EC_USER_STOP when fStopAction is set, otherwise
// the error code that was also stored in fErrorCode (with fErrorMsg set and
// logged).
int RedistributeWorkerThread::sendData()
{
	WriteEngine::FileOp fileOp;  // just to get filename, not for file operations
	bool remotePM = (fMyId.second != fPeerId.second);
	uint32_t dbroot = fPlanEntry.source;
	uint32_t partition = fPlanEntry.partition;
	int16_t source = fPlanEntry.source;
	int16_t dest = fPlanEntry.destination;

	IDBDataFile::Types fileType =
		(IDBPolicy::useHdfs() ? IDBDataFile::HDFS : IDBDataFile::UNBUFFERED);
	IDBFileSystem& fs = IDBFileSystem::getFs( fileType );

	if ((remotePM) && (fileType != IDBDataFile::HDFS))
	{
		if (connectToWes(fPeerId.second) != 0)
		{
			fErrorCode = RED_EC_CONNECT_FAIL;
			ostringstream oss;
			oss << "Failed to connect to PM" << fPeerId.second << " from PM" << fMyId.second;
			fErrorMsg = oss.str();
			logMessage(fErrorMsg, __LINE__);
			return fErrorCode;
		}

		// start to send each segment file
		uint32_t seq = 0;
		ByteStream bs;

		// start conversation with peer, hand shaking.
		RedistributeMsgHeader header(dest, source, seq++, RED_DATA_INIT);
		bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;
		bs.append((const ByteStream::byte*) &header, sizeof(header));
		fMsgQueueClient->write(bs);

		SBS sbs = fMsgQueueClient->read();
		if (!checkDataTransferAck(sbs, 0))
			return fErrorCode;

		for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); ++i)
		{
			for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j)
			{
				char fileName[WriteEngine::FILE_NAME_SIZE];
				int rc = fileOp.oid2FileName(*i, fileName, false, dbroot, partition, *j);
				if (rc == WriteEngine::NO_ERROR)
				{
					ostringstream oss;
					oss << "<=redistributing: " << fileName << ", oid=" << *i << ", db="
						<< source << ", part=" << partition << ", seg=" << *j << " to db="
						<< dest;
					logMessage(oss.str(), __LINE__);
				}
				else
				{
					fErrorCode = RED_EC_OID_TO_FILENAME;
					ostringstream oss;
					oss << "Failed to get file name: oid=" << *i << ", dbroot=" << dbroot
						<< ", partition=" << partition << ", segment=" << *j;
					fErrorMsg = oss.str();
					logMessage(fErrorMsg, __LINE__);
					return fErrorCode;
				}

				// Close whatever an earlier (possibly failed) transfer left open.
				if (fOldFilePtr != NULL)
					closeFile(fOldFilePtr);

				// Assign to the fOldFilePtr member. Declaring a local of the same
				// name here would shadow the member tested just above, so the
				// member would never track the open file and the handle would be
				// lost on every early return below.
				errno = 0;
				fOldFilePtr = fopen(fileName, "rb");
				if (fOldFilePtr != NULL)
				{
					ostringstream oss;
					oss << "open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot
						<< ", partition=" << partition << ", segment=" << *j
						<< ". " << fOldFilePtr;
					logMessage(oss.str(), __LINE__);
				}
				else
				{
					int e = errno;
					fErrorCode = RED_EC_OPEN_FILE_FAIL;
					ostringstream oss;
					oss << "Failed to open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot
						<< ", partition=" << partition << ", segment=" << *j
						<< ". " << strerror(e) << " (" << e << ")";
					fErrorMsg = oss.str();
					logMessage(fErrorMsg, __LINE__);
					return fErrorCode;
				}

				// add to set for remove after commit
				addToDirSet(fileName, true);

				char chunk[CHUNK_SIZE];

				// Determine the file size by seeking to the end. ftell() is only
				// consulted when the seek itself succeeded; otherwise errno from
				// the failed fseek() is what gets reported.
				errno = 0;
				long fileSize = -1;
				if (fseek(fOldFilePtr, 0, SEEK_END) == 0)
					fileSize = ftell(fOldFilePtr);
				if (fileSize < 0)
				{
					int e = errno;
					ostringstream oss;
					oss << "Fail to tell file size: " << strerror(e) << " (" << e << ")";
					fErrorMsg = oss.str();
					fErrorCode = RED_EC_FSEEK_FAIL;
					logMessage(fErrorMsg, __LINE__);
					return fErrorCode;
				}

				// send start message to have the file of fileSize created at target dbroot.
				bs.restart();
				RedistributeMsgHeader header(dest, source, seq++, RED_DATA_START);
				RedistributeDataControl dataControl(*i, dest, partition, *j, fileSize);
				bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;
				bs.append((const ByteStream::byte*) &header, sizeof(header));
				bs.append((const ByteStream::byte*) &dataControl, sizeof(dataControl));
				fMsgQueueClient->write(bs);

				sbs = fMsgQueueClient->read();
				if (!checkDataTransferAck(sbs, fileSize))
					return fErrorCode;

				// now send the file chunk by chunk.
				rewind(fOldFilePtr);
				int64_t bytesLeft = fileSize;
				size_t  bytesSend = CHUNK_SIZE;
				header.messageId = RED_DATA_CONT;
				while (bytesLeft > 0)
				{
					if (fStopAction)
					{
						closeFile(fOldFilePtr);
						fOldFilePtr = NULL;
						return RED_EC_USER_STOP;
					}

					// the last chunk may be shorter than CHUNK_SIZE
					if (bytesLeft < (long) CHUNK_SIZE)
						bytesSend = bytesLeft;

					errno = 0;
					size_t n = fread(chunk, 1, bytesSend, fOldFilePtr);
					if (n != bytesSend)
					{
						int e = errno;
						ostringstream oss;
						oss << "Fail to read: " << strerror(e) << " (" << e << ")";
						fErrorMsg = oss.str();
						fErrorCode = RED_EC_FREAD_FAIL;
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}

					header.sequenceNum = seq++;
					bs.restart();
					bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;
					bs.append((const ByteStream::byte*) &header, sizeof(header));
					bs << (size_t) bytesSend;
					bs.append((const ByteStream::byte*) chunk, bytesSend);
					fMsgQueueClient->write(bs);

					sbs = fMsgQueueClient->read();
					if (!checkDataTransferAck(sbs, bytesSend))
						return fErrorCode;

					bytesLeft -= bytesSend;
				}

				closeFile(fOldFilePtr);
				fOldFilePtr = NULL;

				header.messageId = RED_DATA_FINISH;
				header.sequenceNum = seq++;
				bs.restart();
				bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE;
				bs.append((const ByteStream::byte*) &header, sizeof(header));
				bs << (uint64_t) fileSize;
				fMsgQueueClient->write(bs);

				sbs = fMsgQueueClient->read();
				if (!checkDataTransferAck(sbs, fileSize))
					return fErrorCode;

			}  // segments
		}  // for oids
	}   // remote peer non-hdfs
	else                                           // local or HDFS file copy
	{
		std::map<int,std::string> rootToPathMap;

		// use cp, in case failed in middle.  May consider to use rename if possible.
		for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); ++i)
		{
			for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j)
			{
				if (fStopAction)
					return RED_EC_USER_STOP;

				if (fileType == IDBDataFile::HDFS) // HDFS file copy
				{
					string sourceName;
					int rc = buildFullHdfsPath(
						rootToPathMap, // map of root to path
						*i,            // OID
						source,        // dbroot
						partition,     // partition
						*j,            // segment
						sourceName );  // full path name
					if (rc != 0)
					{
						fErrorCode = RED_EC_OID_TO_FILENAME;
						ostringstream oss;
						oss << "Failed to get src file name: oid=" << *i
							<< ", dbroot=" << source
							<< ", partition=" << partition
							<< ", segment=" << *j;
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}

					string destName;
					rc = buildFullHdfsPath(
						rootToPathMap, // map of root to path
						*i,            // OID
						dest,          // dbroot
						partition,     // partition
						*j,            // segment
						destName );    // full path name
					if (rc != 0)
					{
						fErrorCode = RED_EC_OID_TO_FILENAME;
						ostringstream oss;
						oss << "Failed to get dest file name: oid=" << *i
							<< ", dbroot=" << dest
							<< ", partition=" << partition
							<< ", segment=" << *j;
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}

					ostringstream oss;
					oss << "<=redistributing(hdfs): " << sourceName << ", oid="
						<< *i << ", db=" << source << ", part=" << partition
						<< ", seg=" << *j << " to db=" << dest;
					logMessage(oss.str(), __LINE__);

					// add to set for remove after commit/abort
					addToDirSet(sourceName.c_str(), true);
					addToDirSet(destName.c_str(), false);

					int ret = fs.copyFile(sourceName.c_str(), destName.c_str());
					if (ret != 0)
					{
						fErrorCode = RED_EC_COPY_FILE_FAIL;
						ostringstream oss;
						oss << "Failed to copy " << sourceName << " to " <<
							destName << "; error is: " << strerror(errno);
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}
				}
				else                               // local file copy
				{
					char sourceName[WriteEngine::FILE_NAME_SIZE];
					int rc = fileOp.oid2FileName(*i, sourceName, false, source,
						partition, *j);
					if (rc != WriteEngine::NO_ERROR)
					{
						fErrorCode = RED_EC_OID_TO_FILENAME;
						ostringstream oss;
						oss << "Failed to get file name: oid=" << *i
							<< ", dbroot=" << source
							<< ", partition=" << partition
							<< ", segment=" << *j;
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}

					char destName[WriteEngine::FILE_NAME_SIZE];
					rc = fileOp.oid2FileName(*i, destName, true,
						dest, partition, *j);
					if (rc != WriteEngine::NO_ERROR)
					{
						fErrorCode = RED_EC_OID_TO_FILENAME;
						ostringstream oss;
						oss << "Failed to get file name: oid=" << *i
							<< ", dbroot=" << dest
							<< ", partition=" << partition
							<< ", segment=" << *j;
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}

					ostringstream oss;
					oss << "<=redistributing(copy): " << sourceName << ", oid="
						<< *i << ", db=" << source << ", part=" << partition
						<< ", seg=" << *j << " to db=" << dest;
					logMessage(oss.str(), __LINE__);

					// add to set for remove after commit/abort
					addToDirSet(sourceName, true);
					addToDirSet(destName, false);

					// Using boost::copy_file() instead of IDBFileSystem::copy-
					// File() so we can capture/report any boost exception error
					// msg that IDBFileSystem::copyFile() currently swallows.
					try
					{
						filesystem::copy_file(sourceName, destName);
					}
#if BOOST_VERSION >= 105200
					catch(filesystem::filesystem_error& e)
#else
					catch(filesystem::basic_filesystem_error<filesystem::path>& e)
#endif
					{
						fErrorCode = RED_EC_COPY_FILE_FAIL;
						ostringstream oss;
						oss << "Failed to copy " << sourceName << " to " <<
							destName << "; error is: " << e.what();
						fErrorMsg = oss.str();
						logMessage(fErrorMsg, __LINE__);
						return fErrorCode;
					}
				}
			}  // segment
		}  // oid
	}  // !remote

	return 0;
}
Example #4
0
	// Thread body: reads the file that belongs to 'foid' from start to end with
	// pread() in chunks of 'freadSize' bytes and prints the number of reads,
	// the volume read and the elapsed time.
	//
	// Throws std::runtime_error when the file name cannot be resolved or the
	// file cannot be opened.
	void operator() ()
	{

		WriteEngine::FileOp fFileOp;
		// NOTE(review): variable-length array on the stack (compiler extension);
		// a large freadBufferSz can overflow the thread stack. Kept as is here
		// because replacing it needs a heap container; consider doing so.
		char frealbuff[freadBufferSz];
		memset(frealbuff, 0, freadBufferSz);

		if (fFileOp.getFileName(foid, fname) != WriteEngine::NO_ERROR) {
			fname[0]=0;
			throw std::runtime_error("fileOp.getFileName failed");
		}
		else {
			cout << "Reading oid: " << foid << " od: " << fodirect << " file: " << fname << endl;
		}

		// Round the buffer start UP to the next page boundary so the offset is
		// always in [0, fpageSize), as the first assertion below requires even
		// when the buffer happens to be aligned already. fpageSize must be a
		// power of two for the mask to be valid.
		char* const rawStart = frealbuff;
		const uintptr_t pageMask = static_cast<uintptr_t>(fpageSize) - 1;
		falignedbuff = reinterpret_cast<char*>(
			(reinterpret_cast<uintptr_t>(rawStart) + pageMask) & ~pageMask);
		idbassert(((ptrdiff_t)falignedbuff - (ptrdiff_t)frealbuff) < (ptrdiff_t)fpageSize);
		idbassert(((ptrdiff_t)falignedbuff % fpageSize) == 0);

		if (fodirect)
			fd=open(fname, O_RDONLY|O_DIRECT|O_LARGEFILE|O_NOATIME);
		else
			fd=open(fname, O_RDONLY|O_LARGEFILE|O_NOATIME);

		if (fd<0) {
			cerr << "Open failed" << endl;
			perror("open");
			throw runtime_error("Error opening file");
		}

		uint64_t rCnt=0;   // number of successful, non-empty reads

		clock_gettime(CLOCK_REALTIME, &fstarttm);
		for (;;) {
			clock_gettime(CLOCK_REALTIME, &ftm);
			// pread() returns ssize_t: -1 on error, 0 at end of file. Storing it
			// in an unsigned variable would turn -1 into a huge positive count,
			// keep the loop running and corrupt the offset.
			const ssize_t n = pread(fd, falignedbuff, freadSize, facc);
			const int err = errno;   // saved before later calls can change it
			clock_gettime(CLOCK_REALTIME, &ftm2);

			if (n < 0)
			{
				timespec_sub(ftm, ftm2, ftm3);
				if (err == EINTR)
				{
					// interrupted before any data was transferred: retry at the same offset
					cout << "* "
						<< n << " "
						<< right << setw(2) << setfill(' ') << ftm3.tv_sec << "."
						<< right << setw(9) << setfill('0') << ftm3.tv_nsec
						<< endl;
					continue;
				}

				cout << "* i: "
					<< n << " sz: " << freadSize << " acc: " << facc
					<< right << setw(2) << setfill(' ') << ftm3.tv_sec << " "
					<< right << ftm3.tv_nsec
					<< endl;
				errno = err;
				perror("pread");
				break;   // unrecoverable read error ends the scan
			}

			const uint64_t got = static_cast<uint64_t>(n);
			idbassert(got==0||got==freadSize);
			idbassert(got%fpageSize==0);
			idbassert(facc%fpageSize==0);

			if (got == 0)
				break;   // end of file

			facc += got;
			rCnt++;
			/**
			timespec_sub(ftm, ftm2, ftm3);
			cout
				<< rCnt << " " << facc/(1024*1024)
				<< right << setw(2) << setfill(' ') << ftm3.tv_sec << "."
				<< right << ftm3.tv_nsec << " i: " << got/(1024*1024)
				<< endl;
			**/

		} // read loop

		clock_gettime(CLOCK_REALTIME, &fendtm);
		timespec_sub(fstarttm, fendtm, ftottm);

		cout << "Total reads: " << rCnt
			<< " sz: " << facc/(1024*1024) << "MB"
			<< " tm: " << ftottm.tv_sec << "secs "
			<< ftottm.tv_nsec << "ns"
			<< endl;

		facc=0;
		close(fd);
	} // operator()