int RedistributeWorkerThread::sendData() { WriteEngine::FileOp fileOp; // just to get filename, not for file operations bool remotePM = (fMyId.second != fPeerId.second); uint32_t dbroot = fPlanEntry.source; uint32_t partition = fPlanEntry.partition; int16_t source = fPlanEntry.source; int16_t dest = fPlanEntry.destination; IDBDataFile::Types fileType = (IDBPolicy::useHdfs() ? IDBDataFile::HDFS : IDBDataFile::UNBUFFERED); IDBFileSystem& fs = IDBFileSystem::getFs( fileType ); if ((remotePM) && (fileType != IDBDataFile::HDFS)) { if (connectToWes(fPeerId.second) != 0) { fErrorCode = RED_EC_CONNECT_FAIL; ostringstream oss; oss << "Failed to connect to PM" << fPeerId.second << " from PM" << fMyId.second; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } // start to send each segment file uint32_t seq = 0; ByteStream bs; // start conversion with peer, hand shaking. RedistributeMsgHeader header(dest, source, seq++, RED_DATA_INIT); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); fMsgQueueClient->write(bs); SBS sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, 0)) return fErrorCode; for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); i++) { for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j) { char fileName[WriteEngine::FILE_NAME_SIZE]; int rc = fileOp.oid2FileName(*i, fileName, false, dbroot, partition, *j); if (rc == WriteEngine::NO_ERROR) { ostringstream oss; oss << "<=redistributing: " << fileName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); } else { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } if (fOldFilePtr != NULL) closeFile(fOldFilePtr); errno = 0; FILE* fOldFilePtr = fopen(fileName, "rb"); if (fOldFilePtr != NULL) { ostringstream oss; oss << "open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j << ". " << fOldFilePtr; logMessage(oss.str(), __LINE__); } else { int e = errno; fErrorCode = RED_EC_OPEN_FILE_FAIL; ostringstream oss; oss << "Failed to open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j << ". " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } // add to set for remove after commit addToDirSet(fileName, true); char chunk[CHUNK_SIZE]; errno = 0; fseek(fOldFilePtr, 0, SEEK_END); // go to end of file long fileSize = ftell(fOldFilePtr); // get current file size if (fileSize < 0) { int e = errno; ostringstream oss; oss << "Fail to tell file size: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FSEEK_FAIL; logMessage(fErrorMsg, __LINE__); return fErrorCode; } // send start message to have the file of fileSize created at target dbroot. bs.restart(); RedistributeMsgHeader header(dest, source, seq++, RED_DATA_START); RedistributeDataControl dataControl(*i, dest, partition, *j, fileSize); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs.append((const ByteStream::byte*) &dataControl, sizeof(dataControl)); fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, fileSize)) return fErrorCode; // now send the file chunk by chunk. rewind(fOldFilePtr); int64_t bytesLeft = fileSize; size_t bytesSend = CHUNK_SIZE; header.messageId = RED_DATA_CONT; while (bytesLeft > 0) { if (fStopAction) { closeFile(fOldFilePtr); fOldFilePtr = NULL; return RED_EC_USER_STOP; } if (bytesLeft < (long) CHUNK_SIZE) bytesSend = bytesLeft; errno = 0; size_t n = fread(chunk, 1, bytesSend, fOldFilePtr); if (n != bytesSend) { int e = errno; ostringstream oss; oss << "Fail to read: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FREAD_FAIL; logMessage(fErrorMsg, __LINE__); return fErrorCode; } header.sequenceNum = seq++; bs.restart(); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs << (size_t) bytesSend; bs.append((const ByteStream::byte*) chunk, bytesSend); fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, bytesSend)) return fErrorCode; bytesLeft -= bytesSend; } closeFile(fOldFilePtr); fOldFilePtr = NULL; header.messageId = RED_DATA_FINISH; header.sequenceNum = seq++; bs.restart(); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs << (uint64_t) fileSize; fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, fileSize)) return fErrorCode; } // segments } // for oids } // remote peer non-hdfs else // local or HDFS file copy { std::map<int,std::string> rootToPathMap; // use cp, in case failed in middle. May consider to use rename if possible. for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); i++) { for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j) { if (fStopAction) return RED_EC_USER_STOP; if (fileType == IDBDataFile::HDFS) // HDFS file copy { string sourceName; int rc = buildFullHdfsPath( rootToPathMap, // map of root to path *i, // OID source, // dbroot partition, // partition *j, // segment sourceName ); // full path name if (rc != 0) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get src file name: oid=" << *i << ", dbroot=" << source << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } string destName; rc = buildFullHdfsPath( rootToPathMap, // map of root to path *i, // OID dest, // dbroot partition, // partition *j, // segment destName ); // full path name if (rc != 0) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get dest file name: oid=" << *i << ", dbroot=" << dest << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } ostringstream oss; oss << "<=redistributing(hdfs): " << sourceName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); // add to set for remove after commit/abort addToDirSet(sourceName.c_str(), true); addToDirSet(destName.c_str(), false); int ret = fs.copyFile(sourceName.c_str(), destName.c_str()); if (ret != 0) { fErrorCode = RED_EC_COPY_FILE_FAIL; ostringstream oss; oss << "Failed to copy " << sourceName << " to " << destName << "; error is: " << strerror(errno); fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } } else // local file copy { char sourceName[WriteEngine::FILE_NAME_SIZE]; int rc = fileOp.oid2FileName(*i, sourceName, false, source, partition, *j); if (rc != WriteEngine::NO_ERROR) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << source << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } char destName[WriteEngine::FILE_NAME_SIZE]; rc = fileOp.oid2FileName(*i, destName, true, dest, partition, *j); if (rc != WriteEngine::NO_ERROR) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << dest << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } ostringstream oss; oss << "<=redistributing(copy): " << sourceName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); // add to set for remove after commit/abort addToDirSet(sourceName, true); addToDirSet(destName, false); // Using boost::copy_file() instead of IDBFileSystem::copy- // File() so we can capture/report any boost exception error // msg that IDBFileSystem::copyFile() currently swallows. try { filesystem::copy_file(sourceName, destName); } #if BOOST_VERSION >= 105200 catch(filesystem::filesystem_error& e) #else catch(filesystem::basic_filesystem_error<filesystem::path>& e) #endif { fErrorCode = RED_EC_COPY_FILE_FAIL; ostringstream oss; oss << "Failed to copy " << sourceName << " to " << destName << "; error is: " << e.what(); fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } } } // segment } // oid } // !remote return 0; }
void RedistributeWorkerThread::handleDataStart(SBS& sbs, size_t& size) { char fileName[WriteEngine::FILE_NAME_SIZE]; try { // extract the control data for the segment file RedistributeDataControl dc; if (sbs->length() >= sizeof(RedistributeDataControl)) { memcpy(&dc, sbs->buf(), sizeof(RedistributeDataControl)); sbs->advance(sizeof(RedistributeDataControl)); size = dc.size; } else { ostringstream oss; oss << "Short message, length=" << sbs->length(); fErrorMsg = oss.str(); fErrorCode = RED_EC_WKR_MSG_SHORT; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // create and open the file for writing. WriteEngine::FileOp fileOp; // just to get filename, not for file operations int rc = fileOp.oid2FileName(dc.oid, fileName, true, dc.dbroot, dc.partition, dc.segment); if (rc == WriteEngine::NO_ERROR) { ostringstream oss; oss << "=>redistributing: " << fileName << ", oid=" << dc.oid << ", db=" << dc.dbroot << ", part=" << dc.partition << ", seg=" << dc.segment << " from db=" << fMsgHeader.destination; // fMsgHeader has swapped source and destination. logMessage(oss.str(), __LINE__); } else { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } if (fNewFilePtr != NULL) closeFile(fNewFilePtr); errno = 0; fNewFilePtr = fopen(fileName, "wb"); if (fNewFilePtr != NULL) { ostringstream oss; oss << "open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". " << fNewFilePtr; logMessage(oss.str(), __LINE__); } else { int e = errno; fErrorCode = RED_EC_OPEN_FILE_FAIL; ostringstream oss; oss << "Failed to open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // set output buffering errno = 0; if (setvbuf(fNewFilePtr, fWriteBuffer.get(), _IOFBF, CHUNK_SIZE)) { int e = errno; ostringstream oss; oss << "Failed to set i/o buffer: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); // not throwing an exception now. } // add to set for remove after abort addToDirSet(fileName, false); // do a fseek will show the right size, but will not actually allocate the continuous block. // do write 4k block till file size. char buf[PRE_ALLOC_SIZE] = {1}; size_t nmemb = size / PRE_ALLOC_SIZE; while (nmemb-- > 0) { errno = 0; size_t n = fwrite(buf, PRE_ALLOC_SIZE, 1, fNewFilePtr); if (n != 1) { int e = errno; ostringstream oss; oss << "Fail to preallocate file: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FWRITE_FAIL; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } } // move back to beging to write real data fflush(fNewFilePtr); rewind(fNewFilePtr); } catch (const std::exception& ex) { // NACK size = -1; logMessage(ex.what(), __LINE__); } catch (...) { // NACK size = -1; } // ack file size fMsgHeader.messageId = RED_DATA_ACK; fBs.restart(); fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now. fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader)); fBs << size; fIOSocket.write(fBs); // reset to count the data received size = 0; sbs.reset(); }