// main() // int main(int argc, char **argv) { uint64_t acc=0; uint64_t readBlocks=0; // read size ib blocks uint64_t readSize=0; // read size ib bytes uint64_t readBufferSz=0; const uint64_t blockSize=8192; char* alignedbuff=0; boost::scoped_array<char> realbuff; const unsigned pageSize = 4096; //getpagesize(); WriteEngine::FileOp fFileOp; BRM::OID_t oid; char fname[256]; struct timespec tm; struct timespec tm2; struct timespec tm3; struct timespec starttm; struct timespec endtm; struct timespec tottm; bool odirect=true; int fd = 0; char response='Y'; if (argc <= 1) { cerr << "usage: testread <oid> <buffer size in blocks>" << endl; return -1; } oid=atoi(argv[1]); if (oid <=0) exit(-1); if (argc >=2) { readBlocks = atoi(argv[2]); if (readBlocks <= 0) readBlocks = 8; } if (argc >=4) { odirect=false; } readSize=readBlocks*blockSize; readBufferSz=readSize+pageSize; realbuff.reset(new char[readBufferSz]); if (realbuff.get() == 0) { cerr << "thr_popper: Can't allocate space for a whole extent in memory" << endl; return 0; } if (fFileOp.getFileName(oid, fname) != WriteEngine::NO_ERROR) { fname[0]=0; throw std::runtime_error("fileOp.getFileName failed"); } else { cout << "Reading oid: " << oid << " od: " << odirect << " file: " << fname << endl; } #if __LP64__ alignedbuff=(char*)((((ptrdiff_t)realbuff.get() >> 12) << 12) + pageSize); #else alignedbuff=(char*)(((((ptrdiff_t)realbuff.get() >> 12) << 12) & 0xffffffff) + pageSize); #endif erydbassert(((ptrdiff_t)alignedbuff - (ptrdiff_t)realbuff.get()) < (ptrdiff_t)pageSize); erydbassert(((ptrdiff_t)alignedbuff % pageSize) == 0); if (odirect) fd=open(fname, O_RDONLY|O_DIRECT|O_LARGEFILE|O_NOATIME); else fd=open(fname, O_RDONLY|O_LARGEFILE|O_NOATIME); if (fd<0) { cerr << "Open failed" << endl; perror("open"); throw runtime_error("Error opening file"); } while (toupper(response) != 'N') { uint64_t i=1; uint64_t rCnt=0; clock_gettime(CLOCK_REALTIME, &starttm); while (i!=0) { //clock_gettime(CLOCK_REALTIME, &tm); i = pread(fd, 
alignedbuff, readSize, acc); //clock_gettime(CLOCK_REALTIME, &tm2); erydbassert(i==0||i==readSize); erydbassert(i%pageSize==0); erydbassert(acc%pageSize==0); if (i < 0 && errno == EINTR) { timespec_sub(tm, tm2, tm3); cout << "* " << i << " " << right << setw(2) << setfill(' ') << tm3.tv_sec << "." << right << setw(9) << setfill('0') << tm3.tv_nsec << endl; continue; } else if (i < 0) { timespec_sub(tm, tm2, tm3); cout << "* i: " << i << " sz: " << readSize << " acc: " << acc << right << setw(2) << setfill(' ') << tm3.tv_sec << " " << right << tm3.tv_nsec << endl; perror("pread"); //make loop exit i=0; } acc += i; if (i>0) rCnt++; //timespec_sub(tm, tm2, tm3); //cout // << i << " " // << right << setw(2) << setfill(' ') << tm3.tv_sec << " " // << right << tm3.tv_nsec // << endl; } // while(acc... clock_gettime(CLOCK_REALTIME, &endtm); timespec_sub(starttm, endtm, tottm); cout << "Total reads: " << rCnt << " sz: " << acc/(1024*1024) << "MB" << " tm: " << tottm.tv_sec << "secs " << tottm.tv_nsec << "ns" << endl; cout << "Repeat the last scan[Y,N]?" << endl; cin >> response; acc=0; } // while response... close(fd); return 0; } //main
void RedistributeWorkerThread::handleDataStart(SBS& sbs, size_t& size) { char fileName[WriteEngine::FILE_NAME_SIZE]; try { // extract the control data for the segment file RedistributeDataControl dc; if (sbs->length() >= sizeof(RedistributeDataControl)) { memcpy(&dc, sbs->buf(), sizeof(RedistributeDataControl)); sbs->advance(sizeof(RedistributeDataControl)); size = dc.size; } else { ostringstream oss; oss << "Short message, length=" << sbs->length(); fErrorMsg = oss.str(); fErrorCode = RED_EC_WKR_MSG_SHORT; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // create and open the file for writing. WriteEngine::FileOp fileOp; // just to get filename, not for file operations int rc = fileOp.oid2FileName(dc.oid, fileName, true, dc.dbroot, dc.partition, dc.segment); if (rc == WriteEngine::NO_ERROR) { ostringstream oss; oss << "=>redistributing: " << fileName << ", oid=" << dc.oid << ", db=" << dc.dbroot << ", part=" << dc.partition << ", seg=" << dc.segment << " from db=" << fMsgHeader.destination; // fMsgHeader has swapped source and destination. logMessage(oss.str(), __LINE__); } else { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } if (fNewFilePtr != NULL) closeFile(fNewFilePtr); errno = 0; fNewFilePtr = fopen(fileName, "wb"); if (fNewFilePtr != NULL) { ostringstream oss; oss << "open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". " << fNewFilePtr; logMessage(oss.str(), __LINE__); } else { int e = errno; fErrorCode = RED_EC_OPEN_FILE_FAIL; ostringstream oss; oss << "Failed to open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". 
" << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // set output buffering errno = 0; if (setvbuf(fNewFilePtr, fWriteBuffer.get(), _IOFBF, CHUNK_SIZE)) { int e = errno; ostringstream oss; oss << "Failed to set i/o buffer: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); // not throwing an exception now. } // add to set for remove after abort addToDirSet(fileName, false); // do a fseek will show the right size, but will not actually allocate the continuous block. // do write 4k block till file size. char buf[PRE_ALLOC_SIZE] = {1}; size_t nmemb = size / PRE_ALLOC_SIZE; while (nmemb-- > 0) { errno = 0; size_t n = fwrite(buf, PRE_ALLOC_SIZE, 1, fNewFilePtr); if (n != 1) { int e = errno; ostringstream oss; oss << "Fail to preallocate file: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FWRITE_FAIL; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } } // move back to beging to write real data fflush(fNewFilePtr); rewind(fNewFilePtr); } catch (const std::exception& ex) { // NACK size = -1; logMessage(ex.what(), __LINE__); } catch (...) { // NACK size = -1; } // ack file size fMsgHeader.messageId = RED_DATA_ACK; fBs.restart(); fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now. fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader)); fBs << size; fIOSocket.write(fBs); // reset to count the data received size = 0; sbs.reset(); }
int RedistributeWorkerThread::sendData() { WriteEngine::FileOp fileOp; // just to get filename, not for file operations bool remotePM = (fMyId.second != fPeerId.second); uint32_t dbroot = fPlanEntry.source; uint32_t partition = fPlanEntry.partition; int16_t source = fPlanEntry.source; int16_t dest = fPlanEntry.destination; IDBDataFile::Types fileType = (IDBPolicy::useHdfs() ? IDBDataFile::HDFS : IDBDataFile::UNBUFFERED); IDBFileSystem& fs = IDBFileSystem::getFs( fileType ); if ((remotePM) && (fileType != IDBDataFile::HDFS)) { if (connectToWes(fPeerId.second) != 0) { fErrorCode = RED_EC_CONNECT_FAIL; ostringstream oss; oss << "Failed to connect to PM" << fPeerId.second << " from PM" << fMyId.second; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } // start to send each segment file uint32_t seq = 0; ByteStream bs; // start conversion with peer, hand shaking. RedistributeMsgHeader header(dest, source, seq++, RED_DATA_INIT); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); fMsgQueueClient->write(bs); SBS sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, 0)) return fErrorCode; for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); i++) { for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j) { char fileName[WriteEngine::FILE_NAME_SIZE]; int rc = fileOp.oid2FileName(*i, fileName, false, dbroot, partition, *j); if (rc == WriteEngine::NO_ERROR) { ostringstream oss; oss << "<=redistributing: " << fileName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); } else { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } if (fOldFilePtr != NULL) 
closeFile(fOldFilePtr); errno = 0; FILE* fOldFilePtr = fopen(fileName, "rb"); if (fOldFilePtr != NULL) { ostringstream oss; oss << "open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j << ". " << fOldFilePtr; logMessage(oss.str(), __LINE__); } else { int e = errno; fErrorCode = RED_EC_OPEN_FILE_FAIL; ostringstream oss; oss << "Failed to open " << fileName << ", oid=" << *i << ", dbroot=" << dbroot << ", partition=" << partition << ", segment=" << *j << ". " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } // add to set for remove after commit addToDirSet(fileName, true); char chunk[CHUNK_SIZE]; errno = 0; fseek(fOldFilePtr, 0, SEEK_END); // go to end of file long fileSize = ftell(fOldFilePtr); // get current file size if (fileSize < 0) { int e = errno; ostringstream oss; oss << "Fail to tell file size: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FSEEK_FAIL; logMessage(fErrorMsg, __LINE__); return fErrorCode; } // send start message to have the file of fileSize created at target dbroot. bs.restart(); RedistributeMsgHeader header(dest, source, seq++, RED_DATA_START); RedistributeDataControl dataControl(*i, dest, partition, *j, fileSize); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs.append((const ByteStream::byte*) &dataControl, sizeof(dataControl)); fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, fileSize)) return fErrorCode; // now send the file chunk by chunk. 
rewind(fOldFilePtr); int64_t bytesLeft = fileSize; size_t bytesSend = CHUNK_SIZE; header.messageId = RED_DATA_CONT; while (bytesLeft > 0) { if (fStopAction) { closeFile(fOldFilePtr); fOldFilePtr = NULL; return RED_EC_USER_STOP; } if (bytesLeft < (long) CHUNK_SIZE) bytesSend = bytesLeft; errno = 0; size_t n = fread(chunk, 1, bytesSend, fOldFilePtr); if (n != bytesSend) { int e = errno; ostringstream oss; oss << "Fail to read: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FREAD_FAIL; logMessage(fErrorMsg, __LINE__); return fErrorCode; } header.sequenceNum = seq++; bs.restart(); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs << (size_t) bytesSend; bs.append((const ByteStream::byte*) chunk, bytesSend); fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, bytesSend)) return fErrorCode; bytesLeft -= bytesSend; } closeFile(fOldFilePtr); fOldFilePtr = NULL; header.messageId = RED_DATA_FINISH; header.sequenceNum = seq++; bs.restart(); bs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; bs.append((const ByteStream::byte*) &header, sizeof(header)); bs << (uint64_t) fileSize; fMsgQueueClient->write(bs); sbs = fMsgQueueClient->read(); if (!checkDataTransferAck(sbs, fileSize)) return fErrorCode; } // segments } // for oids } // remote peer non-hdfs else // local or HDFS file copy { std::map<int,std::string> rootToPathMap; // use cp, in case failed in middle. May consider to use rename if possible. 
for (vector<int64_t>::iterator i = fOids.begin(); i != fOids.end(); i++) { for (set<int16_t>::iterator j = fSegments.begin(); j != fSegments.end(); ++j) { if (fStopAction) return RED_EC_USER_STOP; if (fileType == IDBDataFile::HDFS) // HDFS file copy { string sourceName; int rc = buildFullHdfsPath( rootToPathMap, // map of root to path *i, // OID source, // dbroot partition, // partition *j, // segment sourceName ); // full path name if (rc != 0) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get src file name: oid=" << *i << ", dbroot=" << source << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } string destName; rc = buildFullHdfsPath( rootToPathMap, // map of root to path *i, // OID dest, // dbroot partition, // partition *j, // segment destName ); // full path name if (rc != 0) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get dest file name: oid=" << *i << ", dbroot=" << dest << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } ostringstream oss; oss << "<=redistributing(hdfs): " << sourceName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); // add to set for remove after commit/abort addToDirSet(sourceName.c_str(), true); addToDirSet(destName.c_str(), false); int ret = fs.copyFile(sourceName.c_str(), destName.c_str()); if (ret != 0) { fErrorCode = RED_EC_COPY_FILE_FAIL; ostringstream oss; oss << "Failed to copy " << sourceName << " to " << destName << "; error is: " << strerror(errno); fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } } else // local file copy { char sourceName[WriteEngine::FILE_NAME_SIZE]; int rc = fileOp.oid2FileName(*i, sourceName, false, source, partition, *j); if (rc != WriteEngine::NO_ERROR) { fErrorCode = 
RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << source << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } char destName[WriteEngine::FILE_NAME_SIZE]; rc = fileOp.oid2FileName(*i, destName, true, dest, partition, *j); if (rc != WriteEngine::NO_ERROR) { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << *i << ", dbroot=" << dest << ", partition=" << partition << ", segment=" << *j; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } ostringstream oss; oss << "<=redistributing(copy): " << sourceName << ", oid=" << *i << ", db=" << source << ", part=" << partition << ", seg=" << *j << " to db=" << dest; logMessage(oss.str(), __LINE__); // add to set for remove after commit/abort addToDirSet(sourceName, true); addToDirSet(destName, false); // Using boost::copy_file() instead of IDBFileSystem::copy- // File() so we can capture/report any boost exception error // msg that IDBFileSystem::copyFile() currently swallows. try { filesystem::copy_file(sourceName, destName); } #if BOOST_VERSION >= 105200 catch(filesystem::filesystem_error& e) #else catch(filesystem::basic_filesystem_error<filesystem::path>& e) #endif { fErrorCode = RED_EC_COPY_FILE_FAIL; ostringstream oss; oss << "Failed to copy " << sourceName << " to " << destName << "; error is: " << e.what(); fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); return fErrorCode; } } } // segment } // oid } // !remote return 0; }
/**
 * Thread body: read the file behind foid from offset facc to end of file in
 * freadSize-byte pread() calls through a page-aligned window, then print the
 * number of reads, megabytes read and elapsed time.
 *
 * Uses the functor's members for configuration and scratch state (foid,
 * fname, fodirect, fd, freadSize, freadBufferSz, fpageSize, facc and the
 * timespec members); facc is reset to 0 and fd is closed before returning.
 *
 * Throws std::runtime_error when the file name cannot be resolved or the
 * file cannot be opened.
 */
void operator() ()
{
    WriteEngine::FileOp fFileOp;

    // NOTE(review): runtime-sized stack array (compiler extension); a large
    // freadBufferSz could exhaust the thread's stack -- confirm the sizes
    // callers use.  An array's address is never null, so the former
    // "frealbuff==0" check could not fire and has been dropped.
    char frealbuff[freadBufferSz];
    memset(frealbuff, 0, freadBufferSz);

    if (fFileOp.getFileName(foid, fname) != WriteEngine::NO_ERROR)
    {
        fname[0] = 0;
        throw std::runtime_error("fileOp.getFileName failed");
    }
    else
    {
        cout << "Reading oid: " << foid << " od: " << fodirect << " file: " << fname << endl;
    }

    // Round the raw buffer address down to a 4K boundary, then step one page up.
#if __LP64__
    falignedbuff = (char*)((((ptrdiff_t)frealbuff >> 12) << 12) + fpageSize);
#else
    falignedbuff = (char*)(((((ptrdiff_t)frealbuff >> 12) << 12) & 0xffffffff) + fpageSize);
#endif

    // The offset is in (0, fpageSize]; it equals fpageSize when frealbuff is
    // already page-aligned, so the bound has to be inclusive.
    idbassert(((ptrdiff_t)falignedbuff - (ptrdiff_t)frealbuff) <= (ptrdiff_t)fpageSize);
    idbassert(((ptrdiff_t)falignedbuff % fpageSize) == 0);

    if (fodirect)
        fd = open(fname, O_RDONLY | O_DIRECT | O_LARGEFILE | O_NOATIME);
    else
        fd = open(fname, O_RDONLY | O_LARGEFILE | O_NOATIME);

    if (fd < 0)
    {
        cerr << "Open failed" << endl;
        perror("open");
        throw runtime_error("Error opening file");
    }

    // pread() returns ssize_t; an unsigned variable here would make every
    // "i < 0" check below dead code.
    ssize_t i = 1;
    uint64_t rCnt = 0;
    clock_gettime(CLOCK_REALTIME, &fstarttm);

    while (i > 0)
    {
        clock_gettime(CLOCK_REALTIME, &ftm);
        i = pread(fd, falignedbuff, freadSize, facc);
        clock_gettime(CLOCK_REALTIME, &ftm2);

        if (i < 0)
        {
            // Stream output may clobber errno, so capture it first.
            const int savedErrno = errno;
            timespec_sub(ftm, ftm2, ftm3);

            if (savedErrno == EINTR)
            {
                cout << "* "
                     << i << " "
                     << right << setw(2) << setfill(' ') << ftm3.tv_sec << "."
                     << right << setw(9) << setfill('0') << ftm3.tv_nsec
                     << endl;
                // Interrupted: retry the same offset.  Restore a positive
                // value so the loop condition does not end the scan.
                i = 1;
                continue;
            }

            cout << "* i: "
                 << i
                 << " sz: " << freadSize
                 << " acc: " << facc
                 << right << setw(2) << setfill(' ') << ftm3.tv_sec << " "
                 << right << ftm3.tv_nsec
                 << endl;
            errno = savedErrno;
            perror("pread");
            // Hard error: stop without folding -1 into the byte count.
            break;
        }

        // Only validate sizes once we know the read did not fail.
        idbassert(i == 0 || static_cast<uint64_t>(i) == freadSize);
        idbassert(static_cast<uint64_t>(i) % fpageSize == 0);
        idbassert(facc % fpageSize == 0);

        facc += i;

        if (i > 0)
            rCnt++;

        /**
        timespec_sub(ftm, ftm2, ftm3);
        cout
        	<< rCnt << " " << facc/(1024*1024)
        	<< right << setw(2) << setfill(' ') << ftm3.tv_sec << "."
        	<< right << ftm3.tv_nsec << " i: " << i/(1024*1024)
        	<< endl;
        **/

    } // while(acc...

    clock_gettime(CLOCK_REALTIME, &fendtm);
    timespec_sub(fstarttm, fendtm, ftottm);

    cout << "Total reads: " << rCnt
         << " sz: " << facc / (1024 * 1024) << "MB"
         << " tm: " << ftottm.tv_sec << "secs "
         << ftottm.tv_nsec << "ns"
         << endl;

    facc = 0;
    close(fd);
} // operator()