// Handles a RED_DATA_CONT message: writes the chunk carried in sbs to the
// file behind fNewFilePtr and replies with a RED_DATA_ACK.
//
// sbs  - incoming message; the first value extracted is the chunk's byte
//        count, which must equal the number of bytes left in the stream.
//        The pointer is released before the reply is written.
// size - running byte count for the current file; grows by the chunk size on
//        success and is set to (size_t)-1 when a std::exception is caught.
//
// The reply carries the number of bytes written, or (size_t)-1 as a NACK.
// Exceptions raised inside the try block do not escape; fIOSocket.write is
// called outside of it.
void RedistributeWorkerThread::handleDataCont(SBS& sbs, size_t& size)
{
    size_t ack = 0;

    try
    {
        size_t bytesRcvd = 0;
        *sbs >> bytesRcvd;

        if (bytesRcvd != sbs->length())
        {
            ostringstream oss;
            oss << "Incorrect data length: " << sbs->length() << ", expecting " << bytesRcvd;
            fErrorMsg = oss.str();
            fErrorCode = RED_EC_BS_TOO_SHORT;
            logMessage(fErrorMsg, __LINE__);
            throw runtime_error(fErrorMsg);
        }

        // fwrite on a null FILE* is undefined; report it like any other write failure.
        if (fNewFilePtr == NULL)
        {
            fErrorMsg = "Fail to write file: no file is open";
            fErrorCode = RED_EC_FWRITE_FAIL;
            logMessage(fErrorMsg, __LINE__);
            throw runtime_error(fErrorMsg);
        }

        errno = 0;
        size_t n = fwrite(sbs->buf(), 1, bytesRcvd, fNewFilePtr);

        if (n != bytesRcvd)
        {
            int e = errno;
            ostringstream oss;
            oss << "Fail to write file: " << strerror(e) << " (" << e << ")";
            fErrorMsg = oss.str();
            fErrorCode = RED_EC_FWRITE_FAIL;
            logMessage(fErrorMsg, __LINE__);
            throw runtime_error(fErrorMsg);
        }

        ack = bytesRcvd;
        size += ack;
    }
    catch (const std::exception&)
    {
        // NACK: reply with (size_t)-1 so the failure cannot be mistaken for a
        // zero-byte acknowledgement; the running size is invalidated as before.
        ack = -1;
        size = -1;
    }
    catch (...)
    {
        // NACK
        ack = -1;
    }

    // ack received data
    sbs.reset();
    fMsgHeader.messageId = RED_DATA_ACK;
    fBs.restart();
    fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now.
    fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
    fBs << ack;
    fIOSocket.write(fBs);
}
// Handles a RED_DATA_ABORT message: closes the file being received, removes
// every path recorded in fNewDirSet, and replies with a RED_DATA_ACK whose
// ack value is 0.
//
// sbs  - incoming message; released before the reply is written.
// size - not used by this handler.
void RedistributeWorkerThread::handleDataAbort(SBS& sbs, size_t& size)
{
    // close open file
    if (fNewFilePtr != NULL)
    {
        closeFile(fNewFilePtr);
        // Clear the pointer, as handleDataFinish does, so no later code can
        // see it as an open file and close or write to it again.
        fNewFilePtr = NULL;
    }

    IDBFileSystem& fs = IDBFileSystem::getFs(
                            (IDBPolicy::useHdfs() ? IDBDataFile::HDFS : IDBDataFile::UNBUFFERED) );

    // remove local files
    for (set<string>::iterator i = fNewDirSet.begin(); i != fNewDirSet.end(); ++i)
    {
        fs.remove(i->c_str()); // ignoring return code
    }

    // send ack
    sbs.reset();
    size_t ack = 0;
    fMsgHeader.messageId = RED_DATA_ACK;
    fBs.restart();
    fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now.
    fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
    fBs << ack;
    fIOSocket.write(fBs);
}
// Validates the peer's acknowledgement of a data transfer.
//
// sbs  - reply received from the peer; released before returning.
// size - byte count the reply is expected to acknowledge.
//
// On an empty reply, a reply shorter than one byte plus the message header,
// or an ack value different from size, fErrorMsg and fErrorCode are set and
// the message is logged.
//
// Returns true when fErrorCode equals RED_EC_OK on exit (fErrorCode is not
// cleared here, so an earlier error also yields false).
bool RedistributeWorkerThread::checkDataTransferAck(SBS& sbs, size_t size)
{
    // smallest usable reply: one leading byte followed by the message header
    const size_t minLength = sizeof(RedistributeMsgHeader) + 1;

    if (sbs->length() == 0)
    {
        fErrorMsg = "Zero byte read, Network error.";
        logMessage(fErrorMsg, __LINE__);
        fErrorCode = RED_EC_NETWORK_FAIL;
    }
    else if (sbs->length() < minLength)
    {
        ostringstream shortMsg;
        shortMsg << "Short message, length=" << sbs->length();
        fErrorMsg = shortMsg.str();
        logMessage(fErrorMsg, __LINE__);
        fErrorCode = RED_EC_WKR_MSG_SHORT;
    }
    else
    {
        // The leading byte and the header are skipped without inspection;
        // only the ack value that follows is examined.
        ByteStream::byte leadingByte;
        *sbs >> leadingByte;
        sbs->advance(sizeof(RedistributeMsgHeader));

        size_t ackedSize;
        *sbs >> ackedSize;

        if (ackedSize != size)
        {
            ostringstream mismatch;
            mismatch << "Acked size does not match request: " << ackedSize << "/" << size;
            fErrorMsg = mismatch.str();
            logMessage(fErrorMsg, __LINE__);
            fErrorCode = RED_EC_SIZE_NACK;
        }
    }

    sbs.reset();

    return (fErrorCode == RED_EC_OK);
}
// Handles a RED_DATA_COMMIT message. This function does no file work of its
// own: it releases the request and replies with a RED_DATA_ACK whose ack
// value is 0.
//
// sbs  - incoming message; released before the reply is built.
// size - not used by this handler.
void RedistributeWorkerThread::handleDataCommit(SBS& sbs, size_t& size)
{
    // the request carries nothing this handler reads
    sbs.reset();

    // build the acknowledgement: leading byte, header, ack value
    fMsgHeader.messageId = RED_DATA_ACK;
    fBs.restart();
    fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now.
    fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));

    size_t zeroAck = 0;
    fBs << zeroAck;

    fIOSocket.write(fBs);
}
// Handles a RED_DATA_FINISH message: closes the received file, compares the
// locally counted size with the size reported by the peer, and replies with
// a RED_DATA_ACK.
//
// sbs  - incoming message; the first value extracted is the peer's file
//        size. The pointer is released before the reply is written.
// size - number of bytes counted locally for this file; set to (size_t)-1
//        when a std::exception is caught.
//
// The reply carries size on success, or (size_t)-1 as a NACK.
void RedistributeWorkerThread::handleDataFinish(SBS& sbs, size_t& size)
{
    size_t ack = 0;

    // close open file
    closeFile(fNewFilePtr);
    fNewFilePtr = NULL;

    try
    {
        size_t fileSize = 0;
        *sbs >> fileSize;

        if (fileSize != size)
        {
            ostringstream oss;
            oss << "File size not match: local=" << size << ", remote=" << fileSize;
            fErrorMsg = oss.str();
            fErrorCode = RED_EC_FILE_SIZE_NOT_MATCH;
            logMessage(fErrorMsg, __LINE__);
            throw runtime_error(fErrorMsg);
        }

        ack = size;
    }
    catch (const std::exception&)
    {
        // NACK: reply with (size_t)-1; leaving ack at 0 would look like a
        // successful acknowledgement to a peer that expects size 0.
        ack = -1;
        size = -1;
    }
    catch (...)
    {
        // NACK
        ack = -1;
    }

    // ack received data
    sbs.reset();
    fMsgHeader.messageId = RED_DATA_ACK;
    fBs.restart();
    fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now.
    fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader));
    fBs << ack;
    fIOSocket.write(fBs);
}
void WEClients::read(uint32_t key, SBS &bs) { boost::shared_ptr<MQE> mqe; //Find the StepMsgQueueList for this session mutex::scoped_lock lk(fMlock); MessageQueueMap::iterator map_tok = fSessionMessages.find(key); if(map_tok == fSessionMessages.end()) { ostringstream os; //cout << " reading for key " << key << " not found" << endl; os << "WEClient: attempt to read(bs) from a nonexistent queue\n"; throw runtime_error(os.str()); } mqe = map_tok->second; lk.unlock(); //this method can block: you can't hold any locks here... (void)mqe->queue.pop(&bs); if (!bs) bs.reset(new ByteStream()); }
void RedistributeWorkerThread::handleDataStart(SBS& sbs, size_t& size) { char fileName[WriteEngine::FILE_NAME_SIZE]; try { // extract the control data for the segment file RedistributeDataControl dc; if (sbs->length() >= sizeof(RedistributeDataControl)) { memcpy(&dc, sbs->buf(), sizeof(RedistributeDataControl)); sbs->advance(sizeof(RedistributeDataControl)); size = dc.size; } else { ostringstream oss; oss << "Short message, length=" << sbs->length(); fErrorMsg = oss.str(); fErrorCode = RED_EC_WKR_MSG_SHORT; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // create and open the file for writing. WriteEngine::FileOp fileOp; // just to get filename, not for file operations int rc = fileOp.oid2FileName(dc.oid, fileName, true, dc.dbroot, dc.partition, dc.segment); if (rc == WriteEngine::NO_ERROR) { ostringstream oss; oss << "=>redistributing: " << fileName << ", oid=" << dc.oid << ", db=" << dc.dbroot << ", part=" << dc.partition << ", seg=" << dc.segment << " from db=" << fMsgHeader.destination; // fMsgHeader has swapped source and destination. logMessage(oss.str(), __LINE__); } else { fErrorCode = RED_EC_OID_TO_FILENAME; ostringstream oss; oss << "Failed to get file name: oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } if (fNewFilePtr != NULL) closeFile(fNewFilePtr); errno = 0; fNewFilePtr = fopen(fileName, "wb"); if (fNewFilePtr != NULL) { ostringstream oss; oss << "open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". " << fNewFilePtr; logMessage(oss.str(), __LINE__); } else { int e = errno; fErrorCode = RED_EC_OPEN_FILE_FAIL; ostringstream oss; oss << "Failed to open " << fileName << ", oid=" << dc.oid << ", dbroot=" << dc.dbroot << ", partition=" << dc.partition << ", segment=" << dc.segment << ". 
" << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } // set output buffering errno = 0; if (setvbuf(fNewFilePtr, fWriteBuffer.get(), _IOFBF, CHUNK_SIZE)) { int e = errno; ostringstream oss; oss << "Failed to set i/o buffer: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); logMessage(fErrorMsg, __LINE__); // not throwing an exception now. } // add to set for remove after abort addToDirSet(fileName, false); // do a fseek will show the right size, but will not actually allocate the continuous block. // do write 4k block till file size. char buf[PRE_ALLOC_SIZE] = {1}; size_t nmemb = size / PRE_ALLOC_SIZE; while (nmemb-- > 0) { errno = 0; size_t n = fwrite(buf, PRE_ALLOC_SIZE, 1, fNewFilePtr); if (n != 1) { int e = errno; ostringstream oss; oss << "Fail to preallocate file: " << strerror(e) << " (" << e << ")"; fErrorMsg = oss.str(); fErrorCode = RED_EC_FWRITE_FAIL; logMessage(fErrorMsg, __LINE__); throw runtime_error(fErrorMsg); } } // move back to beging to write real data fflush(fNewFilePtr); rewind(fNewFilePtr); } catch (const std::exception& ex) { // NACK size = -1; logMessage(ex.what(), __LINE__); } catch (...) { // NACK size = -1; } // ack file size fMsgHeader.messageId = RED_DATA_ACK; fBs.restart(); fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now. fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader)); fBs << size; fIOSocket.write(fBs); // reset to count the data received size = 0; sbs.reset(); }
void RedistributeWorkerThread::handleData() { bool done = false; bool noExcept = true; SBS sbs; size_t size = 0; try { do { switch (fMsgHeader.messageId) { case RED_DATA_INIT: handleDataInit(); break; case RED_DATA_START: handleDataStart(sbs, size); break; case RED_DATA_CONT: handleDataCont(sbs, size); break; case RED_DATA_FINISH: handleDataFinish(sbs, size); break; case RED_DATA_COMMIT: handleDataCommit(sbs, size); done = true; break; case RED_DATA_ABORT: handleDataAbort(sbs, size); done = true; break; default: handleUnknowDataMsg(); done = true; break; } if (!done) { // get next message sbs = fIOSocket.read(); ByteStream::byte wesMsgId; *sbs >> wesMsgId; memcpy(&fMsgHeader, sbs->buf(), sizeof(RedistributeMsgHeader)); sbs->advance(sizeof(RedistributeMsgHeader)); } } while (!done); // will break after commit/abort or catch an exception } catch (const std::exception& ex) { noExcept = false; logMessage(ex.what(), __LINE__); } catch (...) { noExcept = false; } if (noExcept == false) { // send NACK to peer fBs.restart(); fBs << (ByteStream::byte) WriteEngine::WE_SVR_REDISTRIBUTE; // dummy, keep for now. fBs.append((const ByteStream::byte*) &fMsgHeader, sizeof(fMsgHeader)); fBs << ((size_t) -1); fIOSocket.write(fBs); } fBs.reset(); fIOSocket.close(); }
void WEClients::Listen ( boost::shared_ptr<MessageQueueClient> client, uint32_t connIndex) { SBS sbs; try { while ( Busy() ) { //TODO: This call blocks so setting Busy() in another thread doesn't work here... sbs = client->read(); if ( sbs->length() != 0 ) { //cout << "adding data to connIndex " << endl; addDataToOutput(sbs, connIndex); } else // got zero bytes on read, nothing more will come { if (closingConnection > 0) { return; } cerr << "WEC got 0 byte message for object " << this << endl; goto Error; } } return; } catch (std::exception& e) { cerr << "WEC Caught EXCEPTION: " << e.what() << endl; goto Error; } catch (...) { cerr << "WEC Caught UNKNOWN EXCEPT" << endl; goto Error; } Error: // error condition! push 0 length bs to messagequeuemap and // eventually let jobstep error out. mutex::scoped_lock lk(fMlock); MessageQueueMap::iterator map_tok; sbs.reset(new ByteStream(0)); for (map_tok = fSessionMessages.begin(); map_tok != fSessionMessages.end(); ++map_tok) { map_tok->second->queue.clear(); (void)atomicops::atomicInc(&map_tok->second->unackedWork[0]); map_tok->second->queue.push(sbs); } lk.unlock(); // reset the pmconnection map { mutex::scoped_lock onErrLock(fOnErrMutex); string moduleName = client->moduleName(); ClientList::iterator itor = fPmConnections.begin(); while (itor != fPmConnections.end()) { if (moduleName == (itor->second)->moduleName()) { (fPmConnections[itor->first]).reset(); pmCount--; ostringstream oss; //oss << "WECLIENT: connection to is reset and this = " << this << " and pmcount is decremented."; //writeToLog(__FILE__, __LINE__, oss.str() , LOG_TYPE_DEBUG); } itor++; } // send alarm SNMPManager alarmMgr; // string alarmItem = sin_addr2String(client->serv_addr().sin_addr); string alarmItem = client->addr2String(); alarmItem.append(" WriteEngineServer"); alarmMgr.sendAlarmReport(alarmItem.c_str(), oam::CONN_FAILURE, SET); } return; }