unsigned sendData(const UdpPermitToSendMsg &permit, bool isLocal, TokenBucket *bucket, bool &moreRequested, unsigned &maxPackets) { moreRequested = false; maxPackets = permit.max_data; PointerArray toSend; unsigned totalSent = cleanRetryData(permit, toSend); while (toSend.length() < maxPackets && dataQueued()) { DataBuffer *buffer = popQueuedData(); if (buffer) // Aborted slave queries leave NULL records on queue { UdpPacketHeader *header = (UdpPacketHeader*) buffer->data; toSend.append(buffer); totalSent += header->length; #ifdef __linux__ if (isLocal && (totalSent> 100000)) break; #endif } } maxPackets = toSend.length(); for (unsigned idx = 0; idx < maxPackets; idx++) { DataBuffer *buffer = (DataBuffer *) toSend.item(idx); UdpPacketHeader *header = (UdpPacketHeader*) buffer->data; bool isRetry = (header->udpSequence != 0); if (isRetry) { if (checkTraceLevel(TRACE_RETRY_DATA, 1)) DBGLOG("UdpSender: Resending packet to destination node %u sequence %u", permit.destNodeIndex, header->udpSequence); atomic_inc(&packetsRetried); } else header->udpSequence = nextUdpSequence(); unsigned length = header->length; if (bucket) { MTIME_SECTION(timer, "bucket_wait"); bucket->wait((length / 1024)+1); } try { if (udpSendCompletedInData) { if (idx == maxPackets-1) { // MORE - is this safe ? Any other thread looking at the data right now? Don't _think_ so... if (false && dataQueued()) // Causes some problems because no flow control info gets through at all { moreRequested = true; header->udpSequence |= (UDP_SEQUENCE_COMPLETE|UDP_SEQUENCE_MORE); } else header->udpSequence |= UDP_SEQUENCE_COMPLETE; } } #ifdef _SIMULATE_LOST_PACKETS if (isRetry || (header->udpSequence % 100) != 0) #endif data_socket->write(buffer->data, length); header->udpSequence &= ~UDP_SEQUENCE_BITS; } catch(IException *e) { StringBuffer s; DBGLOG("UdpSender: write exception - write(%p, %u) - %s", buffer->data, length, e->errorMessage(s).str()); e->Release(); } catch(...) { DBGLOG("UdpSender: write exception - unknown exception"); } if (!isRetry && maxRetryData) { unsigned slot = (retryDataIdx + retryDataCount) % maxRetryData; if (retryDataCount < maxRetryData) retryDataCount++; else { if (udpTraceLevel > 0) DBGLOG("Overflow in resend packet buffer for destination node %u - discarding packet sequence %u", permit.destNodeIndex, header->udpSequence); ::Release(retryData[slot]); } retryData[slot] = buffer; } else { ::Release(buffer); } } return totalSent; }
void collapseDuplexGroup(string sorted_duplex_group_file, string collapsed_dg_file, string logFile, string errorFile, int maxGap, int maxTotal) { ofstream LOG(logFile, ios_base::app); ofstream ERR(errorFile, ios_base::app); LOG << "Start to collapseDuplexGroup " << sorted_duplex_group_file << " ===> " << collapsed_dg_file << endl; int merged_dgCount = 0; int firstPossible=0; PointerArray<DuplexGroup *> dgArray; long long fileLines = countFileLines(sorted_duplex_group_file); ProcessAlert alert(fileLines, 100000, 10); ifstream DUPLEXGROUP(sorted_duplex_group_file); char strand1, strand2; string chr1, chr2, supportReads; int start1, end1, start2, end2, support; while(DUPLEXGROUP>>chr1>>start1>>end1>>strand1>>chr2>>start2>>end2>>strand2>>supportReads>>support) { char buffer[200]; sprintf(buffer, "firstPossible: %d, merged_dgCount: %d", firstPossible, merged_dgCount); LOG << alert.alert(string(buffer)); DuplexGroup *dg = new DuplexGroup(chr1, start1, end1, strand1, chr2, start2,end2, strand2, support, supportReads); //dgArray.append(dg); //if(supportReads == "23198;10015265;10015267;10015268") // cout << dgArray[dgArray.arrayLen()-1]->supportRead << endl; int lastDGovarlapped = 0; bool merged = false; for(int idx=firstPossible; idx<dgArray.arrayLen();idx++) { int overlapped = dgArray[idx]->overlapDG(dg, maxGap, maxTotal); /* if(supportReads == "23198;10015265;10015267;10015268" and idx==dgArray.arrayLen()-1) { cout << dgArray[dgArray.arrayLen()-1]->supportRead << endl; cout <<overlapped << endl; } */ if (overlapped == -1){ if(not lastDGovarlapped) firstPossible = idx + 1; } else if(overlapped > 0){ lastDGovarlapped = 1; dgArray[idx]->mergeDuplexGroup(dg); //dgArray.del(dgArray.arrayLen()-1); merged = true; delete dg; merged_dgCount++; break; } else{ lastDGovarlapped = 1; } } if(not merged) dgArray.append(dg); //cout << merged_dgCount << endl; } LOG << alert.finish(); DUPLEXGROUP.close(); ofstream COLLAPSE(collapsed_dg_file); for(int i=0; i<dgArray.arrayLen(); i++) COLLAPSE << dgArray[i]->chr1 << "\t" << dgArray[i]->start1 << "\t" << dgArray[i]->end1 << "\t" << dgArray[i]->strand1 << "\t" << dgArray[i]->chr2 << "\t" << dgArray[i]->start2 << "\t" << dgArray[i]->end2 << "\t" << dgArray[i]->strand2 << "\t" << dgArray[i]->supportRead << "\t" << dgArray[i]->support << "\n"; COLLAPSE.close(); LOG.close(); ERR.close(); }
unsigned cleanRetryData(const UdpPermitToSendMsg &permit, PointerArray &retries) { // Any saved packets < lastReceived that are not listed as missing can be deleted SpinBlock b(lock); unsigned totalData = 0; if (checkTraceLevel(TRACE_RETRY_DATA, 3)) { unsigned minUdpSequence; if (retryDataCount) minUdpSequence = ((UdpPacketHeader *) retryData[retryDataIdx]->data)->udpSequence; else minUdpSequence = maxUdpSequence; StringBuffer permitStr; permit.toString(permitStr); DBGLOG("UdpSender: cleanRetryData (%s), total %u available between %u and %u", permitStr.str(), retryDataCount, minUdpSequence, maxUdpSequence); } unsigned lastReceived = permit.lastSequenceSeen; unsigned missingIndex = 0; unsigned missingCount = permit.missingCount; unsigned i = 0; if (maxRetryData) { while (i < retryDataCount && retries.length() < permit.max_data) { unsigned idx = (retryDataIdx + i) % maxRetryData; DataBuffer *buffer = retryData[idx]; if (buffer) { UdpPacketHeader *header = (UdpPacketHeader*) buffer->data; unsigned thisSequence = header->udpSequence; if (thisSequence > lastReceived) break; if (!missingCount || thisSequence < permit.missingSequences[missingIndex]) { ::Release(buffer); retryData[idx] = NULL; if (i) i++; // MORE - leaves holes - is this smart? Alternatively could close up... Should be rare anyway else { retryDataIdx = (retryDataIdx + 1) % maxRetryData; retryDataCount--; } } else if (thisSequence == permit.missingSequences[missingIndex]) { totalData += header->length; retries.append(buffer); i++; missingIndex++; missingCount--; } else { missingIndex++; missingCount--; } } else { if (i) i++; else { // Removing leading nulls retryDataCount--; retryDataIdx = (retryDataIdx + 1) % maxRetryData; } } } } if (checkTraceLevel(TRACE_RETRY_DATA, 3)) DBGLOG("UdpSender: cleanRetryData found %u to resend total size %u, total %u still available", retries.length(), totalData, retryDataCount); return totalData; }
void genDuplexGroup(string read_pair_file, string duplex_group_file, string logFile, string errorFile, int OVERLAP, bool multiDG) { ofstream LOG(logFile, ios_base::app); ofstream ERR(errorFile, ios_base::app); LOG << "Start to genDuplexGroup " << read_pair_file << " ===> " << duplex_group_file << endl; long long fileLines = countFileLines(read_pair_file); ProcessAlert alert(fileLines, 100000, 10); ifstream READPAIR(read_pair_file); char strand1, strand2; string chr1, chr2; int start1, end1, id1, score1, start2, end2, id2, score2; int counter = 0; int firstPossible = 0; int dgCount = 0; PointerArray<DuplexGroup *> dgArray; while(READPAIR>>chr1>>start1>>end1>>id1>>score1>>strand1>>chr2>>start2>>end2>>id2>>score2>>strand2) { char buffer[200]; sprintf(buffer, "firstPossible: %d, duplexCount: %d", firstPossible, dgCount); LOG << alert.alert(string(buffer)); Read read(chr1, start1, end1, strand1, chr2, start2, end2, strand2, id1); int nonOverlapped = 1; int lastDGoverlapped = 0; for(int i=firstPossible;i<dgCount;i++) { int overlap = dgArray[i]->overlapRead(read); //cout << overlap << endl; if(overlap >= OVERLAP) { lastDGoverlapped = 1; nonOverlapped = 0; dgArray[i]->addRead(read); //cout << "find one" << endl; if (! multiDG) break; }else if (overlap == -1){ if(not lastDGoverlapped) firstPossible = i + 1; }else { lastDGoverlapped = 1; } } if (nonOverlapped) { DuplexGroup *dg = new DuplexGroup(read); dgArray.append( dg ); dgCount++; } counter++; } LOG << alert.finish(); READPAIR.close(); ofstream DUPLEXGROUP(duplex_group_file); for(int i=0; i<dgArray.arrayLen(); i++) DUPLEXGROUP << dgArray[i]->chr1 << "\t" << dgArray[i]->start1 << "\t" << dgArray[i]->end1 << "\t" << dgArray[i]->strand1 << "\t" << dgArray[i]->chr2 << "\t" << dgArray[i]->start2 << "\t" << dgArray[i]->end2 << "\t" << dgArray[i]->strand2 << "\t" << dgArray[i]->supportRead << "\t" << dgArray[i]->support << endl; DUPLEXGROUP.close(); LOG.close(); ERR.close(); }