void test_addOutEdge(){ string a="CAATAAGTAAAAAAGATTTTGTAACTTTCACAGCCTTATTTTTATCAATAGATACTGATAT"; string b= "AATAAGTAAAAAAGATTTTGTAACTTTCACAGCCTTATTTTTATCAATAGATACTGATATT"; int wordSize=a.length(); Kmer aKmer=wordId(a.c_str()); Kmer bKmer=wordId(b.c_str()); Vertex aVertex; aVertex.constructor(); Kmer lower=aKmer; Kmer aRC=aKmer.complementVertex(wordSize,false); if(aRC<lower){ lower=aRC; } aVertex.m_lowerKey=lower; aVertex.addOutgoingEdge(&aKmer,&bKmer,wordSize); vector<Kmer>Edges=aVertex.getOutgoingEdges(&aKmer,wordSize); bool found=false; for(int j=0;j<(int)Edges.size();j++){ if(Edges[j]==bKmer){ found=true; break; } } if(!found){ cout<<"Expected: "<<endl; cout<<b<<endl; cout<<"Actual:"<<endl; for(int j=0;j<(int)Edges.size();j++){ cout<<Edges[j].idToWord(wordSize,false)<<endl; } uint8_t edges=aVertex.getEdges(&aKmer); cout<<"Edges"<<endl; print8(edges); } assertEquals(Edges.size(),1); assertEquals(found,true); }
void GenomeGraphReader::readLine() { char buffer[1024]; buffer[0] = '\0'; bool isCurrentlyAtTheGreatEndOfTime = m_reader.eof(); while(!m_bad && !m_reader.eof()) { m_reader.getline(buffer, 1024); // skip comment if(strlen(buffer) > 0 && buffer[0] == '#') continue; break; } if(m_bad || (m_reader.eof() && isCurrentlyAtTheGreatEndOfTime)) { m_reader.close(); printName(); if(m_bad) { cout << " Error: file " << m_fileName << " does not exist"; cout << endl; } else { cout << " finished reading file " << m_fileName; cout << " got " << m_loaded << " objects" << endl; } Message finishedMessage; finishedMessage.setTag(DONE); send(m_parent, finishedMessage); die(); } else { // AGCTGTGAAACTGGTGCAAGCTACCAGAATC;36;A;C string sequence; CoverageDepth coverage; string parents; string children; for(int i = 0 ; i < (int) strlen(buffer) ; ++i) { if(buffer[i] == ';') buffer[i] = ' '; } istringstream stringBuffer(buffer); stringBuffer >> sequence; stringBuffer >> coverage; stringBuffer >> parents; stringBuffer >> children; /////////////////////////////////////////////////////////////////////// // convert the sequence to upper case map<char,char> translationTable; translationTable['a'] = 'A'; translationTable['t'] = 'T'; translationTable['g'] = 'G'; translationTable['c'] = 'C'; for(int i = 0 ; i < (int) sequence.length() ; ++i) { char symbol = sequence[i]; if(translationTable.count(symbol) > 0) { char correct = translationTable[symbol]; sequence [i] = correct; } } #if 0 cout << "DEBUG " << sequence << " with " << coverage << endl; #endif // if this is the first one, send the k-mer length too if(m_loaded == 0) { Message aMessage; aMessage.setTag(CoalescenceManager::SET_KMER_LENGTH); int length = sequence.length(); aMessage.setBuffer(&length); aMessage.setNumberOfBytes(sizeof(length)); send(m_aggregator, aMessage); } Kmer kmer; kmer.loadFromTextRepresentation(sequence.c_str()); Vertex vertex; vertex.setKey(kmer); vertex.setCoverageValue(coverage); // add parents for(int i = 0 ; i < (int)parents.length() ; ++i) { string parent = sequence; for(int j = 0 ; j < (int) parent.length()-1 ; ++j) { parent[j + 1] = parent[j]; } parent[0] = parents[i]; Kmer parentKmer; parentKmer.loadFromTextRepresentation(parent.c_str()); vertex.addIngoingEdge(&kmer, &parentKmer, sequence.length()); } // add children for(int i = 0 ; i < (int)children.length() ; ++i) { string child = sequence; for(int j = 0 ; j < (int) child.length()-1 ; ++j) { child[j] = child[j + 1]; } child[child.length() - 1] = children[i]; Kmer childKmer; childKmer.loadFromTextRepresentation(child.c_str()); vertex.addOutgoingEdge(&kmer, &childKmer, sequence.length()); } char messageBuffer[100]; int position = 0; position += vertex.dump(messageBuffer + position); memcpy(messageBuffer + position, &m_sample, sizeof(m_sample)); position += sizeof(m_sample); // maybe: accumulate many objects before flushing it. // we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes. /* printName(); cout << " got data line " << buffer; cout << " sending PAYLOAD to " << m_aggregator << endl; */ Message message; message.setTag(CoalescenceManager::PAYLOAD); message.setBuffer(messageBuffer); message.setNumberOfBytes(position); #if 0 printName(); cout << "DEBUG sending PAYLOAD to " << m_aggregator; cout << " with " << position << " bytes "; vertex.print(sequence.length(), false); cout << endl; #endif int period = 1000000; if(m_loaded % period == 0) { printName(); cout << " loaded " << m_loaded << " sequences" << endl; } m_loaded ++; send(m_aggregator, message); } }