Exemple #1
0
void test_addOutEdge(){
	string a="CAATAAGTAAAAAAGATTTTGTAACTTTCACAGCCTTATTTTTATCAATAGATACTGATAT";
	string b= "AATAAGTAAAAAAGATTTTGTAACTTTCACAGCCTTATTTTTATCAATAGATACTGATATT";
	int wordSize=a.length();

	Kmer aKmer=wordId(a.c_str());
	Kmer bKmer=wordId(b.c_str());

	Vertex aVertex;
	aVertex.constructor();
	Kmer lower=aKmer;
	Kmer aRC=aKmer.complementVertex(wordSize,false);

	if(aRC<lower){
		lower=aRC;
	}
	aVertex.m_lowerKey=lower;
	aVertex.addOutgoingEdge(&aKmer,&bKmer,wordSize);
	
	vector<Kmer>Edges=aVertex.getOutgoingEdges(&aKmer,wordSize);
	bool found=false;
	for(int j=0;j<(int)Edges.size();j++){
		if(Edges[j]==bKmer){
			found=true;
			break;
		}
	}
	if(!found){
		cout<<"Expected: "<<endl;
		cout<<b<<endl;
		cout<<"Actual:"<<endl;
		for(int j=0;j<(int)Edges.size();j++){
			cout<<Edges[j].idToWord(wordSize,false)<<endl;
		}
		uint8_t edges=aVertex.getEdges(&aKmer);
		cout<<"Edges"<<endl;
		print8(edges);
	}
	assertEquals(Edges.size(),1);
	assertEquals(found,true);
}
/**
 * Read the next data line of the graph file and forward it as one vertex.
 *
 * Every call ends in exactly one of two ways:
 *  - a data line was found: a PAYLOAD message holding the dumped vertex and
 *    m_sample is sent to m_aggregator (preceded by SET_KMER_LENGTH when this
 *    is the first object loaded);
 *  - no data line is left (or m_bad is set): the file is closed, a summary
 *    is printed, DONE is sent to m_parent and the actor dies.
 *
 * Expected line format (';' separated):
 *     AGCTGTGAAACTGGTGCAAGCTACCAGAATC;36;A;C
 *     sequence;coverage;parent symbols;child symbols
 * Lines starting with '#' are comments. Blank lines are ignored.
 */
void GenomeGraphReader::readLine() {

	char buffer[1024];
	buffer[0] = '\0';

	bool hasDataLine = false;

	// Test good() rather than !eof(): a stream left in a failed state (for
	// instance after a line that did not fit in the buffer) never reaches
	// eof and would otherwise yield an endless series of empty reads.
	while(!m_bad && m_reader.good()) {
		m_reader.getline(buffer, sizeof(buffer));

		// skip comment
		if(buffer[0] == '#')
			continue;

		// skip blank lines; this includes the empty read produced by the
		// newline that terminates the last line of the file, which must not
		// be turned into a vertex with an empty sequence.
		if(buffer[strspn(buffer, " \t\r")] == '\0')
			continue;

		hasDataLine = true;
		break;
	}

	if(m_bad || !hasDataLine) {

		m_reader.close();

		printName();

		if(m_bad) {
			cout << " Error: file " << m_fileName << " does not exist";
			cout << endl;

		} else {
			cout << " finished reading file " << m_fileName;
			cout << " got " << m_loaded << " objects" << endl;
		}

		Message finishedMessage;
		finishedMessage.setTag(DONE);

		send(m_parent, finishedMessage);

		die();

		return;
	}

	// AGCTGTGAAACTGGTGCAAGCTACCAGAATC;36;A;C
	string sequence;
	CoverageDepth coverage = 0; // defined value even if the field fails to parse
	string parents;
	string children;

	// turn the separators into blanks so the fields can be extracted
	// with the stream operators
	for(char * cursor = buffer ; *cursor != '\0' ; ++cursor) {
		if(*cursor == ';')
			*cursor = ' ';
	}

	istringstream stringBuffer(buffer);

	stringBuffer >> sequence;
	stringBuffer >> coverage;
	stringBuffer >> parents;
	stringBuffer >> children;

	///////////////////////////////////////////////////////////////////////
	// convert the sequence to upper case
	// only a, t, g and c are translated; any other symbol is left as is

	for(string::size_type i = 0 ; i < sequence.length() ; ++i) {

		switch(sequence[i]) {
			case 'a':
				sequence[i] = 'A';
				break;
			case 't':
				sequence[i] = 'T';
				break;
			case 'g':
				sequence[i] = 'G';
				break;
			case 'c':
				sequence[i] = 'C';
				break;
			default:
				break;
		}
	}
#if 0
	cout << "DEBUG " << sequence << " with " << coverage << endl;
#endif

	// if this is the first one, send the k-mer length too
	if(m_loaded == 0) {

		Message aMessage;
		aMessage.setTag(CoalescenceManager::SET_KMER_LENGTH);

		int length = sequence.length();
		aMessage.setBuffer(&length);
		aMessage.setNumberOfBytes(sizeof(length));

		send(m_aggregator, aMessage);
	}

	Kmer kmer;
	kmer.loadFromTextRepresentation(sequence.c_str());

	Vertex vertex;
	vertex.setKey(kmer);
	vertex.setCoverageValue(coverage);

	// add parents
	// a parent is the incoming symbol followed by the first k-1 symbols of
	// the sequence. (Shifting in place from the front, as was done before,
	// smeared sequence[0] over the whole string.)
	for(string::size_type i = 0 ; i < parents.length() ; ++i) {

		string parent(1, parents[i]);
		parent.append(sequence, 0, sequence.length() - 1);

		Kmer parentKmer;
		parentKmer.loadFromTextRepresentation(parent.c_str());

		vertex.addIngoingEdge(&kmer, &parentKmer, sequence.length());
	}

	// add children
	// a child is the last k-1 symbols of the sequence followed by the
	// outgoing symbol. sequence is never empty here: children can only be
	// non-empty when the earlier fields were extracted.
	for(string::size_type i = 0 ; i < children.length() ; ++i) {

		string child(sequence, 1);
		child += children[i];

		Kmer childKmer;
		childKmer.loadFromTextRepresentation(child.c_str());

		vertex.addOutgoingEdge(&kmer, &childKmer, sequence.length());
	}

	// NOTE(review): the 100-byte capacity is assumed to cover what
	// Vertex::dump writes plus sizeof(m_sample) -- confirm against Vertex.
	char messageBuffer[100];
	int position = 0;

	position += vertex.dump(messageBuffer + position);
	memcpy(messageBuffer + position, &m_sample, sizeof(m_sample));

	position += sizeof(m_sample);

// maybe: accumulate many objects before flushing it.
// we can go up to MAXIMUM_MESSAGE_SIZE_IN_BYTES bytes.

	/*
	printName();
	cout << " got data line " << buffer;
	cout << " sending PAYLOAD to " << m_aggregator << endl;
*/
	Message message;
	message.setTag(CoalescenceManager::PAYLOAD);
	message.setBuffer(messageBuffer);
	message.setNumberOfBytes(position);

#if 0
	printName();
	cout << "DEBUG sending PAYLOAD to " << m_aggregator;
	cout << " with " << position << " bytes ";
	vertex.print(sequence.length(), false);
	cout << endl;
#endif

	// progress report every `period` objects (including the very first one)
	int period = 1000000;
	if(m_loaded % period == 0) {
		printName();
		cout << " loaded " << m_loaded << " sequences" << endl;

	}
	m_loaded ++;
	send(m_aggregator, message);
}