Example #1
0
// Exercises Kmer::canonicalize() on even-length k-mers (already canonical,
// non-canonical and palindromic input) and on one odd-length k-mer.
TEST(Kmer, canonicalize)
{
	// Even length: k = 4.
	Kmer::setLength(4);
	const Kmer canonical("ATGC");
	const Kmer nonCanonical("GCAT");
	const Kmer palindrome("ACGT");

	// A k-mer that is already canonical must come back unchanged.
	Kmer fromCanonical(canonical);
	fromCanonical.canonicalize();
	EXPECT_EQ(canonical, fromCanonical);

	// The non-canonical form must be mapped onto the canonical one.
	Kmer fromNonCanonical(nonCanonical);
	fromNonCanonical.canonicalize();
	EXPECT_EQ(canonical, fromNonCanonical);

	// A palindrome must be left as it is.
	Kmer fromPalindrome(palindrome);
	fromPalindrome.canonicalize();
	EXPECT_EQ(palindrome, fromPalindrome);

	// Odd length: k = 5.
	Kmer::setLength(5);
	const Kmer oddLength("GCTCG");
	const Kmer oddLengthCanonical("CGAGC");

	Kmer fromOddLength(oddLength);
	fromOddLength.canonicalize();
	EXPECT_EQ(oddLengthCanonical, fromOddLength);
}
void NetworkSequenceCollection::processSequenceExtension(
		uint64_t groupID, uint64_t branchID, const Kmer& seq,
		const ExtensionRecord& extRec, int multiplicity)
{
	switch(m_state)
	{
		case NAS_TRIM:
			return processLinearSequenceExtension(groupID, branchID,
					seq, extRec, multiplicity, m_trimStep);
		case NAS_ASSEMBLE:
		case NAS_COVERAGE:
			return processLinearSequenceExtension(groupID, branchID,
					seq, extRec, multiplicity, UINT_MAX);
		case NAS_DISCOVER_BUBBLES:
			return processSequenceExtensionPop(groupID, branchID,
					seq, extRec, multiplicity,
					opt::bubbleLen - opt::kmerSize + 1);
		case NAS_WAITING:
			if (m_finishedGroups.count(groupID) == 0) {
				logger(0) << "error: unexpected seqext message: "
					"state: " << m_state << " "
					"gid: " << groupID << " bid: " << branchID << " "
					"seq: " << seq.str() << '\n';
				assert(false);
			}
			break;
		default:
			logger(0) << "error: unexpected seqext message: "
				"state: " << m_state << " "
				"gid: " << groupID << " bid: " << branchID << " "
				"seq: " << seq.str() << '\n';
			assert(false);
			break;
	}
}
Example #3
0
/**
 * Fill newPath with the vertices of this path taken from last to first,
 * each one replaced by the result of complementVertex().
 *
 * newPath is expected to be empty on entry (checked under CONFIG_ASSERT)
 * and receives the k-mer length of this path.
 */
void GraphPath::reverseContent(GraphPath & newPath) const {

#ifdef CONFIG_ASSERT
	assert(newPath.size() == 0);
#endif

	newPath.setKmerLength(getKmerLength());

	// walk from the last vertex down to the first one
	int index = size();
	while(index-- > 0) {
		Kmer vertex;
		at(index, &vertex);

		// 'false' disables color-space handling, so colored (SOLiD)
		// data is not supported here
		Kmer complemented = vertex.complementVertex(getKmerLength(), false);
		newPath.push_back(&complemented);
	}

#ifdef CONFIG_ASSERT
	assert(size() == newPath.size());
	assert(getKmerLength() == newPath.getKmerLength());
#endif

}
Example #4
0
/**
 * Compute the number of bytes needed to store this path: two uint32_t
 * header fields (one for the number of elements, one for the k-mer
 * length) followed by getRequiredNumberOfBytes() of every Kmer.
 *
 * @return the total size in bytes
 */
int GraphPath::getRequiredNumberOfBytes() const {

	const uint32_t elements = size();
	const int headerFieldSize = sizeof(uint32_t);

	// first header field: the number of elements
	int total = headerFieldSize;

#ifdef CONFIG_ASSERT
	uint32_t kmerLength = getKmerLength();
	assert(kmerLength > 0);
#endif

	// second header field: the k-mer length
	total += headerFieldSize;

	// payload: one entry per vertex
	int index = 0;
	while(index < (int)elements) {
		Kmer value;
		at(index, &value);
		total += value.getRequiredNumberOfBytes();
		++index;
	}

	return total;
}
Example #5
0
/**
 * Load a path from a raw byte buffer.
 *
 * Layout read from the buffer: a uint32_t element count, a uint32_t
 * k-mer length, then that many Kmer objects, each decoded with
 * Kmer::load(). Every decoded Kmer is appended with push_back().
 *
 * NOTE(review): the buffer length is not known here, so the caller must
 * guarantee that the buffer holds a complete record.
 *
 * @param buffer start of the serialized path
 * @return the number of bytes consumed
 */
int GraphPath::load(const char * buffer) {
	const int fieldSize = sizeof(uint32_t);
	const char * cursor = buffer;

	uint32_t elements = 0;
	memcpy(&elements, cursor, fieldSize);
	cursor += fieldSize;

	uint32_t kmerLength = 0;
	memcpy(&kmerLength, cursor, fieldSize);
	cursor += fieldSize;

	// the k-mer length must be known before vertices are appended
	setKmerLength(kmerLength);

	for(int remaining = (int)elements ; remaining > 0 ; --remaining) {
		Kmer value;
		cursor += value.load(cursor);
		push_back(&value);
	}

	return static_cast<int>(cursor - buffer);
}
Example #6
0
/**
 * Flag short tangle contigs as dead and refresh the graph.
 *
 * The current contigs are assembled; every contig for which IsTangle()
 * holds and whose size is below kmerLength + minLength - 1 has all of
 * its k-mer nodes marked with SetDeadFlag(). Refresh() is called
 * afterwards and the number of flagged contigs is logged.
 *
 * @param minLength length threshold used in the size test above
 * @return the number of contigs that were flagged
 */
int64 HashGraph::Trim(int minLength)
{
    vector<Contig> contigs;
    Assemble(contigs);

    int total = 0;
#pragma omp parallel for
    for (int i = 0; i < (int)contigs.size(); ++i)
    {
        if (contigs[i].IsTangle() && contigs[i].Size() < kmerLength + minLength - 1)
        {
            // Prime the window with the first kmerLength-1 bases ...
            Kmer kmer;
            for (int j = 0; j+1 < kmerLength; ++j)
                kmer.AddRight(contigs[i][j]);
            // ... then slide it over the contig, flagging each k-mer found.
            for (int j = kmerLength-1; j < contigs[i].Size(); ++j)
            {
                kmer.AddRight(contigs[i][j]);
                KmerNode *node = GetNode(kmer);
                if (node != NULL)
                    node->SetDeadFlag();
            }

#pragma omp atomic
            ++total;
        }
    }

    Refresh();

    // "%lld" requires a long long argument; passing the int counter
    // directly is a format/argument mismatch.
    LogMessage("trim %lld dead ends\n", (long long)total);

    return total;
}
Example #7
0
/**
 * Statistical check of vertexRank().
 *
 * Draws 100000000 Kmer values (word 0 fixed to one random value, word 1
 * re-drawn for each sample), counts how often each rank in [0,size) is
 * returned and asserts that every count lies strictly within 10% of the
 * average. The seed comes from time(), so each run uses different data.
 */
void f2() {
    srand(time(NULL));
    int size=1536;
    map<int,int> counts;

    uint64_t samples=100000000;
    uint64_t base=rand();
    int wordSize=63;
    Kmer kmer;
    kmer.setU64(0,base);

    // computed before the loop because 'samples' is consumed by it
    int average=samples/size;
    while(samples--) {
        uint64_t second=rand();
        kmer.setU64(1,second);
        int rank=vertexRank(&kmer,size,wordSize,false);
        counts[rank]++;
    }

    int deviation=average/10;
    int min=average-deviation;
    int max=average+deviation;
    for(int i=0; i<size; i++) {
        int observed=counts[i];
        // print the offending value before the assertion fires
        if(observed>=max) {
            cout<<observed<<" and Max="<<max<<endl;
        }
        if(observed<=min) {
            cout<<observed<<" and Min="<<min<<endl;
        }
        assert(observed<max);
        assert(observed>min);
    }
}
Example #8
0
void ContigGraph::BuildEdgeCountTable()
{
    edge_count_table_.clear();
    edge_count_table_.set_kmer_size(kmer_size_+1);
#pragma omp parallel for schedule(static, 1)
    for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i)
    {
        for (int strand = 0; strand < 2; ++strand)
        {
            ContigGraphVertexAdaptor current(&vertices_[i], strand);

            Kmer kmer = current.end_kmer(kmer_size_);
            kmer.resize(kmer_size_+1);
            for (int x = 0; x < 4; ++x)
            {
                if (current.out_edges()[x])
                {
                    kmer.set_base(kmer_size_, x);
                    edge_count_table_.InsertVertex(kmer);
                }
            }
        }
    }
    edge_count_table_.ClearCount();
}
Example #9
0
/*
 * Consistency check of the links stored in one graph node (older variant).
 *
 * For each of the four bases the left and right neighbour pointers and
 * counts of 'node' are compared with what the neighbour itself stores:
 * the neighbour's k-mer must equal the k-mer built by prepending or
 * appending the base, and the neighbour must point back to 'node' with
 * the same count.
 *
 * The first statement is an unconditional assert(false) carrying a
 * "TODO FIX" message, so with assertions enabled the function aborts
 * before doing any work.
 *
 * node         node to verify
 * kmer_length  number of bases per k-mer (two bits are used per base)
 */
void
verify_node_orig(kg_node_t * node, unsigned kmer_length) {
    assert( false && "TODO FIX! REVERSED KMER ENDIANNESS" );
    // number of bits occupied by a k-mer: two per base
    int double_kmer_length = kmer_length << 1;
#ifdef LARGE_KMERS
    Kmer mask;
    mask.createMask(double_kmer_length);
#else
    // low double_kmer_length bits set
    Kmer mask = (Kmer(1) << double_kmer_length) - 1;
#endif
    Kmer kmer = node->kmer;
    Kmer rc_kmer = reverseComplement(kmer, kmer_length);
    // here the leftmost base is taken from the highest two bits and the
    // rightmost base from the lowest two bits
    char leftmost_base = (kmer >> (double_kmer_length - 2)) & 0x3;
    char rightmost_base = kmer & 0x3;

    for (int i = 0 ; i < 4 ; ++ i) {
        // check on the left side
        kg_node_t * node2 = node->left[i];
        int count = node->left_count[i];

        if (node2) {
            // a neighbour pointer implies a non-zero count
            assert (count != 0);
            if (count > 0) {
                // positive count: neighbour k-mer is base i prepended to kmer
                Kmer kmer2 = KMER_PREPEND(kmer, i, double_kmer_length, mask);
                assert(kmer2 == node2->kmer);
                assert(node2->right[(int)rightmost_base] == node);
                assert(node2->right_count[(int)rightmost_base] == count);
            } else {
                // negative count: neighbour k-mer is built from the reverse
                // complement, with the complemented base (i ^ 0x3) appended
                Kmer kmer2 = KMER_APPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
                assert(kmer2 == node2->kmer);
                assert(node2->left[rightmost_base ^ 0x3] == node);
                assert(node2->left_count[rightmost_base ^ 0x3] == count);
            }
        } else {
            // no neighbour: the count must be zero as well
            assert (count == 0);
        }


        // check on the right side
        node2 = node->right[i];
        count = node->right_count[i];

        if (node2) {
            assert (count != 0);
            if (count > 0) {
                // positive count: neighbour k-mer is base i appended to kmer
                Kmer kmer2 = KMER_APPEND(kmer, i, double_kmer_length, mask);
                assert(kmer2 == node2->kmer);
                assert(node2->left[(int)leftmost_base] == node);
                assert(node2->left_count[(int)leftmost_base] == count);
            } else {
                // negative count: neighbour k-mer is built from the reverse
                // complement, with the complemented base (i ^ 0x3) prepended
                Kmer kmer2 = KMER_PREPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
                assert(kmer2 == node2->kmer);
                assert(node2->right[leftmost_base ^ 0x3] == node);
                assert(node2->right_count[leftmost_base ^ 0x3] == count);
            }
        } else {
            assert (count == 0);
        }
    }
}
Example #10
0
bool HashGraph::AddEdgesFromSequence(const Sequence &seq)
{
    if (seq.Size() < kmerLength)
        return false;

    bool flag = false;
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);

        KmerNodeAdapter adp = GetNodeAdapter(kmer);
        if (!adp.IsNull())
        {
            flag = true;
            adp.Increase();
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }

    return flag;
}
Example #11
0
void SpuriousSeedAnnihilator::writeCheckpointForSeeds(){

	/* write the Seeds checkpoint */
	if(m_parameters->writeCheckpoints() && !m_parameters->hasCheckpoint("Seeds")){

		ofstream f(m_parameters->getCheckpointFile("Seeds").c_str());
		cout<<"Rank "<<m_parameters->getRank()<<" is writing checkpoint Seeds"<<endl;
		int count=(*m_seeds).size();

		f.write((char*)&count,sizeof(int));

		for(int i=0;i<(int)(*m_seeds).size();i++){
			int length=(*m_seeds)[i].size();
			f.write((char*)&length,sizeof(int));

			for(int j=0;j<(int)(*m_seeds)[i].size();j++){
				Kmer theKmer;
				(*m_seeds)[i].at(j,&theKmer);
				theKmer.write(&f);

				CoverageDepth coverageValue=0;
				coverageValue=(*m_seeds)[i].getCoverageAt(j);
				f.write((char*)&coverageValue,sizeof(CoverageDepth));
			}
		}
		f.close();
	}
}
Example #12
0
// Flags every k-mer node of each contig whose Coverage() is below 'c' as
// dead, then calls Refresh().
//
// Returns the number of contigs that were flagged.
int64 HashGraph::RemoveLowCoverageContigs(double c)
{
    vector<Contig> contigs;
    Assemble(contigs);

    int removed = 0;
#pragma omp parallel for
    for (int idx = 0; idx < (int)contigs.size(); ++idx)
    {
        if (!(contigs[idx].Coverage() < c))
            continue;

        // Prime the window with the first kmerLength-1 bases ...
        Kmer window;
        for (int pos = 0; pos+1 < kmerLength; ++pos)
            window.AddRight(contigs[idx][pos]);

        // ... then slide it over the contig and flag each k-mer found.
        for (int pos = kmerLength-1; pos < contigs[idx].Size(); ++pos)
        {
            window.AddRight(contigs[idx][pos]);
            KmerNode *hit = GetNode(window);
            if (hit != NULL)
                hit->SetDeadFlag();
        }

#pragma omp atomic
        ++removed;
    }

    Refresh();

    return removed;
}
Example #13
0
void SeedingData::writeCheckpoints(){

	/* write the Seeds checkpoint */
	if(m_parameters->writeCheckpoints() && !m_parameters->hasCheckpoint("SimpleSeeds")){

		ofstream f(m_parameters->getCheckpointFile("SimpleSeeds").c_str());
		ostringstream buffer;

		cout<<"Rank "<<m_parameters->getRank()<<" is writing checkpoint SimpleSeeds"<<endl;

		vector<GraphPath> * seeds = & m_SEEDING_seeds;

		int count=(*seeds).size();

		buffer.write((char*)&count, sizeof(int));

		for(int i=0;i<(int)(*seeds).size();i++){
			int length=(*seeds)[i].size();
			buffer.write((char*)&length, sizeof(int));

			for(int j=0;j<(int)(*seeds)[i].size();j++){
				Kmer theKmer;
				(*seeds)[i].at(j,&theKmer);
				theKmer.write(&buffer);

				CoverageDepth coverageValue=0;
				coverageValue=(*seeds)[i].getCoverageAt(j);
				buffer.write((char*)&coverageValue, sizeof(CoverageDepth));
				flushFileOperationBuffer(false, &buffer, &f, CONFIG_FILE_IO_BUFFER_SIZE);
			}
		}
                flushFileOperationBuffer(true, &buffer, &f, CONFIG_FILE_IO_BUFFER_SIZE);
		f.close();
	}
}
Example #14
0
/**
 * Append a vertex to the path.
 *
 * The vertex is only stored when canBeAdded() accepts it. On the first
 * rejection an error message and the last (at most 16) vertices of the
 * path are printed and m_errorRaised is set; later rejections are
 * silent. A rejected vertex is never stored.
 *
 * @param a vertex to append
 */
void GraphPath::push_back(const Kmer*a){

#ifdef ASSERT
	assert(m_kmerLength!=0);
#endif

	if(!canBeAdded(a)){
		if(!m_errorRaised){
			cout<<"Error: can not add "<<a->idToWord(m_kmerLength,false)<<endl;
			cout<<"last objects:"<<endl;

			const int count=16;
			const int total=size();

			// Start 16 objects before the end, but never before the first
			// object: a path shorter than 16 would otherwise produce a
			// negative index for at().
			int iterator=total-count;
			if(iterator<0)
				iterator=0;

			while(iterator<total){
				Kmer theObject;
				at(iterator,&theObject);

				cout<<" ["<<iterator<<"] ------> "<<theObject.idToWord(m_kmerLength,false)<<endl;

				iterator++;
			}

			m_errorRaised=true;
		}

		return;
	}

	// the storage backend is selected at compile time
#ifdef CONFIG_PATH_STORAGE_DEFAULT
	m_vertices.push_back(*a);
#elif defined(CONFIG_PATH_STORAGE_BLOCK)

	writeObjectInBlock(a);
#endif
}
Example #15
0
void HashGraph::InsertSequence(const Sequence &seq, uint64 prefix, uint64 mask)
{
    if (seq.Size() < kmerLength)
        return;

    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        Kmer key = kmer;
        Kmer rev_comp = kmer;
        rev_comp.ReverseComplement();
        if (rev_comp < kmer)
            key = rev_comp;

        if ((key.Hash() & mask) == prefix)
        {
            KmerNodeAdapter adp(InsertKmer(kmer), kmer);
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }
}
Example #16
0
void SeedingData::loadCheckpoint(){
	cout<<"Rank "<<m_parameters->getRank()<<" is reading checkpoint Seeds"<<endl;

	ifstream f(m_parameters->getCheckpointFile("Seeds").c_str());
	int n=0;
	f.read((char*)&n,sizeof(int));
	for(int i=0;i<n;i++){
		GraphPath seed;
		seed.setKmerLength(m_parameters->getWordSize());
		int vertices=0;
		f.read((char*)&vertices,sizeof(int));
		for(int j=0;j<vertices;j++){
			Kmer kmer;
			kmer.read(&f);
			seed.push_back(&kmer);

			CoverageDepth coverageValue=0;

			f.read((char*)&coverageValue,sizeof(CoverageDepth));
			seed.addCoverageValue(coverageValue);
		}

		seed.computePeakCoverage();

		m_SEEDING_seeds.push_back(seed);
	}
	cout<<"Rank "<<m_parameters->getRank()<<" loaded "<<n<<" seeds from checkpoint Seeds"<<endl;
	f.close();
}
/**
 * Return the process ID to which the specified kmer belongs.
 *
 * Below DEDICATE_CONTROL_AT processes, every process in
 * [0, numProc) may be returned. Otherwise the result lies in
 * [1, numProc), so process 0 is never returned.
 */
int NetworkSequenceCollection::computeNodeID(const Kmer& seq) const
{
	if (opt::numProc >= DEDICATE_CONTROL_AT) {
		// Spread over numProc-1 processes, shifted past process 0.
		return seq.getCode() % (unsigned)(opt::numProc - 1) + 1;
	}
	return seq.getCode() % (unsigned)opt::numProc;
}
Example #18
0
/*
 * Consistency check of the edge counts stored in one graph node.
 *
 * For each of the four bases and each side, when the edge's color is 0 and
 * its count is non-zero, the neighbour k-mer is built, looked up in
 * 'hashtable' through its canonical form, and asserted to exist and to
 * hold a matching count (compared through cnorm()) with color 0 on its
 * side of the edge.
 *
 * node         node to verify
 * hashtable    graph in which the neighbours are looked up
 * kmer_length  number of bases per k-mer (two bits are used per base)
 */
void verify_node(KmerNode * node, KmerGraph *hashtable, unsigned kmer_length)
{
    // number of bits occupied by a k-mer: two per base
    int double_kmer_length = kmer_length << 1;
#ifdef LARGE_KMERS
    Kmer mask;
    mask.createMask(double_kmer_length);
#else
    // low double_kmer_length bits set
    Kmer mask = (Kmer(1) << double_kmer_length) - 1;
#endif
    Kmer kmer = node->kmer;
    Kmer rc_kmer = reverseComplement(kmer, kmer_length);
    char rightmost_base = KMER_GET_TAIL_BASE(kmer, kmer_length);
    char leftmost_base = KMER_GET_HEAD_BASE(kmer, kmer_length);
    KmerNode *node2;

    for (int i = 0 ; i < 4 ; ++ i) {
        // check on the left side
        int count = node->left_count[i];
        int color = node->left_color[i];
        assert( color == 0 || count != 0 );  // count must be non-zero if color is non-zero
        // only edges with color 0 are verified
        if (color == 0) {
            if (count > 0) {
                // positive count: neighbour k-mer is base i prepended to kmer
                Kmer kmer2 = KMER_PREPEND(kmer, i, double_kmer_length, mask);
                node2 = hashtable->findNode(canonicalKmer(kmer2, kmer_length));
                assert( node2 != NULL );
                assert(cnorm(node2->right_count[static_cast<int>(rightmost_base)]) == cnorm(count));
                assert(node2->right_color[static_cast<int>(rightmost_base)] == 0);
            }	else if (count < 0) {
                // negative count: neighbour k-mer is built from the reverse
                // complement, with the complemented base (i ^ 0x3) appended
                Kmer kmer2 = KMER_APPEND(rc_kmer, i ^ 0x3, double_kmer_length);
                node2 = hashtable->findNode(canonicalKmer(kmer2, kmer_length));
                assert( node2 != NULL );
                assert(cnorm(node2->left_count[static_cast<int>( COMPLEMENT(rightmost_base) )]) == cnorm(count));
                assert(node2->left_color[static_cast<int>( COMPLEMENT(rightmost_base) )] == 0);
            }
        }

        // check on the right side
        count = node->right_count[i];
        color = node->right_color[i];
        assert( color == 0 || count != 0 );  // count must be non-zero if color is non-zero
        if (color == 0) {
            if (count > 0) {
                // positive count: neighbour k-mer is base i appended to kmer
                Kmer kmer2 = KMER_APPEND(kmer, i, double_kmer_length);
                node2 = hashtable->findNode(canonicalKmer(kmer2, kmer_length));
                assert( node2 != NULL );
                assert(cnorm(node2->left_count[static_cast<int>(leftmost_base)]) == cnorm(count));
                assert(node2->left_color[static_cast<int>(leftmost_base)] == 0);
            } else if (count < 0) {
                // negative count: neighbour k-mer is built from the reverse
                // complement, with the complemented base (i ^ 0x3) prepended
                Kmer kmer2 = KMER_PREPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
                node2 = hashtable->findNode(canonicalKmer(kmer2, kmer_length));
                assert( node2 != NULL );
                assert(cnorm(node2->right_count[static_cast<int>( COMPLEMENT(leftmost_base) )]) == cnorm(count));
                assert(node2->right_color[static_cast<int>( COMPLEMENT(leftmost_base) )] == 0);
            }
        }
    }
}
Example #19
0
bool GraphPath::canBeAdded(const Kmer*object)const{

	if(size()==0)
		return true;

	Kmer lastKmer;
	int position=size()-1;
	at(position,&lastKmer);

	return lastKmer.canHaveChild(object,m_kmerLength);
}
Example #20
0
/**
 * Return whether this branch is the canonical representation of the
 * contig that it represents. A contig has two ends, and the contig
 * is output starting from the lexicographically smaller end.
 */
bool BranchRecord::isCanonical() const
{
	assert(size() > 1);
	Kmer first = front().first;
	Kmer last = back().first;
	if (getDirection() == SENSE)
		last.reverseComplement();
	else
		first.reverseComplement();
	assert(first != last);
	return first < last;
}
Example #21
0
// Returns true when every k-mer of 'seq' is present in the graph, i.e.
// GetNode() finds a node for each window of kmerLength bases.
//
// A sequence shorter than kmerLength contains no k-mer, so there is
// nothing to reject and the result is true. The explicit guard keeps the
// priming loop from indexing past the end of such a sequence.
bool HashGraph::IsValid(const Sequence &seq)
{
    if (seq.Size() < kmerLength)
        return true;

    // Prime the window with the first kmerLength-1 bases.
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);

    // Slide the window; the first missing k-mer makes the sequence invalid.
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        if (GetNode(kmer) == NULL)
            return false;
    }

    return true;
}
Example #22
0
/**
 * Get the ingoing edges
 * one bit (1=yes, 0=no) per possible edge
 */
vector<Kmer> Kmer::_getIngoingEdges(uint8_t edges,int k){
	vector<Kmer> b;
	Kmer aTemplate;
	aTemplate=*this;
	
	int posToClear=2*k;

	for(int i=0;i<aTemplate.getNumberOfU64();i++){
		uint64_t element=aTemplate.getU64(i);
		element=element<<2;

//	1		0
//
//	127..64		63...0
//
//	00abcdefgh  ijklmnopqr		// initial state
//	abcdefgh00  klmnopqr00		// shift left
//	abcdefghij  klmnopqr00		// copy the last to the first
//	00cdefghij  klmnopqr00		// reset the 2 last

/**
 * Now, we need to copy 2 bits from 
 */
		if(i!=0){
			// the 2 last of the previous will be the 2 first of this one
			uint64_t last=getU64(i-1);
			last=(last>>62);
			element=element|last;
		}

		/**
 *	The two last bits that shifted must be cleared
 *	Otherwise, it will change the hash value of the Kmer...
 *	The chunk number i contains bits from i to i*64-1
 *	Therefore, if posToClear is inside these boundaries,
 *	then it is obvious that these awful bits must be changed 
 *	to 0
 */
		if(i*64<=posToClear&&posToClear<i*64+64){
			int position=posToClear%64;

			uint64_t filter=3;// 11 or 1*2^1+1*2^0
			filter=filter<<(position);
			filter=~filter;
			element=element&filter;
		}
		aTemplate.setU64(i,element);
	}
Example #23
0
/**
 * Print a bubble as a Graphviz digraph on standard output.
 *
 * Writes the number of trees and the root, then one labelled node per
 * entry of 'coverages' and one "a -> b" line per distinct edge. Each
 * tree is read as a flat list of (source, target) pairs.
 */
void BubbleTool::printStuff(Kmer root,vector<vector<Kmer> >*trees,
map<Kmer,int>*coverages){
	int wordSize=m_parameters->getWordSize();
	cout<<"Trees="<<trees->size()<<endl;
	cout<<"root="<<root.idToWord(wordSize,m_parameters->getColorSpaceMode())<<endl;
	cout<<"digraph{"<<endl;

	// nodes, labelled with the sequence and the coverage
	map<Kmer,int>::iterator entry=coverages->begin();
	while(entry!=coverages->end()){
		Kmer vertex=entry->first;
		cout<<vertex.idToWord(wordSize,m_parameters->getColorSpaceMode())<<" [label=\""<<vertex.idToWord(wordSize,m_parameters->getColorSpaceMode())<<" "<<entry->second<<"\"]"<<endl;
		++entry;
	}

	// edges, each printed only once
	map<Kmer,set<Kmer> > printedEdges;
	for(int treeIndex=0;treeIndex<(int)trees->size();treeIndex++){
		vector<Kmer>&tree=trees->at(treeIndex);
		for(int position=0;position<(int)tree.size();position+=2){
			Kmer source=tree.at(position+0);
			#ifdef ASSERT
			assert(position+1<(int)tree.size());
			#endif
			Kmer target=tree.at(position+1);

			map<Kmer,set<Kmer> >::iterator known=printedEdges.find(source);
			if(known!=printedEdges.end() && known->second.count(target)>0){
				continue;
			}

			cout<<source.idToWord(wordSize,m_parameters->getColorSpaceMode())<<" -> "<<target.idToWord(wordSize,m_parameters->getColorSpaceMode())<<endl;
			printedEdges[source].insert(target);
		}
	}

	cout<<"}"<<endl;
}
Example #24
0
void StoreKeeper::sendKmersSamples() {

	char buffer[MAXIMUM_MESSAGE_SIZE_IN_BYTES];
	int bytes = 0;

	ExperimentVertex * currentVertex = NULL;
	VirtualKmerColorHandle currentVirtualColor = NULL_VIRTUAL_COLOR;

	vector<bool> samplesVector (m_sampleSize, false);

	if(m_hashTableIterator.hasNext()){

		currentVertex = m_hashTableIterator.next();
		Kmer kmer = currentVertex->getKey();

		bytes += kmer.dump(buffer);

		currentVirtualColor = currentVertex->getVirtualColor();
		set<PhysicalKmerColor> * samples = m_colorSet.getPhysicalColors(currentVirtualColor);

		for(set<PhysicalKmerColor>:: iterator sampleIterator = samples->begin();
			sampleIterator != samples->end(); ++sampleIterator) {
			PhysicalKmerColor value = *sampleIterator;
			samplesVector[value] = true;
		}

		for (std::vector<bool>::iterator it = samplesVector.begin();
			it != samplesVector.end(); ++it) {
			buffer[bytes] = *it;
			bytes++;
		}
	}


	Message message;
	message.setNumberOfBytes(bytes);
	message.setBuffer(buffer);

	if(m_hashTableIterator.hasNext()){
		message.setTag(KmerMatrixOwner::PUSH_KMER_SAMPLES);
	}else{
		message.setTag(KmerMatrixOwner::PUSH_KMER_SAMPLES_END);
	}

	send(m_kmerMatrixOwner, message);

}
Example #25
0
void ContigGraph::BuildBeginKmerMap()
{
    begin_kmer_map_.clear();
    begin_kmer_map_.reserve(vertices_.size()*2);
#pragma omp parallel for
    for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i)
    {
        for (int strand = 0; strand < 2; ++strand)
        {
            ContigGraphVertexAdaptor current(&vertices_[i], strand);
            Kmer kmer = current.begin_kmer(kmer_size_);

            Kmer key = kmer.unique_format();
            begin_kmer_map_[key] = i;
        }
    }
}
Example #26
0
// Looks up a k-mer in the table.
//
// In double-stranded mode the representative key is the smaller of the
// k-mer and its reverse complement; the returned reference records
// whether the reverse complement was the one used.
NodeEndRef NodeEndTable::find(const Kmer &kmer) const
{
        Kmer kmerRC = kmer.getReverseComplement();

        // the reverse complement is the representative
        if (doubleStranded && (kmerRC < kmer))
                return NodeEndRef(table.find(kmerRC), true);

        // the k-mer itself is the representative
        return NodeEndRef(table.find(kmer), false);
}
Example #27
0
// Drops every out-edge that leads to a k-mer no longer in the table and
// recounts the remaining edges into num_edges.
//
// Every node of every bucket is visited on both strands: the adapter is
// reverse-complemented after each pass, so the second pass handles the
// edges of the opposite strand. For palindromic k-mers the in- and
// out-edge sets are merged afterwards.
void HashGraph::RefreshEdges()
{
    num_edges = 0;
#pragma omp parallel for
    for (int64 i = 0; i < (int64)table_size; ++i)
    {
        // walk the chain of bucket i
        for (HashNode *node = table[i]; node; node = node->next)
        {
            KmerNodeAdapter curr(node);
            for (int strand = 0; strand < 2; ++strand)
            {
                Kmer kmer;
                curr.GetKmer(kmer);
                // out-edges as a bit set, one bit per base
                unsigned edges = curr.OutEdges();
                for (int x = 0; x < 4; ++x)
                {
                    if (edges & (1 << x))
                    {
                        // k-mer reached by following edge x
                        Kmer next = kmer;
                        next.AddRight(x);
                        if (GetNode(next) == NULL)
                            curr.RemoveOutEdge(x);
                        else
                        {
                            // shared counter, updated from several threads
#pragma omp atomic
                            ++num_edges;
                        }
                    }
                }

                // switch to the other strand (back to the original one
                // after the second pass)
                curr.ReverseComplement();
            }

            if (node->kmer.IsPalindrome())
            {
                // give a palindrome the union of its edges on both sides
                unsigned edges = node->InEdges() | node->OutEdges();
                node->SetInEdges(edges);
                node->SetOutEdges(edges);
            }
        }
    }

    // halve the total; presumably every edge was counted once from each of
    // its two end nodes -- TODO confirm
    num_edges >>= 1;
}
/**
 * Build a Kmer from a C string.
 *
 * The character at position j is converted with charToCode() and its
 * code is OR-ed in at bit 2*j of the Kmer, i.e. in the 64-bit chunk
 * (2*j)/64 at offset (2*j)%64.
 *
 * @param a NUL-terminated sequence
 * @return the encoded Kmer
 */
Kmer wordId(const char*a){
	Kmer result;
	const int length=strlen(a);

	for(int position=0;position<length;position++){
		const uint64_t code=charToCode(a[position]);

		// two bits per character
		const int bitPosition=2*position;
		const int chunk=bitPosition/64;
		const int offsetInChunk=bitPosition%64;

		#ifdef ASSERT
		if(!(chunk<result.getNumberOfU64())){
			cout<<"Chunk="<<chunk<<" positionInKmer="<<position<<" KmerLength="<<strlen(a)<<" bitPosition=" <<bitPosition<<" Chunks="<<result.getNumberOfU64()<<endl;
		}
		assert(chunk<result.getNumberOfU64());
		#endif

		result.setU64(chunk,result.getU64(chunk)|(code<<offsetInChunk));
	}

	return result;
}
Example #29
0
void SeedingData::loadCheckpoint(){
	cout<<"Rank "<<m_parameters->getRank()<<" is reading checkpoint Seeds"<<endl;

	ifstream f(m_parameters->getCheckpointFile("Seeds").c_str());
	int n=0;
	f.read((char*)&n,sizeof(int));
	for(int i=0;i<n;i++){
		vector<Kmer> seed;
		int vertices=0;
		f.read((char*)&vertices,sizeof(int));
		for(int j=0;j<vertices;j++){
			Kmer kmer;
			kmer.read(&f);
			seed.push_back(kmer);
		}
		m_SEEDING_seeds.push_back(seed);
	}
	cout<<"Rank "<<m_parameters->getRank()<<" loaded "<<n<<" seeds from checkpoint Seeds"<<endl;
	f.close();
}
Example #30
0
/**
 * Get the outgoing edges
 * one bit (1=yes, 0=no) per possible edge
 *
 * The k-mer is shifted right by one nucleotide (2 bits) across all of its
 * 64-bit chunks; then, for every edge flagged in bits 4..7 of 'edges',
 * a copy is produced whose last nucleotide (bits 2*(k-1) and 2*(k-1)+1)
 * is set to the edge's code.
 *
 * @param edges edge flags; bit 4+i set means that the edge with code i exists
 * @param k     k-mer length in nucleotides
 * @return one Kmer per existing outgoing edge, in increasing code order
 */
vector<Kmer> Kmer::_getOutgoingEdges(uint8_t edges,int k){
	vector<Kmer> b;

	// without at least one nucleotide there is no position to update
	if(k<1)
		return b;

	Kmer aTemplate;
	aTemplate=*this;

	for(int i=0;i<aTemplate.getNumberOfU64();i++){
		uint64_t word=aTemplate.getU64(i)>>2;
		if(i!=aTemplate.getNumberOfU64()-1){
			uint64_t next=aTemplate.getU64(i+1);
/*
 *		abcd	efgh
 *		00ab	00ef
 *		00ab	cdef
 */
			// the 2 lowest bits of the next chunk become the 2 highest here
			next=(next<<62);
			word=word|next;
		}
		aTemplate.setU64(i,word);
	}

	// Locate the last nucleotide from 2*(k-1) rather than from 2*k-2 taken
	// after the modulo: both agree unless 2*k is a multiple of 64, where the
	// latter gives a shift of -2 and a chunk index one past the right one.
	int positionToUpdate=2*(k-1);
	int chunkIdToUpdate=positionToUpdate/64;
	positionToUpdate=positionToUpdate%64;

	for(int i=0;i<4;i++){
		// flag of the edge with code i
		int j=(edges>>(4+i))&1;
		if(j==1){
			Kmer newKmer=aTemplate;
			uint64_t last=newKmer.getU64(chunkIdToUpdate);
			uint64_t filter=i;
			filter=filter<<positionToUpdate;
			last=last|filter;
			newKmer.setU64(chunkIdToUpdate,last);
			b.push_back(newKmer);
		}
	}

	return b;
}