Exemplo n.º 1
0
/*
 * Threads the reads of `reads` through `graph` in two passes.
 *
 * Pass 1 ("ghost" threading) visits reads [refCount, readCount) and calls
 * ghostThreadSequenceThroughGraph() on each; createNodeReadStartArrays()
 * is then called on the graph.
 * Pass 2 visits all reads [0, readCount) and calls
 * threadSequenceThroughGraph() on each; orderNodeReadStartArrays() is
 * called afterwards.
 *
 * reads                  read set (sequences, categories, readCount)
 * kmerTable              k-mer lookup table; it is destroyed before returning,
 *                        so the caller must not use it afterwards
 * graph                  graph being filled
 * readTracking           forwarded unchanged to both threading functions
 * double_strand          forwarded unchanged to both threading functions
 * referenceMappings      when non-NULL, the roadmap file is imported and the
 *                        per-read annotations are handed to the threading
 *                        functions; when NULL, annotations stay NULL / 0
 * referenceMappingCount  forwarded unchanged to both threading functions
 * refCount               index of the first read handled by the ghost pass
 * roadmapFilename        file read by importRoadMapArray(); only used when
 *                        referenceMappings is non-NULL
 *
 * Reads are passed to the threading functions with the 1-based ID
 * readIndex + 1.
 */
static void fillUpGraph(ReadSet * reads,
			KmerOccurenceTable * kmerTable,
			Graph * graph,
			boolean readTracking,
			boolean double_strand,
			ReferenceMapping * referenceMappings,
			Coordinate referenceMappingCount,
			IDnum refCount,
			char * roadmapFilename)
{
	IDnum readIndex;
	RoadMapArray *roadmap = NULL;
	Coordinate *annotationOffset = NULL;
	struct timeval start, end, diff;
	
	if (referenceMappings)
	{
		roadmap = importRoadMapArray(roadmapFilename);
		// annotationOffset[i] is the running sum of the annotation counts of
		// reads 0 .. i-1 (entry 0 stays at the zero written by calloc), i.e.
		// the index of read i's first annotation in roadmap->annotations.
		annotationOffset = callocOrExit(reads->readCount, Coordinate);
		for (readIndex = 1; readIndex < reads->readCount; readIndex++)
			annotationOffset[readIndex] = annotationOffset[readIndex - 1]
						      + getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1));
	}

	resetNodeStatus(graph);
	// Allocate memory for the read pairs
	if (!readStartsAreActivated(graph))
		activateReadStarts(graph);

	// Pass 1: ghost threading, skipping the first refCount reads
	gettimeofday(&start, NULL);
#ifdef OPENMP
	initSmallNodeListMemory();
	createNodeLocks(graph);
	#pragma omp parallel for
#endif
	for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
	{
		Annotation * annotations = NULL;
		IDnum annotationCount = 0;
		Category category;
		boolean second_in_pair;

		if (readIndex % 1000000 == 0)
			velvetLog("Ghost Threading through reads %ld / %ld\n",
				  (long) readIndex, (long) reads->readCount);

		category = reads->categories[readIndex];
		// isSecondInPair() is only consulted for odd categories
		second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);

		if (referenceMappings)
		{
			annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
			annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
		}
	
		ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
						kmerTable,
						graph, readIndex + 1,
						category,
						readTracking, double_strand,
						referenceMappings, referenceMappingCount,
					  	refCount, annotations, annotationCount,
						second_in_pair);
	}
	createNodeReadStartArrays(graph);
	gettimeofday(&end, NULL);
	timersub(&end, &start, &diff);
	// tv_sec / tv_usec are time_t / suseconds_t, which need not be long:
	// cast explicitly so the arguments match the %ld conversions.
	velvetLog(" === Ghost-Threaded in %ld.%06ld s\n",
		  (long) diff.tv_sec, (long) diff.tv_usec);

	// Pass 2: actual threading over every read, capped at 32 OpenMP threads
	gettimeofday(&start, NULL);
#ifdef OPENMP
	int threads = omp_get_max_threads();
	if (threads > 32)
		threads = 32;

	#pragma omp parallel for num_threads(threads)
#endif
	for (readIndex = 0; readIndex < reads->readCount; readIndex++)
	{
		Annotation * annotations = NULL;
		IDnum annotationCount = 0;
		Category category;
		boolean second_in_pair;

		if (readIndex % 1000000 == 0)
			velvetLog("Threading through reads %li / %li\n",
				  (long) readIndex, (long) reads->readCount);

		category = reads->categories[readIndex];
		// isSecondInPair() is only consulted for odd categories
		second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);

		if (referenceMappings)
		{
			annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
			annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
		}

		threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
					   kmerTable,
					   graph, readIndex + 1, category,
					   readTracking, double_strand,
					   referenceMappings, referenceMappingCount,
					   refCount, annotations, annotationCount, second_in_pair);
	}
	gettimeofday(&end, NULL);
	timersub(&end, &start, &diff);
	velvetLog(" === Threaded in %ld.%06ld s\n",
		  (long) diff.tv_sec, (long) diff.tv_usec);

#ifdef OPENMP
	free(nodeLocks);
	nodeLocks = NULL;
#endif

	if (referenceMappings)
	{
		destroyRoadMapArray(roadmap);
		free (annotationOffset);
	}

	orderNodeReadStartArrays(graph);

	destroySmallNodeListMemmory();

	// The k-mer table is released here; it is not usable after this call
	destroyKmerOccurenceTable(kmerTable);
}
Exemplo n.º 2
0
/*
 * "Ghost" threading of one read through the graph: walks the read k-mer by
 * k-mer, looks each k-mer up in the sorted occurrence table and, for every
 * node reached whose status flag is not yet set, increments that node's
 * read-start count, sets its status flag and memorizes it. No other change
 * to the graph is made in this function.
 *
 * tString         read sequence
 * kmerOccurences  sorted k-mer occurrence table used for the lookups
 * graph           graph the k-mers are mapped onto
 * seqID           read ID (not used in this function body)
 * category        read category; the read is skipped when
 *                 category / 2 >= CATEGORIES, or when readTracking is off
 *                 and the category is even
 * readTracking    see `category`
 * double_strand   when true, the reverse strand word is maintained as well
 *                 and the lookup uses whichever of the two compares lower;
 *                 a hit on the reverse word maps to the negated node ID
 *
 * Reads shorter than the graph's word length are ignored. A read longer than
 * USHRT_MAX is a fatal error: a message is printed and the process exits
 * with status 1.
 */
static void ghostThreadSequenceThroughGraph(TightString * tString,
					    KmerOccurenceTable *
					    kmerOccurences, Graph * graph,
					    IDnum seqID, Category category,
					    boolean readTracking,
					    boolean double_strand)
{
	Kmer word;
	Kmer antiWord;
	Coordinate readNucleotideIndex;
	KmerOccurence *kmerOccurence;
	int wordLength = getWordLength(graph);
	Nucleotide nucleotide;
	boolean useForward;

	Node *node;
	Node *previousNode = NULL;

	clearKmer(&word);
	clearKmer(&antiWord);

	// Neglect any read which will not be short paired
	if ((!readTracking && category % 2 == 0)
	    || category / 2 >= CATEGORIES)
		return;

	// Neglect any string shorter than WORDLENGTH :
	if (getLength(tString) < wordLength)
		return;

	// Verify that all short reads are reasonably short.
	// The reported limit is the one actually tested (USHRT_MAX).
	if (getLength(tString) > USHRT_MAX) {
		printf("Short read of length %lli, longer than limit %u\n",
		       (long long) getLength(tString), (unsigned) USHRT_MAX);
		puts("You should better declare this sequence as long, because it genuinely is!");
		exit(1);
	}
	// Allocate memory for the read pairs
	if (!readStartsAreActivated(graph))
		activateReadStarts(graph);

	// Fill in the initial word (first wordLength - 1 nucleotides) :
	for (readNucleotideIndex = 0;
	     readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
		nucleotide = getNucleotide(readNucleotideIndex, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}
	}

	// Go through sequence
	while (readNucleotideIndex < getLength(tString)) {
		// Shift word:
		nucleotide = getNucleotide(readNucleotideIndex++, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}

		// Decide once which word is looked up: the forward word unless
		// double_strand is set and the reverse word compares lower.
		// NOTE(review): evaluating compareKmers() a single time assumes
		// it has no side effects - confirm.
		useForward = !double_strand
		    || compareKmers(&word, &antiWord) <= 0;

		// Search in table
		if (useForward)
			kmerOccurence =
			    findKmerOccurenceInSortedTable(&word,
							   kmerOccurences);
		else
			kmerOccurence =
			    findKmerOccurenceInSortedTable(&antiWord,
							   kmerOccurences);

		if (kmerOccurence) {
			// A hit on the reverse word designates the negated node ID
			node =
			    getNodeInGraph(graph,
					   useForward ? kmerOccurence->nodeID
					   : -kmerOccurence->nodeID);
		} else {
			node = NULL;
			// Stop at the first miss that follows a mapped k-mer
			if (previousNode)
				break;
		}

		previousNode = node;

		// Fill in graph: a node whose status flag is already set is
		// not counted again
		if (node && !getNodeStatus(node)) {
			incrementReadStartCount(node, graph);
			setSingleNodeStatus(node, true);
			memorizeNode(node);
		}
	}

	// Release the nodes recorded through memorizeNode() above
	unlockMemorizedNodes();
}