Ejemplo n.º 1
0
static IDnum *computeReadToNodeCounts()
{
	IDnum readIndex, nodeIndex;
	IDnum maxNodeIndex = 2 * nodeCount(graph) + 1;
	IDnum maxReadIndex = sequenceCount(graph) + 1;
	IDnum *readNodeCounts = callocOrExit(maxReadIndex, IDnum);
	boolean *readMarker = callocOrExit(maxReadIndex, boolean);
	ShortReadMarker *nodeArray, *shortMarker;
	PassageMarkerI marker;
	Node *node;
	IDnum nodeReadCount;

	//puts("Computing read to node mapping array sizes");

	for (nodeIndex = 0; nodeIndex < maxNodeIndex; nodeIndex++) {
		node = getNodeInGraph(graph, nodeIndex - nodeCount(graph));
		if (node == NULL)
			continue;

		// Short reads
		if (readStartsAreActivated(graph)) {
			nodeArray = getNodeReads(node, graph);
			nodeReadCount = getNodeReadCount(node, graph);
			for (readIndex = 0; readIndex < nodeReadCount; readIndex++) {
				shortMarker =
				    getShortReadMarkerAtIndex(nodeArray,
							      readIndex);
				readNodeCounts[getShortReadMarkerID
					       (shortMarker)]++;
			}
		}

		// Long reads
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex < 0)
				continue;

			if (readMarker[readIndex])
				continue;

			readNodeCounts[readIndex]++;
			readMarker[readIndex] = true;
		}

		// Clean up marker array
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex > 0)
				readMarker[readIndex] = false;
		}
	}

	free(readMarker);
	return readNodeCounts;
}
Ejemplo n.º 2
0
// Re-bases and merges the short-read start markers of two nodes that are
// being joined (source appended onto target).
//
// Does nothing when read starts are not activated on the graph or when either
// node is NULL.  Otherwise:
//   - every marker of source is shifted by the length of target;
//   - every marker of target's twin is shifted by the length of source;
//   - the read lists are merged, source into target and twin into twin.
// Markers whose position is -1 are left untouched.
void concatenateReadStarts(Node * target, Node * source, Graph * graph)
{
	ShortReadMarker *reads, *current;
	IDnum total, i;
	Coordinate shift, start;
	Node *targetTwin;

	if (!readStartsAreActivated(graph) || target == NULL
	    || source == NULL)
		return;

	// Reads starting on the source node now start further along
	reads = getNodeReads(source, graph);
	total = getNodeReadCount(source, graph);
	shift = getNodeLength(target);

	for (i = 0; i < total; i++) {
		current = getShortReadMarkerAtIndex(reads, i);
		start = getShortReadMarkerPosition(current);
		if (start == -1)
			continue;
		setShortReadMarkerPosition(current, start + shift);
	}

	// Mirror-image adjustment on the reverse strand
	targetTwin = getTwinNode(target);
	reads = getNodeReads(targetTwin, graph);
	total = getNodeReadCount(targetTwin, graph);
	shift = getNodeLength(source);

	for (i = 0; i < total; i++) {
		current = getShortReadMarkerAtIndex(reads, i);
		start = getShortReadMarkerPosition(current);
		if (start == -1)
			continue;
		setShortReadMarkerPosition(current, start + shift);
	}

	// Fold the source lists into the target lists, strand by strand
	mergeNodeReads(target, source, graph);
	mergeNodeReads(getTwinNode(target), getTwinNode(source), graph);
}
Ejemplo n.º 3
0
// Shifts the short-read start markers stored on the twin of `target` by
// `nodeLength`.
//
// No-op when read starts are not activated on the file-level graph.  Markers
// whose position is -1 are left untouched.
static void adjustShortReadsByLength(Node * target, Coordinate nodeLength)
{
	Node *twin;
	ShortReadMarker *reads;
	IDnum total, i;

	if (!readStartsAreActivated(graph))
		return;

	twin = getTwinNode(target);
	reads = getNodeReads(twin, graph);
	total = getNodeReadCount(twin, graph);

	for (i = 0; i < total; i++) {
		ShortReadMarker *current = getShortReadMarkerAtIndex(reads, i);
		Coordinate start = getShortReadMarkerPosition(current);

		if (start == -1)
			continue;

		setShortReadMarkerPosition(current, start + nodeLength);
	}
}
Ejemplo n.º 4
0
// Projects every usable read anchored on node `nodeID`.
//
// The node is skipped when it does not exist or when getUniqueness() reports
// false for it.  Short reads are handed to projectFromShortRead() unless
// their entry in `dubious` (indexed by read ID - 1) is set; passage markers
// with a strictly positive sequence ID are handed to projectFromLongRead().
// All remaining arguments are forwarded unchanged to those two helpers.
static void projectFromNode(IDnum nodeID,
			    ReadOccurence ** readNodes,
			    IDnum * readNodeCounts,
			    IDnum * readPairs, Category * cats,
			    boolean * dubious,
			    ShortLength * lengths, boolean weight)
{
	Node *node = getNodeInGraph(graph, nodeID);
	PassageMarkerI marker;

	if (node == NULL)
		return;
	if (!getUniqueness(node))
		return;

	// Short reads
	if (readStartsAreActivated(graph)) {
		ShortReadMarker *reads = getNodeReads(node, graph);
		IDnum total = getNodeReadCount(node, graph);
		IDnum i;

		for (i = 0; i < total; i++) {
			ShortReadMarker *shortMarker =
			    getShortReadMarkerAtIndex(reads, i);

			if (!dubious[getShortReadMarkerID(shortMarker) - 1])
				projectFromShortRead(node, shortMarker,
						     readPairs, cats,
						     readNodes,
						     readNodeCounts,
						     lengths, weight);
		}
	}

	// Long reads
	marker = getMarker(node);
	while (marker != NULL_IDX) {
		if (getPassageMarkerSequenceID(marker) > 0)
			projectFromLongRead(node, marker, readPairs, cats,
					    readNodes, readNodeCounts,
					    lengths, weight);
		marker = getNextInNode(marker);
	}
}
Ejemplo n.º 5
0
// Entry point of the "pebble" resolution stage.
//
// Stores `argGraph` in the file-level `graph` variable, then returns at once
// if read starts are not activated on it.  Otherwise it builds the scaffold,
// prepares the graph for local corrections, allocates the local scaffold
// (2 * nodeCount + 1 entries) and calls expandLongNodes() repeatedly until it
// reports no further modification.  Temporary state is torn down and gap
// markers are sorted before returning.
void exploitShortReadPairs(Graph * argGraph,
			   ReadSet * reads,
			   boolean * dubious,
			   boolean * shadows,
			   boolean force_jumps)
{
	boolean changed;

	// The file-level graph is set even when the stage is skipped
	graph = argGraph;

	if (!readStartsAreActivated(graph))
		return;

	velvetLog("Starting pebble resolution...\n");

	resetNodeStatus(graph);

	// Set-up: scaffold, graph state, local scaffold storage
	buildScaffold(graph, reads, dubious, shadows);
	prepareGraphForLocalCorrections(graph);
	localScaffold =
	    callocOrExit(2 * nodeCount(graph) + 1, MiniConnection);

	// Keep expanding until a full pass changes nothing
	do {
		changed = expandLongNodes(force_jumps);
	} while (changed);

	// Tear-down
	cleanMemory();
	deactivateLocalCorrectionSettings();

	sortGapMarkers(graph);

	velvetLog("Pebble done.\n");
}
Ejemplo n.º 6
0
// Appends the occurrences of node `nodeID` to the per-read occurrence arrays.
//
// readNodes[r] is the occurrence array of read r and readNodeCounts[r] the
// number of entries already written to it; both are updated in place.
// readMarker is a scratch flag array indexed by read ID, used to detect a
// long read that has several passage markers on this node.
//
// Short reads: one entry per marker, carrying the marker's position and
// offset.  Long reads: one entry per (node, read); if the same read shows up
// again on this node, the entry just written has its position and offset
// overwritten with -1.
static void computePartialReadToNodeMapping(IDnum nodeID,
					    ReadOccurence ** readNodes,
					    IDnum * readNodeCounts,
					    boolean * readMarker)
{
	Node *node = getNodeInGraph(graph, nodeID);
	PassageMarkerI marker;
	ReadOccurence *slot;
	IDnum seqID;

	// Short reads
	if (readStartsAreActivated(graph)) {
		ShortReadMarker *reads = getNodeReads(node, graph);
		IDnum total = getNodeReadCount(node, graph);
		IDnum i;

		for (i = 0; i < total; i++) {
			ShortReadMarker *shortMarker =
			    getShortReadMarkerAtIndex(reads, i);

			seqID = getShortReadMarkerID(shortMarker);
			slot = readNodes[seqID] + readNodeCounts[seqID];
			slot->nodeID = nodeID;
			slot->position =
			    getShortReadMarkerPosition(shortMarker);
			slot->offset =
			    getShortReadMarkerOffset(shortMarker);
			readNodeCounts[seqID]++;
		}
	}

	// Long reads
	marker = getMarker(node);
	while (marker != NULL_IDX) {
		seqID = getPassageMarkerSequenceID(marker);

		if (seqID >= 0) {
			if (readMarker[seqID]) {
				// Repeat visit: blank out the entry written
				// for this read on this node
				slot = readNodes[seqID]
				    + (readNodeCounts[seqID] - 1);
				slot->position = -1;
				slot->offset = -1;
			} else {
				slot = readNodes[seqID]
				    + readNodeCounts[seqID];
				slot->nodeID = nodeID;
				slot->position = getStartOffset(marker);
				slot->offset =
				    getPassageMarkerStart(marker);
				readNodeCounts[seqID]++;
				readMarker[seqID] = true;
			}
		}

		marker = getNextInNode(marker);
	}

	// Reset the scratch flags touched by this node.
	// NOTE(review): the loop above accepts sequence ID 0 but this reset
	// skips it; that is only harmless if ID 0 is never assigned to a
	// passage marker -- confirm.
	marker = getMarker(node);
	while (marker != NULL_IDX) {
		seqID = getPassageMarkerSequenceID(marker);
		if (seqID > 0)
			readMarker[seqID] = false;
		marker = getNextInNode(marker);
	}
}
Ejemplo n.º 7
0
// Threads every read of `reads` through `graph` in two timed passes.
//
// Pass 1 ("ghost threading") runs over reads [refCount, readCount) and is
// followed by createNodeReadStartArrays(); pass 2 runs over all reads
// [0, readCount) and calls threadSequenceThroughGraph().  Reads are passed to
// both helpers with a 1-based ID (readIndex + 1).
//
// When `referenceMappings` is non-NULL, the roadmap file `roadmapFilename` is
// imported and a prefix-sum table gives, for each read, the index of its
// first annotation in the roadmap's annotation array; both are released
// before returning.
//
// Read starts are activated on the graph if they were not already.
// `kmerTable` is destroyed before returning, so the caller must not use it
// afterwards.
static void fillUpGraph(ReadSet * reads,
			KmerOccurenceTable * kmerTable,
			Graph * graph,
			boolean readTracking,
			boolean double_strand,
			ReferenceMapping * referenceMappings,
			Coordinate referenceMappingCount,
			IDnum refCount,
			char * roadmapFilename)
{
	IDnum readIndex;
	RoadMapArray *roadmap = NULL;
	Coordinate *annotationOffset = NULL;
	struct timeval start, end, diff;
	
	if (referenceMappings)
	{
		roadmap = importRoadMapArray(roadmapFilename);
		// annotationOffset[i] = number of annotations owned by reads 0..i-1
		annotationOffset = callocOrExit(reads->readCount, Coordinate);
		for (readIndex = 1; readIndex < reads->readCount; readIndex++)
			annotationOffset[readIndex] = annotationOffset[readIndex - 1]
						      + getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1));
	}

	resetNodeStatus(graph);
	// Allocate memory for the read pairs
	if (!readStartsAreActivated(graph))
		activateReadStarts(graph);

	// Pass 1: ghost threading, reference sequences excluded
	gettimeofday(&start, NULL);
#ifdef OPENMP
	initSmallNodeListMemory();
	createNodeLocks(graph);
	#pragma omp parallel for
#endif
	for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
	{
		Annotation * annotations = NULL;
		IDnum annotationCount = 0;
		Category category;
		boolean second_in_pair;

		if (readIndex % 1000000 == 0)
			velvetLog("Ghost Threading through reads %ld / %ld\n",
				  (long) readIndex, (long) reads->readCount);

		category = reads->categories[readIndex];
		// Only odd categories are tested for being second in a pair
		second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);

		if (referenceMappings)
		{
			annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
			annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
		}
	
		ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
						kmerTable,
						graph, readIndex + 1,
						category,
						readTracking, double_strand,
						referenceMappings, referenceMappingCount,
					  	refCount, annotations, annotationCount,
						second_in_pair);
	}
	createNodeReadStartArrays(graph);
	gettimeofday(&end, NULL);
	timersub(&end, &start, &diff);
	// tv_sec / tv_usec are time_t / suseconds_t, which need not be long:
	// cast so the arguments match the %ld conversions on every platform.
	velvetLog(" === Ghost-Threaded in %ld.%06ld s\n",
		  (long) diff.tv_sec, (long) diff.tv_usec);

	// Pass 2: real threading, all reads
	gettimeofday(&start, NULL);
#ifdef OPENMP
	// The thread count for this pass is capped at 32
	int threads = omp_get_max_threads();
	if (threads > 32)
		threads = 32;

	#pragma omp parallel for num_threads(threads)
#endif
	for (readIndex = 0; readIndex < reads->readCount; readIndex++)
	{
		Annotation * annotations = NULL;
		IDnum annotationCount = 0;
		Category category;
		boolean second_in_pair;

		if (readIndex % 1000000 == 0)
			velvetLog("Threading through reads %li / %li\n",
				  (long) readIndex, (long) reads->readCount);

		category = reads->categories[readIndex];
		second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);

		if (referenceMappings)
		{
			annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
			annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
		}

		threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
					   kmerTable,
					   graph, readIndex + 1, category,
					   readTracking, double_strand,
					   referenceMappings, referenceMappingCount,
					   refCount, annotations, annotationCount, second_in_pair);
	}
	gettimeofday(&end, NULL);
	timersub(&end, &start, &diff);
	velvetLog(" === Threaded in %ld.%06ld s\n",
		  (long) diff.tv_sec, (long) diff.tv_usec);

#ifdef OPENMP
	free(nodeLocks);
	nodeLocks = NULL;
#endif

	if (referenceMappings)
	{
		destroyRoadMapArray(roadmap);
		free (annotationOffset);
	}

	orderNodeReadStartArrays(graph);

	destroySmallNodeListMemmory();

	destroyKmerOccurenceTable(kmerTable);
}
Ejemplo n.º 8
0
// Pre-pass over one short read: walks the read's k-mers, looks each one up in
// `kmerOccurences`, and bumps the read-start count of every node the read
// lands on for the first time (nodes whose status flag is not yet set).
// Touched nodes have their status set and are memorized; the memorized nodes
// are released with unlockMemorizedNodes() once the walk ends.
//
// The read is ignored when:
//   - readTracking is off and the category is even, or
//     category / 2 >= CATEGORIES;
//   - it is shorter than the graph's word length.
// A read longer than USHRT_MAX is treated as a fatal input error: a message
// is printed and the process exits with status 1.
//
// With double_strand set, the reverse-strand k-mer is maintained alongside
// the forward one; whichever compares lower is looked up, and a hit on the
// reverse k-mer maps to the negated node ID.
//
// The walk stops at the first k-mer that misses the table once at least one
// k-mer has hit.  `seqID` is accepted but not used by this function.
static void ghostThreadSequenceThroughGraph(TightString * tString,
					    KmerOccurenceTable *
					    kmerOccurences, Graph * graph,
					    IDnum seqID, Category category,
					    boolean readTracking,
					    boolean double_strand)
{
	Kmer word;
	Kmer antiWord;
	Coordinate readNucleotideIndex;
	KmerOccurence *kmerOccurence;
	int wordLength = getWordLength(graph);
	Nucleotide nucleotide;

	Node *node;
	Node *previousNode = NULL;

	clearKmer(&word);
	clearKmer(&antiWord);

	// Neglect any read which will not be short paired
	if ((!readTracking && category % 2 == 0)
	    || category / 2 >= CATEGORIES)
		return;

	// Neglect any string shorter than WORDLENGTH :
	if (getLength(tString) < wordLength)
		return;

	// Verify that all short reads are reasonably short.  The reported
	// limit must be the one actually tested (USHRT_MAX, not SHRT_MAX).
	if (getLength(tString) > USHRT_MAX) {
		printf("Short read of length %lli, longer than limit %i\n",
		       (long long) getLength(tString), (int) USHRT_MAX);
		puts("You should better declare this sequence as long, because it genuinely is!");
		exit(1);
	}
	// Allocate memory for the read pairs
	if (!readStartsAreActivated(graph))
		activateReadStarts(graph);

	// Fill in the initial word with the first wordLength - 1 nucleotides,
	// so the first push in the main loop completes the first k-mer
	for (readNucleotideIndex = 0;
	     readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
		nucleotide = getNucleotide(readNucleotideIndex, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}
	}

	// Go through sequence
	while (readNucleotideIndex < getLength(tString)) {
		// Shift word:
		nucleotide = getNucleotide(readNucleotideIndex++, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}

		// Search in table: forward k-mer when it compares lower or
		// equal (or in single-strand mode), reverse k-mer otherwise
		if ((!double_strand || compareKmers(&word, &antiWord) <= 0)
		    && (kmerOccurence =
			findKmerOccurenceInSortedTable(&word,
						       kmerOccurences))) {
			node =
			    getNodeInGraph(graph, kmerOccurence->nodeID);
		} else if ((double_strand && compareKmers(&word, &antiWord) > 0)
			   && (kmerOccurence =
			       findKmerOccurenceInSortedTable(&antiWord,
							      kmerOccurences)))
		{
			node =
			    getNodeInGraph(graph, -kmerOccurence->nodeID);
		} else {
			// Miss: stop if the read had already entered the graph
			node = NULL;
			if (previousNode)
				break;
		}

		previousNode = node;

		// Fill in graph: count each node at most once per read
		if (node && !getNodeStatus(node)) {
			incrementReadStartCount(node, graph);
			setSingleNodeStatus(node, true);
			memorizeNode(node);
		}
	}

	unlockMemorizedNodes();
}