Beispiel #1
0
/*
 * First pass over the read pairs: for every eligible read, looks for an
 * occurrence of the read whose nodeID equals the negated nodeID of one of
 * its mate's occurrences, with both positions strictly positive. Each such
 * pair adds one to the counter of its library.
 *
 * coOccurencesCount  output array of CATEGORIES + 1 counters, indexed by
 *                    cats[read] / 2; every entry is reset to 0 first.
 * readNodes          per-read occurrence arrays, addressed at [read + 1].
 * readNodeCounts     length of each occurrence array, addressed at [read + 1].
 * readPairs          index of the mate of each read.
 * cats               category of each read.
 *
 * Returns an array of sequenceCount(graph) booleans obtained from
 * callocOrExit; entry i is set to true when read i contributed to a count.
 * The array is not released here.
 */
static boolean * countCoOccurences(IDnum * coOccurencesCount, ReadOccurence ** readNodes, IDnum * readNodeCounts, IDnum * readPairs, Category * cats) {
	boolean *flagged = callocOrExit(sequenceCount(graph), boolean);
	Category lib;
	IDnum read;

	for (lib = 0; lib < CATEGORIES + 1; lib++)
		coOccurencesCount[lib] = 0;

	for (read = 0; read < sequenceCount(graph); read++) {
		ReadOccurence *own, *mate;
		IDnum mateIndex, ownCount;
		IDnum i, j;

		// Skip reads whose mate index is lower than their own (per the
		// original note: dodgy, unpaired or already counted reads)...
		if (readPairs[read] < read)
			continue;
		// ...and reads whose category already reports an insert length
		// above -1 (per the original note: user-specified)
		if (getInsertLength(graph, cats[read]) > -1)
			continue;

		lib = cats[read] / 2;
		mateIndex = readPairs[read];

		own = readNodes[read + 1];
		ownCount = readNodeCounts[read + 1];
		mate = readNodes[mateIndex + 1];

		// The occurrence lists are stated to be sorted by increasing node
		// ID, so the read's list is walked upwards (i) while the mate's
		// list is walked downwards (j), comparing against negated IDs.
		i = 0;
		j = readNodeCounts[mateIndex + 1] - 1;

		while (i < ownCount && j >= 0) {
			if (own[i].nodeID != -mate[j].nodeID) {
				// Advance whichever side is behind
				if (own[i].nodeID < -mate[j].nodeID)
					i++;
				else
					j--;
				continue;
			}

			// Matching node: a hit requires both positions to be positive
			if (own[i].position > 0 && mate[j].position > 0) {
				coOccurencesCount[lib]++;
				flagged[read] = true;
				break;
			}

			// Same node but unusable positions: move both cursors on
			i++;
			j--;
		}
	}

	return flagged;
}
Beispiel #2
0
/*
 * Computes, for every read ID, the number of node entries that refer to it.
 *
 * Every node ID from -nodeCount(graph) to +nodeCount(graph) is visited;
 * IDs for which getNodeInGraph returns NULL are skipped. For each node:
 *   - if read starts are activated, every short read marker attached to
 *     the node adds one to the counter of its read ID;
 *   - every passage marker with a non-negative sequence ID adds one to the
 *     counter of that ID, at most once per node (readMarker is a scratch
 *     "already seen on this node" flag array).
 *
 * Returns an array of sequenceCount(graph) + 1 counters obtained from
 * callocOrExit. The array is not released here.
 */
static IDnum *computeReadToNodeCounts()
{
	IDnum readIndex, nodeIndex;
	IDnum totalNodes = nodeCount(graph);
	IDnum maxNodeIndex = 2 * totalNodes + 1;
	IDnum maxReadIndex = sequenceCount(graph) + 1;
	IDnum *readNodeCounts = callocOrExit(maxReadIndex, IDnum);
	boolean *readMarker = callocOrExit(maxReadIndex, boolean);
	ShortReadMarker *nodeArray, *shortMarker;
	PassageMarkerI marker;
	Node *node;
	IDnum nodeReadCount;

	for (nodeIndex = 0; nodeIndex < maxNodeIndex; nodeIndex++) {
		// Shift the loop index so that node IDs run from -totalNodes
		// to +totalNodes
		node = getNodeInGraph(graph, nodeIndex - totalNodes);
		if (node == NULL)
			continue;

		// Short reads
		if (readStartsAreActivated(graph)) {
			nodeArray = getNodeReads(node, graph);
			nodeReadCount = getNodeReadCount(node, graph);
			for (readIndex = 0; readIndex < nodeReadCount; readIndex++) {
				shortMarker =
				    getShortReadMarkerAtIndex(nodeArray,
							      readIndex);
				readNodeCounts[getShortReadMarkerID
					       (shortMarker)]++;
			}
		}

		// Long reads: count each sequence once per node, even if several
		// of its passage markers go through this node
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex < 0)
				continue;

			if (readMarker[readIndex])
				continue;

			readNodeCounts[readIndex]++;
			readMarker[readIndex] = true;
		}

		// Clean up marker array. The test must mirror the one used when
		// setting the flags (IDs >= 0): otherwise the flag of ID 0 would
		// survive into the following nodes and suppress its counts.
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex >= 0)
				readMarker[readIndex] = false;
		}
	}

	free(readMarker);
	return readNodeCounts;
}
Beispiel #3
0
/*
 * Second pass over the read pairs: for every read flagged in
 * interestingReads, locates the first occurrence of the read whose nodeID
 * equals the negated nodeID of one of its mate's occurrences (both
 * positions strictly positive) and stores one measurement for it:
 *
 *   node length + word length - 1
 *     - (position - offset) of the read
 *     - (position - offset) of the mate
 *
 * coOccurences       output: coOccurences[lib] receives the measurements of
 *                    library lib (lib = cats[read] / 2), filled from index 0
 *                    upwards. NOTE(review): each row presumably has to be
 *                    sized by the caller beforehand - confirm at call site.
 * interestingReads   per-read flags selecting the reads to measure.
 * readNodes          per-read occurrence arrays, addressed at [read + 1].
 * readNodeCounts     length of each occurrence array, addressed at [read + 1].
 * readPairs          index of the mate of each read.
 * cats               category of each read.
 */
static void measureCoOccurences(Coordinate ** coOccurences, boolean * interestingReads, ReadOccurence ** readNodes, IDnum * readNodeCounts, IDnum * readPairs, Category * cats) {
	IDnum nextSlot[CATEGORIES + 1];
	Category lib;
	IDnum read;

	for (lib = 0; lib < CATEGORIES + 1; lib++)
		nextSlot[lib] = 0;

	for (read = 0; read < sequenceCount(graph); read++) {
		ReadOccurence *own, *mate;
		IDnum mateIndex, ownCount, slot;
		IDnum i, j;

		// Only reads selected by the caller are measured
		if (!interestingReads[read])
			continue;

		lib = cats[read] / 2;
		mateIndex = readPairs[read];
		slot = nextSlot[lib];

		own = readNodes[read + 1];
		ownCount = readNodeCounts[read + 1];
		mate = readNodes[mateIndex + 1];

		// The occurrence lists are stated to be sorted by increasing node
		// ID: walk the read's list upwards (i) and the mate's list
		// downwards (j), comparing against negated IDs.
		i = 0;
		j = readNodeCounts[mateIndex + 1] - 1;

		while (i < ownCount && j >= 0) {
			if (own[i].nodeID != -mate[j].nodeID) {
				// Advance whichever side is behind
				if (own[i].nodeID < -mate[j].nodeID)
					i++;
				else
					j--;
				continue;
			}

			// Matching node: record the measurement if both positions
			// are positive, then stop looking for this read
			if (own[i].position > 0 && mate[j].position > 0) {
				coOccurences[lib][slot] =
				      getNodeLength(getNodeInGraph(graph, own[i].nodeID))
				      + getWordLength(graph) - 1
				      - (own[i].position - own[i].offset)
				      - (mate[j].position - mate[j].offset);
				nextSlot[lib]++;
				break;
			}

			// Same node but unusable positions: move both cursors on
			i++;
			j--;
		}
	}
}