Beispiel #1
0
void prepareGraphForLocalCorrections(Graph * argGraph)
{
	IDnum nodes = nodeCount(argGraph);
	IDnum index;

	//Setting global params
	graph = argGraph;
	WORDLENGTH = getWordLength(graph);;
	// Done with global params

	velvetLog("Preparing to correct graph with cutoff %f\n",
	       MAXDIVERGENCE);

	// Allocating memory
	times = mallocOrExit(2 * nodes + 1, Time);
	previous = mallocOrExit(2 * nodes + 1, Node *);

	dheapNodes = mallocOrExit(2 * nodes + 1, DFibHeapNode *);

	dheap = newDFibHeap();

	fastSequence = newTightString(MAXREADLENGTH);
	slowSequence = newTightString(MAXREADLENGTH);

	for (index = 0; index < (2 * nodeCount(graph) + 1); index++) {
		times[index] = -1;
		dheapNodes[index] = NULL;
		previous[index] = NULL;
	}

	Fmatrix = callocOrExit(MAXREADLENGTH + 1, double *);
	for (index = 0; index < MAXREADLENGTH + 1; index++)
		Fmatrix[index] = callocOrExit(MAXREADLENGTH + 1, double);
	//Done with memory 
}
Beispiel #2
0
KmerOccurenceTable * newKmerOccurenceTable(short int accelerationBits, int wordLength) {
	KmerOccurenceTable * kmerTable = mallocOrExit(1, KmerOccurenceTable);

	if (accelerationBits > 2 * wordLength)
		accelerationBits = 2 * wordLength;

	if (accelerationBits > 32)
		accelerationBits = 32;

	if (accelerationBits > 0) {
		resetKeyFilter(accelerationBits);	
		kmerTable->accelerationBits = accelerationBits;
		kmerTable->accelerationTable =
		    callocOrExit((((size_t) 1) << accelerationBits) + 1,
			   IDnum);
		kmerTable->accelerationShift =
		    (short int) 2 *wordLength - accelerationBits;
	} else {
		kmerTable->accelerationBits = 0;
		kmerTable->accelerationTable = NULL;
		kmerTable->accelerationShift = 0;
	}

	return kmerTable;
}
Beispiel #3
0
//
// Creates a tightString from an array of normal strings
//
TightString *newTightStringArrayFromStringArray(char **sequences,
						IDnum sequenceCount,
						char **tSeqMem)
{
	IDnum sequenceIndex;
	Codon *tmp;
	TightString *tStringArray = mallocOrExit(sequenceCount, TightString);
	Coordinate totalLength = 0;
	int arrayLength;

	for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++)
	{
		tStringArray[sequenceIndex].length = strlen (sequences[sequenceIndex]);
		arrayLength = tStringArray[sequenceIndex].length / 4;
		if (tStringArray[sequenceIndex].length % 4 > 0)
			arrayLength++;
		totalLength += arrayLength;
	}
	*tSeqMem = callocOrExit (totalLength, char);
	tmp = (Codon*)*tSeqMem;
	for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++)
	{
		fillTightStringWithString (&tStringArray[sequenceIndex],
					   sequences[sequenceIndex],
					   tmp);
		arrayLength = tStringArray[sequenceIndex].length / 4;
		if (tStringArray[sequenceIndex].length % 4 > 0)
			arrayLength++;
		tmp += arrayLength;
	}

	free(sequences);
	return tStringArray;
}
Beispiel #4
0
//
// Creates a tightString from a tradionnal string of A,T,G, and C of length size
//
TightString *newTightStringFromString(char *sequence)
{
	TightString *newTString = mallocOrExit(1, TightString);

	int size = (int) strlen(sequence);
	int arrayLength = size / 4;
	int index;

	if (size % 4 > 0)
		arrayLength++;

	newTString->length = size;
	newTString->sequence = callocOrExit(arrayLength, Codon);

	for (index = 0; index < arrayLength; index++)
		newTString->sequence[index] = 0;

	for (index = 0; index < size; index++)
		writeNucleotide(sequence[index],
				&(newTString->sequence[index / 4]),
				index % 4);

	free(sequence);
	return newTString;
}
Beispiel #5
0
static void createNodeLocks(PreGraph *preGraph)
{
	IDnum nbNodes;
	IDnum nodeIndex;

	nbNodes = preNodeCount_pg(preGraph) + 1;
	if (nodeLocks)
		free (nodeLocks);
	nodeLocks = mallocOrExit(nbNodes, omp_lock_t);

	#pragma omp parallel for
	for (nodeIndex = 0; nodeIndex < nbNodes; nodeIndex++)
		omp_init_lock(nodeLocks + nodeIndex);
}
Beispiel #6
0
// Imports roadmap from the appropriate file format
// Memory allocated within the function
RoadMapArray *importRoadMapArray(char *filename)
{
	FILE *file;
	const int maxline = 100;
	char *line = mallocOrExit(maxline, char);
	RoadMap *array;
	RoadMap *rdmap = NULL;
	IDnum rdmapIndex = 0;
	IDnum seqID;
	Coordinate position, start, finish;
	Annotation *nextAnnotation;
	RoadMapArray *result = mallocOrExit(1, RoadMapArray);
	IDnum sequenceCount;
	IDnum annotationCount = 0;
	short short_var;
	long long_var;
	long long longlong_var, longlong_var2, longlong_var3;

	printf("Reading roadmap file %s\n", filename);

	file = fopen(filename, "r");
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename);
	sscanf(line, "%ld\t%i\t%hi\n", &long_var, &(result->WORDLENGTH), &short_var);
	sequenceCount = (IDnum) long_var;
	resetWordFilter(result->WORDLENGTH);
	result->length = sequenceCount;
	array = mallocOrExit(sequenceCount, RoadMap);
	result->array = array;
	result->double_strand = (boolean) short_var;

	while (fgets(line, maxline, file) != NULL)
		if (line[0] != 'R')
			annotationCount++;

	result->annotations = callocOrExit(annotationCount, Annotation);
	nextAnnotation = result->annotations;
	fclose(file);

	file = fopen(filename, "r");

	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename);
	while (fgets(line, maxline, file) != NULL) {
		if (line[0] == 'R') {
			rdmap = getRoadMapInArray(result, rdmapIndex++);
			rdmap->annotationCount = 0;
		} else {
			sscanf(line, "%ld\t%lld\t%lld\t%lld\n", &long_var,
			       &longlong_var, &longlong_var2, &longlong_var3);
			seqID = (IDnum) long_var;
			position = (Coordinate) longlong_var;
			start = (Coordinate) longlong_var2;
			finish = (Coordinate) longlong_var3;
			nextAnnotation->sequenceID = seqID;
			nextAnnotation->position = position;
			nextAnnotation->start.coord = start;
			nextAnnotation->finish.coord = finish;

			if (seqID > 0)
				nextAnnotation->length = finish - start;
			else
				nextAnnotation->length = start - finish;


			rdmap->annotationCount++;
			nextAnnotation++;
		}
	}

	printf("%d roadmaps reads\n", rdmapIndex);

	fclose(file);
	free(line);
	return result;
}
Beispiel #7
0
// Threads each sequences and creates preArcs according to road map indications
static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
			    IDnum * chains)
{
	IDnum sequenceIndex;
	IDnum referenceCount = rdmaps->referenceCount;
#ifdef _OPENMP
	annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate);
	annotationOffset[0] = 0;
	for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++)
		annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] +
						  getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1));
#else
	Annotation *annot = rdmaps->annotations;
#endif

	if (rdmaps->referenceCount > 0) 
		allocatePreMarkerCountSpace_pg(preGraph);

#ifdef _OPENMP
	int threads = omp_get_max_threads();
	if (threads > 8)
		threads = 8;

	#pragma omp parallel for num_threads(threads)
#endif
	for (sequenceIndex = 1;
	     sequenceIndex <= sequenceCount_pg(preGraph);
	     sequenceIndex++) {
#ifdef _OPENMP
		Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]);
#endif
		RoadMap *rdmap;
		Coordinate currentPosition, currentInternalPosition;
		IDnum currentPreNodeID, nextInternalPreNodeID;
		IDnum annotIndex, lastAnnotIndex;
		boolean isReference;

		if (sequenceIndex % 1000000 == 0)
			velvetLog("Connecting %li / %li\n", (long) sequenceIndex,
			       (long) sequenceCount_pg(preGraph));

		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
		annotIndex = 0;
		lastAnnotIndex = getAnnotationCount(rdmap);
		nextInternalPreNodeID = chooseNextInternalPreNode
		    (chains[sequenceIndex] - 1, sequenceIndex,
		     preGraph, chains);
		isReference = (sequenceIndex <= referenceCount);

		currentPosition = 0;
		currentInternalPosition = 0;
		currentPreNodeID = 0;
		// Recursion up to last annotation
		while (annotIndex < lastAnnotIndex
		       || nextInternalPreNodeID != 0) {
			if (annotIndex == lastAnnotIndex
			    || (nextInternalPreNodeID != 0
				&& currentInternalPosition <
				getPosition(annot))) {
				connectPreNodeToTheNext(&currentPreNodeID,
							nextInternalPreNodeID,
							&currentPosition,
							sequenceIndex,
							isReference,
							preGraph);
				nextInternalPreNodeID =
				    chooseNextInternalPreNode
				    (currentPreNodeID, sequenceIndex,
				     preGraph, chains);
				currentInternalPosition +=
				    getPreNodeLength_pg(currentPreNodeID,
							preGraph);

			} else {
				connectAnnotation(&currentPreNodeID, annot,
						  &currentPosition,
						  sequenceIndex, isReference,
						  preGraph);
				annot = getNextAnnotation(annot);
				annotIndex++;
			}
		}
	}

	if (rdmaps->referenceCount > 0) {
		allocatePreMarkerSpace_pg(preGraph);
		createPreMarkers(rdmaps, preGraph, chains);	
	}

#ifdef _OPENMP
	free(annotationOffset);
	annotationOffset = NULL;
#endif
}
Beispiel #8
0
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename,
					       short int accelerationBits, Graph * graph, boolean double_strand)
{
	FILE *file = fopen(preGraphFilename, "r");
	const int maxline = MAXLINE;
	char line[MAXLINE];
	char c;
	int wordLength;
	Coordinate lineLength, kmerCount;
	Kmer word;
	Kmer antiWord;
	KmerOccurenceTable *kmerTable = NULL;
	KmerOccurence *kmerOccurences, *kmerOccurencePtr;
	Coordinate kmerOccurenceIndex;
	IDnum index;
	IDnum nodeID = 0;
	IDnum *accelPtr = NULL;
	KmerKey lastHeader = 0;
	KmerKey header;
	Nucleotide nucleotide;

	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	// Count kmers
	printf("Scanning pre-graph file %s for k-mers\n",
	       preGraphFilename);

	// First  line
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	sscanf(line, "%*i\t%*i\t%i\n", &wordLength);

	// Initialize kmer occurence table:
	kmerTable = mallocOrExit(1, KmerOccurenceTable);
	if (accelerationBits > 2 * wordLength)
		accelerationBits = 2 * wordLength;

	if (accelerationBits > 32)
		accelerationBits = 32;

	if (accelerationBits > 0) {
		kmerTable->accelerationBits = accelerationBits;
		kmerTable->accelerationTable =
		    callocOrExit((((size_t) 1) << accelerationBits) + 1,
			   IDnum);
		accelPtr = kmerTable->accelerationTable;
		kmerTable->accelerationShift =
		    (short int) 2 *wordLength - accelerationBits;
	} else {
		kmerTable->accelerationBits = 0;
		kmerTable->accelerationTable = NULL;
		kmerTable->accelerationShift = 0;
	}

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	kmerCount = 0;
	while (line[0] == 'N') {
		lineLength = 0;
		while ((c = getc(file)) != EOF && c != '\n')
			lineLength++;
		kmerCount += lineLength - wordLength + 1;
		if (fgets(line, maxline, file) == NULL)
			break;
	}
	fclose(file);

	// Create table
	printf("%li kmers found\n", (long) kmerCount);
	kmerOccurences = callocOrExit(kmerCount, KmerOccurence);
	kmerOccurencePtr = kmerOccurences;
	kmerOccurenceIndex = 0;
	kmerTable->kmerTable = kmerOccurences;
	kmerTable->kmerTableSize = kmerCount;

	// Fill table
	file = fopen(preGraphFilename, "r");
	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	while (line[0] == 'N') {
		nodeID++;

		// Fill in the initial word : 
		clearKmer(&word);
		clearKmer(&antiWord);

		for (index = 0; index < wordLength - 1; index++) {
			c = getc(file);
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else if (c == '\n')
				exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
			else
				nucleotide = ADENINE;
				

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}
		}

		// Scan through node
		index = 0;
		while((c = getc(file)) != '\n' && c != EOF) {
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else
				nucleotide = ADENINE;

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}

			if (!double_strand || compareKmers(&word, &antiWord) <= 0) {
				copyKmers(&kmerOccurencePtr->kmer, &word);
				kmerOccurencePtr->nodeID = nodeID;
				kmerOccurencePtr->position =
				    index;
			} else {
				copyKmers(&kmerOccurencePtr->kmer, &antiWord);
				kmerOccurencePtr->nodeID = -nodeID;
				kmerOccurencePtr->position =
				    getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index;
			}

			kmerOccurencePtr++;
			kmerOccurenceIndex++;
			index++;
		}

		if (fgets(line, maxline, file) == NULL)
			break;
	}

	fclose(file);

	// Sort table
	qsort(kmerOccurences, kmerCount, sizeof(KmerOccurence),
	      compareKmerOccurences);

	// Fill up acceleration table
	if (kmerTable->accelerationTable != NULL) {
		*accelPtr = (IDnum) 0;
		for (kmerOccurenceIndex = 0;
		     kmerOccurenceIndex < kmerCount;
		     kmerOccurenceIndex++) {
			header =
			    keyInAccelerationTable(&kmerOccurences
						   [kmerOccurenceIndex].
						   kmer, kmerTable);
			while (lastHeader < header) {
				lastHeader++;
				accelPtr++;
				*accelPtr = kmerOccurenceIndex;
			}
		}

		while (lastHeader < (KmerKey) 1 << accelerationBits) {
			lastHeader++;
			accelPtr++;
			*accelPtr = kmerCount;
		}
	}

	return kmerTable;
}