Пример #1
0
// Entry point of velveth.
//
// Usage, as read from the argument handling below:
//   argv[1]  output directory (used as a prefix "<dir>_<k>" when several
//            k-mer lengths are requested)
//   argv[2]  either a single k-mer length, or "min,max[,step]" where max is
//            an exclusive upper bound (the loop runs while h < max)
//   argv[2..] are also handed to parseDataAndReadFiles() for the first
//            k-mer length.
//
// For each k-mer length the function prepares the directory, obtains the
// Sequences file (parsed for the first length, symlinked for the others),
// and writes a Roadmaps file through
// inputSequenceArrayIntoSplayTableAndArchive().
//
// Prints the banner/usage and returns 0 when fewer than 4 arguments are
// given or when the k-mer specification is invalid.
int main(int argc, char **argv)
{
	ReadSet *allSequences = NULL;
	SplayTable *splayTable;
	int hashLength, hashLengthStep, hashLengthMax, h;
	char *directory, *filename, *seqFilename, *buf;
	boolean double_strand = true;
	boolean multiple_kmers = false;
	DIR *dir;

	setProgramName("velveth");

	if (argc < 4) {
		printf("velveth - simple hashing program\n");
		printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
		       RELEASE_NUMBER, UPDATE_NUMBER);
		printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n");
		printf("This is free software; see the source for copying conditions.  There is NO\n");
		printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n");
		printf("Compilation settings:\n");
		printf("CATEGORIES = %i\n", CATEGORIES);
		printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
		printf("\n");
		printUsage();
		return 0;
	}

	// The step defaults to 2 so that "min,max" (no step) is well defined
	hashLengthStep = 2;

	if (strstr(argv[2], ",")) {
		// At least the minimum and the maximum must be readable;
		// otherwise the variables below would be used uninitialised
		if (sscanf(argv[2], "%d,%d,%d", &hashLength, &hashLengthMax,
			   &hashLengthStep) < 2) {
			velvetLog("Invalid hash length: %s\n", argv[2]);
			printUsage();
			return 0;
		}
		multiple_kmers = true;
	} else {
		// hashLengthMax is derived further down, once hashLength has
		// been clamped and made odd
		hashLength = atoi(argv[2]);
	}

	// Validate the starting k-mer length on its own
	if (hashLength > MAXKMERLENGTH) {
		velvetLog
		    ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
		     hashLength, MAXKMERLENGTH);
		hashLength = MAXKMERLENGTH;
	}
	if (hashLength <= 0) {
		velvetLog("Invalid hash length: %s\n", argv[2]);
		printUsage();
		return 0;
	}
	if (hashLength % 2 == 0) {
		velvetLog
		    ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
		     hashLength, hashLength - 1);
		hashLength--;
	}

	if (multiple_kmers) {
		// hashLengthMax is exclusive, hence the "+ 1": the largest
		// k-mer length actually hashed never exceeds MAXKMERLENGTH
		if (hashLengthMax > MAXKMERLENGTH + 1) {
			velvetLog
			    ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
			     hashLengthMax, MAXKMERLENGTH + 1);
			hashLengthMax = MAXKMERLENGTH + 1;
		}
		if (hashLength > hashLengthMax) {
			velvetLog("hashLengthMin <= hashLengthMax is required %s", argv[2]);
			printUsage();
			return 0;
		}

		// A zero or negative step would keep the loop below from ever
		// terminating
		if (hashLengthStep <= 0) {
			velvetLog("Non-positive hash length step! Setting it to 2\n");
			hashLengthStep = 2;
		}
		// Round odd steps up (not down, which would turn 1 into 0) so
		// that every k-mer length visited stays odd
		if (hashLengthStep % 2 == 1) {
			velvetLog
			    ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n",
			     hashLengthStep, hashLengthStep + 1);
			hashLengthStep++;
		}
	} else {
		// Single k-mer length: run the loop exactly once
		hashLengthMax = hashLength + 1;
	}

	for (h = hashLength; h < hashLengthMax; h += hashLengthStep) {

		resetWordFilter(h);

		// The longest string built in buf is the "ln -s" command below,
		// which contains argv[1] twice plus fixed text and two numbers
		buf = mallocOrExit(2 * strlen(argv[1]) + 500, char);

		if (multiple_kmers) {
			sprintf(buf, "%s_%d", argv[1], h);
			directory = mallocOrExit(strlen(buf) + 100, char);
			strcpy(directory, buf);
		} else
			directory = argv[1];

		filename = mallocOrExit(strlen(directory) + 100, char);
		seqFilename = mallocOrExit(strlen(directory) + 100, char);

		dir = opendir(directory);

		if (dir == NULL)
			mkdir(directory, 0777);
		else {
			// Directory already exists: remove the listed files
			// left there earlier
			sprintf(buf, "%s/PreGraph", directory);
			remove(buf);
			sprintf(buf, "%s/Graph", directory);
			remove(buf);
			sprintf(buf, "%s/Graph2", directory);
			remove(buf);
			sprintf(buf, "%s/Graph3", directory);
			remove(buf);
			sprintf(buf, "%s/Graph4", directory);
			remove(buf);
			sprintf(buf, "%s/Log", directory);
			remove(buf);
		}

		logInstructions(argc, argv, directory);

		strcpy(seqFilename, directory);
		strcat(seqFilename, "/Sequences");
		if (h == hashLength) {
			// Only the first k-mer length parses the input files
			parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand);
		} else {
			// Later directories link to the first directory's file.
			// NOTE(review): the link target is relative ("../") and
			// the command's exit status is ignored - confirm this
			// is acceptable when argv[1] contains a path.
			sprintf(buf, "ln -s ../%s_%d/Sequences %s", argv[1], hashLength, seqFilename);
			system(buf);
		}

		splayTable = newSplayTable(h, double_strand);

		// The read set is loaded once and reused for every k-mer length
		if (!allSequences)
			allSequences = importReadSet(seqFilename);
		velvetLog("%li sequences in total.\n", (long) allSequences->readCount);

		strcpy(filename, directory);
		strcat(filename, "/Roadmaps");
		inputSequenceArrayIntoSplayTableAndArchive(allSequences,
							   splayTable, filename, seqFilename);

		destroySplayTable(splayTable);
		if (dir)
			closedir(dir);
		// directory aliases argv[1] in single k-mer mode and must not
		// be freed in that case
		if (directory != argv[1])
			free(directory);
		free(filename);
		free(seqFilename);
		free(buf);
	}
Пример #2
0
// Imports a roadmap array from the text file `filename`.
//
// Layout, as parsed below:
//   - one header line "<sequence count>\t<word length>\t<double strand flag>";
//   - then, for each roadmap, a line whose first character is 'R', followed
//     by its annotation lines "<seqID>\t<position>\t<start>\t<finish>".
//
// The returned RoadMapArray, its RoadMap array and its Annotation array are
// all allocated within the function. resetWordFilter() is called with the
// word length read from the header.
//
// Calls exitErrorf() if the file cannot be opened, is empty, or does not
// follow the layout above.
RoadMapArray *importRoadMapArray(char *filename)
{
	FILE *file;
	const int maxline = 100;
	char *line = mallocOrExit(maxline, char);
	RoadMap *array;
	RoadMap *rdmap = NULL;
	IDnum rdmapIndex = 0;
	IDnum seqID;
	Coordinate position, start, finish;
	Annotation *nextAnnotation;
	RoadMapArray *result = mallocOrExit(1, RoadMapArray);
	IDnum sequenceCount;
	IDnum annotationCount = 0;
	short short_var;
	long long_var;
	long long longlong_var, longlong_var2, longlong_var3;

	printf("Reading roadmap file %s\n", filename);

	file = fopen(filename, "r");
	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);

	// Header line: sequence count, word length, double strand flag
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename);
	if (sscanf(line, "%ld\t%i\t%hi\n", &long_var, &(result->WORDLENGTH),
		   &short_var) != 3 || long_var < 0)
		exitErrorf(EXIT_FAILURE, true, "%s has a malformed header.", filename);
	sequenceCount = (IDnum) long_var;
	resetWordFilter(result->WORDLENGTH);
	result->length = sequenceCount;
	array = mallocOrExit(sequenceCount, RoadMap);
	result->array = array;
	result->double_strand = (boolean) short_var;

	// First pass: every line that does not start a roadmap is counted as
	// an annotation, so that all annotations fit in a single allocation
	while (fgets(line, maxline, file) != NULL)
		if (line[0] != 'R')
			annotationCount++;

	result->annotations = callocOrExit(annotationCount, Annotation);
	nextAnnotation = result->annotations;

	// Second pass over the same stream: go back to the start and skip
	// the header that was already parsed
	rewind(file);
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename);

	while (fgets(line, maxline, file) != NULL) {
		if (line[0] == 'R') {
			// result->array only holds sequenceCount entries
			if (rdmapIndex >= sequenceCount)
				exitErrorf(EXIT_FAILURE, true,
					   "%s holds more roadmaps than the %ld announced in its header.",
					   filename, (long) sequenceCount);
			rdmap = getRoadMapInArray(result, rdmapIndex++);
			rdmap->annotationCount = 0;
		} else {
			// An annotation needs a roadmap to be attached to
			if (rdmap == NULL)
				exitErrorf(EXIT_FAILURE, true,
					   "%s has an annotation before its first roadmap.",
					   filename);
			if (sscanf(line, "%ld\t%lld\t%lld\t%lld\n", &long_var,
				   &longlong_var, &longlong_var2,
				   &longlong_var3) != 4)
				exitErrorf(EXIT_FAILURE, true,
					   "Malformed annotation line in %s: %s",
					   filename, line);
			seqID = (IDnum) long_var;
			position = (Coordinate) longlong_var;
			start = (Coordinate) longlong_var2;
			finish = (Coordinate) longlong_var3;
			nextAnnotation->sequenceID = seqID;
			nextAnnotation->position = position;
			nextAnnotation->start.coord = start;
			nextAnnotation->finish.coord = finish;

			// For non-positive sequence IDs the length is taken
			// as start - finish instead of finish - start
			if (seqID > 0)
				nextAnnotation->length = finish - start;
			else
				nextAnnotation->length = start - finish;

			rdmap->annotationCount++;
			nextAnnotation++;
		}
	}

	// IDnum's width is not visible here; print it through a long
	printf("%ld roadmaps reads\n", (long) rdmapIndex);

	fclose(file);
	free(line);
	return result;
}
Пример #3
0
int main(int argc, char **argv)
{
	ReadSet *allSequences = NULL;
	SplayTable *splayTable;
	int hashLength, hashLengthStep, hashLengthMax, h;
	char *directory, *filename, *seqFilename, *baseSeqName, *buf;
	char * token;
	boolean double_strand = true;
	boolean noHash = false;
	boolean multiple_kmers = false;
	char buffer[100];
	DIR *dir;

	setProgramName("velveth");

	if (argc < 4) {
		printf("velveth - simple hashing program\n");
		printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
		       RELEASE_NUMBER, UPDATE_NUMBER);
		printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n");
		printf("This is free software; see the source for copying conditions.  There is NO\n");
		printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n");
		printf("Compilation settings:\n");
		printf("CATEGORIES = %i\n", CATEGORIES);
		printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
#ifdef _OPENMP
		puts("OPENMP");
#endif
#ifdef LONGSEQUENCES
		puts("LONGSEQUENCES");
#endif
#ifdef BIGASSEMBLY
		puts("BIGASSEMBLY");
#endif
#ifdef COLOR
		puts("COLOR");
#endif
#ifdef DEBUG
		puts("DEBUG");
#endif
		printf("\n");
		printUsage();
		return 0;
	}

	strcpy(buffer, argv[2]);
	token = strtok(buffer, ",");
	hashLength = atoi(token);
	token = strtok(NULL, ",");
	if (token == NULL) {
		multiple_kmers = false;
		hashLengthMax = hashLength + 1;
	} else {
		multiple_kmers = true;
		hashLengthMax = atoi(token);
	}
	token = strtok(NULL, ",");
	if (token == NULL) {
		hashLengthStep = 2;
	} else {
		hashLengthStep = atoi(token);
	}

	if (hashLength > MAXKMERLENGTH) {
		velvetLog
		    ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
		     hashLength, MAXKMERLENGTH);
		hashLength = MAXKMERLENGTH;
	} 
	if (hashLength <= 0) {
		velvetLog("Invalid hash length: %s\n", argv[2]);
		printUsage();
		return 0;
	} 
	if (hashLength % 2 == 0) {
		velvetLog
		    ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
		     hashLength, hashLength - 1);
		hashLength--;
	} 

	if (multiple_kmers) {
		if (hashLengthMax > MAXKMERLENGTH + 1) {
			velvetLog
			    ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
			     hashLengthMax, MAXKMERLENGTH + 1);
			hashLengthMax = MAXKMERLENGTH + 1;
		} 
		if (hashLengthMax <= hashLength) {
			velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
			printUsage();
			return 0;
		} 

		if (hashLengthStep <= 0) {
			velvetLog("Non-positive hash length! Setting it to 2\n");
			hashLengthStep = 2;
		}
		if (hashLengthStep % 2 == 1) {
			velvetLog
			    ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n",
			     hashLengthStep, hashLengthStep + 1);
			hashLengthStep++;
		}
	}

	// check if binary sequences should be used
	int argIndex;
	for (argIndex = 3; argIndex < argc; argIndex++)
		if (strcmp(argv[argIndex], "-create_binary") == 0 || strcmp(argv[argIndex], "-reuse_binary") == 0)
			setCreateBinary(true);

	for (h = hashLength; h < hashLengthMax; h += hashLengthStep) {

		resetWordFilter(h);

		buf = mallocOrExit(2 * strlen(argv[1]) + 500, char);

		if ( multiple_kmers ) {
			sprintf(buf,"%s_%d",argv[1],h);
			directory = mallocOrExit(strlen(buf) + 100, char);
			strcpy(directory,buf);
		} else 
			directory = argv[1];

		filename = mallocOrExit(strlen(directory) + 100, char);
		seqFilename = mallocOrExit(strlen(directory) + 100, char);
		baseSeqName = mallocOrExit(100, char);

		dir = opendir(directory);

		if (dir == NULL)
			mkdir(directory, 0777);
		else {
			sprintf(buf, "%s/PreGraph", directory);
			remove(buf);
			sprintf(buf, "%s/Graph", directory);
			remove(buf);
			sprintf(buf, "%s/Graph2", directory);
			remove(buf);
			sprintf(buf, "%s/Graph3", directory);
			remove(buf);
			sprintf(buf, "%s/Graph4", directory);
			remove(buf);
		}

		logInstructions(argc, argv, directory);

		strcpy(seqFilename, directory);
		if (isCreateBinary()) {
			// use the CNY unified seq writer
			strcpy(baseSeqName, "/CnyUnifiedSeq");
			// remove other style sequences file
			sprintf(buf, "%s/Sequences", directory);
			remove(buf);
		} else {
			strcpy(baseSeqName, "/Sequences");
			// remove other style sequences file
			sprintf(buf, "%s/CnyUnifiedSeq", directory);
			remove(buf);
			sprintf(buf, "%s/CnyUnifiedSeq.names", directory);
			remove(buf);
		}
		strcat(seqFilename, baseSeqName);

		if ( h == hashLength ) {
			parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand, &noHash);
		} else {
			sprintf(buf,"rm -f %s",seqFilename);
			if (system(buf)) {
				velvetLog("Command failed!\n");
				velvetLog("%s\n", buf);
#ifdef DEBUG
				abort();
#endif
				exit(1);
			}
			if (argv[1][0] == '/')
				sprintf(buf,"ln -s %s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
			else
				sprintf(buf,"ln -s `pwd`/%s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
			if (system(buf)) {
				velvetLog("Command failed!\n");
				velvetLog("%s\n", buf);
#ifdef DEBUG
				abort();
#endif
				exit(1);
			}
		}

		if (noHash)
			continue;

		splayTable = newSplayTable(h, double_strand);
		if (isCreateBinary()) {
			allSequences = importCnyReadSet(seqFilename);
		} else {
			allSequences = importReadSet(seqFilename);
		}
		velvetLog("%li sequences in total.\n", (long) allSequences->readCount);

		strcpy(filename, directory);
		strcat(filename, "/Roadmaps");
		inputSequenceArrayIntoSplayTableAndArchive(allSequences,
							   splayTable, filename, seqFilename);

		destroySplayTable(splayTable);
		if (dir)
			closedir(dir);
		if (directory != argv[1])
			free(directory);
		free(filename);
		free(seqFilename);
		free(baseSeqName);
		free(buf);
		if (allSequences) {
			destroyReadSet(allSequences);
		}
	}