int main(int argc, char **argv) { ReadSet *allSequences = NULL; SplayTable *splayTable; int hashLength, hashLengthStep, hashLengthMax, h; char *directory, *filename, *seqFilename, *buf; boolean double_strand = true; boolean multiple_kmers = false; DIR *dir; setProgramName("velveth"); if (argc < 4) { printf("velveth - simple hashing program\n"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n"); printf("This is free software; see the source for copying conditions. There is NO\n"); printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n"); printf("Compilation settings:\n"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); printf("\n"); printUsage(); return 0; } if ( strstr(argv[2],"," ) ) { sscanf(argv[2],"%d,%d,%d",&hashLength,&hashLengthMax,&hashLengthStep); multiple_kmers = true; } else { hashLength = atoi(argv[2]); hashLengthMax = hashLength + 1; hashLengthStep = 2; } if (hashLengthMax > MAXKMERLENGTH) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLength, MAXKMERLENGTH); hashLength = MAXKMERLENGTH; } else if (hashLength <= 0) { velvetLog("Invalid hash length: %s\n", argv[2]); printUsage(); return 0; } else if ( hashLength > hashLengthMax ) { velvetLog("hashLengthMin <= hashLengthMax is required %s", argv[2]); printUsage(); return 0; } if (hashLength % 2 == 0) { velvetLog ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n", hashLength, hashLength - 1); hashLength--; } if (hashLengthStep % 2 == 1) { velvetLog ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n", hashLengthStep, hashLengthStep - 1); hashLengthStep--; } for (h = hashLength; h < hashLengthMax; h += hashLengthStep) { resetWordFilter(h); buf = mallocOrExit(strlen(argv[1]) + 100, char); if ( multiple_kmers ) { sprintf(buf,"%s_%d",argv[1],h); directory = mallocOrExit(strlen(buf) + 100, char); strcpy(directory,buf); } else directory = argv[1]; filename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); dir = opendir(directory); if (dir == NULL) mkdir(directory, 0777); else { sprintf(buf, "%s/PreGraph", directory); remove(buf); sprintf(buf, "%s/Graph", directory); remove(buf); sprintf(buf, "%s/Graph2", directory); remove(buf); sprintf(buf, "%s/Graph3", directory); remove(buf); sprintf(buf, "%s/Graph4", directory); remove(buf); sprintf(buf, "%s/Log", directory); remove(buf); } logInstructions(argc, argv, directory); strcpy(seqFilename, directory); strcat(seqFilename, "/Sequences"); if ( h == hashLength ) { parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand); } else { sprintf(buf,"ln -s ../%s_%d/Sequences %s",argv[1],hashLength,seqFilename); system(buf); } splayTable = newSplayTable(h, double_strand); if (!allSequences) allSequences = importReadSet(seqFilename); velvetLog("%li sequences in total.\n", (long) allSequences->readCount); strcpy(filename, directory); strcat(filename, "/Roadmaps"); inputSequenceArrayIntoSplayTableAndArchive(allSequences, splayTable, filename, seqFilename); destroySplayTable(splayTable); if (dir) closedir(dir); if (directory != argv[1]) free(directory); free(filename); free(seqFilename); free(buf); }
// Imports roadmap from the appropriate file format // Memory allocated within the function RoadMapArray *importRoadMapArray(char *filename) { FILE *file; const int maxline = 100; char *line = mallocOrExit(maxline, char); RoadMap *array; RoadMap *rdmap = NULL; IDnum rdmapIndex = 0; IDnum seqID; Coordinate position, start, finish; Annotation *nextAnnotation; RoadMapArray *result = mallocOrExit(1, RoadMapArray); IDnum sequenceCount; IDnum annotationCount = 0; short short_var; long long_var; long long longlong_var, longlong_var2, longlong_var3; printf("Reading roadmap file %s\n", filename); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); sscanf(line, "%ld\t%i\t%hi\n", &long_var, &(result->WORDLENGTH), &short_var); sequenceCount = (IDnum) long_var; resetWordFilter(result->WORDLENGTH); result->length = sequenceCount; array = mallocOrExit(sequenceCount, RoadMap); result->array = array; result->double_strand = (boolean) short_var; while (fgets(line, maxline, file) != NULL) if (line[0] != 'R') annotationCount++; result->annotations = callocOrExit(annotationCount, Annotation); nextAnnotation = result->annotations; fclose(file); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); while (fgets(line, maxline, file) != NULL) { if (line[0] == 'R') { rdmap = getRoadMapInArray(result, rdmapIndex++); rdmap->annotationCount = 0; } else { sscanf(line, "%ld\t%lld\t%lld\t%lld\n", &long_var, &longlong_var, &longlong_var2, &longlong_var3); seqID = (IDnum) long_var; position = (Coordinate) longlong_var; start = (Coordinate) longlong_var2; finish = (Coordinate) longlong_var3; nextAnnotation->sequenceID = seqID; nextAnnotation->position = position; nextAnnotation->start.coord = start; nextAnnotation->finish.coord = finish; if (seqID > 0) nextAnnotation->length = finish - start; else nextAnnotation->length = start - finish; rdmap->annotationCount++; nextAnnotation++; } } printf("%d roadmaps reads\n", rdmapIndex); fclose(file); free(line); return result; }
int main(int argc, char **argv) { ReadSet *allSequences = NULL; SplayTable *splayTable; int hashLength, hashLengthStep, hashLengthMax, h; char *directory, *filename, *seqFilename, *baseSeqName, *buf; char * token; boolean double_strand = true; boolean noHash = false; boolean multiple_kmers = false; char buffer[100]; DIR *dir; setProgramName("velveth"); if (argc < 4) { printf("velveth - simple hashing program\n"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n"); printf("This is free software; see the source for copying conditions. There is NO\n"); printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n"); printf("Compilation settings:\n"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); #ifdef _OPENMP puts("OPENMP"); #endif #ifdef LONGSEQUENCES puts("LONGSEQUENCES"); #endif #ifdef BIGASSEMBLY puts("BIGASSEMBLY"); #endif #ifdef COLOR puts("COLOR"); #endif #ifdef DEBUG puts("DEBUG"); #endif printf("\n"); printUsage(); return 0; } strcpy(buffer, argv[2]); token = strtok(buffer, ","); hashLength = atoi(token); token = strtok(NULL, ","); if (token == NULL) { multiple_kmers = false; hashLengthMax = hashLength + 1; } else { multiple_kmers = true; hashLengthMax = atoi(token); } token = strtok(NULL, ","); if (token == NULL) { hashLengthStep = 2; } else { hashLengthStep = atoi(token); } if (hashLength > MAXKMERLENGTH) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLength, MAXKMERLENGTH); hashLength = MAXKMERLENGTH; } if (hashLength <= 0) { velvetLog("Invalid hash length: %s\n", argv[2]); printUsage(); return 0; } if (hashLength % 2 == 0) { velvetLog ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n", hashLength, hashLength - 1); hashLength--; } if (multiple_kmers) { if (hashLengthMax > MAXKMERLENGTH + 1) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLengthMax, MAXKMERLENGTH + 1); hashLengthMax = MAXKMERLENGTH + 1; } if (hashLengthMax <= hashLength) { velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]); printUsage(); return 0; } if (hashLengthStep <= 0) { velvetLog("Non-positive hash length! Setting it to 2\n"); hashLengthStep = 2; } if (hashLengthStep % 2 == 1) { velvetLog ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n", hashLengthStep, hashLengthStep + 1); hashLengthStep++; } } // check if binary sequences should be used int argIndex; for (argIndex = 3; argIndex < argc; argIndex++) if (strcmp(argv[argIndex], "-create_binary") == 0 || strcmp(argv[argIndex], "-reuse_binary") == 0) setCreateBinary(true); for (h = hashLength; h < hashLengthMax; h += hashLengthStep) { resetWordFilter(h); buf = mallocOrExit(2 * strlen(argv[1]) + 500, char); if ( multiple_kmers ) { sprintf(buf,"%s_%d",argv[1],h); directory = mallocOrExit(strlen(buf) + 100, char); strcpy(directory,buf); } else directory = argv[1]; filename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); baseSeqName = mallocOrExit(100, char); dir = opendir(directory); if (dir == NULL) mkdir(directory, 0777); else { sprintf(buf, "%s/PreGraph", directory); remove(buf); sprintf(buf, "%s/Graph", directory); remove(buf); sprintf(buf, "%s/Graph2", directory); remove(buf); sprintf(buf, "%s/Graph3", directory); remove(buf); sprintf(buf, "%s/Graph4", directory); remove(buf); } logInstructions(argc, argv, directory); strcpy(seqFilename, directory); if (isCreateBinary()) { // use the CNY unified seq writer strcpy(baseSeqName, "/CnyUnifiedSeq"); // remove other style sequences file sprintf(buf, "%s/Sequences", directory); remove(buf); } else { strcpy(baseSeqName, "/Sequences"); // remove other style sequences file sprintf(buf, "%s/CnyUnifiedSeq", directory); remove(buf); sprintf(buf, "%s/CnyUnifiedSeq.names", directory); remove(buf); } strcat(seqFilename, baseSeqName); if ( h == hashLength ) { parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand, &noHash); } else { sprintf(buf,"rm -f %s",seqFilename); if (system(buf)) { velvetLog("Command failed!\n"); velvetLog("%s\n", buf); #ifdef DEBUG abort(); #endif exit(1); } if (argv[1][0] == '/') sprintf(buf,"ln -s %s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename); else sprintf(buf,"ln -s `pwd`/%s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename); if (system(buf)) { velvetLog("Command failed!\n"); velvetLog("%s\n", buf); #ifdef DEBUG abort(); #endif exit(1); } } if (noHash) continue; splayTable = newSplayTable(h, double_strand); if (isCreateBinary()) { allSequences = importCnyReadSet(seqFilename); } else { allSequences = importReadSet(seqFilename); } velvetLog("%li sequences in total.\n", (long) allSequences->readCount); strcpy(filename, directory); strcat(filename, "/Roadmaps"); inputSequenceArrayIntoSplayTableAndArchive(allSequences, splayTable, filename, seqFilename); destroySplayTable(splayTable); if (dir) closedir(dir); if (directory != argv[1]) free(directory); free(filename); free(seqFilename); free(baseSeqName); free(buf); if (allSequences) { destroyReadSet(allSequences); } }