int main(int argc, char **argv) { ReadSet *sequences = NULL; RoadMapArray *rdmaps; PreGraph *preGraph; Graph *graph; char *directory, *graphFilename, *connectedGraphFilename, *preGraphFilename, *seqFilename, *roadmapFilename, *lowCovContigsFilename, *highCovContigsFilename; double coverageCutoff = -1; double longCoverageCutoff = -1; double maxCoverageCutoff = -1; double expectedCoverage = -1; Coordinate minContigLength = -1; Coordinate minContigKmerLength; boolean *dubious = NULL; Coordinate insertLength[CATEGORIES]; Coordinate insertLengthLong = -1; Coordinate std_dev[CATEGORIES]; Coordinate std_dev_long = -1; short int accelerationBits = 24; boolean readTracking = false; boolean exportAssembly = false; boolean unusedReads = false; boolean estimateCoverage = false; boolean estimateCutoff = false; boolean exportAlignments = false; FILE *file; int arg_index, arg_int; double arg_double; char *arg; ShortLength *sequenceLengths = NULL; Category cat; boolean scaffolding = true; int pebbleRounds = 1; long long longlong_var; short int short_var; boolean exportFilteredNodes = false; int clean = 0; boolean conserveLong = false; boolean shadows[CATEGORIES]; int coverageMask = 1; SequencesReader *seqReadInfo = NULL; setProgramName("velvetg"); for (cat = 0; cat < CATEGORIES; cat++) { insertLength[cat] = -1; std_dev[cat] = -1; shadows[cat] = false; } // Error message if (argc == 1) { puts("velvetg - de Bruijn graph construction, error removal and repeat resolution"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); puts("Copyright 2007, 2008 Daniel Zerbino ([email protected])"); puts("This is free software; see the source for copying conditions. There is NO"); puts("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."); puts("Compilation settings:"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); #ifdef _OPENMP puts("OPENMP"); #endif #ifdef LONGSEQUENCES puts("LONGSEQUENCES"); #endif #ifdef BIGASSEMBLY puts("BIGASSEMBLY"); #endif #ifdef COLOR puts("COLOR"); #endif #ifdef DEBUG puts("DEBUG"); #endif puts(""); printUsage(); return 1; } if (strcmp(argv[1], "--help") == 0) { printUsage(); return 0; } // Memory allocation directory = argv[1]; graphFilename = mallocOrExit(strlen(directory) + 100, char); connectedGraphFilename = mallocOrExit(strlen(directory) + 100, char); preGraphFilename = mallocOrExit(strlen(directory) + 100, char); roadmapFilename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); lowCovContigsFilename = mallocOrExit(strlen(directory) + 100, char); highCovContigsFilename = mallocOrExit(strlen(directory) + 100, char); // Argument parsing for (arg_index = 2; arg_index < argc; arg_index++) { arg = argv[arg_index++]; if (arg_index >= argc) { velvetLog("Unusual number of arguments!\n"); printUsage(); #ifdef DEBUG abort(); #endif exit(1); } if (strcmp(arg, "-cov_cutoff") == 0) { if (strcmp(argv[arg_index], "auto") == 0) { estimateCutoff = true; } else { sscanf(argv[arg_index], "%lf", &coverageCutoff); } } else if (strcmp(arg, "-long_cov_cutoff") == 0) { sscanf(argv[arg_index], "%lf", &longCoverageCutoff); } else if (strcmp(arg, "-exp_cov") == 0) { if (strcmp(argv[arg_index], "auto") == 0) { estimateCoverage = true; readTracking = true; } else { sscanf(argv[arg_index], "%lf", &expectedCoverage); if (expectedCoverage > 0) readTracking = true; } } else if (strcmp(arg, "-ins_length") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); insertLength[0] = (Coordinate) longlong_var; if (insertLength[0] < 0) { velvetLog("Invalid insert length: %lli\n", (long long) insertLength[0]); #ifdef DEBUG abort(); #endif exit(1); } } else if (strcmp(arg, "-ins_length_sd") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); std_dev[0] = (Coordinate) longlong_var; if (std_dev[0] < 0) { velvetLog("Invalid std deviation: %lli\n", (long long) std_dev[0]); #ifdef DEBUG abort(); #endif exit(1); } } else if (strcmp(arg, "-ins_length_long") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); insertLengthLong = (Coordinate) longlong_var; } else if (strcmp(arg, "-ins_length_long_sd") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); std_dev_long = (Coordinate) longlong_var; } else if (strncmp(arg, "-ins_length", 11) == 0 && strchr(arg, 'd') == NULL) { sscanf(arg, "-ins_length%hi", &short_var); cat = (Category) short_var; if (cat < 1 || cat > CATEGORIES) { velvetLog("Unknown option: %s\n", arg); #ifdef DEBUG abort(); #endif exit(1); } sscanf(argv[arg_index], "%lli", &longlong_var); insertLength[cat - 1] = (Coordinate) longlong_var; if (insertLength[cat - 1] < 0) { velvetLog("Invalid insert length: %lli\n", (long long) insertLength[cat - 1]); #ifdef DEBUG abort(); #endif exit(1); } } else if (strncmp(arg, "-ins_length", 11) == 0) { sscanf(arg, "-ins_length%hi_sd", &short_var); cat = (Category) short_var; if (cat < 1 || cat > CATEGORIES) { velvetLog("Unknown option: %s\n", arg); #ifdef DEBUG abort(); #endif exit(1); } sscanf(argv[arg_index], "%lli", &longlong_var); std_dev[cat - 1] = (Coordinate) longlong_var; if (std_dev[cat - 1] < 0) { velvetLog("Invalid std deviation: %lli\n", (long long) std_dev[cat - 1]); #ifdef DEBUG abort(); #endif exit(1); } } else if (strcmp(arg, "-read_trkg") == 0) { readTracking = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-scaffolding") == 0) { scaffolding = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-exportFiltered") == 0) { exportFilteredNodes = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-amos_file") == 0) { exportAssembly = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-alignments") == 0) { exportAlignments = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-min_contig_lgth") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); minContigLength = (Coordinate) longlong_var; } else if (strcmp(arg, "-coverage_mask") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); coverageMask = (IDnum) longlong_var; } else if (strcmp(arg, "-accel_bits") == 0) { sscanf(argv[arg_index], "%hi", &accelerationBits); if (accelerationBits < 0) { velvetLog ("Illegal acceleration parameter: %s\n", argv[arg_index]); printUsage(); return -1; } } else if (strcmp(arg, "-max_branch_length") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setMaxReadLength(arg_int); setLocalMaxReadLength(arg_int); } else if (strcmp(arg, "-max_divergence") == 0) { sscanf(argv[arg_index], "%lf", &arg_double); setMaxDivergence(arg_double); setLocalMaxDivergence(arg_double); } else if (strcmp(arg, "-max_gap_count") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setMaxGaps(arg_int); setLocalMaxGaps(arg_int); } else if (strcmp(arg, "-min_pair_count") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setUnreliableConnectionCutoff(arg_int); } else if (strcmp(arg, "-max_coverage") == 0) { sscanf(argv[arg_index], "%lf", &maxCoverageCutoff); } else if (strcmp(arg, "-long_mult_cutoff") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setMultiplicityCutoff(arg_int); } else if (strcmp(arg, "-paired_exp_fraction") == 0) { sscanf(argv[arg_index], "%lf", &arg_double); setPairedExpFraction(arg_double); } else if (strcmp(arg, "-clean") == 0) { if (strcmp(argv[arg_index], "yes") == 0) clean = 1; } else if (strcmp(arg, "-very_clean") == 0) { if (strcmp(argv[arg_index], "yes") == 0) clean = 2; } else if (strcmp(arg, "-conserveLong") == 0) { if (strcmp(argv[arg_index], "yes") == 0) conserveLong = 2; } else if (strcmp(arg, "-unused_reads") == 0) { unusedReads = (strcmp(argv[arg_index], "yes") == 0); if (unusedReads) readTracking = true; } else if (strcmp(arg, "-shortMatePaired") == 0) { shadows[0] = (strcmp(argv[arg_index], "yes") == 0); } else if (strncmp(arg, "-shortMatePaired", 16) == 0) { sscanf(arg, "-shortMatePaired%hi", &short_var); cat = (Category) short_var; if (cat < 1 || cat > CATEGORIES) { velvetLog("Unknown option: %s\n", arg); #ifdef DEBUG abort(); #endif exit(1); } shadows[cat - 1] = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "--help") == 0) { printUsage(); return 0; } else { velvetLog("Unknown option: %s;\n", arg); printUsage(); return 1; } } // Bookkeeping logInstructions(argc, argv, directory); seqReadInfo = callocOrExit(1, SequencesReader); strcpy(seqFilename, directory); // if binary CnyUnifiedSeq exists, use it. Otherwise try Sequences strcat(seqFilename, "/CnyUnifiedSeq"); if (access(seqFilename, R_OK) == 0) { seqReadInfo->m_bIsBinary = true; } else { seqReadInfo->m_bIsBinary = false; strcpy(seqFilename, directory); strcat(seqFilename, "/Sequences"); } seqReadInfo->m_seqFilename = seqFilename; strcpy(roadmapFilename, directory); strcat(roadmapFilename, "/Roadmaps"); strcpy(preGraphFilename, directory); strcat(preGraphFilename, "/PreGraph"); strcpy(connectedGraphFilename, directory); strcat(connectedGraphFilename, "/ConnectedGraph"); if (!readTracking) { strcpy(graphFilename, directory); strcat(graphFilename, "/Graph"); } else { strcpy(graphFilename, directory); strcat(graphFilename, "/Graph2"); } strcpy(lowCovContigsFilename, directory); strcat(lowCovContigsFilename, "/lowCoverageContigs.fa"); strcpy(highCovContigsFilename, directory); strcat(highCovContigsFilename, "/highCoverageContigs.fa"); // Graph uploading or creation if ((file = fopen(graphFilename, "r")) != NULL) { fclose(file); graph = importGraph(graphFilename); } else if ((file = fopen(connectedGraphFilename, "r")) != NULL) { fclose(file); if (seqReadInfo->m_bIsBinary) { sequences = importCnyReadSet(seqFilename); #if 0 // compare to velvet's version of a seq ReadSet *compareSequences = NULL; compareSeqFilename = mallocOrExit(strlen(directory) + 100, char); strcpy(compareSeqFilename, directory); strcat(compareSeqFilename, "/Sequences"); compareSequences = importReadSet(compareSeqFilename); convertSequences(compareSequences); if (sequences->readCount != compareSequences->readCount) { printf("read count mismatch\n"); exit(1); } int i; for (i = 0; i < sequences->readCount; i++) { TightString *tString = getTightStringInArray(sequences->tSequences, i); TightString *tStringCmp = getTightStringInArray(compareSequences->tSequences, i); if (getLength(tString) != getLength(tStringCmp)) { printf("sequence %d len mismatch\n", i); exit(1); } if (strcmp(readTightString(tString), readTightString(tStringCmp)) != 0) { printf("sequence %d cmp mismatch\n", i); printf("seq %s != cmp %s\n", readTightString(tString), readTightString(tStringCmp)); exit(1); } } #endif } else {
int main(int argc, char **argv) { ReadSet *sequences = NULL; RoadMapArray *rdmaps; PreGraph *preGraph; Graph *graph; char *directory, *graphFilename, *preGraphFilename, *seqFilename, *roadmapFilename; double coverageCutoff = -1; double maxCoverageCutoff = -1; double expectedCoverage = -1; int longMultCutoff = -1; Coordinate minContigLength = -1; Coordinate minContigKmerLength; boolean *dubious = NULL; Coordinate insertLength[CATEGORIES]; Coordinate insertLengthLong = -1; Coordinate std_dev[CATEGORIES]; Coordinate std_dev_long = -1; short int accelerationBits = 24; boolean readTracking = false; boolean exportAssembly = false; boolean unusedReads = false; boolean estimateCoverage = false; boolean estimateCutoff = false; FILE *file; int arg_index, arg_int; double arg_double; char *arg; Coordinate *sequenceLengths = NULL; Category cat; boolean scaffolding = true; int pebbleRounds = 1; long long longlong_var; short int short_var; setProgramName("velvetg"); for (cat = 0; cat < CATEGORIES; cat++) { insertLength[cat] = -1; std_dev[cat] = -1; } // Error message if (argc == 1) { puts("velvetg - de Bruijn graph construction, error removal and repeat resolution"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); puts("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])"); puts("This is free software; see the source for copying conditions. There is NO"); puts("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); puts("Compilation settings:"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); puts(""); printUsage(); return 1; } if (strcmp(argv[1], "--help") == 0) { printUsage(); return 0; } // Memory allocation directory = argv[1]; graphFilename = mallocOrExit(strlen(directory) + 100, char); preGraphFilename = mallocOrExit(strlen(directory) + 100, char); roadmapFilename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); // Argument parsing for (arg_index = 2; arg_index < argc; arg_index++) { arg = argv[arg_index++]; if (arg_index >= argc) { puts("Unusual number of arguments!"); printUsage(); exit(1); } if (strcmp(arg, "-cov_cutoff") == 0) { if (strcmp(argv[arg_index], "auto") == 0) { estimateCutoff = true; } else { sscanf(argv[arg_index], "%lf", &coverageCutoff); } } else if (strcmp(arg, "-exp_cov") == 0) { if (strcmp(argv[arg_index], "auto") == 0) { estimateCoverage = true; readTracking = true; } else { sscanf(argv[arg_index], "%lf", &expectedCoverage); if (expectedCoverage > 0) readTracking = true; } } else if (strcmp(arg, "-ins_length") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); insertLength[0] = (Coordinate) longlong_var; if (insertLength[0] < 0) { printf("Invalid insert length: %lli\n", (long long) insertLength[0]); exit(1); } } else if (strcmp(arg, "-ins_length_sd") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); std_dev[0] = (Coordinate) longlong_var; if (std_dev[0] < 0) { printf("Invalid std deviation: %lli\n", (long long) std_dev[0]); exit(1); } } else if (strcmp(arg, "-ins_length_long") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); insertLengthLong = (Coordinate) longlong_var; } else if (strcmp(arg, "-ins_length_long_sd") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); std_dev_long = (Coordinate) longlong_var; } else if (strncmp(arg, "-ins_length", 11) == 0 && strchr(arg, 'd') == NULL) { sscanf(arg, "-ins_length%hi", &short_var); cat = (Category) short_var; if (cat < 1 || cat > CATEGORIES) { printf("Unknown option: %s\n", arg); exit(1); } sscanf(argv[arg_index], "%lli", &longlong_var); insertLength[cat - 1] = (Coordinate) longlong_var; if (insertLength[cat - 1] < 0) { printf("Invalid insert length: %lli\n", (long long) insertLength[cat - 1]); exit(1); } } else if (strncmp(arg, "-ins_length", 11) == 0) { sscanf(arg, "-ins_length%hi_sd", &short_var); cat = (Category) short_var; if (cat < 1 || cat > CATEGORIES) { printf("Unknown option: %s\n", arg); exit(1); } sscanf(argv[arg_index], "%lli", &longlong_var); std_dev[cat - 1] = (Coordinate) longlong_var; if (std_dev[cat - 1] < 0) { printf("Invalid std deviation: %lli\n", (long long) std_dev[cat - 1]); exit(1); } } else if (strcmp(arg, "-read_trkg") == 0) { readTracking = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-scaffolding") == 0) { scaffolding = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-amos_file") == 0) { exportAssembly = (strcmp(argv[arg_index], "yes") == 0); } else if (strcmp(arg, "-min_contig_lgth") == 0) { sscanf(argv[arg_index], "%lli", &longlong_var); minContigLength = (Coordinate) longlong_var; } else if (strcmp(arg, "-accel_bits") == 0) { sscanf(argv[arg_index], "%hi", &accelerationBits); if (accelerationBits < 0) { printf ("Illegal acceleration parameter: %s\n", argv[arg_index]); printUsage(); return -1; } } else if (strcmp(arg, "-max_branch_length") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setMaxReadLength(arg_int); setLocalMaxReadLength(arg_int); } else if (strcmp(arg, "-max_divergence") == 0) { sscanf(argv[arg_index], "%lf", &arg_double); setMaxDivergence(arg_double); setLocalMaxDivergence(arg_double); } else if (strcmp(arg, "-max_gap_count") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setMaxGaps(arg_int); setLocalMaxGaps(arg_int); } else if (strcmp(arg, "-min_pair_count") == 0) { sscanf(argv[arg_index], "%i", &arg_int); setUnreliableConnectionCutoff(arg_int); } else if (strcmp(arg, "-max_coverage") == 0) { sscanf(argv[arg_index], "%lf", &maxCoverageCutoff); } else if (strcmp(arg, "-long_mult_cutoff") == 0) { sscanf(argv[arg_index], "%i", &longMultCutoff); setMultiplicityCutoff(longMultCutoff); } else if (strcmp(arg, "-unused_reads") == 0) { unusedReads = (strcmp(argv[arg_index], "yes") == 0); if (unusedReads) readTracking = true; } else if (strcmp(arg, "--help") == 0) { printUsage(); return 0; } else { printf("Unknown option: %s;\n", arg); printUsage(); return 1; } } // Bookkeeping logInstructions(argc, argv, directory); strcpy(seqFilename, directory); strcat(seqFilename, "/Sequences"); strcpy(roadmapFilename, directory); strcat(roadmapFilename, "/Roadmaps"); strcpy(preGraphFilename, directory); strcat(preGraphFilename, "/PreGraph"); if (!readTracking) { strcpy(graphFilename, directory); strcat(graphFilename, "/Graph"); } else { strcpy(graphFilename, directory); strcat(graphFilename, "/Graph2"); } // Graph uploading or creation if ((file = fopen(graphFilename, "r")) != NULL) { fclose(file); graph = importGraph(graphFilename); } else if ((file = fopen(preGraphFilename, "r")) != NULL) { fclose(file); sequences = importReadSet(seqFilename); convertSequences(sequences); graph = importPreGraph(preGraphFilename, sequences, readTracking, accelerationBits); sequenceLengths = getSequenceLengths(sequences, getWordLength(graph)); correctGraph(graph, sequenceLengths); exportGraph(graphFilename, graph, sequences->tSequences); } else if ((file = fopen(roadmapFilename, "r")) != NULL) { fclose(file); rdmaps = importRoadMapArray(roadmapFilename); preGraph = newPreGraph_pg(rdmaps, seqFilename); clipTips_pg(preGraph); exportPreGraph_pg(preGraphFilename, preGraph); destroyPreGraph_pg(preGraph); sequences = importReadSet(seqFilename); convertSequences(sequences); graph = importPreGraph(preGraphFilename, sequences, readTracking, accelerationBits); sequenceLengths = getSequenceLengths(sequences, getWordLength(graph)); correctGraph(graph, sequenceLengths); exportGraph(graphFilename, graph, sequences->tSequences); } else { puts("No Roadmap file to build upon! Please run velveth (see manual)"); exit(1); } // Set insert lengths and their standard deviations for (cat = 0; cat < CATEGORIES; cat++) { if (insertLength[cat] > -1 && std_dev[cat] < 0) std_dev[cat] = insertLength[cat] / 10; setInsertLengths(graph, cat, insertLength[cat], std_dev[cat]); } if (insertLengthLong > -1 && std_dev_long < 0) std_dev_long = insertLengthLong / 10; setInsertLengths(graph, CATEGORIES, insertLengthLong, std_dev_long); // Coverage cutoff if (expectedCoverage < 0 && estimateCoverage == true) { expectedCoverage = estimated_cov(graph); if (coverageCutoff < 0) { coverageCutoff = expectedCoverage / 2; estimateCutoff = true; } } else { estimateCoverage = false; if (coverageCutoff < 0 && estimateCutoff) coverageCutoff = estimated_cov(graph) / 2; else estimateCutoff = false; } if (coverageCutoff < 0) { puts("WARNING: NO COVERAGE CUTOFF PROVIDED"); puts("Velvet will probably leave behind many detectable errors"); puts("See manual for instructions on how to set the coverage cutoff parameter"); } dubious = removeLowCoverageNodesAndDenounceDubiousReads(graph, coverageCutoff); removeHighCoverageNodes(graph, maxCoverageCutoff); clipTipsHard(graph); if (expectedCoverage > 0) { if (sequences == NULL) { sequences = importReadSet(seqFilename); convertSequences(sequences); } // Mixed length sequencing readCoherentGraph(graph, isUniqueSolexa, expectedCoverage, sequences); // Paired ends module createReadPairingArray(sequences); for (cat = 0; cat < CATEGORIES; cat++) if(pairUpReads(sequences, 2 * cat + 1)) pebbleRounds++; if (pairUpReads(sequences, 2 * CATEGORIES + 1)) pebbleRounds++; detachDubiousReads(sequences, dubious); activateGapMarkers(graph); for ( ;pebbleRounds > 0; pebbleRounds--) exploitShortReadPairs(graph, sequences, dubious, scaffolding); } else { puts("WARNING: NO EXPECTED COVERAGE PROVIDED"); puts("Velvet will be unable to resolve any repeats"); puts("See manual for instructions on how to set the expected coverage parameter"); } free(dubious); concatenateGraph(graph); if (minContigLength < 2 * getWordLength(graph)) minContigKmerLength = getWordLength(graph); else minContigKmerLength = minContigLength - getWordLength(graph) + 1; strcpy(graphFilename, directory); strcat(graphFilename, "/contigs.fa"); exportLongNodeSequences(graphFilename, graph, minContigKmerLength); strcpy(graphFilename, directory); strcat(graphFilename, "/stats.txt"); displayGeneralStatistics(graph, graphFilename); if (sequences == NULL) { sequences = importReadSet(seqFilename); convertSequences(sequences); } strcpy(graphFilename, directory); strcat(graphFilename, "/LastGraph"); exportGraph(graphFilename, graph, sequences->tSequences); if (exportAssembly) { strcpy(graphFilename, directory); strcat(graphFilename, "/velvet_asm.afg"); exportAMOSContigs(graphFilename, graph, minContigKmerLength, sequences); } if (unusedReads) exportUnusedReads(graph, sequences, minContigKmerLength, directory); if (estimateCoverage) printf("Estimated Coverage = %f\n", expectedCoverage); if (estimateCutoff) printf("Estimated Coverage cutoff = %f\n", coverageCutoff); logFinalStats(graph, minContigKmerLength, directory); destroyGraph(graph); free(graphFilename); free(preGraphFilename); free(seqFilename); free(roadmapFilename); destroyReadSet(sequences); return 0; }
int main(int argc, char **argv) { ReadSet *allSequences = NULL; SplayTable *splayTable; int hashLength, hashLengthStep, hashLengthMax, h; char *directory, *filename, *seqFilename, *buf; boolean double_strand = true; boolean multiple_kmers = false; DIR *dir; setProgramName("velveth"); if (argc < 4) { printf("velveth - simple hashing program\n"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n"); printf("This is free software; see the source for copying conditions. There is NO\n"); printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n"); printf("Compilation settings:\n"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); printf("\n"); printUsage(); return 0; } if ( strstr(argv[2],"," ) ) { sscanf(argv[2],"%d,%d,%d",&hashLength,&hashLengthMax,&hashLengthStep); multiple_kmers = true; } else { hashLength = atoi(argv[2]); hashLengthMax = hashLength + 1; hashLengthStep = 2; } if (hashLengthMax > MAXKMERLENGTH) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLength, MAXKMERLENGTH); hashLength = MAXKMERLENGTH; } else if (hashLength <= 0) { velvetLog("Invalid hash length: %s\n", argv[2]); printUsage(); return 0; } else if ( hashLength > hashLengthMax ) { velvetLog("hashLengthMin <= hashLengthMax is required %s", argv[2]); printUsage(); return 0; } if (hashLength % 2 == 0) { velvetLog ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n", hashLength, hashLength - 1); hashLength--; } if (hashLengthStep % 2 == 1) { velvetLog ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n", hashLengthStep, hashLengthStep - 1); hashLengthStep--; } for (h = hashLength; h < hashLengthMax; h += hashLengthStep) { resetWordFilter(h); buf = mallocOrExit(strlen(argv[1]) + 100, char); if ( multiple_kmers ) { sprintf(buf,"%s_%d",argv[1],h); directory = mallocOrExit(strlen(buf) + 100, char); strcpy(directory,buf); } else directory = argv[1]; filename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); dir = opendir(directory); if (dir == NULL) mkdir(directory, 0777); else { sprintf(buf, "%s/PreGraph", directory); remove(buf); sprintf(buf, "%s/Graph", directory); remove(buf); sprintf(buf, "%s/Graph2", directory); remove(buf); sprintf(buf, "%s/Graph3", directory); remove(buf); sprintf(buf, "%s/Graph4", directory); remove(buf); sprintf(buf, "%s/Log", directory); remove(buf); } logInstructions(argc, argv, directory); strcpy(seqFilename, directory); strcat(seqFilename, "/Sequences"); if ( h == hashLength ) { parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand); } else { sprintf(buf,"ln -s ../%s_%d/Sequences %s",argv[1],hashLength,seqFilename); system(buf); } splayTable = newSplayTable(h, double_strand); if (!allSequences) allSequences = importReadSet(seqFilename); velvetLog("%li sequences in total.\n", (long) allSequences->readCount); strcpy(filename, directory); strcat(filename, "/Roadmaps"); inputSequenceArrayIntoSplayTableAndArchive(allSequences, splayTable, filename, seqFilename); destroySplayTable(splayTable); if (dir) closedir(dir); if (directory != argv[1]) free(directory); free(filename); free(seqFilename); free(buf); }
int main(int argc, char **argv) { ReadSet *allSequences = NULL; SplayTable *splayTable; int hashLength, hashLengthStep, hashLengthMax, h; char *directory, *filename, *seqFilename, *baseSeqName, *buf; char * token; boolean double_strand = true; boolean noHash = false; boolean multiple_kmers = false; char buffer[100]; DIR *dir; setProgramName("velveth"); if (argc < 4) { printf("velveth - simple hashing program\n"); printf("Version %i.%i.%2.2i\n", VERSION_NUMBER, RELEASE_NUMBER, UPDATE_NUMBER); printf("\nCopyright 2007, 2008 Daniel Zerbino ([email protected])\n"); printf("This is free software; see the source for copying conditions. There is NO\n"); printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n"); printf("Compilation settings:\n"); printf("CATEGORIES = %i\n", CATEGORIES); printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH); #ifdef _OPENMP puts("OPENMP"); #endif #ifdef LONGSEQUENCES puts("LONGSEQUENCES"); #endif #ifdef BIGASSEMBLY puts("BIGASSEMBLY"); #endif #ifdef COLOR puts("COLOR"); #endif #ifdef DEBUG puts("DEBUG"); #endif printf("\n"); printUsage(); return 0; } strcpy(buffer, argv[2]); token = strtok(buffer, ","); hashLength = atoi(token); token = strtok(NULL, ","); if (token == NULL) { multiple_kmers = false; hashLengthMax = hashLength + 1; } else { multiple_kmers = true; hashLengthMax = atoi(token); } token = strtok(NULL, ","); if (token == NULL) { hashLengthStep = 2; } else { hashLengthStep = atoi(token); } if (hashLength > MAXKMERLENGTH) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLength, MAXKMERLENGTH); hashLength = MAXKMERLENGTH; } if (hashLength <= 0) { velvetLog("Invalid hash length: %s\n", argv[2]); printUsage(); return 0; } if (hashLength % 2 == 0) { velvetLog ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n", hashLength, hashLength - 1); hashLength--; } if (multiple_kmers) { if (hashLengthMax > MAXKMERLENGTH + 1) { velvetLog ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n", hashLengthMax, MAXKMERLENGTH + 1); hashLengthMax = MAXKMERLENGTH + 1; } if (hashLengthMax <= hashLength) { velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]); printUsage(); return 0; } if (hashLengthStep <= 0) { velvetLog("Non-positive hash length! Setting it to 2\n"); hashLengthStep = 2; } if (hashLengthStep % 2 == 1) { velvetLog ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n", hashLengthStep, hashLengthStep + 1); hashLengthStep++; } } // check if binary sequences should be used int argIndex; for (argIndex = 3; argIndex < argc; argIndex++) if (strcmp(argv[argIndex], "-create_binary") == 0 || strcmp(argv[argIndex], "-reuse_binary") == 0) setCreateBinary(true); for (h = hashLength; h < hashLengthMax; h += hashLengthStep) { resetWordFilter(h); buf = mallocOrExit(2 * strlen(argv[1]) + 500, char); if ( multiple_kmers ) { sprintf(buf,"%s_%d",argv[1],h); directory = mallocOrExit(strlen(buf) + 100, char); strcpy(directory,buf); } else directory = argv[1]; filename = mallocOrExit(strlen(directory) + 100, char); seqFilename = mallocOrExit(strlen(directory) + 100, char); baseSeqName = mallocOrExit(100, char); dir = opendir(directory); if (dir == NULL) mkdir(directory, 0777); else { sprintf(buf, "%s/PreGraph", directory); remove(buf); sprintf(buf, "%s/Graph", directory); remove(buf); sprintf(buf, "%s/Graph2", directory); remove(buf); sprintf(buf, "%s/Graph3", directory); remove(buf); sprintf(buf, "%s/Graph4", directory); remove(buf); } logInstructions(argc, argv, directory); strcpy(seqFilename, directory); if (isCreateBinary()) { // use the CNY unified seq writer strcpy(baseSeqName, "/CnyUnifiedSeq"); // remove other style sequences file sprintf(buf, "%s/Sequences", directory); remove(buf); } else { strcpy(baseSeqName, "/Sequences"); // remove other style sequences file sprintf(buf, "%s/CnyUnifiedSeq", directory); remove(buf); sprintf(buf, "%s/CnyUnifiedSeq.names", directory); remove(buf); } strcat(seqFilename, baseSeqName); if ( h == hashLength ) { parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand, &noHash); } else { sprintf(buf,"rm -f %s",seqFilename); if (system(buf)) { velvetLog("Command failed!\n"); velvetLog("%s\n", buf); #ifdef DEBUG abort(); #endif exit(1); } if (argv[1][0] == '/') sprintf(buf,"ln -s %s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename); else sprintf(buf,"ln -s `pwd`/%s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename); if (system(buf)) { velvetLog("Command failed!\n"); velvetLog("%s\n", buf); #ifdef DEBUG abort(); #endif exit(1); } } if (noHash) continue; splayTable = newSplayTable(h, double_strand); if (isCreateBinary()) { allSequences = importCnyReadSet(seqFilename); } else { allSequences = importReadSet(seqFilename); } velvetLog("%li sequences in total.\n", (long) allSequences->readCount); strcpy(filename, directory); strcat(filename, "/Roadmaps"); inputSequenceArrayIntoSplayTableAndArchive(allSequences, splayTable, filename, seqFilename); destroySplayTable(splayTable); if (dir) closedir(dir); if (directory != argv[1]) free(directory); free(filename); free(seqFilename); free(baseSeqName); free(buf); if (allSequences) { destroyReadSet(allSequences); } }