void ParsePhase2Option(int argc, char *argv[]) { OptionsDescription desc; desc.AddOption("host_mem", "", phase2_options.host_mem, "memory to be used. No more than 95% of the free memory is recommended. 0 for auto detect."); desc.AddOption("gpu_mem", "", phase2_options.gpu_mem, "gpu memory to be used. 0 for auto detect."); desc.AddOption("num_cpu_threads", "t", phase2_options.num_cpu_threads, "number of CPU threads. At least 2."); desc.AddOption("num_output_threads", "", phase2_options.num_output_threads, "number of threads for output. Must be less than num_cpu_threads"); desc.AddOption("input_prefix", "", phase2_options.input_prefix, "files input_prefix.edges.* output by count module, can be gzip'ed."); desc.AddOption("num_edge_files", "", phase2_options.num_edge_files, "the number of files with name input_prefix.edges.*"); desc.AddOption("output_prefix", "o", phase2_options.output_prefix, "output prefix"); desc.AddOption("need_mercy", "", phase2_options.need_mercy, "to add mercy edges. The file input_prefix.cand output by count module should exist."); desc.AddOption("max_read_length", "", phase2_options.max_read_length, "max read length"); desc.AddOption("mem_flag", "", phase2_options.mem_flag, "memory options. 
0: minimize memory usage; 1: automatically use moderate memory; other: use all available mem specified by '--host_mem'"); try { desc.Parse(argc, argv); if (phase2_options.input_prefix == "") { throw std::logic_error("No input prefix!"); } if (phase2_options.num_edge_files == 0) { throw std::logic_error("Number of edge files cannot be 0!"); } if (phase2_options.num_cpu_threads == 0) { phase2_options.num_cpu_threads = omp_get_max_threads(); } if (phase2_options.num_output_threads == 0) { phase2_options.num_output_threads = std::max(1, phase2_options.num_cpu_threads / 3); } if (phase2_options.host_mem == 0) { throw std::logic_error("Please specify the host memory!"); } if (phase2_options.gpu_mem == 0) { #ifndef DISABLE_GPU size_t free_gpu_mem, total_gpu_mem; get_cuda_memory(free_gpu_mem, total_gpu_mem); phase2_options.gpu_mem = free_gpu_mem; #else // we "simulate" the GTX680 here phase2_options.gpu_mem = 4243689472ULL; #endif } if (phase2_options.num_cpu_threads == 1) { throw std::logic_error("Number of CPU threads is at least 2!"); } if (phase2_options.num_output_threads >= phase2_options.num_cpu_threads) { throw std::logic_error("Number of output threads must be less than number of CPU threads!"); } } catch (std::exception &e) { std::cerr << e.what() << std::endl; std::cerr << "Usage: builder build --input_prefix input --num_edge_files num -o out" << std::endl; std::cerr << "Options:" << std::endl; std::cerr << desc << std::endl; exit(1); } }
/**
 * Fill the global `options` from the command line.
 *
 * The succinct de Bruijn graph name is the only option checked here; it
 * must not be empty. If parsing or that check throws a std::exception,
 * the message and the usage text go to stderr and the process exits with
 * status 1.
 *
 * @param argc argument count, as received by main()
 * @param argv argument vector, as received by main(); argv[0] is echoed
 *             in the usage line
 */
void ParseOption(int argc, char *argv[]) {
    OptionsDescription desc;

    // Input / output.
    desc.AddOption("sdbg_name", "s", options.sdbg_name,
                   "succinct de Bruijn graph name");
    desc.AddOption("output_prefix", "o", options.output_prefix,
                   "output prefix");
    desc.AddOption("num_cpu_threads", "t", options.num_cpu_threads,
                   "number of cpu threads");

    // Graph cleaning parameters.
    desc.AddOption("max_tip_len", "", options.max_tip_len,
                   "max length for tips to be removed. -1 for 2k");
    desc.AddOption("min_final_contig_len", "", options.min_final_contig_len,
                   "min length to output a final contig");
    desc.AddOption("no_bubble", "", options.no_bubble,
                   "do not remove bubbles");
    desc.AddOption("bubble_remove_ratio", "", options.bubble_remove_ratio,
                   "bubbles with multiplicities lower than this ratio times to highest of its group will be removed");
    desc.AddOption("remove_low_local", "", options.remove_low_local,
                   "remove low local depth contigs progressively");
    desc.AddOption("low_local_ratio", "", options.low_local_ratio,
                   "ratio to define low depth contigs");
    desc.AddOption("is_final_round", "", options.is_final_round,
                   "this is the last iteration");

    try {
        desc.Parse(argc, argv);

        const bool graph_name_missing = (options.sdbg_name == "");
        if (graph_name_missing) {
            throw std::logic_error("no succinct de Bruijn graph name!");
        }
    } catch (std::exception &problem) {
        std::cerr << problem.what() << std::endl
                  << "Usage: " << argv[0] << " -s sdbg_name -o output_prefix" << std::endl
                  << "options:" << std::endl
                  << desc << std::endl;
        exit(1);
    }
}
void ParsePhase1Option(int argc, char *argv[]) { OptionsDescription desc; desc.AddOption("kmer_k", "k", phase1_options.kmer_k, "kmer size"); desc.AddOption("min_kmer_frequency", "m", phase1_options.min_edge_freq, "min frequency to output an edge"); desc.AddOption("host_mem", "", phase1_options.host_mem, "memory to be used. No more than 95% of the free memory is recommended. 0 for auto detect."); desc.AddOption("gpu_mem", "", phase1_options.gpu_mem, "gpu memory to be used. 0 for auto detect."); desc.AddOption("max_read_length", "", phase1_options.max_read_length, "max read length"); desc.AddOption("num_cpu_threads", "", phase1_options.num_cpu_threads, "number of CPU threads. At least 2."); desc.AddOption("num_output_threads", "", phase1_options.num_output_threads, "number of threads for output. Must be less than num_cpu_threads"); desc.AddOption("input_file", "", phase1_options.input_file, "input fastx file, can be gzip'ed. \"-\" for stdin."); desc.AddOption("output_prefix", "", phase1_options.output_prefix, "output prefix"); try { desc.Parse(argc, argv); if (phase1_options.input_file == "") { throw std::logic_error("No input file!"); } if (phase1_options.num_cpu_threads == 0) { phase1_options.num_cpu_threads = omp_get_max_threads(); } if (phase1_options.num_output_threads == 0) { phase1_options.num_output_threads = std::max(1, phase1_options.num_cpu_threads / 3); } if (phase1_options.host_mem == 0) { throw std::logic_error("Please specify the host memory!"); // struct sysinfo s_info; // sysinfo(&s_info); // phase1_options.host_mem = (s_info.freeram + s_info.bufferram) * 0.95; } if (phase1_options.gpu_mem == 0) { #ifndef DISABLE_GPU size_t free_gpu_mem, total_gpu_mem; get_cuda_memory(free_gpu_mem, total_gpu_mem); phase1_options.gpu_mem = free_gpu_mem; #else // we "simulate" the GTX680 here phase1_options.gpu_mem = 4243689472ULL; #endif } if (phase1_options.num_cpu_threads == 1) { throw std::logic_error("Number of CPU threads is at least 2!"); } if 
(phase1_options.num_output_threads >= phase1_options.num_cpu_threads) { throw std::logic_error("Number of output threads must be less than number of CPU threads!"); } } catch (std::exception &e) { std::cerr << e.what() << std::endl; std::cerr << "Usage: builder count --input_file fastx_file -o out" << std::endl; std::cerr << "Options:" << std::endl; std::cerr << desc << std::endl; exit(1); } }
int main(int argc, char *argv[]) { int kmer_size = 50; int max_length = 1000000; OptionsDescription desc; desc.AddOption("kmer", "k", kmer_size, "k value"); desc.AddOption("max_length", "", max_length, "max length"); desc.Parse(argc, argv); deque<Sequence> refs; ReadSequence(argv[1], refs); HashGraph hash_graph(kmer_size); for (unsigned i = 0; i < refs.size(); ++i) { if ((int)refs[i].size() > max_length) refs[i].resize(max_length); hash_graph.InsertKmers(refs[i]); } hash_graph.Refresh(); hash_graph.AddAllEdges(); deque<Sequence> contigs; deque<ContigInfo> contig_infos; hash_graph.Assemble(contigs, contig_infos); cerr << "build" << endl; ContigGraph contig_graph(kmer_size); contig_graph.Initialize(contigs, contig_infos); cerr << "kmer " << hash_graph.num_vertices() << " branches " << contigs.size()<< endl; deque<deque<ContigGraphVertexAdaptor> > components; deque<string> component_strings; contig_graph.GetComponents(components, component_strings); for (unsigned i = 0; i < component_strings.size(); ++i) cout << component_strings[i] << endl; //FastaWriter writer(argv[2]); WriteSequence(argv[2], contigs, "conitg"); return 0; }
int main(int argc, char *argv[]) { OptionsDescription desc; desc.AddOption("paired", "", is_paired, "if the reads are paired-end in one file"); desc.AddOption("merge", "", is_merged, "if the reads are paired-end in two files"); desc.AddOption("filter", "", is_filtered, "filter out reads containing 'N'"); try { desc.Parse(argc, argv); if (argc < 3) throw logic_error("not enough parameters"); } catch (exception &e) { cerr << e.what() << endl; cerr << "fq2fa - Convert Fastq sequences to Fasta sequences." << endl; cerr << "Usage: fq2fa tmp.fq tmp.fa [...] " << endl; cerr << " fq2fa --paired tmp.fq tmp.fa" << endl; cerr << " fq2fa --merge tmp_1.fq tmp_2.fq tmp.fa" << endl; cerr << "Allowed Options: " << endl; cerr << desc << endl; exit(1); } FastaReader reader(argv[1]); FastqWriter writer(argv[2]); Sequence seq; string comment; while (reader.Read(seq, comment)) { string quality; quality.append(seq.size(), 33 + 40); writer.Write(seq, comment, quality); } return 0; }
int main(int argc, char *argv[]) { desc.AddOption("num_threads", "", num_threads, "number of threads"); desc.AddOption("similar", "", similar, "similarity"); try { desc.Parse(argc, argv); if (argc < 3) throw logic_error("not enough parameters"); } catch (exception &e) { cerr << e.what() << endl; cerr << "parallel_blat - use blat to alignment parallely." << endl; cerr << "Usage: parallel_blat ref.fa query.fa" << endl; cerr << "Allowed Options: " << endl; cerr << desc << endl; exit(1); } ref_filename = argv[1]; query_filename = argv[2]; split_files.resize(num_threads); for (int i = 0; i < num_threads; ++i) split_files[i] = FormatString("%s.split%d", query_filename.c_str(), i); CreateFile(query_filename + ".blat"); deque<string> options; // options.push_back(" -noHead -tileSize=18 -minMatch=40 -maxGap=0 -maxIntron=1000 -minIdentity=95 -minScore=100 "); // options.push_back(" -noHead -tileSize=18 -minMatch=15 -maxGap=0 -maxIntron=1000 -minIdentity=95 -minScore=100 "); // options.push_back(" -noHead -tileSize=18 -minMatch=4 "); options.push_back(" -noHead "); for (unsigned i = 0; i < options.size(); ++i) ParallelBlat(options[i]); return 0; }
int main(int argc, char *argv[]) { int min_contig = 100; double similar = 0.95; double complete_rate = 0.8; bool is_local = false; OptionsDescription desc; desc.AddOption("min_contig", "", min_contig, "minimum contigs"); desc.AddOption("similar", "", similar, "similarity"); desc.AddOption("complete_rate", "", complete_rate, "completeness"); desc.AddOption("is_local", "", is_local, "local align"); try { desc.Parse(argc, argv); } catch (exception &e) { cerr << e.what() << endl; cerr << "validate_contigs_blat - validate contigs by blat." << endl; cerr << "Usage: validate_contigs_blat ref.fa contigs.fa." << endl; cerr << "Allowed Options: " << endl; cerr << desc << endl; exit(1); } deque<Sequence> refs; deque<string> ref_names; ReadSequence(argv[1], refs, ref_names); deque<Sequence> contigs; deque<string> contig_names; ReadSequence(argv[2], contigs, contig_names); vector<int> is_found(refs.size()); vector<vector<double> > flags(refs.size()); map<string, int> dict; for (unsigned i = 0; i < refs.size(); ++i) { flags[i].resize(refs[i].size(), false); size_t index = ref_names[i].find(' '); if (index != string::npos) ref_names[i].resize(index); dict[ref_names[i]] = i; } int num_gaps = 0; for (unsigned i = 0; i < contigs.size(); ++i) { size_t index = contig_names[i].find(' '); if (index != string::npos) contig_names[i].resize(index); bool is_new_gap = true; for (unsigned j = 0; j < contigs[i].size(); ++j) { if (contigs[i][j] == 4) { if (is_new_gap) { is_new_gap = false; ++num_gaps; } } else is_new_gap = true; } } string blat_file = string(argv[2]) + ".blat"; FILE *fblat = OpenFile(blat_file, "rb"); map<string, int> valid_contigs; deque<int> valid_lengths; int64_t num_mismatch = 0; while (fgets(line, MaxLine, fblat) != NULL) { BlatRecord record; record.Parse(line); deque<BlatRecord> records; records.push_back(record); while (fgets(line, MaxLine, fblat) != NULL) { record.Parse(line); if (record.query_name == records.back().query_name) records.push_back(record); else { 
fseek(fblat, -strlen(line), SEEK_CUR); break; } } int index = 0; for (unsigned i = 0; i < records.size(); ++i) { if (records[i].match_count > similar * records[i].query_length && records[i].match_count > similar * abs(record.ref_to - record.ref_from)) records[index++] = records[i]; } records.resize(index); for (unsigned i = 0; i < records.size(); ++i) { record = records[i]; int ref_id = dict[record.ref_name]; //if (record.match_count > similar * record.query_length && record.query_length >= min_contig if ((record.match_count > similar * record.query_length || (is_local && record.match_count > similar * abs(record.query_to - record.query_from))) //if (record.match_count > similar * abs(record.query_to - record.query_from) && abs(record.query_to - record.query_from) >= min_contig && record.match_count > similar * abs(record.ref_to - record.ref_from) ) { //if (record.match_count >= similar * record.ref_length) if (record.match_count >= complete_rate * record.ref_length) is_found[ref_id] = true; // else // continue; int not_used = 0; for (unsigned i = 0; i < record.blocks.size(); ++i) { BlatBlock block = record.blocks[i]; for (unsigned j = block.ref_from; j < block.ref_from + block.size; ++j) { if (flags[ref_id][j] == false) { //flags[ref_id][j] = true; not_used++; } flags[ref_id][j] += 1.0 / records.size(); } } if (valid_contigs.find(record.query_name) == valid_contigs.end()) { valid_contigs[record.query_name] = record.mismatch_count; valid_lengths.push_back(record.query_to - record.query_from); } else { valid_contigs[record.query_name] = min(record.mismatch_count, (int64_t)valid_contigs[record.query_name]); if (not_used > similar * record.query_length) valid_lengths.push_back(record.query_to - record.query_from); } } } } for (map<string, int>::iterator p = valid_contigs.begin(); p != valid_contigs.end(); ++p) { num_mismatch += p->second; } long long count = 0; long long total = 0; for (unsigned k = 0; k < flags.size(); ++k) { for (unsigned i = 0; i < flags[k].size(); 
++i) { if (flags[k][i]) ++count; ++total; } } //valid_lengths.push_back(60000); sort(valid_lengths.begin(), valid_lengths.end()); reverse(valid_lengths.begin(), valid_lengths.end()); long long n50 = 0; long long sum = 0; long long n80 = 0; for (unsigned i = 0; i < valid_lengths.size(); ++i) { sum += valid_lengths[i]; if (sum >= 0.5 * total && n50 == 0) n50 = valid_lengths[i]; if (sum >= 0.8 * total && n80 == 0) n80 = valid_lengths[i]; } cout << "total " << total << " " << sum << endl; long long maximum = 0; long long mean = 0; if (valid_lengths.size() > 0) { maximum = valid_lengths[0]; mean = sum / valid_lengths.size(); } long long sum_wrong = 0; long long num_wrong = 0; long long corret_contigs = 0; long long sum_corret = 0; int last_id = 0; int last_error = 0; deque<int> contig_flags(contigs.size(), false); FastaWriter error_writer(FormatString("%s.error.fa", argv[2])); for (unsigned i = 0; i < contigs.size(); ++i) { if ((int)contigs[i].size() < min_contig) continue; if (valid_contigs.find(contig_names[i]) == valid_contigs.end()) { ++num_wrong; sum_wrong += contigs[i].size(); error_writer.Write(contigs[i], contig_names[i]); } else { last_id = i; last_error = sum_wrong; ++corret_contigs; sum_corret += contigs[i].size(); contig_flags[i] = true; //correct_writer.Write(contigs[i], contig_names[i]); } } printf("last id %d %d total contigs %d gaps %d\n", last_id, last_error, (int)(num_wrong + corret_contigs), num_gaps); printf("contigs: %lld N50: %lld coverage: %.2f%% max: %lld mean: %lld total: %lld/%lld N80: %lld\n", (long long)valid_contigs.size(), n50, count * 100.0 / total, maximum, mean, count, total, n80); printf("substitution error: %.4f%% wrong contigs: %lld %lld correct: %lld %lld %s\n", num_mismatch * 100.0 /sum, num_wrong, sum_wrong, corret_contigs, sum_corret, argv[2]); deque<int> lengths; for (unsigned i = 0; i < refs.size(); ++i) { int last = 0; for (unsigned j = 0; j < refs[i].size(); ++j) { if (flags[i][j] == 0) { if (flags[i][last]) { 
lengths.push_back(j - last); last = j; } } else { if (flags[i][last] == 0) last = j; } } } sort(lengths.begin(), lengths.end()); reverse(lengths.begin(), lengths.end()); deque<Sequence> gaps; deque<bool> is_no_long_gaps(refs.size()); for (unsigned i = 0; i < refs.size(); ++i) { deque<int> tmp; Sequence gap; if (flags[i][0]) tmp.push_back(0); for (unsigned j = 0; j < refs[i].size(); ++j) { if (flags[i][j] == false) { gap.Append(refs[i][j]); } else { if (gap.size() > 0) { gaps.push_back(gap); tmp.push_back(gap.size()); } gap.resize(0); } } if (gap.size() > 0) { gaps.push_back(gap); tmp.push_back(gap.size()); } else tmp.push_back(0); is_no_long_gaps[i] = true; for (unsigned j = 1; j+1 < tmp.size(); ++j) { if (tmp[j] > 50) is_no_long_gaps[i] = false; } } WriteSequence(FormatString("%s.gap.fa", argv[2]), gaps, "gap"); FastaWriter ref_writer(argv[1] + string(".found.fa")); FILE *ffcound_list = OpenFile(argv[1] + string(".found.fa.list"), "wb"); int found = 0; int covered = 0; int total_contigs = 0; for (unsigned i = 0; i < refs.size(); ++i) { int count = 0; double total_hit = 0; for (unsigned j = 0; j < flags[i].size(); ++j) { if (flags[i][j]) ++count; total_hit += flags[i][j]; } if (count > complete_rate * refs[i].size() //&& is_no_long_gaps[i] //&& 1.0 * total_hit / count > 2 ) { ++covered; ref_writer.Write(refs[i], ref_names[i]); fprintf(ffcound_list, "%s %.4f\n", ref_names[i].c_str(), 1.0 * total_hit / count); } if (is_found[i]) ++found; } int64_t total_bases = 0; for (unsigned i = 0; i < contigs.size(); ++i) { if ((int)contigs[i].size() >= min_contig) { ++total_contigs; total_bases += contigs[i].size(); } } cout << corret_contigs << " " << total_contigs << " " << total_bases << endl; printf("cover ref: %d %d\n", covered, (int)refs.size()); printf("found ref: %d %d\n", found, (int)refs.size()); printf("precision: %.2f%% %d %d\n", 100.0 * corret_contigs / total_contigs, (int)corret_contigs, total_contigs); FastaWriter correct_writer(FormatString("%s.correct.fa", 
argv[2])); for (unsigned i = 0; i < contigs.size(); i += 2) { if (contig_flags[i] || contig_flags[i+1]) { correct_writer.Write(contigs[i], contig_names[i]); correct_writer.Write(contigs[i+1], contig_names[i+1]); } } return 0; }
int main(int argc, char *argv[]) { OptionsDescription desc; desc.AddOption("out", "o", option.directory, "output directory"); desc.AddOption("read", "r", option.read_file, FormatString("fasta read file (<=%d)", ShortSequence::max_size())); desc.AddOption("read_level_2", "", option.extra_read_files[0], "paired-end reads fasta for second level scaffolds"); desc.AddOption("read_level_3", "", option.extra_read_files[1], "paired-end reads fasta for third level scaffolds"); desc.AddOption("read_level_4", "", option.extra_read_files[2], "paired-end reads fasta for fourth level scaffolds"); desc.AddOption("read_level_5", "", option.extra_read_files[3], "paired-end reads fasta for fifth level scaffolds"); desc.AddOption("long_read", "l", option.long_read_file, FormatString("fasta long read file (>%d)", ShortSequence::max_size())); //desc.AddOption("reference", "", option.reference, "reference genome"); desc.AddOption("mink", "", option.mink, FormatString("minimum k value (<=%d)", Kmer::max_size())); desc.AddOption("maxk", "", option.maxk, FormatString("maximum k value (<=%d)", Kmer::max_size())); desc.AddOption("step", "", option.step, "increment of k-mer of each iteration"); //desc.AddOption("inner_mink", "", option.inner_mink, "inner minimum k value"); //desc.AddOption("inner_step", "", option.inner_step, "inner increment of k-mer"); desc.AddOption("prefix", "", option.prefix_length, "prefix length used to build sub k-mer table"); desc.AddOption("min_count", "", option.min_count, "minimum multiplicity for filtering k-mer when building the graph"); desc.AddOption("min_support", "", option.min_support, "minimum supoort in each iteration"); desc.AddOption("num_threads", "", option.num_threads, "number of threads"); desc.AddOption("seed_kmer", "", option.seed_kmer_size, "seed kmer size for alignment"); desc.AddOption("min_contig", "", option.min_contig, "minimum size of contig"); desc.AddOption("similar", "", option.similar, "similarity for alignment"); 
desc.AddOption("max_mismatch", "", option.max_mismatch, "max mismatch of error correction"); desc.AddOption("min_pairs", "", option.min_pairs, "minimum number of pairs"); //desc.AddOption("max_gap", "", option.max_gap, "maximum gap in reference"); //desc.AddOption("no_local", "", option.is_no_local, "do not use local assembly"); desc.AddOption("no_coverage", "", option.is_no_coverage, "do not iterate on coverage"); desc.AddOption("no_correct", "", option.is_no_correct, "do not do correction"); desc.AddOption("pre_correction", "", option.is_pre_correction, "perform pre-correction before assembly"); try { desc.Parse(argc, argv); if (option.read_file == "" && option.long_read_file == "") throw logic_error("not enough parameters"); if (option.maxk < option.mink) throw invalid_argument("mink is larger than maxk"); if (option.maxk > (int)Kmer::max_size()) throw invalid_argument("maxk is too large"); } catch (exception &e) { cerr << e.what() << endl; cerr << "IDBA- Iterative de Bruijn Graph Assembler." 
<< endl; cerr << "Usage: idba_ud -r read.fa -o output_dir" << endl; cerr << "Allowed Options: " << endl; cerr << desc << endl; exit(1); } MakeDir(option.directory); LogThread log_thread(option.log_file()); string begin_file = option.directory + "/begin"; fclose(OpenFile(begin_file, "wb")); if (option.num_threads == 0) option.num_threads = omp_get_max_threads(); else omp_set_num_threads(option.num_threads); cout << "number of threads " << option.num_threads << endl; ReadInput(option.read_file, option.long_read_file, assembly_info); deque<Sequence> extra_reads; for (unsigned i = 0; i < option.extra_read_files.size(); ++i) { if (option.extra_read_files[i] != "") { deque<Sequence> reads; ReadSequence(option.extra_read_files[i], reads); extra_reads.insert(extra_reads.end(), reads.begin(), reads.end()); } } cout << "reads " << assembly_info.reads.size() << endl; cout << "long reads " << assembly_info.long_reads.size() << endl; cout << "extra reads " << extra_reads.size() << endl; assembly_info.long_reads.insert(assembly_info.long_reads.end(), extra_reads.begin(), extra_reads.end()); assembly_info.ClearStatus(); read_length = assembly_info.read_length(); cout << "read_length " << read_length << endl; if (option.is_pre_correction) { int kmer_size = (option.maxk + option.mink)/2; cout << "kmer " << kmer_size << endl; BuildHashGraph(kmer_size); AlignReads(option.contig_file(kmer_size), option.align_file(kmer_size)); CorrectReads(kmer_size); assembly_info.ClearStatus(); } int old_kmer_size = 0; int kmer_size = option.mink; while (true) { cout << "kmer " << kmer_size << endl; if (kmer_size >= (option.mink + option.maxk)/2 || kmer_size == option.maxk) assembly_info.ref_contigs.clear(); if (kmer_size == option.mink) BuildHashGraph(kmer_size); else Iterate(old_kmer_size, kmer_size); if (kmer_size < option.maxk) { AlignReads(option.contig_file(kmer_size), option.align_file(kmer_size)); CorrectReads(kmer_size); assembly_info.ClearStatus(); old_kmer_size = kmer_size; kmer_size = 
min(option.maxk, kmer_size + option.step); if (old_kmer_size == option.maxk) break; } else break; } kmer_size = option.maxk; deque<Sequence> contigs; deque<string> names; ReadSequence(option.contig_file(kmer_size), contigs, names); FastaWriter writer(option.contig_file()); for (unsigned i = 0; i < contigs.size(); ++i) { if ((int)contigs[i].size() >= option.min_contig) writer.Write(contigs[i], names[i]); } Scaffold(option.maxk, option.min_contig); string end_file = option.directory + "/end"; fclose(OpenFile(end_file, "wb")); fflush(stdout); return 0; }
int main(int argc, char *argv[]) { OptionsDescription desc; desc.AddOption("paired", "", is_paired, "if the reads are paired-end"); desc.AddOption("merge", "", is_merged, "if the reads are paired-end in two files"); try { desc.Parse(argc, argv); if (argc < 2) throw logic_error("not enough parameters"); } catch (exception &e) { cerr << e.what() << endl; cerr << "fq2fa - Filter out fasta sequence containing N." << endl; cerr << "Usage: filterfa tmp.fa out.fa " << endl; cerr << " filterfa --paired tmp.fa out.fa" << endl; cerr << " filterfa --merged tmp_1.fa tmp_2.fa out.fa" << endl; cerr << "Allowed Options: " << endl; cerr << desc << endl; exit(1); } if (!is_paired && !is_merged) { FastaReader reader(argv[1]); FastaWriter writer(argv[2]); Sequence seq; string comment; while (reader.Read(seq, comment)) { if (seq.IsValid()) { writer.Write(seq, comment); } } } else if (is_merged) { FastaReader reader1(argv[1]); FastaReader reader2(argv[2]); FastaWriter writer(argv[3]); Sequence seq1, seq2; string comment1, comment2; while (reader1.Read(seq1, comment1) && reader2.Read(seq2, comment2)) { if (seq1.IsValid() && seq2.IsValid()) { writer.Write(seq1, comment1); writer.Write(seq2, comment2); } } } else if (is_paired) { FastaReader reader1(argv[1]); FastaWriter writer(argv[2]); Sequence seq1, seq2; string comment1, comment2; while (reader1.Read(seq1, comment1) && reader1.Read(seq2, comment2)) { if (seq1.IsValid() && seq2.IsValid()) { writer.Write(seq1, comment1); writer.Write(seq2, comment2); } } } return 0; }