コード例 #1
0
ファイル: options.cpp プロジェクト: boostcache/boostcache
    void Options::parseOptions(int argc, char **argv)
    {
        namespace po = boost::program_options;

        /* support for -vv -vvvv etc. */
        for (std::string s = "vv"; s.length() <= MAX_VERBOSE_LEVEL; s.append("v")) {
            m_hiddenOptions.add_options()(s.c_str(), "verbose");
        }

        m_visibleOptions.add_options()
            ("help,h", "Produce help message")
            ("verbose,v",
             "Enable verbosity (optionally specify level, more v - more debug messages)")
            ("version,V", "Print version")
            ("config,c", po::value<std::string>()->default_value("/etc/boostcached"),
             "Setup custom config file")
            ("logFile,l", po::value<std::string>(),
             "Output log (used instead of stdout, can contain modifiers)")
        ;

        additionalOptions();

        int style = (po::command_line_style::unix_style ^ po::command_line_style::allow_guessing) |
                    po::command_line_style::allow_long_disguise;

        OptionsDescription allOptions;
        allOptions.add(m_hiddenOptions);
        allOptions.add(m_visibleOptions);
        po::store(po::command_line_parser(argc, argv)
                  .style(style)
                  .options(allOptions)
                  .run(), m_variablesMap);

        std::string config = getValue<std::string>("config");
        if (config.size()) {
            std::ifstream fileStream(config.c_str());
            if (fileStream.is_open()) {
                std::stringstream stringStream;
                parseConfigFile(fileStream, stringStream);
                po::store(po::parse_config_file(stringStream, allOptions), m_variablesMap);
                fileStream.close();
            } else if (!m_variablesMap["config"].defaulted()) {
                throw Exception("Could not read from config file");
            }
        }

        po::notify(m_variablesMap);

        m_expandedOptions["logLevel"] = int(m_variablesMap.count("verbose") ? 1 : 0);
        for (std::string s = "vv"; s.length() <= MAX_VERBOSE_LEVEL; s.append("v")) {
            if (m_variablesMap.count(s)) {
                m_expandedOptions["logLevel"] = int(s.length());
            }
        }
    }
コード例 #2
0
ファイル: print_graph.cpp プロジェクト: binma/idba
/**
 * Build a k-mer hash graph from the sequences in argv[1], assemble it into
 * contigs, print the component strings of the resulting contig graph to
 * stdout and write the contigs to argv[2].
 *
 * Options: --kmer/-k (default 50) and --max_length (default 1000000); every
 * input sequence longer than max_length is truncated to max_length.
 *
 * @return 0 on success, 1 when the two positional file arguments are missing.
 */
int main(int argc, char *argv[])
{
    int kmer_size = 50;
    int max_length = 1000000;
    
    OptionsDescription desc;
    desc.AddOption("kmer", "k", kmer_size, "k value");
    desc.AddOption("max_length", "", max_length, "max length");

    desc.Parse(argc, argv);

    // argv[1] and argv[2] are dereferenced below, so refuse to continue
    // without them instead of reading past the argument list.
    // NOTE(review): assumes Parse() leaves the positional arguments at
    // argv[1..] and argc counting them — confirm against OptionsDescription.
    if (argc < 3)
    {
        const char *program = (argc > 0 && argv[0] != NULL) ? argv[0] : "print_graph";
        cerr << "not enough parameters" << endl;
        cerr << "Usage: " << program << " [options] ref.fa contigs.fa" << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        return 1;
    }

    deque<Sequence> refs;
    ReadSequence(argv[1], refs);

    HashGraph hash_graph(kmer_size);
    for (unsigned i = 0; i < refs.size(); ++i)
    {
        // Cap overly long inputs before inserting their k-mers.
        if ((int)refs[i].size() > max_length)
            refs[i].resize(max_length);
        hash_graph.InsertKmers(refs[i]);
    }

    hash_graph.Refresh();
    hash_graph.AddAllEdges();

    deque<Sequence> contigs;
    deque<ContigInfo> contig_infos;
    hash_graph.Assemble(contigs, contig_infos);

    cerr << "build" << endl;

    ContigGraph contig_graph(kmer_size);
    contig_graph.Initialize(contigs, contig_infos);

    cerr << "kmer " << hash_graph.num_vertices() << " branches " << contigs.size()<< endl;

    deque<deque<ContigGraphVertexAdaptor> > components;
    deque<string> component_strings;
    contig_graph.GetComponents(components, component_strings);

    for (unsigned i = 0; i < component_strings.size(); ++i)
        cout << component_strings[i] << endl;

    // NOTE(review): the "conitg" prefix looks like a typo for "contig"; it is
    // left untouched because it ends up in the output file.
    WriteSequence(argv[2], contigs, "conitg");

    return 0;
}
コード例 #3
0
ファイル: fa2fq.cpp プロジェクト: binma/idba
int main(int argc, char *argv[])
{
    OptionsDescription desc;
    desc.AddOption("paired", "", is_paired, "if the reads are paired-end in one file");
    desc.AddOption("merge", "", is_merged, "if the reads are paired-end in two files");
    desc.AddOption("filter", "", is_filtered, "filter out reads containing 'N'");

    try
    {
        desc.Parse(argc, argv);

        if (argc < 3)
            throw logic_error("not enough parameters");

    }
    catch (exception &e)
    {
        cerr << e.what() << endl;
        cerr << "fq2fa - Convert Fastq sequences to Fasta sequences." << endl;
        cerr << "Usage: fq2fa tmp.fq tmp.fa [...] " << endl;
        cerr << "       fq2fa --paired tmp.fq tmp.fa" << endl;
        cerr << "       fq2fa --merge tmp_1.fq tmp_2.fq tmp.fa" << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        exit(1);
    }

    FastaReader reader(argv[1]);
    FastqWriter writer(argv[2]);

    Sequence seq;
    string comment;
    while (reader.Read(seq, comment))
    {
        string quality;
        quality.append(seq.size(), 33 + 40);
        writer.Write(seq, comment, quality);
    }

    return 0;
}
コード例 #4
0
ファイル: parallel_blat.cpp プロジェクト: binma/idba
int main(int argc, char *argv[])
{
    desc.AddOption("num_threads", "", num_threads, "number of threads");
    desc.AddOption("similar", "", similar, "similarity");

    try
    {
        desc.Parse(argc, argv);
        
        if (argc < 3)
            throw logic_error("not enough parameters");
    }
    catch (exception &e)
    {
        cerr << e.what() << endl;
        cerr << "parallel_blat - use blat to alignment parallely." << endl;
        cerr << "Usage: parallel_blat ref.fa query.fa" << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        exit(1);
    }

    ref_filename = argv[1];
    query_filename = argv[2];

    split_files.resize(num_threads);
    for (int i = 0; i < num_threads; ++i)
        split_files[i] = FormatString("%s.split%d", query_filename.c_str(), i);
    CreateFile(query_filename + ".blat");

    deque<string> options;
//    options.push_back(" -noHead -tileSize=18 -minMatch=40 -maxGap=0 -maxIntron=1000 -minIdentity=95 -minScore=100 ");
//    options.push_back(" -noHead -tileSize=18 -minMatch=15 -maxGap=0 -maxIntron=1000 -minIdentity=95 -minScore=100 ");
//    options.push_back(" -noHead -tileSize=18 -minMatch=4 ");
    options.push_back(" -noHead ");

    for (unsigned i = 0; i < options.size(); ++i)
        ParallelBlat(options[i]);

    return 0;
}
コード例 #5
0
ファイル: sdbg_builder.cpp プロジェクト: ch11y/megahit
/**
 * Parse and validate the command line of the "count" phase into phase1_options.
 *
 * Defaults filled in after parsing:
 *  - num_cpu_threads == 0    -> omp_get_max_threads()
 *  - num_output_threads == 0 -> max(1, num_cpu_threads / 3)
 *  - gpu_mem == 0            -> free GPU memory, or a fixed value when built
 *                               with DISABLE_GPU
 *
 * On any parse or validation error the message, a usage line and the option
 * list are printed to stderr and the process exits with status 1.
 */
void ParsePhase1Option(int argc, char *argv[]) {
    OptionsDescription desc;

    desc.AddOption("kmer_k", "k", phase1_options.kmer_k, "kmer size");
    desc.AddOption("min_kmer_frequency", "m", phase1_options.min_edge_freq, "min frequency to output an edge");
    // The help text no longer promises "0 for auto detect": host_mem == 0 is rejected below.
    desc.AddOption("host_mem", "", phase1_options.host_mem, "memory to be used. No more than 95% of the free memory is recommended. Must be non-zero (auto detect is disabled).");
    desc.AddOption("gpu_mem", "", phase1_options.gpu_mem, "gpu memory to be used. 0 for auto detect.");
    desc.AddOption("max_read_length", "", phase1_options.max_read_length, "max read length");
    desc.AddOption("num_cpu_threads", "", phase1_options.num_cpu_threads, "number of CPU threads. At least 2.");
    desc.AddOption("num_output_threads", "", phase1_options.num_output_threads, "number of threads for output. Must be less than num_cpu_threads");
    desc.AddOption("input_file", "", phase1_options.input_file, "input fastx file, can be gzip'ed. \"-\" for stdin.");
    desc.AddOption("output_prefix", "", phase1_options.output_prefix, "output prefix");

    try {
        desc.Parse(argc, argv);
        if (phase1_options.input_file == "") {
            throw std::logic_error("No input file!");
        }

        if (phase1_options.num_cpu_threads == 0) {
            phase1_options.num_cpu_threads = omp_get_max_threads();
        }

        if (phase1_options.num_output_threads == 0) {
            phase1_options.num_output_threads = std::max(1, phase1_options.num_cpu_threads / 3);
        }

        if (phase1_options.host_mem == 0) {
            throw std::logic_error("Please specify the host memory!");
            // Auto detection is currently disabled:
            // struct sysinfo s_info;
            // sysinfo(&s_info);
            // phase1_options.host_mem = (s_info.freeram + s_info.bufferram) * 0.95;
        }

        if (phase1_options.gpu_mem == 0) {
#ifndef DISABLE_GPU
            size_t free_gpu_mem, total_gpu_mem;
            get_cuda_memory(free_gpu_mem, total_gpu_mem);
            phase1_options.gpu_mem = free_gpu_mem;
#else
            // we "simulate" the GTX680 here
            phase1_options.gpu_mem = 4243689472ULL;
#endif
        }

        // "< 2" rather than "== 1" so that negative values get this message
        // too instead of the unrelated output-thread one below.
        if (phase1_options.num_cpu_threads < 2) {
            throw std::logic_error("Number of CPU threads is at least 2!");
        }
        if (phase1_options.num_output_threads >= phase1_options.num_cpu_threads) {
            throw std::logic_error("Number of output threads must be less than number of CPU threads!");
        }
    } catch (const std::exception &e) {
        std::cerr << e.what() << std::endl;
        // output_prefix has no short form in this phase, so spell out the long option.
        std::cerr << "Usage: builder count --input_file fastx_file --output_prefix out" << std::endl;
        std::cerr << "Options:" << std::endl;
        std::cerr << desc << std::endl;
        exit(1);
    }
}
コード例 #6
0
ファイル: sdbg_builder.cpp プロジェクト: ch11y/megahit
/**
 * Parse and validate the command line of the "build" phase into phase2_options.
 *
 * Defaults filled in after parsing:
 *  - num_cpu_threads == 0    -> omp_get_max_threads()
 *  - num_output_threads == 0 -> max(1, num_cpu_threads / 3)
 *  - gpu_mem == 0            -> free GPU memory, or a fixed value when built
 *                               with DISABLE_GPU
 *
 * On any parse or validation error the message, a usage line and the option
 * list are printed to stderr and the process exits with status 1.
 */
void ParsePhase2Option(int argc, char *argv[]) {
    OptionsDescription desc;

    // The help text no longer promises "0 for auto detect": host_mem == 0 is rejected below.
    desc.AddOption("host_mem", "", phase2_options.host_mem, "memory to be used. No more than 95% of the free memory is recommended. Must be non-zero (auto detect is disabled).");
    desc.AddOption("gpu_mem", "", phase2_options.gpu_mem, "gpu memory to be used. 0 for auto detect.");
    desc.AddOption("num_cpu_threads", "t", phase2_options.num_cpu_threads, "number of CPU threads. At least 2.");
    desc.AddOption("num_output_threads", "", phase2_options.num_output_threads, "number of threads for output. Must be less than num_cpu_threads");
    desc.AddOption("input_prefix", "", phase2_options.input_prefix, "files input_prefix.edges.* output by count module, can be gzip'ed.");
    desc.AddOption("num_edge_files", "", phase2_options.num_edge_files, "the number of files with name input_prefix.edges.*");
    desc.AddOption("output_prefix", "o", phase2_options.output_prefix, "output prefix");
    desc.AddOption("need_mercy", "", phase2_options.need_mercy, "to add mercy edges. The file input_prefix.cand output by count module should exist.");
    desc.AddOption("max_read_length", "", phase2_options.max_read_length, "max read length");

    try {
        desc.Parse(argc, argv);
        if (phase2_options.input_prefix == "") {
            throw std::logic_error("No input prefix!");
        }
        if (phase2_options.num_edge_files == 0) {
            throw std::logic_error("Number of edge files cannot be 0!");
        }

        if (phase2_options.num_cpu_threads == 0) {
            phase2_options.num_cpu_threads = omp_get_max_threads();
        }

        if (phase2_options.num_output_threads == 0) {
            phase2_options.num_output_threads = std::max(1, phase2_options.num_cpu_threads / 3);
        }

        if (phase2_options.host_mem == 0) {
            throw std::logic_error("Please specify the host memory!");
            // Auto detection is currently disabled:
            // struct sysinfo s_info;
            // sysinfo(&s_info);
            // phase2_options.host_mem = (s_info.freeram + s_info.bufferram) * 0.95;
        }

        if (phase2_options.gpu_mem == 0) {
#ifndef DISABLE_GPU
            size_t free_gpu_mem, total_gpu_mem;
            get_cuda_memory(free_gpu_mem, total_gpu_mem);
            phase2_options.gpu_mem = free_gpu_mem;
#else
            // we "simulate" the GTX680 here
            phase2_options.gpu_mem = 4243689472ULL;
#endif
        }

        // "< 2" rather than "== 1" so that negative values get this message
        // too instead of the unrelated output-thread one below.
        if (phase2_options.num_cpu_threads < 2) {
            throw std::logic_error("Number of CPU threads is at least 2!");
        }
        if (phase2_options.num_output_threads >= phase2_options.num_cpu_threads) {
            throw std::logic_error("Number of output threads must be less than number of CPU threads!");
        }
    } catch (const std::exception &e) {
        std::cerr << e.what() << std::endl;
        std::cerr << "Usage: builder build --input_prefix input --num_edge_files num -o out" << std::endl;
        std::cerr << "Options:" << std::endl;
        std::cerr << desc << std::endl;
        exit(1);
    }
}
コード例 #7
0
ファイル: assembler.cpp プロジェクト: fw1121/megahit
/**
 * Fill the global `options` from the command line of the assembler.
 *
 * Returns normally once parsing succeeded and a succinct de Bruijn graph name
 * was supplied. If parsing throws a std::exception, or the name is empty, the
 * error text, a usage line and the option list go to stderr and the process
 * exits with status 1.
 */
void ParseOption(int argc, char *argv[]) {
    OptionsDescription desc;

    // Input / output and threading.
    desc.AddOption("sdbg_name", "s", options.sdbg_name, "succinct de Bruijn graph name");
    desc.AddOption("output_prefix", "o", options.output_prefix, "output prefix");
    desc.AddOption("num_cpu_threads", "t", options.num_cpu_threads, "number of cpu threads");

    // Graph-cleaning parameters.
    desc.AddOption("max_tip_len", "", options.max_tip_len, "max length for tips to be removed. -1 for 2k");
    desc.AddOption("min_final_contig_len", "", options.min_final_contig_len, "min length to output a final contig");
    desc.AddOption("no_bubble", "", options.no_bubble, "do not remove bubbles");
    desc.AddOption("bubble_remove_ratio", "", options.bubble_remove_ratio, "bubbles with multiplicities lower than this ratio times to highest of its group will be removed");
    desc.AddOption("remove_low_local", "", options.remove_low_local, "remove low local depth contigs progressively");
    desc.AddOption("low_local_ratio", "", options.low_local_ratio, "ratio to define low depth contigs");
    desc.AddOption("is_final_round", "", options.is_final_round, "this is the last iteration");

    try {
        desc.Parse(argc, argv);
        if (options.sdbg_name.empty()) {
            throw std::logic_error("no succinct de Bruijn graph name!");
        }
        return;  // everything is fine, nothing more to do
    } catch (const std::exception &failure) {
        std::cerr << failure.what() << std::endl;
    }

    // Only reached after a failure was reported above.
    std::cerr << "Usage: " << argv[0] << " -s sdbg_name -o output_prefix" << std::endl;
    std::cerr << "options:" << std::endl;
    std::cerr << desc << std::endl;
    exit(1);
}
コード例 #8
0
ファイル: validate_reads_blat.cpp プロジェクト: binma/idba
int main(int argc, char *argv[])
{
    int min_contig = 100;
    double similar = 0.95;
    double complete_rate = 0.8;
    bool is_local = false;
    
    OptionsDescription desc;
    desc.AddOption("min_contig", "", min_contig, "minimum contigs");
    desc.AddOption("similar", "", similar, "similarity");
    desc.AddOption("complete_rate", "", complete_rate, "completeness");
    desc.AddOption("is_local", "", is_local, "local align");

    try
    {
        desc.Parse(argc, argv);
    }
    catch (exception &e)
    {
        cerr << e.what() << endl;
        cerr << "validate_contigs_blat - validate contigs by blat." << endl;
        cerr << "Usage: validate_contigs_blat ref.fa contigs.fa." << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        exit(1);
    }

    deque<Sequence> refs;
    deque<string> ref_names;
    ReadSequence(argv[1], refs, ref_names);

    deque<Sequence> contigs;
    deque<string> contig_names;
    ReadSequence(argv[2], contigs, contig_names);

    vector<int> is_found(refs.size());
    vector<vector<double> > flags(refs.size());
    map<string, int> dict;
    for (unsigned i = 0; i < refs.size(); ++i)
    {
        flags[i].resize(refs[i].size(), false);
        size_t index = ref_names[i].find(' ');
        if (index != string::npos)
            ref_names[i].resize(index);
        dict[ref_names[i]] = i;
    }

    int num_gaps = 0;
    for (unsigned i = 0; i < contigs.size(); ++i)
    {
        size_t index = contig_names[i].find(' ');
        if (index != string::npos)
            contig_names[i].resize(index);

        bool is_new_gap = true;
        for (unsigned j = 0; j < contigs[i].size(); ++j)
        {
            if (contigs[i][j] == 4)
            {
                if (is_new_gap)
                {
                    is_new_gap = false;
                    ++num_gaps;
                }
            }
            else
                is_new_gap = true;
        }
    }

    string blat_file = string(argv[2]) + ".blat";
    FILE *fblat = OpenFile(blat_file, "rb");

    map<string, int> valid_contigs;
    deque<int> valid_lengths;
    int64_t num_mismatch = 0;
    while (fgets(line, MaxLine, fblat) != NULL)
    {
        BlatRecord record;
        record.Parse(line);

        deque<BlatRecord> records;
        records.push_back(record);

        while (fgets(line, MaxLine, fblat) != NULL)
        {
            record.Parse(line);
            if (record.query_name == records.back().query_name)
                records.push_back(record);
            else
            {
                fseek(fblat, -strlen(line), SEEK_CUR);
                break;
            }
        }

        int index = 0;
        for (unsigned i = 0; i < records.size(); ++i)
        {
            if (records[i].match_count > similar * records[i].query_length
                    && records[i].match_count > similar * abs(record.ref_to - record.ref_from))
                records[index++] = records[i];
        }
        records.resize(index);

        for (unsigned i = 0; i < records.size(); ++i)
        {
            record = records[i];
            int ref_id = dict[record.ref_name];

            //if (record.match_count > similar * record.query_length && record.query_length >= min_contig
            if ((record.match_count > similar * record.query_length 
                        || (is_local && record.match_count > similar * abs(record.query_to - record.query_from)))
            //if (record.match_count > similar * abs(record.query_to - record.query_from)
                    && abs(record.query_to - record.query_from) >= min_contig
                    && record.match_count > similar * abs(record.ref_to - record.ref_from)
               )
            {
                //if (record.match_count >= similar * record.ref_length)
                if (record.match_count >= complete_rate * record.ref_length)
                    is_found[ref_id] = true;
    //            else
    //                continue;

                int not_used = 0;
                for (unsigned i = 0; i < record.blocks.size(); ++i)
                {
                    BlatBlock block = record.blocks[i];
                    for (unsigned j = block.ref_from; j < block.ref_from + block.size; ++j)
                    {
                        if (flags[ref_id][j] == false)
                        {
                            //flags[ref_id][j] = true;
                            not_used++;
                        }
                        
                        flags[ref_id][j] += 1.0 / records.size();
                    }
                }

                if (valid_contigs.find(record.query_name) == valid_contigs.end())
                {
                    valid_contigs[record.query_name] = record.mismatch_count;
                    valid_lengths.push_back(record.query_to - record.query_from);
                }
                else
                {
                    valid_contigs[record.query_name] = min(record.mismatch_count, (int64_t)valid_contigs[record.query_name]);

                    if (not_used > similar * record.query_length)
                        valid_lengths.push_back(record.query_to - record.query_from);
                }
            }
        }
    }

    for (map<string, int>::iterator p = valid_contigs.begin(); p != valid_contigs.end(); ++p)
    {
        num_mismatch += p->second;
    }

    long long count = 0;
    long long total = 0;
    for (unsigned k = 0; k < flags.size(); ++k)
    {
        for (unsigned i = 0; i < flags[k].size(); ++i)
        {
            if (flags[k][i])
                ++count;
            ++total;
        }
    }

    //valid_lengths.push_back(60000);
    sort(valid_lengths.begin(), valid_lengths.end());
    reverse(valid_lengths.begin(), valid_lengths.end());

    long long n50 = 0;
    long long sum = 0;
    long long n80 = 0;

    for (unsigned i = 0; i < valid_lengths.size(); ++i)
    {
        sum += valid_lengths[i];
        if (sum >= 0.5 * total && n50 == 0)
            n50 = valid_lengths[i];
        if (sum >= 0.8 * total && n80 == 0)
            n80 = valid_lengths[i];
    }
    cout << "total " << total << " " << sum << endl;

    long long maximum = 0;
    long long mean = 0;
    if (valid_lengths.size() > 0)
    {
        maximum = valid_lengths[0];
        mean = sum / valid_lengths.size();
    }

    long long sum_wrong = 0;
    long long num_wrong = 0;
    long long corret_contigs = 0;
    long long sum_corret = 0;
    int last_id = 0;
    int last_error = 0;
    deque<int> contig_flags(contigs.size(), false);
    FastaWriter error_writer(FormatString("%s.error.fa", argv[2]));
    for (unsigned i = 0; i < contigs.size(); ++i)
    {
        if ((int)contigs[i].size() < min_contig)
            continue;

        if (valid_contigs.find(contig_names[i]) == valid_contigs.end())
        {
            ++num_wrong;
            sum_wrong += contigs[i].size();
            error_writer.Write(contigs[i], contig_names[i]);
        }
        else
        {
            last_id = i;
            last_error = sum_wrong;

            ++corret_contigs;
            sum_corret += contigs[i].size();
            contig_flags[i] = true;
            //correct_writer.Write(contigs[i], contig_names[i]);
        }
    }

    printf("last id %d %d total contigs %d gaps %d\n", last_id, last_error, (int)(num_wrong + corret_contigs), num_gaps);
    printf("contigs: %lld N50: %lld coverage: %.2f%% max: %lld mean: %lld total: %lld/%lld N80: %lld\n",
            (long long)valid_contigs.size(), n50, count * 100.0 / total, maximum, mean, count, total, n80);
    printf("substitution error: %.4f%% wrong contigs: %lld %lld correct: %lld %lld %s\n", 
            num_mismatch * 100.0 /sum, num_wrong, sum_wrong, corret_contigs, sum_corret, argv[2]);

    deque<int> lengths;
    for (unsigned i = 0; i < refs.size(); ++i)
    {
        int last = 0;
        for (unsigned j = 0; j < refs[i].size(); ++j)
        {
            if (flags[i][j] == 0)
            {
                if (flags[i][last])
                {
                    lengths.push_back(j - last);
                    last = j;
                }
            }
            else
            {
                if (flags[i][last] == 0)
                    last = j;
            }
        }
    }
    sort(lengths.begin(), lengths.end());
    reverse(lengths.begin(), lengths.end());

    deque<Sequence> gaps;
    deque<bool> is_no_long_gaps(refs.size());
    for (unsigned i = 0; i < refs.size(); ++i)
    {
        deque<int> tmp;
        Sequence gap;

        if (flags[i][0])
            tmp.push_back(0);

        for (unsigned j = 0; j < refs[i].size(); ++j)
        {
            if (flags[i][j] == false)
            {
                gap.Append(refs[i][j]);
            }
            else
            {
                if (gap.size() > 0)
                {
                    gaps.push_back(gap);
                    tmp.push_back(gap.size());
                }
                gap.resize(0);
            }
        }

        if (gap.size() > 0)
        {
            gaps.push_back(gap);
            tmp.push_back(gap.size());
        }
        else
            tmp.push_back(0);

        is_no_long_gaps[i] = true;
        for (unsigned j = 1; j+1 < tmp.size(); ++j)
        {
            if (tmp[j] > 50)
                is_no_long_gaps[i] = false;
        }
    }
    WriteSequence(FormatString("%s.gap.fa", argv[2]), gaps, "gap");

    FastaWriter ref_writer(argv[1] + string(".found.fa"));
    FILE *ffcound_list = OpenFile(argv[1] + string(".found.fa.list"), "wb");
    int found = 0;
    int covered = 0;
    int total_contigs = 0;
    for (unsigned i = 0; i < refs.size(); ++i)
    {
        int count = 0;
        double total_hit = 0;
        for (unsigned j = 0; j < flags[i].size(); ++j)
        {
            if (flags[i][j])
                ++count;
            total_hit += flags[i][j];
        }

        if (count > complete_rate * refs[i].size() 
                //&& is_no_long_gaps[i]
                //&& 1.0 * total_hit / count > 2
                )
        {
            ++covered;
            ref_writer.Write(refs[i], ref_names[i]);
            fprintf(ffcound_list, "%s %.4f\n", ref_names[i].c_str(), 1.0 * total_hit / count);
        }
        if (is_found[i])
            ++found;
    }

    int64_t total_bases = 0;
    for (unsigned i = 0; i < contigs.size(); ++i)
    {
        if ((int)contigs[i].size() >= min_contig)
        {
            ++total_contigs;
            total_bases += contigs[i].size();
        }
    }

    cout << corret_contigs << " " << total_contigs << " " << total_bases << endl;
    printf("cover ref: %d %d\n", covered, (int)refs.size());
    printf("found ref: %d %d\n", found, (int)refs.size());
    printf("precision: %.2f%% %d %d\n", 100.0 * corret_contigs / total_contigs, (int)corret_contigs, total_contigs);

    FastaWriter correct_writer(FormatString("%s.correct.fa", argv[2]));
    for (unsigned i = 0; i < contigs.size(); i += 2)
    {
        if (contig_flags[i] || contig_flags[i+1])
        {
            correct_writer.Write(contigs[i], contig_names[i]);
            correct_writer.Write(contigs[i+1], contig_names[i+1]);
        }
    }
    
    return 0;
}
コード例 #9
0
ファイル: idba.cpp プロジェクト: binma/idba
int main(int argc, char *argv[])
{
    OptionsDescription desc;
    
    desc.AddOption("out", "o", option.directory, "output directory");
    desc.AddOption("read", "r", option.read_file, FormatString("fasta read file (<=%d)", ShortSequence::max_size()));
    desc.AddOption("read_level_2", "", option.extra_read_files[0], "paired-end reads fasta for second level scaffolds");
    desc.AddOption("read_level_3", "", option.extra_read_files[1], "paired-end reads fasta for third level scaffolds");
    desc.AddOption("read_level_4", "", option.extra_read_files[2], "paired-end reads fasta for fourth level scaffolds");
    desc.AddOption("read_level_5", "", option.extra_read_files[3], "paired-end reads fasta for fifth level scaffolds");
    desc.AddOption("long_read", "l", option.long_read_file, FormatString("fasta long read file (>%d)", ShortSequence::max_size()));
    //desc.AddOption("reference", "", option.reference, "reference genome");
    desc.AddOption("mink", "", option.mink, FormatString("minimum k value (<=%d)", Kmer::max_size()));
    desc.AddOption("maxk", "", option.maxk, FormatString("maximum k value (<=%d)", Kmer::max_size()));
    desc.AddOption("step", "", option.step, "increment of k-mer of each iteration");
    //desc.AddOption("inner_mink", "", option.inner_mink, "inner minimum k value");
    //desc.AddOption("inner_step", "", option.inner_step, "inner increment of k-mer");
    desc.AddOption("prefix", "", option.prefix_length, "prefix length used to build sub k-mer table");
    desc.AddOption("min_count", "", option.min_count, "minimum multiplicity for filtering k-mer when building the graph");
    desc.AddOption("min_support", "", option.min_support, "minimum supoort in each iteration");
    desc.AddOption("num_threads", "", option.num_threads, "number of threads");
    desc.AddOption("seed_kmer", "", option.seed_kmer_size, "seed kmer size for alignment");
    desc.AddOption("min_contig", "", option.min_contig, "minimum size of contig");
    desc.AddOption("similar", "", option.similar, "similarity for alignment");
    desc.AddOption("max_mismatch", "", option.max_mismatch, "max mismatch of error correction");
    desc.AddOption("min_pairs", "", option.min_pairs, "minimum number of pairs");
    //desc.AddOption("max_gap", "", option.max_gap, "maximum gap in reference");
    //desc.AddOption("no_local", "", option.is_no_local, "do not use local assembly");
    desc.AddOption("no_coverage", "", option.is_no_coverage, "do not iterate on coverage");
    desc.AddOption("no_correct", "", option.is_no_correct, "do not do correction");
    desc.AddOption("pre_correction", "", option.is_pre_correction, "perform pre-correction before assembly");

    try
    {
        desc.Parse(argc, argv);

        if (option.read_file == "" && option.long_read_file == "")
            throw logic_error("not enough parameters");

        if (option.maxk < option.mink)
            throw invalid_argument("mink is larger than maxk");

        if (option.maxk > (int)Kmer::max_size())
            throw invalid_argument("maxk is too large");
    }
    catch (exception &e)
    {
        cerr << e.what() << endl;
        cerr << "IDBA- Iterative de Bruijn Graph Assembler." << endl;
        cerr << "Usage: idba_ud -r read.fa -o output_dir" << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        exit(1);
    }

    MakeDir(option.directory);

    LogThread log_thread(option.log_file());

    string begin_file = option.directory + "/begin";
    fclose(OpenFile(begin_file, "wb"));

    if (option.num_threads == 0)
        option.num_threads = omp_get_max_threads();
    else
        omp_set_num_threads(option.num_threads);
    cout << "number of threads " << option.num_threads << endl;

    ReadInput(option.read_file, option.long_read_file, assembly_info);
    deque<Sequence> extra_reads;
    for (unsigned i = 0; i < option.extra_read_files.size(); ++i)
    {
        if (option.extra_read_files[i] != "")
        {
            deque<Sequence> reads;
            ReadSequence(option.extra_read_files[i], reads);
            extra_reads.insert(extra_reads.end(), reads.begin(), reads.end());
        }
    }
    cout << "reads " << assembly_info.reads.size() << endl;
    cout << "long reads " << assembly_info.long_reads.size() << endl;
    cout << "extra reads " << extra_reads.size() << endl;

    assembly_info.long_reads.insert(assembly_info.long_reads.end(), extra_reads.begin(), extra_reads.end());
    assembly_info.ClearStatus();

    read_length = assembly_info.read_length();
    cout << "read_length " << read_length << endl;

    if (option.is_pre_correction)
    {
        int kmer_size = (option.maxk + option.mink)/2;
        cout << "kmer " << kmer_size << endl;
        BuildHashGraph(kmer_size);
        AlignReads(option.contig_file(kmer_size), option.align_file(kmer_size));
        CorrectReads(kmer_size);
        assembly_info.ClearStatus();
    }

    int old_kmer_size = 0;
    int kmer_size = option.mink;
    while (true)
    {
        cout << "kmer " << kmer_size << endl;

        if (kmer_size >= (option.mink + option.maxk)/2 || kmer_size == option.maxk)
            assembly_info.ref_contigs.clear();

        if (kmer_size == option.mink)
            BuildHashGraph(kmer_size);
        else
            Iterate(old_kmer_size, kmer_size);

        if (kmer_size < option.maxk)
        {
            AlignReads(option.contig_file(kmer_size), option.align_file(kmer_size));
            CorrectReads(kmer_size);
            assembly_info.ClearStatus();

            old_kmer_size = kmer_size;
            kmer_size = min(option.maxk, kmer_size + option.step);

            if (old_kmer_size == option.maxk)
                break;
        }
        else
            break;
    }

    kmer_size = option.maxk;

    deque<Sequence> contigs;
    deque<string> names;
    ReadSequence(option.contig_file(kmer_size), contigs, names);
    FastaWriter writer(option.contig_file());
    for (unsigned i = 0; i < contigs.size(); ++i)
    {
        if ((int)contigs[i].size() >= option.min_contig)
            writer.Write(contigs[i], names[i]);
    }

    Scaffold(option.maxk, option.min_contig);

    string end_file = option.directory + "/end";
    fclose(OpenFile(end_file, "wb"));

    fflush(stdout);

    return 0;
}
コード例 #10
0
ファイル: filterfa.cpp プロジェクト: binma/idba
int main(int argc, char *argv[])
{
    OptionsDescription desc;
    desc.AddOption("paired", "", is_paired, "if the reads are paired-end");
    desc.AddOption("merge", "", is_merged, "if the reads are paired-end in two files");

    try
    {
        desc.Parse(argc, argv);

        if (argc < 2)
            throw logic_error("not enough parameters");

    }
    catch (exception &e)
    {
        cerr << e.what() << endl;
        cerr << "fq2fa - Filter out fasta sequence containing N." << endl;
        cerr << "Usage: filterfa tmp.fa out.fa " << endl;
        cerr << "       filterfa --paired tmp.fa out.fa" << endl;
        cerr << "       filterfa --merged tmp_1.fa tmp_2.fa out.fa" << endl;
        cerr << "Allowed Options: " << endl;
        cerr << desc << endl;
        exit(1);
    }

    if (!is_paired && !is_merged)
    {
        FastaReader reader(argv[1]);
        FastaWriter writer(argv[2]);

        Sequence seq;
        string comment;
        while (reader.Read(seq, comment))
        {
            if (seq.IsValid())
            {
                writer.Write(seq, comment);
            }
        }
    }
    else if (is_merged)
    {
        FastaReader reader1(argv[1]);
        FastaReader reader2(argv[2]);
        FastaWriter writer(argv[3]);

        Sequence seq1, seq2;
        string comment1, comment2;
        while (reader1.Read(seq1, comment1) && reader2.Read(seq2, comment2))
        {
            if (seq1.IsValid() && seq2.IsValid())
            {
                writer.Write(seq1, comment1);
                writer.Write(seq2, comment2);
            }
        }
    }
    else if (is_paired)
    {
        FastaReader reader1(argv[1]);
        FastaWriter writer(argv[2]);

        Sequence seq1, seq2;
        string comment1, comment2;
        while (reader1.Read(seq1, comment1) && reader1.Read(seq2, comment2))
        {
            if (seq1.IsValid() && seq2.IsValid())
            {
                writer.Write(seq1, comment1);
                writer.Write(seq2, comment2);
            }
        }
    }

    return 0;
}