Exemplo n.º 1
0
/**
 * Write one partition line per entry of partition_sizes_ to the file `partfile`.
 *
 * Each line has the form
 *     <alphabet>, <input file name> = <first column>-<last column>
 * with 1-based, inclusive column indices into the concatenated alignment.
 *
 * The alphabet name is determined separately for every partition, so
 * partitions of different sequence types can coexist. It is taken from the
 * first sequence in seqs_ whose slice for that partition contains at least
 * two distinct characters; homogeneous slices (presumably all-missing data
 * such as "NNNN") are skipped. If no sequence has such a slice, the alphabet
 * field is left empty.
 *
 * @param inputFiles  names written for each partition; assumed to have at
 *                    least partition_sizes_.size() entries (not checked here).
 * @param partfile    path of the output file.
 */
void SequenceConcatenater::write_partition_information (vector <string> const& inputFiles,
    string & partfile) {
    ofstream outfile(partfile.c_str());
    int charIndex = 1; // 1-based first column of the current partition

    for (unsigned int i = 0; i < partition_sizes_.size(); i++) {
        const int stopIndex = charIndex + partition_sizes_[i] - 1;
        string alpha = "";

        // Bounded scan: stop at the first informative sequence, but never
        // walk past the end of seqs_ when every slice is homogeneous.
        for (unsigned int j = 0; j < seqs_.size(); j++) {
            const string subseq = seqs_[j].get_sequence().substr((charIndex - 1), partition_sizes_[i]);
            if (subseq.empty()) {
                // zero-length slice: nothing to infer from, and front() would be invalid
                continue;
            }
            // a slice made of one repeated character is not used for inference
            if (subseq.find_first_not_of(subseq.front()) != std::string::npos) {
                // work on a copy so the stored sequence is not truncated
                Sequence terp = seqs_[j];
                terp.set_sequence(subseq);
                alpha = terp.get_alpha_name();
                break;
            }
        }

        outfile << alpha << ", " << inputFiles[i] << " = " << charIndex << "-" << stopIndex << '\n';
        charIndex = stopIndex + 1;
    }
    outfile.close();
}
Exemplo n.º 2
0
/**
 * Read every sequence from the stream into sequences_ (keyed by sequence id)
 * and record the alignment dimensions.
 *
 * The first sequence read defines num_char_. While is_dna_ is still set, the
 * first sequence's alphabet name is also checked, and is_dna_ is cleared if
 * it is "AA". Every later sequence must have the same length; otherwise an
 * error is printed to stderr and the program exits with a non-zero status.
 * num_taxa_ is set to the number of stored sequences.
 *
 * @param pios  input stream to read from; must not be null.
 */
void SequenceCleaner::read_sequences (istream* pios) {
    Sequence seq;
    string retstring;
    int ft = test_seq_filetype_stream(*pios, retstring);
    bool first = true;

    bool more = true;
    while (more) {
        more = read_next_seq_from_stream(*pios, ft, retstring, seq);
        // For file type 2 the reader reports "no more" while `seq` still has
        // to be stored (presumably FASTA, where the last record is only
        // complete at end of input -- confirm against the reader). For every
        // other type a false return means there is nothing left to process.
        if (!more && ft != 2) {
            break;
        }

        sequences_[seq.get_id()] = seq.get_sequence();
        const int num_current_char = static_cast<int>(seq.get_sequence().size());

        if (first) {
            // the first sequence sets the reference length; this also covers
            // input that contains only the trailing record
            num_char_ = num_current_char;
            if (is_dna_ && seq.get_alpha_name() == "AA") {
                is_dna_ = false;
            }
            first = false;
        } else if (num_current_char != num_char_) {
            cerr << "Error: sequences are not all of the same length. Exiting."
                << endl;
            exit(1); // non-zero so callers and scripts can detect the failure
        }
    }
    num_taxa_ = sequences_.size();
}