Exemplo n.º 1
0
// Write the split reads that support a breakpoint to `out`, lined up under the local reference
// sequence around the breakpoint position.
//
// Parameters:
//   genome         - reference genome, handed to get_fasta_subseq.
//   breakpoint     - breakpoint to report; its associated_split_reads vector is sorted in place.
//   fiveprime_end  - selects the layout: when true each read is printed as mapped part followed by
//                    unmapped part, otherwise as unmapped part followed by mapped part.
//   seq_name_dict  - maps breakpoint.breakpoint_tid to a sequence name; std::map::at throws
//                    std::out_of_range when the id is not present.
//   out            - destination stream; every emitted line starts with COMMENT_PREFIX.
//
// Nothing is written when the breakpoint has no associated split reads.
static void report_split_read_support(Genome& genome, MEI_breakpoint& breakpoint, bool fiveprime_end,
                                      std::map<int, std::string>& seq_name_dict, std::ostream& out) {
    std::vector<simple_read>& reads = breakpoint.associated_split_reads;
    if (reads.empty()) {
        // No split reads to report.
        return;
    }

    // Order the reads by the length of the part that is printed left of the breakpoint.
    // NOTE(review): the layout below presumably relies on the comparators putting the read with the
    // longest left-hand part first (5') / last (3') -- confirm against their definitions.
    if (fiveprime_end) {
        std::sort(reads.begin(), reads.end(), comp_simple_read_mapsize);
    } else {
        std::sort(reads.rbegin(), reads.rend(), comp_simple_read_unmapped_seqsize);
    }

    // Compute on-screen distances: `base` is the number of columns left of the breakpoint and
    // `end` the number of columns right of it. The reads are only inspected, so bind references
    // instead of copying whole simple_read objects.
    const simple_read& first = reads.front();
    const simple_read& last = reads.back();
    int base;
    int end;
    if (fiveprime_end) {
        base = static_cast<int>(first.mapped_sequence.length());
        end = static_cast<int>(last.unmapped_sequence.length());
    } else {
        base = static_cast<int>(last.unmapped_sequence.length());
        end = static_cast<int>(first.mapped_sequence.length());
    }

    // Get reference sequence at breakpoint location.
    // NOTE(review): the start coordinate can become negative when the breakpoint lies closer than
    // `base` to the start of the sequence -- confirm get_fasta_subseq copes with that.
    const int offset = fiveprime_end ? 1 : 0;
    std::string reference = get_fasta_subseq(genome, seq_name_dict.at(breakpoint.breakpoint_tid),
                                             breakpoint.breakpoint_pos - base + offset, base + end);

    // Output local reference sequence.
    set_reference_highlight(reference, base, fiveprime_end);
    const std::string REFERENCE_PREFIX = "Reference: ";
    out << COMMENT_PREFIX << REFERENCE_PREFIX << reference << std::endl;

    // Output split reads aligned to reference: each read is indented so that the boundary between
    // its two parts falls in the same column, `base` characters after the prefix.
    const int prefix_width = static_cast<int>(REFERENCE_PREFIX.length());
    for (std::vector<simple_read>::const_iterator read_iter = reads.begin(); read_iter != reads.end();
         ++read_iter) {
        const simple_read& read = *read_iter;
        // Do the arithmetic in int so a read longer than `base` does not go through unsigned
        // wraparound before being narrowed.
        const int left_width = static_cast<int>(fiveprime_end ? read.mapped_sequence.length()
                                                              : read.unmapped_sequence.length());
        const int indent = prefix_width + base - left_width;
        out << COMMENT_PREFIX << get_whitespace(indent);
        if (fiveprime_end) {
            out << read.mapped_sequence << read.unmapped_sequence;
        } else {
            out << read.unmapped_sequence << read.mapped_sequence;
        }
        out << " (name: " << read.name << " sample: " << read.sample_name << ") " << std::endl;
    }
}
Exemplo n.º 2
0
Arquivo: lexer.cpp Projeto: Thun0/tkom
// Scan a single token that starts at the lookahead character `c`.
//
// `str` is cleared and then filled with the characters gathered for the token. `c` is advanced by
// reading from input_file with fgetc, so that on return it holds the first character that was not
// consumed as part of the token. Whitespace and hexadecimal literals are delegated to
// get_whitespace and get_hex.
//
// Returns the token kind; BAD_TOKEN for an unrecognised character or for a '/' that is not
// followed by a second '/'. In the latter case the character after the '/' is appended to `str`
// but also stays in `c`.
//
// NOTE(review): fgetc returns int and the result is stored in a char, so EOF cannot be told apart
// reliably from a data byte here -- confirm how callers detect end of input.
Type Lexer::get_token(char &c, std::string &str)
{
    str.clear();

    if(is_whitespace(c))
        return get_whitespace(c, str);

    // Numeric literal: a run of decimal digits, handed over to get_hex when it starts with "0x"
    // or when a hex character follows the digits.
    if(c == '0' || is_digit(c))
    {
        const bool leading_zero = (c == '0');
        str += c;
        c = fgetc(input_file);

        if(leading_zero && c == 'x')
        {
            str += c;
            c = fgetc(input_file);
            return get_hex(c, str);
        }

        while(is_digit(c))
        {
            str += c;
            c = fgetc(input_file);
        }

        if(is_hex(c))
            return get_hex(c, str);
        return DEC;
    }

    // Word: letters, digits, '.' and '_'. Each character is folded into a 64-bit key, one byte
    // per character, which is later used to classify the word.
    if(is_alpha(c) || c == '.' || c == '_')
    {
        uint64_t key = 0;
        bool only_hex_chars = false;

        if(is_hex(c))
        {
            do
            {
                key = (key << 8) + c;
                str += c;
                c = fgetc(input_file);
            } while(is_hex(c));

            // The word consisted purely of hex characters if no further word character follows.
            only_hex_chars = !is_alpha(c) && !is_digit(c) && c != '.' && c != '_';
        }

        if(only_hex_chars)
        {
            // A pure hex run may also spell an instruction mnemonic.
            if(instruction_set.find(key) != instruction_set.end())
                return HEX_OR_INSTRUCTION;
            return HEX;
        }

        while(is_alpha(c) || is_digit(c) || c == '.' || c == '_')
        {
            key = (key << 8) + c;
            str += c;
            c = fgetc(input_file);
        }
        return get_type_by_hash(key);
    }

    // Punctuation: record the current character and move the lookahead forward.
    const char symbol = c;
    str += symbol;
    c = fgetc(input_file);

    if(symbol == '/')
    {
        // Only "//" is accepted; the character following the first '/' is recorded either way.
        str += c;
        if(c != '/')
            return BAD_TOKEN;
        c = fgetc(input_file);
        return DOUBLE_SLASH;
    }

    switch(symbol)
    {
        case '!':  return EXCLAMATION;
        case '\n': return NEWLINE;
        case '#':  return HASH;
        case ':':  return COLON;
        case ';':  return SEMICOLON;
        case '[':  return LEFT_SQ_BRACKET;
        case ']':  return RIGHT_SQ_BRACKET;
        case ',':  return COMMA;
        case '@':  return AT;
        case '+':  return PLUS;
        case '-':  return MINUS;
        case '<':  return LESS_THAN;
        case '>':  return MORE_THAN;
        default:   return BAD_TOKEN;
    }
}