예제 #1
0
/*
 * Build a hashtable describing every sequence in the FASTA file `refname`.
 *
 * For each sequence returned by read_fasta_sequence()/get_next_sequence() a
 * chrcoverage record is allocated holding:
 *   - length : strlen() of the sequence text
 *   - map    : a buffer of `length` bytes
 *   - cov    : a buffer of `length` bytes
 *   - seq    : a buffer of `length + 1` bytes receiving a copy of the sequence
 * The record is stored under the first token of the sequence header, i.e.
 * the header text up to the first newline, tab, space or end of string.
 * The header is truncated in place to that token.
 *
 * Returns the hashtable created with new_hashtable(12).
 */
static hashtable* ReadReference(const char* const refname)
{
    hashtable* reference = new_hashtable(12);

    sequence* sp;
    for(sp = read_fasta_sequence(refname); sp != NULL; sp = get_next_sequence(sp)){
        // measure the sequence once; every buffer below is sized from it
        const size_t seqlen = strlen((char*)sp->sequence);

        // allocate the per-sequence coverage record and its arrays
        // (presumably ckallocz returns zero-filled memory -- confirm; the
        // extra byte in seq is never written by the memcpy below)
        chrcoverage* cov = ckallocz(sizeof(chrcoverage));
        cov->length = seqlen;
        cov->map = ckallocz(seqlen);
        cov->cov = ckallocz(seqlen);
        cov->seq = ckallocz(seqlen + 1);
        memcpy(cov->seq, sp->sequence, cov->length);

        // keep only the first token of the header: cut it at the first
        // newline, tab or space (or leave it whole if none is present)
        const size_t namelen = strcspn((char*)sp->header, "\n\t ");
        sp->header[namelen] = 0;

        add_hashtable(reference, (char*)sp->header, namelen, cov);
    }

    // NOTE(review): the loop only ends once sp is NULL, so this call always
    // receives NULL. Whether the reader is actually released depends on
    // get_next_sequence()/close_fasta_sequence() -- confirm.
    close_fasta_sequence(sp);
    return reference;
}
예제 #2
0
/*
 * Print per-bin coverage summaries for every sequence in the FASTA file
 * `refName`, one tab-separated line per bin on stdout.
 *
 * refName   : FASTA file; it is read here to visit the sequence names in
 *             file order.
 * reference : hashtable mapping each sequence name (first token of the
 *             header) to its chrcoverage record; lookup uses
 *             must_find_hashtable().
 * binSize   : number of counted positions that make up one bin.
 *
 * Only positions whose map entry equals the character '1' are counted. A bin
 * is closed as soon as `binSize` such positions have been seen, and a final
 * partial bin is emitted if any counted positions remain. Each output line
 * holds: name, bin start, bin end (exclusive), sum of cov over the counted
 * positions, number of counted positions, and the value returned by
 * CalculateGC() for [start, end).
 */
static void PrintBinInfo(const char* const refName,
                         hashtable* const reference,
                         const int binSize)
{
    sequence* sp;
    for(sp = read_fasta_sequence(refName); sp != NULL; sp = get_next_sequence(sp)){
        // reduce the header to its first token (cut at newline, tab or space)
        const size_t nameLen = strcspn(sp->header, "\n\t ");
        sp->header[nameLen] = 0;

        chrcoverage* chrcov = (chrcoverage*)must_find_hashtable(reference,
                               sp->header, nameLen);

        int covTotal = 0;   // coverage accumulated in the open bin
        int filled = 0;     // counted positions in the open bin
        u64 binStart = 0;   // first coordinate of the open bin

        for (u64 pos = 0; pos < chrcov->length; pos++) {
            // positions not flagged '1' do not contribute to any bin
            if (chrcov->map[pos] != '1') continue;

            covTotal += chrcov->cov[pos];
            filled += 1;
            if (filled != binSize) continue;

            // the bin is full: report it and open the next one right after
            const u64 binEnd = pos + 1;
            const int gc = CalculateGC(chrcov, binStart, binEnd);
            printf("%s\t%" PRIu64 "\t%" PRIu64 "\t%d\t%d\t%d\n",
                sp->header, binStart, binEnd, covTotal, binSize, gc);
            filled = 0;
            covTotal = 0;
            binStart = binEnd;
        }

        // flush the trailing, partially filled bin up to the sequence end
        if (filled > 0) {
            const int gc = CalculateGC(chrcov, binStart, chrcov->length);
            printf("%s\t%" PRIu64 "\t%" PRIu64 "\t%d\t%d\t%d\n",
                sp->header, binStart, chrcov->length, covTotal, filled, gc);
        }
    }

    // NOTE(review): sp is always NULL here because the loop only exits on
    // NULL; confirm that close_fasta_sequence() is meant to be called so.
    close_fasta_sequence(sp);
}
/// @brief Fetch the next read, moving on to the following input file when
///        the current one yields nothing.
///
/// Asks get_next_seq_from_file() for a read from the file at
/// current_file_indx. When that fails and current_file_indx is still below
/// nb_files - 1, the current file is closed, the index is advanced, the next
/// file is opened and the attempt is repeated.
///
/// @param seq_read forwarded to get_next_seq_from_file() (presumably receives
///                 the read's bases -- confirm against that helper).
/// @param len      forwarded to get_next_seq_from_file() (presumably receives
///                 the read length -- confirm against that helper).
/// @return true as soon as get_next_seq_from_file() succeeds; false once it
///         fails on the last file.
bool reads_parsing::get_next_sequence(std::string &seq_read, int &len)
{
    for (;;)
    {
        if (get_next_seq_from_file(seq_read, len, current_file_indx))
        {
            return true;
        }

        // nothing left in the current file and no further file to try
        if (!(current_file_indx < nb_files - 1))
        {
            return false;
        }

        // switch to the next file, then retry
        close_file(current_file_indx);
        current_file_indx++;
        open_file(current_file_indx);
    }
}