Ejemplo n.º 1
0
/* F U N C T I O N S *********************************************************/
void process_sff_to_fastq(char *sff_file, int trim_flag) {
    sff_read_header rh;
    sff_read_data rd;
    FILE *sff_fp, *fastq_fp;

    if ( (sff_fp = fopen(sff_file, "r")) == NULL ) {
        fprintf(stderr,
                "[err] Could not open file '%s' for reading.\n", sff_file);
        exit(1);
    }
    
    get_sff_file_size(sff_fp);
    
    read_sff_common_header(sff_fp, &h);
    verify_sff_common_header(&h);


    if ( keep_fastq_orig == true ) {
        vector<string> tmp_rep;
        split_str(string(sff_file), tmp_rep, "//");
    
        if ( ( fastq_fp = fopen( (tmp_rep[tmp_rep.size() - 1].substr(0,tmp_rep[tmp_rep.size() - 1].length()-4) + ".fastq").c_str(), "w") ) == NULL ) {
            fprintf(stderr,
                    "[err] Could not open file '%s' for writing.\n",
                    "");
            exit(1);
        }
    }

    int left_clip = 0, right_clip = 0, nbases = 0;
    char *name;
    char *bases;
    uint8_t *quality;
    //register int i;
    
    unsigned int numreads = h.nreads;
    
    for (int i = 0; i < numreads; i++) { //cout << i << " " << numreads << endl;
        read_sff_read_header(sff_fp, &rh);
        read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases);
        
        //rheaders.push_back(rh);
        // get clipping points 
        get_clip_values(rh, trim_flag, &left_clip, &right_clip);
        nbases = right_clip - left_clip;

        // create bases string 
        bases = get_read_bases(rd, left_clip, right_clip);

        // create quality array 
        quality = get_read_quality_values(rd, left_clip, right_clip);

        //Create new read
        Read *read = new Read();
        
        read->initial_length = nbases;
        read->read = string(bases);
        uint8_t quality_char;
        read->quality = (uint8_t*)malloc(sizeof(uint8_t)*nbases);
        for (int j = 0; j < nbases; j++) 
        {
           quality_char = (quality[j] <= 93 ? quality[j] : 93) + 33;
           read->quality[j] = quality_char;
        }
       
        //read->rd = rd;
        read->flowgram = new uint16_t[h.flow_len];
        for(int j=0; j<h.flow_len; j++) {
                read->flowgram[j] = rd.flowgram[j];
                //cout << rd.flowgram[j] << " " << endl;
                
        }
        
        read->flow_index = (uint8_t*)malloc(sizeof(uint8_t)*nbases);
        for(int j=0; j<nbases; j++) {
                read->flow_index[j] = rd.flow_index[j];
                
        }
        
        read->roche_left_clip = (int) max(1, max(rh.clip_qual_left, rh.clip_adapter_left)) - 1;
        read->roche_right_clip = (int) min( (rh.clip_qual_right    == 0 ? rh.nbases : rh.clip_qual_right   ), (rh.clip_adapter_right == 0 ? rh.nbases : rh.clip_adapter_right) );
        
        reads.push_back(read);
        
        
        string tstr = string(rh.name) + " " + string(itoa(rh.clip_adapter_left,new char[5],10)) +  " " + string(itoa(rh.clip_adapter_right,new char[5],10))+  " " + string(itoa(rh.clip_qual_left,new char[5],10))  +   " " + string(itoa(rh.clip_qual_right,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)); 
        int t_len = tstr.length();
        
        
        // create read name string 
        int name_length = (int) t_len + 1; // account for NULL termination
        name = (char *) malloc( name_length * sizeof(char) );
        if (!name) {
            fprintf(stderr, "Out of memory! For read name string!\n");
            exit(1);
        }
        memset(name, '\0', (size_t) name_length);
        
        read->readID = (char *) malloc( rh.name_len * sizeof(char) );
        //read->readID = rh.name;
        memcpy( read->readID, rh.name, (size_t) rh.name_len );
        
        //strncpy(name, rh.name, (size_t) rh.name_len);
        strncpy(name, tstr.c_str(), (size_t)t_len);
        
        if ( keep_fastq_orig == true )
            construct_fastq_entry(fastq_fp, name, bases, quality, nbases);
        //printf("%d\n",rh.name_len);
        free(name);
        free(bases);
        free(quality);
        free_sff_read_header(&rh);
        free_sff_read_data(&rd);
        
        
    }
    
    read_manifest(sff_fp);

    //free_sff_common_header(&h);
    if ( keep_fastq_orig == true )
        fclose(fastq_fp);
    
    fclose(sff_fp);
}
Ejemplo n.º 2
0
/*
 * Convert an SFF file to FASTQ.
 *
 * Parameters:
 *   sff_file   - path of the SFF input; NULL or "" means read from stdin.
 *   fastq_file - path of the FASTQ output; NULL or "" means write to stdout.
 *   trim_flag  - forwarded unchanged to get_clip_values() to select the
 *                clipping points applied to each read.
 *
 * For every read announced by the common header, the read header and data
 * are loaded, the clipped bases and quality values are extracted and one
 * FASTQ entry is written via construct_fastq_entry().
 *
 * Any failure to open a file, allocate memory or finish writing the output
 * prints a message to stderr and exit(1)s.
 */
void
process_sff_to_fastq(char *sff_file, char *fastq_file, int trim_flag) {
    sff_common_header h;
    sff_read_header rh;
    sff_read_data rd;
    FILE *sff_fp, *fastq_fp;

    /* Treat a NULL path like an empty one instead of passing it to strlen(). */
    const int use_stdin  = (sff_file == NULL   || sff_file[0] == '\0');
    const int use_stdout = (fastq_file == NULL || fastq_file[0] == '\0');

    if ( use_stdin ) {
        sff_fp = stdin;
    }
    /* SFF is a binary format: "rb" prevents newline/EOF translation on
     * platforms that distinguish text and binary streams. */
    else if ( (sff_fp = fopen(sff_file, "rb")) == NULL ) {
        fprintf(stderr,
                "[err] Could not open file '%s' for reading.\n", sff_file);
        exit(1);
    }

    read_sff_common_header(sff_fp, &h);
    verify_sff_common_header(PRG_NAME, VERSION, &h);

    if ( use_stdout ) {
        fastq_fp = stdout;
    }
    else {
        if ( (fastq_fp = fopen(fastq_file, "w")) == NULL ) {
            fprintf(stderr,
                    "[err] Could not open file '%s' for writing.\n",
                    fastq_file);
            exit(1);
        }
    }

    int left_clip = 0, right_clip = 0, nbases = 0;
    char *name;
    char *bases;
    uint8_t *quality;

    /* Count reads in an unsigned type: casting the header field to int could
     * turn a large read count negative and silently skip every read. */
    const unsigned long numreads = (unsigned long) h.nreads;

    for (unsigned long i = 0; i < numreads; i++) {
        read_sff_read_header(sff_fp, &rh);
        read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases);

        /* get clipping points */
        get_clip_values(rh, trim_flag, &left_clip, &right_clip);
        nbases = right_clip - left_clip;

        /* create bases string */
        bases = get_read_bases(rd, left_clip, right_clip);

        /* create quality array */
        quality = get_read_quality_values(rd, left_clip, right_clip);

        /* create read name string: rh.name is copied as name_len bytes and
         * terminated explicitly; calloc() provides the terminating NUL */
        const size_t name_len = (size_t) rh.name_len;
        name = calloc(name_len + 1, sizeof *name);
        if (!name) {
            fprintf(stderr, "Out of memory! For read name string!\n");
            exit(1);
        }
        memcpy(name, rh.name, name_len);

        construct_fastq_entry(fastq_fp, name, bases, quality, nbases);

        free(name);
        free(bases);
        free(quality);
        free_sff_read_header(&rh);
        free_sff_read_data(&rd);
    }

    free_sff_common_header(&h);

    /* Only close streams this function opened. stdout is flushed instead so
     * buffered output is still delivered, and write failures (e.g. a full
     * disk) are reported rather than silently dropped. */
    int write_failed = ferror(fastq_fp);
    if ( use_stdout ) {
        if ( fflush(fastq_fp) != 0 )
            write_failed = 1;
    }
    else if ( fclose(fastq_fp) != 0 ) {
        write_failed = 1;
    }

    if ( !use_stdin )
        fclose(sff_fp);

    if ( write_failed ) {
        fprintf(stderr,
                "[err] Could not finish writing file '%s'.\n",
                use_stdout ? "<stdout>" : fastq_file);
        exit(1);
    }
}