/* F U N C T I O N S *********************************************************/ void process_sff_to_fastq(char *sff_file, int trim_flag) { sff_read_header rh; sff_read_data rd; FILE *sff_fp, *fastq_fp; if ( (sff_fp = fopen(sff_file, "r")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for reading.\n", sff_file); exit(1); } get_sff_file_size(sff_fp); read_sff_common_header(sff_fp, &h); verify_sff_common_header(&h); if ( keep_fastq_orig == true ) { vector<string> tmp_rep; split_str(string(sff_file), tmp_rep, "//"); if ( ( fastq_fp = fopen( (tmp_rep[tmp_rep.size() - 1].substr(0,tmp_rep[tmp_rep.size() - 1].length()-4) + ".fastq").c_str(), "w") ) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for writing.\n", ""); exit(1); } } int left_clip = 0, right_clip = 0, nbases = 0; char *name; char *bases; uint8_t *quality; //register int i; unsigned int numreads = h.nreads; for (int i = 0; i < numreads; i++) { //cout << i << " " << numreads << endl; read_sff_read_header(sff_fp, &rh); read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases); //rheaders.push_back(rh); // get clipping points get_clip_values(rh, trim_flag, &left_clip, &right_clip); nbases = right_clip - left_clip; // create bases string bases = get_read_bases(rd, left_clip, right_clip); // create quality array quality = get_read_quality_values(rd, left_clip, right_clip); //Create new read Read *read = new Read(); read->initial_length = nbases; read->read = string(bases); uint8_t quality_char; read->quality = (uint8_t*)malloc(sizeof(uint8_t)*nbases); for (int j = 0; j < nbases; j++) { quality_char = (quality[j] <= 93 ? quality[j] : 93) + 33; read->quality[j] = quality_char; } //read->rd = rd; read->flowgram = new uint16_t[h.flow_len]; for(int j=0; j<h.flow_len; j++) { read->flowgram[j] = rd.flowgram[j]; //cout << rd.flowgram[j] << " " << endl; } read->flow_index = (uint8_t*)malloc(sizeof(uint8_t)*nbases); for(int j=0; j<nbases; j++) { read->flow_index[j] = rd.flow_index[j]; } read->roche_left_clip = (int) max(1, max(rh.clip_qual_left, rh.clip_adapter_left)) - 1; read->roche_right_clip = (int) min( (rh.clip_qual_right == 0 ? rh.nbases : rh.clip_qual_right ), (rh.clip_adapter_right == 0 ? rh.nbases : rh.clip_adapter_right) ); reads.push_back(read); string tstr = string(rh.name) + " " + string(itoa(rh.clip_adapter_left,new char[5],10)) + " " + string(itoa(rh.clip_adapter_right,new char[5],10))+ " " + string(itoa(rh.clip_qual_left,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)); int t_len = tstr.length(); // create read name string int name_length = (int) t_len + 1; // account for NULL termination name = (char *) malloc( name_length * sizeof(char) ); if (!name) { fprintf(stderr, "Out of memory! For read name string!\n"); exit(1); } memset(name, '\0', (size_t) name_length); read->readID = (char *) malloc( rh.name_len * sizeof(char) ); //read->readID = rh.name; memcpy( read->readID, rh.name, (size_t) rh.name_len ); //strncpy(name, rh.name, (size_t) rh.name_len); strncpy(name, tstr.c_str(), (size_t)t_len); if ( keep_fastq_orig == true ) construct_fastq_entry(fastq_fp, name, bases, quality, nbases); //printf("%d\n",rh.name_len); free(name); free(bases); free(quality); free_sff_read_header(&rh); free_sff_read_data(&rd); } read_manifest(sff_fp); //free_sff_common_header(&h); if ( keep_fastq_orig == true ) fclose(fastq_fp); fclose(sff_fp); }
void process_sff_to_fastq(char *sff_file, char *fastq_file, int trim_flag) { sff_common_header h; sff_read_header rh; sff_read_data rd; FILE *sff_fp, *fastq_fp; if ( !strlen(sff_file) ) { sff_fp = stdin; } else if ( (sff_fp = fopen(sff_file, "r")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for reading.\n", sff_file); exit(1); } read_sff_common_header(sff_fp, &h); verify_sff_common_header(PRG_NAME, VERSION, &h); // printf("size of header: %d \n", sizeof(sff_common_header)); // printf("\tmagic : 0x%x\n" , h.magic); // printf("\tindex_offset : 0x%llx\n", h.index_offset); // printf("\tindex_len : 0x%x\n" , h.index_len); // printf("\tnumreads : 0x%x\n" , h.nreads); // printf("\theader_len : 0x%x\n" , h.header_len); // printf("\tkey_len : 0x%x\n" , h.key_len); // printf("\tflow_len : 0x%x\n" , h.flow_len); // printf("\tflowgram_fmt : 0x%x\n" , h.flowgram_format); // printf("\tflow : %s\n " , h.flow); // printf("\tkey : %s\n " , h.key); // printf("\n\n"); if ( !strlen(fastq_file) ) { fastq_fp = stdout; } else { if ( (fastq_fp = fopen(fastq_file, "w")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for writing.\n", fastq_file); exit(1); } } int left_clip = 0, right_clip = 0, nbases = 0; char *name; char *bases; uint8_t *quality; register int i; int numreads = (int) h.nreads; for (i = 0; i < numreads; i++) { read_sff_read_header(sff_fp, &rh); read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases); /* get clipping points */ get_clip_values(rh, trim_flag, &left_clip, &right_clip); nbases = right_clip - left_clip; /* create bases string */ bases = get_read_bases(rd, left_clip, right_clip); /* create quality array */ quality = get_read_quality_values(rd, left_clip, right_clip); /* create read name string */ int name_length = (int) rh.name_len + 1; // account for NULL termination name = (char *) malloc( name_length * sizeof(char) ); if (!name) { fprintf(stderr, "Out of memory! For read name string!\n"); exit(1); } memset(name, '\0', (size_t) name_length); strncpy(name, rh.name, (size_t) rh.name_len); construct_fastq_entry(fastq_fp, name, bases, quality, nbases); free(name); free(bases); free(quality); free_sff_read_header(&rh); free_sff_read_data(&rd); } free_sff_common_header(&h); fclose(fastq_fp); fclose(sff_fp); }