/* * Unpacks the 31-byte fixed size part of the SFF common header. * It allocates memory for this and for the flow order and key, but does * not read the flow & key information (as this may not be in buf). * It also checks that the MAGIC and VERSION match as expected. * * Returns sff_common_header* on success * NULL on failure */ sff_common_header *decode_sff_common_header(unsigned char *buf) { sff_common_header *h; if (NULL == (h = (sff_common_header *)xcalloc(1, sizeof(*h)))) return NULL; h->magic = be_int4(*(uint32_t *)(buf+0)); memcpy(h->version, buf+4, 4); h->index_offset = be_int8(*(uint64_t *)(buf+8)); h->index_len = be_int4(*(uint32_t *)(buf+16)); h->nreads = be_int4(*(uint32_t *)(buf+20)); h->header_len = be_int2(*(uint16_t *)(buf+24)); h->key_len = be_int2(*(uint16_t *)(buf+26)); h->flow_len = be_int2(*(uint16_t *)(buf+28)); h->flowgram_format = be_int1(*(uint8_t *)(buf+30)); if (h->magic != SFF_MAGIC || memcmp(h->version, SFF_VERSION, 4)) { xfree(h); return NULL; } if (NULL == (h->flow = (char *)xmalloc(h->flow_len))) return free_sff_common_header(h), NULL; if (NULL == (h->key = (char *)xmalloc(h->key_len))) return free_sff_common_header(h), NULL; return h; }
/*
 * Reads a common header (including variable length components) from a
 * stdio FILE.
 *
 * Returns a pointer to the header on success
 *         NULL on failure (short read, or the fixed-size part could not
 *         be decoded)
 */
static sff_common_header *fread_sff_common_header(FILE *fp) {
    sff_common_header *h;
    unsigned char chdr[31];

    if (31 != fread(chdr, 1, 31, fp))
        return NULL;

    /* decode_sff_common_header() returns NULL on failure; do not
     * dereference the result before checking it. */
    if (NULL == (h = decode_sff_common_header(chdr)))
        return NULL;

    if (h->flow_len != fread(h->flow, 1, h->flow_len, fp)) {
        free_sff_common_header(h);
        return NULL;
    }
    if (h->key_len != fread(h->key, 1, h->key_len, fp)) {
        free_sff_common_header(h);
        return NULL;
    }

    /* Pad to 8 chars */
    fseek(fp, (ftell(fp) + 7) & ~7, SEEK_SET);

    return h;
}
/*
 * Reads a common header (including variable length components) from an mFILE.
 *
 * Returns a pointer to the header on success
 *         NULL on failure (short read, or the fixed-size part could not
 *         be decoded)
 */
sff_common_header *read_sff_common_header(mFILE *mf) {
    sff_common_header *h;
    unsigned char chdr[31];

    if (31 != mfread(chdr, 1, 31, mf))
        return NULL;

    /* decode_sff_common_header() returns NULL on failure; do not
     * dereference the result before checking it. */
    if (NULL == (h = decode_sff_common_header(chdr)))
        return NULL;

    if (h->flow_len != mfread(h->flow, 1, h->flow_len, mf)) {
        free_sff_common_header(h);
        return NULL;
    }
    if (h->key_len != mfread(h->key, 1, h->key_len, mf)) {
        free_sff_common_header(h);
        return NULL;
    }

    /* Pad to 8 chars */
    mfseek(mf, (mftell(mf) + 7) & ~7, SEEK_SET);

    return h;
}
/*
 * Checks that the magic number and version stored in an SFF common header
 * are the ones this code understands.
 *
 * prg_name, prg_version - program name and version quoted in diagnostics
 * h                     - header to check
 *
 * Returns normally when both values match. On a mismatch a diagnostic is
 * written to stderr, free_sff_common_header(h) is called and the process
 * exits with status 2.
 */
void verify_sff_common_header(char *prg_name, char *prg_version, sff_common_header *h) {
    int i;

    /* ensure that the magic file type is valid */
    if (h->magic != SFF_MAGIC) {
        fprintf(stderr, "The SFF header has magic value '%d' \n", h->magic);
        fprintf(stderr,
                "[err] %s (version %s) %s : '%d' \n",
                prg_name,
                prg_version,
                "only knows how to deal an SFF magic value of type",
                SFF_MAGIC);
        free_sff_common_header(h);
        exit(2);
    }

    /* ensure that the version header is valid */
    if (memcmp(h->version, SFF_VERSION, SFF_VERSION_LENGTH)) {
        char *sff_header_version = h->version;
        char valid_header_version[SFF_VERSION_LENGTH] = SFF_VERSION;

        /* The whole diagnostic goes to stderr so it is not split across
         * two streams. Bytes are cast through unsigned char so that
         * values >= 0x80 are not sign-extended by %x. */
        fprintf(stderr, "The SFF file has header version: ");
        for (i = 0; i < SFF_VERSION_LENGTH; i++) {
            fprintf(stderr, "0x%02x ",
                    (unsigned int)(unsigned char)sff_header_version[i]);
        }
        fprintf(stderr, "\n");

        fprintf(stderr,
                "[err] %s (version %s) %s : ",
                prg_name,
                prg_version,
                "only knows how to deal an SFF header version: ");
        for (i = 0; i < SFF_VERSION_LENGTH; i++) {
            fprintf(stderr, "0x%02x ",
                    (unsigned int)(unsigned char)valid_header_version[i]);
        }
        fprintf(stderr, "\n");

        free_sff_common_header(h);
        exit(2);
    }
}
/*
 * Reads an SFF file from an mFILE and decodes it to a Read struct.
 *
 * The common header, one read header and that read's data are loaded
 * from mf. The flow order buffer and the base buffer are moved from the
 * SFF structures into the Read rather than copied; the flow values,
 * base positions and per-base probability arrays are newly allocated.
 *
 * Returns Read* on success
 *         NULL on failure (read error or out of memory)
 */
Read *mfread_sff(mFILE *mf) {
    int i, bpos;
    Read *r;
    sff_common_header *ch;
    sff_read_header *rh;
    sff_read_data *rd;
    float *flow;
    uint_2 *base_pos;
    char *prob_a, *prob_c, *prob_g, *prob_t;

    /* Load the SFF contents */
    if (NULL == (ch = read_sff_common_header(mf)))
        return NULL;
    if (NULL == (rh = read_sff_read_header(mf))) {
        free_sff_common_header(ch);
        return NULL;
    }
    if (NULL == (rd = read_sff_read_data(mf, ch->flow_len, rh->nbases))) {
        free_sff_common_header(ch);
        free_sff_read_header(rh);
        return NULL;
    }

    /*
     * Allocate every buffer up front, before the Read exists and before
     * any ownership has moved, so that a failure can be unwound from a
     * single place.
     */
    flow     = (float *)malloc((size_t)ch->flow_len * sizeof(float));
    base_pos = (uint_2 *)calloc(rh->nbases, sizeof(*base_pos));
    prob_a   = (char *)calloc(rh->nbases, 1);
    prob_c   = (char *)calloc(rh->nbases, 1);
    prob_g   = (char *)calloc(rh->nbases, 1);
    prob_t   = (char *)calloc(rh->nbases, 1);

    /* A NULL result is only an error when a non-zero size was requested. */
    if ((ch->flow_len && !flow) ||
        (rh->nbases && (!base_pos || !prob_a || !prob_c || !prob_g || !prob_t)))
        goto fail;

    /* Convert to Read struct */
    if (NULL == (r = read_allocate(0, 0)))
        goto fail;

    /* Drop whatever read_allocate() set up for the fields replaced below. */
    free(r->basePos);
    free(r->base);
    free(r->prob_A);
    free(r->prob_C);
    free(r->prob_G);
    free(r->prob_T);

    r->nflows = ch->flow_len;
    r->flow_order = ch->flow;   /* ownership moves to r */
    ch->flow = NULL;
    r->flow_raw = NULL;

    r->flow = flow;
    for (i = 0; i < r->nflows; i++) {
        r->flow[i] = rd->flowgram[i] / 100.0;
    }

    r->NBases = rh->nbases;
    r->basePos = base_pos;
    r->base = rd->bases;        /* ownership moves to r */
    rd->bases = NULL;
    r->prob_A = prob_a;
    r->prob_C = prob_c;
    r->prob_G = prob_g;
    r->prob_T = prob_t;

    /*
     * The probability arrays come from calloc() and so start at zero;
     * only the entry matching the called base receives the quality.
     * basePos[i] is the running sum of the flow_index values.
     */
    bpos = 0;
    for (i = 0; i < r->NBases; i++) {
        switch (r->base[i]) {
        case 'A':
        case 'a':
            r->prob_A[i] = rd->quality[i];
            break;
        case 'C':
        case 'c':
            r->prob_C[i] = rd->quality[i];
            break;
        case 'G':
        case 'g':
            r->prob_G[i] = rd->quality[i];
            break;
        case 'T':
        case 't':
            r->prob_T[i] = rd->quality[i];
            break;
        default:
            break;
        }

        bpos += rd->flow_index[i];
        r->basePos[i] = bpos;
    }

    /* A right clip value of 0 is replaced by NBases+1. */
    r->leftCutoff = MAX(rh->clip_qual_left, rh->clip_adapter_left);
    r->rightCutoff = MIN(rh->clip_qual_right
                             ? rh->clip_qual_right
                             : r->NBases+1,
                         rh->clip_adapter_right
                             ? rh->clip_adapter_right
                             : r->NBases+1);

    free_sff_common_header(ch);
    free_sff_read_header(rh);
    free_sff_read_data(rd);

    return r;

 fail:
    /* Nothing has been handed to a Read yet, so everything is still ours. */
    free(flow);
    free(base_pos);
    free(prob_a);
    free(prob_c);
    free(prob_g);
    free(prob_t);
    free_sff_common_header(ch);
    free_sff_read_header(rh);
    free_sff_read_data(rd);
    return NULL;
}
void process_sff_to_fastq(char *sff_file, char *fastq_file, int trim_flag) { sff_common_header h; sff_read_header rh; sff_read_data rd; FILE *sff_fp, *fastq_fp; if ( !strlen(sff_file) ) { sff_fp = stdin; } else if ( (sff_fp = fopen(sff_file, "r")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for reading.\n", sff_file); exit(1); } read_sff_common_header(sff_fp, &h); verify_sff_common_header(PRG_NAME, VERSION, &h); // printf("size of header: %d \n", sizeof(sff_common_header)); // printf("\tmagic : 0x%x\n" , h.magic); // printf("\tindex_offset : 0x%llx\n", h.index_offset); // printf("\tindex_len : 0x%x\n" , h.index_len); // printf("\tnumreads : 0x%x\n" , h.nreads); // printf("\theader_len : 0x%x\n" , h.header_len); // printf("\tkey_len : 0x%x\n" , h.key_len); // printf("\tflow_len : 0x%x\n" , h.flow_len); // printf("\tflowgram_fmt : 0x%x\n" , h.flowgram_format); // printf("\tflow : %s\n " , h.flow); // printf("\tkey : %s\n " , h.key); // printf("\n\n"); if ( !strlen(fastq_file) ) { fastq_fp = stdout; } else { if ( (fastq_fp = fopen(fastq_file, "w")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for writing.\n", fastq_file); exit(1); } } int left_clip = 0, right_clip = 0, nbases = 0; char *name; char *bases; uint8_t *quality; register int i; int numreads = (int) h.nreads; for (i = 0; i < numreads; i++) { read_sff_read_header(sff_fp, &rh); read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases); /* get clipping points */ get_clip_values(rh, trim_flag, &left_clip, &right_clip); nbases = right_clip - left_clip; /* create bases string */ bases = get_read_bases(rd, left_clip, right_clip); /* create quality array */ quality = get_read_quality_values(rd, left_clip, right_clip); /* create read name string */ int name_length = (int) rh.name_len + 1; // account for NULL termination name = (char *) malloc( name_length * sizeof(char) ); if (!name) { fprintf(stderr, "Out of memory! 
For read name string!\n"); exit(1); } memset(name, '\0', (size_t) name_length); strncpy(name, rh.name, (size_t) rh.name_len); construct_fastq_entry(fastq_fp, name, bases, quality, nbases); free(name); free(bases); free(quality); free_sff_read_header(&rh); free_sff_read_data(&rd); } free_sff_common_header(&h); fclose(fastq_fp); fclose(sff_fp); }