tmap_sa_t * tmap_sa_read(const char *fn_fasta) { char *fn_sa = NULL; tmap_file_t *fp_sa = NULL; tmap_sa_t *sa = NULL; fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE); fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION); sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa"); if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) { tmap_error(NULL, Exit, ReadFileError); } sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv; sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa"); sa->sa[0] = -1; if(sa->n_sa-1 != tmap_file_fread(sa->sa + 1, sizeof(tmap_bwt_int_t), sa->n_sa - 1, fp_sa)) { tmap_error(NULL, Exit, ReadFileError); } sa->sa_intv_log2 = tmap_log2(sa->sa_intv); tmap_file_fclose(fp_sa); free(fn_sa); sa->is_shm = 0; return sa; }
/**
 * Reads the header of a reference sequence annotation stream.
 *
 * Fields are consumed in this order: version_id (uint64_t), the length of
 * the package version string (size_t), the package version string itself
 * (length+1 bytes, the extra byte being the stored null terminator),
 * num_annos (uint32_t) and len (uint64_t).
 *
 * @param  fp      the stream to read from
 * @param  refseq  the reference sequence structure to fill in
 *
 * Errors are reported through tmap_error() with the Exit action and
 * ReadFileError when a read comes up short, when version_id differs from
 * TMAP_VERSION_ID, or when tmap_refseq_supported() returns 0.
 */
static inline void
tmap_refseq_read_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  size_t package_version_l;

  // version id and the length of the package version string
  if(1 != tmap_file_fread(&refseq->version_id, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&package_version_l, sizeof(size_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  if(refseq->version_id != TMAP_VERSION_ID) {
      tmap_error("version id did not match", Exit, ReadFileError);
  }

  // package version string; add one for the null terminator
  refseq->package_version = tmap_string_init(package_version_l+1);
  refseq->package_version->l = package_version_l;
  if(refseq->package_version->l+1 != tmap_file_fread(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the terminator byte came from the file; enforce it so that the string
  // can safely be printed with "%s" below even if the file is malformed
  refseq->package_version->s[refseq->package_version->l] = '\0';

  if(0 == tmap_refseq_supported(refseq)) {
      fprintf(stderr, "reference version: %s\n", refseq->package_version->s);
      fprintf(stderr, "package version: %s\n", PACKAGE_VERSION);
      tmap_error("the reference index is not supported", Exit, ReadFileError);
  }

  // number of annotations and total reference length
  if(1 != tmap_file_fread(&refseq->num_annos, sizeof(uint32_t), 1, fp)
     || 1 != tmap_file_fread(&refseq->len, sizeof(uint64_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
}
/**
 * Reads a single annotation record from a stream.
 *
 * Fields are consumed in this order: the name length (uint32_t, includes
 * the null terminator), the name bytes, len (uint64_t), offset (uint64_t),
 * num_amb (uint32_t) and, when num_amb is non-zero, three arrays of num_amb
 * elements each: amb_positions_start (uint32_t), amb_positions_end
 * (uint32_t) and amb_bases (uint8_t).  When num_amb is zero the three array
 * pointers are set to NULL.
 *
 * @param  fp    the stream to read from
 * @param  anno  the annotation to fill in
 *
 * A short read or a zero name length is reported through tmap_error() with
 * the Exit action and ReadFileError.
 */
static inline void
tmap_refseq_read_annos(tmap_file_t *fp, tmap_anno_t *anno)
{
  uint32_t len = 0; // includes the null-terminator

  if(1 != tmap_file_fread(&len, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }
  // the stored length counts the terminator, so zero is never valid; it
  // would also make "len-1" below wrap around
  if(0 == len) {
      tmap_error("the annotation name length was zero", Exit, ReadFileError);
  }

  anno->name = tmap_string_init(len);

  if(len != tmap_file_fread(anno->name->s, sizeof(char), len, fp)
     || 1 != tmap_file_fread(&anno->len, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->offset, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->num_amb, sizeof(uint32_t), 1, fp)) {
      tmap_error(NULL, Exit, ReadFileError);
  }

  // set name length and enforce the terminator rather than trusting the file
  anno->name->l = len-1;
  anno->name->s[anno->name->l] = '\0';

  if(0 < anno->num_amb) {
      anno->amb_positions_start = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_start");
      anno->amb_positions_end = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_end");
      anno->amb_bases = tmap_malloc(sizeof(uint8_t) * anno->num_amb, "anno->amb_bases");
      if(anno->num_amb != tmap_file_fread(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
         || anno->num_amb != tmap_file_fread(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp)) {
          // this is a failed read, so report it as a read error
          tmap_error(NULL, Exit, ReadFileError);
      }
  }
  else {
      anno->amb_positions_start = NULL;
      anno->amb_positions_end = NULL;
      anno->amb_bases = NULL;
  }
}
tmap_refseq_t * tmap_refseq_read(const char *fn_fasta, uint32_t is_rev) { tmap_file_t *fp_pac = NULL, *fp_anno = NULL; char *fn_pac = NULL, *fn_anno = NULL; tmap_refseq_t *refseq = NULL; // allocate some memory refseq = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq"); refseq->is_rev = is_rev; refseq->is_shm = 0; // read annotation file fn_anno = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE); fp_anno = tmap_file_fopen(fn_anno, "rb", TMAP_ANNO_COMPRESSION); tmap_refseq_read_anno(fp_anno, refseq); tmap_file_fclose(fp_anno); free(fn_anno); // read the sequence fn_pac = tmap_get_file_name(fn_fasta, (0 == is_rev) ? TMAP_PAC_FILE : TMAP_REV_PAC_FILE); fp_pac = tmap_file_fopen(fn_pac, "rb", (0 == is_rev) ? TMAP_PAC_COMPRESSION : TMAP_REV_PAC_COMPRESSION); refseq->seq = tmap_malloc(sizeof(uint8_t)*tmap_refseq_seq_memory(refseq->len), "refseq->seq"); // allocate if(tmap_refseq_seq_memory(refseq->len) != tmap_file_fread(refseq->seq, sizeof(uint8_t), tmap_refseq_seq_memory(refseq->len), fp_pac)) { tmap_error(NULL, Exit, ReadFileError); } tmap_file_fclose(fp_pac); free(fn_pac); return refseq; }
/**
 * Reads one SFF read header from a stream.
 *
 * The fixed-size fields (rheader_length, name_length, n_bases and the four
 * clip values) are read and converted from network byte order with
 * ntohs()/ntohl().  The read name (name_length bytes) follows and is
 * null-terminated in memory.  Padding is then consumed through
 * tmap_sff_read_padding() and the total number of bytes read is checked
 * against rheader_length.
 *
 * @param  fp            the stream to read from
 * @param  early_eof_ok  when non-zero, a failure while reading the fixed-size
 *                       fields frees the header and returns NULL instead of
 *                       raising an error
 * @return               the newly allocated read header, or NULL as described
 *                       for early_eof_ok
 */
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_read_header_t *header = NULL;
  uint32_t n_bytes = 0;
  int fixed_ok;

  header = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");

  // fixed-size fields; evaluation stops at the first short read
  fixed_ok = (1 == tmap_file_fread(&header->rheader_length, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->name_length, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->n_bases, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fread(&header->clip_qual_left, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->clip_qual_right, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->clip_adapter_left, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->clip_adapter_right, sizeof(uint16_t), 1, fp));
  if(!fixed_ok) {
      if(0 != early_eof_ok) {
          free(header);
          return NULL;
      }
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint32_t) + 6*sizeof(uint16_t);

  // convert values from big-endian
  header->rheader_length = ntohs(header->rheader_length);
  header->name_length = ntohs(header->name_length);
  header->n_bases = ntohl(header->n_bases);
  header->clip_qual_left = ntohs(header->clip_qual_left);
  header->clip_qual_right = ntohs(header->clip_qual_right);
  header->clip_adapter_left = ntohs(header->clip_adapter_left);
  header->clip_adapter_right = ntohs(header->clip_adapter_right);

  // read name; one extra byte is reserved for the null terminator
  header->name = tmap_string_init(header->name_length+1);
  if(tmap_file_fread(header->name->s, sizeof(char), header->name_length, fp) != header->name_length) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*header->name_length;

  header->name->l = header->name_length;
  header->name->s[header->name->l] = '\0';

  // consume the padding and account for it
  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, header);
#endif

  if(n_bytes != header->rheader_length) {
      tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return header;
}
/**
 * Reads the data section of one SFF read from a stream.
 *
 * Four arrays are read in order: the flowgram (gh->flow_length uint16_t
 * values), the flow index (rh->n_bases bytes), the bases (rh->n_bases
 * bytes) and the qualities (rh->n_bases bytes).  Bases and qualities are
 * null-terminated in memory, flowgram values are converted with ntohs(),
 * and each quality value is passed through QUAL2CHAR.  Trailing padding is
 * consumed through tmap_sff_read_padding().
 *
 * @param  fp  the stream to read from
 * @param  gh  the SFF global header (supplies flow_length)
 * @param  rh  the SFF read header (supplies n_bases)
 * @return     the newly allocated read
 *
 * A short read is reported through tmap_error() with the Exit action and
 * ReadFileError.
 */
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *read = NULL;
  uint32_t idx, n_bytes = 0;
  int data_ok;

  read = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");

  read->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  read->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");
  read->bases = tmap_string_init(rh->n_bases+1);
  read->quality = tmap_string_init(rh->n_bases+1);

  // the four arrays, in file order; evaluation stops at the first short read
  data_ok = (gh->flow_length == tmap_file_fread(read->flowgram, sizeof(uint16_t), gh->flow_length, fp))
    && (rh->n_bases == tmap_file_fread(read->flow_index, sizeof(uint8_t), rh->n_bases, fp))
    && (rh->n_bases == tmap_file_fread(read->bases->s, sizeof(char), rh->n_bases, fp))
    && (rh->n_bases == tmap_file_fread(read->quality->s, sizeof(char), rh->n_bases, fp));
  if(!data_ok) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint16_t)*gh->flow_length + 3*sizeof(uint8_t)*rh->n_bases;

  // lengths and null terminators
  read->bases->l = rh->n_bases;
  read->bases->s[read->bases->l] = '\0';
  read->quality->l = rh->n_bases;
  read->quality->s[read->quality->l] = '\0';

  // flowgram values to host order
  for(idx = 0; idx < gh->flow_length; idx++) {
      read->flowgram[idx] = ntohs(read->flowgram[idx]);
  }

  // qualities from int to char
  for(idx = 0; idx < read->quality->l; idx++) {
      read->quality->s[idx] = QUAL2CHAR(read->quality->s[idx]);
  }

  // consume the trailing padding; the byte count is not needed afterwards
  (void)tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, read, gh, rh);
#endif

  return read;
}
/**
 * Consumes the padding bytes that bring a section up to a multiple of
 * eight bytes.
 *
 * @param  fp  the stream to read from
 * @param  n   the number of bytes of the section read so far
 * @return     the number of padding bytes read (0 when n is already a
 *             multiple of eight)
 *
 * A short read is reported through tmap_error() with the Exit action and
 * ReadFileError.
 */
static inline uint32_t
tmap_sff_read_padding(tmap_file_t *fp, uint32_t n)
{
  char pad_buf[8] = "\0";
  uint32_t remainder = (n & 7); // (n % 8)
  uint32_t n_pad;

  if(0 == remainder) {
      // already aligned, nothing to read
      return 0;
  }

  n_pad = 8 - remainder;
  if(tmap_file_fread(pad_buf, sizeof(char), n_pad, fp) != n_pad) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }

  return n_pad;
}
size_t tmap_sa_shm_read_num_bytes(const char *fn_fasta) { size_t n = 0; char *fn_sa = NULL; tmap_file_t *fp_sa = NULL; tmap_sa_t *sa = NULL; fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE); fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION); sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa"); if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) { tmap_error(NULL, Exit, ReadFileError); } sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv; // No need to read in sa->sa sa->sa = NULL; tmap_file_fclose(fp_sa); free(fn_sa); sa->is_shm = 0; sa->is_mm = 0; // get the number of bytes n = tmap_sa_shm_num_bytes(sa); tmap_sa_destroy(sa); return n; }
/**
 * Reads the SFF global header from a stream.
 *
 * The fixed-size fields (magic, version, index_offset, index_length,
 * n_reads, gheader_length, key_length, flow_length, flowgram_format) are
 * read and the multi-byte ones converted from network byte order.  The
 * magic number and version are checked against TMAP_SFF_MAGIC and
 * TMAP_SFF_VERSION.  The flow string (flow_length bytes) and key string
 * (key_length bytes) follow and are null-terminated in memory.  Padding is
 * consumed through tmap_sff_read_padding() and the total number of bytes
 * read is checked against gheader_length.
 *
 * @param  fp  the stream to read from
 * @return     the newly allocated global header
 *
 * Short reads and failed checks are reported through tmap_error() with the
 * Exit action and ReadFileError.
 */
tmap_sff_header_t *
tmap_sff_header_read(tmap_file_t *fp)
{
  tmap_sff_header_t *header = NULL;
  uint32_t n_bytes = 0;
  int fields_ok;

  header = tmap_calloc(1, sizeof(tmap_sff_header_t), "h");

  // fixed-size fields; evaluation stops at the first short read
  fields_ok = (1 == tmap_file_fread(&header->magic, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fread(&header->version, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fread(&header->index_offset, sizeof(uint64_t), 1, fp))
    && (1 == tmap_file_fread(&header->index_length, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fread(&header->n_reads, sizeof(uint32_t), 1, fp))
    && (1 == tmap_file_fread(&header->gheader_length, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->key_length, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->flow_length, sizeof(uint16_t), 1, fp))
    && (1 == tmap_file_fread(&header->flowgram_format, sizeof(uint8_t), 1, fp));
  if(!fields_ok) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += 4*sizeof(uint32_t) + sizeof(uint64_t) + 3*sizeof(uint16_t) + sizeof(uint8_t);

  // convert values from big-endian
  header->magic = ntohl(header->magic);
  header->version = ntohl(header->version);
  header->index_offset = ntohll(header->index_offset);
  header->index_length = ntohl(header->index_length);
  header->n_reads = ntohl(header->n_reads);
  header->gheader_length = ntohs(header->gheader_length);
  header->key_length = ntohs(header->key_length);
  header->flow_length = ntohs(header->flow_length);

  // sanity checks on the converted values
  if(header->magic != TMAP_SFF_MAGIC) {
      tmap_error("SFF magic number did not match", Exit, ReadFileError);
  }
  if(TMAP_SFF_VERSION != header->version) {
      tmap_error("SFF version number did not match", Exit, ReadFileError);
  }

  // flow and key strings; one extra byte each for the null terminator
  header->flow = tmap_string_init(header->flow_length+1);
  header->key = tmap_string_init(header->key_length+1);
  if(tmap_file_fread(header->flow->s, sizeof(char), header->flow_length, fp) != header->flow_length
     || tmap_file_fread(header->key->s, sizeof(char), header->key_length, fp) != header->key_length) {
      tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*(header->flow_length + header->key_length);

  header->flow->l = header->flow_length;
  header->flow->s[header->flow->l] = '\0';
  header->key->l = header->key_length;
  header->key->s[header->key->l] = '\0';

  // consume the padding and account for it
  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_header_print(stderr, header);
#endif

  if(n_bytes != header->gheader_length) {
      tmap_error("SFF global header length did not match", Exit, ReadFileError);
  }

  return header;
}