/**
 * Writes the annotation file and the packed sequence file for a reference.
 *
 * @param refseq    reference to write; refseq->seq and refseq->len are read here
 * @param fn_fasta  FASTA file name from which the output file names are derived
 * @param is_rev    0 writes the annotation file and the forward PAC file; any
 *                  other value writes only the reverse PAC file
 *
 * Every failed write is reported through tmap_error with the Exit action.
 */
void
tmap_refseq_write(tmap_refseq_t *refseq, const char *fn_fasta, uint32_t is_rev)
{
  tmap_file_t *fp_pac = NULL, *fp_anno = NULL;
  char *fn_pac = NULL, *fn_anno = NULL;
  uint8_t x = 0;

  // write annotation file (only alongside the forward sequence)
  if(0 == is_rev) {
    fn_anno = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE);
    fp_anno = tmap_file_fopen(fn_anno, "wb", TMAP_ANNO_COMPRESSION);
    tmap_refseq_write_anno(fp_anno, refseq);
    tmap_file_fclose(fp_anno);
    free(fn_anno);
  }

  // write the sequence
  fn_pac = tmap_get_file_name(fn_fasta, (0 == is_rev) ? TMAP_PAC_FILE : TMAP_REV_PAC_FILE);
  fp_pac = tmap_file_fopen(fn_pac, "wb", (0 == is_rev) ? TMAP_PAC_COMPRESSION : TMAP_REV_PAC_COMPRESSION);
  if(tmap_refseq_seq_memory(refseq->len)
     != tmap_file_fwrite(refseq->seq, sizeof(uint8_t), tmap_refseq_seq_memory(refseq->len), fp_pac)) {
    // name the file, exactly as the two write failures below do
    tmap_error(fn_pac, Exit, WriteFileError);
  }
  if(refseq->len % 4 == 0) { // add an extra (zero) byte if we completely filled all bits
    if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
      tmap_error(fn_pac, Exit, WriteFileError);
    }
  }
  // the final byte stores refseq->len % 4
  x = refseq->len % 4;
  if(1 != tmap_file_fwrite(&x, sizeof(uint8_t), 1, fp_pac)) {
    tmap_error(fn_pac, Exit, WriteFileError);
  }

  tmap_file_fclose(fp_pac);
  free(fn_pac);
}
/**
 * Reads one annotation record from an open file into `anno`.
 *
 * The record is read in this order: name length (uint32_t, counts the
 * null-terminator), the name bytes, len (uint64_t), offset (uint64_t),
 * num_amb (uint32_t) and, when num_amb is non-zero, the three ambiguity
 * arrays (start positions, end positions, bases).
 *
 * @param fp    file to read from
 * @param anno  destination; name and the ambiguity arrays are allocated here
 *
 * Any short read is reported through tmap_error as ReadFileError.
 */
static inline void
tmap_refseq_read_annos(tmap_file_t *fp, tmap_anno_t *anno)
{
  uint32_t len = 0; // includes the null-terminator

  if(1 != tmap_file_fread(&len, sizeof(uint32_t), 1, fp)) {
    tmap_error(NULL, Exit, ReadFileError);
  }
  // the stored length counts the terminator, so zero would make len-1 wrap below
  if(0 == len) {
    tmap_error("annotation name length was zero", Exit, ReadFileError);
  }

  anno->name = tmap_string_init(len);

  if(len != tmap_file_fread(anno->name->s, sizeof(char), len, fp)
     || 1 != tmap_file_fread(&anno->len, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->offset, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&anno->num_amb, sizeof(uint32_t), 1, fp)) {
    tmap_error(NULL, Exit, ReadFileError);
  }

  if(0 < anno->num_amb) {
    anno->amb_positions_start = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_start");
    anno->amb_positions_end = tmap_malloc(sizeof(uint32_t) * anno->num_amb, "anno->amb_positions_end");
    anno->amb_bases = tmap_malloc(sizeof(uint8_t) * anno->num_amb, "anno->amb_bases");
    if(anno->num_amb != tmap_file_fread(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
       || anno->num_amb != tmap_file_fread(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
       || anno->num_amb != tmap_file_fread(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp)) {
      // this is a failed read, so report it as one
      tmap_error(NULL, Exit, ReadFileError);
    }
  }
  else {
    anno->amb_positions_start = NULL;
    anno->amb_positions_end = NULL;
    anno->amb_bases = NULL;
  }

  // set name length (excludes the null-terminator)
  anno->name->l = len-1;
}
tmap_sa_t * tmap_sa_read(const char *fn_fasta) { char *fn_sa = NULL; tmap_file_t *fp_sa = NULL; tmap_sa_t *sa = NULL; fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE); fp_sa = tmap_file_fopen(fn_sa, "rb", TMAP_SA_COMPRESSION); sa = tmap_calloc(1, sizeof(tmap_sa_t), "sa"); if(1 != tmap_file_fread(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa) || 1 != tmap_file_fread(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)) { tmap_error(NULL, Exit, ReadFileError); } sa->n_sa = (sa->seq_len + sa->sa_intv) / sa->sa_intv; sa->sa = tmap_calloc(sa->n_sa, sizeof(tmap_bwt_int_t), "sa->sa"); sa->sa[0] = -1; if(sa->n_sa-1 != tmap_file_fread(sa->sa + 1, sizeof(tmap_bwt_int_t), sa->n_sa - 1, fp_sa)) { tmap_error(NULL, Exit, ReadFileError); } sa->sa_intv_log2 = tmap_log2(sa->sa_intv); tmap_file_fclose(fp_sa); free(fn_sa); sa->is_shm = 0; return sa; }
/**
 * Completes a read-group header record and adds it to the SAM header.
 *
 * For SFF input the KS and FO tags are added from the SFF reader whose index
 * equals the number of records already present for record->tag. An ID tag
 * must already exist; missing SM and PG tags get dummy values.
 *
 * @param io_in   input readers (type, n and seqios are used)
 * @param header  SAM header that receives the record
 * @param record  read-group record to complete and add
 */
static void
tmap_seqs_io_init2_fs_and_add(tmap_seqs_io_t *io_in, sam_header_t *header, sam_header_record_t *record)
{
  // two-character tag; kept NUL-terminated because the same API is also given
  // string literals below, so the callee may treat the key as a C string
  char tag[3] = {'\0', '\0', '\0'};

  // add @RG.KS and @RG.FO
  if(io_in->type == TMAP_SEQ_TYPE_SFF) {
    sam_header_records_t *records = sam_header_get_records(header, record->tag); // get the header line
    if(io_in->n <= records->n) tmap_error("Too many read groups specified", Exit, OutOfRange);
    // @RG.KS
    tag[0]='K';tag[1]='S';
    if(0 == sam_header_record_add(record, tag, tmap_sff_io_get_rg_ks(io_in->seqios[records->n]->io.sffio))) {
      tmap_error("Could not add the KS tag; most likely it is already present", Exit, OutOfRange);
    }
    // @RG.FO
    tag[0]='F';tag[1]='O';
    if(0 == sam_header_record_add(record, tag, tmap_sff_io_get_rg_fo(io_in->seqios[records->n]->io.sffio))) {
      tmap_error("Could not add the FO tag; most likely it is already present", Exit, OutOfRange);
    }
  }
  // check for the @RG.ID and @RG.SM tags
  if(NULL == sam_header_record_get(record, "ID")) tmap_bug(); // should not happen
  if(NULL == sam_header_record_get(record, "SM")) {
    if(0 == sam_header_record_add(record, "SM", "NOSM")) tmap_bug(); // dummy SM, for Picard validation
  }
  if(NULL == sam_header_record_get(record, "PG")) {
    if(0 == sam_header_record_add(record, "PG", PACKAGE_NAME)) tmap_bug(); // dummy PG
  }
  // add the read group
  if(0 == sam_header_add_record(header, record)) tmap_bug();
}
/**
 * Checks whether the reference was built with a compatible package version.
 *
 * Both the reference's version string and PACKAGE_VERSION must contain
 * exactly two '.' characters, otherwise tmap_error is raised. The reference
 * is supported when tmap_refseq_get_version_format() yields equal strings
 * for the two versions.
 *
 * @param refseq  reference whose package_version string is inspected
 * @return        1 if supported, 0 otherwise
 */
static inline int32_t
tmap_refseq_supported(tmap_refseq_t *refseq)
{
  int32_t n_dots;
  char *refseq_v = refseq->package_version->s;
  char *tmap_v = PACKAGE_VERSION;
  const char *p = NULL;

  // sanity check on version names; walk each string once instead of calling
  // strlen on every iteration
  for(p=refseq_v,n_dots=0;'\0'!=*p;p++) {
    if('.' == *p) n_dots++;
  }
  if(2 != n_dots) {
    tmap_error("did not find three version numbers", Exit, OutOfRange);
  }
  for(p=tmap_v,n_dots=0;'\0'!=*p;p++) {
    if('.' == *p) n_dots++;
  }
  if(2 != n_dots) {
    tmap_error("did not find three version numbers", Exit, OutOfRange);
  }

  // get the format ids
  if(0 == strcmp(tmap_refseq_get_version_format(refseq_v), tmap_refseq_get_version_format(tmap_v))) {
    return 1;
  }
  return 0;
}
/**
 * Reads the reference header from an open annotation file.
 *
 * Read order: version_id (uint64_t), package version length (size_t), the
 * package version string including its terminator, num_annos (uint32_t) and
 * len (uint64_t). The version id must equal TMAP_VERSION_ID and the package
 * version must pass tmap_refseq_supported().
 *
 * @param fp      file to read from
 * @param refseq  destination; package_version is allocated here
 *
 * Failures are reported through tmap_error with ReadFileError.
 */
static inline void
tmap_refseq_read_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  size_t package_version_l;

  if(1 != tmap_file_fread(&refseq->version_id, sizeof(uint64_t), 1, fp)
     || 1 != tmap_file_fread(&package_version_l, sizeof(size_t), 1, fp)) {
    tmap_error(NULL, Exit, ReadFileError);
  }
  if(refseq->version_id != TMAP_VERSION_ID) {
    tmap_error("version id did not match", Exit, ReadFileError);
  }

  refseq->package_version = tmap_string_init(package_version_l+1); // add one for the null terminator
  refseq->package_version->l = package_version_l;
  if(refseq->package_version->l+1
     != tmap_file_fread(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)) {
    tmap_error(NULL, Exit, ReadFileError);
  }
  // do not rely on the file for the terminator: the string is used with
  // C-string functions right below
  refseq->package_version->s[refseq->package_version->l] = '\0';

  if(0 == tmap_refseq_supported(refseq)) {
    fprintf(stderr, "reference version: %s\n", refseq->package_version->s);
    fprintf(stderr, "package version: %s\n", PACKAGE_VERSION);
    tmap_error("the reference index is not supported", Exit, ReadFileError);
  }

  if(1 != tmap_file_fread(&refseq->num_annos, sizeof(uint32_t), 1, fp)
     || 1 != tmap_file_fread(&refseq->len, sizeof(uint64_t), 1, fp)) {
    tmap_error(NULL, Exit, ReadFileError);
  }
}
/**
 * Reads one SFF read header from `fp`.
 *
 * The fixed-size fields are read first and converted from network byte
 * order, then the read name, then the padding. The number of bytes consumed
 * must equal the header's own rheader_length field.
 *
 * @param fp            file to read from
 * @param early_eof_ok  non-zero: a failed read of the fixed fields returns
 *                      NULL instead of raising an error
 * @return              newly allocated header, or NULL (see early_eof_ok)
 */
tmap_sff_read_header_t *
tmap_sff_read_header_read(tmap_file_t *fp, int32_t early_eof_ok)
{
  tmap_sff_read_header_t *hdr = tmap_calloc(1, sizeof(tmap_sff_read_header_t), "rh");
  uint32_t n_bytes = 0; // bytes consumed so far
  int32_t fixed_ok;

  // fixed-size fields, in file order
  fixed_ok = (1 == tmap_file_fread(&hdr->rheader_length, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->name_length, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->n_bases, sizeof(uint32_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_qual_left, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_qual_right, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_adapter_left, sizeof(uint16_t), 1, fp)
              && 1 == tmap_file_fread(&hdr->clip_adapter_right, sizeof(uint16_t), 1, fp));
  if(!fixed_ok) {
    if(0 != early_eof_ok) {
      free(hdr);
      return NULL;
    }
    tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint32_t) + 6*sizeof(uint16_t);

  // network (big-endian) to host order
  hdr->rheader_length = ntohs(hdr->rheader_length);
  hdr->name_length = ntohs(hdr->name_length);
  hdr->n_bases = ntohl(hdr->n_bases);
  hdr->clip_qual_left = ntohs(hdr->clip_qual_left);
  hdr->clip_qual_right = ntohs(hdr->clip_qual_right);
  hdr->clip_adapter_left = ntohs(hdr->clip_adapter_left);
  hdr->clip_adapter_right = ntohs(hdr->clip_adapter_right);

  // read name: name_length bytes, terminated here
  hdr->name = tmap_string_init(hdr->name_length+1);
  if(hdr->name_length != tmap_file_fread(hdr->name->s, sizeof(char), hdr->name_length, fp)) {
    tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(char)*hdr->name_length;
  hdr->name->l = hdr->name_length;
  hdr->name->s[hdr->name->l]='\0';

  // trailing padding
  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_header_print(stderr, hdr);
#endif

  if(n_bytes != hdr->rheader_length) {
    tmap_error("SFF read header length did not match", Exit, ReadFileError);
  }

  return hdr;
}
/**
 * Creates an index holding the reference sequence, BWT and suffix array.
 *
 * The data source is chosen as follows: mm == 1 uses the *_mm_read
 * functions; otherwise shm_key == 0 reads from file; otherwise the three
 * structures are unpacked from shared memory identified by shm_key.
 *
 * @param fn_fasta  FASTA file name (used by the file and memory-map paths)
 * @param shm_key   shared memory key, or 0 for none
 * @param mm        1 to use the memory-map readers
 * @return          newly allocated index
 *
 * Raises tmap_error if a structure is missing from shared memory or if the
 * BWT/SA sequence lengths are not twice the reference length.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->shm_key = shm_key;
  idx->mm = mm;

  // get the reference information
  if(1 == idx->mm) {
    tmap_progress_print("Retrieving reference data from memory map");
    idx->refseq = tmap_refseq_mm_read(fn_fasta);
    idx->bwt = tmap_bwt_mm_read(fn_fasta);
    idx->sa = tmap_sa_mm_read(fn_fasta);
    tmap_progress_print2("Reference data retrieved from memory map");
  }
  else if(0 == idx->shm_key) {
    tmap_progress_print("reading in reference data");
    idx->refseq = tmap_refseq_read(fn_fasta);
    idx->bwt = tmap_bwt_read(fn_fasta);
    idx->sa = tmap_sa_read(fn_fasta);
    tmap_progress_print2("reference data read in");
  }
  else {
    tmap_progress_print("retrieving reference data from shared memory");
    idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

    idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
    if(NULL == idx->refseq) {
      tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
    }

    idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
    if(NULL == idx->bwt) {
      tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
    }

    idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
    if(NULL == idx->sa) {
      tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
    }

    tmap_progress_print2("reference data retrieved from shared memory");
  }

  // both the BWT and the SA must cover twice the reference length
  if(idx->bwt->seq_len != (idx->refseq->len << 1)) {
    tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
    tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
/**
 * Clones a sequence by dispatching on its type to the matching clone routine.
 *
 * @param seq  sequence to clone
 * @return     newly allocated sequence of the same type
 *
 * An unrecognized type is reported through tmap_error (OutOfRange).
 */
tmap_seq_t *
tmap_seq_clone(tmap_seq_t *seq)
{
  tmap_seq_t *copy = tmap_calloc(1, sizeof(tmap_seq_t), "ret");

  copy->type = seq->type;
  if(TMAP_SEQ_TYPE_FQ == seq->type) {
    copy->data.fq = tmap_fq_clone(seq->data.fq);
  }
  else if(TMAP_SEQ_TYPE_SFF == seq->type) {
    copy->data.sff = tmap_sff_clone(seq->data.sff);
  }
  else if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
    // SAM and BAM share the same record representation
    copy->data.sam = tmap_sam_clone(seq->data.sam);
  }
  else {
    tmap_error("type is unrecognized", Exit, OutOfRange);
  }

  return copy;
}
static inline tmap_sam_io_t * tmap_sam_io_init_helper(const char *fn, int32_t is_bam) { tmap_sam_io_t *samio = NULL; // initialize memory samio = tmap_calloc(1, sizeof(tmap_sam_io_t), "samio"); if(0 == is_bam) { samio->fp = samopen(fn, "r", NULL); } else { samio->fp = samopen(fn, "rb", NULL); } if(NULL == samio->fp) { tmap_error(fn, Exit, OpenFileError); } samio->bam_end_vfo = 0; // check if there are sequences in the header /* if(samio->fp->header->n_targets == 0) { tmap_error("Found no @SQ lines in the SAM header", Exit, OutOfRange); } */ return samio; }
static int32_t tmap_shmget(key_t key, size_t size, int32_t shmflg, int32_t create) { int32_t shmid, i; if(0 == create) { // try a number of times before failing for(i=0,shmid=-1;shmid<0 && i<TMAP_SHMGET_RETRIES-1;i++) { if(0 <= (shmid = shmget(key, size, shmflg))) { return shmid; } tmap_progress_print("could not get shared memory, %d more %s", TMAP_SHMGET_RETRIES-i-1, (1 != TMAP_SHMGET_RETRIES-i-1) ? "retries" : "retry"); tmap_progress_print("retrying in %d seconds", TMAP_SHMGET_SLEEP); // sleep and retry sleep(TMAP_SHMGET_SLEEP); } } if((shmid = shmget(key, size, shmflg)) < 0) { tmap_error(NULL, Exit, SharedMemoryGet); } return shmid; }
inline tmap_seqs_io_t* tmap_seqs_io_init(char **fns, int32_t fn_num, int8_t seq_type, int32_t compression, int64_t bam_start_vfo, int64_t bam_end_vfo) { tmap_seqs_io_t *io= NULL; int32_t i; io = tmap_calloc(1, sizeof(tmap_seqs_io_t), "io"); io->type = seq_type; if(1 < io->n && (TMAP_SEQ_TYPE_SAM == io->type || TMAP_SEQ_TYPE_BAM == io->type)) { tmap_error("Multi-SAM/BAM not supported", Exit, OutOfRange); } if(NULL == fns) { // stdin io->n = 1; io->seqios = tmap_calloc(1, sizeof(tmap_seq_io_t*), "io->seqios"); io->seqios[0] = tmap_seq_io_init("-", seq_type, 0, compression); // NB: always reading } else { // from file(s) io->n = fn_num; io->seqios = tmap_calloc(fn_num, sizeof(tmap_seq_io_t*), "io->seqios"); for(i=0;i<io->n;i++) { io->seqios[i] = tmap_seq_io_init(fns[i], seq_type, 0, compression); // NB: always reading } } if (io->n == 1 && io->seqios[0] && io->type == TMAP_SEQ_TYPE_BAM) tmap_sam_io_set_vfo(io->seqios[0]->io.samio, bam_start_vfo, bam_end_vfo); return io; }
// zero-based static inline int32_t tmap_refseq_get_seqid1(const tmap_refseq_t *refseq, uint32_t pacpos) { int32_t left, right, mid; if(refseq->len < pacpos) { tmap_error("Coordinate was larger than the reference", Exit, OutOfRange); } left = 0; mid = 0; right = refseq->num_annos; while (left < right) { mid = (left + right) >> 1; if(refseq->annos[mid].offset < pacpos) { if(mid == refseq->num_annos - 1) break; if(pacpos <= refseq->annos[mid+1].offset) break; left = mid + 1; } else right = mid; } if(refseq->num_annos < mid) { return refseq->num_annos; } return mid; }
tmap_refseq_t * tmap_refseq_read(const char *fn_fasta, uint32_t is_rev) { tmap_file_t *fp_pac = NULL, *fp_anno = NULL; char *fn_pac = NULL, *fn_anno = NULL; tmap_refseq_t *refseq = NULL; // allocate some memory refseq = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq"); refseq->is_rev = is_rev; refseq->is_shm = 0; // read annotation file fn_anno = tmap_get_file_name(fn_fasta, TMAP_ANNO_FILE); fp_anno = tmap_file_fopen(fn_anno, "rb", TMAP_ANNO_COMPRESSION); tmap_refseq_read_anno(fp_anno, refseq); tmap_file_fclose(fp_anno); free(fn_anno); // read the sequence fn_pac = tmap_get_file_name(fn_fasta, (0 == is_rev) ? TMAP_PAC_FILE : TMAP_REV_PAC_FILE); fp_pac = tmap_file_fopen(fn_pac, "rb", (0 == is_rev) ? TMAP_PAC_COMPRESSION : TMAP_REV_PAC_COMPRESSION); refseq->seq = tmap_malloc(sizeof(uint8_t)*tmap_refseq_seq_memory(refseq->len), "refseq->seq"); // allocate if(tmap_refseq_seq_memory(refseq->len) != tmap_file_fread(refseq->seq, sizeof(uint8_t), tmap_refseq_seq_memory(refseq->len), fp_pac)) { tmap_error(NULL, Exit, ReadFileError); } tmap_file_fclose(fp_pac); free(fn_pac); return refseq; }
tmap_seq_t * tmap_seq_init(int8_t type) { tmap_seq_t *seq = NULL; seq = tmap_calloc(1, sizeof(tmap_seq_t), "seq"); seq->type = type; switch(seq->type) { case TMAP_SEQ_TYPE_FQ: seq->data.fq = tmap_fq_init(); break; case TMAP_SEQ_TYPE_SFF: seq->data.sff = tmap_sff_init(); break; case TMAP_SEQ_TYPE_SAM: case TMAP_SEQ_TYPE_BAM: seq->data.sam = tmap_sam_init(); break; default: tmap_error("type is unrecognized", Exit, OutOfRange); break; } return seq; }
/**
 * Builds the index files for opt->fn_fasta.
 *
 * Steps, in order: pack the reference (third argument 0), pick the BWT
 * construction algorithm if opt->is_large is negative, build the BWT, build
 * the suffix array, then pack the reference again with third argument 1
 * (NOTE(review): presumably the reverse flag — confirm against
 * tmap_refseq_fasta2pac).
 *
 * @param opt  indexing options; is_large may be updated here
 */
static void
tmap_index_core(tmap_index_opt_t *opt)
{
  // pack the reference sequence
  uint64_t ref_len = tmap_refseq_fasta2pac(opt->fn_fasta, TMAP_FILE_NO_COMPRESSION, 0);

  if(ref_len >= TMAP_INDEX_TOO_BIG_GENOME) {
    tmap_error("Reference sequence too large", Exit, OutOfRange);
  }

  // a negative is_large means "not chosen yet": decide from the genome size
  if(0 > opt->is_large) {
    if(ref_len >= TMAP_INDEX_LARGE_GENOME) {
      opt->is_large = 1;
      tmap_progress_print("defaulting to \"bwtsw\" BWT construction algorithm");
    }
    else {
      opt->is_large = 0;
      tmap_progress_print("defaulting to \"is\" BWT construction algorithm");
    }
  }

  // create the bwt
  tmap_bwt_pac2bwt(opt->fn_fasta, opt->is_large, opt->occ_interval, opt->hash_width, opt->check_hash);

  // create the suffix array
  tmap_sa_bwt2sa(opt->fn_fasta, opt->sa_interval);

  // second packing pass; the returned length is not used afterwards
  ref_len = tmap_refseq_fasta2pac(opt->fn_fasta, TMAP_FILE_NO_COMPRESSION, 1);
}
/**
 * Wrapper around shmdt that raises tmap_error (SharedMemoryDetach) on failure.
 *
 * @param shmaddr  address passed through to shmdt
 * @return         always 0
 */
static int32_t
tmap_shmdt(const void *shmaddr)
{
  int rc = shmdt(shmaddr);

  if(0 > rc) {
    tmap_error(NULL, Exit, SharedMemoryDetach);
  }
  return 0;
}
/**
 * Wrapper around shmctl that raises tmap_error (SharedMemoryControl) on failure.
 *
 * @param shmid  shared memory identifier
 * @param cmd    command passed through to shmctl
 * @param buf    buffer passed through to shmctl
 * @return       always 0
 */
static int32_t
tmap_shmctl(int32_t shmid, int32_t cmd, struct shmid_ds *buf)
{
  int rc = shmctl(shmid, cmd, buf);

  if(0 > rc) {
    tmap_error(NULL, Exit, SharedMemoryControl);
  }
  return 0;
}
/**
 * Writes the reference header to an open file.
 *
 * Write order: version_id (uint64_t), package version length (size_t), the
 * package version string including its terminator, num_annos (uint32_t) and
 * len (uint64_t). Writing stops at the first short write.
 *
 * @param fp      file to write to
 * @param refseq  reference supplying the header fields
 *
 * A short write is reported through tmap_error (WriteFileError).
 */
static inline void
tmap_refseq_write_header(tmap_file_t *fp, tmap_refseq_t *refseq)
{
  // && short-circuits, so nothing further is written after a failure
  if(!(1 == tmap_file_fwrite(&refseq->version_id, sizeof(uint64_t), 1, fp)
       && 1 == tmap_file_fwrite(&refseq->package_version->l, sizeof(size_t), 1, fp)
       && refseq->package_version->l+1 == tmap_file_fwrite(refseq->package_version->s, sizeof(char), refseq->package_version->l+1, fp)
       && 1 == tmap_file_fwrite(&refseq->num_annos, sizeof(uint32_t), 1, fp)
       && 1 == tmap_file_fwrite(&refseq->len, sizeof(uint64_t), 1, fp))) {
    tmap_error(NULL, Exit, WriteFileError);
  }
}
/**
 * Writes one annotation record to an open file.
 *
 * Write order: name length including the terminator (uint32_t), the name
 * bytes, len (uint64_t), offset (uint64_t), num_amb (uint32_t) and, when
 * num_amb is non-zero, the three ambiguity arrays.
 *
 * @param fp    file to write to
 * @param anno  annotation to write
 *
 * A short write is reported through tmap_error (WriteFileError).
 */
static inline void
tmap_refseq_write_annos(tmap_file_t *fp, tmap_anno_t *anno)
{
  uint32_t name_len = anno->name->l+1; // include null terminator

  // fixed part of the record; && short-circuits at the first short write
  if(!(1 == tmap_file_fwrite(&name_len, sizeof(uint32_t), 1, fp)
       && name_len == tmap_file_fwrite(anno->name->s, sizeof(char), name_len, fp)
       && 1 == tmap_file_fwrite(&anno->len, sizeof(uint64_t), 1, fp)
       && 1 == tmap_file_fwrite(&anno->offset, sizeof(uint64_t), 1, fp)
       && 1 == tmap_file_fwrite(&anno->num_amb, sizeof(uint32_t), 1, fp))) {
    tmap_error(NULL, Exit, WriteFileError);
  }

  // nothing more to write without ambiguity entries
  if(!(0 < anno->num_amb)) {
    return;
  }

  if(!(anno->num_amb == tmap_file_fwrite(anno->amb_positions_start, sizeof(uint32_t), anno->num_amb, fp)
       && anno->num_amb == tmap_file_fwrite(anno->amb_positions_end, sizeof(uint32_t), anno->num_amb, fp)
       && anno->num_amb == tmap_file_fwrite(anno->amb_bases, sizeof(uint8_t), anno->num_amb, fp))) {
    tmap_error(NULL, Exit, WriteFileError);
  }
}
/**
 * Wrapper around shmat that raises tmap_error (SharedMemoryAttach) on failure.
 *
 * @param shmid    shared memory identifier
 * @param shmaddr  address passed through to shmat
 * @param shmflg   flags passed through to shmat
 * @return         the address returned by shmat
 */
static void *
tmap_shmat(int32_t shmid, const void *shmaddr, int32_t shmflg)
{
  void *addr = shmat(shmid, shmaddr, shmflg);

  // shmat signals failure with the pointer value -1
  if((void*)-1 == addr) {
    tmap_error(NULL, Exit, SharedMemoryAttach);
  }
  return addr;
}
/**
 * Raises a command-line error when `val` lies outside [lower, upper].
 *
 * The message is "option <option> out of range". The option name is
 * truncated if needed so the message always fits the local buffer.
 *
 * @param val     value to check
 * @param lower   smallest accepted value (inclusive)
 * @param upper   largest accepted value (inclusive)
 * @param option  option name placed in the error message
 */
void
tmap_error_cmd_check_int(int32_t val, int32_t lower, int32_t upper, char *option)
{
  static const char prefix[] = "option ";
  static const char suffix[] = " out of range";
  char str[1024] = "\0";
  size_t room;

  if(lower <= val && val <= upper) {
    return;
  }

  // characters left for the option name once the prefix, the suffix and the
  // terminator are accounted for
  room = sizeof(str) - (sizeof(prefix) - 1) - (sizeof(suffix) - 1) - 1;

  strcpy(str, prefix);
  strncat(str, option, room); // appends at most `room` chars plus the terminator
  strcat(str, suffix);
  tmap_error(str, Exit, CommandLineArgument);
}
/**
 * Creates an index holding the reference sequence, BWT and suffix array.
 *
 * With shm_key == 0 the three structures are read from file; otherwise they
 * are unpacked from the shared memory identified by shm_key.
 *
 * @param fn_fasta  FASTA file name (used by the file path only)
 * @param shm_key   shared memory key, or 0 to read from file
 * @return          newly allocated index
 *
 * Raises tmap_error if a structure is missing from shared memory or if the
 * BWT/SA sequence lengths are not twice the reference length.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *idx = tmap_calloc(1, sizeof(tmap_index_t), "index");

  idx->shm_key = shm_key;

  // get the reference information
  if(0 != idx->shm_key) {
    tmap_progress_print("retrieving reference data from shared memory");
    idx->shm = tmap_shm_init(idx->shm_key, 0, 0);

    idx->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_REFSEQ));
    if(NULL == idx->refseq) {
      tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
    }

    idx->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_BWT));
    if(NULL == idx->bwt) {
      tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
    }

    idx->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(idx->shm, TMAP_SHM_LISTING_SA));
    if(NULL == idx->sa) {
      tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
    }

    tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
    tmap_progress_print("reading in reference data");
    idx->refseq = tmap_refseq_read(fn_fasta);
    idx->bwt = tmap_bwt_read(fn_fasta);
    idx->sa = tmap_sa_read(fn_fasta);
    tmap_progress_print2("reference data read in");
  }

  // both the BWT and the SA must cover twice the reference length
  if(idx->bwt->seq_len != (idx->refseq->len << 1)) {
    tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(idx->sa->seq_len != (idx->refseq->len << 1)) {
    tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return idx;
}
/**
 * Consumes the padding that brings a byte count up to a multiple of eight.
 *
 * @param fp  file to read the padding bytes from
 * @param n   number of bytes consumed so far
 * @return    number of padding bytes read (0 when n is already a multiple of 8)
 *
 * A short read is reported through tmap_error (ReadFileError).
 */
static inline uint32_t
tmap_sff_read_padding(tmap_file_t *fp, uint32_t n)
{
  char padding[8]="\0";
  uint32_t remainder = n & 7; // n % 8
  uint32_t n_pad;

  if(0 == remainder) {
    return 0;
  }

  n_pad = 8 - remainder; // number of bytes of padding
  if(n_pad != tmap_file_fread(padding, sizeof(char), n_pad, fp)) {
    tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  return n_pad;
}
/**
 * Prints the RG:Z: optional field for a record.
 *
 * When tmap_sam_rg_id_use is 1, tmap_sam_rg_id is printed; when it is 0, the
 * id is taken from the sequence and must not be NULL; for any other value
 * nothing is printed.
 *
 * @param fp   output file
 * @param seq  sequence queried for its read-group id
 */
static inline void
tmap_sam_print_rg(tmap_file_t *fp, tmap_seq_t *seq)
{
  char *rg_id = NULL;

  if(1 == tmap_sam_rg_id_use) {
    tmap_file_fprintf(fp, "\tRG:Z:%s", tmap_sam_rg_id);
    return;
  }
  if(0 != tmap_sam_rg_id_use) {
    return;
  }

  // fall back to the read group carried by the record itself
  rg_id = tmap_seq_get_rg_id(seq);
  if(NULL == rg_id) {
    tmap_error("Missing Record RG.ID in the input file", Exit, OutOfRange);
  }
  tmap_file_fprintf(fp, "\tRG:Z:%s", rg_id);
}
/**
 * Reads the data section of one SFF read.
 *
 * Read order: flowgram (gh->flow_length 16-bit values), flow index, bases
 * and qualities (rh->n_bases bytes each), then the padding. The flowgram is
 * converted from network to host order and the qualities are passed through
 * QUAL2CHAR.
 *
 * @param fp  file to read from
 * @param gh  global SFF header (flow_length is used)
 * @param rh  read header (n_bases is used)
 * @return    newly allocated read
 *
 * A short read is reported through tmap_error (ReadFileError).
 */
tmap_sff_read_t *
tmap_sff_read_read(tmap_file_t *fp, tmap_sff_header_t *gh, tmap_sff_read_header_t *rh)
{
  tmap_sff_read_t *rd = NULL;
  uint32_t k, n_bytes = 0;

  rd = tmap_calloc(1, sizeof(tmap_sff_read_t), "r");
  rd->flowgram = tmap_malloc(sizeof(uint16_t)*gh->flow_length, "r->flowgram");
  rd->flow_index = tmap_malloc(sizeof(uint8_t)*rh->n_bases, "r->flow_index");
  rd->bases = tmap_string_init(rh->n_bases+1);
  rd->quality = tmap_string_init(rh->n_bases+1);

  // && short-circuits, so reading stops at the first short read
  if(!(gh->flow_length == tmap_file_fread(rd->flowgram, sizeof(uint16_t), gh->flow_length, fp)
       && rh->n_bases == tmap_file_fread(rd->flow_index, sizeof(uint8_t), rh->n_bases, fp)
       && rh->n_bases == tmap_file_fread(rd->bases->s, sizeof(char), rh->n_bases, fp)
       && rh->n_bases == tmap_file_fread(rd->quality->s, sizeof(char), rh->n_bases, fp))) {
    tmap_error("tmap_file_fread", Exit, ReadFileError);
  }
  n_bytes += sizeof(uint16_t)*gh->flow_length + 3*sizeof(uint8_t)*rh->n_bases;

  // set length and null-terminators
  rd->bases->l = rh->n_bases;
  rd->bases->s[rd->bases->l]='\0';
  rd->quality->l = rh->n_bases;
  rd->quality->s[rd->quality->l]='\0';

  // convert flowgram to host order
  for(k=0;k<gh->flow_length;k++) {
    rd->flowgram[k] = ntohs(rd->flowgram[k]);
  }

  // convert qualities from int to char
  for(k=0;k<rd->quality->l;k++) {
    rd->quality->s[k] = QUAL2CHAR(rd->quality->s[k]);
  }

  n_bytes += tmap_sff_read_padding(fp, n_bytes);

#ifdef TMAP_SFF_DEBUG
  tmap_sff_read_print(stderr, rd, gh, rh);
#endif

  return rd;
}
/**
 * Reverses a sequence by dispatching on its type to the matching routine.
 *
 * @param seq  sequence to reverse
 *
 * An unrecognized type is reported through tmap_error (OutOfRange).
 */
void
tmap_seq_reverse(tmap_seq_t *seq)
{
  if(TMAP_SEQ_TYPE_FQ == seq->type) {
    tmap_fq_reverse(seq->data.fq);
  }
  else if(TMAP_SEQ_TYPE_SFF == seq->type) {
    tmap_sff_reverse(seq->data.sff);
  }
  else if(TMAP_SEQ_TYPE_SAM == seq->type || TMAP_SEQ_TYPE_BAM == seq->type) {
    // SAM and BAM share the same record representation
    tmap_sam_reverse(seq->data.sam);
  }
  else {
    tmap_error("type is unrecognized", Exit, OutOfRange);
  }
}
// NB: includes key bases if present static int32_t tmap_seq_get_flowgram(tmap_seq_t *seq, uint16_t **flowgram) { switch(seq->type) { case TMAP_SEQ_TYPE_FQ: break; case TMAP_SEQ_TYPE_SFF: return tmap_sff_get_flowgram(seq->data.sff, flowgram); break; case TMAP_SEQ_TYPE_SAM: case TMAP_SEQ_TYPE_BAM: return tmap_sam_get_flowgram(seq->data.sam, flowgram); break; default: tmap_error("type is unrecognized", Exit, OutOfRange); break; } return -1; }
/**
 * Writes a suffix array to the SA file derived from `fn_fasta`.
 *
 * Write order: primary, sa_intv, seq_len, then entries 1..n_sa-1 of sa->sa
 * (entry 0 is not stored).
 *
 * @param fn_fasta  FASTA file name from which the SA file name is derived
 * @param sa        suffix array to write
 *
 * A short write is reported through tmap_error (WriteFileError).
 */
void
tmap_sa_write(const char *fn_fasta, tmap_sa_t *sa)
{
  char *fn_sa = NULL;
  tmap_file_t *fp_sa = NULL;

  fn_sa = tmap_get_file_name(fn_fasta, TMAP_SA_FILE);
  fp_sa = tmap_file_fopen(fn_sa, "wb", TMAP_SA_COMPRESSION);

  if(1 != tmap_file_fwrite(&sa->primary, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fwrite(&sa->sa_intv, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || 1 != tmap_file_fwrite(&sa->seq_len, sizeof(tmap_bwt_int_t), 1, fp_sa)
     || sa->n_sa-1 != tmap_file_fwrite(sa->sa+1, sizeof(tmap_bwt_int_t), sa->n_sa-1, fp_sa)) {
    // name the file that could not be written
    tmap_error(fn_sa, Exit, WriteFileError);
  }

  tmap_file_fclose(fp_sa);
  free(fn_sa);
}
/**
 * Reads the next group of sequences into `seqs`.
 *
 * Case 1 - SAM/BAM: only the first reader is used. One record is read and,
 *          if its BAM_FPAIRED flag is set, a second one is read as well.
 * Case 2 - SFF/FQ: one record is read from each of the io->n readers.
 *
 * @param io      readers; io->type must equal seqs->type
 * @param seqs    destination; its count is reset to zero before reading
 * @param header  SAM header forwarded to tmap_seq_update
 * @return        0 on success, EOF as soon as any read returns a negative value
 */
inline int
tmap_seqs_io_read(tmap_seqs_io_t *io, tmap_seqs_t *seqs, sam_header_t *header)
{
  int32_t i;
  tmap_seq_t *seq = NULL;

  if(io->type != seqs->type) {
    tmap_error("type mismatch", Exit, OutOfRange);
  }

  // reset seqs
  seqs->n = 0;

  if(TMAP_SEQ_TYPE_SAM != io->type && TMAP_SEQ_TYPE_BAM != io->type) {
    // read in one per file
    for(i=0;i<io->n;i++) {
      seq = tmap_seqs_get(seqs, i);
      if(tmap_seq_io_read(io->seqios[i], seq) < 0) return EOF; // TODO: better error checking
      tmap_seqs_add(seqs, seq);
      tmap_seq_update(seq, i, header);
    }
    return 0;
  }

  // NB: to support paired reads, we check the paired flag
  for(i=0;i<2;i++) {
    seq = tmap_seqs_get(seqs, i);
    if(tmap_seq_io_read(io->seqios[0], seq) < 0) return EOF; // TODO: better error checking
    tmap_seqs_add(seqs, seq);
    tmap_seq_update(seq, i, header);
    // stop after the first record if it is not paired
    if(0 == (seq->data.sam->b->core.flag & BAM_FPAIRED)) break;
  }
  return 0;
}