void tmap_refseq_pac2revpac(const char *fn_fasta) { uint32_t i, j, c; tmap_refseq_t *refseq=NULL, *refseq_rev=NULL; tmap_progress_print("reversing the packed reference FASTA"); refseq = tmap_refseq_read(fn_fasta, 0); // shallow copy refseq_rev = tmap_calloc(1, sizeof(tmap_refseq_t), "refseq_rev"); (*refseq_rev) = (*refseq); // update sequence refseq_rev->seq = NULL; refseq_rev->seq = tmap_calloc(tmap_refseq_seq_memory(refseq->len), sizeof(uint8_t), "refseq_rev->seq"); for(i=0;i<refseq->len;i++) { c = tmap_refseq_seq_i(refseq, i); j = refseq->len - i - 1; tmap_refseq_seq_store_i(refseq_rev, j, c); } // write tmap_refseq_write(refseq_rev, fn_fasta, 1); // free free(refseq_rev->seq); free(refseq_rev); tmap_refseq_destroy(refseq); tmap_progress_print2("reversed the packed reference FASTA"); }
/**
 * Allocates a tmap_index_t and populates its reference sequence, BWT and SA.
 *
 * The data source is chosen as follows:
 *   - mm == 1                      : the *_mm_read() loaders;
 *   - otherwise, shm_key != 0      : unpacked from the shared memory segment
 *                                    obtained with tmap_shm_init(shm_key, 0, 0);
 *   - otherwise                    : the regular *_read() loaders.
 *
 * After loading, the BWT and SA lengths must each equal twice the reference
 * length; a mismatch is reported through tmap_error().
 *
 * @param fn_fasta  file name passed unchanged to the loaders
 * @param shm_key   shared memory key; 0 means "do not use shared memory"
 * @param mm        1 to use the memory-map loaders
 * @return the newly allocated index
 *
 * NOTE(review): tmap_error(..., Exit, ...) presumably terminates the process,
 * so the NULL/length checks do not return to the caller -- confirm.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key, int32_t mm)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(*index), "index");

  index->shm_key = shm_key;
  index->mm = mm;

  if(index->mm == 1) {
      // memory-mapped reference data
      tmap_progress_print("Retrieving reference data from memory map");
      index->refseq = tmap_refseq_mm_read(fn_fasta);
      index->bwt = tmap_bwt_mm_read(fn_fasta);
      index->sa = tmap_sa_mm_read(fn_fasta);
      tmap_progress_print2("Reference data retrieved from memory map");
  }
  else if(index->shm_key != 0) {
      // reference data published in shared memory
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(index->refseq == NULL) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(index->bwt == NULL) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(index->sa == NULL) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // plain read from disk
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // sanity check: the BWT and SA must both cover twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
/**
 * Allocates a tmap_index_t and populates its reference sequence, BWT and SA.
 *
 * When shm_key is 0 the data is loaded with the regular *_read() loaders;
 * otherwise it is unpacked from the shared memory segment obtained with
 * tmap_shm_init(shm_key, 0, 0).
 *
 * After loading, the BWT and SA lengths must each equal twice the reference
 * length; a mismatch is reported through tmap_error().
 *
 * @param fn_fasta  file name passed unchanged to the loaders
 * @param shm_key   shared memory key; 0 means "do not use shared memory"
 * @return the newly allocated index
 *
 * NOTE(review): tmap_error(..., Exit, ...) presumably terminates the process,
 * so the NULL/length checks do not return to the caller -- confirm.
 */
tmap_index_t*
tmap_index_init(const char *fn_fasta, key_t shm_key)
{
  tmap_index_t *index = tmap_calloc(1, sizeof(*index), "index");

  index->shm_key = shm_key;

  if(index->shm_key != 0) {
      // reference data published in shared memory
      tmap_progress_print("retrieving reference data from shared memory");
      index->shm = tmap_shm_init(index->shm_key, 0, 0);

      index->refseq = tmap_refseq_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_REFSEQ));
      if(index->refseq == NULL) {
          tmap_error("the packed reference sequence was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->bwt = tmap_bwt_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_BWT));
      if(index->bwt == NULL) {
          tmap_error("the BWT string was not found in shared memory", Exit, SharedMemoryListing);
      }

      index->sa = tmap_sa_shm_unpack(tmap_shm_get_buffer(index->shm, TMAP_SHM_LISTING_SA));
      if(index->sa == NULL) {
          tmap_error("the SA was not found in shared memory", Exit, SharedMemoryListing);
      }

      tmap_progress_print2("reference data retrieved from shared memory");
  }
  else {
      // plain read from disk
      tmap_progress_print("reading in reference data");
      index->refseq = tmap_refseq_read(fn_fasta);
      index->bwt = tmap_bwt_read(fn_fasta);
      index->sa = tmap_sa_read(fn_fasta);
      tmap_progress_print2("reference data read in");
  }

  // sanity check: the BWT and SA must both cover twice the reference length
  if(index->bwt->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and bwt lengths do not match", Exit, OutOfRange);
  }
  if(index->sa->seq_len != (index->refseq->len << 1)) {
      tmap_error("refseq and sa lengths do not match", Exit, OutOfRange);
  }

  return index;
}
int tmap_refseq_pac2fasta_main(int argc, char *argv[]) { int c, help=0, amb=0; uint32_t i, j, k; char *fn_fasta = NULL; tmap_refseq_t *refseq = NULL; while((c = getopt(argc, argv, "avh")) >= 0) { switch(c) { case 'a': amb = 1; break; case 'v': tmap_progress_set_verbosity(1); break; case 'h': help = 1; break; default: return 1; } } if(1 != argc - optind || 1 == help) { tmap_file_fprintf(tmap_file_stderr, "Usage: %s %s [-avh] <in.fasta>\n", PACKAGE, argv[0]); return 1; } fn_fasta = argv[optind]; // Note: 'tmap_file_stdout' should not have been previously modified tmap_file_stdout = tmap_file_fdopen(fileno(stdout), "wb", TMAP_FILE_NO_COMPRESSION); // read in the reference sequence refseq = tmap_refseq_read(fn_fasta, 0); for(i=0;i<refseq->num_annos;i++) { tmap_file_fprintf(tmap_file_stdout, ">%s", refseq->annos[i].name->s); // new line handled later for(j=k=0;j<refseq->annos[i].len;j++) { if(0 == (j % TMAP_REFSEQ_FASTA_LINE_LENGTH)) { tmap_file_fprintf(tmap_file_stdout, "\n"); } if(1 == amb && 0 < refseq->annos[i].num_amb) { // move the next ambiguous region while(k < refseq->annos[i].num_amb && refseq->annos[i].amb_positions_end[k] < j+1) { k++; } // check for the ambiguous region if(k < refseq->annos[i].num_amb && 0 == tmap_interval_overlap(j+1, j+1, refseq->annos[i].amb_positions_start[k], refseq->annos[i].amb_positions_end[k])) { tmap_file_fprintf(tmap_file_stdout, "%c", tmap_iupac_int_to_char[refseq->annos[i].amb_bases[k]]); } else { tmap_file_fprintf(tmap_file_stdout, "%c", "ACGTN"[(int)tmap_refseq_seq_i(refseq, j + refseq->annos[i].offset)]); } } else { tmap_file_fprintf(tmap_file_stdout, "%c", "ACGTN"[(int)tmap_refseq_seq_i(refseq, j + refseq->annos[i].offset)]); } } tmap_file_fprintf(tmap_file_stdout, "\n"); } // destroy tmap_refseq_destroy(refseq); // close the output tmap_file_fclose(tmap_file_stdout); return 0; }