/*
 * Switch <sfxmappedrange> over to a temporary file: remember a private clone
 * of <tmpfilename>, the <writable> flag and the caller's pointer slot
 * <usedptrptr>, derive the number of units from <numofentries>, and finally
 * free the table currently referenced through <usedptrptr>.
 *
 * <usedptrptr> must not be NULL: it is dereferenced unconditionally at the
 * end of this function. On return *<usedptrptr> is NULL.
 */
void gt_Sfxmappedrange_usetmp(GtSfxmappedrange *sfxmappedrange,
                              const GtStr *tmpfilename,
                              void **usedptrptr,
                              GtUword numofentries,
                              bool writable)
{
  gt_assert(sfxmappedrange != NULL);

  sfxmappedrange->writable = writable;
  sfxmappedrange->filename = gt_str_clone(tmpfilename);
  sfxmappedrange->usedptrptr = usedptrptr;
  sfxmappedrange->ptr = NULL;

  /* bit sequences are counted in words rather than in single entries */
  if (sfxmappedrange->type != GtSfxGtBitsequence)
  {
    sfxmappedrange->numofunits = (size_t) numofentries;
  }
  else
  {
    sfxmappedrange->numofunits = GT_NUMOFINTSFORBITS(numofentries);
  }

  gt_log_log("use file %s for table %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->filename),
             gt_str_get(sfxmappedrange->tablename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);

  /* release the table behind the caller's slot and clear the slot; the slot
     is the one stored in sfxmappedrange->usedptrptr just above */
  gt_free(*usedptrptr);
  *usedptrptr = NULL;
}
/*
 * Parse the contained-reads lists "<readset>.<i><CNTLIST suffix>" for
 * i = 0 .. nspmfiles-1 into <contained>. The file with index 0 is always
 * parsed (with the boolean flag of gt_cntlist_parse() set to true); the
 * remaining ones are parsed with the flag set to false, stopping at the
 * first error. Every successfully parsed file must report the same number of
 * reads as the first one.
 *
 * Returns 0 on success, otherwise the error code of gt_cntlist_parse().
 */
static int gt_readjoiner_assembly_build_contained_reads_list(
    GtReadjoinerAssemblyArguments *arguments,
    GtBitsequence **contained,
    GtError *err)
{
  int had_err;
  unsigned int fileidx = 1U;
  GtUword nofreads, nofreads_current;
  GtStr *cntlist_path = gt_str_clone(arguments->readset);

  /* first list */
  gt_str_append_cstr(cntlist_path, ".0" GT_READJOINER_SUFFIX_CNTLIST);
  had_err = gt_cntlist_parse(gt_str_get(cntlist_path), true, contained,
                             &nofreads, err);

  /* remaining lists */
  while (had_err == 0 && fileidx < arguments->nspmfiles)
  {
    gt_str_reset(cntlist_path);
    gt_str_append_str(cntlist_path, arguments->readset);
    gt_str_append_char(cntlist_path, '.');
    gt_str_append_uint(cntlist_path, fileidx);
    gt_str_append_cstr(cntlist_path, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(cntlist_path), false, contained,
                               &nofreads_current, err);
    gt_assert(had_err || nofreads == nofreads_current);
    fileidx++;
  }

  gt_str_delete(cntlist_path);
  return had_err;
}
/*
 * Append "<gtdata>/modules/?.lua" and "<gtdata>/modules/external/?.lua" to
 * the Lua variable package.path of state <L>, where <gtdata> is the path
 * returned by gt_get_gtdata_path().
 *
 * Returns 0 on success and -1 if the gtdata path could not be determined
 * (in which case <L> is left untouched).
 */
int gt_lua_set_modules_path(lua_State *L, GtError *err)
{
  GtStr *gtdata_modules, *gtdata_external, *new_path;

  gt_error_check(err);
  gt_assert(L);

  gtdata_modules = gt_get_gtdata_path(gt_error_get_progname(err), err);
  if (gtdata_modules == NULL)
  {
    return -1;
  }

  /* both search patterns share the gtdata directory as prefix */
  gtdata_external = gt_str_clone(gtdata_modules);
  gt_str_append_cstr(gtdata_modules, "/modules/?.lua");
  gt_str_append_cstr(gtdata_external, "/modules/external/?.lua");

  /* read the current package.path ... */
  lua_getglobal(L, "package");
  gt_assert(lua_istable(L, -1));
  lua_getfield(L, -1, "path");
  gt_assert(lua_isstring(L, -1));
  new_path = gt_str_new_cstr(lua_tostring(L, -1));
  lua_pop(L, 1);

  /* ... extend it by the two patterns ... */
  gt_str_append_char(new_path, ';');
  gt_str_append_str(new_path, gtdata_modules);
  gt_str_append_char(new_path, ';');
  gt_str_append_str(new_path, gtdata_external);

  /* ... and store it back; the final pop removes the package table */
  lua_pushstring(L, gt_str_get(new_path));
  lua_setfield(L, -2, "path");
  lua_pop(L, 1);

  gt_str_delete(new_path);
  gt_str_delete(gtdata_modules);
  gt_str_delete(gtdata_external);
  return 0;
}
/*
 * Return whether the file named <indexname> followed by PCKBUCKETTABLE
 * exists, as reported by gt_file_exists().
 */
bool pckbuckettableexists(const GtStr *indexname)
{
  GtStr *tablepath = gt_str_clone(indexname);
  bool exists;

  gt_str_append_cstr(tablepath, PCKBUCKETTABLE);
  exists = gt_file_exists(gt_str_get(tablepath));
  gt_str_delete(tablepath);
  return exists;
}
/*
 * Store a clone of the non-empty <seqid> in slot (<filenum>, <seqnum>) of
 * <ss> together with its <offset>. The slot must be in range and still
 * unused. An <offset> of GT_UNDEF_UWORD is recorded as 1.
 */
static void seqid_store_add(SeqidStore *ss, GtUword filenum, GtUword seqnum,
                            GtStr *seqid, GtUword offset)
{
  gt_assert(ss && seqid);
  gt_assert(gt_str_length(seqid)); /* must not be empty */
  gt_assert(filenum < ss->num_of_files);
  gt_assert(seqnum < ss->num_of_sequences[filenum]);
  gt_assert(!ss->store[filenum][seqnum]); /* slot must be free */

  ss->store[filenum][seqnum] = gt_str_clone(seqid);

  if (offset == GT_UNDEF_UWORD)
  {
    ss->offsets[filenum][seqnum] = 1;
  }
  else
  {
    ss->offsets[filenum][seqnum] = offset;
  }
}
/*
 * Open the file named <indexname> followed by <suffix> in the given <mode>
 * via gt_fa_fopen() and return the resulting stream; <err> is handed through
 * to gt_fa_fopen(). The return value may be NULL.
 */
/*@null@*/ FILE *opensfxfile(const GtStr *indexname,
                             const char *suffix,
                             const char *mode,
                             GtError *err)
{
  FILE *stream;
  GtStr *path;

  gt_error_check(err);

  path = gt_str_clone(indexname);
  gt_str_append_cstr(path, suffix);
  stream = gt_fa_fopen(gt_str_get(path), mode, err);
  gt_str_delete(path);

  return stream;
}
/*
 * Return true iff stat(2) succeeds on the file named <indexname> followed by
 * <suffix>.
 */
bool indexfilealreadyexists(const GtStr *indexname, const char *suffix)
{
  struct stat statbuf;
  bool found;
  GtStr *path = gt_str_clone(indexname);

  gt_str_append_cstr(path, suffix);
  found = (stat(gt_str_get(path), &statbuf) == 0);
  gt_str_delete(path);

  return found;
}
/*
 * Map the file named <indexname> followed by <suffix> for reading via
 * gt_fa_mmap_read(), which also receives <numofbytes>.
 *
 * Returns the mapped address, or NULL if mapping failed; in the latter case
 * <err> is set to a message containing the file name and strerror(errno).
 */
void *genericmaponlytable(const GtStr *indexname, const char *suffix,
                          size_t *numofbytes, GtError *err)
{
  void *mapped;
  GtStr *path;

  gt_error_check(err);

  path = gt_str_clone(indexname);
  gt_str_append_cstr(path, suffix);

  mapped = gt_fa_mmap_read(gt_str_get(path), numofbytes);
  if (mapped == NULL)
  {
    gt_error_set(err, "cannot map file \"%s\": %s", gt_str_get(path),
                 strerror(errno));
  }

  gt_str_delete(path);
  return mapped;
}
/*
 * Load the encoded sequence of <bs> into bs->encseq, (re)constructing the
 * on-disk bioseq files first when needed.
 *
 * The base name of the files is "stdin" if bs->use_stdin is set and
 * bs->sequence_file otherwise. The files are constructed if <recreate> is
 * set, if the input comes from stdin, if one of the five files (encseq, ois,
 * sds, md5, des) is missing, or if the sequence file is newer than the encseq
 * file.
 *
 * Returns 0 on success (bs->encseq is then non-NULL); otherwise a non-zero
 * value with <err> set.
 */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *basename,
        *index_path,
        *ois_path,
        *sds_path,
        *md5_path,
        *des_path;
  bool must_construct;
  int had_err = 0;

  gt_assert(!bs->encseq);

  /* stdin gets a private base name which is released again below */
  basename = bs->use_stdin ? gt_str_new_cstr("stdin") : bs->sequence_file;

  /* construct file names */
  index_path = gt_str_clone(basename);
  gt_str_append_cstr(index_path, GT_ENCSEQFILESUFFIX);
  ois_path = gt_str_clone(basename);
  gt_str_append_cstr(ois_path, GT_OISTABFILESUFFIX);
  sds_path = gt_str_clone(basename);
  gt_str_append_cstr(sds_path, GT_SDSTABFILESUFFIX);
  md5_path = gt_str_clone(basename);
  gt_str_append_cstr(md5_path, GT_MD5TABFILESUFFIX);
  des_path = gt_str_clone(basename);
  gt_str_append_cstr(des_path, GT_DESTABFILESUFFIX);

  /* construct the bioseq files if necessary; the evaluation order matters:
     bs->sequence_file is only consulted when the input is not stdin */
  must_construct = recreate
                   || bs->use_stdin
                   || !gt_file_exists(gt_str_get(index_path))
                   || !gt_file_exists(gt_str_get(ois_path))
                   || !gt_file_exists(gt_str_get(sds_path))
                   || !gt_file_exists(gt_str_get(md5_path))
                   || !gt_file_exists(gt_str_get(des_path))
                   || gt_file_is_newer(gt_str_get(bs->sequence_file),
                                       gt_str_get(index_path));
  if (must_construct)
  {
    had_err = construct_bioseq_files(bs, basename, err);
  }

  /* load the encoded sequence with all supports required explicitly */
  if (!had_err)
  {
    GtEncseqLoader *loader = gt_encseq_loader_new();

    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(basename), err);
    if (bs->encseq == NULL)
    {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  if (!had_err)
  {
    gt_assert(bs->encseq);
  }

  /* free */
  if (bs->use_stdin)
  {
    gt_str_delete(basename);
  }
  gt_str_delete(index_path);
  gt_str_delete(ois_path);
  gt_str_delete(md5_path);
  gt_str_delete(sds_path);
  gt_str_delete(des_path);

  return had_err;
}
/*
 * Tool runner performing a two-stage BLAST search against a compressed
 * (GtNREncseq) database; it only does work if arguments->blastn or
 * arguments->blastp is set, otherwise it returns 0 immediately:
 *
 *  1. coarse search: BLAST the queries against "<dbpath>.fas" and collect the
 *     index and range of every hit,
 *  2. extract the hit sequences into "coarse_<db basename without
 *     suffix>.fas",
 *  3. fine search: BLAST the queries against that extracted file and print
 *     one tab separated line per hit to arguments->outfp.
 *
 * <tool_arguments> must point to a GtCondenserSearchArguments object; <argc>,
 * <argv> and <parsed_args> are unused.
 *
 * Returns 0 on success and a non-zero value, with <err> set, on failure.
 */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath;
  GtStr *coarse_fname;
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  /* validate before the first dereference of arguments */
  gt_assert(arguments);

  querypath = gt_str_get(arguments->querypath);
  coarse_fname = gt_str_new_cstr("coarse_");
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch *match;
    GtMatchIterator *mp = NULL;
    GtNREncseq *nrencseq = NULL;
    GtStr *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition *hits;
    double eval, raw_eval = 0.0;
    GtUword coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int curr_hits = 0, max_hits = 100;

    /* every slot of hits owns a pre-allocated range; the array grows in
       steps of 100 slots */
    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);
    gt_str_append_cstr(fastaname, ".fas");

    for (i = 0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S' */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                  gt_str_get(fastaname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                  gt_str_get(fastaname), err);
    }

    /* coarse BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      /* NOTE(review): arguments->ceval is passed on even if it equals
         GT_UNDEF_DOUBLE -- confirm this is intended */
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END)
      {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          /* dbseqid is obtained from match, so it is only used before
             match is deleted below */
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            if (curr_hits == max_hits) {
              /* all slots used: add another 100 */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i = max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          }
          else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* the match is released on the parse-failure path as well;
             previously it was leaked there */
          gt_match_delete(match);
        }
        else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }

    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;

      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);

      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      if (coarse_hits == NULL) {
        /* the output file could not be opened: do not hand a NULL file to
           the extraction (err is expected to be set by gt_file_new()) */
        had_err = -1;
      }
      else {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        gt_assert(had_err || coarse_db_len != 0);
        gt_file_delete(coarse_hits);
      }
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                               gt_str_get(coarse_fname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                               gt_str_get(coarse_fname), err);
    }

    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      /* without an explicit fine E-value, scale the raw E-value by the
         length of the extracted coarse database */
      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END)
        {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          }
          else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);
    }

    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i = 0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}
/*
 * Tool runner computing a contained-reads bit table for arguments->readset
 * with the method selected by arguments->test and showing it with
 * gt_cntlist_show():
 *
 *  - SHOWLIST:        parse an existing "<readset><CNTLIST suffix>" file,
 *  - BRUTEFORCE, KMP: load the reads as encoded sequence and call
 *                     gt_rdj_pairwise_exact(),
 *  - ESA:             open a sequential suffix array reader and call
 *                     gt_contfind_bottomup().
 *
 * Any other value of arguments->test is a programming error.
 * <argc>, <argv> and <parsed_args> are unused.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
                                        GT_UNUSED const char **argv,
                                        GT_UNUSED int parsed_args,
                                        void *tool_arguments,
                                        GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtBitsequence *bits = NULL;
  GtUword nofreads;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *cntlist_path = gt_str_clone(arguments->readset);

    gt_str_append_cstr(cntlist_path, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(cntlist_path), true, &bits,
                               &nofreads, err);
    gt_str_delete(cntlist_path);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
           arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    GtEncseq *reads;
    GtEncseqLoader *el = gt_encseq_loader_new();

    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
    {
      gt_encseq_loader_mirror(el);
    }
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads != NULL)
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
                            false,
                            arguments->test == GT_READJOINER_CNTTEST_KMP,
                            1UL, true, NULL, NULL, false, NULL, &bits,
                            &nofreads);
    }
    else
    {
      had_err = -1;
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
                                             GT_LOGGER_DEFLT_PREFIX, stderr);

    ssar = gt_newSequentialsuffixarrayreaderfromfile(
             gt_str_get(arguments->readset),
             SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
             true, verbose_logger, err);
    if (gt_error_is_set(err))
    {
      had_err = -1;
    }
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* only half of the sequences are counted as reads; the index of the
           first sequence of the other half is kept in firstrevcompl */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* readlength stays 0 unless both strands are used and the access type
         is equal-length */
      if (!arguments->singlestrand &&
          gt_encseq_accesstype_get(ssar->encseq) ==
            GT_ACCESS_TYPE_EQUALLENGTH)
      {
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      }
      (void)gt_contfind_bottomup(ssar, false, bits,
                                 arguments->singlestrand ? 0 : firstrevcompl,
                                 readlength);
    }
    if (ssar != NULL)
    {
      gt_freeSequentialsuffixarrayreader(&ssar);
    }
    gt_logger_delete(verbose_logger);
  }
  else
  {
    gt_assert(false);
  }

  if (!had_err)
  {
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  }
  gt_free(bits);
  return had_err;
}