/* Append the description of the sequence identified by <seqid> to <desc>.

   The lookup strategy depends on the state of <rm>:
   - rm->userawseq: the literal "<rawseq>" is appended and 0 is returned;
   - <seqid> carries the MD5 prefix: the description is resolved via
     gt_seq_col_md5_to_description();
   - rm->usedesc: <seqid> is mapped to a (file, sequence) number pair first;
   - rm->useseqno: <seqid> must have the form "seqX", X being a sequence
     number in rm->encseq;
   - rm->matchdesc: the description is searched for <seqid>;
   - otherwise: the description of sequence 0 in file 0 is used.

   Returns 0 on success and a non-zero value on failure; the error messages
   produced by this function itself are stored in <err>. */
int gt_region_mapping_get_description(GtRegionMapping *rm, GtStr *desc,
                                      GtStr *seqid, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && desc && seqid);
  if (rm->userawseq) {
    gt_str_append_cstr(desc, "<rawseq>");
    return 0;
  }
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  /* Only MD5-prefixed seqids are answered here. The return must stay inside
     this branch, otherwise none of the strategies below is ever reached. */
  if (!had_err && gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
    return gt_seq_col_md5_to_description(rm->seq_col, desc, seqid, err);
  }
  if (!had_err) {
    if (rm->usedesc) {
      unsigned long filenum, seqnum;
      gt_assert(rm->seqid2seqnum_mapping);
      had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                            gt_str_get(seqid), NULL, &seqnum,
                                            &filenum, NULL, err);
      if (!had_err) {
        char *cdesc;
        cdesc = gt_seq_col_get_description(rm->seq_col, filenum, seqnum);
        gt_assert(cdesc);
        gt_str_append_cstr(desc, cdesc);
        gt_free(cdesc);
      }
    }
    else if (rm->useseqno) {
      unsigned long seqno = GT_UNDEF_ULONG;
      gt_assert(rm->encseq);
      if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
        gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                          "where X is a sequence number in the encoded "
                          "sequence", gt_str_get(seqid));
        had_err = -1;
      }
      gt_assert(had_err || seqno != GT_UNDEF_ULONG);
      if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
        gt_error_set(err, "trying to access sequence %lu, but encoded "
                          "sequence contains only %lu sequences",
                          seqno, gt_encseq_num_of_sequences(rm->encseq));
        had_err = -1;
      }
      if (!had_err) {
        unsigned long desclen;
        const char *edesc;
        edesc = gt_encseq_description(rm->encseq, &desclen, seqno);
        /* <edesc> comes with an explicit length, hence the _nt variant */
        gt_str_append_cstr_nt(desc, edesc, desclen);
      }
    }
    else if (rm->matchdesc) {
      const char *md5;
      /* XXX: not beautiful, but works -- this may be LOTS faster */
      had_err = gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
      if (!had_err) {
        GtStr *md5_seqid = gt_str_new_cstr(md5);
        had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, md5_seqid,
                                                err);
        gt_str_delete(md5_seqid);
      }
    }
    else {
      /* no mapping strategy selected: fall back to the first sequence */
      char *cdesc;
      cdesc = gt_seq_col_get_description(rm->seq_col, 0, 0);
      gt_assert(cdesc);
      gt_str_append_cstr(desc, cdesc);
      gt_free(cdesc);
    }
  }
  return had_err;
}
/* Run the Lua documentation script belonging to <progname>.

   <progname> is split at blanks. The first token is used to locate the
   gtdata directory; the script name is the basename of <progname> if there
   is only one token, and the last token otherwise. Before the script is
   executed, the Lua global "gtdata_doc_dir" is set to the "<gtdata>/doc/"
   directory. Returns 0 on success and -1 on failure (<err> is set). */
int gt_gtdata_show_help(const char *progname, GT_UNUSED void *unused,
                        GtError *err)
{
  GtSplitter *tokens;
  GtStr *script_path;
  lua_State *lua = NULL;
  char *progcopy, *base;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(progname);

  /* the splitter modifies its input, so work on a private copy */
  progcopy = gt_cstr_dup(progname);
  tokens = gt_splitter_new();
  gt_splitter_split(tokens, progcopy, strlen(progcopy), ' ');

  script_path = gt_get_gtdata_path(gt_splitter_get_token(tokens, 0), err);
  if (!script_path) {
    had_err = -1;
  }
  else {
    gt_str_append_cstr(script_path, "/doc/");
    lua = luaL_newstate();
    if (!lua) {
      gt_error_set(err, "out of memory (cannot create new Lua state)");
      had_err = -1;
    }
    else {
      /* at this point <script_path> still names the doc directory */
      luaL_openlibs(lua);
      lua_pushstring(lua, gt_str_get(script_path));
      lua_setglobal(lua, "gtdata_doc_dir");

      /* now extend the directory to the full script file name */
      if (gt_splitter_size(tokens) != 1) {
        /* general case for the tools */
        gt_str_append_cstr(script_path,
                           gt_splitter_get_token(tokens,
                                                 gt_splitter_size(tokens) - 1));
      }
      else {
        /* special case for `gt` */
        base = gt_basename(progname);
        gt_str_append_cstr(script_path, base);
        gt_free(base);
      }
      gt_str_append_cstr(script_path, ".lua");

      /* load and run the script */
      if (luaL_loadfile(lua, gt_str_get(script_path)) ||
          lua_pcall(lua, 0, 0, 0)) {
        gt_error_set(err, "cannot run doc file: %s", lua_tostring(lua, -1));
        had_err = -1;
      }
    }
  }

  /* release everything acquired above */
  if (lua)
    lua_close(lua);
  gt_str_delete(script_path);
  gt_splitter_delete(tokens);
  gt_free(progcopy);

  return had_err;
}
/* Tool runner: check the GFF3 input files given in <argv> (starting at index
   <parsed_args>) against the specification file and render the results with
   the selected output driver.

   The output driver is arguments->format itself if a file of that name
   exists, and "<gtdata>/spec/output_drivers/<format>" otherwise.
   Returns 0 on success and a non-zero value on failure (<err> is set by the
   failing step). All resources allocated here are released on every path. */
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                           void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib = NULL;
  SpeccheckArguments *arguments = tool_arguments;
  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);

  /* locate the output driver */
  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  }
  else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                   gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    speclib = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (!speclib) {
      /* gtdata directory could not be determined */
      had_err = -1;
    }
    else {
      gt_str_append_cstr(speclib, "/spec/output_drivers/");
      gt_str_append_str(speclib, arguments->format);
      if (!gt_file_exists(gt_str_get(speclib))) {
        gt_error_set(err, "output driver file \"%s\" does not exist",
                     gt_str_get(speclib));
        had_err = -1;
      }
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    /* do not return here: the common cleanup below must still run */
    if (!spec_visitor)
      had_err = -1;
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                          argc - parsed_args,
                                                          argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them
       first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream,
                                                               arr, err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        /* replay the collected nodes for the actual check */
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    }
    else {
      gt_timer_start(t);
    }

    if (!had_err) {
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                              arguments->outfp,
                                              gt_str_get(arguments->specfile),
                                              arguments->verbose,
                                              arguments->colored,
                                              gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  /* NOTE(review): <spec_visitor> and <rm> are not released here; presumably
     the visitor stream/visitor take them over -- confirm for the error paths
     on which no checker stream was created. */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}
/* Create a protein domain visitor which uses `hmmscan` on the models in
   <model>.

   The availability of `hmmscan` is probed with system() first; NULL is
   returned (and <err> is set) if the probe cannot be executed or exits with a
   non-zero status. Otherwise the visitor is initialised from the given
   parameters and the hmmscan command line is assembled and stored, both as a
   string and split at blanks into an argument vector. */
GtNodeVisitor* gt_ltrdigest_pdom_visitor_new(GtPdomModelSet *model,
                                             double eval_cutoff,
                                             unsigned int chain_max_gap_length,
                                             GtPdomCutoff cutoff,
                                             GtRegionMapping *rmap,
                                             GtError *err)
{
  GtNodeVisitor *visitor;
  GtLTRdigestPdomVisitor *pv;
  GtStr *hmmscan_call;
  int frame, status;

  gt_assert(model && rmap);

  /* make sure hmmscan can be run at all */
  status = system("hmmscan -h > /dev/null");
  if (status == -1) {
    gt_error_set(err, "error executing system(hmmscan)");
    return NULL;
  }
  if (WEXITSTATUS(status) != 0) {
    gt_error_set(err, "cannot find the hmmscan executable in PATH");
    return NULL;
  }

  visitor = gt_node_visitor_create(gt_ltrdigest_pdom_visitor_class());
  pv = gt_ltrdigest_pdom_visitor_cast(visitor);
  pv->rmap = rmap;
  pv->cutoff = cutoff;
  pv->eval_cutoff = eval_cutoff;
  pv->chain_max_gap_length = chain_max_gap_length;
  pv->output_all_chains = false;
  pv->tag = gt_str_new_cstr(GT_LTRDIGEST_TAG);

  /* one buffer per slot in both fwd[] and rev[] */
  frame = 0;
  while (frame < 3) {
    pv->fwd[frame] = gt_str_new();
    pv->rev[frame] = gt_str_new();
    frame++;
  }

  /* assemble: hmmscan --cpu <jobs> <cutoff option> <model file> - */
  hmmscan_call = gt_str_new_cstr("hmmscan --cpu ");
  gt_str_append_uint(hmmscan_call, gt_jobs);
  gt_str_append_cstr(hmmscan_call, " ");
  if (cutoff == GT_PHMM_CUTOFF_GA) {
    gt_str_append_cstr(hmmscan_call, "--cut_ga");
  }
  else if (cutoff == GT_PHMM_CUTOFF_TC) {
    gt_str_append_cstr(hmmscan_call, "--cut_tc");
  }
  else if (cutoff == GT_PHMM_CUTOFF_NONE) {
    gt_str_append_cstr(hmmscan_call, "--domE ");
    gt_str_append_double(hmmscan_call, eval_cutoff, 50);
  }
  gt_str_append_cstr(hmmscan_call, " ");
  gt_str_append_cstr(hmmscan_call, gt_pdom_model_set_get_filename(model));
  gt_str_append_cstr(hmmscan_call, " -");

  pv->cmdline = hmmscan_call;
  pv->args = gt_cstr_split(gt_str_get(pv->cmdline), ' ');
  gt_log_log("HMMER cmdline: %s", gt_str_get(hmmscan_call));

  return visitor;
}
/* Parse the alignment section of the parser input read from <instream>.

   Lines are fetched with pdom_parser_get_next_line() into <buf> until that
   function reports an error or a line starts with
   "Internal pipeline statistics" or ">>". For every domain header (a line
   with "==" at offsets 2 and 3) the corresponding hit in <status> receives a
   fresh alignment and amino acid string, which the following lines fill.

   Returns the last value returned by pdom_parser_get_next_line(). */
static int gt_ltrdigest_pdom_visitor_parse_alignments(GT_UNUSED
                                                   GtLTRdigestPdomVisitor *lv,
                                                   GtHMMERParseStatus *status,
                                                   char *buf,
                                                   FILE *instream,
                                                   GtError *err)
{
  int had_err = 0,
      cur_domain = GT_UNDEF_INT,
      line = GT_UNDEF_INT;          /* line counter within the current hit */
  bool first_align_line = false;
  int mod_val = 4;                  /* number of lines per alignment block */
  GtHMMERSingleHit *hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal pipeline statistics", buf, (size_t) 28)
           && strncmp(">>", buf, (size_t) 2)) {
    if ((buf[2] == '=' && buf[3] == '=')) {
      /* domain header: the domain number is read from offset 12 after the
         line has been cut off at offset 17 */
      buf[17] = '\0';
      cur_domain = atoi(buf+12);
      gt_assert(cur_domain != GT_UNDEF_INT && cur_domain > 0);
      /* domain numbers start at 1, hit indices at 0 */
      hit = gt_hmmer_parse_status_get_hit(status, (GtUword) cur_domain - 1);
      gt_assert(hit && !hit->alignment);
      hit->alignment = gt_str_new();
      hit->aastring = gt_str_new();
      first_align_line = true;
      mod_val = 4;
    } else {
      bool run = true;
      /* NOTE(review): "%s" below is unbounded; junkbuf is only safe as long
         as no token in <buf> is longer than BUFSIZ-1 -- confirm the size of
         the caller's buffer. */
      char junkbuf[BUFSIZ];
      if (first_align_line) {
        /* some models contain consensus structure annotation -- in this case
           there is an additional line in the output which must be taken into
           account */
        line = 0;
        if (1 == sscanf(buf, "%*s %s", junkbuf)) {
          if (0 == strcmp(junkbuf, "CS") || 0 == strcmp(junkbuf, "RF")) {
            /* annotation line: do not store it, use blocks of 5 lines, and
               start at -1 so that the increment below makes the next line
               number 0 */
            mod_val = 5;
            line = -1;
            run = false;
          }
        }
        first_align_line = false;
      }
      if (run) {
        gt_assert(hit && hit->alignment);
        /* store the line before strtok() below modifies <buf> */
        gt_str_append_cstr(hit->alignment, buf);
        gt_str_append_char(hit->alignment, '\n');
        switch (line % mod_val) {
          case 1:
            /* second line of a block is followed by an extra newline */
            gt_str_append_char(hit->alignment, '\n');
            break;
          case 2:
            {
              /* third line of a block: the first blank-separated token must
                 start with two characters out of "012+-", the third token is
                 handed to gt_ltrdigest_pdom_visitor_add_aaseq() */
              GT_UNUSED char *b = buf;
              b = strtok(buf, " ");
              gt_assert(strspn(b, "012+-") == (size_t) 2);
              b = strtok(NULL, " ");
              gt_assert(strlen(b) > 0);
              b = strtok(NULL, " ");
              gt_ltrdigest_pdom_visitor_add_aaseq(b, hit->aastring);
            }
            break;
        }
      }
      line++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}
/* Tool runner: draw the features of the GFF3 file argv[parsed_args+1] into
   the multi-page PDF/PostScript file argv[parsed_args].

   The sequence region is arguments->seqid or, if that is empty, the first
   one in the input. The displayed range is cut into pieces of
   arguments->width positions; each piece is laid out and drawn below the
   previous one, and a new page is started when a piece does not fit.

   Returns 0 on success and a non-zero value on failure (<err> is set by the
   failing step). Everything allocated here is released on every path. */
static int gt_sketch_page_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtBioseq *bioseq = NULL;
  GtCanvas *canvas = NULL;
  char *seqid = NULL,        /* seqid in use, may alias arguments->seqid */
       *first_seqid = NULL;  /* seqid obtained from the index, owned here */
  const char *outfile = NULL;
  GtUword start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  bool has_seqid = false;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style */
  sty = gt_style_new(err);
  if (!sty)
    had_err = -1;
  if (!had_err) {
    if (gt_str_length(arguments->stylefile) == 0) {
      prog = gt_str_new();
      gt_str_append_cstr_nt(prog, argv[0],
                            gt_cstr_length_up_to_char(argv[0], ' '));
      gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
      gt_str_delete(prog);
      if (!gt_style_file)
        had_err = -1;
      else
        gt_str_append_cstr(gt_style_file, "/sketch/default.style");
    }
    else {
      gt_style_file = gt_str_ref(arguments->stylefile);
    }
  }
  if (!had_err)
    had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);

  outfile = argv[parsed_args];

  /* get features */
  if (!had_err)
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1],
                                            err);

  /* determine the sequence region; this must happen after the input has been
     loaded, an empty index does not know any seqid */
  if (!had_err) {
    if (gt_str_length(arguments->seqid) == 0) {
      first_seqid = gt_feature_index_get_first_seqid(features, err);
      if (first_seqid == NULL) {
        gt_error_set(err, "GFF input file must contain a sequence region!");
        had_err = -1;
      }
      seqid = first_seqid;
    }
    else {
      had_err = gt_feature_index_has_seqid(features, &has_seqid,
                                           gt_str_get(arguments->seqid), err);
      if (!had_err && !has_seqid) {
        gt_error_set(err,
                     "sequence region '%s' does not exist in GFF input file",
                     gt_str_get(arguments->seqid));
        had_err = -1;
      }
      else if (!had_err)
        seqid = gt_str_get(arguments->seqid);
    }
  }

  /* set text */
  if (gt_str_length(arguments->text) == 0) {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err) {
    /* set display range */
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid, err);
  }

  /* load the optional sequence file before any output is produced */
  if (!had_err && gt_str_length(arguments->seqfile) > 0) {
    bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
    if (!bioseq)
      had_err = -1;
  }

  if (!had_err) {
    qry_range.start = (arguments->range.start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format */
    /* NOTE(review): <surf> stays NULL for any format other than "pdf" and
       "ps"; presumably the option parser restricts the choices -- confirm. */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0) {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0) {
      surf = cairo_ps_surface_create(outfile,
                                     mm_to_pt(arguments->pwidth),
                                     mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
               mm_to_pt(arguments->pwidth),
               mm_to_pt(arguments->pheight));

    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                      - arguments->theight
                      - arguments->theight
                      - 4*TEXT_SPACER;

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty, err);

    for (start = qry_range.start;
         !had_err && start <= qry_range.end;
         start += arguments->width) {
      GtRange single_range;
      GtCustomTrack *ct = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                       gt_bioseq_get_sequence_length(bioseq, 0),
                                       800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      if (!l)
        had_err = -1;
      else
        had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* start a new page if this piece does not fit on the current one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
                                       - arguments->theight,
                                     offsetpos + height)) {
          draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                      seqid, num_pages, mm_to_pt(arguments->pwidth),
                      mm_to_pt(arguments->pheight),
                      arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty, cr, offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height, NULL, err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      gt_canvas_delete(canvas);
      canvas = NULL; /* never delete the same canvas in a later iteration */
      if (l)
        gt_layout_delete(l);
      l = NULL;
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }

    /* finish the last page */
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight),
                arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be " GT_WU " pages\n", num_pages);
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
  }

  /* free */
  if (bioseq)
    gt_bioseq_delete(bioseq);
  if (sty)
    gt_style_delete(sty);
  /* only the seqid obtained from the index is owned by this function; the
     other one is the buffer of arguments->seqid */
  if (first_seqid)
    gt_free(first_seqid);
  gt_str_delete(gt_style_file);
  gt_feature_index_delete(features);

  return had_err;
}
/* Append the text <opt> to the string stored in <call>. Nothing is inserted
   between the existing content and <opt>. */
void gt_blast_process_call_set_opt(GtBlastProcessCall *call, const char *opt)
{
  GtStr *target = call->str;

  gt_str_append_cstr(target, opt);
}
/* Split the FASTA file <filename> into the numbered files
   "<basename>.<n><suffix>" (n = 1, 2, ...), where <suffix> is the file mode
   suffix of the source file.

   The input is copied in chunks of BUFSIZ bytes. As soon as a chunk would
   push the current output file beyond <max_filesize> bytes, the chunk is
   searched for a separator and, if one is found, the output continues in a
   new file which starts at that '>' character. Without a separator in the
   chunk the current file simply keeps growing.

   Returns 0 on success and -1 if the input is empty, does not start with
   '>', or an output file could not be opened (<err> is set). */
static int split_fasta_file(const char *filename, unsigned long max_filesize,
                            bool force, GtError *err)
{
  GtFile *in = NULL, *out = NULL;
  GtStr *outname = NULL;
  unsigned long part = 0, written = 0, sep;
  int nread, had_err = 0;
  char buf[BUFSIZ];

  gt_error_check(err);
  gt_assert(filename && max_filesize);

  /* open source file */
  in = gt_file_xopen(filename, "r");
  gt_assert(in);

  /* read start characters */
  nread = gt_file_xread(in, buf, BUFSIZ);
  if (nread == 0) {
    gt_error_set(err, "file \"%s\" is empty", filename);
    had_err = -1;
  }
  written += nread;

  /* make sure the file is in fasta format */
  if (!had_err && buf[0] != '>') {
    gt_error_set(err, "file is not in FASTA format");
    had_err = -1;
  }

  if (!had_err) {
    /* open the first destination file */
    outname = gt_str_new();
    gt_str_append_cstr_nt(outname, filename,
                          gt_file_basename_length(filename));
    gt_str_append_char(outname, '.');
    gt_str_append_ulong(outname, ++part);
    gt_str_append_cstr(outname, gt_file_mode_suffix(gt_file_mode(in)));
    out = gt_outputfile_xopen_forcecheck(gt_str_get(outname), "w", force,
                                         err);
    if (!out)
      had_err = -1;
    if (!had_err)
      gt_file_xwrite(out, buf, nread);

    while (!had_err) {
      nread = gt_file_xread(in, buf, BUFSIZ);
      if (nread == 0)
        break; /* end of input */

      if (written + nread > max_filesize) {
        /* only look for a separator behind the size limit */
        int offset = 0;
        if (written < max_filesize)
          offset = max_filesize - written;
        sep = buf_contains_separator(buf, offset, nread);
        if (sep) {
          /* the helper reports position + 1, 0 means "not found" */
          sep--;
          gt_assert(sep < nread);
          if (sep)
            gt_file_xwrite(out, buf, sep);

          /* close current file */
          gt_file_delete(out);

          /* open new file */
          gt_str_reset(outname);
          gt_str_append_cstr_nt(outname, filename,
                                gt_file_basename_length(filename));
          gt_str_append_char(outname, '.');
          gt_str_append_ulong(outname, ++part);
          gt_str_append_cstr(outname, gt_file_mode_suffix(gt_file_mode(in)));
          out = gt_outputfile_xopen_forcecheck(gt_str_get(outname), "w",
                                               force, err);
          if (!out) {
            had_err = -1;
            break;
          }

          /* the rest of the chunk opens the new file */
          written = nread - sep; /* reset */
          gt_assert(buf[sep] == '>');
          gt_file_xwrite(out, buf + sep, nread - sep);
          continue;
        }
      }

      written += nread;
      gt_file_xwrite(out, buf, nread);
    }
  }

  /* free */
  gt_str_delete(outname);

  /* close current file */
  gt_file_delete(out);

  /* close source file */
  gt_file_delete(in);

  return had_err;
}
/* Create a model set from the HMM files named in <hmmfiles>.

   The availability of `hmmpress` is probed with system() first. The model
   set file is "<TMPDIR>/<md5>" ("/tmp" if TMPDIR is unset), <md5> being the
   fingerprint of the concatenated file names. Unless the corresponding index
   file (suffix GT_HMM_INDEX_SUFFIX) already exists, all HMM files are
   concatenated into the model set file and `hmmpress -f` is run on it.

   Returns the new model set, or NULL on failure (<err> is set). */
GtPdomModelSet* gt_pdom_model_set_new(GtStrArray *hmmfiles, GtError *err)
{
  GtStr *concat_dbnames, *cmdline, *indexfilename = NULL;
  GtUword i;
  char *md5_hash;
  int ch; /* must be int: fgetc() returns EOF out of band */
  const char *tmpdir;
  int had_err = 0, rval;
  FILE *dest;
  GtPdomModelSet *pdom_model_set;
  gt_assert(hmmfiles);
  gt_error_check(err);

  rval = system("hmmpress -h > /dev/null");
  if (rval == -1) {
    gt_error_set(err, "error executing system(hmmpress)");
    return NULL;
  }
#ifndef _WIN32
  if (WEXITSTATUS(rval) != 0) {
    gt_error_set(err, "cannot find the hmmpress executable in PATH");
    return NULL;
  }
#else
  /* XXX */
  gt_error_set(err, "hmmpress for Windows not implemented");
  return NULL;
#endif

  pdom_model_set = gt_calloc((size_t) 1, sizeof (GtPdomModelSet));
  concat_dbnames = gt_str_new();
  for (i = 0; i < gt_str_array_size(hmmfiles); i++) {
    const char *filename = gt_str_array_get(hmmfiles, i);
    if (!gt_file_exists(filename)) {
      gt_error_set(err, "invalid HMM file: %s", filename);
      gt_str_delete(concat_dbnames);
      gt_free(pdom_model_set);
      return NULL;
    }
    gt_str_append_cstr(concat_dbnames, filename);
  }

  /* derive the model set file name from the input file names */
  pdom_model_set->filename = gt_str_new();
  if (!(tmpdir = getenv("TMPDIR")))
    tmpdir = "/tmp";
  gt_str_append_cstr(pdom_model_set->filename, tmpdir);
  gt_str_append_char(pdom_model_set->filename, GT_PATH_SEPARATOR);
  md5_hash = gt_md5_fingerprint(gt_str_get(concat_dbnames),
                                gt_str_length(concat_dbnames));
  gt_str_append_cstr(pdom_model_set->filename, md5_hash);
  gt_free(md5_hash);
  gt_str_delete(concat_dbnames);
  indexfilename = gt_str_new_cstr(gt_str_get(pdom_model_set->filename));
  gt_str_append_cstr(indexfilename, GT_HMM_INDEX_SUFFIX);

  if (!gt_file_exists(gt_str_get(indexfilename))) {
    /* concatenate all HMM files into the model set file */
    dest = fopen(gt_str_get(pdom_model_set->filename), "w+");
    if (!dest) {
      gt_error_set(err, "could not create file %s",
                   gt_str_get(pdom_model_set->filename));
      had_err = -1;
    }
    else {
      for (i = 0; !had_err && i < gt_str_array_size(hmmfiles); i++) {
        FILE *source;
        const char *filename = gt_str_array_get(hmmfiles, i);
        source = fopen(filename, "r");
        if (!source) {
          gt_error_set(err, "could not open HMM file %s", filename);
          had_err = -1;
        }
        else {
          while ((ch = fgetc(source)) != EOF)
            (void) fputc(ch, dest);
          (void) fclose(source);
        }
      }
      (void) fclose(dest);
    }

    /* index the concatenated file -- pointless if it is incomplete */
    if (!had_err) {
      /* XXX: read hmmer path from env */
      cmdline = gt_str_new_cstr("hmmpress -f ");
      gt_str_append_str(cmdline, pdom_model_set->filename);
      gt_str_append_cstr(cmdline, "> /dev/null");   /* XXX: portability? */

      rval = system(gt_str_get(cmdline));
      gt_str_delete(cmdline);
      if (rval == -1) {
        gt_error_set(err, "error executing system(hmmpress)");
        had_err = -1; /* fall through to the common cleanup */
      }
      else {
#ifndef _WIN32
        if (WEXITSTATUS(rval) != 0) {
          gt_error_set(err, "an error occurred during HMM preprocessing");
          had_err = -1;
        }
#else
        gt_error_set(err, "WEXITSTATUS not implemented on Windows");
        had_err = -1;
#endif
      }
    }
  }

  if (had_err) {
    gt_pdom_model_set_delete(pdom_model_set);
    pdom_model_set = NULL;
  }
  gt_str_delete(indexfilename);
  return pdom_model_set;
}
/* Fill <fmindex> from the index files belonging to <indexname>.

   The steps are: scan the file with suffix FMASCIIFILESUFFIX, map the BWT
   encoding, reference its alphabet, compute the key values, and finally fill
   the mapped pointers from the file with suffix FMDATAFILESUFFIX.

   Returns 0 on success. On failure <fmindex> is passed to gt_freefmindex()
   and -1 is returned. */
int gt_mapfmindex (Fmindex *fmindex,const char *indexname,
                   GtLogger *logger,GtError *err)
{
  FILE *asciifp = NULL;
  bool failed = false,
       storeindexpos = true;
  GtSpecialcharinfo specialcharinfo;

  gt_error_check(err);

  /* start from a defined state, so that freeing after a failure is safe */
  fmindex->mappedptr = NULL;
  fmindex->bwtformatching = NULL;
  fmindex->alphabet = NULL;

  asciifp = gt_fa_fopen_with_suffix(indexname,FMASCIIFILESUFFIX,"rb",err);
  if (asciifp == NULL ||
      scanfmafileviafileptr(fmindex,
                            &specialcharinfo,
                            &storeindexpos,
                            indexname,
                            asciifp,
                            logger,
                            err) != 0)
  {
    failed = true;
  }
  gt_fa_xfclose(asciifp);

  if (!failed)
  {
    fmindex->bwtformatching = mapbwtencoding(indexname,logger,err);
    failed = (fmindex->bwtformatching == NULL);
  }

  if (!failed)
  {
    fmindex->specpos.nextfreeGtPairBwtidx
      = (GtUword) gt_determinenumberofspecialstostore(&specialcharinfo);
    fmindex->specpos.spaceGtPairBwtidx = NULL;
    fmindex->specpos.allocatedGtPairBwtidx = 0;
    fmindex->alphabet
      = gt_alphabet_ref(gt_encseq_alphabet(fmindex->bwtformatching));
    failed = (fmindex->alphabet == NULL);
  }

  if (!failed)
  {
    GtStr *datafilename;

    gt_computefmkeyvalues (fmindex,
                           &specialcharinfo,
                           fmindex->bwtlength,
                           fmindex->log2bsize,
                           fmindex->log2markdist,
                           gt_alphabet_num_of_chars(fmindex->alphabet),
                           fmindex->suffixlength,
                           storeindexpos);
    datafilename = gt_str_new_cstr(indexname);
    gt_str_append_cstr(datafilename,FMDATAFILESUFFIX);
    if (gt_fillfmmapspecstartptr(fmindex,storeindexpos,datafilename,err) != 0)
    {
      failed = true;
    }
    gt_str_delete(datafilename);
  }

  if (failed)
  {
    gt_freefmindex(fmindex);
    return -1;
  }
  return 0;
}
int gt_gtf_parser_parse(GtGTFParser *parser, GtQueue *genome_nodes, GtStr *filenamestr, GtFile *fpin, bool be_tolerant, GtError *err) { GtStr *seqid_str, *source_str, *line_buffer; char *line; size_t line_length; GtUword i, line_number = 0; GtGenomeNode *gn; GtRange range; GtPhase phase_value; GtStrand gt_strand_value; GtSplitter *splitter, *attribute_splitter; float score_value; char *seqname, *source, *feature, *start, *end, *score, *strand, *frame, *attributes, *token, *gene_id, *gene_name = NULL, *transcript_id, *transcript_name = NULL, **tokens; GtHashmap *transcript_id_hash; /* map from transcript id to array of genome nodes */ GtArray *gt_genome_node_array; ConstructionInfo cinfo; GTF_feature_type gtf_feature_type; GT_UNUSED bool gff_type_is_valid = false; const char *type = NULL; const char *filename; bool score_is_defined; int had_err = 0; gt_assert(parser && genome_nodes); gt_error_check(err); filename = gt_str_get(filenamestr); /* alloc */ line_buffer = gt_str_new(); splitter = gt_splitter_new(), attribute_splitter = gt_splitter_new(); #define HANDLE_ERROR \ if (had_err) { \ if (be_tolerant) { \ fprintf(stderr, "skipping line: %s\n", gt_error_get(err)); \ gt_error_unset(err); \ gt_str_reset(line_buffer); \ had_err = 0; \ continue; \ } \ else { \ had_err = -1; \ break; \ } \ } while (gt_str_read_next_line_generic(line_buffer, fpin) != EOF) { line = gt_str_get(line_buffer); line_length = gt_str_length(line_buffer); line_number++; gene_name = gene_id = transcript_id = transcript_name = NULL; had_err = 0; if (line_length == 0) { gt_warning("skipping blank line " GT_WU " in file \"%s\"", line_number, filename); } else if (line[0] == '#') { /* storing comment */ if (line_length >= 2 && line[1] == '#') gn = gt_comment_node_new(line+2); /* store '##' line as '#' line */ else gn = gt_comment_node_new(line+1); gt_genome_node_set_origin(gn, filenamestr, line_number); gt_queue_add(genome_nodes, gn); } else { bool stop_codon = false; char *tokendup, *attrkey; 
GtStrArray *attrkeys, *attrvals; /* process tab delimited GTF line */ gt_splitter_reset(splitter); gt_splitter_split(splitter, line, line_length, '\t'); if (gt_splitter_size(splitter) != 9UL) { gt_error_set(err, "line " GT_WU " in file \"%s\" contains " GT_WU " tab (\\t) " "separated fields instead of 9", line_number, filename, gt_splitter_size(splitter)); had_err = -1; break; } tokens = gt_splitter_get_tokens(splitter); seqname = tokens[0]; source = tokens[1]; feature = tokens[2]; start = tokens[3]; end = tokens[4]; score = tokens[5]; strand = tokens[6]; frame = tokens[7]; attributes = tokens[8]; /* parse feature */ if (GTF_feature_type_get(>f_feature_type, feature) == -1) { /* we skip unknown features */ fprintf(stderr, "skipping line " GT_WU " in file \"%s\": unknown " "feature: \"%s\"\n", line_number, filename, feature); gt_str_reset(line_buffer); continue; } /* translate into GFF3 feature type */ switch (gtf_feature_type) { case GTF_stop_codon: stop_codon = true; case GTF_CDS: gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker, gt_ft_CDS); type = gt_ft_CDS; break; case GTF_exon: gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker, gt_ft_exon); type = gt_ft_exon; break; case GTF_start_codon: /* we can skip the start codons, they are part of the CDS anyway */ gt_str_reset(line_buffer); continue; } gt_assert(gff_type_is_valid); /* parse the range */ had_err = gt_parse_range(&range, start, end, line_number, filename, err); HANDLE_ERROR; /* process seqname (we have to do it here because we need the range) */ gt_region_node_builder_add_region(parser->region_node_builder, seqname, range); /* parse the score */ had_err = gt_parse_score(&score_is_defined, &score_value, score, line_number, filename, err); HANDLE_ERROR; /* parse the strand */ had_err = gt_parse_strand(>_strand_value, strand, line_number, filename, err); HANDLE_ERROR; /* parse the frame */ had_err = gt_parse_phase(&phase_value, frame, line_number, filename, err); HANDLE_ERROR; 
/* parse the attributes */ attrkeys = gt_str_array_new(); attrvals = gt_str_array_new(); gt_splitter_reset(attribute_splitter); gene_id = NULL; transcript_id = NULL; gt_splitter_split(attribute_splitter, attributes, strlen(attributes), ';'); for (i = 0; i < gt_splitter_size(attribute_splitter); i++) { token = gt_splitter_get_token(attribute_splitter, i); /* skip leading blanks */ while (*token == ' ') token++; tokendup = gt_cstr_dup(token); attrkey = strtok(tokendup, " "); if (attrkey) { char *attrval = strtok(NULL, " "); if (attrval == NULL || strcmp(attrval, "") == 0 || strcmp(attrval, "\"\"") == 0) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU " in file \"%s\"", attrkey,line_number,filename); had_err = -1; } HANDLE_ERROR; if (*attrval == '"') attrval++; if (attrval[strlen(attrval)-1] == '"') attrval[strlen(attrval)-1] = '\0'; gt_assert(attrkey && strlen(attrkey) > 0); gt_assert(attrval && strlen(attrval) > 0); gt_str_array_add_cstr(attrkeys, attrkey); gt_str_array_add_cstr(attrvals, attrval); } gt_free(tokendup); /* look for the two mandatory attributes */ if (strncmp(token, GENE_ID_ATTRIBUTE, strlen(GENE_ID_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(GENE_ID_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", GENE_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; gene_id = token + strlen(GENE_ID_ATTRIBUTE) + 1; if (*gene_id == '"') gene_id++; if (gene_id[strlen(gene_id)-1] == '"') gene_id[strlen(gene_id)-1] = '\0'; } else if (strncmp(token, TRANSCRIPT_ID_ATTRIBUTE, strlen(TRANSCRIPT_ID_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(TRANSCRIPT_ID_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; transcript_id = token + strlen(TRANSCRIPT_ID_ATTRIBUTE) + 1; if (*transcript_id == '"') transcript_id++; if 
(transcript_id[strlen(transcript_id)-1] == '"') transcript_id[strlen(transcript_id)-1] = '\0'; } else if (strncmp(token, GENE_NAME_ATTRIBUTE, strlen(GENE_NAME_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(GENE_NAME_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", GENE_NAME_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; gene_name = token + strlen(GENE_NAME_ATTRIBUTE) + 1; /* for output we want to strip quotes */ if (*gene_name == '"') gene_name++; if (gene_name[strlen(gene_name)-1] == '"') gene_name[strlen(gene_name)-1] = '\0'; } else if (strncmp(token, TRANSCRIPT_NAME_ATTRIBUTE, strlen(TRANSCRIPT_NAME_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(TRANSCRIPT_NAME_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", TRANSCRIPT_NAME_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; transcript_name = token + strlen(TRANSCRIPT_NAME_ATTRIBUTE) + 1; /* for output we want to strip quotes */ if (*transcript_name == '"') transcript_name++; if (transcript_name[strlen(transcript_name)-1] == '"') transcript_name[strlen(transcript_name)-1] = '\0'; } } /* check for the mandatory attributes */ if (!gene_id) { gt_error_set(err, "missing attribute \"%s\" on line " GT_WU " in file \"%s\"", GENE_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; if (!transcript_id) { gt_error_set(err, "missing attribute \"%s\" on line " GT_WU " in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; /* process the mandatory attributes */ if (!(transcript_id_hash = gt_hashmap_get(parser->gene_id_hash, gene_id))) { transcript_id_hash = gt_hashmap_new(GT_HASH_STRING, gt_free_func, (GtFree) gt_array_delete); gt_hashmap_add(parser->gene_id_hash, gt_cstr_dup(gene_id), transcript_id_hash); } gt_assert(transcript_id_hash); if (!(gt_genome_node_array = gt_hashmap_get(transcript_id_hash, transcript_id))) { 
gt_genome_node_array = gt_array_new(sizeof (GtGenomeNode*)); gt_hashmap_add(transcript_id_hash, gt_cstr_dup(transcript_id), gt_genome_node_array); } gt_assert(gt_genome_node_array); /* save optional gene_name and transcript_name attributes */ if (transcript_name && strlen(transcript_name) > 0 && !gt_hashmap_get(parser->transcript_id_to_name_mapping, transcript_id)) { gt_hashmap_add(parser->transcript_id_to_name_mapping, gt_cstr_dup(transcript_id), gt_cstr_dup(transcript_name)); } if (gene_name && strlen(gene_name) > 0 && !gt_hashmap_get(parser->gene_id_to_name_mapping, gene_id)) { gt_hashmap_add(parser->gene_id_to_name_mapping, gt_cstr_dup(gene_id), gt_cstr_dup(gene_name)); } /* get seqid */ seqid_str = gt_hashmap_get(parser->seqid_to_str_mapping, seqname); if (!seqid_str) { seqid_str = gt_str_new_cstr(seqname); gt_hashmap_add(parser->seqid_to_str_mapping, gt_str_get(seqid_str), seqid_str); } gt_assert(seqid_str); /* construct the new feature */ gn = gt_feature_node_new(seqid_str, type, range.start, range.end, gt_strand_value); gt_genome_node_set_origin(gn, filenamestr, line_number); if (stop_codon) { gt_feature_node_add_attribute((GtFeatureNode*) gn, GTF_PARSER_STOP_CODON_FLAG, "true"); } for (i = 0; i < gt_str_array_size(attrkeys); i++) { GtFeatureNode *fn = (GtFeatureNode *)gn; const char *key = gt_str_array_get(attrkeys, i); const char *val = gt_str_array_get(attrvals, i); /* Not a comprehensive solution to ensure correct encoding, just bare minimum required to get Cufflinks output parsed */ if (strcmp(val, "=") == 0) val = "%26"; if (gt_feature_node_get_attribute(fn, key) != NULL) { const char *oldval = gt_feature_node_get_attribute(fn, key); GtStr *newval = gt_str_new_cstr(oldval); gt_str_append_char(newval, ','); gt_str_append_cstr(newval, val); gt_feature_node_set_attribute(fn, key, gt_str_get(newval)); gt_str_delete(newval); } else gt_feature_node_add_attribute(fn, key, val); } gt_str_array_delete(attrkeys); gt_str_array_delete(attrvals); /* set source */ 
source_str = gt_hashmap_get(parser->source_to_str_mapping, source); if (!source_str) { source_str = gt_str_new_cstr(source); gt_hashmap_add(parser->source_to_str_mapping, gt_str_get(source_str), source_str); } gt_assert(source_str); gt_feature_node_set_source((GtFeatureNode*) gn, source_str); if (score_is_defined) gt_feature_node_set_score((GtFeatureNode*) gn, score_value); if (phase_value != GT_PHASE_UNDEFINED) gt_feature_node_set_phase((GtFeatureNode*) gn, phase_value); gt_array_add(gt_genome_node_array, gn); } gt_str_reset(line_buffer); } /* process all region nodes */ if (!had_err) gt_region_node_builder_build(parser->region_node_builder, genome_nodes); /* process all feature nodes */ cinfo.genome_nodes = genome_nodes; cinfo.tidy = be_tolerant; cinfo.gene_id_to_name_mapping = parser->gene_id_to_name_mapping; cinfo.transcript_id_to_name_mapping = parser->transcript_id_to_name_mapping; if (!had_err) { had_err = gt_hashmap_foreach(parser->gene_id_hash, construct_genes, &cinfo, err); } gt_hashmap_foreach(parser->gene_id_hash, delete_genes, NULL, err); /* free */ gt_splitter_delete(splitter); gt_splitter_delete(attribute_splitter); gt_str_delete(line_buffer); return had_err; }
/* Creates a new GtR runtime object.
   - Reads the optional GT_SEED environment variable into <gtr->seed>
     (0 if the variable is not set).
   - Creates a Lua state and opens the standard, GenomeTools, LPeg, MD5,
     LuaFileSystem and DES56 libraries in it.
   - Unless compiled WITHOUT_CAIRO, creates a style bound to that Lua state
     and loads "<gtdata>/sketch/default.style" if that file exists.
   Returns the new object, or NULL if any of these steps failed. */
GtR* gtr_new(GtError *err)
{
  GtR *gtr;
  char *seedstr = NULL;
  int had_err = 0;
#ifndef WITHOUT_CAIRO
  GtStr *style_file = NULL;
#endif
  gtr = gt_calloc(1, sizeof (GtR));
  if ((seedstr = getenv("GT_SEED"))) {
    if (gt_parse_uint(&gtr->seed, seedstr) != 0) {
      gt_error_set(err, "invalid seed in GT_SEED environment variable: %s",
                   seedstr);
      had_err = -1;
    }
  }
  else
    gtr->seed = 0;
  if (!had_err) {
    gtr->debugfp = gt_str_new();
    gtr->testspacepeak = gt_str_new();
    gtr->test_only = gt_str_new();
    gtr->manoutdir = gt_str_new();
    gtr->L = luaL_newstate();
    if (!gtr->L) {
      gt_error_set(err, "out of memory (cannot create new lua state)");
      had_err = -1;
    }
  }
  if (!had_err) {
    luaL_openlibs(gtr->L);    /* open the standard libraries */
    gt_lua_open_lib(gtr->L);  /* open the GenomeTools library */
    lua_pushcfunction(gtr->L, luaopen_lpeg);
    lua_pushstring(gtr->L, "lpeg");
    lua_call(gtr->L, 1, 0);   /* open LPeg library */
    lua_pushcfunction(gtr->L, luaopen_md5_core);
    lua_pushstring(gtr->L, "md5");
    lua_call(gtr->L, 1, 0);   /* open MD5 library */
    lua_pushcfunction(gtr->L, luaopen_lfs);
    lua_pushstring(gtr->L, "lfs");
    lua_call(gtr->L, 1, 0);   /* open Lua filesystem */
    lua_pushcfunction(gtr->L, luaopen_des56);
    lua_pushstring(gtr->L, "des56");
    lua_call(gtr->L, 1, 0);   /* open DES56 library */
    had_err = gt_lua_set_modules_path(gtr->L, err);
  }
#ifndef WITHOUT_CAIRO
  if (!had_err) {
    lua_settop(gtr->L, 0);
    if (!(gtr->style = gt_style_new_with_state(gtr->L)))
      had_err = -1;
  }
  if (!had_err) {
    if (!(style_file = gt_get_gtdata_path(gt_error_get_progname(err), err)))
      had_err = -1;
  }
  if (!had_err) {
    gt_str_append_cstr(style_file, "/sketch/default.style");
    /* a missing default style file is not an error */
    if (gt_file_exists(gt_str_get(style_file))) {
      if (gt_style_load_file(gtr->style, gt_str_get(style_file), err))
        had_err = -1;
      else
        gt_lua_put_style_in_registry(gtr->L, gtr->style);
    }
  }
  gt_str_delete(style_file);
#endif
  if (had_err) {
    /* Release what was allocated above; the members are NULL (from
       gt_calloc()) if the corresponding step was never reached. */
    gt_str_delete(gtr->debugfp);
    gt_str_delete(gtr->testspacepeak);
    gt_str_delete(gtr->test_only);
    gt_str_delete(gtr->manoutdir);
    /* NOTE(review): gtr->style (if already created) is not released here,
       because the proper destructor for a style that shares the Lua state
       is not visible from this function -- confirm and add it. */
    if (gtr->L)
      lua_close(gtr->L);
    gt_free(gtr);
    return NULL;
  }
  return gtr;
}
/* Runs a two-stage BLAST search against a condenser-compressed database.
   1. "coarse" search: BLAST the queries against "<dbpath>.fas" and collect
      the indices of the unique database entries that were hit.
   2. extraction: write the hit unique entries to "coarse_<dbname>.fas".
   3. "fine" search: BLAST the queries against that extracted file and print
      one tab separated line per hit to <arguments->outfp>.
   Only runs if <arguments->blastn> or <arguments->blastp> is set.
   Returns 0 on success and -1 on error. */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath;
  GtStr* coarse_fname = gt_str_new_cstr("coarse_");
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  querypath = gt_str_get(arguments->querypath);
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch *match;
    GtMatchIterator *mp = NULL;
    GtNREncseq *nrencseq = NULL;
    GtStr *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition *hits;
    double eval, raw_eval = 0.0;
    GtUword coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int curr_hits = 0, max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);
    gt_str_append_cstr(fastaname, ".fas");

    for (i = 0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S' */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg, err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          /* check the average before it enters the E-value computation */
          gt_assert(avg.avg != 0);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                    gt_str_get(fastaname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                    gt_str_get(fastaname), err);
    }

    /* perform coarse BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END) {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            if (curr_hits == max_hits) {
              /* grow the hit table by another 100 entries */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i = max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          }
          else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* dbseqid belongs to the match, so release the match only after
             the error message has been built; it is released on both paths */
          gt_match_delete(match);
        }
        else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
      mp = NULL;
    }

    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;
      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);

      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      if (coarse_hits == NULL) {
        /* output file could not be created */
        had_err = -1;
      }
      else {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        gt_assert(had_err || coarse_db_len != 0);
      }
      gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                 gt_str_get(coarse_fname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                 gt_str_get(coarse_fname), err);
    }

    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      }
      else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          }
          else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);
    }
    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i = 0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}
/* Renders a ruler with dynamic scale labeling and optional grid.
   The style section "format" is queried for "show_grid", "ruler_font_size",
   "unit", "ruler_left_text" and "ruler_right_text"; the left/right texts
   fall back to FIVE_PRIME_STRING/THREE_PRIME_STRING if not set.
   Major ticks (with labels) are drawn every 10^(ceil(log10(length))-1)
   positions, minor ticks every tenth of that (only if that is >= 1).
   Returns 0 on success and -1 if a style query failed. */
int gt_canvas_cairo_draw_ruler(GtCanvas *canvas, GtRange viewrange,
                               GtError *err)
{
  double step, minorstep, vmajor, vminor, theight = TOY_TEXT_HEIGHT;
  long base_length, tick;
  GtColor rulercol, gridcol;
  GtStr *left_str, *right_str, *unit;
  char str[BUFSIZ];
  GtStyleQueryStatus rval;
  bool showgrid = true;
  gt_assert(canvas);

  if (gt_style_get_bool(canvas->pvt->sty, "format", "show_grid", &showgrid,
                        NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  if (gt_style_get_num(canvas->pvt->sty, "format", "ruler_font_size",
                       &theight, NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* get unit value from style, default: empty */
  unit = gt_str_new();
  if (gt_style_get_str(canvas->pvt->sty, "format", "unit", unit, NULL,
                       err) == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(unit);
    return -1;
  }

  /* get additional description texts from style */
  left_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format", "ruler_left_text",
                          left_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(left_str, FIVE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      return -1;
      break; /* shouldn't reach this */
    default:
      break;
  }
  right_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format", "ruler_right_text",
                          right_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(right_str, THREE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      gt_str_delete(right_str);
      return -1;
      break; /* shouldn't reach this */
    default:
      break;
  }

  /* reset font to default */
  gt_graphics_set_font(canvas->pvt->g, "sans-serif", SLANT_NORMAL,
                       WEIGHT_NORMAL, theight);

  rulercol.red = rulercol.green = rulercol.blue = RULER_GREY;
  rulercol.alpha = 1.0;
  gridcol.red = gridcol.green = gridcol.blue = GRID_GREY;
  gridcol.alpha = 1.0;

  /* determine range and step of the scale */
  base_length = gt_range_length(&viewrange);

  /* determine tick steps */
  step = pow(10,ceil(log10(base_length))-1);
  /* <tick> below is an integer: adding a step smaller than 1 (which happens
     for a view range of length 1, where the formula yields 0.1) would be
     truncated away and the major tick loop would never advance */
  if (step < 1.0)
    step = 1.0;
  minorstep = step/10.0;

  /* calculate starting positions */
  vminor = (double) (floor(viewrange.start / minorstep))*minorstep;
  vmajor = (double) (floor(viewrange.start / step))*step;

  /* draw major ticks */
  for (tick = vmajor; tick <= viewrange.end; tick += step) {
    double drawtick = (gt_coords_convert_point(viewrange, tick)
                       * (canvas->pvt->width-2*canvas->pvt->margins))
                      + canvas->pvt->margins;
    /* the first candidate may lie left of the visible range */
    if (tick < viewrange.start)
      continue;
    gt_graphics_draw_vertical_line(canvas->pvt->g, drawtick,
                                   canvas->pvt->y + 30, rulercol, 10, 1.0);
    gt_format_ruler_label(str, tick, gt_str_get(unit), BUFSIZ);
    gt_graphics_draw_text_centered(canvas->pvt->g, drawtick,
                                   canvas->pvt->y + 20, str);
  }
  /* draw minor ticks */
  if (minorstep >= 1) {
    for (tick = vminor; tick <= viewrange.end; tick += minorstep) {
      double drawtick;
      if (tick < viewrange.start)
        continue;
      drawtick = (gt_coords_convert_point(viewrange, tick)
                  * (canvas->pvt->width-2*canvas->pvt->margins))
                 + canvas->pvt->margins;
      if (showgrid) {
        gt_graphics_draw_vertical_line(canvas->pvt->g, drawtick,
                                       canvas->pvt->y + 40, gridcol,
                                       canvas->pvt->height - 40 - 15, 1.0);
      }
      gt_graphics_draw_vertical_line(canvas->pvt->g, drawtick,
                                     canvas->pvt->y + 35, rulercol, 5, 1.0);
    }
  }
  /* draw ruler line */
  gt_graphics_draw_horizontal_line(canvas->pvt->g, canvas->pvt->margins,
                                   canvas->pvt->y + 40, rulercol,
                                   canvas->pvt->width
                                     - 2 * canvas->pvt->margins,
                                   1.25);

  /* draw the description texts left and right of the ruler line */
  gt_graphics_draw_text_right(canvas->pvt->g, canvas->pvt->margins - 10,
                              canvas->pvt->y + 39 + (theight/2),
                              gt_str_get(left_str));
  gt_graphics_draw_text(canvas->pvt->g,
                        canvas->pvt->width - canvas->pvt->margins + 10,
                        canvas->pvt->y + 39 + (theight/2),
                        gt_str_get(right_str));

  gt_str_delete(unit);
  gt_str_delete(left_str);
  gt_str_delete(right_str);
  return 0;
}
/* Emits the GFF3 line for feature node <fn>. <data> is the GtGFF3Visitor;
   output is appended to its <outstr> if that is set and written to its
   <outfp> otherwise. After the leading part, the attributes are shown in
   this order: the unique ID (if one is registered for <fn>), the list of
   parent IDs (if any), then all remaining attributes. A single '.' is
   emitted if nothing was shown. Always returns 0. */
static int gff3_show_feature_node(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor = (GtGFF3Visitor*) data;
  ShowAttributeInfo attr_info;
  GtArray *parents = NULL;
  GtStr *unique_id, *out;
  GtUword idx;
  bool shown = false;

  gt_error_check(err);
  gt_assert(fn && visitor);

  /* string target, or NULL if we write to the file */
  out = visitor->outstr;

  /* leading part */
  if (out)
    gt_gff3_output_leading_str(fn, out);
  else
    gt_gff3_output_leading(fn, visitor->outfp);

  /* ID attribute */
  unique_id = gt_hashmap_get(visitor->feature_node_to_unique_id_str, fn);
  if (unique_id) {
    if (out) {
      gt_str_append_cstr(out, GT_GFF_ID);
      gt_str_append_char(out, '=');
      gt_str_append_cstr(out, gt_str_get(unique_id));
    }
    else {
      gt_file_xprintf(visitor->outfp, "%s=%s", GT_GFF_ID,
                      gt_str_get(unique_id));
    }
    shown = true;
  }

  /* Parent attribute */
  parents = gt_hashmap_get(visitor->feature_node_to_id_array, fn);
  if (gt_array_size(parents)) {
    /* separate from a preceding ID attribute */
    if (shown) {
      if (out)
        gt_str_append_char(out, ';');
      else
        gt_file_xfputc(';', visitor->outfp);
    }
    if (out) {
      gt_str_append_cstr(out, GT_GFF_PARENT);
      gt_str_append_char(out, '=');
    }
    else {
      gt_file_xprintf(visitor->outfp, "%s=", GT_GFF_PARENT);
    }
    /* comma separated list of parent IDs */
    for (idx = 0; idx < gt_array_size(parents); idx++) {
      if (idx) {
        if (out)
          gt_str_append_char(out, ',');
        else
          gt_file_xfputc(',', visitor->outfp);
      }
      if (out) {
        gt_str_append_cstr(out, *(char**) gt_array_get(parents, idx));
      }
      else {
        gt_file_xprintf(visitor->outfp, "%s",
                        *(char**) gt_array_get(parents, idx));
      }
    }
    shown = true;
  }

  /* all remaining attributes; the callback updates <shown> */
  attr_info.attribute_shown = &shown;
  attr_info.outfp = visitor->outfp;
  attr_info.outstr = visitor->outstr;
  gt_feature_node_foreach_attribute(fn, show_attribute, &attr_info);

  /* no attribute at all -> placeholder dot */
  if (!shown) {
    if (out)
      gt_str_append_char(out, '.');
    else
      gt_file_xfputc('.', visitor->outfp);
  }

  /* terminal newline */
  if (out)
    gt_str_append_char(out, '\n');
  else
    gt_file_xfputc('\n', visitor->outfp);

  return 0;
}
/* Runs the sketch tool: reads annotations (GFF3, BED or GTF, selected by
   <arguments->input>) into an in-memory feature index, lays out a diagram
   for one sequence region and writes it as PDF, PS, SVG or PNG.
   <argv[parsed_args]> is the output image file; all following arguments are
   passed to the input stream. Returns 0 on success and -1 on error. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL;
  /* true iff <seqid> was handed to us by the feature index and has to be
     freed here; false if it merely points into <arguments->seqid> */
  bool seqid_owned = false;
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  /* locate the default style file relative to the program (argv[0] up to
     the first blank) */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0) {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    }
    else if (strcmp(gt_str_get(arguments->input), "bed") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    }
    else if (strcmp(gt_str_get(arguments->input), "gtf") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    seqid = gt_feature_index_get_first_seqid(features, err);
    seqid_owned = true;
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* borrowed pointer into <arguments->seqid>, must not be freed here */
    seqid = gt_str_get(arguments->seqid);
  }

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid,
                                                   err);
  }
  if (!had_err) {
    /* undefined start/end default to the borders of the sequence region */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    if (gt_str_length(arguments->stylefile) == 0) {
      gt_str_append_str(arguments->stylefile, defaultstylefile);
    }
    else {
      if (!had_err && gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      }
      else {
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                     gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      /* everything that is not pdf, ps or svg is rendered as PNG */
      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width, height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width, height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width, height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width, height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(
                                         gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          }
          else {
            had_err = -1;
          }
          gt_str_delete(str);
        }
        else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file, err);
        }
      }
    }
  }

  /* free */
  if (seqid_owned)
    gt_free(seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
/* Loads the encoded sequence of <bs> into <bs->encseq>, (re)building the
   on-disk index files first if requested via <recreate>, if reading from
   stdin, if one of the index files is missing, or if the sequence file is
   newer than the index. Returns 0 on success and -1 on error. */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *basename,
        *esq_fn = NULL,
        *ois_fn = NULL,
        *sds_fn = NULL,
        *md5_fn = NULL,
        *des_fn = NULL;
  bool rebuild;
  int had_err = 0;

  gt_assert(!bs->encseq);

  if (!bs->use_stdin) {
    basename = bs->sequence_file;
  }
  else {
    basename = gt_str_new_cstr("stdin.");
    /* the object address makes the name unique */
    gt_str_append_uword(basename, (GtUword) bs);
  }

  /* derive the names of all index files from the base name */
  esq_fn = gt_str_clone(basename);
  gt_str_append_cstr(esq_fn, GT_ENCSEQFILESUFFIX);
  ois_fn = gt_str_clone(basename);
  gt_str_append_cstr(ois_fn, GT_OISTABFILESUFFIX);
  sds_fn = gt_str_clone(basename);
  gt_str_append_cstr(sds_fn, GT_SDSTABFILESUFFIX);
  md5_fn = gt_str_clone(basename);
  gt_str_append_cstr(md5_fn, GT_MD5TABFILESUFFIX);
  des_fn = gt_str_clone(basename);
  gt_str_append_cstr(des_fn, GT_DESTABFILESUFFIX);

  /* decide whether the index files have to be (re)built */
  rebuild = recreate
            || bs->use_stdin
            || !gt_file_exists(gt_str_get(esq_fn))
            || !gt_file_exists(gt_str_get(ois_fn))
            || !gt_file_exists(gt_str_get(sds_fn))
            || !gt_file_exists(gt_str_get(md5_fn))
            || !gt_file_exists(gt_str_get(des_fn))
            || gt_file_is_newer(gt_str_get(bs->sequence_file),
                                gt_str_get(esq_fn));
  if (rebuild)
    had_err = construct_bioseq_files(bs, basename, err);

  /* map the encoded sequence with all tables we rely on */
  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(basename), err);
    if (bs->encseq == NULL) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  if (!had_err) {
    gt_assert(bs->encseq);
  }

  /* the base name is only ours in the stdin case */
  if (bs->use_stdin)
    gt_str_delete(basename);
  gt_str_delete(esq_fn);
  gt_str_delete(ois_fn);
  gt_str_delete(md5_fn);
  gt_str_delete(sds_fn);
  gt_str_delete(des_fn);
  return had_err;
}
/* Formats a given position number for short display in the ruler.
   Writes at most <buflen> bytes (including the terminator) to <txt>.
   Positions with an absolute value of at least 1000 are scaled and get the
   suffix 'k', 'M' or 'G'; smaller ones are printed unscaled with a leading
   blank. <unitstr> is appended in all cases.
   The number of decimals printed for scaled values is
   floor(log10(|pos|)) minus the number of trailing decimal zeros of |pos|
   (e.g. 1500 -> "1.5k", 1000 -> "1k"). */
void gt_format_ruler_label(char *txt, GtWord pos, const char *unitstr,
                           size_t buflen)
{
  const char *sign;
  GtUword upos, rest, divisor = 0;
  char prefix = '\0';
  int precision = 0;
  bool negative = (pos < 0);
  gt_assert(txt);

  /* negate in unsigned arithmetic: "-pos" would overflow for the smallest
     representable GtWord */
  upos = negative ? (GtUword) 0 - (GtUword) pos : (GtUword) pos;
  sign = negative ? "-" : "";

  /* select the scale */
  if (upos >= 1000000000) {
    divisor = 1000000000;
    prefix = 'G';
  }
  else if (upos >= 1000000) {
    divisor = 1000000;
    prefix = 'M';
  }
  else if (upos >= 1000) {
    divisor = 1000;
    prefix = 'k';
  }

  if (divisor == 0) {
    /* small value, print as it is */
    /*@ignore@*/
    (void) snprintf(txt, buflen, " %s"GT_WU"%s", sign, upos, unitstr);
    /*@end@*/
    return;
  }

  /* floor(log10(upos)), computed on integers; this is only done for
     upos >= 1000, so the logarithm of 0 is never taken */
  for (rest = upos; rest >= 10; rest /= 10)
    precision++;
  /* do not print decimals which would only show trailing zeros; upos is
     non-zero here, so the loop terminates */
  for (rest = upos; rest % 10 == 0; rest /= 10)
    precision--;

  /*@ignore@*/
  (void) snprintf(txt, buflen, "%s%.*f%c%s", sign, precision,
                  (double) upos / (double) divisor, prefix, unitstr);
  /*@end@*/
}
/* Creates a decoder for the file "<name>" + HCRFILESUFFIX using alphabet
   <alpha>. Reads the file info and the base/quality distribution from the
   file, sets up the Huffman decoding and, if the file contains data behind
   the encoded part, reads the sampling stored there.
   Returns NULL if the file cannot be opened or the Huffman setup fails. */
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha,
                                            const char *name,
                                            GtError *err)
{
  GtHcrSeqDecoder *decoder = gt_malloc(sizeof (GtHcrSeqDecoder));
  GtBaseQualDistr *distr = NULL;
  GtWord sampling_offset = 0;
  FILE *hcr_fp = NULL;
  GT_UNUSED size_t nread;
  GT_UNUSED const size_t expected = (size_t) 1;

  /* start from a defined state */
  decoder->alpha = alpha;
  decoder->alphabet_size = gt_alphabet_size(alpha);
  decoder->cur_read = 0;
  decoder->data_iter = NULL;
  decoder->file_info_rbt = NULL;
  decoder->fileinfos = NULL;
  decoder->filename = gt_str_new_cstr(name);
  decoder->huff_dec = NULL;
  decoder->huffman = NULL;
  decoder->sampling = NULL;
  decoder->symbols = NULL;
  gt_str_append_cstr(decoder->filename, HCRFILESUFFIX);

  hcr_fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
  if (hcr_fp == NULL) {
    hcr_seq_decoder_delete(decoder);
    decoder = NULL;
  }
  else {
    /* the header parts are read in the order they appear in the file */
    hcr_read_file_info(decoder, hcr_fp);
    distr = hcr_base_qual_distr_new_from_file(hcr_fp, decoder->alpha);
    decoder->qual_offset = distr->qual_offset;
    nread = gt_xfread_one(&sampling_offset, hcr_fp);
    gt_assert(nread == expected);
    decoder->start_of_encoding = decoder_calc_start_of_encoded_data(hcr_fp);

    if (seq_decoder_init_huffman(decoder, sampling_offset, distr, err)) {
      hcr_seq_decoder_delete(decoder);
      decoder = NULL;
    }
    else {
      size_t file_end;
      /* determine the file size, then go to the end of the encoding */
      gt_xfseek(hcr_fp, 0, SEEK_END);
      file_end = ftell(hcr_fp);
      gt_xfseek(hcr_fp, sampling_offset, SEEK_SET);
      /* a sampling is only read if data follows the encoded part */
      if (sampling_offset < file_end)
        decoder->sampling = gt_sampling_read(hcr_fp);
      else
        decoder->sampling = NULL;

      decoder->file_info_rbt =
        seq_decoder_init_file_info(decoder->fileinfos, decoder->num_of_files);
    }
  }

  hcr_base_qual_distr_delete(distr);
  gt_fa_fclose(hcr_fp);
  return decoder;
}
/* Parses the per-domain hit tables of a HMMER report from <instream> until
   a line starting with "Internal" is reached. <buf> is the line buffer
   which is filled by pdom_parser_get_next_line().
   A line starting with ">>" starts a new target: the model name following
   it is stored in <status->cur_model> and the current frame is marked as
   finished. Each hit table row is added to <status> as a GtHMMERSingleHit;
   if a target had hits, the alignments are parsed afterwards.
   Returns 0 on success and a non-zero value on error. */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(
                                                  GtLTRdigestPdomVisitor *lv,
                                                  GtHMMERParseStatus *status,
                                                  char *buf,
                                                  FILE *instream,
                                                  GtError *err)
{
  int had_err = 0;
  GtUword i, nof_targets = 0, nof_hits = 0;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal", buf, (size_t) 8)) {
    GtUword no, hmmfrom, hmmto, alifrom, alito;
    double score, evalue;
    char threshold_ok = '-';
    if (buf[0] == '>' && buf[1] == '>') {
      char *model = NULL;
      /* the model name starts behind ">> "; only look there if the line
         does not end directly after ">>" */
      if (buf[2] != '\0')
        model = strtok(buf + 3, " ");
      if (model == NULL) {
        gt_error_set(err, "malformed HMMER output: no model name in "
                          "line '%s'", buf);
        had_err = -1;
      }
      else {
        gt_str_reset(status->cur_model);
        gt_str_append_cstr(status->cur_model, model);
        had_err = pdom_parser_get_next_line(buf, instream, err);
        /* unless HMMER reports that there are no individual domains, skip
           the two header lines of the hit table; the literal has to be
           exactly as long as the compared prefix (17 characters) */
        if (!had_err && strncmp("   [No individual", buf, (size_t) 17)) {
          for (i = 0UL; i < 2UL && !had_err; i++)
            had_err = pdom_parser_get_next_line(buf, instream, err);
        }
        nof_targets++;
        nof_hits = 0UL;
        gt_hmmer_parse_status_mark_frame_finished(status);
      }
    }
    /* read hit table rows as long as lines match the row format */
    while (!had_err &&
           8 == sscanf(buf, ""GT_WU" %c %lf %*f %*f %lf "GT_WU" "GT_WU" %*s "
                            GT_WU" "GT_WU"",
                       &no, &threshold_ok, &score, &evalue, &hmmfrom, &hmmto,
                       &alifrom, &alito)) {
      GtHMMERSingleHit *shit = gt_calloc((size_t) 1, sizeof (*shit));
      shit->hmmfrom = hmmfrom;
      shit->hmmto = hmmto;
      shit->alifrom = alifrom;
      shit->alito = alito;
      shit->score = score;
      shit->evalue = evalue;
      shit->strand = status->strand;
      shit->frame = (GtUword) status->frame;
      /* '!' in the second column marks hits which passed the thresholds */
      shit->reported = (threshold_ok == '!');
      shit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, shit);
      nof_hits++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }
    if (!had_err) {
      if (nof_hits > 0)
        had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                             instream, err);
      else
        had_err = pdom_parser_get_next_line(buf, instream, err);
    }
  }
  return had_err;
}
/* Runner of the containment test tool. <tool_arguments> must point to a
   GtReadjoinerCnttestArguments; its <test> member selects how the bit table
   <bits> and the number of reads are obtained:
   - GT_READJOINER_CNTTEST_SHOWLIST: parsed by gt_cntlist_parse() from the
     file <readset> + GT_READJOINER_SUFFIX_CNTLIST;
   - GT_READJOINER_CNTTEST_BRUTEFORCE / GT_READJOINER_CNTTEST_KMP: computed
     by gt_rdj_pairwise_exact() on the loaded encoded sequence (mirrored
     unless <singlestrand> is set);
   - GT_READJOINER_CNTTEST_ESA: computed by gt_contfind_bottomup() on a
     sequential suffix array reader.
   If no error occurred, the result is handed to gt_cntlist_show().
   Returns 0 on success, a non-zero value on error.
   <argc>, <argv> and <parsed_args> are not used. <err> is used despite its
   GT_UNUSED marker, which is kept so that the signature stays unchanged. */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
                                        GT_UNUSED const char **argv,
                                        GT_UNUSED int parsed_args,
                                        void *tool_arguments,
                                        GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtBitsequence *bits = NULL;
  /* initialised so that no path can pass an indeterminate value to
     gt_cntlist_show(), even if assertions are compiled out */
  GtUword nofreads = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *fn = gt_str_clone(arguments->readset);
    gt_str_append_cstr(fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(fn), true, &bits, &nofreads, err);
    gt_str_delete(fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
           arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
    {
      gt_encseq_loader_mirror(el);
    }
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads == NULL)
    {
      had_err = -1;
    }
    else
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
                            false,
                            arguments->test == GT_READJOINER_CNTTEST_KMP,
                            1UL, true, NULL, NULL, false, NULL, &bits,
                            &nofreads);
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
                                             GT_LOGGER_DEFLT_PREFIX, stderr);

    ssar = gt_newSequentialsuffixarrayreaderfromfile(
                                    gt_str_get(arguments->readset),
                                    SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
                                    true, verbose_logger, err);
    if (gt_error_is_set(err))
    {
      had_err = -1;
    }
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* half of the sequences are taken as the reads; the index of the
           first sequence of the other half is stored in firstrevcompl */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* readlength stays 0 unless both conditions hold */
      if (!arguments->singlestrand)
      {
        if (gt_encseq_accesstype_get(ssar->encseq) ==
            GT_ACCESS_TYPE_EQUALLENGTH)
        {
          readlength = gt_encseq_seqlength(ssar->encseq, 0);
        }
      }
      (void)gt_contfind_bottomup(ssar, false, bits,
                                 arguments->singlestrand ? 0 : firstrevcompl,
                                 readlength);
    }
    if (ssar != NULL)
    {
      gt_freeSequentialsuffixarrayreader(&ssar);
    }
    gt_logger_delete(verbose_logger);
  }
  else
  {
    /* unknown test selector */
    gt_assert(false);
  }

  if (!had_err)
  {
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  }
  gt_free(bits);
  return had_err;
}