static void canon_gff3_parse_options(int argc, char * const *argv, CanonGFF3Options *options, GtError *error) { int opt = 0; int optindex = 0; const char *optstr = "hio:s:v"; const struct option init_options[] = { { "help", no_argument, NULL, 'h' }, { "infer", no_argument, NULL, 'i' }, { "outfile", required_argument, NULL, 'o' }, { "source", required_argument, NULL, 's' }, { "version", no_argument, NULL, 'v' }, { NULL, no_argument, NULL, 0 }, }; for(opt = getopt_long(argc, argv, optstr, init_options, &optindex); opt != -1; opt = getopt_long(argc, argv, optstr, init_options, &optindex)) { if(opt == 'h') { print_usage(stdout); exit(0); } else if(opt == 'i') options->infer = true; else if(opt == 'o') { if(options->outstream != NULL) gt_file_delete(options->outstream); options->outstream = gt_file_new(optarg, "w", error); } else if(opt == 's') { if(options->source != NULL) gt_str_delete(options->source); options->source = gt_str_new_cstr(optarg); } else if(opt == 'v') { agn_print_version("CanonGFF3", stdout); exit(0); } } }
/* Return true if stat(2) succeeds for the file name built by appending
   <suffix> to <path>, false otherwise. Neither argument may be NULL. */
bool gt_file_exists_with_suffix(const char *path, const char *suffix)
{
  struct stat st;
  GtStr *fullname;
  bool found;

  gt_assert(path && suffix);

  fullname = gt_str_new_cstr(path);
  gt_str_append_cstr(fullname, suffix);
  found = (stat(gt_str_get(fullname), &st) == 0);
  gt_str_delete(fullname);

  return found;
}
/* Create a new left border output buffer labelled <name>. The buffer starts
   empty with room for 1024 uint32_t values and is backed by a temporary file
   obtained from gt_xtmpfp(), whose name is stored in outfilename. The size of
   the value array is registered with <fcsl> under <name>. */
GtLeftborderOutbuffer *gt_leftborderbuffer_new(const char *name,
                                               GtFirstcodesspacelog *fcsl)
{
  GtLeftborderOutbuffer *buf = gt_malloc(sizeof (*buf));

  /* counters */
  buf->totalwrite = 0;
  buf->nextfree = 0;
  buf->allocated = 1024UL;

  /* label and temporary output file */
  buf->name = gt_str_new_cstr(name);
  buf->outfilename = gt_str_new();
  buf->fp = gt_xtmpfp(buf->outfilename);

  /* value array and its space accounting */
  buf->spaceuint32_t = gt_malloc(sizeof (*buf->spaceuint32_t)
                                 * buf->allocated);
  GT_FCI_ADDWORKSPACE(fcsl,name,
                      sizeof (*buf->spaceuint32_t) * buf->allocated);
  return buf;
}
/* Hashmap iteration callback: <key> is a type name (C string), <value> the
   GtArray of GtBlock pointers collected for that type and <data> the
   GtLayoutTraverseInfo of the layout being built. Creates one GtTrack for the
   type, inserts all blocks into it and stores it in the layout's track map
   under a copy of the type name. Always returns 0.

   The track is split into lines unless "split_lines" is switched off in the
   "format" section or in the section of the type; the maximal number of lines
   is taken from "max_num_lines" of the type (default: 50). */
static int layout_tracks(void *key, void *value, void *data,
                         GT_UNUSED GtError *err)
{
  unsigned long i, max;
  GtTrack *track;
  GtLayoutTraverseInfo *lti = (GtLayoutTraverseInfo*) data;
  GtArray *list = (GtArray*) value;
  GtStr *gt_track_key;
  const char *type = key;
  bool split;
  double tmp;
  gt_assert(type && list);

  /* to get a deterministic layout, we sort the GtBlocks for each type */
  gt_array_sort_stable(list, blocklist_block_compare);

  /* Note: no element of <list> is touched outside the loop below, so an
     empty list simply yields an empty track. */
  gt_track_key = gt_str_new_cstr(type);

  /* splitting is the default if the style does not say otherwise */
  if (!gt_style_get_bool(lti->layout->style, "format", "split_lines", &split,
                         NULL))
    split = true;
  if (split)
    if (!gt_style_get_bool(lti->layout->style, type, "split_lines", &split,
                           NULL))
      split = true;

  if (gt_style_get_num(lti->layout->style, type, "max_num_lines", &tmp, NULL))
    max = tmp;
  else
    max = 50;

  track = gt_track_new(gt_track_key, max, split,
                       gt_line_breaker_captions_new(lti->layout,
                                                    lti->layout->width,
                                                    lti->layout->style));
  lti->layout->nof_tracks++;

  for (i = 0; i < gt_array_size(list); i++) {
    GtBlock *block = *(GtBlock**) gt_array_get(list, i);
    gt_track_insert_block(track, block);
  }

  gt_hashmap_add(lti->layout->tracks,
                 gt_cstr_dup(gt_str_get(gt_track_key)),
                 track);
  gt_str_delete(gt_track_key);
  return 0;
}
/* Run the encoding for <hcr_enc> and write the result under the base name
   <name>: descriptions are encoded first (only if a description encoder is
   present), then sequence and quality data are written. If both steps succeed
   and logging is enabled, a short overview of the encoding is logged.
   <timer> may be NULL. Returns 0 on success, otherwise the error value of the
   failed step. */
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
  int had_err = 0;
  GtStr *hcr_filename;

  gt_error_check(err);

  if (timer != NULL)
    gt_timer_show_progress(timer, "write encoding", stdout);

  if (hcr_enc->encdesc_encoder != NULL) {
    GtCstrIterator *headers = gt_fasta_header_iterator_new(hcr_enc->files,
                                                           err);
    had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder, headers,
                                        name, err);
    gt_cstr_iterator_delete(headers);
  }

  if (!had_err)
    had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);

  /* everything below is log output only */
  if (had_err || !gt_log_enabled())
    return had_err;

  hcr_filename = gt_str_new_cstr(name);
  gt_str_append_cstr(hcr_filename, HCRFILESUFFIX);

  gt_log_log("sequences with qualities encoding overview:");
  gt_log_log("**>");
  if (hcr_enc->page_sampling) {
    gt_log_log("applied sampling technique: sampling every "GT_WU"th page",
               hcr_enc->sampling_rate);
  }
  else if (hcr_enc->regular_sampling) {
    gt_log_log("applied sampling technique: sampling every "GT_WU"th read",
               hcr_enc->sampling_rate);
  }
  else {
    gt_log_log("applied sampling technique: none");
  }

  gt_log_log("total number of encoded nucleotide sequences with qualities: "
             ""GT_WU"", hcr_enc->num_of_reads);
  gt_log_log("total number of encoded nucleotides: "GT_LLU"",
             hcr_enc->seq_encoder->total_num_of_symbols);
  /* estimated file size in bits divided by the number of symbols */
  gt_log_log("bits per nucleotide encoding: %f",
             (gt_file_estimate_size(gt_str_get(hcr_filename)) * 8.0) /
             hcr_enc->seq_encoder->total_num_of_symbols);
  gt_log_log("<**");

  gt_str_delete(hcr_filename);
  return had_err;
}
/* Benchmark helper: decode <amount> randomly chosen reads from <hcrd> and
   log each decoded description, sequence and quality string.

   <timer> may be NULL; in that case a timer is created here and released
   again before returning. A timer passed in by the caller is only used for
   progress output and stays owned by the caller.

   Decoding stops at the first error. Returns 0 on success, otherwise the
   error value of gt_hcr_decoder_decode().

   NOTE(review): seq and qual are fixed BUFSIZ buffers filled by
   gt_hcr_decoder_decode(); presumably reads are known to be shorter than
   that -- confirm against the decoder. */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err)
{
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  int own_timer = 0; /* set if the timer was created in this function */
  unsigned long readnum,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                count;
  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
    own_timer = 1;
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));
  /* leave the loop as soon as a read could not be decoded */
  for (count = 0; !had_err && count < amount; count++) {
    readnum = gt_rand_max(max_rand);
    gt_log_log("get read: %lu", readnum);
    had_err = gt_hcr_decoder_decode(hcrd, readnum, seq, qual, desc, err);
    gt_log_log("%s",gt_str_get(desc));
    gt_log_log("%s",seq);
    gt_log_log("%s",qual);
  }

  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  /* release only what was allocated here; never free the caller's timer */
  if (own_timer)
    gt_timer_delete(timer);
  return had_err;
}
/* Extract the sequences whose keys are listed in <fastakeyfile>.
   With exactly one file argument for which gt_deskeysfileexists() succeeds,
   the keys are looked up via gt_extractkeysfromfastaindex(); otherwise all
   <argc> arguments in <argv> are treated as reference files and passed to
   gt_extractkeysfromfastafile(), writing to <outfp>.
   Returns 0 on success and -1 on failure (an error is set in <err> if no file
   argument was given). */
static int process_fastakeyfile(GtStr *fastakeyfile, int argc,
                                const char **argv, unsigned long width,
                                GtFile *outfp, GtError *err)
{
  int had_err = 0;
  GtStr *first_arg;

  gt_error_check(err);
  gt_assert(gt_str_length(fastakeyfile));

  if (argc == 0) {
    gt_error_set(err,"option -keys requires at least one file argument");
    return -1;
  }

  first_arg = gt_str_new_cstr(argv[0]);
  if (argc == 1 && gt_deskeysfileexists(first_arg)) {
    /* single argument with existing keys file: use the index */
    if (gt_extractkeysfromfastaindex(first_arg,fastakeyfile,width,err) != 0)
      had_err = -1;
  }
  else {
    GtStrArray *reference_files = gt_str_array_new();
    int argnum;

    for (argnum = 0; argnum < argc; argnum++)
      gt_str_array_add_cstr(reference_files, argv[argnum]);

    /* any return value other than 1 is treated as failure */
    if (gt_extractkeysfromfastafile(true, outfp, width, fastakeyfile,
                                    reference_files, err) != 1)
      had_err = -1;
    gt_str_array_delete(reference_files);
  }
  gt_str_delete(first_arg);

  return had_err;
}
/* Build an ID for <fn> consisting of its feature type followed by the number
   of times that type has been counted in gff3_visitor->id_counter (the
   counter is increased first). The new string is registered for <fn> in
   gff3_visitor->feature_node_to_unique_id_str and also returned. */
static GtStr* create_unique_id(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  const char *fn_type;
  GtStr *unique_id;

  gt_assert(gff3_visitor && fn);
  fn_type = gt_feature_node_get_type(fn);

  /* count this occurrence of the type ... */
  gt_string_distri_add(gff3_visitor->id_counter, fn_type);

  /* ... and use the new count as numeric suffix */
  unique_id = gt_str_new_cstr(fn_type);
  gt_str_append_ulong(unique_id,
                      gt_string_distri_get(gff3_visitor->id_counter,
                                           fn_type));

  /* remember the ID for this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, unique_id);

  return unique_id;
}
/* Allocate a mapped range descriptor for the table <tablename> with
   <numofentries> entries of kind <type>. Nothing is mapped yet: all pointers
   are NULL, no index range is defined and the object is not writable.
   The unit size and number of units follow from <type>; for bit sequences
   the number of units is GT_NUMOFINTSFORBITS(numofentries), otherwise it is
   <numofentries>. <transformfunc> and <transformfunc_data> are stored as
   given. An unknown <type> triggers an assertion. */
GtSfxmappedrange *gt_Sfxmappedrange_new(const char *tablename,
                                        GtUword numofentries,
                                        GtSfxmappedrangetype type,
                                        GtSfxmappedrangetransformfunc
                                          transformfunc,
                                        const void *transformfunc_data)
{
  GtSfxmappedrange *smr = gt_malloc(sizeof (*smr));

  /* nothing mapped so far */
  smr->ptr = NULL;
  smr->entire = NULL;
  smr->usedptrptr = NULL;
  smr->filename = NULL;
  smr->writable = false;
  smr->indexrange_defined = false;
  smr->currentminindex = 0;
  smr->currentmaxindex = 0;

  /* values supplied by the caller or the system */
  smr->pagesize = gt_pagesize();
  smr->transformfunc = transformfunc;
  smr->transformfunc_data = transformfunc_data;
  smr->type = type;
  smr->tablename = gt_str_new_cstr(tablename);

  /* layout of the table depends on the kind of entries */
  switch (type)
  {
    case GtSfxGtBitsequence:
      smr->sizeofunit = sizeof (GtBitsequence);
      smr->numofunits = GT_NUMOFINTSFORBITS(numofentries);
      break;
    case GtSfxuint32_t:
      smr->sizeofunit = sizeof (uint32_t);
      smr->numofunits = (size_t) numofentries;
      break;
    case GtSfxunsignedlong:
      smr->sizeofunit = sizeof (GtUword);
      smr->numofunits = (size_t) numofentries;
      break;
    default:
      gt_assert(false);
      break;
  }
  return smr;
}
/* Return an ID string for <fn> based on its "ID" attribute. If that ID is
   already contained in gff3_visitor->used_ids, id_string_is_unique() is
   called with increasing counters (starting at 1) until it reports success;
   a warning is issued and the ID is replaced by the content of the buffer
   filled during these calls. The resulting ID is added to used_ids,
   registered for <fn> in feature_node_to_unique_id_str and returned. */
static GtStr* make_id_unique(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtUword attempt = 1;
  GtStr *id = gt_str_new_cstr(gt_feature_node_get_attribute(fn, "ID"));

  if (gt_cstr_table_get(gff3_visitor->used_ids, gt_str_get(id))) {
    GtStr *candidate = gt_str_new();

    /* try counter values 1, 2, ... until an unused ID has been found */
    for (;;) {
      if (id_string_is_unique(id, candidate, gff3_visitor->used_ids,
                              attempt++))
        break;
    }
    gt_warning("feature ID \"%s\" not unique: changing to %s", gt_str_get(id),
               gt_str_get(candidate));
    gt_str_set(id, gt_str_get(candidate));
    gt_str_delete(candidate);
  }

  /* the (possibly changed) ID is in use from now on */
  gt_cstr_table_add(gff3_visitor->used_ids, gt_str_get(id));

  /* remember the ID for this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, id);

  return id;
}
/* Pop the next entry from the tool stack of <tool_iterator> and report its
   name and tool via <name> and <tool>. Returns false if the stack is empty,
   true otherwise.
   If a prefix string has been registered (prefixptr), it is reset and, for
   entries carrying a prefix, set to that prefix followed by the separator
   character.
   If the popped tool is a toolbox, its tools are collected via
   add_tool_to_stack() (with prefix "<entry prefix><entry name>"), reversed
   and pushed onto the stack so that they are returned by later calls. For any
   other tool the prefix string of the entry is deleted. */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolIterationInfo info;
  ToolEntry *top;
  GtToolbox *toolbox;
  GtArray *children;
  GtStr *child_prefix;

  gt_assert(tool_iterator && name && tool);

  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  top = gt_array_pop(tool_iterator->tool_stack);
  *name = top->name;
  *tool = top->tool;

  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (top->prefix) {
      gt_str_append_str(tool_iterator->prefixptr, top->prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  if (!gt_tool_is_toolbox(top->tool)) {
    gt_str_delete(top->prefix);
    return true;
  }

  /* a toolbox: schedule the contained tools for the following calls */
  child_prefix = gt_str_new_cstr(top->prefix ? gt_str_get(top->prefix) : "");
  gt_str_append_cstr(child_prefix, top->name);
  toolbox = gt_tool_get_toolbox(top->tool);
  children = gt_array_new(sizeof (ToolEntry));
  info.arr = children;
  info.str = child_prefix;
  gt_toolbox_iterate(toolbox, add_tool_to_stack, &info);
  if (gt_array_size(children)) {
    gt_array_reverse(children); /* alphabetical order */
    gt_array_add_array(tool_iterator->tool_stack, children);
  }
  gt_array_delete(children);
  gt_str_delete(child_prefix);

  return true;
}
/* Create the type checker described by <tci>. If the built-in checker has
   been requested it is returned directly. Otherwise an OBO file is chosen:
     a. no argument given:            <obo path> + "sofa.obo"
     b. argument is an existing file: the argument itself
     c. anything else:                <obo path> + argument + ".obo"
   and an OBO type checker is created from it. Returns NULL if the OBO path
   could not be determined or gt_type_checker_obo_new() returned NULL. */
GtTypeChecker* gt_typecheck_info_create_type_checker(const GtTypecheckInfo
                                                                         *tci,
                                                     GtError *err)
{
  GtTypeChecker *checker = NULL;
  GtStr *obo_path;
  int had_err = 0,
      has_argument;

  gt_error_check(err);
  gt_assert(tci);

  if (tci->typecheck_built_in)
    return gt_type_checker_builtin_new();

  gt_assert(gt_option_is_set(tci->typecheck_option));
  has_argument = gt_str_length(tci->typecheck) != 0;

  if (has_argument && gt_file_exists(gt_str_get(tci->typecheck))) {
    /* b. */
    obo_path = gt_str_new_cstr(gt_str_get(tci->typecheck));
  }
  else {
    /* a. and c. both start from the OBO directory */
    obo_path = get_obo_path(err);
    if (!obo_path) {
      had_err = -1;
    }
    else if (has_argument) {
      /* c. */
      gt_str_append_str(obo_path, tci->typecheck);
      gt_str_append_cstr(obo_path, ".obo");
    }
    else {
      /* a. */
      gt_str_append_cstr(obo_path, "sofa.obo");
    }
  }

  if (!had_err)
    checker = gt_type_checker_obo_new(gt_str_get(obo_path), err);
  gt_str_delete(obo_path);

  return checker;
}
/* Attach an ORF feature to the feature tree rooted at <gf>.
   Both coordinates of <orf_rng> are increased by one (presumably converting
   0-based to 1-based positions -- TODO confirm with the caller). The tree is
   then searched for nodes whose type differs from GT_ORF_TYPE and whose range
   contains the ORF; the last such node found by the iterator becomes the
   parent. If there is one, a new GT_ORF_TYPE feature on <strand> with source
   GT_ORF_FINDER_TAG and a "frame" attribute holding <orf_frame> is added as
   its child. Nothing is added if no node contains the ORF. */
static void orf_attach_results_to_gff3(GtFeatureNode *gf, GtRange orf_rng,
                                       unsigned int orf_frame,
                                       GtStrand strand,
                                       GT_UNUSED GtError *err)
{
  GtGenomeNode *child;
  GtStr *tag;
  GtFeatureNodeIterator *gfi;
  GtFeatureNode *curnode = NULL,
                *parent_node = NULL;
  GtRange gfi_range;
  /* large enough for any int in decimal notation incl. sign and '\0' */
  char frame_buf[16];

  tag = gt_str_new_cstr(GT_ORF_FINDER_TAG);
  orf_rng.start++;
  orf_rng.end++;

  /* bounded formatting; the cast keeps the argument in line with "%d" */
  (void) snprintf(frame_buf, sizeof frame_buf, "%d", (int) orf_frame);

  gfi = gt_feature_node_iterator_new(gf);
  while ((curnode = gt_feature_node_iterator_next(gfi))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               (const char*) GT_ORF_TYPE) != 0) {
      gfi_range = gt_genome_node_get_range((GtGenomeNode*) curnode);
      if (gt_range_contains(&gfi_range, &orf_rng)) {
        parent_node = curnode;
      }
    }
  }

  if (parent_node) {
    child = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) gf),
                                GT_ORF_TYPE, orf_rng.start, orf_rng.end,
                                strand);
    gt_feature_node_set_source((GtFeatureNode*) child, tag);
    gt_feature_node_set_attribute((GtFeatureNode*) child, "frame",
                                  frame_buf);
    gt_feature_node_add_child(parent_node,(GtFeatureNode*) child);
  }

  gt_str_delete(tag);
  gt_feature_node_iterator_delete(gfi);
}
/* Create a plain GFF3 input stream reading the given <files>; takes
   ownership of <files>. <ensure_sorting> is passed on to the node stream and
   also stored in the stream itself. The new stream has no open file, ID and
   region checks as well as the progress bar are switched off, and it owns a
   fresh node buffer, GFF3 parser and table of used types. */
static GtNodeStream* gff3_in_stream_plain_new(GtStrArray *files,
                                              bool ensure_sorting)
{
  GtNodeStream *ns = gt_node_stream_create(gt_gff3_in_stream_plain_class(),
                                           ensure_sorting);
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(ns);

  /* input files and reading position */
  is->files = files;
  is->next_file = 0;
  is->fpin = NULL;
  is->file_is_open = false;
  is->line_number = 0;
  is->stdinstr = gt_str_new_cstr("stdin");
  is->stdin_argument = false;

  /* parsing state */
  is->genome_node_buffer = gt_queue_new();
  is->gff3_parser = gt_gff3_parser_new(NULL);
  is->used_types = gt_cstr_table_new();

  /* settings */
  is->ensure_sorting = ensure_sorting;
  is->checkids = false;
  is->checkregions = false;
  is->progress_bar = false;

  return ns;
}
/* Initialise <stream> from its feature index: store the sequence IDs of the
   index in stream->seqids, reset stream->seqindex and queue one region node
   per sequence ID in stream->regioncache. The range of a region node is the
   original range of the sequence if stream->useorig is set, otherwise the
   range covered by its features.

   Errors reported by the feature index cannot be passed on (the function
   returns nothing); they are handled by not creating region nodes from
   undefined data: if the sequence IDs cannot be determined no node is queued,
   and the loop stops at the first sequence whose range lookup fails. */
void feature_in_stream_init(GtFeatureInStream *stream)
{
  GtUword i;
  GtError *error = gt_error_new();

  stream->seqids = gt_feature_index_get_seqids(stream->fi, error);
  stream->seqindex = 0;

  if (stream->seqids == NULL) {
    /* nothing to iterate over */
    gt_error_delete(error);
    return;
  }

  for (i = 0; i < gt_str_array_size(stream->seqids); i++)
  {
    const char *seqid = gt_str_array_get(stream->seqids, i);
    GtRange seqrange;
    GtStr *seqstr;
    GtGenomeNode *rn;
    int had_err;

    if (stream->useorig)
      had_err = gt_feature_index_get_orig_range_for_seqid(stream->fi,
                                                          &seqrange, seqid,
                                                          error);
    else
      had_err = gt_feature_index_get_range_for_seqid(stream->fi, &seqrange,
                                                     seqid, error);
    /* seqrange is undefined after a failed lookup and <error> is set, so
       neither this nor any further lookup can be used */
    if (had_err)
      break;

    seqstr = gt_str_new_cstr(seqid);
    rn = gt_region_node_new(seqstr, seqrange.start, seqrange.end);
    gt_queue_add(stream->regioncache, rn);
    gt_str_delete(seqstr);
  }
  gt_error_delete(error);
}
/* Parse the complete GTF input of <gtf_in_stream> into its node buffer.
   The input is the file named gtf_in_stream->filename or, if no file name is
   set, a NULL file handle labelled "stdin". Returns 0 on success, -1 if the
   file could not be opened, and otherwise the error value of the parser. */
static int gtf_in_stream_process_file(GtGTFInStream *gtf_in_stream,
                                      GtError *err)
{
  GtGTFParser *parser;
  GtFile *infile = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gtf_in_stream);

  parser = gt_gtf_parser_new(gtf_in_stream->type_checker);

  /* open the input file, if one has been given */
  if (gtf_in_stream->filename) {
    infile = gt_file_new(gtf_in_stream->filename, "r", err);
    if (!infile)
      had_err = -1;
  }

  /* parse it */
  if (!had_err) {
    GtStr *label = gt_str_new_cstr(gtf_in_stream->filename
                                   ? gtf_in_stream->filename
                                   : "stdin");
    had_err = gt_gtf_parser_parse(parser, gtf_in_stream->genome_node_buffer,
                                  label, infile, gtf_in_stream->tidy, err);
    gt_str_delete(label);
  }

  /* clean up; <infile> may still be NULL here */
  gt_file_delete(infile);
  gt_gtf_parser_delete(parser);

  return had_err;
}
/* Create the XRF checker described by <xci>; the corresponding option must
   have been set. The abbreviation file is chosen as follows:
     a. no argument given:            <xrf path> + "GO.xrf_abbr"
     b. argument is an existing file: the argument itself
     c. anything else:                <xrf path> + argument + ".xrf_abbr"
   Returns NULL if the XRF path could not be determined or
   gt_xrf_checker_new() returned NULL. */
GtXRFChecker* gt_xrfcheck_info_create_xrf_checker(const GtXRFCheckInfo *xci,
                                                  GtError *err)
{
  GtXRFChecker *checker = NULL;
  GtStr *xrf_path;
  int had_err = 0,
      has_argument;

  gt_error_check(err);
  gt_assert(xci);
  gt_assert(gt_option_is_set(xci->xrfcheck_option));

  has_argument = gt_str_length(xci->xrfcheck) != 0;

  if (has_argument && gt_file_exists(gt_str_get(xci->xrfcheck))) {
    /* b. */
    xrf_path = gt_str_new_cstr(gt_str_get(xci->xrfcheck));
  }
  else {
    /* a. and c. both start from the XRF directory */
    xrf_path = get_xrf_path(err);
    if (!xrf_path) {
      had_err = -1;
    }
    else if (has_argument) {
      /* c. */
      gt_str_append_str(xrf_path, xci->xrfcheck);
      gt_str_append_cstr(xrf_path, ".xrf_abbr");
    }
    else {
      /* a. */
      gt_str_append_cstr(xrf_path, "GO.xrf_abbr");
    }
  }

  if (!had_err)
    checker = gt_xrf_checker_new(gt_str_get(xrf_path), err);
  gt_str_delete(xrf_path);

  return checker;
}
static GtIndexOptions* gt_index_options_new(void) { GtIndexOptions *oi = gt_malloc(sizeof *oi); oi->algbounds = gt_str_array_new(); oi->dir = gt_str_new_cstr("fwd"); oi->indexname = NULL; oi->kysargumentstring = gt_str_new(); oi->lcpdist = false; oi->maximumspace = 0UL; /* in bytes */ oi->memlimit = gt_str_new(); oi->numofparts = 1U; oi->option = NULL; oi->optionalgbounds = NULL; oi->optioncmpcharbychar = NULL; oi->optiondifferencecover = NULL; oi->optionmaxwidthrealmedian = NULL; oi->optionmemlimit = NULL; oi->optionoutbcktab = NULL; oi->optionoutbwttab = NULL; oi->optionoutlcptab = NULL; oi->optionoutsuftab = NULL; oi->optionparts = NULL; oi->optionprefixlength = NULL; oi->optionspmopt = NULL; oi->optionstorespecialcodes = NULL; oi->outbcktab = false; oi->outbwttab = false; oi->outkyssort = false; oi->outkystab = false; oi->outlcptab = false; oi->outsuftab = false; /* only defined for GT_INDEX_OPTIONS_ESA */ oi->prefixlength = GT_PREFIXLENGTH_AUTOMATIC; oi->swallow_tail = false; oi->type = GT_INDEX_OPTIONS_UNDEFINED; return oi; }
/* Lua binding: create a region node from the arguments
   (seqid, startpos, endpos) and push it as userdata with the genome node
   metatable. Both positions must be > 0 and startpos must not be larger than
   endpos; otherwise a Lua argument error is raised. Returns the number of
   Lua results (1).

   The positions are validated as signed values before they are converted to
   GtUword; after the conversion a negative argument would be a huge positive
   number and the "> 0" checks could never fail. */
static int sequence_region_lua_new(lua_State *L)
{
  GtGenomeNode **rn;
  long startval, endval;
  GtUword startpos, endpos;
  const char *seqid;
  GtStr *seqid_str;
  gt_assert(L);

  /* get_check parameters */
  seqid = luaL_checkstring(L, 1);
  startval = luaL_checklong(L, 2);
  endval = luaL_checklong(L, 3);
  luaL_argcheck(L, startval > 0, 2, "must be > 0");
  luaL_argcheck(L, endval > 0, 3, "must be > 0");
  luaL_argcheck(L, startval <= endval, 2, "must be <= endpos");
  /* both values are known to be positive here */
  startpos = (GtUword) startval;
  endpos = (GtUword) endval;

  /* construct object */
  rn = lua_newuserdata(L, sizeof (GtGenomeNode*));
  seqid_str = gt_str_new_cstr(seqid);
  *rn = gt_region_node_new(seqid_str, startpos, endpos);
  gt_str_delete(seqid_str);
  gt_assert(*rn);
  luaL_getmetatable(L, GENOME_NODE_METATABLE);
  lua_setmetatable(L, -2);
  return 1;
}
/* Write all unique and link elements of <condenseq> as GFF3 features to the
   file "<basefilename>.gff3".
   Every element becomes an "experimental_feature" with source "Condenseq" on
   the sequence it starts in (the sequence description is used as sequence
   ID). Uniques get the attributes Name=unique<idx> and ID=U<idx>, links get
   Name=link<idx>, ID=L<idx> and Derives_from=U<unique_id>. Coordinates are
   written 1-based and relative to the start of the sequence.
   Returns 0 on success, -1 if the output file could not be opened (<err> is
   set by gt_file_new()), otherwise the error value of the GFF3 visitor. */
int gt_condenseq_output_to_gff3(const GtCondenseq *condenseq, GtError *err)
{
  int had_err = 0;
  GtUword idx,
          name_len,
          seqnum = 0,
          seqstart = 0,
          seqend = 0,
          desclen;
  GtStr *filename = NULL,
        *id = gt_str_new_cstr("U"),
        *name = gt_str_new_cstr("unique"),
        *parent_unique = gt_str_new_cstr("U"),
        *seqid = gt_str_new(),
        *source = gt_str_new_cstr("Condenseq");
  GtFile *outfile = NULL;
  GtGFF3Visitor *gffv = NULL;
  GtNodeVisitor *nodev = NULL;
  GtFeatureNode *fnode = NULL;
  GtGenomeNode *node = NULL;
  GtRange range;

  gt_assert(condenseq != NULL);

  filename = gt_str_new_cstr(gt_condenseq_basefilename(condenseq));
  name_len = gt_str_length(name);
  gt_str_append_cstr(filename, ".gff3");

  outfile = gt_file_new(gt_str_get(filename), "w", err);
  /* without an output file nothing may be written, and <err> is already
     set, so it must not be handed to the visitor anymore */
  if (outfile == NULL)
    had_err = -1;

  if (!had_err) {
    nodev = gt_gff3_visitor_new(outfile);
    gffv = (GtGFF3Visitor *) nodev;
    gt_gff3_visitor_retain_id_attributes(gffv);

    /* a single feature node per sequence is reused for all elements of that
       sequence; only attributes and range are updated before each visit */
    node = gt_feature_node_new(seqid, "experimental_feature",
                               (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
    fnode = (GtFeatureNode*) node;
    gt_feature_node_set_source(fnode, source);

    /* uniques */
    for (idx = 0; !had_err && idx < condenseq->udb_nelems; ++idx) {
      GtCondenseqUnique uq = condenseq->uniques[idx];
      if (seqend <= uq.orig_startpos) {
        /* element starts in a new sequence: switch to a node for it */
        const char *desc;
        gt_genome_node_delete(node);
        seqnum = gt_condenseq_pos2seqnum(condenseq, uq.orig_startpos);
        seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
        seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum,
                                                     seqstart);
        desc = gt_condenseq_description(condenseq, &desclen, seqnum);
        gt_str_reset(seqid);
        gt_str_append_cstr_nt(seqid, desc, desclen);
        node = gt_feature_node_new(seqid, "experimental_feature",
                                   (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
        fnode = (GtFeatureNode*) node;
        gt_feature_node_set_source(fnode, source);
      }
      /* cut name and id back to their prefix and append the index */
      gt_str_set_length(name, name_len);
      gt_str_append_uword(name, idx);
      gt_str_set_length(id, (GtUword) 1);
      gt_str_append_uword(id, idx);
      gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
      gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
      /* 1 Based coordinates! */
      range.start = uq.orig_startpos + 1 - seqstart;
      range.end = uq.orig_startpos + uq.len - seqstart;
      gt_genome_node_set_range(node, &range);
      had_err = gt_genome_node_accept(node, nodev, err);
    }

    /* links: same scheme with prefixes "link" and "L" */
    gt_str_reset(name);
    gt_str_append_cstr(name, "link");
    gt_str_reset(id);
    gt_str_append_cstr(id, "L");
    name_len = gt_str_length(name);
    seqend = 0;
    for (idx = 0; !had_err && idx < condenseq->ldb_nelems; ++idx) {
      GtCondenseqLink link = condenseq->links[idx];
      if (seqend <= link.orig_startpos) {
        /* element starts in a new sequence: switch to a node for it */
        const char *desc;
        gt_genome_node_delete(node);
        seqnum = gt_condenseq_pos2seqnum(condenseq, link.orig_startpos);
        seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
        seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum,
                                                     seqstart);
        desc = gt_condenseq_description(condenseq, &desclen, seqnum);
        gt_str_reset(seqid);
        gt_str_append_cstr_nt(seqid, desc, desclen);
        node = gt_feature_node_new(seqid, "experimental_feature",
                                   (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
        fnode = (GtFeatureNode*) node;
        gt_feature_node_set_source(fnode, source);
      }
      gt_str_set_length(name, name_len);
      gt_str_append_uword(name, idx);
      gt_str_set_length(id, (GtUword) 1);
      gt_str_append_uword(id, idx);
      gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
      gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
      gt_str_set_length(parent_unique, (GtUword) 1);
      gt_str_append_uword(parent_unique, link.unique_id);
      gt_feature_node_set_attribute(fnode, "Derives_from",
                                    gt_str_get(parent_unique));
      /* 1 Based coordinates! */
      range.start = link.orig_startpos + 1 - seqstart;
      range.end = link.orig_startpos + link.len - seqstart;
      gt_genome_node_set_range(node, &range);
      had_err = gt_genome_node_accept(node, nodev, err);
    }

    gt_file_delete(outfile);
    gt_genome_node_delete(node);
    gt_node_visitor_delete(nodev);
  }

  gt_str_delete(filename);
  gt_str_delete(id);
  gt_str_delete(name);
  gt_str_delete(parent_unique);
  gt_str_delete(seqid);
  gt_str_delete(source);
  return had_err;
}
/* Load the encoded sequence of <bs> (which must not have one yet).
   The index files are named after the sequence file, or "stdin" when reading
   from standard input. They are (re)constructed first if <recreate> is set,
   the input is stdin, one of the five index files is missing or the sequence
   file is newer than the encseq file. Afterwards the encoded sequence is
   loaded with lossless, description, MD5 and multiseq support required.
   Returns 0 on success; on failure a non-zero value is returned (-1 with
   <err> set if loading failed). */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *basename_str,
        *esq_file,
        *ois_file,
        *sds_file,
        *md5_file,
        *des_file;
  bool rebuild;
  int had_err = 0;

  gt_assert(!bs->encseq);

  /* the temporary "stdin" string is owned by this function */
  basename_str = bs->use_stdin ? gt_str_new_cstr("stdin")
                               : bs->sequence_file;

  /* construct file names */
  esq_file = gt_str_clone(basename_str);
  gt_str_append_cstr(esq_file, GT_ENCSEQFILESUFFIX);
  ois_file = gt_str_clone(basename_str);
  gt_str_append_cstr(ois_file, GT_OISTABFILESUFFIX);
  sds_file = gt_str_clone(basename_str);
  gt_str_append_cstr(sds_file, GT_SDSTABFILESUFFIX);
  md5_file = gt_str_clone(basename_str);
  gt_str_append_cstr(md5_file, GT_MD5TABFILESUFFIX);
  des_file = gt_str_clone(basename_str);
  gt_str_append_cstr(des_file, GT_DESTABFILESUFFIX);

  /* construct the bioseq files if necessary */
  rebuild = recreate || bs->use_stdin ||
            !gt_file_exists(gt_str_get(esq_file)) ||
            !gt_file_exists(gt_str_get(ois_file)) ||
            !gt_file_exists(gt_str_get(sds_file)) ||
            !gt_file_exists(gt_str_get(md5_file)) ||
            !gt_file_exists(gt_str_get(des_file)) ||
            gt_file_is_newer(gt_str_get(bs->sequence_file),
                             gt_str_get(esq_file));
  if (rebuild)
    had_err = construct_bioseq_files(bs, basename_str, err);

  /* load the encoded sequence */
  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(basename_str), err);
    if (bs->encseq == NULL) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  if (!had_err) {
    gt_assert(bs->encseq);
  }

  /* free */
  if (bs->use_stdin)
    gt_str_delete(basename_str);
  gt_str_delete(esq_file);
  gt_str_delete(ois_file);
  gt_str_delete(md5_file);
  gt_str_delete(sds_file);
  gt_str_delete(des_file);

  return had_err;
}
/* Tool runner drawing a GFF3 annotation onto one or more pages of a PDF or
   PostScript file. argv[parsed_args] is used as the output file name and
   argv[parsed_args+1] as the GFF3 input file. Steps, in this order:
   - create an in-memory feature index and warn if the cairo library is older
     than version 1.8.6;
   - load the style file named in arguments->stylefile or, if that string is
     empty, "<gtdata path>/sketch/default.style";
   - add the GFF3 file to the index and determine the sequence ID: the first
     one in the index if arguments->seqid is empty, otherwise
     arguments->seqid, which must exist in the index;
   - if arguments->text is empty, replace it by the input file name;
   - create the cairo surface ("pdf" or "ps" format) and walk the query range
     in steps of arguments->width: for each step a diagram (plus a GC content
     custom track if a sequence file was given) is laid out and sketched
     below the previous one; when the next layout would not fit anymore, the
     page header is drawn and a new page is started;
   - draw the header of the last page, finish the surface and release the
     resources.
   Returns had_err, i.e. 0 if no step failed.
   NOTE(review): sty, gt_style_file and features are only released inside the
   final `if (!had_err)` block, so they appear to leak when loading the style
   or the GFF3 file fails -- confirm and fix.
   NOTE(review): surf stays NULL if arguments->format is neither "pdf" nor
   "ps"; presumably the option parser restricts the value -- verify.
   NOTE(review): the result of gt_get_gtdata_path() is used without a NULL
   check -- verify that it cannot fail here. */
static int gt_sketch_page_runner(GT_UNUSED int argc, const char **argv, int parsed_args, void *tool_arguments, GtError *err) { SketchPageArguments *arguments = tool_arguments; int had_err = 0; GtFeatureIndex *features = NULL; GtRange qry_range, sequence_region_range; GtStyle *sty = NULL; GtStr *prog, *gt_style_file; GtDiagram *d = NULL; GtLayout *l = NULL; GtBioseq *bioseq = NULL; GtCanvas *canvas = NULL; const char *seqid = NULL, *outfile; unsigned long start, height, num_pages = 0; double offsetpos, usable_height; cairo_surface_t *surf = NULL; cairo_t *cr = NULL; GtTextWidthCalculator *twc; gt_error_check(err); features = gt_feature_index_memory_new(); if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6)) gt_warning("Your cairo library (version %s) is older than version 1.8.6! " "These versions contain a bug which may result in " "corrupted PDF output!", cairo_version_string()); /* get style */ sty = gt_style_new(err); if (gt_str_length(arguments->stylefile) == 0) { prog = gt_str_new(); gt_str_append_cstr_nt(prog, argv[0], gt_cstr_length_up_to_char(argv[0], ' ')); gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err); gt_str_delete(prog); gt_str_append_cstr(gt_style_file, "/sketch/default.style"); } else { gt_style_file = gt_str_ref(arguments->stylefile); } had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err); outfile = argv[parsed_args]; if (!had_err) { /* get features */ had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1], err); if (!had_err && gt_str_length(arguments->seqid) == 0) { seqid = gt_feature_index_get_first_seqid(features); if (seqid == NULL) { gt_error_set(err, "GFF input file must contain a sequence region!"); had_err = -1; } } else if (!had_err && !gt_feature_index_has_seqid(features, gt_str_get(arguments->seqid))) { gt_error_set(err, "sequence region '%s' does not exist in GFF input file", gt_str_get(arguments->seqid)); had_err = -1; } else if (!had_err) seqid = gt_str_get(arguments->seqid); } /* set text */ if 
(gt_str_length(arguments->text) == 0) { gt_str_delete(arguments->text); arguments->text = gt_str_new_cstr(argv[parsed_args+1]); } if (!had_err) { /* set display range */ gt_feature_index_get_range_for_seqid(features, &sequence_region_range, seqid); qry_range.start = (arguments->range.start == GT_UNDEF_ULONG ? sequence_region_range.start : arguments->range.start); qry_range.end = (arguments->range.end == GT_UNDEF_ULONG ? sequence_region_range.end : arguments->range.end); /* set output format */ if (strcmp(gt_str_get(arguments->format), "pdf") == 0) { surf = cairo_pdf_surface_create(outfile, mm_to_pt(arguments->pwidth), mm_to_pt(arguments->pheight)); } else if (strcmp(gt_str_get(arguments->format), "ps") == 0) { surf = cairo_ps_surface_create(outfile, mm_to_pt(arguments->pwidth), mm_to_pt(arguments->pheight)); } gt_log_log("created page with %.2f:%.2f dimensions\n", mm_to_pt(arguments->pwidth), mm_to_pt(arguments->pheight)); offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER; usable_height = mm_to_pt(arguments->pheight) - arguments->theight - arguments->theight - 4*TEXT_SPACER; if (gt_str_length(arguments->seqfile) > 0) { bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err); } cr = cairo_create(surf); cairo_set_font_size(cr, 8); twc = gt_text_width_calculator_cairo_new(cr, sty); for (start = qry_range.start; start <= qry_range.end; start += arguments->width) { GtRange single_range; GtCustomTrack *ct = NULL; const char *seq; single_range.start = start; single_range.end = start + arguments->width; if (had_err) break; d = gt_diagram_new(features, seqid, &single_range, sty, err); if (!d) { had_err = -1; break; } if (bioseq) { seq = gt_bioseq_get_sequence(bioseq, 0); ct = gt_custom_track_gc_content_new(seq, gt_bioseq_get_sequence_length(bioseq, 0), 800, 70, 0.4, true); gt_diagram_add_custom_track(d, ct); } l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err); had_err = gt_layout_get_height(l, &height, err); if (!had_err) { if 
(gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER - arguments->theight, offsetpos + height)) { draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid, num_pages, mm_to_pt(arguments->pwidth), mm_to_pt(arguments->pheight), arguments->theight); cairo_show_page(cr); offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER; num_pages++; } canvas = gt_canvas_cairo_context_new(sty, cr, offsetpos, mm_to_pt(arguments->pwidth), height, NULL, err); if (!canvas) had_err = -1; offsetpos += height; if (!had_err) had_err = gt_layout_sketch(l, canvas, err); } gt_canvas_delete(canvas); gt_layout_delete(l); gt_diagram_delete(d); if (ct) gt_custom_track_delete(ct); } draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid, num_pages, mm_to_pt(arguments->pwidth), mm_to_pt(arguments->pheight), arguments->theight); cairo_show_page(cr); num_pages++; gt_log_log("finished, should be %lu pages\n", num_pages); gt_text_width_calculator_delete(twc); cairo_destroy(cr); cairo_surface_flush(surf); cairo_surface_finish(surf); cairo_surface_destroy(surf); cairo_debug_reset_static_data(); if (bioseq) gt_bioseq_delete(bioseq); gt_style_delete(sty); gt_str_delete(gt_style_file); gt_feature_index_delete(features); } return had_err; }
/* Check and complete the arguments of the read compression tool.
   - At least one input file is required.
   - If no name was given, it is derived from the only input file by taking
     its basename without the last '.'-separated component (more than one
     input file without a name is an error). If nothing would be left (no
     extension, or a name starting with its only '.'), the complete basename
     is used, so that the name is never empty.
   - The sampling method ("page", "regular" or "none") selects the sampling
     mode and, if no rate was given, the default rate; a rate of 0 together
     with sampling, or a rate other than 0 without sampling, is an error.
   - A given quality range is copied to arguments->qrng; both bounds must not
     exceed UINT_MAX.
   Returns 0 if the arguments are valid, -1 otherwise (with <err> set). */
static int gt_compreads_compress_arguments_check(GT_UNUSED int rest_argc,
                                                 void *tool_arguments,
                                                 GtError *err)
{
  int had_err = 0;
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  GtSplitter *splitter = NULL;
  GtStr *buffer;
  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_array_size(arguments->files) == 0) {
    gt_error_set(err, "option \"-files\" is mandatory and requires"
                      " at least one filename as argument!");
    had_err = -1;
  }

  if (!had_err) {
    if (gt_str_length(arguments->name) == 0) {
      if (gt_str_array_size(arguments->files) > 1UL) {
        gt_error_set(err, "option \"-name\" needs to be specified"
                          " if more than one file is given");
        had_err = -1;
      }
      else {
        GtUword i, ntokens;
        char *basename;
        splitter = gt_splitter_new();
        basename = gt_basename(gt_str_array_get(arguments->files, 0));
        /* the splitter works on a copy; <basename> itself stays intact */
        buffer = gt_str_new_cstr(basename);
        gt_splitter_split(splitter, gt_str_get(buffer), gt_str_length(buffer),
                          '.');
        ntokens = gt_splitter_size(splitter);
        /* join all tokens but the last one; written as i + 1 < ntokens to
           avoid the unsigned underflow of ntokens - 1 for zero tokens */
        for (i = 0; i + 1 < ntokens; i++) {
          gt_str_append_cstr(arguments->name,
                             gt_splitter_get_token(splitter, i));
          if (i + 2 < ntokens)
            gt_str_append_char(arguments->name, '.');
        }
        /* nothing to strip (e.g. "reads"): use the basename as it is */
        if (gt_str_length(arguments->name) == 0)
          gt_str_append_cstr(arguments->name, basename);
        gt_free(basename);
        gt_splitter_delete(splitter);
        gt_str_delete(buffer);
      }
    }
  }

  if (!had_err) {
    char *sampling_type = gt_str_get(arguments->method);
    static const char *methods[] = { "page", "regular", "none" };

    if (!strcmp(methods[0], sampling_type)) {
      arguments->pagewise = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_PAGE_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "page sampling was chosen, but sampling"
                          " rate was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[1], sampling_type)) {
      arguments->regular = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_REGULAR_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "regular sampling was chosen, but sampling rate "
                          " was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[2], sampling_type)) {
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = 0;
      else if (arguments->srate != 0) {
        gt_error_set(err, "no sampling was chosen, but sampling rate was"
                          " set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else {
      gt_error_set(err, "somethings wrong with the stype option");
      had_err = -1;
    }
  }

  if (!had_err) {
    if (arguments->arg_range.start != GT_UNDEF_UWORD) {
      if (arguments->arg_range.start <= (GtUword) UINT_MAX) {
        gt_safe_assign(arguments->qrng.start, arguments->arg_range.start);
        if (arguments->arg_range.end <= (GtUword) UINT_MAX)
          gt_safe_assign(arguments->qrng.end, arguments->arg_range.end);
        else
          had_err = -1;
      }
      else
        had_err = -1;
    }
    /* had_err can only have been set by the range check above */
    if (had_err)
      gt_error_set(err, "Range for qualities: value to large! larger than %u",
                   UINT_MAX);
  }
  return had_err;
}
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav, GtFeatureNode *mRNA, GtFeatureNode *snp, GtUword variant_pos, GtUword variant_idx, char variant_char, #ifndef NDEBUG GT_UNUSED char reference_char, #endif GT_UNUSED GtError *err) { int had_err = 0; char *mrnaseq; const char *variant_effect = NULL; gt_assert(mRNA && snp && sav); gt_log_log("processing variant char %c for SNP %s\n", variant_char, gt_feature_node_get_attribute(snp, "Dbxref")); mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA); gt_assert(mrnaseq); if (mrnaseq) { char codon[3], variant_codon[3]; GtStr *effect_string; char oldamino, newamino; GT_UNUSED GtUword mrnalen; GtUword startpos = variant_pos / GT_CODON_LENGTH, variantoffset = variant_pos % GT_CODON_LENGTH; mrnalen = strlen(mrnaseq); gt_assert(variant_pos < mrnalen); variant_codon[0] = codon[0] = mrnaseq[3*startpos]; variant_codon[1] = codon[1] = mrnaseq[3*startpos+1]; variant_codon[2] = codon[2] = mrnaseq[3*startpos+2]; variant_codon[variantoffset] = variant_char; #ifndef NDEBUG gt_assert(toupper(codon[variantoffset]) == toupper(reference_char)); #endif if (gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1], codon[2])) { if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2])) { variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT); } else { variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT); } } else { if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2])) { variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT); } else { had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1], codon[2], &oldamino, err); if (!had_err) { had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0], variant_codon[1], variant_codon[2], &newamino, err); } if (!had_err) { if (newamino == oldamino) { variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT); } else { variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT); } } } } if (!had_err) { const 
char *var_attrib; gt_assert(variant_effect != NULL); if ((var_attrib = gt_feature_node_get_attribute(snp, GT_GVF_VARIANT_EFFECT))) { effect_string = gt_str_new_cstr(var_attrib); gt_str_append_cstr(effect_string, ","); gt_str_append_cstr(effect_string, variant_effect); } else { effect_string = gt_str_new_cstr(variant_effect); } gt_str_append_cstr(effect_string, " "); gt_str_append_ulong(effect_string, variant_idx); gt_str_append_cstr(effect_string, " "); gt_str_append_cstr(effect_string, gt_feature_node_get_type(mRNA)); gt_str_append_cstr(effect_string, " "); gt_str_append_cstr(effect_string, gt_feature_node_get_attribute(mRNA, GT_GFF_ID)); gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT, gt_str_get(effect_string)); gt_str_reset(effect_string); gt_str_delete(effect_string); } } return had_err; }
/* Requests the DNA sequence of one hit from NCBI's efetch CGI script via
   libcurl. The URL is assembled from the configured database, the GI number
   in <parsestruct_ptr>->hit_gi_nr_tmp and the hit-from/hit-to entries at
   index <hit_counter>; the response is collected in a MemoryStruct through
   WriteMemoryCallback and then searched for the <GBSeq_sequence> tag.
   On a curl failure or a missing tag <err> is set and had_err becomes -1.
   NOTE(review): the remainder of this function is not visible in this part
   of the file, so the handling of the located sequence and the cleanup are
   not documented here. */
int mg_curl(ParseStruct *parsestruct_ptr, GtUword hit_counter, GtError * err)
{
  int had_err = 0, curl_errornr = 0;

  /* length of the hit DNA sequence taken from the XML file */
  GtUword seq_len;
  GtWord numb_from = 0, numb_to = 0, numb_diff = 0;

  GtStr *seq_var, *http_adr;

  MemoryStruct memorystruct;

  /* char pointer to the HTTP address of NCBI's efetch CGI script */
  char *http_adr_ptr, *seq_pos;
  /* char pointer used to read the sequence information out of the XML file
     that is the result of the efetch request */
  const char *curlerror;

  /* curl handle */
  CURL *curl_handle;

  /* the char pointer to the data is NULL */
  memorystruct.memory = NULL;
  /* no data entered or stored yet */
  memorystruct.size = 0;

  /* temporary buffer for the sequence information, because the GtStrArray
     class offers no function for inserting a bounded part of a string;
     set the first part of the HTTP address */
  seq_var = gt_str_new();
  http_adr = gt_str_new_cstr ("http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=");

  /* check of the environment variables (translated original remark;
     NOTE(review): the call below receives only the error object) */
  gt_error_check(err);

  curl_global_init(CURL_GLOBAL_ALL);

  /* initialise the curl session */
  curl_handle = curl_easy_init();

  /* assemble the HTTP address by appending the query GI number, the
     hit-from value, the hit-to value and the return type to the first part
     of the HTTP address */
  gt_str_append_str(http_adr, ARGUMENTSSTRUCT(curl_fcgi_db));
  gt_str_append_cstr(http_adr, "&id=gi|");
  gt_str_append_str(http_adr, parsestruct_ptr->hit_gi_nr_tmp);
  gt_str_append_cstr(http_adr, "&seq_start=");
  gt_str_append_cstr(http_adr, gt_str_array_get(MATRIXSTRUCT(hit_from), hit_counter));
  gt_str_append_cstr(http_adr, "&seq_stop=");
  gt_str_append_cstr(http_adr, gt_str_array_get(MATRIXSTRUCT(hit_to), hit_counter));
  gt_str_append_cstr(http_adr, "&retmode=xml");

  /* a char pointer is needed because curl_easy_setopt expects a char
     pointer as its 3rd parameter */
  http_adr_ptr = gt_str_get(http_adr);

  /* set which HTTP address is to be requested */
  curl_easy_setopt(curl_handle, CURLOPT_URL, http_adr_ptr);

  /* the received data are passed to the function WriteMemoryCallback, where
     memory is reserved and the data are copied into that memory area */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

  /* the data are stored in the struct */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) &memorystruct);

  /* set the user-agent field, since some servers require it */
  curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

  /* perform the request */
  curl_errornr = curl_easy_perform(curl_handle);
  curlerror = curl_easy_strerror(curl_errornr);

  if (curl_errornr)
  {
    gt_error_set(err, "an error occurred during curl-processing (error-code %d):\ \"%s\"", curl_errornr, curlerror);
    had_err = -1;
  }

  if (!had_err)
  {
    /* the hit DNA is located between the <GBSeq_sequence> and the
       </GBSeq_sequence> XML tag; pointer to the '<' character of
       <GBSeq_sequence> */
    seq_pos = strstr(memorystruct.memory, "<GBSeq_sequence>");

    if (!seq_pos)
    {
      gt_error_set(err, "an error occurred while retrieving sequence-information\ with the following request: \"%s\"", http_adr_ptr);
      had_err = -1;
    }
/* Traversal callback that turns one (key, block list) entry into a GtTrack.

   <key> is the C string used as track title and as key in the layout's
   track map, <value> the GtArray of GtBlock pointers belonging to it and
   <data> the GtLayoutTraverseInfo carrying the layout.

   The "split_lines" and "max_num_lines" settings are read first from the
   "format" section of the style and then from the section named after the
   type of the first block; the built-in defaults are true and 50.
   Returns 0 on success, 1 if a style query failed, or the error code of
   gt_track_insert_block(). On failure no track is registered. */
static int layout_tracks(void *key, void *value, void *data, GtError *err)
{
  GtLayoutTraverseInfo *info = (GtLayoutTraverseInfo*) data;
  GtArray *blocks = (GtArray*) value;
  GtTrack *track = NULL;
  GtStr *gt_track_key;
  GtBlock *block;
  unsigned long idx = 0,
                max = 50;
  double tmp = 50;
  bool split = true;
  int had_err = 0;
  gt_assert(blocks);

  /* the blocks are (stably) sorted only if an ordering function is set */
  if (info->layout->block_ordering_func)
    gt_array_sort_stable_with_data(blocks, blocklist_block_compare,
                                   info->layout);

  /* XXX: the first block supplies the type for the track property lookups;
     this should be reworked to allow arbitrary track keys! */
  block = *(GtBlock**) gt_array_get(blocks, 0);
  gt_track_key = gt_str_new_cstr((char*) key);

  /* global defaults from the "format" section */
  if (gt_style_get_bool(info->layout->style, "format", "split_lines",
                        &split, NULL, err) == GT_STYLE_QUERY_ERROR)
    had_err = 1;
  if (!had_err
        && gt_style_get_num(info->layout->style, "format", "max_num_lines",
                            &tmp, NULL, err) == GT_STYLE_QUERY_ERROR)
    had_err = 1;

  /* type-specific settings replace the defaults where present */
  if (!had_err
        && gt_style_get_bool(info->layout->style, gt_block_get_type(block),
                             "split_lines", &split, NULL, err)
             == GT_STYLE_QUERY_ERROR)
    had_err = 1;
  if (!had_err
        && gt_style_get_num(info->layout->style, gt_block_get_type(block),
                            "max_num_lines", &tmp, NULL, err)
             == GT_STYLE_QUERY_ERROR)
    had_err = 1;

  if (!had_err) {
    max = (unsigned long) tmp;
    track = gt_track_new(gt_track_key, max, split,
                         gt_line_breaker_captions_new(info->layout,
                                                      info->layout->width,
                                                      info->layout->style));
    info->layout->nof_tracks++;
    /* stop inserting as soon as one block cannot be placed */
    while (!had_err && idx < gt_array_size(blocks)) {
      block = *(GtBlock**) gt_array_get(blocks, idx++);
      had_err = gt_track_insert_block(track, block, err);
    }
  }

  if (had_err)
    gt_track_delete(track);
  else
    gt_hashmap_add(info->layout->tracks,
                   gt_cstr_dup(gt_str_get(gt_track_key)), track);

  gt_str_delete(gt_track_key);
  return had_err;
}
/* Reads the hit table <table_filename> of the coarse search line by line and
   runs the fine search on the sequences referenced by it.

   Lines starting with '#' are skipped. Every other line is split at blanks;
   column 0 is parsed as the number of the target and column 3 is taken as
   the query name. For every target gt_condenseq_each_redundant_seq() is
   called with hmmsearch_process_seq() to fill the tree <sequences>.
   Whenever the number of distinct consecutive query names reaches
   <arguments>->max_queries, the collected sequences are handed to
   hmmsearch_create_fine_fas() / hmmsearch_call_fine_search() and the tree is
   cleared; the sequences remaining after the last line are processed the
   same way. If <arguments>->outtable_filename is not empty, a running number
   is appended to it for each fine search and removed again afterwards.

   Returns 0 on success; on failure a non-zero value is returned and <err>
   is set. */
static int hmmsearch_process_coarse_hits(
                                      char *table_filename,
                                      GtCondenseq *ces,
                                      GtCondenseqHmmsearchArguments *arguments,
                                      GtLogger *logger,
                                      GtError *err)
{
  int had_err = 0;
  GtStr *line = gt_str_new();
  FILE *table = NULL;
  GtSplitter *splitter = gt_splitter_new();
  GtStr *query = gt_str_new(),
        *fine_fasta_filename = gt_str_new_cstr("condenseq");
  GtRBTree *sequences = NULL;
  GtUword filecount = (GtUword) 1;
  unsigned int querycount = 0;
  /* original lengths, used to strip what gets appended per fine search */
  const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename);
  const GtUword table_name_length =
    gt_str_length(arguments->outtable_filename);

  table = gt_xfopen(table_filename, "r");

  sequences = gt_rbtree_new(hmmsearch_cmp_seqnum,
                            hmmsearch_tree_free_node, NULL);

  while (!had_err && gt_str_read_next_line(line, table) == 0) {
    char *c_line = gt_str_get(line);
    GtUword uid = 0;
    const GtUword target_column = 0,
                  query_column = (GtUword) 3;

    if (c_line[0] != '#') {
      gt_splitter_split_non_empty(splitter, c_line, gt_str_length(line), ' ');
      if (gt_splitter_size(splitter) <= query_column) {
        /* without this check a short or empty line would lead to token
           accesses beyond the splitter's contents when assertions are
           compiled out */
        gt_error_set(err, "malformed line in hmmsearch table \"%s\": "
                     "expected at least " GT_WU " columns, found " GT_WU,
                     table_filename, query_column + 1,
                     gt_splitter_size(splitter));
        had_err = -1;
      }
      else {
        gt_assert(gt_splitter_size(splitter) == (GtUword) 23);
        if (sscanf(gt_splitter_get_token(splitter, target_column),
                   GT_WU, &uid) != 1) {
          gt_error_set(err, "couldn't parse target number: %s",
                       gt_splitter_get_token(splitter, target_column));
          had_err = -1;
        }
        if (gt_str_length(query) == 0 ||
            strcmp(gt_str_get(query),
                   gt_splitter_get_token(splitter, query_column)) != 0) {
          gt_str_set(query, gt_splitter_get_token(splitter, query_column));
          gt_logger_log(logger, "new query: %s", gt_str_get(query));
          querycount++;
        }
        if (!had_err && querycount == arguments->max_queries) {
          /* batch is full: search the sequences collected so far */
          hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
          if (table_name_length != 0)
            gt_str_append_uword(arguments->outtable_filename, filecount++);
          had_err =
            hmmsearch_call_fine_search(table_name_length != 0 ?
                                         arguments->outtable_filename :
                                         NULL,
                                       gt_str_get(fine_fasta_filename),
                                       gt_str_get(arguments->hmmsearch_path),
                                       gt_str_get(arguments->hmm),
                                       logger, err);
          gt_rbtree_clear(sequences);
          gt_str_set_length(fine_fasta_filename, fine_fasta_name_length);
          if (table_name_length != 0)
            gt_str_set_length(arguments->outtable_filename,
                              table_name_length);
          querycount = 0;
        }
        if (!had_err) {
          /* a result of 0 is treated as failure of the callee */
          if (gt_condenseq_each_redundant_seq(ces, uid,
                                              hmmsearch_process_seq,
                                              sequences, err) == 0) {
            had_err = -1;
          }
        }
      }
      gt_splitter_reset(splitter);
    }
    gt_str_reset(line);
  }
  gt_splitter_delete(splitter);
  gt_str_delete(line);
  gt_str_delete(query);
  gt_xfclose(table);

  if (!had_err) {
    /* search whatever is left after the last line of the table */
    hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
    if (table_name_length != 0)
      gt_str_append_uword(arguments->outtable_filename, filecount++);
    had_err =
      hmmsearch_call_fine_search(table_name_length != 0 ?
                                   arguments->outtable_filename :
                                   NULL,
                                 gt_str_get(fine_fasta_filename),
                                 gt_str_get(arguments->hmmsearch_path),
                                 gt_str_get(arguments->hmm),
                                 logger, err);
    /* as after every in-loop search: hand the caller's table name back
       without the appended number */
    if (table_name_length != 0)
      gt_str_set_length(arguments->outtable_filename, table_name_length);
  }
  /* NOTE(review): filecount starts at 1 and is incremented after each use,
     so the value logged here is one higher than the number of suffixes
     handed out -- confirm which count is intended. */
  gt_log_log("created " GT_WU " files", filecount);
  gt_rbtree_delete(sequences);
  gt_str_delete(fine_fasta_filename);
  return had_err;
}
/* Unit test for GtBlock: exercises element insertion, range, caption and
   strand accessors and gt_block_get_max_height() under changing "bar_height"
   style settings.

   The calls to gt_block_get_max_height() report into a private error object
   (<testerr>), so that the subsequent !gt_error_is_set(testerr) checks
   really observe those calls and <err> is only written by gt_ensure().
   Returns the value of had_err as maintained by gt_ensure(). */
int gt_block_unit_test(GtError *err)
{
  GtRange r1, r2, r_temp, b_range;
  GtStrand s;
  GtGenomeNode *gn1, *gn2;
  GtElement *e1, *e2;
  double height;
  GtBlock *b;
  GtStr *seqid, *caption1, *caption2;
  int had_err = 0;
  GtStyle *sty;
  GtError *testerr;
  gt_error_check(err);

  seqid = gt_str_new_cstr("seqid");
  caption1 = gt_str_new_cstr("foo");
  caption2 = gt_str_new_cstr("bar");
  testerr = gt_error_new();

  r1.start = 10UL;
  r1.end = 50UL;

  r2.start = 40UL;
  r2.end = 50UL;

  gn1 = gt_feature_node_new(seqid, gt_ft_gene, r1.start, r1.end,
                            GT_STRAND_FORWARD);
  gn2 = gt_feature_node_new(seqid, gt_ft_exon, r2.start, r2.end,
                            GT_STRAND_FORWARD);

  e1 = gt_element_new((GtFeatureNode*) gn1);
  e2 = gt_element_new((GtFeatureNode*) gn2);

  b = gt_block_new();

  /* test gt_block_insert_element */
  gt_ensure((0UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn1);
  gt_ensure((1UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn2);
  gt_ensure((2UL == gt_block_get_size(b)));

  /* test gt_block_set_range & gt_block_get_range */
  r_temp = gt_range_join(&r1, &r2);
  gt_block_set_range(b, r_temp);
  b_range = gt_block_get_range(b);
  gt_ensure((0 == gt_range_compare(&b_range, &r_temp)));
  gt_ensure((1 == gt_range_compare(&r2, &r_temp)));

  /* test gt_block_set_caption & gt_block_get_caption */
  gt_block_set_caption(b, caption1);
  gt_ensure((0 == gt_str_cmp(gt_block_get_caption(b), caption1)));
  gt_ensure((0 != gt_str_cmp(gt_block_get_caption(b), caption2)));

  /* test gt_block_set_strand & gt_block_get_strand */
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_UNKNOWN == s));
  gt_block_set_strand(b, GT_STRAND_FORWARD);
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_FORWARD == s));

  /* test gt_block_get_max_height(): without settings the default applies */
  sty = gt_style_new(err);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == BAR_HEIGHT_DEFAULT);
  /* the larger of the per-type heights is expected */
  gt_style_set_num(sty, "exon", "bar_height", 42);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_set_num(sty, "gene", "bar_height", 23);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_unset(sty, "exon", "bar_height");
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 23);

  /* NOTE(review): caption1 is not deleted here -- presumably the block took
     it over in gt_block_set_caption(); confirm. */
  gt_str_delete(caption2);
  gt_str_delete(seqid);
  gt_element_delete(e1);
  gt_element_delete(e2);
  gt_block_delete(b);
  gt_style_delete(sty);
  gt_error_delete(testerr);
  gt_genome_node_delete(gn1);
  gt_genome_node_delete(gn2);

  return had_err;
}
/* Unit test for GtTrack.

   Four blocks, each holding a gene with one exon child, are inserted one
   after another into a track created with a bases line breaker. After the
   insertions the number of lines and the track height are compared with
   values computed from the style settings (or their defaults), and the
   height is checked again while "bar_height" is set and unset for the
   "exon", "gene" and "format" sections.
   Returns the value of had_err as maintained by gt_ensure(). */
int gt_track_unit_test(GtError *err)
{
  /* start/end coordinates of the four test features */
  static const unsigned long coords[4][2] = {
    {  100UL, 1000UL },
    { 1001UL, 1500UL },
    {  700UL, 1200UL },
    {   10UL,  200UL }
  };
  int had_err = 0;
  unsigned long k;
  GtRange ranges[4];
  GtGenomeNode *genes[4],
               *exons[4];
  GtBlock *blocks[4];
  GtTrack *track;
  GtStr *title;
  GtStyle *style;
  GtLineBreaker *breaker;
  double height,
         value,
         track_overhead = 0,  /* height of a track without any line */
         line_overhead = 0;   /* vertical space added per line */
  gt_error_check(err);

  title = gt_str_new_cstr("test");

  /* build the annotation: one gene with one exon per range */
  for (k = 0; k < 4; k++) {
    GtFeatureNode *gene, *exon;
    ranges[k].start = coords[k][0];
    ranges[k].end = coords[k][1];
    genes[k] = gt_feature_node_new(title, gt_ft_gene, ranges[k].start,
                                   ranges[k].end, GT_STRAND_FORWARD);
    exons[k] = gt_feature_node_new(title, gt_ft_exon, ranges[k].start,
                                   ranges[k].end, GT_STRAND_FORWARD);
    gene = (GtFeatureNode*) genes[k];
    exon = (GtFeatureNode*) exons[k];
    gt_feature_node_add_child(gene, exon);
    gt_feature_node_add_attribute(gene, GT_GFF_NAME, "parent");
    gt_feature_node_add_attribute(exon, GT_GFF_NAME, "child");
  }

  /* one block per gene/exon pair */
  for (k = 0; k < 4; k++) {
    blocks[k] = gt_block_new();
    gt_block_set_range(blocks[k], ranges[k]);
    gt_block_insert_element(blocks[k], (GtFeatureNode*) genes[k]);
    gt_block_insert_element(blocks[k], (GtFeatureNode*) exons[k]);
  }

  breaker = gt_line_breaker_bases_new();
  style = gt_style_new(err);

  /* collect the expected fixed parts of the track height, falling back to
     the defaults for settings the style does not provide */
  if (gt_style_get_num(style, "format", "track_caption_font_size", &value,
                       NULL, err) == GT_STYLE_QUERY_NOT_SET)
    value = TEXT_SIZE_DEFAULT;
  track_overhead += value;
  if (gt_style_get_num(style, "format", "track_caption_space", &value,
                       NULL, err) == GT_STYLE_QUERY_NOT_SET)
    value = CAPTION_BAR_SPACE_DEFAULT;
  track_overhead += value;
  if (gt_style_get_num(style, "format", "track_vspace", &value,
                       NULL, err) == GT_STYLE_QUERY_NOT_SET)
    value = TRACK_VSPACE_DEFAULT;
  track_overhead += value;
  if (gt_style_get_num(style, "format", "bar_vspace", &line_overhead,
                       NULL, err) == GT_STYLE_QUERY_NOT_SET)
    line_overhead = BAR_VSPACE_DEFAULT;

  /* a fresh track has a title, no lines and only the fixed height */
  track = gt_track_new(title, GT_UNDEF_ULONG, true, breaker);
  gt_ensure(had_err, track);
  gt_ensure(had_err, gt_track_get_title(track) == title);
  gt_ensure(had_err, gt_track_get_number_of_lines(track) == 0);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err, height == track_overhead);
  gt_ensure(had_err, !gt_error_is_set(err));

  /* first block: one line expected */
  gt_ensure(had_err, gt_track_insert_block(track, blocks[0], err) == 0);
  gt_ensure(had_err, !gt_error_is_set(err));
  gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err,
            height == track_overhead + line_overhead + BAR_HEIGHT_DEFAULT);
  gt_ensure(had_err, !gt_error_is_set(err));

  /* second block: still one line expected */
  gt_ensure(had_err, gt_track_insert_block(track, blocks[1], err) == 0);
  gt_ensure(had_err, !gt_error_is_set(err));
  gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err,
            height == track_overhead + line_overhead + BAR_HEIGHT_DEFAULT);
  gt_ensure(had_err, !gt_error_is_set(err));

  /* third block: two lines expected */
  gt_ensure(had_err, gt_track_insert_block(track, blocks[2], err) == 0);
  gt_ensure(had_err, !gt_error_is_set(err));
  gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);

  /* fourth block: still two lines expected */
  gt_ensure(had_err, gt_track_insert_block(track, blocks[3], err) == 0);
  gt_ensure(had_err, !gt_error_is_set(err));
  gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err,
            height == track_overhead + 2*(line_overhead + BAR_HEIGHT_DEFAULT));
  gt_ensure(had_err, !gt_error_is_set(err));

  /* bar heights configured per type and globally */
  gt_style_set_num(style, "exon", "bar_height", 42);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err, height == track_overhead + 2*(line_overhead+42));
  gt_ensure(had_err, !gt_error_is_set(err));

  gt_style_set_num(style, "gene", "bar_height", 23);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err, height == track_overhead + 2*(line_overhead+42));
  gt_ensure(had_err, !gt_error_is_set(err));

  gt_style_unset(style, "exon", "bar_height");
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err, height == track_overhead + 2*(line_overhead+23));
  gt_ensure(had_err, !gt_error_is_set(err));

  gt_style_unset(style, "gene", "bar_height");
  gt_style_set_num(style, "format", "bar_height", 99);
  gt_ensure(had_err, gt_track_get_height(track, &height, style, err) == 0);
  gt_ensure(had_err, height == track_overhead + 2*(line_overhead+99));
  gt_ensure(had_err, !gt_error_is_set(err));

  gt_ensure(had_err, gt_track_get_number_of_discarded_blocks(track) == 0);

  gt_track_delete(track);
  gt_str_delete(title);
  gt_style_delete(style);
  for (k = 0; k < 4; k++) {
    gt_block_delete(blocks[k]);
    gt_genome_node_delete(genes[k]);
  }
  return had_err;
}
static void make_sequence_region(GtHashmap *sequence_regions, GtStr *sequenceid, GthRegionFactory *srf, GthInput *input, GtUword filenum, GtUword seqnum) { GtUword offset_is_defined = false; GtRange range, descrange; GtGenomeNode *sr = NULL; gt_assert(sequence_regions && sequenceid && srf && input); if (gth_input_use_substring_spec(input)) { range.start = gth_input_genomic_substring_from(input); range.end = gth_input_genomic_substring_to(input); } else { range = gth_input_get_relative_genomic_range(input, filenum, seqnum); } if (srf->use_desc_ranges) { GtStr *description = gt_str_new(); gth_input_get_genomic_description(input, description, filenum, seqnum); if (!gt_parse_description_range(gt_str_get(description), &descrange)) offset_is_defined = true; gt_str_delete(description); } if (offset_is_defined) range = gt_range_offset(&range, descrange.start); else range = gt_range_offset(&range, 1); /* 1-based */ if (!gt_str_length(sequenceid) || (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) && !offset_is_defined)) { /* sequenceid is empty or exists already (and no offset has been parsed) -> make one up */ GtStr *seqid; char *base; base = gt_basename(gth_input_get_genomic_filename(input, filenum)); seqid = gt_str_new_cstr(base); gt_free(base); gt_str_append_char(seqid, '|'); gt_str_append_uword(seqid, seqnum + 1); /* 1-based */ seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD); gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid))); gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid)); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)), sr); gt_str_delete(seqid); } else { /* sequenceid does not exists already (or an offset has been parsed) -> use this one */ if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) { /* no sequence region with this id exists -> create 
one */ gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid)); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)), sr); } else { GtRange prev_range, new_range; /* sequence region with this id exists already -> modify range */ sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid)); gt_assert(sr); prev_range = gt_genome_node_get_range(sr); new_range = gt_range_join(&prev_range, &range); gt_genome_node_set_range(sr, &new_range); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); } } gt_assert(sr); }