/* Add the term described by <stanza> as a new node to <type_graph>.
   The stanza must contain exactly one "id" and one "name" entry and the id
   must not have been added before (all of this is asserted). The id/name
   pair is registered in both lookup directions and every "is_a" entry as
   well as every "relationship" entry starting with PART_OF, MEMBER_OF or
   INTEGRAL_PART_OF is stored in the new node. The three relationship kinds
   are all recorded as part_of edges. */
void gt_type_graph_add_stanza(GtTypeGraph *type_graph,
                              const GtOBOStanza *stanza)
{
  const char *id_value, *name_value;
  GtUword i, size;
  GtTypeNode *node;
  GtStr *buf;
  gt_assert(type_graph && stanza && !type_graph->ready);
  gt_assert(gt_obo_stanza_size(stanza, "id") == 1);
  gt_assert(gt_obo_stanza_size(stanza, "name") == 1);
  id_value = gt_symbol(gt_obo_stanza_get_value(stanza, "id", 0));
  name_value = gt_symbol(gt_obo_stanza_get_value(stanza, "name", 0));
  gt_assert(id_value);
  gt_assert(name_value);
  gt_assert(!gt_hashmap_get(type_graph->nodemap, id_value));
  /* the index of the new node is the current number of nodes */
  node = gt_type_node_new(gt_array_size(type_graph->nodes), id_value);
  gt_hashmap_add(type_graph->name2id, (char*) name_value, (char*) id_value);
  gt_hashmap_add(type_graph->id2name, (char*) id_value, (char*) name_value);
  gt_hashmap_add(type_graph->nodemap, (char*) id_value, node);
  gt_array_add(type_graph->nodes, node);
  buf = gt_str_new();
  /* store is_a entries in node, if necessary; only the part of the value up
     to the first blank or newline is used as the parent id */
  size = gt_obo_stanza_size(stanza, "is_a");
  for (i = 0; i < size; i++) {
    const char *id = gt_obo_stanza_get_value(stanza, "is_a", i);
    gt_str_reset(buf);
    gt_str_append_cstr_nt(buf, id, strcspn(id, " \n"));
    gt_type_node_is_a_add(node, gt_symbol(gt_str_get(buf)));
  }
  /* store part_of-like entries in node, if necessary */
  size = gt_obo_stanza_size(stanza, "relationship");
  for (i = 0; i < size; i++) {
    /* checked in this order, the first matching prefix wins */
    const char *relations[] = { PART_OF, MEMBER_OF, INTEGRAL_PART_OF };
    const char *rel = gt_obo_stanza_get_value(stanza, "relationship", i);
    GtUword r;
    gt_str_reset(buf);
    for (r = 0; r < sizeof relations / sizeof relations[0]; r++) {
      GtUword relation_len = strlen(relations[r]);
      if (!strncmp(rel, relations[r], relation_len)) {
        const char *target = rel + relation_len;
        /* skip the single separator character after the relation name, but
           never step over the terminating NUL of a value which consists of
           the relation name only */
        if (*target != '\0')
          target++;
        gt_str_append_cstr_nt(buf, target, strcspn(target, " \n"));
        gt_type_node_part_of_add(node, gt_symbol(gt_str_get(buf)));
        break;
      }
    }
  }
  gt_str_delete(buf);
}
/* Copy the value of result column <field_no> of the statement <st> into
   <result> (which is reset first). Only string, blob and bit column types
   are accepted; for any other buffer type <err> is set and -1 is returned.
   Returns 0 on success. */
static int gt_rdb_stmt_mysql_get_string(GtRDBStmt *st, GtUword field_no,
                                        GtStr *result, GtError *err)
{
  GtRDBStmtMySQL *stm;
  int had_err = 0;
  gt_assert(st && result);
  gt_error_check(err);
  stm = gt_rdb_stmt_mysql_cast(st);
  /* NOTE(review): macro defined outside this block; presumably it makes sure
     the statement was executed and sets had_err otherwise -- confirm */
  CHECK_INIT_STATEMENT
  if (!had_err) {
    switch (stm->results[field_no].buffer_type) {
      case MYSQL_TYPE_STRING:
      case MYSQL_TYPE_VAR_STRING:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_BIT:
        /* these types can be delivered as a string */
        break;
      default:
        gt_error_set(err, "incompatible type!");
        had_err = -1;
        break;
    }
  }
  if (!had_err) {
    /* the buffer is copied with the explicit length reported for the field */
    gt_str_reset(result);
    gt_str_append_cstr_nt(result, (char*) stm->results[field_no].buffer,
                          *stm->results[field_no].length);
  }
  return had_err;
}
static cairo_status_t str_write_func(void *closure, const unsigned char *data, unsigned int length) { GtStr *stream = closure; gt_assert(stream); gt_str_append_cstr_nt(stream, (char*) data, length); return CAIRO_STATUS_SUCCESS; }
/* Set the second sequence id of <match> to the first <len> characters of
   <seqid>. The string object is created on first use and reused (after a
   reset) afterwards. */
void gt_match_set_seqid2_nt(GtMatch *match, const char *seqid, GtUword len)
{
  gt_assert(match && seqid);
  if (match->seqid2 != NULL)
    gt_str_reset(match->seqid2);
  else
    match->seqid2 = gt_str_new();
  gt_str_append_cstr_nt(match->seqid2, seqid, len);
}
/* Store the directory part of <file> in <path>, i.e. everything in front of
   the last GT_PATH_SEPARATOR. <path> is reset first and stays empty if
   <file> contains no separator or if the only separator is its very first
   character. */
void gt_file_dirname(GtStr *path, const char *file)
{
  const char *last_separator;
  gt_str_reset(path);
  last_separator = strrchr(file, GT_PATH_SEPARATOR);
  /* a separator at position 0 yields an empty prefix, nothing to append */
  if (last_separator != NULL && last_separator > file)
    gt_str_append_cstr_nt(path, file, (GtUword) (last_separator - file));
}
/* Append the first end - start + 1 characters of <original_sequence> to the
   spliced sequence of <ss> and record the positions <start>..<end> (both
   inclusive) in its position mapping. Pieces must be added in ascending
   order, which is asserted against the last position recorded so far. */
void gt_splicedseq_add(Splicedseq *ss, unsigned long start, unsigned long end,
                       const char *original_sequence)
{
  unsigned long pos;
  gt_assert(ss && start <= end && original_sequence);
  gt_str_append_cstr_nt(ss->splicedseq, original_sequence, end - start + 1);
  /* make sure elements are added in ascending order */
  gt_assert(gt_array_size(ss->positionmapping) == 0 ||
            start > *(unsigned long*) gt_array_get_last(ss->positionmapping));
  pos = start;
  while (pos <= end) {
    gt_array_add(ss->positionmapping, pos);
    pos++;
  }
}
/* Deliver the next node of the stream in <gn>.
   As long as the input stream has nodes they are passed through after having
   been shown to the collecting visitor. Once the input is exhausted, one
   sequence node per collected sequence id is produced (one per call); the
   sequence is fetched from the region mapping. When no sequence id is left,
   <gn> is set to NULL. Returns 0 on success and a non-zero value with <err>
   set by the failing callee otherwise. */
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *s;
  GtStr *seqid, *seqstr;
  GtUword len;
  char *seq = NULL;
  int had_err;
  gt_error_check(err);
  s = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  had_err = gt_node_stream_next(s->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, s->collect_vis, err);
  if (had_err || *gn)
    return had_err;

  /* the input is exhausted: fetch the list of seen seqids once */
  if (!s->seqids)
    s->seqids = gt_cstr_table_get_all(s->seqid_table);
  gt_assert(s->seqids);

  if (s->cur_seqid >= gt_str_array_size(s->seqids)) {
    /* all sequence nodes have been delivered */
    *gn = NULL;
    return 0;
  }

  /* build the sequence node for the current seqid */
  seqid = gt_str_new();
  seqstr = gt_str_new();
  gt_str_append_cstr(seqid, gt_str_array_get(s->seqids, s->cur_seqid));
  had_err = gt_region_mapping_get_sequence_length(s->rm, &len, seqid, err);
  if (!had_err) {
    /* request the complete sequence, positions 1..len */
    had_err = gt_region_mapping_get_sequence(s->rm, &seq, seqid, 1, len, err);
  }
  if (!had_err) {
    gt_str_append_cstr_nt(seqstr, seq, len);
    *gn = gt_sequence_node_new(gt_str_get(seqid), seqstr);
  }
  /* the seqid counts as processed even if fetching its sequence failed */
  s->cur_seqid++;
  gt_free(seq);
  gt_str_delete(seqid);
  gt_str_delete(seqstr);
  return had_err;
}
/* Return the path of the "xrf_abbr" directory below the gtdata directory,
   or NULL if gt_get_gtdata_path() returned NULL. The program name stored in
   <err> (cut at the first blank) is used to locate gtdata. */
static GtStr* get_xrf_path(GtError *err)
{
  const char *progname;
  GtStr *prog, *xrf_path;
  gt_error_check(err);
  progname = gt_error_get_progname(err);
  gt_assert(progname != NULL);
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, progname,
                        gt_cstr_length_up_to_char(progname, ' '));
  xrf_path = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (xrf_path != NULL)
    gt_str_append_cstr(xrf_path, "/xrf_abbr/");
  return xrf_path;
}
/* Create a new alphabet from the symbol map file <filename>.
   If <filename> does not exist as given, it is looked up in the "trans"
   directory below the gtdata directory (located via the program name stored
   in <err>). Returns NULL if the gtdata directory cannot be determined or
   if reading the symbol map fails. */
GtAlphabet* gt_alphabet_new_from_file_no_suffix(const char *filename,
                                                GtError *err)
{
  GtStr *transpath = NULL;
  bool haserr = false;
  GtAlphabet *alpha;
  gt_assert(filename);
  gt_error_check(err);
  alpha = gt_alphabet_new_empty();
  if (!gt_file_exists(filename)) {
    GtStr *prog;
    const char *progname = gt_error_get_progname(err);
    gt_assert(progname != NULL);
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, progname,
                          gt_cstr_length_up_to_char(progname, ' '));
    transpath = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (transpath == NULL) {
      /* gt_get_gtdata_path() can return NULL (other callers check for it);
         appending to a NULL string must be avoided.
         NOTE(review): <err> is presumably set by the callee in this case */
      haserr = true;
    }
    else {
      gt_str_append_cstr(transpath, "/trans/");
      gt_str_append_cstr(transpath, filename);
    }
  }
  if (!haserr &&
      read_symbolmap(alpha,
                     transpath == NULL ? filename : gt_str_get(transpath),
                     err) != 0) {
    haserr = true;
  }
  gt_str_delete(transpath);
  if (haserr) {
    gt_alphabet_delete(alpha);
    return NULL;
  }
  return alpha;
}
/* Append the description of the sequence identified by <md5_seqid> to
   <desc>. The hash is taken from the characters following the MD5 prefix of
   the id and looked up in the MD5 table of the collection. Returns -1 and
   sets <err> if an id of at least GT_MD5_SEQID_TOTAL_LEN characters does not
   carry the separator at its expected position or if no sequence with the
   given hash is known; returns 0 otherwise. */
static int gt_encseq_col_md5_to_description(GtSeqCol *sc, GtStr *desc,
                                            GtStr *md5_seqid, GtError *err)
{
  GtUword seqnum = GT_UNDEF_UWORD;
  char seqid[GT_MD5_SEQID_HASH_LEN + 1];
  const char *cstrseqid;
  int had_err = 0;
  GtEncseqCol *esc;
  esc = gt_encseq_col_cast(sc);
  gt_error_check(err);
  gt_assert(esc && desc && md5_seqid && err);
  gt_assert(gt_md5_seqid_has_prefix(gt_str_get(md5_seqid)));
  cstrseqid = gt_str_get(md5_seqid);
  /* never hand an uninitialised buffer to the table lookup */
  seqid[0] = '\0';
  if (gt_str_length(md5_seqid) >= GT_MD5_SEQID_TOTAL_LEN &&
      cstrseqid[GT_MD5_SEQID_TOTAL_LEN-1] != GT_MD5_SEQID_SEPARATOR) {
    gt_error_set(err, "MD5 sequence id %s not terminated with '%c'",
                 gt_str_get(md5_seqid), GT_MD5_SEQID_SEPARATOR);
    had_err = -1;
  }
  if (!had_err && gt_str_length(md5_seqid) > GT_MD5_SEQID_PREFIX_LEN) {
    /* copy at most GT_MD5_SEQID_HASH_LEN characters following the prefix;
       for ids shorter than GT_MD5_SEQID_TOTAL_LEN the copy stops at the end
       of the id.
       NOTE(review): assumes gt_str_get() returns a NUL-terminated string */
    strncpy(seqid, cstrseqid + GT_MD5_SEQID_PREFIX_LEN,
            GT_MD5_SEQID_HASH_LEN);
    seqid[GT_MD5_SEQID_HASH_LEN] = '\0';
  }
  if (!had_err) {
    /* only look up well-formed ids, so that an error reported above is not
       replaced by a "not found" message */
    seqnum = gt_md5_tab_map(esc->md5_tab, seqid);
    if (seqnum != GT_UNDEF_UWORD) {
      const char *cdesc;
      GtUword desc_len;
      gt_assert(seqnum < gt_encseq_num_of_sequences(esc->encseq));
      cdesc = gt_encseq_description(esc->encseq, &desc_len, seqnum);
      /* the description is appended with its explicit length */
      gt_str_append_cstr_nt(desc, cdesc, desc_len);
    }
    else {
      gt_error_set(err, "sequence %s not found", gt_str_get(md5_seqid));
      had_err = -1;
    }
  }
  return had_err;
}
/* If the feature <gn> has the given <type>, append its sequence (fetched
   from <region_mapping>) to <sequence> and update the join state:
   <reverse_strand> is set as soon as a reverse strand feature is seen,
   <phase> receives the phase of every reverse strand feature, respectively
   of the first forward strand feature of the type (later forward strand
   features set it to GT_PHASE_UNDEFINED). Features of other types are
   ignored. Returns the error code of the sequence retrieval. */
static int extract_join_feature(GtGenomeNode *gn, const char *type,
                                GtRegionMapping *region_mapping,
                                GtStr *sequence, bool *reverse_strand,
                                bool *first_child_of_type_seen, GtPhase *phase,
                                GtError *err)
{
  char *raw_sequence;
  GtFeatureNode *fn;
  GtRange range;
  int had_err = 0;
  gt_error_check(err);
  fn = gt_feature_node_cast(gn);
  gt_assert(fn);

  if (!gt_feature_node_has_type(fn, type))
    return had_err;

  if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
    *reverse_strand = true;
    *phase = gt_feature_node_get_phase(fn);
  }
  else if (!(*first_child_of_type_seen)) {
    *first_child_of_type_seen = true;
    *phase = gt_feature_node_get_phase(fn);
  }
  else {
    *phase = GT_PHASE_UNDEFINED;
  }

  range = gt_genome_node_get_range(gn);
  had_err = gt_region_mapping_get_sequence(region_mapping, &raw_sequence,
                                           gt_genome_node_get_seqid(gn),
                                           range.start, range.end, err);
  if (!had_err) {
    gt_str_append_cstr_nt(sequence, raw_sequence, gt_range_length(&range));
    gt_free(raw_sequence);
  }
  return had_err;
}
/* Runner of the sketch tool.
   argv[parsed_args] is the name of the output image, the remaining
   arguments are the annotation input files. The annotations are read into
   an in-memory feature index (optionally adding introns and/or echoing GFF3
   output), then a diagram for the requested sequence region and range is
   laid out with the selected style and rendered as PDF, PS, SVG or (by
   default) PNG. Returns 0 on success, a non-zero value with <err> set
   otherwise. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL,
       *owned_seqid = NULL; /* set only if seqid has to be freed by us */
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid = false;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  /* locate the default style file below the gtdata directory */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  /* the first non-option argument is the output file */
  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0) {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    }
    else if (strcmp(gt_str_get(arguments->input), "bed") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    }
    else if (strcmp(gt_str_get(arguments->input), "gtf") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    /* NOTE(review): the returned string is released with gt_free() below,
       i.e. it is assumed to be owned by the caller -- confirm */
    owned_seqid = gt_feature_index_get_first_seqid(features, err);
    seqid = owned_seqid;
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* borrowed from the argument string, must not be freed here */
    seqid = gt_str_get(arguments->seqid);
  }

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid, err);
  }
  if (!had_err) {
    /* undefined range borders default to the borders of the region */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    if (!had_err) {
      if (gt_str_length(arguments->stylefile) == 0) {
        gt_str_append_str(arguments->stylefile, defaultstylefile);
      }
      else if (gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      }
      else {
        /* only reported if no earlier error is pending, so a failure to
           create the style is not replaced by this message */
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                     gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      /* every format other than pdf, ps and svg is rendered as PNG */
      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          /* print the image map coordinates and the type of each element */
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          /* render into a string first, then write the string to the file */
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          }
          else {
            had_err = -1;
          }
          gt_str_delete(str);
        }
        else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file, err);
        }
      }
    }
  }

  /* free */
  /* only the seqid obtained from the feature index is released; the one
     borrowed from arguments->seqid still belongs to that string */
  gt_free(owned_seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err) { GtFeatureNode *fn; GtRange range; unsigned int phase_offset = 0; char *outsequence; const char *target; int had_err = 0; gt_error_check(err); fn = gt_genome_node_cast(gt_feature_node_class(), gn); gt_assert(fn); if (seqid) gt_str_append_str(seqid, gt_genome_node_get_seqid(gn)); if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) { had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } if (!had_err) { if (join) { GtFeatureNodeIterator *fni; GtFeatureNode *child; bool reverse_strand = false, first_child = true, first_child_of_type_seen = false; GtPhase phase = GT_PHASE_UNDEFINED; /* in this case we have to traverse the children */ fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn)); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (first_child) { if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) { gt_str_array_reset(target_ids); had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } first_child = false; } if (!had_err) { if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) { had_err = -1; } if (phase != GT_PHASE_UNDEFINED) { phase_offset = (int) phase; } } } gt_feature_node_iterator_delete(fni); gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED); if (!had_err && gt_str_length(sequence)) { if (reverse_strand) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } else if (gt_feature_node_get_type(fn) == type) { GtPhase phase = gt_feature_node_get_phase(fn); gt_assert(!had_err); if (phase != GT_PHASE_UNDEFINED) 
phase_offset = (unsigned int) phase; /* otherwise we only have to look at this feature */ range = gt_genome_node_get_range(gn); gt_assert(range.start); /* 1-based coordinates */ had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } } if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) { *out_phase_offset = phase_offset; } return had_err; }
/* Runner of the speck tool.
   Reads the GFF3 files given after the options, checks the nodes against
   the specification file with a spec visitor (optionally with region
   mapping, type checker and feature index) and renders the collected
   results with the selected output driver. The output driver is either an
   existing file named by the format argument or a file of that name in
   "spec/output_drivers" below the gtdata directory. Returns 0 on success,
   a non-zero value with <err> set otherwise. */
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                           void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib = NULL;
  SpeccheckArguments *arguments = tool_arguments;
  int had_err = 0;
  gt_error_check(err);
  res = gt_spec_results_new();
  gt_assert(res);

  /* determine the output driver file */
  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  }
  else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                    gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    /* the path can be NULL, it must not be appended to in that case;
       passing <err> lets the callee report why the lookup failed */
    speclib = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (!speclib) {
      had_err = -1;
    }
    else {
      gt_str_append_cstr(speclib, "/spec/output_drivers/");
      gt_str_append_str(speclib, arguments->format);
      if (!gt_file_exists(gt_str_get(speclib))) {
        gt_error_set(err, "output driver file \"%s\" does not exist",
                     gt_str_get(speclib));
        had_err = -1;
      }
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    /* fall through to the common cleanup instead of returning right here,
       so that arr and speclib are released as well */
    if (!spec_visitor)
      had_err = -1;
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them
       first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream,
                                                               arr, err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        /* the checker reads the collected nodes back from the array */
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    }
    else {
      gt_timer_start(t);
    }

    if (!had_err) {
      /* NOTE(review): spec_visitor is never deleted in this function;
         presumably the visitor stream takes ownership. If so, it leaks on
         the error paths on which no checker stream is created -- confirm */
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                              arguments->outfp,
                                              gt_str_get(arguments->specfile),
                                              arguments->verbose,
                                              arguments->colored,
                                              gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}
/* Spliced alignment processing callback which distributes alignments onto
   subset files.
   <data> is the Store_in_subset_file_data state, <sa> the alignment (it is
   deleted by this function in every case) and <outputfilename> the name of
   the file the alignment was read from. Alignments rejected by the filter
   are discarded. All others are written to the subset file selected by
   their score or coverage (depending on the split mode); the file is
   created on first use. Returns -1 with <err> set if a subset file exists
   already and overwriting was not forced, 0 otherwise. */
static int store_in_subset_file(void *data, GthSA *sa,
                                const char *outputfilename, GtError *err)
{
  Store_in_subset_file_data *sd = (Store_in_subset_file_data*) data;
  double split_determing_percentage = 0.0;
  unsigned long filenum;
  char filenamesuffix[4]; /* three characters plus terminating NUL */
  int had_err = 0;

  gt_error_check(err);

  /* filter before we do any further processing */
  if (gth_sa_filter_filter_sa(sd->sa_filter, sa)) {
    /* and free it afterwards */
    gth_sa_delete(sa);
    /* discard */
    return 0;
  }

  /* check whether we got a new output file to process */
  if (!sd->current_outputfilename) {
    sd->current_outputfilename = gt_cstr_dup(outputfilename);
  }
  else if (strcmp(sd->current_outputfilename, outputfilename)) {
    /* close current output files */
    close_output_files(sd);
    gt_free(sd->current_outputfilename);
    /* remember the new name; keeping the freed pointer would make the next
       call compare against (and free) released memory */
    sd->current_outputfilename = gt_cstr_dup(outputfilename);
  }

  /* determine in which file the current sa needs to be put */
  switch (sd->gthsplitinfo->splitmode) {
    case ALIGNMENTSCORE_SPLIT:
      split_determing_percentage = gth_sa_score(sa);
      strcpy(filenamesuffix, "scr");
      break;
    case COVERAGE_SPLIT:
      split_determing_percentage = gth_sa_coverage(sa);
      strcpy(filenamesuffix, "cov");
      break;
    default: gt_assert(0);
  }
  gt_assert(split_determing_percentage >= 0.0);
  /* XXX: change into an assertion when coverage problem is fixed */
  if (split_determing_percentage > 1.0)
    split_determing_percentage = 1.0;

  /* a value of exactly 1.0 belongs into the last file */
  if (split_determing_percentage == 1.0)
    filenum = sd->num_of_subset_files - 1;
  else {
    filenum = floor(split_determing_percentage * 100.0 /
                    sd->gthsplitinfo->range);
  }
  gt_assert(filenum < sd->num_of_subset_files);

  /* make sure the file exists and is open */
  if (!sd->subset_files[filenum]) {
    gt_assert(sd->subset_filenames[filenum] == NULL);
    /* file name: <basename>.<suffix><lower bound>-<upper bound><mode
       suffix> */
    sd->subset_filenames[filenum] = gt_str_new();
    gt_str_append_cstr_nt(sd->subset_filenames[filenum], outputfilename,
                          gt_file_basename_length(outputfilename));
    gt_str_append_char(sd->subset_filenames[filenum], '.');
    gt_str_append_cstr(sd->subset_filenames[filenum], filenamesuffix);
    gt_str_append_ulong(sd->subset_filenames[filenum],
                        filenum * sd->gthsplitinfo->range);
    gt_str_append_char(sd->subset_filenames[filenum], '-');
    gt_str_append_ulong(sd->subset_filenames[filenum],
                        (filenum + 1) * sd->gthsplitinfo->range);
    gt_str_append_cstr(sd->subset_filenames[filenum],
                       gt_file_mode_suffix(sd->gthsplitinfo->file_mode));

    /* if not disabled by -force, check if file already exists */
    if (!sd->gthsplitinfo->force) {
      /* NOTE(review): if the file exists, the handle opened for reading
         stays in subset_files[filenum] and is not closed here -- confirm
         that callers stop processing after this error */
      sd->subset_files[filenum] =
        gt_file_open(sd->gthsplitinfo->file_mode,
                     gt_str_get(sd->subset_filenames[filenum]), "r", NULL);
      if (sd->subset_files[filenum]) {
        gt_error_set(err, "file \"%s\" exists already. use option -%s to "
                     "overwrite",
                     gt_str_get(sd->subset_filenames[filenum]),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
      }
    }
    if (!had_err) {
      /* open split file for writing */
      sd->subset_files[filenum] =
        gt_file_xopen_file_mode(sd->gthsplitinfo->file_mode,
                                gt_str_get(sd->subset_filenames[filenum]),
                                "w");
      /* store XML header in file */
      gth_xml_show_leader(true, sd->subset_files[filenum]);
    }
  }

  /* put it there */
  if (!had_err) {
    gth_xml_inter_sa_visitor_set_outfp(sd->sa_visitor,
                                       sd->subset_files[filenum]);
    gth_sa_visitor_visit_sa(sd->sa_visitor, sa);
  }

  /* adjust counter */
  if (!had_err)
    sd->subset_file_sa_counter[filenum]++;

  /* and free it afterwards */
  gth_sa_delete(sa);

  return had_err;
}
/* Append the description of the sequence with the given <seqid> to <desc>.
   In raw sequence mode the fixed description "<rawseq>" is used. Sequence
   ids with MD5 prefix are resolved via their hash. All other ids are
   resolved depending on the mode of <rm>: via the seqid to sequence number
   mapping (usedesc), via the sequence number encoded in an id of the form
   'seqX' (useseqno), via a search in the descriptions (matchdesc) or --
   if none of these modes is set -- by taking the description of the first
   sequence of the first file. Returns 0 on success, a non-zero value with
   <err> set otherwise. */
int gt_region_mapping_get_description(GtRegionMapping *rm, GtStr *desc,
                                      GtStr *seqid, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && desc && seqid);
  if (rm->userawseq) {
    gt_str_append_cstr(desc, "<rawseq>");
    return 0;
  }
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (!had_err && gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
    /* only MD5 sequence ids are finished here; all other ids have to reach
       the mode specific lookup below */
    return gt_seq_col_md5_to_description(rm->seq_col, desc, seqid, err);
  }
  if (!had_err) {
    if (rm->usedesc) {
      unsigned long filenum, seqnum;
      gt_assert(rm->seqid2seqnum_mapping);
      had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                            gt_str_get(seqid), NULL, &seqnum,
                                            &filenum, NULL, err);
      if (!had_err) {
        char *cdesc;
        cdesc = gt_seq_col_get_description(rm->seq_col, filenum, seqnum);
        gt_assert(cdesc);
        gt_str_append_cstr(desc, cdesc);
        gt_free(cdesc);
      }
    }
    else if (rm->useseqno) {
      unsigned long seqno = GT_UNDEF_ULONG;
      gt_assert(rm->encseq);
      if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
        gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                          "where X is a sequence number in the encoded "
                          "sequence", gt_str_get(seqid));
        had_err = -1;
      }
      gt_assert(had_err || seqno != GT_UNDEF_ULONG);
      if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
        gt_error_set(err, "trying to access sequence %lu, but encoded"
                          "sequence contains only %lu sequences",
                          seqno, gt_encseq_num_of_sequences(rm->encseq));
        had_err = -1;
      }
      if (!had_err) {
        unsigned long desclen;
        const char *edesc;
        edesc = gt_encseq_description(rm->encseq, &desclen, seqno);
        /* the description is appended with its explicit length */
        gt_str_append_cstr_nt(desc, edesc, desclen);
      }
    }
    else if (rm->matchdesc) {
      const char *md5;
      /* XXX: not beautiful, but works -- this may be LOTS faster */
      had_err = gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
      if (!had_err) {
        GtStr *md5_seqid = gt_str_new_cstr(md5);
        had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, md5_seqid,
                                                err);
        gt_str_delete(md5_seqid);
      }
    }
    else {
      /* no special mode: use the first sequence of the first file */
      char *cdesc;
      cdesc = gt_seq_col_get_description(rm->seq_col, 0, 0);
      gt_assert(cdesc);
      gt_str_append_cstr(desc, cdesc);
      gt_free(cdesc);
    }
  }
  return had_err;
}
static void character_data_handler(void *data, const XML_Char *string, int len) { Parseinfo *parseinfo = (Parseinfo*) data; /* add data to the data buffer */ gt_str_append_cstr_nt(parseinfo->databuf, string, len); }
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav, GtError *err) { GtFeatureNodeIterator *fni, *mrnafni; GtFeatureNode *curnode, *last_mRNA = NULL; GtStr *mrnaseq, *seqid; int had_err = 0; mrnaseq = gt_str_new(); seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene); fni = gt_feature_node_iterator_new(sav->gene); while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) { if (gt_feature_node_get_type(curnode) == sav->mRNA_type) { GtFeatureNode *curnode2; if (last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); last_mRNA = curnode; gt_str_reset(mrnaseq); } } else last_mRNA = curnode; if (!had_err) { mrnafni = gt_feature_node_iterator_new(curnode); while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) { if (gt_feature_node_get_type(curnode2) == sav->CDS_type) { char *tmp; GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2); had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid, rng.start, rng.end, err); if (!had_err) { gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng)); gt_free(tmp); } } } gt_feature_node_iterator_delete(mrnafni); } } } if (!had_err && last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); } } gt_feature_node_iterator_delete(fni); gt_str_delete(mrnaseq); return had_err; }
/* End-of-element callback (the XML_Char signature suggests that it is
   registered with an Expat parser -- TODO confirm at the registration site).
   <info> is the Parseinfo parse state, <name> the name of the element which
   has just been closed. The text accumulated in parseinfo->databuf is
   interpreted depending on <name>:
   - SPLICEDALIGNMENT_TAG: the current spliced alignment is complete. Its
     backtrace path is reversed and adjusted, then the alignment is handed to
     parseinfo->saprocessfunc. If that callback returns non-zero, the error
     message is printed to stderr and the process exits. Afterwards
     parseinfo->currentSA is set to NULL.
   - DNA_EOP_TYPE_TAG/PROTEIN_EOP_TYPE_TAG: the textual edit operation type is
     mapped to an EOP_TYPE_* constant and kept in parseinfo->eoptype until the
     corresponding *_EOP_LENGTH_TAG adds the edit operation to the backtrace
     path.
   - GENOMICFILENAME_TAG/REFERENCEFILENAME_TAG: the file name is buffered
     until the matching file hash tag passes name and hash to process_file();
     the buffered name is reset afterwards.
   - cutoff, exon border/score and splice site tags fill parseinfo->cutoffs,
     parseinfo->exoninfo and parseinfo->introninfo, which are passed to the
     alignment when CUTOFFSSTART_TAG, CUTOFFSEND_TAG, EXONINFO_TAG or
     INTRONINFO_TAG is closed.
   - the remaining tags set the corresponding attribute of the current
     spliced alignment; INDELCOUNT_TAG and DPENDPOS_TAG are parsed but not
     stored.
   Element names matching none of the branches are ignored.
   SCANUINT, SCANDOUBLE and ILLEGAL_DATA are macros defined outside of this
   function; presumably they parse <data> into <ret> and <retdouble>,
   respectively, and report malformed data -- confirm at their definitions
   before renaming any of the local variables. */
static void end_element_handler(void *info, const XML_Char *name) { Parseinfo *parseinfo = (Parseinfo*) info; GthSA *sa = parseinfo->currentSA; GtUword datalength; double retdouble; GtWord ret; char *data; /* save data and data length */ data = gt_str_get(parseinfo->databuf); datalength = gt_str_length(parseinfo->databuf); /* perform actions depending on end tag */ if (strcmp(name, SPLICEDALIGNMENT_TAG) == 0) { /* before we store the spliced alignment we have to reverse its edit operations */ gt_assert(sa && gth_sa_backtrace_path(sa)); gth_backtrace_path_reverse(gth_sa_backtrace_path(sa)); /* ensure that before an intron which is not in phase the edit operation has length 1 (only for protein spliced alignments) */ gth_backtrace_path_ensure_length_1_before_introns( gth_sa_backtrace_path(sa)); if (parseinfo->saprocessfunc(parseinfo->data , sa, parseinfo->outputfilename, parseinfo->err)) { /* XXX */ fprintf(stderr, "error: %s\n", gt_error_get(parseinfo->err)); exit(EXIT_FAILURE); } /* reset current spliced alignment */ parseinfo->currentSA = NULL; } else if (strcmp(name, REFERENCEALPHATYPE_TAG) == 0) { if (strcmp(data, "DNA_ALPHA") == 0) gth_sa_set_alphatype(sa, DNA_ALPHA); else if (strcmp(data, "PROTEIN_ALPHA") == 0) { gth_sa_set_alphatype(sa, PROTEIN_ALPHA); } else { ILLEGAL_DATA; } } else if (strcmp(name, DNA_EOP_TYPE_TAG) == 0) { if (strcmp(data, "match") == 0) parseinfo->eoptype = EOP_TYPE_MATCH; else if (strcmp(data, "deletion") == 0) parseinfo->eoptype = EOP_TYPE_DELETION; else if (strcmp(data, "insertion") == 0) parseinfo->eoptype = EOP_TYPE_INSERTION; else if (strcmp(data, "mismatch") == 0) parseinfo->eoptype = EOP_TYPE_MISMATCH; else if (strcmp(data, "intron") == 0) parseinfo->eoptype = EOP_TYPE_INTRON; else { ILLEGAL_DATA; } } else if (strcmp(name, DNA_EOP_LENGTH_TAG) == 0) { SCANUINT; gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype, ret); } else if (strcmp(name, PROTEIN_EOP_TYPE_TAG) == 0) { if (strcmp(data, "match") == 0) 
parseinfo->eoptype = EOP_TYPE_MATCH; else if (strcmp(data, "deletion") == 0) parseinfo->eoptype = EOP_TYPE_DELETION; else if (strcmp(data, "insertion") == 0) parseinfo->eoptype = EOP_TYPE_INSERTION; else if (strcmp(data, "mismatch") == 0) parseinfo->eoptype = EOP_TYPE_MISMATCH; else if (strcmp(data, "intron") == 0) parseinfo->eoptype = EOP_TYPE_INTRON; else if (strcmp(data, "mismatch_with_1_gap") == 0) parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_1_GAP; else if (strcmp(data, "mismatch_with_2_gaps") == 0) parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_2_GAPS; else if (strcmp(data, "deletion_with_1_gap") == 0) parseinfo->eoptype = EOP_TYPE_DELETION_WITH_1_GAP; else if (strcmp(data, "deletion_with_2_gaps") == 0) parseinfo->eoptype = EOP_TYPE_DELETION_WITH_2_GAPS; else if (strcmp(data, "intron_with_1_base_left") == 0) parseinfo->eoptype = EOP_TYPE_INTRON_WITH_1_BASE_LEFT; else if (strcmp(data, "intron_with_2_bases_left") == 0) parseinfo->eoptype = EOP_TYPE_INTRON_WITH_2_BASES_LEFT; else { ILLEGAL_DATA; } } else if (strcmp(name, PROTEIN_EOP_LENGTH_TAG) == 0) { SCANUINT; gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype, ret); } else if (strcmp(name, INDELCOUNT_TAG) == 0) { SCANUINT; /* ignore indelcount, gets recomputed anyway */ } else if (strcmp(name, GENOMICLENGTHDP_TAG) == 0) { SCANUINT; gth_sa_set_gen_dp_length(sa, ret); } else if (strcmp(name, GENOMICLENGTHTOTAL_TAG) == 0) { SCANUINT; gth_sa_set_gen_total_length(sa, ret); } else if (strcmp(name, GENOMICOFFSET_TAG) == 0) { SCANUINT; gth_sa_set_gen_offset(sa, ret); } else if (strcmp(name, REFERENCELENGTH_TAG) == 0) { SCANUINT; gth_sa_set_ref_total_length(sa, ret); } else if (strcmp(name, DPSTARTPOS_TAG) == 0) { SCANUINT; gth_sa_set_gen_dp_start(sa, ret); } else if (strcmp(name, DPENDPOS_TAG) == 0) { SCANUINT; /* ignore DP end pos, gets recomputed from gen_dp_length anyway */ gt_assert(gth_sa_gen_dp_end(sa) == ret); } else if (strcmp(name, GENOMICFILENAME_TAG) == 0) { /* save genomic file name */ 
gt_str_append_cstr_nt(parseinfo->genomicfilename, data, datalength); } else if (strcmp(name, GENOMICFILEHASH_TAG) == 0) { gth_sa_set_gen_file_num(sa, process_file(parseinfo->input, gt_str_get(parseinfo->genomicfilename), data, false, UNDEF_ALPHA)); /* reset genomic filename */ gt_str_reset(parseinfo->genomicfilename); } else if (strcmp(name, GENOMICSEQNUM_TAG) == 0) { SCANUINT; gth_sa_set_gen_seq_num(sa, ret); } else if (strcmp(name, REFERENCEFILENAME_TAG) == 0) { /* save reference file name */ gt_str_append_cstr_nt(parseinfo->referencefilename, data, datalength); } else if (strcmp(name, REFERENCEFILEHASH_TAG) == 0) { gth_sa_set_ref_file_num(sa, process_file(parseinfo->input, gt_str_get(parseinfo->referencefilename), data, true, gth_sa_alphatype(sa))); /* reset reference filename */ gt_str_reset(parseinfo->referencefilename); } else if (strcmp(name, REFERENCESEQNUM_TAG) == 0) { SCANUINT; gth_sa_set_ref_seq_num(sa, ret); } else if (strcmp(name, GENOMICID_TAG) == 0) gth_sa_set_gen_id(sa, data); else if (strcmp(name, REFERENCEID_TAG) == 0) gth_sa_set_ref_id(sa, data); else if (strcmp(name, GENOMICSTRANDISFORWARD_TAG) == 0) gth_sa_set_gen_strand(sa, parse_boolean(data, parseinfo)); else if (strcmp(name, REFERENCESTRANDISFORWARD_TAG) == 0) gth_sa_set_ref_strand(sa, parse_boolean(data, parseinfo)); else if (strcmp(name, GENOMICCUTOFF_TAG) == 0) { SCANUINT; parseinfo->cutoffs.genomiccutoff = ret; } else if (strcmp(name, REFERENCECUTOFF_TAG) == 0) { SCANUINT; parseinfo->cutoffs.referencecutoff = ret; } else if (strcmp(name, EOPCUTOFF_TAG) == 0) { SCANUINT; parseinfo->cutoffs.eopcutoff = ret; } else if (strcmp(name, CUTOFFSSTART_TAG) == 0) gth_sa_set_cutoffs_start(sa, &parseinfo->cutoffs); else if (strcmp(name, CUTOFFSEND_TAG) == 0) gth_sa_set_cutoffs_end(sa, &parseinfo->cutoffs); else if (strcmp(name, LEFTGENOMICEXONBORDER_TAG) == 0) { SCANUINT; parseinfo->exoninfo.leftgenomicexonborder = ret; } else if (strcmp(name, RIGHTGENOMICEXONBORDER_TAG) == 0) { SCANUINT; 
parseinfo->exoninfo.rightgenomicexonborder = ret; } else if (strcmp(name, LEFTREFERENCEEXONBORDER_TAG) == 0) { SCANUINT; parseinfo->exoninfo.leftreferenceexonborder = ret; } else if (strcmp(name, RIGHTREFERENCEEXONBORDER_TAG) == 0) { SCANUINT; parseinfo->exoninfo.rightreferenceexonborder = ret; } else if (strcmp(name, EXONSCORE_TAG) == 0) { SCANDOUBLE; parseinfo->exoninfo.exonscore = retdouble; } else if (strcmp(name, EXONINFO_TAG) == 0) gth_sa_add_exon(sa, &parseinfo->exoninfo); else if (strcmp(name, DONORSITEPROBABILITY_TAG) == 0) { SCANDOUBLE; parseinfo->introninfo.donorsiteprobability = (GthFlt) retdouble; } else if (strcmp(name, ACCEPTORSITEPROBABILITY_TAG) == 0) { SCANDOUBLE; parseinfo->introninfo.acceptorsiteprobability = (GthFlt) retdouble; } else if (strcmp(name, DONORSITESCORE_TAG) == 0) { SCANDOUBLE; parseinfo->introninfo.donorsitescore = retdouble; } else if (strcmp(name, ACCEPTORSITESCORE_TAG) == 0) { SCANDOUBLE; parseinfo->introninfo.acceptorsitescore = retdouble; } else if (strcmp(name, INTRONINFO_TAG) == 0) gth_sa_add_intron(sa, &parseinfo->introninfo); else if (strcmp(name, POLYASTART_TAG) == 0) { SCANUINT; gth_sa_set_polyAtail_start(sa, ret); } else if (strcmp(name, POLYAEND_TAG) == 0) { SCANUINT; gth_sa_set_polyAtail_stop(sa, ret); } else if (strcmp(name, ALIGNMENTSCORE_TAG) == 0) { SCANDOUBLE; gth_sa_set_score(sa, retdouble); } else if (strcmp(name, COVERAGE_TAG) == 0) { SCANDOUBLE; gth_sa_set_coverage(sa, retdouble); } else if (strcmp(name, COVERAGEOFGENOMICSEGMENTISHIGHEST_TAG) == 0) { gth_sa_set_highest_cov(sa, parse_boolean(data, parseinfo)); } else if (strcmp(name, CUMULATIVELENGTHOFSCOREDEXONS_TAG) == 0) { SCANUINT; gth_sa_set_cumlen_scored_exons(sa, ret); } }
static int split_fasta_file(const char *filename, unsigned long max_filesize, bool force, GtError *err) { GtFile *srcfp = NULL, *destfp = NULL; GtStr *destfilename = NULL; unsigned long filenum = 0, bytecount = 0, separator_pos; int read_bytes, had_err = 0; char buf[BUFSIZ]; gt_error_check(err); gt_assert(filename && max_filesize); /* open source file */ srcfp = gt_file_xopen(filename, "r"); gt_assert(srcfp); /* read start characters */ if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) { gt_error_set(err, "file \"%s\" is empty", filename); had_err = -1; } bytecount += read_bytes; /* make sure the file is in fasta format */ if (!had_err && buf[0] != '>') { gt_error_set(err, "file is not in FASTA format"); had_err = -1; } if (!had_err) { /* open destination file */ destfilename = gt_str_new(); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; } if (!had_err) gt_file_xwrite(destfp, buf, read_bytes); while (!had_err && (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) { if (bytecount + read_bytes > max_filesize) { int offset = bytecount < max_filesize ? 
max_filesize - bytecount : 0; if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) { separator_pos--; gt_assert(separator_pos < read_bytes); if (separator_pos) gt_file_xwrite(destfp, buf, separator_pos); /* close current file */ gt_file_delete(destfp); /* open new file */ gt_str_reset(destfilename); gt_str_append_cstr_nt(destfilename, filename, gt_file_basename_length(filename)); gt_str_append_char(destfilename, '.'); gt_str_append_ulong(destfilename, ++filenum); gt_str_append_cstr(destfilename, gt_file_mode_suffix(gt_file_mode(srcfp))); if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w", force, err))) { had_err = -1; break; } bytecount = read_bytes - separator_pos; /* reset */ gt_assert(buf[separator_pos] == '>'); gt_file_xwrite(destfp, buf + separator_pos, read_bytes - separator_pos); continue; } } bytecount += read_bytes; gt_file_xwrite(destfp, buf, read_bytes); } } /* free */ gt_str_delete(destfilename); /* close current file */ gt_file_delete(destfp); /* close source file */ gt_file_delete(srcfp); return had_err; }
/* Writes all unique and link elements of <condenseq> as GFF3
   "experimental_feature" lines to the file "<basefilename>.gff3".
   Uniques get the ID "U<index>" and the name "unique<index>", links the ID
   "L<index>", the name "link<index>" and a Derives_from attribute naming the
   unique they refer to. Coordinates are 1-based and relative to the start of
   the sequence containing the element; the description of that sequence is
   used as sequence ID.
   Returns 0 on success. Returns -1 if the output file cannot be opened (this
   failure was ignored before, although <err> was set), or the error code of
   gt_genome_node_accept() if writing a feature fails. */
int gt_condenseq_output_to_gff3(const GtCondenseq *condenseq, GtError *err)
{
  int had_err = 0;
  GtUword idx, name_len,
          seqnum = 0, seqstart = 0, seqend = 0,
          desclen;
  GtStr *filename = NULL,
        *id = gt_str_new_cstr("U"),
        *name = gt_str_new_cstr("unique"),
        *parent_unique = gt_str_new_cstr("U"),
        *seqid = gt_str_new(),
        *source = gt_str_new_cstr("Condenseq");
  GtFile *outfile = NULL;
  GtGFF3Visitor *gffv = NULL;
  GtNodeVisitor *nodev = NULL;
  GtFeatureNode *fnode = NULL;
  GtGenomeNode *node = NULL;
  GtRange range;

  gt_assert(condenseq != NULL);

  /* NOTE(review): if gt_condenseq_basefilename() returns an allocated string
     it is not released here -- confirm against its contract */
  filename = gt_str_new_cstr(gt_condenseq_basefilename(condenseq));
  name_len = gt_str_length(name);
  gt_str_append_cstr(filename, ".gff3");

  outfile = gt_file_new(gt_str_get(filename), "w", err);
  if (outfile == NULL) {
    /* <err> has been set by gt_file_new(); do not write anything */
    had_err = -1;
  }
  else {
    nodev = gt_gff3_visitor_new(outfile);
    gffv = (GtGFF3Visitor *) nodev;
    gt_gff3_visitor_retain_id_attributes(gffv);
  }

  /* a single node is reused for all elements of one sequence */
  node = gt_feature_node_new(seqid, "experimental_feature",
                             (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
  fnode = (GtFeatureNode*) node;
  gt_feature_node_set_source(fnode, source);

  for (idx = 0; !had_err && idx < condenseq->udb_nelems; ++idx) {
    GtCondenseqUnique uq = condenseq->uniques[idx];
    if (seqend <= uq.orig_startpos) {
      /* element lies behind the current sequence: switch to the sequence
         containing it and start over with a node carrying its seqid */
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, uq.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum,
                                                   seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature",
                                 (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    /* cut name and ID back to their prefix and append the current index */
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    /* 1 Based coordinates! */
    range.start = uq.orig_startpos + 1 - seqstart;
    range.end = uq.orig_startpos + uq.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }

  /* switch the prefixes from uniques to links */
  gt_str_reset(name);
  gt_str_append_cstr(name, "link");
  gt_str_reset(id);
  gt_str_append_cstr(id, "L");
  name_len = gt_str_length(name);
  /* forces a sequence lookup for the first link */
  seqend = 0;

  for (idx = 0; !had_err && idx < condenseq->ldb_nelems; ++idx) {
    GtCondenseqLink link = condenseq->links[idx];
    if (seqend <= link.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, link.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum,
                                                   seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature",
                                 (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    /* refer to the unique this link points into */
    gt_str_set_length(parent_unique, (GtUword) 1);
    gt_str_append_uword(parent_unique, link.unique_id);
    gt_feature_node_set_attribute(fnode, "Derives_from",
                                  gt_str_get(parent_unique));
    /* 1 Based coordinates! */
    range.start = link.orig_startpos + 1 - seqstart;
    range.end = link.orig_startpos + link.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }

  if (outfile != NULL)
    gt_file_delete(outfile);
  gt_genome_node_delete(node);
  if (nodev != NULL)
    gt_node_visitor_delete(nodev);
  gt_str_delete(filename);
  gt_str_delete(id);
  gt_str_delete(name);
  gt_str_delete(parent_unique);
  gt_str_delete(seqid);
  gt_str_delete(source);
  return had_err;
}
/* Tool runner: renders the features of the GFF3 file argv[parsed_args+1] for
   one sequence region into the multi-page PDF or PostScript file
   argv[parsed_args]. The display range is walked in steps of
   arguments->width; every step is laid out as a diagram of its own and drawn
   below the previous one. When a diagram no longer fits on the current page,
   the page header is drawn, the page is emitted and a new page is started.
   Returns 0 on success and a non-zero value with <err> set otherwise.

   Compared to the previous version
   - the style, the style file name and the feature index are released on
     every path (they leaked whenever an error occurred before rendering),
   - the canvas and layout pointers are cleared per iteration, so a failure in
     a later iteration does not delete the canvas of an earlier one again,
   - failures of gt_get_gtdata_path(), gt_bioseq_new() and
     gt_layout_new_with_twc() are reported instead of being ignored. */
static int gt_sketch_page_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtBioseq *bioseq = NULL;
  GtCanvas *canvas = NULL;
  const char *seqid = NULL, *outfile;
  unsigned long start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style */
  sty = gt_style_new(err);
  if (gt_str_length(arguments->stylefile) == 0) {
    /* no style file given: fall back to the default one in the gtdata
       directory, which is located relative to the program name */
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, argv[0],
                          gt_cstr_length_up_to_char(argv[0], ' '));
    gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (gt_style_file)
      gt_str_append_cstr(gt_style_file, "/sketch/default.style");
    else
      had_err = -1; /* <err> has been set by gt_get_gtdata_path() */
  }
  else {
    gt_style_file = gt_str_ref(arguments->stylefile);
  }
  if (!had_err)
    had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);

  outfile = argv[parsed_args];
  if (!had_err) {
    /* get features */
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1],
                                            err);
    if (!had_err && gt_str_length(arguments->seqid) == 0) {
      seqid = gt_feature_index_get_first_seqid(features);
      if (seqid == NULL) {
        gt_error_set(err, "GFF input file must contain a sequence region!");
        had_err = -1;
      }
    }
    else if (!had_err
               && !gt_feature_index_has_seqid(features,
                                              gt_str_get(arguments->seqid))) {
      gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                   gt_str_get(arguments->seqid));
      had_err = -1;
    }
    else if (!had_err)
      seqid = gt_str_get(arguments->seqid);
  }

  /* set text */
  if (gt_str_length(arguments->text) == 0) {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err) {
    /* set display range */
    gt_feature_index_get_range_for_seqid(features, &sequence_region_range,
                                         seqid);
    qry_range.start = (arguments->range.start == GT_UNDEF_ULONG ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_ULONG ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0) {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0) {
      surf = cairo_ps_surface_create(outfile,
                                     mm_to_pt(arguments->pwidth),
                                     mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
               mm_to_pt(arguments->pwidth),
               mm_to_pt(arguments->pheight));

    /* vertical position below the header and height left for diagrams */
    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                      - arguments->theight
                      - arguments->theight
                      - 4*TEXT_SPACER;

    if (gt_str_length(arguments->seqfile) > 0) {
      bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
      if (!bioseq)
        had_err = -1; /* makes the loop below stop before drawing */
    }

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty);
    for (start = qry_range.start; start <= qry_range.end;
         start += arguments->width) {
      GtRange single_range;
      GtCustomTrack *ct = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      if (had_err)
        break;

      /* never carry objects of the previous iteration into this one */
      canvas = NULL;
      l = NULL;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                            gt_bioseq_get_sequence_length(
                                                                    bioseq, 0),
                                            800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      if (!l)
        had_err = -1;
      else
        had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* diagram does not fit any more: finish this page, start a new one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
                                       - arguments->theight,
                                     offsetpos + height)) {
          draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                      seqid, num_pages, mm_to_pt(arguments->pwidth),
                      mm_to_pt(arguments->pheight), arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty, cr, offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height, NULL, err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      gt_canvas_delete(canvas);
      canvas = NULL;
      if (l)
        gt_layout_delete(l);
      l = NULL;
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }

    /* emit the last (possibly partially filled) page */
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight), arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be %lu pages\n", num_pages);
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
    if (bioseq)
      gt_bioseq_delete(bioseq);
  }

  /* these exist no matter how far the function got */
  gt_style_delete(sty);
  gt_str_delete(gt_style_file);
  gt_feature_index_delete(features);
  return had_err;
}