static int extract_feature_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn, GtError *err) { GtExtractFeatureVisitor *efv; GtFeatureNodeIterator *fni; GtFeatureNode *child; GtStrArray *target_ids = NULL; GtStr *seqid = NULL, *description, *sequence; int had_err = 0; gt_error_check(err); efv = gt_extract_feature_visitor_cast(nv); gt_assert(efv->region_mapping); fni = gt_feature_node_iterator_new(fn); if (efv->target) target_ids = gt_str_array_new(); if (efv->seqid) seqid = gt_str_new(); description = gt_str_new(); sequence = gt_str_new(); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (seqid) gt_str_reset(seqid); if (target_ids) gt_str_array_reset(target_ids); if (gt_extract_feature_sequence(sequence, (GtGenomeNode*) child, efv->type, efv->join, seqid, target_ids, efv->region_mapping, err)) { had_err = -1; } if (!had_err && gt_str_length(sequence)) { efv->fastaseq_counter++; construct_description(description, efv->type, efv->fastaseq_counter, efv->join, efv->translate, seqid, target_ids); had_err = show_entry(description, sequence, efv->translate, efv->width, efv->outfp); gt_str_reset(description); gt_str_reset(sequence); } } gt_str_delete(sequence); gt_str_delete(description); gt_str_delete(seqid); gt_str_array_delete(target_ids); gt_feature_node_iterator_delete(fni); return had_err; }
static int update_seq_col_if_necessary(GtRegionMapping *rm, GtStr *seqid, GtError *err) { int had_err = 0; gt_error_check(err); gt_assert(rm && seqid); /* for mappings, we need to load the changed sequence, if needed... */ if (rm->mapping) { if (!rm->sequence_file || (gt_str_cmp(rm->sequence_name, seqid))) { gt_str_delete(rm->sequence_file); /* ignore MD5 hashes when using region mappings */ if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) { rm->sequence_file = region_mapping_map(rm, gt_str_get(seqid) +GT_MD5_SEQID_TOTAL_LEN, err); } else rm->sequence_file = region_mapping_map(rm, gt_str_get(seqid), err); if (!rm->sequence_file) had_err = -1; else { /* load new seqcol */ if (!rm->sequence_filenames) rm->sequence_filenames = gt_str_array_new(); else gt_str_array_reset(rm->sequence_filenames); gt_str_array_add(rm->sequence_filenames, rm->sequence_file); if (!rm->sequence_name) rm->sequence_name = gt_str_new(); else gt_str_reset(rm->sequence_name); gt_str_append_str(rm->sequence_name, seqid); gt_seq_col_delete(rm->seq_col); rm->seq_col = gt_bioseq_col_new(rm->sequence_filenames, err); if (!rm->seq_col) had_err = -1; } } } else { /* ...otherwise, just make sure the seqcol is loaded */ if (!rm->seq_col) { if (rm->encseq) { if (!(rm->seq_col = gt_encseq_col_new(rm->encseq, err))) had_err = -1; } else { gt_assert(rm->sequence_filenames); if (!(rm->seq_col = gt_bioseq_col_new(rm->sequence_filenames, err))) had_err = -1; } } if (!had_err && rm->usedesc) { if (rm->seqid2seqnum_mapping) gt_seqid2seqnum_mapping_delete(rm->seqid2seqnum_mapping); rm->seqid2seqnum_mapping = gt_seqid2seqnum_mapping_new_seqcol(rm->seq_col, err); if (!rm->seqid2seqnum_mapping) { had_err = -1; } } } return had_err; }
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err) { GtFeatureNode *fn; GtRange range; unsigned int phase_offset = 0; char *outsequence; const char *target; int had_err = 0; gt_error_check(err); fn = gt_genome_node_cast(gt_feature_node_class(), gn); gt_assert(fn); if (seqid) gt_str_append_str(seqid, gt_genome_node_get_seqid(gn)); if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) { had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } if (!had_err) { if (join) { GtFeatureNodeIterator *fni; GtFeatureNode *child; bool reverse_strand = false, first_child = true, first_child_of_type_seen = false; GtPhase phase = GT_PHASE_UNDEFINED; /* in this case we have to traverse the children */ fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn)); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (first_child) { if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) { gt_str_array_reset(target_ids); had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } first_child = false; } if (!had_err) { if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) { had_err = -1; } if (phase != GT_PHASE_UNDEFINED) { phase_offset = (int) phase; } } } gt_feature_node_iterator_delete(fni); gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED); if (!had_err && gt_str_length(sequence)) { if (reverse_strand) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } else if (gt_feature_node_get_type(fn) == type) { GtPhase phase = gt_feature_node_get_phase(fn); gt_assert(!had_err); if (phase != GT_PHASE_UNDEFINED) phase_offset = (unsigned int) phase; /* otherwise we only have to look at this feature */ range = gt_genome_node_get_range(gn); gt_assert(range.start); /* 1-based coordinates */ had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } } if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) { *out_phase_offset = phase_offset; } return had_err; }