/*
 * Build test input for the CDS inference visitor.
 *
 * Reads "data/gff3/grape-codons.gff3" through an unsorted GFF3 input stream
 * (ID attribute checking and tidy mode enabled), pipes it through
 * agn_infer_cds_stream_new() and collects the emitted nodes in an array.
 * The array is sorted with agn_genome_node_compare(), reversed, and then
 * drained with gt_array_pop() into `queue` (presumably so that the nodes
 * enter the queue in ascending sort order, given that pop removes the last
 * element).
 *
 * A pull result of -1 is only reported on stderr; whatever was collected is
 * still queued.
 */
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  GtError *error = gt_error_new();
  const char *infile = "data/gff3/grape-codons.gff3";
  GtNodeStream *gff3in, *icv_stream, *arraystream;
  GtLogger *logger;
  GtArray *collected;

  /* assemble the pipeline: GFF3 reader -> CDS inference -> array sink */
  gff3in = gt_gff3_in_stream_new_unsorted(1, &infile);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3in);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3in);
  logger = gt_logger_new(true, "", stderr);
  icv_stream = agn_infer_cds_stream_new(gff3in, NULL, logger);
  collected = gt_array_new( sizeof(GtFeatureNode *) );
  arraystream = gt_array_out_stream_new(icv_stream, collected, error);

  if (gt_node_stream_pull(arraystream, error) == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(error));
  }

  /* the pipeline is no longer needed once the array has been filled */
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(icv_stream);
  gt_node_stream_delete(arraystream);
  gt_logger_delete(logger);

  /* sort, flip, then pop from the back to hand the nodes to the queue */
  gt_array_sort(collected, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(collected);
  while (gt_array_size(collected) > 0)
  {
    GtFeatureNode **slot = gt_array_pop(collected);
    gt_queue_add(queue, *slot);
  }

  gt_array_delete(collected);
  gt_error_delete(error);
}
/*
 * Reverse-complement the spliced sequence held by `ss` in place.
 *
 * On success the position mapping array is reversed as well and the
 * `forward` flag is toggled. If gt_reverse_complement() reports an error,
 * its return value is passed through and neither the mapping nor the flag
 * is touched.
 */
int gt_splicedseq_reverse(Splicedseq *ss, GtError *err)
{
  int rc_status;

  gt_error_check(err);
  gt_assert(ss);

  rc_status = gt_reverse_complement(gt_str_get(ss->splicedseq),
                                    gt_str_length(ss->splicedseq), err);
  if (rc_status)
    return rc_status;

  /* keep the coordinate mapping and orientation in sync with the sequence */
  gt_array_reverse(ss->positionmapping);
  ss->forward = !ss->forward;
  return 0;
}
/*
 * Create an iterator over the tools registered in `toolbox`.
 *
 * Every tool reported by gt_toolbox_iterate() is pushed onto the iterator's
 * stack via add_tool_to_stack() with no prefix; the stack is then reversed.
 * The prefix pointer starts out as NULL and the prefix separator as a
 * single blank.
 */
GtToolIterator* gt_tool_iterator_new(GtToolbox *toolbox)
{
  ToolIterationInfo collect;
  GtToolIterator *iter;

  gt_assert(toolbox);

  iter = gt_malloc(sizeof *iter);
  iter->prefixptr = NULL;
  iter->prefixsep = ' ';
  iter->tool_stack = gt_array_new(sizeof (ToolEntry));

  /* top-level tools carry no prefix string */
  collect.str = NULL;
  collect.arr = iter->tool_stack;
  gt_toolbox_iterate(toolbox, add_tool_to_stack, &collect);

  /* entries are consumed from the end of the stack; reversing gives
     alphabetical order (presumably the toolbox iterates alphabetically) */
  gt_array_reverse(iter->tool_stack);
  return iter;
}
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *error) { GtFeatureInStream *stream = feature_in_stream_cast(ns); gt_error_check(error); if (!stream->init) { feature_in_stream_init(stream); stream->init = true; } if (gt_queue_size(stream->regioncache) > 0) { GtGenomeNode *region = gt_queue_get(stream->regioncache); *gn = region; return 0; } if (stream->featurecache == NULL || gt_array_size(stream->featurecache) == 0) { if (stream->featurecache != NULL) { gt_array_delete(stream->featurecache); stream->featurecache = NULL; } if (stream->seqindex == gt_str_array_size(stream->seqids)) { *gn = NULL; return 0; } const char *seqid = gt_str_array_get(stream->seqids, stream->seqindex++); stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi, seqid, error); gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare); gt_array_reverse(stream->featurecache); } GtGenomeNode *feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache); *gn = gt_genome_node_ref(feat); return 0; }
/*
 * Advance `tool_iterator` by one tool.
 *
 * Returns false when the internal stack is empty. Otherwise the last stack
 * entry is removed, its name and tool are stored in *name and *tool, and
 * true is returned. If a prefix string has been attached to the iterator
 * (prefixptr), it is reset and, when the entry has a prefix, refilled with
 * that prefix followed by the separator character.
 *
 * If the delivered tool is itself a toolbox, its tools are collected with
 * add_tool_to_stack() under the prefix "<entry prefix><entry name>",
 * reversed, and appended to the stack so that they are delivered next.
 */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolIterationInfo collect;
  ToolEntry current;
  GtArray *subtools;
  GtStr *subprefix;

  gt_assert(tool_iterator && name && tool);

  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  /* work on a copy of the popped entry: the stack may grow again below */
  current = *(ToolEntry*) gt_array_pop(tool_iterator->tool_stack);
  *name = current.name;
  *tool = current.tool;

  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (current.prefix) {
      gt_str_append_str(tool_iterator->prefixptr, current.prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  if (!gt_tool_is_toolbox(current.tool)) {
    /* plain tool: the entry's prefix is no longer needed */
    gt_str_delete(current.prefix);
    return true;
  }

  /* NOTE(review): on this path the popped entry's own prefix is not
     released; whether it should be depends on how add_tool_to_stack()
     acquires the string -- confirm. */
  subprefix = gt_str_new_cstr(current.prefix ? gt_str_get(current.prefix)
                                             : "");
  gt_str_append_cstr(subprefix, current.name);

  subtools = gt_array_new(sizeof (ToolEntry));
  collect.arr = subtools;
  collect.str = subprefix;
  gt_toolbox_iterate(gt_tool_get_toolbox(current.tool), add_tool_to_stack,
                     &collect);
  if (gt_array_size(subtools)) {
    gt_array_reverse(subtools); /* alphabetical order */
    gt_array_add_array(tool_iterator->tool_stack, subtools);
  }
  gt_array_delete(subtools);
  gt_str_delete(subprefix);
  return true;
}
int checkspecialrangesfast(const Encodedsequence *encseq) { GtArray *rangesforward, *rangesbackward; bool haserr = false; Specialrangeiterator *sri; Sequencerange range; if (!hasspecialranges(encseq)) { return 0; } rangesforward = gt_array_new(sizeof (Sequencerange)); rangesbackward = gt_array_new(sizeof (Sequencerange)); sri = newspecialrangeiterator(encseq,true); while (nextspecialrangeiterator(&range,sri)) { gt_array_add(rangesforward,range); } freespecialrangeiterator(&sri); sri = newspecialrangeiterator(encseq,false); while (nextspecialrangeiterator(&range,sri)) { gt_array_add(rangesbackward,range); } freespecialrangeiterator(&sri); gt_array_reverse(rangesbackward); if (!haserr) { if (array_compare(rangesforward,rangesbackward, compareSequencerange) != 0) { exit(GT_EXIT_PROGRAMMING_ERROR); } } gt_array_delete(rangesforward); gt_array_delete(rangesbackward); return haserr ? - 1 : 0; }
/*
 * Validate (and in tidy mode repair) the phases of a list of CDS features.
 *
 * `cds_features` must be non-empty. If its first feature lies on the
 * reverse strand the array is reversed in place before the walk. The first
 * feature only needs a defined phase; each following feature must carry
 * the phase computed from the length and phase of its predecessor.
 *
 * Handling of a feature with an unexpected phase:
 *  - feature already recorded in v->cds_features (seen via another parent):
 *      tidy mode, !is_multi and no children -> warn and mark the feature in
 *      v->cds_features_to_split, setting v->splitting_is_necessary;
 *      otherwise -> error;
 *  - feature not recorded yet:
 *      tidy mode -> set the expected phase (warning unless `second_pass`);
 *      otherwise -> error.
 *
 * Every feature that passes is recorded in v->cds_features.
 * Returns 0 on success and -1 (with `err` set) on the first error.
 */
static int check_cds_phases(GtArray *cds_features, GtCDSCheckVisitor *v,
                            bool is_multi, bool second_pass, GtError *err)
{
  GtPhase correct_phase = GT_PHASE_ZERO;
  GtFeatureNode *fn;
  unsigned long idx;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(cds_features);
  gt_assert(gt_array_size(cds_features));

  /* reverse-strand features are walked in the opposite array order */
  fn = *(GtFeatureNode**) gt_array_get_first(cds_features);
  if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE)
    gt_array_reverse(cds_features);

  for (idx = 0; !had_err && idx < gt_array_size(cds_features); idx++) {
    bool wrong_phase;

    fn = *(GtFeatureNode**) gt_array_get(cds_features, idx);

    /* the first phase can be anything (except being undefined), because the
       GFF3 spec says:

       NOTE 4 - CDS features MUST have have a defined phase field. Otherwise
       it is not possible to infer the correct polypeptides corresponding to
       partially annotated genes. */
    if (idx == 0)
      wrong_phase = gt_feature_node_get_phase(fn) == GT_PHASE_UNDEFINED;
    else
      wrong_phase = gt_feature_node_get_phase(fn) != correct_phase;

    if (wrong_phase) {
      if (gt_hashmap_get(v->cds_features, fn)) {
        /* the feature was already recorded earlier (different parent) */
        if (v->tidy && !is_multi && !gt_feature_node_has_children(fn)) {
          /* we can split the feature */
          gt_warning("%s feature on line %u in file \"%s\" has multiple "
                     "parents which require different phases; split feature",
                     gt_ft_CDS,
                     gt_genome_node_get_line_number((GtGenomeNode*) fn),
                     gt_genome_node_get_filename((GtGenomeNode*) fn));
          gt_hashmap_add(v->cds_features_to_split, fn, fn);
          v->splitting_is_necessary = true; /* split later */
        }
        else {
          gt_error_set(err, "%s feature on line %u in file \"%s\" has multiple "
                       "parents which require different phases", gt_ft_CDS,
                       gt_genome_node_get_line_number((GtGenomeNode*) fn),
                       gt_genome_node_get_filename((GtGenomeNode*) fn));
          had_err = -1;
        }
      }
      else if (v->tidy) {
        if (!second_pass) {
          gt_warning("%s feature on line %u in file \"%s\" has the wrong "
                     "phase %c -> correcting it to %c", gt_ft_CDS,
                     gt_genome_node_get_line_number((GtGenomeNode*) fn),
                     gt_genome_node_get_filename((GtGenomeNode*) fn),
                     GT_PHASE_CHARS[gt_feature_node_get_phase(fn)],
                     GT_PHASE_CHARS[correct_phase]);
        }
        gt_feature_node_set_phase(fn, correct_phase);
      }
      else {
        gt_error_set(err, "%s feature on line %u in file \"%s\" has the "
                     "wrong phase %c (should be %c)", gt_ft_CDS,
                     gt_genome_node_get_line_number((GtGenomeNode*) fn),
                     gt_genome_node_get_filename((GtGenomeNode*) fn),
                     GT_PHASE_CHARS[gt_feature_node_get_phase(fn)],
                     GT_PHASE_CHARS[correct_phase]);
        had_err = -1;
      }
    }

    if (!had_err) {
      /* read the phase back: it may just have been corrected above */
      GtPhase current_phase = gt_feature_node_get_phase(fn);
      unsigned long current_length =
        gt_genome_node_get_length((GtGenomeNode*) fn);
      /* phase the next feature has to carry */
      correct_phase = (3 - (current_length - current_phase) % 3) % 3;
      gt_hashmap_add(v->cds_features, fn, fn); /* record CDS feature */
    }
  }

  return had_err;
}
/*
 * Unit test for GtRankedList.
 *
 * Part 1: a list limited to 5 entries is checked while empty (iterators
 * from both ends yield NULL repeatedly), then filled with 8 integers; the
 * size must grow up to 5 and stay there, and first/last must be 545 and 22.
 *
 * Part 2 (repeated nof_tests times): 200 structs with random scores are
 * inserted into a list limited to nof_best entries and, in parallel, stored
 * in a plain array. The array is stably sorted with the same comparator and
 * reversed; iterating the ranked list from its first element must then
 * reproduce the leading array entries (same id and score).
 *
 * Results are tracked through gt_ensure(), which works on `had_err`/`err`.
 * Returns had_err.
 */
int gt_ranked_list_unit_test(GtError *err)
{
  int had_err = 0;
  const GtUword nof_best = 30UL,
                nof_tests = 100UL;
  int values[8] = {-3, 4, 1, 545, 24, 33, 22, 42};
  int i, round;
  GtRankedList *rl;
  GtRankedListIter *it;
  GtRankedListTestStruct *mystr;
  GtArray *reference;

  gt_error_check(err);

  /* --- part 1: small integer list ------------------------------------ */
  rl = gt_ranked_list_new(5UL, gt_ranked_list_cmp_numbers, NULL, NULL);
  gt_ensure(rl != NULL);
  gt_ensure(gt_ranked_list_size(rl) == 0);

  /* iterating an empty list forwards keeps returning NULL */
  it = gt_ranked_list_iter_new_from_first(rl);
  mystr = gt_ranked_list_iter_next(it);
  gt_ensure(mystr == NULL);
  mystr = gt_ranked_list_iter_next(it);
  gt_ensure(mystr == NULL);
  gt_ranked_list_iter_delete(it);

  /* ... and so does iterating it backwards */
  it = gt_ranked_list_iter_new_from_last(rl);
  mystr = gt_ranked_list_iter_prev(it);
  gt_ensure(mystr == NULL);
  mystr = gt_ranked_list_iter_prev(it);
  gt_ensure(mystr == NULL);
  gt_ranked_list_iter_delete(it);

  i = 0;
  while (i < 8) {
    gt_ranked_list_insert(rl, values+i);
    if (i < 5) {
      gt_ensure(gt_ranked_list_size(rl) == (GtUword) i + 1UL);
    }
    else {
      gt_ensure(gt_ranked_list_size(rl) == 5UL);
    }
    i++;
  }
  gt_ensure((*(int*) gt_ranked_list_first(rl)) == 545);
  gt_ensure((*(int*) gt_ranked_list_last(rl)) == 22);
  gt_ranked_list_delete(rl);

  /* --- part 2: randomised comparison against a sorted array ----------- */
  for (round = 0; (GtUword) round < nof_tests; round++) {
    rl = gt_ranked_list_new(nof_best, gt_ranked_list_cmp_teststructs,
                            gt_free_func, NULL);
    reference = gt_array_new(sizeof (GtRankedListTestStruct));

    for (i = 0; i < 200; i++) {
      GtRankedListTestStruct sample, *heap_copy;

      sample.id = (GtUword) i;
      sample.score = (GtUword) (random() % (2*nof_best));
      gt_array_add(reference, sample);

      /* the ranked list gets its own heap copy of the sample */
      heap_copy = gt_malloc(sizeof (*heap_copy));
      heap_copy->id = sample.id;
      heap_copy->score = sample.score;
      gt_ranked_list_insert(rl, heap_copy);

      if ((GtUword) i < nof_best) {
        gt_ensure(gt_ranked_list_size(rl) == (GtUword) i + 1UL);
      }
      else {
        gt_ensure(gt_ranked_list_size(rl) == nof_best);
      }
    }

    gt_array_sort_stable_with_data(reference, gt_ranked_list_cmp_teststructs,
                                   NULL);
    gt_array_reverse(reference);
    gt_ensure(gt_ranked_list_size(rl) == nof_best);

    /* walk list and reference array in lock step */
    it = gt_ranked_list_iter_new_from_first(rl);
    i = 0;
    while ((mystr = gt_ranked_list_iter_next(it)) != NULL) {
      GtRankedListTestStruct *str =
        (GtRankedListTestStruct*) gt_array_get(reference, (GtUword) i++);
      gt_ensure(mystr != NULL);
      gt_ensure(mystr->id == str->id);
      gt_ensure(mystr->score == str->score);
    }
    gt_ranked_list_iter_delete(it);

    gt_array_delete(reference);
    gt_ranked_list_delete(rl);
  }

  return had_err;
}
/*
 * Pull the next node from the input stream and, if it describes an LTR
 * retrotransposon, write its data to the stream's output files.
 *
 * The node is passed on unchanged in *gn. Non-feature nodes are passed
 * through without any output (return 0). For feature nodes the subgraph is
 * run through the visitor ls->lv, which fills ls->element; when that yields
 * a main node, one tab-separated row is written to the tabular file
 * (element and LTR coordinates, TSDs, PPT, PBS, protein domain order), and
 * FASTA entries are written for PPT, PBS, protein domains, 5'/3' LTRs and
 * the complete element. Columns of missing sub-features are left empty.
 *
 * The per-element scratch data (pdoms, pdomorder, seqid) is released before
 * returning, and so is the temporary description string, also when looking
 * up the description fails.
 *
 * Returns 0 on success, otherwise the error code of the failing step.
 */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes; nothing has been allocated yet, so the
       early return needs no cleanup */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* seqid: description truncated to seqnamelen, blanks replaced */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start,
                      rng.end,
                      gt_ltrelement_length(&ls->element),
                      ls->element.seqid,
                      lltr_rng.start,
                      lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element),
                      rltr_rng.start,
                      rltr_rng.end,
                      gt_ltrelement_rightltrlen(&ls->element));
    }
    /* the description is not needed any more; release it on the error
       path as well */
    gt_str_delete(sdesc);

    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                   (GtGenomeNode*) ls->element.leftTSD,
                                   gt_symbol(gt_ft_target_site_duplication),
                                   false,
                                   NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                   (GtGenomeNode*) ls->element.rightTSD,
                                   gt_symbol(gt_ft_target_site_duplication),
                                   false,
                                   NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* last column: distance between the PPT and the LTR selected by
           the PPT's strand */
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                        ppt_rng.start,
                        ppt_rng.end,
                        gt_str_get(seq),
                        GT_STRAND_CHARS[ppt_strand],
                        (ppt_strand == GT_STRAND_FORWARD ?
                            abs((int) (rltr_rng.start - ppt_rng.end)) :
                            abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                        pbs_rng.start,
                        pbs_rng.end,
                        GT_STRAND_CHARS[pbs_strand],
                        gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                        gt_str_get(seq),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "pbsoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "trnaoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* the domain order column follows the element's strand */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                      gt_symbol(gt_ft_long_terminal_repeat),
                                      false,
                                      NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                      gt_symbol(gt_ft_long_terminal_repeat),
                                      false,
                                      NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                         (GtGenomeNode*) ls->element.mainnode,
                                         gt_symbol(gt_ft_LTR_retrotransposon),
                                         false,
                                         NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }

  /* release the per-element scratch data */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
/*
 * Write the hits of one protein domain (`pdomname`) of the current element.
 *
 * Output files are opened on first use and remembered per domain name in
 * the stream's hash maps:
 *   <fileprefix>_pdom_<name>.fas     nucleotide sequence (always)
 *   <fileprefix>_pdom_<name>.ali     alignments   (if write_pdom_alignments)
 *   <fileprefix>_pdom_<name>_aa.fas  amino acids  (if write_pdom_aaseqs)
 *
 * `pdoms` holds the feature nodes of the domain's fragments. With more than
 * one fragment the array is asserted to be sorted by gt_genome_node_cmp()
 * and is reversed in place when its first fragment lies on the reverse
 * strand. The fragments' sequences are gathered through
 * gt_extract_feature_sequence() and written as a single FASTA entry headed
 * by `desc`; amino acid sequences are concatenated likewise. The
 * "pdom_alignment" and "pdom_aaseq" user data of each processed fragment is
 * released.
 *
 * Returns 0 on success, -1 if extracting a fragment's sequence fails.
 */
static int write_pdom(GtLTRdigestFileOutStream *ls, GtArray *pdoms,
                      const char *pdomname, GT_UNUSED GtRegionMapping *rmap,
                      char *desc, GtError *err)
{
  GtFile *seqfile, *alifile = NULL, *aafile = NULL;
  GtStr *nt_seq, *aa_seq;
  GtUword i, total_len = 0;
  int had_err = 0;

  gt_error_check(err);

  nt_seq = gt_str_new();
  aa_seq = gt_str_new();

  /* get protein domain output file */
  seqfile = (GtFile*) gt_hashmap_get(ls->pdomout_files, pdomname);
  if (!seqfile) {
    /* first hit for this domain: create its file */
    char path[GT_MAXFILENAMELEN];
    (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                    "%s_pdom_%s.fas", ls->fileprefix, pdomname);
    seqfile = gt_file_xopen(path, "w+");
    gt_hashmap_add(ls->pdomout_files, gt_cstr_dup(pdomname), seqfile);
  }

  /* get protein alignment output file */
  if (ls->write_pdom_alignments) {
    alifile = (GtFile*) gt_hashmap_get(ls->pdomali_files, pdomname);
    if (!alifile) {
      char path[GT_MAXFILENAMELEN];
      (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                      "%s_pdom_%s.ali", ls->fileprefix, pdomname);
      alifile = gt_file_xopen(path, "w+");
      gt_hashmap_add(ls->pdomali_files, gt_cstr_dup(pdomname), alifile);
    }
  }

  /* get amino acid sequence output file */
  if (ls->write_pdom_aaseqs) {
    aafile = (GtFile*) gt_hashmap_get(ls->pdomaa_files, pdomname);
    if (!aafile) {
      char path[GT_MAXFILENAMELEN];
      (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                      "%s_pdom_%s_aa.fas", ls->fileprefix, pdomname);
      aafile = gt_file_xopen(path, "w+");
      gt_hashmap_add(ls->pdomaa_files, gt_cstr_dup(pdomname), aafile);
    }
  }

  if (gt_array_size(pdoms) > 1UL) {
    GtFeatureNode *first_hit;

    /* fragments are expected in sorted order */
    for (i=1UL; i<gt_array_size(pdoms); i++) {
      gt_assert(gt_genome_node_cmp(*(GtGenomeNode**)gt_array_get(pdoms, i),
                                   *(GtGenomeNode**)gt_array_get(pdoms, i-1))
                >= 0);
    }
    /* reverse-strand domains are written back to front */
    first_hit = *(GtFeatureNode**) gt_array_get(pdoms, 0UL);
    if (gt_feature_node_get_strand(first_hit) == GT_STRAND_REVERSE)
      gt_array_reverse(pdoms);
  }

  /* output protein domain data */
  for (i = 0; i < gt_array_size(pdoms); i++) {
    GtFeatureNode *hit = *(GtFeatureNode**) gt_array_get(pdoms, i);
    GtStr *ali = gt_genome_node_get_user_data((GtGenomeNode*) hit,
                                              "pdom_alignment");
    GtStr *aaseq = gt_genome_node_get_user_data((GtGenomeNode*) hit,
                                                "pdom_aaseq");
    GtRange hit_rng = gt_genome_node_get_range((GtGenomeNode*) hit);

    if (gt_extract_feature_sequence(nt_seq, (GtGenomeNode*) hit,
                                    gt_symbol(gt_ft_protein_match), false,
                                    NULL, NULL, rmap, err)) {
      had_err = -1;
      break;
    }

    if (ls->write_pdom_alignments && ali) {
      char header[BUFSIZ];
      /* write away alignment */
      (void) snprintf(header, BUFSIZ-1,
                      "Protein domain alignment in translated "
                      "sequence for candidate\n'%s':\n\n",
                      desc);
      gt_file_xwrite(alifile, header,
                     (size_t) strlen(header) * sizeof (char));
      gt_file_xwrite(alifile, gt_str_get(ali),
                     (size_t) gt_str_length(ali) * sizeof (char));
      gt_file_xwrite(alifile, "---\n\n", 5 * sizeof (char));
    }

    /* append amino acid sequence */
    if (ls->write_pdom_aaseqs && aaseq)
      gt_str_append_str(aa_seq, aaseq);

    gt_genome_node_release_user_data((GtGenomeNode*) hit, "pdom_alignment");
    gt_genome_node_release_user_data((GtGenomeNode*) hit, "pdom_aaseq");
    total_len += gt_range_length(&hit_rng);
  }

  if (!had_err) {
    gt_fasta_show_entry(desc, gt_str_get(nt_seq), total_len, GT_FSWIDTH,
                        seqfile);
    if (ls->write_pdom_aaseqs) {
      gt_fasta_show_entry(desc, gt_str_get(aa_seq), gt_str_length(aa_seq),
                          GT_FSWIDTH, aafile);
    }
  }

  gt_str_delete(nt_seq);
  gt_str_delete(aa_seq);
  return had_err;
}