/* Lua binding: expects two genome node userdata arguments. Both must be
   feature nodes (otherwise a Lua argument error is raised); they are handed
   to gt_feature_node_remove_leaf() as parent (argument 1) and leaf
   (argument 2). Pushes no results onto the Lua stack. */
static int feature_node_lua_remove_leaf(lua_State *L)
{
  GtGenomeNode **parent_ud = check_genome_node(L, 1);
  GtGenomeNode **leaf_ud = check_genome_node(L, 2);
  GtFeatureNode *parent_fn, *leaf_fn;

  parent_fn = gt_feature_node_try_cast(*parent_ud);
  luaL_argcheck(L, parent_fn, 1, "not a feature node");
  leaf_fn = gt_feature_node_try_cast(*leaf_ud);
  luaL_argcheck(L, leaf_fn, 2, "not a feature node");

  gt_feature_node_remove_leaf(parent_fn, leaf_fn);
  return 0;
}
/* Lua binding: expects two genome node userdata arguments which must both be
   feature nodes (otherwise a Lua argument error is raised). A new reference
   to the child (argument 2) is taken with gt_genome_node_ref() and passed to
   gt_feature_node_add_child() together with the parent (argument 1).
   Pushes no results onto the Lua stack. */
static int genome_node_lua_add_child(lua_State *L)
{
  GtGenomeNode **parent_ud = check_genome_node(L, 1);
  GtGenomeNode **child_ud = check_genome_node(L, 2);
  GtFeatureNode *parent_fn, *child_fn;
  GtGenomeNode *child_ref;

  parent_fn = gt_feature_node_try_cast(*parent_ud);
  luaL_argcheck(L, parent_fn, 1, "not a feature node");
  child_fn = gt_feature_node_try_cast(*child_ud);
  luaL_argcheck(L, child_fn, 2, "not a feature node");

  child_ref = gt_genome_node_ref((GtGenomeNode*) child_fn);
  gt_feature_node_add_child(parent_fn, (GtFeatureNode*) child_ref);
  return 0;
}
/* If the sequence ID of <gn> carries the MD5 prefix, look up the matching
   description via <region_mapping>, derive a regular sequence ID from it with
   gt_regular_seqid_save() and install that ID: feature nodes are traversed
   with m2i_change_seqid(), all other nodes are changed directly.
   Nodes without the MD5 prefix are left untouched.
   Returns 0 on success, otherwise the error code of the failing call. */
static int md5_to_seqid(GtGenomeNode *gn, GtRegionMapping *region_mapping,
                        GtError *err)
{
  GtStr *old_seqid, *description, *regular_seqid;
  int had_err;

  gt_error_check(err);
  gt_assert(gn && region_mapping);

  old_seqid = gt_genome_node_get_seqid(gn);
  if (!gt_md5_seqid_has_prefix(gt_str_get(old_seqid)))
    return 0; /* not an MD5 seqid -> nothing to change */

  description = gt_str_new();
  had_err = gt_region_mapping_get_description(region_mapping, description,
                                              old_seqid, err);
  if (had_err) {
    gt_str_delete(description);
    return had_err;
  }

  regular_seqid = gt_str_new();
  gt_regular_seqid_save(regular_seqid, description);
  if (gt_feature_node_try_cast(gn)) {
    /* feature node: visit it via the traversal callback */
    M2IChangeSeqidInfo info;
    info.new_seqid = regular_seqid;
    info.region_mapping = region_mapping;
    had_err = gt_feature_node_traverse_children((GtFeatureNode*) gn, &info,
                                                m2i_change_seqid, true, err);
  }
  else
    gt_genome_node_change_seqid(gn, regular_seqid);

  gt_str_delete(regular_seqid);
  gt_str_delete(description);
  return had_err;
}
/* Lua binding taking (feature node, type string, join flag, region mapping).
   Calls gt_extract_feature_sequence() for the node and pushes the resulting
   sequence as a Lua string, or nil if the extracted sequence is empty.
   On extraction failure the error object is handed to gt_lua_error(). */
static int feature_node_lua_extract_sequence(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);
  GtRegionMapping **mapping_ud;
  GtStr *seq;
  GtError *err;
  const char *type;
  bool join;

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");
  type = luaL_checkstring(L, 2);
  join = lua_toboolean(L, 3);
  mapping_ud = check_region_mapping(L, 4);

  err = gt_error_new();
  seq = gt_str_new();
  if (gt_extract_feature_sequence(seq, *node_ud, type, join, NULL, NULL,
                                  *mapping_ud, err)) {
    gt_str_delete(seq);
    return gt_lua_error(L, err);
  }

  if (gt_str_length(seq) == 0)
    lua_pushnil(L);
  else
    lua_pushstring(L, gt_str_get(seq));

  gt_str_delete(seq);
  gt_error_delete(err);
  return 1;
}
/* Lua binding: pushes the result of gt_feature_node_get_source() for the
   feature node given as argument 1. Raises a Lua argument error if the
   argument is not a feature node. */
static int feature_node_lua_get_source(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");

  lua_pushstring(L, gt_feature_node_get_source(fn));
  return 1;
}
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *error) { AgnFilterStream *stream; GtFeatureNode *fn; int had_err; gt_error_check(error); stream = filter_stream_cast(ns); if(gt_queue_size(stream->cache) > 0) { *gn = gt_queue_get(stream->cache); return 0; } while(1) { had_err = gt_node_stream_next(stream->in_stream, gn, error); if(had_err) return had_err; if(!*gn) return 0; fn = gt_feature_node_try_cast(*gn); if(!fn) return 0; GtFeatureNode *current; GtFeatureNodeIterator *iter = gt_feature_node_iterator_new(fn); for(current = gt_feature_node_iterator_next(iter); current != NULL; current = gt_feature_node_iterator_next(iter)) { const char *type = gt_feature_node_get_type(current); bool keepfeature = false; if(gt_hashmap_get(stream->typestokeep, type) != NULL) keepfeature = true; if(keepfeature) { gt_genome_node_ref((GtGenomeNode *)current); gt_queue_add(stream->cache, current); } } gt_feature_node_iterator_delete(iter); gt_genome_node_delete((GtGenomeNode *)fn); if(gt_queue_size(stream->cache) > 0) { *gn = gt_queue_get(stream->cache); return 0; } } return 0; }
/* Lua binding: calls gt_gff3_output_leading() with the feature node given as
   argument 1 and a NULL second argument. Raises a Lua argument error if the
   argument is not a feature node. Pushes no results. */
static int feature_node_lua_output_leading(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");

  gt_gff3_output_leading(fn, NULL);
  return 0;
}
/* Lua binding: pushes a one-character string holding the GT_STRAND_CHARS
   entry selected by the strand of the feature node given as argument 1.
   Raises a Lua argument error if the argument is not a feature node. */
static int feature_node_lua_get_strand(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);
  char strand_str[2] = { '\0', '\0' };

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");

  strand_str[0] = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  lua_pushstring(L, strand_str);
  return 1;
}
/* Lua binding: pushes the score of the feature node given as argument 1 as a
   Lua number, or nil if gt_feature_node_score_is_defined() reports that no
   score is set. Raises a Lua argument error if the argument is not a feature
   node. */
static int feature_node_lua_get_score(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");

  if (!gt_feature_node_score_is_defined(fn)) {
    lua_pushnil(L);
    return 1;
  }
  lua_pushnumber(L, gt_feature_node_get_score(fn));
  return 1;
}
/* Lua binding taking (feature node, source string). Wraps the string in a
   temporary GtStr, passes it to gt_feature_node_set_source() and releases the
   temporary again. Raises a Lua argument error if argument 1 is not a
   feature node. Pushes no results. */
static int feature_node_lua_set_source(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  GtFeatureNode *fn = gt_feature_node_try_cast(*node_ud);
  GtStr *source_str;

  /* make sure we get a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");

  source_str = gt_str_new_cstr(luaL_checkstring(L, 2));
  gt_feature_node_set_source(fn, source_str);
  gt_str_delete(source_str);
  return 0;
}
/* Lua binding taking (feature node, attribute name). Pushes the value
   returned by gt_feature_node_get_attribute() as a Lua string, or nil if
   that value is NULL. Raises a Lua argument error if argument 1 is not a
   feature node. */
static int feature_node_lua_get_attribute(lua_State *L)
{
  GtGenomeNode **node_ud = check_genome_node(L, 1);
  const char *name = luaL_checkstring(L, 2);
  const char *value;
  GtFeatureNode *fn;

  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*node_ud);
  luaL_argcheck(L, fn, 1, "not a feature node");

  value = gt_feature_node_get_attribute(fn, name);
  if (value == NULL)
    lua_pushnil(L);
  else
    lua_pushstring(L, value);
  return 1;
}
/* Node stream callback: forwards the next node of the input stream in <*gn>.
   If that node is a feature node, an additional reference to it is stored in
   the stream's <nodes> array. Returns the error code of the input stream. */
static int gt_array_out_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                    GtError *err)
{
  GtArrayOutStream *aos;
  GtGenomeNode *kept;
  int had_err;

  gt_error_check(err);
  aos = gt_array_out_stream_cast(gs);

  had_err = gt_node_stream_next(aos->in_stream, gn, err);
  if (had_err || *gn == NULL)
    return had_err;

  if (gt_feature_node_try_cast(*gn)) {
    /* the array holds its own reference to the node */
    kept = gt_genome_node_ref(*gn);
    gt_array_add(aos->nodes, kept);
  }
  return had_err;
}
/* Lua binding: pushes a table (indexed from 1) with the genome nodes that
   gt_feature_node_get_exons() collects for the feature node given as
   argument 1. Each node pushed to Lua carries its own reference.
   Raises a Lua argument error if the argument is not a feature node. */
static int feature_node_lua_get_exons(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtFeatureNode *fn;
  GtArray *exons;
  GtUword i;

  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");

  /* Allocate only after the argument has been validated: luaL_argcheck()
     does not return on failure, so an array created earlier would leak. */
  exons = gt_array_new(sizeof (GtGenomeNode*));
  gt_feature_node_get_exons(fn, exons);

  lua_newtable(L);
  for (i = 0; i < gt_array_size(exons); i++) {
    lua_pushnumber(L, i+1); /* Lua tables are 1-based */
    gt_lua_genome_node_push(L, (GtGenomeNode*)
                            gt_genome_node_ref(*(GtGenomeNode**)
                                               gt_array_get(exons, i)));
    lua_rawset(L, -3);
  }
  gt_array_delete(exons);
  return 1;
}
/* Node stream callback. On the first call the complete input stream is
   consumed: feature nodes carrying a "Target" attribute are handed to
   filter_targetbest(), every other node is appended to the <trees> list.
   Afterwards (and on all later calls) the nodes stored in <trees> are
   delivered one per call; <*gn> is set to NULL once the list is exhausted.
   Returns 0 on success or the error code of the input stream. */
static int targetbest_filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                         GtError *err)
{
  GtTargetbestFilterStream *tfs;
  GtGenomeNode *node;
  int had_err = 0;

  gt_error_check(err);
  tfs = targetbest_filter_stream_cast(gs);

  if (!tfs->in_stream_processed) {
    for (;;) {
      had_err = gt_node_stream_next(tfs->in_stream, &node, err);
      if (had_err || !node)
        break;
      if (gt_feature_node_try_cast(node) &&
          gt_feature_node_get_attribute((GtFeatureNode*) node, "Target")) {
        filter_targetbest((GtFeatureNode*) node, tfs->trees,
                          tfs->target_to_elem);
      }
      else
        gt_dlist_add(tfs->trees, node);
    }
    tfs->next = gt_dlist_first(tfs->trees);
    tfs->in_stream_processed = true;
  }

  if (had_err)
    return had_err;

  gt_assert(tfs->in_stream_processed);
  if (!tfs->next) {
    *gn = NULL;
    return 0;
  }
  *gn = gt_dlistelem_get_data(tfs->next);
  tfs->next = gt_dlistelem_next(tfs->next);
  return 0;
}
/* Advance the classifier to the next feature node of its annotation stream.
   A pending feature node iterator is deleted first. Then nodes are pulled
   from the stream (deleting the previously held node each time) until a
   feature node is found, for which a fresh iterator is created.
   On stream error or end of stream, <fn> and <gn> are reset to NULL.
   Returns the error code of the annotation stream. */
static int gt_seqpos_classifier_next_fn(GtSeqposClassifier *seqpos_classifier,
                                        GtError *err)
{
  GtSeqposClassifier *sc = seqpos_classifier;
  int had_err = 0;

  gt_assert(seqpos_classifier != NULL);

  if (sc->fni != NULL) {
    gt_feature_node_iterator_delete(sc->fni);
    sc->fni = NULL;
  }

  for (;;) {
    if (sc->gn != NULL)
      gt_genome_node_delete(sc->gn);

    had_err = gt_node_stream_next(sc->annotation_stream, &sc->gn, err);
    if (had_err != 0 || sc->gn == NULL) {
      sc->fn = NULL;
      sc->gn = NULL;
      return had_err;
    }

    sc->fn = gt_feature_node_try_cast(sc->gn);
    if (sc->fn != NULL) {
      sc->fni = gt_feature_node_iterator_new(sc->fn);
      return had_err;
    }
    /* not a feature node: try the next one */
  }
}
/* Node stream callback. Nodes waiting in the output queue are delivered
   first. Otherwise nodes are pulled from the merge stream until a node can
   be delivered:
   - SNV/SNP features overlapping the current gene range are queued (with an
     extra reference) in <snps>; an SNV/SNP outside that range triggers
     snp_annotator_stream_process_current_gene() and is put on the output
     queue itself;
   - gene features are collected (with an extra reference) in <cur_gene_set>
     as long as they overlap the current gene range, which is widened
     accordingly; a non-overlapping gene triggers processing of the current
     set and starts a new one; the pulled gene node is deleted afterwards;
   - nodes that are not feature nodes trigger processing of the current set
     and are put on the output queue if that succeeded.
   Note that <*gn> is not written when the merge stream is exhausted.
   Returns 0 on success or the first error code encountered. */
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool cluster_done = false;
  GtGenomeNode *cur = NULL;
  GtFeatureNode *cur_fn = NULL;
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);

  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  }

  while (!had_err && !cluster_done) {
    GtGenomeNode *addgn;
    const char *type;
    GtRange new_rng;

    had_err = gt_node_stream_next(sas->merge_stream, &cur, err);
    /* stop if stream is at the end */
    if (had_err || !cur)
      break;

    cur_fn = gt_feature_node_try_cast(cur);
    if (!cur_fn) {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err)
        gt_queue_add(sas->outqueue, cur);
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        cluster_done = true;
      }
      continue;
    }

    /* process all feature nodes */
    type = gt_feature_node_get_type(cur_fn);
    new_rng = gt_genome_node_get_range(cur);

    if (type == snv_type || type == snp_type) {
      /* -----> this is a SNP <----- */
      if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
        /* it falls into the currently observed range */
        gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) cur_fn));
      }
      else {
        /* SNP outside a gene, this cluster is done;
           add to out queue and start serving */
        gt_assert(gt_queue_size(sas->outqueue) == 0);
        had_err = snp_annotator_stream_process_current_gene(sas, err);
        gt_queue_add(sas->outqueue, cur);
        if (gt_queue_size(sas->outqueue) > 0) {
          *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
          cluster_done = true;
        }
      }
    }
    else if (type == gene_type) {
      /* -----> this is a gene <----- */
      if (gt_array_size(sas->cur_gene_set) == 0UL) {
        /* new overlapping gene cluster */
        addgn = gt_genome_node_ref(cur);
        gt_array_add(sas->cur_gene_set, addgn);
        sas->cur_gene_range = gt_genome_node_get_range(cur);
      }
      else if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
        /* gene overlaps with current one, add to cluster */
        addgn = gt_genome_node_ref(cur);
        gt_array_add(sas->cur_gene_set, addgn);
        sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &new_rng);
      }
      else {
        /* finish current cluster and start a new one */
        had_err = snp_annotator_stream_process_current_gene(sas, err);
        if (!had_err) {
          addgn = gt_genome_node_ref(cur);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(cur);
        }
        if (gt_queue_size(sas->outqueue) > 0) {
          *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
          cluster_done = true;
        }
      }
      /* from now on, genes are kept in gene cluster arrays only */
      gt_genome_node_delete(cur);
    }
  }
  return had_err;
}
/* Node stream callback: passes the next node of the input stream through in
   <*gn> and, for feature nodes, writes the collected LTR element data to the
   stream's output files.
   Non-feature nodes are passed through without any output.
   The stream's <element> is reset at the start of every call. All features
   reachable from the pulled node are visited with the stream's visitor
   <lv>; judging by the original comment this fills <element> -- the visitor
   itself is not visible here.
   If <element.mainnode> is set afterwards, one tab-separated line is written
   to <tabout_file> (element/LTR coordinates, TSDs, PPT, PBS, protein domain
   order) and FASTA entries are written for PPT, PBS, both LTRs and the
   complete element.
   Returns 0 on success or the first error code encountered. */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn) {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes; nothing has been allocated yet, so
       returning early here does not leak */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv, err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL) {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);
    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* sequence name: description truncated to <seqnamelen> characters,
         with blanks replaced by underscores */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element, ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                          ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start,
                      rng.end,
                      gt_ltrelement_length(&ls->element),
                      ls->element.seqid,
                      lltr_rng.start,
                      lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element),
                      rltr_rng.start,
                      rltr_rng.end,
                      gt_ltrelement_rightltrlen(&ls->element));
    }
    /* release the description on the success AND the error path; it used to
       be freed only on success and leaked when the lookup failed */
    gt_str_delete(sdesc);

    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                    (GtGenomeNode*) ls->element.leftTSD,
                                    gt_symbol(gt_ft_target_site_duplication),
                                    false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    }
    else
      gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                    (GtGenomeNode*) ls->element.rightTSD,
                                    gt_symbol(gt_ft_target_site_duplication),
                                    false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    }
    else
      gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL) {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);
      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* last column: distance between the PPT and the LTR chosen by the
           PPT strand */
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                        ppt_rng.start,
                        ppt_rng.end,
                        gt_str_get(seq),
                        GT_STRAND_CHARS[ppt_strand],
                        (ppt_strand == GT_STRAND_FORWARD ?
                            abs((int) (rltr_rng.start - ppt_rng.end)) :
                            abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    }
    else
      gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL) {
      GtStrand pbs_strand;
      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) ls->element.pbs,
                                          gt_symbol(gt_ft_primer_binding_site),
                                          false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                        pbs_rng.start,
                        pbs_rng.end,
                        GT_STRAND_CHARS[pbs_strand],
                        gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                        gt_str_get(seq),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "pbsoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "trnaoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "edist"));
      }
      gt_str_reset(seq);
    }
    else
      gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL) {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* the domain order string is reported relative to the element strand */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode)) {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) ls->element.mainnode,
                                          gt_symbol(gt_ft_LTR_retrotransposon),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }

  /* per-element resources; all of these are NULL if nothing was collected */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
/* Node stream callback which replaces sequence IDs according to the stream's
   mapping.
   On the first call, nodes are read from the input stream into a buffer up to
   and including the first node that is not a region node. The sequence IDs
   of the buffered nodes are mapped (a failed mapping yields -1), the buffer
   is sorted, and equal neighbouring region nodes are consolidated; the
   absorbed entries are deleted and set to NULL in the buffer.
   Buffered non-NULL nodes are then delivered one per call. Once the buffer is
   drained, nodes are read directly from the input stream and their sequence
   IDs are mapped before delivery.
   Returns 0 on success or an error code. */
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **prev, **cur;
  GtStr *mapped;
  unsigned long idx;
  int rval, had_err = 0;

  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    /* buffer nodes up to and including the first non-region node */
    for (;;) {
      had_err = gt_node_stream_next(cs->in_stream, &node, err);
      if (had_err || !node)
        break;
      gt_array_add(cs->gt_genome_node_buffer, node);
      if (!gt_region_node_try_cast(node))
        break; /* no more sequence regions */
    }

    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (idx = 0;
         !had_err && idx < gt_array_size(cs->gt_genome_node_buffer);
         idx++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, idx);
      if (!gt_genome_node_get_seqid(node))
        continue;
      mapped = gt_mapping_map_string(cs->chseqids_mapping,
                                gt_str_get(gt_genome_node_get_seqid(node)),
                                err);
      if (!mapped) {
        had_err = -1; /* ends the loop via its condition */
        continue;
      }
      if (gt_feature_node_try_cast(node)) {
        rval = gt_genome_node_traverse_children(node, mapped,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(node, mapped);
      gt_str_delete(mapped);
    }

    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);

    /* consolidate them */
    for (idx = 1;
         !had_err && idx + 1 < gt_array_size(cs->gt_genome_node_buffer);
         idx++) {
      prev = gt_array_get(cs->gt_genome_node_buffer, idx-1);
      cur = gt_array_get(cs->gt_genome_node_buffer, idx);
      if (!gt_genome_nodes_are_equal_region_nodes(*prev, *cur))
        continue;
      gt_region_node_consolidate(gt_region_node_cast(*cur),
                                 gt_region_node_cast(*prev));
      gt_genome_node_delete(*prev);
      *prev = NULL;
    }
    cs->sequence_regions_processed = true;
  }

  if (had_err)
    return had_err;

  /* return non-null nodes from buffer */
  while (cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                          cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  /* buffer drained: serve directly from the input stream */
  had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (had_err || !*gn)
    return had_err;

  if (gt_genome_node_get_seqid(*gn)) {
    mapped = gt_mapping_map_string(cs->chseqids_mapping,
                                   gt_str_get(gt_genome_node_get_seqid(*gn)),
                                   err);
    gt_assert(mapped); /* is always defined, because an undefined mapping
                          would be caught earlier */
    if (gt_feature_node_try_cast(*gn)) {
      rval = gt_genome_node_traverse_children(*gn, mapped, change_sequence_id,
                                              true, err);
      gt_assert(!rval); /* change_sequence_id() is sane */
    }
    else
      gt_genome_node_change_seqid(*gn, mapped);
    gt_str_delete(mapped);
  }
  return had_err;
}
/* Annotate features with the number of the cluster they belong to.
   First, a map from description strings to feature nodes is built from
   <nodes>: for every repeat_region feature a prefix "<seqid>_<id>" is
   remembered (id parsed from its "ID" attribute), and every subsequent
   feature of the requested kind that spans at least 10 positions is stored
   under the key "<prefix>_<start>_<end>". The requested kind is either a
   protein_match whose "name" attribute equals <feature>, or a feature whose
   type equals <feature> ("lLTR"/"rLTR" select the long_terminal_repeat
   type).
   Then, for every element of every cluster in <cs>, the description of the
   corresponding sequence in <encseq> is looked up in that map and the
   feature found gets the attribute "clid" set to the cluster number.
   Elements without a matching feature are skipped.
   The function as written always returns 0. */
static int cluster_annotate_nodes(GtClusteredSet *cs, GtEncseq *encseq,
                                  const char *feature, GtArray *nodes,
                                  GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL, *tmp;
  GtClusteredSetIterator *csi = NULL;
  GtGenomeNode *gn;
  GtHashmap *desc2node;
  GtStr *seqid = NULL;
  int had_err = 0;
  unsigned long num_of_clusters, i, elm;
  const char *fnt = NULL;
  char buffer[BUFSIZ], *real_feature;

  gt_error_check(err);

  /* the prefix is read when building keys; make sure it is a valid (empty)
     string even if no repeat_region has been seen yet */
  buffer[0] = '\0';

  if ((strcmp(feature, "lLTR") == 0) || (strcmp(feature, "rLTR") == 0))
    real_feature = gt_cstr_dup(gt_ft_long_terminal_repeat);
  else
    real_feature = gt_cstr_dup(feature);

  desc2node = gt_hashmap_new(GT_HASH_STRING, free_hash, NULL);
  for (i = 0; i < gt_array_size(nodes); i++) {
    gn = *(GtGenomeNode**) gt_array_get(nodes, i);
    if (gt_feature_node_try_cast(gn) == NULL)
      continue;
    fni = gt_feature_node_iterator_new((GtFeatureNode*) gn);
    while ((curnode = gt_feature_node_iterator_next(fni)) != NULL) {
      char header[BUFSIZ];
      fnt = gt_feature_node_get_type(curnode);
      if (strcmp(fnt, gt_ft_repeat_region) == 0) {
        const char *rid;
        unsigned long id = 0; /* stays 0 if the ID cannot be parsed */
        seqid = gt_genome_node_get_seqid((GtGenomeNode*) curnode);
        rid = gt_feature_node_get_attribute(curnode, "ID");
        /* a missing "ID" attribute must not be handed to sscanf() */
        if (rid != NULL)
          (void) sscanf(rid, "repeat_region%lu", &id);
        (void) snprintf(buffer, BUFSIZ, "%s_%lu", gt_str_get(seqid), id);
      }
      else if (strcmp(fnt, gt_ft_protein_match) == 0) {
        GtRange range;
        const char *attr;
        attr = gt_feature_node_get_attribute(curnode, "name");
        if (!attr)
          continue;
        if (strcmp(feature, attr) != 0)
          continue;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header),
                       (void*) curnode);
      }
      else if (strcmp(fnt, real_feature) == 0) {
        GtRange range;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header),
                       (void*) curnode);
      }
    }
    gt_feature_node_iterator_delete(fni);
  }
  gt_free(real_feature);

  num_of_clusters = gt_clustered_set_num_of_clusters(cs, err);
  for (i = 0; i < num_of_clusters; i++) {
    csi = gt_clustered_set_get_iterator(cs, i ,err);
    if (csi != NULL) {
      while (!had_err &&
             (gt_clustered_set_iterator_next(csi, &elm, err)
                != GT_CLUSTERED_SET_ITERATOR_STATUS_END)) {
        char clid[BUFSIZ];
        const char *encseqdesc;
        char *encseqid;
        unsigned long desclen;
        /* the description is delivered with an explicit length, so build a
           NUL-terminated copy to use as lookup key */
        encseqdesc = gt_encseq_description(encseq, &desclen, elm);
        encseqid = gt_calloc((size_t) (desclen + 1), sizeof (char));
        (void) strncpy(encseqid, encseqdesc, (size_t) desclen);
        encseqid[desclen] = '\0';
        tmp = (GtFeatureNode*) gt_hashmap_get(desc2node, (void*) encseqid);
        /* no feature registered under this description: nothing to tag */
        if (tmp != NULL) {
          (void) snprintf(clid, BUFSIZ, "%lu", i);
          gt_feature_node_set_attribute(tmp, "clid", clid);
        }
        gt_free(encseqid);
      }
      /* only delete an iterator that was actually created */
      gt_clustered_set_iterator_delete(csi, err);
    }
    csi = NULL;
  }
  gt_hashmap_delete(desc2node);
  return had_err;
}