/*
 * Builds a SNP annotator visitor for <gene>, which is asserted to be a feature
 * node of type gene.  <gene> and <rmap> are stored through
 * gt_genome_node_ref() and gt_region_mapping_ref() respectively.
 *
 * When <trans_table> is NULL, a table obtained from
 * gt_trans_table_new_standard() is used and flagged as owned by the visitor
 * (own_tt); otherwise the supplied table is used and own_tt is false.
 *
 * Returns the new visitor, or NULL (after deleting the partially built
 * visitor) if no translation table is available or if
 * gt_snp_annotator_visitor_prepare_gene() returns a non-zero status.
 */
GtNodeVisitor* gt_snp_annotator_visitor_new(GtFeatureNode *gene,
                                            GtTransTable *trans_table,
                                            GtRegionMapping *rmap,
                                            GtError *err)
{
  GtNodeVisitor *visitor;
  GtSNPAnnotatorVisitor *annotator;

  gt_assert(gene && gt_feature_node_get_type(gene) == gt_symbol(gt_ft_gene));

  visitor = gt_node_visitor_create(gt_snp_annotator_visitor_class());
  annotator = snp_annotator_visitor_cast(visitor);

  /* referenced inputs */
  annotator->gene = (GtFeatureNode*) gt_genome_node_ref((GtGenomeNode*) gene);
  annotator->rmap = gt_region_mapping_ref(rmap);

  /* feature type symbols looked up once at construction time */
  annotator->mRNA_type = gt_symbol(gt_ft_mRNA);
  annotator->CDS_type = gt_symbol(gt_ft_CDS);
  annotator->SNV_type = gt_symbol(gt_ft_SNV);
  annotator->SNP_type = gt_symbol(gt_ft_SNP);

  annotator->rnaseqs = gt_hashmap_new(GT_HASH_DIRECT, NULL, gt_free_func);

  /* fall back to the standard translation table if the caller gave none */
  if (trans_table == NULL) {
    annotator->tt = gt_trans_table_new_standard(err);
    annotator->own_tt = true;
  }
  else {
    annotator->tt = trans_table;
    annotator->own_tt = false;
  }

  if (annotator->tt
        && gt_snp_annotator_visitor_prepare_gene(annotator, err) == 0) {
    return visitor;
  }

  gt_node_visitor_delete(visitor);
  return NULL;
}
/*
 * Creates a CDS check visitor.  Its cds_features hashmap is created with
 * GT_HASH_DIRECT and NULL for both remaining gt_hashmap_new() arguments.
 */
GtNodeVisitor* gt_cds_check_visitor_new(void)
{
  GtNodeVisitor *visitor;
  GtCDSCheckVisitor *checker;

  visitor = gt_node_visitor_create(gt_cds_check_visitor_class());
  checker = cds_check_visitor_cast(visitor);
  checker->cds_features = gt_hashmap_new(GT_HASH_DIRECT, NULL, NULL);

  return visitor;
}
GtNodeVisitor* gt_stat_visitor_new(bool gene_length_distri, bool gene_score_distri, bool exon_length_distri, bool exon_number_distri, bool intron_length_distri, bool cds_length_distri, bool used_sources) { GtNodeVisitor *nv = gt_node_visitor_create(gt_stat_visitor_class()); GtStatVisitor *sv = stat_visitor_cast(nv); if (gene_length_distri) sv->gene_length_distribution = gt_disc_distri_new(); if (gene_score_distri) sv->gene_score_distribution = gt_disc_distri_new(); if (exon_length_distri) sv->exon_length_distribution = gt_disc_distri_new(); if (exon_number_distri) sv->exon_number_distribution = gt_disc_distri_new(); if (intron_length_distri) sv->intron_length_distribution = gt_disc_distri_new(); if (cds_length_distri) sv->cds_length_distribution = gt_disc_distri_new(); if (used_sources) sv->used_sources = gt_cstr_table_new(); return nv; }
/*
 * Creates a locus map visitor and records the two file handles it was given:
 * <genefh> in the genefh field and <mrnafh> in the mrnafh field.  The handles
 * are stored as passed; nothing is opened or closed here.
 */
GtNodeVisitor *agn_locus_map_visitor_new(FILE *genefh, FILE *mrnafh)
{
  GtNodeVisitor *visitor;
  AgnLocusMapVisitor *mapper;

  visitor = gt_node_visitor_create(locus_map_visitor_class());
  mapper = locus_map_visitor_cast(visitor);

  mapper->mrnafh = mrnafh;
  mapper->genefh = genefh;

  return visitor;
}
/*
 * Creates a region coverage visitor that remembers <max_feature_dist>.
 * The region2rangelist hashmap is created with GT_HASH_STRING, gt_free_func
 * and gt_array_delete as arguments to gt_hashmap_new().
 */
GtNodeVisitor* gt_regioncov_visitor_new(GtUword max_feature_dist)
{
  GtNodeVisitor *visitor;
  GtRegionCovVisitor *coverage;

  visitor = gt_node_visitor_create(gt_regioncov_visitor_class());
  coverage = gt_regioncov_visitor_cast(visitor);

  coverage->region2rangelist = gt_hashmap_new(GT_HASH_STRING,
                                              gt_free_func,
                                              (GtFree) gt_array_delete);
  coverage->max_feature_dist = max_feature_dist;

  return visitor;
}
/*
 * Creates an MD5-to-seqid visitor operating on <region_mapping>.  The pointer
 * is stored as passed; no reference function is called on it here.
 */
GtNodeVisitor* gt_md5_to_id_visitor_new(GtRegionMapping *region_mapping)
{
  GtNodeVisitor *visitor =
    gt_node_visitor_create(gt_md5_to_id_visitor_class());
  GtMD5ToSeqidsVisitor *converter = md5_to_id_visitor_cast(visitor);

  converter->region_mapping = region_mapping;

  return visitor;
}
/*
 * Creates a GFF3 visitor whose output target is the string <outstr> rather
 * than a file: after gt_gff3_visitor_init() has run, outfp is set to NULL and
 * outstr is set to the result of gt_str_ref(outstr).
 */
GtNodeVisitor* gt_gff3_visitor_new_to_str(GtStr *outstr)
{
  GtNodeVisitor *visitor;
  GtGFF3Visitor *writer;

  visitor = gt_node_visitor_create(gt_gff3_visitor_class());
  writer = gff3_visitor_cast(visitor);

  /* common setup first, then select the output target */
  gt_gff3_visitor_init(writer);
  writer->outfp = NULL;
  writer->outstr = gt_str_ref(outstr);

  return visitor;
}
/*
 * Creates a GFF3 visitor whose output target is the file <outfp>: after
 * gt_gff3_visitor_init() has run, outfp is set to <outfp> (stored as passed)
 * and outstr is set to NULL.
 */
GtNodeVisitor* gt_gff3_visitor_new(GtFile *outfp)
{
  GtNodeVisitor *visitor;
  GtGFF3Visitor *writer;

  visitor = gt_node_visitor_create(gt_gff3_visitor_class());
  writer = gff3_visitor_cast(visitor);

  /* common setup first, then select the output target */
  gt_gff3_visitor_init(writer);
  writer->outfp = outfp;
  writer->outstr = NULL;

  return visitor;
}
/*
 * Creates a GTF visitor writing to <outfp> (stored as passed).  The gene_id
 * counter starts at 0 and two arrays with element size
 * sizeof (GtGenomeNode*) are allocated for exon and CDS features.
 */
GtNodeVisitor* gt_gtf_visitor_new(GtFile *outfp)
{
  GtNodeVisitor *visitor;
  GtGTFVisitor *writer;

  visitor = gt_node_visitor_create(gt_gtf_visitor_class());
  writer = gtf_visitor_cast(visitor);

  writer->outfp = outfp;
  writer->gene_id = 0;
  writer->exon_features = gt_array_new(sizeof (GtGenomeNode*));
  writer->CDS_features = gt_array_new(sizeof (GtGenomeNode*));

  return visitor;
}
/*
 * Creates a CDS inference visitor that uses <logger> (stored as passed).
 * The cdscounter field starts at 0 and the source field at NULL.
 */
GtNodeVisitor *agn_infer_cds_visitor_new(GtLogger *logger)
{
  GtNodeVisitor *visitor = gt_node_visitor_create(infer_cds_visitor_class());
  AgnInferCDSVisitor *inferrer = infer_cds_visitor_cast(visitor);

  inferrer->source = NULL;
  inferrer->cdscounter = 0;
  inferrer->logger = logger;

  return visitor;
}
/*
 * Creates an LTR input check visitor with its only_ltrs flag set to false.
 * The freshly created visitor is asserted to be non-NULL before it is cast.
 */
GtNodeVisitor* gt_ltr_input_check_visitor_new(void)
{
  GtLTRInputCheckVisitor *checker;
  GtNodeVisitor *visitor = NULL;

  visitor = gt_node_visitor_create(gt_ltr_input_check_visitor_class());
  gt_assert(visitor);

  checker = gt_ltr_input_check_visitor_cast(visitor);
  checker->only_ltrs = false;

  return visitor;
}
/*
 * Creates a feature visitor bound to the feature index <fi>, which is
 * asserted to be non-NULL.  The index is stored through
 * gt_feature_index_ref().
 */
GtNodeVisitor* gt_feature_visitor_new(GtFeatureIndex *fi)
{
  GtNodeVisitor *gv;
  GtFeatureVisitor *feature_visitor;
  gt_assert(fi != NULL);
  gv = gt_node_visitor_create(gt_feature_visitor_class());
  feature_visitor = feature_visitor_cast(gv);
  /* validate the cast result before the first dereference, not after it */
  gt_assert(feature_visitor != NULL);
  feature_visitor->feature_index = gt_feature_index_ref(fi);
  return gv;
}
/*
 * Creates an LTR visitor that fills in <element>, which is asserted to be
 * non-NULL.  The element pointer is stored as passed.
 */
static GtNodeVisitor* gt_ltr_visitor_new(GtLTRElement *element)
{
  GtNodeVisitor *nv;
  GtLTRVisitor *lv;
  gt_assert(element);
  nv = gt_node_visitor_create(gt_ltr_visitor_class());
  lv = gt_ltr_visitor_cast(nv);
  /* validate the cast result before the first dereference, not after it */
  gt_assert(lv);
  lv->element = element;
  return nv;
}
GtNodeVisitor* gt_inter_feature_visitor_new(const char *outside_type, const char *inter_type) { GtInterFeatureVisitor *aiv; GtNodeVisitor *nv; gt_assert(outside_type && inter_type); nv = gt_node_visitor_create(gt_inter_feature_visitor_class()); aiv = gt_inter_feature_visitor_cast(nv); aiv->outside_type = gt_cstr_dup(outside_type); aiv->inter_type = gt_cstr_dup(inter_type); return nv; }
GtNodeVisitor* gt_add_ids_visitor_new(bool ensure_sorting) { GtNodeVisitor *nv = gt_node_visitor_create(gt_add_ids_visitor_class()); GtAddIDsVisitor *add_ids_visitor = add_ids_visitor_cast(nv); add_ids_visitor->node_buffer = gt_queue_new(); add_ids_visitor->defined_seqids = gt_cstr_table_new(); add_ids_visitor->undefined_sequence_regions = gt_hashmap_new(GT_HASH_STRING, NULL, (GtFree) automatic_sequence_region_delete); add_ids_visitor->ensure_sorting = ensure_sorting; return nv; }
/*
 * Creates a GFF3 visitor writing to <outfp> (stored as passed), setting each
 * field explicitly: the three boolean flags start as false, fasta_width
 * starts at 0, and the ID counter, the two GT_HASH_DIRECT hashmaps and the
 * used-ID string table are freshly allocated.
 */
GtNodeVisitor* gt_gff3_visitor_new(GtFile *outfp)
{
  GtNodeVisitor *visitor;
  GtGFF3Visitor *writer;

  visitor = gt_node_visitor_create(gt_gff3_visitor_class());
  writer = gff3_visitor_cast(visitor);

  /* plain settings */
  writer->outfp = outfp;
  writer->fasta_width = 0;
  writer->version_string_shown = false;
  writer->fasta_directive_shown = false;
  writer->retain_ids = false;

  /* bookkeeping containers */
  writer->id_counter = gt_string_distri_new();
  writer->feature_node_to_id_array =
    gt_hashmap_new(GT_HASH_DIRECT, NULL, (GtFree) gt_array_delete);
  writer->feature_node_to_unique_id_str =
    gt_hashmap_new(GT_HASH_DIRECT, NULL, (GtFree) gt_str_delete);
  writer->used_ids = gt_cstr_table_new();

  return visitor;
}
/*
 * Creates an ORF finder visitor and copies the given settings into it:
 * <rmap> and <types> are stored as passed, together with <min>, <max> and
 * <all>.  <err> is unused.
 */
GtNodeVisitor* gt_orf_finder_visitor_new(GtRegionMapping *rmap,
                                         GtHashmap *types,
                                         unsigned int min,
                                         unsigned int max,
                                         bool all,
                                         GT_UNUSED GtError *err)
{
  GtNodeVisitor *visitor;
  GtORFFinderVisitor *finder;

  visitor = gt_node_visitor_create(gt_orf_finder_visitor_class());
  finder = gt_orf_finder_visitor_cast(visitor);
  gt_assert(finder);

  finder->all = all;
  finder->min = min;
  finder->max = max;
  finder->types = types;
  finder->rmap = rmap;

  return visitor;
}
/*
 * Creates a select visitor configured with the given filter criteria.
 *
 * <seqid> and <source> are stored through gt_str_ref().  <contain_range> and
 * <overlap_range> are optional: when NULL, both ends of the corresponding
 * stored range are set to GT_UNDEF_ULONG, otherwise the range is copied.
 * The remaining arguments are copied into fields of the same name; gene_num
 * starts at 0 and a node buffer queue is allocated.
 */
GtNodeVisitor* gt_select_visitor_new(GtStr *seqid,
                                     GtStr *source,
                                     const GtRange *contain_range,
                                     const GtRange *overlap_range,
                                     GtStrand strand,
                                     GtStrand targetstrand,
                                     bool has_CDS,
                                     unsigned long max_gene_length,
                                     unsigned long max_gene_num,
                                     double min_gene_score,
                                     double max_gene_score,
                                     double min_average_splice_site_prob,
                                     unsigned long feature_num)
{
  GtNodeVisitor *visitor;
  GtSelectVisitor *selector;

  visitor = gt_node_visitor_create(gt_select_visitor_class());
  selector = select_visitor_cast(visitor);

  selector->node_buffer = gt_queue_new();
  selector->seqid = gt_str_ref(seqid);
  selector->source = gt_str_ref(source);

  /* optional range filters; an absent range is marked as undefined */
  if (contain_range == NULL) {
    selector->contain_range.start = GT_UNDEF_ULONG;
    selector->contain_range.end = GT_UNDEF_ULONG;
  }
  else {
    selector->contain_range = *contain_range;
  }

  if (overlap_range == NULL) {
    selector->overlap_range.start = GT_UNDEF_ULONG;
    selector->overlap_range.end = GT_UNDEF_ULONG;
  }
  else {
    selector->overlap_range = *overlap_range;
  }

  /* strand and CDS filters */
  selector->strand = strand;
  selector->targetstrand = targetstrand;
  selector->has_CDS = has_CDS;

  /* gene-level limits */
  selector->max_gene_length = max_gene_length;
  selector->gene_num = 0;
  selector->max_gene_num = max_gene_num;
  selector->min_gene_score = min_gene_score;
  selector->max_gene_score = max_gene_score;
  selector->min_average_splice_site_prob = min_average_splice_site_prob;

  selector->feature_num = feature_num;

  return visitor;
}
/*
 * Lua binding: allocates a userdata holding a GtNodeVisitor pointer, stores a
 * newly created custom visitor in it, attaches the GENOME_VISITOR_METATABLE
 * metatable and a fresh environment table to the userdata, and remembers the
 * Lua state in the visitor.  Returns 1, the count of Lua return values.
 */
static int custom_visitor_lua_new(lua_State *L)
{
  GtNodeVisitor **slot;
  GtLuaCustomVisitor *custom;

  gt_assert(L);

  /* userdata is just big enough to hold the visitor pointer */
  slot = lua_newuserdata(L, sizeof (GtNodeVisitor*));
  gt_assert(slot);

  *slot = gt_node_visitor_create(lua_custom_visitor_class());
  gt_assert(*slot);

  custom = lua_custom_visitor_cast(*slot);
  custom->L = L;

  /* tag the userdata with the visitor metatable */
  luaL_getmetatable(L, GENOME_VISITOR_METATABLE);
  lua_setmetatable(L, -2);

  /* set clean env for this visitor */
  lua_newtable(L);
  lua_setfenv(L, -2);

  return 1;
}
/*
 * Creates an extract-feature visitor.  <rm> is asserted to be non-NULL and is
 * stored as passed, as is <outfp>.  <type> is stored as the result of
 * gt_symbol(type); the boolean options and <width> are copied, and the FASTA
 * sequence counter starts at 0.
 */
GtNodeVisitor* gt_extract_feature_visitor_new(GtRegionMapping *rm,
                                              const char *type,
                                              bool join,
                                              bool translate,
                                              bool seqid,
                                              bool target,
                                              GtUword width,
                                              GtFile *outfp)
{
  GtNodeVisitor *visitor;
  GtExtractFeatureVisitor *extractor;

  gt_assert(rm);

  visitor = gt_node_visitor_create(gt_extract_feature_visitor_class());
  extractor = gt_extract_feature_visitor_cast(visitor);

  /* what to extract */
  extractor->type = gt_symbol(type);
  extractor->region_mapping = rm;

  /* how to extract it */
  extractor->join = join;
  extractor->translate = translate;
  extractor->seqid = seqid;
  extractor->target = target;

  /* where and how to write it */
  extractor->outfp = outfp;
  extractor->width = width;
  extractor->fastaseq_counter = 0;

  return visitor;
}
/*
 * Creates a script wrapper visitor that stores the supplied callbacks: one
 * per node kind (comment, feature, region, sequence, meta, EOF) plus
 * <free_func>.  The callbacks are stored as passed; none is invoked here.
 */
GtNodeVisitor* gt_script_wrapper_visitor_new(
                                    GtScriptWrapperVisitorCommentNodeFunc cn,
                                    GtScriptWrapperVisitorFeatureNodeFunc fn,
                                    GtScriptWrapperVisitorRegionNodeFunc rn,
                                    GtScriptWrapperVisitorSequenceNodeFunc sn,
                                    GtScriptWrapperVisitorMetaNodeFunc mn,
                                    GtScriptWrapperVisitorEOFNodeFunc en,
                                    GtScriptWrapperVisitorFreeFunc free_func)
{
  GtNodeVisitor *visitor =
    gt_node_visitor_create(gt_script_wrapper_visitor_class());
  GtScriptWrapperVisitor *wrapper = gt_script_wrapper_visitor_cast(visitor);

  wrapper->free_func = free_func;
  wrapper->eof_node_func = en;
  wrapper->meta_node_func = mn;
  wrapper->sequence_node_func = sn;
  wrapper->region_node_func = rn;
  wrapper->feature_node_func = fn;
  wrapper->comment_node_func = cn;

  return visitor;
}
/*
 * Creates a protein domain visitor that runs hmmscan with the model file
 * named by gt_pdom_model_set_get_filename(model).  <model> and <rmap> are
 * asserted to be non-NULL; <rmap> is stored as passed.
 *
 * Before the visitor is built, "hmmscan -h" is run through system() to check
 * that the executable can be started.  On failure an error is set in <err>
 * and NULL is returned.  On Windows the function always fails with
 * "hmmscan for Windows not implemented".
 *
 * The hmmscan command line is assembled from gt_jobs (--cpu), the <cutoff>
 * mode (--cut_ga, --cut_tc, or --domE followed by <eval_cutoff>), the model
 * file name and "-".  It is stored in cmdline and, split at spaces, in args.
 */
GtNodeVisitor* gt_ltrdigest_pdom_visitor_new(GtPdomModelSet *model,
                                             double eval_cutoff,
                                             unsigned int chain_max_gap_length,
                                             GtPdomCutoff cutoff,
                                             GtRegionMapping *rmap,
                                             GtError *err)
{
  GtNodeVisitor *nv;
  GtLTRdigestPdomVisitor *lv;
  GtStr *cmd;
  int i, rval;
  gt_assert(model && rmap);

  rval = system("hmmscan -h > /dev/null");
  if (rval == -1) {
    gt_error_set(err, "error executing system(hmmscan)");
    return NULL;
  }
#ifndef _WIN32
  /* WEXITSTATUS is only meaningful if the child exited normally; a probe
     terminated by a signal must not be mistaken for a successful run */
  if (!WIFEXITED(rval) || WEXITSTATUS(rval) != 0) {
    gt_error_set(err, "cannot find the hmmscan executable in PATH");
    return NULL;
  }
#else
  /* XXX */
  gt_error_set(err, "hmmscan for Windows not implemented");
  return NULL;
#endif

  nv = gt_node_visitor_create(gt_ltrdigest_pdom_visitor_class());
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  lv->eval_cutoff = eval_cutoff;
  lv->cutoff = cutoff;
  lv->chain_max_gap_length = chain_max_gap_length;
  lv->rmap = rmap;
  lv->output_all_chains = false;
  lv->tag = gt_str_new_cstr("GenomeTools");
  lv->root_type = gt_symbol(gt_ft_LTR_retrotransposon);

  /* one string per slot of the fwd[] and rev[] arrays */
  for (i = 0; i < 3; i++) {
    lv->fwd[i] = gt_str_new();
    lv->rev[i] = gt_str_new();
  }

  /* assemble the hmmscan command line */
  cmd = gt_str_new_cstr("hmmscan --cpu ");
  gt_str_append_uint(cmd, gt_jobs);
  gt_str_append_cstr(cmd, " ");
  switch (cutoff) {
    case GT_PHMM_CUTOFF_GA:
      gt_str_append_cstr(cmd, "--cut_ga");
      break;
    case GT_PHMM_CUTOFF_TC:
      gt_str_append_cstr(cmd, "--cut_tc");
      break;
    case GT_PHMM_CUTOFF_NONE:
      gt_str_append_cstr(cmd, "--domE ");
      gt_str_append_double(cmd, eval_cutoff, 50);
      break;
  }
  gt_str_append_cstr(cmd, " ");
  gt_str_append_cstr(cmd, gt_pdom_model_set_get_filename(model));
  gt_str_append_cstr(cmd, " -");
  lv->cmdline = cmd;
  lv->args = gt_cstr_split(gt_str_get(lv->cmdline), ' ');
  gt_log_log("HMMER cmdline: %s", gt_str_get(cmd));

  return nv;
}
// Creates a GAEVAL visitor configured with `gparams`.
//
// The alignment stream `astream` (asserted non-NULL) is pulled to completion
// during construction. It is first filtered to the types cDNA_match,
// EST_match and nucleotide_match. The result is passed through a feature out
// stream targeting the visitor's in-memory feature index (`alignments`), and
// then through three inter-feature streams that add `match_gap` features.
//
// A warning is printed to stderr if the four weights in `gparams` do not sum
// to 1.0 (tolerance 0.0001). Returns the new visitor, or NULL if pulling the
// stream fails. The temporary error object, type map and stream queue are
// released on both paths.
GtNodeVisitor* agn_gaeval_visitor_new(GtNodeStream *astream,
                                      AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }

  // Set up node stream to load alignment features into memory
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");

  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // Release the temporaries on success and failure alike. The streams are
  // deleted before the visitor because one of them was handed v->alignments.
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  if(result == -1)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }

  return nv;
}
/*
 * Creates a check-boundaries visitor.  No fields are initialized here; the
 * visitor is returned exactly as produced by gt_node_visitor_create().
 */
GtNodeVisitor* gt_check_boundaries_visitor_new(void)
{
  return gt_node_visitor_create(gt_check_boundaries_visitor_class());
}