/*
 * Node-visitor callback for region nodes. After gff3_version_string() has
 * been invoked on the visitor, emits one line of the form
 *
 *   <GT_GFF_SEQUENCE_REGION> <seqid> <start> <end>\n
 *
 * for <rn>. The line is appended to the visitor's output string if one is
 * set; otherwise it is printed to the visitor's output file.
 *
 * <err> is only checked, never set. Always returns 0.
 */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  GtGenomeNode *region;
  const char *seqid;

  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);

  region = (GtGenomeNode*) rn;
  seqid = gt_str_get(gt_genome_node_get_seqid(region));

  if (visitor->outstr) {
    /* string sink: build the line piece by piece */
    gt_str_append_cstr(visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(visitor->outstr, " ");
    gt_str_append_cstr(visitor->outstr, seqid);
    gt_str_append_char(visitor->outstr, ' ');
    gt_str_append_ulong(visitor->outstr, gt_genome_node_get_start(region));
    gt_str_append_char(visitor->outstr, ' ');
    gt_str_append_ulong(visitor->outstr, gt_genome_node_get_end(region));
    gt_str_append_char(visitor->outstr, '\n');
    return 0;
  }

  /* file sink: a single formatted write */
  gt_file_xprintf(visitor->outfp, "%s %s "GT_WU" "GT_WU"\n",
                  GT_GFF_SEQUENCE_REGION,
                  seqid,
                  gt_genome_node_get_start(region),
                  gt_genome_node_get_end(region));
  return 0;
}
/*
 * Append the eight leading, tab-terminated columns for <fn> to <outstr>:
 * seqid, source, type, start, end, score, strand and phase.
 *
 * The score is formatted with "%.3g" when it is defined and written as '.'
 * otherwise. Strand and phase are rendered by indexing GT_STRAND_CHARS and
 * GT_PHASE_CHARS. Both arguments must be non-NULL.
 */
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *node;
  char strand_char, phase_char;

  gt_assert(fn && outstr);
  node = (GtGenomeNode*) fn;

  /* seqid, source, type */
  gt_str_append_str(outstr, gt_genome_node_get_seqid(node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');

  /* start, end */
  gt_str_append_uword(outstr, gt_genome_node_get_start(node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_uword(outstr, gt_genome_node_get_end(node));
  gt_str_append_char(outstr, '\t');

  /* score, or '.' when the feature has none */
  if (!gt_feature_node_score_is_defined(fn)) {
    gt_str_append_char(outstr, '.');
  }
  else {
    char score_text[BUFSIZ];
    (void) snprintf(score_text, sizeof score_text, "%.3g",
                    gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, score_text);
  }
  gt_str_append_char(outstr, '\t');

  /* strand, phase */
  strand_char = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  gt_str_append_char(outstr, strand_char);
  gt_str_append_char(outstr, '\t');
  phase_char = GT_PHASE_CHARS[gt_feature_node_get_phase(fn)];
  gt_str_append_char(outstr, phase_char);
  gt_str_append_char(outstr, '\t');
}
static int create_block_features(GtBEDParser *bed_parser, GtFeatureNode *fn, GtUword block_count, GtSplitter *size_splitter, GtSplitter *start_splitter, GtIO *bed_file, GtError *err) { GtUword i; int had_err = 0; gt_assert(fn && block_count && size_splitter && start_splitter); gt_assert(gt_splitter_size(size_splitter) == block_count); gt_assert(gt_splitter_size(start_splitter) == block_count); for (i = 0; !had_err && i < block_count; i++) { GtUword block_size, block_start, start, end; GtGenomeNode *block; const char *name; if (gt_parse_uword(&block_size, gt_splitter_get_token(size_splitter, i))) { gt_error_set(err, "file \"%s\": line "GT_WU": could not parse blockSize '%s'", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), gt_splitter_get_token(size_splitter, i)); had_err = -1; } if (!had_err && gt_parse_uword(&block_start, gt_splitter_get_token(start_splitter, i))) { gt_error_set(err, "file \"%s\": line "GT_WU": could not parse blockStart " "'%s'", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), gt_splitter_get_token(start_splitter, i)); had_err = -1; } if (!had_err) { start = gt_genome_node_get_start((GtGenomeNode*) fn) + block_start; end = start + block_size - 1; block = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) fn), bed_parser->block_type ? bed_parser->block_type : BED_BLOCK_TYPE, start, end, gt_feature_node_get_strand(fn)); if ((name = gt_feature_node_get_attribute(fn, GT_GFF_NAME))) { gt_feature_node_add_attribute((GtFeatureNode*) block, GT_GFF_NAME, name); } gt_feature_node_set_score((GtFeatureNode*) block, gt_feature_node_get_score(fn)); gt_feature_node_set_strand((GtFeatureNode*) block, gt_feature_node_get_strand(fn)); gt_feature_node_add_child(fn, (GtFeatureNode*) block); } } return had_err; }
/*
 * Assign a concrete UTR type to every feature in v->utrs that is neither a
 * "five_prime_UTR" nor a "three_prime_UTR" yet.
 *
 * The decision compares the UTR's start with the start of the single node
 * stored in v->starts (presumably the start codon -- confirm against the
 * code that fills v->starts):
 *   - forward strand: UTR starting before it -> five_prime_UTR,
 *                     otherwise              -> three_prime_UTR;
 *   - any other strand: the assignment is mirrored.
 *
 * Does nothing unless v->starts exists and holds exactly one element.
 */
static void infer_cds_visitor_set_utrs(AgnInferCDSVisitor *v)
{
  GtGenomeNode **anchor;
  GtUword idx, anchor_start;

  if(v->starts == NULL || gt_array_size(v->starts) != 1)
    return;

  anchor = gt_array_get(v->starts, 0);
  anchor_start = gt_genome_node_get_start(*anchor);

  for(idx = 0; idx < gt_array_size(v->utrs); idx++)
  {
    GtFeatureNode *utr = *(GtFeatureNode **)gt_array_get(v->utrs, idx);
    GtStrand strand = gt_feature_node_get_strand(utr);
    GtUword utr_start = gt_genome_node_get_start((GtGenomeNode *)utr);
    int on_forward, starts_before;

    /* already typed explicitly: leave untouched */
    if(gt_feature_node_has_type(utr, "five_prime_UTR") ||
       gt_feature_node_has_type(utr, "three_prime_UTR"))
    {
      continue;
    }

    on_forward    = (strand == GT_STRAND_FORWARD);
    starts_before = (utr_start < anchor_start);

    /* 5' when "forward and before" or "not forward and not before" */
    if(on_forward == starts_before)
      gt_feature_node_set_type(utr, "five_prime_UTR");
    else
      gt_feature_node_set_type(utr, "three_prime_UTR");
  }
}
/*
 * Node-visitor callback for region nodes. After gff3_version_string() has
 * been invoked on the visitor, prints one line of the form
 *
 *   <GT_GFF_SEQUENCE_REGION> <seqid> <start> <end>\n
 *
 * for <rn> to the visitor's output file.
 *
 * <err> is only checked, never set. Always returns 0.
 */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  GtGenomeNode *region;
  const char *seqid;

  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);

  region = (GtGenomeNode*) rn;
  seqid = gt_str_get(gt_genome_node_get_seqid(region));

  /* NOTE(review): "%lu" assumes the start/end getters return unsigned long
     -- confirm against their declarations. */
  gt_file_xprintf(visitor->outfp, "%s %s %lu %lu\n",
                  GT_GFF_SEQUENCE_REGION,
                  seqid,
                  gt_genome_node_get_start(region),
                  gt_genome_node_get_end(region));
  return 0;
}
/*
 * Print the eight leading, tab-terminated columns for <fn> to <outfp>:
 * seqid, source, type, start, end, score, strand and phase.
 *
 * The score is printed with "%.3g" when it is defined and as '.' otherwise.
 * Strand and phase are rendered by indexing GT_STRAND_CHARS and
 * GT_PHASE_CHARS. <fn> must be non-NULL.
 */
void gt_gff3_output_leading(GtFeatureNode *fn, GtFile *outfp)
{
  GtGenomeNode *node;
  const char *seqid;
  char strand_char, phase_char;

  gt_assert(fn);
  node = (GtGenomeNode*) fn;
  seqid = gt_str_get(gt_genome_node_get_seqid(node));

  /* seqid, source, type, start, end */
  gt_file_xprintf(outfp, "%s\t%s\t%s\t"GT_WU"\t"GT_WU"\t",
                  seqid,
                  gt_feature_node_get_source(fn),
                  gt_feature_node_get_type(fn),
                  gt_genome_node_get_start(node),
                  gt_genome_node_get_end(node));

  /* score, or '.' when the feature has none */
  if (!gt_feature_node_score_is_defined(fn)) {
    gt_file_xfputc('.', outfp);
  }
  else {
    gt_file_xprintf(outfp, "%.3g", gt_feature_node_get_score(fn));
  }

  /* strand, phase */
  strand_char = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  phase_char = GT_PHASE_CHARS[gt_feature_node_get_phase(fn)];
  gt_file_xprintf(outfp, "\t%c\t%c\t", strand_char, phase_char);
}
/*
 * Node-stream "next" callback: fetches the next node from the input stream,
 * hands it on through <gn>, and -- if it is a gene (or a pseudo node that
 * contains one) -- tags that gene with the attribute "cpgi_at_tss", set to
 * the name returned by CpGIOverlap_stream_find_gene_overlap() for the gene's
 * TSS.
 *
 * The TSS is the gene's start on the forward strand and its end otherwise.
 * The chromosome number is parsed from a seqid of the form "Chr<number>".
 *
 * A node is passed through unannotated when it is not a feature node, is not
 * (and does not contain) a gene, has no "Name" attribute, has a seqid that
 * does not match "Chr<number>", or has no overlap reported for its TSS.
 *
 * Returns the status of the input stream: 0 on success (with *gn == NULL at
 * end of stream), otherwise the input stream's non-zero error code with
 * *gn == NULL.
 */
static int CpGIOverlap_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node = NULL;
    GtGenomeNode * next_node;
    GtFeatureNodeIterator * iter;
    CpGIOverlap_stream * context;
    const char * overlap_name;
    int err_num;
    int chr_num;
    unsigned int TSS;

    *gn = NULL;
    context = CpGIOverlap_stream_cast(ns);

    // propagate upstream failures instead of reporting a clean end of stream
    err_num = gt_node_stream_next(context->in_stream, &cur_node, err);
    if (err_num || cur_node == NULL)
        return err_num;

    *gn = cur_node;

    // only feature nodes can be genes
    // NOTE(review): cur_node/next_node are GtGenomeNode pointers handed to
    // gt_feature_node_* functions without an explicit cast -- confirm the
    // prototypes accept this.
    if (!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
        return 0;

    // for a pseudo node, look for the first gene inside it
    if (gt_feature_node_is_pseudo(cur_node))
    {
        iter = gt_feature_node_iterator_new(cur_node);
        if (iter == NULL)
            return 0;

        while ((next_node = gt_feature_node_iterator_next(iter)) &&
               !gt_feature_node_has_type(next_node, feature_type_gene))
        {
            continue; // skip non-gene children
        }
        gt_feature_node_iterator_delete(iter);

        if (next_node == NULL)
            return 0;
        cur_node = next_node;
    }

    if (!gt_feature_node_has_type(cur_node, feature_type_gene))
        return 0;

    // genes without a name are left untouched
    if (gt_feature_node_get_attribute(cur_node, "Name") == NULL)
        return 0;

    if (1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)),
                    "Chr%d", &chr_num))
        return 0;

    TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD)
              ? gt_genome_node_get_start(cur_node)
              : gt_genome_node_get_end(cur_node);

    // now figure out the overlapping CpG island
    overlap_name = CpGIOverlap_stream_find_gene_overlap(context, TSS, chr_num);
    if (overlap_name == NULL)
        return 0;

    // save the overlap into the node
    gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name);

    return 0;
}
/*
 * Node-stream "next" callback: fetches the next node from the input stream,
 * hands it on through <gn>, and -- if it is a feature of type
 * feature_type_CpGI -- sets its score to the value computed by
 * CpGI_score_stream_score_island() from the chromosome number, the "sumcg"
 * attribute and the island's start/end.
 *
 * The chromosome number is parsed from a seqid of the form "Chr<number>".
 *
 * A node is passed through unscored when it is not a feature node, is not a
 * CpG island, has a seqid that does not match "Chr<number>", or lacks the
 * "sumcg" attribute. A "sumcg" value that cannot be parsed counts as 0.
 *
 * Returns the status of the input stream: 0 on success (with *gn == NULL at
 * end of stream), otherwise the input stream's non-zero error code with
 * *gn == NULL.
 */
static int CpGI_score_stream_next(GtNodeStream * ns,
                                  GtGenomeNode ** gn,
                                  GtError * err)
{
    GtGenomeNode * cur_node = NULL;
    CpGI_score_stream * score_stream;
    unsigned long island_start;
    unsigned long island_end;
    unsigned long num_cg = 0;
    float island_score;
    int chromosome_num;
    int err_num;
    const char * num_cg_str;

    *gn = NULL;
    score_stream = CpGI_score_stream_cast(ns);

    // propagate upstream failures instead of reporting a clean end of stream
    err_num = gt_node_stream_next(score_stream->in_stream, &cur_node, err);
    if (err_num || cur_node == NULL)
        return err_num;

    *gn = cur_node;

    // only feature nodes of the CpGI type are scored
    if (!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
        return 0;
    if (!gt_feature_node_has_type(cur_node, feature_type_CpGI))
        return 0;

#if DEBUG_SCORE
    printf("found CpGI\n");
#endif

    island_start = gt_genome_node_get_start(cur_node);
    island_end   = gt_genome_node_get_end(cur_node);

    // without a parsable chromosome number there is nothing to look up
    if (1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)),
                    "Chr%d", &chromosome_num))
        return 0;

    num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg");
    if (!num_cg_str)
        return 0;

    // "%lu" matches the unsigned long destination
    if (1 != sscanf(num_cg_str, "%lu", &num_cg))
        num_cg = 0;

    // now figure out the score
    island_score = CpGI_score_stream_score_island(score_stream,
                                                  chromosome_num,
                                                  num_cg,
                                                  island_start,
                                                  island_end);

    // save the score into the node
    gt_feature_node_set_score(cur_node, island_score);

    return 0;
}