Exemple #1
0
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);
  if (!gff3_visitor->outstr) {
    gt_file_xprintf(gff3_visitor->outfp, "%s   %s "GT_WU" "GT_WU"\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) rn)),
                    gt_genome_node_get_start((GtGenomeNode*) rn),
                    gt_genome_node_get_end((GtGenomeNode*) rn));
  } else {
    gt_str_append_cstr(gff3_visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(gff3_visitor->outstr, "   ");
    gt_str_append_cstr(gff3_visitor->outstr,
                      gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) rn)));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr,
                                  gt_genome_node_get_start((GtGenomeNode*) rn));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr,
                                  gt_genome_node_get_end((GtGenomeNode*) rn));
    gt_str_append_char(gff3_visitor->outstr, '\n');
  }
  return 0;
}
Exemple #2
0
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *gn;
  gt_assert(fn && outstr);
  gn = (GtGenomeNode*) fn;
  gt_str_append_str(outstr, gt_genome_node_get_seqid(gn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_uword(outstr, gt_genome_node_get_start(gn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_uword(outstr, gt_genome_node_get_end(gn));
  gt_str_append_char(outstr, '\t');
  if (gt_feature_node_score_is_defined(fn)) {
    char buf[BUFSIZ];
    (void) snprintf(buf, BUFSIZ, "%.3g", gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, buf);
  } else
    gt_str_append_char(outstr, '.');
  gt_str_append_char(outstr, '\t');
  gt_str_append_char(outstr, GT_STRAND_CHARS[gt_feature_node_get_strand(fn)]);
  gt_str_append_char(outstr, '\t');
  gt_str_append_char(outstr, GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
  gt_str_append_char(outstr, '\t');
}
Exemple #3
0
static int create_block_features(GtBEDParser *bed_parser, GtFeatureNode *fn,
                                 GtUword block_count,
                                 GtSplitter *size_splitter,
                                 GtSplitter *start_splitter, GtIO *bed_file,
                                 GtError *err)
{
  GtUword i;
  int had_err = 0;
  gt_assert(fn && block_count && size_splitter && start_splitter);
  gt_assert(gt_splitter_size(size_splitter) == block_count);
  gt_assert(gt_splitter_size(start_splitter) == block_count);
  for (i = 0; !had_err && i < block_count; i++) {
    GtUword block_size, block_start, start, end;
    GtGenomeNode *block;
    const char *name;
    if (gt_parse_uword(&block_size, gt_splitter_get_token(size_splitter, i))) {
      gt_error_set(err,
                   "file \"%s\": line "GT_WU": could not parse blockSize '%s'",
                   gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file),
                   gt_splitter_get_token(size_splitter, i));
      had_err = -1;
    }
    if (!had_err && gt_parse_uword(&block_start,
                                   gt_splitter_get_token(start_splitter, i))) {
      gt_error_set(err, "file \"%s\": line "GT_WU": could not parse blockStart "
                   "'%s'", gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file),
                   gt_splitter_get_token(start_splitter, i));
      had_err = -1;
    }
    if (!had_err) {
      start = gt_genome_node_get_start((GtGenomeNode*) fn) + block_start;
      end = start + block_size - 1;
      block = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) fn),
                                  bed_parser->block_type
                                  ? bed_parser->block_type
                                  : BED_BLOCK_TYPE,
                                  start, end, gt_feature_node_get_strand(fn));
      if ((name = gt_feature_node_get_attribute(fn, GT_GFF_NAME))) {
        gt_feature_node_add_attribute((GtFeatureNode*) block, GT_GFF_NAME,
                                      name);
      }
      gt_feature_node_set_score((GtFeatureNode*) block,
                                gt_feature_node_get_score(fn));
      gt_feature_node_set_strand((GtFeatureNode*) block,
                                 gt_feature_node_get_strand(fn));
      gt_feature_node_add_child(fn, (GtFeatureNode*) block);
    }
  }
  return had_err;
}
static void infer_cds_visitor_set_utrs(AgnInferCDSVisitor *v)
{
  GtGenomeNode **start;
  GtUword i, cds_start;

  if(!v->starts || gt_array_size(v->starts) != 1)
    return;
  start = gt_array_get(v->starts, 0);
  cds_start = gt_genome_node_get_start(*start);

  for(i = 0; i < gt_array_size(v->utrs); i++)
  {
    GtFeatureNode *utr = *(GtFeatureNode **)gt_array_get(v->utrs, i);
    GtStrand strand = gt_feature_node_get_strand(utr);
    GtUword utr_start = gt_genome_node_get_start((GtGenomeNode *)utr);

    if(!gt_feature_node_has_type(utr, "five_prime_UTR") &&
       !gt_feature_node_has_type(utr, "three_prime_UTR"))
    {
      if(strand == GT_STRAND_FORWARD)
      {
        if(utr_start < cds_start)
          gt_feature_node_set_type(utr, "five_prime_UTR");
        else
          gt_feature_node_set_type(utr, "three_prime_UTR");
      }
      else
      {
        if(utr_start < cds_start)
          gt_feature_node_set_type(utr, "three_prime_UTR");
        else
          gt_feature_node_set_type(utr, "five_prime_UTR");
      }
    }
  }
}
Exemple #5
0
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);
  gt_file_xprintf(gff3_visitor->outfp, "%s   %s %lu %lu\n",
                  GT_GFF_SEQUENCE_REGION,
                  gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) rn)),
                  gt_genome_node_get_start((GtGenomeNode*) rn),
                  gt_genome_node_get_end((GtGenomeNode*) rn));
  return 0;
}
Exemple #6
0
void gt_gff3_output_leading(GtFeatureNode *fn, GtFile *outfp)
{
  GtGenomeNode *gn;
  gt_assert(fn);
  gn = (GtGenomeNode*) fn;
  gt_file_xprintf(outfp, "%s\t%s\t%s\t"GT_WU"\t"GT_WU"\t",
                     gt_str_get(gt_genome_node_get_seqid(gn)),
                     gt_feature_node_get_source(fn),
                     gt_feature_node_get_type(fn),
                     gt_genome_node_get_start(gn),
                     gt_genome_node_get_end(gn));
  if (gt_feature_node_score_is_defined(fn))
    gt_file_xprintf(outfp, "%.3g", gt_feature_node_get_score(fn));
  else
    gt_file_xfputc('.', outfp);
  gt_file_xprintf(outfp, "\t%c\t%c\t",
                     GT_STRAND_CHARS[gt_feature_node_get_strand(fn)],
                     GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
}
static int CpGIOverlap_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node, * next_node;
    GtFeatureNodeIterator * iter;
    int err_num = 0;
    *gn = NULL;
    CpGIOverlap_stream * context;
    const char * gene_name = NULL;
    const char * overlap_name = NULL;
    char  chr_str[255];
    int  chr_num;
    unsigned int TSS;

    float CpGIOverlap;


    context = CpGIOverlap_stream_cast(ns);

    // find the genes, determine expression level
     if(!gt_node_stream_next(context->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              // first check if it is a pseudo node, if so find the gene in it if available
              if (gt_feature_node_is_pseudo(cur_node))
              {
                  iter = gt_feature_node_iterator_new(cur_node);
                  if (iter == NULL)
                      return;
                  while ((next_node = gt_feature_node_iterator_next(iter)) && !gt_feature_node_has_type(next_node, feature_type_gene));
                  gt_feature_node_iterator_delete(iter);
                  if (NULL == (cur_node = next_node))
                     return 0;
              }


              if(!gt_feature_node_has_type(cur_node, feature_type_gene))
                  return 0;

              // find name of gene
              gene_name = gt_feature_node_get_attribute(cur_node, "Name");

              if (gene_name == NULL)
                  return;

              if ( 1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)), "Chr%d", &chr_num))
                  return 0;

              TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD) ? gt_genome_node_get_start(cur_node) : gt_genome_node_get_end(cur_node);

              // now figure out the overlapping gene 
              if (! (overlap_name = CpGIOverlap_stream_find_gene_overlap( context, TSS, chr_num)))
                 return 0;

              // save the score into the node
              gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name);
              
              return 0;

         }
     }

    return err_num;
}
static int CpGI_score_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node;
    int err_num = 0;
    *gn = NULL;
    CpGI_score_stream * score_stream;
    unsigned long island_start;
    unsigned long island_end;
    float island_score;
    int chromosome_num;
    GtStr * seqID_gtstr;
    char *  seqID_str;
    char *  num_cg_str;
    unsigned long num_cg = 0;

    score_stream = CpGI_score_stream_cast(ns);

    // find the CpGI's, process methylome score
     if(!gt_node_stream_next(score_stream->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              if(!gt_feature_node_has_type(cur_node, feature_type_CpGI))
                  return 0;

              #if DEBUG_SCORE
              printf("found CpGI\n");
              #endif 
 
              island_start = gt_genome_node_get_start(cur_node);
              island_end   = gt_genome_node_get_end(cur_node);

              seqID_gtstr = gt_genome_node_get_seqid(cur_node);
              seqID_str   = gt_str_get(seqID_gtstr);
              sscanf(seqID_str, "Chr%d", &chromosome_num);

              num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg");
              if (!num_cg_str)
                 return 0;
              
              sscanf(num_cg_str, "%d", &num_cg);             

              // now figure out the score
              island_score = CpGI_score_stream_score_island(score_stream ,
                                                            chromosome_num,
                                                            num_cg,
                                                            island_start,
                                                            island_end);
//              gt_str_delete(seqID_gtstr);

              // save the score into the node
              gt_feature_node_set_score(cur_node, island_score);
              
              return 0;

         }
     }

    return err_num;
}