Beispiel #1
0
GtGenomeNode* gt_feature_node_new(GtStr *seqid, const char *type,
                                  GtUword start, GtUword end,
                                  GtStrand strand)
{
  GtGenomeNode *gn;
  GtFeatureNode *fn;
  gt_assert(seqid && type);
  gt_assert(start <= end);
  gn = gt_genome_node_create(gt_feature_node_class());
  fn = gt_feature_node_cast(gn);
  fn->seqid       = gt_str_ref(seqid);
  fn->source      = NULL;
  fn->type        = gt_symbol(type);
  fn->score       = GT_UNDEF_FLOAT;
  fn->range.start = start;
  fn->range.end   = end;
  fn->representative = NULL;
  fn->attributes  = NULL;
  fn->bit_field   = 0;
  fn->bit_field  |= strand << STRAND_OFFSET;
  fn->children    = NULL; /* the children list is create on demand */
  fn->observer    = NULL;
  gt_feature_node_set_phase(fn, GT_PHASE_UNDEFINED);
  set_transcriptfeaturetype(fn, TRANSCRIPT_FEATURE_TYPE_UNDETERMINED);
  set_tree_status(&fn->bit_field, IS_TREE);
  /* the DFS status is set to DFS_WHITE already */
  fn->representative = NULL;
  return gn;
}
Beispiel #2
0
static int feature_index_lua_add_feature_node(lua_State *L)
{
  GtFeatureIndex **fi;
  GtGenomeNode **gn;
  GtFeatureNode *gf;
  GtStr *seqid;
  gt_assert(L);
  fi = check_feature_index(L, 1);
  gn = check_genome_node(L, 2);
  gf = gt_genome_node_cast(gt_feature_node_class(), *gn);
  luaL_argcheck(L, gf, 2, "not a feature node");
  seqid = gt_genome_node_get_seqid(*gn);
  luaL_argcheck(L, seqid, 2, "feature does not have a sequence id");
  luaL_argcheck(L, gt_feature_index_has_seqid(*fi, gt_str_get(seqid)), 2,
                "feature index does not contain corresponding sequence region");
  gt_feature_index_add_feature_node(*fi, gf);
  return 0;
}
Beispiel #3
0
static int extracttarget_from_node(GtGenomeNode *gn, GtStrArray *seqfiles,
                                   GtError *err)
{
  GtFeatureNodeIterator *fni;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gn && seqfiles);
  if (gt_genome_node_cast(gt_feature_node_class(), gn)) {
    const char *target;
    GtFeatureNode *child;
    fni = gt_feature_node_iterator_new(gt_feature_node_cast(gn));
    while (!had_err && /* XXX remove cast */
           (child = (GtFeatureNode*) gt_feature_node_iterator_next(fni))) {
      if ((target = gt_feature_node_get_attribute(child, "Target")))
        had_err = extracttarget_from_seqfiles(target, seqfiles, err);
    }
    gt_feature_node_iterator_delete(fni);
  }
  return had_err;
}
static int CpGIOverlap_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node, * next_node;
    GtFeatureNodeIterator * iter;
    int err_num = 0;
    *gn = NULL;
    CpGIOverlap_stream * context;
    const char * gene_name = NULL;
    const char * overlap_name = NULL;
    char  chr_str[255];
    int  chr_num;
    unsigned int TSS;

    float CpGIOverlap;


    context = CpGIOverlap_stream_cast(ns);

    // find the genes, determine expression level
     if(!gt_node_stream_next(context->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              // first check if it is a pseudo node, if so find the gene in it if available
              if (gt_feature_node_is_pseudo(cur_node))
              {
                  iter = gt_feature_node_iterator_new(cur_node);
                  if (iter == NULL)
                      return;
                  while ((next_node = gt_feature_node_iterator_next(iter)) && !gt_feature_node_has_type(next_node, feature_type_gene));
                  gt_feature_node_iterator_delete(iter);
                  if (NULL == (cur_node = next_node))
                     return 0;
              }


              if(!gt_feature_node_has_type(cur_node, feature_type_gene))
                  return 0;

              // find name of gene
              gene_name = gt_feature_node_get_attribute(cur_node, "Name");

              if (gene_name == NULL)
                  return;

              if ( 1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)), "Chr%d", &chr_num))
                  return 0;

              TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD) ? gt_genome_node_get_start(cur_node) : gt_genome_node_get_end(cur_node);

              // now figure out the overlapping gene 
              if (! (overlap_name = CpGIOverlap_stream_find_gene_overlap( context, TSS, chr_num)))
                 return 0;

              // save the score into the node
              gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name);
              
              return 0;

         }
     }

    return err_num;
}
static int CpGI_score_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node;
    int err_num = 0;
    *gn = NULL;
    CpGI_score_stream * score_stream;
    unsigned long island_start;
    unsigned long island_end;
    float island_score;
    int chromosome_num;
    GtStr * seqID_gtstr;
    char *  seqID_str;
    char *  num_cg_str;
    unsigned long num_cg = 0;

    score_stream = CpGI_score_stream_cast(ns);

    // find the CpGI's, process methylome score
     if(!gt_node_stream_next(score_stream->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              if(!gt_feature_node_has_type(cur_node, feature_type_CpGI))
                  return 0;

              #if DEBUG_SCORE
              printf("found CpGI\n");
              #endif 
 
              island_start = gt_genome_node_get_start(cur_node);
              island_end   = gt_genome_node_get_end(cur_node);

              seqID_gtstr = gt_genome_node_get_seqid(cur_node);
              seqID_str   = gt_str_get(seqID_gtstr);
              sscanf(seqID_str, "Chr%d", &chromosome_num);

              num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg");
              if (!num_cg_str)
                 return 0;
              
              sscanf(num_cg_str, "%d", &num_cg);             

              // now figure out the score
              island_score = CpGI_score_stream_score_island(score_stream ,
                                                            chromosome_num,
                                                            num_cg,
                                                            island_start,
                                                            island_end);
//              gt_str_delete(seqID_gtstr);

              // save the score into the node
              gt_feature_node_set_score(cur_node, island_score);
              
              return 0;

         }
     }

    return err_num;
}
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}