Esempio n. 1
0
/**
 * Assign a phase to every CDS segment stored in v->cds.
 *
 * The segment taken as the start of the coding sequence is the first array
 * element, or the last one when the first element reports the reverse strand;
 * it always receives phase 0. Every following segment (walking towards the
 * other end of the array) gets the phase implied by the accumulated length of
 * the segments before it. Does nothing when the array is empty.
 *
 * NOTE(review): this presumably relies on v->cds being sorted by genomic
 * position -- confirm against the code that fills the array.
 *
 * @param v visitor whose cds array holds GtFeatureNode pointers
 */
static void infer_cds_visitor_check_cds_phase(AgnInferCDSVisitor *v)
{
  unsigned long num_cds_feats = gt_array_size(v->cds);
  if(num_cds_feats == 0)
    return;

  GtFeatureNode *start_cds = *(GtFeatureNode **)gt_array_get(v->cds, 0);
  const int on_reverse =
    (gt_feature_node_get_strand(start_cds) == GT_STRAND_REVERSE);
  if(on_reverse)
    start_cds = *(GtFeatureNode **)gt_array_get(v->cds, num_cds_feats - 1);
  gt_feature_node_set_phase(start_cds, GT_PHASE_ZERO);
  if(num_cds_feats == 1)
    return;

  unsigned long cds_length =
    gt_genome_node_get_length((GtGenomeNode *)start_cds);

  /* A single loop serves both strands: `step` counts segments after the
     starting one, and is mapped to an array position from the matching end.
     The counter is unsigned long like the array size, so there is no
     signed/unsigned comparison and no narrowing of large counts. */
  unsigned long step;
  for(step = 1; step < num_cds_feats; step++)
  {
    unsigned long pos = on_reverse ? (num_cds_feats - 1 - step) : step;
    GtFeatureNode *cds = *(GtFeatureNode **)gt_array_get(v->cds, pos);

    /* Bases left over from the preceding segments determine how many bases
       of this segment are needed to complete the pending codon. */
    GtPhase phase;
    switch(cds_length % 3)
    {
      case 1:
        phase = GT_PHASE_TWO;
        break;
      case 2:
        phase = GT_PHASE_ONE;
        break;
      default:
        phase = GT_PHASE_ZERO;
        break;
    }
    gt_feature_node_set_phase(cds, phase);
    cds_length += gt_genome_node_get_length((GtGenomeNode *)cds);
  }
}
Esempio n. 2
0
/* Check (and, in tidy mode, repair) the phases of the CDS features in
   <cds_features>, which must be non-empty.

   If the first feature lies on the reverse strand, <cds_features> is reversed
   in place before processing. The first processed feature only needs a defined
   phase; every later feature must carry the phase derived from the length and
   phase of the feature before it.

   A feature with an unacceptable phase is handled as follows:
   - If it is already recorded in v->cds_features: when v->tidy is set,
     <is_multi> is false and the feature has no children, a warning is issued
     and the feature is registered in v->cds_features_to_split (with
     v->splitting_is_necessary set); otherwise an error is reported.
   - Otherwise: when v->tidy is set the phase is overwritten with the expected
     one (a warning is issued unless <second_pass> is true); without v->tidy an
     error is reported.

   Every feature processed without error is recorded in v->cds_features.
   Returns 0 on success, or -1 with <err> set on the first error. */
static int check_cds_phases(GtArray *cds_features, GtCDSCheckVisitor *v,
                            bool is_multi, bool second_pass, GtError *err)
{
  GtPhase current_phase, correct_phase = GT_PHASE_ZERO;
  GtFeatureNode *fn;
  GtStrand strand;
  unsigned long i, current_length;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(cds_features);
  gt_assert(gt_array_size(cds_features));
  fn = *(GtFeatureNode**) gt_array_get_first(cds_features);
  strand = gt_feature_node_get_strand(fn);
  /* process reverse-strand features starting from the other end of the array */
  if (strand == GT_STRAND_REVERSE)
    gt_array_reverse(cds_features);
  for (i = 0; !had_err && i < gt_array_size(cds_features); i++) {
    fn = *(GtFeatureNode**) gt_array_get(cds_features, i);
    /* the first phase can be anything (except being undefined), because the
       GFF3 spec says:

       NOTE 4 - CDS features MUST have have a defined phase field. Otherwise it
       is not possible to infer the correct polypeptides corresponding to
       partially annotated genes. */
    if ((!i && gt_feature_node_get_phase(fn) == GT_PHASE_UNDEFINED) ||
        (i && gt_feature_node_get_phase(fn) != correct_phase)) {
      if (gt_hashmap_get(v->cds_features, fn)) {
        /* feature was already recorded during an earlier call */
        if (v->tidy && !is_multi && !gt_feature_node_has_children(fn)) {
          /* we can split the feature */
          gt_warning("%s feature on line %u in file \"%s\" has multiple "
                     "parents which require different phases; split feature",
                     gt_ft_CDS,
                     gt_genome_node_get_line_number((GtGenomeNode*) fn),
                     gt_genome_node_get_filename((GtGenomeNode*) fn));
          gt_hashmap_add(v->cds_features_to_split, fn, fn);
          v->splitting_is_necessary = true; /* split later */
        }
        else {
          gt_error_set(err, "%s feature on line %u in file \"%s\" has multiple "
                       "parents which require different phases",
                       gt_ft_CDS,
                       gt_genome_node_get_line_number((GtGenomeNode*) fn),
                       gt_genome_node_get_filename((GtGenomeNode*) fn));
          had_err = -1;
        }
      }
      else {
        if (v->tidy) {
          /* stay quiet on the second pass, but still apply the correction */
          if (!second_pass) {
            gt_warning("%s feature on line %u in file \"%s\" has the wrong "
                       "phase %c -> correcting it to %c", gt_ft_CDS,
                       gt_genome_node_get_line_number((GtGenomeNode*) fn),
                       gt_genome_node_get_filename((GtGenomeNode*) fn),
                       GT_PHASE_CHARS[gt_feature_node_get_phase(fn)],
                       GT_PHASE_CHARS[correct_phase]);
          }
          gt_feature_node_set_phase(fn, correct_phase);
        }
        else {
          gt_error_set(err, "%s feature on line %u in file \"%s\" has the "
                       "wrong phase %c (should be %c)", gt_ft_CDS,
                       gt_genome_node_get_line_number((GtGenomeNode*) fn),
                       gt_genome_node_get_filename((GtGenomeNode*) fn),
                       GT_PHASE_CHARS[gt_feature_node_get_phase(fn)],
                       GT_PHASE_CHARS[correct_phase]);
          had_err = -1;
        }
      }
    }
    if (!had_err) {
      current_phase = gt_feature_node_get_phase(fn);
      current_length = gt_genome_node_get_length((GtGenomeNode*) fn);
      /* Expected phase of the next feature: (phase - length) modulo 3.
         Written so that no intermediate value can drop below zero: the
         previous form, (3 - (length - phase) % 3) % 3, wrapped around in
         unsigned arithmetic when a feature was shorter than its phase (e.g.
         length 1, phase 2) and then yielded 0 instead of 1. For all other
         inputs both forms give the same result.
         NOTE(review): like the previous form, this relies on the GtPhase
         constants having the numeric values of the phases they name. */
      correct_phase = (current_phase + 3 - current_length % 3) % 3;
      gt_hashmap_add(v->cds_features, fn, fn); /* record CDS feature */
    }
  }
  return had_err;
}