/*
 * Assign a phase to every CDS segment stored in v->cds.
 *
 * The strand is read from the first array element. Segments are walked from
 * index 0 upwards, or from the last index downwards when that strand is
 * GT_STRAND_REVERSE. The first segment visited gets GT_PHASE_ZERO; each
 * following segment gets the phase derived from the summed length of the
 * segments visited before it. Nothing is done when v->cds is empty.
 */
static void infer_cds_visitor_check_cds_phase(AgnInferCDSVisitor *v)
{
  unsigned long num_cds_feats = gt_array_size(v->cds);
  if(num_cds_feats == 0)
    return;

  GtFeatureNode *first = *(GtFeatureNode **)gt_array_get(v->cds, 0);
  GtStrand strand = gt_feature_node_get_strand(first);

  /* Total length of the segments already visited; 0 for the first one, which
     therefore always receives GT_PHASE_ZERO. */
  unsigned long cds_length = 0;

  /* The counter is unsigned long like the array size, so there is no
     narrowing to int and no signed/unsigned comparison. */
  unsigned long n;
  for(n = 0; n < num_cds_feats; n++)
  {
    unsigned long idx = n;
    if(strand == GT_STRAND_REVERSE)
      idx = num_cds_feats - 1 - n;

    GtFeatureNode *cds = *(GtFeatureNode **)gt_array_get(v->cds, idx);

    /* Number of bases to skip so that the next codon starts in frame. */
    GtPhase phase;
    switch(cds_length % 3)
    {
      case 1:
        phase = GT_PHASE_TWO;
        break;
      case 2:
        phase = GT_PHASE_ONE;
        break;
      default:
        phase = GT_PHASE_ZERO;
        break;
    }
    gt_feature_node_set_phase(cds, phase);

    cds_length += gt_genome_node_get_length((GtGenomeNode *)cds);
  }
}
static int check_cds_phases(GtArray *cds_features, GtCDSCheckVisitor *v, bool is_multi, bool second_pass, GtError *err) { GtPhase current_phase, correct_phase = GT_PHASE_ZERO; GtFeatureNode *fn; GtStrand strand; unsigned long i, current_length; int had_err = 0; gt_error_check(err); gt_assert(cds_features); gt_assert(gt_array_size(cds_features)); fn = *(GtFeatureNode**) gt_array_get_first(cds_features); strand = gt_feature_node_get_strand(fn); if (strand == GT_STRAND_REVERSE) gt_array_reverse(cds_features); for (i = 0; !had_err && i < gt_array_size(cds_features); i++) { fn = *(GtFeatureNode**) gt_array_get(cds_features, i); /* the first phase can be anything (except being undefined), because the GFF3 spec says: NOTE 4 - CDS features MUST have have a defined phase field. Otherwise it is not possible to infer the correct polypeptides corresponding to partially annotated genes. */ if ((!i && gt_feature_node_get_phase(fn) == GT_PHASE_UNDEFINED) || (i && gt_feature_node_get_phase(fn) != correct_phase)) { if (gt_hashmap_get(v->cds_features, fn)) { if (v->tidy && !is_multi && !gt_feature_node_has_children(fn)) { /* we can split the feature */ gt_warning("%s feature on line %u in file \"%s\" has multiple " "parents which require different phases; split feature", gt_ft_CDS, gt_genome_node_get_line_number((GtGenomeNode*) fn), gt_genome_node_get_filename((GtGenomeNode*) fn)); gt_hashmap_add(v->cds_features_to_split, fn, fn); v->splitting_is_necessary = true; /* split later */ } else { gt_error_set(err, "%s feature on line %u in file \"%s\" has multiple " "parents which require different phases", gt_ft_CDS, gt_genome_node_get_line_number((GtGenomeNode*) fn), gt_genome_node_get_filename((GtGenomeNode*) fn)); had_err = -1; } } else { if (v->tidy) { if (!second_pass) { gt_warning("%s feature on line %u in file \"%s\" has the wrong " "phase %c -> correcting it to %c", gt_ft_CDS, gt_genome_node_get_line_number((GtGenomeNode*) fn), gt_genome_node_get_filename((GtGenomeNode*) fn), 
GT_PHASE_CHARS[gt_feature_node_get_phase(fn)], GT_PHASE_CHARS[correct_phase]); } gt_feature_node_set_phase(fn, correct_phase); } else { gt_error_set(err, "%s feature on line %u in file \"%s\" has the " "wrong phase %c (should be %c)", gt_ft_CDS, gt_genome_node_get_line_number((GtGenomeNode*) fn), gt_genome_node_get_filename((GtGenomeNode*) fn), GT_PHASE_CHARS[gt_feature_node_get_phase(fn)], GT_PHASE_CHARS[correct_phase]); had_err = -1; } } } if (!had_err) { current_phase = gt_feature_node_get_phase(fn); current_length = gt_genome_node_get_length((GtGenomeNode*) fn); correct_phase = (3 - (current_length - current_phase) % 3) % 3; gt_hashmap_add(v->cds_features, fn, fn); /* record CDS feature */ } } return had_err; }