/*
 * Collect the exon coordinates of all isoforms of a single gene.
 *
 * gff    The annotation to read from.
 * start  Cleared, then filled with the start coordinate of every exon
 *        record found after the gene record, in record order, before
 *        each isoform's slice is handed to
 *        splicing_i_gff_exon_start_end_sort().
 * end    Same as `start`, for the end coordinates.
 * idx    Resized to (number of isoforms + 1). Entry i holds the number
 *        of exons pushed before the i-th mRNA record was seen; the entry
 *        after the last isoform holds the total number of exons.
 * gene   Index of the gene; must be in [0, number of genes).
 *
 * Returns 0 on success. Failures are reported through SPLICING_CHECK /
 * SPLICING_ERROR (an out-of-range `gene` raises SPLICING_EINVAL).
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff,
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end,
                                splicing_vector_int_t *idx,
                                int gene) {

  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  /* Scratch vectors passed to the per-isoform sort helper. */
  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || (size_t) gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Start scanning at the record right after the gene record itself. */
  pos=VECTOR(gff->genes)[gene]+1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));

  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) {
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s)); p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform begins: record where its exons start and
         process the isoform that has just been completed, if any. */
      VECTOR(*idx)[i] = p;
      if (i!=0) {
        SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx,
                                                          i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Reached the next gene, we are done. */
      break;
    }
    pos++;
  }

  /* Closing offset, then process the last isoform. The guard mirrors
     the one inside the loop: without any mRNA record there is no
     isoform i-1 to process. */
  VECTOR(*idx)[i] = p;
  if (i!=0) {
    SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1,
                                                      &tmp, &tmp2));
  }

  /* Two objects were registered with SPLICING_FINALLY above, so two
     entries must be popped; popping only one would leave a pointer to
     a destroyed local on the finally stack. */
  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
/*
 * Find, for every gene, the regions that are covered by an exon in all
 * of the gene's isoforms and that are at least `min_length` long.
 *
 * gff         The annotation to read from.
 * exons       Initialised here and filled with (seqid, start, end)
 *             records via splicing_exonset_append(). On success the
 *             caller owns it.
 * min_length  Minimum length (end - start + 1) a region must have to be
 *             reported.
 *
 * Returns 0 on success; failures are reported through SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
                                          splicing_exonset_t *exons,
                                          int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    /* Records of gene g live in (start, end) of the GFF record list. */
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Checked, so that `noiso` is never used uninitialised when the
       isoform count cannot be determined. */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events: an exon start is stored as a
       positive number, an exon end as a negative one. */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        -VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    /* Ordering is defined by splicing_i_const_cmp (not shown here). */
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
                   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive exons.
       `noex` is the number of exons currently open. */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) { noex++; }
      if (ev < 0) {
        /* The event right before a closing end is taken as the start
           of the region; only look at it if there is one. */
        if (i > 0 && (size_t) noex == noiso) {
          int prev=VECTOR(events)[i-1];
          if ((-ev)-prev+1 >= min_length) {
            /* constitutive exon */
            SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev, -ev));
          }
        }
        noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
/*
 * Find, for every gene, the exons that appear with exactly the same
 * start and end in all of the gene's isoforms and that are at least
 * `min_length` long.
 *
 * gff         The annotation to read from.
 * exons       Initialised here and filled with (seqid, start, end)
 *             records via splicing_exonset_append(). On success the
 *             caller owns it.
 * min_length  Minimum length (end - start + 1) an exon must have to be
 *             reported, as in splicing_i_gff_constitutive_exons_all().
 *
 * Returns 0 on success; failures are reported through SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
                                           splicing_exonset_t *exons,
                                           int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  /* Start from size 0, like the `_all` variant: records are only ever
     appended, and an initial size of 1 would presumably leave a blank
     leading record in the result. */
  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    /* Records of gene g live in (start, end) of the GFF record list. */
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Checked, so that `noiso` is never used uninitialised when the
       isoform count cannot be determined. */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect all exons as consecutive (start, end) pairs and sort the
       pairs; ordering is defined by splicing_i_const_cmp2 (not shown
       here). */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
                   splicing_i_const_cmp2);

    /* Now go over them and check how many times each exon appears.
       The scan starts at the very first exon so that it is counted as
       well; otherwise it could never be reported for a gene with a
       single isoform. */
    for (noSame=0, i=0; i<noExons*2; i+=2) {
      int exstart=VECTOR(events)[i];
      int exend=VECTOR(events)[i+1];
      if (i > 0 &&
          exstart == VECTOR(events)[i-2] &&
          exend == VECTOR(events)[i-1]) {
        noSame++;
      } else {
        noSame=1;
      }
      /* Reported exactly once, when the run of identical exons reaches
         the number of isoforms. */
      if ((size_t) noSame == noiso && exend-exstart+1 >= min_length) {
        SPLICING_CHECK(splicing_exonset_append(exons, seqid, exstart, exend));
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}