/*
 * Collect the exon start/end coordinates of all isoforms of one gene.
 *
 * gff    GFF structure to read.
 * start  Cleared, then filled with the start of every exon record found.
 * end    Cleared, then filled with the end of every exon record found.
 * idx    Resized to (number of isoforms + 1). Entry k is the number of
 *        exons collected before the k-th mRNA record of the gene, i.e. the
 *        offset in start/end where that isoform's exons begin; the final
 *        entry is the total number of exons collected.
 * gene   Index of the gene; must satisfy 0 <= gene < number of genes,
 *        otherwise SPLICING_EINVAL is raised.
 *
 * Records are scanned from the one right after the gene record until the
 * next gene record or the end of the GFF. Each time an isoform's exon range
 * is complete it is handed to splicing_i_gff_exon_start_end_sort()
 * (presumably ordering that isoform's exons -- confirm against the helper).
 *
 * Returns 0 on success.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff,
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end,
                                splicing_vector_int_t *idx,
                                int gene) {

  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  /* Scratch vectors passed to the sorting helper. */
  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* First record after the gene record itself. */
  pos=VECTOR(gff->genes)[gene]+1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));

  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) {
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s));
      p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform starts here; the previous one (if any) is complete. */
      VECTOR(*idx)[i] = p;
      if (i!=0) {
        SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx,
                                                          i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Reached the next gene. */
      break;
    }
    pos++;
  }

  /* Close the last isoform's range and process it as well. */
  /* NOTE(review): if the gene has no mRNA record, i is 0 here and the helper
     is called with isoform index -1 -- confirm the helper tolerates that. */
  VECTOR(*idx)[i] = p;
  SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1,
                                                    &tmp, &tmp2));

  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  /* Two cleanup handlers (tmp, tmp2) were registered above, so two must be
     released; releasing only one would leave a handler for a destroyed
     local behind. */
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
/*
 * Fill a coordinate converter for one gene.
 *
 * gff        GFF structure to read.
 * gene       Index of the gene.
 * converter  Receives the number of isoforms (noiso), the exon starts, ends
 *            and per-isoform offsets (exstart, exend, exidx, as produced by
 *            splicing_gff_exon_start_end()), plus two derived vectors with
 *            one entry per exon:
 *              shift: (sum of exon starts up to and including this exon)
 *                     - (sum of exon ends of the preceding exons)
 *                     - (number of preceding exons) - 1
 *              exlim: 1 + total length of the isoform's exons up to and
 *                     including this exon
 *
 * The five vectors are set up with SPLICING_VECTOR_INT_INIT_FINALLY and are
 * not destroyed on the success path; the matching SPLICING_FINALLY_CLEAN(5)
 * is issued just before returning.
 *
 * Returns 0 on success.
 */
int splicing_gff_converter_init(const splicing_gff_t *gff, size_t gene,
                                splicing_gff_converter_t *converter) {

  int iso;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &converter->noiso));

  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exstart, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exend, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exidx, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->shift, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exlim, 0);

  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &converter->exstart,
                                             &converter->exend,
                                             &converter->exidx, gene));

  /* Per-exon shift, accumulated separately within each isoform */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t startsum = 0, endsum = 0, before = 0;
    int first = VECTOR(converter->exidx)[iso];
    int last = VECTOR(converter->exidx)[iso + 1];
    int e;
    for (e = first; e < last; e++) {
      startsum += VECTOR(converter->exstart)[e];
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->shift,
                                                   startsum - endsum -
                                                   before - 1));
      before++;
      endsum += VECTOR(converter->exend)[e];
    }
  }

  /* Per-exon limit: one past the cumulative exon length in the isoform */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t total = 0;
    int first = VECTOR(converter->exidx)[iso];
    int last = VECTOR(converter->exidx)[iso + 1];
    int e;
    for (e = first; e < last; e++) {
      size_t len = VECTOR(converter->exend)[e] -
        VECTOR(converter->exstart)[e] + 1;
      total += len;
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->exlim,
                                                   total + 1));
    }
  }

  SPLICING_FINALLY_CLEAN(5);

  return 0;
}
/*
 * Append one exon to an exon set.
 *
 * ex     Exon set to extend.
 * seqid  Name of the sequence the exon lies on. It is looked up in
 *        ex->seqids and appended there if not found; the resulting index
 *        is what gets stored in ex->seqid.
 * start  Exon start, appended to ex->start.
 * end    Exon end, appended to ex->end.
 *
 * Returns 0 on success.
 */
int splicing_exonset_append(splicing_exonset_t *ex, const char *seqid,
                            int start, int end) {

  size_t seqno;

  if (!splicing_strvector_search(&ex->seqids, seqid, &seqno)) {
    /* Unknown sequence name: it will sit at the current end of the list */
    seqno = splicing_strvector_size(&ex->seqids);
    SPLICING_CHECK(splicing_strvector_append(&ex->seqids, seqid));
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&ex->seqid, seqno));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->end, end));

  return 0;
}
/*
 * Convert genomic positions to isoform coordinates for every isoform of a
 * gene.
 *
 * gff       GFF structure to read.
 * gene      Index of the gene.
 * position  Genomic positions to convert.
 * isopos    Resized to (number of isoforms) x (number of positions). Entry
 *           (i, r) receives position r expressed in the coordinates of
 *           isoform i, or -1 if the position does not fall inside any exon
 *           of that isoform.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *position,
                            splicing_matrix_int_t *isopos) {

  size_t r, i, noiso, noreads=splicing_vector_int_size(position);
  splicing_vector_int_t exstart, exend, exidx, shift;

  /* The result must be checked: on failure noiso would be used
     uninitialised below. */
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx,
                                             gene));

  /* shift[ex] is the amount subtracted from a genomic position inside
     exon ex to obtain the isoform coordinate. */
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(exidx)[i], pos2=VECTOR(exidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(exstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++;
      ce += VECTOR(exend)[pos];
      pos++;
    }
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, noiso, noreads));

  for (r=0; r<noreads; r++) {
    for (i=0; i<noiso; i++) {
      size_t pos=VECTOR(*position)[r];
      size_t startpos=VECTOR(exidx)[i];
      size_t endpos=VECTOR(exidx)[i+1];
      int ex;

      /* First exon of isoform i that does not end before the position. */
      for (ex=startpos; ex < endpos && VECTOR(exend)[ex] < pos; ex++) ;

      /* ex == endpos means the position lies beyond the isoform's last
         exon; without this guard the test below would look at an exon of
         the next isoform, or past the end of the vectors. */
      if (ex < endpos &&
          VECTOR(exstart)[ex] <= pos && pos <= VECTOR(exend)[ex]) {
        MATRIX(*isopos, i, r) = VECTOR(*position)[r] - VECTOR(shift)[ex];
      } else {
        MATRIX(*isopos, i, r) = -1;
      }
    }
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
/*
 * Append one record to a GFF structure.
 *
 * gff     GFF structure to extend.
 * seqid   Sequence name; recorded for gene records only.
 * source  Source name; recorded for gene records only.
 * type    Record type. Gene and mRNA records are additionally registered
 *         in gff->genes / gff->transcripts and counted.
 * start   Start coordinate.
 * end     End coordinate.
 * score   Score.
 * strand  Strand; recorded for gene records only.
 * phase   Phase.
 * ID      ID of the record.
 * parent  ID of the parent record; NULL or "" means no parent.
 *
 * The parent is stored as a record index, or -1 if there is none or if the
 * ID is unknown (a warning is issued in the latter case). The most recent
 * seqid, source, gene ID and mRNA ID are cached in gff so that repeated
 * values are matched with a single strcmp() before falling back to a
 * search.
 * NOTE(review): the strcmp() calls assume the cached gff->last_* strings
 * are non-NULL, i.e. initialised elsewhere -- confirm.
 *
 * Returns 0 on success.
 */
int splicing_gff_append(splicing_gff_t *gff, const char *seqid,
                        const char *source, splicing_type_t type, int start,
                        int end, double score, splicing_strand_t strand,
                        int phase, const char *ID, const char *parent) {

  if (type == SPLICING_TYPE_GENE) {
    gff->nogenes++;
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->genes, gff->n));
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->strand, strand));
  } else if (type == SPLICING_TYPE_MRNA) {
    gff->notranscripts++;
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->transcripts, gff->n));
  }

  if (type == SPLICING_TYPE_GENE) {

    /* Seqid */
    if (!strcmp(seqid, gff->last_seqid)) {
      int last=splicing_vector_int_tail(&gff->seqid);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->seqids, seqid, &idx);
      if (seen) {
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, idx));
        gff->last_seqid=splicing_strvector_get(&gff->seqids, idx);
      } else {
        size_t size=splicing_strvector_size(&gff->seqids);
        SPLICING_CHECK(splicing_strvector_append(&gff->seqids, seqid));
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, size));
        /* Update the seqid cache (not the source cache) with the newly
           stored name. */
        gff->last_seqid=splicing_strvector_get(&gff->seqids, size);
      }
    }

    /* Source */
    if (!strcmp(source, gff->last_source)) {
      int last=splicing_vector_int_tail(&gff->source);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->sources, source, &idx);
      if (seen) {
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, idx));
        gff->last_source=splicing_strvector_get(&gff->sources, idx);
      } else {
        size_t size=splicing_strvector_size(&gff->sources);
        SPLICING_CHECK(splicing_strvector_append(&gff->sources, source));
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, size));
        gff->last_source=splicing_strvector_get(&gff->sources, size);
      }
    }
  }

  /* Parent */
  if (!parent || !parent[0]) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
  } else if (!strcmp(parent, gff->last_gene_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent,
                                                 gff->last_gene_no));
  } else if (!strcmp(parent, gff->last_mrna_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent,
                                                 gff->last_mrna_no));
  } else {
    /* Not one of the cached IDs: search all IDs stored so far. */
    size_t idx;
    int seen=splicing_strvector_search(&gff->ID, parent, &idx);
    if (!seen) {
      SPLICING_WARNING("Unknown parent ID, invalid GFF file");
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
    } else {
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, idx));
    }
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&gff->type, type));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->end, end));
  SPLICING_CHECK(splicing_vector_push_back(&gff->score, score));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->phase, phase));
  SPLICING_CHECK(splicing_strvector_append(&gff->ID, ID));

  /* Update last gene/mrna */
  if (type == SPLICING_TYPE_GENE) {
    gff->last_gene_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_gene_no = gff->n;
  } else if (type == SPLICING_TYPE_MRNA) {
    gff->last_mrna_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_mrna_no = gff->n;
  }

  gff->n += 1;

  return 0;
}
/*
 * Convert isoform coordinates to genomic coordinates, in place.
 *
 * gff       GFF structure to read.
 * gene      Index of the gene.
 * isoform   For each position, the index of the isoform it refers to.
 * exstart,
 * exend,
 * exidx     Exon starts, ends and per-isoform offsets of the gene. If any
 *           of the three is NULL, all three are computed locally with
 *           splicing_gff_exon_start_end() and freed before returning.
 * position  On input isoform coordinates, on output the corresponding
 *           genomic coordinates.
 *
 * Returns 0 on success.
 */
int splicing_iso_to_genomic(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *isoform,
                            const splicing_vector_int_t *exstart,
                            const splicing_vector_int_t *exend,
                            const splicing_vector_int_t *exidx,
                            splicing_vector_int_t *position) {

  size_t k, noiso;
  size_t npos = splicing_vector_int_size(position);
  int own = !exstart || !exend || !exidx;
  splicing_vector_int_t exlim, shift;
  splicing_vector_int_t vexstart, vexend, vexidx;
  splicing_vector_int_t *myexstart =
    own ? &vexstart : (splicing_vector_int_t *) exstart;
  splicing_vector_int_t *myexend =
    own ? &vexend : (splicing_vector_int_t *) exend;
  splicing_vector_int_t *myexidx =
    own ? &vexidx : (splicing_vector_int_t *) exidx;

  if (own) {
    SPLICING_CHECK(splicing_vector_int_init(myexstart, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexstart);
    SPLICING_CHECK(splicing_vector_int_init(myexend, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexend);
    SPLICING_CHECK(splicing_vector_int_init(myexidx, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexidx);
    SPLICING_CHECK(splicing_gff_exon_start_end(gff, myexstart, myexend,
                                               myexidx, gene));
  }

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exlim, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exlim);
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);

  /* shift[e]: amount added to an isoform coordinate that falls in exon e
     to obtain the genomic coordinate */
  for (k = 0; k < noiso; k++) {
    size_t startsum = 0, endsum = 0, before = 0;
    int first = VECTOR(*myexidx)[k], last = VECTOR(*myexidx)[k + 1];
    int e;
    for (e = first; e < last; e++) {
      startsum += VECTOR(*myexstart)[e];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift,
                                                   startsum - endsum -
                                                   before - 1));
      before++;
      endsum += VECTOR(*myexend)[e];
    }
  }

  /* exlim[e]: one past the cumulative exon length of the isoform up to and
     including exon e */
  for (k = 0; k < noiso; k++) {
    size_t total = 0;
    int first = VECTOR(*myexidx)[k], last = VECTOR(*myexidx)[k + 1];
    int e;
    for (e = first; e < last; e++) {
      size_t len = VECTOR(*myexend)[e] - VECTOR(*myexstart)[e] + 1;
      total += len;
      SPLICING_CHECK(splicing_vector_int_push_back(&exlim, total + 1));
    }
  }

  for (k = 0; k < npos; k++) {
    int iso = VECTOR(*isoform)[k];
    size_t where = VECTOR(*position)[k];
    int e = VECTOR(*myexidx)[iso];

    /* Skip the exons whose limit the position has already reached. */
    /* NOTE(review): the scan has no upper bound; it relies on the position
       lying within the isoform -- confirm callers guarantee this. */
    while (VECTOR(exlim)[e] <= where) {
      e++;
    }
    VECTOR(*position)[k] = where + VECTOR(shift)[e];
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exlim);
  SPLICING_FINALLY_CLEAN(2);

  if (own) {
    splicing_vector_int_destroy(myexidx);
    splicing_vector_int_destroy(myexend);
    splicing_vector_int_destroy(myexstart);
    SPLICING_FINALLY_CLEAN(3);
  }

  return 0;
}