/*
 * Compute the singular values of `matrix` through splicingdgesdd_(),
 * called with JOBZ='N' and dummy U / VT arguments.
 *
 * matrix: input matrix; a copy is made and the copy is what gets passed
 *         to splicingdgesdd_().
 * values: resized to min(nrow, ncol) and passed as the singular value
 *         output array.
 *
 * Returns 0 on success. Raises SPLICING_ELAPACK when either
 * splicingdgesdd_() call reports a non-zero `info`; failures of the
 * vector/matrix helpers are propagated by SPLICING_CHECK.
 */
int splicing_dgesdd(const splicing_matrix_t *matrix,
                    splicing_vector_t *values) {

  splicing_matrix_t tmp;
  int m=splicing_matrix_nrow(matrix);
  int n=splicing_matrix_ncol(matrix);
  int lda=m, minmn= m < n ? m : n;
  int lwork=-1;
  int info=0;
  splicing_vector_t work;
  splicing_vector_int_t iwork;
  char jobz='N';
  int dummy=1;
  double dummy2;

  SPLICING_CHECK(splicing_matrix_copy(&tmp, matrix));
  SPLICING_FINALLY(splicing_matrix_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_init(&work, 1));
  SPLICING_FINALLY(splicing_vector_destroy, &work);
  SPLICING_CHECK(splicing_vector_int_init(&iwork, 8*minmn));
  SPLICING_FINALLY(splicing_vector_int_destroy, &iwork);

  SPLICING_CHECK(splicing_vector_resize(values, minmn));

  /* Get the optimal lwork first: with lwork == -1 the routine only
     writes the workspace size into work[0] */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda,
                  VECTOR(*values), /*U=*/ &dummy2, /*LDU=*/ &dummy,
                  /*VT=*/ &dummy2, /*LDVT=*/ &dummy,
                  VECTOR(work), &lwork, VECTOR(iwork), &info);

  /* Do not trust work[0] if the query itself failed */
  if (info != 0) {
    SPLICING_ERROR("Cannot query SVD workspace size", SPLICING_ELAPACK);
  }

  lwork = (int) VECTOR(work)[0];
  SPLICING_CHECK(splicing_vector_resize(&work, lwork));

  /* Now do the SVD */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda,
                  VECTOR(*values), /*U=*/ &dummy2, /*LDU=*/ &dummy,
                  /*VT=*/ &dummy2, /*LDVT=*/ &dummy,
                  VECTOR(work), &lwork, VECTOR(iwork), &info);

  if (info != 0) {
    SPLICING_ERROR("Cannot calculate SVD", SPLICING_ELAPACK);
  }

  splicing_vector_destroy(&work);
  splicing_vector_int_destroy(&iwork);
  splicing_matrix_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(3);

  return 0;
}
/*
 * Compute the summed exon length of every transcript in `gff`.
 *
 * isolength:     resized to the number of transcripts; entry k receives
 *                the sum of (end - start + 1) over the exon records that
 *                follow the k-th mRNA record.
 * isolength_idx: resized to the number of genes; for each gene record it
 *                receives the index in `isolength` of the next
 *                transcript to be seen.
 *
 * The scan starts at the record of the first gene and runs to gff->n.
 * Returns 0 on success; resize failures are propagated by SPLICING_CHECK.
 */
int splicing_gff_isolength(const splicing_gff_t *gff,
                           splicing_vector_int_t *isolength,
                           splicing_vector_int_t *isolength_idx) {

  size_t idx1;
  size_t nogenes=splicing_vector_int_size(&gff->genes);
  size_t notrans=splicing_vector_int_size(&gff->transcripts);
  int pos=-1, ipos=0;

  SPLICING_CHECK(splicing_vector_int_resize(isolength, notrans));
  SPLICING_CHECK(splicing_vector_int_resize(isolength_idx, nogenes));

  /* Without genes there is no first gene record to start from */
  if (nogenes == 0) {
    return 0;
  }

  for (idx1=VECTOR(gff->genes)[0]; idx1 < gff->n; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) {
      VECTOR(*isolength_idx)[ipos++]=pos+1;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
      VECTOR(*isolength)[++pos] = 0;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON && pos >= 0) {
      /* pos >= 0: an exon seen before any mRNA has no transcript to
         be added to, and must not write to index -1 */
      VECTOR(*isolength)[pos] +=
        (VECTOR(gff->end)[idx1] - VECTOR(gff->start)[idx1] + 1);
    }
  }

  return 0;
}
/*
 * Joint score: the sum of three terms,
 *   - the isoscores entries selected by the assigned reads,
 *   - the value produced by splicing_score_iso(),
 *   - the value produced by splicing_ldirichlet().
 *
 * assignment: per-read isoform index; -1 marks a read that is skipped.
 * no_reads:   number of entries of `assignment` to visit.
 * score:      receives the sum.
 *
 * Returns 0 on success; errors of the two helpers are propagated.
 */
int splicing_score_joint(const splicing_vector_int_t *assignment,
                         int no_reads,
                         const splicing_vector_t *psi,
                         const splicing_vector_t *hyper,
                         const splicing_vector_int_t *effisolen,
                         const splicing_vector_t *isoscores,
                         double *score) {

  const int noiso = splicing_vector_int_size(effisolen);
  double readTerm = 0.0;
  double assTerm, psiTerm;
  int r;

  /* Reads */
  for (r = 0; r < no_reads; r++) {
    const int iso = VECTOR(*assignment)[r];
    if (iso == -1) {
      continue;
    }
    readTerm += VECTOR(*isoscores)[iso];
  }

  /* Isoforms */
  SPLICING_CHECK(splicing_score_iso(psi, noiso, assignment, no_reads,
                                    effisolen, &assTerm));
  SPLICING_CHECK(splicing_ldirichlet(psi, hyper, noiso, &psiTerm));

  *score = readTerm + assTerm + psiTerm;

  return 0;
}
/*
 * Reorder, in place, the slice [idx[iso], idx[iso+1]) of both `start`
 * and `end` with one common permutation.
 *
 * The permutation is produced by splicing_qsort_r() on the index
 * sequence 0..len-1, with a pointer to the `start` slice as the extra
 * argument and splicing_i_gff_exon_start_end_sort_cmp as comparator.
 *
 * tmp, tmp2: caller-provided work vectors; both are resized to the
 *            slice length. tmp holds the permutation, tmp2 a copy of
 *            the slice currently being permuted.
 *
 * Returns 0 on success; resize failures are propagated.
 */
int splicing_i_gff_exon_start_end_sort(const splicing_vector_int_t *start,
                                       const splicing_vector_int_t *end,
                                       const splicing_vector_int_t *idx,
                                       int iso,
                                       splicing_vector_int_t *tmp,
                                       splicing_vector_int_t *tmp2) {

  const int from = VECTOR(*idx)[iso];
  const int to = VECTOR(*idx)[iso+1];
  const int len = to - from;
  const splicing_vector_int_t *columns[2];
  int k, c;

  SPLICING_CHECK(splicing_vector_int_resize(tmp, len));
  SPLICING_CHECK(splicing_vector_int_resize(tmp2, len));

  /* Identity permutation, then let the sort rearrange it */
  k = 0;
  while (k < len) {
    VECTOR(*tmp)[k] = k;
    k++;
  }
  splicing_qsort_r(VECTOR(*tmp), len, sizeof(int),
                   (void*) (VECTOR(*start)+from),
                   splicing_i_gff_exon_start_end_sort_cmp);

  /* Apply the permutation to `start` first, then to `end` */
  columns[0] = start;
  columns[1] = end;
  for (c = 0; c < 2; c++) {
    int *slice = VECTOR(*columns[c]) + from;
    for (k = 0; k < len; k++) {
      VECTOR(*tmp2)[k] = slice[k];
    }
    for (k = 0; k < len; k++) {
      slice[k] = VECTOR(*tmp2)[ VECTOR(*tmp)[k] ];
    }
  }

  return 0;
}
/*
 * Count the exon records of every isoform (mRNA record) of one gene.
 *
 * gene:    gene index; must be smaller than the number of genes,
 *          otherwise SPLICING_EINVAL is raised.
 * noexons: resized to the number of mRNA records of the gene; entry k
 *          receives the number of exon records between the k-th mRNA
 *          record and the next one (or the end of the gene).
 *
 * Returns 0 on success.
 */
int splicing_gff_noexons_one(const splicing_gff_t *gff, size_t gene,
                             splicing_vector_int_t *noexons) {

  size_t nogenes, first, last, idx1, noiso=0, pos=0, il=0;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Record range of this gene: up to the next gene, or to the end */
  first=VECTOR(gff->genes)[gene];
  last= gene+1 == nogenes ? gff->n : VECTOR(gff->genes)[gene+1];

  for (idx1=first; idx1 < last; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { noiso += 1; }
  }

  SPLICING_CHECK(splicing_vector_int_resize(noexons, noiso));

  /* No isoforms: the result is empty and the store after the loop
     below would write past its end */
  if (noiso == 0) {
    return 0;
  }

  /* Skip to the first mRNA record, then step over it */
  for (idx1=first;
       idx1 < last && VECTOR(gff->type)[idx1] != SPLICING_TYPE_MRNA;
       idx1++) ;
  idx1++;

  /* Each further mRNA record closes the count of the previous one */
  for (; idx1 < last; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
      VECTOR(*noexons)[pos++]=il;
      il=0;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON) {
      il++;
    }
  }
  /* The last isoform is closed by the end of the gene */
  VECTOR(*noexons)[pos]=il;

  return 0;
}
/*
 * Draw an isoform for every read (column of `matches`).
 *
 * matches:     one column per read.
 * match_order: order in which the columns are visited; a column whose
 *              first `noiso` doubles are bytewise equal to the
 *              previously visited one reuses the state set up by
 *              CUMSUM().
 * result:      resized to the number of reads; entry order[i] receives
 *              -1 when noValid is 0, the single valid isoform when it
 *              is 1, and otherwise an isoform picked with a uniform
 *              random number scaled by sumpsi.
 *
 * NOTE(review): CUMSUM() is a macro defined outside this function. It
 * presumably fills cumsum, validIso, noValid and sumpsi from `curr` and
 * `psi`, and may reference the locals below by name, so they are kept
 * under their original names; confirm against its definition.
 *
 * Returns 0 on success.
 */
int splicing_reassign_samples(const splicing_matrix_t *matches,
                              const splicing_vector_int_t *match_order,
                              const splicing_vector_t *psi,
                              int noiso, splicing_vector_int_t *result) {

  int noreads = splicing_matrix_ncol(matches);
  int i, w;
  double *prev, *curr;
  double rand, sumpsi;
  int noValid;
  int *order=VECTOR(*match_order);
  splicing_vector_t cumsum;
  splicing_vector_int_t validIso;

  SPLICING_CHECK(splicing_vector_init(&cumsum, noiso));
  SPLICING_FINALLY(splicing_vector_destroy, &cumsum);
  SPLICING_CHECK(splicing_vector_int_init(&validIso, noiso));
  SPLICING_FINALLY(splicing_vector_int_destroy, &validIso);

  SPLICING_CHECK(splicing_vector_int_resize(result, noreads));

  if (noreads == 0) {
    /* Nothing to draw, but the work vectors registered above still
       have to be released and taken off the cleanup stack */
    splicing_vector_int_destroy(&validIso);
    splicing_vector_destroy(&cumsum);
    SPLICING_FINALLY_CLEAN(2);
    return 0;
  }

  prev = curr = &MATRIX(*matches, 0, order[0]);
  CUMSUM();

  for (i=0; i<noreads; i++) {
    curr = &MATRIX(*matches, 0, order[i]);

    /* Maybe we need to update the cumulative sum */
    if (memcmp(prev, curr, sizeof(double)*noiso) != 0) { CUMSUM(); }

    if (noValid == 0) {
      VECTOR(*result)[order[i]] = -1;
    } else if (noValid == 1) {
      VECTOR(*result)[order[i]] = VECTOR(validIso)[0];
    } else if (noValid == 2) {
      rand = RNG_UNIF01() * sumpsi;
      w = (rand < VECTOR(cumsum)[0]) ? VECTOR(validIso)[0] :
        VECTOR(validIso)[1];
      VECTOR(*result)[order[i]] = w;
    } else {
      /* Draw */
      rand = RNG_UNIF01() * sumpsi;
      /* TODO: Binary search for interval, if many classes */
      for (w=0; rand > VECTOR(cumsum)[w]; w++) ;
      VECTOR(*result)[order[i]] = VECTOR(validIso)[w];
    }

    prev=curr;
  }

  splicing_vector_int_destroy(&validIso);
  splicing_vector_destroy(&cumsum);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
/*
 * Append one gene, its isoforms and their exons to `extend`.
 *
 * exons:    exon coordinates; exon k is read as the pair
 *           (exons[2k], exons[2k+1]).
 * isoforms: exon indices; a negative entry ends the current isoform.
 * id:       gene id; isoform records are named "<id>-isoform-<n>" and
 *           exon records "<id>-isoform-<n>-exon-<m>".
 *
 * The gene record spans the minimum and maximum of `exons`; each mRNA
 * record spans the smallest first and the largest second coordinate of
 * its exons.
 *
 * Returns 0 on success; failures of splicing_gff_append() are
 * propagated by SPLICING_CHECK.
 */
int splicing_create_gene(const splicing_vector_int_t *exons,
                         const splicing_vector_int_t *isoforms,
                         const char *id, const char *seqid,
                         const char *source, splicing_strand_t strand,
                         splicing_gff_t *extend) {

  size_t i=0;
  size_t isolen=splicing_vector_int_size(isoforms);
  size_t genestart=splicing_vector_int_min(exons);
  size_t geneend=splicing_vector_int_max(exons);
  char buffer[5000], buffer2[5000];
  int noiso=0;

  /* TODO: error checks */

  /* Gene */
  SPLICING_CHECK(splicing_gff_append(extend, seqid, source,
                                     SPLICING_TYPE_GENE, genestart, geneend,
                                     /*score=*/ SPLICING_NA_REAL, strand,
                                     /*phase=*/ SPLICING_NA_INTEGER, id,
                                     /*parent=*/ 0));

  while (i<isolen) {
    size_t mmin=VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] ];
    size_t mmax=VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] + 1 ];
    size_t j, exon=0;

    /* Extent of the isoform. The j < isolen bound keeps the scan inside
       `isoforms` when the last isoform has no negative terminator. */
    for (j=i+1; j < isolen && VECTOR(*isoforms)[j] >= 0; j++) {
      size_t m1=VECTOR(*exons)[ 2*VECTOR(*isoforms)[j] ];
      size_t m2=VECTOR(*exons)[ 2*VECTOR(*isoforms)[j] + 1 ];
      if (m1 < mmin) { mmin = m1; }
      if (m2 > mmax) { mmax = m2; }
    }

    /* snprintf() NUL-terminates within the given size */
    snprintf(buffer, sizeof(buffer), "%s-isoform-%i", id, noiso);
    SPLICING_CHECK(splicing_gff_append(extend, seqid, source,
                                       SPLICING_TYPE_MRNA, mmin, mmax,
                                       /*score=*/ SPLICING_NA_REAL, strand,
                                       /*phase=*/ SPLICING_NA_INTEGER,
                                       buffer, /*parent=*/ id));

    /* Exons of the isoform, bounded the same way as the scan above */
    for (; i < isolen && VECTOR(*isoforms)[i] >= 0; i++) {
      snprintf(buffer2, sizeof(buffer2), "%s-isoform-%i-exon-%i", id,
               (int) noiso, (int) exon++);
      SPLICING_CHECK(splicing_gff_append(extend, seqid, source,
                               SPLICING_TYPE_EXON,
                               VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] ],
                               VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] + 1 ],
                               /*score=*/ SPLICING_NA_REAL, strand,
                               /*phase=*/ SPLICING_NA_INTEGER, buffer2,
                               /*parent=*/ buffer));
    }
    noiso++;
    i++;                        /* step over the terminator */
  }

  return 0;
}
/*
 * Collect, for every gene, the intervals that are covered by as many
 * open exons as the gene has isoforms and that are at least
 * `min_length` long.
 *
 * For each gene the exon starts are stored as positive and the exon
 * ends as negated events, and the events are sorted with
 * splicing_i_const_cmp. While walking the sorted events `noex` counts
 * the exons currently open; an end event whose open count equals the
 * number of isoforms reports the interval from the preceding event to
 * this end.
 *
 * exons: initialized here and appended to.
 *
 * Returns 0 on success; helper failures are propagated.
 */
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
                                          splicing_exonset_t *exons,
                                          int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* A failure here would leave noiso unset, so do not ignore it */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        -VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
                   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive
       exons */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) {
        noex++;
      }
      if (ev < 0) {
        /* i > 0: an end event in first position has no predecessor */
        if (i > 0) {
          int prev=VECTOR(events)[i-1];
          if ((size_t) noex == noiso && (-ev)-prev+1 >= min_length) {
            /* constitutive exon */
            SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev,
                                                   -ev));
          }
        }
        noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);    /* + exons */

  return 0;
}
/*
 * Collect, for every gene, the exons whose exact (start, end) pair
 * occurs as many times among the gene's exon records as the gene has
 * isoforms.
 *
 * For each gene the (start, end) pairs are gathered and sorted as
 * two-int items with splicing_i_const_cmp2, so that equal pairs become
 * neighbours; runs of equal pairs are then counted.
 *
 * exons:      initialized here and appended to.
 * min_length: not used by this function.
 *
 * Returns 0 on success; helper failures are propagated.
 */
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
                                           splicing_exonset_t *exons,
                                           int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_exonset_init(exons, 1));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* A failure here would leave noiso unset, so do not ignore it */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all exons */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
                   splicing_i_const_cmp2);

    /* Now go over them and check how many times each exon appears.
       The walk starts at the first pair, so that the first exon is
       also reported when a single occurrence is already enough
       (noiso == 1). */
    for (noSame=0, i=0; i<noExons*2; i+=2) {
      int exstart=VECTOR(events)[i];
      int exend=VECTOR(events)[i+1];
      if (i > 0 && exstart == VECTOR(events)[i-2] &&
          VECTOR(events)[i-1] == exend) {
        noSame++;
      } else {
        noSame=1;
      }
      if ((size_t) noSame == noiso) {
        SPLICING_CHECK(splicing_exonset_append(exons, seqid, exstart,
                                               exend));
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);    /* + exons */

  return 0;
}
/*
 * Print every gene of `gff` to `outfile`, in gene order, by calling
 * splicing_gff_fprint_gene() for each gene index.
 *
 * Returns 0 on success; the first failing call is propagated.
 */
int splicing_gff_fprint(const splicing_gff_t *gff, FILE *outfile) {

  size_t nogenes;
  size_t gene = 0;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  while (gene < nogenes) {
    SPLICING_CHECK(splicing_gff_fprint_gene(gff, outfile, gene));
    gene++;
  }

  return 0;
}
/*
 * Initialize the four members of an exon set (seqids, seqid, start,
 * end), each with size 0.
 *
 * size: accepted but not used; every member starts out empty.
 *
 * Each member is registered for cleanup right after it is initialized,
 * so a failure part-way through is handled by the registered
 * destructors. All four registrations are removed again before the
 * successful return.
 *
 * Returns 0 on success.
 */
int splicing_exonset_init(splicing_exonset_t *ex, size_t size) {

  /* Sequence id strings */
  SPLICING_CHECK(splicing_strvector_init(&ex->seqids, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &ex->seqids);

  /* Per-exon sequence id index */
  SPLICING_CHECK(splicing_vector_int_init(&ex->seqid, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->seqid);

  /* Per-exon start coordinate */
  SPLICING_CHECK(splicing_vector_int_init(&ex->start, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->start);

  /* Per-exon end coordinate */
  SPLICING_CHECK(splicing_vector_int_init(&ex->end, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->end);

  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
/*
 * Extract the exon coordinates of one gene, grouped by isoform.
 *
 * start, end: cleared, then filled with the start / end of every exon
 *             record of the gene, in record order; afterwards the exons
 *             of each isoform are reordered by
 *             splicing_i_gff_exon_start_end_sort().
 * idx:        resized to (number of isoforms + 1); the exons of isoform
 *             k occupy positions idx[k] .. idx[k+1]-1 of start / end.
 * gene:       gene index; SPLICING_EINVAL is raised when it is negative
 *             or not smaller than the number of genes.
 *
 * Returns 0 on success.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff,
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end,
                                splicing_vector_int_t *idx,
                                int gene) {

  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || (size_t) gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* First record after the gene record itself */
  pos=VECTOR(gff->genes)[gene]+1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));

  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) {
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s)); p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform starts: close the previous one and sort it */
      VECTOR(*idx)[i] = p;
      if (i!=0) {
        SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx,
                                                          i-1, &tmp,
                                                          &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Next gene reached */
      break;
    }
    pos++;
  }
  VECTOR(*idx)[i] = p;
  /* Sort the last isoform; without any mRNA record there is none, and
     i-1 would be an invalid isoform index */
  if (i > 0) {
    SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1,
                                                      &tmp, &tmp2));
  }

  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  /* Two work vectors were registered above, so two are unregistered */
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
int splicing_genomic_to_iso_1(const splicing_gff_t *gff, size_t gene, int isoform, int position, const splicing_gff_converter_t *converter, int *result) { size_t startpos, endpos, ex; splicing_gff_converter_t vconverter, *myconverter = (splicing_gff_converter_t*) converter; if (!converter) { myconverter=&vconverter; SPLICING_CHECK(splicing_gff_converter_init(gff, gene, myconverter)); SPLICING_FINALLY(splicing_gff_converter_destroy, myconverter); } startpos=VECTOR(myconverter->exidx)[isoform]; endpos=VECTOR(myconverter->exidx)[isoform+1]; for (ex=startpos; ex < endpos && VECTOR(myconverter->exend)[ex] < position; ex++) ; if (ex < endpos && VECTOR(myconverter->exstart)[ex] <= position && position <= VECTOR(myconverter->exend)[ex]) { *result = position - VECTOR(myconverter->shift)[ex]; } else { *result = -1; } if (!converter) { splicing_gff_converter_destroy(myconverter); SPLICING_FINALLY_CLEAN(1); } return 0; }
/*
 * Copy the start and end coordinate of every gene record.
 *
 * start, end: resized to the number of genes; entry g receives the
 *             start / end stored at the record index of gene g.
 *
 * Returns 0 on success; resize failures are propagated.
 */
int splicing_gff_gene_start_end(const splicing_gff_t *gff,
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end) {

  const size_t nogenes = splicing_vector_int_size(&gff->genes);
  size_t g = 0;

  SPLICING_CHECK(splicing_vector_int_resize(start, nogenes));
  SPLICING_CHECK(splicing_vector_int_resize(end, nogenes));

  while (g < nogenes) {
    const size_t record = VECTOR(gff->genes)[g];
    VECTOR(*start)[g] = VECTOR(gff->start)[record];
    VECTOR(*end)[g] = VECTOR(gff->end)[record];
    g++;
  }

  return 0;
}
/*
 * Build a coordinate converter for one gene.
 *
 * Fills the converter with:
 *   noiso            - number of isoforms (splicing_gff_noiso_one()),
 *   exstart / exend  - exon coordinates, and
 *   exidx            - per-isoform offsets into them
 *                      (splicing_gff_exon_start_end()),
 *   shift            - for every exon, (sum of exon starts up to and
 *                      including it) - (sum of the ends of the earlier
 *                      exons of the isoform) - (number of earlier
 *                      exons) - 1,
 *   exlim            - for every exon, the cumulative exon length of
 *                      the isoform up to and including it, plus one.
 *
 * The five vectors are registered for cleanup while they are being
 * filled; the registrations are dropped before the successful return
 * and the vectors themselves are not destroyed here.
 *
 * Returns 0 on success.
 */
int splicing_gff_converter_init(const splicing_gff_t *gff, size_t gene,
                                splicing_gff_converter_t *converter) {

  int iso;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &converter->noiso));

  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exstart, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exend, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exidx, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->shift, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exlim, 0);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &converter->exstart,
                                             &converter->exend,
                                             &converter->exidx, gene));

  /* Shift of every exon, isoform by isoform */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t sumStart = 0, sumEnd = 0, seen = 0;
    int e;
    const int stop = VECTOR(converter->exidx)[iso+1];
    for (e = VECTOR(converter->exidx)[iso]; e < stop; e++) {
      sumStart += VECTOR(converter->exstart)[e];
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->shift,
                                                   sumStart-sumEnd-seen-1));
      seen++;
      sumEnd += VECTOR(converter->exend)[e];
    }
  }

  /* Cumulative exon length limits, isoform by isoform */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t covered = 0;
    int e;
    const int stop = VECTOR(converter->exidx)[iso+1];
    for (e = VECTOR(converter->exidx)[iso]; e < stop; e++) {
      size_t exonLen =
        VECTOR(converter->exend)[e] - VECTOR(converter->exstart)[e]+1;
      covered += exonLen;
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->exlim,
                                                   covered+1));
    }
  }

  SPLICING_FINALLY_CLEAN(5);

  return 0;
}
/*
 * Append one exon to an exon set.
 *
 * seqid: sequence id string. It is looked up among the ids already
 *        stored; when it is not found it is appended and gets the index
 *        equal to the number of ids stored before the append.
 * start, end: coordinates pushed to the set's start / end vectors.
 *
 * Returns 0 on success; helper failures are propagated.
 */
int splicing_exonset_append(splicing_exonset_t *ex, const char *seqid,
                            int start, int end) {

  size_t which;

  if (!splicing_strvector_search(&ex->seqids, seqid, &which)) {
    /* Unknown sequence id: it will be stored at the current end */
    which = splicing_strvector_size(&ex->seqids);
    SPLICING_CHECK(splicing_strvector_append(&ex->seqids, seqid));
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&ex->seqid, which));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->end, end));

  return 0;
}
/*
 * Convert a vector of genomic positions to the coordinates of every
 * isoform of a gene.
 *
 * position:  genomic positions, one per read.
 * converter: optional; when NULL a converter is built for (gff, gene)
 *            and destroyed again before returning.
 * isopos:    resized to (number of isoforms) x (number of positions);
 *            element (i, r) receives position[r] - shift[ex] when the
 *            position lies within an exon of isoform i, and -1
 *            otherwise.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *position,
                            const splicing_gff_converter_t *converter,
                            splicing_matrix_int_t *isopos) {

  const size_t noreads = splicing_vector_int_size(position);
  splicing_gff_converter_t local;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;
  size_t read, iso;

  if (!converter) {
    conv = &local;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, conv->noiso, noreads));

  for (read = 0; read < noreads; read++) {
    /* The position is held as size_t, as are the exon range limits */
    const size_t pos = VECTOR(*position)[read];

    for (iso = 0; iso < conv->noiso; iso++) {
      const size_t exfirst = VECTOR(conv->exidx)[iso];
      const size_t exlast = VECTOR(conv->exidx)[iso+1];
      int ex = exfirst;

      /* First exon of the isoform that does not end before pos */
      while (ex < exlast && VECTOR(conv->exend)[ex] < pos) {
        ex++;
      }

      if (ex < exlast &&
          VECTOR(conv->exstart)[ex] <= pos &&
          pos <= VECTOR(conv->exend)[ex]) {
        MATRIX(*isopos, iso, read) =
          VECTOR(*position)[read] - VECTOR(conv->shift)[ex];
      } else {
        MATRIX(*isopos, iso, read) = -1;
      }
    }
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/*
 * Convert one isoform coordinate to a genomic position, separately for
 * every isoform of a gene.
 *
 * position:  isoform coordinate; values below 1 raise SPLICING_EINVAL.
 * converter: optional; when NULL a converter is built for (gff, gene)
 *            and destroyed again before returning.
 * result:    resized to the number of isoforms; entry i receives
 *            position + shift[ex] for the first exon of isoform i whose
 *            exlim exceeds the position, or -1 when there is none.
 *
 * Returns 0 on success.
 */
int splicing_iso_to_genomic_all(const splicing_gff_t *gff, size_t gene,
                                int position,
                                const splicing_gff_converter_t *converter,
                                splicing_vector_int_t *result) {

  splicing_gff_converter_t local;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;
  size_t iso;

  if (position < 1) {
    SPLICING_ERROR("Invalid isoform coordinate, must the larger than zero",
                   SPLICING_EINVAL);
  }

  if (!converter) {
    conv = &local;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, conv->noiso));

  /* TODO: find impossible positions */
  for (iso = 0; iso < conv->noiso; iso++) {
    int ex = VECTOR(conv->exidx)[iso];

    /* Skip the exons whose limit is not beyond the position */
    while (ex < VECTOR(conv->exidx)[iso+1] &&
           VECTOR(conv->exlim)[ex] <= position) {
      ex++;
    }

    if (ex >= VECTOR(conv->exidx)[iso+1]) {
      VECTOR(*result)[iso] = -1;
    } else {
      VECTOR(*result)[iso] = position + VECTOR(conv->shift)[ex];
    }
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/*
 * Convert a vector of genomic positions to the coordinates of every
 * isoform of a gene, computing the exon tables on the fly.
 *
 * position: genomic positions, one per read.
 * isopos:   resized to (number of isoforms) x (number of positions);
 *           element (i, r) receives position[r] - shift[ex] when the
 *           position lies within an exon of isoform i, and -1
 *           otherwise.
 *
 * Returns 0 on success; helper failures are propagated.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *position,
                            splicing_matrix_int_t *isopos) {

  size_t r, i, noiso, noreads=splicing_vector_int_size(position);
  splicing_vector_int_t exstart, exend, exidx, shift;

  /* A failure here would leave noiso unset, so do not ignore it */
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx,
                                             gene));

  /* Per-exon shift between genomic and isoform coordinates */
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(exidx)[i], pos2=VECTOR(exidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(exstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++; ce += VECTOR(exend)[pos]; pos++;
    }
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, noiso, noreads));

  for (r=0; r<noreads; r++) {
    for (i=0; i<noiso; i++) {
      size_t pos=VECTOR(*position)[r];
      size_t startpos=VECTOR(exidx)[i];
      size_t endpos=VECTOR(exidx)[i+1];
      int ex;
      for (ex=startpos; ex < endpos && VECTOR(exend)[ex] < pos; ex++) ;
      /* ex == endpos means the position is past the last exon of this
         isoform; the exon vectors must not be read at that index */
      if (ex < endpos && VECTOR(exstart)[ex] <= pos &&
          pos <= VECTOR(exend)[ex]) {
        MATRIX(*isopos, i, r) = VECTOR(*position)[r] - VECTOR(shift)[ex];
      } else {
        MATRIX(*isopos, i, r) = -1;
      }
    }
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
/*
 * Count the isoforms (mRNA records) of one gene and, optionally,
 * compute their summed exon lengths.
 *
 * gene:   gene index; SPLICING_EINVAL is raised when it is not smaller
 *         than the number of genes.
 * noiso:  receives the number of mRNA records of the gene.
 * isolen: optional (may be NULL); resized to *noiso, entry k receives
 *         the sum of (end - start + 1) over the exon records between
 *         the k-th mRNA record and the next one (or the end of the
 *         gene).
 *
 * Returns 0 on success.
 */
int splicing_i_gff_noiso_one(const splicing_gff_t *gff, size_t gene,
                             size_t *noiso, splicing_vector_int_t *isolen) {

  size_t nogenes, first, last, idx1;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  /* gene is unsigned, so only the upper bound needs checking */
  if (gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Record range of this gene: up to the next gene, or to the end */
  first=VECTOR(gff->genes)[gene];
  last= gene+1 == nogenes ? gff->n : VECTOR(gff->genes)[gene+1];

  *noiso = 0;
  for (idx1=first; idx1 < last; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { *noiso += 1; }
  }

  if (isolen) {
    size_t il=0, pos=0;
    SPLICING_CHECK(splicing_vector_int_resize(isolen, *noiso));

    /* No isoforms: isolen is empty and the store after the loop below
       would write past its end */
    if (*noiso == 0) {
      return 0;
    }

    /* Skip to the first mRNA record, then step over it */
    for (idx1=first;
         idx1 < last && VECTOR(gff->type)[idx1] != SPLICING_TYPE_MRNA;
         idx1++) ;
    idx1++;

    /* Each further mRNA record closes the length of the previous one */
    for (; idx1 < last; idx1++) {
      if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
        VECTOR(*isolen)[pos++]=il;
        il = 0;
      } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON) {
        il += VECTOR(gff->end)[idx1] - VECTOR(gff->start)[idx1] + 1;
      }
    }
    /* The last isoform is closed by the end of the gene */
    VECTOR(*isolen)[pos]=il;
  }

  return 0;
}
/*
 * Count the isoforms (mRNA records) of every gene.
 *
 * noiso: resized to the number of genes and zeroed; entry g receives
 *        the number of mRNA records seen between the record of gene g
 *        and the next gene record.
 *
 * The scan starts at the record of the first gene and runs to gff->n.
 * Returns 0 on success.
 */
int splicing_gff_noiso(const splicing_gff_t *gff,
                       splicing_vector_int_t *noiso) {

  size_t nogenes, idx1, pos=0;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_vector_int_resize(noiso, nogenes));
  splicing_vector_int_null(noiso);

  /* Without genes there is no first gene record to start from */
  if (nogenes == 0) {
    return 0;
  }

  idx1=VECTOR(gff->genes)[0];

  /* Step over the first gene record, so that every gene record met in
     the loop below marks the start of the next gene */
  if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) { idx1++; }

  for (; idx1 < gff->n; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
      VECTOR(*noiso)[pos] += 1;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) {
      pos++;
    }
  }

  return 0;
}
/*
 * Drift proposal, with three modes selected by `mode`:
 *
 *   0 (init):    respsi is resized to noiso and resalpha to noiso-1.
 *                For noiso != 2 every respsi entry is set to 1/noiso
 *                and every resalpha entry to 1/(noiso-1); for
 *                noiso == 2 respsi[0] is a uniform random number,
 *                respsi[1] its complement and resalpha[0] is 0.
 *                *ressigma is set to 0.05 in both cases.
 *   1 (propose): resalpha is drawn by splicing_mvrnorm(alpha, sigma),
 *                respsi is splicing_logit_inv(resalpha) extended by one
 *                entry holding 1 - (sum of the others).
 *   2 (score):   *resscore is set by splicing_mvplogisnorm(psi,
 *                otheralpha, sigma, noiso-1).
 *
 * Any other mode does nothing. Output arguments that a mode does not
 * write are not touched. Returns 0 on success.
 */
int splicing_drift_proposal(int mode,
                            const splicing_vector_t *psi,
                            const splicing_vector_t *alpha,
                            double sigma,
                            const splicing_vector_t *otherpsi,
                            const splicing_vector_t *otheralpha,
                            int noiso,
                            splicing_vector_t *respsi,
                            splicing_vector_t *resalpha,
                            double *ressigma,
                            double *resscore) {

  switch (mode) {
  case 0:                       /* init */
    {
      SPLICING_CHECK(splicing_vector_resize(respsi, noiso));
      SPLICING_CHECK(splicing_vector_resize(resalpha, noiso-1));
      if (noiso != 2) {
        int i;
        for (i=0; i<noiso; i++) {
          VECTOR(*respsi)[i] = 1.0/noiso;
        }
        for (i=0; i<noiso-1; i++) {
          VECTOR(*resalpha)[i] = 1.0/(noiso-1);
        }
        *ressigma = 0.05;
      } else {
        VECTOR(*respsi)[0] = RNG_UNIF01();
        VECTOR(*respsi)[1] = 1 - VECTOR(*respsi)[0];
        /* resalpha has noiso-1 == 1 element here, so only index 0
           may be written */
        VECTOR(*resalpha)[0] = 0.0;
        *ressigma = 0.05;
      }
    }
    break;
  case 1:                       /* propose */
    {
      int len=noiso-1;
      double sumpsi=0.0;

      /* Reserve room for the extra entry that is added at the end */
      SPLICING_CHECK(splicing_vector_reserve(respsi, len+1));
      SPLICING_CHECK(splicing_mvrnorm(alpha, sigma, resalpha, len));
      SPLICING_CHECK(splicing_logit_inv(resalpha, respsi, len));
      sumpsi = splicing_vector_sum(respsi);
      SPLICING_CHECK(splicing_vector_resize(respsi, len+1));
      VECTOR(*respsi)[len] = 1-sumpsi;
    }
    break;
  case 2:                       /* score */
    SPLICING_CHECK(splicing_mvplogisnorm(psi, otheralpha, sigma, noiso-1,
                                         resscore));
    break;
  default:                      /* unknown mode: nothing to do */
    break;
  }

  return 0;
}
/*
 * Convert one genomic position to the coordinates of every isoform of
 * a gene.
 *
 * position:  genomic position.
 * converter: optional; when NULL a converter is built for (gff, gene)
 *            and destroyed again before returning.
 * result:    resized to the number of isoforms; entry i receives
 *            position - shift[ex] when the position lies within an exon
 *            of isoform i, and -1 otherwise.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso_all(const splicing_gff_t *gff, size_t gene,
                                int position,
                                const splicing_gff_converter_t *converter,
                                splicing_vector_int_t *result) {

  splicing_gff_converter_t local;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;
  int i;

  if (!converter) {
    conv = &local;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, conv->noiso));

  for (i = 0; i < conv->noiso; i++) {
    const size_t exfirst = VECTOR(conv->exidx)[i];
    const size_t exlast = VECTOR(conv->exidx)[i+1];
    int ex = exfirst;
    int mapped = -1;

    /* First exon of the isoform that does not end before the position */
    while (ex < exlast && VECTOR(conv->exend)[ex] < position) {
      ex++;
    }

    if (ex < exlast &&
        VECTOR(conv->exstart)[ex] <= position &&
        position <= VECTOR(conv->exend)[ex]) {
      mapped = position - VECTOR(conv->shift)[ex];
    }
    VECTOR(*result)[i] = mapped;
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/*
 * Draw `len` independent normal deviates around `mu`.
 *
 * mu:       means; the first `len` entries are read.
 * sigma:    spread parameter. It is used as the scale directly when
 *           len == 1, and its square root is used otherwise.
 * resalpha: resized to `len`; entry k receives
 *           mu[k] + scale * RNG_NORMAL(0,1).
 *
 * Returns 0 on success; a resize failure is propagated.
 */
int splicing_mvrnorm(const splicing_vector_t *mu, double sigma,
                     splicing_vector_t *resalpha, int len) {

  double scale;
  int k = 0;

  if (len == 1) {
    scale = sigma;
  } else {
    scale = sqrt(sigma);
  }

  SPLICING_CHECK(splicing_vector_resize(resalpha, len));

  while (k < len) {
    VECTOR(*resalpha)[k] = VECTOR(*mu)[k] + scale * RNG_NORMAL(0,1);
    k++;
  }

  return 0;
}
/*
 * Acceptance ratio of a proposed (psiNew, alphaNew) against the current
 * (psi, alpha).
 *
 * Both joint scores (splicing_score_joint()) and both proposal scores
 * (splicing_drift_proposal() in mode 2) are always computed.
 *
 * full:    when non-zero the ratio is
 *            exp(proposed joint + proposed-to-current
 *                - (current joint + current-to-proposed)),
 *          otherwise exp(proposed joint - current joint).
 * acceptP: receives the ratio.
 * pcJS:    receives the joint score of the current state.
 * ppJS:    receives the joint score of the proposed state.
 *
 * Returns 0 on success; helper failures are propagated.
 */
int splicing_metropolis_hastings_ratio(const splicing_vector_int_t *ass,
                                       int no_reads,
                                       const splicing_vector_t *psiNew,
                                       const splicing_vector_t *alphaNew,
                                       const splicing_vector_t *psi,
                                       const splicing_vector_t *alpha,
                                       double sigma,
                                       int noiso,
                                       const splicing_vector_int_t *effisolen,
                                       const splicing_vector_t *hyperp,
                                       const splicing_vector_t *isoscores,
                                       int full, double *acceptP,
                                       double *pcJS, double *ppJS) {

  double proposedJoint, currentJoint;
  double proposedToCurrent, currentToProposed;
  double logRatio;

  SPLICING_CHECK(splicing_score_joint(ass, no_reads, psiNew, hyperp,
                                      effisolen, isoscores,
                                      &proposedJoint));
  SPLICING_CHECK(splicing_score_joint(ass, no_reads, psi, hyperp,
                                      effisolen, isoscores,
                                      &currentJoint));

  SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 2, psi, alpha, sigma,
                                         psiNew, alphaNew, noiso, 0, 0, 0,
                                         &proposedToCurrent));
  SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 2, psiNew, alphaNew,
                                         sigma, psi, alpha, noiso, 0, 0, 0,
                                         &currentToProposed));

  if (full) {
    logRatio = proposedJoint + proposedToCurrent -
      (currentJoint + currentToProposed);
  } else {
    logRatio = proposedJoint - currentJoint;
  }
  *acceptP = exp(logRatio);

  *pcJS = currentJoint;
  *ppJS = proposedJoint;

  return 0;
}
int splicing_gff_fprint_gene(const splicing_gff_t *gff, FILE *outfile, int gene) { size_t nogenes, noiso; int i, j; splicing_vector_int_t start, end, idx; SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes)); if (gene < 0 || gene >= nogenes) { SPLICING_ERROR("Invalid gene ID", SPLICING_EINVAL); } SPLICING_CHECK(splicing_vector_int_init(&start, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &start); SPLICING_CHECK(splicing_vector_int_init(&end, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &end); SPLICING_CHECK(splicing_vector_int_init(&idx, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &idx); SPLICING_CHECK(splicing_gff_exon_start_end(gff, &start, &end, &idx, gene)); noiso = splicing_vector_int_size(&idx)-1; fprintf(outfile, "===\nGene with %i isoforms:\n", (int) noiso); for (i=0; i<noiso; i++) { fprintf(outfile, " Isoform %i:\n", i); for (j=VECTOR(idx)[i]; j<VECTOR(idx)[i+1]; j++) { fprintf(outfile, " %i-%i\n", VECTOR(start)[j], VECTOR(end)[j]); } } splicing_vector_int_destroy(&idx); splicing_vector_int_destroy(&end); splicing_vector_int_destroy(&start); SPLICING_FINALLY_CLEAN(3); return 0; }
/*
 * Reserve room for `size` records in the per-record vectors of `gff`
 * that are handled here: type, start, end, score, phase and parent.
 *
 * Returns 0 on success; the first failing reserve call is propagated
 * and the remaining vectors are then left as they were.
 */
int splicing_gff_reserve(splicing_gff_t *gff, size_t size) {

  /* Record type and coordinates */
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->type, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->start, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->end, size));

  /* The score is the only real-valued column among these */
  SPLICING_CHECK(splicing_vector_reserve(&gff->score, size));

  /* Phase and parent */
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->phase, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->parent, size));

  return 0;
}
/*
 * Map `len` values to exp(x[k]) / (1 + sum over j of exp(x[j])).
 *
 * x:   input; the first `len` entries are read.
 * res: resized to `len`; receives the mapped values.
 *
 * Returns 0 on success; a resize failure is propagated.
 */
int splicing_logit_inv(const splicing_vector_t *x,
                       splicing_vector_t *res, int len) {

  double denom = 0.0;
  int k;

  SPLICING_CHECK(splicing_vector_resize(res, len));

  /* Denominator: the exponentials are summed first, then one is added */
  k = 0;
  while (k < len) {
    denom += exp(VECTOR(*x)[k]);
    k++;
  }
  denom += 1.0;

  k = 0;
  while (k < len) {
    VECTOR(*res)[k] = exp(VECTOR(*x)[k]) / denom;
    k++;
  }

  return 0;
}
int splicing_score_iso(const splicing_vector_t *psi, int noiso, const splicing_vector_int_t *assignment, int noreads, const splicing_vector_int_t *peffisolen, double *res) { int *effisolen = VECTOR(*peffisolen); double sum, maxpsieff, score; splicing_vector_t logpsi; int i; SPLICING_CHECK(splicing_vector_init(&logpsi, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &logpsi); /* Calculate the normalization factor */ VECTOR(logpsi)[0] = log(VECTOR(*psi)[0]) + log(effisolen[0]); for (maxpsieff=VECTOR(logpsi)[0], i=1; i<noiso; i++) { VECTOR(logpsi)[i] = log(VECTOR(*psi)[i]) + log(effisolen[i]); if (VECTOR(logpsi)[i] > maxpsieff) { maxpsieff = VECTOR(logpsi)[i]; } } for (sum=0.0, i=0; i<noiso; i++) { sum += exp(VECTOR(logpsi)[i]-maxpsieff); } sum = log(sum) + maxpsieff; /* Normalize */ for (i=0; i<noiso; i++) { VECTOR(logpsi)[i] -= sum; } /* Calculate score, based on assignments */ for (score=0.0, i=0; i<noreads; i++) { if (VECTOR(*assignment)[i] != -1) { score += VECTOR(logpsi)[ VECTOR(*assignment)[i] ]; } } splicing_vector_destroy(&logpsi); SPLICING_FINALLY_CLEAN(1); *res = score; return 0; }
/*
 * Tabulate splicing_dnorm() over a range of integer fragment lengths.
 *
 * The range is mean -/+ numDevs * sqrt(normalVar), each bound converted
 * to int. The lower bound is raised to minLength when it is smaller,
 * and the upper bound is raised to the lower bound when it is smaller.
 *
 * fragmentProb:  resized to the number of lengths in the range; entry j
 *                receives splicing_dnorm(first + j, normalMean, sd).
 * fragmentStart: receives the first length of the range; it is written
 *                before fragmentProb is resized.
 *
 * Returns 0 on success; a resize failure is propagated.
 */
int splicing_normal_fragment(double normalMean, double normalVar,
                             double numDevs, int minLength,
                             splicing_vector_t *fragmentProb,
                             int *fragmentStart) {

  const double sd = sqrt(normalVar);
  int first = normalMean - sd * numDevs;
  int last = normalMean + sd * numDevs;
  int len, slot;

  if (first < minLength) {
    first = minLength;
  }
  if (last < first) {
    last = first;
  }
  *fragmentStart = first;

  SPLICING_CHECK(splicing_vector_resize(fragmentProb, last - first + 1));

  slot = 0;
  for (len = first; len <= last; len++) {
    VECTOR(*fragmentProb)[slot] = splicing_dnorm(len, normalMean, sd);
    slot++;
  }

  return 0;
}