/**
 * Computes the singular values of a matrix by calling LAPACK's DGESDD.
 *
 * The routine works on a private copy of \p matrix, so the caller's
 * matrix is never handed to LAPACK. Only singular values are requested
 * (jobz is 'N'); the U and VT arguments are dummies.
 *
 * \param matrix The input matrix.
 * \param values Resized to min(nrow, ncol); receives the singular values.
 * \return 0 on success. Failures are reported through SPLICING_CHECK /
 *   SPLICING_ERROR; SPLICING_ELAPACK is raised when DGESDD returns a
 *   non-zero info, either for the workspace query or for the real call.
 */
int splicing_dgesdd(const splicing_matrix_t *matrix,
                    splicing_vector_t *values) {

  splicing_matrix_t tmp;
  int m=splicing_matrix_nrow(matrix);
  int n=splicing_matrix_ncol(matrix);
  int lda=m, minmn= m < n ? m : n;
  int lwork=-1;                 /* -1 asks DGESDD for a workspace query */
  int info=0;
  splicing_vector_t work;
  splicing_vector_int_t iwork;
  char jobz='N';
  int dummy=1;
  double dummy2;

  SPLICING_CHECK(splicing_matrix_copy(&tmp, matrix));
  SPLICING_FINALLY(splicing_matrix_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_init(&work, 1));
  SPLICING_FINALLY(splicing_vector_destroy, &work);
  SPLICING_CHECK(splicing_vector_int_init(&iwork, 8*minmn));
  SPLICING_FINALLY(splicing_vector_int_destroy, &iwork);

  SPLICING_CHECK(splicing_vector_resize(values, minmn));

  /* Get the optimal lwork first */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda, VECTOR(*values),
                  /*U=*/ &dummy2, /*LDU=*/ &dummy,
                  /*VT=*/ &dummy2, /*LDVT=*/ &dummy,
                  VECTOR(work), &lwork, VECTOR(iwork), &info);

  /* A failed query leaves work[0] meaningless, so stop before sizing
     the workspace from it. */
  if (info != 0) {
    SPLICING_ERROR("Cannot calculate SVD", SPLICING_ELAPACK);
  }

  lwork = (int) VECTOR(work)[0];
  SPLICING_CHECK(splicing_vector_resize(&work, lwork));

  /* Now do the SVD */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda, VECTOR(*values),
                  /*U=*/ &dummy2, /*LDU=*/ &dummy,
                  /*VT=*/ &dummy2, /*LDVT=*/ &dummy,
                  VECTOR(work), &lwork, VECTOR(iwork), &info);

  if (info != 0) {
    SPLICING_ERROR("Cannot calculate SVD", SPLICING_ELAPACK);
  }

  splicing_vector_destroy(&work);
  splicing_vector_int_destroy(&iwork);
  splicing_matrix_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(3);

  return 0;
}
/**
 * Python binding: read a GFF file and return it wrapped in a PyCObject.
 *
 * Python arguments: (filename,) -- path of the file, opened in text mode.
 *
 * Returns a PyCObject that owns a heap-allocated splicing_gff_t and
 * releases it with splicing_gff_destroy2, or NULL with a Python exception
 * set when the arguments are wrong, the file cannot be opened, memory
 * runs out, or reading fails.
 */
static PyObject* pysplicing_read_gff(PyObject *self, PyObject *args) {

  const char *filename;
  FILE *input;
  splicing_gff_t *gff;
  int status;

  if (!PyArg_ParseTuple(args, "s", &filename)) { return NULL; }

  /* Without this check a missing/unreadable file would hand a NULL
     stream to the reader and to fclose(). */
  input = fopen(filename, "r");
  if (!input) {
    return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
  }

  gff=malloc(sizeof(splicing_gff_t));
  if (!gff) {
    fclose(input);
    splicing_error("Cannot create GFF", __FILE__, __LINE__, SPLICING_ENOMEM);
    splicingmodule_handle_splicing_error();
    return NULL;
  }
  SPLICING_FINALLY(splicing_free, gff);

  /* Close the file ourselves before SPLICING_PYCHECK can return. */
  status = splicing_gff_init(gff, 50);
  if (status) { fclose(input); }
  SPLICING_PYCHECK(status);
  SPLICING_FINALLY(splicing_gff_destroy, gff);

  /* The reader's status used to be dropped; propagate it, but only
     after the stream has been closed. */
  status = splicing_gff_read(input, gff);
  fclose(input);
  SPLICING_PYCHECK(status);

  /* Ownership of gff moves to the PyCObject, so only pop the entries. */
  SPLICING_FINALLY_CLEAN(2);

  return PyCObject_FromVoidPtr(gff, splicing_gff_destroy2);
}
/**
 * Python binding: isoform lengths for a GFF object.
 *
 * Python arguments: (gff,) -- a PyCObject wrapping a splicing_gff_t.
 *
 * Returns whatever pysplicing_from_vector_int_index() builds from the
 * length vector and its index vector, or NULL with an exception set.
 */
static PyObject* pysplicing_gff_isolength(PyObject *self, PyObject *args) {

  PyObject *gff;
  splicing_gff_t *mygff;
  splicing_vector_int_t isolength, isolength_idx;
  PyObject *risolength;

  if (!PyArg_ParseTuple(args, "O", &gff)) { return NULL; }

  mygff=PyCObject_AsVoidPtr(gff);

  SPLICING_PYCHECK(splicing_vector_int_init(&isolength, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &isolength);
  SPLICING_PYCHECK(splicing_vector_int_init(&isolength_idx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &isolength_idx);

  SPLICING_PYCHECK(splicing_gff_isolength(mygff, &isolength, &isolength_idx));

  risolength = pysplicing_from_vector_int_index(&isolength, &isolength_idx);

  splicing_vector_int_destroy(&isolength);
  splicing_vector_int_destroy(&isolength_idx);
  SPLICING_FINALLY_CLEAN(2);

  /* Hand the object back directly. Py_BuildValue("O", ...) would add a
     second reference that nobody releases.
     NOTE(review): assumes the converter returns a new reference (or NULL
     with an exception set) -- confirm against its definition. */
  return risolength;
}
/**
 * Python binding: build a single-gene GFF object from exon and isoform
 * descriptions.
 *
 * Python arguments: (exons, isoforms[, id, seqid, source, strand]).
 * The optional values default to "insilicogene", "seq1",
 * "protein_coding" and 2 respectively.
 *
 * Returns a PyCObject owning a heap-allocated splicing_gff_t (released
 * with splicing_gff_destroy2), or NULL on failure.
 *
 * NOTE(review): the early `return NULL` paths after the first
 * SPLICING_FINALLY leave entries on the finally stack; whether the error
 * handler unwinds them is not visible here -- confirm.
 */
static PyObject* pysplicing_create_gene(PyObject *self, PyObject *args) {

  PyObject *pyExons, *pyIsoforms;
  const char *id="insilicogene";
  const char *seqid="seq1";
  const char *source="protein_coding";
  int strand=2;			/* unknown */
  splicing_vector_int_t exonVec, isoVec;
  splicing_gff_t *newGff;

  if (!PyArg_ParseTuple(args, "OO|sssi", &pyExons, &pyIsoforms,
			&id, &seqid, &source, &strand)) {
    return NULL;
  }

  /* Convert the Python containers into library vectors. */
  if (pysplicing_to_exons(pyExons, &exonVec)) {
    return NULL;
  }
  SPLICING_FINALLY(splicing_vector_int_destroy, &exonVec);

  if (pysplicing_to_isoforms(pyIsoforms, &isoVec)) {
    return NULL;
  }
  SPLICING_FINALLY(splicing_vector_int_destroy, &isoVec);

  /* The GFF structure outlives this call, so it lives on the heap. */
  newGff = malloc(sizeof(splicing_gff_t));
  if (!newGff) {
    splicing_error("Cannot create GFF", __FILE__, __LINE__, SPLICING_ENOMEM);
    splicingmodule_handle_splicing_error();
    return NULL;
  }
  SPLICING_FINALLY(splicing_free, newGff);

  SPLICING_PYCHECK(splicing_gff_init(newGff, 0));
  SPLICING_FINALLY(splicing_gff_destroy, newGff);

  SPLICING_PYCHECK(splicing_create_gene(&exonVec, &isoVec, id, seqid,
					source, strand, newGff));

  /* The temporaries go away; the GFF is only popped from the finally
     stack because the returned PyCObject takes it over. */
  splicing_vector_int_destroy(&isoVec);
  splicing_vector_int_destroy(&exonVec);
  SPLICING_FINALLY_CLEAN(4);

  return PyCObject_FromVoidPtr(newGff, splicing_gff_destroy2);
}
/**
 * Assigns every read to an isoform (or to -1) given the current psi.
 *
 * Reads are visited in the order given by \p match_order. Whenever the
 * match column of the current read differs from the previous one, the
 * CUMSUM() macro (defined outside this block) is re-run; presumably it
 * refreshes `cumsum`, `validIso`, `noValid` and `sumpsi` from `curr` and
 * `psi` -- confirm against the macro definition.
 *
 * \param matches Match matrix; one column per read, \p noiso rows are
 *   compared between consecutive reads.
 * \param match_order Order in which the reads (columns) are processed.
 * \param psi Isoform weights (not referenced directly in this body).
 * \param noiso Number of isoforms.
 * \param result Resized to the number of reads; entry is the chosen
 *   isoform, or -1 when no isoform is valid for the read.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_reassign_samples(const splicing_matrix_t *matches,
			      const splicing_vector_int_t *match_order,
			      const splicing_vector_t *psi,
			      int noiso, splicing_vector_int_t *result) {

  int noreads = splicing_matrix_ncol(matches);
  int i, w;
  double *prev, *curr;
  double rand, sumpsi;
  int noValid;
  int *order=VECTOR(*match_order);
  splicing_vector_t cumsum;
  splicing_vector_int_t validIso;

  SPLICING_CHECK(splicing_vector_int_resize(result, noreads));

  /* Bail out before allocating scratch space. Returning after the
     allocations leaked both vectors and left two stale entries on the
     finally stack. */
  if (noreads == 0) { return 0; }

  SPLICING_CHECK(splicing_vector_init(&cumsum, noiso));
  SPLICING_FINALLY(splicing_vector_destroy, &cumsum);
  SPLICING_CHECK(splicing_vector_int_init(&validIso, noiso));
  SPLICING_FINALLY(splicing_vector_int_destroy, &validIso);

  prev = curr = &MATRIX(*matches, 0, order[0]);
  CUMSUM();

  for (i=0; i<noreads; i++) {
    curr = &MATRIX(*matches, 0, order[i]);

    /* Maybe we need to update the cumulative sum */
    if (memcmp(prev, curr, sizeof(double)*noiso) != 0) { CUMSUM(); }

    if (noValid == 0) {
      VECTOR(*result)[order[i]] = -1;
    } else if (noValid == 1) {
      VECTOR(*result)[order[i]] = VECTOR(validIso)[0];
    } else if (noValid == 2) {
      rand = RNG_UNIF01() * sumpsi;
      w = (rand < VECTOR(cumsum)[0]) ? VECTOR(validIso)[0] :
	VECTOR(validIso)[1];
      VECTOR(*result)[order[i]] = w;
    } else {
      /* Draw */
      rand = RNG_UNIF01() * sumpsi;
      /* TODO: Binary search for interval, if many classes */
      for (w=0; rand > VECTOR(cumsum)[w]; w++) ;
      VECTOR(*result)[order[i]] = VECTOR(validIso)[w];
    }

    prev=curr;
  }

  splicing_vector_int_destroy(&validIso);
  splicing_vector_destroy(&cumsum);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
/**
 * Collects the exon start/end coordinates of one gene, isoform by isoform.
 *
 * Records after the gene's own record are scanned until the next gene
 * record (or the end of the GFF). Each exon appends to \p start and
 * \p end. Each mRNA record opens a new isoform and stores the running
 * exon count in \p idx. Once an isoform is complete, its exons are passed
 * to splicing_i_gff_exon_start_end_sort().
 *
 * \param gff The GFF structure.
 * \param start Cleared, then filled with exon start positions.
 * \param end Cleared, then filled with exon end positions.
 * \param idx Resized to (number of isoforms)+1; idx[i]..idx[i+1]-1 are
 *   the exons of isoform i in \p start / \p end.
 * \param gene Gene index; must be in [0, number of genes).
 * \return 0 on success; SPLICING_EINVAL for an invalid gene id.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff,
				splicing_vector_int_t *start,
				splicing_vector_int_t *end,
				splicing_vector_int_t *idx,
				int gene) {

  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  /* Scratch vectors handed to the sort helper. */
  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* First record after the gene record itself. */
  pos=VECTOR(gff->genes)[gene]+1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));

  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) {
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s)); p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      VECTOR(*idx)[i] = p;
      /* The previous isoform is complete now, sort its exons. */
      if (i!=0) {
	SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx,
							  i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      break;
    }
    pos++;
  }
  VECTOR(*idx)[i] = p;
  SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1,
						    &tmp, &tmp2));

  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  /* Two entries were pushed, so pop two. Popping one left a dangling
     pointer to `tmp` on the finally stack. */
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
/**
 * Converts genomic positions to isoform coordinates for every isoform of
 * a gene.
 *
 * \param gff The GFF structure.
 * \param gene Gene index.
 * \param position Genomic positions, one per read.
 * \param isopos Resized to (number of isoforms) x (number of positions);
 *   entry (i, r) is the position of read r within isoform i, or -1 when
 *   the position does not fall inside any exon of that isoform.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
			    const splicing_vector_int_t *position,
			    splicing_matrix_int_t *isopos) {

  size_t r, i, noiso, noreads=splicing_vector_int_size(position);
  splicing_vector_int_t exstart, exend, exidx, shift;

  /* The status used to be ignored, leaving noiso undefined on failure. */
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx,
					     gene));

  /* One shift value per exon: the amount subtracted from a genomic
     position inside that exon to obtain its isoform coordinate. */
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);

  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(exidx)[i], pos2=VECTOR(exidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(exstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++; ce += VECTOR(exend)[pos]; pos++;
    }
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, noiso, noreads));

  for (r=0; r<noreads; r++) {
    for (i=0; i<noiso; i++) {
      size_t pos=VECTOR(*position)[r];
      size_t startpos=VECTOR(exidx)[i];
      size_t endpos=VECTOR(exidx)[i+1];
      int ex;
      /* Skip the exons that end before the position. */
      for (ex=startpos; ex < endpos && VECTOR(exend)[ex] < pos; ex++) ;
      /* `ex < endpos` guards the case where the position lies beyond the
	 last exon of this isoform. Without it the lookups below read the
	 next isoform's exon, or run past the end of the vectors. */
      if (ex < endpos &&
	  VECTOR(exstart)[ex] <= pos && pos <= VECTOR(exend)[ex]) {
	MATRIX(*isopos, i, r) = VECTOR(*position)[r] - VECTOR(shift)[ex];
      } else {
	MATRIX(*isopos, i, r) = -1;
      }
    }
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
/**
 * Finds, for every gene, the regions covered by an exon in all isoforms.
 *
 * For each gene, exon starts (stored as positive values) and exon ends
 * (stored negated) are collected into one event list and sorted with
 * splicing_i_const_cmp. The sorted list is then swept while counting
 * currently open exons. When an end event is met with as many open exons
 * as the gene has isoforms, and the region from the preceding event to
 * this end is at least \p min_length long, the region is appended.
 *
 * \param gff The GFF structure.
 * \param exons Initialized here; left alive for the caller on success.
 * \param min_length Minimum length of a reported region.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
					  splicing_exonset_t *exons,
					  int min_length) {

  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* The status used to be ignored; a failure would leave noiso
       undefined for the comparison below. */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			-VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
		   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive exons */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) { noex++; }
      if (ev < 0) {
	int prev=VECTOR(events)[i-1];
	if (noex == noiso && (-ev)-prev+1 >= min_length) {
	  /* constitutive exon */
	  SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev, -ev));
	}
	noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
/**
 * Finds, for every gene, the exons that appear with identical start and
 * end in all of its isoforms.
 *
 * For each gene the (start, end) pairs of its exon records are collected,
 * sorted as pairs with splicing_i_const_cmp2, and runs of identical pairs
 * are counted. A pair is appended to \p exons at the moment its run
 * length reaches the gene's isoform count.
 *
 * \param gff The GFF structure.
 * \param exons Initialized here; left alive for the caller on success.
 * \param min_length Not used by this function.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
					   splicing_exonset_t *exons,
					   int min_length) {

  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  SPLICING_CHECK(splicing_exonset_init(exons, 1));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* The status used to be ignored; a failure would leave noiso
       undefined for the comparison below. */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
		   splicing_i_const_cmp2);

    /* Now go over them and check how many times each exon appears.
       The scan starts at the very first pair. Starting at the second
       one meant the first sorted exon was never tested against noiso,
       so it was silently dropped for genes with a single isoform. */
    for (noSame=0, i=0; i<noExons*2; i+=2) {
      int exStart=VECTOR(events)[i];
      int exEnd=VECTOR(events)[i+1];
      if (i > 0 &&
	  exStart == VECTOR(events)[i-2] && VECTOR(events)[i-1] == exEnd) {
	noSame++;
      } else {
	noSame=1;
      }
      if (noSame == noiso) {
	SPLICING_CHECK(splicing_exonset_append(exons, seqid, exStart,
					       exEnd));
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
/**
 * Initializes an exon set with empty members.
 *
 * All four members (seqids, seqid, start, end) are created with length
 * zero. Each member is registered on the finally stack as soon as it is
 * created, so a later failure can unwind it. On success the four
 * entries are popped and the members stay alive for the caller.
 *
 * \param ex The exon set to initialize.
 * \param size Currently not used by this function.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_exonset_init(splicing_exonset_t *ex, size_t size) {

  /* Sequence id string table */
  SPLICING_CHECK(splicing_strvector_init(&ex->seqids, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &ex->seqids);

  /* Per-exon sequence id */
  SPLICING_CHECK(splicing_vector_int_init(&ex->seqid, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->seqid);

  /* Per-exon start coordinate */
  SPLICING_CHECK(splicing_vector_int_init(&ex->start, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->start);

  /* Per-exon end coordinate */
  SPLICING_CHECK(splicing_vector_int_init(&ex->end, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->end);

  /* Everything succeeded: keep the members, drop the unwind entries. */
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
/**
 * Python binding: assignment matrix of a gene for a given read length.
 *
 * Python arguments: (gff, gene, readlength) -- a PyCObject wrapping a
 * splicing_gff_t, followed by two integers.
 *
 * Returns the object pysplicing_from_matrix() builds from the computed
 * matrix, or NULL with an exception set.
 */
static PyObject* pysplicing_assignment_matrix(PyObject *self,
					      PyObject *args) {

  PyObject *gff;
  int gene, readlength;
  splicing_matrix_t matrix;
  PyObject *rmatrix;
  splicing_gff_t *mygff;

  if (!PyArg_ParseTuple(args, "Oii", &gff, &gene, &readlength)) {
    return NULL;
  }

  mygff=PyCObject_AsVoidPtr(gff);

  SPLICING_PYCHECK(splicing_matrix_init(&matrix, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &matrix);

  SPLICING_PYCHECK(splicing_assignment_matrix(mygff, gene, readlength,
					      &matrix));

  rmatrix = pysplicing_from_matrix(&matrix);

  splicing_matrix_destroy(&matrix);
  SPLICING_FINALLY_CLEAN(1);

  /* Hand the object back directly. Py_BuildValue("O", ...) would add a
     second reference that nobody releases.
     NOTE(review): assumes the converter returns a new reference (or NULL
     with an exception set) -- confirm against its definition. */
  return rmatrix;
}
int splicing_genomic_to_iso_1(const splicing_gff_t *gff, size_t gene, int isoform, int position, const splicing_gff_converter_t *converter, int *result) { size_t startpos, endpos, ex; splicing_gff_converter_t vconverter, *myconverter = (splicing_gff_converter_t*) converter; if (!converter) { myconverter=&vconverter; SPLICING_CHECK(splicing_gff_converter_init(gff, gene, myconverter)); SPLICING_FINALLY(splicing_gff_converter_destroy, myconverter); } startpos=VECTOR(myconverter->exidx)[isoform]; endpos=VECTOR(myconverter->exidx)[isoform+1]; for (ex=startpos; ex < endpos && VECTOR(myconverter->exend)[ex] < position; ex++) ; if (ex < endpos && VECTOR(myconverter->exstart)[ex] <= position && position <= VECTOR(myconverter->exend)[ex]) { *result = position - VECTOR(myconverter->shift)[ex]; } else { *result = -1; } if (!converter) { splicing_gff_converter_destroy(myconverter); SPLICING_FINALLY_CLEAN(1); } return 0; }
/**
 * Python binding: simulate paired-end reads from a gene.
 *
 * Python arguments: (gff, gene, expression, noreads, readlength,
 * normalMean, normalVar, numDevs). The library call is made with
 * insertProb and insertStart both set to 0.
 *
 * Returns a 3-tuple (isoform, position, cigar) built from the simulated
 * vectors, or NULL with an exception set.
 */
static PyObject* pysplicing_simulate_paired_reads(PyObject *self,
						  PyObject *args) {

  PyObject *gff, *expression;
  int gene, noreads, readlength;
  double normalMean, normalVar, numDevs;
  splicing_gff_t *mygff;
  splicing_vector_t myexpression;
  splicing_vector_int_t isoform, position;
  splicing_strvector_t cigar;
  PyObject *r1, *r2, *r3;

  if (!PyArg_ParseTuple(args, "OiOiiddd", &gff, &gene, &expression,
			&noreads, &readlength, &normalMean, &normalVar,
			&numDevs)) {
    return NULL;
  }

  mygff=PyCObject_AsVoidPtr(gff);
  if (pysplicing_to_vector(expression, &myexpression)) { return NULL; }
  SPLICING_FINALLY(splicing_vector_destroy, &myexpression);

  SPLICING_PYCHECK(splicing_vector_int_init(&isoform, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &isoform);
  SPLICING_PYCHECK(splicing_vector_int_init(&position, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &position);
  SPLICING_PYCHECK(splicing_strvector_init(&cigar, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &cigar);

  SPLICING_PYCHECK(splicing_simulate_paired_reads(mygff, gene, &myexpression,
						  noreads, readlength,
						  /*insertProb=*/ 0,
						  /*insertStart=*/ 0,
						  normalMean, normalVar,
						  numDevs, &isoform,
						  &position, &cigar, 0));

  r1=pysplicing_from_vector_int(&isoform);
  r2=pysplicing_from_vector_int(&position);
  r3=pysplicing_from_strvector(&cigar);

  splicing_strvector_destroy(&cigar);
  splicing_vector_int_destroy(&position);
  splicing_vector_int_destroy(&isoform);
  splicing_vector_destroy(&myexpression);
  SPLICING_FINALLY_CLEAN(4);

  /* "N" lets the tuple take over our references. "O" added a second
     reference to each item, so none of them was ever freed.
     NOTE(review): assumes the converters return new references --
     confirm against their definitions. */
  return Py_BuildValue("NNN", r1, r2, r3);
}
/**
 * Python binding: estimate isoform expression for one gene from reads.
 *
 * Python arguments: (gff, gene, readLength, position, readcigar).
 *
 * Returns a 3-tuple (match_matrix, assignment_matrix, expression), or
 * NULL with an exception set.
 *
 * NOTE(review): if pysplicing_to_strvector() fails, `myposition` is still
 * registered on the finally stack when this function returns; whether
 * something else unwinds it is not visible here -- confirm.
 */
static PyObject* pysplicing_solve_gene(PyObject *self, PyObject *args) {

  PyObject *gff, *readcigar, *position;
  int gene, readLength;
  splicing_gff_t *mygff;
  splicing_vector_int_t myposition;
  splicing_strvector_t myreadcigar;
  splicing_matrix_t match_matrix, assignment_matrix;
  splicing_vector_t expression;
  PyObject *r1, *r2, *r3;

  if (!PyArg_ParseTuple(args, "OiiOO", &gff, &gene, &readLength, &position,
			&readcigar)) {
    return NULL;
  }

  mygff=PyCObject_AsVoidPtr(gff);
  if (pysplicing_to_vector_int(position, &myposition)) { return NULL; }
  SPLICING_FINALLY(splicing_vector_int_destroy, &myposition);
  if (pysplicing_to_strvector(readcigar, &myreadcigar)) { return NULL; }
  SPLICING_FINALLY(splicing_strvector_destroy, &myreadcigar);

  SPLICING_PYCHECK(splicing_matrix_init(&match_matrix, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &match_matrix);
  SPLICING_PYCHECK(splicing_matrix_init(&assignment_matrix, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &assignment_matrix);
  SPLICING_PYCHECK(splicing_vector_init(&expression, 0));
  SPLICING_FINALLY(splicing_vector_destroy, &expression);

  SPLICING_PYCHECK(splicing_solve_gene(mygff, gene, readLength, &myposition,
				       (const char **) myreadcigar.table,
				       &match_matrix, &assignment_matrix,
				       &expression));

  r1=pysplicing_from_matrix(&match_matrix);
  r2=pysplicing_from_matrix(&assignment_matrix);
  r3=pysplicing_from_vector(&expression);

  splicing_vector_destroy(&expression);
  splicing_matrix_destroy(&assignment_matrix);
  splicing_matrix_destroy(&match_matrix);
  splicing_strvector_destroy(&myreadcigar);
  splicing_vector_int_destroy(&myposition);
  SPLICING_FINALLY_CLEAN(5);

  /* "N" lets the tuple take over our references. "O" added a second
     reference to each item, so none of them was ever freed.
     NOTE(review): assumes the converters return new references --
     confirm against their definitions. */
  return Py_BuildValue("NNN", r1, r2, r3);
}
/**
 * Python binding: simulate single-end reads from a gene.
 *
 * Python arguments: (gff, gene, expression, noreads, readLength).
 *
 * Returns a 3-tuple (isoform, position, cigar) built from the simulated
 * vectors, or NULL with an exception set.
 */
static PyObject* pysplicing_simulate_reads(PyObject *self, PyObject *args) {

  PyObject *gff, *expression;
  int gene, noreads, readLength;
  splicing_vector_t myexpression;
  splicing_gff_t *mygff;
  splicing_vector_int_t isoform, position;
  splicing_strvector_t cigar;
  PyObject *risoform, *rposition, *rcigar;

  if (!PyArg_ParseTuple(args, "OiOii", &gff, &gene, &expression, &noreads,
			&readLength)) {
    return NULL;
  }

  mygff=PyCObject_AsVoidPtr(gff);
  if (pysplicing_to_vector(expression, &myexpression)) { return NULL; }
  SPLICING_FINALLY(splicing_vector_destroy, &myexpression);

  SPLICING_PYCHECK(splicing_strvector_init(&cigar, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &cigar);
  SPLICING_PYCHECK(splicing_vector_int_init(&position, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &position);
  SPLICING_PYCHECK(splicing_vector_int_init(&isoform, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &isoform);

  SPLICING_PYCHECK(splicing_simulate_reads(mygff, gene, &myexpression,
					   noreads, readLength,
					   &isoform, &position, &cigar));

  /* Convert and release in reverse order of registration, so each
     FINALLY_CLEAN(1) pops the entry that was just destroyed. */
  risoform = pysplicing_from_vector_int(&isoform);
  splicing_vector_int_destroy(&isoform);
  SPLICING_FINALLY_CLEAN(1);

  rposition = pysplicing_from_vector_int(&position);
  splicing_vector_int_destroy(&position);
  SPLICING_FINALLY_CLEAN(1);

  rcigar = pysplicing_from_strvector(&cigar);
  splicing_strvector_destroy(&cigar);
  SPLICING_FINALLY_CLEAN(1);

  splicing_vector_destroy(&myexpression);
  SPLICING_FINALLY_CLEAN(1);

  /* "N" lets the tuple take over our references. "O" added a second
     reference to each item, so none of them was ever freed.
     NOTE(review): assumes the converters return new references --
     confirm against their definitions. */
  return Py_BuildValue("NNN", risoform, rposition, rcigar);
}
int splicing_gff_fprint_gene(const splicing_gff_t *gff, FILE *outfile, int gene) { size_t nogenes, noiso; int i, j; splicing_vector_int_t start, end, idx; SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes)); if (gene < 0 || gene >= nogenes) { SPLICING_ERROR("Invalid gene ID", SPLICING_EINVAL); } SPLICING_CHECK(splicing_vector_int_init(&start, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &start); SPLICING_CHECK(splicing_vector_int_init(&end, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &end); SPLICING_CHECK(splicing_vector_int_init(&idx, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &idx); SPLICING_CHECK(splicing_gff_exon_start_end(gff, &start, &end, &idx, gene)); noiso = splicing_vector_int_size(&idx)-1; fprintf(outfile, "===\nGene with %i isoforms:\n", (int) noiso); for (i=0; i<noiso; i++) { fprintf(outfile, " Isoform %i:\n", i); for (j=VECTOR(idx)[i]; j<VECTOR(idx)[i+1]; j++) { fprintf(outfile, " %i-%i\n", VECTOR(start)[j], VECTOR(end)[j]); } } splicing_vector_int_destroy(&idx); splicing_vector_int_destroy(&end); splicing_vector_int_destroy(&start); SPLICING_FINALLY_CLEAN(3); return 0; }
/**
 * Converts genomic positions to isoform coordinates for every isoform of
 * a gene, using a (possibly caller-supplied) converter.
 *
 * \param gff The GFF structure; only used to build a converter when
 *   \p converter is NULL.
 * \param gene Gene index; only used together with \p gff as above.
 * \param position Genomic positions, one per read.
 * \param converter Optional prepared converter; if NULL, a temporary one
 *   is created and destroyed inside this call.
 * \param isopos Resized to (number of isoforms) x (number of positions);
 *   entry (i, r) is the coordinate of read r within isoform i, or -1
 *   when the position is not inside any exon of that isoform.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
			    const splicing_vector_int_t *position,
			    const splicing_gff_converter_t *converter,
			    splicing_matrix_int_t *isopos) {

  size_t read, iso;
  size_t noreads=splicing_vector_int_size(position);
  splicing_gff_converter_t localConv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  if (!converter) {
    conv=&localConv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, conv->noiso, noreads));

  for (read=0; read<noreads; read++) {
    for (iso=0; iso<conv->noiso; iso++) {
      size_t pos=VECTOR(*position)[read];
      size_t startpos=VECTOR(conv->exidx)[iso];
      size_t endpos=VECTOR(conv->exidx)[iso+1];
      int ex=startpos;

      /* Advance past every exon that ends before the position. */
      while (ex < endpos && VECTOR(conv->exend)[ex] < pos) {
	ex++;
      }

      if (ex < endpos &&
	  VECTOR(conv->exstart)[ex] <= pos &&
	  pos <= VECTOR(conv->exend)[ex]) {
	MATRIX(*isopos, iso, read) =
	  VECTOR(*position)[read] - VECTOR(conv->shift)[ex];
      } else {
	MATRIX(*isopos, iso, read) = -1;
      }
    }
  }

  /* Only tear down the converter if it was created here. */
  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/**
 * Converts one isoform coordinate to a genomic position, separately for
 * every isoform of a gene.
 *
 * \param gff The GFF structure; only used to build a converter when
 *   \p converter is NULL.
 * \param gene Gene index; only used together with \p gff as above.
 * \param position Isoform coordinate; must be at least 1.
 * \param converter Optional prepared converter; if NULL, a temporary one
 *   is created and destroyed inside this call.
 * \param result Resized to the number of isoforms; entry i is the genomic
 *   position in isoform i, or -1 when no exon limit of that isoform
 *   exceeds \p position.
 * \return 0 on success; SPLICING_EINVAL when \p position is below 1.
 */
int splicing_iso_to_genomic_all(const splicing_gff_t *gff, size_t gene,
				int position,
				const splicing_gff_converter_t *converter,
				splicing_vector_int_t *result) {

  size_t i;
  splicing_gff_converter_t vconverter,
    *myconverter = (splicing_gff_converter_t*) converter;

  if (position < 1) {
    /* Message wording fixed: it used to read "must the larger". */
    SPLICING_ERROR("Invalid isoform coordinate, must be larger than zero",
		   SPLICING_EINVAL);
  }

  if (!converter) {
    myconverter=&vconverter;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, myconverter));
    SPLICING_FINALLY(splicing_gff_converter_destroy, myconverter);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, myconverter->noiso));

  /* TODO: find impossible positions */
  for (i=0; i<myconverter->noiso; i++) {
    int ex;
    /* Find the first exon of isoform i whose limit exceeds position. */
    for (ex=VECTOR(myconverter->exidx)[i];
	 ex < VECTOR(myconverter->exidx)[i+1] &&
	   VECTOR(myconverter->exlim)[ex] <= position;
	 ex++) ;
    if (ex < VECTOR(myconverter->exidx)[i+1]) {
      VECTOR(*result)[i] = position + VECTOR(myconverter->shift)[ex];
    } else {
      VECTOR(*result)[i] = -1;
    }
  }

  /* Only tear down the converter if it was created here. */
  if (!converter) {
    splicing_gff_converter_destroy(myconverter);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/**
 * Python binding: number of isoforms per gene in a GFF object.
 *
 * Python arguments: (gff,) -- a PyCObject wrapping a splicing_gff_t.
 *
 * Returns the object pysplicing_from_vector_int() builds from the count
 * vector, or NULL with an exception set.
 */
static PyObject* pysplicing_gff_noiso(PyObject *self, PyObject *args) {

  PyObject *gff;
  splicing_gff_t *mygff;
  splicing_vector_int_t noiso;
  PyObject *rnoiso;

  if (!PyArg_ParseTuple(args, "O", &gff)) { return NULL; }

  mygff=PyCObject_AsVoidPtr(gff);

  SPLICING_PYCHECK(splicing_vector_int_init(&noiso, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &noiso);

  SPLICING_PYCHECK(splicing_gff_noiso(mygff, &noiso));

  rnoiso = pysplicing_from_vector_int(&noiso);

  splicing_vector_int_destroy(&noiso);
  SPLICING_FINALLY_CLEAN(1);

  /* Hand the object back directly. Py_BuildValue("O", ...) would add a
     second reference that nobody releases.
     NOTE(review): assumes the converter returns a new reference (or NULL
     with an exception set) -- confirm against its definition. */
  return rnoiso;
}
int splicing_score_iso(const splicing_vector_t *psi, int noiso, const splicing_vector_int_t *assignment, int noreads, const splicing_vector_int_t *peffisolen, double *res) { int *effisolen = VECTOR(*peffisolen); double sum, maxpsieff, score; splicing_vector_t logpsi; int i; SPLICING_CHECK(splicing_vector_init(&logpsi, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &logpsi); /* Calculate the normalization factor */ VECTOR(logpsi)[0] = log(VECTOR(*psi)[0]) + log(effisolen[0]); for (maxpsieff=VECTOR(logpsi)[0], i=1; i<noiso; i++) { VECTOR(logpsi)[i] = log(VECTOR(*psi)[i]) + log(effisolen[i]); if (VECTOR(logpsi)[i] > maxpsieff) { maxpsieff = VECTOR(logpsi)[i]; } } for (sum=0.0, i=0; i<noiso; i++) { sum += exp(VECTOR(logpsi)[i]-maxpsieff); } sum = log(sum) + maxpsieff; /* Normalize */ for (i=0; i<noiso; i++) { VECTOR(logpsi)[i] -= sum; } /* Calculate score, based on assignments */ for (score=0.0, i=0; i<noreads; i++) { if (VECTOR(*assignment)[i] != -1) { score += VECTOR(logpsi)[ VECTOR(*assignment)[i] ]; } } splicing_vector_destroy(&logpsi); SPLICING_FINALLY_CLEAN(1); *res = score; return 0; }
/**
 * Converts one genomic position to isoform coordinates, separately for
 * every isoform of a gene.
 *
 * \param gff The GFF structure; only used to build a converter when
 *   \p converter is NULL.
 * \param gene Gene index; only used together with \p gff as above.
 * \param position Genomic position to convert.
 * \param converter Optional prepared converter; if NULL, a temporary one
 *   is created and destroyed inside this call.
 * \param result Resized to the number of isoforms; entry i is the
 *   coordinate within isoform i, or -1 when the position is not inside
 *   any exon of that isoform.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_genomic_to_iso_all(const splicing_gff_t *gff, size_t gene,
				int position,
				const splicing_gff_converter_t *converter,
				splicing_vector_int_t *result) {

  int iso;
  splicing_gff_converter_t localConv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  if (!converter) {
    conv=&localConv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, conv->noiso));

  for (iso=0; iso<conv->noiso; iso++) {
    size_t startpos=VECTOR(conv->exidx)[iso];
    size_t endpos=VECTOR(conv->exidx)[iso+1];
    int ex=startpos;

    /* Advance past every exon that ends before the position. */
    while (ex < endpos && VECTOR(conv->exend)[ex] < position) {
      ex++;
    }

    if (ex < endpos &&
	VECTOR(conv->exstart)[ex] <= position &&
	position <= VECTOR(conv->exend)[ex]) {
      VECTOR(*result)[iso] = position - VECTOR(conv->shift)[ex];
    } else {
      VECTOR(*result)[iso] = -1;
    }
  }

  /* Only tear down the converter if it was created here. */
  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
/**
 * Converts isoform coordinates to genomic positions, in place.
 *
 * \param gff The GFF structure; only used to build a converter when
 *   \p converter is NULL.
 * \param gene Gene index; only used together with \p gff as above.
 * \param isoform Isoform index for every entry of \p position.
 * \param converter Optional prepared converter; if NULL, a temporary one
 *   is created and destroyed inside this call.
 * \param position On input isoform coordinates, on output genomic
 *   positions. Entries equal to -1 are left untouched; an entry becomes
 *   -1 when no exon limit of its isoform exceeds it.
 * \return 0 on success, errors via SPLICING_CHECK.
 */
int splicing_iso_to_genomic(const splicing_gff_t *gff, size_t gene,
			    const splicing_vector_int_t *isoform,
			    const splicing_gff_converter_t *converter,
			    splicing_vector_int_t *position) {

  size_t k;
  size_t count=splicing_vector_int_size(position);
  splicing_gff_converter_t localConv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  if (!converter) {
    conv=&localConv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  /* Do the shifting */
  for (k=0; k<count; k++) {
    int iso=VECTOR(*isoform)[k];
    size_t pos=VECTOR(*position)[k];
    int ex;

    /* -1 marks an entry without a position; pass it through. */
    if (pos==-1) { continue; }

    /* Find the first exon of the isoform whose limit exceeds pos. */
    ex=VECTOR(conv->exidx)[iso];
    while (ex < VECTOR(conv->exidx)[iso+1] &&
	   VECTOR(conv->exlim)[ex] <= pos) {
      ex++;
    }

    if (ex < VECTOR(conv->exidx)[iso+1]) {
      VECTOR(*position)[k] = pos + VECTOR(conv->shift)[ex];
    } else {
      VECTOR(*position)[k] = -1;
    }
  }

  /* Only tear down the converter if it was created here. */
  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
int splicing_miso_trinity(const splicing_matrix_t *match_matrix, const splicing_vector_int_t *isolen, int readLength, int noIterations, int noBurnIn, int noLag, const splicing_vector_t *hyperp, splicing_matrix_t *samples, splicing_vector_t *logLik, splicing_matrix_t *class_templates, splicing_vector_t *class_counts, splicing_vector_int_t *assignment, splicing_miso_rundata_t *rundata) { double acceptP, cJS, pJS, sigma; int noiso = splicing_matrix_nrow(match_matrix); int noReads = splicing_matrix_ncol(match_matrix); splicing_vector_int_t *myass=assignment, vass; splicing_vector_t vpsi, vpsiNew, valpha, valphaNew, *psi=&vpsi, *psiNew=&vpsiNew, *alpha=&valpha, *alphaNew=&valphaNew; int noSamples = (noIterations - noBurnIn + 1) / noLag; int i, m, lagCounter=0, noS=0; splicing_vector_int_t match_order; splicing_vector_int_t effisolen; splicing_vector_t isoscores; if ( (class_templates ? 1 : 0) + (class_counts ? 1 : 0) == 1) { SPLICING_ERROR("Only one of `class_templates' and `class_counts' is " "given", SPLICING_EINVAL); } rundata->noIso=noiso; rundata->noIters=noIterations; rundata->noBurnIn=noBurnIn; rundata->noLag=noLag; rundata->noAccepted = rundata->noRejected = 0; if (assignment) { SPLICING_CHECK(splicing_vector_int_resize(myass, noReads)); splicing_vector_int_null(myass); } else { myass=&vass; SPLICING_CHECK(splicing_vector_int_init(myass, noReads)); SPLICING_FINALLY(splicing_vector_int_destroy, myass); } SPLICING_CHECK(splicing_vector_init(&vpsi, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &vpsi); SPLICING_CHECK(splicing_vector_init(&vpsiNew, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &vpsiNew); SPLICING_CHECK(splicing_vector_init(&valpha, noiso-1)); SPLICING_FINALLY(splicing_vector_destroy, &valpha); SPLICING_CHECK(splicing_vector_init(&valphaNew, noiso-1)); SPLICING_FINALLY(splicing_vector_destroy, &valphaNew); SPLICING_CHECK(splicing_vector_int_init(&match_order, noReads)); SPLICING_FINALLY(splicing_vector_int_destroy, &match_order); 
SPLICING_CHECK(splicing_order_matches(match_matrix, &match_order)); if (class_templates && class_counts) { SPLICING_CHECK(splicing_i_miso_classes(match_matrix, &match_order, class_templates, class_counts, /*bin_class_templates=*/ 0, /*bin_class_counts=*/ 0)); } SPLICING_CHECK(splicing_vector_int_init(&effisolen, noiso)); SPLICING_FINALLY(splicing_vector_int_destroy, &effisolen); SPLICING_CHECK(splicing_vector_init(&isoscores, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &isoscores); for (i=0; i<noiso; i++) { int l=VECTOR(*isolen)[i]-readLength+1; VECTOR(effisolen)[i] = l > 0 ? l : 0; VECTOR(isoscores)[i] = -log((double) l); } SPLICING_CHECK(splicing_matrix_resize(samples, noiso, noSamples)); SPLICING_CHECK(splicing_vector_resize(logLik, noSamples)); /* Initialize Psi(0) randomly */ SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 0, 0, 0, 0, 0, 0, noiso, psi, alpha, &sigma, 0)); SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 1, psi, alpha, sigma, 0, 0, noiso, psi, alpha, 0, 0)); /* Initialize assignments of reads */ SPLICING_CHECK(splicing_reassign_samples(match_matrix, &match_order, psi, noiso, myass)); /* foreach Iteration m=1, ..., M do */ for (m=0; m < noIterations; m++) { SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 1, psi, alpha, sigma, 0, 0, noiso, psiNew, alphaNew, 0, 0)); SPLICING_CHECK(splicing_metropolis_hastings_ratio(myass, noReads, psiNew, alphaNew, psi, alpha, sigma, noiso, &effisolen, hyperp, &isoscores, m > 0 ? 
1 : 0, &acceptP, &cJS, &pJS)); if (acceptP >= 1 || RNG_UNIF01() < acceptP) { splicing_vector_t *tmp; tmp=psi; psi=psiNew; psiNew=tmp; tmp=alpha; alpha=alphaNew; alphaNew=tmp; cJS = pJS; rundata->noAccepted ++; } else { rundata->noRejected ++; } if (m >= noBurnIn) { if (lagCounter == noLag - 1) { memcpy(&MATRIX(*samples, 0, noS), VECTOR(*psi), noiso * sizeof(double)); VECTOR(*logLik)[noS] = cJS; noS++; lagCounter = 0; } else { lagCounter ++; } } SPLICING_CHECK(splicing_reassign_samples(match_matrix, &match_order, psi, noiso, myass)); } /* for m < noIterations */ splicing_vector_destroy(&isoscores); splicing_vector_int_destroy(&effisolen); splicing_vector_int_destroy(&match_order); splicing_vector_destroy(&valphaNew); splicing_vector_destroy(&valpha); splicing_vector_destroy(&vpsiNew); splicing_vector_destroy(&vpsi); SPLICING_FINALLY_CLEAN(7); if (!assignment) { splicing_vector_int_destroy(myass); SPLICING_FINALLY_CLEAN(1); } return 0; }
/*
 * Re-sort the records of a GFF and repair everything that refers to
 * records by number.
 *
 * The new record order is obtained by sorting the indices 0..gff->n-1
 * with splicing_i_gff_reindex_cmp, which gets `gff' as its context; the
 * ordering itself is defined by that comparator. After that:
 *   - seqid, source and strand (one entry per gene) are re-indexed by the
 *     order in which the genes show up in the sorted records;
 *   - type, start, end, score, phase, ID and parent (one entry per
 *     record) are re-indexed by the sorted record order;
 *   - every parent link other than -1 is translated from the old record
 *     number to the new one;
 *   - gff->genes and gff->transcripts are refilled by scanning the types.
 *
 * NOTE(review): the *_intiindex / *_ipermute helpers are presumably
 * in-place `v = v[idx]' operations; their definitions are not visible
 * here and their results are not checked.
 *
 * Returns 0 on success; a failing allocation is reported via
 * SPLICING_CHECK.
 */
int splicing_gff_reindex(splicing_gff_t *gff) {

  const int total = gff->n;
  splicing_vector_int_t order;      /* sorted position -> old record number */
  splicing_vector_int_t lookup;     /* scratch table, used twice below */
  splicing_vector_int_t geneorder;  /* sorted gene rank -> old gene number */
  int rec, nextgene, nextmrna;

  /* Sort the record numbers */
  SPLICING_CHECK(splicing_vector_int_init(&order, total));
  SPLICING_FINALLY(splicing_vector_int_destroy, &order);
  for (rec = 0; rec < total; rec++) {
    VECTOR(order)[rec] = rec;
  }
  splicing_qsort_r(VECTOR(order), total, sizeof(int), (void*) gff,
                   splicing_i_gff_reindex_cmp);

  /* Work out in which order the genes appear after sorting. `lookup'
     maps the old record number of a gene to its old gene number. */
  SPLICING_CHECK(splicing_vector_int_init(&geneorder, gff->nogenes));
  SPLICING_FINALLY(splicing_vector_int_destroy, &geneorder);
  SPLICING_CHECK(splicing_vector_int_init(&lookup, total));
  SPLICING_FINALLY(splicing_vector_int_destroy, &lookup);

  for (rec = 0; rec < gff->nogenes; rec++) {
    VECTOR(lookup)[ VECTOR(gff->genes)[rec] ] = rec;
  }
  nextgene = 0;
  for (rec = 0; rec < total; rec++) {
    int old = VECTOR(order)[rec];
    if (VECTOR(gff->type)[old] != SPLICING_TYPE_GENE) {
      continue;
    }
    VECTOR(geneorder)[nextgene] = VECTOR(lookup)[old];
    nextgene++;
  }

  splicing_vector_int_destroy(&lookup);
  SPLICING_FINALLY_CLEAN(1);

  /* Per-gene attributes */
  splicing_vector_int_intiindex(&gff->seqid, &geneorder);
  splicing_vector_int_intiindex(&gff->source, &geneorder);
  splicing_vector_int_intiindex(&gff->strand, &geneorder);

  splicing_vector_int_destroy(&geneorder);
  SPLICING_FINALLY_CLEAN(1);

  /* Per-record attributes */
  splicing_vector_int_intiindex(&gff->type, &order);
  splicing_vector_int_intiindex(&gff->start, &order);
  splicing_vector_int_intiindex(&gff->end, &order);
  splicing_vector_intiindex(&gff->score, &order);
  splicing_vector_int_intiindex(&gff->phase, &order);
  splicing_strvector_ipermute(&gff->ID, &order);
  splicing_vector_int_intiindex(&gff->parent, &order);

  /* Parent links still hold old record numbers; translate them with the
     inverse of `order' (old record number -> sorted position). */
  SPLICING_CHECK(splicing_vector_int_init(&lookup, total));
  SPLICING_FINALLY(splicing_vector_int_destroy, &lookup);
  for (rec = 0; rec < total; rec++) {
    VECTOR(lookup)[ VECTOR(order)[rec] ] = rec;
  }
  for (rec = 0; rec < total; rec++) {
    int oldparent = VECTOR(gff->parent)[rec];
    if (oldparent == -1) {
      continue;
    }
    VECTOR(gff->parent)[rec] = VECTOR(lookup)[oldparent];
  }

  splicing_vector_int_destroy(&lookup);
  SPLICING_FINALLY_CLEAN(1);

  /* Refill the gene and transcript tables from the record types */
  nextgene = 0;
  nextmrna = 0;
  for (rec = 0; rec < total; rec++) {
    if (VECTOR(gff->type)[rec] == SPLICING_TYPE_GENE) {
      VECTOR(gff->genes)[nextgene] = rec;
      nextgene++;
    } else if (VECTOR(gff->type)[rec] == SPLICING_TYPE_MRNA) {
      VECTOR(gff->transcripts)[nextmrna] = rec;
      nextmrna++;
    }
  }

  splicing_vector_int_destroy(&order);
  SPLICING_FINALLY_CLEAN(1);

  return 0;
}
/*
 * Translate read positions from isoform coordinates to genomic
 * coordinates, in place.
 *
 * gff       The annotation.
 * gene      Index of the gene the isoforms belong to.
 * isoform   For each position, the index of its isoform within the gene.
 *           Must have at least as many elements as `position'.
 * exstart, exend, exidx
 *           Exon start/end tables and the per-isoform index into them, as
 *           filled by splicing_gff_exon_start_end(). If any of the three
 *           is a null pointer, all three are computed here instead.
 * position  On input the positions within the isoforms, on output the
 *           genomic positions. An offset p belongs to the first exon of
 *           its isoform whose cumulative length is at least p.
 *
 * Returns 0 on success. An isoform id outside 0..noiso-1, or a position
 * that does not fall into any exon of its isoform, is reported with
 * SPLICING_EINVAL instead of reading past the exon tables. Positions
 * before the failing one have already been converted at that point.
 */
int splicing_iso_to_genomic(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *isoform,
                            const splicing_vector_int_t *exstart,
                            const splicing_vector_int_t *exend,
                            const splicing_vector_int_t *exidx,
                            splicing_vector_int_t *position) {

  size_t i, noiso, n=splicing_vector_int_size(position);
  splicing_vector_int_t exlim, shift;
  splicing_vector_int_t vexstart, vexend, vexidx,
    *myexstart=(splicing_vector_int_t *) exstart,
    *myexend=(splicing_vector_int_t *) exend,
    *myexidx=(splicing_vector_int_t *) exidx;

  if ((size_t) splicing_vector_int_size(isoform) < n) {
    SPLICING_ERROR("Isoform vector is shorter than position vector",
                   SPLICING_EINVAL);
  }

  /* Build the exon tables ourselves if the caller did not supply all
     three of them. */
  if (!exstart || !exend || !exidx) {
    myexstart=&vexstart;
    myexend=&vexend;
    myexidx=&vexidx;
    SPLICING_CHECK(splicing_vector_int_init(myexstart, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexstart);
    SPLICING_CHECK(splicing_vector_int_init(myexend, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexend);
    SPLICING_CHECK(splicing_vector_int_init(myexidx, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexidx);
    SPLICING_CHECK(splicing_gff_exon_start_end(gff, myexstart, myexend,
                                               myexidx, gene));
  }

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exlim, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exlim);
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);

  /* shift[e]: what has to be added to an isoform offset that falls into
     exon e to get the genomic coordinate. The running sums are kept in
     size_t, only their (small) difference is stored. */
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int e=VECTOR(*myexidx)[i], elast=VECTOR(*myexidx)[i+1];
    while (e < elast) {
      cs += VECTOR(*myexstart)[e];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++;
      ce += VECTOR(*myexend)[e];
      e++;
    }
  }

  /* exlim[e]: one more than the cumulative length of the isoform up to
     and including exon e, i.e. the first offset that is beyond exon e. */
  for (i=0; i<noiso; i++) {
    size_t cs=0;
    int e=VECTOR(*myexidx)[i], elast=VECTOR(*myexidx)[i+1];
    while (e < elast) {
      size_t l=VECTOR(*myexend)[e]-VECTOR(*myexstart)[e]+1;
      cs += l;
      SPLICING_CHECK(splicing_vector_int_push_back(&exlim, cs+1));
      e++;
    }
  }

  for (i=0; i<n; i++) {
    int iso=VECTOR(*isoform)[i];
    size_t pos=VECTOR(*position)[i];
    int ex, last;

    if (iso < 0 || (size_t) iso >= noiso) {
      SPLICING_ERROR("Invalid isoform id", SPLICING_EINVAL);
    }

    /* Find the exon of the isoform that contains `pos', but never walk
       into the exons of the next isoform or off the end of the table. */
    ex=VECTOR(*myexidx)[iso];
    last=VECTOR(*myexidx)[iso+1];
    while (ex < last && (size_t) VECTOR(exlim)[ex] <= pos) {
      ex++;
    }
    if (ex >= last) {
      SPLICING_ERROR("Position is outside of the isoform", SPLICING_EINVAL);
    }

    VECTOR(*position)[i] = pos + VECTOR(shift)[ex];
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exlim);
  SPLICING_FINALLY_CLEAN(2);

  if (!exstart || !exend || !exidx) {
    splicing_vector_int_destroy(myexidx);
    splicing_vector_int_destroy(myexend);
    splicing_vector_int_destroy(myexstart);
    SPLICING_FINALLY_CLEAN(3);
  }

  return 0;
}
/*
 * Initialize an empty GFF structure.
 *
 * gff   The structure to initialize. Every string vector and numeric
 *       vector member is created with length zero, the record, gene and
 *       transcript counters are set to zero and the `last_*' fields are
 *       reset (SPLICING_STRVECTOR_ZERO for the ids, -1 for the numbers).
 * size  Currently ignored.
 *
 * Returns 0 on success; a failing member initialization is reported via
 * SPLICING_CHECK. On success the fourteen cleanup entries registered
 * along the way are dropped without destroying anything, so the
 * initialized members stay with `gff'.
 *
 * TODO: do not ignore size
 */
int splicing_gff_init(splicing_gff_t *gff, size_t size) {

  /* `size' is unsigned, so the former `size < 0' test could never be
     true; it was removed instead of pretending to validate anything. */
  (void) size;

  SPLICING_CHECK(splicing_strvector_init(&gff->seqids, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &gff->seqids);
  SPLICING_CHECK(splicing_strvector_init(&gff->sources, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &gff->sources);
  SPLICING_CHECK(splicing_strvector_init(&gff->ID, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &gff->ID);

  SPLICING_CHECK(splicing_vector_int_init(&gff->genes, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->genes);
  SPLICING_CHECK(splicing_vector_int_init(&gff->transcripts, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->transcripts);
  SPLICING_CHECK(splicing_vector_int_init(&gff->seqid, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->seqid);
  SPLICING_CHECK(splicing_vector_int_init(&gff->source, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->source);
  SPLICING_CHECK(splicing_vector_int_init(&gff->strand, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->strand);
  SPLICING_CHECK(splicing_vector_int_init(&gff->type, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->type);
  SPLICING_CHECK(splicing_vector_int_init(&gff->start, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->start);
  SPLICING_CHECK(splicing_vector_int_init(&gff->end, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->end);
  SPLICING_CHECK(splicing_vector_init(&gff->score, 0));
  SPLICING_FINALLY(splicing_vector_destroy, &gff->score);
  SPLICING_CHECK(splicing_vector_int_init(&gff->phase, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->phase);
  SPLICING_CHECK(splicing_vector_int_init(&gff->parent, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &gff->parent);

  gff->n=0;
  gff->nogenes=0;
  gff->notranscripts=0;

  gff->last_gene_id = gff->last_mrna_id = SPLICING_STRVECTOR_ZERO;
  gff->last_gene_no = gff->last_mrna_no = -1;
  gff->last_seqid = gff->last_source = SPLICING_STRVECTOR_ZERO;

  /* Everything succeeded: forget the cleanup entries, keep the members */
  SPLICING_FINALLY_CLEAN(14);

  return 0;
}
/*
 * Python entry point that runs splicing_miso() on one gene.
 *
 * Arguments (format "OiOOi|iiiO"): the GFF object, the gene index, the
 * read positions, the read CIGAR strings, the read length, and optionally
 * the number of iterations (default 5000), the number of burn-in
 * iterations (default 500), the lag (default 10) and the hyper-parameter
 * vector. Without a hyper-parameter vector, one with a 1.0 for every
 * isoform of the gene is used.
 *
 * Returns a 6-tuple: samples, logLik, class_templates (transposed),
 * class_counts, assignment and the run data, each converted with the
 * matching pysplicing_from_* helper. Returns NULL on failure.
 *
 * NOTE(review): the plain `return NULL' paths after a failed
 * pysplicing_to_* conversion leave the cleanup entries registered so far
 * in place; whether anything else unwinds them is not visible from here.
 */
static PyObject* pysplicing_miso(PyObject *self, PyObject *args) {

  PyObject *gff, *readpos, *readcigar, *hyperp=0;
  int gene, readLength, noIterations=5000, noBurnIn=500, noLag=10;
  splicing_gff_t *mygff;
  splicing_strvector_t myreadcigar;
  splicing_vector_int_t myreadpos;
  splicing_vector_t myhyperp;
  splicing_matrix_t samples;
  splicing_vector_t logLik;
  splicing_matrix_t class_templates;
  splicing_vector_t class_counts;
  splicing_vector_int_t assignment;
  splicing_miso_rundata_t rundata;
  PyObject *r1, *r2, *r3, *r4, *r5, *r6;

  if (!PyArg_ParseTuple(args, "OiOOi|iiiO", &gff, &gene, &readpos,
                        &readcigar, &readLength, &noIterations, &noBurnIn,
                        &noLag, &hyperp)) {
    return NULL;
  }

  /* PyCObject_AsVoidPtr() returns NULL with an exception set when it is
     handed something that is not a C object; do not pass that on. */
  mygff=PyCObject_AsVoidPtr(gff);
  if (!mygff) {
    return NULL;
  }

  /* Output containers */
  SPLICING_PYCHECK(splicing_matrix_init(&samples, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &samples);
  SPLICING_PYCHECK(splicing_vector_init(&logLik, 0));
  SPLICING_FINALLY(splicing_vector_destroy, &logLik);
  SPLICING_PYCHECK(splicing_matrix_init(&class_templates, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &class_templates);
  SPLICING_PYCHECK(splicing_vector_init(&class_counts, 0));
  SPLICING_FINALLY(splicing_vector_destroy, &class_counts);
  SPLICING_PYCHECK(splicing_vector_int_init(&assignment, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &assignment);

  /* Inputs converted from Python objects */
  if (pysplicing_to_vector_int(readpos, &myreadpos)) {
    return NULL;
  }
  SPLICING_FINALLY(splicing_vector_int_destroy, &myreadpos);

  if (hyperp) {
    if (pysplicing_to_vector(hyperp, &myhyperp)) {
      return NULL;
    }
    SPLICING_FINALLY(splicing_vector_destroy, &myhyperp);
  } else {
    size_t i, noiso;
    SPLICING_PYCHECK(splicing_gff_noiso_one(mygff, gene, &noiso));
    SPLICING_PYCHECK(splicing_vector_init(&myhyperp, noiso));
    SPLICING_FINALLY(splicing_vector_destroy, &myhyperp);
    for (i=0; i<noiso; i++) {
      VECTOR(myhyperp)[i] = 1.0;
    }
  }

  if (pysplicing_to_strvector(readcigar, &myreadcigar)) {
    return NULL;
  }
  SPLICING_FINALLY(splicing_strvector_destroy, &myreadcigar);

  SPLICING_PYCHECK(splicing_miso(mygff, gene, &myreadpos,
                                 (const char**) myreadcigar.table,
                                 readLength, noIterations, noBurnIn, noLag,
                                 &myhyperp, &samples, &logLik,
                                 /*match_matrix=*/ 0, &class_templates,
                                 &class_counts, &assignment, &rundata));

  /* The inputs are not needed any more */
  splicing_vector_destroy(&myhyperp);
  splicing_vector_int_destroy(&myreadpos);
  splicing_strvector_destroy(&myreadcigar);
  SPLICING_FINALLY_CLEAN(3);

  /* Convert the results, releasing each C container right after it has
     been converted (reverse order of registration). */
  r6=pysplicing_from_miso_rundata(&rundata);

  r5=pysplicing_from_vector_int(&assignment);
  splicing_vector_int_destroy(&assignment);
  SPLICING_FINALLY_CLEAN(1);

  r4=pysplicing_from_vector(&class_counts);
  splicing_vector_destroy(&class_counts);
  SPLICING_FINALLY_CLEAN(1);

  splicing_matrix_transpose(&class_templates);
  r3=pysplicing_from_matrix(&class_templates);
  splicing_matrix_destroy(&class_templates);
  SPLICING_FINALLY_CLEAN(1);

  r2=pysplicing_from_vector(&logLik);
  splicing_vector_destroy(&logLik);
  SPLICING_FINALLY_CLEAN(1);

  r1=pysplicing_from_matrix(&samples);
  splicing_matrix_destroy(&samples);
  SPLICING_FINALLY_CLEAN(1);

  /* "N" hands our references over to the tuple. With "O" each result got
     an extra reference that nobody released. This assumes that the
     pysplicing_from_* helpers return new references -- confirm against
     their definitions. */
  return Py_BuildValue("NNNNNN", r1, r2, r3, r4, r5, r6);
}
int splicing_simulate_paired_reads(const splicing_gff_t *gff, int gene, const splicing_vector_t *expression, int noreads, int readLength, const splicing_vector_t *fragmentProb, int fragmentStart, double normalMean, double normalVar, double numDevs, splicing_vector_int_t *isoform, splicing_vector_int_t *position, splicing_strvector_t *cigar, splicing_vector_t *sampleprob) { size_t i, j, noiso, il, nogenes; splicing_vector_t *mysampleprob=sampleprob, vsampleprob; splicing_vector_t px, cpx; double sumpx, sumpsi=0.0; splicing_vector_int_t isolen; int goodiso=0; splicing_vector_int_t exstart, exend, exidx; splicing_vector_t *myfragmentProb=(splicing_vector_t*) fragmentProb, vfragmentProb; int fs, fl; SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes)); if (gene < 0 || gene >= nogenes) { SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL); } /* TODO: more error checks */ if (!fragmentProb) { myfragmentProb=&vfragmentProb; SPLICING_CHECK(splicing_vector_init(&vfragmentProb, 0)); SPLICING_FINALLY(splicing_vector_destroy, &vfragmentProb); SPLICING_CHECK(splicing_normal_fragment(normalMean, normalVar, numDevs, readLength, myfragmentProb, &fragmentStart)); splicing_vector_scale(myfragmentProb, 1.0/splicing_vector_sum(myfragmentProb)); } il=splicing_vector_size(myfragmentProb); fs=fragmentStart; fl=fragmentStart+il-1; SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso)); if ( fabs(splicing_vector_sum(myfragmentProb) - 1.0) > 1e-10 ) { SPLICING_ERROR("Fragment length distribution does not sum up to 1", SPLICING_EINVAL); } SPLICING_CHECK(splicing_vector_int_init(&isolen, noiso)); SPLICING_FINALLY(splicing_vector_int_destroy, &isolen); SPLICING_CHECK(splicing_gff_isolength_one(gff, gene, &isolen)); SPLICING_CHECK(splicing_vector_copy(&px, myfragmentProb)); SPLICING_FINALLY(splicing_vector_destroy, &px); SPLICING_CHECK(splicing_vector_init(&cpx, il)); SPLICING_FINALLY(splicing_vector_destroy, &cpx); if (!sampleprob) { mysampleprob=&vsampleprob; 
SPLICING_CHECK(splicing_vector_init(mysampleprob, noiso)); SPLICING_FINALLY(splicing_vector_destroy, mysampleprob); } else { SPLICING_CHECK(splicing_vector_resize(mysampleprob, noiso)); } for (sumpx=VECTOR(px)[0], i=1; i<il; i++) { VECTOR(px)[i] += VECTOR(px)[i-1]; sumpx += VECTOR(px)[i]; } VECTOR(cpx)[0] = VECTOR(px)[0]; for (i=1; i<il; i++) { VECTOR(cpx)[i] = VECTOR(cpx)[i-1] + VECTOR(px)[i]; } for (i=0; i<noiso; i++) { int ilen=VECTOR(isolen)[i]; int r1= ilen >= fl ? ilen - fl + 1 : 0; int r2= ilen >= fs ? (ilen >= fl ? fl - fs : ilen - fs + 1) : 0; /* int r3= fs - 1; */ double sp=0.0; if (r1 > 0) { sp += r1; } if (r2 > 0) { sp += VECTOR(cpx)[r2-1]; } VECTOR(*mysampleprob)[i] = sp * VECTOR(*expression)[i]; if (VECTOR(*mysampleprob)[i] != 0) { goodiso += 1; } sumpsi += VECTOR(*mysampleprob)[i]; } if (goodiso == 0) { SPLICING_ERROR("No isoform is possible", SPLICING_FAILURE); } for (i=1; i<noiso; i++) { VECTOR(*mysampleprob)[i] += VECTOR(*mysampleprob)[i-1]; } SPLICING_CHECK(splicing_vector_int_resize(isoform, noreads*2)); for (i=0; i<2*noreads; i+=2) { int w; double rand; if (noiso==1) { w=0; } else if (noiso==2) { rand = RNG_UNIF01() * sumpsi; w = (rand < VECTOR(*mysampleprob)[0]) ? 0 : 1; } else { rand = RNG_UNIF01() * sumpsi; for (w=0; rand > VECTOR(*mysampleprob)[w]; w++) ; } VECTOR(*isoform)[i]=VECTOR(*isoform)[i+1]=w; } if (!sampleprob) { splicing_vector_destroy(mysampleprob); SPLICING_FINALLY_CLEAN(1); } else { for (i=noiso-1; i>0; i--) { VECTOR(*mysampleprob)[i] -= VECTOR(*mysampleprob)[i-1]; } } /* We have the isoforms, now get the read positions. 
*/ SPLICING_CHECK(splicing_vector_int_resize(position, noreads*2)); SPLICING_CHECK(splicing_vector_int_init(&exstart, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exstart); SPLICING_CHECK(splicing_vector_int_init(&exend, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exend); SPLICING_CHECK(splicing_vector_int_init(&exidx, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exidx); SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx, gene)); /* Positions in isoform coordinates first. These are sampled based on the fragment length distribution. */ for (i=0, j=0; i<noreads; i++) { int iso=VECTOR(*isoform)[2*i]; int ilen=VECTOR(isolen)[iso]; int r1= ilen >= fl ? ilen - fl + 1 : 0; int r2= ilen >= fs ? (ilen >= fl ? fl - fs : ilen - fs + 1) : 0; /* int r3= fs - 1; */ int pos, fragment; double sp=0.0; if (r1 > 0) { sp += r1; } if (r2 > 0) { sp += VECTOR(cpx)[r2-1]; } double rand=RNG_UNIF(0, sp); if (rand < r1) { pos = ceil(rand); } else { int w; rand -= r1; for (w=0; VECTOR(cpx)[w] < rand; w++) ; pos = r1 + r2 - w; } if (pos <= r1) { rand=RNG_UNIF(0, 1.0); } else { rand=RNG_UNIF(0, VECTOR(px)[r1+r2-pos]); } for (fragment=0; VECTOR(px)[fragment] < rand; fragment++) ; fragment += fragmentStart; VECTOR(*position)[j++] = pos; VECTOR(*position)[j++] = pos+fragment-readLength; } /* Translate positions to genomic coordinates */ /* TODO: some of this is already calculated */ SPLICING_CHECK(splicing_iso_to_genomic(gff, gene, isoform, /*converter=*/ 0, position)); /* CIGAR strings */ splicing_strvector_clear(cigar); SPLICING_CHECK(splicing_strvector_reserve(cigar, 2*noreads)); for (j=0; j<2*noreads; j++) { char tmp[1000], *tmp2=tmp; int iso=VECTOR(*isoform)[j]; size_t rs=VECTOR(*position)[j]; int ex=0; int rl=readLength; for (ex=VECTOR(exidx)[iso]; VECTOR(exend)[ex] < rs; ex++) ; while (rs + rl - 1 > VECTOR(exend)[ex]) { tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM%iN", (int) (VECTOR(exend)[ex]-rs+1), (int) 
(VECTOR(exstart)[ex+1]-VECTOR(exend)[ex]-1)); if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) { SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL); } rl -= (VECTOR(exend)[ex] - rs + 1); rs = VECTOR(exstart)[ex+1]; ex++; } tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM", rl); if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) { SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL); } SPLICING_CHECK(splicing_strvector_append(cigar, tmp)); } splicing_vector_int_destroy(&exidx); splicing_vector_int_destroy(&exend); splicing_vector_int_destroy(&exstart); splicing_vector_destroy(&cpx); splicing_vector_destroy(&px); splicing_vector_int_destroy(&isolen); SPLICING_FINALLY_CLEAN(6); if (!fragmentProb) { splicing_vector_destroy(myfragmentProb); SPLICING_FINALLY_CLEAN(1); } return 0; }
int splicing_simulate_reads(const splicing_gff_t *gff, int gene, const splicing_vector_t *expression, int noreads, int readLength, splicing_vector_int_t *isoform, splicing_vector_int_t *position, splicing_strvector_t *cigar, splicing_vector_t *sample_prob) { size_t i, p, noiso, goodiso=0, nogenes; splicing_vector_int_t effisolen; splicing_vector_t sampleprob; double rand, sumpsi=0.0; splicing_vector_int_t exstart, exend, exidx; SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes)); if (gene < 0 || gene >= nogenes) { SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL); } /* TODO: more error checks */ SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso)); SPLICING_CHECK(splicing_vector_int_init(&effisolen, noiso)); SPLICING_FINALLY(splicing_vector_int_destroy, &effisolen); SPLICING_CHECK(splicing_vector_init(&sampleprob, noiso)); SPLICING_FINALLY(splicing_vector_destroy, &sampleprob); SPLICING_CHECK(splicing_vector_int_resize(isoform, noreads)); SPLICING_CHECK(splicing_gff_isolength_one(gff, gene, &effisolen)); for (i=0; i<noiso; i++) { int l=VECTOR(effisolen)[i]-readLength+1; VECTOR(effisolen)[i] = l > 0 ? l : 0; VECTOR(sampleprob)[i] = VECTOR(*expression)[i] * VECTOR(effisolen)[i]; if (VECTOR(sampleprob)[i] != 0) { goodiso++; } sumpsi += VECTOR(sampleprob)[i]; } if (goodiso==0) { SPLICING_ERROR("No isoform is possible", SPLICING_FAILURE); } if (sample_prob) { SPLICING_CHECK(splicing_vector_update(sample_prob, &sampleprob)); } for (i=1; i<noiso; i++) { VECTOR(sampleprob)[i] += VECTOR(sampleprob)[i-1]; } for (i=0; i<noreads; i++) { int w; if (noiso==1) { w=0; } else if (noiso==2) { rand = RNG_UNIF01() * sumpsi; w = (rand < VECTOR(sampleprob)[0]) ? 0 : 1; } else { rand = RNG_UNIF01() * sumpsi; for (w=0; rand > VECTOR(sampleprob)[w]; w++) ; } VECTOR(*isoform)[i]=w; } splicing_vector_destroy(&sampleprob); SPLICING_FINALLY_CLEAN(1); /* OK, we have the isoforms, now we need the read positions, these are uniformly sampled from the individual isoforms. 
*/ SPLICING_CHECK(splicing_vector_int_resize(position, noreads)); SPLICING_CHECK(splicing_vector_int_init(&exstart, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exstart); SPLICING_CHECK(splicing_vector_int_init(&exend, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exend); SPLICING_CHECK(splicing_vector_int_init(&exidx, 0)); SPLICING_FINALLY(splicing_vector_int_destroy, &exidx); SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx, gene)); /* Positions in isoform coordinates first */ for (i=0; i<noreads; i++) { int iso=VECTOR(*isoform)[i]; int len=VECTOR(effisolen)[iso]; VECTOR(*position)[i]=RNG_INTEGER(1, len); } /* Translate isoform coordinates to genomic coordintes */ /* TODO: some of this is already calculated */ SPLICING_CHECK(splicing_iso_to_genomic(gff, gene, isoform, /*converter=*/ 0, position)); /* CIGAR strings */ splicing_strvector_clear(cigar); SPLICING_CHECK(splicing_strvector_reserve(cigar, noreads)); for (i=0; i<noreads; i++) { char tmp[1000], *tmp2=tmp; int iso=VECTOR(*isoform)[i]; size_t rs=VECTOR(*position)[i]; int ex=0; int rl=readLength; for (ex=VECTOR(exidx)[iso]; VECTOR(exend)[ex] < rs; ex++) ; while (VECTOR(exend)[ex] < rs+rl-1) { tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM%iN", (int) (VECTOR(exend)[ex]-rs+1), (int) (VECTOR(exstart)[ex+1]-VECTOR(exend)[ex]-1)); if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) { SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL); } rl -= (VECTOR(exend)[ex] - rs + 1); rs = VECTOR(exstart)[ex+1]; ex++; } tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM", rl); if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) { SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL); } SPLICING_CHECK(splicing_strvector_append(cigar, tmp)); } splicing_vector_int_destroy(&exidx); splicing_vector_int_destroy(&exend); splicing_vector_int_destroy(&exstart); splicing_vector_int_destroy(&effisolen); SPLICING_FINALLY_CLEAN(4); return 0; }
/*
 * Complexity of a gene, computed from its assignment matrix.
 *
 * gff           The annotation.
 * gene          Index of the gene.
 * readLength    Read length.
 * type          Kind of complexity; only SPLICING_COMPLEXITY_RELATIVE is
 *               implemented.
 * norm          Matrix norm to use; only SPLICING_NORM_2 is implemented.
 * paired        Non-zero to use the paired-end assignment matrix, in
 *               which case fragmentProb, fragmentStart, normalMean,
 *               normalVar and numDevs are passed on to
 *               splicing_paired_assignment_matrix().
 * complexity    Result: for the relative 2-norm case the largest
 *               singular value of the assignment matrix divided by the
 *               smallest one that is at least 1e-14.
 *
 * Returns 0 on success. Unimplemented combinations raise
 * SPLICING_UNIMPLEMENTED, unknown `type' or `norm' values raise
 * SPLICING_EINVAL, and a matrix without any singular value of at least
 * 1e-14 raises SPLICING_FAILURE.
 */
int splicing_gene_complexity(const splicing_gff_t *gff, size_t gene,
                             int readLength, splicing_complexity_t type,
                             splicing_norm_t norm, int paired,
                             const splicing_vector_t *fragmentProb,
                             int fragmentStart, double normalMean,
                             double normalVar, double numDevs,
                             double *complexity) {

  splicing_matrix_t assignment_matrix;

  SPLICING_CHECK(splicing_matrix_init(&assignment_matrix, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &assignment_matrix);

  if (!paired) {
    SPLICING_CHECK(splicing_assignment_matrix(gff, gene, readLength,
                                              &assignment_matrix));
  } else {
    SPLICING_CHECK(splicing_paired_assignment_matrix(gff, gene, readLength,
                                                     fragmentProb,
                                                     fragmentStart,
                                                     normalMean, normalVar,
                                                     numDevs,
                                                     &assignment_matrix));
  }

  switch (type) {
  case SPLICING_COMPLEXITY_RELATIVE:
    switch (norm) {
    case SPLICING_NORM_2: {
      splicing_vector_t values;
      int i, n;

      SPLICING_CHECK(splicing_vector_init(&values, 0));
      SPLICING_FINALLY(splicing_vector_destroy, &values);
      SPLICING_CHECK(splicing_dgesdd(&assignment_matrix, &values));
      n=splicing_vector_size(&values);

      /* Skip the trailing singular values that are numerically zero */
      for (i=n-1; i>=0 && VECTOR(values)[i] < 1e-14; i--) ;

      /* Nothing left (or no values at all): the ratio is undefined and
         values[i] would be read before the start of the vector. */
      if (i < 0) {
        SPLICING_ERROR("Cannot calculate complexity, "
                       "no non-zero singular values", SPLICING_FAILURE);
      }
      *complexity = VECTOR(values)[0] / VECTOR(values)[i];

      splicing_vector_destroy(&values);
      SPLICING_FINALLY_CLEAN(1);
      break;
    }
    case SPLICING_NORM_1:
      SPLICING_ERROR("One norm not implemented", SPLICING_UNIMPLEMENTED);
      break;
    case SPLICING_NORM_INFINITY:
      SPLICING_ERROR("Infinity norm not implemented",
                     SPLICING_UNIMPLEMENTED);
      break;
    default:
      /* Do not return success with `*complexity' left unset */
      SPLICING_ERROR("Unknown norm", SPLICING_EINVAL);
      break;
    }
    break;
  case SPLICING_COMPLEXITY_ABSOLUTE:
    SPLICING_ERROR("Absolute complexity not implemented",
                   SPLICING_UNIMPLEMENTED);
    break;
  default:
    /* Do not return success with `*complexity' left unset */
    SPLICING_ERROR("Unknown complexity type", SPLICING_EINVAL);
    break;
  }

  splicing_matrix_destroy(&assignment_matrix);
  SPLICING_FINALLY_CLEAN(1);

  return 0;
}