示例#1
1
/* Compute the singular values of 'matrix' by calling splicingdgesdd_.
 *
 * The routine is run on a private copy of the input, so the caller's
 * (const) matrix is never handed to the numerical routine. Only singular
 * values are requested (jobz='N'); U and VT are passed as dummies.
 *
 * matrix: input matrix.
 * values: result vector, resized to min(nrow, ncol).
 *
 * Returns 0 on success. Raises SPLICING_ELAPACK if either the workspace
 * query or the actual decomposition reports a nonzero 'info'.
 */
int splicing_dgesdd(const splicing_matrix_t *matrix, 
		    splicing_vector_t *values) {

  splicing_matrix_t tmp;
  int m=splicing_matrix_nrow(matrix);
  int n=splicing_matrix_ncol(matrix);
  int lda=m, minmn= m < n ? m : n;
  int lwork=-1;
  int info=0;
  splicing_vector_t work;
  splicing_vector_int_t iwork;
  char jobz='N';
  int dummy=1;
  double dummy2;
  
  SPLICING_CHECK(splicing_matrix_copy(&tmp, matrix));
  SPLICING_FINALLY(splicing_matrix_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_init(&work, 1));
  SPLICING_FINALLY(splicing_vector_destroy, &work);
  SPLICING_CHECK(splicing_vector_int_init(&iwork, 8*minmn));
  SPLICING_FINALLY(splicing_vector_int_destroy, &iwork);

  SPLICING_CHECK(splicing_vector_resize(values, minmn));

  /* Get the optimal lwork first (lwork is -1 for this call) */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda, VECTOR(*values),
		  /*U=*/ &dummy2, /*LDU=*/ &dummy, 
		  /*VT=*/ &dummy2, /*LDVT=*/ &dummy, 
		  VECTOR(work), &lwork, VECTOR(iwork), &info);

  /* Do not trust work[0] if the query itself was rejected */
  if (info != 0) { 
    SPLICING_ERROR("Cannot calculate SVD", SPLICING_ELAPACK);
  }

  lwork = (int) VECTOR(work)[0];
  SPLICING_CHECK(splicing_vector_resize(&work, lwork));

  /* Now do the SVD */
  splicingdgesdd_(&jobz, &m, &n, &MATRIX(tmp,0,0), &lda, VECTOR(*values),
		  /*U=*/ &dummy2, /*LDU=*/ &dummy, 
		  /*VT=*/ &dummy2, /*LDVT=*/ &dummy, 
		  VECTOR(work), &lwork, VECTOR(iwork), &info);

  if (info != 0) { 
    SPLICING_ERROR("Cannot calculate SVD", SPLICING_ELAPACK);
  }

  splicing_vector_destroy(&work);
  splicing_vector_int_destroy(&iwork);
  splicing_matrix_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(3);
  
  return 0;
}
示例#2
0
文件: gff.c 项目: mlovci/MISO
/* Compute the total exon length of every transcript in 'gff'.
 *
 * gff:           annotation to scan.
 * isolength:     resized to the number of transcripts; entry k receives
 *                the summed (end - start + 1) of the exon records that
 *                follow the k-th mRNA record.
 * isolength_idx: resized to the number of genes; entry g receives the
 *                index in 'isolength' of the first transcript of gene g.
 *
 * Returns 0 on success.
 */
int splicing_gff_isolength(const splicing_gff_t *gff,
			   splicing_vector_int_t *isolength,
			   splicing_vector_int_t *isolength_idx) {

  size_t idx1;
  size_t nogenes=splicing_vector_int_size(&gff->genes);
  size_t notrans=splicing_vector_int_size(&gff->transcripts);
  int pos=-1, ipos=0;
  
  SPLICING_CHECK(splicing_vector_int_resize(isolength, notrans));
  SPLICING_CHECK(splicing_vector_int_resize(isolength_idx, nogenes));

  /* Without genes there is no first record to start from; reading
     genes[0] would be out of bounds */
  if (nogenes == 0) { return 0; }
  
  for (idx1=VECTOR(gff->genes)[0]; idx1 < gff->n; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) {
      /* The next transcript seen will be the first one of this gene */
      VECTOR(*isolength_idx)[ipos++]=pos+1;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
      VECTOR(*isolength)[++pos] = 0;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON) {
      VECTOR(*isolength)[pos] +=
	(VECTOR(gff->end)[idx1] - VECTOR(gff->start)[idx1] + 1);
    }
  }

  return 0;
}
示例#3
0
文件: miso.c 项目: mlovci/MISO
/* Combine the three score components of a state into a single value.
 *
 * The result stored in '*score' is the sum of
 *   - the 'isoscores' entries selected by the read assignments
 *     (assignments equal to -1 are skipped),
 *   - the value produced by splicing_score_iso(), and
 *   - the value produced by splicing_ldirichlet().
 *
 * Returns 0 on success; errors of the called functions are propagated
 * through SPLICING_CHECK.
 */
int splicing_score_joint(const splicing_vector_int_t *assignment,
			 int no_reads, const splicing_vector_t *psi, 
			 const splicing_vector_t *hyper, 
			 const splicing_vector_int_t *effisolen,
			 const splicing_vector_t *isoscores, 
			 double *score) {

  const int noiso = splicing_vector_int_size(effisolen);
  double readScore = 0.0;
  double assScore, psiScore;
  int r;

  /* Reads */
  for (r = 0; r < no_reads; r++) {
    const int iso = VECTOR(*assignment)[r];
    if (iso == -1) { continue; }
    readScore += VECTOR(*isoscores)[iso];
  }

  /* Isoforms */
  SPLICING_CHECK(splicing_score_iso(psi, noiso, assignment, no_reads,
				    effisolen, &assScore));
  SPLICING_CHECK(splicing_ldirichlet(psi, hyper, noiso, &psiScore));

  *score = readScore + assScore + psiScore;

  return 0;
}
示例#4
0
文件: gff.c 项目: mlovci/MISO
/* Reorder the exons of one isoform inside 'start' and 'end'.
 *
 * The exons of isoform 'iso' occupy positions idx[iso] .. idx[iso+1]-1.
 * A permutation of these positions is obtained by sorting with
 * splicing_i_gff_exon_start_end_sort_cmp (which is handed the isoform's
 * slice of 'start'), and the same permutation is then applied to both
 * 'start' and 'end'.
 *
 * tmp:  scratch vector, receives the permutation.
 * tmp2: scratch vector, receives a copy of the values being permuted.
 *
 * Returns 0 on success.
 */
int splicing_i_gff_exon_start_end_sort(const splicing_vector_int_t *start,
				       const splicing_vector_int_t *end,
				       const splicing_vector_int_t *idx, 
				       int iso, splicing_vector_int_t *tmp, 
				       splicing_vector_int_t *tmp2) {

  const int first = VECTOR(*idx)[iso];
  const int last = VECTOR(*idx)[iso+1];
  const int count = last - first;
  int k;

  SPLICING_CHECK(splicing_vector_int_resize(tmp, count));
  SPLICING_CHECK(splicing_vector_int_resize(tmp2, count));

  /* Start from the identity permutation and sort it */
  for (k = 0; k < count; k++) {
    VECTOR(*tmp)[k] = k;
  }
  splicing_qsort_r(VECTOR(*tmp), count, sizeof(int),
		   (void*) (VECTOR(*start) + first),
		   splicing_i_gff_exon_start_end_sort_cmp);

  /* Permute the start coordinates through a scratch copy */
  for (k = 0; k < count; k++) {
    VECTOR(*tmp2)[k] = VECTOR(*start)[first + k];
  }
  for (k = 0; k < count; k++) {
    VECTOR(*start)[first + k] = VECTOR(*tmp2)[ VECTOR(*tmp)[k] ];
  }

  /* Same for the end coordinates */
  for (k = 0; k < count; k++) {
    VECTOR(*tmp2)[k] = VECTOR(*end)[first + k];
  }
  for (k = 0; k < count; k++) {
    VECTOR(*end)[first + k] = VECTOR(*tmp2)[ VECTOR(*tmp)[k] ];
  }

  return 0;
}
示例#5
0
/* Count the exons of every isoform of one gene.
 *
 * gff:     annotation to query.
 * gene:    gene index; must be smaller than the number of genes.
 * noexons: resized to the number of mRNA records of the gene; entry k
 *          receives the number of exon records following the k-th mRNA.
 *
 * Returns 0 on success, raises SPLICING_EINVAL for an invalid gene index.
 */
int splicing_gff_noexons_one(const splicing_gff_t *gff, size_t gene,
			     splicing_vector_int_t *noexons) {

  size_t nogenes, idx1, idx2, noiso, pos, il;
  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  
  if (gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Records of this gene are [idx1, idx2) */
  idx1=VECTOR(gff->genes)[gene];
  idx2= gene+1 == nogenes ? gff->n : VECTOR(gff->genes)[gene+1];
  
  for (noiso=0; idx1 < idx2; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { noiso += 1; }    
  }

  SPLICING_CHECK(splicing_vector_int_resize(noexons, noiso));

  /* A gene without mRNA records has nothing to fill in; the trailing
     store below would otherwise write past an empty vector */
  if (noiso == 0) { return 0; }

  /* Skip to just after the first mRNA record */
  idx1=VECTOR(gff->genes)[gene];
  for (; idx1 < idx2 && VECTOR(gff->type)[idx1] != SPLICING_TYPE_MRNA; 
       idx1++) ;
  idx1++;

  /* Every further mRNA record closes the count of the previous isoform */
  for (pos=0, il=0; idx1 < idx2; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) {
      VECTOR(*noexons)[pos++]=il;
      il=0;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON) { il++; }
  }
  /* Last isoform */
  VECTOR(*noexons)[pos++]=il;

  return 0;
}
示例#6
0
文件: miso.c 项目: mlovci/MISO
/* Draw a new isoform assignment for every read.
 *
 * matches:     matrix with one column per read; the column of a read is
 *              compared bytewise (noiso doubles) with that of the
 *              previously processed read.
 * match_order: order in which the reads are processed.
 * psi:         not referenced directly here.
 *              NOTE(review): presumably used by the CUMSUM() macro.
 * noiso:       number of isoforms (rows of 'matches').
 * result:      resized to the number of reads; receives the chosen
 *              isoform of every read, or -1 if no isoform is valid.
 *
 * CUMSUM() is a macro defined outside this function.
 * NOTE(review): it presumably recomputes noValid, sumpsi, cumsum and
 * validIso from 'curr' -- confirm against its definition. The local
 * variable names below must stay as they are because of it.
 *
 * Returns 0 on success.
 */
int splicing_reassign_samples(const splicing_matrix_t *matches, 
			      const splicing_vector_int_t *match_order,
			      const splicing_vector_t *psi, 
			      int noiso, splicing_vector_int_t *result) {

  int noreads = splicing_matrix_ncol(matches);
  int i, w;
  double *prev, *curr;
  double rand, sumpsi;
  int noValid;
  int *order=VECTOR(*match_order);
  splicing_vector_t cumsum;
  splicing_vector_int_t validIso;  

  SPLICING_CHECK(splicing_vector_init(&cumsum, noiso));
  SPLICING_FINALLY(splicing_vector_destroy, &cumsum);
  SPLICING_CHECK(splicing_vector_int_init(&validIso, noiso));
  SPLICING_FINALLY(splicing_vector_int_destroy, &validIso);

  SPLICING_CHECK(splicing_vector_int_resize(result, noreads));

  /* Nothing to assign: release the temporaries and unregister them
     before leaving, exactly as on the regular exit path */
  if (noreads == 0) {
    splicing_vector_int_destroy(&validIso);
    splicing_vector_destroy(&cumsum);
    SPLICING_FINALLY_CLEAN(2);
    return 0;
  }

  prev = curr = &MATRIX(*matches, 0, order[0]);
  CUMSUM();

  for (i=0; i<noreads; i++) {
    curr = &MATRIX(*matches, 0, order[i]);

    /* Maybe we need to update the cumulative sum */
    if (memcmp(prev, curr, sizeof(double)*noiso) != 0) { CUMSUM(); }

    if (noValid == 0) {
      VECTOR(*result)[order[i]] = -1;
    } else if (noValid == 1) {
      VECTOR(*result)[order[i]] = VECTOR(validIso)[0];
    } else if (noValid == 2) { 
      rand = RNG_UNIF01() * sumpsi;
      w = (rand < VECTOR(cumsum)[0]) ? VECTOR(validIso)[0] : 
	VECTOR(validIso)[1];
      VECTOR(*result)[order[i]] = w;
    } else {
      /* Draw */
      rand = RNG_UNIF01() * sumpsi;
      /* TODO: Binary search for interval, if many classes */
      for (w=0; rand > VECTOR(cumsum)[w]; w++) ;
      VECTOR(*result)[order[i]] = VECTOR(validIso)[w];
    }

    prev=curr;
  }

  splicing_vector_int_destroy(&validIso);
  splicing_vector_destroy(&cumsum);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
示例#7
0
/* Append a gene, its isoforms and their exons to 'extend'.
 *
 * exons:    flat vector of coordinate pairs; exon k is stored at
 *           positions 2k (start) and 2k+1 (end).
 * isoforms: exon indices of the isoforms, one isoform after the other;
 *           a negative entry closes an isoform.
 * id, seqid, source, strand: passed on to splicing_gff_append(). The
 *           isoform records are named "<id>-isoform-<n>" and the exon
 *           records "<id>-isoform-<n>-exon-<m>".
 * extend:   annotation that receives the new records.
 *
 * The gene record spans the smallest to the largest value in 'exons';
 * each mRNA record spans the smallest start to the largest end of its
 * exons.
 *
 * Returns 0 on success.
 */
int splicing_create_gene(const splicing_vector_int_t *exons,
			 const splicing_vector_int_t *isoforms,
			 const char *id, const char *seqid, 
			 const char *source, splicing_strand_t strand,
			 splicing_gff_t *extend) {

  size_t i=0;
  size_t isolen=splicing_vector_int_size(isoforms);
  size_t genestart=splicing_vector_int_min(exons);
  size_t geneend=splicing_vector_int_max(exons);
  char buffer[5000], buffer2[5000];
  int noiso=0;
  
  /* TODO: error checks */
  
  /* Gene */
  SPLICING_CHECK(splicing_gff_append(extend, seqid, source, 
				     SPLICING_TYPE_GENE, 
				     genestart, geneend, 
				     /*score=*/ SPLICING_NA_REAL, 
				     strand, /*phase=*/ SPLICING_NA_INTEGER,
				     id, /*parent=*/ 0));

  while (i<isolen) {
    size_t mmin=VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] ];
    size_t mmax=VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] + 1 ];
    size_t j, exon=0;    

    /* Extent of the isoform. The scan is bounded by the vector length, 
       so a missing terminator after the last isoform cannot make it 
       run past the end of 'isoforms'. */
    for (j=i+1; j<isolen && VECTOR(*isoforms)[j] >= 0; j++) {
      size_t m1=VECTOR(*exons)[ 2*VECTOR(*isoforms)[j] ];
      size_t m2=VECTOR(*exons)[ 2*VECTOR(*isoforms)[j] + 1 ];      
      if (m1 < mmin) { mmin = m1; }
      if (m2 > mmax) { mmax = m2; }
    }

    /* mRNA */
    snprintf(buffer, sizeof(buffer)/sizeof(char)-sizeof(char), 
	     "%s-isoform-%i", id, noiso);    
    SPLICING_CHECK(splicing_gff_append(extend, seqid, source, 
				       SPLICING_TYPE_MRNA, mmin, mmax, 
				       /*score=*/ SPLICING_NA_REAL, strand,
				       /*phase=*/ SPLICING_NA_INTEGER,
				       buffer, /*parent=*/ id));

    /* Exons, bounded in the same way as the scan above */
    for (; i<isolen && VECTOR(*isoforms)[i] >= 0; i++) {
      snprintf(buffer2, sizeof(buffer2)/sizeof(char)-sizeof(char),
	       "%s-isoform-%i-exon-%i", id, (int) noiso, (int) exon++);
      SPLICING_CHECK(splicing_gff_append(extend, seqid, source, 
			 SPLICING_TYPE_EXON,
			 VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] ],
			 VECTOR(*exons)[ 2*VECTOR(*isoforms)[i] + 1 ],
			 /*score=*/ SPLICING_NA_REAL, strand,
			 /*phase=*/ SPLICING_NA_INTEGER, buffer2, 
			 /*parent=*/ buffer));
    }
    noiso++;
    i++;			/* step over the terminator */
  }
  
  return 0;
}
示例#8
0
/* Collect exonic regions that are covered by every isoform of a gene.
 *
 * For each gene, the exon starts (as positive numbers) and exon ends (as
 * negative numbers) are gathered into one event list and sorted with
 * splicing_i_const_cmp. While walking the sorted events, 'noex' counts
 * the currently open exons; when an end event is met while 'noex' equals
 * the number of isoforms and the region from the previous event to this
 * end is at least 'min_length' long, the region is appended to 'exons'.
 *
 * gff:        annotation to scan.
 * exons:      initialized here and filled with the result. It is 
 *             registered for cleanup while the function runs and is not
 *             destroyed on the regular exit path.
 * min_length: minimum length of a reported region.
 *
 * Returns 0 on success.
 */
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
					  splicing_exonset_t *exons,
					  int min_length) {

  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);
  
  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Checked, so that 'noiso' is never used uninitialized */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));
    
    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			-VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
		   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive exons */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) { noex++; } 
      if (ev < 0) { 
	int prev=VECTOR(events)[i-1];
	if (noex == noiso && (-ev)-prev+1 >= min_length) {
	  /* constitutive exon */
	  SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev, -ev));
	}
	noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
示例#9
0
/* Collect exons that appear, with identical start and end, in every
 * isoform of a gene.
 *
 * For each gene the (start, end) pairs of all exon records are gathered,
 * sorted as pairs with splicing_i_const_cmp2, and runs of identical
 * pairs are counted. A pair is appended to 'exons' when its run length 
 * reaches the number of isoforms of the gene.
 *
 * gff:        annotation to scan.
 * exons:      initialized here and filled with the result. It is 
 *             registered for cleanup while the function runs and is not
 *             destroyed on the regular exit path.
 * min_length: not used by this function.
 *
 * NOTE(review): the scan starts at the second sorted exon with a run 
 * length of one, so the first sorted exon is never itself compared 
 * against the isoform count (this matters for genes with one isoform).
 * Confirm whether that is intended.
 *
 * Returns 0 on success.
 */
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
					   splicing_exonset_t *exons,
					   int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  
  SPLICING_CHECK(splicing_exonset_init(exons, 1));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);
  
  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Checked, so that 'noiso' is never used uninitialized */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));
    
    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
		   splicing_i_const_cmp2);
    
    /* Now go over them and check how many times each exon appears */
    for (noSame=1, i=2; i<noExons*2; i+=2) { 
      /* Named differently from the record range above to avoid 
	 shadowing 'start' and 'end' */
      int exstart=VECTOR(events)[i];
      int exend=VECTOR(events)[i+1];
      if (exstart == VECTOR(events)[i-2] && VECTOR(events)[i-1] == exend) {
	noSame++;
      } else {
	noSame=1;
      }
      if (noSame == noiso) {
	SPLICING_CHECK(splicing_exonset_append(exons, seqid, exstart, 
					       exend));
      }
    }
  }
  
  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
示例#10
0
文件: gff.c 项目: mlovci/MISO
/* Print every gene of 'gff' to 'outfile', in gene order, by calling
 * splicing_gff_fprint_gene() once per gene.
 *
 * Returns 0 on success; the first error reported by a callee is
 * propagated through SPLICING_CHECK.
 */
int splicing_gff_fprint(const splicing_gff_t *gff, 
			FILE *outfile) {

  size_t nogenes;
  size_t gene = 0;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  while (gene < nogenes) {
    SPLICING_CHECK(splicing_gff_fprint_gene(gff, outfile, gene));
    gene++;
  }

  return 0;
}
示例#11
0
/* Initialize all members of an exon set as empty containers.
 *
 * ex:   the exon set to initialize; its seqids, seqid, start and end
 *       members are each initialized with length zero.
 * size: not referenced by this function.
 *
 * Each member is registered for cleanup right after it is initialized,
 * so that a failure in a later initialization can release the earlier
 * ones.
 *
 * Returns 0 on success.
 */
int splicing_exonset_init(splicing_exonset_t *ex, size_t size) {
  SPLICING_CHECK(splicing_strvector_init(&ex->seqids, 0));
  SPLICING_FINALLY(splicing_strvector_destroy, &ex->seqids);
  SPLICING_CHECK(splicing_vector_int_init(&ex->seqid, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->seqid);
  SPLICING_CHECK(splicing_vector_int_init(&ex->start, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->start);
  SPLICING_CHECK(splicing_vector_int_init(&ex->end, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &ex->end);
  /* NOTE(review): presumably drops the four registrations above without
     destroying the members, so they stay valid for the caller -- confirm
     against the macro definition */
  SPLICING_FINALLY_CLEAN(4);
  return 0;
}
示例#12
0
文件: gff.c 项目: mlovci/MISO
/* Extract the exon coordinates of all isoforms of one gene.
 *
 * gff:   annotation to query.
 * start: cleared, then receives the exon start coordinates.
 * end:   cleared, then receives the exon end coordinates.
 * idx:   resized to (number of isoforms)+1; the exons of isoform k are
 *        at positions idx[k] .. idx[k+1]-1 of 'start' and 'end'.
 * gene:  gene index, must be in [0, number of genes).
 *
 * The exons of every isoform are reordered with
 * splicing_i_gff_exon_start_end_sort() once the isoform is complete.
 *
 * Returns 0 on success, raises SPLICING_EINVAL for an invalid gene index.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff, 
				splicing_vector_int_t *start,
				splicing_vector_int_t *end,
				splicing_vector_int_t *idx,
				int gene) {
  
  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || (size_t) gene >= nogenes) { 
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* First record after the gene record itself */
  pos=VECTOR(gff->genes)[gene]+1;
  
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));
  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) { 
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s)); p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform starts here, so the previous one is complete */
      VECTOR(*idx)[i] = p;
      if (i!=0) { 
	SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, 
							  i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Next gene reached */
      break;
    }
    pos++;
  }
  VECTOR(*idx)[i] = p;

  /* Sort the last isoform. If no mRNA record was seen there is no 
     isoform i-1, and sorting would index 'idx' at -1. */
  if (i > 0) {
    SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1, 
						      &tmp, &tmp2));
  }

  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(2);	/* tmp and tmp2 were both registered */

  return 0;
}
示例#13
0
/* Convert one genomic position to the coordinate system of one isoform.
 *
 * gff, gene: used only to build a temporary converter when 'converter'
 *            is a null pointer.
 * isoform:   index of the isoform.
 * position:  genomic position to convert.
 * converter: optional, already initialized converter for the gene.
 * result:    receives 'position' minus the shift of the exon that
 *            contains it, or -1 if no exon of the isoform contains it.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso_1(const splicing_gff_t *gff, size_t gene,
			      int isoform, int position, 
			      const splicing_gff_converter_t *converter,
			      int *result) {

  splicing_gff_converter_t localconv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;
  size_t first, last, e;
  int inside;

  /* Build our own converter if the caller did not supply one */
  if (!converter) {
    conv = &localconv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  first = VECTOR(conv->exidx)[isoform];
  last = VECTOR(conv->exidx)[isoform+1];

  /* First exon of the isoform that does not end before the position */
  e = first;
  while (e < last && VECTOR(conv->exend)[e] < position) {
    e++;
  }

  inside = e < last && VECTOR(conv->exstart)[e] <= position &&
    position <= VECTOR(conv->exend)[e];

  if (inside) {
    *result = position - VECTOR(conv->shift)[e];
  } else {
    *result = -1;
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
示例#14
0
文件: gff.c 项目: mlovci/MISO
/* Copy the start and end coordinate of every gene record.
 *
 * Both output vectors are resized to the number of genes; entry g is
 * taken from the record that gff->genes[g] points to.
 *
 * Returns 0 on success.
 */
int splicing_gff_gene_start_end(const splicing_gff_t *gff, 
				splicing_vector_int_t *start,
				splicing_vector_int_t *end) {

  const size_t nogenes = splicing_vector_int_size(&gff->genes);
  size_t g;

  SPLICING_CHECK(splicing_vector_int_resize(start, nogenes));
  SPLICING_CHECK(splicing_vector_int_resize(end, nogenes));

  for (g = 0; g < nogenes; g++) {
    const size_t record = VECTOR(gff->genes)[g];
    VECTOR(*start)[g] = VECTOR(gff->start)[record];
    VECTOR(*end)[g] = VECTOR(gff->end)[record];
  }

  return 0;
}
示例#15
0
/* Set up a coordinate converter for one gene.
 *
 * The converter stores the number of isoforms, the exon starts and ends
 * with their per-isoform index (as filled in by
 * splicing_gff_exon_start_end()), and two derived vectors with one entry
 * per exon:
 *   shift: (sum of exon starts up to and including this exon)
 *          - (sum of exon ends before it) - (number of exons before it)
 *          - 1, computed separately within each isoform;
 *   exlim: (summed length of the isoform's exons up to and including
 *          this one) + 1.
 *
 * Returns 0 on success.
 */
int splicing_gff_converter_init(const splicing_gff_t *gff, size_t gene,
				splicing_gff_converter_t *converter) {

  int iso;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &converter->noiso));

  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exstart, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exend, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exidx, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->shift, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exlim, 0);

  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &converter->exstart,
					     &converter->exend,
					     &converter->exidx, gene));

  /* Shifts, one per exon */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t sumstart = 0, sumend = 0, before = 0;
    const int stop = VECTOR(converter->exidx)[iso+1];
    int p;
    for (p = VECTOR(converter->exidx)[iso]; p < stop; p++) {
      sumstart += VECTOR(converter->exstart)[p];
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->shift,
						   sumstart-sumend-before-1));
      before++;
      sumend += VECTOR(converter->exend)[p];
    }
  }

  /* Cumulative exon lengths, one per exon */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t total = 0;
    const int stop = VECTOR(converter->exidx)[iso+1];
    int p;
    for (p = VECTOR(converter->exidx)[iso]; p < stop; p++) {
      const size_t len =
	VECTOR(converter->exend)[p] - VECTOR(converter->exstart)[p]+1;
      total += len;
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->exlim,
						   total+1));
    }
  }

  SPLICING_FINALLY_CLEAN(5);

  return 0;
}
示例#16
0
/* Append one exon to an exon set.
 *
 * The sequence id is looked up among the ids already stored in the set;
 * if it is not there yet, it is appended and the exon refers to the new
 * entry. The index of the sequence id, the start and the end are then
 * pushed to the corresponding vectors.
 *
 * Returns 0 on success.
 */
int splicing_exonset_append(splicing_exonset_t *ex, const char *seqid, 
			    int start, int end) {

  size_t seqno;

  if (!splicing_strvector_search(&ex->seqids, seqid, &seqno)) {
    /* Unknown sequence id: it will be stored at the current end */
    seqno = splicing_strvector_size(&ex->seqids);
    SPLICING_CHECK(splicing_strvector_append(&ex->seqids, seqid));
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&ex->seqid, seqno));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->end, end));

  return 0;
}
示例#17
0
/* Convert genomic positions to isoform coordinates, for all isoforms.
 *
 * gff, gene: used only to build a temporary converter when 'converter'
 *            is a null pointer.
 * position:  genomic positions, one per read.
 * converter: optional, already initialized converter for the gene.
 * isopos:    resized to (number of isoforms) x (number of positions);
 *            element (i, r) receives position r minus the shift of the
 *            exon of isoform i that contains it, or -1 if isoform i has
 *            no exon containing it.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
			    const splicing_vector_int_t *position, 
			    const splicing_gff_converter_t *converter,
			    splicing_matrix_int_t *isopos) {

  const size_t noreads = splicing_vector_int_size(position);
  size_t read, iso;
  splicing_gff_converter_t localconv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  /* Build our own converter if the caller did not supply one */
  if (!converter) {
    conv = &localconv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, conv->noiso, noreads));

  for (read = 0; read < noreads; read++) {
    const size_t pos = VECTOR(*position)[read];
    for (iso = 0; iso < conv->noiso; iso++) {
      const size_t first = VECTOR(conv->exidx)[iso];
      const size_t last = VECTOR(conv->exidx)[iso+1];
      int ex = first;

      /* First exon of the isoform that does not end before 'pos' */
      while (ex < last && VECTOR(conv->exend)[ex] < pos) {
	ex++;
      }

      if (ex >= last || VECTOR(conv->exstart)[ex] > pos ||
	  pos > VECTOR(conv->exend)[ex]) {
	MATRIX(*isopos, iso, read) = -1;
      } else {
	MATRIX(*isopos, iso, read) = VECTOR(*position)[read] -
	  VECTOR(conv->shift)[ex];
      }
    }
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
示例#18
0
/* Convert one isoform coordinate to a genomic position, for every
 * isoform of a gene.
 *
 * gff, gene: used only to build a temporary converter when 'converter'
 *            is a null pointer.
 * position:  isoform coordinate, must be at least one.
 * converter: optional, already initialized converter for the gene.
 * result:    resized to the number of isoforms; entry i receives
 *            'position' plus the shift of the first exon of isoform i
 *            whose 'exlim' value is larger than 'position', or -1 if
 *            the isoform has no such exon.
 *
 * Returns 0 on success, raises SPLICING_EINVAL if position < 1.
 */
int splicing_iso_to_genomic_all(const splicing_gff_t *gff, size_t gene,
				int position, 
				const splicing_gff_converter_t *converter,
				splicing_vector_int_t *result) {

  size_t iso;
  splicing_gff_converter_t localconv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  if (position < 1) {
    SPLICING_ERROR("Invalid isoform coordinate, must the larger than zero", 
		   SPLICING_EINVAL);
  }

  /* Build our own converter if the caller did not supply one */
  if (!converter) {
    conv = &localconv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, conv->noiso));

  /* TODO: find impossible positions */
  for (iso = 0; iso < conv->noiso; iso++) {
    const int stop = VECTOR(conv->exidx)[iso+1];
    int ex = VECTOR(conv->exidx)[iso];

    while (ex < stop && VECTOR(conv->exlim)[ex] <= position) {
      ex++;
    }

    VECTOR(*result)[iso] =
      (ex < stop) ? position + VECTOR(conv->shift)[ex] : -1;
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
示例#19
0
文件: gff.c 项目: mlovci/MISO
/* Convert genomic positions to isoform coordinates, for all isoforms of
 * a gene.
 *
 * gff:      annotation to query.
 * gene:     gene index.
 * position: genomic positions, one per read.
 * isopos:   resized to (number of isoforms) x (number of positions);
 *           element (i, r) receives position r minus the shift of the
 *           exon of isoform i that contains it, or -1 if isoform i has
 *           no exon containing it.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
			    const splicing_vector_int_t *position, 
			    splicing_matrix_int_t *isopos) {

  size_t r, i, noiso, noreads=splicing_vector_int_size(position);
  splicing_vector_int_t exstart, exend, exidx, shift;
  
  /* Checked, so that 'noiso' is never used uninitialized */
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  
  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend,
					     &exidx, gene));

  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);
  
  /* Per-exon shift between genomic and isoform coordinates */
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(exidx)[i], pos2=VECTOR(exidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(exstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++; ce += VECTOR(exend)[pos]; pos++;
    }
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, noiso, noreads));
  
  for (r=0; r<noreads; r++) {
    for (i=0; i<noiso; i++) {
      size_t pos=VECTOR(*position)[r];
      size_t startpos=VECTOR(exidx)[i];
      size_t endpos=VECTOR(exidx)[i+1];
      int ex;
      for (ex=startpos; ex < endpos && VECTOR(exend)[ex] < pos; ex++) ;
      /* 'ex' equals 'endpos' if the position lies behind the last exon
	 of the isoform; it must not be used as an index then */
      if (ex < endpos && VECTOR(exstart)[ex] <= pos && 
	  pos <= VECTOR(exend)[ex]) {
	MATRIX(*isopos, i, r) = VECTOR(*position)[r] - VECTOR(shift)[ex];
      } else { 
	MATRIX(*isopos, i, r) = -1;
      }
    }
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
示例#20
0
文件: gff.c 项目: mlovci/MISO
/* Count the isoforms of one gene and optionally compute their lengths.
 *
 * gff:    annotation to query.
 * gene:   gene index; must be smaller than the number of genes.
 * noiso:  receives the number of mRNA records of the gene.
 * isolen: if not a null pointer, it is resized to '*noiso' and entry k 
 *         receives the summed (end - start + 1) of the exon records 
 *         following the k-th mRNA record.
 *
 * Returns 0 on success, raises SPLICING_EINVAL for an invalid gene index.
 */
int splicing_i_gff_noiso_one(const splicing_gff_t *gff, size_t gene,
			     size_t *noiso, splicing_vector_int_t *isolen) {

  size_t nogenes, idx1, idx2;
  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  
  /* 'gene' is unsigned, so only the upper bound needs checking */
  if (gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Records of this gene are [idx1, idx2) */
  idx1=VECTOR(gff->genes)[gene];
  idx2= gene+1 == nogenes ? gff->n : VECTOR(gff->genes)[gene+1];
  
  *noiso = 0;
  for ( ; idx1 < idx2; idx1++) {
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { *noiso += 1; }
  }

  if (isolen) {
    size_t il=0, pos=0;
    SPLICING_CHECK(splicing_vector_int_resize(isolen, *noiso));

    /* A gene without mRNA records has nothing to fill in; the trailing
       store below would otherwise write past an empty vector */
    if (*noiso == 0) { return 0; }

    /* Skip to just after the first mRNA record */
    idx1=VECTOR(gff->genes)[gene];
    for (; idx1 < idx2 && VECTOR(gff->type)[idx1] != SPLICING_TYPE_MRNA; 
	 idx1++) ;
    idx1++;

    /* Every further mRNA record closes the length of the previous one */
    for (; idx1 < idx2; idx1++) {
      if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { 
	VECTOR(*isolen)[pos++]=il;
	il = 0;
      } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_EXON) {
	il += VECTOR(gff->end)[idx1] - VECTOR(gff->start)[idx1] + 1;
      }
    }
    /* Last isoform */
    VECTOR(*isolen)[pos++]=il;
  }
  
  return 0;
}
示例#21
0
文件: gff.c 项目: mlovci/MISO
/* Count the isoforms (mRNA records) of every gene.
 *
 * gff:   annotation to scan.
 * noiso: resized to the number of genes and zeroed; entry g receives the
 *        number of mRNA records that belong to gene g.
 *
 * Returns 0 on success.
 */
int splicing_gff_noiso(const splicing_gff_t *gff, 
		       splicing_vector_int_t *noiso) {

  size_t nogenes, idx1, pos=0;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  
  SPLICING_CHECK(splicing_vector_int_resize(noiso, nogenes));
  splicing_vector_int_null(noiso);

  /* Without genes there is no first record to start from; reading
     genes[0] would be out of bounds */
  if (nogenes == 0) { return 0; }

  /* Step over the first gene record, so that every gene record met in
     the loop below starts the next gene */
  idx1=VECTOR(gff->genes)[0];
  if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) { idx1++; }
  for (; idx1 < gff->n; idx1++) { 
    if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_MRNA) { 
      VECTOR(*noiso)[pos] += 1;
    } else if (VECTOR(gff->type)[idx1] == SPLICING_TYPE_GENE) {
      pos++;
    }
  }

  return 0;
}
示例#22
0
文件: miso.c 项目: mlovci/MISO
/* Drift proposal, with three modes of operation selected by 'mode'.
 *
 * mode 0 (init):    'respsi' is resized to noiso and 'resalpha' to
 *                   noiso-1. For noiso != 2, every psi is set to 1/noiso
 *                   and every alpha to 1/(noiso-1). For noiso == 2, the
 *                   first psi is a uniform random number, the second is
 *                   its complement, and the single alpha is zero.
 *                   '*ressigma' is set to 0.05.
 * mode 1 (propose): a new alpha vector of length noiso-1 is drawn with
 *                   splicing_mvrnorm() around 'alpha', transformed with
 *                   splicing_logit_inv() into the first noiso-1 psi
 *                   values, and the last psi is one minus their sum.
 * mode 2 (score):   '*resscore' is computed by splicing_mvplogisnorm()
 *                   from 'psi', 'otheralpha' and 'sigma'.
 *
 * Any other mode does nothing. 'otherpsi' is not used by any mode.
 *
 * Returns 0 on success.
 */
int splicing_drift_proposal(int mode, 
			    const splicing_vector_t *psi, 
			    const splicing_vector_t *alpha, 
			    double sigma, 
			    const splicing_vector_t *otherpsi, 
			    const splicing_vector_t *otheralpha, int noiso,
			    splicing_vector_t *respsi, 
			    splicing_vector_t *resalpha,
			    double *ressigma, double *resscore) {

  switch (mode) {
  case 0: 			/* init */
    {
      SPLICING_CHECK(splicing_vector_resize(respsi, noiso));
      SPLICING_CHECK(splicing_vector_resize(resalpha, noiso-1));
      if (noiso != 2) {
	int i;
	for (i=0; i<noiso; i++) {	
	  VECTOR(*respsi)[i] = 1.0/noiso; 
	}
	for (i=0; i<noiso-1; i++) { 
	  VECTOR(*resalpha)[i] = 1.0/(noiso-1);
	}
      } else {
	VECTOR(*respsi)[0] = RNG_UNIF01();
	VECTOR(*respsi)[1] = 1 - VECTOR(*respsi)[0];
	/* 'resalpha' has noiso-1 == 1 element here, so only index 0
	   exists */
	VECTOR(*resalpha)[0] = 0.0;
      }
      *ressigma = 0.05;
    }
    break;
  case 1:			/* propose */
    {
      int len=noiso-1;
      double sumpsi=0.0;
  
      /* Reserve room for the last psi before the first len are filled */
      SPLICING_CHECK(splicing_vector_reserve(respsi, len+1));
      SPLICING_CHECK(splicing_mvrnorm(alpha, sigma, resalpha, len));
      SPLICING_CHECK(splicing_logit_inv(resalpha, respsi, len));
      sumpsi = splicing_vector_sum(respsi);
      SPLICING_CHECK(splicing_vector_resize(respsi, len+1));
      VECTOR(*respsi)[len] = 1-sumpsi;
    }
    break;
  case 2: 			/* score */
    SPLICING_CHECK(splicing_mvplogisnorm(psi, otheralpha, sigma, noiso-1, 
					 resscore));
    break;
  default:			/* unknown mode: nothing to do */
    break;
  }
  
  return 0;
}
示例#23
0
/* Convert one genomic position to isoform coordinates, for every
 * isoform of a gene.
 *
 * gff, gene: used only to build a temporary converter when 'converter'
 *            is a null pointer.
 * position:  genomic position to convert.
 * converter: optional, already initialized converter for the gene.
 * result:    resized to the number of isoforms; entry i receives
 *            'position' minus the shift of the exon of isoform i that
 *            contains it, or -1 if isoform i has no such exon.
 *
 * Returns 0 on success.
 */
int splicing_genomic_to_iso_all(const splicing_gff_t *gff, size_t gene,
				int position, 
				const splicing_gff_converter_t *converter,
				splicing_vector_int_t *result) {

  int iso;
  splicing_gff_converter_t localconv;
  splicing_gff_converter_t *conv = (splicing_gff_converter_t*) converter;

  /* Build our own converter if the caller did not supply one */
  if (!converter) {
    conv = &localconv;
    SPLICING_CHECK(splicing_gff_converter_init(gff, gene, conv));
    SPLICING_FINALLY(splicing_gff_converter_destroy, conv);
  }

  SPLICING_CHECK(splicing_vector_int_resize(result, conv->noiso));

  for (iso = 0; iso < conv->noiso; iso++) {
    const size_t first = VECTOR(conv->exidx)[iso];
    const size_t last = VECTOR(conv->exidx)[iso+1];
    int ex = first;
    int mapped = -1;

    /* First exon of the isoform that does not end before the position */
    while (ex < last && VECTOR(conv->exend)[ex] < position) {
      ex++;
    }

    if (ex < last && VECTOR(conv->exstart)[ex] <= position &&
	position <= VECTOR(conv->exend)[ex]) {
      mapped = position - VECTOR(conv->shift)[ex];
    }
    VECTOR(*result)[iso] = mapped;
  }

  if (!converter) {
    splicing_gff_converter_destroy(conv);
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}
示例#24
0
文件: miso.c 项目: mlovci/MISO
/* Draw 'len' independent normal deviates centered on 'mu'.
 *
 * Entry i of 'resalpha' is mu[i] plus a scale factor times a
 * RNG_NORMAL(0,1) draw. The scale factor is 'sigma' itself when len is
 * one and sqrt(sigma) otherwise. 'resalpha' is resized to 'len'.
 *
 * Returns 0 on success.
 */
int splicing_mvrnorm(const splicing_vector_t *mu, double sigma, 
		     splicing_vector_t *resalpha, int len) {
  double scale;
  int k;

  if (len == 1) {
    scale = sigma;
  } else {
    scale = sqrt(sigma);
  }

  SPLICING_CHECK(splicing_vector_resize(resalpha, len));

  for (k = 0; k < len; k++) {
    VECTOR(*resalpha)[k] = VECTOR(*mu)[k] + scale * RNG_NORMAL(0,1);
  }

  return 0;
}
示例#25
0
文件: miso.c 项目: mlovci/MISO
/* Compute the acceptance ratio for a proposed state.
 *
 * The joint score of the proposed (psiNew) and of the current (psi)
 * state is computed with splicing_score_joint(), and the two proposal
 * scores with splicing_drift_proposal() in mode 2.
 *
 * full:    if nonzero, the proposal scores enter the ratio; otherwise
 *          only the two joint scores are used.
 * acceptP: receives exp() of the score difference.
 * pcJS:    receives the joint score of the current state.
 * ppJS:    receives the joint score of the proposed state.
 *
 * Returns 0 on success.
 */
int splicing_metropolis_hastings_ratio(const splicing_vector_int_t *ass,
				       int no_reads,
				       const splicing_vector_t *psiNew,
				       const splicing_vector_t *alphaNew,
				       const splicing_vector_t *psi, 
				       const splicing_vector_t *alpha,
				       double sigma,
				       int noiso, 
				       const splicing_vector_int_t *effisolen,
				       const splicing_vector_t *hyperp, 
				       const splicing_vector_t *isoscores,
				       int full, double *acceptP, 
				       double *pcJS, double *ppJS) {
  double proposedJoint, currentJoint;
  double propToCurr, currToProp;

  /* Joint scores: proposed state first, then the current one */
  SPLICING_CHECK(splicing_score_joint(ass, no_reads, psiNew, hyperp,
				      effisolen, isoscores,
				      &proposedJoint));
  SPLICING_CHECK(splicing_score_joint(ass, no_reads, psi, hyperp,
				      effisolen, isoscores,
				      &currentJoint));

  /* Proposal scores in both directions */
  SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 2, psi, alpha, sigma,
					 psiNew, alphaNew, noiso, 0, 0, 0,
					 &propToCurr));
  SPLICING_CHECK(splicing_drift_proposal(/* mode= */ 2, psiNew, alphaNew,
					 sigma, psi, alpha, noiso, 0, 0, 0,
					 &currToProp));

  if (!full) {
    *acceptP = exp(proposedJoint - currentJoint);
  } else {
    *acceptP = exp(proposedJoint + propToCurr - (currentJoint + currToProp));
  }

  *pcJS = currentJoint;
  *ppJS = proposedJoint;

  return 0;
}
示例#26
0
文件: gff.c 项目: mlovci/MISO
/* Print the isoforms of one gene, with the start-end pair of each of
 * their exons, to 'outfile'.
 *
 * gene: gene index, must be in [0, number of genes).
 *
 * Returns 0 on success, raises SPLICING_EINVAL for an invalid gene index.
 */
int splicing_gff_fprint_gene(const splicing_gff_t *gff, 
			     FILE *outfile, int gene) {

  size_t nogenes, noiso;
  int iso;
  splicing_vector_int_t start, end, idx;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  if (gene < 0 || gene >= nogenes) {
    SPLICING_ERROR("Invalid gene ID", SPLICING_EINVAL);
  }

  SPLICING_CHECK(splicing_vector_int_init(&start, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &start);
  SPLICING_CHECK(splicing_vector_int_init(&end, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &end);
  SPLICING_CHECK(splicing_vector_int_init(&idx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &idx);

  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &start, &end, &idx, gene));

  /* 'idx' has one more entry than there are isoforms */
  noiso = splicing_vector_int_size(&idx)-1;

  fprintf(outfile, "===\nGene with %i isoforms:\n", (int) noiso);

  iso = 0;
  while (iso < noiso) {
    int ex = VECTOR(idx)[iso];
    fprintf(outfile, "  Isoform %i:\n", iso);
    while (ex < VECTOR(idx)[iso+1]) {
      fprintf(outfile, "    %i-%i\n", VECTOR(start)[ex], VECTOR(end)[ex]);
      ex++;
    }
    iso++;
  }

  splicing_vector_int_destroy(&idx);
  splicing_vector_int_destroy(&end);
  splicing_vector_int_destroy(&start);
  SPLICING_FINALLY_CLEAN(3);

  return 0;
}
示例#27
0
文件: gff.c 项目: mlovci/MISO
/* Reserve storage for 'size' records in the per-record vectors of 'gff':
 * type, start, end, score, phase and parent.
 *
 * Returns 0 on success; a failing reservation is propagated through
 * SPLICING_CHECK.
 */
int splicing_gff_reserve(splicing_gff_t *gff, size_t size) {

  SPLICING_CHECK(splicing_vector_int_reserve(&gff->type, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->start, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->end, size));
  /* 'score' is the only real-valued vector of the six */
  SPLICING_CHECK(splicing_vector_reserve(&gff->score, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->phase, size));
  SPLICING_CHECK(splicing_vector_int_reserve(&gff->parent, size));

  return 0;
}
示例#28
0
文件: miso.c 项目: mlovci/MISO
/* Inverse logit transformation of the first 'len' elements of 'x'.
 *
 * 'res' is resized to 'len' and entry i is set to
 *   exp(x[i]) / (1 + sum_j exp(x[j])).
 *
 * Returns 0 on success.
 */
int splicing_logit_inv(const splicing_vector_t *x, 
		       splicing_vector_t *res, int len) {
  double denom = 0.0;
  int k;

  SPLICING_CHECK(splicing_vector_resize(res, len));

  /* Denominator: the exponentials are summed first, one is added last */
  k = 0;
  while (k < len) {
    denom += exp(VECTOR(*x)[k]);
    k++;
  }
  denom += 1.0;

  k = 0;
  while (k < len) {
    VECTOR(*res)[k] = exp(VECTOR(*x)[k]) / denom;
    k++;
  }

  return 0;
}
示例#29
0
文件: miso.c 项目: mlovci/MISO
/* Score the read assignments given psi and the effective isoform
 * lengths.
 *
 * For every isoform, log(psi) + log(effective length) is computed and
 * then normalized by subtracting the logarithm of the sum of the
 * exponentials of these values (the maximum is subtracted before 
 * exponentiating and added back afterwards). The score stored in '*res'
 * is the sum of the normalized values selected by the read assignments;
 * assignments equal to -1 are skipped.
 *
 * Returns 0 on success.
 */
int splicing_score_iso(const splicing_vector_t *psi, int noiso, 
		       const splicing_vector_int_t *assignment, int noreads,
		       const splicing_vector_int_t *peffisolen, double *res) {
  splicing_vector_t logpsi;
  double largest, norm, total;
  int k;

  SPLICING_CHECK(splicing_vector_init(&logpsi, noiso));
  SPLICING_FINALLY(splicing_vector_destroy, &logpsi);

  /* Unnormalized values, and their maximum */
  VECTOR(logpsi)[0] = log(VECTOR(*psi)[0]) + log(VECTOR(*peffisolen)[0]);
  largest = VECTOR(logpsi)[0];
  for (k = 1; k < noiso; k++) {
    VECTOR(logpsi)[k] = log(VECTOR(*psi)[k]) + log(VECTOR(*peffisolen)[k]);
    if (VECTOR(logpsi)[k] > largest) {
      largest = VECTOR(logpsi)[k];
    }
  }

  /* Normalization factor */
  norm = 0.0;
  for (k = 0; k < noiso; k++) {
    norm += exp(VECTOR(logpsi)[k]-largest);
  }
  norm = log(norm) + largest;

  for (k = 0; k < noiso; k++) {
    VECTOR(logpsi)[k] -= norm;
  }

  /* Sum over the assigned reads */
  total = 0.0;
  for (k = 0; k < noreads; k++) {
    const int iso = VECTOR(*assignment)[k];
    if (iso == -1) { continue; }
    total += VECTOR(logpsi)[iso];
  }

  splicing_vector_destroy(&logpsi);
  SPLICING_FINALLY_CLEAN(1);

  *res = total;
  return 0;
}
示例#30
0
/* Tabulate splicing_dnorm() over a range of fragment lengths.
 *
 * The range goes from mean - numDevs*sd to mean + numDevs*sd (both 
 * converted to int), where sd = sqrt(normalVar). Its lower end is raised
 * to 'minLength' if needed, and its upper end is never below the lower 
 * end.
 *
 * fragmentProb:  resized to the number of lengths in the range; entry j
 *                is splicing_dnorm(lower end + j, normalMean, sd).
 * fragmentStart: receives the lower end of the range.
 *
 * Returns 0 on success.
 */
int splicing_normal_fragment(double normalMean, double normalVar, 
			     double numDevs, int minLength, 
			     splicing_vector_t *fragmentProb,
			     int *fragmentStart) {

  const double sd = sqrt(normalVar);
  int lower, upper, k;

  lower = normalMean - sd * numDevs;
  upper = normalMean + sd * numDevs;
  if (lower < minLength) { lower = minLength; }
  if (upper < lower) { upper = lower; }
  *fragmentStart = lower;

  SPLICING_CHECK(splicing_vector_resize(fragmentProb, upper - lower + 1));

  for (k = 0; lower + k <= upper; k++) {
    VECTOR(*fragmentProb)[k] = splicing_dnorm(lower + k, normalMean, sd);
  }

  return 0;
}