Beispiel #1
0
int splicing_vector_rank(const splicing_vector_t *v, splicing_vector_t *res,
		       long int nodes) {
  
  splicing_vector_t rad;
  splicing_vector_t ptr;
  long int edges = splicing_vector_size(v);
  long int i, c=0;
  
  SPLICING_VECTOR_INIT_FINALLY(&rad, nodes);
  SPLICING_VECTOR_INIT_FINALLY(&ptr, edges);
  splicing_vector_resize(res, edges);
	       
  for (i=0; i<edges; i++) {
    long int elem=VECTOR(*v)[i];
    VECTOR(ptr)[i] = VECTOR(rad)[elem];
    VECTOR(rad)[elem] = i+1;
  }
  
  for (i=0; i<nodes; i++) {
    long int p=VECTOR(rad)[i];
    while (p != 0) {      
      VECTOR(*res)[p-1]=c++;
      p=VECTOR(ptr)[p-1];
    }
  }

  splicing_vector_destroy(&ptr);
  splicing_vector_destroy(&rad);
  SPLICING_FINALLY_CLEAN(2);
  return 0;
}
Beispiel #2
0
PyObject *pysplicing_from_vector(const splicing_vector_t *v) {
  int i, n=splicing_vector_size(v);
  PyObject *o=PyTuple_New(n);
  for (i=0; i<n; i++) {
    PyObject *it=PyFloat_FromDouble(VECTOR(*v)[i]);
    PyTuple_SetItem(o, i, it);
  }
  return o;
}
Beispiel #3
0
int splicing_vector_round(const splicing_vector_t *from, splicing_vector_long_t *to) {
  long int i, n=splicing_vector_size(from);
  
  splicing_vector_long_resize(to, n);
  for (i=0; i<n; i++) {
    VECTOR(*to)[i] = round(VECTOR(*from)[i]);
  }
  return 0;
}
Beispiel #4
0
int splicing_vector_order1(const splicing_vector_t* v,
			 splicing_vector_t* res, double nodes) {
  long int edges=splicing_vector_size(v);
  splicing_vector_t ptr;
  splicing_vector_t rad;
  long int i, j;

  assert(v!=NULL);
  assert(v->stor_begin != NULL);

  SPLICING_VECTOR_INIT_FINALLY(&ptr, nodes+1);
  SPLICING_VECTOR_INIT_FINALLY(&rad, edges);
  splicing_vector_resize(res, edges);
  
  for (i=0; i<edges; i++) {
    long int radix=v->stor_begin[i];
    if (VECTOR(ptr)[radix]!=0) {
      VECTOR(rad)[i]=VECTOR(ptr)[radix];
    }
    VECTOR(ptr)[radix]=i+1;
  }
  
  j=0;
  for (i=0; i<nodes+1; i++) {
    if (VECTOR(ptr)[i] != 0) {
      long int next=VECTOR(ptr)[i]-1;
      res->stor_begin[j++]=next;
      while (VECTOR(rad)[next] != 0) {
	next=VECTOR(rad)[next]-1;
	res->stor_begin[j++]=next;
      }
    }
  }
  
  splicing_vector_destroy(&ptr);
  splicing_vector_destroy(&rad);
  SPLICING_FINALLY_CLEAN(2);
  
  return 0;
}
Beispiel #5
0
int splicing_gene_complexity(const splicing_gff_t *gff, size_t gene,
			     int readLength, splicing_complexity_t type,
			     splicing_norm_t norm, int paired,
			     const splicing_vector_t *fragmentProb,
			     int fragmentStart, double normalMean, 
			     double normalVar, double numDevs,
			     double *complexity) {
  
  splicing_matrix_t assignment_matrix;

  SPLICING_CHECK(splicing_matrix_init(&assignment_matrix, 0, 0));
  SPLICING_FINALLY(splicing_matrix_destroy, &assignment_matrix);

  if (!paired) {
    SPLICING_CHECK(splicing_assignment_matrix(gff, gene, readLength, 
					      &assignment_matrix));
  } else {
    SPLICING_CHECK(splicing_paired_assignment_matrix(gff, gene, readLength, 
						     fragmentProb, 
						     fragmentStart,
						     normalMean, normalVar,
						     numDevs, 
						     &assignment_matrix));
  }

  switch (type) {
  case SPLICING_COMPLEXITY_RELATIVE:
    switch (norm) {
      splicing_vector_t values;
      int i, n;

    case SPLICING_NORM_2:

      SPLICING_CHECK(splicing_vector_init(&values, 0));
      SPLICING_FINALLY(splicing_vector_destroy, &values);
      SPLICING_CHECK(splicing_dgesdd(&assignment_matrix, &values));
      n=splicing_vector_size(&values);
      for (i=n-1; i>=0 && VECTOR(values)[i] < 1e-14; i--) ;
      *complexity = VECTOR(values)[0] / VECTOR(values)[i];
      splicing_vector_destroy(&values);
      SPLICING_FINALLY_CLEAN(1);
      break;

    case SPLICING_NORM_1:

      SPLICING_ERROR("One norm not implemented", SPLICING_UNIMPLEMENTED);
      break;

    case SPLICING_NORM_INFINITY:

      SPLICING_ERROR("Infinity norm not implemented", SPLICING_UNIMPLEMENTED);
      break;

    }
    break;
  case SPLICING_COMPLEXITY_ABSOLUTE:
    SPLICING_ERROR("Absolute complexity not implemented", 
		   SPLICING_UNIMPLEMENTED);
    break;
  }

  splicing_matrix_destroy(&assignment_matrix);
  SPLICING_FINALLY_CLEAN(1);

  return 0;
}
Beispiel #6
0
int splicing_simulate_paired_reads(const splicing_gff_t *gff, int gene,
				   const splicing_vector_t *expression,
				   int noreads, int readLength,
				   const splicing_vector_t *fragmentProb,
				   int fragmentStart, double normalMean,
				   double normalVar, double numDevs,
				   splicing_vector_int_t *isoform,
				   splicing_vector_int_t *position,
				   splicing_strvector_t *cigar, 
				   splicing_vector_t *sampleprob) {
  
  size_t i, j, noiso, il, nogenes;
  splicing_vector_t *mysampleprob=sampleprob, vsampleprob;
  splicing_vector_t px, cpx;
  double sumpx, sumpsi=0.0;
  splicing_vector_int_t isolen;
  int goodiso=0;
  splicing_vector_int_t exstart, exend, exidx;
  splicing_vector_t *myfragmentProb=(splicing_vector_t*) fragmentProb,
    vfragmentProb;
  int fs, fl;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* TODO: more error checks */

  if (!fragmentProb) { 
    myfragmentProb=&vfragmentProb;
    SPLICING_CHECK(splicing_vector_init(&vfragmentProb, 0));
    SPLICING_FINALLY(splicing_vector_destroy, &vfragmentProb);
    SPLICING_CHECK(splicing_normal_fragment(normalMean, normalVar, numDevs, 
					    readLength, myfragmentProb,
					    &fragmentStart));
    splicing_vector_scale(myfragmentProb, 
			  1.0/splicing_vector_sum(myfragmentProb));
  }

  il=splicing_vector_size(myfragmentProb);
  fs=fragmentStart;
  fl=fragmentStart+il-1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
    
  if ( fabs(splicing_vector_sum(myfragmentProb) - 1.0) > 1e-10 ) {
    SPLICING_ERROR("Fragment length distribution does not sum up to 1", 
		   SPLICING_EINVAL);
  }

  SPLICING_CHECK(splicing_vector_int_init(&isolen, noiso));
  SPLICING_FINALLY(splicing_vector_int_destroy, &isolen);
  SPLICING_CHECK(splicing_gff_isolength_one(gff, gene, &isolen));
  
  SPLICING_CHECK(splicing_vector_copy(&px, myfragmentProb));
  SPLICING_FINALLY(splicing_vector_destroy, &px);
  SPLICING_CHECK(splicing_vector_init(&cpx, il));
  SPLICING_FINALLY(splicing_vector_destroy, &cpx);

  if (!sampleprob) {
    mysampleprob=&vsampleprob;
    SPLICING_CHECK(splicing_vector_init(mysampleprob, noiso));
    SPLICING_FINALLY(splicing_vector_destroy, mysampleprob);
  } else {
    SPLICING_CHECK(splicing_vector_resize(mysampleprob, noiso));
  }

  for (sumpx=VECTOR(px)[0], i=1; i<il; i++) {
    VECTOR(px)[i] += VECTOR(px)[i-1];
    sumpx += VECTOR(px)[i];
  }
  VECTOR(cpx)[0] = VECTOR(px)[0];
  for (i=1; i<il; i++) {
    VECTOR(cpx)[i] = VECTOR(cpx)[i-1] + VECTOR(px)[i];
  }

  for (i=0; i<noiso; i++) {
    int ilen=VECTOR(isolen)[i];
    int r1= ilen >= fl ? ilen - fl + 1 : 0;
    int r2= ilen >= fs ? (ilen >= fl ? fl - fs : ilen - fs + 1) : 0;
    /* int r3= fs - 1; */
    double sp=0.0;
    if (r1 > 0) { sp += r1; } 
    if (r2 > 0) { sp += VECTOR(cpx)[r2-1]; }
    VECTOR(*mysampleprob)[i] = sp * VECTOR(*expression)[i];
    if (VECTOR(*mysampleprob)[i] != 0) { goodiso += 1; }
    sumpsi += VECTOR(*mysampleprob)[i];
  }

  if (goodiso == 0) {
    SPLICING_ERROR("No isoform is possible", SPLICING_FAILURE);
  }

  for (i=1; i<noiso; i++) {
    VECTOR(*mysampleprob)[i] += VECTOR(*mysampleprob)[i-1];
  }

  SPLICING_CHECK(splicing_vector_int_resize(isoform, noreads*2));

  for (i=0; i<2*noreads; i+=2) {
    int w;
    double rand;
    if (noiso==1) {
      w=0;
    } else if (noiso==2) {
      rand = RNG_UNIF01() * sumpsi;
      w = (rand < VECTOR(*mysampleprob)[0]) ? 0 : 1;
    } else {
      rand = RNG_UNIF01() * sumpsi;
      for (w=0; rand > VECTOR(*mysampleprob)[w]; w++) ;
    }
    VECTOR(*isoform)[i]=VECTOR(*isoform)[i+1]=w;
  }

  if (!sampleprob) { 
    splicing_vector_destroy(mysampleprob);
    SPLICING_FINALLY_CLEAN(1);
  } else {
    for (i=noiso-1; i>0; i--) {
      VECTOR(*mysampleprob)[i] -= VECTOR(*mysampleprob)[i-1];
    }
  }

  /* We have the isoforms, now get the read positions. */
  
  SPLICING_CHECK(splicing_vector_int_resize(position, noreads*2));
  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend, &exidx,
					     gene));
  
  /* Positions in isoform coordinates first. 
     These are sampled based on the fragment length distribution. */

  for (i=0, j=0; i<noreads; i++) {
    int iso=VECTOR(*isoform)[2*i];
    int ilen=VECTOR(isolen)[iso];
    int r1= ilen >= fl ? ilen - fl + 1 : 0;
    int r2= ilen >= fs ? (ilen >= fl ? fl - fs : ilen - fs + 1) : 0;
    /* int r3= fs - 1; */
    int pos, fragment;
    double sp=0.0;
    if (r1 > 0) { sp += r1; } 
    if (r2 > 0) { sp += VECTOR(cpx)[r2-1]; }
    double rand=RNG_UNIF(0, sp);
    if (rand < r1) { 
      pos = ceil(rand);
    } else {
      int w;
      rand -= r1;
      for (w=0; VECTOR(cpx)[w] < rand; w++) ;
      pos = r1 + r2 - w;
    }

    if (pos <= r1) {
      rand=RNG_UNIF(0, 1.0);
    } else {
      rand=RNG_UNIF(0, VECTOR(px)[r1+r2-pos]);
    }
    for (fragment=0; VECTOR(px)[fragment] < rand; fragment++) ;
    fragment += fragmentStart;

    VECTOR(*position)[j++] = pos;
    VECTOR(*position)[j++] = pos+fragment-readLength;
    
  }

  /* Translate positions to genomic coordinates */

  /* TODO: some of this is already calculated */
  SPLICING_CHECK(splicing_iso_to_genomic(gff, gene, isoform, /*converter=*/ 0,
					 position));

  /* CIGAR strings */

  splicing_strvector_clear(cigar);
  SPLICING_CHECK(splicing_strvector_reserve(cigar, 2*noreads));
  for (j=0; j<2*noreads; j++) {
    char tmp[1000], *tmp2=tmp;
    int iso=VECTOR(*isoform)[j];
    size_t rs=VECTOR(*position)[j];
    int ex=0;
    int rl=readLength;
    for (ex=VECTOR(exidx)[iso]; VECTOR(exend)[ex] < rs; ex++) ;
    while (rs + rl - 1 > VECTOR(exend)[ex]) {
      tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM%iN",
		       (int) (VECTOR(exend)[ex]-rs+1), 
		       (int) (VECTOR(exstart)[ex+1]-VECTOR(exend)[ex]-1));
      if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) {
	SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL);
      }
      rl -= (VECTOR(exend)[ex] - rs + 1);
      rs = VECTOR(exstart)[ex+1];
      ex++;
    }
    tmp2 += snprintf(tmp2, sizeof(tmp)/sizeof(char)-(tmp2-tmp)-1, "%iM", rl);
    if (tmp2 >= tmp + sizeof(tmp)/sizeof(char)) {
      SPLICING_ERROR("CIGAR string too long", SPLICING_EINVAL);
    }
    SPLICING_CHECK(splicing_strvector_append(cigar, tmp));
  }

  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  splicing_vector_destroy(&cpx);
  splicing_vector_destroy(&px);
  splicing_vector_int_destroy(&isolen);
  SPLICING_FINALLY_CLEAN(6);

  if (!fragmentProb) { 
    splicing_vector_destroy(myfragmentProb); 
    SPLICING_FINALLY_CLEAN(1);
  }

  return 0;
}