Example #1
0
File: gff.c Project: mlovci/MISO
/*
 * Collect the exon start/end coordinates of all isoforms of one gene.
 *
 * gff    The annotation to read.
 * start  Cleared, then filled with the start coordinate of every exon
 *        record found for the gene, in record order.
 * end    Cleared, then filled with the matching exon end coordinates.
 * idx    Resized to (number of isoforms + 1). Entry i is set to the offset
 *        in start/end at which the exons of the i-th mRNA record begin; the
 *        entry after the last mRNA is set to the total exon count.
 * gene   Zero-based gene index, must be in [0, number of genes).
 *
 * Each time an isoform's exon range is complete, it is passed to
 * splicing_i_gff_exon_start_end_sort() (helper not shown here; presumably
 * it orders the exons within that isoform).
 *
 * Returns 0 on success. An invalid gene index is reported through
 * SPLICING_ERROR with SPLICING_EINVAL; failures of called routines are
 * handled by SPLICING_CHECK.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff,
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end,
                                splicing_vector_int_t *idx,
                                int gene) {

  size_t noiso;
  size_t nogenes;
  int i=0;                        /* number of mRNA records seen so far */
  int p=0;                        /* number of exons collected so far */
  int n=splicing_gff_size(gff);
  int pos;
  splicing_vector_int_t tmp, tmp2;

  /* Scratch vectors handed to the sort helper */
  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  /* gene is known to be non-negative before it is compared as unsigned */
  if (gene < 0 || (size_t) gene >= nogenes) {
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Start scanning at the record right after the gene record itself */
  pos=VECTOR(gff->genes)[gene]+1;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));

  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) {
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s));
      p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform begins here; the previous one (if any) is complete */
      VECTOR(*idx)[i] = p;
      if (i!=0) {
        SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx,
                                                          i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Reached the next gene, we are done */
      break;
    }
    pos++;
  }

  /* Close the last isoform. Without any mRNA record there is no isoform
     to sort, so do not call the helper with an index of -1. */
  VECTOR(*idx)[i] = p;
  if (i > 0) {
    SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1,
                                                      &tmp, &tmp2));
  }

  /* Both scratch vectors were registered with SPLICING_FINALLY, so both
     registrations must be removed once they are destroyed by hand. */
  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
Example #2
0
/*
 * Find, for every gene, the regions that are covered by an exon of every
 * isoform at the same time, and that are at least min_length long.
 *
 * gff         The annotation to read.
 * exons       Initialized here with splicing_exonset_init(exons, 0), then
 *             filled via splicing_exonset_append(). It is registered for
 *             cleanup while the function runs and released from the
 *             cleanup stack (not destroyed) on success.
 * min_length  Minimum length (end-start+1) of a reported region.
 *
 * Returns 0 on success; failures of called routines are handled by
 * SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
                                          splicing_exonset_t *exons,
                                          int min_length) {

  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    /* Records of gene g are in [start, end); the last gene runs to gff->n */
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Do not ignore a failure here, noiso would be left unset */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events: every exon contributes its start as a
       positive value and its end as a negative value */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        -VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
                   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive exons.
       noex is the number of exons currently open. */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) { noex++; }
      if (ev < 0) {
        int prev=VECTOR(events)[i-1];
        if (noex == noiso && (-ev)-prev+1 >= min_length) {
          /* constitutive exon */
          SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev, -ev));
        }
        noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
Example #3
0
/*
 * Find, for every gene, the exons that appear with exactly the same start
 * and end coordinates as many times as the gene has isoforms, and that are
 * at least min_length long.
 *
 * gff         The annotation to read.
 * exons       Initialized here with splicing_exonset_init(exons, 1), then
 *             filled via splicing_exonset_append(). It is registered for
 *             cleanup while the function runs and released from the
 *             cleanup stack (not destroyed) on success.
 * min_length  Minimum length (end-start+1) of a reported exon, the same
 *             criterion splicing_i_gff_constitutive_exons_all() applies.
 *
 * Returns 0 on success; failures of called routines are handled by
 * SPLICING_CHECK.
 */
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
                                           splicing_exonset_t *exons,
                                           int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  SPLICING_CHECK(splicing_exonset_init(exons, 1));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);

  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    /* Records of gene g are in [start, end); the last gene runs to gff->n */
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;

    /* Do not ignore a failure here, noiso would be left unset */
    SPLICING_CHECK(splicing_gff_noiso_one(gff, g, &noiso));

    /* Collect and sort all events: (start, end) pairs, one per exon */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
        SPLICING_CHECK(splicing_vector_int_push_back2
                       (&events, VECTOR(gff->start)[idx],
                        VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
                   splicing_i_const_cmp2);

    /* Now go over them and check how many times each exon appears.
       The scan starts at the very first exon, otherwise it could never be
       reported for a gene with a single isoform. */
    for (noSame=0, i=0; i<noExons*2; i+=2) {
      int exstart=VECTOR(events)[i];
      int exend=VECTOR(events)[i+1];
      if (i > 0 &&
          exstart == VECTOR(events)[i-2] && exend == VECTOR(events)[i-1]) {
        noSame++;
      } else {
        noSame=1;
      }
      if (noSame == noiso && exend-exstart+1 >= min_length) {
        /* constitutive exon */
        SPLICING_CHECK(splicing_exonset_append(exons, seqid, exstart, exend));
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}