Esempio n. 1
0
File: gff.c Progetto: mlovci/MISO
/*
 * Collect the exon coordinates of one gene, grouped by isoform.
 *
 * The GFF records following the record of gene `gene` are scanned until
 * the next gene record or the end of the table:
 *   - every exon record appends its start/end to `start`/`end`;
 *   - every mRNA record opens a new isoform: the current exon count is
 *     stored in `idx`, and the previously opened isoform is handed to
 *     splicing_i_gff_exon_start_end_sort() (defined elsewhere; presumably
 *     it orders that isoform's exons -- confirm against its definition).
 *
 * gff    the GFF table to read.
 * start  cleared, then filled with exon start coordinates.
 * end    cleared, then filled with exon end coordinates.
 * idx    resized to noiso+1; idx[k] is the offset in start/end of the
 *        first exon seen after the k-th mRNA record, and the entry after
 *        the last isoform holds the total number of exons.
 * gene   index of the gene, must be in [0, number of genes).
 *
 * Returns 0 on success. An out-of-range `gene` is reported through
 * SPLICING_ERROR with SPLICING_EINVAL; failures of the called routines
 * are propagated through SPLICING_CHECK.
 */
int splicing_gff_exon_start_end(const splicing_gff_t *gff, 
                                splicing_vector_int_t *start,
                                splicing_vector_int_t *end,
                                splicing_vector_int_t *idx,
                                int gene) {
  
  size_t noiso;
  int i=0, p=0, n=splicing_gff_size(gff);
  int pos;
  size_t nogenes;
  splicing_vector_int_t tmp, tmp2;

  /* Scratch vectors passed to the sorting helper. */
  SPLICING_CHECK(splicing_vector_int_init(&tmp, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp);
  SPLICING_CHECK(splicing_vector_int_init(&tmp2, 10));
  SPLICING_FINALLY(splicing_vector_int_destroy, &tmp2);

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  if (gene < 0 || (size_t) gene >= nogenes) { 
    SPLICING_ERROR("Invalid gene id", SPLICING_EINVAL);
  }

  /* Start with the record right after the gene record itself. */
  pos=VECTOR(gff->genes)[gene]+1;
  
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  splicing_vector_int_clear(start);
  splicing_vector_int_clear(end);
  SPLICING_CHECK(splicing_vector_int_resize(idx, noiso+1));
  while (pos < n) {
    if (VECTOR(gff->type)[pos] == SPLICING_TYPE_EXON) { 
      int s=VECTOR(gff->start)[pos];
      int e=VECTOR(gff->end)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(start, s)); p++;
      SPLICING_CHECK(splicing_vector_int_push_back(end, e));
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_MRNA) {
      /* A new isoform begins here; the previous one (if any) is complete. */
      VECTOR(*idx)[i] = p;
      if (i!=0) { 
        SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, 
                                                          i-1, &tmp, &tmp2));
      }
      i++;
    } else if (VECTOR(gff->type)[pos] == SPLICING_TYPE_GENE) {
      /* Next gene reached, we are done with this one. */
      break;
    }
    pos++;
  }

  /* Close the last isoform. Without any mRNA record there is no isoform
     to sort, and i-1 would be an invalid isoform index. */
  VECTOR(*idx)[i] = p;
  if (i > 0) {
    SPLICING_CHECK(splicing_i_gff_exon_start_end_sort(start, end, idx, i-1, 
                                                      &tmp, &tmp2));
  }

  /* Two cleanup handlers were registered above (tmp and tmp2), so two
     entries must be removed from the finally stack. */
  splicing_vector_int_destroy(&tmp2);
  splicing_vector_int_destroy(&tmp);
  SPLICING_FINALLY_CLEAN(2);

  return 0;
}
Esempio n. 2
0
/*
 * Initialize a coordinate converter for one gene.
 *
 * Stores the number of isoforms of `gene` in converter->noiso, fills
 * converter->exstart / exend / exidx via splicing_gff_exon_start_end(),
 * and derives two further per-exon vectors:
 *
 *   shift  for each exon: (sum of the exon starts of the isoform up to and
 *          including this exon) - (sum of the ends of the preceding exons
 *          of the isoform) - (number of preceding exons) - 1;
 *   exlim  for each exon: (total length of the isoform's exons up to and
 *          including this exon) + 1, where an exon's length is
 *          end - start + 1.
 *
 * Returns 0 on success; failures of the called routines are propagated
 * through SPLICING_CHECK.
 */
int splicing_gff_converter_init(const splicing_gff_t *gff, size_t gene,
                                splicing_gff_converter_t *converter) {

  int iso;

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &converter->noiso));

  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exstart, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exend, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exidx, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->shift, 0);
  SPLICING_VECTOR_INT_INIT_FINALLY(&converter->exlim, 0);

  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &converter->exstart,
                                             &converter->exend,
                                             &converter->exidx, gene));

  /* Per-exon shift values, isoform by isoform. */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t startsum = 0, endsum = 0, nprev = 0;
    int first = VECTOR(converter->exidx)[iso];
    int last = VECTOR(converter->exidx)[iso + 1];
    int k;
    for (k = first; k < last; k++, nprev++) {
      startsum += VECTOR(converter->exstart)[k];
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->shift,
                                                   startsum - endsum -
                                                   nprev - 1));
      endsum += VECTOR(converter->exend)[k];
    }
  }

  /* Per-exon cumulative length limits, isoform by isoform. */
  for (iso = 0; iso < converter->noiso; iso++) {
    size_t cumlen = 0;
    int first = VECTOR(converter->exidx)[iso];
    int last = VECTOR(converter->exidx)[iso + 1];
    int k;
    for (k = first; k < last; k++) {
      size_t exlen =
        VECTOR(converter->exend)[k] - VECTOR(converter->exstart)[k] + 1;
      cumlen += exlen;
      SPLICING_CHECK(splicing_vector_int_push_back(&converter->exlim,
                                                   cumlen + 1));
    }
  }

  /* The five vectors now belong to the converter. */
  SPLICING_FINALLY_CLEAN(5);

  return 0;
}
Esempio n. 3
0
/*
 * Append one exon to an exon set.
 *
 * The sequence id string is looked up in ex->seqids; if it is not there
 * yet it is appended, and its index is the size of the string vector
 * before the append. That index is pushed to ex->seqid, and the
 * coordinates are pushed to ex->start and ex->end.
 *
 * Returns 0 on success; failures of the vector operations are propagated
 * through SPLICING_CHECK.
 */
int splicing_exonset_append(splicing_exonset_t *ex, const char *seqid, 
                            int start, int end) {

  size_t seqno;

  if (!splicing_strvector_search(&ex->seqids, seqid, &seqno)) {
    /* First occurrence of this sequence id: register it. */
    seqno = splicing_strvector_size(&ex->seqids);
    SPLICING_CHECK(splicing_strvector_append(&ex->seqids, seqid));
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&ex->seqid, seqno));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&ex->end, end));

  return 0;
}
Esempio n. 4
0
File: gff.c Progetto: mlovci/MISO
/*
 * Convert genomic positions to isoform coordinates for every isoform of
 * a gene.
 *
 * gff       the GFF table.
 * gene      index of the gene.
 * position  genomic positions to convert.
 * isopos    resized to (number of isoforms) x (number of positions);
 *           entry (i, r) receives position[r] minus the shift of the exon
 *           of isoform i that contains it, or -1 if no exon of isoform i
 *           contains position[r].
 *
 * The exon scan stops at the first exon whose end is not below the
 * position, so it assumes the exons of each isoform are in increasing
 * order as delivered by splicing_gff_exon_start_end() -- TODO confirm.
 *
 * Returns 0 on success; failures of the called routines are propagated
 * through SPLICING_CHECK.
 */
int splicing_genomic_to_iso(const splicing_gff_t *gff, size_t gene,
                            const splicing_vector_int_t *position, 
                            splicing_matrix_int_t *isopos) {

  size_t r, i, noiso, noreads=splicing_vector_int_size(position);
  splicing_vector_int_t exstart, exend, exidx, shift;
  
  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));
  
  SPLICING_CHECK(splicing_vector_int_init(&exstart, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exstart);
  SPLICING_CHECK(splicing_vector_int_init(&exend, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exend);
  SPLICING_CHECK(splicing_vector_int_init(&exidx, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exidx);
  SPLICING_CHECK(splicing_gff_exon_start_end(gff, &exstart, &exend,
                                             &exidx, gene));

  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);
  
  /* Per-exon shift: (sum of exon starts so far) - (sum of the ends of the
     preceding exons) - (number of preceding exons) - 1, restarted for
     every isoform. */
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(exidx)[i], pos2=VECTOR(exidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(exstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++; ce += VECTOR(exend)[pos]; pos++;
    }
  }

  SPLICING_CHECK(splicing_matrix_int_resize(isopos, noiso, noreads));
  
  for (r=0; r<noreads; r++) {
    int pos=VECTOR(*position)[r];
    for (i=0; i<noiso; i++) {
      int startpos=VECTOR(exidx)[i];
      int endpos=VECTOR(exidx)[i+1];
      int ex;
      /* Skip the exons of this isoform that end before the position. */
      for (ex=startpos; ex < endpos && VECTOR(exend)[ex] < pos; ex++) ;
      /* ex == endpos means the position lies beyond the last exon of the
         isoform; exstart[ex]/exend[ex] would then belong to the next
         isoform or lie past the end of the vectors. */
      if (ex < endpos && 
          VECTOR(exstart)[ex] <= pos && pos <= VECTOR(exend)[ex]) {
        MATRIX(*isopos, i, r) = pos - VECTOR(shift)[ex];
      } else { 
        MATRIX(*isopos, i, r) = -1;
      }
    }
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exidx);
  splicing_vector_int_destroy(&exend);
  splicing_vector_int_destroy(&exstart);
  SPLICING_FINALLY_CLEAN(4);

  return 0;
}
Esempio n. 5
0
File: gff.c Progetto: mlovci/MISO
/*
 * Append one record to a GFF table.
 *
 * gff     the table to extend.
 * seqid   sequence id string; recorded for gene records only.
 * source  source string; recorded for gene records only.
 * type    record type; gene and mRNA records are additionally counted and
 *         their record index is stored in gff->genes / gff->transcripts.
 * start, end, score, phase
 *         stored as given, for every record.
 * strand  stored for gene records only.
 * ID      id string of the record, appended to gff->ID.
 * parent  id string of the parent record; NULL or "" means no parent.
 *
 * The parent is stored as a record index, or -1 if there is no parent or
 * the parent id is unknown (a warning is issued in the latter case).
 * gff->last_seqid, last_source, last_gene_id and last_mrna_id cache the
 * most recently used strings so that repeated values skip the string
 * vector search; they are passed to strcmp(), so they are assumed to be
 * non-NULL on entry -- presumably set up by the initializer, confirm.
 *
 * Returns 0 on success; failures of the vector operations are propagated
 * through SPLICING_CHECK.
 */
int splicing_gff_append(splicing_gff_t *gff, const char *seqid, 
                        const char *source, splicing_type_t type, int start,
                        int end, double score, splicing_strand_t strand,
                        int phase, const char *ID, const char *parent) {

  if (type == SPLICING_TYPE_GENE) { 
    gff->nogenes++; 
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->genes, gff->n));
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->strand, strand));
  } else if (type == SPLICING_TYPE_MRNA) { 
    gff->notranscripts++; 
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->transcripts, gff->n));
  }

  if (type == SPLICING_TYPE_GENE) {

    /* Seqid */
    if (!strcmp(seqid, gff->last_seqid)) {
      /* Same as for the previous gene, reuse its index. */
      int last=splicing_vector_int_tail(&gff->seqid);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->seqids, seqid, &idx);
      if (seen) { 
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, idx));
        gff->last_seqid=splicing_strvector_get(&gff->seqids, idx);
      } else {
        size_t size=splicing_strvector_size(&gff->seqids);
        SPLICING_CHECK(splicing_strvector_append(&gff->seqids, seqid));
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, size));
        /* The seqid cache must be updated here, not the source cache. */
        gff->last_seqid=splicing_strvector_get(&gff->seqids, size);
      }
    }

    /* Source */
    if (!strcmp(source, gff->last_source)) {
      /* Same as for the previous gene, reuse its index. */
      int last=splicing_vector_int_tail(&gff->source);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->sources, source, &idx);
      if (seen) { 
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, idx));
        gff->last_source=splicing_strvector_get(&gff->sources, idx);
      } else {
        size_t size=splicing_strvector_size(&gff->sources);
        SPLICING_CHECK(splicing_strvector_append(&gff->sources, source));
        SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, size));
        gff->last_source=splicing_strvector_get(&gff->sources, size);
      }
    }

  }

  /* Parent: try the cached last gene and last mRNA ids before searching
     through all ids. */
  if (!parent || !parent[0]) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
  } else if (!strcmp(parent, gff->last_gene_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, 
                                                 gff->last_gene_no));
  } else if (!strcmp(parent, gff->last_mrna_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, 
                                                 gff->last_mrna_no));
  } else {
    size_t idx;
    int seen=splicing_strvector_search(&gff->ID, parent, &idx);
    if (!seen) { 
      SPLICING_WARNING("Unknown parent ID, invalid GFF file");
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
    } else {
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, idx));
    }
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&gff->type, type));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->end, end));
  SPLICING_CHECK(splicing_vector_push_back(&gff->score, score));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->phase, phase));
  SPLICING_CHECK(splicing_strvector_append(&gff->ID, ID));
  
  /* Update last gene/mrna */
  if (type == SPLICING_TYPE_GENE) { 
    gff->last_gene_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_gene_no = gff->n;
  } else if (type == SPLICING_TYPE_MRNA) {
    gff->last_mrna_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_mrna_no = gff->n;
  }

  gff->n += 1;

  return 0;
}
Esempio n. 6
0
File: gff.c Progetto: mlovci/MISO
/*
 * Convert isoform coordinates to genomic coordinates, in place.
 *
 * gff       the GFF table.
 * gene      index of the gene.
 * isoform   isoform[i] is the isoform that position[i] refers to; must be
 *           in [0, number of isoforms of the gene).
 * exstart, exend, exidx
 *           optional exon start/end/index vectors of the gene, in the
 *           layout produced by splicing_gff_exon_start_end(). If any of
 *           them is NULL, all three are computed here instead. The
 *           supplied vectors are only read.
 * position  on input the isoform coordinates; each entry is replaced by
 *           the entry plus the shift of the exon it falls into.
 *
 * Returns 0 on success. An isoform index out of range, or a position that
 * does not fall into any exon of its isoform, is reported through
 * SPLICING_ERROR with SPLICING_EINVAL; entries of `position` converted
 * before the offending one stay converted. Failures of the called
 * routines are propagated through SPLICING_CHECK.
 */
int splicing_iso_to_genomic(const splicing_gff_t *gff, size_t gene, 
                            const splicing_vector_int_t *isoform,
                            const splicing_vector_int_t *exstart,
                            const splicing_vector_int_t *exend,
                            const splicing_vector_int_t *exidx,
                            splicing_vector_int_t *position) {

  size_t i, noiso, n=splicing_vector_int_size(position);
  splicing_vector_int_t exlim, shift;
  splicing_vector_int_t vexstart, vexend, vexidx, 
    *myexstart=(splicing_vector_int_t *) exstart, 
    *myexend=(splicing_vector_int_t *) exend, 
    *myexidx=(splicing_vector_int_t *) exidx;

  if (!exstart || !exend || !exidx) {
    /* Not all exon vectors were supplied, compute our own copies. */
    myexstart=&vexstart;
    myexend=&vexend;
    myexidx=&vexidx;
    SPLICING_CHECK(splicing_vector_int_init(myexstart, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexstart);
    SPLICING_CHECK(splicing_vector_int_init(myexend, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexend);
    SPLICING_CHECK(splicing_vector_int_init(myexidx, 0));
    SPLICING_FINALLY(splicing_vector_int_destroy, myexidx);
    SPLICING_CHECK(splicing_gff_exon_start_end(gff, myexstart, myexend, 
                                               myexidx, gene));
  }

  SPLICING_CHECK(splicing_gff_noiso_one(gff, gene, &noiso));

  SPLICING_CHECK(splicing_vector_int_init(&exlim, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &exlim);
  SPLICING_CHECK(splicing_vector_int_init(&shift, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &shift);

  /* Per-exon shift: (sum of exon starts so far) - (sum of the ends of the
     preceding exons) - (number of preceding exons) - 1, restarted for
     every isoform. */
  for (i=0; i<noiso; i++) {
    size_t cs=0, ce=0, ex=0;
    int pos=VECTOR(*myexidx)[i], pos2=VECTOR(*myexidx)[i+1];
    while (pos < pos2) {
      cs += VECTOR(*myexstart)[pos];
      SPLICING_CHECK(splicing_vector_int_push_back(&shift, cs-ce-ex-1));
      ex++; ce += VECTOR(*myexend)[pos]; pos++;
    }
  }

  /* Per-exon limit: cumulative exon length within the isoform, plus one. */
  for (i=0; i<noiso; i++) { 
    size_t cs=0;
    int pos=VECTOR(*myexidx)[i], pos2=VECTOR(*myexidx)[i+1];
    while (pos < pos2) {
      size_t l=VECTOR(*myexend)[pos]-VECTOR(*myexstart)[pos]+1;
      cs += l;
      SPLICING_CHECK(splicing_vector_int_push_back(&exlim, cs+1));
      pos++;
    }
  }  

  for (i=0; i<n; i++) {
    int iso=VECTOR(*isoform)[i];
    size_t pos=VECTOR(*position)[i];
    int ex, exlast;
    if (iso < 0 || (size_t) iso >= noiso) {
      SPLICING_ERROR("Invalid isoform id", SPLICING_EINVAL);
    }
    /* Find the first exon of the isoform whose limit is above the
       position, without leaving the exons of this isoform. */
    exlast=VECTOR(*myexidx)[iso+1];
    for (ex=VECTOR(*myexidx)[iso]; 
         ex < exlast && VECTOR(exlim)[ex] <= pos; ex++) ;
    if (ex >= exlast) {
      SPLICING_ERROR("Position is outside of the isoform", SPLICING_EINVAL);
    }
    VECTOR(*position)[i] = pos + VECTOR(shift)[ex];
  }

  splicing_vector_int_destroy(&shift);
  splicing_vector_int_destroy(&exlim);
  SPLICING_FINALLY_CLEAN(2);

  if (!exstart || !exend || !exidx) {
    splicing_vector_int_destroy(myexidx);
    splicing_vector_int_destroy(myexend);
    splicing_vector_int_destroy(myexstart);
    SPLICING_FINALLY_CLEAN(3);
  }
  
  return 0;
}