Пример #1
0
PyObject *pysplicing_from_strvector(const splicing_strvector_t *v) {
  int i, n=splicing_strvector_size(v);
  PyObject *o=PyTuple_New(n);
  for (i=0; i<n; i++) {
    PyObject *it=PyString_FromString(splicing_strvector_get(v, i));
    PyTuple_SetItem(o, i, it);
  }
  return o;
}
Пример #2
0
int splicing_i_gff_constitutive_exons_all(const splicing_gff_t *gff,
					  splicing_exonset_t *exons,
					  int min_length) {

  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));

  SPLICING_CHECK(splicing_exonset_init(exons, 0));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);
  
  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int noex, idx, noEvents, i;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;
    splicing_gff_noiso_one(gff, g, &noiso);
    
    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			-VECTOR(gff->end)[idx]));
      }
    }
    noEvents=splicing_vector_int_size(&events);
    splicing_qsort(VECTOR(events), noEvents, sizeof(int),
		   splicing_i_const_cmp);

    /* Now go over the sorted events and extract the constitutive exons */
    for (noex=0, i=0; i<noEvents; i++) {
      int ev=VECTOR(events)[i];
      if (ev > 0) { noex++; } 
      if (ev < 0) { 
	int prev=VECTOR(events)[i-1];
	if (noex == noiso && (-ev)-prev+1 >= min_length) {
	  /* constitutive exon */
	  SPLICING_CHECK(splicing_exonset_append(exons, seqid, prev, -ev));
	}
	noex--;
      }
    }
  }

  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
Пример #3
0
int splicing_i_gff_constitutive_exons_full(const splicing_gff_t *gff,
					   splicing_exonset_t *exons,
					   int min_length) {
  size_t g, nogenes;
  splicing_vector_int_t events;

  SPLICING_CHECK(splicing_gff_nogenes(gff, &nogenes));
  
  SPLICING_CHECK(splicing_exonset_init(exons, 1));
  SPLICING_FINALLY(splicing_exonset_destroy, exons);
  SPLICING_CHECK(splicing_vector_int_init(&events, 0));
  SPLICING_FINALLY(splicing_vector_int_destroy, &events);
  
  for (g=0; g<nogenes; g++) {
    const char *seqid=
      splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[g]);
    int i, idx, noExons, noSame;
    size_t noiso;
    int start=VECTOR(gff->genes)[g];
    int end= g+1 < nogenes ? VECTOR(gff->genes)[g+1] : gff->n;
    splicing_gff_noiso_one(gff, g, &noiso);
    
    /* Collect and sort all events */
    splicing_vector_int_clear(&events);
    for (idx=start+1; idx<end; idx++) {
      if (VECTOR(gff->type)[idx] == SPLICING_TYPE_EXON) {
	SPLICING_CHECK(splicing_vector_int_push_back2
		       (&events, VECTOR(gff->start)[idx],
			VECTOR(gff->end)[idx]));
      }
    }
    noExons=splicing_vector_int_size(&events)/2;
    splicing_qsort(VECTOR(events), noExons, sizeof(int)*2,
		   splicing_i_const_cmp2);
    
    /* Now go over them and check how many times each exon appears */
    for (noSame=1, i=2; i<noExons*2; i+=2) { 
      int start=VECTOR(events)[i];
      int end=VECTOR(events)[i+1];
      if (start == VECTOR(events)[i-2] && VECTOR(events)[i-1] == end) {
	noSame++;
      } else {
	noSame=1;
      }
      if (noSame == noiso) {
	SPLICING_CHECK(splicing_exonset_append(exons, seqid, start, end));
      }
    }
  }
  
  splicing_vector_int_destroy(&events);
  SPLICING_FINALLY_CLEAN(2);	/* + exons */

  return 0;
}
Пример #4
0
Файл: gff.c Проект: mlovci/MISO
int splicing_gff_write(FILE *output, const splicing_gff_t *gff) {

  size_t i, n=gff->n;
  size_t gene=-1;
  const char *types[] = { "gene", "mRNA", "exon", "CDS", "start_codon",
			  "stop_codon" };
  const char *strands[] = { "+", "-", "." };
  
  for (i=0; i<n; i++) {

    if (VECTOR(gff->type)[i] == SPLICING_TYPE_GENE) { gene++; }

    fprintf(output, "%s\t%s\t%s\t%li\t%li\t", 
	    splicing_strvector_get(&gff->seqids, VECTOR(gff->seqid)[gene]),
	    splicing_strvector_get(&gff->sources, VECTOR(gff->source)[gene]),
	    types[VECTOR(gff->type)[i]], (long) VECTOR(gff->start)[i],
	    (long) VECTOR(gff->end)[i]);
    if (VECTOR(gff->score)[i] == SPLICING_NA_REAL) { 
      fprintf(output, "%s\t", ".");
    } else {
      fprintf(output, "%g\t", VECTOR(gff->score)[i]);
    }
    fprintf(output, "%s\t", strands[gene]);
    if (VECTOR(gff->phase)[i] == SPLICING_NA_INTEGER) {
      fprintf(output, "%s\t", ".");
    } else {
      fprintf(output, "%i\t", VECTOR(gff->phase)[i]);
    }
    fprintf(output, "ID=%s", splicing_strvector_get(&gff->ID, i));
    if (VECTOR(gff->parent)[i] >= 0) {
      fprintf(output, ";Parent=%s", 
	      splicing_strvector_get(&gff->ID, VECTOR(gff->parent)[i]));
    }
    fputs("\n", output);
  }
  
  return 0;
}
Пример #5
0
Файл: gff.c Проект: mlovci/MISO
int splicing_gff_append(splicing_gff_t *gff, const char *seqid, 
			const char *source, splicing_type_t type, int start,
			int end, double score, splicing_strand_t strand,
			int phase, const char *ID, const char *parent) {

  if (type == SPLICING_TYPE_GENE) { 
    gff->nogenes++; 
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->genes, gff->n));
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->strand, strand));
  } else if (type == SPLICING_TYPE_MRNA) { 
    gff->notranscripts++; 
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->transcripts, gff->n));
  }

  if (type == SPLICING_TYPE_GENE) {

    /* Seqid */
    if (!strcmp(seqid, gff->last_seqid)) {
      int last=splicing_vector_int_tail(&gff->seqid);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->seqids, seqid, &idx);
      if (seen) { 
	SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, idx));
	gff->last_seqid=splicing_strvector_get(&gff->seqids, idx);
      } else {
	size_t size=splicing_strvector_size(&gff->seqids);
	SPLICING_CHECK(splicing_strvector_append(&gff->seqids, seqid));
	SPLICING_CHECK(splicing_vector_int_push_back(&gff->seqid, size));
	gff->last_source=splicing_strvector_get(&gff->seqids, size);
      }
    }

    /* Source */
    if (!strcmp(source, gff->last_source)) {
      int last=splicing_vector_int_tail(&gff->source);
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, last));
    } else {
      size_t idx;
      int seen=splicing_strvector_search(&gff->sources, source, &idx);
      if (seen) { 
	SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, idx));
	gff->last_source=splicing_strvector_get(&gff->sources, idx);
      } else {
	size_t size=splicing_strvector_size(&gff->sources);
	SPLICING_CHECK(splicing_strvector_append(&gff->sources, source));
	SPLICING_CHECK(splicing_vector_int_push_back(&gff->source, size));
	gff->last_source=splicing_strvector_get(&gff->sources, size);
      }
    }

  }

  /* Parent */
  if (!parent || !parent[0]) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
  } else if (!strcmp(parent, gff->last_gene_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, 
						 gff->last_gene_no));
  } else if (!strcmp(parent, gff->last_mrna_id)) {
    SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, 
						 gff->last_mrna_no));
  } else {
    size_t idx;
    int seen=splicing_strvector_search(&gff->ID, parent, &idx);
    if (!seen) { 
      SPLICING_WARNING("Unknown parent ID, invalid GFF file");
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, -1));
    } else {
      SPLICING_CHECK(splicing_vector_int_push_back(&gff->parent, idx));
    }
  }

  SPLICING_CHECK(splicing_vector_int_push_back(&gff->type, type));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->start, start));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->end, end));
  SPLICING_CHECK(splicing_vector_push_back(&gff->score, score));
  SPLICING_CHECK(splicing_vector_int_push_back(&gff->phase, phase));
  SPLICING_CHECK(splicing_strvector_append(&gff->ID, ID));
  
  /* Update last gene/mrna */
  if (type == SPLICING_TYPE_GENE) { 
    gff->last_gene_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_gene_no = gff->n;
  } else if (type == SPLICING_TYPE_MRNA) {
    gff->last_mrna_id = splicing_strvector_get(&gff->ID, gff->n);
    gff->last_mrna_no = gff->n;
  }

  gff->n += 1;

  return 0;
}