コード例 #1
0
ファイル: transcript.c プロジェクト: mrmckain/RefTrans
/*
 * Build a new Transcript that mirrors the exons and translations of t.
 *
 * Each exon is re-created with Exon_alloc() and only its start/end fields
 * are carried over. Each translation is duplicated with copy_Translation()
 * and attached with add_Transcript(). The input transcript is only read.
 *
 * Returns the newly allocated Transcript.
 */
Transcript * copy_Transcript(Transcript * t)
{
  Transcript * copy = Transcript_alloc();
  int idx = 0;

  /* exons: fresh objects, coordinates copied field by field */
  while( idx < t->ex_len ) {
    Exon * dup = Exon_alloc();

    dup->start = t->exon[idx]->start;
    dup->end   = t->exon[idx]->end;
    add_ex_Transcript(copy,dup);
    idx++;
  }

  /* translations: delegated to copy_Translation, kept in the same order */
  idx = 0;
  while( idx < t->len ) {
    add_Transcript(copy,copy_Translation(t->translation[idx]));
    idx++;
  }

  return copy;
}
コード例 #2
0
ファイル: gene.c プロジェクト: PlantandFoodResearch/wise2
/*
 * Parse one EMBL feature-table entry (CDS or mRNA) into a Gene.
 *
 * buffer  - holds the current "FT" line on entry. It is modified in place
 *           (strtok and '\0' terminators are written into it) and is handed
 *           to next_feature_tab_line() to fetch continuation lines.
 * maxlen  - passed through to next_feature_tab_line() (presumably the
 *           capacity of buffer - confirm against that helper).
 * ifp     - stream passed through to next_feature_tab_line().
 *
 * The location may be a bare "start..end", a join(...) list, and either may
 * be wrapped in complement(...). Start/end pairs are collected into local
 * arrays of MAX_EMBL_EXON_PARSE entries; a location with more pairs than
 * that is rejected rather than overflowing the arrays.
 *
 * On success returns a Gene holding a single Transcript whose exons are
 * expressed relative to the gene start; for a CDS feature a Translation
 * covering 0..length_Transcript(tr) is attached as well. The remaining
 * lines of the feature are consumed before returning.
 *
 * Returns NULL (after a warn()) on any malformed input.
 */
Gene * read_EMBL_feature_Gene(char * buffer,int maxlen,FILE * ifp)
{
  Gene * gene;
  Transcript * tr;
  Translation * ts;
  Exon * exon;

  char * runner;
  char * base;
  char * next;
  int i;
  int exon_start[MAX_EMBL_EXON_PARSE];
  int exon_end[MAX_EMBL_EXON_PARSE];
  int number;
  int exon_no = 0;
  int isstart = 1;
  int is_complement = 0;
  int is_cds = 0;
  int break_at_end = 0;

  if( strstartcmp(buffer,"FT") != 0 ) {
    warn("passed in a bad line [%s] to be used for feature table parsing",buffer);
    return NULL;
  }

  /* first token after the "FT" tag is the feature key */
  if( (runner=strtok(buffer+2,spacestr)) == NULL ) {
    warn("Bad embl feature line [%s]",buffer);
    return NULL;
  }

  if( strcmp(runner,"CDS") != 0 && strcmp(runner,"mRNA") != 0 ) {
    warn("passed in a feature line to read_EMBL_feature_Gene with a %s tag. This only handles CDS and mRNA tags",runner);
    return NULL;
  }

  if( strcmp(runner,"CDS") == 0 ) {
    is_cds = TRUE;
  }

  /* second token is the location descriptor */
  runner = strtok(NULL,spacestr);

  if( runner == NULL ) {
    warn("Bad embl feature line [%s]",buffer);
    return NULL;
  }

  if( strstartcmp(runner,"complement") == 0 ) {
    runner = strchr(runner,'(');
    if( runner == NULL) {
      warn("Could not find bracket on EMBL feature complement line");
      return NULL;
    }
    is_complement = 1;
    runner++;
  }


  if( strstartcmp(runner,"join") == 0 ) {
    runner = strchr(runner,'(');
    /* a "join" with no opening bracket must not be dereferenced */
    if( runner == NULL ) {
      warn("Could not find bracket on EMBL feature join line");
      return NULL;
    }
    runner++;
  } else if( isdigit((unsigned char)*runner)  || *runner == '<' ) {
    /** ok - starts with the numbers. We'll cope!**/
  } else {
    warn("Expecting a join statement, got a [%s]",runner);
    return NULL;
  }


  /*** ok, now the major number loop ***/

  for(;;) {
    /* isolate the next token: it ends at a bracket, dot, comma, '>' or space */
    base= runner;
    for(;*runner && *runner != ')' && *runner != '.' && *runner != ',' && *runner != '>' && !isspace((unsigned char)*runner);runner++)
      ;

    /* remember where to resume before the delimiter is overwritten */
    if( *runner == '\0' )
      next = runner;
    else next = runner+1;

    if( *runner == ')' ) {
      break_at_end = TRUE; /* out of reading exons */
    }


    *runner='\0';
    if( strstartcmp(base,"complement(") == 0 ) {
      is_complement = TRUE;
      /* the prefix match above guarantees a '(' is present in base */
      for(;*base != '(';base++)
        ;
      base++;
      break_at_end = FALSE; /* we found an bracket too early! */
    }

    if( is_integer_string(base,&number) == FALSE ) {
      /* report the token itself; runner now points at the terminator */
      warn("Got a non integer [%s] in the middle of a join statement in EMBL parsing",base);
      return NULL;
    }

    /** put this number away **/

    if( isstart ) {
      /* start and end share an index, so one check guards both arrays */
      if( exon_no >= MAX_EMBL_EXON_PARSE ) {
        warn("Too many exons in EMBL feature location, can only handle %d",(int)MAX_EMBL_EXON_PARSE);
        return NULL;
      }
      exon_start[exon_no] = number;
      isstart = 0;
    } else {
      exon_end[exon_no++] = number;
      isstart = 1;
    }
    if( break_at_end == TRUE)
      break;

    /* skip the ".." separator and any white space */
    for(runner=next;*runner && (*runner == '.' || isspace((unsigned char)*runner));runner++)
      ;

    if( *runner == '\0' ) {
      /* location continues on the next feature-table line */
      if( next_feature_tab_line(buffer,maxlen,ifp) == FALSE) {
        warn("In the middle of getting a join statement, got a [%s]. Yuk!",buffer);
        return NULL;
      }

      if( !isdigit((unsigned char)buffer[0]) && buffer[0] != '.' && buffer[0] != ',') {
        /*** ok - sometimes people very boring end things in here ***/
        /* warn("In the middle of getting a join statement, got a [%s]. Ugh!",buffer); */
        break;
      }

      runner = buffer;
    }

  }

  if( isstart == 0 ) {
    warn("I have read an uneven number of start-end points in the exon thing. Yuk!");
    return NULL;
  }

  /** runner should now be on bracket **/
  /** for a complement there should be another bracket; it is not checked **/

  gene = Gene_alloc_len(1);
  tr  = Transcript_alloc_len(exon_no);
  add_Gene(gene,tr);
  tr->parent = gene;

  if( is_complement ) {
    /* reverse strand: gene start is taken from the last exon end, and
       exons are added last-to-first with coordinates measured back from it */
    gene->start = exon_end[exon_no-1]-1;
    gene->end = exon_start[0] -1;

    for(i=exon_no -1;i >= 0;i--) {
      exon = Exon_alloc();
      exon->start = (gene->start+1) - exon_end[i];
      exon->end = (gene->start+1) - exon_start[i] +1;
      add_ex_Transcript(tr,exon);
    }
  } else {
    gene->start = exon_start[0] -1;
    gene->end = exon_end[exon_no-1] -1;

    for(i=0;i<exon_no;i++) {
      exon = Exon_alloc();
      exon->start = exon_start[i] - (gene->start+1);
      exon->end = exon_end[i] - (gene->start+1)+1;
      add_ex_Transcript(tr,exon);
    }
  }

  if( is_cds == TRUE ) {
    /* a CDS translates over the whole transcript */
    ts = Translation_alloc();
    ts->start = 0;
    ts->end = length_Transcript(tr);
    ts->parent = tr;
    add_Transcript(tr,ts);
  }

  /*** read the rest of this feature ***/

  while( next_feature_tab_line(buffer,maxlen,ifp) == TRUE)
    ;

  return gene;

}