コード例 #1
0
ファイル: iit_get.c プロジェクト: djinnome/JAMg-1
/* coordstart used only if centerp or tallyp is true */
static long int
print_interval (Chrpos_T *lastcoord, long int total,
		char *divstring, Chrpos_T coordstart, Chrpos_T coordend, 
		int index, IIT_T iit, int ndivs, int fieldint) {
  Interval_T interval;
  char *label, *annotation, *restofheader;
  bool allocp;

  if (centerp == true) {
    print_interval_centered(divstring,coordstart,index,iit,fieldint);
    return 0;
  } else if (tallyp == true) {
    total += print_interval_tally(&(*lastcoord),divstring,coordstart,coordend,index,iit,zeroesp);
    return total;
  } else if (runlengthp == true) {
    print_interval_runlength(&(*lastcoord),divstring,coordstart,coordend,index,iit,zeroesp);
    return 0;
  }

  if (annotationonlyp == false) {
    label = IIT_label(iit,index,&allocp);
    printf(">%s ",label);
    if (allocp == true) {
      FREE(label);
    }
      
    if (ndivs > 1) {
      if (divstring == NULL) {
	/* For example, if interval was retrieved by label */
	divstring = IIT_divstring_from_index(iit,index);
      }
      printf("%s:",divstring);
    }

    debug(printf("index is %d\n",index));
    interval = IIT_interval(iit,index);
    if (signedp == false) {
      printf("%u..%u",Interval_low(interval),Interval_high(interval));
    } else if (Interval_sign(interval) < 0) {
      printf("%u..%u",Interval_high(interval),Interval_low(interval));
    } else {
      printf("%u..%u",Interval_low(interval),Interval_high(interval));
    }
    if (Interval_type(interval) > 0) {
      printf(" %s",IIT_typestring(iit,Interval_type(interval)));
    }
#if 0
    /* Unnecessary because of "\n" after restofheader below */
    if (IIT_version(iit) < 5) {
      printf("\n");
    }
#endif
  }


  if (fieldint < 0) {
    annotation = IIT_annotation(&restofheader,iit,index,&allocp);
    printf("%s\n",restofheader);
    printf("%s",annotation);
    if (allocp == true) {
      FREE(restofheader);
    }
  } else {
    annotation = IIT_annotation(&restofheader,iit,index,&allocp);
    printf("%s\n",restofheader);
    if (allocp == true) {
      FREE(restofheader);
    }
    annotation = IIT_fieldvalue(iit,index,fieldint);
    printf("%s\n",annotation);
    FREE(annotation);
  }

  return 0;
}
コード例 #2
0
ファイル: genome-write.c プロジェクト: fabiocpn/GMAP-GSNAP
/* Puts reference genome into refgenome_fp (assume compressed),
   and puts alternate strain sequences into altstrain_iit. */
static void
genome_write_memory (FILE *refgenome_fp, FILE *input, 
		     IIT_T contig_iit, IIT_T altstrain_iit, UINT4 *genomecomp,
		     unsigned int nuint4, char *fileroot) {
  char Buffer[BUFFERSIZE], Complement[BUFFERSIZE], *segment;
  char *accession, *p;
  Genomicpos_T leftposition, rightposition, startposition, endposition, truelength, 
    maxposition = 0, currposition = 0;
  int contigtype;
  bool revcompp;
#ifdef ALTSTRAIN
  int altstrain_index, altstrain_offset;
#endif
  int nbadchars = 0;
  int ncontigs = 0;
  
  while (fgets(Buffer,BUFFERSIZE,input) != NULL) {
    if (Buffer[0] == '>') {
      /* HEADER */
      accession = parse_accession(Buffer);
      find_positions(&revcompp,&leftposition,&rightposition,&startposition,&endposition,
		     &truelength,&contigtype,accession,contig_iit);
      if (++ncontigs < 50) {
	if (revcompp == true) {
	  fprintf(stderr,"Writing contig %s to universal coordinates %u..%u in genome %s\n",
		  accession,startposition,endposition,fileroot);
	} else {
	  fprintf(stderr,"Writing contig %s to universal coordinates %u..%u in genome %s\n",
		  accession,startposition+1U,endposition+1U,fileroot);
	}
      } else if (ncontigs == 50) {
	fprintf(stderr,"More than 50 contigs.  Will stop printing messages\n");
      }

      if (contigtype > 0) {
#ifdef ALTSTRAIN
	fprintf(stderr," (alternate strain %s)",IIT_typestring(altstrain_iit,contigtype));
#endif
      }
      FREE(accession);

      if (contigtype > 0) {
#ifdef ALTSTRAIN
	/* Initialize file pointer for alternate strain */
	altstrain_index = IIT_get_exact(altstrain_iit,/*divstring*/NULL,leftposition,rightposition,contigtype);
	if (revcompp == true) {
	  altstrain_offset = rightposition + 1U - leftposition;
	} else {
	  altstrain_offset = 0;
	}
	debug(printf("Setting altstrain_offset to be %d\n",altstrain_offset));
#endif
      }

      /* Handles case where true length is greater than provided
         coordinates.  This needs to be after call to IIT_get_exact */
      if (leftposition + truelength - 1U > rightposition) {
	debug(fprintf(stderr,"Extending endposition for truelength of %u\n",truelength));
	rightposition = leftposition + truelength;
	if (revcompp == true) {
	  endposition = startposition - truelength;
	} else {
	  endposition = startposition + truelength;
	}
      }

      /* In both cases, set file pointer for reference strain,
         although we won't write sequence of alternate strain.  For an
         alternate strain, ensure that we fill the reference strain
         with sufficient X's. */
      if (startposition > maxposition) {
	/* Start beyond end of file */
	debug(printf("Filling with X's from %u to %u-1\n",maxposition,startposition));
	fill_x_memory(genomecomp,maxposition,startposition);
	  
	if (contigtype > 0) {
#ifdef ALTSTRAIN
	  fill_x_memory(genomecomp,leftposition,rightposition + 1);
	  maxposition = currposition = rightposition + 1;
#endif
	} else {
	  maxposition = rightposition;
	  currposition = startposition;
	}

      } else {
	/* Start within file */
	if (contigtype > 0) {
#ifdef ALTSTRAIN
	  if (rightposition + 1 > maxposition) {
	    debug(printf("Filling with X's from %u to %u-1\n",maxposition,rightposition+1));
	    fill_x_memory(genomecomp,maxposition,rightposition + 1);
	    maxposition = currposition = rightposition + 1;
	  }
#endif
	} else {
	  debug(printf("Moving to %u\n",startposition));
	  currposition = startposition;
	}
      }

    } else {
      /* SEQUENCE */
      if ((p = rindex(Buffer,'\n')) != NULL) {
	*p = '\0';
      }
      if ((p = rindex(Buffer,CONTROLM)) != NULL) {
	*p = '\0';
      }
      if (revcompp == true) {
	make_complement_buffered(Complement,Buffer,strlen(Buffer));
	segment = Complement;
      } else {
	segment = Buffer;
      }

      if (contigtype > 0) {
#ifdef ALTSTRAIN
	/* Write alternate strain */
	if (revcompp == true) {
	  altstrain_offset -= strlen(segment);
	  debug(printf("Writing alternate strain at %u\n",altstrain_offset));
	  IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
	} else {
	  debug(printf("Writing alternate strain at %u\n",altstrain_offset));
	  IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
	  altstrain_offset += strlen(segment);
	}
#endif
      } else {
	/* Write reference strain */
	if (revcompp == true) {
	  debug(printf("Filling with sequence from %u-1 to %u\n",currposition,currposition-strlen(segment)));
	  currposition -= strlen(segment);
	  nbadchars = Compress_update_memory(nbadchars,genomecomp,segment,currposition,currposition+strlen(segment));
	} else {
	  debug(printf("Filling with sequence from %u to %u-1\n",currposition,currposition+strlen(segment)));
	  nbadchars = Compress_update_memory(nbadchars,genomecomp,segment,currposition,currposition+strlen(segment));
	  currposition += strlen(segment);
	  if (currposition > maxposition) {
	    maxposition = currposition;
	  }
	}
      }
    }
  }

  move_absolute(refgenome_fp,0U);
  FWRITE_UINTS(genomecomp,nuint4,refgenome_fp);

  fprintf(stderr,"A total of %d non-ACGTNX characters were seen in the genome.\n",nbadchars);

  return;
}
コード例 #3
0
ファイル: iit_get.c プロジェクト: djinnome/JAMg-1
/* Need to store just the part of the query specified (e.g., 1..10) */
static void
print_interval_centered (char *divstring, Chrpos_T coordstart, int index, IIT_T iit, int fieldint) {
  Interval_T interval;
  char *label, *annotation, *restofheader, centerchar;
  bool allocp;
  int annotlength, left, centerpos;

  if (fieldint < 0) {
    annotation = IIT_annotation(&restofheader,iit,index,&allocp);
    if (allocp == true) {
      FREE(restofheader);
    }
  } else {
    annotation = IIT_fieldvalue(iit,index,fieldint);
    allocp = true;
  }
  annotlength = strlen(annotation);
  if (annotation[annotlength-1] == '\n') {
    annotlength--;
  }

  interval = IIT_interval(iit,index);
  left = coordstart - Interval_low(interval); /* + length(query) - queryend */
  if (Interval_sign(interval) < 0) {
    centerpos = annotlength-left-1;
  } else {
    centerpos = left;
  }
  centerchar = annotation[centerpos];

  if (centeruc == true && islower(centerchar)) {
    if (fieldint >= 0 && allocp == true) {
      FREE(annotation);
    }
  } else {
    print_spaces(centerlength-left);
    if (Interval_sign(interval) < 0) {
      print_complement(annotation,annotlength-1,centerpos+1);
      printf("[%c]",complCode[(int) centerchar]);
      print_complement(annotation,centerpos-1,0);
    } else {
      print_forward(annotation,0,centerpos-1);
      printf("[%c]",centerchar);
      print_forward(annotation,centerpos+1,annotlength-1);
    }
    print_spaces(centerlength+left-annotlength);
    if (fieldint >= 0 && allocp == true) {
      FREE(annotation);
    }
  
    printf("\t");
    if (Interval_type(interval) > 0) {
      printf("%s\t",IIT_typestring(iit,Interval_type(interval)));
    }

    if (divstring != NULL) {
      if (Interval_sign(interval) < 0) {
	printf("-%s:",divstring);
      } else {
	printf("+%s:",divstring);
      }
    }

    if (signedp == false) {
      printf("%u..%u",Interval_low(interval),Interval_high(interval));
    } else if (Interval_sign(interval) < 0) {
      printf("%u..%u",Interval_high(interval),Interval_low(interval));
    } else {
      printf("%u..%u",Interval_low(interval),Interval_high(interval));
    }
    printf("\t");

    label = IIT_label(iit,index,&allocp);
    printf("%s",label);
    if (allocp == true) {
      FREE(label);
    }
    printf("\n");
  }

  return;
}
コード例 #4
0
ファイル: genome-write.c プロジェクト: fabiocpn/GMAP-GSNAP
/* Permits arbitrary ASCII characters.  Useful for storing numeric data */
static void
genome_writeraw_file (FILE *refgenome_fp, FILE *input, 
		      IIT_T contig_iit, IIT_T altstrain_iit, char *fileroot, int index1part) {
  char Buffer[BUFFERSIZE], Reverse[BUFFERSIZE], *segment;
  char *accession, c;
  Genomicpos_T leftposition, rightposition, startposition, endposition, truelength, 
    maxposition = 0, currposition = 0;
  int contigtype;
  int strlength;
  int i;
  bool revcompp;
#ifdef ALTSTRAIN
  int altstrain_index, altstrain_offset;
#endif
  int ncontigs = 0;

  for (i = 0; i < WRITEBLOCK; i++) {
    Empty[i] = 0;
  }

  while (fgets(Buffer,BUFFERSIZE,input) != NULL) {
    if (Buffer[0] != '>') {
      fprintf(stderr,"Expecting to see a new FASTA entry\n");
      fprintf(stderr,"Instead, saw %d (%c)\n",(int) Buffer[0],Buffer[0]);
      exit(9);
    } else {
      /* HEADER */
      accession = parse_accession(Buffer);
      find_positions(&revcompp,&leftposition,&rightposition,&startposition,&endposition,
		     &truelength,&contigtype,accession,contig_iit);
      if (++ncontigs < 50) {
	if (revcompp == true) {
	  fprintf(stderr,"Writing contig %s to universal coordinates %u..%u in genome %s\n",
		  accession,startposition,endposition,fileroot);
	} else {
	  fprintf(stderr,"Writing contig %s to universal coordinates %u..%u in genome %s\n",
		  accession,startposition+1U,endposition+1U,fileroot);
	}
      } else if (ncontigs == 50) {
	fprintf(stderr,"More than 50 contigs.  Will stop printing messages\n");
      }

      if (contigtype > 0) {
#ifdef ALTSTRAIN
	fprintf(stderr," (alternate strain %s)",IIT_typestring(altstrain_iit,contigtype));
#endif
      }
      FREE(accession);

      if (contigtype > 0) {
#ifdef ALTSTRAIN
	/* Initialize file pointer for alternate strain */
	altstrain_index = IIT_get_exact(altstrain_iit,/*divstring*/NULL,leftposition,rightposition,contigtype);
	if (revcompp == true) {
	  altstrain_offset = rightposition + 1U - leftposition;
	} else {
	  altstrain_offset = 0;
	}
	debug(printf("Setting altstrain_offset to be %d\n",altstrain_offset));
#endif
      }

      /* Handles case where true length is greater than provided
         coordinates.  This needs to be after call to IIT_get_exact */
      if (leftposition + truelength - 1U > rightposition) {
	debug(fprintf(stderr,"Extending endposition for truelength of %u\n",truelength));
	rightposition = leftposition + truelength;
	if (revcompp == true) {
	  endposition = startposition - truelength;
	} else {
	  endposition = startposition + truelength;
	}
      }

      /* In both cases, set file pointer for reference strain,
         although we won't write sequence of alternate strain.  For an
         alternate strain, ensure that we fill the reference strain
         with sufficient X's. */
      if (startposition > maxposition) {
	/* Start beyond end of file */
	debug(printf("Filling with zeroes from %u to %u-1\n",maxposition,startposition));
	fill_zero(refgenome_fp,maxposition,startposition,/*uncompressedp*/true,index1part);
	  
	if (contigtype > 0) {
#ifdef ALTSTRAIN
	  fill_zero(refgenome_fp,leftposition,rightposition + 1,/*uncompressedp*/true,index1part);
	  maxposition = currposition = rightposition + 1;
#endif
	} else {
	  maxposition = rightposition;
	  currposition = startposition;
	}

      } else {
	/* Start within file */
	if (contigtype > 0) {
#ifdef ALTSTRAIN
	  if (rightposition + 1 > maxposition) {
	    debug(printf("Filling with zeroes from %u to %u-1\n",maxposition,rightposition+1));
	    fill_zero(refgenome_fp,maxposition,rightposition + 1,/*uncompressedp*/true,index1part);
	    maxposition = currposition = rightposition + 1;
	  }
#endif
	} else {
	  debug(printf("Moving to %u\n",startposition));
	  move_absolute(refgenome_fp,startposition);
	  currposition = startposition;
	}
      }

      /* SEQUENCE */
      fprintf(stderr,"Processing %u characters\n",truelength);
      while (truelength > 0) {
	if (truelength > BUFFERSIZE) {
	  if ((strlength = fread(Buffer,sizeof(char),BUFFERSIZE,input)) < BUFFERSIZE) {
	    fprintf(stderr,"Expecting %u more characters, but saw end of file\n",truelength);
	    exit(9);
	  }
	  truelength -= BUFFERSIZE;
	} else {
	  if ((strlength = fread(Buffer,sizeof(char),truelength,input)) < truelength) {
	    fprintf(stderr,"Expecting %u more characters, but saw end of file\n",truelength);
	    exit(9);
	  }
	  truelength = 0;
	}

	if (revcompp == true) {
	  make_reverse_buffered(Reverse,Buffer,strlength);
	  segment = Reverse;
	} else {
	  segment = Buffer;
	}

	if (contigtype > 0) {
#ifdef ALTSTRAING
	  /* Write alternate strain.  It is likely that IIT commands
	     will fail because they depend on \0 to terminate the segment.  */
	  if (revcompp == true) {
	    altstrain_offset -= strlength;
	    debug(printf("Writing alternate strain at %u\n",altstrain_offset));
	    IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
	  } else {
	    debug(printf("Writing alternate strain at %u\n",altstrain_offset));
	    IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
	    altstrain_offset += strlength;
	  }
#endif
	} else {
	  /* Write reference strain */
	  if (revcompp == true) {
	    debug(printf("Filling with sequence from %u-1 to %u\n",currposition,currposition-strlength));
	    currposition -= strlength;
	    fwrite(segment,sizeof(char),strlength,refgenome_fp);
	  } else {
	    debug(printf("Filling with sequence from %u to %u-1\n",currposition,currposition+strlength));
	    fwrite(segment,sizeof(char),strlength,refgenome_fp);
	    currposition += strlength;
	    if (currposition > maxposition) {
	      maxposition = currposition;
	    }
	  }
	}
      }

      if ((c = fgetc(input)) != EOF && c != '\n') {
	fprintf(stderr,"Expecting linefeed at end of sequence.  Saw %d instead\n",
		c);
	exit(9);
      }

    }
  }

  return;
}