Exemple #1
0
int seq_read_fasta(SEQ *seq)
{
	int b, c;
	charvec_t shdr = charvec_INIT(ckrealloc,ckfree);
	charvec_t sseq = charvec_INIT(ckrealloc,ckfree);

	if (feof(seq->fp) || ferror(seq->fp))
		return 0;

	if (seq->count > 0) { 
		if (seq->flags & SEQ_IS_SUBRANGE) {
			return 0;
		} else {
			seq->from = 1;
			seq->slen = -1; /* will be computed below */
		}
	}

	if (seq->header) ZFREE(seq->header);
	if (seq->seq) ZFREE(seq->seq);

	seq->offset = ftell(seq->fp);

	/* --- header --- */
	c = getnwc(seq->fp);
	if (c == '>') {
		while (c != '\n' && c != EOF) {
			char_append(&shdr, c);
			c = getc(seq->fp);
		}
	} else {
		un_getc(c, seq->fp);
	}
	if (ferror(seq->fp))
		Fatalfr("seq_read(%s)", seq->fname);
	char_append(&shdr, 0);
	seq->header = shdr.a;
	seq->hlen = shdr.len;

	/* --- seq --- */
	b = '\n';
	c = getnwc(seq->fp);
	while ((c != EOF) && !(b == '\n' && c == '>')) {
		switch (nfasta_ctype[c]) {
		case Nfasta_nt:
			char_append(&sseq, c);
			break;
		case Nfasta_ws:
			/* skip space */
			break;
		case Nfasta_amb:
			if (seq->flags & SEQ_ALLOW_AMB) {
				char_append(&sseq, c);
				break;
			}
			/* FALLTHRU */
		default:
			fatalf("non-DNA character '%c' in sequence '%s'",
			  c, seq->fname);
			break;
		}
		b = c;
		c = getc(seq->fp);
	}
	un_getc(c, seq->fp);
	if (ferror(seq->fp))
		Fatalfr("seq_read(%s)", seq->fname);

	/* check conformance */
	if (SEQ_LEN(seq) == -1) {
		char_append(&sseq, 0);
		charvec_fit(&sseq);
		seq->seq = (uchar*)sseq.a;
		seq->slen = sseq.len;
		if (seq->slen > 0) --seq->slen;  /* don't include '\0' */
	} else {
		charvec_t ssub = charvec_INIT(ckrealloc,ckfree);
		int i;

		if (SEQ_FROM(seq) < 1 ||
		    (int)sseq.len < SEQ_FROM(seq) ||
		    SEQ_TO(seq) < 1 ||
		    (int)sseq.len < SEQ_TO(seq) ||
		    SEQ_TO(seq) < SEQ_FROM(seq))
		    fatalf("range [%d,%d] incommensurate with sequence [%d,%d]",
				SEQ_FROM(seq), SEQ_TO(seq), 1, sseq.len);
		
		for (i = SEQ_FROM(seq); i <= SEQ_TO(seq); ++i)
			char_append(&ssub, sseq.a[i-1]);
		char_append(&ssub, 0);
		charvec_fini(&sseq);
		seq->seq = (uchar*)ssub.a;
	}

	seq->flags = seq->flags &~ SEQ_IS_REVCOMP;
        if (seq->flags & SEQ_DO_REVCOMP) {
	    (void)seq_revcomp_inplace(seq);
	}
	if (seq->flags & SEQ_HAS_MASK) {
	    (void)seq_mask_inplace(seq);
	}

	seq->count++;
	return 1;
}
Exemple #2
0
int
main(int argc, char *argv[])
{
  int count;
  seq_t seq1, seq2;
  hash_env_t he;
  collec_t res, rev_res;
#if defined(DEBUG) && (DEBUG > 1)
  mcheck(NULL);
  mtrace();
#endif
  argv0 = argv[0];
  if (setlocale(LC_ALL, "POSIX") == NULL)
    fprintf(stderr, "%s: Warning: could not set locale to POSIX\n", argv[0]);
  signal(SIGSEGV, bug_handler);
#ifndef __MINGW32__  
  signal(SIGBUS, bug_handler);
#endif  
  /* Default options.  */
  options.C = DEFAULT_C;
  options.cutoff = DIST_CUTOFF;
  options.gapPct = DEFAULT_GAPPCT;
  options.intron_window = 6;
  options.K = DEFAULT_K;
  options.splice_type_list = "GTAG,GCAG,GTAC,ATAC";
  options.nbSplice = 4;
  options.scoreSplice_window = 10;
  options.mismatchScore = MISMATCH;
  options.reverse = 2;
  options.matchScore = MATCH;
  options.W = DEFAULT_W;
  options.X = DEFAULT_X;
  options.filterPct = DEFAULT_FILTER;
  options.minScore_cutoff = MATCH_CUTOFF;
  while (1) {
    int c = getopt(argc, argv, "A:C:c:E:f:g:I:K:L:M:o:q:R:r:W:X:");
    if (c == -1)
      break;
    switch (c) {
    case 'A':
      options.ali_flag = atoi(optarg);
      if (options.ali_flag < 0 || options.ali_flag > 4)
	fatal("A must be one of 0, 1, 2, 3, or 4.\n");
      break;
    case 'C': {
      int val = atoi(optarg);
      if (val < 0)
	fatal("Value for option C must be non-negative.\n");
      options.C = val;
      break;
    }
    case 'c': {
      int val = atoi(optarg);
      if (val < 0)
	fatal("Value for option c must be non-negative.\n");
      options.minScore_cutoff = val;
      break;
    }
    case 'E':
      options.cutoff = atoi(optarg);
      if (options.cutoff < 3 || options.cutoff > 10)
	fatal("Cutoff (E) must be within [3,10].\n");
      break;
    case 'f':
      options.filterPct = atoi(optarg);
      if (options.filterPct > 100)
	fatal("Filter in percent (f) must be within [0,100].\n");
      break;
    case 'g':
      options.gapPct = atoi(optarg);
      break;
    case 'I':
      options.intron_window = atoi(optarg);
      break;
    case 'K': {
      int val = atoi(optarg);
      if (val < 0)
	fatal("Value for option K must be non-negative.\n");
      options.K = val;
      break;
    }
    case 'L': {
      size_t i;
      size_t len = strlen(optarg);
      options.splice_type_list = optarg;
      options.nbSplice = 1;
      if (len % 5 != 4)
	fatal("Splice types list has illegal length (%zu)\n", len);
      for (i = 0; i < len; i++)
	if (i % 5 == 4) {
	  if (options.splice_type_list[i] != ',')
	    fatal("Comma expected instead of %c at position %zu"
		  "in splice types list.\n",
		  options.splice_type_list[i], i);
	  options.nbSplice += 1;
	} else {
	  if (options.splice_type_list[i] != 'A'
	      && options.splice_type_list[i] != 'C'
	      && options.splice_type_list[i] != 'G'
	      && options.splice_type_list[i] != 'T')
	    fatal("Expected 'A', 'C', 'G' or 'T' instead of '%c' at"
		  "position %zu in splice types list.\n",
		  options.splice_type_list[i], i);
	}
      break;
    }
    case 'M': {
      int val = atoi(optarg);
      if (val < 0)
	fatal("Value for option M must be non-negative.\n");
      options.scoreSplice_window = val;
      break;
    }
    case 'o':
      options.dnaOffset = atoi(optarg);
      break;
    case 'q':
      options.mismatchScore = atoi(optarg);
      break;
    case 'R':
      options.reverse = atoi(optarg);
      if (options.reverse < 0 || options.reverse > 2)
	fatal("R must be one of 0, 1, or 2.\n");
      break;
    case 'r':
      options.matchScore = atoi(optarg);
      break;
    case 'W':
      options.W = atoi(optarg);
      if (options.W < 1 || options.W > 15)
	fatal("W must be within [1,15].\n");
      break;
    case 'X':
      options.X = atoi(optarg);
      if (options.X < 1)
	fatal("X must be positive.\n");
      break;
    case '?':
      break;
    default:
      fprintf(stderr, "?? getopt returned character code 0%o ??\n", c);
    }
  }
  if (optind + 2 != argc) {
    fprintf(stderr, Usage, argv[0], options.ali_flag, options.C,
	    options.minScore_cutoff, options.cutoff,
	    options.filterPct, options.gapPct, options.intron_window,
	    options.K, options.splice_type_list, options.scoreSplice_window,
	    options.dnaOffset, options.mismatchScore, options.reverse,
	    options.matchScore, options.W, options.X);
    return 1;
  }

  /* read seq1 */
  init_seq(argv[optind], &seq1);
  if (get_next_seq(&seq1, options.dnaOffset, 1) != 0)
    fatal("Cannot read sequence from %s.\n", argv[optind]);
  strncpy(dna_seq_head, seq1.header, 256);

  /* read seq2 */
  init_seq(argv[optind + 1], &seq2);
  if (get_next_seq(&seq2, 0, 0) != 0)
    fatal("Cannot read sequence from %s.\n", argv[optind + 1]);

  init_encoding();
  init_hash_env(&he, options.W, seq1.seq, seq1.len);
  init_col(&res, 1);
  init_col(&rev_res, 1);
  bld_table(&he);
  init_splice_junctions();

  count = 0;
  while (!count || get_next_seq(&seq2, 0, 0) == 0) {
    unsigned int curRes;
    strncpy(rna_seq_head, seq2.header, 256);
    ++count;

    switch (options.reverse) {
    case  0:
      SIM4(&he, &seq2, &res);
      break;
    case  2:
      SIM4(&he, &seq2, &res);
    case  1:
      seq_revcomp_inplace(&seq2);
      SIM4(&he, &seq2, &rev_res);
      break;
    default:
      fatal ("Unrecognized request for EST orientation.\n");
    }
    /* Keep only the best matches, according to filterPct.  */
    if (options.filterPct > 0) {
      unsigned int max_nmatches = 0;
      for (curRes = 0; curRes < rev_res.nb; curRes++) {
	result_p_t r = rev_res.e.result[curRes];
	if (r->st.nmatches > max_nmatches)
	  max_nmatches = r->st.nmatches;
      }
      for (curRes = 0; curRes < res.nb; curRes++) {
	result_p_t r = res.e.result[curRes];
	if (r->st.nmatches > max_nmatches)
	  max_nmatches = r->st.nmatches;
      }
      max_nmatches = (max_nmatches * options.filterPct) / 100;
      for (curRes = 0; curRes < rev_res.nb; curRes++) {
	result_p_t r = rev_res.e.result[curRes];
	if (r->st.nmatches < max_nmatches)
	  r->st.nmatches = 0;
      }
      for (curRes = 0; curRes < res.nb; curRes++) {
	result_p_t r = res.e.result[curRes];
	if (r->st.nmatches < max_nmatches)
	  r->st.nmatches = 0;
      }
    }
    /* Now, print results.  */
    for (curRes = 0; curRes < rev_res.nb; curRes++)
      print_res(rev_res.e.result[curRes], 1, &seq1, &seq2);
    rev_res.nb = 0;
    if (options.reverse && options.ali_flag)
      /* reverse-complement back seq2 for alignment */
      seq_revcomp_inplace(&seq2);
    for (curRes = 0; curRes < res.nb; curRes++)
      print_res(res.e.result[curRes], 0, &seq1, &seq2);
    res.nb = 0;
  }
#ifdef DEBUG
  fprintf(stderr, "DEBUG mode: freeing all memory...\n");
  fflush(stdout);
  fflush(stderr);
  free_hash_env(&he);
  free_seq(&seq1);
  free_seq(&seq2);
  free(options.splice);
  free(res.e.elt);
  free(rev_res.e.elt);
#endif
  return 0;
}