/*
 * seq_read_fasta -- read the next FASTA record from seq->fp into *seq.
 *
 * Returns 0 without reading anything when the stream is already at EOF or
 * in an error state, or when a record has already been read (seq->count > 0)
 * and the SEQ_IS_SUBRANGE flag is set.  Returns 1 after a record was read.
 *
 * On success:
 *   - any previous seq->header / seq->seq are released with ZFREE;
 *   - seq->offset holds the ftell() position taken before the record;
 *   - seq->header holds the header line (including the leading '>', without
 *     the newline), NUL terminated; seq->hlen is the vector length, which
 *     counts that terminator;
 *   - seq->seq holds the NUL-terminated residues, either the whole record
 *     (when SEQ_LEN(seq) == -1) or the [SEQ_FROM, SEQ_TO] slice (1-based,
 *     inclusive);
 *   - SEQ_IS_REVCOMP is cleared, then reverse-complement and masking are
 *     applied if SEQ_DO_REVCOMP / SEQ_HAS_MASK are set;
 *   - seq->count is incremented.
 *
 * Read errors, characters rejected by the nfasta_ctype table, and a
 * requested range that does not fit the sequence are reported through
 * Fatalfr()/fatalf().
 */
int seq_read_fasta(SEQ *seq)
{
    int b, c;
    charvec_t shdr = charvec_INIT(ckrealloc,ckfree);
    charvec_t sseq = charvec_INIT(ckrealloc,ckfree);

    if (feof(seq->fp) || ferror(seq->fp))
        return 0;

    if (seq->count > 0) {
        if (seq->flags & SEQ_IS_SUBRANGE) {
            return 0;
        } else {
            seq->from = 1;
            seq->slen = -1; /* will be computed below */
        }
    }

    if (seq->header) ZFREE(seq->header);
    if (seq->seq) ZFREE(seq->seq);

    seq->offset = ftell(seq->fp);

    /* --- header --- */
    /* NOTE(review): getnwc presumably skips leading whitespace -- confirm. */
    c = getnwc(seq->fp);
    if (c == '>') {
        while (c != '\n' && c != EOF) {
            char_append(&shdr, c);
            c = getc(seq->fp);
        }
    } else {
        /* No header line: push the character back so it is read as sequence. */
        un_getc(c, seq->fp);
    }
    if (ferror(seq->fp))
        Fatalfr("seq_read(%s)", seq->fname);
    char_append(&shdr, 0);
    seq->header = shdr.a;
    seq->hlen = shdr.len;

    /* --- seq --- */
    /*
     * b tracks the previously read character; starting it at '\n' lets a '>'
     * seen as the very first character end the (empty) sequence.  The loop
     * stops at EOF or at a '>' that immediately follows a newline.
     */
    b = '\n';
    c = getnwc(seq->fp);
    while ((c != EOF) && !(b == '\n' && c == '>')) {
        switch (nfasta_ctype[c]) {
        case Nfasta_nt:
            char_append(&sseq, c);
            break;
        case Nfasta_ws:
            /* skip space */
            break;
        case Nfasta_amb:
            if (seq->flags & SEQ_ALLOW_AMB) {
                char_append(&sseq, c);
                break;
            }
            /* FALLTHRU */
        default:
            fatalf("non-DNA character '%c' in sequence '%s'", c, seq->fname);
            break;
        }
        b = c;
        c = getc(seq->fp);
    }
    /* Leave the terminating character (next record's '>' or EOF) unread. */
    un_getc(c, seq->fp);
    if (ferror(seq->fp))
        Fatalfr("seq_read(%s)", seq->fname);

    /* check conformance */
    if (SEQ_LEN(seq) == -1) {
        /* Whole-record case: hand the accumulated vector over to seq. */
        char_append(&sseq, 0);
        charvec_fit(&sseq);
        seq->seq = (uchar*)sseq.a;
        seq->slen = sseq.len;
        if (seq->slen > 0) --seq->slen; /* don't include '\0' */
    } else {
        /* Subrange case: copy the requested slice, then drop the full read. */
        charvec_t ssub = charvec_INIT(ckrealloc,ckfree);
        int i;

        if (SEQ_FROM(seq) < 1 || (int)sseq.len < SEQ_FROM(seq) ||
            SEQ_TO(seq) < 1 || (int)sseq.len < SEQ_TO(seq) ||
            SEQ_TO(seq) < SEQ_FROM(seq))
            /* Cast matches the %d conversion and the comparisons above. */
            fatalf("range [%d,%d] incommensurate with sequence [%d,%d]",
                   SEQ_FROM(seq), SEQ_TO(seq), 1, (int)sseq.len);

        for (i = SEQ_FROM(seq); i <= SEQ_TO(seq); ++i)
            char_append(&ssub, sseq.a[i-1]);
        char_append(&ssub, 0);
        charvec_fini(&sseq);
        seq->seq = (uchar*)ssub.a;
    }

    seq->flags = seq->flags &~ SEQ_IS_REVCOMP;
    if (seq->flags & SEQ_DO_REVCOMP) {
        (void)seq_revcomp_inplace(seq);
    }
    if (seq->flags & SEQ_HAS_MASK) {
        (void)seq_mask_inplace(seq);
    }
    seq->count++;
    return 1;
}
int main(int argc, char *argv[]) { int count; seq_t seq1, seq2; hash_env_t he; collec_t res, rev_res; #if defined(DEBUG) && (DEBUG > 1) mcheck(NULL); mtrace(); #endif argv0 = argv[0]; if (setlocale(LC_ALL, "POSIX") == NULL) fprintf(stderr, "%s: Warning: could not set locale to POSIX\n", argv[0]); signal(SIGSEGV, bug_handler); #ifndef __MINGW32__ signal(SIGBUS, bug_handler); #endif /* Default options. */ options.C = DEFAULT_C; options.cutoff = DIST_CUTOFF; options.gapPct = DEFAULT_GAPPCT; options.intron_window = 6; options.K = DEFAULT_K; options.splice_type_list = "GTAG,GCAG,GTAC,ATAC"; options.nbSplice = 4; options.scoreSplice_window = 10; options.mismatchScore = MISMATCH; options.reverse = 2; options.matchScore = MATCH; options.W = DEFAULT_W; options.X = DEFAULT_X; options.filterPct = DEFAULT_FILTER; options.minScore_cutoff = MATCH_CUTOFF; while (1) { int c = getopt(argc, argv, "A:C:c:E:f:g:I:K:L:M:o:q:R:r:W:X:"); if (c == -1) break; switch (c) { case 'A': options.ali_flag = atoi(optarg); if (options.ali_flag < 0 || options.ali_flag > 4) fatal("A must be one of 0, 1, 2, 3, or 4.\n"); break; case 'C': { int val = atoi(optarg); if (val < 0) fatal("Value for option C must be non-negative.\n"); options.C = val; break; } case 'c': { int val = atoi(optarg); if (val < 0) fatal("Value for option c must be non-negative.\n"); options.minScore_cutoff = val; break; } case 'E': options.cutoff = atoi(optarg); if (options.cutoff < 3 || options.cutoff > 10) fatal("Cutoff (E) must be within [3,10].\n"); break; case 'f': options.filterPct = atoi(optarg); if (options.filterPct > 100) fatal("Filter in percent (f) must be within [0,100].\n"); break; case 'g': options.gapPct = atoi(optarg); break; case 'I': options.intron_window = atoi(optarg); break; case 'K': { int val = atoi(optarg); if (val < 0) fatal("Value for option K must be non-negative.\n"); options.K = val; break; } case 'L': { size_t i; size_t len = strlen(optarg); options.splice_type_list = optarg; options.nbSplice = 1; if 
(len % 5 != 4) fatal("Splice types list has illegal length (%zu)\n", len); for (i = 0; i < len; i++) if (i % 5 == 4) { if (options.splice_type_list[i] != ',') fatal("Comma expected instead of %c at position %zu" "in splice types list.\n", options.splice_type_list[i], i); options.nbSplice += 1; } else { if (options.splice_type_list[i] != 'A' && options.splice_type_list[i] != 'C' && options.splice_type_list[i] != 'G' && options.splice_type_list[i] != 'T') fatal("Expected 'A', 'C', 'G' or 'T' instead of '%c' at" "position %zu in splice types list.\n", options.splice_type_list[i], i); } break; } case 'M': { int val = atoi(optarg); if (val < 0) fatal("Value for option M must be non-negative.\n"); options.scoreSplice_window = val; break; } case 'o': options.dnaOffset = atoi(optarg); break; case 'q': options.mismatchScore = atoi(optarg); break; case 'R': options.reverse = atoi(optarg); if (options.reverse < 0 || options.reverse > 2) fatal("R must be one of 0, 1, or 2.\n"); break; case 'r': options.matchScore = atoi(optarg); break; case 'W': options.W = atoi(optarg); if (options.W < 1 || options.W > 15) fatal("W must be within [1,15].\n"); break; case 'X': options.X = atoi(optarg); if (options.X < 1) fatal("X must be positive.\n"); break; case '?': break; default: fprintf(stderr, "?? 
getopt returned character code 0%o ??\n", c); } } if (optind + 2 != argc) { fprintf(stderr, Usage, argv[0], options.ali_flag, options.C, options.minScore_cutoff, options.cutoff, options.filterPct, options.gapPct, options.intron_window, options.K, options.splice_type_list, options.scoreSplice_window, options.dnaOffset, options.mismatchScore, options.reverse, options.matchScore, options.W, options.X); return 1; } /* read seq1 */ init_seq(argv[optind], &seq1); if (get_next_seq(&seq1, options.dnaOffset, 1) != 0) fatal("Cannot read sequence from %s.\n", argv[optind]); strncpy(dna_seq_head, seq1.header, 256); /* read seq2 */ init_seq(argv[optind + 1], &seq2); if (get_next_seq(&seq2, 0, 0) != 0) fatal("Cannot read sequence from %s.\n", argv[optind + 1]); init_encoding(); init_hash_env(&he, options.W, seq1.seq, seq1.len); init_col(&res, 1); init_col(&rev_res, 1); bld_table(&he); init_splice_junctions(); count = 0; while (!count || get_next_seq(&seq2, 0, 0) == 0) { unsigned int curRes; strncpy(rna_seq_head, seq2.header, 256); ++count; switch (options.reverse) { case 0: SIM4(&he, &seq2, &res); break; case 2: SIM4(&he, &seq2, &res); case 1: seq_revcomp_inplace(&seq2); SIM4(&he, &seq2, &rev_res); break; default: fatal ("Unrecognized request for EST orientation.\n"); } /* Keep only the best matches, according to filterPct. 
*/ if (options.filterPct > 0) { unsigned int max_nmatches = 0; for (curRes = 0; curRes < rev_res.nb; curRes++) { result_p_t r = rev_res.e.result[curRes]; if (r->st.nmatches > max_nmatches) max_nmatches = r->st.nmatches; } for (curRes = 0; curRes < res.nb; curRes++) { result_p_t r = res.e.result[curRes]; if (r->st.nmatches > max_nmatches) max_nmatches = r->st.nmatches; } max_nmatches = (max_nmatches * options.filterPct) / 100; for (curRes = 0; curRes < rev_res.nb; curRes++) { result_p_t r = rev_res.e.result[curRes]; if (r->st.nmatches < max_nmatches) r->st.nmatches = 0; } for (curRes = 0; curRes < res.nb; curRes++) { result_p_t r = res.e.result[curRes]; if (r->st.nmatches < max_nmatches) r->st.nmatches = 0; } } /* Now, print results. */ for (curRes = 0; curRes < rev_res.nb; curRes++) print_res(rev_res.e.result[curRes], 1, &seq1, &seq2); rev_res.nb = 0; if (options.reverse && options.ali_flag) /* reverse-complement back seq2 for alignment */ seq_revcomp_inplace(&seq2); for (curRes = 0; curRes < res.nb; curRes++) print_res(res.e.result[curRes], 0, &seq1, &seq2); res.nb = 0; } #ifdef DEBUG fprintf(stderr, "DEBUG mode: freeing all memory...\n"); fflush(stdout); fflush(stderr); free_hash_env(&he); free_seq(&seq1); free_seq(&seq2); free(options.splice); free(res.e.elt); free(rev_res.e.elt); #endif return 0; }