/* Compare the strings S1 and S2 using the LC_COLLATE data of locale L.

   If the locale defines no collation rules the result is whatever STRCMP
   returns.  If either string is empty the result is decided directly from
   which of the two is empty.  Otherwise the strings are walked once per
   rule level; the first non-zero value produced by do_compare is returned,
   and -1 or 1 is returned as soon as exactly one of the strings runs out
   of sequences.  */
int
STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, locale_t l)
{
  struct __locale_data *coll = l->__locales[LC_COLLATE];
  uint_fast32_t nrules
    = coll->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;

  /* No rules: nothing locale specific to do.  */
  if (nrules == 0)
    return STRCMP (s1, s2);

  /* Catch empty strings.  */
  if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0'))
    return (*s1 != '\0') - (*s2 != '\0');

  /* The collation tables are only looked up from here on, since they are
     not needed by the early returns above.  */
  const unsigned char *rulesets = (const unsigned char *)
    coll->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
  const int32_t *table = (const int32_t *)
    coll->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
  const USTRING_TYPE *weights = (const USTRING_TYPE *)
    coll->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
  const USTRING_TYPE *extra = (const USTRING_TYPE *)
    coll->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
  const int32_t *indirect = (const int32_t *)
    coll->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;

  /* The tables are accessed as arrays of their element type, so they must
     be suitably aligned.  */
  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);

  int cmp = 0;
  int cur_rule = 0;

  /* State that is carried over from one level to the next.  */
  coll_seq seq1, seq2;
  seq1.len = 0;
  seq1.idxmax = 0;
  seq1.rule = 0;
  seq2.len = 0;
  seq2.idxmax = 0;

  for (int level = 0; level < nrules; ++level)
    {
      /* Per-level state: restart both sequences from the beginning.  */
      seq1.idxcnt = 0;
      seq1.idx = 0;
      seq1.backw_stop = ~0ul;
      seq1.backw = ~0ul;

      seq2.idxcnt = 0;
      seq2.idx = 0;
      seq2.backw_stop = ~0ul;
      seq2.backw = ~0ul;

      /* We need the elements of the strings as unsigned values since they
         are used as indices.  */
      seq1.us = (const USTRING_TYPE *) s1;
      seq2.us = (const USTRING_TYPE *) s2;

      /* We assume that if a rule has defined `position' in one section
         this is true for all of them.  Please note that the localedef
         program makes sure that `position' is not used at the first
         level.  */
      int position = rulesets[cur_rule * nrules + level] & sort_position;

      for (;;)
        {
          get_next_seq (&seq1, nrules, rulesets, weights, table,
                        extra, indirect, level);
          get_next_seq (&seq2, nrules, rulesets, weights, table,
                        extra, indirect, level);

          int s1_done = seq1.len == 0;
          int s2_done = seq2.len == 0;

          if (s1_done && s2_done)
            {
              /* Both strings ended and are equal at this level.  On the
                 first level do a plain STRCMP so that totally equal
                 strings do not go through the remaining levels.  */
              if (level == 0 && STRCMP (s1, s2) == 0)
                return cmp;
              break;
            }

          /* Exactly one string ended: the shorter one sorts first.  */
          if (s1_done)
            return -1;
          if (s2_done)
            return 1;

          cmp = do_compare (&seq1, &seq2, position, weights);
          if (cmp != 0)
            return cmp;
        }

      cur_rule = seq1.rule;
    }

  return cmp;
}
int main(int argc, char *argv[]) { int count; seq_t seq1, seq2; hash_env_t he; collec_t res, rev_res; #if defined(DEBUG) && (DEBUG > 1) mcheck(NULL); mtrace(); #endif argv0 = argv[0]; if (setlocale(LC_ALL, "POSIX") == NULL) fprintf(stderr, "%s: Warning: could not set locale to POSIX\n", argv[0]); signal(SIGSEGV, bug_handler); #ifndef __MINGW32__ signal(SIGBUS, bug_handler); #endif /* Default options. */ options.C = DEFAULT_C; options.cutoff = DIST_CUTOFF; options.gapPct = DEFAULT_GAPPCT; options.intron_window = 6; options.K = DEFAULT_K; options.splice_type_list = "GTAG,GCAG,GTAC,ATAC"; options.nbSplice = 4; options.scoreSplice_window = 10; options.mismatchScore = MISMATCH; options.reverse = 2; options.matchScore = MATCH; options.W = DEFAULT_W; options.X = DEFAULT_X; options.filterPct = DEFAULT_FILTER; options.minScore_cutoff = MATCH_CUTOFF; while (1) { int c = getopt(argc, argv, "A:C:c:E:f:g:I:K:L:M:o:q:R:r:W:X:"); if (c == -1) break; switch (c) { case 'A': options.ali_flag = atoi(optarg); if (options.ali_flag < 0 || options.ali_flag > 4) fatal("A must be one of 0, 1, 2, 3, or 4.\n"); break; case 'C': { int val = atoi(optarg); if (val < 0) fatal("Value for option C must be non-negative.\n"); options.C = val; break; } case 'c': { int val = atoi(optarg); if (val < 0) fatal("Value for option c must be non-negative.\n"); options.minScore_cutoff = val; break; } case 'E': options.cutoff = atoi(optarg); if (options.cutoff < 3 || options.cutoff > 10) fatal("Cutoff (E) must be within [3,10].\n"); break; case 'f': options.filterPct = atoi(optarg); if (options.filterPct > 100) fatal("Filter in percent (f) must be within [0,100].\n"); break; case 'g': options.gapPct = atoi(optarg); break; case 'I': options.intron_window = atoi(optarg); break; case 'K': { int val = atoi(optarg); if (val < 0) fatal("Value for option K must be non-negative.\n"); options.K = val; break; } case 'L': { size_t i; size_t len = strlen(optarg); options.splice_type_list = optarg; options.nbSplice = 1; if 
(len % 5 != 4) fatal("Splice types list has illegal length (%zu)\n", len); for (i = 0; i < len; i++) if (i % 5 == 4) { if (options.splice_type_list[i] != ',') fatal("Comma expected instead of %c at position %zu" "in splice types list.\n", options.splice_type_list[i], i); options.nbSplice += 1; } else { if (options.splice_type_list[i] != 'A' && options.splice_type_list[i] != 'C' && options.splice_type_list[i] != 'G' && options.splice_type_list[i] != 'T') fatal("Expected 'A', 'C', 'G' or 'T' instead of '%c' at" "position %zu in splice types list.\n", options.splice_type_list[i], i); } break; } case 'M': { int val = atoi(optarg); if (val < 0) fatal("Value for option M must be non-negative.\n"); options.scoreSplice_window = val; break; } case 'o': options.dnaOffset = atoi(optarg); break; case 'q': options.mismatchScore = atoi(optarg); break; case 'R': options.reverse = atoi(optarg); if (options.reverse < 0 || options.reverse > 2) fatal("R must be one of 0, 1, or 2.\n"); break; case 'r': options.matchScore = atoi(optarg); break; case 'W': options.W = atoi(optarg); if (options.W < 1 || options.W > 15) fatal("W must be within [1,15].\n"); break; case 'X': options.X = atoi(optarg); if (options.X < 1) fatal("X must be positive.\n"); break; case '?': break; default: fprintf(stderr, "?? 
getopt returned character code 0%o ??\n", c); } } if (optind + 2 != argc) { fprintf(stderr, Usage, argv[0], options.ali_flag, options.C, options.minScore_cutoff, options.cutoff, options.filterPct, options.gapPct, options.intron_window, options.K, options.splice_type_list, options.scoreSplice_window, options.dnaOffset, options.mismatchScore, options.reverse, options.matchScore, options.W, options.X); return 1; } /* read seq1 */ init_seq(argv[optind], &seq1); if (get_next_seq(&seq1, options.dnaOffset, 1) != 0) fatal("Cannot read sequence from %s.\n", argv[optind]); strncpy(dna_seq_head, seq1.header, 256); /* read seq2 */ init_seq(argv[optind + 1], &seq2); if (get_next_seq(&seq2, 0, 0) != 0) fatal("Cannot read sequence from %s.\n", argv[optind + 1]); init_encoding(); init_hash_env(&he, options.W, seq1.seq, seq1.len); init_col(&res, 1); init_col(&rev_res, 1); bld_table(&he); init_splice_junctions(); count = 0; while (!count || get_next_seq(&seq2, 0, 0) == 0) { unsigned int curRes; strncpy(rna_seq_head, seq2.header, 256); ++count; switch (options.reverse) { case 0: SIM4(&he, &seq2, &res); break; case 2: SIM4(&he, &seq2, &res); case 1: seq_revcomp_inplace(&seq2); SIM4(&he, &seq2, &rev_res); break; default: fatal ("Unrecognized request for EST orientation.\n"); } /* Keep only the best matches, according to filterPct. 
*/ if (options.filterPct > 0) { unsigned int max_nmatches = 0; for (curRes = 0; curRes < rev_res.nb; curRes++) { result_p_t r = rev_res.e.result[curRes]; if (r->st.nmatches > max_nmatches) max_nmatches = r->st.nmatches; } for (curRes = 0; curRes < res.nb; curRes++) { result_p_t r = res.e.result[curRes]; if (r->st.nmatches > max_nmatches) max_nmatches = r->st.nmatches; } max_nmatches = (max_nmatches * options.filterPct) / 100; for (curRes = 0; curRes < rev_res.nb; curRes++) { result_p_t r = rev_res.e.result[curRes]; if (r->st.nmatches < max_nmatches) r->st.nmatches = 0; } for (curRes = 0; curRes < res.nb; curRes++) { result_p_t r = res.e.result[curRes]; if (r->st.nmatches < max_nmatches) r->st.nmatches = 0; } } /* Now, print results. */ for (curRes = 0; curRes < rev_res.nb; curRes++) print_res(rev_res.e.result[curRes], 1, &seq1, &seq2); rev_res.nb = 0; if (options.reverse && options.ali_flag) /* reverse-complement back seq2 for alignment */ seq_revcomp_inplace(&seq2); for (curRes = 0; curRes < res.nb; curRes++) print_res(res.e.result[curRes], 0, &seq1, &seq2); res.nb = 0; } #ifdef DEBUG fprintf(stderr, "DEBUG mode: freeing all memory...\n"); fflush(stdout); fflush(stderr); free_hash_env(&he); free_seq(&seq1); free_seq(&seq2); free(options.splice); free(res.e.elt); free(rev_res.e.elt); #endif return 0; }