static bool reg_matches(const char *regex, const char *s) { regex_t reg; int res; regcomp_or_die(®, regex, REG_EXTENDED | REG_NOSUB); res = regexec(®, s, 0, NULL, 0); regfree(®); return (res != REG_NOMATCH); }
void diffcore_pickaxe(struct diff_options *o) { const char *needle = o->pickaxe; int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { int cflags = REG_EXTENDED | REG_NEWLINE; if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE) cflags |= REG_ICASE; regcomp_or_die(®ex, needle, cflags); regexp = ®ex; } else if (opts & DIFF_PICKAXE_KIND_S) { if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE && has_non_ascii(needle)) { struct strbuf sb = STRBUF_INIT; int cflags = REG_NEWLINE | REG_ICASE; basic_regex_quote_buf(&sb, needle); regcomp_or_die(®ex, sb.buf, cflags); strbuf_release(&sb); regexp = ®ex; } else { kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE ? tolower_trans_tbl : NULL); kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } } pickaxe(&diff_queued_diff, o, regexp, kws, (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes); if (regexp) regfree(regexp); if (kws) kwsfree(kws); return; }
rval_evaluator_t* rval_evaluator_alloc_from_x_se_func(mv_binary_arg2_regextract_func_t* pfunc, rval_evaluator_t* parg1, char* regex_string, int ignore_case) { rval_evaluator_x_se_state_t* pstate = mlr_malloc_or_die(sizeof(rval_evaluator_x_se_state_t)); pstate->pfunc = pfunc; pstate->parg1 = parg1; int cflags = ignore_case ? REG_ICASE : 0; regcomp_or_die(&pstate->regex, regex_string, cflags); rval_evaluator_t* pevaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t)); pevaluator->pvstate = pstate; pevaluator->pprocess_func = rval_evaluator_x_se_func; pevaluator->pfree_func = rval_evaluator_x_se_free; return pevaluator; }
rval_evaluator_t* rval_evaluator_alloc_from_x_srs_func(mv_ternary_arg2_regex_func_t* pfunc, rval_evaluator_t* parg1, char* regex_string, int ignore_case, rval_evaluator_t* parg3) { rval_evaluator_x_srs_state_t* pstate = mlr_malloc_or_die(sizeof(rval_evaluator_x_srs_state_t)); pstate->pfunc = pfunc; pstate->parg1 = parg1; int cflags = ignore_case ? REG_ICASE : 0; regcomp_or_die(&pstate->regex, regex_string, cflags); pstate->psb = sb_alloc(MV_SB_ALLOC_LENGTH); pstate->parg3 = parg3; rval_evaluator_t* pevaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t)); pevaluator->pvstate = pstate; pevaluator->pprocess_func = rval_evaluator_x_srs_func; pevaluator->pfree_func = rval_evaluator_x_srs_free; return pevaluator; }
/* * Load background file frequencies into the array. */ ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) { regmatch_t matches[4]; STR_T *line; char chunk[BG_CHUNK_SIZE+1], letter[2], *key; int size, terminate, offset, i; FILE *fp; regex_t bgfreq; double freq; RBTREE_T *letters; RBNODE_T *node; regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED); letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free); line = str_create(100); if (!(fp = fopen(bg_filename, "r"))) { die("Unable to open background file \"%s\" for reading.\n", bg_filename); } terminate = feof(fp); while (!terminate) { size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp); chunk[size] = '\0'; terminate = feof(fp); offset = 0; while (offset < size) { // skip mac newline if (str_len(line) == 0 && chunk[offset] == '\r') { offset++; continue; } // find next new line for (i = offset; i < size; ++i) { if (chunk[i] == '\n') break; } // append portion up to the new line or end of chunk str_append(line, chunk+offset, i - offset); // read more if we didn't find a new line if (i == size && !terminate) break; // move the offset past the new line offset = i + 1; // handle windows new line if (str_char(line, -1) == '\r') str_truncate(line, -1); // remove everything to the right of a comment character for (i = 0; i < str_len(line); ++i) { if (str_char(line, i) == '#') { str_truncate(line, i); break; } } // check the line for a single letter followed by a number if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) { // parse the letter and frequency value regex_strncpy(matches+1, str_internal(line), letter, 2); freq = regex_dbl(matches+2, str_internal(line)); // check the frequency is acceptable if (freq < 0 || freq > 1) { die("The background file lists the illegal probability %g for " "the letter %s.\n", freq, letter); } else if (freq == 0) { die("The background file lists a probability of zero for the " "letter %s\n", letter); } if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq); } str_clear(line); } } // finished with the file so clean up file parsing stuff fclose(fp); str_destroy(line, FALSE); regfree(&bgfreq); // guess the alphabet if (*alph == INVALID_ALPH) { switch (rbtree_size(letters)) { case PROTEIN_ASIZE: *alph = PROTEIN_ALPH; break; case DNA_ASIZE: *alph = DNA_ALPH; break; default: die("Number of single character entries in background does not match " "an alphabet.\n"); } } // make the background if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE)); assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE)); init_array(-1, freqs); for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) { key = (char*)rbtree_key(node); i = alph_index(*alph, key[0]); freq = *((double*)rbtree_value(node)); if (i == -1) { die("Background contains letter %s which is not in the %s alphabet.\n", key, alph_name(*alph)); } if (get_array_item(i, freqs) != -1) { die("Background contains letter %s which has the same meaning as an " "already listed letter.\n", key); } set_array_item(i, freq, freqs); } // check that all items were set for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) { if (get_array_item(i, freqs) == -1) { die("Background is missing letter %c.\n", alph_char(*alph, i)); } } // disabled for backwards compatability (AMA test was failing) //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs); // calculate the values of the ambiguous letters from the concrete ones calc_ambigs(*alph, FALSE, freqs); // cleanup rbtree_destroy(letters); // return result return freqs; }