/*
 * Test whether <line> matches the extended regular expression <pattern>.
 *
 * Returns 0 on success; <*match> is then set to true if the pattern matched
 * and to false if it did not. If the pattern cannot be compiled, or if
 * matching fails with anything other than REG_NOMATCH, the error code is
 * handed to grep_error() together with <err>, -1 is returned and <*match> is
 * left untouched.
 *
 * The GT_UNUSED markers are kept from the original signature even though all
 * parameters are used.
 */
int gt_grep(GT_UNUSED bool *match, GT_UNUSED const char *pattern,
            GT_UNUSED const char *line, GtError *err)
{
  regex_t matcher;
  int rval, had_err = 0;
  gt_error_check(err);
  gt_assert(pattern && line);

  rval = tre_regcomp(&matcher, pattern, REG_EXTENDED | REG_NOSUB);
  if (rval) {
    /* After a failed compile the contents of <matcher> are unspecified, so it
       must not be passed to tre_regfree(); report and leave right away. */
    grep_error(rval, &matcher, err);
    return -1;
  }

  rval = tre_regexec(&matcher, line, 0, NULL, 0);
  if (rval && rval != REG_NOMATCH) {
    grep_error(rval, &matcher, err);
    had_err = -1;
  }
  /* <matcher> was compiled successfully, so it is released exactly once. */
  tre_regfree(&matcher);

  if (!had_err)
    *match = (rval == 0);
  return had_err;
}
void setupPrimarySecondary(char *primary, char *secondary, int bidirectional) { if (number_secondaries == 0 || secondary_compares == NULL) { secondary_compares = malloc(sizeof(secondaries_t)); } else { secondary_compares = realloc(secondary_compares, sizeof(secondaries_t) * (number_secondaries + 1)); } if (tre_regcomp(&secondary_compares[number_secondaries].primary_regex, primary, REG_EXTENDED | REG_ICASE) != 0 || tre_regcomp(&secondary_compares[number_secondaries].secondary_regex, secondary, REG_EXTENDED | REG_ICASE) != 0) { tre_regfree(&secondary_compares[number_secondaries].primary_regex); tre_regfree(&secondary_compares[number_secondaries].secondary_regex); number_secondaries--; ci_debug_printf(1, "Invalid REGEX In Setting parameter (PRIMARY_CATEGORY_REGEX: %s SECONDARY_CATEGORY_REGEX: %s BIDIRECTIONAL: %s)\n", primary, secondary, bidirectional ? "TRUE" : "FALSE" ); exit(-1); } secondary_compares[number_secondaries].bidirectional = bidirectional; number_secondaries++; }
// Widget callback: recompiles the pattern held by input_regex and re-runs the
// match/find tests against the current contents of buf_test.
//
// buf_out is cleared first; if the pattern does not compile, the text
// "regex is malformed" is appended to it and nothing else happens. An empty
// pattern is ignored entirely (buf_out is left as it was).
void input_regex_callback(Fl_Widget*, void*)
{
    if (input_regex->size() == 0) {
        return;
    }

    buf_out->text("");

    regex_t regex;
    if (tre_regcomp(&regex, input_regex->value(), REG_EXTENDED|REG_NEWLINE) != 0) {
        // Nothing was compiled, so there is nothing to free here.
        buf_out->append("regex is malformed\n");
        return;
    }

    // text() hands back a copy that this function releases with free() below.
    char *text = buf_test->text();
    do_test_match(&regex, text, buf_test->length());
    do_test_find(&regex, text, buf_test->length());

    tre_regfree(&regex);
    free(text);
}
/* Run one test with tre_reguexec */ static void test_reguexec(const char *str, const char *regex) { regex_t preg; tre_str_source *source; regmatch_t pmatch[5]; source = make_str_source(str); if (!source) return; tre_regcomp(&preg, regex, REG_EXTENDED); if (tre_reguexec(&preg, source, elementsof(pmatch), pmatch, 0) == 0) printf("Match: %d - %d\n", (int)pmatch[0].rm_so, (int)pmatch[0].rm_eo); free_str_source(source); tre_regfree(&preg); }
int main(int argc, char **argv) { regex_t preg; regmatch_t *pmatch = NULL; int i, subject_len, match_len, sub_len, errcode, ret = 1, match_count, done; char errbuf[256], *subject, *regex, *match_start, *sub; if( argc < 3 ) { printf("Usage: %s [subject] [regex]\n", argv[0]); return 0; } subject = strdup(argv[1]); subject_len = strlen(subject); regex = strdup(argv[2]); /* compile the regex */ errcode = tre_regcomp(&preg, regex, REG_EXTENDED); if(errcode) { tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); fprintf(stderr, "Error: regcomp() %s\n", errbuf); goto err; } /* allocate space for match and submatches */ pmatch = (regmatch_t*)calloc(sizeof(regmatch_t), preg.re_nsub+1); if(pmatch == NULL) { perror("Error calloc()"); goto err; } /* find all matches */ sub = subject; sub_len = subject_len; match_count = 0; done = 0; while(!done) { /* execute regex */ errcode = tre_regnexec(&preg, sub, sub_len, preg.re_nsub+1, pmatch, 0); switch(errcode) { case REG_OK: match_count++; /* loop through subpattern matches */ for(i=0; i < preg.re_nsub+1; i++) { match_len = pmatch[i].rm_eo - pmatch[i].rm_so; match_start = sub + pmatch[i].rm_so; if(i == 0) printf("Match [%d] %.*s\n", match_count, match_len, match_start); else printf(":: group [%d] %.*s\n", i, match_len, match_start); } /* set up for next regexec call */ sub_len -= pmatch[0].rm_eo; assert(sub_len >= 0); /* entire subject consumed */ if(sub_len == 0) { done = 1; break; } /* start next match were we left off */ sub += pmatch[0].rm_eo; break; case REG_NOMATCH: if(match_count == 0) puts("No matches found."); done = 1; break; case REG_ESPACE: fprintf(stderr,"Error: Out of memory.\n"); goto err; default: /* should not be here, abort */ assert(0); } } /* success */ ret = 0; err: /* cleanup */ if(regex) free(regex); if(subject) free(subject); if(&preg) tre_regfree(&preg); if(pmatch) free(pmatch); return ret; }
/* M A I N *******************************************************************/ int main(int argc, char *argv[]) { int opt_index; opt_index = process_options(argc, argv); if (opt_index >= argc) { fprintf(stderr, "%s : %s\n", PRG_NAME, "[err] specify a record string to process!"); exit(1); } if (argv[1]) strncpy (record, argv[opt_index], MAX_CHAR); if ( strlen(record) == 0 ) { fprintf(stdout, "Please enter a valid string to search pattern for!\n"); fprintf(stdout, "Usage: %s %s %s %s\n", PRG_NAME, "[COSTS]", "[PATTERN]", "[STRING]"); exit(1); } /* Testing patterns */ // char record[] = "ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG"; // char regexp[] = "GATTT"; // char regexp[] = "GTAAAGA"; // char regexp[] = "(G|T)A*GAT"; int comp_flags = REG_EXTENDED | REG_ICASE ; static regex_t preg; /* Compiled pattern to search for. */ static regaparams_t match_params; /* regexp matching parameters */ int errcode; regmatch_t pmatch = { 0, 0 }; /* matched pattern structure */ regamatch_t match; /* overall match structure */ memset(&match, 0, sizeof(match)); /* initialize the overall match struct */ match.pmatch = &pmatch; /* assign default pattern structure */ match.nmatch = 1; /* initialization of pmatch array */ /* setup the default match parameters */ tre_regaparams_default(&match_params); /* Set the maximum number of errors allowed for a record to match. 
*/ match_params.max_cost = max_cost; match_params.cost_ins = cost_ins; match_params.cost_del = cost_del; match_params.cost_subst = cost_subs; fprintf(stdout, "Default regex params set by TRE:\n"); fprintf(stdout, "\t%-12s : %4d\n", "cost_ins", match_params.cost_ins); fprintf(stdout, "\t%-12s : %4d\n", "cost_del", match_params.cost_del); fprintf(stdout, "\t%-12s : %4d\n", "cost_substr", match_params.cost_subst); fprintf(stdout, "\t%-12s : %4d\n", "max_cost", match_params.max_cost); fprintf(stdout, "\t%-12s : %4d\n", "max_ins", match_params.max_ins); fprintf(stdout, "\t%-12s : %4d\n", "max_del", match_params.max_del); fprintf(stdout, "\t%-12s : %4d\n", "max_subst", match_params.max_subst); fprintf(stdout, "\t%-12s : %4d\n", "max_err", match_params.max_err); fprintf(stdout, "\n\n"); /* Step 1: compile the regex */ errcode = tre_regcomp(&preg, regexp, comp_flags); if (errcode) { char errbuf[256]; tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); fprintf(stderr, "%s: %s: %s\n", PRG_NAME, "Error in search pattern", errbuf); exit(1); } // if (tre_regexec(&delim, "", 0, NULL, 0) == REG_OK) // { // fprintf(stderr, "%s: %s\n", PRG_NAME, // "Record delimiter pattern must not match an empty string"); // exit(1); // } /* Step 2: search for the pattern in the haystack */ errcode = tre_regaexec(&preg, record, &match, match_params, 0); if (errcode == REG_OK) { fprintf(stdout, "Found match!\n"); fprintf(stdout, "\t%10s : %s\n", "record", record); fprintf(stdout, "\t%10s : %s\n", "pattern", regexp); fprintf(stdout, "\t%10s : %4d\n", "cost", match.cost); fprintf(stdout, "\t%10s : %4d\n", "num_ins", match.num_ins); fprintf(stdout, "\t%10s : %4d\n", "num_del", match.num_del); fprintf(stdout, "\t%10s : %4d\n", "num_subst", match.num_subst); fprintf(stdout, "\t%10s : %2d - %2d\n", "char pos", pmatch.rm_so, pmatch.rm_eo); } else { fprintf(stdout, "Found no matches!\n"); fprintf(stdout, "%6s : %s\n", "regexp", regexp); fprintf(stdout, "%6s : %s\n", "record", record); } return 0; }
/*
 * Read records in "tag: value" format from a connection.
 *
 * args, in order:
 *   1. the connection number (passed to getConnection()),
 *   2. the field names to record, coerced to a character vector; when it is
 *      empty every field encountered is recorded,
 *   3. field names for which field_is_foldable_p() decides whether leading
 *      and trailing whitespace of the value is stripped ("folding").
 *
 * Blank lines separate records, lines starting with blanks continue the
 * current field, and a continuation line consisting of a single '.' between
 * whitespace stands for an empty line inside a value.
 *
 * Returns a character matrix with one row per record and one column per
 * field; the column names are the field names.  Errors are raised through
 * error() for unreadable connections, allocation failures, a continuation
 * line at the start of a record and lines without a tag.
 *
 * NOTE(review): error() is called while `buf' and the compiled regexes are
 * still live; whether they are reclaimed on that path is not visible here.
 */
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env)
{
    int nwhat, nret, nc, nr, m, k, lastm, need;
    Rboolean blank_skip, field_skip = FALSE;
    int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often
    char *line, *buf;
    regex_t blankline, contline, trailblank, regline, eblankline;
    regmatch_t regmatch[1];
    SEXP file, what, what2, retval, retval2, dims, dimnames;
    Rconnection con = NULL;
    Rboolean wasopen, is_eblankline;
    RCNTXT cntxt;

    SEXP fold_excludes;
    Rboolean field_fold = TRUE, has_fold_excludes;
    const char *field_name;
    int offset = 0; /* -Wall */

    checkArity(op, args);

    file = CAR(args);
    con = getConnection(asInteger(file));
    wasopen = con->isopen;
    if(!wasopen) {
	if(!con->open(con)) error(_("cannot open the connection"));
	/* Set up a context which will close the connection on error */
	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
		     R_NilValue, R_NilValue);
	cntxt.cend = &con_cleanup;
	cntxt.cenddata = con;
    }
    if(!con->canread) error(_("cannot read from this connection"));

    args = CDR(args);
    PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */
    nwhat = LENGTH(what);
    dynwhat = (nwhat == 0);

    args = CDR(args);
    PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP));
    has_fold_excludes = (LENGTH(fold_excludes) > 0);

    buf = (char *) malloc(buflen);
    if(!buf) error(_("could not allocate memory for 'read.dcf'"));
    nret = 20;
    /* it is easier if we first have a record per column */
    PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret));

    /* These used to use [:blank:] but that can match \xa0 as part of
       a UTF-8 character (and is nbspace on Windows). */
    /* The flags are combined with '|': the former '&' of two distinct flag
       bits evaluated to 0 and silently dropped both of them. */
    tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB | REG_EXTENDED);
    tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED);
    tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED);
    tre_regcomp(&regline, "^[^:]+:[[:blank:]]*", REG_EXTENDED);
    tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED);

    k = 0;
    lastm = -1; /* index of the field currently being recorded */
    blank_skip = TRUE;
    void *vmax = vmaxget();
    while((line = Rconn_getline2(con))) {
	if(strlen(line) == 0 ||
	   tre_regexecb(&blankline, line, 0, 0, 0) == 0) {
	    /* A blank line.  The first one after a record ends a new
	     * record, subsequent ones are skipped */
	    if(!blank_skip) {
		k++;
		if(k > nret - 1){
		    /* out of columns: double the number of records held */
		    nret *= 2;
		    PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret));
		    transferVector(retval2, retval);
		    UNPROTECT_PTR(retval);
		    retval = retval2;
		}
		blank_skip = TRUE;
		lastm = -1;
		field_skip = FALSE;
		field_fold = TRUE;
	    }
	} else {
	    blank_skip = FALSE;
	    if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) {
		/* A continuation line: wrong if at the beginning of a
		   record. */
		if((lastm == -1) && !field_skip) {
		    /* Shorten the line for the message, but never write
		       beyond the end of a line shorter than 20 bytes. */
		    if(strlen(line) > 20) line[20] = '\0';
		    error(_("Found continuation line starting '%s ...' at begin of record."),
			  line);
		}
		if(lastm >= 0) {
		    /* old value + '\n' + terminating NUL */
		    need = (int) strlen(CHAR(STRING_ELT(retval,
							lastm + nwhat * k))) + 2;
		    if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) {
			is_eblankline = TRUE;
		    } else {
			is_eblankline = FALSE;
			if(field_fold) {
			    offset = regmatch[0].rm_eo;
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			} else {
			    offset = 0;
			}
			need += (int) strlen(line + offset);
		    }
		    if(buflen < need) {
			char *tmp = (char *) realloc(buf, need);
			if(!tmp) {
			    free(buf);
			    error(_("could not allocate memory for 'read.dcf'"));
			} else buf = tmp;
			buflen = need;
		    }
		    strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k)));
		    strcat(buf, "\n");
		    if(!is_eblankline) strcat(buf, line + offset);
		    SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf));
		}
	    } else {
		if(tre_regexecb(&regline, line, 1, regmatch, 0) == 0) {
		    for(m = 0; m < nwhat; m++){
			whatlen = (int) strlen(CHAR(STRING_ELT(what, m)));
			if(strlen(line) > (size_t) whatlen &&
			   line[whatlen] == ':' &&
			   strncmp(CHAR(STRING_ELT(what, m)),
				   line, whatlen) == 0) {
			    /* An already known field we are recording. */
			    lastm = m;
			    field_skip = FALSE;
			    field_name = CHAR(STRING_ELT(what, lastm));
			    if(has_fold_excludes) {
				field_fold =
				    field_is_foldable_p(field_name,
							fold_excludes);
			    }
			    if(field_fold) {
				offset = regmatch[0].rm_eo;
				/* Also remove trailing whitespace. */
				if((tre_regexecb(&trailblank, line, 1,
						 regmatch, 0) == 0))
				    line[regmatch[0].rm_so] = '\0';
			    } else {
				offset = 0;
			    }
			    SET_STRING_ELT(retval, m + nwhat * k,
					   mkChar(line + offset));
			    break;
			} else {
			    /* This is a field, but not one prespecified */
			    lastm = -1;
			    field_skip = TRUE;
			}
		    }
		    if(dynwhat && (lastm == -1)) {
			/* A previously unseen field and we are
			 * recording all fields */
			field_skip = FALSE;
			PROTECT(what2 = allocVector(STRSXP, nwhat+1));
			PROTECT(retval2 = allocMatrixNA(STRSXP,
							nrows(retval)+1,
							ncols(retval)));
			if(nwhat > 0) {
			    copyVector(what2, what);
			    for(nr = 0; nr < nrows(retval); nr++){
				for(nc = 0; nc < ncols(retval); nc++){
				    SET_STRING_ELT(retval2, nr+nc*nrows(retval2),
						   STRING_ELT(retval,
							      nr+nc*nrows(retval)));
				}
			    }
			}
			UNPROTECT_PTR(retval);
			UNPROTECT_PTR(what);
			retval = retval2;
			what = what2;
			/* Make sure enough space was used */
			need = (int) (Rf_strchr(line, ':') - line + 1);
			if(buflen < need){
			    char *tmp = (char *) realloc(buf, need);
			    if(!tmp) {
				free(buf);
				error(_("could not allocate memory for 'read.dcf'"));
			    } else buf = tmp;
			    buflen = need;
			}
			/* the new field name is everything before the ':' */
			strncpy(buf, line, Rf_strchr(line, ':') - line);
			buf[Rf_strchr(line, ':') - line] = '\0';
			SET_STRING_ELT(what, nwhat, mkChar(buf));
			nwhat++;
			/* lastm uses C indexing, hence nwhat - 1 */
			lastm = nwhat - 1;
			field_name = CHAR(STRING_ELT(what, lastm));
			if(has_fold_excludes) {
			    field_fold =
				field_is_foldable_p(field_name,
						    fold_excludes);
			}
			offset = regmatch[0].rm_eo;
			if(field_fold) {
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			}
			SET_STRING_ELT(retval, lastm + nwhat * k,
				       mkChar(line + offset));
		    }
		} else {
		    /* Must be a regular line with no tag ... */
		    /* Same guarded truncation as for continuation lines. */
		    if(strlen(line) > 20) line[20] = '\0';
		    error(_("Line starting '%s ...' is malformed!"), line);
		}
	    }
	}
    }
    vmaxset(vmax);
    if(!wasopen) {endcontext(&cntxt); con->close(con);}
    free(buf);
    tre_regfree(&blankline);
    tre_regfree(&contline);
    tre_regfree(&trailblank);
    tre_regfree(&regline);
    tre_regfree(&eblankline);

    if(!blank_skip) k++; /* the last record was not followed by a blank line */

    /* and now transpose the whole matrix */
    PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what)));
    copyMatrix(retval2, retval, 1);
    PROTECT(dimnames = allocVector(VECSXP, 2));
    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = k;
    INTEGER(dims)[1] = LENGTH(what);
    SET_VECTOR_ELT(dimnames, 1, what);
    setAttrib(retval2, R_DimSymbol, dims);
    setAttrib(retval2, R_DimNamesSymbol, dimnames);
    /* what, fold_excludes, retval, retval2, dimnames, dims */
    UNPROTECT(6);
    return(retval2);
}