예제 #1
0
파일: grep.c 프로젝트: lparsons/genometools
int gt_grep(GT_UNUSED bool *match, GT_UNUSED const char *pattern,
            GT_UNUSED const char *line, GtError *err)
{
  regex_t matcher;
  int rval, had_err = 0;
  gt_error_check(err);
  gt_assert(pattern && line);
  if ((rval = tre_regcomp(&matcher, pattern, REG_EXTENDED | REG_NOSUB))) {
    grep_error(rval, &matcher, err);
    had_err = -1;
  }
  if (!had_err) {
    rval = tre_regexec(&matcher, line, 0, NULL, 0);
    if (rval && rval != REG_NOMATCH) {
      grep_error(rval, &matcher, err);
      had_err = -1;
    }
  }
  tre_regfree(&matcher);
  if (!had_err) {
    if (rval)
      *match = false;
    else
      *match = true;
  }
  return had_err;
}
예제 #2
0
void setupPrimarySecondary(char *primary, char *secondary, int bidirectional)
{

    if (number_secondaries == 0 || secondary_compares == NULL) {
        secondary_compares = malloc(sizeof(secondaries_t));
    } else {
        secondary_compares = realloc(secondary_compares, sizeof(secondaries_t) * (number_secondaries + 1));
    }

    if (tre_regcomp(&secondary_compares[number_secondaries].primary_regex, primary, REG_EXTENDED | REG_ICASE) != 0 ||
            tre_regcomp(&secondary_compares[number_secondaries].secondary_regex, secondary, REG_EXTENDED | REG_ICASE) != 0) {
        tre_regfree(&secondary_compares[number_secondaries].primary_regex);
        tre_regfree(&secondary_compares[number_secondaries].secondary_regex);
        number_secondaries--;
        ci_debug_printf(1, "Invalid REGEX In Setting parameter (PRIMARY_CATEGORY_REGEX: %s SECONDARY_CATEGORY_REGEX: %s BIDIRECTIONAL: %s)\n", primary, secondary, bidirectional ? "TRUE" : "FALSE" );
        exit(-1);
    }
    secondary_compares[number_secondaries].bidirectional = bidirectional;

    number_secondaries++;
}
예제 #3
0
파일: regina.cpp 프로젝트: henkman/regina
void input_regex_callback(Fl_Widget*, void*)
{
	if(input_regex->size() == 0) {
		return;
	}
	buf_out->text("");
	regex_t regex;
	if(tre_regcomp(&regex, input_regex->value(), REG_EXTENDED|REG_NEWLINE) != 0) {
		buf_out->append("regex is malformed\n");
		return;
	}
	char *text = buf_test->text();
	do_test_match(&regex, text, buf_test->length());
	do_test_find(&regex, text, buf_test->length());
	tre_regfree(&regex);
	free(text);
}
예제 #4
0
/* Run one test with tre_reguexec */
static void
test_reguexec(const char *str, const char *regex)
{
    regex_t preg;
    tre_str_source *source;
    regmatch_t pmatch[5];

    source = make_str_source(str);
    if (!source)
        return;

    tre_regcomp(&preg, regex, REG_EXTENDED);
    if (tre_reguexec(&preg, source, elementsof(pmatch), pmatch, 0) == 0)
        printf("Match: %d - %d\n", (int)pmatch[0].rm_so, (int)pmatch[0].rm_eo);

    free_str_source(source);
    tre_regfree(&preg);
}
예제 #5
0
파일: main.c 프로젝트: nilshell/tre-util
int main(int argc, char **argv)
{
   regex_t preg;
   regmatch_t *pmatch = NULL;
   int i, subject_len, match_len,
       sub_len, errcode, ret = 1,
       match_count, done;
   char errbuf[256], *subject, *regex, *match_start,
        *sub;

   if( argc < 3 )
   {
      printf("Usage: %s [subject] [regex]\n", argv[0]);
      return 0;
   }

   subject = strdup(argv[1]);
   subject_len = strlen(subject);
   regex = strdup(argv[2]);

   /* compile the regex */
   errcode = tre_regcomp(&preg, regex, REG_EXTENDED);
   if(errcode)
   {
      tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
      fprintf(stderr, "Error: regcomp() %s\n", errbuf);
      goto err;
   }

   /* allocate space for match and submatches */
   pmatch = (regmatch_t*)calloc(sizeof(regmatch_t),
                                preg.re_nsub+1);
   if(pmatch == NULL)
   {
      perror("Error calloc()");
      goto err;
   }

   /* find all matches */
   sub = subject;
   sub_len = subject_len;
   match_count = 0;
   done = 0;

   while(!done)
   {
      /* execute regex */
      errcode = tre_regnexec(&preg, sub, sub_len,
                             preg.re_nsub+1,
                             pmatch, 0);

      switch(errcode)
      {
         case REG_OK:
            match_count++;

            /* loop through subpattern matches */
            for(i=0; i < preg.re_nsub+1; i++)
            {
               match_len = pmatch[i].rm_eo - pmatch[i].rm_so;
               match_start = sub + pmatch[i].rm_so;

               if(i == 0)
                  printf("Match [%d] %.*s\n", match_count,
                         match_len, match_start);
               else
                  printf(":: group [%d] %.*s\n", i, match_len,
                         match_start);
            }

            /* set up for next regexec call */
            sub_len -= pmatch[0].rm_eo;
            assert(sub_len >= 0);

            /* entire subject consumed */
            if(sub_len == 0)
            {
               done = 1;
               break;
            }

            /* start next match were we left off */
            sub += pmatch[0].rm_eo;
            break;

         case REG_NOMATCH:
            if(match_count == 0)
               puts("No matches found.");
            done = 1;
            break;

         case REG_ESPACE:
            fprintf(stderr,"Error: Out of memory.\n");
            goto err;

         default:
            /* should not be here, abort */
            assert(0);
      }
   }

   /* success */
   ret = 0;

err:
   /* cleanup */
   if(regex)
      free(regex);
   if(subject)
      free(subject);
   if(&preg)
      tre_regfree(&preg);
   if(pmatch)
      free(pmatch);

   return ret;
}
예제 #6
0
/* M A I N *******************************************************************/
int main(int argc, char *argv[]) {
    int opt_index;

    opt_index = process_options(argc, argv);

    if (opt_index >= argc) {
        fprintf(stderr, "%s : %s\n",
                        PRG_NAME, "[err] specify a record string to process!");
        exit(1);
    }

    if (argv[1])
      strncpy (record, argv[opt_index], MAX_CHAR);

    if ( strlen(record) == 0 ) {
        fprintf(stdout, "Please enter a valid string to search pattern for!\n");
        fprintf(stdout, "Usage: %s %s %s %s\n", PRG_NAME, "[COSTS]", "[PATTERN]", "[STRING]");
        exit(1);
    }

/* Testing patterns */
//    char record[] = "ATTTACTATGTAAAGATAGAAGGAATAAGGTGAAG";
//    char regexp[]   = "GATTT";
//    char regexp[]   = "GTAAAGA";
//    char regexp[]   = "(G|T)A*GAT";

    int comp_flags  = REG_EXTENDED | REG_ICASE ;

    static regex_t preg;	      /* Compiled pattern to search for. */
    static regaparams_t match_params; /* regexp matching parameters */
    int errcode;

    regmatch_t pmatch = { 0, 0 };     /* matched pattern structure */
    regamatch_t match;                /* overall match structure */

    memset(&match, 0, sizeof(match)); /* initialize the overall match struct */
    match.pmatch = &pmatch;           /* assign default pattern structure */
    match.nmatch = 1;                 /* initialization of pmatch array */

    /* setup the default match parameters */
    tre_regaparams_default(&match_params);
    /* Set the maximum number of errors allowed for a record to match. */
    match_params.max_cost   = max_cost;
    match_params.cost_ins   = cost_ins;
    match_params.cost_del   = cost_del;
    match_params.cost_subst = cost_subs;

    fprintf(stdout, "Default regex params set by TRE:\n");
    fprintf(stdout, "\t%-12s : %4d\n", "cost_ins", match_params.cost_ins);
    fprintf(stdout, "\t%-12s : %4d\n", "cost_del", match_params.cost_del);
    fprintf(stdout, "\t%-12s : %4d\n", "cost_substr", match_params.cost_subst);
    fprintf(stdout, "\t%-12s : %4d\n", "max_cost", match_params.max_cost);
    fprintf(stdout, "\t%-12s : %4d\n", "max_ins", match_params.max_ins);
    fprintf(stdout, "\t%-12s : %4d\n", "max_del", match_params.max_del);
    fprintf(stdout, "\t%-12s : %4d\n", "max_subst", match_params.max_subst);
    fprintf(stdout, "\t%-12s : %4d\n", "max_err", match_params.max_err);
    fprintf(stdout, "\n\n");

    /* Step 1: compile the regex */
    errcode = tre_regcomp(&preg, regexp, comp_flags);
    if (errcode)
      {
        char errbuf[256];
        tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
        fprintf(stderr, "%s: %s: %s\n",
  	      PRG_NAME, "Error in search pattern", errbuf);
        exit(1);
      }

//    if (tre_regexec(&delim, "", 0, NULL, 0) == REG_OK)
//    {
//      fprintf(stderr, "%s: %s\n", PRG_NAME,
//	      "Record delimiter pattern must not match an empty string");
//      exit(1);
//    }

    /* Step 2: search for the pattern in the haystack */

    errcode = tre_regaexec(&preg, record, &match, match_params, 0);
    if (errcode == REG_OK) {
        fprintf(stdout, "Found match!\n");
        fprintf(stdout, "\t%10s : %s\n", "record", record);
        fprintf(stdout, "\t%10s : %s\n", "pattern", regexp);
        fprintf(stdout, "\t%10s : %4d\n", "cost", match.cost);
        fprintf(stdout, "\t%10s : %4d\n", "num_ins", match.num_ins);
        fprintf(stdout, "\t%10s : %4d\n", "num_del", match.num_del);
        fprintf(stdout, "\t%10s : %4d\n", "num_subst", match.num_subst);
        fprintf(stdout, "\t%10s : %2d - %2d\n", "char pos", 
                                                pmatch.rm_so, pmatch.rm_eo);
    }
    else {
        fprintf(stdout, "Found no matches!\n");
        fprintf(stdout, "%6s : %s\n", "regexp", regexp);
        fprintf(stdout, "%6s : %s\n", "record", record);

    }

    return 0;
}
예제 #7
0
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env)
{
    int nwhat, nret, nc, nr, m, k, lastm, need;
    Rboolean blank_skip, field_skip = FALSE;
    int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often
    char *line, *buf;
    regex_t blankline, contline, trailblank, regline, eblankline;
    regmatch_t regmatch[1];
    SEXP file, what, what2, retval, retval2, dims, dimnames;
    Rconnection con = NULL;
    Rboolean wasopen, is_eblankline;
    RCNTXT cntxt;

    SEXP fold_excludes;
    Rboolean field_fold = TRUE, has_fold_excludes;
    const char *field_name;
    int offset = 0; /* -Wall */

    checkArity(op, args);

    file = CAR(args);
    con = getConnection(asInteger(file));
    wasopen = con->isopen;
    if(!wasopen) {
	if(!con->open(con)) error(_("cannot open the connection"));
	/* Set up a context which will close the connection on error */
	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
		     R_NilValue, R_NilValue);
	cntxt.cend = &con_cleanup;
	cntxt.cenddata = con;
    }
    if(!con->canread) error(_("cannot read from this connection"));

    args = CDR(args);
    PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */
    nwhat = LENGTH(what);
    dynwhat = (nwhat == 0);

    args = CDR(args);
    PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP));
    has_fold_excludes = (LENGTH(fold_excludes) > 0);

    buf = (char *) malloc(buflen);
    if(!buf) error(_("could not allocate memory for 'read.dcf'"));
    nret = 20;
    /* it is easier if we first have a record per column */
    PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret));

    /* These used to use [:blank:] but that can match \xa0 as part of
       a UTF-8 character (and is nbspace on Windows). */ 
    tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB & REG_EXTENDED);
    tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED);
    tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED);
    tre_regcomp(&regline, "^[^:]+:[[:blank:]]*", REG_EXTENDED);
    tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED);

    k = 0;
    lastm = -1; /* index of the field currently being recorded */
    blank_skip = TRUE;
    void *vmax = vmaxget();
    while((line = Rconn_getline2(con))) {
	if(strlen(line) == 0 ||
	   tre_regexecb(&blankline, line, 0, 0, 0) == 0) {
	    /* A blank line.  The first one after a record ends a new
	     * record, subsequent ones are skipped */
	    if(!blank_skip) {
		k++;
		if(k > nret - 1){
		    nret *= 2;
		    PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret));
		    transferVector(retval2, retval);
		    UNPROTECT_PTR(retval);
		    retval = retval2;
		}
		blank_skip = TRUE;
		lastm = -1;
		field_skip = FALSE;
		field_fold = TRUE;
	    }
	} else {
	    blank_skip = FALSE;
	    if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) {
		/* A continuation line: wrong if at the beginning of a
		   record. */
		if((lastm == -1) && !field_skip) {
		    line[20] = '\0';
		    error(_("Found continuation line starting '%s ...' at begin of record."),
			  line);
		}
		if(lastm >= 0) {
		    need = (int) strlen(CHAR(STRING_ELT(retval,
							lastm + nwhat * k))) + 2;
		    if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) {
			is_eblankline = TRUE;
		    } else {
			is_eblankline = FALSE;
			if(field_fold) {
			    offset = regmatch[0].rm_eo;
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			} else {
			    offset = 0;
			}
			need += (int) strlen(line + offset);
		    }
		    if(buflen < need) {
			char *tmp = (char *) realloc(buf, need);
			if(!tmp) {
			    free(buf);
			    error(_("could not allocate memory for 'read.dcf'"));
			} else buf = tmp;
			buflen = need;
		    }
		    strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k)));
		    strcat(buf, "\n");
		    if(!is_eblankline) strcat(buf, line + offset);
		    SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf));
		}
	    } else {
		if(tre_regexecb(&regline, line, 1, regmatch, 0) == 0) {
		    for(m = 0; m < nwhat; m++){
			whatlen = (int) strlen(CHAR(STRING_ELT(what, m)));
			if(strlen(line) > whatlen &&
			   line[whatlen] == ':' &&
			   strncmp(CHAR(STRING_ELT(what, m)),
				   line, whatlen) == 0) {
			    /* An already known field we are recording. */
			    lastm = m;
			    field_skip = FALSE;
			    field_name = CHAR(STRING_ELT(what, lastm));
			    if(has_fold_excludes) {
				field_fold =
				    field_is_foldable_p(field_name,
							fold_excludes);
			    }
			    if(field_fold) {
				offset = regmatch[0].rm_eo;
				/* Also remove trailing whitespace. */
				if((tre_regexecb(&trailblank, line, 1,
						 regmatch, 0) == 0))
				    line[regmatch[0].rm_so] = '\0';
			    } else {
				offset = 0;
			    }
			    SET_STRING_ELT(retval, m + nwhat * k,
					   mkChar(line + offset));
			    break;
			} else {
			    /* This is a field, but not one prespecified */
			    lastm = -1;
			    field_skip = TRUE;
			}
		    }
		    if(dynwhat && (lastm == -1)) {
			/* A previously unseen field and we are
			 * recording all fields */
			field_skip = FALSE;
			PROTECT(what2 = allocVector(STRSXP, nwhat+1));
			PROTECT(retval2 = allocMatrixNA(STRSXP,
							nrows(retval)+1,
							ncols(retval)));
			if(nwhat > 0) {
			    copyVector(what2, what);
			    for(nr = 0; nr < nrows(retval); nr++){
				for(nc = 0; nc < ncols(retval); nc++){
				    SET_STRING_ELT(retval2, nr+nc*nrows(retval2),
						   STRING_ELT(retval,
							      nr+nc*nrows(retval)));
				}
			    }
			}
			UNPROTECT_PTR(retval);
			UNPROTECT_PTR(what);
			retval = retval2;
			what = what2;
			/* Make sure enough space was used */
			need = (int) (Rf_strchr(line, ':') - line + 1);
			if(buflen < need){
			    char *tmp = (char *) realloc(buf, need);
			    if(!tmp) {
				free(buf);
				error(_("could not allocate memory for 'read.dcf'"));
			    } else buf = tmp;
			    buflen = need;
			}
			strncpy(buf, line, Rf_strchr(line, ':') - line);
			buf[Rf_strchr(line, ':') - line] = '\0';
			SET_STRING_ELT(what, nwhat, mkChar(buf));
			nwhat++;
			/* lastm uses C indexing, hence nwhat - 1 */
			lastm = nwhat - 1;
			field_name = CHAR(STRING_ELT(what, lastm));
			if(has_fold_excludes) {
			    field_fold =
				field_is_foldable_p(field_name,
						    fold_excludes);
			}
			offset = regmatch[0].rm_eo;
			if(field_fold) {
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			}
			SET_STRING_ELT(retval, lastm + nwhat * k,
				       mkChar(line + offset));
		    }
		} else {
		    /* Must be a regular line with no tag ... */
		    line[20] = '\0';
		    error(_("Line starting '%s ...' is malformed!"), line);
		}
	    }
	}
    }
    vmaxset(vmax);
    if(!wasopen) {endcontext(&cntxt); con->close(con);}
    free(buf);
    tre_regfree(&blankline);
    tre_regfree(&contline);
    tre_regfree(&trailblank);
    tre_regfree(&regline);
    tre_regfree(&eblankline);

    if(!blank_skip) k++;

    /* and now transpose the whole matrix */
    PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what)));
    copyMatrix(retval2, retval, 1);

    PROTECT(dimnames = allocVector(VECSXP, 2));
    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = k;
    INTEGER(dims)[1] = LENGTH(what);
    SET_VECTOR_ELT(dimnames, 1, what);
    setAttrib(retval2, R_DimSymbol, dims);
    setAttrib(retval2, R_DimNamesSymbol, dimnames);
    UNPROTECT(6);
    return(retval2);
}