Exemple #1
0
void crm114__regfree(regex_t *preg)
{
  /* Workaround for a problem seen with tre 0.8.0:

     If tre is built to expose the system regex interface, it #defines
     'tre_regfree' to 'regfree'.  When the linker has already bound
     'regfree' to libc's implementation (for instance when this code
     is loaded from inside the Python interpreter), a context that was
     set up by 'tre_regncomp' (which is not #defined away) would end
     up being released by libc's 'regfree', and that segfaults.

     'tre_free' from libtre is still reachable, so it is used to
     release the tre-allocated state whenever 'tre_regfree' turns out
     to live in an object whose file name does not contain "libtre".
   */

  Dl_info origin;
  int resolved_outside_tre;

  memset(&origin, 0, sizeof(origin));

  /* Ask the dynamic linker which object actually provides tre_regfree. */
  resolved_outside_tre = dladdr((void *)tre_regfree, &origin)
                         && origin.dli_fname
                         && !strstr(origin.dli_fname, "libtre");

  if (resolved_outside_tre)
    tre_free(preg);
  else
    tre_regfree(preg);
}
Exemple #2
0
/*
 * Test whether <line> matches the extended regular expression <pattern>.
 *
 * On success 0 is returned and <*match> is set to true if the pattern
 * matched and to false otherwise.  If compiling or executing the pattern
 * fails, the error code is passed to grep_error() together with <err>,
 * -1 is returned and <*match> is left untouched.
 *
 * The compiled pattern is released only if compilation succeeded: freeing
 * a regex_t after a failed compile is undefined.
 *
 * The GT_UNUSED annotations are kept as they were so the declaration stays
 * unchanged, although all three parameters are used.
 */
int gt_grep(GT_UNUSED bool *match, GT_UNUSED const char *pattern,
            GT_UNUSED const char *line, GtError *err)
{
  regex_t matcher;
  int rval;
  gt_error_check(err);
  gt_assert(pattern && line);

  rval = tre_regcomp(&matcher, pattern, REG_EXTENDED | REG_NOSUB);
  if (rval) {
    /* nothing was compiled, so there is nothing to free */
    grep_error(rval, &matcher, err);
    return -1;
  }

  rval = tre_regexec(&matcher, line, 0, NULL, 0);
  if (rval && rval != REG_NOMATCH) {
    grep_error(rval, &matcher, err);
    tre_regfree(&matcher);
    return -1;
  }
  tre_regfree(&matcher);

  /* rval is now either 0 (match) or REG_NOMATCH */
  *match = (rval == 0);
  return 0;
}
Exemple #3
0
/* __gc handler: releases the compiled regex and the match buffer held by
   the userdata, at most once. */
static int Ltre_gc (lua_State *L) {
  TPosix *self = check_ud (L);

  /* Already released, e.g. by an explicit "manual" __gc call. */
  if (self->freed != 0)
    return 0;

  self->freed = 1;
  tre_regfree (&self->r);
  Lfree (L, self->match, (ALG_NSUB(self) + 1) * sizeof (regmatch_t));
  return 0;
}
Exemple #4
0
/* __gc handler: releases the compiled regex and the match buffer held by
   the userdata, at most once. */
static int Ltre_gc (lua_State *L) {
    TPosix *self = check_ud (L);

    /* Already released, e.g. by an explicit "manual" __gc call. */
    if (self->freed != 0)
        return 0;

    self->freed = 1;
    tre_regfree (&self->r);
    free (self->match);
    return 0;
}
void setupPrimarySecondary(char *primary, char *secondary, int bidirectional)
{

    if (number_secondaries == 0 || secondary_compares == NULL) {
        secondary_compares = malloc(sizeof(secondaries_t));
    } else {
        secondary_compares = realloc(secondary_compares, sizeof(secondaries_t) * (number_secondaries + 1));
    }

    if (tre_regcomp(&secondary_compares[number_secondaries].primary_regex, primary, REG_EXTENDED | REG_ICASE) != 0 ||
            tre_regcomp(&secondary_compares[number_secondaries].secondary_regex, secondary, REG_EXTENDED | REG_ICASE) != 0) {
        tre_regfree(&secondary_compares[number_secondaries].primary_regex);
        tre_regfree(&secondary_compares[number_secondaries].secondary_regex);
        number_secondaries--;
        ci_debug_printf(1, "Invalid REGEX In Setting parameter (PRIMARY_CATEGORY_REGEX: %s SECONDARY_CATEGORY_REGEX: %s BIDIRECTIONAL: %s)\n", primary, secondary, bidirectional ? "TRUE" : "FALSE" );
        exit(-1);
    }
    secondary_compares[number_secondaries].bidirectional = bidirectional;

    number_secondaries++;
}
Exemple #6
0
// Widget callback: compiles the pattern currently held by input_regex and
// runs do_test_match / do_test_find over a copy of buf_test's text.
// buf_out is cleared first; a pattern that fails to compile is reported
// there.  An empty pattern does nothing at all.
void input_regex_callback(Fl_Widget*, void*)
{
	if(input_regex->size() == 0)
		return;

	buf_out->text("");

	regex_t compiled;
	const int rc = tre_regcomp(&compiled, input_regex->value(), REG_EXTENDED|REG_NEWLINE);
	if(rc != 0) {
		buf_out->append("regex is malformed\n");
		return;
	}

	// text() hands back a copy that is released with free() below.
	char *sample = buf_test->text();
	do_test_match(&compiled, sample, buf_test->length());
	do_test_find(&compiled, sample, buf_test->length());

	tre_regfree(&compiled);
	free(sample);
}
Exemple #7
0
/*
 * Run one test with tre_reguexec.
 *
 * Compiles `regex` as an extended regular expression, matches it against
 * `str` through a tre_str_source and, on a match, prints the start and end
 * offsets of the overall match.  Returns silently if the string source
 * cannot be created; a pattern that fails to compile is reported on stderr
 * and no matching is attempted.
 */
static void
test_reguexec(const char *str, const char *regex)
{
    regex_t preg;
    tre_str_source *source;
    regmatch_t pmatch[5];
    int errcode;

    source = make_str_source(str);
    if (!source)
        return;

    errcode = tre_regcomp(&preg, regex, REG_EXTENDED);
    if (errcode != 0) {
        /* preg is not usable after a failed compile: neither execute
           nor free it. */
        fprintf(stderr, "Failed to compile regex '%s' (error %d)\n",
                regex, errcode);
        free_str_source(source);
        return;
    }

    if (tre_reguexec(&preg, source, elementsof(pmatch), pmatch, 0) == 0)
        printf("Match: %d - %d\n", (int)pmatch[0].rm_so, (int)pmatch[0].rm_eo);

    free_str_source(source);
    tre_regfree(&preg);
}
Exemple #8
0
int main(int argc, char **argv)
{
   regex_t preg;
   regmatch_t *pmatch = NULL;
   int i, subject_len, match_len,
       sub_len, errcode, ret = 1,
       match_count, done;
   char errbuf[256], *subject, *regex, *match_start,
        *sub;

   if( argc < 3 )
   {
      printf("Usage: %s [subject] [regex]\n", argv[0]);
      return 0;
   }

   subject = strdup(argv[1]);
   subject_len = strlen(subject);
   regex = strdup(argv[2]);

   /* compile the regex */
   errcode = tre_regcomp(&preg, regex, REG_EXTENDED);
   if(errcode)
   {
      tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
      fprintf(stderr, "Error: regcomp() %s\n", errbuf);
      goto err;
   }

   /* allocate space for match and submatches */
   pmatch = (regmatch_t*)calloc(sizeof(regmatch_t),
                                preg.re_nsub+1);
   if(pmatch == NULL)
   {
      perror("Error calloc()");
      goto err;
   }

   /* find all matches */
   sub = subject;
   sub_len = subject_len;
   match_count = 0;
   done = 0;

   while(!done)
   {
      /* execute regex */
      errcode = tre_regnexec(&preg, sub, sub_len,
                             preg.re_nsub+1,
                             pmatch, 0);

      switch(errcode)
      {
         case REG_OK:
            match_count++;

            /* loop through subpattern matches */
            for(i=0; i < preg.re_nsub+1; i++)
            {
               match_len = pmatch[i].rm_eo - pmatch[i].rm_so;
               match_start = sub + pmatch[i].rm_so;

               if(i == 0)
                  printf("Match [%d] %.*s\n", match_count,
                         match_len, match_start);
               else
                  printf(":: group [%d] %.*s\n", i, match_len,
                         match_start);
            }

            /* set up for next regexec call */
            sub_len -= pmatch[0].rm_eo;
            assert(sub_len >= 0);

            /* entire subject consumed */
            if(sub_len == 0)
            {
               done = 1;
               break;
            }

            /* start next match were we left off */
            sub += pmatch[0].rm_eo;
            break;

         case REG_NOMATCH:
            if(match_count == 0)
               puts("No matches found.");
            done = 1;
            break;

         case REG_ESPACE:
            fprintf(stderr,"Error: Out of memory.\n");
            goto err;

         default:
            /* should not be here, abort */
            assert(0);
      }
   }

   /* success */
   ret = 0;

err:
   /* cleanup */
   if(regex)
      free(regex);
   if(subject)
      free(subject);
   if(&preg)
      tre_regfree(&preg);
   if(pmatch)
      free(pmatch);

   return ret;
}
Exemple #9
0
/*
 * Read DCF-style records ("Field: value" lines, continuation lines that
 * start with blanks, records separated by blank lines) from a connection.
 *
 * args, in order:
 *   1. the connection, given as an integer handed to getConnection();
 *   2. the field names to record, coerced to a character vector; when it
 *      is empty every field encountered is recorded (dynwhat);
 *   3. names passed to field_is_foldable_p() to decide, per field, whether
 *      leading/trailing blanks are stripped ("folded") from its lines.
 *
 * Returns a character matrix with one row per record and one column per
 * field, with the field names as column dimnames.
 *
 * NOTE(review): buf and the five compiled regexes are released only on the
 * normal return path; the error() calls inside the loop presumably do not
 * return here -- confirm whether that leak matters.
 */
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env)
{
    int nwhat, nret, nc, nr, m, k, lastm, need;
    Rboolean blank_skip, field_skip = FALSE;
    int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often
    char *line, *buf;
    regex_t blankline, contline, trailblank, regline, eblankline;
    regmatch_t regmatch[1];
    SEXP file, what, what2, retval, retval2, dims, dimnames;
    Rconnection con = NULL;
    Rboolean wasopen, is_eblankline;
    RCNTXT cntxt;

    SEXP fold_excludes;
    Rboolean field_fold = TRUE, has_fold_excludes;
    const char *field_name;
    int offset = 0; /* -Wall */

    checkArity(op, args);

    file = CAR(args);
    con = getConnection(asInteger(file));
    wasopen = con->isopen;
    if(!wasopen) {
	if(!con->open(con)) error(_("cannot open the connection"));
	/* Set up a context which will close the connection on error */
	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
		     R_NilValue, R_NilValue);
	cntxt.cend = &con_cleanup;
	cntxt.cenddata = con;
    }
    if(!con->canread) error(_("cannot read from this connection"));

    args = CDR(args);
    PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */
    nwhat = LENGTH(what);
    dynwhat = (nwhat == 0);

    args = CDR(args);
    PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP));
    has_fold_excludes = (LENGTH(fold_excludes) > 0);

    buf = (char *) malloc(buflen);
    if(!buf) error(_("could not allocate memory for 'read.dcf'"));
    nret = 20;
    /* it is easier if we first have a record per column */
    PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret));

    /* These used to use [:blank:] but that can match \xa0 as part of
       a UTF-8 character (and is nbspace on Windows). */
    /* Flags are combined with '|': '&' of two distinct flag bits is 0. */
    tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB | REG_EXTENDED);
    tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED);
    tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED);
    tre_regcomp(&regline, "^[^:]+:[[:blank:]]*", REG_EXTENDED);
    tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED);

    k = 0;
    lastm = -1; /* index of the field currently being recorded */
    blank_skip = TRUE;
    void *vmax = vmaxget();
    while((line = Rconn_getline2(con))) {
	if(strlen(line) == 0 ||
	   tre_regexecb(&blankline, line, 0, 0, 0) == 0) {
	    /* A blank line.  The first one after a record ends a new
	     * record, subsequent ones are skipped */
	    if(!blank_skip) {
		k++;
		if(k > nret - 1){
		    /* out of columns: double the record capacity */
		    nret *= 2;
		    PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret));
		    transferVector(retval2, retval);
		    UNPROTECT_PTR(retval);
		    retval = retval2;
		}
		blank_skip = TRUE;
		lastm = -1;
		field_skip = FALSE;
		field_fold = TRUE;
	    }
	} else {
	    blank_skip = FALSE;
	    if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) {
		/* A continuation line: wrong if at the beginning of a
		   record. */
		if((lastm == -1) && !field_skip) {
		    /* Shorten the line for the message, but never write
		       beyond the end of a line shorter than 20 chars. */
		    if(strlen(line) > 20) line[20] = '\0';
		    error(_("Found continuation line starting '%s ...' at begin of record."),
			  line);
		}
		if(lastm >= 0) {
		    /* +2: room for the joining newline and the terminator */
		    need = (int) strlen(CHAR(STRING_ELT(retval,
							lastm + nwhat * k))) + 2;
		    if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) {
			is_eblankline = TRUE;
		    } else {
			is_eblankline = FALSE;
			if(field_fold) {
			    offset = regmatch[0].rm_eo;
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			} else {
			    offset = 0;
			}
			need += (int) strlen(line + offset);
		    }
		    if(buflen < need) {
			char *tmp = (char *) realloc(buf, need);
			if(!tmp) {
			    free(buf);
			    error(_("could not allocate memory for 'read.dcf'"));
			} else buf = tmp;
			buflen = need;
		    }
		    strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k)));
		    strcat(buf, "\n");
		    if(!is_eblankline) strcat(buf, line + offset);
		    SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf));
		}
	    } else {
		if(tre_regexecb(&regline, line, 1, regmatch, 0) == 0) {
		    for(m = 0; m < nwhat; m++){
			whatlen = (int) strlen(CHAR(STRING_ELT(what, m)));
			if(strlen(line) > whatlen &&
			   line[whatlen] == ':' &&
			   strncmp(CHAR(STRING_ELT(what, m)),
				   line, whatlen) == 0) {
			    /* An already known field we are recording. */
			    lastm = m;
			    field_skip = FALSE;
			    field_name = CHAR(STRING_ELT(what, lastm));
			    if(has_fold_excludes) {
				field_fold =
				    field_is_foldable_p(field_name,
							fold_excludes);
			    }
			    if(field_fold) {
				offset = regmatch[0].rm_eo;
				/* Also remove trailing whitespace. */
				if((tre_regexecb(&trailblank, line, 1,
						 regmatch, 0) == 0))
				    line[regmatch[0].rm_so] = '\0';
			    } else {
				offset = 0;
			    }
			    SET_STRING_ELT(retval, m + nwhat * k,
					   mkChar(line + offset));
			    break;
			} else {
			    /* This is a field, but not one prespecified */
			    lastm = -1;
			    field_skip = TRUE;
			}
		    }
		    if(dynwhat && (lastm == -1)) {
			/* A previously unseen field and we are
			 * recording all fields */
			field_skip = FALSE;
			PROTECT(what2 = allocVector(STRSXP, nwhat+1));
			PROTECT(retval2 = allocMatrixNA(STRSXP,
							nrows(retval)+1,
							ncols(retval)));
			if(nwhat > 0) {
			    copyVector(what2, what);
			    for(nr = 0; nr < nrows(retval); nr++){
				for(nc = 0; nc < ncols(retval); nc++){
				    SET_STRING_ELT(retval2, nr+nc*nrows(retval2),
						   STRING_ELT(retval,
							      nr+nc*nrows(retval)));
				}
			    }
			}
			UNPROTECT_PTR(retval);
			UNPROTECT_PTR(what);
			retval = retval2;
			what = what2;
			/* Make sure enough space was used */
			need = (int) (Rf_strchr(line, ':') - line + 1);
			if(buflen < need){
			    char *tmp = (char *) realloc(buf, need);
			    if(!tmp) {
				free(buf);
				error(_("could not allocate memory for 'read.dcf'"));
			    } else buf = tmp;
			    buflen = need;
			}
			strncpy(buf, line, Rf_strchr(line, ':') - line);
			buf[Rf_strchr(line, ':') - line] = '\0';
			SET_STRING_ELT(what, nwhat, mkChar(buf));
			nwhat++;
			/* lastm uses C indexing, hence nwhat - 1 */
			lastm = nwhat - 1;
			field_name = CHAR(STRING_ELT(what, lastm));
			if(has_fold_excludes) {
			    field_fold =
				field_is_foldable_p(field_name,
						    fold_excludes);
			}
			offset = regmatch[0].rm_eo;
			if(field_fold) {
			    /* Also remove trailing whitespace. */
			    if((tre_regexecb(&trailblank, line, 1,
					     regmatch, 0) == 0))
				line[regmatch[0].rm_so] = '\0';
			}
			SET_STRING_ELT(retval, lastm + nwhat * k,
				       mkChar(line + offset));
		    }
		} else {
		    /* Must be a regular line with no tag ... */
		    /* Shorten the line for the message, but never write
		       beyond the end of a line shorter than 20 chars. */
		    if(strlen(line) > 20) line[20] = '\0';
		    error(_("Line starting '%s ...' is malformed!"), line);
		}
	    }
	}
    }
    vmaxset(vmax);
    if(!wasopen) {endcontext(&cntxt); con->close(con);}
    free(buf);
    tre_regfree(&blankline);
    tre_regfree(&contline);
    tre_regfree(&trailblank);
    tre_regfree(&regline);
    tre_regfree(&eblankline);

    /* the last record is not followed by a blank line: count it too */
    if(!blank_skip) k++;

    /* and now transpose the whole matrix */
    PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what)));
    copyMatrix(retval2, retval, 1);

    PROTECT(dimnames = allocVector(VECSXP, 2));
    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = k;
    INTEGER(dims)[1] = LENGTH(what);
    SET_VECTOR_ELT(dimnames, 1, what);
    setAttrib(retval2, R_DimSymbol, dims);
    setAttrib(retval2, R_DimNamesSymbol, dimnames);
    UNPROTECT(6);
    return(retval2);
}
Exemple #10
0
// Approximate-match `pattern` against `string`, starting `char_offset`
// characters in, and collect the matched ranges into a Ruby array.
//
// With num_captures == 0 the returned array holds one Range per match
// (or nil for an unset entry); otherwise it holds one sub-array per match
// with num_captures + 1 entries (whole match first, then each capture).
// Range bounds are character offsets relative to the original string.
// Only the first match is collected when `repeat` is Qfalse.
//
// Raises RuntimeError if the matcher reports anything other than a match
// or REG_NOMATCH; the compiled regex is released before raising.
static VALUE
tre_traverse(VALUE pattern, VALUE string, long char_offset, VALUE params,
		VALUE ignore_case, VALUE multi_line, int num_captures, VALUE repeat) {

	// Compile once
	regex_t preg;
	tre_compile_regex(&preg, pattern, ignore_case, multi_line);

	// Build regaparams
	regaparams_t aparams;
	tre_build_aparams(&aparams, params);

	// Match data
	regamatch_t match;
	regmatch_t pmatch[num_captures + 1];
	match.nmatch = num_captures + 1;
	match.pmatch = pmatch;

	// Scan
	VALUE arr = rb_ary_new();
	long char_offset_acc = char_offset;

	while (1) {
		// Get substring to start with
		long char_len = CHAR_LENGTH(string) - char_offset;
		if (char_len <= 0) break;
		string = rb_str_substr(string, char_offset, char_len);

		int result = tre_reganexec(&preg, StringValuePtr(string), 
											RSTRING_LEN(string), &match, aparams, 0);

		if (result == REG_NOMATCH) break;

		// Any other non-zero code is a matcher failure: pmatch holds no
		// usable offsets, so stop instead of turning them into ranges.
		if (result != 0) {
			tre_regfree(&preg);
			rb_raise(rb_eRuntimeError, "TRE matching failed (error code %d)", result);
		}

		// Fill in array with ranges
		VALUE subarr;
		if (match.nmatch == 1) 
			subarr = arr;	// Faking.. kind of.
		else {
			subarr = rb_ary_new();
		}

		unsigned int i;
		for (i = 0; i < match.nmatch; ++i) {
			// No match
			if (match.pmatch[i].rm_so == -1)
				rb_ary_push(subarr, Qnil);
			// Match => Range
			else {
				VALUE range = rb_range_new(
						LONG2NUM( char_offset_acc + BYTE_TO_CHAR(string, match.pmatch[i].rm_so) ),
						LONG2NUM( char_offset_acc + BYTE_TO_CHAR(string, match.pmatch[i].rm_eo) ),
						1);

				rb_ary_push(subarr, range);
			}
		}
		if (match.nmatch > 1) rb_ary_push(arr, subarr);

		// Stop or proceed
		if (repeat == Qfalse)
			break;
		else {
			char_offset = BYTE_TO_CHAR(string, match.pmatch[0].rm_eo);
			if (char_offset == 0) char_offset = 1; // Weird case
			char_offset_acc += char_offset;
		}
	}

	// Free once
	tre_regfree(&preg);

	return arr;
}