C++ (Cpp) RegexMatcher::resetの例

コード例 #1

0

ファイルを表示

ファイル: stri_search_regex_detect.cpp プロジェクト: rforge/stringi

/**
 * Detect if a pattern occurs in a string
 *
 * @param str R character vector
 * @param pattern R character vector containing regular expressions
 * @param opts_regex list
 *
 * @version 0.1 (Marcin Bujarski)
 * @version 0.2 (Marek Gagolewski) - use StriContainerUTF16
 * @version 0.3 (Marek Gagolewski) - use StriContainerUTF16's vectorization
 * @version 0.4 (Marek Gagolewski, 2013-06-18) use StriContainerRegexPattern + opts_regex
 */
SEXP stri_detect_regex(SEXP str, SEXP pattern, SEXP opts_regex)
{
   str = stri_prepare_arg_string(str, "str");
   pattern = stri_prepare_arg_string(pattern, "pattern");
   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));
   // this will work for vectorize_length == 0:

   uint32_t pattern_flags = StriContainerRegexPattern::getRegexFlags(opts_regex);

   STRI__ERROR_HANDLER_BEGIN

   StriContainerUTF16 str_cont(str, vectorize_length);
   // MG: tried StriContainerUTF8 + utext_openUTF8 - this was slower
   StriContainerRegexPattern pattern_cont(pattern, vectorize_length, pattern_flags);

   SEXP ret;
   PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
   int* ret_tab = LOGICAL(ret);

   for (R_len_t i = pattern_cont.vectorize_init();
         i != pattern_cont.vectorize_end();
         i = pattern_cont.vectorize_next(i))
   {
      STRI__CONTINUE_ON_EMPTY_OR_NA_STR_PATTERN(str_cont, pattern_cont, ret_tab[i] = NA_LOGICAL, ret_tab[i] = FALSE)

      RegexMatcher *matcher = pattern_cont.getMatcher(i); // will be deleted automatically
      matcher->reset(str_cont.get(i));
      ret_tab[i] = (int)matcher->find();
   }

   UNPROTECT(1);
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}

コード例 #2

0

ファイルを表示

ファイル: repattrn.cpp プロジェクト: gitpan/ponie

//---------------------------------------------------------------------
//
//   matcher(UnicodeString, err)
//
//---------------------------------------------------------------------
RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
                                    UErrorCode          &status)  const {
    RegexMatcher    *retMatcher = matcher(status);
    if (retMatcher != NULL) {
        retMatcher->reset(input);
    }
    return retMatcher;
};

コード例 #3

0

ファイルを表示

ファイル: repattrn.cpp プロジェクト: Epictetus/build-couchdb

//
//   matcher, UText mode
//
RegexMatcher *RegexPattern::matcher(UText               *input,
                                    PatternIsUTextFlag  /*flag*/,
                                    UErrorCode          &status)  const {
    RegexMatcher    *retMatcher = matcher(status);
    if (retMatcher != NULL) {
        retMatcher->fDeferredStatus = status;
        retMatcher->reset(input);
    }
    return retMatcher;
}

コード例 #4

0

ファイルを表示

ファイル: stri_search_regex_detect.cpp プロジェクト: cran/stringi

/**
 * Detect if a pattern occurs in a string
 *
 * @param str R character vector
 * @param pattern R character vector containing regular expressions
 * @param negate single bool
 * @param max_count single int
 * @param opts_regex list
 *
 * @version 0.1-?? (Marcin Bujarski)
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          use StriContainerUTF16
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          use StriContainerUTF16's vectorization
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-18)
 *          use StriContainerRegexPattern + opts_regex
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-05)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 *
 * @version 1.0-2 (Marek Gagolewski, 2016-01-29)
 *    Issue #214: allow a regex pattern like `.*`  to match an empty string
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *    FR #216: `negate` arg added
 *
 * @version 1.3.1 (Marek Gagolewski, 2019-02-08)
 *    #232: `max_count` arg added
 */
SEXP stri_detect_regex(SEXP str, SEXP pattern, SEXP negate,
    SEXP max_count, SEXP opts_regex)
{
   bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate");
   int max_count_1 = stri__prepare_arg_integer_1_notNA(max_count, "max_count");
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));
   R_len_t vectorize_length =
      stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));

   uint32_t pattern_flags = StriContainerRegexPattern::getRegexFlags(opts_regex);

   STRI__ERROR_HANDLER_BEGIN(2)
   StriContainerUTF16 str_cont(str, vectorize_length);
//   StriContainerUTF8 str_cont(str, vectorize_length); // utext_openUTF8, see below
   StriContainerRegexPattern pattern_cont(pattern, vectorize_length, pattern_flags);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
   int* ret_tab = LOGICAL(ret);

   for (R_len_t i = pattern_cont.vectorize_init();
         i != pattern_cont.vectorize_end();
         i = pattern_cont.vectorize_next(i))
   {
      if (max_count_1 == 0) {
          ret_tab[i] = NA_LOGICAL;
          continue;
      }

      STRI__CONTINUE_ON_EMPTY_OR_NA_PATTERN(str_cont,
         pattern_cont, ret_tab[i] = NA_LOGICAL)

      RegexMatcher *matcher = pattern_cont.getMatcher(i); // will be deleted automatically
      matcher->reset(str_cont.get(i));
      ret_tab[i] = (int)matcher->find(); // returns UBool
      if (negate_1) ret_tab[i] = !ret_tab[i];
      if (max_count_1 > 0 && ret_tab[i]) --max_count_1;

//      // mbmark-regex-detect1.R: UTF16 0.07171792 s; UText 0.10531605 s
//      UText* str_text = NULL;
//      UErrorCode status = U_ZERO_ERROR;
//      RegexMatcher *matcher = pattern_cont.getMatcher(i); // will be deleted automatically
//      str_text = utext_openUTF8(str_text, str_cont.get(i).c_str(), str_cont.get(i).length(), &status);
//      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
//      matcher->reset(str_text);
//      ret_tab[i] = (int)matcher->find(); // returns UBool
//      utext_close(str_text);
   }

   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}

コード例 #5

0

ファイルを表示

ファイル: repattrn.cpp プロジェクト: Acorld/WinObjC-Heading

//
//   matches, UText mode
//
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                    UText           *input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal  = FALSE;
    RegexPattern *pat     = NULL;
    RegexMatcher *matcher = NULL;

    pat     = RegexPattern::compile(regex, 0, pe, status);
    matcher = pat->matcher(status);
    if (U_SUCCESS(status)) {
        matcher->reset(input);
        retVal  = matcher->matches(status);
    }

    delete matcher;
    delete pat;
    return retVal;
}

コード例 #6

0

ファイルを表示

ファイル: dcfmtest.cpp プロジェクト: ACSOP/android_external_icu4c

void DecimalFormatTest::DataDrivenTests() {
    char tdd[2048];
    const char *srcPath;
    UErrorCode  status  = U_ZERO_ERROR;
    int32_t     lineNum = 0;

    //
    //  Open and read the test data file.
    //
    srcPath=getPath(tdd, "dcfmtest.txt");
    if(srcPath==NULL) {
        return; /* something went wrong, error already output */
    }

    int32_t    len;
    UChar *testData = ReadAndConvertFile(srcPath, len, status);
    if (U_FAILURE(status)) {
        return; /* something went wrong, error already output */
    }

    //
    //  Put the test data into a UnicodeString
    //
    UnicodeString testString(FALSE, testData, len);

    RegexMatcher    parseLineMat(UnicodeString(
            "(?i)\\s*parse\\s+"
            "\"([^\"]*)\"\\s+"           // Capture group 1: input text
            "([ild])\\s+"                // Capture group 2: expected parsed type
            "\"([^\"]*)\"\\s+"           // Capture group 3: expected parsed decimal
            "\\s*(?:#.*)?"),             // Trailing comment
         0, status);

    RegexMatcher    formatLineMat(UnicodeString(
            "(?i)\\s*format\\s+"
            "(\\S+)\\s+"                 // Capture group 1: pattern
            "(ceiling|floor|down|up|halfeven|halfdown|halfup|default)\\s+"  // Capture group 2: Rounding Mode
            "\"([^\"]*)\"\\s+"           // Capture group 3: input
            "\"([^\"]*)\""           // Capture group 4: expected output
            "\\s*(?:#.*)?"),             // Trailing comment
         0, status);

    RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
    RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);

    if (U_FAILURE(status)){
        dataerrln("Construct RegexMatcher() error.");
        delete [] testData;
        return;
    }

    //
    //  Loop over the test data file, once per line.
    //
    while (lineMat.find()) {
        lineNum++;
        if (U_FAILURE(status)) {
            errln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
        }

        status = U_ZERO_ERROR;
        UnicodeString testLine = lineMat.group(1, status);
        // printf("%s\n", UnicodeStringPiece(testLine).data());
        if (testLine.length() == 0) {
            continue;
        }

        //
        // Parse the test line.  Skip blank and comment only lines.
        // Separate out the three main fields - pattern, flags, target.
        //

        commentMat.reset(testLine);
        if (commentMat.lookingAt(status)) {
            // This line is a comment, or blank.
            continue;
        }


        //
        //  Handle "parse" test case line from file
        //
        parseLineMat.reset(testLine);
        if (parseLineMat.lookingAt(status)) {
            execParseTest(lineNum,
                          parseLineMat.group(1, status),    // input
                          parseLineMat.group(2, status),    // Expected Type
                          parseLineMat.group(3, status),    // Expected Decimal String
                          status
                          );
            continue;
        }

        //
        //  Handle "format" test case line
        //
        formatLineMat.reset(testLine);
        if (formatLineMat.lookingAt(status)) {
            execFormatTest(lineNum,
                           formatLineMat.group(1, status),    // Pattern
                           formatLineMat.group(2, status),    // rounding mode
                           formatLineMat.group(3, status),    // input decimal number
                           formatLineMat.group(4, status),    // expected formatted result
                           status);
            continue;
        }

        //
        //  Line is not a recognizable test case.
        //
        errln("Badly formed test case at line %d.\n%s\n", 
             lineNum, UnicodeStringPiece(testLine).data());

    }

    delete [] testData;
}

コード例 #7

0

ファイルを表示

ファイル: ugrep.cpp プロジェクト: icu-project/icu4c

//------------------------------------------------------------------------------------------
//
//   main          for ugrep
//
//           Structurally, all use of the ICU Regular Expression API is in main(),
//           and all of the supporting stuff necessary to make a running program, but
//           not directly related to regular expressions, is factored out into these other
//           functions.
//
//------------------------------------------------------------------------------------------
int main(int argc, const char** argv) {
    UBool     matchFound = FALSE;

    //
    //  Process the commmand line options.
    //
    processOptions(argc, argv);

    //
    // Create a RegexPattern object from the user supplied pattern string.
    //
    UErrorCode status = U_ZERO_ERROR;   // All ICU operations report success or failure
                                        //   in a status variable.

    UParseError    parseErr;            // In the event of a syntax error in the regex pattern,
                                        //   this struct will contain the position of the
                                        //   error.

    RegexPattern  *rePat = RegexPattern::compile(pattern, parseErr, status);
                                        // Note that C++ is doing an automatic conversion
                                        //  of the (char *) pattern to a temporary
                                        //  UnicodeString object.
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in pattern: \"%s\" at position %d\n",
            u_errorName(status), parseErr.offset);
        exit(-1);
    }

    //
    // Create a RegexMatcher from the newly created pattern.
    //
    UnicodeString empty;
    RegexMatcher *matcher = rePat->matcher(empty, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in creating RegexMatcher: \"%s\"\n",
            u_errorName(status));
        exit(-1);
    }

    //
    // Loop, processing each of the input files.
    //
    for (int fileNum=firstFileNum; fileNum < argc; fileNum++) {
        readFile(argv[fileNum]);

        //
        //  Loop through the lines of a file, trying to match the regex pattern on each.
        //
        for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) {
            UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart);
            matcher->reset(s);
            if (matcher->find()) {
                matchFound = TRUE;
                printMatch();
            }
        }
    }

    //
    //  Clean up
    //
    delete matcher;
    delete rePat;
    free(ucharBuf);
    free(charBuf);
    ucnv_close(outConverter);
    
    u_cleanup();       // shut down ICU, release any cached data it owns.

    return matchFound? 0: 1;
}