/**
 * Debug helper: writes a human-readable description of a compiled pattern
 * through REGEX_DUMP_DEBUG_PRINTF.
 *
 * Output, in order: the original pattern text, the minimum match length,
 * the start-of-match strategy (plus the literal string, leading-character
 * set or single leading character that goes with it), and finally one line
 * per compiled operation as produced by RegexPattern::dumpOp().
 *
 * TODO: This function assumes an ASCII based charset.
 *
 * @param This  the pattern to describe; it is dereferenced unconditionally.
 */
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
    UChar32 c = utext_next32From(This->fPattern, 0);
    while (c != U_SENTINEL) {
        // "%c" converts its argument to unsigned char, so only values that
        // fit in one byte may be passed through. The former test (c > 256)
        // let U+0100 through, which was then written as a NUL byte.
        if (c < 32 || c > 0xff) {
            c = '.';
        }
        REGEX_DUMP_DEBUG_PRINTF(("%c", c));
        c = UTEXT_NEXT32(This->fPattern);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));

    REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));

    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
        const int32_t literalLimit = This->fInitialStringIdx + This->fInitialStringLen;
        for (int32_t i = This->fInitialStringIdx; i < literalLimit; i++) {
            // fLiteralText holds UTF-16 code units. Apply the same filter as
            // for the pattern text so that control characters and units that
            // do not fit in a byte (including surrogates) show up as '.'
            // instead of being truncated by "%c".
            UChar32 unit = This->fLiteralText[i];
            if (unit < 32 || unit > 0xff) {
                unit = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", unit));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 members of the set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
        for (int32_t i = 0; i < numSetChars; i++) {
            const UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20 < setChar && setChar < 0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            // The listing was truncated.
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar < 0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n"
                             "-------------------------------------------\n"));
    for (int32_t index = 0; index < This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
void RegexPattern::dumpPattern() const { #if defined(REGEX_DEBUG) int index; UnicodeString patStr; for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) { patStr.append(c); } printf("Original Pattern: \"%s\"\n", CStr(patStr)()); printf(" Min Match Length: %d\n", fMinMatchLen); printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); if (fStartType == START_STRING) { UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen); printf(" Initial match string: \"%s\"\n", CStr(initialString)()); } else if (fStartType == START_SET) { UnicodeString s; fInitialChars->toPattern(s, TRUE); printf(" Match First Chars: %s\n", CStr(s)()); } else if (fStartType == START_CHAR) { printf(" First char of Match: "); if (fInitialChar > 0x20) { printf("'%s'\n", CStr(UnicodeString(fInitialChar))()); } else { printf("%#x\n", fInitialChar); } } printf("Named Capture Groups:\n"); if (uhash_count(fNamedCaptureMap) == 0) { printf(" None\n"); } else { int32_t pos = UHASH_FIRST; const UHashElement *el = NULL; while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { const UnicodeString *name = (const UnicodeString *)el->key.pointer; int32_t number = el->value.integer; printf(" %d\t%s\n", number, CStr(*name)()); } } printf("\nIndex Binary Type Operand\n" \ "-------------------------------------------\n"); for (index = 0; index<fCompiledPat->size(); index++) { dumpOp(index); } printf("\n\n"); #endif }
/**
 * Debug-only dump of this compiled pattern to stdout.
 *
 * The body is compiled only when REGEX_DEBUG is defined; otherwise the
 * function does nothing. Output, in order: the original pattern text, the
 * minimum match length, the start-of-match strategy with its associated
 * string / set / character, the named capture groups, and one line per
 * compiled operation as produced by dumpOp().
 */
void RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
    // TODO: This function assumes an ASCII based charset.

    printf("Original Pattern: ");
    UChar32 c = utext_next32From(fPattern, 0);
    while (c != U_SENTINEL) {
        // "%c" converts its argument to unsigned char, so only values that
        // fit in one byte may be passed through. The former test (c > 256)
        // let U+0100 through, which was then written as a NUL byte.
        if (c < 32 || c > 0xff) {
            c = '.';
        }
        printf("%c", c);
        c = UTEXT_NEXT32(fPattern);
    }
    printf("\n");

    printf(" Min Match Length: %d\n", fMinMatchLen);
    printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));

    if (fStartType == START_STRING) {
        printf(" Initial match string: \"");
        const int32_t literalLimit = fInitialStringIdx + fInitialStringLen;
        for (int32_t i = fInitialStringIdx; i < literalLimit; i++) {
            // fLiteralText holds UTF-16 code units. Apply the same filter as
            // for the pattern text so that control characters and units that
            // do not fit in a byte (including surrogates) show up as '.'
            // instead of being truncated by "%c".
            UChar32 unit = fLiteralText[i];
            if (unit < 32 || unit > 0xff) {
                unit = '.';
            }
            printf("%c", unit);
        }
        printf("\"\n");

    } else if (fStartType == START_SET) {
        // Show at most the first 20 members of the set.
        int32_t numSetChars = fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        printf(" Match First Chars : ");
        for (int32_t i = 0; i < numSetChars; i++) {
            const UChar32 setChar = fInitialChars->charAt(i);
            if (0x20 < setChar && setChar < 0x7e) {
                printf("%c ", setChar);
            } else {
                printf("%#x ", setChar);
            }
        }
        if (numSetChars < fInitialChars->size()) {
            // The listing was truncated.
            printf(" ...");
        }
        printf("\n");

    } else if (fStartType == START_CHAR) {
        printf(" First char of Match : ");
        if (0x20 < fInitialChar && fInitialChar < 0x7e) {
            printf("%c\n", fInitialChar);
        } else {
            printf("%#x\n", fInitialChar);
        }
    }

    printf("Named Capture Groups:\n");
    if (uhash_count(fNamedCaptureMap) == 0) {
        printf(" None\n");
    } else {
        // Keys are group names (UnicodeString *), values are group numbers.
        int32_t pos = UHASH_FIRST;
        const UHashElement *el = NULL;
        while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
            char s[100];
            // At most 99 characters are extracted into the 100-byte buffer.
            name->extract(0, 99, s, sizeof(s), US_INV);   // capture group names are invariant.
            int32_t number = el->value.integer;
            printf(" %d\t%s\n", number, s);
        }
    }

    printf("\nIndex Binary Type Operand\n"
           "-------------------------------------------\n");
    for (int32_t index = 0; index < fCompiledPat->size(); index++) {
        dumpOp(index);
    }
    printf("\n\n");
#endif
}
/*
 * TestAPI: exercises the plain-C UText entry points on small fixed strings.
 *
 * Each braced section below is independent: it opens its own UText, checks
 * results with TEST_SUCCESS / TEST_ASSERT, and closes the UText again.
 * Sections, in order: open/close, clone, basic character access and index
 * movement, zero-length NULL input, extract, and the write operations on a
 * read-only text.
 *
 * Within a section the checks depend on the iteration position left behind
 * by the preceding call, so statement order matters.
 */
static void TestAPI(void) {
    UErrorCode status = U_ZERO_ERROR;
    UBool gFailed = FALSE;
    (void)gFailed; /* Suppress set but not used warning. */

    /* Open */
    {
        UText utLoc = UTEXT_INITIALIZER;
        const char * cString = "\x61\x62\x63\x64";
        UChar uString[] = {0x41, 0x42, 0x43, 0};
        UText *uta;
        UText *utb;
        UChar c;

        /* No UText supplied (NULL): the first character read must be 0x41,
         * and utext_close() is expected to return NULL. */
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);
        c = utext_next32(uta);
        TEST_ASSERT(c == 0x41);
        utb = utext_close(uta);
        TEST_ASSERT(utb == NULL);

        /* Caller-supplied UText (utLoc): open is expected to return that
         * same object. */
        uta = utext_openUTF8(&utLoc, cString, -1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(uta == &utLoc);

        /* Closing the caller-supplied UText is expected to hand the same
         * pointer back rather than NULL. */
        uta = utext_close(&utLoc);
        TEST_ASSERT(uta == &utLoc);
    }

    /* utext_clone() */
    {
        UChar uString[] = {0x41, 0x42, 0x43, 0};
        int64_t len;
        UText *uta;
        UText *utb;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);
        /* The clone must be a distinct, non-NULL object. */
        utb = utext_clone(NULL, uta, FALSE, FALSE, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utb != NULL);
        TEST_ASSERT(utb != uta);
        len = utext_nativeLength(uta);
        TEST_ASSERT(len == u_strlen(uString));
        /* Original and clone are closed separately. */
        utext_close(uta);
        utext_close(utb);
    }

    /* basic access functions */
    {
        UChar uString[] = {0x41, 0x42, 0x43, 0};
        UText *uta;
        UChar32 c;
        int64_t len;
        UBool b;
        int64_t i;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_ASSERT(uta!=NULL);
        TEST_SUCCESS(status);
        /* The text was opened with length -1. Length is expected to be
         * reported as expensive before utext_nativeLength() has been called,
         * and as not expensive afterwards. */
        b = utext_isLengthExpensive(uta);
        TEST_ASSERT(b==TRUE);
        len = utext_nativeLength(uta);
        TEST_ASSERT(len == u_strlen(uString));
        b = utext_isLengthExpensive(uta);
        TEST_ASSERT(b==FALSE);

        c = utext_char32At(uta, 0);
        TEST_ASSERT(c==uString[0]);

        c = utext_current32(uta);
        TEST_ASSERT(c==uString[0]);

        /* next32 returns the character at index 0; current32 then sees
         * the character at index 1. */
        c = utext_next32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[1]);

        /* previous32 returns the character at index 0, and current32 then
         * sees that same character. */
        c = utext_previous32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[0]);

        /* next32From at the end of the string must yield U_SENTINEL. */
        c = utext_next32From(uta, 1);
        TEST_ASSERT(c==uString[1]);
        c = utext_next32From(uta, u_strlen(uString));
        TEST_ASSERT(c==U_SENTINEL);

        /* previous32From(2) returns the character at index 1 and is
         * expected to leave the native index at 1. */
        c = utext_previous32From(uta, 2);
        TEST_ASSERT(c==uString[1]);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i == 1);

        /* moveIndex32: one step forward from index 0. */
        utext_setNativeIndex(uta, 0);
        b = utext_moveIndex32(uta, 1);
        TEST_ASSERT(b==TRUE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==1);

        /* Move the remaining distance, landing exactly on the end. */
        b = utext_moveIndex32(uta, u_strlen(uString)-1);
        TEST_ASSERT(b==TRUE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==u_strlen(uString));

        /* A further move from the end is expected to fail (FALSE) and to
         * leave the index at the end. */
        b = utext_moveIndex32(uta, 1);
        TEST_ASSERT(b==FALSE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==u_strlen(uString));

        /* Same forward/backward checks through the macro forms. */
        utext_setNativeIndex(uta, 0);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[1]);

        /* The second UTEXT_PREVIOUS32 starts at index 0 and must yield
         * U_SENTINEL. */
        c = UTEXT_PREVIOUS32(uta);
        TEST_ASSERT(c==uString[0]);
        c = UTEXT_PREVIOUS32(uta);
        TEST_ASSERT(c==U_SENTINEL);

        utext_close(uta);
    }

    {
        /*
         * UText opened on a NULL string with zero length
         */
        UText *uta;
        UChar32 c;

        /* Both the UChars and the UTF-8 open are expected to succeed, and
         * the first read must already yield U_SENTINEL. */
        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, NULL, 0, &status);
        TEST_SUCCESS(status);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c == U_SENTINEL);
        utext_close(uta);

        uta = utext_openUTF8(NULL, NULL, 0, &status);
        TEST_SUCCESS(status);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c == U_SENTINEL);
        utext_close(uta);
    }

    {
        /*
         * extract
         */
        UText *uta;
        UChar uString[] = {0x41, 0x42, 0x43, 0};
        UChar buf[100];
        int32_t i;
        /* Test pinning of input bounds */
        /* uString2Ptr points five elements into uString2, so valid array
         * data exists in front of the text that gets opened on it. */
        UChar uString2[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0};
        UChar * uString2Ptr = uString2 + 5;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);

        /* NULL destination with capacity 0: expect U_BUFFER_OVERFLOW_ERROR
         * and a return value equal to the full string length. */
        status = U_ZERO_ERROR;
        i = utext_extract(uta, 0, 100, NULL, 0, &status);
        TEST_ASSERT(status==U_BUFFER_OVERFLOW_ERROR);
        TEST_ASSERT(i == u_strlen(uString));

        /* Large enough destination, requested limit (100) past the end of
         * the text: expect success and an exact copy of the string. */
        status = U_ZERO_ERROR;
        memset(buf, 0, sizeof(buf));
        i = utext_extract(uta, 0, 100, buf, 100, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(i == u_strlen(uString));
        i = u_strcmp(uString, buf);
        TEST_ASSERT(i == 0);
        utext_close(uta);

        /* Test pinning of input bounds */
        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString2Ptr, -1, &status);
        TEST_SUCCESS(status);

        /* Start index -3 and limit 20 both lie outside the text. The result
         * must be exactly the text starting at uString2Ptr, with nothing
         * taken from the array elements in front of it. */
        status = U_ZERO_ERROR;
        memset(buf, 0, sizeof(buf));
        i = utext_extract(uta, -3, 20, buf, 100, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(i == u_strlen(uString2Ptr));
        i = u_strcmp(uString2Ptr, buf);
        TEST_ASSERT(i == 0);
        utext_close(uta);
    }

    {
        /*
         * Copy, Replace, isWritable
         * Can't create an editable UText from plain C, so all we
         * can easily do is check that errors returned.
         */
        UText *uta;
        UChar uString[] = {0x41, 0x42, 0x43, 0};
        UBool b;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);

        /* A UText opened on a UChar string is expected to report itself as
         * neither writable nor carrying metadata. */
        b = utext_isWritable(uta);
        TEST_ASSERT(b == FALSE);

        b = utext_hasMetaData(uta);
        TEST_ASSERT(b == FALSE);

        /* Both modifying operations must fail with U_NO_WRITE_PERMISSION.
         * status is deliberately not reset between the two calls. */
        utext_replace(uta,
                      0, 1,         /* start, limit */
                      uString, -1,  /* replacement, replacement length */
                      &status);
        TEST_ASSERT(status == U_NO_WRITE_PERMISSION);

        utext_copy(uta,
                   0, 1,            /* start, limit */
                   2,               /* destination index */
                   FALSE,           /* move flag */
                   &status);
        TEST_ASSERT(status == U_NO_WRITE_PERMISSION);

        utext_close(uta);
    }
}