/*
 * Self-test for KLIB new_kString. Each scenario is repeated 100 times.
 *
 *  1. A 4-byte string created with policy 0 keeps its text and size and is
 *     flagged ASCII.
 *  2. The same kind of string exposes its text through s->inline_text.
 *  3. A 20-byte string created with StringPolicy_TEXT | StringPolicy_UTF8
 *     points S_text() at the caller's buffer itself and is not flagged ASCII.
 */
void test_kString(KonohaContext *kctx)
{
	static const char *longText = "12345678901234567890";
	const intptr_t rounds = 100;
	intptr_t round;
	kString *str;

	/* Scenario 1: content, size and ASCII flag of a short string. */
	for(round = 0; round < rounds; round++) {
		str = KLIB new_kString(kctx, GcUnsafe, "abcd", 4, 0);
		assert(strcmp(S_text(str), "abcd") == 0);
		assert(S_size(str) == 4);
		assert(kString_is(ASCII, str) == 1);
	}

	/* Scenario 2: a short string's text is served from inline_text. */
	for(round = 0; round < rounds; round++) {
		str = KLIB new_kString(kctx, GcUnsafe, "abcd", 4, 0);
		assert(strcmp(S_text(str), "abcd") == 0);
		assert(S_size(str) == 4);
		assert(S_text(str) == (char*)str->inline_text);
	}

	/* Scenario 3: TEXT policy keeps the caller's pointer; UTF8 policy clears ASCII. */
	for(round = 0; round < rounds; round++) {
		str = KLIB new_kString(kctx, GcUnsafe, longText, 20, StringPolicy_TEXT | StringPolicy_UTF8);
		assert(strcmp(S_text(str), longText) == 0);
		assert(S_size(str) == 20);
		assert(S_text(str) == longText);
		assert(kString_is(ASCII, str) == 0);
	}
}
/*
 * Runs `regex` against `s0` and hands every matched range to
 * KLIB new_kString with `resultArray` as its second argument.
 *
 * For each successful pcre_regexec call the pmatch entries are visited in
 * order, stopping at the first entry whose rm_so is -1. When the regexp has
 * the global option, matching restarts right after the previous whole match
 * (or one byte further when that match was empty) until the end of the text
 * is reached or pcre_regexec reports a non-zero result.
 *
 * Nothing happens when `regex` is null or its pattern is empty.
 */
static void kArray_executeRegExp(KonohaContext *kctx, kArray *resultArray, kRegExp *regex, kString *s0)
{
	int stringPolicy = kString_is(ASCII, s0) ? StringPolicy_ASCII : 0;
	if(!(IS_NOTNULL(regex) && S_size(regex->pattern) > 0)) {
		return;
	}

	const char *cursor = S_text(s0);  /* current start position for matching */
	const char *textEnd = cursor + S_size(s0);
	size_t slot, nmatch = pcre_nmatchsize(kctx, regex->reg);
	kregmatch_t pmatch[nmatch+1];
	int repeat = RegExp_isGlobal(regex);

	for(;;) {
		if(pcre_regexec(kctx, regex->reg, cursor, nmatch, pmatch, regex->eflags) != 0) {
			/* FIXME: failure is not reported (LOG_regex is disabled) */
			break;
		}
		/* Offsets in pmatch are relative to `cursor`. */
		for(slot = 0; slot < nmatch && pmatch[slot].rm_so != -1; slot++) {
			KLIB new_kString(kctx, resultArray, cursor + (pmatch[slot].rm_so), ((pmatch[slot].rm_eo) - (pmatch[slot].rm_so)), stringPolicy);
		}
		if(!repeat) {
			break;
		}
		/* Skip past the whole match; step a single byte on an empty match. */
		size_t consumed = pmatch[0].rm_eo;
		cursor += (consumed > 0) ? consumed : 1;
		if(!(cursor < textEnd)) {
			break;
		}
	}
}
/*
 * Splits `str` and passes each piece to KLIB new_kString with `resultArray`
 * as its second argument.
 *
 * With a non-null regexp whose pattern is non-empty:
 *   - the text is scanned from left to right; for every match the bytes in
 *     front of the match become one piece and scanning resumes after it;
 *   - a zero-length match at the current position yields the next single
 *     character as a piece, so the scan always makes progress;
 *   - scanning returns early as soon as kArray_size(resultArray) + 1 is no
 *     longer below `limit`;
 *   - whatever text is left after the last match becomes the final piece.
 *
 * Otherwise (null regexp or empty pattern) the string is split into
 * individual characters: one byte each for ASCII strings, one UTF-8
 * sequence each (length given by utf8len on the lead byte) for the rest.
 * `limit` is not consulted on this path.
 */
static void kArray_split(KonohaContext *kctx, kArray *resultArray, kString *str, kRegExp *regex, size_t limit)
{
	int stringPolicy = kString_is(ASCII, str) ? StringPolicy_ASCII : 0;
	if(IS_NOTNULL(regex) && S_size(regex->pattern) > 0) {
		const char *s = S_text(str);  // necessary
		const char *eos = s + S_size(str);
		kregmatch_t pmatch[2];
		while(s < eos) {
			int res = pcre_regexec(kctx, regex->reg, s, 1, pmatch, regex->eflags);
			if(res != 0) break;
			size_t len = pmatch[0].rm_eo;
			if(len > 0) {
				KLIB new_kString(kctx, resultArray, s, pmatch[0].rm_so, stringPolicy);
				s += len;
			}
			else {
				/*
				 * Zero-length match at the cursor. The cursor used to stay
				 * put here, which looped forever; emit one character and
				 * step over it instead.
				 */
				size_t step = 1;
				if(!kString_is(ASCII, str)) {
					unsigned char lead = (unsigned char)*s;
					int clen = utf8len(lead);
					if(clen > 1) step = (size_t)clen;
					if(step > (size_t)(eos - s)) step = (size_t)(eos - s);
				}
				KLIB new_kString(kctx, resultArray, s, step, stringPolicy);
				s += step;
			}
			if(!(kArray_size(resultArray) + 1 < limit)) {
				return;
			}
		}
		if(s < eos) {
			KLIB new_kString(kctx, resultArray, s, eos - s, stringPolicy); // append remaining string to array
		}
	}
	else {
		const unsigned char *s = (const unsigned char *)S_text(str);
		size_t i, n = S_size(str);
		if(kString_is(ASCII, str)) {
			for(i = 0; i < n; i++) {
				KLIB new_kString(kctx, resultArray, (const char *)s + i, 1, StringPolicy_ASCII);
			}
		}
		else {
			/*
			 * Advance by exactly one character per iteration. The previous
			 * loop added `len` and then also ran `i++`, skipping one byte
			 * after every character.
			 */
			i = 0;
			while(i < n) {
				int len = utf8len(s[i]);
				if(len < 1) len = 1;                           /* always make progress */
				if((size_t)len > n - i) len = (int)(n - i);   /* truncated trailing sequence */
				KLIB new_kString(kctx, resultArray, (const char *)s + i, len, len == 1 ? StringPolicy_ASCII: StringPolicy_UTF8);
				i += (size_t)len;
			}
		}
	}
}
/*
 * Initializes `re` from a pattern string and an option string.
 *
 * Steps, in order: record the options on the object, store `ptns` in
 * re->pattern, allocate the regexp handle, compile the pattern with the
 * flags parsed from `opts`, and finally store the execution flags parsed
 * from `opts` in re->eflags.
 *
 * The result of pcre_regcomp is not inspected here.
 */
static void RegExp_set(KonohaContext *kctx, kRegExp *re, kString *ptns, kString *opts)
{
	const char *optionText = S_text(opts);
	const char *patternText = S_text(ptns);

	kRegExp_setOptions(re, optionText);
	KFieldSet(re, re->pattern, ptns);
	re->reg = pcre_regmalloc(kctx, ptns);

	int parsedFlags = pcre_ParseComplflags(kctx, optionText);
	/* Add the 'u' option when the pattern is a multibyte string. */
	int utf8Flag = kString_is(ASCII, ptns) ? 0 : PCRE_UTF8;
	pcre_regcomp(kctx, re->reg, patternText, parsedFlags | utf8Flag);

	re->eflags = pcre_ParseExecflags(kctx, optionText);
}
/*
 * String.search(RegExp): sfp[0] is the subject string, sfp[1] the regexp.
 *
 * Returns, through KReturnUnboxValue, the start offset of the first match,
 * or -1 when the regexp is null, its pattern is empty, or pcre_regexec
 * reports a non-zero result. For subjects not flagged ASCII the byte offset
 * is passed through utf8_strlen(str, offset) before being returned
 * (presumably converting it to a character index; confirm against
 * utf8_strlen).
 */
static KMETHOD String_search(KonohaContext *kctx, KonohaStack *sfp)
{
	kString *subject = sfp[0].asString;
	kRegExp *re = sfp[1].asRegExp;
	intptr_t loc = -1;

	if(!IS_NULL(re) && S_size(re->pattern) > 0) {
		kregmatch_t pmatch[2];
		const char *str = S_text(subject);  /* necessary */
		/* TODO: a non-zero result is silently ignored (LOG_regex is disabled) */
		if(pcre_regexec(kctx, re->reg, str, 1, pmatch, re->eflags) == 0) {
			loc = pmatch[0].rm_so;
			if(loc != -1 && !kString_is(ASCII, subject)) {
				loc = utf8_strlen(str, loc);
			}
		}
	}
	KReturnUnboxValue(loc);
}