/*
 * Writes the UTF-8 encoding of the code point V into octets (which must have
 * room for 4 bytes) and returns the number of octets written.
 *
 * V is expected to be a Unicode scalar value (at most 0x10FFFF and not a
 * surrogate); Encode only calls this after rejecting or combining surrogates.
 */
static int32_t
uri_encode_utf8_octets (uint32_t V, unsigned char *octets)
{
    if (V < 0x80) {
        octets[0] = (unsigned char)V;
        return 1;
    }
    if (V < 0x800) {
        octets[0] = (unsigned char)(0xC0 | (V >> 6));
        octets[1] = (unsigned char)(0x80 | (V & 0x3F));
        return 2;
    }
    if (V < 0x10000) {
        octets[0] = (unsigned char)(0xE0 | (V >> 12));
        octets[1] = (unsigned char)(0x80 | ((V >> 6) & 0x3F));
        octets[2] = (unsigned char)(0x80 | (V & 0x3F));
        return 3;
    }
    octets[0] = (unsigned char)(0xF0 | (V >> 18));
    octets[1] = (unsigned char)(0x80 | ((V >> 12) & 0x3F));
    octets[2] = (unsigned char)(0x80 | ((V >> 6) & 0x3F));
    octets[3] = (unsigned char)(0x80 | (V & 0x3F));
    return 4;
}

/*
 * Implementation of the ECMAScript "Encode" abstract operation used by the
 * URI encoding functions.
 *
 *   string    - the string value to encode.
 *   unescaped - a string whose code units form the set of characters that are
 *               copied to the output unchanged.
 *
 * Returns a new string in which every code unit not present in `unescaped`
 * is replaced by the "%XY" escapes of its UTF-8 octets.  A URIError
 * ("URI malformed") is raised for unpaired surrogates.
 *
 * NOTE(review): the code assumes _ejs_throw_nativeerror_utf8 does not return
 * to its caller -- confirm against its definition.
 */
static ejsval
Encode (ejsval string, ejsval unescaped)
{
    EJSPrimString *stringObj = EJSVAL_TO_STRING(string);
    jschar *unescapedStr = EJSVAL_TO_FLAT_STRING(unescaped);

    /* 1. Let strLen be the number of code units in string. */
    int32_t strLen = stringObj->length;

    /* 2. Let R be the empty String. */
    ejsval R = _ejs_atom_empty;

    /* 3. Let k be 0. */
    int32_t k = 0;

    /* 4. Repeat. */
    for (;;) {
        /* a. If k equals strLen, return R. */
        if (k == strLen)
            return R;

        /* b. Let C be the code unit at index k within string. */
        jschar C = _ejs_string_ucs2_at (stringObj, k);
        jschar C_arr [2] = { C, 0 };

        /* c. If C is in unescapedSet, then */
        /* A NUL code unit would make C_arr an empty needle, which a strstr-style
         * search normally reports as found; NUL can never be a member of the
         * NUL-terminated unescaped set, so it is excluded up front. */
        if (C != 0 && ucs2_strstr (unescapedStr, C_arr)) {
            /* i. Let S be a String containing only the code unit C. */
            ejsval S = _ejs_string_new_substring (string, k, 1);
            /* ii. Let R be a new String value computed by concatenating the previous value of R and S. */
            R = _ejs_string_concat (R, S);
        }
        /* d. Else C is not in unescapedSet, */
        else {
            /* i. If the code unit value of C is not less than 0xDC00 and not greater than 0xDFFF, throw a URIError exception. */
            if (C >= 0xDC00 && C <= 0xDFFF)
                _ejs_throw_nativeerror_utf8 (EJS_URI_ERROR, "URI malformed");

            /* V holds a full code point (up to 0x10FFFF), so it must be wider
             * than a 16-bit code unit. */
            uint32_t V;

            /* ii. If the code unit value of C is less than 0xD800 or greater than 0xDBFF, then Let V be the code unit value of C. */
            if (C < 0xD800 || C > 0xDBFF) {
                V = C;
            }
            /* iii. Else, */
            else {
                /* 1. Increase k by 1. */
                k++;
                /* 2. If k equals strLen, throw a URIError exception. */
                if (k == strLen)
                    _ejs_throw_nativeerror_utf8 (EJS_URI_ERROR, "URI malformed");
                /* 3. Let kChar be the code unit value of the code unit at index k within string. */
                jschar kChar = _ejs_string_ucs2_at (stringObj, k);
                /* 4. If kChar is less than 0xDC00 or greater than 0xDFFF, throw a URIError exception. */
                if (kChar < 0xDC00 || kChar > 0xDFFF)
                    _ejs_throw_nativeerror_utf8 (EJS_URI_ERROR, "URI malformed");
                /* 5. Let V be (((the code unit value of C) - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000). */
                V = ((uint32_t)(C - 0xD800)) * 0x400 + (uint32_t)(kChar - 0xDC00) + 0x10000;
            }

            /* iv. Let Octets be the array of octets resulting by applying the UTF-8 transformation to V, and let L be the array size. */
            unsigned char octets[4];
            int32_t L = uri_encode_utf8_octets (V, octets);

            /* v. Let j be 0. */
            /* vi. Repeat, while j < L. */
            for (int32_t j = 0; j < L; j++) {
                /* 1. Let jOctet be the value at index j within Octets. */
                unsigned char jOctet = octets [j];

                /* 2. Let S be a String containing three code units "%XY" where XY are two uppercase hexadecimal
                 *    digits encoding the value of jOctet. */
                /* The octet is formatted as an unsigned value with zero padding so the
                 * output is always exactly three characters plus the terminator. */
                char buff[4];
                snprintf (buff, sizeof buff, "%%%02X", (unsigned int)jOctet);
                ejsval S = _ejs_string_new_utf8 (buff);

                /* 3. Let R be a new String value computed by concatenating the previous value of R and S. */
                R = _ejs_string_concat (R, S);

                /* 4. Increase j by 1. (done by the for loop) */
            }
        }

        /* e. Increase k by 1. */
        k++;
    }
}
/*
 * Replaces matches of the regular expression search_re in str.
 *
 *   str       - the subject string.
 *   search_re - an EJSRegExp object; its compiled_pattern is run with pcre16
 *               and its `global` field selects first-match-only vs. all matches.
 *   replace   - either a function, which is invoked as
 *               replace(matched_text, first_capture, undefined) and whose
 *               result is converted with ToString, or any other value, which
 *               is converted with ToString and inserted literally.
 *
 * Returns the string with the replacement(s) applied, or str itself when
 * nothing matched (or when the offset vector could not be allocated).
 *
 * Matching is always performed against the unmodified subject and the result
 * is assembled separately, so a replacement's length never disturbs the scan
 * position and replacement text is never re-scanned.
 *
 * Limitations kept from the existing behavior: only the first capture group
 * is passed to a replacement function, the offset/subject arguments are not
 * supplied, and "$n" patterns in a replacement string are not expanded.
 */
ejsval
_ejs_regexp_replace(ejsval str, ejsval search_re, ejsval replace)
{
    EJSRegExp* re = (EJSRegExp*)EJSVAL_TO_OBJECT(search_re);

    pcre16_extra extra;
    memset (&extra, 0, sizeof(extra));

    pcre16* code = (pcre16*)re->compiled_pattern;

    /* stays 0 if pcre16_fullinfo fails and leaves the output untouched */
    int capture_count = 0;
    pcre16_fullinfo (code, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);

    int ovec_count = 3 * (1 + capture_count);
    int *ovec = malloc (sizeof *ovec * ovec_count);
    if (ovec == NULL)
        return str;

    EJSPrimString *subject = _ejs_string_flatten (str);
    jschar *subject_chars = subject->data.flat;
    int subject_len = subject->length;

    ejsval result = _ejs_atom_empty;
    int matched = 0;      /* whether at least one match was replaced */
    int copied_to = 0;    /* subject offset up to which text has been emitted */
    int search_from = 0;  /* subject offset at which the next search starts */

    while (search_from <= subject_len) {
        int rv = pcre16_exec (code, &extra,
                              subject_chars, subject_len, search_from,
                              PCRE_NO_UTF16_CHECK, ovec, ovec_count);
        if (rv < 0)
            break;

        matched = 1;

        /* copy the offsets out before running any user code */
        int match_start = ovec[0];
        int match_end = ovec[1];

        ejsval replaceval;
        if (EJSVAL_IS_FUNCTION(replace)) {
            ejsval args[3];

            args[0] = _ejs_string_new_substring (str, match_start, match_end - match_start);

            /* ovec[2]/ovec[3] only exist when the pattern has a capture group,
             * and hold -1 when that group did not take part in the match. */
            if (capture_count >= 1 && rv >= 2 && ovec[2] >= 0)
                args[1] = _ejs_string_new_substring (str, ovec[2], ovec[3] - ovec[2]);
            else
                args[1] = _ejs_undefined;

            args[2] = _ejs_undefined;

            replaceval = ToString(_ejs_invoke_closure (replace, _ejs_undefined, 3, args));
        }
        else {
            replaceval = ToString(replace);
        }

        /* emit the untouched text between the previous match and this one */
        if (match_start > copied_to)
            result = _ejs_string_concat (result, _ejs_string_new_substring (str, copied_to, match_start - copied_to));
        result = _ejs_string_concat (result, replaceval);
        copied_to = match_end;

        /* if the RegExp object was created without a 'g' flag, only replace the first match */
        if (!re->global)
            break;

        /* an empty match must still make progress, otherwise the same
         * position would match forever */
        search_from = (match_end == match_start) ? match_end + 1 : match_end;
    }

    free (ovec);

    if (!matched)
        return str;

    /* append whatever follows the last match */
    if (copied_to < subject_len)
        result = _ejs_string_concat (result, _ejs_string_new_substring (str, copied_to, subject_len - copied_to));

    return result;
}