Datum show_trgm(PG_FUNCTION_ARGS) { text *in = PG_GETARG_TEXT_P(0); TRGM *trg; Datum *d; ArrayType *a; trgm *ptr; int i; trg = generate_trgm(VARDATA(in), VARSIZE(in) - VARHDRSZ); d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg))); for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++) { text *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3)); if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr)) { snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr)); SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item))); } else { SET_VARSIZE(item, VARHDRSZ + 3); CPTRGM(VARDATA(item), ptr); } d[i] = PointerGetDatum(item); } a = construct_array( d, ARRNELEM(trg), TEXTOID, -1, false, 'i' ); for (i = 0; i < ARRNELEM(trg); i++) pfree(DatumGetPointer(d[i])); pfree(d); pfree(trg); PG_FREE_IF_COPY(in, 0); PG_RETURN_POINTER(a); }
Datum gbt_bpchar_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); void *query = (void *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); /* Oid subtype = PG_GETARG_OID(3); */ bool *recheck = (bool *) PG_GETARG_POINTER(4); bool retval; GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(entry->key); GBT_VARKEY_R r = gbt_var_key_readable(key); void *trim = (void *) DatumGetPointer(DirectFunctionCall1(rtrim1, PointerGetDatum(query))); /* All cases served by this function are exact */ *recheck = false; if (tinfo.eml == 0) { tinfo.eml = pg_database_encoding_max_length(); } retval = gbt_var_consistent(&r, trim, &strategy, GIST_LEAF(entry), &tinfo); PG_RETURN_BOOL(retval); }
TParser * TParserInit(char *str, int len) { TParser *prs = (TParser *) palloc0(sizeof(TParser)); prs->charmaxlen = pg_database_encoding_max_length(); prs->str = str; prs->lenstr = len; #ifdef TS_USE_WIDE /* * Use wide char code only when max encoding length > 1. */ if (prs->charmaxlen > 1) { prs->usewide = true; prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1)); prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); } else #endif prs->usewide = false; prs->state = newTParserPosition(NULL); prs->state->state = TPS_Base; return prs; }
/*
 * ora_mb_strlen1
 *		Return the length of str in characters.
 *
 * In a single-byte encoding this is simply the byte length; otherwise the
 * data is walked one character at a time using _pg_mblen().
 */
int
ora_mb_strlen1(text *str)
{
	int			remaining = VARSIZE_ANY_EXHDR(str);
	int			nchars = 0;
	char	   *cursor;

	if (pg_database_encoding_max_length() == 1)
		return remaining;

	for (cursor = VARDATA_ANY(str); remaining > 0; nchars++)
	{
		int			step = _pg_mblen(cursor);

		cursor += step;
		remaining -= step;
	}

	return nchars;
}
Datum gbt_bpchar_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval; if (tinfo.eml == 0) { tinfo.eml = pg_database_encoding_max_length(); } if (entry->leafkey) { Datum d = DirectFunctionCall1(rtrim1, entry->key); GISTENTRY trim; gistentryinit(trim, d, entry->rel, entry->page, entry->offset, VARSIZE(DatumGetPointer(d)), TRUE); retval = gbt_var_compress(&trim, &tinfo); } else retval = entry; PG_RETURN_POINTER(retval); }
/*
 * regexp_fixed_prefix - find the fixed prefix of a regexp, if it has one
 *
 * Returns NULL when the pattern has no fixed prefix; otherwise returns a
 * palloc'd string holding the prefix in the database encoding.  *exact is
 * set to true when the regexp can only match that exact string, and to
 * false in every other case.
 */
char *
regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
					bool *exact)
{
	regex_t    *re;
	pg_wchar   *wprefix;
	size_t		nchars;
	size_t		bufsize;
	char	   *prefix;
	int			flags = REG_ADVANCED;
	int			rc;
	char		errMsg[100];

	/* default answer until proven otherwise */
	*exact = false;

	/* compile (or fetch from cache) the regular expression */
	if (case_insensitive)
		flags |= REG_ICASE;
	re = RE_compile_and_cache(text_re, flags, collation);

	/* ask the regex engine whether a fixed prefix exists */
	rc = pg_regprefix(re, &wprefix, &nchars);

	if (rc == REG_NOMATCH)
		return NULL;

	if (rc == REG_EXACT)
		*exact = true;
	else if (rc != REG_PREFIX)
	{
		/* anything else is a failure inside the regex code */
		CHECK_FOR_INTERRUPTS();
		pg_regerror(rc, re, errMsg, sizeof(errMsg));
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
				 errmsg("regular expression failed: %s", errMsg)));
	}

	/* translate the pg_wchar prefix back into the database encoding */
	bufsize = pg_database_encoding_max_length() * nchars + 1;
	prefix = (char *) palloc(bufsize);
	nchars = pg_wchar2mb_with_len(wprefix, prefix, nchars);
	Assert(nchars < bufsize);

	/* the wide string is released with free(), not pfree() */
	free(wprefix);

	return prefix;
}
/* * Guard against possible overflow in the palloc requests below. (We * don't worry about the additive constants, since palloc can detect * requests that are a little above MaxAllocSize --- we just need to * prevent integer overflow in the multiplications.) */ static void protect_out_of_mem(int slen) { if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("out of memory"))); }
/*
 * Build the raw trigram array for a string (no sorting, no de-duplication).
 *
 * trg: output array the trigrams are written into.
 * str: input string, slen bytes long.
 * bounds: if not NULL, per-trigram flags marking the first and last
 *		trigram of each word.
 *
 * Returns the number of trigrams written.
 */
static int
generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
{
	trgm	   *out = trg;
	char	   *padded;
	char	   *word_start;
	char	   *word_end = str;
	int			charlen;
	int			bytelen;
#ifdef IGNORECASE
	char	   *folded;
#endif

	if (slen + LPADDING + RPADDING < 3 || slen == 0)
		return 0;

	/* work buffer holding one (case-folded) word surrounded by blanks */
	padded = (char *) palloc(slen * pg_database_encoding_max_length() + 4);

	if (LPADDING > 0)
	{
		padded[0] = ' ';
		if (LPADDING > 1)
			padded[1] = ' ';
	}

	for (;;)
	{
		word_start = find_word(word_end, slen - (word_end - str),
							   &word_end, &charlen);
		if (word_start == NULL)
			break;

#ifdef IGNORECASE
		folded = lowerstr_with_len(word_start, word_end - word_start);
		bytelen = strlen(folded);
		memcpy(padded + LPADDING, folded, bytelen);
		pfree(folded);
#else
		bytelen = word_end - word_start;
		memcpy(padded + LPADDING, word_start, bytelen);
#endif

		/* two trailing blanks are always written after the word */
		padded[LPADDING + bytelen] = ' ';
		padded[LPADDING + bytelen + 1] = ' ';

		/* emit the word's trigrams, flagging its first and last if asked */
		if (bounds)
			bounds[out - trg] |= TRGM_BOUND_LEFT;

		out = make_trigrams(out, padded,
							bytelen + LPADDING + RPADDING,
							charlen + LPADDING + RPADDING);

		if (bounds)
			bounds[out - trg - 1] |= TRGM_BOUND_RIGHT;
	}

	pfree(padded);

	return out - trg;
}
/* Generic for all cases not requiring inline case-folding */ static inline int GenericMatchText(char *s, int slen, char *p, int plen) { if (pg_database_encoding_max_length() == 1) return SB_MatchText(s, slen, p, plen, 0, true); else if (GetDatabaseEncoding() == PG_UTF8) return UTF8_MatchText(s, slen, p, plen, 0, true); else return MB_MatchText(s, slen, p, plen, 0, true); }
/*
 * gbt_text_compress
 *		GiST "compress" support function for text keys; delegates to
 *		gbt_var_compress().
 */
Datum
gbt_text_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry;
	GISTENTRY  *compressed;

	entry = (GISTENTRY *) PG_GETARG_POINTER(0);

	/* fill in the encoding's max character length on first use */
	if (tinfo.eml == 0)
		tinfo.eml = pg_database_encoding_max_length();

	compressed = gbt_var_compress(entry, &tinfo);

	PG_RETURN_POINTER(compressed);
}
/*
 * like_escape() --- rewrite a LIKE pattern that uses a caller-chosen ESCAPE
 * string so that it uses Postgres' standard backslash escape convention.
 *
 * The single-byte or multibyte worker is selected by the database encoding.
 */
Datum
like_escape(PG_FUNCTION_ARGS)
{
	text	   *pat = PG_GETARG_TEXT_PP(0);
	text	   *esc = PG_GETARG_TEXT_PP(1);
	text	   *converted;

	converted = (pg_database_encoding_max_length() == 1)
		? SB_do_like_escape(pat, esc)
		: MB_do_like_escape(pat, esc);

	PG_RETURN_TEXT_P(converted);
}
/*
 * bpcharlen
 *		Length of a bpchar value in characters, not counting trailing
 *		spaces.
 */
Datum
bpcharlen(PG_FUNCTION_ARGS)
{
	BpChar	   *arg = PG_GETARG_BPCHAR_P(0);
	int			nbytes;
	int			result;

	/* byte count with trailing spaces excluded */
	nbytes = bcTruelen(arg);
	result = nbytes;

	/* a multibyte encoding needs a character count instead */
	if (pg_database_encoding_max_length() != 1)
		result = pg_mbstrlen_with_len(VARDATA(arg), nbytes);

	PG_RETURN_INT32(result);
}
/*
 * orafce_bpcharlen
 *		Length of a bpchar value in characters; trailing spaces are counted.
 */
Datum
orafce_bpcharlen(PG_FUNCTION_ARGS)
{
	BpChar	   *arg = PG_GETARG_BPCHAR_PP(0);
	int			nbytes;
	int			result;

	/* full byte length of the payload, trailing spaces included */
	nbytes = VARSIZE_ANY_EXHDR(arg);
	result = nbytes;

	/* a multibyte encoding needs a character count instead */
	if (pg_database_encoding_max_length() != 1)
		result = pg_mbstrlen_with_len(VARDATA_ANY(arg), nbytes);

	PG_RETURN_INT32(result);
}
/*
 * Create a tsvector parser state for the given input string.
 *
 * When oprisdelim is set, the characters ! | & ( ) act as delimiters in
 * addition to whitespace.  The is_tsquery flag is simply recorded in the
 * state.  The input string is referenced, not copied.
 */
TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
{
	TSVectorParseState parser;

	parser = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));

	/* both cursors start at the beginning of the input */
	parser->prsbuf = input;
	parser->bufstart = input;

	/* initial word buffer of 32 bytes */
	parser->len = 32;
	parser->word = (char *) palloc(parser->len);

	parser->eml = pg_database_encoding_max_length();
	parser->oprisdelim = oprisdelim;
	parser->is_tsquery = is_tsquery;

	return parser;
}
/*
 * Count the characters (not bytes) in a NUL-terminated multibyte string.
 */
int
pg_mbstrlen(const char *mbstr)
{
	int			nchars;

	/* single-byte encodings: characters and bytes coincide */
	if (pg_database_encoding_max_length() == 1)
		return strlen(mbstr);

	for (nchars = 0; *mbstr; nchars++)
		mbstr += pg_mblen(mbstr);

	return nchars;
}
/*
 * ora_instr
 *		Find the nth occurrence of pattern in txt and return its 1-based
 *		position, or 0 if there is none.
 *
 * A positive start searches forward from that 1-based position; any other
 * start searches backward.  nth must be positive.  Multibyte encodings are
 * handed off to ora_instr_mb().
 */
int
ora_instr(text *txt, text *pattern, int start, int nth)
{
	const char *haystack;
	const char *needle;
	int			hay_len;
	int			needle_len;
	int			pos;
	int			stop;
	int			step;

	if (nth <= 0)
		PARAMETER_ERROR("Four parameter isn't positive.");

	/* multibyte strings have their own implementation */
	if (pg_database_encoding_max_length() > 1)
		return ora_instr_mb(txt, pattern, start, nth);

	haystack = VARDATA_ANY(txt);
	hay_len = VARSIZE_ANY_EXHDR(txt);
	needle = VARDATA_ANY(pattern);
	needle_len = VARSIZE_ANY_EXHDR(pattern);

	if (start > 0)
	{
		/* forward scan */
		step = 1;
		pos = start - 1;
		stop = hay_len - needle_len + 1;
		if (pos >= stop)
			return 0;			/* out of range */
	}
	else
	{
		/* backward scan */
		step = -1;
		pos = Min(hay_len + start, hay_len - needle_len);
		stop = -1;
		if (pos <= stop)
			return 0;			/* out of range */
	}

	while (pos != stop)
	{
		if (memcmp(haystack + pos, needle, needle_len) == 0 && --nth == 0)
			return pos + 1;
		pos += step;
	}

	return 0;
}
/*
 * plvchr_is_kind_a
 *		Report whether the first character of the text argument belongs to
 *		character kind k.
 *
 * A first character occupying more than one byte is reported as matching
 * only kind 5; otherwise the decision is made by is_kind().
 */
Datum
plvchr_is_kind_a(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_PP(0);
	int32		k = PG_GETARG_INT32(1);
	char		first;

	NON_EMPTY_CHECK(str);

	if (pg_database_encoding_max_length() > 1 &&
		_pg_mblen(VARDATA_ANY(str)) > 1)
		PG_RETURN_INT32((k == 5));

	first = *VARDATA_ANY(str);

	PG_RETURN_INT32(is_kind(first, k));
}
Datum plvstr_is_prefix_text (PG_FUNCTION_ARGS) { text *str = PG_GETARG_TEXT_PP(0); text *prefix = PG_GETARG_TEXT_PP(1); bool case_sens = PG_GETARG_BOOL(2); bool mb_encode; int str_len = VARSIZE_ANY_EXHDR(str); int pref_len = VARSIZE_ANY_EXHDR(prefix); int i; char *ap, *bp; mb_encode = pg_database_encoding_max_length() > 1; if (mb_encode && !case_sens) { str = (text*)DatumGetPointer(DirectFunctionCall1(lower, PointerGetDatum(str))); prefix = (text*)DatumGetPointer(DirectFunctionCall1(lower, PointerGetDatum(prefix))); } ap = VARDATA_ANY(str); bp = VARDATA_ANY(prefix); for (i = 0; i < pref_len; i++) { if (i >= str_len) break; if (case_sens || mb_encode) { if (*ap++ != *bp++) break; } else if (!mb_encode) { if (pg_toupper((unsigned char) *ap++) != pg_toupper((unsigned char) *bp++)) break; } } PG_RETURN_BOOL(i == pref_len); }
Datum gbt_text_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(entry->key); void *query = (void *) DatumGetTextP(PG_GETARG_DATUM(1)); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); bool retval = FALSE; GBT_VARKEY_R r = gbt_var_key_readable(key); if (tinfo.eml == 0) { tinfo.eml = pg_database_encoding_max_length(); } retval = gbt_var_consistent(&r, query, &strategy, GIST_LEAF(entry), &tinfo); PG_RETURN_BOOL(retval); }
/*
 * Count the characters in the first "limit" bytes of a multibyte string.
 * The string need not be NUL-terminated, but counting also stops at a NUL
 * byte if one is met first.
 */
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
	int			nchars;

	/* single-byte encodings: the byte limit is the answer */
	if (pg_database_encoding_max_length() == 1)
		return limit;

	for (nchars = 0; limit > 0 && *mbstr; nchars++)
	{
		int			width = pg_mblen(mbstr);

		limit -= width;
		mbstr += width;
	}

	return nchars;
}
/*
 * Append the trigrams of one already-padded word to the output array.
 *
 * tptr: next free slot of the output array.
 * str: the padded word, bytelen bytes and charlen characters long.
 *
 * Returns the next free slot after the trigrams that were added.
 */
static trgm *
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
{
	char	   *cursor = str;

	/* fewer than three characters cannot form a trigram */
	if (charlen < 3)
		return tptr;

#ifdef USE_WIDE_UPPER_LOWER
	if (pg_database_encoding_max_length() > 1)
	{
		/* byte widths of the three characters in the sliding window */
		int			first_len = pg_mblen(str);
		int			mid_len = pg_mblen(str + first_len);
		int			last_len = pg_mblen(str + first_len + mid_len);

		while ((cursor - str) + first_len + mid_len + last_len <= bytelen)
		{
			cnt_trigram(tptr, cursor, first_len + mid_len + last_len);

			/* slide the window forward by one character */
			cursor += first_len;
			tptr++;

			first_len = mid_len;
			mid_len = last_len;
			last_len = pg_mblen(cursor + first_len + mid_len);
		}

		return tptr;
	}
#endif

	/* single-byte processing: every character is exactly one byte */
	Assert(bytelen == charlen);

	/* number of trigrams = strlen - 2 */
	for (; cursor - str < bytelen - 2; cursor++, tptr++)
	{
		CPTRGM(tptr, cursor);
	}

	return tptr;
}
/*
 * Return the largest byte length, not exceeding "limit", at which the
 * multibyte string (len bytes, not necessarily NUL-terminated) can be cut
 * without splitting a multibyte character.
 */
int
pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
{
	int			clipped = 0;
	int			width;

	/* single-byte encodings take the simple path */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	for (; len > 0 && *mbstr; len -= width, mbstr += width)
	{
		width = pg_mblen(mbstr);

		/* the next character would not fit any more */
		if (clipped + width > limit)
			break;

		clipped += width;

		/* exactly full */
		if (clipped == limit)
			break;
	}

	return clipped;
}
/*
 * Like pg_mbcliplen, but "limit" is a number of characters rather than a
 * number of bytes.  The result is still a byte length.
 */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
	int			clipped = 0;
	int			ordinal;

	/* single-byte encodings take the simple path */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	/* "ordinal" is the 1-based number of the character being examined */
	for (ordinal = 1; len > 0 && *mbstr; ordinal++)
	{
		int			width = pg_mblen(mbstr);

		if (ordinal > limit)
			break;

		clipped += width;
		len -= width;
		mbstr += width;
	}

	return clipped;
}
/*
 * Case-insensitive LIKE match of str against pat.
 *
 * In a single-byte encoding the strings are matched as-is by
 * SB_IMatchText().  In a multibyte encoding there is little choice but to
 * run lower() over both pattern and text and then use the ordinary
 * matcher.
 */
static inline int
Generic_Text_IC_like(text *str, text *pat)
{
	char	   *s;
	char	   *p;
	int			slen;
	int			plen;

	if (pg_database_encoding_max_length() <= 1)
	{
		p = VARDATA_ANY(pat);
		plen = VARSIZE_ANY_EXHDR(pat);
		s = VARDATA_ANY(str);
		slen = VARSIZE_ANY_EXHDR(str);

		return SB_IMatchText(s, slen, p, plen);
	}

	/* lower() never returns a packed datum, so the old macros are fine */
	pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
	p = VARDATA(pat);
	plen = (VARSIZE(pat) - VARHDRSZ);

	str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
	s = VARDATA(str);
	slen = (VARSIZE(str) - VARHDRSZ);

	if (GetDatabaseEncoding() == PG_UTF8)
		return UTF8_MatchText(s, slen, p, plen);

	return MB_MatchText(s, slen, p, plen);
}
/*
 * downcase_truncate_identifier() --- downcase an unquoted identifier and
 * truncate it if necessary, optionally warning about the truncation.
 *
 * The result is a freshly palloc'd, NUL-terminated string.  The input need
 * not be NUL-terminated; exactly len bytes are read.
 *
 * Note: the API leaves room for downcasing rules that lengthen the string,
 * but that is not supported yet.  Implementing it would also require
 * fixing SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	char	   *folded;
	int			i;
	bool		single_byte;

	folded = palloc(len + 1);
	single_byte = (pg_database_encoding_max_length() == 1);

	/*
	 * SQL99 asks for Unicode-aware case normalization, for which the
	 * infrastructure does not exist yet.  The compromise used here: 7-bit
	 * letters get an ASCII-only downcasing, while bytes with the high bit
	 * set go through the locale-aware tolower(), but only when they cannot
	 * be part of a multibyte character.  (Locale rules are not right for
	 * ASCII in every locale, e.g. Turkish 'i' and 'I'.)
	 */
	for (i = 0; i < len; i++)
	{
		unsigned char ch = (unsigned char) ident[i];

		if (ch >= 'A' && ch <= 'Z')
			ch += 'a' - 'A';
		else if (single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
			ch = tolower(ch);

		folded[i] = (char) ch;
	}
	folded[i] = '\0';

	if (i >= NAMEDATALEN)
		truncate_identifier(folded, i, warn);

	return folded;
}
Datum rpad(PG_FUNCTION_ARGS) { text *string1 = PG_GETARG_TEXT_PP(0); int32 len = PG_GETARG_INT32(1); text *string2 = PG_GETARG_TEXT_PP(2); text *ret; char *ptr1, *ptr2, *ptr2start, *ptr2end, *ptr_ret; int m, s1len, s2len; int bytelen; /* Negative len is silently taken as zero */ if (len < 0) len = 0; s1len = VARSIZE_ANY_EXHDR(string1); if (s1len < 0) s1len = 0; /* shouldn't happen */ s2len = VARSIZE_ANY_EXHDR(string2); if (s2len < 0) s2len = 0; /* shouldn't happen */ s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); if (s1len > len) s1len = len; /* truncate string1 to len chars */ if (s2len <= 0) len = s1len; /* nothing to pad with, so don't pad */ bytelen = pg_database_encoding_max_length() * len; /* Check for integer overflow */ if (len != 0 && bytelen / pg_database_encoding_max_length() != len) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested length too large"))); ret = (text *) palloc(VARHDRSZ + bytelen); m = len - s1len; ptr1 = VARDATA_ANY(string1); ptr_ret = VARDATA(ret); while (s1len--) { int mlen = pg_mblen(ptr1); memcpy(ptr_ret, ptr1, mlen); ptr_ret += mlen; ptr1 += mlen; } ptr2 = ptr2start = VARDATA_ANY(string2); ptr2end = ptr2 + s2len; while (m--) { int mlen = pg_mblen(ptr2); memcpy(ptr_ret, ptr2, mlen); ptr_ret += mlen; ptr2 += mlen; if (ptr2 == ptr2end) /* wrap around at end of s2 */ ptr2 = ptr2start; } SET_VARSIZE(ret, ptr_ret - (char *) ret); PG_RETURN_TEXT_P(ret); }
Datum plvstr_rvrs(PG_FUNCTION_ARGS) { text *str; int start; int end; int len; int i; int new_len; text *result; char *data; char *sizes = NULL; int *positions = NULL; bool mb_encode; if (PG_ARGISNULL(0)) PG_RETURN_NULL(); str = PG_GETARG_TEXT_PP(0); mb_encode = pg_database_encoding_max_length() > 1; if (!mb_encode) len = VARSIZE_ANY_EXHDR(str); else len = ora_mb_strlen(str, &sizes, &positions); start = PG_ARGISNULL(1) ? 1 : PG_GETARG_INT32(1); end = PG_ARGISNULL(2) ? (start < 0 ? -len : len) : PG_GETARG_INT32(2); if ((start > end && start > 0) || (start < end && start < 0)) PARAMETER_ERROR("Second parameter is bigger than third."); if (start < 0) { int new_start, new_end; new_start = len + start + 1; new_end = len + end + 1; start = new_end; end = new_start; } start = start != 0 ? start : 1; end = end < len ? end : len; new_len = end - start + 1; new_len = new_len >= 0 ? new_len : 0; if (mb_encode) { int max_size; int cur_size; char *p; int j; int fz_size; fz_size = VARSIZE_ANY_EXHDR(str); if ((max_size = (new_len*pg_database_encoding_max_length())) > fz_size) result = palloc(fz_size + VARHDRSZ); else result = palloc(max_size + VARHDRSZ); data = (char*) VARDATA(result); cur_size = 0; p = VARDATA_ANY(str); for (i = end - 1; i>= start - 1; i--) { for (j=0; j<sizes[i]; j++) *data++ = *(p+positions[i]+j); cur_size += sizes[i]; } SET_VARSIZE(result, cur_size + VARHDRSZ); } else { char *p = VARDATA_ANY(str); result = palloc(new_len + VARHDRSZ); data = (char*) VARDATA(result); SET_VARSIZE(result, new_len + VARHDRSZ); for (i = end - 1; i >= start - 1; i--) *data++ = p[i]; } PG_RETURN_TEXT_P(result); }
Datum translate(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_PP(0); text *from = PG_GETARG_TEXT_PP(1); text *to = PG_GETARG_TEXT_PP(2); text *result; char *from_ptr, *to_ptr; char *source, *target; int m, fromlen, tolen, retlen, i; int worst_len; int len; int source_len; int from_index; m = VARSIZE_ANY_EXHDR(string); if (m <= 0) PG_RETURN_TEXT_P(string); source = VARDATA_ANY(string); fromlen = VARSIZE_ANY_EXHDR(from); from_ptr = VARDATA_ANY(from); tolen = VARSIZE_ANY_EXHDR(to); to_ptr = VARDATA_ANY(to); /* * The worst-case expansion is to substitute a max-length character for a * single-byte character at each position of the string. */ worst_len = pg_database_encoding_max_length() * m; /* check for integer overflow */ if (worst_len / pg_database_encoding_max_length() != m) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested length too large"))); result = (text *) palloc(worst_len + VARHDRSZ); target = VARDATA(result); retlen = 0; while (m > 0) { source_len = pg_mblen(source); from_index = 0; for (i = 0; i < fromlen; i += len) { len = pg_mblen(&from_ptr[i]); if (len == source_len && memcmp(source, &from_ptr[i], len) == 0) break; from_index++; } if (i < fromlen) { /* substitute */ char *p = to_ptr; for (i = 0; i < from_index; i++) { p += pg_mblen(p); if (p >= (to_ptr + tolen)) break; } if (p < (to_ptr + tolen)) { len = pg_mblen(p); memcpy(target, p, len); target += len; retlen += len; } } else { /* no match, so copy */ memcpy(target, source, source_len); target += source_len; retlen += source_len; } source += source_len; m -= source_len; } SET_VARSIZE(result, retlen + VARHDRSZ); /* * The function result is probably much bigger than needed, if we're using * a multibyte encoding, but it's not worth reallocating it; the result * probably won't live long anyway. */ PG_RETURN_TEXT_P(result); }
/*
 * Shared worker for btrim, ltrim and rtrim.
 *
 * Removes from the left end (if doltrim) and/or the right end (if dortrim)
 * of "string" every character that occurs in "set", and returns what is
 * left as a new text value.
 */
static text *
dotrim(const char *string, int stringlen,
	   const char *set, int setlen,
	   bool doltrim, bool dortrim)
{
	int			k;

	/* An empty string or an empty set leaves nothing to trim */
	if (stringlen <= 0 || setlen <= 0)
		return cstring_to_text_with_len(string, stringlen);

	if (pg_database_encoding_max_length() > 1)
	{
		/*
		 * Multibyte encoding: first record where each character starts and
		 * how wide it is, for both the string and the set, so the matching
		 * loops below can work on whole characters cheaply.
		 */
		const char **str_starts;
		int		   *str_widths;
		const char **set_starts;
		int		   *set_widths;
		int			str_count = 0;
		int			set_count = 0;
		int			first;		/* index of first surviving character */
		int			remaining;	/* number of surviving characters */
		const char *scan;
		int			left;

		str_starts = (const char **) palloc(stringlen * sizeof(char *));
		str_widths = (int *) palloc(stringlen * sizeof(int));
		for (scan = string, left = stringlen; left > 0;)
		{
			int			width = pg_mblen(scan);

			str_starts[str_count] = scan;
			str_widths[str_count] = width;
			str_count++;
			scan += width;
			left -= width;
		}

		set_starts = (const char **) palloc(setlen * sizeof(char *));
		set_widths = (int *) palloc(setlen * sizeof(int));
		for (scan = set, left = setlen; left > 0;)
		{
			int			width = pg_mblen(scan);

			set_starts[set_count] = scan;
			set_widths[set_count] = width;
			set_count++;
			scan += width;
			left -= width;
		}

		first = 0;
		remaining = str_count;

		if (doltrim)
		{
			while (remaining > 0)
			{
				const char *ch = str_starts[first];
				int			ch_width = str_widths[first];

				for (k = 0; k < set_count; k++)
				{
					if (ch_width == set_widths[k] &&
						memcmp(ch, set_starts[k], ch_width) == 0)
						break;
				}
				if (k >= set_count)
					break;		/* leading character is not in the set */

				string += ch_width;
				stringlen -= ch_width;
				first++;
				remaining--;
			}
		}

		if (dortrim)
		{
			while (remaining > 0)
			{
				int			last = first + remaining - 1;
				const char *ch = str_starts[last];
				int			ch_width = str_widths[last];

				for (k = 0; k < set_count; k++)
				{
					if (ch_width == set_widths[k] &&
						memcmp(ch, set_starts[k], ch_width) == 0)
						break;
				}
				if (k >= set_count)
					break;		/* trailing character is not in the set */

				stringlen -= ch_width;
				remaining--;
			}
		}

		pfree(str_starts);
		pfree(str_widths);
		pfree(set_starts);
		pfree(set_widths);
	}
	else
	{
		/*
		 * Single-byte encoding: bytes are characters, so plain byte
		 * comparisons are enough.
		 */
		if (doltrim)
		{
			while (stringlen > 0)
			{
				char		lead = *string;

				for (k = 0; k < setlen; k++)
				{
					if (lead == set[k])
						break;
				}
				if (k >= setlen)
					break;		/* leading byte is not in the set */

				string++;
				stringlen--;
			}
		}

		if (dortrim)
		{
			while (stringlen > 0)
			{
				char		tail = string[stringlen - 1];

				for (k = 0; k < setlen; k++)
				{
					if (tail == set[k])
						break;
				}
				if (k >= setlen)
					break;		/* trailing byte is not in the set */

				stringlen--;
			}
		}
	}

	/* Hand back whatever part of the string survived */
	return cstring_to_text_with_len(string, stringlen);
}
/*
 * lowerstr_with_len --- return a lower-cased copy of a string
 *
 * The input is len bytes long and need not be NUL-terminated.
 *
 * The result is palloc'd and NUL-terminated.
 */
char *
lowerstr_with_len(const char *str, int len)
{
	char	   *out;
	int			i;
#ifdef USE_WIDE_UPPER_LOWER
	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
	pg_locale_t mylocale = 0;	/* TODO */
#endif

	if (len == 0)
		return pstrdup("");

#ifdef USE_WIDE_UPPER_LOWER

	/*
	 * The wide-character path is taken only for a multibyte encoding whose
	 * ctype is not C.  Some operating systems fail with multi-byte
	 * encodings and a C locale, and for a C locale there is no need to
	 * process as multibyte anyway.  (From backend/utils/adt/oracle_compat.c,
	 * Teodor.)
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
	{
		wchar_t    *wide;
		wchar_t    *wp;
		int			nwide;
		int			outsize;

		/*
		 * Worst case for the wide buffer: the byte count is >= the
		 * character count, plus one wchar_t for the terminating zero that
		 * wchar2char needs.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
		nwide = char2wchar(wide, len + 1, str, len, mylocale);
		Assert(nwide <= len);

		for (wp = wide; *wp; wp++)
			*wp = towlower((wint_t) *wp);

		/* worst-case size of the converted result, plus '\0' */
		outsize = pg_database_encoding_max_length() * nwide + 1;
		out = (char *) palloc(outsize);

		nwide = wchar2char(out, wide, outsize, mylocale);
		pfree(wide);

		if (nwide < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("conversion from wchar_t to server encoding failed: %m")));
		Assert(nwide < outsize);

		return out;
	}
#endif   /* USE_WIDE_UPPER_LOWER */

	/* byte-at-a-time folding; stops early at an embedded NUL */
	out = (char *) palloc(sizeof(char) * (len + 1));

	for (i = 0; i < len && str[i]; i++)
		out[i] = tolower(TOUCHAR(&str[i]));
	out[i] = '\0';

	return out;
}