/* * Make array of trigrams with sorting and removing duplicate items. * * str: source string, of length slen bytes. * * Returns the sorted array of unique trigrams. */ TRGM * generate_trgm(char *str, int slen) { TRGM *trg; int len; protect_out_of_mem(slen); trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; len = generate_trgm_only(GETARR(trg), str, slen); SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); if (len == 0) return trg; /* * Make trigrams unique. */ if (len > 1) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
Datum ghstore_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0)); HStore *val = DatumGetHStoreP(entry->key); HEntry *hsent = ARRPTR(val); char *ptr = STRPTR(val); int count = HS_COUNT(val); int i; SET_VARSIZE(res, CALCGTSIZE(0)); for (i = 0; i < count; ++i) { int h; h = crc32_sz((char *) HSTORE_KEY(hsent, ptr, i), HSTORE_KEYLEN(hsent, i)); HASH(GETSIGN(res), h); if (!HSTORE_VALISNULL(hsent, i)) { h = crc32_sz((char *) HSTORE_VAL(hsent, ptr, i), HSTORE_VALLEN(hsent, i)); HASH(GETSIGN(res), h); } } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (!ISALLTRUE(DatumGetPointer(entry->key))) { int32 i; GISTTYPE *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE)); SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE)); res->flag = ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); }
datum_t gtrgm_compress(PG_FUNC_ARGS) { struct gist_entry *entry = (struct gist_entry *) ARG_POINTER(0); struct gist_entry *retval = entry; if (entry->leafkey) { /* trgm */ TRGM *res; text *val = D_TO_TEXT_P(entry->key); res = generate_trgm(VLA_DATA(val), VLA_SZ(val) - VAR_HDR_SZ); retval = (struct gist_entry *) palloc(sizeof(struct gist_entry)); gistentryinit(*retval, PTR_TO_D(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(D_TO_PTR(entry->key)) && !ISALLTRUE(D_TO_PTR(entry->key))) { int4 i, len; TRGM *res; BITVECP sign = GETSIGN(D_TO_PTR(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) RET_POINTER(retval); } len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); res = (TRGM *) palloc(len); VLA_SET_SZ_STND(res, len); res->flag = SIGNKEY | ALLISTRUE; retval = (struct gist_entry *) palloc(sizeof(struct gist_entry)); gistentryinit(*retval, PTR_TO_D(res), entry->rel, entry->page, entry->offset, FALSE); }
Datum gtrgm_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* trgm */ TRGM *res; text *val = DatumGetTextP(entry->key); res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int32 i, len; TRGM *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); res = (TRGM *) palloc(len); SET_VARSIZE(res, len); res->flag = SIGNKEY | ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); }
/* * Generates trigrams for wildcard search string. * * Returns array of trigrams that must occur in any string that matches the * wildcard string. For example, given pattern "a%bcd%" the trigrams * " a", "bcd" would be extracted. */ TRGM * generate_wildcard_trgm(const char *str, int slen) { TRGM *trg; char *buf, *buf2; trgm *tptr; int len, charlen, bytelen; const char *eword; trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); buf = palloc(sizeof(char) * (slen + 4)); /* * Extract trigrams from each substring extracted by get_wildcard_part. */ eword = str; while ((eword = get_wildcard_part(eword, slen - (eword - str), buf, &bytelen, &charlen)) != NULL) { #ifdef IGNORECASE buf2 = lowerstr_with_len(buf, bytelen); bytelen = strlen(buf2); #else buf2 = buf; #endif /* * count trigrams */ tptr = make_trigrams(tptr, buf2, bytelen, charlen); #ifdef IGNORECASE pfree(buf2); #endif } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; /* * Make trigrams unique. */ if (len > 0) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
TRGM * generate_trgm(char *str, int slen) { TRGM *trg; char *buf; trgm *tptr; int len, charlen, bytelen; char *bword, *eword; trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); buf = palloc(sizeof(char) * (slen + 4)); if (LPADDING > 0) { *buf = ' '; if (LPADDING > 1) *(buf + 1) = ' '; } eword = str; while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL) { #ifdef IGNORECASE bword = lowerstr_with_len(bword, eword - bword); bytelen = strlen(bword); #else bytelen = eword - bword; #endif memcpy(buf + LPADDING, bword, bytelen); #ifdef IGNORECASE pfree(bword); #endif buf[LPADDING + bytelen] = ' '; buf[LPADDING + bytelen + 1] = ' '; /* * count trigrams */ tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, charlen + LPADDING + RPADDING); } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; if (len > 0) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
/* * Generates trigrams for wildcard search string. * * Returns array of trigrams that must occur in any string that matches the * wildcard string. For example, given pattern "a%bcd%" the trigrams * " a", "bcd" would be extracted. */ TRGM * generate_wildcard_trgm(const char *str, int slen) { TRGM *trg; char *buf, *buf2; trgm *tptr; int len, charlen, bytelen; const char *eword; /* * Guard against possible overflow in the palloc requests below. (We * don't worry about the additive constants, since palloc can detect * requests that are a little above MaxAllocSize --- we just need to * prevent integer overflow in the multiplications.) */ if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("out of memory"))); trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); /* Allocate a buffer for blank-padded, but not yet case-folded, words */ buf = palloc(sizeof(char) * (slen + 4)); /* * Extract trigrams from each substring extracted by get_wildcard_part. */ eword = str; while ((eword = get_wildcard_part(eword, slen - (eword - str), buf, &bytelen, &charlen)) != NULL) { #ifdef IGNORECASE buf2 = lowerstr_with_len(buf, bytelen); bytelen = strlen(buf2); #else buf2 = buf; #endif /* * count trigrams */ tptr = make_trigrams(tptr, buf2, bytelen, charlen); #ifdef IGNORECASE pfree(buf2); #endif } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; /* * Make trigrams unique. */ if (len > 1) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
TRGM * generate_trgm(char *str, int slen) { TRGM *trg; char *buf; trgm *tptr; int len, charlen, bytelen; char *bword, *eword; /* * Guard against possible overflow in the palloc requests below. (We * don't worry about the additive constants, since palloc can detect * requests that are a little above MaxAllocSize --- we just need to * prevent integer overflow in the multiplications.) */ if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("out of memory"))); trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); /* Allocate a buffer for case-folded, blank-padded words */ buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4); if (LPADDING > 0) { *buf = ' '; if (LPADDING > 1) *(buf + 1) = ' '; } eword = str; while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL) { #ifdef IGNORECASE bword = lowerstr_with_len(bword, eword - bword); bytelen = strlen(bword); #else bytelen = eword - bword; #endif memcpy(buf + LPADDING, bword, bytelen); #ifdef IGNORECASE pfree(bword); #endif buf[LPADDING + bytelen] = ' '; buf[LPADDING + bytelen + 1] = ' '; /* * count trigrams */ tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, charlen + LPADDING + RPADDING); } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; /* * Make trigrams unique. */ if (len > 1) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
Datum gtsvector_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* tsvector */ SignTSVector *res; TSVector val = DatumGetTSVector(entry->key); int32 len; int32 *arr; WordEntry *ptr = ARRPTR(val); char *words = STRPTR(val); len = CALCGTSIZE(ARRKEY, val->size); res = (SignTSVector *) palloc(len); SET_VARSIZE(res, len); res->flag = ARRKEY; arr = GETARR(res); len = val->size; while (len--) { pg_crc32 c; INIT_LEGACY_CRC32(c); COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len); FIN_LEGACY_CRC32(c); *arr = *(int32 *) &c; arr++; ptr++; } len = uniqueint(GETARR(res), val->size); if (len != val->size) { /* * there is a collision of hash-function; len is always less than * val->size */ len = CALCGTSIZE(ARRKEY, len); res = (SignTSVector *) repalloc((void *) res, len); SET_VARSIZE(res, len); } /* make signature, if array is too long */ if (VARSIZE(res) > TOAST_INDEX_TARGET) { SignTSVector *ressign; len = CALCGTSIZE(SIGNKEY, 0); ressign = (SignTSVector *) palloc(len); SET_VARSIZE(ressign, len); ressign->flag = SIGNKEY; makesign(GETSIGN(ressign), res); res = ressign; } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int32 i, len; SignTSVector *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); res = (SignTSVector *) palloc(len); SET_VARSIZE(res, len); res->flag = SIGNKEY | ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); }
Datum g_intbig_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); if (entry->leafkey) { GISTENTRY *retval; ArrayType *in = DatumGetArrayTypeP(entry->key); int32 *ptr; int num; GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0)); CHECKARRVALID(in); if (ARRISEMPTY(in)) { ptr = NULL; num = 0; } else { ptr = ARRPTR(in); num = ARRNELEMS(in); } SET_VARSIZE(res, CALCGTSIZE(0)); while (num--) { HASH(GETSIGN(res), *ptr); ptr++; } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); if (in != DatumGetArrayTypeP(entry->key)) pfree(in); PG_RETURN_POINTER(retval); } else if (!ISALLTRUE(DatumGetPointer(entry->key))) { GISTENTRY *retval; int i; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); GISTTYPE *res; LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(entry); } res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE)); SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE)); res->flag = ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); }
Datum gtsvector_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* tsvector */ GISTTYPE *res; tsvector *val = (tsvector *) PG_DETOAST_DATUM(entry->key); int4 len; int4 *arr; WordEntry *ptr = ARRPTR(val); char *words = STRPTR(val); len = CALCGTSIZE(ARRKEY, val->size); res = (GISTTYPE *) palloc(len); SET_VARSIZE(res, len); res->flag = ARRKEY; arr = GETARR(res); len = val->size; while (len--) { *arr = crc32_sz(&words[ptr->pos], ptr->len); arr++; ptr++; } len = uniqueint(GETARR(res), val->size); if (len != val->size) { /* * there is a collision of hash-function; len is always less than * val->size */ len = CALCGTSIZE(ARRKEY, len); res = (GISTTYPE *) repalloc((void *) res, len); SET_VARSIZE(res, len); } /* make signature, if array is too long */ if (VARSIZE(res) > TOAST_INDEX_TARGET) { GISTTYPE *ressign; len = CALCGTSIZE(SIGNKEY, 0); ressign = (GISTTYPE *) palloc(len); SET_VARSIZE(ressign, len); ressign->flag = SIGNKEY; makesign(GETSIGN(ressign), res); res = ressign; } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int4 i, len; GISTTYPE *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE( if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); );
entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int4 i, len; GISTTYPE *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE( if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); ); len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); res = (GISTTYPE *) palloc(len); SET_VARSIZE(res, len); res->flag = SIGNKEY | ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } PG_RETURN_POINTER(retval); } Datum gtsvector_decompress(PG_FUNCTION_ARGS) {
Datum ghstore_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0)); HStore *toastedval = (HStore *) DatumGetPointer(entry->key); HStore *val = (HStore *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); HEntry *ptr = ARRPTR(val); char *words = STRPTR(val); SET_VARSIZE(res, CALCGTSIZE(0)); while (ptr - ARRPTR(val) < val->size) { int h; h = crc32_sz((char *) (words + ptr->pos), ptr->keylen); HASH(GETSIGN(res), h); if (!ptr->valisnull) { h = crc32_sz((char *) (words + ptr->pos + ptr->keylen), ptr->vallen); HASH(GETSIGN(res), h); } ptr++; } if (val != toastedval) pfree(val); retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (!ISALLTRUE(DatumGetPointer(entry->key))) { int4 i; GISTTYPE *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE)); SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE)); res->flag = ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); }