/*
 * Store one trigram for the bytelen bytes at str into *tptr.
 *
 * A run of exactly three bytes is copied as-is.  Any other length is
 * first reduced to a CRC32 of the bytes, and the trigram is then taken
 * from that CRC value.
 */
static void
cnt_trigram(trgm *tptr, char *str, int bytelen)
{
	pg_crc32	hash;

	/* Exactly three bytes: nothing to hash, copy them directly. */
	if (bytelen == 3)
	{
		CPTRGM(tptr, str);
		return;
	}

	INIT_CRC32(hash);
	COMP_CRC32(hash, str, bytelen);
	FIN_CRC32(hash);

	/*
	 * Only a trigram's worth of the CRC is kept (CPTRGM copies from the
	 * address of the CRC variable); that is hoped to be a good enough hash.
	 */
	CPTRGM(tptr, &hash);
}
Datum show_trgm(PG_FUNCTION_ARGS) { text *in = PG_GETARG_TEXT_P(0); TRGM *trg; Datum *d; ArrayType *a; trgm *ptr; int i; trg = generate_trgm(VARDATA(in), VARSIZE(in) - VARHDRSZ); d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg))); for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++) { text *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3)); if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr)) { snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr)); SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item))); } else { SET_VARSIZE(item, VARHDRSZ + 3); CPTRGM(VARDATA(item), ptr); } d[i] = PointerGetDatum(item); } a = construct_array( d, ARRNELEM(trg), TEXTOID, -1, false, 'i' ); for (i = 0; i < ARRNELEM(trg); i++) pfree(DatumGetPointer(d[i])); pfree(d); pfree(trg); PG_FREE_IF_COPY(in, 0); PG_RETURN_POINTER(a); }
/*
 * Build the bit signature for trigram array 'a' into 'sign'.
 *
 * The signature is zeroed, the bit at SIGLENBIT is set, and then each
 * trigram is copied into the low-addressed bytes of a zero-initialised
 * int32 and fed to HASH() together with the signature.
 */
static void
makesign(BITVECP sign, TRGM *a)
{
	int32		remaining = ARRNELEM(a);
	trgm	   *cur = GETARR(a);
	int32		packed = 0;

	MemSet((void *) sign, 0, sizeof(BITVEC));
	SETBIT(sign, SIGLENBIT);	/* set last unused bit */

	while (remaining-- > 0)
	{
		/* Overwrite the trigram bytes of 'packed'; the rest stays zero. */
		CPTRGM(((char *) &packed), cur);
		HASH(sign, packed);
		cur++;
	}
}
/*
 * Collapse runs of adjacent equal trigrams in a[0..len-1], in place.
 *
 * Each element is compared (CMPTRGM) only with the most recently kept
 * element, so duplicates are removed only when they are adjacent; callers
 * that want full de-duplication must pass an array in which equal trigrams
 * are already next to each other (e.g. sorted).
 *
 * Returns the number of elements kept at the front of 'a'.  An empty (or
 * negative-length) input yields 0.
 */
static int
unique_array(trgm *a, int len)
{
	trgm	   *curend,
			   *tmp;

	/*
	 * Without this guard the loop below would not run and the final
	 * "curend + 1 - a" would wrongly report one element for an empty array.
	 */
	if (len <= 0)
		return 0;

	curend = tmp = a;
	while (tmp - a < len)
	{
		if (CMPTRGM(tmp, curend))
		{
			/* Differs from the last kept trigram: append it. */
			curend++;
			CPTRGM(curend, tmp);
		}
		tmp++;
	}

	/* curend points at the last kept element, hence the +1. */
	return curend + 1 - a;
}
/*
 * Adds trigrams from words (already padded).
 *
 * tptr    - where the next trigram is to be written
 * str     - start of the word
 * bytelen - length of the word in bytes
 * charlen - length of the word in characters
 *
 * Returns the position just past the last trigram written.  Words shorter
 * than three characters produce no trigrams.
 */
static trgm *
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
{
	char	   *ptr = str;

	if (charlen < 3)
		return tptr;

#ifdef USE_WIDE_UPPER_LOWER
	if (pg_database_encoding_max_length() > 1)
	{
		/* Byte lengths of the three characters in the sliding window. */
		int			lenfirst = pg_mblen(str),
					lenmiddle = pg_mblen(str + lenfirst),
					lenlast = pg_mblen(str + lenfirst + lenmiddle);

		while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
		{
			cnt_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);

			/* Slide the window forward by one character. */
			ptr += lenfirst;
			tptr++;

			lenfirst = lenmiddle;
			lenmiddle = lenlast;

			/*
			 * Stop before measuring a character that would start at or past
			 * the end of the word: that would read beyond the bytelen bytes
			 * we were given.  Since pg_mblen() is expected to return at
			 * least 1, the loop condition would fail in that case anyway,
			 * so the set of trigrams produced is unchanged.
			 */
			if ((ptr - str) + lenfirst + lenmiddle >= bytelen)
				break;

			lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
		}
	}
	else
#endif
	{
		Assert(bytelen == charlen);

		/* number of trigrams = strlen - 2 */
		while (ptr - str < bytelen - 2)
		{
			CPTRGM(tptr, ptr);
			ptr++;
			tptr++;
		}
	}

	return tptr;
}
/*
 * Adds trigrams from words (already padded).
 *
 * tptr    - where the next trigram is to be written
 * str     - start of the word
 * bytelen - length of the word in bytes
 * charlen - length of the word in characters
 *
 * Returns the position just past the last trigram written.  Words shorter
 * than three characters produce no trigrams.
 */
static trgm *
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
{
	char	   *ptr = str;

	if (charlen < 3)
		return tptr;

	if (bytelen > charlen)
	{
		/* Find multibyte character boundaries and apply compact_trigram */
		int			lenfirst = pg_mblen(str),
					lenmiddle = pg_mblen(str + lenfirst),
					lenlast = pg_mblen(str + lenfirst + lenmiddle);

		while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
		{
			compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);

			/* Slide the three-character window forward by one character. */
			ptr += lenfirst;
			tptr++;

			lenfirst = lenmiddle;
			lenmiddle = lenlast;

			/*
			 * Stop before measuring a character that would start at or past
			 * the end of the word: that would read beyond the bytelen bytes
			 * we were given.  Since pg_mblen() is expected to return at
			 * least 1, the loop condition would fail in that case anyway,
			 * so the set of trigrams produced is unchanged.
			 */
			if ((ptr - str) + lenfirst + lenmiddle >= bytelen)
				break;

			lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
		}
	}
	else
	{
		/* Fast path when there are no multibyte characters */
		Assert(bytelen == charlen);

		/* number of trigrams = strlen - 2 */
		while (ptr - str < bytelen - 2)
		{
			CPTRGM(tptr, ptr);
			ptr++;
			tptr++;
		}
	}

	return tptr;
}