/*
 * Estimate the selectivity of "tsvector Var @@ tsquery Const".
 *
 * vardata describes the tsvector variable (including its pg_statistic tuple,
 * if one exists); constval is the tsquery constant.  When the variable has a
 * most-common-elements slot the estimate is derived from it, otherwise a
 * statistics-free estimate is used.
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	/* The caller guarantees the constant is a TSQuery, so fetch it directly */
	TSQuery		tsq = DatumGetTSQuery(constval);
	Form_pg_statistic pgstat;
	Datum	   *elems;
	int			nelems;
	float4	   *freqs;
	int			nfreqs;
	Selectivity result;

	/* A query with no items matches nothing */
	if (tsq->size == 0)
		return (Selectivity) 0.0;

	/*
	 * With no statistics tuple at all, use the stats-free estimate.  We
	 * assume there are no nulls in this case, so no stanullfrac correction
	 * is applied.
	 */
	if (!HeapTupleIsValid(vardata->statsTuple))
		return tsquery_opr_selec_no_stats(tsq);

	pgstat = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

	/* For a tsvector column, MCELEM is an array of TEXT elements */
	if (!get_attstatsslot(vardata->statsTuple,
						  TEXTOID, -1,
						  STATISTIC_KIND_MCELEM, InvalidOid,
						  NULL,
						  &elems, &nelems,
						  &freqs, &nfreqs))
	{
		/* The tuple has no most-common-elements slot; do without */
		result = tsquery_opr_selec_no_stats(tsq);
	}
	else
	{
		/* Use the most-common-elements data, then release the slot */
		result = mcelem_tsquery_selec(tsq, elems, nelems,
									  freqs, nfreqs);
		free_attstatsslot(TEXTOID, elems, nelems, freqs, nfreqs);
	}

	/*
	 * The element statistics count only non-null rows, so scale the estimate
	 * by the fraction of rows that are not null.
	 */
	result *= (1.0 - pgstat->stanullfrac);

	return result;
}
/*
 * Estimate tsquery selectivity from a most-common-elements statistics slot.
 *
 * The parallel pg_statistic arrays (mcelem[] holding the element texts and
 * numbers[] holding their frequencies) are repacked into a single TextFreq
 * array, which is handed to tsquery_opr_selec() together with the minimal
 * frequency recorded in the slot.
 */
static Selectivity
mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
					 float4 *numbers, int nnumbers)
{
	TextFreq   *table;
	TextFreq   *entry;
	float4		lowest_freq;
	Selectivity result;
	int			idx;

	/*
	 * Numbers is expected to hold exactly two entries more than Values: the
	 * trailing pair carries the minimal and maximal frequency.  If the shape
	 * is anything else, fall back to the stats-free estimate.
	 *
	 * (Note: the MCELEM slot definition permits a third extra number giving
	 * the frequency of nulls, but that is not expected for a tsvector
	 * column.)
	 */
	if (nnumbers != nmcelem + 2)
		return tsquery_opr_selec_no_stats(query);

	/*
	 * The lowest frequency sits in the next-to-last cell of Numbers;
	 * compute_tsvector_stats() put it there.  See ts_typanalyze.c
	 */
	lowest_freq = numbers[nnumbers - 2];

	/*
	 * Merge the two arrays into one array of (element, frequency) pairs so
	 * that bsearch() can be used on it.
	 */
	table = (TextFreq *) palloc(sizeof(TextFreq) * nmcelem);
	entry = table;
	for (idx = 0; idx < nmcelem; idx++, entry++)
	{
		/*
		 * These text Datums were extracted from an array, so they can be
		 * neither compressed nor stored out-of-line -- VARSIZE_ANY* is safe
		 * to use on them.
		 */
		Assert(!VARATT_IS_COMPRESSED(mcelem[idx]) && !VARATT_IS_EXTERNAL(mcelem[idx]));
		entry->element = (text *) DatumGetPointer(mcelem[idx]);
		entry->frequency = numbers[idx];
	}

	result = tsquery_opr_selec(GETQUERY(query), GETOPERAND(query),
							   table, nmcelem, lowest_freq);

	/* The repacked array is only needed for the duration of the estimate */
	pfree(table);

	return result;
}