/*
 * extractEntriesS
 *      Call the opclass extractValue function for column "attnum" and return
 *      the resulting key array, sorted when it has more than one element.
 *
 * ginstate   - per-index state holding the extractValueFn[] / compareFn[] arrays
 * attnum     - 1-based column number (used as attnum - 1 to index those arrays)
 * value      - the item to extract keys from
 * nentries   - output: number of keys; forced to 0 when the function returns NULL
 * needUnique - output: reset to FALSE here, then handed to the sort comparator
 *              through the callback argument (presumably cmpEntries sets it
 *              when it sees equal keys -- confirm against cmpEntries)
 *
 * Returns whatever pointer the extractValue function produced (may be NULL).
 */
Datum *
extractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value,
                int32 *nentries, bool *needUnique)
{
    int         slot = attnum - 1;
    Datum       raw;
    Datum      *keys;
    cmpEntriesData sortArg;

    raw = FunctionCall2(&ginstate->extractValueFn[slot],
                        value,
                        PointerGetDatum(nentries));
    keys = (Datum *) DatumGetPointer(raw);

    /* A NULL result means "no keys", regardless of what *nentries says */
    if (keys == NULL)
        *nentries = 0;

    *needUnique = FALSE;

    /* Zero or one key: nothing to sort */
    if (*nentries <= 1)
        return keys;

    sortArg.cmpDatumFunc = &ginstate->compareFn[slot];
    sortArg.needUnique = needUnique;

    qsort_arg(keys, *nentries, sizeof(Datum),
              (qsort_arg_comparator) cmpEntries, (void *) &sortArg);

    return keys;
}
/*
 * isort
 *      Sort the int32 array "a" (len elements, callers pass len >= 2) in place.
 *
 * The address of a local flag is passed to isort_cmp as the qsort_arg
 * callback argument and the flag's final value is returned; per the
 * function's contract that value is true if any duplicates were found.
 */
bool
isort(int32 *a, int len)
{
    bool        dups_seen = false;
    void       *cmp_arg = (void *) &dups_seen;

    qsort_arg(a, len, sizeof(int32), isort_cmp, cmp_arg);

    return dups_seen;
}
/*
 * gbt_num_picksplit
 *      Split the entries of "entryvec" into two groups by sorted key order.
 *
 * The entries at offsets FirstOffsetNumber..maxoff are sorted with
 * tinfo->f_cmp (flinfo is passed through as the comparator argument).  The
 * first half of the sorted sequence goes to the left side of the split and
 * the remainder to the right; each side's union key is accumulated with
 * gbt_num_bin_union().
 *
 * Results are written into *v (spl_left/spl_right offset arrays, their
 * counts, and spl_ldatum/spl_rdatum), and v is returned.
 */
GIST_SPLITVEC *
gbt_num_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
                  const gbtree_ninfo *tinfo, FmgrInfo *flinfo)
{
    OffsetNumber maxoff = entryvec->n - 1;
    OffsetNumber off;
    int         nitems = maxoff - FirstOffsetNumber + 1;
    int         half = nitems / 2;
    int         nbytes;
    Nsrt       *sorted;

    /* Slot 0 is unused so the array can be indexed directly by offset */
    sorted = (Nsrt *) palloc((maxoff + 1) * sizeof(Nsrt));

    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);
    v->spl_ldatum = PointerGetDatum(0);
    v->spl_rdatum = PointerGetDatum(0);
    v->spl_nleft = 0;
    v->spl_nright = 0;

    /* Collect (key, original offset) pairs, then sort them by key */
    for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        sorted[off].t = (GBT_NUMKEY *) DatumGetPointer((entryvec->vector[off].key));
        sorted[off].i = off;
    }

    qsort_arg((void *) &sorted[FirstOffsetNumber], nitems, sizeof(Nsrt),
              (qsort_arg_comparator) tinfo->f_cmp, (void *) flinfo);

    /* Lower half of the sorted entries forms the left page ... */
    off = FirstOffsetNumber;
    for (; off <= maxoff && off <= half; off = OffsetNumberNext(off))
    {
        gbt_num_bin_union(&v->spl_ldatum, sorted[off].t, tinfo, flinfo);
        v->spl_left[v->spl_nleft] = sorted[off].i;
        v->spl_nleft++;
    }

    /* ... and everything after it forms the right page */
    for (; off <= maxoff; off = OffsetNumberNext(off))
    {
        gbt_num_bin_union(&v->spl_rdatum, sorted[off].t, tinfo, flinfo);
        v->spl_right[v->spl_nright] = sorted[off].i;
        v->spl_nright++;
    }

    return v;
}
/* * Array selectivity estimation based on most common elements statistics * * This function just deconstructs and sorts the array constant's contents, * and then passes the problem on to mcelem_array_contain_overlap_selec or * mcelem_array_contained_selec depending on the operator. */ static Selectivity mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc) { Selectivity selec; int num_elems; Datum *elem_values; bool *elem_nulls; bool null_present; int nonnull_nitems; int i; /* * Prepare constant array data for sorting. Sorting lets us find unique * elements and efficiently merge with the MCELEM array. */ deconstruct_array(array, typentry->type_id, typentry->typlen, typentry->typbyval, typentry->typalign, &elem_values, &elem_nulls, &num_elems); /* Collapse out any null elements */ nonnull_nitems = 0; null_present = false; for (i = 0; i < num_elems; i++) { if (elem_nulls[i]) null_present = true; else elem_values[nonnull_nitems++] = elem_values[i]; } /* * Query "column @> '{anything, null}'" matches nothing. For the other * two operators, presence of a null in the constant can be ignored. */ if (null_present && operator == OID_ARRAY_CONTAINS_OP) { pfree(elem_values); pfree(elem_nulls); return (Selectivity) 0.0; } /* Sort extracted elements using their default comparison function. 
*/ qsort_arg(elem_values, nonnull_nitems, sizeof(Datum), element_compare, cmpfunc); /* Separate cases according to operator */ if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP) selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem, numbers, nnumbers, elem_values, nonnull_nitems, operator, cmpfunc); else if (operator == OID_ARRAY_CONTAINED_OP) selec = mcelem_array_contained_selec(mcelem, nmcelem, numbers, nnumbers, elem_values, nonnull_nitems, hist, nhist, operator, cmpfunc); else { elog(ERROR, "arraycontsel called for unrecognized operator %u", operator); selec = 0.0; /* keep compiler quiet */ } pfree(elem_values); pfree(elem_nulls); return selec; }
/*
 * ndistinct_for_combination
 *      Estimate the number of distinct values of a combination of columns.
 *
 * The sample rows are projected onto the k columns named by combination[]
 * (indexes into stats[]), sorted lexicographically, and scanned once to
 * count
 *      d  - the number of distinct combinations in the sample, and
 *      f1 - the number of combinations that occur exactly once.
 * Those counts, together with totalrows and numrows, are handed to
 * estimate_ndistinct(), which applies the same estimator that
 * compute_scalar_stats() uses in ANALYZE:
 *      n*d / (n - f1 + f1*n/N)
 */
static double
ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
                          VacAttrStats **stats, int k, int *combination)
{
    MultiSortSupport mss = multi_sort_init(k);
    SortItem   *items;
    Datum      *values;
    bool       *isnull;
    int         row;
    int         dim;
    int         d = 1;          /* distinct combinations seen so far */
    int         f1 = 0;         /* combinations seen exactly once */
    int         runlen = 1;     /* length of the current run of equal items */

    /*
     * Copy the sample into flat values[]/isnull[] arrays (k slots per row)
     * and sort those, rather than attempting to sort the tuples in place,
     * which would be more complex and bug-prone.
     */
    items = (SortItem *) palloc(numrows * sizeof(SortItem));
    values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
    isnull = (bool *) palloc0(sizeof(bool) * numrows * k);

    for (row = 0; row < numrows; row++)
    {
        items[row].values = &values[row * k];
        items[row].isnull = &isnull[row * k];
    }

    /* Per dimension: register its ordering operator, then load its column */
    for (dim = 0; dim < k; dim++)
    {
        VacAttrStats *colstat = stats[combination[dim]];
        TypeCacheEntry *type;

        type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
        if (type->lt_opr == InvalidOid) /* shouldn't happen */
            elog(ERROR, "cache lookup failed for ordering operator for type %u",
                 colstat->attrtypid);

        multi_sort_add_dimension(mss, dim, type->lt_opr);

        for (row = 0; row < numrows; row++)
        {
            items[row].values[dim] =
                heap_getattr(rows[row],
                             colstat->attr->attnum,
                             colstat->tupDesc,
                             &items[row].isnull[dim]);
        }
    }

    /* Sort so that equal combinations become adjacent ... */
    qsort_arg((void *) items, numrows, sizeof(SortItem),
              multi_sort_compare, mss);

    /* ... then walk the runs of equal items */
    for (row = 1; row < numrows; row++)
    {
        if (multi_sort_compare(&items[row], &items[row - 1], mss) == 0)
        {
            /* same combination as the previous row: extend the run */
            runlen++;
            continue;
        }

        /* a new combination starts here; close out the previous run */
        if (runlen == 1)
            f1++;
        d++;
        runlen = 1;
    }

    /* account for the final run */
    if (runlen == 1)
        f1++;

    return estimate_ndistinct(totalrows, numrows, d, f1);
}
/*
 * qsort_arg
 *      Quicksort with a pass-through argument for the comparator.
 *
 * a   - base of the array to sort (sorted in place)
 * n   - number of elements
 * es  - size of one element in bytes
 * cmp - comparator, called as cmp(x, y, arg); must return <0, 0, >0
 * arg - opaque pointer handed unchanged to every cmp call
 *
 * Algorithm outline:
 *  - fewer than 7 elements: insertion sort;
 *  - input already in order: return after a single checking pass;
 *  - otherwise choose a pivot with med3 (applied to three med3 results
 *    when n > 40), partition while parking pivot-equal elements at both
 *    ends, move those equal runs to the middle, and sort the two outer
 *    partitions.
 *
 * Byte distances are kept in size_t so arrays larger than INT_MAX bytes are
 * handled correctly.  To bound stack usage we recurse only into the smaller
 * partition and loop on the larger one, which limits recursion depth to
 * O(log n).
 */
void
qsort_arg(void *a, size_t n, size_t es, qsort_arg_comparator cmp, void *arg)
{
    char       *pa,
               *pb,
               *pc,
               *pd,
               *pl,
               *pm,
               *pn;
    size_t      d1,
                d2;
    int         r,
                swaptype,
                presorted;

loop:
    SWAPINIT(a, es);

    /* Small arrays: plain insertion sort */
    if (n < 7)
    {
        for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
            for (pl = pm; pl > (char *) a && cmp(pl - es, pl, arg) > 0;
                 pl -= es)
                swap(pl, pl - es);
        return;
    }

    /* Cheap win: bail out if the input is already sorted */
    presorted = 1;
    for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
    {
        if (cmp(pm - es, pm, arg) > 0)
        {
            presorted = 0;
            break;
        }
    }
    if (presorted)
        return;

    /* Pivot selection: middle element, refined by med3 for larger inputs */
    pm = (char *) a + (n / 2) * es;
    if (n > 7)
    {
        pl = (char *) a;
        pn = (char *) a + (n - 1) * es;
        if (n > 40)
        {
            d1 = (n / 8) * es;
            pl = med3(pl, pl + d1, pl + 2 * d1, cmp, arg);
            pm = med3(pm - d1, pm, pm + d1, cmp, arg);
            pn = med3(pn - 2 * d1, pn - d1, pn, cmp, arg);
        }
        pm = med3(pl, pm, pn, cmp, arg);
    }

    /* Move the pivot to the front; plain C cast keeps this valid C */
    swap((char *) a, pm);

    /*
     * Partition.  Elements equal to the pivot are swapped out to the left
     * end (below pa) or the right end (above pd) as they are encountered.
     */
    pa = pb = (char *) a + es;
    pc = pd = (char *) a + (n - 1) * es;
    for (;;)
    {
        while (pb <= pc && (r = cmp(pb, a, arg)) <= 0)
        {
            if (r == 0)
            {
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        while (pb <= pc && (r = cmp(pc, a, arg)) >= 0)
        {
            if (r == 0)
            {
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        swap(pb, pc);
        pb += es;
        pc -= es;
    }

    /* Bring the pivot-equal runs from both ends into the middle */
    pn = (char *) a + n * es;
    d1 = Min(pa - (char *) a, pb - pa);
    vecswap((char *) a, pb - d1, d1);
    d1 = Min(pd - pc, pn - pd - es);
    vecswap(pb, pn - d1, d1);

    /* Sizes in bytes of the "less than" and "greater than" partitions */
    d1 = pb - pa;
    d2 = pd - pc;

    if (d1 <= d2)
    {
        /* Recurse on the (smaller) left partition, iterate on the right */
        if (d1 > es)
            qsort_arg(a, d1 / es, es, cmp, arg);
        if (d2 > es)
        {
            a = pn - d2;
            n = d2 / es;
            goto loop;
        }
    }
    else
    {
        /* Recurse on the (smaller) right partition, iterate on the left */
        if (d2 > es)
            qsort_arg(pn - d2, d2 / es, es, cmp, arg);
        if (d1 > es)
        {
            /* "a" already points at the start of the left partition */
            n = d1 / es;
            goto loop;
        }
    }
}
GIST_SPLITVEC * gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v, Oid collation, const gbtree_vinfo *tinfo) { OffsetNumber i, maxoff = entryvec->n - 1; Vsrt *arr; int svcntr = 0, nbytes; char *cur; GBT_VARKEY **sv = NULL; gbt_vsrt_arg varg; arr = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt)); nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); v->spl_ldatum = PointerGetDatum(0); v->spl_rdatum = PointerGetDatum(0); v->spl_nleft = 0; v->spl_nright = 0; sv = palloc(sizeof(bytea *) * (maxoff + 1)); /* Sort entries */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { GBT_VARKEY_R ro; cur = (char *) DatumGetPointer(entryvec->vector[i].key); ro = gbt_var_key_readable((GBT_VARKEY *) cur); if (ro.lower == ro.upper) /* leaf */ { sv[svcntr] = gbt_var_leaf2node((GBT_VARKEY *) cur, tinfo); arr[i].t = sv[svcntr]; if (sv[svcntr] != (GBT_VARKEY *) cur) svcntr++; } else arr[i].t = (GBT_VARKEY *) cur; arr[i].i = i; } /* sort */ varg.tinfo = tinfo; varg.collation = collation; qsort_arg((void *) &arr[FirstOffsetNumber], maxoff - FirstOffsetNumber + 1, sizeof(Vsrt), gbt_vsrt_cmp, (void *) &varg); /* We do simply create two parts */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { if (i <= (maxoff - FirstOffsetNumber + 1) / 2) { gbt_var_bin_union(&v->spl_ldatum, arr[i].t, collation, tinfo); v->spl_left[v->spl_nleft] = arr[i].i; v->spl_nleft++; } else { gbt_var_bin_union(&v->spl_rdatum, arr[i].t, collation, tinfo); v->spl_right[v->spl_nright] = arr[i].i; v->spl_nright++; } } /* Truncate (=compress) key */ if (tinfo->trnc) { int32 ll = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), tinfo); int32 lr = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), tinfo); GBT_VARKEY *dl; GBT_VARKEY *dr; ll = Max(ll, lr); ll++; dl = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), ll, tinfo); dr = 
gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), ll, tinfo); v->spl_ldatum = PointerGetDatum(dl); v->spl_rdatum = PointerGetDatum(dr); } return v; }
/*
 * dependency_degree
 *      Measure how well a functional dependency holds on the sample.
 *
 * The actual work horse of detecting functional dependencies.  Given a
 * variation of k attributes, it checks that the first (k-1) are sufficient
 * to determine the last one.
 *
 * numrows, rows - the sample
 * k             - number of attributes in the dependency (at least 2)
 * dependency    - k indexes identifying the attributes; each is used to
 *                 index both stats[] and the array built from "attrs"
 * stats         - per-attribute statistics descriptors
 * attrs         - bitmap of attribute numbers, expanded into an array here
 *
 * Returns (rows in groups consistent with the dependency) / numrows.
 * Callers are expected to pass numrows > 0; the division is not guarded.
 */
static double
dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
                  VacAttrStats **stats, Bitmapset *attrs)
{
    int         i,
                j;
    int         nvalues = numrows * k;
    MultiSortSupport mss;
    SortItem   *items;
    Datum      *values;
    bool       *isnull;
    int        *attnums;

    /* counters valid within a group */
    int         group_size = 0;
    int         n_violations = 0;

    /* total number of rows supporting (consistent with) the dependency */
    int         n_supporting_rows = 0;

    /* Make sure we have at least two input attributes. */
    Assert(k >= 2);

    /* sort info for all attributes columns */
    mss = multi_sort_init(k);

    /* data for the sort */
    items = (SortItem *) palloc(numrows * sizeof(SortItem));
    values = (Datum *) palloc(sizeof(Datum) * nvalues);
    isnull = (bool *) palloc(sizeof(bool) * nvalues);

    /* fix the pointers to values/isnull */
    for (i = 0; i < numrows; i++)
    {
        items[i].values = &values[i * k];
        items[i].isnull = &isnull[i * k];
    }

    /*
     * Transform the bms into an array, to make accessing i-th member easier.
     */
    attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
    i = 0;
    j = -1;
    while ((j = bms_next_member(attrs, j)) >= 0)
        attnums[i++] = j;

    /*
     * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     *
     * (a) sort the data lexicographically
     *
     * (b) split the data into groups by first (k-1) columns
     *
     * (c) for each group count different values in the last column
     */

    /* prepare the sort function for every dimension, and fill the SortItems */
    for (i = 0; i < k; i++)
    {
        VacAttrStats *colstat = stats[dependency[i]];
        TypeCacheEntry *type;

        type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
        if (type->lt_opr == InvalidOid) /* shouldn't happen */
            elog(ERROR, "cache lookup failed for ordering operator for type %u",
                 colstat->attrtypid);

        /* prepare the sort function for this dimension */
        multi_sort_add_dimension(mss, i, type->lt_opr);

        /*
         * Load this dimension's column for every sample row.  The tuple
         * descriptor is taken from the same stats entry (colstat) that
         * supplied the type, rather than from stats[i], which is indexed by
         * dimension position and need not describe this attribute.
         */
        for (j = 0; j < numrows; j++)
        {
            items[j].values[i] =
                heap_getattr(rows[j], attnums[dependency[i]],
                             colstat->tupDesc, &items[j].isnull[i]);
        }
    }

    /* sort the items so that we can detect the groups */
    qsort_arg((void *) items, numrows, sizeof(SortItem),
              multi_sort_compare, mss);

    /*
     * Walk through the sorted array, split it into rows according to the
     * first (k-1) columns. If there's a single value in the last column, we
     * count the group as 'supporting' the functional dependency. Otherwise we
     * count it as contradicting.
     */

    /* start with the first row forming a group */
    group_size = 1;

    /* loop 1 beyond the end of the array so that we count the final group */
    for (i = 1; i <= numrows; i++)
    {
        /*
         * Check if the group ended, which may be either because we processed
         * all the items (i==numrows), or because the i-th item is not equal
         * to the preceding one.
         */
        if (i == numrows ||
            multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
        {
            /*
             * If no violations were found in the group then track the rows of
             * the group as supporting the functional dependency.
             */
            if (n_violations == 0)
                n_supporting_rows += group_size;

            /* Reset counters for the new group */
            n_violations = 0;
            group_size = 1;
            continue;
        }
        /* first columns match, but the last one does not (so contradicting) */
        else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
            n_violations++;

        group_size++;
    }

    /* release all scratch memory, including the attnum lookup array */
    pfree(items);
    pfree(values);
    pfree(isnull);
    pfree(mss);
    pfree(attnums);

    /* Compute the 'degree of validity' as (supporting/total). */
    return (n_supporting_rows * 1.0 / numrows);
}
/*
 * uniqueentry
 *      Sort an array of WordEntryIN and collapse duplicate lexemes.
 *
 * a         - array of l entries (l >= 1); compacted in place
 * buf       - string storage that the entries' pos/len fields refer to
 * outbuflen - output: space needed for the kept strings plus, for entries
 *             with positions, the short-aligned position data
 *
 * Two entries are duplicates when their lexemes have equal length and equal
 * bytes.  A duplicate's positions are appended to (or moved into) the entry
 * that is kept; each kept entry's position list is passed through
 * uniquePos().
 *
 * Returns the number of entries remaining in a[].
 */
static int
uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
{
    int         needed = 0;
    WordEntryIN *keep = a;      /* entry currently accumulating duplicates */
    WordEntryIN *scan;

    Assert(l >= 1);

    if (l > 1)
        qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
                  (void *) buf);

    for (scan = a + 1; scan - a < l; scan++)
    {
        int         same_lexeme;

        same_lexeme = (scan->entry.len == keep->entry.len &&
                       strncmp(&buf[scan->entry.pos],
                               &buf[keep->entry.pos],
                               keep->entry.len) == 0);

        if (!same_lexeme)
        {
            /* *keep is complete: account for the space it needs */
            needed += keep->entry.len;
            if (keep->entry.haspos)
            {
                keep->poslen = uniquePos(keep->pos, keep->poslen);
                needed = SHORTALIGN(needed);
                needed += keep->poslen * sizeof(WordEntryPos) + sizeof(uint16);
            }

            /* start accumulating into the next output slot */
            keep++;
            if (keep != scan)
                memcpy(keep, scan, sizeof(WordEntryIN));
            continue;
        }

        /* Duplicate lexeme; nothing to merge if it carries no positions */
        if (!scan->entry.haspos)
            continue;

        if (keep->entry.haspos)
        {
            /* concatenate the duplicate's positions onto the kept entry */
            int         merged = scan->poslen + keep->poslen;

            keep->pos = (WordEntryPos *)
                repalloc(keep->pos, merged * sizeof(WordEntryPos));
            memcpy(&keep->pos[keep->poslen], scan->pos,
                   scan->poslen * sizeof(WordEntryPos));
            keep->poslen = merged;
            pfree(scan->pos);
        }
        else
        {
            /* kept entry had none: hand over the duplicate's positions */
            keep->entry.haspos = 1;
            keep->pos = scan->pos;
            keep->poslen = scan->poslen;
        }
    }

    /* account for the final kept entry */
    needed += keep->entry.len;
    if (keep->entry.haspos)
    {
        keep->poslen = uniquePos(keep->pos, keep->poslen);
        needed = SHORTALIGN(needed);
        needed += keep->poslen * sizeof(WordEntryPos) + sizeof(uint16);
    }

    *outbuflen = needed;
    return keep + 1 - a;
}
Datum tsvectorrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); TSVector vec; int i; int32 nentries; int datalen; /* number of bytes used in the variable size * area after fixed size TSVector header and * WordEntries */ Size hdrlen; Size len; /* allocated size of vec */ bool needSort = false; nentries = pq_getmsgint(buf, sizeof(int32)); if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry))) elog(ERROR, "invalid size of tsvector"); hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries; len = hdrlen * 2; /* times two to make room for lexemes */ vec = (TSVector) palloc0(len); vec->size = nentries; datalen = 0; for (i = 0; i < nentries; i++) { const char *lexeme; uint16 npos; size_t lex_len; lexeme = pq_getmsgstring(buf); npos = (uint16) pq_getmsgint(buf, sizeof(uint16)); /* sanity checks */ lex_len = strlen(lexeme); if (lex_len > MAXSTRLEN) elog(ERROR, "invalid tsvector: lexeme too long"); if (datalen > MAXSTRPOS) elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded"); if (npos > MAXNUMPOS) elog(ERROR, "unexpected number of tsvector positions"); /* * Looks valid. Fill the WordEntry struct, and copy lexeme. * * But make sure the buffer is large enough first. */ while (hdrlen + SHORTALIGN(datalen + lex_len) + (npos + 1) * sizeof(WordEntryPos) >= len) { len *= 2; vec = (TSVector) repalloc(vec, len); } vec->entries[i].haspos = (npos > 0) ? 1 : 0; vec->entries[i].len = lex_len; vec->entries[i].pos = datalen; memcpy(STRPTR(vec) + datalen, lexeme, lex_len); datalen += lex_len; if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0) needSort = true; /* Receive positions */ if (npos > 0) { uint16 j; WordEntryPos *wepptr; /* * Pad to 2-byte alignment if necessary. Though we used palloc0 * for the initial allocation, subsequent repalloc'd memory areas * are not initialized to zero. 
*/ if (datalen != SHORTALIGN(datalen)) { *(STRPTR(vec) + datalen) = '\0'; datalen = SHORTALIGN(datalen); } memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16)); wepptr = POSDATAPTR(vec, &vec->entries[i]); for (j = 0; j < npos; j++) { wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos)); if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1])) elog(ERROR, "position information is misordered"); } datalen += (npos + 1) * sizeof(WordEntry); } } SET_VARSIZE(vec, hdrlen + datalen); if (needSort) qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry), compareentry, (void *) STRPTR(vec)); PG_RETURN_TSVECTOR(vec); }
/* * Picksplit SP-GiST function: split ranges into nodes. Select "centroid" * range and distribute ranges according to quadrants. */ Datum spg_range_quad_picksplit(PG_FUNCTION_ARGS) { spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); int i; int j; int nonEmptyCount; RangeType *centroid; bool empty; TypeCacheEntry *typcache; /* Use the median values of lower and upper bounds as the centroid range */ RangeBound *lowerBounds, *upperBounds; typcache = range_get_typcache(fcinfo, RangeTypeGetOid(DatumGetRangeType(in->datums[0]))); /* Allocate memory for bounds */ lowerBounds = palloc(sizeof(RangeBound) * in->nTuples); upperBounds = palloc(sizeof(RangeBound) * in->nTuples); j = 0; /* Deserialize bounds of ranges, count non-empty ranges */ for (i = 0; i < in->nTuples; i++) { range_deserialize(typcache, DatumGetRangeType(in->datums[i]), &lowerBounds[j], &upperBounds[j], &empty); if (!empty) j++; } nonEmptyCount = j; /* * All the ranges are empty. The best we can do is to construct an inner * node with no centroid, and put all ranges into node 0. If non-empty * ranges are added later, they will be routed to node 1. 
*/ if (nonEmptyCount == 0) { out->nNodes = 2; out->hasPrefix = false; /* Prefix is empty */ out->prefixDatum = PointerGetDatum(NULL); out->nodeLabels = NULL; out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); /* Place all ranges into node 0 */ for (i = 0; i < in->nTuples; i++) { RangeType *range = DatumGetRangeType(in->datums[i]); out->leafTupleDatums[i] = RangeTypeGetDatum(range); out->mapTuplesToNodes[i] = 0; } PG_RETURN_VOID(); } /* Sort range bounds in order to find medians */ qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound), bound_cmp, typcache); qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound), bound_cmp, typcache); /* Construct "centroid" range from medians of lower and upper bounds */ centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2], &upperBounds[nonEmptyCount / 2], false); out->hasPrefix = true; out->prefixDatum = RangeTypeGetDatum(centroid); /* Create node for empty ranges only if it is a root node */ out->nNodes = (in->level == 0) ? 5 : 4; out->nodeLabels = NULL; /* we don't need node labels */ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); /* * Assign ranges to corresponding nodes according to quadrants relative to * "centroid" range. */ for (i = 0; i < in->nTuples; i++) { RangeType *range = DatumGetRangeType(in->datums[i]); int16 quadrant = getQuadrant(typcache, centroid, range); out->leafTupleDatums[i] = RangeTypeGetDatum(range); out->mapTuplesToNodes[i] = quadrant - 1; } PG_RETURN_VOID(); }
/* * Extract the index key values from an indexable item * * The resulting key values are sorted, and any duplicates are removed. * This avoids generating redundant index entries. */ Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories) { Datum *entries; bool *nullFlags; int32 i; /* * We don't call the extractValueFn on a null item. Instead generate a * placeholder. */ if (isNull) { *nentries = 1; entries = (Datum *) palloc(sizeof(Datum)); entries[0] = (Datum) 0; *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); (*categories)[0] = GIN_CAT_NULL_ITEM; return entries; } /* OK, call the opclass's extractValueFn */ nullFlags = NULL; /* in case extractValue doesn't set it */ entries = (Datum *) DatumGetPointer(FunctionCall3(&ginstate->extractValueFn[attnum - 1], value, PointerGetDatum(nentries), PointerGetDatum(&nullFlags))); /* * Generate a placeholder if the item contained no keys. */ if (entries == NULL || *nentries <= 0) { *nentries = 1; entries = (Datum *) palloc(sizeof(Datum)); entries[0] = (Datum) 0; *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); (*categories)[0] = GIN_CAT_EMPTY_ITEM; return entries; } /* * If the extractValueFn didn't create a nullFlags array, create one, * assuming that everything's non-null. Otherwise, run through the * array and make sure each value is exactly 0 or 1; this ensures * binary compatibility with the GinNullCategory representation. */ if (nullFlags == NULL) nullFlags = (bool *) palloc0(*nentries * sizeof(bool)); else { for (i = 0; i < *nentries; i++) nullFlags[i] = (nullFlags[i] ? true : false); } /* now we can use the nullFlags as category codes */ *categories = (GinNullCategory *) nullFlags; /* * If there's more than one key, sort and unique-ify. * * XXX Using qsort here is notationally painful, and the overhead is * pretty bad too. 
For small numbers of keys it'd likely be better to * use a simple insertion sort. */ if (*nentries > 1) { keyEntryData *keydata; cmpEntriesArg arg; keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData)); for (i = 0; i < *nentries; i++) { keydata[i].datum = entries[i]; keydata[i].isnull = nullFlags[i]; } arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1]; arg.haveDups = false; qsort_arg(keydata, *nentries, sizeof(keyEntryData), cmpEntries, (void *) &arg); if (arg.haveDups) { /* there are duplicates, must get rid of 'em */ int32 j; entries[0] = keydata[0].datum; nullFlags[0] = keydata[0].isnull; j = 1; for (i = 1; i < *nentries; i++) { if (cmpEntries(&keydata[i-1], &keydata[i], &arg) != 0) { entries[j] = keydata[i].datum; nullFlags[j] = keydata[i].isnull; j++; } } *nentries = j; } else { /* easy, no duplicates */ for (i = 0; i < *nentries; i++) { entries[i] = keydata[i].datum; nullFlags[i] = keydata[i].isnull; } } pfree(keydata); } return entries; }
/*
 * compact_set
 *      Fold the unsorted tail of an element set into its sorted prefix.
 *
 * The set's buffer holds nsorted sorted, unique items followed by
 * (nall - nsorted) newly appended items in arbitrary order.  This routine
 *   1. sorts the appended items and removes duplicates among them,
 *   2. if a sorted prefix already existed, merges the two parts into a
 *      freshly allocated buffer (dropping values present in both), and
 *   3. when need_space is true and less than ARRAY_FREE_FRACT of the buffer
 *      is free, enlarges the buffer.
 *
 * On return nall == nsorted.
 */
static void
compact_set(element_set_t * eset, bool need_space)
{
    char       *fresh = eset->data + (eset->nsorted * eset->item_size);
    char       *tail = fresh;   /* last unique item kept in the new part */
    char       *cand;
    int         idx;
    int         nunique = 1;
    double      free_fract;

    Assert(eset->nall > 0);
    Assert(eset->data != NULL);
    Assert(eset->nsorted <= eset->nall);
    Assert(eset->nall * eset->item_size <= eset->nbytes);

    /* Only do any work if new (unsorted) items were appended */
    if (eset->nall > eset->nsorted)
    {
        /*
         * Sort just the appended items.
         *
         * TODO Consider an insertion sort for small numbers of items (for
         * <64 items it might be faster than qsort).
         */
        qsort_arg(fresh,
                  eset->nall - eset->nsorted, eset->item_size,
                  compare_items, &eset->item_size);

        /*
         * De-duplicate the appended items in place: an item is kept only if
         * it differs from the last kept one.  The first item is always kept,
         * as it has no predecessor to be equal to.
         */
        for (idx = 1; idx < eset->nall - eset->nsorted; idx++)
        {
            cand = fresh + (idx * eset->item_size);

            /* same as the last kept item: drop it */
            if (memcmp(tail, cand, eset->item_size) == 0)
                continue;

            tail += eset->item_size;
            nunique += 1;

            /* only copy if the item actually has to move */
            if (tail != cand)
                memcpy(tail, cand, eset->item_size);
        }

        /* duplicates removed -> update the number of items */
        eset->nall = eset->nsorted + nunique;

        /* With no pre-existing sorted part, the new part simply becomes it */
        if (eset->nsorted == 0)
            eset->nsorted = eset->nall;

        /*
         * TODO Another optimization opportunity is that we don't really need
         * to merge the arrays if enough space was freed by processing the
         * new items.  The merge could be postponed until the last call (when
         * finalizing the aggregate).  OTOH in that case the merge should be
         * cheap anyway because few new items remain, but the data still has
         * to be shuffled around, which wastes memory bandwidth.
         */

        /* Otherwise merge both sorted parts, keeping each value once */
        if (eset->nsorted < eset->nall)
        {
            MemoryContext oldctx = MemoryContextSwitchTo(eset->aggctx);

            /* destination buffer and write cursor */
            char       *data = palloc(eset->nbytes);
            char       *ptr = data;

            /* the previously sorted part */
            char       *a = eset->data;
            char       *a_max = eset->data + eset->nsorted * eset->item_size;

            /* the newly sorted part */
            char       *b = eset->data + (eset->nsorted * eset->item_size);
            char       *b_max = eset->data + eset->nall * eset->item_size;

            MemoryContextSwitchTo(oldctx);

            /*
             * TODO If the inputs happen to arrive already ordered (e.g.
             * because of a subplan), the arrays could simply be copied; the
             * check would be (a_first > b_last) || (a_last < b_first).
             * This is probably very unlikely in practice.
             */
            for (;;)
            {
                int         r = memcmp(a, b, eset->item_size);

                /*
                 * Emit the smaller head item and advance that side; when the
                 * heads are equal, emit one copy and advance both sides.
                 */
                if (r <= 0)
                {
                    memcpy(ptr, a, eset->item_size);
                    a += eset->item_size;
                    if (r == 0)
                        b += eset->item_size;
                }
                else
                {
                    memcpy(ptr, b, eset->item_size);
                    b += eset->item_size;
                }

                ptr += eset->item_size;

                /*
                 * Once either side is exhausted, copy whatever is left on
                 * the other side in one go and stop.
                 */
                if ((a == a_max) || (b == b_max))
                {
                    if (a != a_max)
                    {
                        /* b ended -> copy rest of a */
                        memcpy(ptr, a, a_max - a);
                        ptr += (a_max - a);
                    }
                    else if (b != b_max)
                    {
                        /* a ended -> copy rest of b */
                        memcpy(ptr, b, b_max - b);
                        ptr += (b_max - b);
                    }
                    break;
                }
            }

            Assert((ptr - data) <= (eset->nall * eset->item_size));

            /*
             * The merge may have removed values common to both parts, so
             * derive the counts from how much was actually written.
             */
            eset->nsorted = (ptr - data) / eset->item_size;
            eset->nall = eset->nsorted;

            pfree(eset->data);
            eset->data = data;
        }
    }

    Assert(eset->nall == eset->nsorted);

    /* free space as a fraction of the total buffer size */
    free_fract
        = (eset->nbytes - eset->nall * eset->item_size) * 1.0 / eset->nbytes;

    /*
     * When more items are expected (i.e. not while finalizing the aggregate
     * result), make sure at least ARRAY_FREE_FRACT of the buffer is free.
     */
    if (need_space && (free_fract < ARRAY_FREE_FRACT))
    {
        /*
         * Small buffers are simply doubled, because that's what AllocSet
         * would hand out anyway.  Past ALLOCSET_SEPARATE_THRESHOLD the
         * memory is allocated in separate blocks, so the buffer can grow
         * more slowly (just enough to regain the 20% free space).
         *
         * XXX If the memory context uses smaller blocks, the switch to
         * separate blocks may happen before ALLOCSET_SEPARATE_THRESHOLD;
         * that limit is merely a global guarantee for all AllocSets.
         */
        if ((eset->nbytes / 0.8) < ALLOCSET_SEPARATE_THRESHOLD)
            eset->nbytes *= 2;
        else
            eset->nbytes /= 0.8;

        eset->data = repalloc(eset->data, eset->nbytes);
    }
}
/*
 * Quicksort of n elements of es bytes each, starting at a, in place.
 *
 * NOTE(review): this definition is only partly visible.  The #endif right
 * after the signature closes a conditional that opens outside this view
 * (presumably an I_AM_QSORT_ARG variant of the signature that also declares
 * "arg") -- confirm.  The body refers to "arg" and to the CMP, SWAPINIT,
 * swap, vecswap, med3 and min helpers, none of which are defined here.
 *
 * Visible behavior:
 *  - fewer than 7 elements: insertion sort;
 *  - otherwise a pivot is chosen (middle element, refined through med3 for
 *    n > 7 and through three further med3 calls for n > 40) and swapped to
 *    the front;
 *  - the partition loop swaps elements comparing equal to the pivot out to
 *    the two ends (tracked by pa and pd);
 *  - if the partition loop performed no swap at all (swap_cnt == 0) the
 *    routine finishes with an insertion sort over the whole range;
 *  - otherwise two vecswap calls are made (presumably moving the
 *    pivot-equal runs to the middle -- confirm against vecswap), the left
 *    partition is sorted by a recursive call and the right partition by
 *    looping back to "loop".
 */
void BLC_PREFIX(qsort)(void *a, size_t n, size_t es, cmp_t cmp)
#endif
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    size_t d, r;
    int cmp_result;
    int swaptype, swap_cnt;

loop:
    SWAPINIT(a, es);
    swap_cnt = 0;               /* becomes 1 as soon as any swap happens */
    /* small input: insertion sort */
    if (n < 7) {
        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
            for (pl = pm; pl > (char *)a && CMP(arg, pl - es, pl) > 0; pl -= es)
                swap(pl, pl - es);
        return;
    }
    /* pivot candidate: the middle element, refined below for larger n */
    pm = (char *)a + (n / 2) * es;
    if (n > 7) {
        pl = a;
        pn = (char *)a + (n - 1) * es;
        if (n > 40) {
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, cmp, arg);
            pm = med3(pm - d, pm, pm + d, cmp, arg);
            pn = med3(pn - 2 * d, pn - d, pn, cmp, arg);
        }
        pm = med3(pl, pm, pn, cmp, arg);
    }
    /* the pivot now lives in the first slot, i.e. at a */
    swap(a, pm);
    pa = pb = (char *)a + es;
    pc = pd = (char *)a + (n - 1) * es;
    for (;;) {
        /* scan up from the left while elements are <= pivot */
        while (pb <= pc && (cmp_result = CMP(arg, pb, a)) <= 0) {
            if (cmp_result == 0) {
                swap_cnt = 1;
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        /* scan down from the right while elements are >= pivot */
        while (pb <= pc && (cmp_result = CMP(arg, pc, a)) >= 0) {
            if (cmp_result == 0) {
                swap_cnt = 1;
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        /* both scans stopped on out-of-place elements: exchange them */
        swap(pb, pc);
        swap_cnt = 1;
        pb += es;
        pc -= es;
    }
    if (swap_cnt == 0) { /* Switch to insertion sort */
        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
            for (pl = pm; pl > (char *)a && CMP(arg, pl - es, pl) > 0; pl -= es)
                swap(pl, pl - es);
        return;
    }

    pn = (char *)a + n * es;
    r = min(pa - (char *)a, pb - pa);
    vecswap(a, pb - r, r);
    r = min(pd - pc, pn - pd - es);
    vecswap(pb, pn - r, r);
    /* left partition: recurse when it holds more than one element */
    /*
     * NOTE(review): the #else branch calls plain qsort() rather than
     * BLC_PREFIX(qsort) -- confirm that is intended.
     */
    if ((r = pb - pa) > es)
#ifdef I_AM_QSORT_ARG
        qsort_arg(a, r / es, es, cmp, arg);
#else
        qsort(a, r / es, es, cmp);
#endif
    if ((r = pd - pc) > es) {
        /* Iterate rather than recurse to save stack space */
        a = pn - r;
        n = r / es;
        goto loop;
    }
/*      qsort(pn - r, r / es, es, cmp);*/
}