/* * The SP-GiST query consistency check for leaf tuples */ Datum inet_spg_leaf_consistent(PG_FUNCTION_ARGS) { spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); inet *leaf = DatumGetInetPP(in->leafDatum); /* All tests are exact. */ out->recheck = false; /* Leaf is what it is... */ out->leafValue = InetPGetDatum(leaf); /* Use common code to apply the tests. */ PG_RETURN_BOOL(inet_spg_consistent_bitmap(leaf, in->nkeys, in->scankeys, true)); }
/*
 * Inet histogram vs single value selectivity estimation
 *
 * Estimate the fraction of the histogram population that satisfies
 * "value OPR CONST".  (The caller must scale the result to reflect the
 * proportion of the total population represented by the histogram.)
 *
 * The histogram was built for the inet btree comparison operators.  For the
 * subnet inclusion operators only the common bits of the network part and
 * the length of the network part (masklen) are interesting.  Fortunately,
 * btree comparison treats the network part as the major sort key.  Even so,
 * the masklen is not really significant in the histogram ordering, which
 * would lead to big mistakes for data sets with an uneven masklen
 * distribution.  To reduce this problem, comparisons against both the left
 * and the right boundary of each bucket are used together.
 *
 * A bucket can match in two ways.  If the constant matches both bucket
 * endpoints, the bucket is counted as fully matched.  Otherwise the bucket
 * is partially matched when the constant matches just one endpoint, or when
 * the two endpoints fall on opposite sides of the constant.  (A constant
 * matching an interior histogram element therefore gets credit for partial
 * matches to the buckets on both sides, while a match to a histogram
 * endpoint gets credit for only one partial match.  This is desirable.)
 *
 * The divider used for a partial bucket match is imagined as the distance
 * between the decisive bits and the common bits of the addresses.  It is
 * applied as a power of two, that being the natural scale for IP network
 * inclusion.  The formula is empirical and subject to change with more
 * experiment.
 *
 * For a partial match we try to compute a divider for each boundary.  If
 * the address family of a boundary value does not match the constant, or
 * the comparison of the network-part lengths is not correct for the
 * operator, that boundary's divider is not taken into account.  If both
 * dividers are valid, the greater one is used, to minimize the mistake in
 * buckets that have disparate masklens.  This is unfair when both dividers
 * are valid but far apart; that is not a common situation, since the
 * boundaries are expected to share most of the significant bits of their
 * masklens.  Using the minimum instead would make the mistake greater, and
 * we don't know a sensible way to combine the two.
 *
 * For a partial match in a bucket whose boundaries belong to different
 * address families, only the boundary with the same family as the constant
 * is considered.  This can cause more mistakes for such a bucket if the
 * masklens of its boundaries are also disparate, but it can only happen in
 * one bucket, since only two address families exist.  It seems a better
 * option than not considering these buckets at all.
 */
static Selectivity
inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
					int opr_codenum)
{
	Selectivity matched = 0.0;
	inet	   *key;
	inet	   *lower;
	int			lower_cmp;
	int			stride;
	int			nbuckets = 0;
	int			idx;

	/* Fewer than two boundaries means no bucket at all (and no divisor). */
	if (nvalues <= 1)
		return 0.0;

	/* Sample every stride'th boundary so very long histograms stay cheap. */
	stride = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;

	key = DatumGetInetPP(constvalue);

	/* Prime the loop with the left boundary of the first bucket. */
	lower = DatumGetInetPP(values[0]);
	lower_cmp = inet_inclusion_cmp(lower, key, opr_codenum);

	for (idx = stride; idx < nvalues; idx += stride)
	{
		inet	   *upper = DatumGetInetPP(values[idx]);
		int			upper_cmp = inet_inclusion_cmp(upper, key, opr_codenum);

		if (lower_cmp == 0 && upper_cmp == 0)
		{
			/* Both endpoints match, so the entire bucket does. */
			matched += 1.0;
		}
		else if ((lower_cmp <= 0 && upper_cmp >= 0) ||
				 (lower_cmp >= 0 && upper_cmp <= 0))
		{
			/* One endpoint matches, or the constant lies between them. */
			int			lower_div = inet_hist_match_divider(lower, key,
															opr_codenum);
			int			upper_div = inet_hist_match_divider(upper, key,
															opr_codenum);
			int			divider = Max(lower_div, upper_div);

			/* A negative divider means neither boundary was usable. */
			if (divider >= 0)
				matched += 1.0 / pow(2.0, divider);
		}

		/* This bucket's right boundary is the next bucket's left one. */
		lower = upper;
		lower_cmp = upper_cmp;

		nbuckets++;
	}

	return matched / nbuckets;
}
/* * The SP-GiST choose function */ Datum inet_spg_choose(PG_FUNCTION_ARGS) { spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); inet *val = DatumGetInetPP(in->datum), *prefix; int commonbits; /* * If we're looking at a tuple that splits by address family, choose the * appropriate subnode. */ if (!in->hasPrefix) { /* allTheSame isn't possible for such a tuple */ Assert(!in->allTheSame); Assert(in->nNodes == 2); out->resultType = spgMatchNode; out->result.matchNode.nodeN = (ip_family(val) == PGSQL_AF_INET) ? 0 : 1; out->result.matchNode.restDatum = InetPGetDatum(val); PG_RETURN_VOID(); } /* Else it must split by prefix */ Assert(in->nNodes == 4 || in->allTheSame); prefix = DatumGetInetPP(in->prefixDatum); commonbits = ip_bits(prefix); /* * We cannot put addresses from different families under the same inner * node, so we have to split if the new value's family is different. */ if (ip_family(val) != ip_family(prefix)) { /* Set up 2-node tuple */ out->resultType = spgSplitTuple; out->result.splitTuple.prefixHasPrefix = false; out->result.splitTuple.prefixNNodes = 2; out->result.splitTuple.prefixNodeLabels = NULL; /* Identify which node the existing data goes into */ out->result.splitTuple.childNodeN = (ip_family(prefix) == PGSQL_AF_INET) ? 0 : 1; out->result.splitTuple.postfixHasPrefix = true; out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix); PG_RETURN_VOID(); } /* * If the new value does not match the existing prefix, we have to split. 
*/ if (ip_bits(val) < commonbits || bitncmp(ip_addr(prefix), ip_addr(val), commonbits) != 0) { /* Determine new prefix length for the split tuple */ commonbits = bitncommon(ip_addr(prefix), ip_addr(val), Min(ip_bits(val), commonbits)); /* Set up 4-node tuple */ out->resultType = spgSplitTuple; out->result.splitTuple.prefixHasPrefix = true; out->result.splitTuple.prefixPrefixDatum = InetPGetDatum(cidr_set_masklen_internal(val, commonbits)); out->result.splitTuple.prefixNNodes = 4; out->result.splitTuple.prefixNodeLabels = NULL; /* Identify which node the existing data goes into */ out->result.splitTuple.childNodeN = inet_spg_node_number(prefix, commonbits); out->result.splitTuple.postfixHasPrefix = true; out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix); PG_RETURN_VOID(); } /* * All OK, choose the node to descend into. (If this tuple is marked * allTheSame, the core code will ignore our choice of nodeN; but we need * not account for that case explicitly here.) */ out->resultType = spgMatchNode; out->result.matchNode.nodeN = inet_spg_node_number(val, commonbits); out->result.matchNode.restDatum = InetPGetDatum(val); PG_RETURN_VOID(); }
/*
 * Calculate bitmap of node numbers that are consistent with the query
 *
 * This can be used either at a 4-way inner tuple, or at a leaf tuple.
 * In the latter case, we should return a boolean result (0 or 1)
 * not a bitmap.
 *
 * This definition is pretty odd, but the inner and leaf consistency checks
 * are mostly common and it seems best to keep them in one function.
 *
 * Parameters:
 *	prefix   - the inner tuple's prefix, or the leaf value itself when "leaf"
 *	nkeys    - number of entries in scankeys
 *	scankeys - the scan keys; all of them must be satisfied (the result only
 *			   ever has bits removed, and the loop stops once it is zero)
 *	leaf     - true when checking a leaf value rather than an inner tuple
 *
 * Result: for an inner tuple, bit N is set if child node N may contain
 * matches; for a leaf, 1 if the value matches and 0 if not.
 *
 * As the masks used below imply, nodes 0 and 1 are treated as holding values
 * whose ip_bits equals the prefix's, told apart by the next host bit, while
 * nodes 2 and 3 hold values with more network bits, told apart by the next
 * network bit.
 */
static int
inet_spg_consistent_bitmap(const inet *prefix, int nkeys, ScanKey scankeys,
						   bool leaf)
{
	int			bitmap;
	int			commonbits,
				i;

	/* Initialize result to allow visiting all children */
	if (leaf)
		bitmap = 1;
	else
		bitmap = 1 | (1 << 1) | (1 << 2) | (1 << 3);

	commonbits = ip_bits(prefix);

	for (i = 0; i < nkeys; i++)
	{
		inet	   *argument = DatumGetInetPP(scankeys[i].sk_argument);
		StrategyNumber strategy = scankeys[i].sk_strategy;
		int			order;

		/*
		 * Check 0: different families
		 *
		 * Matching families do not help any of the strategies, so nothing
		 * is decided here when they are equal.  When they differ, only the
		 * ordering operators and <> can still be satisfied; the family then
		 * decides the ordering by itself.
		 */
		if (ip_family(argument) != ip_family(prefix))
		{
			switch (strategy)
			{
				case RTLessStrategyNumber:
				case RTLessEqualStrategyNumber:
					if (ip_family(argument) < ip_family(prefix))
						bitmap = 0;
					break;

				case RTGreaterEqualStrategyNumber:
				case RTGreaterStrategyNumber:
					if (ip_family(argument) > ip_family(prefix))
						bitmap = 0;
					break;

				case RTNotEqualStrategyNumber:
					break;

				default:
					/* For all other cases, we can be sure there is no match */
					bitmap = 0;
					break;
			}

			/* Leave the key loop as soon as nothing can match. */
			if (!bitmap)
				break;

			/* Other checks make no sense with different families. */
			continue;
		}

		/*
		 * Check 1: network bit count
		 *
		 * Network bit count (ip_bits) helps to check leaves for sub network
		 * and sup network operators.  At non-leaf nodes, we know every child
		 * value has greater ip_bits, so we can avoid descending in some cases
		 * too.
		 *
		 * This check is less expensive than checking the address bits, so we
		 * are doing this before, but it has to be done after for the basic
		 * comparison strategies, because ip_bits only affect their results
		 * when the common network bits are the same.
		 *
		 * Note that for a leaf the bitmap is just bit 0, so masking it with
		 * nodes 2 and 3 yields zero, i.e. "no match".
		 */
		switch (strategy)
		{
			case RTSubStrategyNumber:
				if (commonbits <= ip_bits(argument))
					bitmap &= (1 << 2) | (1 << 3);
				break;

			case RTSubEqualStrategyNumber:
				if (commonbits < ip_bits(argument))
					bitmap &= (1 << 2) | (1 << 3);
				break;

			case RTSuperStrategyNumber:
				if (commonbits == ip_bits(argument) - 1)
					bitmap &= 1 | (1 << 1);
				else if (commonbits >= ip_bits(argument))
					bitmap = 0;
				break;

			case RTSuperEqualStrategyNumber:
				if (commonbits == ip_bits(argument))
					bitmap &= 1 | (1 << 1);
				else if (commonbits > ip_bits(argument))
					bitmap = 0;
				break;

			case RTEqualStrategyNumber:
				if (commonbits < ip_bits(argument))
					bitmap &= (1 << 2) | (1 << 3);
				else if (commonbits == ip_bits(argument))
					bitmap &= 1 | (1 << 1);
				else
					bitmap = 0;
				break;
		}

		if (!bitmap)
			break;

		/*
		 * Check 2: common network bits
		 *
		 * Compare available common prefix bits to the query, but not beyond
		 * either the query's netmask or the minimum netmask among the
		 * represented values.  If these bits don't match the query, we can
		 * eliminate some cases.
		 */
		order = bitncmp(ip_addr(prefix), ip_addr(argument),
						Min(commonbits, ip_bits(argument)));

		if (order != 0)
		{
			switch (strategy)
			{
				case RTLessStrategyNumber:
				case RTLessEqualStrategyNumber:
					if (order > 0)
						bitmap = 0;
					break;

				case RTGreaterEqualStrategyNumber:
				case RTGreaterStrategyNumber:
					if (order < 0)
						bitmap = 0;
					break;

				case RTNotEqualStrategyNumber:
					break;

				default:
					/* For all other cases, we can be sure there is no match */
					bitmap = 0;
					break;
			}

			if (!bitmap)
				break;

			/*
			 * Remaining checks make no sense when common bits don't match.
			 */
			continue;
		}

		/*
		 * Check 3: next network bit
		 *
		 * We can filter out branch 2 or 3 using the next network bit of the
		 * argument, if it is available.
		 *
		 * This check matters for the performance of the search.  The results
		 * would be correct without it.  It never applies to a leaf, whose
		 * bitmap has neither bit 2 nor bit 3 set.
		 */
		if (bitmap & ((1 << 2) | (1 << 3)) &&
			commonbits < ip_bits(argument))
		{
			int			nextbit;

			/* Extract bit number "commonbits" (MSB first) of the address */
			nextbit = ip_addr(argument)[commonbits / 8] &
				(1 << (7 - commonbits % 8));

			switch (strategy)
			{
				case RTLessStrategyNumber:
				case RTLessEqualStrategyNumber:
					if (!nextbit)
						bitmap &= 1 | (1 << 1) | (1 << 2);
					break;

				case RTGreaterEqualStrategyNumber:
				case RTGreaterStrategyNumber:
					if (nextbit)
						bitmap &= 1 | (1 << 1) | (1 << 3);
					break;

				case RTNotEqualStrategyNumber:
					break;

				default:
					if (!nextbit)
						bitmap &= 1 | (1 << 1) | (1 << 2);
					else
						bitmap &= 1 | (1 << 1) | (1 << 3);
					break;
			}

			if (!bitmap)
				break;
		}

		/*
		 * Remaining checks are only for the basic comparison strategies. This
		 * test relies on the strategy number ordering defined in stratnum.h.
		 */
		if (strategy < RTEqualStrategyNumber ||
			strategy > RTGreaterEqualStrategyNumber)
			continue;

		/*
		 * Check 4: network bit count
		 *
		 * At this point, we know that the common network bits of the prefix
		 * and the argument are the same, so we can go forward and check the
		 * ip_bits.
		 */
		switch (strategy)
		{
			case RTLessStrategyNumber:
			case RTLessEqualStrategyNumber:
				if (commonbits == ip_bits(argument))
					bitmap &= 1 | (1 << 1);
				else if (commonbits > ip_bits(argument))
					bitmap = 0;
				break;

			case RTGreaterEqualStrategyNumber:
			case RTGreaterStrategyNumber:
				if (commonbits < ip_bits(argument))
					bitmap &= (1 << 2) | (1 << 3);
				break;
		}

		if (!bitmap)
			break;

		/* Remaining checks don't make sense with different ip_bits. */
		if (commonbits != ip_bits(argument))
			continue;

		/*
		 * Check 5: next host bit
		 *
		 * We can filter out branch 0 or 1 using the next host bit of the
		 * argument, if it is available.
		 *
		 * This check matters for the performance of the search.  The results
		 * would be correct without it.  There is no point in running it for
		 * leafs as we have to check the whole address on the next step.
		 */
		if (!leaf && bitmap & (1 | (1 << 1)) &&
			commonbits < ip_maxbits(argument))
		{
			int			nextbit;

			/* Extract bit number "commonbits" (MSB first) of the address */
			nextbit = ip_addr(argument)[commonbits / 8] &
				(1 << (7 - commonbits % 8));

			switch (strategy)
			{
				case RTLessStrategyNumber:
				case RTLessEqualStrategyNumber:
					if (!nextbit)
						bitmap &= 1 | (1 << 2) | (1 << 3);
					break;

				case RTGreaterEqualStrategyNumber:
				case RTGreaterStrategyNumber:
					if (nextbit)
						bitmap &= (1 << 1) | (1 << 2) | (1 << 3);
					break;

				case RTNotEqualStrategyNumber:
					break;

				default:
					if (!nextbit)
						bitmap &= 1 | (1 << 2) | (1 << 3);
					else
						bitmap &= (1 << 1) | (1 << 2) | (1 << 3);
					break;
			}

			if (!bitmap)
				break;
		}

		/*
		 * Check 6: whole address
		 *
		 * This is the last check for correctness of the basic comparison
		 * strategies.  It's only appropriate at leaf entries.
		 */
		if (leaf)
		{
			/* Redo ordering comparison using all address bits */
			order = bitncmp(ip_addr(prefix), ip_addr(argument),
							ip_maxbits(prefix));

			switch (strategy)
			{
				case RTLessStrategyNumber:
					if (order >= 0)
						bitmap = 0;
					break;

				case RTLessEqualStrategyNumber:
					if (order > 0)
						bitmap = 0;
					break;

				case RTEqualStrategyNumber:
					if (order != 0)
						bitmap = 0;
					break;

				case RTGreaterEqualStrategyNumber:
					if (order < 0)
						bitmap = 0;
					break;

				case RTGreaterStrategyNumber:
					if (order <= 0)
						bitmap = 0;
					break;

				case RTNotEqualStrategyNumber:
					if (order == 0)
						bitmap = 0;
					break;
			}

			if (!bitmap)
				break;
		}
	}

	return bitmap;
}
/* * The SP-GiST query consistency check for inner tuples */ Datum inet_spg_inner_consistent(PG_FUNCTION_ARGS) { spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); int i; int which; if (!in->hasPrefix) { Assert(!in->allTheSame); Assert(in->nNodes == 2); /* Identify which child nodes need to be visited */ which = 1 | (1 << 1); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy = in->scankeys[i].sk_strategy; inet *argument = DatumGetInetPP(in->scankeys[i].sk_argument); switch (strategy) { case RTLessStrategyNumber: case RTLessEqualStrategyNumber: if (ip_family(argument) == PGSQL_AF_INET) which &= 1; break; case RTGreaterEqualStrategyNumber: case RTGreaterStrategyNumber: if (ip_family(argument) == PGSQL_AF_INET6) which &= (1 << 1); break; case RTNotEqualStrategyNumber: break; default: /* all other ops can only match addrs of same family */ if (ip_family(argument) == PGSQL_AF_INET) which &= 1; else which &= (1 << 1); break; } } } else if (!in->allTheSame) { Assert(in->nNodes == 4); /* Identify which child nodes need to be visited */ which = inet_spg_consistent_bitmap(DatumGetInetPP(in->prefixDatum), in->nkeys, in->scankeys, false); } else { /* Must visit all nodes; we assume there are less than 32 of 'em */ which = ~0; } out->nNodes = 0; if (which) { out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); for (i = 0; i < in->nNodes; i++) { if (which & (1 << i)) { out->nodeNumbers[out->nNodes] = i; out->nNodes++; } } } PG_RETURN_VOID(); }
/* * The GiST PickSplit method */ Datum inet_spg_picksplit(PG_FUNCTION_ARGS) { spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); inet *prefix, *tmp; int i, commonbits; bool differentFamilies = false; /* Initialize the prefix with the first item */ prefix = DatumGetInetPP(in->datums[0]); commonbits = ip_bits(prefix); /* Examine remaining items to discover minimum common prefix length */ for (i = 1; i < in->nTuples; i++) { tmp = DatumGetInetPP(in->datums[i]); if (ip_family(tmp) != ip_family(prefix)) { differentFamilies = true; break; } if (ip_bits(tmp) < commonbits) commonbits = ip_bits(tmp); commonbits = bitncommon(ip_addr(prefix), ip_addr(tmp), commonbits); if (commonbits == 0) break; } /* Don't need labels; allocate output arrays */ out->nodeLabels = NULL; out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples); if (differentFamilies) { /* Set up 2-node tuple */ out->hasPrefix = false; out->nNodes = 2; for (i = 0; i < in->nTuples; i++) { tmp = DatumGetInetPP(in->datums[i]); out->mapTuplesToNodes[i] = (ip_family(tmp) == PGSQL_AF_INET) ? 0 : 1; out->leafTupleDatums[i] = InetPGetDatum(tmp); } } else { /* Set up 4-node tuple */ out->hasPrefix = true; out->prefixDatum = InetPGetDatum(cidr_set_masklen_internal(prefix, commonbits)); out->nNodes = 4; for (i = 0; i < in->nTuples; i++) { tmp = DatumGetInetPP(in->datums[i]); out->mapTuplesToNodes[i] = inet_spg_node_number(tmp, commonbits); out->leafTupleDatums[i] = InetPGetDatum(tmp); } } PG_RETURN_VOID(); }