/*
 * datumstreamread_getlarge
 *
 * Return the large variable-length object sitting at acc->buffer_beginp.
 *
 * The caller must have advanced onto the object first.  The first get after
 * an advance moves the state to "consumed"; further gets in that state hand
 * back the same object again.  Calling before an advance, or after the
 * object has been exhausted, raises an ERROR; an unknown state is FATAL.
 *
 * On success *datum points at the buffer contents and *null is false.
 */
void
datumstreamread_getlarge(DatumStreamRead * acc, Datum *datum, bool *null)
{
	int32		objlen;

	/* Reject the states in which a get is not legal. */
	if (acc->largeObjectState == DatumStreamLargeObjectState_HaveAoContent)
	{
		ereport(ERROR,
				(errmsg("Advance not called on large datum stream object")));
		return;
	}

	if (acc->largeObjectState == DatumStreamLargeObjectState_Exhausted)
	{
		ereport(ERROR,
				(errmsg("Get called after large datum stream object already consumed")));
		return;
	}

	if (acc->largeObjectState != DatumStreamLargeObjectState_PositionAdvanced &&
		acc->largeObjectState != DatumStreamLargeObjectState_Consumed)
	{
		ereport(FATAL,
				(errmsg("Unexpected large datum stream state %d",
						acc->largeObjectState)));
		return;
	}

	/* First get after an advance: remember that the object was handed out. */
	if (acc->largeObjectState == DatumStreamLargeObjectState_PositionAdvanced)
		acc->largeObjectState = DatumStreamLargeObjectState_Consumed;

	/* Fetching the same object repeatedly is permitted. */
	objlen = VARSIZE_ANY(acc->buffer_beginp);

	if (Debug_datumstream_read_print_varlena_info)
		datumstreamread_print_large_varlena_info(acc, acc->buffer_beginp);

	if (Debug_appendonly_print_scan_tuple)
		ereport(LOG,
				(errmsg("Datum stream block read is returning large variable-length object "
						"(length %d)",
						objlen)));

	*datum = PointerGetDatum(acc->buffer_beginp);
	*null = false;
}
/*
 * domainname_show
 *
 * SQL-callable: build a new text value of the same total size as the
 * argument, with its payload produced by domainname_flip() from the
 * argument's payload.
 */
Datum
domainname_show(PG_FUNCTION_ARGS)
{
	text	   *src = PG_GETARG_TEXT_P(0);
	size_t		total = VARSIZE_ANY(src);
	text	   *dst;

	/* The result has exactly the same total size (header included). */
	dst = (text *) palloc(total);
	SET_VARSIZE(dst, total);

	/* Only the payload (everything after the header) goes through the flip. */
	domainname_flip(VARDATA(dst), VARDATA(src), total - VARHDRSZ);

	PG_FREE_IF_COPY(src, 0);

	PG_RETURN_TEXT_P(dst);
}
/*-------------------------------------------------------------------------
 * datumCopy
 *
 * Produce a copy of a non-NULL datum.
 *
 * Pass-by-value datums are simply returned.  For pass-by-reference types
 * the copy lives in freshly palloc'd memory.
 *
 * A reference to an expanded object is flattened into palloc'd memory.
 * Copying is required because this function is commonly used to rescue a
 * datum from a transient memory context that is about to be destroyed, and
 * the expanded object probably lives in a child context that goes away too.
 * Many callers also rely on the result being one pfree-able chunk.
 *
 * A varlena datum whose pointer is NULL yields a NULL-pointer datum.
 *-------------------------------------------------------------------------
 */
Datum
datumCopy(Datum value, bool typByVal, int typLen)
{
	struct varlena *vl;
	Size		nbytes;
	char	   *copy;

	/* By-value: the datum itself is the copy. */
	if (typByVal)
		return value;

	if (typLen != -1)
	{
		/* Pass by reference, but not varlena, so not toasted */
		nbytes = datumGetSize(value, typByVal, typLen);
		copy = (char *) palloc(nbytes);
		memcpy(copy, DatumGetPointer(value), nbytes);
		return PointerGetDatum(copy);
	}

	/* From here on we are dealing with a varlena datatype. */
	vl = (struct varlena *) DatumGetPointer(value);
	if (!vl)
		return PointerGetDatum(NULL);

	if (VARATT_IS_EXTERNAL_EXPANDED(vl))
	{
		/* Flatten the expanded object into the caller's memory context */
		ExpandedObjectHeader *eoh = DatumGetEOHP(value);

		nbytes = EOH_get_flat_size(eoh);
		copy = (char *) palloc(nbytes);
		EOH_flatten_into(eoh, (void *) copy, nbytes);
		return PointerGetDatum(copy);
	}

	/* Any other varlena is duplicated byte for byte. */
	nbytes = (Size) VARSIZE_ANY(vl);
	copy = (char *) palloc(nbytes);
	memcpy(copy, vl, nbytes);
	return PointerGetDatum(copy);
}
/*
 * joinJsQuery
 *
 * Build a new JsQuery whose root item has the given type (the caller passes
 * the and/or item type) and whose two children are copies of jq1 and jq2.
 *
 * The two child-position slots are first written with placeholder values
 * and patched once each subtree has been copied and its position is known.
 */
static JsQuery*
joinJsQuery(JsQueryItemType type, JsQuery *jq1, JsQuery *jq2)
{
	StringInfoData	sink;
	JsQueryItem		item;
	JsQuery		   *result;
	int32			leftSlot;
	int32			rightSlot;
	int32			pos;

	initStringInfo(&sink);
	enlargeStringInfo(&sink,
					  VARSIZE_ANY(jq1) + VARSIZE_ANY(jq2) +
					  4 * sizeof(int32) + VARHDRSZ);

	/* leave room for the varlena header */
	appendStringInfoSpaces(&sink, VARHDRSZ);

	/* root item header: type byte, then padding to int alignment */
	appendStringInfoChar(&sink, (char)type);
	alignStringInfoInt(&sink);

	/* nextPos field of the header: this zero is the real value */
	pos = 0;
	appendBinaryStringInfo(&sink, (char*)&pos, sizeof(pos));

	/* reserve the left-child slot (placeholder content) */
	leftSlot = sink.len;
	appendBinaryStringInfo(&sink, (char*)&leftSlot, sizeof(leftSlot));

	/* reserve the right-child slot (placeholder content) */
	rightSlot = sink.len;
	appendBinaryStringInfo(&sink, (char*)&rightSlot, sizeof(rightSlot));

	/* copy the left subtree and patch its slot */
	jsqInit(&item, jq1);
	pos = copyJsQuery(&sink, &item);
	*(int32*)(sink.data + leftSlot) = pos;

	/* copy the right subtree and patch its slot */
	jsqInit(&item, jq2);
	pos = copyJsQuery(&sink, &item);
	*(int32*)(sink.data + rightSlot) = pos;

	result = (JsQuery*)sink.data;
	SET_VARSIZE(result, sink.len);

	return result;
}
/*-------------------------------------------------------------------------
 * datumGetSize
 *
 * Report the "real" size of a datum from its value, its by-value flag and
 * its declared type length.
 *
 * In essence this is an out-of-line form of the att_addlength_datum()
 * macro from access/tupmacs.h, with somewhat more error checking.
 *
 *	typLen > 0	fixed length (by value or by reference)
 *	typLen == -1	varlena; size is read from the header
 *	typLen == -2	cstring; size is strlen + 1
 *
 * Any other typLen, or a NULL pointer for a varlena/cstring, raises ERROR.
 *-------------------------------------------------------------------------
 */
Size
datumGetSize(Datum value, bool typByVal, int typLen)
{
	if (typByVal)
	{
		/* Pass-by-value types are always fixed-length */
		Assert(typLen > 0 && typLen <= sizeof(Datum));
		return (Size) typLen;
	}

	/* Fixed-length pass-by-ref type */
	if (typLen > 0)
		return (Size) typLen;

	if (typLen == -1)
	{
		/* varlena: the header carries the length */
		struct varlena *vl = (struct varlena *) DatumGetPointer(value);

		if (!PointerIsValid(vl))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_EXCEPTION),
					 errmsg("invalid Datum pointer")));

		return (Size) VARSIZE_ANY(vl);
	}

	if (typLen == -2)
	{
		/* cstring: count the terminating NUL as well */
		char	   *str = (char *) DatumGetPointer(value);

		if (!PointerIsValid(str))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_EXCEPTION),
					 errmsg("invalid Datum pointer")));

		return (Size) (strlen(str) + 1);
	}

	elog(ERROR, "invalid typLen: %d", typLen);
	return 0;					/* keep compiler quiet */
}
/*
 * getTypeLength
 *
 * Return the MAXALIGN'd number of bytes occupied by a datum of the type
 * described by 'att': sizeof(Datum) for by-value types, attlen for
 * fixed-length by-reference types, otherwise the varlena size.
 */
unsigned int
getTypeLength(SpGistTypeDesc *att, Datum datum)
{
	unsigned int rawlen;

	if (!att->attbyval)
	{
		if (att->attlen > 0)
			rawlen = att->attlen;
		else
			rawlen = VARSIZE_ANY(datum);
	}
	else
		rawlen = sizeof(datum);

	return MAXALIGN(rawlen);
}
/*
 * Store the given non-null datum at *target.
 *
 * By-value datums are stored as a whole Datum; by-reference datums are
 * copied from the pointed-to memory, using attlen for fixed-length types
 * and the varlena size otherwise.
 */
static void
memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
{
	unsigned int nbytes;

	if (att->attbyval)
	{
		memcpy(target, &datum, sizeof(Datum));
		return;
	}

	if (att->attlen > 0)
		nbytes = att->attlen;
	else
		nbytes = VARSIZE_ANY(datum);

	memcpy(target, DatumGetPointer(datum), nbytes);
}
/*
 * Decompress a sparse counter.  At present that only means allocating a
 * larger, zero-filled chunk, copying the existing bytes into it and
 * flipping the sign of 'b' (the compression flag).
 *
 * Note that the sign flip is applied to the input counter before the copy,
 * so the caller's original is modified as well.
 *
 * NOTE(review): the copy assumes the counter's current size does not
 * exceed 2^(b-2) bytes -- confirm against the producers of sparse data.
 */
static HLLCounter
hll_decompress_sparse_V1(HLLCounter hloglog)
{
	HLLCounter	expanded;
	size_t		fullsize;

	/* make b positive: needed for the size calculation and marks the data
	 * as decompressed */
	hloglog->b = -1 * (hloglog->b);

	fullsize = pow(2,(hloglog->b-2));

	/* zero-filled target, then bring over whatever the input holds */
	expanded = palloc0(fullsize);
	memcpy(expanded,hloglog,VARSIZE_ANY(hloglog));

	SET_VARSIZE(expanded,fullsize);

	return expanded;
}
/*
 * Copy a datum of the type described by 'att' to 'in' and return the
 * MAXALIGN'd number of bytes it occupies.
 *
 * By-value datums are stored as a whole Datum.  By-reference datums are
 * copied from the pointed-to memory (attlen bytes for fixed-length types,
 * the varlena size otherwise); their size is asserted to be below 0xffff.
 */
static unsigned int
memcpyDatum(void *in, SpGistTypeDesc *att, Datum datum)
{
	unsigned int nbytes;

	if (att->attbyval)
	{
		nbytes = sizeof(datum);
		memcpy(in, &datum, nbytes);
		return MAXALIGN(nbytes);
	}

	if (att->attlen > 0)
		nbytes = att->attlen;
	else
		nbytes = VARSIZE_ANY(datum);

	Assert(nbytes < 0xffff);
	memcpy(in, DatumGetPointer(datum), nbytes);

	return MAXALIGN(nbytes);
}
/*
 * datumstreamread_advancelarge
 *
 * Advance the read position of a large-object datum stream.  The block
 * read counter (blockRead.nth) is bumped on every call.
 *
 * Returns the varlena length of the buffered object on the first advance,
 * and 0 on the following advance, which marks the object exhausted.
 * Advancing again after exhaustion raises an ERROR; an unknown state is
 * FATAL.
 */
int
datumstreamread_advancelarge(DatumStreamRead * acc)
{
	acc->blockRead.nth++;

	if (acc->largeObjectState == DatumStreamLargeObjectState_HaveAoContent)
	{
		struct varlena *obj = (struct varlena *) acc->buffer_beginp;
		int32		objlen = VARSIZE_ANY(obj);

		acc->largeObjectState = DatumStreamLargeObjectState_PositionAdvanced;
		return objlen;
	}

	if (acc->largeObjectState == DatumStreamLargeObjectState_PositionAdvanced ||
		acc->largeObjectState == DatumStreamLargeObjectState_Consumed)
	{
		/* A second advance reports exhaustion. */
		acc->largeObjectState = DatumStreamLargeObjectState_Exhausted;
		return 0;
	}

	if (acc->largeObjectState == DatumStreamLargeObjectState_Exhausted)
	{
		ereport(ERROR,
				(errmsg("Advance called after large datum stream object already consumed")));
		return 0;				/* not reached */
	}

	ereport(FATAL,
			(errmsg("Unexpected large datum stream state %d",
					acc->largeObjectState)));
	return 0;					/* not reached */
}
Datum jsquery_not(PG_FUNCTION_ARGS) { JsQuery *jq = PG_GETARG_JSQUERY(0); JsQuery *out; StringInfoData buf; int32 arg, chld; JsQueryItem v; initStringInfo(&buf); enlargeStringInfo(&buf, VARSIZE_ANY(jq) + 4 * sizeof(int32) + VARHDRSZ); appendStringInfoSpaces(&buf, VARHDRSZ); /* form jsquery header */ appendStringInfoChar(&buf, (char)jqiNot); alignStringInfoInt(&buf); /* nextPos field of header*/ chld = 0; /* actual value, not a fake */ appendBinaryStringInfo(&buf, (char*)&chld, sizeof(chld)); arg = buf.len; appendBinaryStringInfo(&buf, (char*)&arg /* fake value */, sizeof(arg)); jsqInit(&v, jq); chld = copyJsQuery(&buf, &v); *(int32*)(buf.data + arg) = chld; out = (JsQuery*)buf.data; SET_VARSIZE(out, buf.len); PG_FREE_IF_COPY(jq, 0); PG_RETURN_JSQUERY(out); }
Datum make_tuple_indirect(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleData tuple; int ncolumns; Datum *values; bool *nulls; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTuple newtup; int i; MemoryContext old_context; /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); heap_deform_tuple(&tuple, tupdesc, values, nulls); old_context = MemoryContextSwitchTo(TopTransactionContext); for (i = 0; i < ncolumns; i++) { struct varlena *attr; struct varlena *new_attr; struct varatt_indirect redirect_pointer; /* only work on existing, not-null varlenas */ if (TupleDescAttr(tupdesc, i)->attisdropped || nulls[i] || TupleDescAttr(tupdesc, i)->attlen != -1) continue; attr = (struct varlena *) DatumGetPointer(values[i]); /* don't recursively indirect */ if (VARATT_IS_EXTERNAL_INDIRECT(attr)) continue; /* copy datum, so it still lives later */ if (VARATT_IS_EXTERNAL_ONDISK(attr)) attr = heap_tuple_fetch_attr(attr); else { struct varlena *oldattr = attr; attr = palloc0(VARSIZE_ANY(oldattr)); memcpy(attr, oldattr, VARSIZE_ANY(oldattr)); } /* build indirection Datum */ new_attr = (struct varlena *) palloc0(INDIRECT_POINTER_SIZE); redirect_pointer.pointer = attr; SET_VARTAG_EXTERNAL(new_attr, VARTAG_INDIRECT); memcpy(VARDATA_EXTERNAL(new_attr), &redirect_pointer, sizeof(redirect_pointer)); values[i] = PointerGetDatum(new_attr); } newtup = heap_form_tuple(tupdesc, values, nulls); pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); MemoryContextSwitchTo(old_context); /* * We intentionally don't 
use PG_RETURN_HEAPTUPLEHEADER here, because that * would cause the indirect toast pointers to be flattened out of the * tuple immediately, rendering subsequent testing irrelevant. So just * return the HeapTupleHeader pointer as-is. This violates the general * rule that composite Datums shouldn't contain toast pointers, but so * long as the regression test scripts don't insert the result of this * function into a container type (record, array, etc) it should be OK. */ PG_RETURN_POINTER(newtup->t_data); }
/*
 * Scalar callback for jsonb input.
 *
 * jsonb always stores the de-escaped value, which is what 'token' holds.
 * The token is turned into a JsonbValue and pushed into the parse state:
 * as an array element or object value when inside a container, or wrapped
 * in a one-element "raw scalar" array when it is the whole document.
 */
static void
jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
{
	JsonbInState *state = (JsonbInState *) pstate;
	JsonbValue	scalar;

	scalar.estSize = sizeof(JEntry);

	switch (tokentype)
	{
		case JSON_TOKEN_NULL:
			scalar.type = jbvNull;
			break;

		case JSON_TOKEN_TRUE:
			scalar.type = jbvBool;
			scalar.val.boolean = true;
			break;

		case JSON_TOKEN_FALSE:
			scalar.type = jbvBool;
			scalar.val.boolean = false;
			break;

		case JSON_TOKEN_STRING:
			Assert(token != NULL);
			scalar.type = jbvString;
			scalar.val.string.len = checkStringLen(strlen(token));
			scalar.val.string.val = pnstrdup(token, scalar.val.string.len);
			scalar.estSize += scalar.val.string.len;
			break;

		case JSON_TOKEN_NUMBER:

			/*
			 * Numeric sizes need no check: the largest possible numeric is
			 * far smaller than the JsonbValue limit.
			 */
			Assert(token != NULL);
			scalar.type = jbvNumeric;
			scalar.val.numeric = DatumGetNumeric(DirectFunctionCall3(numeric_in,
																	 CStringGetDatum(token),
																	 0, -1));
			/* the extra JEntry accounts for alignment */
			scalar.estSize += VARSIZE_ANY(scalar.val.numeric) + sizeof(JEntry);
			break;

		default:
			/* should not be possible */
			elog(ERROR, "invalid json token type");
			break;
	}

	if (state->parseState != NULL)
	{
		/* inside a container: attach according to the parent's kind */
		JsonbValue *parent = &state->parseState->contVal;

		if (parent->type == jbvArray)
			state->res = pushJsonbValue(&state->parseState, WJB_ELEM, &scalar);
		else if (parent->type == jbvObject)
			state->res = pushJsonbValue(&state->parseState, WJB_VALUE, &scalar);
		else
			elog(ERROR, "unexpected parent of nested structure");
	}
	else
	{
		/* a lone scalar: wrap it in a raw-scalar array of one element */
		JsonbValue	wrapper;

		wrapper.type = jbvArray;
		wrapper.val.array.rawScalar = true;
		wrapper.val.array.nElems = 1;

		state->res = pushJsonbValue(&state->parseState, WJB_BEGIN_ARRAY, &wrapper);
		state->res = pushJsonbValue(&state->parseState, WJB_ELEM, &scalar);
		state->res = pushJsonbValue(&state->parseState, WJB_END_ARRAY, NULL);
	}
}
/* * tuple_data_split_internal * * Split raw tuple data taken directly from a page into an array of bytea * elements. This routine does a lookup on NULL values and creates array * elements accordingly. This is a reimplementation of nocachegetattr() * in heaptuple.c simplified for educational purposes. */ static Datum tuple_data_split_internal(Oid relid, char *tupdata, uint16 tupdata_len, uint16 t_infomask, uint16 t_infomask2, bits8 *t_bits, bool do_detoast) { ArrayBuildState *raw_attrs; int nattrs; int i; int off = 0; Relation rel; TupleDesc tupdesc; /* Get tuple descriptor from relation OID */ rel = relation_open(relid, AccessShareLock); tupdesc = RelationGetDescr(rel); raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false); nattrs = tupdesc->natts; if (nattrs < (t_infomask2 & HEAP_NATTS_MASK)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor"))); for (i = 0; i < nattrs; i++) { Form_pg_attribute attr; bool is_null; bytea *attr_data = NULL; attr = TupleDescAttr(tupdesc, i); /* * Tuple header can specify less attributes than tuple descriptor as * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually * change tuples in pages, so attributes with numbers greater than * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL. */ if (i >= (t_infomask2 & HEAP_NATTS_MASK)) is_null = true; else is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits); if (!is_null) { int len; if (attr->attlen == -1) { off = att_align_pointer(off, attr->attalign, -1, tupdata + off); /* * As VARSIZE_ANY throws an exception if it can't properly * detect the type of external storage in macros VARTAG_SIZE, * this check is repeated to have a nicer error handling. 
*/ if (VARATT_IS_EXTERNAL(tupdata + off) && !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) && !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("first byte of varlena attribute is incorrect for attribute %d", i))); len = VARSIZE_ANY(tupdata + off); } else { off = att_align_nominal(off, attr->attalign); len = attr->attlen; } if (tupdata_len < off + len) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("unexpected end of tuple data"))); if (attr->attlen == -1 && do_detoast) attr_data = DatumGetByteaPCopy(tupdata + off); else { attr_data = (bytea *) palloc(len + VARHDRSZ); SET_VARSIZE(attr_data, len + VARHDRSZ); memcpy(VARDATA(attr_data), tupdata + off, len); } off = att_addlength_pointer(off, attr->attlen, tupdata + off); } raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data), is_null, BYTEAOID, CurrentMemoryContext); if (attr_data) pfree(attr_data); } if (tupdata_len != off) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("end of tuple reached without looking at all its data"))); relation_close(rel, AccessShareLock); return makeArrayResult(raw_attrs, CurrentMemoryContext); }
Datum hstore_to_jsonb_loose(PG_FUNCTION_ARGS) { HStore *in = PG_GETARG_HS(0); int i; int count = HS_COUNT(in); char *base = STRPTR(in); HEntry *entries = ARRPTR(in); JsonbParseState *state = NULL; JsonbValue *res; StringInfoData tmp; bool is_number; initStringInfo(&tmp); res = pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); for (i = 0; i < count; i++) { JsonbValue key, val; key.estSize = sizeof(JEntry); key.type = jbvString; key.val.string.len = HS_KEYLEN(entries, i); key.val.string.val = pnstrdup(HS_KEY(entries, base, i), key.val.string.len); key.estSize += key.val.string.len; res = pushJsonbValue(&state, WJB_KEY, &key); val.estSize = sizeof(JEntry); if (HS_VALISNULL(entries, i)) { val.type = jbvNull; } /* guess that values of 't' or 'f' are booleans */ else if (HS_VALLEN(entries, i) == 1 && *(HS_VAL(entries, base, i)) == 't') { val.type = jbvBool; val.val.boolean = true; } else if (HS_VALLEN(entries, i) == 1 && *(HS_VAL(entries, base, i)) == 'f') { val.type = jbvBool; val.val.boolean = false; } else { is_number = false; resetStringInfo(&tmp); appendBinaryStringInfo(&tmp, HS_VAL(entries, base, i), HS_VALLEN(entries, i)); /* * don't treat something with a leading zero followed by another * digit as numeric - could be a zip code or similar */ if (tmp.len > 0 && !(tmp.data[0] == '0' && isdigit((unsigned char) tmp.data[1])) && strspn(tmp.data, "+-0123456789Ee.") == tmp.len) { /* * might be a number. See if we can input it as a numeric * value. Ignore any actual parsed value. 
*/ char *endptr = "junk"; long lval; lval = strtol(tmp.data, &endptr, 10); (void) lval; if (*endptr == '\0') { /* * strol man page says this means the whole string is * valid */ is_number = true; } else { /* not an int - try a double */ double dval; dval = strtod(tmp.data, &endptr); (void) dval; if (*endptr == '\0') is_number = true; } } if (is_number) { val.type = jbvNumeric; val.val.numeric = DatumGetNumeric( DirectFunctionCall3(numeric_in, CStringGetDatum(tmp.data), 0, -1)); val.estSize += VARSIZE_ANY(val.val.numeric) +sizeof(JEntry); } else { val.estSize = sizeof(JEntry); val.type = jbvString; val.val.string.len = HS_VALLEN(entries, i); val.val.string.val = pnstrdup(HS_VAL(entries, base, i), val.val.string.len); val.estSize += val.val.string.len; } } res = pushJsonbValue(&state, WJB_VALUE, &val); } res = pushJsonbValue(&state, WJB_END_OBJECT, NULL); PG_RETURN_POINTER(JsonbValueToJsonb(res)); }
/* ----------
 * toast_insert_or_update -
 *
 *	Delete no-longer-used toast-entries and create new ones to
 *	make the new tuple fit on INSERT or UPDATE
 *
 * Inputs:
 *	rel: the relation the tuple belongs to (asserted to be a plain relation)
 *	newtup: the candidate new tuple to be inserted
 *	oldtup: the old row version for UPDATE, or NULL for INSERT
 *	options: options to be passed to heap_insert() for toast rows
 * Result:
 *	either newtup if no toasting is needed, or a palloc'd modified tuple
 *	that is what should actually get stored
 *
 * NOTE: neither newtup nor oldtup will be modified.  This is a change
 * from the pre-8.1 API of this routine.
 *
 * Overview of the body: deform the tuple(s), classify every attribute,
 * then run up to four shrinking passes (compress 'x', externalize 'x'/'e',
 * compress 'm', externalize 'm') while the data part is larger than the
 * target, and finally rebuild the tuple if any value was replaced.
 * ----------
 */
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
					   int options)
{
	HeapTuple	result_tuple;
	TupleDesc	tupleDesc;
	Form_pg_attribute *att;
	int			numAttrs;
	int			i;

	/* flags summarizing what the per-attribute arrays below contain */
	bool		need_change = false;	/* some toast_values[] entry replaced */
	bool		need_free = false;		/* some toast_free[] entry is set */
	bool		need_delold = false;	/* some toast_delold[] entry is set */
	bool		has_nulls = false;		/* new tuple has a NULL attribute */

	Size		maxDataLen;
	Size		hoff;

	/* per-attribute working state, indexed by attribute position */
	char		toast_action[MaxHeapAttributeNumber];
	bool		toast_isnull[MaxHeapAttributeNumber];
	bool		toast_oldisnull[MaxHeapAttributeNumber];
	Datum		toast_values[MaxHeapAttributeNumber];
	Datum		toast_oldvalues[MaxHeapAttributeNumber];
	int32		toast_sizes[MaxHeapAttributeNumber];
	bool		toast_free[MaxHeapAttributeNumber];
	bool		toast_delold[MaxHeapAttributeNumber];

	/*
	 * We should only ever be called for tuples of plain relations ---
	 * recursing on a toast rel is bad news.
	 */
	Assert(rel->rd_rel->relkind == RELKIND_RELATION);

	/*
	 * Get the tuple descriptor and break down the tuple(s) into fields.
	 */
	tupleDesc = rel->rd_att;
	att = tupleDesc->attrs;
	numAttrs = tupleDesc->natts;

	Assert(numAttrs <= MaxHeapAttributeNumber);
	heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
	if (oldtup != NULL)
		heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);

	/* ----------
	 * Then collect information about the values given
	 *
	 * NOTE: toast_action[i] can have these values:
	 *		' '		default handling
	 *		'p'		already processed --- don't touch it
	 *		'x'		incompressible, but OK to move off
	 *
	 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
	 *		toast_action[i] different from 'p'.
	 * ----------
	 */
	memset(toast_action, ' ', numAttrs * sizeof(char));
	memset(toast_free, 0, numAttrs * sizeof(bool));
	memset(toast_delold, 0, numAttrs * sizeof(bool));

	for (i = 0; i < numAttrs; i++)
	{
		struct varlena *old_value;
		struct varlena *new_value;

		if (oldtup != NULL)
		{
			/*
			 * For UPDATE get the old and new values of this attribute
			 */
			old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
			new_value = (struct varlena *) DatumGetPointer(toast_values[i]);

			/*
			 * If the old value is an external stored one, check if it has
			 * changed so we have to delete it later.
			 */
			if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
				VARATT_IS_EXTERNAL(old_value))
			{
				/*
				 * "Changed" means: new value is NULL, is not external, or
				 * its external pointer bytes differ from the old one's.
				 */
				if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
					memcmp((char *) old_value, (char *) new_value,
						   VARSIZE_EXTERNAL(old_value)) != 0)
				{
					/*
					 * The old external stored value isn't needed any more
					 * after the update
					 */
					toast_delold[i] = true;
					need_delold = true;
				}
				else
				{
					/*
					 * This attribute isn't changed by this update so we reuse
					 * the original reference to the old value in the new
					 * tuple.
					 */
					toast_action[i] = 'p';
					continue;
				}
			}
		}
		else
		{
			/*
			 * For INSERT simply get the new value
			 */
			new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
		}

		/*
		 * Handle NULL attributes
		 */
		if (toast_isnull[i])
		{
			toast_action[i] = 'p';
			has_nulls = true;
			continue;
		}

		/*
		 * Now look at varlena attributes
		 */
		if (att[i]->attlen == -1)
		{
			/*
			 * If the table's attribute says PLAIN always, force it so.
			 */
			if (att[i]->attstorage == 'p')
				toast_action[i] = 'p';

			/*
			 * We took care of UPDATE above, so any external value we find
			 * still in the tuple must be someone else's we cannot reuse.
			 * Fetch it back (without decompression, unless we are forcing
			 * PLAIN storage).  If necessary, we'll push it out as a new
			 * external value below.
			 */
			if (VARATT_IS_EXTERNAL(new_value))
			{
				if (att[i]->attstorage == 'p')
					new_value = heap_tuple_untoast_attr(new_value);
				else
					new_value = heap_tuple_fetch_attr(new_value);
				toast_values[i] = PointerGetDatum(new_value);
				toast_free[i] = true;
				need_change = true;
				need_free = true;
			}

			/*
			 * Remember the size of this attribute
			 */
			toast_sizes[i] = VARSIZE_ANY(new_value);
		}
		else
		{
			/*
			 * Not a varlena attribute, plain storage always
			 */
			toast_action[i] = 'p';
		}
	}

	/* ----------
	 * Compress and/or save external until data fits into target length
	 *
	 *	1: Inline compress attributes with attstorage 'x', and store very
	 *	   large attributes with attstorage 'x' or 'e' external immediately
	 *	2: Store attributes with attstorage 'x' or 'e' external
	 *	3: Inline compress attributes with attstorage 'm'
	 *	4: Store attributes with attstorage 'm' external
	 * ----------
	 */

	/* compute header overhead --- this should match heap_form_tuple() */
	hoff = offsetof(HeapTupleHeaderData, t_bits);
	if (has_nulls)
		hoff += BITMAPLEN(numAttrs);
	if (newtup->t_data->t_infomask & HEAP_HASOID)
		hoff += sizeof(Oid);
	hoff = MAXALIGN(hoff);
	Assert(hoff == newtup->t_data->t_hoff);
	/* now convert to a limit on the tuple data size */
	maxDataLen = TOAST_TUPLE_TARGET - hoff;

	/*
	 * Look for attributes with attstorage 'x' to compress.  Also find large
	 * attributes with attstorage 'x' or 'e', and store them external.
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen)
	{
		int			biggest_attno = -1;
		/* only attributes bigger than an aligned toast pointer qualify */
		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet unprocessed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
				continue;		/* can't happen, toast_action would be 'p' */
			if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
				continue;
			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		/* no candidate left for this pass */
		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline, if it has attstorage 'x'
		 */
		i = biggest_attno;
		if (att[i]->attstorage == 'x')
		{
			old_value = toast_values[i];
			new_value = toast_compress_datum(old_value);

			if (DatumGetPointer(new_value) != NULL)
			{
				/* successful compression */
				if (toast_free[i])
					pfree(DatumGetPointer(old_value));
				toast_values[i] = new_value;
				toast_free[i] = true;
				toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
				need_change = true;
				need_free = true;
			}
			else
			{
				/* incompressible, ignore on subsequent compression passes */
				toast_action[i] = 'x';
			}
		}
		else
		{
			/* has attstorage 'e', ignore on subsequent compression passes */
			toast_action[i] = 'x';
		}

		/*
		 * If this value is by itself more than maxDataLen (after compression
		 * if any), push it out to the toast table immediately, if possible.
		 * This avoids uselessly compressing other fields in the common case
		 * where we have one long field and several short ones.
		 *
		 * XXX maybe the threshold should be less than maxDataLen?
		 */
		if (toast_sizes[i] > maxDataLen &&
			rel->rd_rel->reltoastrelid != InvalidOid)
		{
			old_value = toast_values[i];
			toast_action[i] = 'p';
			toast_values[i] = toast_save_datum(rel, toast_values[i], options);
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_free[i] = true;
			need_change = true;
			need_free = true;
		}
	}

	/*
	 * Second we look for attributes of attstorage 'x' or 'e' that are still
	 * inline.  But skip this if there's no toast table to push them to.
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen &&
		   rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
		Datum		old_value;

		/*------
		 * Search for the biggest yet inlined attribute with
		 * attstorage equals 'x' or 'e'
		 *
		 * Unlike the compression passes, only action 'p' is skipped here,
		 * so attributes marked 'x' (incompressible) are still candidates.
		 *------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
				continue;		/* can't happen, toast_action would be 'p' */
			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i], options);
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));
		toast_free[i] = true;

		need_change = true;
		need_free = true;
	}

	/*
	 * Round 3 - this time we take attributes with storage 'm' into
	 * compression
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
				continue;		/* can't happen, toast_action would be 'p' */
			if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);

		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
			need_change = true;
			need_free = true;
		}
		else
		{
			/* incompressible, ignore on subsequent compression passes */
			toast_action[i] = 'x';
		}
	}

	/*
	 * Finally we store attributes of type 'm' externally.  At this point we
	 * increase the target tuple size, so that 'm' attributes aren't stored
	 * externally unless really necessary.
	 */
	maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;

	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen &&
		   rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
		Datum		old_value;

		/*--------
		 * Search for the biggest yet inlined attribute with
		 * attstorage = 'm'
		 *--------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
				continue;		/* can't happen, toast_action would be 'p' */
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i], options);
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));
		toast_free[i] = true;

		need_change = true;
		need_free = true;
	}

	/*
	 * In the case we toasted any values, we need to build a new heap tuple
	 * with the changed values.
	 */
	if (need_change)
	{
		HeapTupleHeader olddata = newtup->t_data;
		HeapTupleHeader new_data;
		int32		new_len;
		int32		new_data_len;

		/*
		 * Calculate the new size of the tuple.  Header size should not
		 * change, but data size might.
		 */
		new_len = offsetof(HeapTupleHeaderData, t_bits);
		if (has_nulls)
			new_len += BITMAPLEN(numAttrs);
		if (olddata->t_infomask & HEAP_HASOID)
			new_len += sizeof(Oid);
		new_len = MAXALIGN(new_len);
		Assert(new_len == olddata->t_hoff);
		new_data_len = heap_compute_data_size(tupleDesc,
											  toast_values, toast_isnull);
		new_len += new_data_len;

		/*
		 * Allocate and zero the space needed, and fill HeapTupleData fields.
		 * The tuple header lives in the same chunk, right after the
		 * HeapTupleData struct.
		 */
		result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len);
		result_tuple->t_len = new_len;
		result_tuple->t_self = newtup->t_self;
		result_tuple->t_tableOid = newtup->t_tableOid;
		new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
		result_tuple->t_data = new_data;

		/*
		 * Put the existing tuple header and the changed values into place
		 */
		memcpy(new_data, olddata, olddata->t_hoff);

		heap_fill_tuple(tupleDesc,
						toast_values,
						toast_isnull,
						(char *) new_data + olddata->t_hoff,
						new_data_len,
						&(new_data->t_infomask),
						has_nulls ? new_data->t_bits : NULL);
	}
	else
		result_tuple = newtup;

	/*
	 * Free allocated temp values
	 */
	if (need_free)
		for (i = 0; i < numAttrs; i++)
			if (toast_free[i])
				pfree(DatumGetPointer(toast_values[i]));

	/*
	 * Delete external values from the old tuple
	 */
	if (need_delold)
		for (i = 0; i < numAttrs; i++)
			if (toast_delold[i])
				toast_delete_datum(rel, toast_oldvalues[i]);

	return result_tuple;
}
/*
 * Serialize a tuple into the output stream, using the most efficient
 * format available for each column.
 *
 * Layout: 'T', a 2-byte count of live (non-dropped) columns, then for each
 * live column one kind byte: 'n' for NULL, 'u' for an unchanged on-disk
 * toast value, or the transfer type chosen by decide_datum_transfer()
 * followed by a 4-byte length and the data.
 */
static void
pglogical_write_tuple(StringInfo out, PGLogicalOutputData *data,
					  Relation rel, HeapTuple tuple)
{
	TupleDesc	desc = RelationGetDescr(rel);
	Datum		values[MaxTupleAttributeNumber];
	bool		isnull[MaxTupleAttributeNumber];
	uint16		nliveatts = 0;
	int			i;

	pq_sendbyte(out, 'T');		/* sending TUPLE */

	/* count the columns that have not been dropped */
	for (i = 0; i < desc->natts; i++)
	{
		if (!desc->attrs[i]->attisdropped)
			nliveatts++;
	}
	pq_sendint(out, nliveatts, 2);

	/* grab a plausible amount of buffer space up front */
	enlargeStringInfo(out, tuple->t_len +
					  nliveatts * (1 + 4));

	/*
	 * XXX: if this ever shows up as a bottleneck, heap_deform_tuple() could
	 * be inlined here, since the information is not really needed in the
	 * form it delivers.
	 */
	heap_deform_tuple(tuple, desc, values, isnull);

	for (i = 0; i < desc->natts; i++)
	{
		Form_pg_attribute att = desc->attrs[i];
		HeapTuple	typtup;
		Form_pg_type typclass;
		char		transfer_type;

		/* dropped columns are not sent at all */
		if (att->attisdropped)
			continue;

		if (isnull[i])
		{
			pq_sendbyte(out, 'n');	/* null column */
			continue;
		}

		if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
		{
			pq_sendbyte(out, 'u');	/* unchanged toast column */
			continue;
		}

		typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid));
		if (!HeapTupleIsValid(typtup))
			elog(ERROR, "cache lookup failed for type %u", att->atttypid);
		typclass = (Form_pg_type) GETSTRUCT(typtup);

		transfer_type = decide_datum_transfer(att, typclass,
											  data->allow_internal_basetypes,
											  data->allow_binary_basetypes);
		pq_sendbyte(out, transfer_type);

		switch (transfer_type)
		{
			case 'b':
				/* internal-format binary data follows */
				if (att->attbyval)
				{
					/* pass by value: store the datum bytes in place */
					pq_sendint(out, att->attlen, 4);	/* length */

					enlargeStringInfo(out, att->attlen);
					store_att_byval(out->data + out->len, values[i],
									att->attlen);
					out->len += att->attlen;
					out->data[out->len] = '\0';
				}
				else if (att->attlen > 0)
				{
					/* fixed length non-varlena pass-by-reference type */
					pq_sendint(out, att->attlen, 4);	/* length */

					appendBinaryStringInfo(out, DatumGetPointer(values[i]),
										   att->attlen);
				}
				else if (att->attlen == -1)
				{
					/* varlena type */
					char	   *vl = DatumGetPointer(values[i]);

					/* indirect datums are followed and sent inline */
					if (VARATT_IS_EXTERNAL_INDIRECT(values[i]))
					{
						struct varatt_indirect redirect;

						VARATT_EXTERNAL_GET_POINTER(redirect, vl);
						vl = (char *) redirect.pointer;
					}

					Assert(!VARATT_IS_EXTERNAL(vl));

					pq_sendint(out, VARSIZE_ANY(vl), 4);	/* length */

					appendBinaryStringInfo(out, vl, VARSIZE_ANY(vl));
				}
				else
					elog(ERROR, "unsupported tuple type");

				break;

			case 's':
				/* binary send/recv data follows */
				{
					bytea	   *sendbuf;
					int			sendlen;

					sendbuf = OidSendFunctionCall(typclass->typsend,
												  values[i]);

					sendlen = VARSIZE(sendbuf) - VARHDRSZ;
					pq_sendint(out, sendlen, 4);	/* length */
					pq_sendbytes(out, VARDATA(sendbuf), sendlen);	/* data */
					pfree(sendbuf);
				}
				break;

			default:
				/* text output; the length includes the terminating NUL */
				{
					char	   *textval;
					int			textlen;

					textval = OidOutputFunctionCall(typclass->typoutput,
													values[i]);
					textlen = strlen(textval) + 1;
					pq_sendint(out, textlen, 4);	/* length */
					appendBinaryStringInfo(out, textval, textlen);	/* data */
					pfree(textval);
				}
		}

		ReleaseSysCache(typtup);
	}
}
/*
 * copyJsQuery
 *		Append a serialized copy of the item "jsq", its arguments and the
 *		items chained after it, to "buf".
 *
 * Each item is written as one byte holding (type | hint), alignment padding
 * added by alignStringInfoInt(), an int32 slot for the position of the next
 * item in the chain (0 when there is none) and a type-specific payload.
 * Slots that refer to child items are first filled with a placeholder and
 * patched once the child has been appended and its position is known.
 *
 * Returns the position of the copied item, counted from the beginning of the
 * jsquery data, i.e. the buffer offset at entry minus VARHDRSZ.
 *
 * Every recursive call can enlarge "buf" and thereby move buf->data, so a
 * slot address must only be computed from buf->data after the recursive call
 * has returned.
 */
static int32
copyJsQuery(StringInfo buf, JsQueryItem *jsq)
{
	JsQueryItem	elem;
	int32		next,
				chld;
	int32		resPos = buf->len - VARHDRSZ;	/* position from beginning of
												 * jsquery data */

	check_stack_depth();

	Assert((jsq->type & jsq->hint) == 0);
	Assert((jsq->type & JSQ_HINT_MASK) == 0);

	appendStringInfoChar(buf, (char) (jsq->type | jsq->hint));
	alignStringInfoInt(buf);

	/*
	 * Remember where the "next" slot lives so it can be patched at the end;
	 * 0 means there is no following item and nothing will be patched.
	 */
	next = (jsqGetNext(jsq, NULL)) ? buf->len : 0;
	appendBinaryStringInfo(buf, (char *) &next /* fake value */ , sizeof(next));

	switch (jsq->type)
	{
		case jqiKey:
		case jqiString:
			{
				int32		len;
				char	   *s;

				s = jsqGetString(jsq, &len);
				appendBinaryStringInfo(buf, (char *) &len, sizeof(len));
				appendBinaryStringInfo(buf, s, len + 1 /* \0 */ );
			}
			break;
		case jqiNumeric:
			{
				Numeric		n = jsqGetNumeric(jsq);

				appendBinaryStringInfo(buf, (char *) n, VARSIZE_ANY(n));
			}
			break;
		case jqiBool:
			{
				bool		v = jsqGetBool(jsq);

				appendBinaryStringInfo(buf, (char *) &v, 1);
			}
			break;
		case jqiArray:
			{
				int32		i,
							arrayStart;

				appendBinaryStringInfo(buf, (char *) &jsq->array.nelems,
									   sizeof(jsq->array.nelems));

				arrayStart = buf->len;

				/* reserve place for "pointers" to array's elements */
				for (i = 0; i < jsq->array.nelems; i++)
					appendBinaryStringInfo(buf, (char *) &i /* fake value */ ,
										   sizeof(i));

				/*
				 * Restart the index: the reservation loop above left it at
				 * nelems, and the element positions must be stored into the
				 * reserved slots starting with slot 0.
				 */
				i = 0;

				while (jsqIterateArray(jsq, &elem))
				{
					chld = copyJsQuery(buf, &elem);
					*(int32 *) (buf->data + arrayStart + i * sizeof(i)) = chld;
					i++;
				}
			}
			break;
		case jqiAnd:
		case jqiOr:
			{
				int32		leftOut,
							rightOut;

				leftOut = buf->len;
				appendBinaryStringInfo(buf, (char *) &leftOut /* fake value */ ,
									   sizeof(leftOut));

				rightOut = buf->len;
				appendBinaryStringInfo(buf, (char *) &rightOut /* fake value */ ,
									   sizeof(rightOut));

				jsqGetLeftArg(jsq, &elem);
				chld = copyJsQuery(buf, &elem);
				*(int32 *) (buf->data + leftOut) = chld;

				jsqGetRightArg(jsq, &elem);
				chld = copyJsQuery(buf, &elem);
				*(int32 *) (buf->data + rightOut) = chld;
			}
			break;
		case jqiEqual:
		case jqiIn:
		case jqiLess:
		case jqiGreater:
		case jqiLessOrEqual:
		case jqiGreaterOrEqual:
		case jqiContains:
		case jqiContained:
		case jqiOverlap:
		case jqiNot:
			{
				int32		argOut = buf->len;

				appendBinaryStringInfo(buf, (char *) &argOut /* fake value */ ,
									   sizeof(argOut));

				jsqGetArg(jsq, &elem);
				chld = copyJsQuery(buf, &elem);
				*(int32 *) (buf->data + argOut) = chld;
			}
			break;
		case jqiNull:
		case jqiCurrent:
		case jqiLength:
		case jqiAny:
		case jqiAnyArray:
		case jqiAnyKey:
		case jqiAll:
		case jqiAllArray:
		case jqiAllKey:
			/* these items carry no payload */
			break;
		default:
			elog(ERROR, "Unknown type: %d", jsq->type);
	}

	if (jsqGetNext(jsq, &elem))
	{
		/*
		 * Finish the recursive copy before forming the slot address: the
		 * call may reallocate buf->data, and the operands of an assignment
		 * are not sequenced relative to each other.
		 */
		chld = copyJsQuery(buf, &elem);
		*(int32 *) (buf->data + next) = chld;
	}

	return resPos;
}
int write2sqlite(char *sqlitedb_name,char *dataset_name, char *sql_string, char *twkb_name,char *id_name,char *idx_geom,char *idx_tbl, char *idx_id, int create) { char *err_msg; int spi_conn; int proc, rc; /*Sqlite*/ sqlite3 *db; TupleDesc tupdesc; SPITupleTable *tuptable; HeapTuple tuple; int i, j; SPIPlanPtr plan; char insert_str[SQLSTRLEN]; Portal cur; void *val_p; int val_int; int64 val_int64; float8 val_float; bool null_check; char *pg_type; int tot_rows = 0; sqlite3_stmt *prepared_statement; spi_conn = SPI_connect(); if (spi_conn!=SPI_OK_CONNECT) ereport(ERROR, ( errmsg("Failed to open SPI Connection"))); /*Open the sqlite db to write to*/ rc = sqlite3_open(sqlitedb_name, &db); if (rc != SQLITE_OK) { sqlite3_close(db); ereport(ERROR, ( errmsg("Cannot open SQLite database"))); } plan = SPI_prepare(sql_string,0,NULL); //ret = SPI_exec(sql_string, 0); cur = SPI_cursor_open("our_cursor", plan,NULL,NULL,true); elog(INFO, "build sql-strings and create table if : %d",create); create_sqlite_table(&cur,db, insert_str,dataset_name,twkb_name, id_name,create); elog(INFO, "back from creating table"); elog(INFO, "inserted sql = %s",insert_str); //TODO add error handling sqlite3_prepare_v2(db,insert_str,strlen(insert_str), &prepared_statement,NULL); do { sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, &err_msg); SPI_cursor_fetch(cur, true,10000); proc = SPI_processed; tot_rows += proc; // if (ret > 0 && SPI_tuptable != NULL) // { tupdesc = SPI_tuptable->tupdesc; tuptable = SPI_tuptable; for (j = 0; j < proc; j++) { tuple = tuptable->vals[j]; for (i = 1; i <= tupdesc->natts; i++) { pg_type = SPI_gettype(tupdesc, i); if(strcmp(pg_type, "bool")==0) { val_int = (bool) (DatumGetBool(SPI_getbinval(tuple,tupdesc,i, &null_check)) ? 
1:0); if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_int(prepared_statement, i,(int) val_int); } if(strcmp(pg_type, "int2")==0) { val_int = (int) DatumGetInt16(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_int(prepared_statement, i,val_int); } else if(strcmp(pg_type, "int4")==0) { val_int = (int) DatumGetInt32(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_int(prepared_statement, i,val_int); } else if(strcmp(pg_type, "int8")==0) { val_int64 = (int64) DatumGetInt64(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_int64(prepared_statement, i,val_int64); } else if(strcmp(pg_type, "float4")==0) { val_float = (float8) DatumGetFloat4(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_double(prepared_statement, i,val_float); } else if(strcmp(pg_type, "float8")==0) { val_float = (float8) DatumGetFloat8(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_double(prepared_statement, i,val_float); } else if(strcmp(pg_type, "bytea")==0) { val_p = (void*) PG_DETOAST_DATUM(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling if(null_check) sqlite3_bind_null(prepared_statement, i); else sqlite3_bind_blob(prepared_statement, i, (const void*) VARDATA_ANY(val_p), VARSIZE_ANY(val_p)-VARHDRSZ, SQLITE_TRANSIENT); } else { // val = (void*) PG_DETOAST_DATUM(SPI_getbinval(tuple,tupdesc,i, &null_check)); //TODO add error handling sqlite3_bind_text(prepared_statement,i,SPI_getvalue(tuple, tupdesc, i),-1,NULL); } } sqlite3_step(prepared_statement); 
sqlite3_clear_bindings(prepared_statement); sqlite3_reset(prepared_statement); } sqlite3_exec(db, "END TRANSACTION", NULL, NULL, &err_msg); elog(INFO, "inserted %d rows in table",tot_rows); } while (proc > 0); if(dataset_name && idx_geom && idx_id) create_spatial_index(db,dataset_name,idx_tbl, idx_geom, idx_id, sql_string,create); else elog(INFO, "Finnishing without spatial index"); SPI_finish(); sqlite3_close(db); return 0; }
Datum make_tuple_indirect(PG_FUNCTION_ARGS) { HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleData tuple; int ncolumns; Datum *values; bool *nulls; Oid tupType; int32 tupTypmod; TupleDesc tupdesc; HeapTuple newtup; int i; MemoryContext old_context; /* Extract type info from the tuple itself */ tupType = HeapTupleHeaderGetTypeId(rec); tupTypmod = HeapTupleHeaderGetTypMod(rec); tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); ncolumns = tupdesc->natts; /* Build a temporary HeapTuple control structure */ tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); heap_deform_tuple(&tuple, tupdesc, values, nulls); old_context = MemoryContextSwitchTo(TopTransactionContext); for (i = 0; i < ncolumns; i++) { struct varlena *attr; struct varlena *new_attr; struct varatt_indirect redirect_pointer; /* only work on existing, not-null varlenas */ if (tupdesc->attrs[i]->attisdropped || nulls[i] || tupdesc->attrs[i]->attlen != -1) continue; attr = (struct varlena *) DatumGetPointer(values[i]); /* don't recursively indirect */ if (VARATT_IS_EXTERNAL_INDIRECT(attr)) continue; /* copy datum, so it still lives later */ if (VARATT_IS_EXTERNAL_ONDISK(attr)) attr = heap_tuple_fetch_attr(attr); else { struct varlena *oldattr = attr; attr = palloc0(VARSIZE_ANY(oldattr)); memcpy(attr, oldattr, VARSIZE_ANY(oldattr)); } /* build indirection Datum */ new_attr = (struct varlena *) palloc0(INDIRECT_POINTER_SIZE); redirect_pointer.pointer = attr; SET_VARTAG_EXTERNAL(new_attr, VARTAG_INDIRECT); memcpy(VARDATA_EXTERNAL(new_attr), &redirect_pointer, sizeof(redirect_pointer)); values[i] = PointerGetDatum(new_attr); } newtup = heap_form_tuple(tupdesc, values, nulls); pfree(values); pfree(nulls); ReleaseTupleDesc(tupdesc); MemoryContextSwitchTo(old_context); PG_RETURN_HEAPTUPLEHEADER(newtup->t_data); }
/*
 * do_like_escape
 *		Rewrite the LIKE pattern "pat", which uses the escape string "esc",
 *		into an equivalent pattern that uses backslash as its escape
 *		character.
 *
 * An empty "esc" means no escape character is wanted: every backslash in
 * the pattern is doubled so that it acts as an ordinary character.
 * Otherwise "esc" must consist of exactly one character, or an error is
 * raised.  If that character is a backslash the pattern is returned as a
 * plain copy; if not, unescaped occurrences of the escape character become
 * backslashes and literal backslashes are doubled, unless they directly
 * follow an escape character.
 *
 * The result is a freshly palloc'd text value.
 */
static text *
do_like_escape(text *pat, text *esc)
{
	text	   *result;
	char	   *src = VARDATA_ANY(pat);
	int			srclen = VARSIZE_ANY_EXHDR(pat);
	char	   *escch = VARDATA_ANY(esc);
	int			esclen = VARSIZE_ANY_EXHDR(esc);
	char	   *dst;
	bool		afterescape;

	/*
	 * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
	 * trying to calculate the size more accurately than that.
	 */
	result = (text *) palloc(srclen * 2 + VARHDRSZ);
	dst = VARDATA(result);

	if (esclen == 0)
	{
		/*
		 * No escape character is wanted.  Double any backslashes in the
		 * pattern to make them act like ordinary characters.
		 */
		while (srclen > 0)
		{
			if (*src == '\\')
				*dst++ = '\\';
			CopyAdvChar(dst, src, srclen);
		}

		SET_VARSIZE(result, dst - ((char *) result));
		return result;
	}

	/*
	 * The specified escape must be only a single character: after stepping
	 * over one character nothing may be left.
	 */
	NextChar(escch, esclen);
	if (esclen != 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
				 errmsg("invalid escape string"),
				 errhint("Escape string must be empty or one character.")));

	/* NextChar() advanced the pointer; go back to the escape character */
	escch = VARDATA_ANY(esc);

	/*
	 * If specified escape is '\', just copy the pattern as-is.
	 */
	if (*escch == '\\')
	{
		memcpy(result, pat, VARSIZE_ANY(pat));
		return result;
	}

	/*
	 * Otherwise, convert occurrences of the specified escape character to
	 * '\', and double occurrences of '\' --- unless they immediately follow
	 * an escape character!
	 */
	afterescape = false;
	while (srclen > 0)
	{
		if (CHAREQ(src, escch) && !afterescape)
		{
			*dst++ = '\\';
			NextChar(src, srclen);
			afterescape = true;
			continue;
		}

		if (*src == '\\')
		{
			*dst++ = '\\';
			if (!afterescape)
				*dst++ = '\\';
			NextChar(src, srclen);
		}
		else
			CopyAdvChar(dst, src, srclen);

		afterescape = false;
	}

	SET_VARSIZE(result, dst - ((char *) result));
	return result;
}
/*
 * statext_dependencies_deserialize
 *		Reads serialized dependencies into MVDependencies structure.
 *
 * The serialized form is a bytea whose payload starts with three uint32
 * header fields (magic, type, ndeps) followed by ndeps items; each item is
 * a double (degree of validity), an AttrNumber (number of attributes) and
 * that many AttrNumber values.
 *
 * Returns NULL when "data" is NULL; otherwise a palloc'd MVDependencies
 * whose deps[] entries are palloc'd individually.  Any inconsistency found
 * in the data is reported as an error.
 */
MVDependencies *
statext_dependencies_deserialize(bytea *data)
{
	int			i;
	Size		min_expected_size;
	MVDependencies *dependencies;
	char	   *tmp;
	char	   *end;

	if (data == NULL)
		return NULL;

	if (VARSIZE_ANY_EXHDR(data) < SizeOfDependencies)
		elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
			 (size_t) VARSIZE_ANY_EXHDR(data), (size_t) SizeOfDependencies);

	/* read the MVDependencies header */
	dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));

	/* initialize pointer to the data part (skip the varlena header) */
	tmp = VARDATA_ANY(data);

	/* first byte past the bytea, used to bound every read below */
	end = (char *) data + VARSIZE_ANY(data);

	/* read the header fields and perform basic sanity checks */
	memcpy(&dependencies->magic, tmp, sizeof(uint32));
	tmp += sizeof(uint32);
	memcpy(&dependencies->type, tmp, sizeof(uint32));
	tmp += sizeof(uint32);
	memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
	tmp += sizeof(uint32);

	if (dependencies->magic != STATS_DEPS_MAGIC)
		elog(ERROR, "invalid dependency magic %08x (expected %08x)",
			 (unsigned int) dependencies->magic,
			 (unsigned int) STATS_DEPS_MAGIC);

	if (dependencies->type != STATS_DEPS_TYPE_BASIC)
		elog(ERROR, "invalid dependency type %d (expected %d)",
			 dependencies->type, STATS_DEPS_TYPE_BASIC);

	if (dependencies->ndeps == 0)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid zero-length item array in MVDependencies")));

	/* what minimum bytea size do we expect for those parameters */
	min_expected_size = SizeOfDependencies +
		dependencies->ndeps * (SizeOfDependency +
							   sizeof(AttrNumber) * 2);

	if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
		elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
			 (size_t) VARSIZE_ANY_EXHDR(data), (size_t) min_expected_size);

	/* allocate space for the array of pointers to the dependencies */
	dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
							+ (dependencies->ndeps * sizeof(MVDependency *)));

	for (i = 0; i < dependencies->ndeps; i++)
	{
		double		degree;
		AttrNumber	k;
		MVDependency *d;

		/* the fixed-size part of the item must still be inside the bytea */
		if ((Size) (end - tmp) < sizeof(double) + sizeof(AttrNumber))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("truncated item %d in MVDependencies", i)));

		/* degree of validity */
		memcpy(&degree, tmp, sizeof(double));
		tmp += sizeof(double);

		/* number of attributes */
		memcpy(&k, tmp, sizeof(AttrNumber));
		tmp += sizeof(AttrNumber);

		/*
		 * Is the number of attributes valid, and are that many attribute
		 * numbers actually present?  Checked at run time because k sizes
		 * both the allocation and the copy below.
		 */
		if (k < 2 || k > STATS_MAX_DIMENSIONS ||
			(Size) (end - tmp) < sizeof(AttrNumber) * k)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("invalid number of attributes %d in MVDependencies item %d",
							(int) k, i)));

		/* now that we know the number of attributes, allocate the dependency */
		d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
									 + (k * sizeof(AttrNumber)));

		d->degree = degree;
		d->nattributes = k;

		/* copy attribute numbers */
		memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
		tmp += sizeof(AttrNumber) * d->nattributes;

		dependencies->deps[i] = d;

		/* still within the bytea */
		Assert(tmp <= end);
	}

	/* we should have consumed the whole bytea exactly */
	Assert(tmp == end);

	return dependencies;
}
/*
 *	compute_tsvector_stats() -- compute statistics for a tsvector column
 *
 *	This function computes statistics that are useful for determining @@
 *	operations' selectivity, along with the fraction of non-null rows and
 *	average width.
 *
 *	Instead of finding the most common values, as we do for most datatypes,
 *	we're looking for the most common lexemes. This is more useful, because
 *	there most probably won't be any two rows with the same tsvector and thus
 *	the notion of a MCV is a bit bogus with this datatype. With a list of the
 *	most common lexemes we can do a better job at figuring out @@ selectivity.
 *
 *	For the same reasons we assume that tsvector columns are unique when
 *	determining the number of distinct values.
 *
 *	The algorithm used is Lossy Counting, as proposed in the paper "Approximate
 *	frequency counts over data streams" by G. S. Manku and R. Motwani, in
 *	Proceedings of the 28th International Conference on Very Large Data Bases,
 *	Hong Kong, China, August 2002, section 4.2. The paper is available at
 *	http://www.vldb.org/conf/2002/S10P03.pdf
 *
 *	The Lossy Counting (aka LC) algorithm goes like this:
 *	Let s be the threshold frequency for an item (the minimum frequency we
 *	are interested in) and epsilon the error margin for the frequency. Let D
 *	be a set of triples (e, f, delta), where e is an element value, f is that
 *	element's frequency (actually, its current occurrence count) and delta is
 *	the maximum error in f. We start with D empty and process the elements in
 *	batches of size w. (The batch size is also known as "bucket size" and is
 *	equal to 1/epsilon.) Let the current batch number be b_current, starting
 *	with 1. For each element e we either increment its f count, if it's
 *	already in D, or insert a new triple into D with values (e, 1, b_current
 *	- 1). After processing each batch we prune D, by removing from it all
 *	elements with f + delta <= b_current.  After the algorithm finishes we
 *	suppress all elements from D that do not satisfy f >= (s - epsilon) * N,
 *	where N is the total number of elements in the input.  We emit the
 *	remaining elements with estimated frequency f/N.  The LC paper proves
 *	that this algorithm finds all elements with true frequency at least s,
 *	and that no frequency is overestimated or is underestimated by more than
 *	epsilon.  Furthermore, given reasonable assumptions about the input
 *	distribution, the required table size is no more than about 7 times w.
 *
 *	We set s to be the estimated frequency of the K'th word in a natural
 *	language's frequency table, where K is the target number of entries in
 *	the MCELEM array plus an arbitrary constant, meant to reflect the fact
 *	that the most common words in any language would usually be stopwords
 *	so we will not actually see them in the input.  We assume that the
 *	distribution of word frequencies (including the stopwords) follows Zipf's
 *	law with an exponent of 1.
 *
 *	Assuming Zipfian distribution, the frequency of the K'th word is equal
 *	to 1/(K * H(W)) where H(n) is 1/2 + 1/3 + ... + 1/n and W is the number of
 *	words in the language.  Putting W as one million, we get roughly 0.07/K.
 *	Assuming top 10 words are stopwords gives s = 0.07/(K + 10).  We set
 *	epsilon = s/10, which gives bucket width w = (K + 10)/0.007 and
 *	maximum expected hashtable size of about 1000 * (K + 10).
 *
 *	Note: in the above discussion, s, epsilon, and f/N are in terms of a
 *	lexeme's frequency as a fraction of all lexemes seen in the input.
 *	However, what we actually want to store in the finished pg_statistic
 *	entry is each lexeme's frequency as a fraction of all rows that it occurs
 *	in.  Assuming that the input tsvectors are correctly constructed, no
 *	lexeme occurs more than once per tsvector, so the final count f is a
 *	correct estimate of the number of input tsvectors it occurs in, and we
 *	need only change the divisor from N to nonnull_cnt to get the number we
 *	want.
 */
static void
compute_tsvector_stats(VacAttrStats *stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			num_mcelem;
	int			null_cnt = 0;
	double		total_width = 0;

	/* This is D from the LC algorithm. */
	HTAB	   *lexemes_tab;
	HASHCTL		hash_ctl;
	HASH_SEQ_STATUS scan_status;

	/* This is the current bucket number from the LC algorithm */
	int			b_current;

	/* This is 'w' from the LC algorithm */
	int			bucket_width;
	int			vector_no,
				lexeme_no;
	LexemeHashKey hash_key;
	TrackItem  *item;

	/*
	 * We want statistics_target * 10 lexemes in the MCELEM array.  This
	 * multiplier is pretty arbitrary, but is meant to reflect the fact that
	 * the number of individual lexeme values tracked in pg_statistic ought to
	 * be more than the number of values for a simple scalar column.
	 */
	num_mcelem = stats->attr->attstattarget * 10;

	/*
	 * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
	 * comment above.
	 */
	bucket_width = (num_mcelem + 10) * 1000 / 7;

	/*
	 * Create the hashtable. It will be in local memory, so we don't need to
	 * worry about overflowing the initial size. Also we don't need to pay any
	 * attention to locking and memory management.
	 */
	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(LexemeHashKey);
	hash_ctl.entrysize = sizeof(TrackItem);
	hash_ctl.hash = lexeme_hash;
	hash_ctl.match = lexeme_match;
	hash_ctl.hcxt = CurrentMemoryContext;
	lexemes_tab = hash_create("Analyzed lexemes table",
							  num_mcelem,
							  &hash_ctl,
							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);

	/* Initialize counters. */
	b_current = 1;
	lexeme_no = 0;

	/* Loop over the tsvectors. */
	for (vector_no = 0; vector_no < samplerows; vector_no++)
	{
		Datum		value;
		bool		isnull;
		TSVector	vector;
		WordEntry  *curentryptr;
		char	   *lexemesptr;
		int			j;

		vacuum_delay_point();

		value = fetchfunc(stats, vector_no, &isnull);

		/*
		 * Check for null/nonnull.
		 */
		if (isnull)
		{
			null_cnt++;
			continue;
		}

		/*
		 * Add up widths for average-width calculation.  Since it's a
		 * tsvector, we know it's varlena.  As in the regular
		 * compute_minimal_stats function, we use the toasted width for this
		 * calculation.
		 */
		total_width += VARSIZE_ANY(DatumGetPointer(value));

		/*
		 * Now detoast the tsvector if needed.
		 */
		vector = DatumGetTSVector(value);

		/*
		 * We loop through the lexemes in the tsvector and add them to our
		 * tracking hashtable.
		 */
		lexemesptr = STRPTR(vector);
		curentryptr = ARRPTR(vector);
		for (j = 0; j < vector->size; j++)
		{
			bool		found;

			/*
			 * Construct a hash key.  The key points into the (detoasted)
			 * tsvector value at this point, but if a new entry is created, we
			 * make a copy of it.  This way we can free the tsvector value
			 * once we've processed all its lexemes.
			 */
			hash_key.lexeme = lexemesptr + curentryptr->pos;
			hash_key.length = curentryptr->len;

			/* Lookup current lexeme in hashtable, adding it if new */
			item = (TrackItem *) hash_search(lexemes_tab,
											 (const void *) &hash_key,
											 HASH_ENTER, &found);

			if (found)
			{
				/* The lexeme is already on the tracking list */
				item->frequency++;
			}
			else
			{
				/* Initialize new tracking list element */
				item->frequency = 1;
				item->delta = b_current - 1;

				item->key.lexeme = palloc(hash_key.length);
				memcpy(item->key.lexeme, hash_key.lexeme, hash_key.length);
			}

			/* lexeme_no is the number of elements processed (ie N) */
			lexeme_no++;

			/* We prune the D structure after processing each bucket */
			if (lexeme_no % bucket_width == 0)
			{
				prune_lexemes_hashtable(lexemes_tab, b_current);
				b_current++;
			}

			/* Advance to the next WordEntry in the tsvector */
			curentryptr++;
		}

		/* If the vector was toasted, free the detoasted copy. */
		if (TSVectorGetDatum(vector) != value)
			pfree(vector);
	}

	/* We can only compute real stats if we found some non-null values. */
	if (null_cnt < samplerows)
	{
		int			nonnull_cnt = samplerows - null_cnt;
		int			i;
		TrackItem **sort_table;
		int			track_len;
		int			cutoff_freq;
		int			minfreq,
					maxfreq;

		stats->stats_valid = true;
		/* Do the simple null-frac and average width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		stats->stawidth = total_width / (double) nonnull_cnt;

		/* Assume it's a unique column (see notes above) */
		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);

		/*
		 * Construct an array of the interesting hashtable items, that is,
		 * those meeting the cutoff frequency (s - epsilon)*N.  Also identify
		 * the minimum and maximum frequencies among these items.
		 *
		 * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
		 * frequency is 9*N / bucket_width.
		 *
		 * The product is formed in 64 bits: with a large sample, 9 * N can
		 * exceed the range of int even though N itself fits.  The quotient
		 * is smaller than N, so narrowing it back to int is safe.
		 */
		cutoff_freq = (int) ((int64) 9 * lexeme_no / bucket_width);

		i = hash_get_num_entries(lexemes_tab);	/* surely enough space */
		sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);

		hash_seq_init(&scan_status, lexemes_tab);
		track_len = 0;
		minfreq = lexeme_no;
		maxfreq = 0;
		while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
		{
			if (item->frequency > cutoff_freq)
			{
				sort_table[track_len++] = item;
				minfreq = Min(minfreq, item->frequency);
				maxfreq = Max(maxfreq, item->frequency);
			}
		}
		Assert(track_len <= i);

		/* emit some statistics for debug purposes */
		elog(DEBUG3, "tsvector_stats: target # mces = %d, bucket width = %d, "
			 "# lexemes = %d, hashtable size = %d, usable entries = %d",
			 num_mcelem, bucket_width, lexeme_no, i, track_len);

		/*
		 * If we obtained more lexemes than we really want, get rid of those
		 * with least frequencies.  The easiest way is to qsort the array into
		 * descending frequency order and truncate the array.
		 */
		if (num_mcelem < track_len)
		{
			qsort(sort_table, track_len, sizeof(TrackItem *),
				  trackitem_compare_frequencies_desc);
			/* reset minfreq to the smallest frequency we're keeping */
			minfreq = sort_table[num_mcelem - 1]->frequency;
		}
		else
			num_mcelem = track_len;

		/* Generate MCELEM slot entry */
		if (num_mcelem > 0)
		{
			MemoryContext old_context;
			Datum	   *mcelem_values;
			float4	   *mcelem_freqs;

			/*
			 * We want to store statistics sorted on the lexeme value using
			 * first length, then byte-for-byte comparison. The reason for
			 * doing length comparison first is that we don't care about the
			 * ordering so long as it's consistent, and comparing lengths
			 * first gives us a chance to avoid a strncmp() call.
			 *
			 * This is different from what we do with scalar statistics --
			 * they get sorted on frequencies. The rationale is that we
			 * usually search through most common elements looking for a
			 * specific value, so we can grab its frequency.  When values are
			 * presorted we can employ binary search for that.  See
			 * ts_selfuncs.c for a real usage scenario.
			 */
			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
				  trackitem_compare_lexemes);

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);

			/*
			 * We sorted statistics on the lexeme value, but we want to be
			 * able to find out the minimal and maximal frequency without
			 * going through all the values.  We keep those two extra
			 * frequencies in two extra cells in mcelem_freqs.
			 *
			 * (Note: the MCELEM statistics slot definition allows for a third
			 * extra number containing the frequency of nulls, but we don't
			 * create that for a tsvector column, since null elements aren't
			 * possible.)
			 */
			mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
			mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));

			/*
			 * See comments above about use of nonnull_cnt as the divisor for
			 * the final frequency estimates.
			 */
			for (i = 0; i < num_mcelem; i++)
			{
				/* named differently from the outer "item" to avoid shadowing */
				TrackItem  *titem = sort_table[i];

				mcelem_values[i] =
					PointerGetDatum(cstring_to_text_with_len(titem->key.lexeme,
															 titem->key.length));
				mcelem_freqs[i] = (double) titem->frequency / (double) nonnull_cnt;
			}
			/* i == num_mcelem here: fill the two extra min/max cells */
			mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
			mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCELEM;
			stats->staop[0] = TextEqualOperator;
			stats->stacoll[0] = DEFAULT_COLLATION_OID;
			stats->stanumbers[0] = mcelem_freqs;
			/* See above comment about two extra frequency fields */
			stats->numnumbers[0] = num_mcelem + 2;
			stats->stavalues[0] = mcelem_values;
			stats->numvalues[0] = num_mcelem;
			/* We are storing text values */
			stats->statypid[0] = TEXTOID;
			stats->statyplen[0] = -1;	/* typlen, -1 for varlena */
			stats->statypbyval[0] = false;
			stats->statypalign[0] = 'i';
		}
	}
	else
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		stats->stawidth = 0;	/* "unknown" */
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/*
	 * We don't need to bother cleaning up any of our temporary palloc's. The
	 * hashtable should also go away, as it used a child memory context.
	 */
}
/*
 * statext_ndistinct_deserialize
 *		Read an on-disk bytea format MVNDistinct to in-memory format
 *
 * The payload starts with three uint32 header fields (magic, type, nitems)
 * followed by nitems items; each item is a double (the ndistinct value), an
 * int (number of attributes) and that many AttrNumber values.
 *
 * Returns NULL when "data" is NULL; otherwise a palloc'd MVNDistinct whose
 * items keep their attribute numbers in separately allocated bitmapsets.
 * Any inconsistency found in the data is reported as an error.
 */
MVNDistinct *
statext_ndistinct_deserialize(bytea *data)
{
	int			i;
	Size		minimum_size;
	MVNDistinct ndist;
	MVNDistinct *ndistinct;
	char	   *tmp;
	char	   *end;

	if (data == NULL)
		return NULL;

	/* we expect at least the basic fields of MVNDistinct struct */
	if (VARSIZE_ANY_EXHDR(data) < SizeOfMVNDistinct)
		elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
			 (size_t) VARSIZE_ANY_EXHDR(data), (size_t) SizeOfMVNDistinct);

	/* initialize pointer to the data part (skip the varlena header) */
	tmp = VARDATA_ANY(data);

	/* first byte past the bytea, used to bound every read below */
	end = (char *) data + VARSIZE_ANY(data);

	/* read the header fields and perform basic sanity checks */
	memcpy(&ndist.magic, tmp, sizeof(uint32));
	tmp += sizeof(uint32);
	memcpy(&ndist.type, tmp, sizeof(uint32));
	tmp += sizeof(uint32);
	memcpy(&ndist.nitems, tmp, sizeof(uint32));
	tmp += sizeof(uint32);

	if (ndist.magic != STATS_NDISTINCT_MAGIC)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid ndistinct magic %08x (expected %08x)",
						ndist.magic, STATS_NDISTINCT_MAGIC)));
	if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid ndistinct type %d (expected %d)",
						ndist.type, STATS_NDISTINCT_TYPE_BASIC)));
	if (ndist.nitems == 0)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid zero-length item array in MVNDistinct")));

	/* what minimum bytea size do we expect for those parameters */
	minimum_size = (SizeOfMVNDistinct +
					ndist.nitems * (SizeOfMVNDistinctItem +
									sizeof(AttrNumber) * 2));
	if (VARSIZE_ANY_EXHDR(data) < minimum_size)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid MVNDistinct size %zu (expected at least %zu)",
						(size_t) VARSIZE_ANY_EXHDR(data),
						(size_t) minimum_size)));

	/*
	 * Allocate space for the ndistinct items (no space for each item's
	 * attnos: those live in bitmapsets allocated separately)
	 */
	ndistinct = palloc0(MAXALIGN(SizeOfMVNDistinct) +
						(ndist.nitems * sizeof(MVNDistinctItem)));
	ndistinct->magic = ndist.magic;
	ndistinct->type = ndist.type;
	ndistinct->nitems = ndist.nitems;

	for (i = 0; i < ndistinct->nitems; i++)
	{
		MVNDistinctItem *item = &ndistinct->items[i];
		int			nelems;

		item->attrs = NULL;

		/* the fixed-size part of the item must still be inside the bytea */
		if ((Size) (end - tmp) < sizeof(double) + sizeof(int))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("truncated item %d in MVNDistinct", i)));

		/* ndistinct value */
		memcpy(&item->ndistinct, tmp, sizeof(double));
		tmp += sizeof(double);

		/* number of attributes */
		memcpy(&nelems, tmp, sizeof(int));
		tmp += sizeof(int);

		/*
		 * Validate the count at run time, and make sure that many attribute
		 * numbers are really present, because nelems drives the read loop
		 * below.
		 */
		if (nelems < 2 || nelems > STATS_MAX_DIMENSIONS ||
			(Size) (end - tmp) < nelems * sizeof(AttrNumber))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("invalid number of attributes %d in MVNDistinct item %d",
							nelems, i)));

		while (nelems-- > 0)
		{
			AttrNumber	attno;

			memcpy(&attno, tmp, sizeof(AttrNumber));
			tmp += sizeof(AttrNumber);
			item->attrs = bms_add_member(item->attrs, attno);
		}

		/* still within the bytea */
		Assert(tmp <= end);
	}

	/* we should have consumed the whole bytea exactly */
	Assert(tmp == end);

	return ndistinct;
}
/*
 * datumstreamwrite_lob
 *		Write one large variable-length datum as a block of its own and
 *		register that block in the block directory.
 *
 * The stream must describe a variable-length type (typeInfo.datumlen < 0)
 * and the datum must not be in an extended varlena form; either violation
 * raises an error.  The datum is written with AppendOnlyStorageWrite_Content
 * as an AOCSBK_BLOB block holding a single row, starting at
 * acc->blockFirstRowNum, and a block directory entry with item count 1 is
 * inserted for column group "colGroupNo".
 *
 * Returns the size of the datum as reported by VARSIZE_ANY().
 */
int64
datumstreamwrite_lob(DatumStreamWrite * acc,
					 Datum d,
					 AppendOnlyBlockDirectory *blockDirectory,
					 int colGroupNo,
					 bool addColAction)
{
	uint8	   *lob;
	int32		lobLen;

	Assert(acc);
	Assert(acc->datumStreamVersion == DatumStreamVersion_Original ||
		   acc->datumStreamVersion == DatumStreamVersion_Dense ||
		   acc->datumStreamVersion == DatumStreamVersion_Dense_Enhanced);

	if (acc->typeInfo.datumlen >= 0)
		elog(ERROR, "Large object must be variable length objects (varlena)");

	/*
	 * If the datum is toasted / compressed -- an error.
	 */
	if (VARATT_IS_EXTENDED(DatumGetPointer(d)))
		elog(ERROR, "Expected large object / variable length objects (varlena) to be de-toasted and/or de-compressed at this point");

	/*
	 * De-Toast Datum
	 *
	 * NOTE(review): if VARATT_IS_EXTENDED() is true for every external
	 * datum, the check above already raised an error and this branch can
	 * never run -- confirm against the varlena macros in use.
	 */
	if (VARATT_IS_EXTERNAL(DatumGetPointer(d)))
		d = PointerGetDatum(heap_tuple_fetch_attr(DatumGetPointer(d)));

	lob = (uint8 *) DatumGetPointer(d);
	lobLen = VARSIZE_ANY(lob);

	if (Debug_datumstream_write_print_large_varlena_info)
		datumstreamwrite_print_large_varlena_info(acc, lob);

	/* Set the BlockFirstRowNum */
	AppendOnlyStorageWrite_SetFirstRowNum(&acc->ao_write,
										  acc->blockFirstRowNum);

	AppendOnlyStorageWrite_Content(&acc->ao_write,
								   lob,
								   lobLen,
								   AOCSBK_BLOB,
								    /* rowCount */ 1);

	/* Insert an entry to the block directory */
	AppendOnlyBlockDirectory_InsertEntry(blockDirectory,
										 colGroupNo,
										 acc->blockFirstRowNum,
										 AppendOnlyStorageWrite_LogicalBlockStartOffset(&acc->ao_write),
										 1, /* itemCount -- always just the lob
											 * just inserted */
										 addColAction);

	return lobLen;
}
/* ----------------
 *		printtup --- print a tuple in protocol 3.0
 *
 * Sends the contents of "slot" as one DataRow ('D') message: a 2-byte
 * attribute count followed, for every attribute, by a 4-byte length and the
 * value bytes.  A NULL attribute is sent as length -1 with no value.  Each
 * value is produced by the attribute's text output function when its format
 * code is 0 and by its binary send function otherwise.
 * ----------------
 */
static void
printtup(TupleTableSlot *slot, DestReceiver *self)
{
	DR_printtup *myState = (DR_printtup *) self;
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	int			natts = typeinfo->natts;
	StringInfoData buf;
	int			attidx;

	/* Set or update my derived attribute info, if needed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	/*
	 * Prepare a DataRow message
	 */
	pq_beginmessage(&buf, 'D');

	pq_sendint(&buf, natts, 2);

	/*
	 * send the attributes of this tuple
	 */
	for (attidx = 0; attidx < natts; attidx++)
	{
		PrinttupAttrInfo *thisState = &myState->myinfo[attidx];
		Datum		origattr = slot->tts_values[attidx];
		Datum		attr = origattr;

		if (slot->tts_isnull[attidx])
		{
			pq_sendint(&buf, -1, 4);
			continue;
		}

		/*
		 * If we have a toasted datum, forcibly detoast it here to avoid
		 * memory leakage inside the type's output routine.
		 *
		 * Here we catch undefined bytes in tuples that are returned to the
		 * client without hitting disk; see comments at the related check in
		 * PageAddItem().  Whether to test before or after detoast is somewhat
		 * arbitrary, as is whether to test external/compressed data at all.
		 * Undefined bytes in the pre-toast datum will have triggered Valgrind
		 * errors in the compressor or toaster; any error detected here for
		 * such datums would indicate an (unlikely) bug in a type-independent
		 * facility.  Therefore, this test is most useful for uncompressed,
		 * non-external datums.
		 *
		 * We don't presently bother checking non-varlena datums for undefined
		 * data.  PageAddItem() does check them.
		 */
		if (thisState->typisvarlena)
		{
			VALGRIND_CHECK_MEM_IS_DEFINED(origattr, VARSIZE_ANY(origattr));

			attr = PointerGetDatum(PG_DETOAST_DATUM(origattr));
		}

		if (thisState->format != 0)
		{
			/* Binary output */
			bytea	   *sendbytes = SendFunctionCall(&thisState->finfo, attr);

			pq_sendint(&buf, VARSIZE(sendbytes) - VARHDRSZ, 4);
			pq_sendbytes(&buf, VARDATA(sendbytes),
						 VARSIZE(sendbytes) - VARHDRSZ);
			pfree(sendbytes);
		}
		else
		{
			/* Text output */
			char	   *textval = OutputFunctionCall(&thisState->finfo, attr);

			pq_sendcountedtext(&buf, textval, strlen(textval), false);
			pfree(textval);
		}

		/* Clean up detoasted copy, if any */
		if (DatumGetPointer(attr) != DatumGetPointer(origattr))
			pfree(DatumGetPointer(attr));
	}

	pq_endmessage(&buf);
}
/*
 *	compute_tsvector_stats() -- compute statistics for a tsvector column
 *
 *	Produces the statistics that help estimate the selectivity of @@
 *	operations, plus the null fraction and the average width.
 *
 *	Whole tsvector values are almost never repeated between rows, so a
 *	most-common-values list would be of little use.  We collect the most
 *	common *lexemes* instead, which is what @@ selectivity estimation can
 *	actually exploit.  For the same reason the column is assumed to be
 *	unique when reporting the number of distinct values.
 *
 *	Lexeme frequencies are gathered with the Lossy Counting (LC) algorithm
 *	from "Approximate frequency counts over data streams" by G. S. Manku and
 *	R. Motwani, Proceedings of the 28th International Conference on Very
 *	Large Data Bases, Hong Kong, China, August 2002, section 4.2
 *	(http://www.vldb.org/conf/2002/S10P03.pdf).
 *
 *	LC in brief: keep a set D of triples (e, f, d) where e is an element, f
 *	its occurrence count so far and d the maximum error in f.  D starts out
 *	empty and the input is consumed in batches ("buckets") of w elements;
 *	b_current is the number of the batch being processed, starting at 1.
 *	An element already in D gets its f incremented; a new element is added
 *	as (e, 1, b_current - 1).  At the end of each batch every triple with
 *	f + d <= b_current is dropped from D.  At the end the elements with the
 *	largest f are reported.  The paper proves error bounds on f in terms of
 *	w and shows that D never needs more than a few times w entries.
 *
 *	D is a hashtable here, and w is statistics_target * 10; the factor 10 is
 *	an arbitrary constant standing in for the typical number of lexemes in
 *	one tsvector.
 */
static void
compute_tsvector_stats(VacAttrStats *stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			num_mcelem;
	int			nulls_seen = 0;
	double		width_sum = 0;

	/* D from the LC algorithm, and the tools to build and walk it */
	HTAB	   *lexeme_table;
	HASHCTL		table_ctl;
	HASH_SEQ_STATUS seq_scan;

	/* b_current from the LC algorithm */
	int			cur_bucket = 1;

	/* 'w' from the LC algorithm */
	int			bucket_width;

	/* running, 1-based count of lexemes processed so far */
	int			lexemes_seen = 1;

	int			row;
	LexemeHashKey lookup_key;
	TrackItem  *entry;

	/* working state for the final "pick the winners" phase */
	int			nonnull_cnt;
	int			i;
	TrackItem **sort_table;
	int			track_len;
	int			minfreq,
				maxfreq;
	MemoryContext old_context;
	Datum	   *mcelem_values;
	float4	   *mcelem_freqs;

	/* The MCELEM array should hold statistics_target * 10 lexemes */
	num_mcelem = stats->attr->attstattarget * 10;

	/*
	 * The bucket width is simply the target number of result lexemes.  That
	 * is probably in the right neighbourhood, though it might deserve some
	 * scaling up or down.
	 */
	bucket_width = num_mcelem;

	/*
	 * Build the hashtable.  It lives in local memory, so neither its initial
	 * size nor locking nor memory management needs much thought.
	 */
	MemSet(&table_ctl, 0, sizeof(table_ctl));
	table_ctl.keysize = sizeof(LexemeHashKey);
	table_ctl.entrysize = sizeof(TrackItem);
	table_ctl.hash = lexeme_hash;
	table_ctl.match = lexeme_match;
	table_ctl.hcxt = CurrentMemoryContext;
	lexeme_table = hash_create("Analyzed lexemes table",
							   bucket_width * 4,
							   &table_ctl,
					HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);

	/* Pass over every sampled tsvector */
	for (row = 0; row < samplerows; row++)
	{
		Datum		value;
		bool		isnull;
		TSVector	vector;
		WordEntry  *entries;
		char	   *lexeme_base;
		int			j;

		vacuum_delay_point();

		value = fetchfunc(stats, row, &isnull);

		/* Nulls are only counted */
		if (isnull)
		{
			nulls_seen++;
			continue;
		}

		/*
		 * Accumulate the width for the average-width figure.  A tsvector is
		 * always varlena, and as in the regular compute_minimal_stats
		 * function it is the toasted width that gets counted.
		 */
		width_sum += VARSIZE_ANY(DatumGetPointer(value));

		/* Detoast the tsvector if that is needed */
		vector = DatumGetTSVector(value);

		/*
		 * Feed each lexeme of this tsvector into the tracking hashtable.
		 * The hashtable keys point straight into the (detoasted) tsvector,
		 * so that storage has to stay around until we are finished.
		 */
		lexeme_base = STRPTR(vector);
		entries = ARRPTR(vector);

		for (j = 0; j < vector->size; j++, lexemes_seen++)
		{
			bool		found;

			/* Describe the j'th lexeme as a hash key */
			lookup_key.lexeme = lexeme_base + entries[j].pos;
			lookup_key.length = entries[j].len;

			/* Find the lexeme's entry, creating one if it is new */
			entry = (TrackItem *) hash_search(lexeme_table,
											  (const void *) &lookup_key,
											  HASH_ENTER, &found);

			if (!found)
			{
				/* First sighting: start a fresh tracking entry */
				entry->frequency = 1;
				entry->delta = cur_bucket - 1;
			}
			else
			{
				/* Already tracked: one more occurrence */
				entry->frequency++;
			}

			/* D is pruned each time a bucket has been filled */
			if (lexemes_seen % bucket_width == 0)
			{
				prune_lexemes_hashtable(lexeme_table, cur_bucket);
				cur_bucket++;
			}
		}
	}

	/* Without any non-null value there is nothing real to compute */
	if (nulls_seen >= samplerows)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		stats->stawidth = 0;	/* "unknown" */
		stats->stadistinct = 0.0;		/* "unknown" */
		return;
	}

	nonnull_cnt = samplerows - nulls_seen;

	stats->stats_valid = true;

	/* The easy ones: null fraction and average width */
	stats->stanullfrac = (double) nulls_seen / (double) samplerows;
	stats->stawidth = width_sum / (double) nonnull_cnt;

	/* Treat the column as unique (see the notes in the header comment) */
	stats->stadistinct = -1.0;

	/*
	 * Find the top-N lexemes the simple way: collect pointers to all
	 * hashtable entries in an array and qsort() it.
	 */
	track_len = hash_get_num_entries(lexeme_table);
	sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * track_len);

	hash_seq_init(&seq_scan, lexeme_table);
	for (i = 0;
		 (entry = (TrackItem *) hash_seq_search(&seq_scan)) != NULL;
		 i++)
		sort_table[i] = entry;
	Assert(i == track_len);

	qsort(sort_table, track_len, sizeof(TrackItem *),
		  trackitem_compare_frequencies_desc);

	/* Drop the trailing entries that occurred only once */
	while (track_len > 0 && sort_table[track_len - 1]->frequency <= 1)
		track_len--;

	/* We cannot store more lexemes than survived */
	if (num_mcelem > track_len)
		num_mcelem = track_len;

	/* Nothing worth storing: no MCELEM slot is generated */
	if (num_mcelem <= 0)
		return;

	/*
	 * Remember the smallest and largest frequency that will be stored; this
	 * must happen while the array is still ordered by frequency.
	 */
	minfreq = sort_table[num_mcelem - 1]->frequency;
	maxfreq = sort_table[0]->frequency;

	/*
	 * The stored statistics are ordered by lexeme: length first, then
	 * byte-for-byte.  Any consistent order would do, and comparing lengths
	 * first often saves a strncmp() call.
	 *
	 * Scalar statistics are ordered by frequency instead.  Here the usual
	 * access pattern is looking up the frequency of one specific element,
	 * and a presorted array allows binary search for that.  See
	 * ts_selfuncs.c for a real usage scenario.
	 */
	qsort(sort_table, num_mcelem, sizeof(TrackItem *),
		  trackitem_compare_lexemes);

	/* The result values have to be copied into anl_context */
	old_context = MemoryContextSwitchTo(stats->anl_context);

	/*
	 * Since the array is ordered by lexeme, the minimal and maximal
	 * frequencies could only be found by scanning all of it.  They are kept
	 * in two extra cells at the end of mcelem_freqs instead.
	 */
	mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
	mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));

	for (i = 0; i < num_mcelem; i++)
	{
		TrackItem  *picked = sort_table[i];

		mcelem_values[i] =
			PointerGetDatum(cstring_to_text_with_len(picked->key.lexeme,
													 picked->key.length));
		mcelem_freqs[i] = (double) picked->frequency / (double) nonnull_cnt;
	}
	mcelem_freqs[num_mcelem] = (double) minfreq / (double) nonnull_cnt;
	mcelem_freqs[num_mcelem + 1] = (double) maxfreq / (double) nonnull_cnt;

	MemoryContextSwitchTo(old_context);

	stats->stakind[0] = STATISTIC_KIND_MCELEM;
	stats->staop[0] = TextEqualOperator;
	stats->stanumbers[0] = mcelem_freqs;
	/* The two extra frequency cells are part of the count */
	stats->numnumbers[0] = num_mcelem + 2;
	stats->stavalues[0] = mcelem_values;
	stats->numvalues[0] = num_mcelem;
	/* We are storing text values */
	stats->statypid[0] = TEXTOID;
	stats->statyplen[0] = -1;	/* typlen, -1 for varlena */
	stats->statypbyval[0] = false;
	stats->statypalign[0] = 'i';

	/*
	 * We don't need to bother cleaning up any of our temporary palloc's. The
	 * hashtable should also go away, as it used a child memory context.
	 */
}
/* ----------------
 *		printtup --- print a tuple in protocol 3.0
 *
 * Emits the contents of 'slot' as a single DataRow ('D') message and
 * returns true.  All per-row allocations, the message buffer included, are
 * made in the receiver's tmpcontext, which is reset before returning; that
 * is why nothing is freed individually here.
 * ----------------
 */
static bool
printtup(TupleTableSlot *slot, DestReceiver *self)
{
	DR_printtup *receiver = (DR_printtup *) self;
	TupleDesc	tupdesc = slot->tts_tupleDescriptor;
	int			ncols = tupdesc->natts;
	MemoryContext callercxt;
	StringInfoData row;
	int			col;

	/* Refresh the cached per-attribute info if the row shape changed */
	if (receiver->attrinfo != tupdesc || receiver->nattrs != ncols)
		printtup_prepare_info(receiver, tupdesc, ncols);

	/* Every attribute must be available in tts_values/tts_isnull */
	slot_getallattrs(slot);

	/* Work in the per-row context so the memory can be recovered below */
	callercxt = MemoryContextSwitchTo(receiver->tmpcontext);

	/*
	 * Start the DataRow message (its buffer is in the per-row context too)
	 */
	pq_beginmessage(&row, 'D');

	pq_sendint(&row, ncols, 2);

	/*
	 * Append the attributes one by one
	 */
	for (col = 0; col < ncols; col++)
	{
		PrinttupAttrInfo *colinfo = &receiver->myinfo[col];
		Datum		value;

		/* A NULL is just a length word of -1 */
		if (slot->tts_isnull[col])
		{
			pq_sendint(&row, -1, 4);
			continue;
		}

		value = slot->tts_values[col];

		/*
		 * Catch undefined bytes in datums that reach the client without
		 * ever hitting disk; see the comments at the related check in
		 * PageAddItem().  The test is most useful for uncompressed,
		 * non-external datums, which are quite likely to show up here when
		 * new C functions are being tested.
		 */
		if (colinfo->typisvarlena)
			VALGRIND_CHECK_MEM_IS_DEFINED(DatumGetPointer(value),
										  VARSIZE_ANY(value));

		if (colinfo->format != 0)
		{
			/* Binary output: payload is the bytea minus its header */
			bytea	   *sendbytes;
			int			payload_len;

			sendbytes = SendFunctionCall(&colinfo->finfo, value);
			payload_len = VARSIZE(sendbytes) - VARHDRSZ;
			pq_sendint(&row, payload_len, 4);
			pq_sendbytes(&row, VARDATA(sendbytes), payload_len);
		}
		else
		{
			/* Text output */
			char	   *textval;

			textval = OutputFunctionCall(&colinfo->finfo, value);
			pq_sendcountedtext(&row, textval, strlen(textval), false);
		}
	}

	pq_endmessage(&row);

	/* Back to the caller's context; throw away this row's temporary memory */
	MemoryContextSwitchTo(callercxt);
	MemoryContextReset(receiver->tmpcontext);

	return true;
}
/*
 * check_index_tuple_attributes
 *		Checks the individual attributes of one index tuple.
 *
 * rel    - index relation; its tuple descriptor (rd_att) drives the walk
 * header - page header whose line pointer array (pd_linp) and pd_special are
 *          used; presumably the header of the page held in 'buffer'
 * block  - block number, used only in the log messages
 * i      - index into header->pd_linp of the line pointer to check
 * buffer - raw page bytes; all offsets used here are relative to its start
 *
 * Walks the attributes in descriptor order, computing each one's length and
 * verifying that it fits inside the tuple.  Every problem found is reported
 * as a WARNING and counted; the count is returned.  The walk stops at the
 * first attribute whose length cannot be trusted, because the position of
 * the following attributes is then unknown.
 */
uint32
check_index_tuple_attributes(Relation rel, PageHeader header, int block, int i, char *buffer)
{
	/* upper bound for the raw (uncompressed) size of a compressed varlena: 1G */
	const int	max_rawsize = 1024 * 1024 * 1024;

	IndexTuple	tuple;
	uint32		nerrs = 0;
	int			j,
				off;
	int			tuple_end;
	bits8	   *bitmap;
	BTPageOpaque opaque;

	ereport(DEBUG2, (errmsg("[%d:%d] checking attributes for the tuple", block, i)));

	/* get the index tuple and info about the page */
	tuple = (IndexTuple) (buffer + header->pd_linp[i].lp_off);
	opaque = (BTPageOpaque) (buffer + header->pd_special);

	/* first byte after the tuple, as an offset into the buffer */
	tuple_end = header->pd_linp[i].lp_off + header->pd_linp[i].lp_len;

	/* current attribute offset - always starts at (buffer + off) */
	off = header->pd_linp[i].lp_off + IndexInfoFindDataOffset(tuple->t_info);

	ereport(DEBUG3, (errmsg("[%d:%d] tuple has %d attributes", block, (i+1),
							RelationGetNumberOfAttributes(rel))));

	/* the NULL bitmap (if any) follows the fixed-size tuple header */
	bitmap = (bits8 *) (buffer + header->pd_linp[i].lp_off + sizeof(IndexTupleData));

	/*
	 * TODO This is mostly copy'n'paste from check_heap_tuple_attributes, so
	 * maybe it could be refactored to share the code.
	 */

	/*
	 * For left-most tuples on non-leaf pages, there are no data actually (see
	 * src/backend/access/nbtree/README, last paragraph in section "Notes
	 * About Data Representation").
	 *
	 * Use P_LEFTMOST/P_ISLEAF to identify such cases (for the leftmost item
	 * only) and skip the attribute checks entirely.
	 */
	if (P_LEFTMOST(opaque) && (!P_ISLEAF(opaque)) && (i == 0))
	{
		ereport(DEBUG3, (errmsg("[%d:%d] leftmost tuple on non-leaf block => no data, skipping", block, i)));
		return nerrs;
	}

	/* check all the index attributes */
	for (j = 0; j < rel->rd_att->natts; j++)
	{
		/* default length of the attribute */
		int			len = rel->rd_att->attrs[j]->attlen;

		/* copy from src/backend/commands/analyze.c */
		bool		is_varlena = (!rel->rd_att->attrs[j]->attbyval && len == -1);
		bool		is_varwidth = (!rel->rd_att->attrs[j]->attbyval && len < 0);	/* thus it's "len = -2" */

		/*
		 * if the attribute is marked as NULL (in the tuple header), skip to
		 * the next attribute
		 */
		if (IndexTupleHasNulls(tuple) && att_isnull(j, bitmap))
		{
			ereport(DEBUG3, (errmsg("[%d:%d] attribute '%s' is NULL (skipping)", block, (i+1),
									rel->rd_att->attrs[j]->attname.data)));
			continue;
		}

		/* fix the alignment (see src/include/access/tupmacs.h) */
		off = att_align_pointer(off, rel->rd_att->attrs[j]->attalign,
								rel->rd_att->attrs[j]->attlen, buffer+off);

		if (is_varlena)
		{
			/*
			 * other interesting macros (see postgres.h) - should do something
			 * about those ...
			 *
			 * VARATT_IS_COMPRESSED(PTR)	VARATT_IS_4B_C(PTR)
			 * VARATT_IS_EXTERNAL(PTR)		VARATT_IS_1B_E(PTR)
			 * VARATT_IS_SHORT(PTR)			VARATT_IS_1B(PTR)
			 * VARATT_IS_EXTENDED(PTR)		(!VARATT_IS_4B_U(PTR))
			 */

			len = VARSIZE_ANY(buffer + off);

			if (len < 0)
			{
				ereport(WARNING, (errmsg("[%d:%d] attribute '%s' has negative length < 0 (%d)", block, (i+1),
										 rel->rd_att->attrs[j]->attname.data, len)));
				++nerrs;
				break;
			}

			if (VARATT_IS_COMPRESSED(buffer + off))
			{
				uint32		rawsize = VARRAWSIZE_4B_C(buffer + off);

				/*
				 * The raw length should be less than 1G.  The limit really
				 * has to be 1G (not 1M): compressed values whose raw size
				 * exceeds one megabyte are perfectly valid.  The raw size is
				 * unsigned, so no separate check for negative values is
				 * needed.
				 */
				if (rawsize > (uint32) max_rawsize)
				{
					ereport(WARNING, (errmsg("[%d:%d] attribute '%s' has invalid length %d (should be between 0 and 1G)", block, (i+1),
											 rel->rd_att->attrs[j]->attname.data, rawsize)));
					++nerrs;

					/*
					 * no break here, this does not break the page structure -
					 * we may check the other attributes
					 */
				}
			}

			/* FIXME Check if the varlena value may be detoasted. */
		}
		else if (is_varwidth)
		{
			/*
			 * Get the C-string length, scanning at most to the end of the
			 * tuple; +1 as strnlen does not count the '\0' at the end.  If
			 * the string is not properly terminated this yields 'remaining
			 * space + 1', so the overflow check below detects it.
			 *
			 * The scan limit is exactly the space left in the tuple.  It is
			 * clamped at zero because alignment may have moved 'off' past
			 * the tuple end, and a negative value would turn into a huge
			 * unsigned limit.
			 */
			int			remaining = tuple_end - off;

			if (remaining < 0)
				remaining = 0;

			len = strnlen(buffer + off, remaining) + 1;
		}

		/*
		 * Check if the length makes sense (does not overflow the tuple end);
		 * if it does not, stop validating the remaining attributes (we don't
		 * know where to continue anyway).
		 */
		if (off + len > tuple_end)
		{
			ereport(WARNING, (errmsg("[%d:%d] attribute '%s' (off=%d len=%d) overflows tuple end (off=%d, len=%d)", block, (i+1),
									 rel->rd_att->attrs[j]->attname.data, off, len,
									 header->pd_linp[i].lp_off, header->pd_linp[i].lp_len)));
			++nerrs;
			break;
		}

		/* skip to the next attribute */
		off += len;

		ereport(DEBUG3, (errmsg("[%d:%d] attribute '%s' len=%d", block, (i+1),
								rel->rd_att->attrs[j]->attname.data, len)));
	}

	ereport(DEBUG3, (errmsg("[%d:%d] last attribute ends at %d, tuple ends at %d", block, (i+1),
							off, tuple_end)));

	/*
	 * after the last attribute, the (max-aligned) offset should be exactly
	 * the same as the end of the tuple
	 */
	if (MAXALIGN(off) != tuple_end)
	{
		ereport(WARNING, (errmsg("[%d:%d] the last attribute ends at %d but the tuple ends at %d", block, (i+1),
								 off, tuple_end)));
		++nerrs;
	}

	return nerrs;
}