/* * heap_fill_tuple * Load data portion of a tuple from values/isnull arrays * * We also fill the null bitmap (if any) and set the infomask bits * that reflect the tuple's data contents. * * NOTE: it is now REQUIRED that the caller have pre-zeroed the data area. */ void heap_fill_tuple(TupleDesc tupleDesc, Datum *values, bool *isnull, char *data, Size data_size, uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; #ifdef USE_ASSERT_CHECKING char *start = data; #endif if (bit != NULL) { bitP = &bit[-1]; bitmask = HIGHBIT; } else { /* just to keep compiler quiet */ bitP = NULL; bitmask = 0; } *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { Size data_length; if (bit != NULL) { if (bitmask != HIGHBIT) bitmask <<= 1; else { bitP += 1; *bitP = 0x0; bitmask = 1; } if (isnull[i]) { *infomask |= HEAP_HASNULL; continue; } *bitP |= bitmask; } /* * XXX we use the att_align macros on the pointer value itself, not on * an offset. This is a bit of a hack. 
*/ if (att[i]->attbyval) { /* pass-by-value */ data = (char *) att_align_nominal(data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ Pointer val = DatumGetPointer(values[i]); *infomask |= HEAP_HASVARWIDTH; if (VARATT_IS_EXTERNAL(val)) { *infomask |= HEAP_HASEXTERNAL; /* no alignment, since it's short by definition */ data_length = VARSIZE_EXTERNAL(val); memcpy(data, val, data_length); } else if (VARATT_IS_SHORT(val)) { /* no alignment for short varlenas */ data_length = VARSIZE_SHORT(val); memcpy(data, val, data_length); } else if (VARLENA_ATT_IS_PACKABLE(att[i]) && VARATT_CAN_MAKE_SHORT(val)) { /* convert to short varlena -- no alignment */ data_length = VARATT_CONVERTED_SHORT_SIZE(val); SET_VARSIZE_SHORT(data, data_length); memcpy(data + 1, VARDATA(val), data_length - 1); } else { /* full 4-byte header varlena */ data = (char *) att_align_nominal(data, att[i]->attalign); data_length = VARSIZE(val); memcpy(data, val, data_length); } } else if (att[i]->attlen == -2) { /* cstring ... never needs alignment */ *infomask |= HEAP_HASVARWIDTH; Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ data = (char *) att_align_nominal(data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); } data += data_length; } Assert((data - start) == data_size); }
/*
 * compileTheSubstitute
 *		Replace every rule's raw substitute phrase with its lexized form.
 *
 * For each entry of d->subst[], the original word list is run word by word
 * through the subdictionary's lexize function (or taken verbatim when the
 * word carries DT_USEASIS) and the resulting lexemes are collected into a
 * freshly allocated, growable array.  The original list and its strings are
 * freed.  A word that the subdictionary rejects, a stop word, or a phrase
 * that produces no output at all raises ERRCODE_CONFIG_FILE_ERROR.
 */
static void
compileTheSubstitute(DictThesaurus *d)
{
	int			i;

	for (i = 0; i < d->nsubst; i++)
	{
		TSLexeme   *raw = d->subst[i].res;
		TSLexeme   *in;
		TSLexeme   *out;
		int			capacity = 2;

		d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * capacity);
		out = d->subst[i].res;
		out->lexeme = NULL;

		for (in = raw; in && in->lexeme; in++)
		{
			TSLexeme	asis[2];
			TSLexeme   *lexized;

			if (in->flags & DT_USEASIS)
			{
				/* taken verbatim: fake a one-element lexize result */
				asis[0] = *in;
				asis[0].flags = 0;
				asis[1].lexeme = NULL;
				lexized = asis;
			}
			else
			{
				lexized = (TSLexeme *) DatumGetPointer(
												FunctionCall4(
												  &(d->subdict->lexize),
									PointerGetDatum(d->subdict->dictData),
											 PointerGetDatum(in->lexeme),
									Int32GetDatum(strlen(in->lexeme)),
													PointerGetDatum(NULL)
															  )
					);
			}

			if (lexized == NULL)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
								in->lexeme, i + 1)));
			}
			else if (lexized->lexeme == NULL)
			{
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
								in->lexeme, i + 1)));
			}
			else
			{
				/*
				 * Remember where this word's output starts; every word after
				 * the first gets TSL_ADDPOS on its first lexeme.  An index is
				 * kept rather than a pointer because the array may move.
				 */
				int			toset = (out != d->subst[i].res) ?
					(out - d->subst[i].res) : -1;

				for (; lexized->lexeme; lexized++)
				{
					/* double the array when the next slot would overrun it */
					if (out - d->subst[i].res + 1 >= capacity)
					{
						int			used = out - d->subst[i].res;

						capacity *= 2;
						d->subst[i].res = (TSLexeme *)
							repalloc(d->subst[i].res,
									 sizeof(TSLexeme) * capacity);
						out = d->subst[i].res + used;
					}

					*out = *lexized;
					out->lexeme = pstrdup(lexized->lexeme);
					out++;
				}

				if (toset > 0)
					d->subst[i].res[toset].flags |= TSL_ADDPOS;
			}

			/* the raw word is no longer needed */
			if (in->lexeme)
				pfree(in->lexeme);
		}

		if (out == d->subst[i].res)
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("thesaurus substitute phrase is empty (rule %d)",
							i + 1)));

		d->subst[i].reslen = out - d->subst[i].res;

		pfree(raw);
	}
}
/* * Compute the list of TIDs to be visited, by evaluating the expressions * for them. * * (The result is actually an array, not a list.) */ static void TidListCreate(TidScanState *tidstate) { List *evalList = tidstate->tss_tidquals; ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; BlockNumber nblocks; ItemPointerData *tidList; int numAllocTids; int numTids; ListCell *l; /* * We silently discard any TIDs that are out of range at the time of scan * start. (Since we hold at least AccessShareLock on the table, it won't * be possible for someone to truncate away the blocks we intend to * visit.) */ nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation); /* * We initialize the array with enough slots for the case that all quals * are simple OpExprs or CurrentOfExprs. If there are any * ScalarArrayOpExprs, we may have to enlarge the array. */ numAllocTids = list_length(evalList); tidList = (ItemPointerData *) palloc(numAllocTids * sizeof(ItemPointerData)); numTids = 0; tidstate->tss_isCurrentOf = false; foreach(l, evalList) { ExprState *exstate = (ExprState *) lfirst(l); Expr *expr = exstate->expr; ItemPointer itemptr; bool isNull; if (is_opclause(expr)) { FuncExprState *fexstate = (FuncExprState *) exstate; Node *arg1; Node *arg2; arg1 = get_leftop(expr); arg2 = get_rightop(expr); if (IsCTIDVar(arg1)) exstate = (ExprState *) lsecond(fexstate->args); else if (IsCTIDVar(arg2)) exstate = (ExprState *) linitial(fexstate->args); else elog(ERROR, "could not identify CTID variable"); itemptr = (ItemPointer) DatumGetPointer(ExecEvalExprSwitchContext(exstate, econtext, &isNull, NULL)); if (!isNull && ItemPointerIsValid(itemptr) && ItemPointerGetBlockNumber(itemptr) < nblocks) { if (numTids >= numAllocTids) { numAllocTids *= 2; tidList = (ItemPointerData *) repalloc(tidList, numAllocTids * sizeof(ItemPointerData)); } tidList[numTids++] = *itemptr; } } else if (expr && IsA(expr, ScalarArrayOpExpr)) { ScalarArrayOpExprState *saexstate = (ScalarArrayOpExprState 
*) exstate; Datum arraydatum; ArrayType *itemarray; Datum *ipdatums; bool *ipnulls; int ndatums; int i; exstate = (ExprState *) lsecond(saexstate->fxprstate.args); arraydatum = ExecEvalExprSwitchContext(exstate, econtext, &isNull, NULL); if (isNull) continue; itemarray = DatumGetArrayTypeP(arraydatum); deconstruct_array(itemarray, TIDOID, SizeOfIptrData, false, 's', &ipdatums, &ipnulls, &ndatums); if (numTids + ndatums > numAllocTids) { numAllocTids = numTids + ndatums; tidList = (ItemPointerData *) repalloc(tidList, numAllocTids * sizeof(ItemPointerData)); } for (i = 0; i < ndatums; i++) { if (!ipnulls[i]) { itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); if (ItemPointerIsValid(itemptr) && ItemPointerGetBlockNumber(itemptr) < nblocks) tidList[numTids++] = *itemptr; } } pfree(ipdatums); pfree(ipnulls); } else if (expr && IsA(expr, CurrentOfExpr)) { CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; ItemPointerData cursor_tid; if (execCurrentOf(cexpr, econtext, RelationGetRelid(tidstate->ss.ss_currentRelation), &cursor_tid)) { if (numTids >= numAllocTids) { numAllocTids *= 2; tidList = (ItemPointerData *) repalloc(tidList, numAllocTids * sizeof(ItemPointerData)); } tidList[numTids++] = cursor_tid; tidstate->tss_isCurrentOf = true; } } else elog(ERROR, "could not identify CTID expression"); }
/*
 * SerializeTupleIntoChunks
 *		Turn a tuple into a byte sequence stored directly in a chunk list
 *		that is ready for transmission.
 *
 * tcList is reset and then filled.  A tuple descriptor with zero attributes
 * yields a single TC_EMPTY chunk.  Otherwise a memtuple is sent as its raw
 * bytes; a heap tuple is sent as a TupSerHeader followed either by its raw
 * null bitmap and data area (no external attributes) or by a per-attribute
 * encoding (when HEAP_HASEXTERNAL is set).
 *
 * This code is based on the printtup_internal_20() function in printtup.c.
 */
void
SerializeTupleIntoChunks(GenericTuple gtuple, SerTupInfo *pSerInfo, TupleChunkList tcList)
{
	TupleChunkListItem firstChunk;
	MemoryContext savedCtxt;
	TupleDesc	tupdesc;
	int			natts;
	int			attno;

	AssertArg(tcList != NULL);
	AssertArg(gtuple != NULL);
	AssertArg(pSerInfo != NULL);

	tupdesc = pSerInfo->tupdesc;
	natts = tupdesc->natts;

	/* start from an empty list */
	tcList->p_first = NULL;
	tcList->p_last = NULL;
	tcList->num_chunks = 0;
	tcList->serialized_data_length = 0;
	tcList->max_chunk_length = Gp_max_tuple_chunk_size;

	/* both the empty and the non-empty case begin with one chunk */
	firstChunk = getChunkFromCache(&pSerInfo->chunkCache);
	if (firstChunk == NULL)
	{
		ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY),
						errmsg("Could not allocate space for first chunk item in new chunk list.")));
	}

	if (natts == 0)
	{
		/* TC_EMPTY is just one header-only chunk */
		SetChunkType(firstChunk->chunk_data, TC_EMPTY);
		firstChunk->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
		appendChunkToTCList(tcList, firstChunk);
		return;
	}

	/* optimistically assume the tuple fits in a single chunk */
	SetChunkType(firstChunk->chunk_data, TC_WHOLE);
	firstChunk->chunk_length = TUPLE_CHUNK_HEADER_SIZE;
	appendChunkToTCList(tcList, firstChunk);

	AssertState(s_tupSerMemCtxt != NULL);

	if (is_memtuple(gtuple))
	{
		MemTuple	mtuple = (MemTuple) gtuple;

		/* memtuples are shipped verbatim */
		addByteStringToChunkList(tcList, (char *) mtuple,
								 memtuple_get_size(mtuple),
								 &pSerInfo->chunkCache);
		addPadding(tcList, &pSerInfo->chunkCache, memtuple_get_size(mtuple));
	}
	else
	{
		HeapTuple	tuple = (HeapTuple) gtuple;
		HeapTupleHeader t_data = tuple->t_data;
		TupSerHeader tsh;
		unsigned int datalen;
		unsigned int nullslen;

		datalen = tuple->t_len - t_data->t_hoff;
		nullslen = HeapTupleHasNulls(tuple) ?
			BITMAPLEN(HeapTupleHeaderGetNatts(t_data)) : 0;

		tsh.tuplen = sizeof(TupSerHeader) +
			TYPEALIGN(TUPLE_CHUNK_ALIGN, nullslen) + datalen;
		tsh.natts = HeapTupleHeaderGetNatts(t_data);
		tsh.infomask = t_data->t_infomask;

		addByteStringToChunkList(tcList, (char *) &tsh, sizeof(TupSerHeader),
								 &pSerInfo->chunkCache);

		if ((tsh.infomask & HEAP_HASEXTERNAL) != 0)
		{
			/*
			 * Toasted attributes are present, so more care is needed.
			 * Ideally the untoasted attributes would go out as "raw" as
			 * possible, but that makes rebuilding the tuple harder.
			 */
			savedCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

			/* deconstruct the tuple (faster than a heap_getattr loop) */
			heap_deform_tuple(tuple, tupdesc, pSerInfo->values, pSerInfo->nulls);

			MemoryContextSwitchTo(savedCtxt);

			/* the nulls character-array goes first */
			addByteStringToChunkList(tcList, pSerInfo->nulls, natts,
									 &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, natts);

			/*
			 * Now the attributes.  NOTE: anything that allocates temporary
			 * space (e.g. could end up in PG_DETOAST_DATUM) should run with
			 * the memory context set to s_tupSerMemCtxt.
			 */
			for (attno = 0; attno < natts; ++attno)
			{
				SerAttrInfo *attrInfo = &pSerInfo->myinfo[attno];
				Datum		origattr = pSerInfo->values[attno];

				/* nulls were already conveyed by the nulls array */
				if (pSerInfo->nulls[attno])
					continue;

				if (attrInfo->typlen == -1)
				{
					Datum		attr;
					int32		sz;
					char	   *data;

					/*
					 * Forcibly detoast here so that the detoast allocation(s)
					 * land in our resettable serialization context instead
					 * of leaking.
					 */
					savedCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);
					attr = PointerGetDatum(PG_DETOAST_DATUM_PACKED(origattr));
					MemoryContextSwitchTo(savedCtxt);

					sz = VARSIZE_ANY_EXHDR(attr);
					data = VARDATA_ANY(attr);

					/* length first, then payload */
					addInt32ToChunkList(tcList, sz, &pSerInfo->chunkCache);
					addByteStringToChunkList(tcList, data, sz,
											 &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, sz);
				}
				else if (attrInfo->typlen == -2)
				{
					/* cstring: sent including its terminating '\0' */
					char	   *data = DatumGetCString(origattr);
					int32		sz = strlen(data) + 1;

					/* length first, then payload */
					addInt32ToChunkList(tcList, sz, &pSerInfo->chunkCache);
					addByteStringToChunkList(tcList, data, sz,
											 &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, sz);
				}
				else if (attrInfo->typbyval)
				{
					/*
					 * Pass-by-value types always travel as a full-width
					 * Datum, whatever their actual size.
					 */
					addByteStringToChunkList(tcList, (char *) &origattr,
											 sizeof(Datum),
											 &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, sizeof(Datum));
				}
				else
				{
					/* fixed-length pass-by-reference: typlen raw bytes */
					addByteStringToChunkList(tcList, DatumGetPointer(origattr),
											 attrInfo->typlen,
											 &pSerInfo->chunkCache);
					addPadding(tcList, &pSerInfo->chunkCache, attrInfo->typlen);
				}
			}

			MemoryContextReset(s_tupSerMemCtxt);
		}
		else
		{
			/*
			 * Nothing is toasted, so we can be very simple: send the raw
			 * null bitmap (if any) and the raw data area.
			 */
			if (nullslen)
			{
				addByteStringToChunkList(tcList, (char *) t_data->t_bits,
										 nullslen, &pSerInfo->chunkCache);
				addPadding(tcList, &pSerInfo->chunkCache, nullslen);
			}

			addByteStringToChunkList(tcList,
									 (char *) t_data + t_data->t_hoff,
									 datalen, &pSerInfo->chunkCache);
			addPadding(tcList, &pSerInfo->chunkCache, datalen);
		}
	}

	/*
	 * If the data spilled into more than one chunk, relabel the first and
	 * last chunks; any chunks in between were already marked TC_PARTIAL_MID
	 * when they were allocated.
	 */
	if (tcList->num_chunks > 1)
	{
		TupleChunkListItem first = tcList->p_first;
		TupleChunkListItem last = tcList->p_last;

		Assert(first != NULL);
		Assert(first != last);
		Assert(last != NULL);

		SetChunkType(first->chunk_data, TC_PARTIAL_START);
		SetChunkType(last->chunk_data, TC_PARTIAL_END);
	}

	return;
}
/*
 * hstoreUpgrade
 *		PG_DETOAST_DATUM plus conversion of old-format hstore values.
 *
 * Returns the detoasted value unchanged when it already carries the
 * new-version flag.  Otherwise a writable copy is obtained and is either
 * just stamped as new-format or rewritten in place from the old entry
 * layout.  Raises an error if the value is valid in neither format.
 */
HStore *
hstoreUpgrade(Datum orig)
{
	HStore	   *hs = (HStore *) PG_DETOAST_DATUM(orig);
	int			looks_new;
	int			looks_old;

	/* Already flagged as new-format: nothing to do */
	if (hs->size_ & HS_FLAG_NEWVERSION)
		return hs;

	/* Everything below modifies the value, so make sure we own a copy */
	if ((void *) hs == (void *) DatumGetPointer(orig))
		hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);

	/* Cheap cases: only the flag and the varlena length need fixing */
	if (hs->size_ == 0 ||
		(VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
	{
		HS_SETCOUNT(hs, HS_COUNT(hs));
		HS_FIXSIZE(hs, HS_COUNT(hs));
		return hs;
	}

	looks_new = hstoreValidNewFormat(hs);
	looks_old = hstoreValidOldFormat(hs);

	if (!looks_old || hs->size_ == 0)
	{
		if (!looks_new)
		{
			elog(ERROR, "invalid hstore value found");
		}
		else
		{
			/* force the "new version" flag and the correct varlena length */
			HS_SETCOUNT(hs, HS_COUNT(hs));
			HS_FIXSIZE(hs, HS_COUNT(hs));
			return hs;
		}
	}

	/*
	 * Here the value is valid as an old-format hstore, and possibly as a
	 * new-format one too.  That ambiguity is the tricky edge case; it can
	 * only arise in fairly extreme circumstances (the hstore must have had a
	 * lot of wasted padding space at the end).  The only way a "new" value
	 * could reach this point is an in-place upgrade from a pre-release
	 * version of hstore-new (NOT contrib/hstore), so we reason as follows:
	 *
	 * 1. Moving from old contrib/hstore to hstore-new: you are required to
	 *    fix up potential conflicts first, e.g. by running
	 *    ALTER TABLE ... USING col::text::hstore; on all hstore columns
	 *    before upgrading.
	 * 2. Moving from old contrib/hstore to new contrib/hstore: "new" values
	 *    are impossible here.
	 * 3. Moving from pre-release hstore-new to hstore-new: "old" values are
	 *    impossible here.
	 * 4. Moving from pre-release hstore-new to new contrib/hstore: that is
	 *    not an in-place upgrade, so there is no issue.
	 *
	 * The upshot is that all ambiguous cases are treated as "new" when built
	 * as hstore-new and as "old" when built as contrib/hstore.
	 *
	 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
	 * beta-tested.  For now it is very useful to learn whether anyone can
	 * actually reach this case in a non-contrived setting.
	 */
	if (looks_new)
	{
#if HSTORE_IS_HSTORE_NEW
		elog(WARNING, "ambiguous hstore value resolved as hstore-new");

		/* force the "new version" flag and the correct varlena length */
		HS_SETCOUNT(hs, HS_COUNT(hs));
		HS_FIXSIZE(hs, HS_COUNT(hs));
		return hs;
#else
		elog(WARNING, "ambiguous hstore value resolved as hstore-old");
#endif
	}

	/*
	 * It must be an old-style value: overwrite it in place as a new-style
	 * one.  The old and new entry arrays share the same storage, so each old
	 * entry is read completely before its two new slots are written.
	 */
	{
		int			npairs = hs->size_;
		HOldEntry  *src = (HOldEntry *) ARRPTR(hs);
		HEntry	   *dst = ARRPTR(hs);
		int			k;

		for (k = 0; k < npairs; ++k)
		{
			uint32		pos = src[k].pos;
			uint32		keylen = src[k].keylen;
			uint32		vallen = src[k].vallen;
			bool		isnull = src[k].valisnull;
			uint32		keyend;

			/* a null value occupies no space */
			if (isnull)
				vallen = 0;

			keyend = pos + keylen;

			dst[2 * k].entry = keyend & HENTRY_POSMASK;
			dst[2 * k + 1].entry = (((keyend + vallen) & HENTRY_POSMASK)
									| ((isnull) ? HENTRY_ISNULL : 0));
		}

		if (npairs)
			dst[0].entry |= HENTRY_ISFIRST;

		HS_SETCOUNT(hs, npairs);
		HS_FIXSIZE(hs, npairs);
	}

	return hs;
}
Datum _numeric_weighted_stddev_samp_intermediate(PG_FUNCTION_ARGS) { WeightedStddevSampInternalState *state; Datum value, weight, old_s_0, old_s_1, old_s_2, w_v, w_v2; MemoryContext aggcontext, oldcontext; if (!AggCheckCallContext(fcinfo, &aggcontext)) /* cannot be called directly because of internal-type argument */ elog(ERROR, "_weighted_stddev_samp_intermediate called in non-aggregate context"); if (PG_ARGISNULL(0)) { oldcontext = MemoryContextSwitchTo(aggcontext); state = (WeightedStddevSampInternalState *) palloc(sizeof(WeightedStddevSampInternalState)); state->s_2 = make_numeric(0); state->s_1 = make_numeric(0); state->s_0 = make_numeric(0); state->zero = make_numeric(0); state->n_prime = 0; MemoryContextSwitchTo(oldcontext); } else state = (WeightedStddevSampInternalState *) PG_GETARG_POINTER(0); /* * We're non-strict, so we MUST check args for nullity ourselves before * using them. To preserve the behaviour of null inputs, we skip updating * on them. */ if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) PG_RETURN_POINTER(state); /* * We fetch and process the input in the shortlived calling context to * avoid leaking memory in aggcontext per cycle. We force the input to be * detoasted here, too, in the shortlived context. (PG_GETARG_DATUM does * not detoast, but PG_GETARG_NUMERIC does.) */ value = NumericGetDatum(PG_GETARG_NUMERIC(1)); weight = NumericGetDatum(PG_GETARG_NUMERIC(2)); /* * We also skip updating when the weight is zero. */ if (DatumGetBool(DirectFunctionCall2(numeric_eq, weight, state->zero))) PG_RETURN_POINTER(state); /* * Compute intermediate values w*v and w*(v^2) in the short-lived context */ w_v = DirectFunctionCall2(numeric_mul, weight, value); w_v2 = DirectFunctionCall2(numeric_mul, w_v, value); /* * The new running totals must be allocated in the long-lived context. 
We * rely on the numeric_* functions to clean up after themselves (which they * currently do, but only if the input is already detoasted); we could play * safe and copy only the final results into aggcontext, but this turns out * to have a measurable performance hit. */ oldcontext = MemoryContextSwitchTo(aggcontext); old_s_2 = state->s_2; old_s_1 = state->s_1; old_s_0 = state->s_0; state->s_0 = DirectFunctionCall2(numeric_add, old_s_0, weight); state->s_1 = DirectFunctionCall2(numeric_add, old_s_1, w_v); state->s_2 = DirectFunctionCall2(numeric_add, old_s_2, w_v2); state->n_prime += 1; pfree(DatumGetPointer(old_s_2)); pfree(DatumGetPointer(old_s_1)); pfree(DatumGetPointer(old_s_0)); MemoryContextSwitchTo(oldcontext); PG_RETURN_POINTER(state); }
Datum tsquery_rewrite_query(PG_FUNCTION_ARGS) { TSQuery query = PG_GETARG_TSQUERY_COPY(0); text *in = PG_GETARG_TEXT_P(1); TSQuery rewrited = query; MemoryContext outercontext = CurrentMemoryContext; MemoryContext oldcontext; QTNode *tree; char *buf; SPIPlanPtr plan; Portal portal; bool isnull; int i; if (query->size == 0) { PG_FREE_IF_COPY(in, 1); PG_RETURN_POINTER(rewrited); } tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); QTNTernary(tree); QTNSort(tree); buf = text_to_cstring(in); SPI_connect(); if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) elog(ERROR, "SPI_prepare(\"%s\") failed", buf); if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf); SPI_cursor_fetch(portal, true, 100); if (SPI_tuptable == NULL || SPI_tuptable->tupdesc->natts != 2 || SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ts_rewrite query must return two tsquery columns"))); while (SPI_processed > 0 && tree) { for (i = 0; i < SPI_processed && tree; i++) { Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); Datum sdata; if (isnull) continue; sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); if (!isnull) { TSQuery qtex = DatumGetTSQuery(qdata); TSQuery qtsubs = DatumGetTSQuery(sdata); QTNode *qex, *qsubs = NULL; if (qtex->size == 0) { if (qtex != (TSQuery) DatumGetPointer(qdata)) pfree(qtex); if (qtsubs != (TSQuery) DatumGetPointer(sdata)) pfree(qtsubs); continue; } qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); QTNTernary(qex); QTNSort(qex); if (qtsubs->size) qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); oldcontext = MemoryContextSwitchTo(outercontext); tree = findsubquery(tree, qex, qsubs, NULL); MemoryContextSwitchTo(oldcontext); QTNFree(qex); if (qtex != (TSQuery) DatumGetPointer(qdata)) pfree(qtex); QTNFree(qsubs); if 
(qtsubs != (TSQuery) DatumGetPointer(sdata)) pfree(qtsubs); if (tree) { /* ready the tree for another pass */ QTNClearFlags(tree, QTN_NOCHANGE); QTNSort(tree); } } } SPI_freetuptable(SPI_tuptable); SPI_cursor_fetch(portal, true, 100); } SPI_freetuptable(SPI_tuptable); SPI_cursor_close(portal); SPI_freeplan(plan); SPI_finish(); if (tree) { QTNBinary(tree); rewrited = QTN2QT(tree); QTNFree(tree); PG_FREE_IF_COPY(query, 0); } else { SET_VARSIZE(rewrited, HDRSIZETQ); rewrited->size = 0; } pfree(buf); PG_FREE_IF_COPY(in, 1); PG_RETURN_POINTER(rewrited); }
/* * pgstrom_create_param_buffer * * It construct a param-buffer on the shared memory segment, according to * the supplied Const/Param list. Its initial reference counter is 1, so * this buffer can be released using pgstrom_put_param_buffer(). */ kern_parambuf * pgstrom_create_kern_parambuf(List *used_params, ExprContext *econtext) { StringInfoData str; kern_parambuf *kpbuf; char padding[STROMALIGN_LEN]; ListCell *cell; Size offset; int index = 0; int nparams = list_length(used_params); /* seek to the head of variable length field */ offset = STROMALIGN(offsetof(kern_parambuf, poffset[nparams])); initStringInfo(&str); enlargeStringInfo(&str, offset); memset(str.data, 0, offset); str.len = offset; /* walks on the Para/Const list */ foreach (cell, used_params) { Node *node = lfirst(cell); if (IsA(node, Const)) { Const *con = (Const *) node; kpbuf = (kern_parambuf *)str.data; if (con->constisnull) kpbuf->poffset[index] = 0; /* null */ else { kpbuf->poffset[index] = str.len; if (con->constlen > 0) appendBinaryStringInfo(&str, (char *)&con->constvalue, con->constlen); else appendBinaryStringInfo(&str, DatumGetPointer(con->constvalue), VARSIZE(con->constvalue)); } } else if (IsA(node, Param)) { ParamListInfo param_info = econtext->ecxt_param_list_info; Param *param = (Param *) node; if (param_info && param->paramid > 0 && param->paramid <= param_info->numParams) { ParamExternData *prm = ¶m_info->params[param->paramid - 1]; /* give hook a chance in case parameter is dynamic */ if (!OidIsValid(prm->ptype) && param_info->paramFetch != NULL) (*param_info->paramFetch) (param_info, param->paramid); kpbuf = (kern_parambuf *)str.data; if (!OidIsValid(prm->ptype)) { elog(INFO, "debug: Param has no particular data type"); kpbuf->poffset[index++] = 0; /* null */ continue; } /* safety check in case hook did something unexpected */ if (prm->ptype != param->paramtype) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("type of parameter %d (%s) does not match that when 
preparing the plan (%s)", param->paramid, format_type_be(prm->ptype), format_type_be(param->paramtype)))); if (prm->isnull) kpbuf->poffset[index] = 0; /* null */ else { int typlen = get_typlen(prm->ptype); if (typlen == 0) elog(ERROR, "cache lookup failed for type %u", prm->ptype); if (typlen > 0) appendBinaryStringInfo(&str, (char *)&prm->value, typlen); else appendBinaryStringInfo(&str, DatumGetPointer(prm->value), VARSIZE(prm->value)); } } } else elog(ERROR, "unexpected node: %s", nodeToString(node)); /* alignment */ if (STROMALIGN(str.len) != str.len) appendBinaryStringInfo(&str, padding, STROMALIGN(str.len) - str.len); index++; }
Datum gtsvector_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* tsvector */ SignTSVector *res; TSVector val = DatumGetTSVector(entry->key); int32 len; int32 *arr; WordEntry *ptr = ARRPTR(val); char *words = STRPTR(val); len = CALCGTSIZE(ARRKEY, val->size); res = (SignTSVector *) palloc(len); SET_VARSIZE(res, len); res->flag = ARRKEY; arr = GETARR(res); len = val->size; while (len--) { pg_crc32 c; INIT_LEGACY_CRC32(c); COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len); FIN_LEGACY_CRC32(c); *arr = *(int32 *) &c; arr++; ptr++; } len = uniqueint(GETARR(res), val->size); if (len != val->size) { /* * there is a collision of hash-function; len is always less than * val->size */ len = CALCGTSIZE(ARRKEY, len); res = (SignTSVector *) repalloc((void *) res, len); SET_VARSIZE(res, len); } /* make signature, if array is too long */ if (VARSIZE(res) > TOAST_INDEX_TARGET) { SignTSVector *ressign; len = CALCGTSIZE(SIGNKEY, 0); ressign = (SignTSVector *) palloc(len); SET_VARSIZE(ressign, len); ressign->flag = SIGNKEY; makesign(GETSIGN(ressign), res); res = ressign; } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int32 i, len; SignTSVector *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); res = (SignTSVector *) palloc(len); SET_VARSIZE(res, len); res->flag = SIGNKEY | ALLISTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); }
/*
 * composite_to_json
 *		Append the JSON object form of a composite / record datum to
 *		"result".
 *
 * Each non-dropped attribute is written as an escaped name, a colon and the
 * value as rendered by datum_to_json().  Attributes are separated by "," or,
 * when use_line_feeds is set, by a comma followed by a newline and a space.
 */
static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
	HeapTupleHeader rec = DatumGetHeapTupleHeader(composite);
	Oid			rowType;
	int32		rowTypmod;
	TupleDesc	tupdesc;
	HeapTupleData tupbuf;
	char	   *sep;
	bool		first = true;
	int			attno;

	if (use_line_feeds)
		sep = ",\n ";
	else
		sep = ",";

	/* Extract rowtype info and find a tupdesc */
	rowType = HeapTupleHeaderGetTypeId(rec);
	rowTypmod = HeapTupleHeaderGetTypMod(rec);
	tupdesc = lookup_rowtype_tupdesc(rowType, rowTypmod);

	/* Build a temporary HeapTuple control structure around the datum */
	tupbuf.t_len = HeapTupleHeaderGetDatumLength(rec);
	tupbuf.t_data = rec;

	appendStringInfoChar(result, '{');

	for (attno = 0; attno < tupdesc->natts; attno++)
	{
		Oid			atttype;
		Oid			typoutput;
		bool		typisvarlena;
		bool		isnull;
		TYPCATEGORY tcategory;
		Datum		origval;
		Datum		val;

		if (tupdesc->attrs[attno]->attisdropped)
			continue;

		/* separator goes before every emitted attribute but the first */
		if (first)
			first = false;
		else
			appendStringInfoString(result, sep);

		escape_json(result, NameStr(tupdesc->attrs[attno]->attname));
		appendStringInfoChar(result, ':');

		origval = heap_getattr(&tupbuf, attno + 1, tupdesc, &isnull);

		/* the record pseudo-types get their category assigned by hand */
		atttype = tupdesc->attrs[attno]->atttypid;
		if (atttype == RECORDARRAYOID)
			tcategory = TYPCATEGORY_ARRAY;
		else if (atttype == RECORDOID)
			tcategory = TYPCATEGORY_COMPOSITE;
		else
			tcategory = TypeCategory(atttype);

		getTypeOutputInfo(atttype, &typoutput, &typisvarlena);

		/*
		 * A toasted datum is forcibly detoasted here so that the memory is
		 * not leaked inside the type's output routine.
		 */
		val = (typisvarlena && !isnull) ?
			PointerGetDatum(PG_DETOAST_DATUM(origval)) : origval;

		datum_to_json(val, result, tcategory, typoutput);

		/* Clean up detoasted copy, if any */
		if (val != origval)
			pfree(DatumGetPointer(val));
	}

	appendStringInfoChar(result, '}');
	ReleaseTupleDesc(tupdesc);
}
Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS) { int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); Datum *entries; if (strategy == JsonbContainsStrategyNumber) { /* Query is a jsonb, so just apply gin_extract_jsonb... */ entries = (Datum *) DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, PG_GETARG_DATUM(0), PointerGetDatum(nentries))); /* ...although "contains {}" requires a full index scan */ if (*nentries == 0) *searchMode = GIN_SEARCH_MODE_ALL; } else if (strategy == JsonbExistsStrategyNumber) { /* Query is a text string, which we treat as a key */ text *query = PG_GETARG_TEXT_PP(0); *nentries = 1; entries = (Datum *) palloc(sizeof(Datum)); entries[0] = make_text_key(JGINFLAG_KEY, VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query)); } else if (strategy == JsonbExistsAnyStrategyNumber || strategy == JsonbExistsAllStrategyNumber) { /* Query is a text array; each element is treated as a key */ ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); Datum *key_datums; bool *key_nulls; int key_count; int i, j; deconstruct_array(query, TEXTOID, -1, false, 'i', &key_datums, &key_nulls, &key_count); entries = (Datum *) palloc(sizeof(Datum) * key_count); for (i = 0, j = 0; i < key_count; i++) { /* Nulls in the array are ignored */ if (key_nulls[i]) continue; entries[j++] = make_text_key(JGINFLAG_KEY, VARDATA_ANY(key_datums[i]), VARSIZE_ANY_EXHDR(key_datums[i])); } *nentries = j; /* ExistsAll with no keys should match everything */ if (j == 0 && strategy == JsonbExistsAllStrategyNumber) *searchMode = GIN_SEARCH_MODE_ALL; } else { elog(ERROR, "unrecognized strategy number: %d", strategy); entries = NULL; /* keep compiler quiet */ } PG_RETURN_POINTER(entries); }
/* ** GiST Compress and Decompress methods */ Datum g_int_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval; ArrayType *r; int len; int *dr; int i, min, cand; if (entry->leafkey) { r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key); PREPAREARR(r); r->flags |= LEAFKEY; retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE); PG_RETURN_POINTER(retval); } r = (ArrayType *) PG_DETOAST_DATUM(entry->key); if (ISLEAFKEY(r) || ARRISVOID(r)) { if (r != (ArrayType *) DatumGetPointer(entry->key)) pfree(r); PG_RETURN_POINTER(entry); } if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE) { /* compress */ if (r == (ArrayType *) DatumGetPointer(entry->key)) r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key); r = resize_intArrayType(r, 2 * (len)); dr = ARRPTR(r); for (i = len - 1; i >= 0; i--) dr[2 * i] = dr[2 * i + 1] = dr[i]; len *= 2; cand = 1; while (len > MAXNUMRANGE * 2) { min = 0x7fffffff; for (i = 2; i < len; i += 2) if (min > (dr[i] - dr[i - 1])) { min = (dr[i] - dr[i - 1]); cand = i; } memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int)); len -= 2; } r = resize_intArrayType(r, len); retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE); PG_RETURN_POINTER(retval); } else PG_RETURN_POINTER(entry); PG_RETURN_POINTER(entry); }
Datum gtsvector_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* tsvector */ GISTTYPE *res; tsvector *val = (tsvector *) PG_DETOAST_DATUM(entry->key); int4 len; int4 *arr; WordEntry *ptr = ARRPTR(val); char *words = STRPTR(val); len = CALCGTSIZE(ARRKEY, val->size); res = (GISTTYPE *) palloc(len); SET_VARSIZE(res, len); res->flag = ARRKEY; arr = GETARR(res); len = val->size; while (len--) { *arr = crc32_sz(&words[ptr->pos], ptr->len); arr++; ptr++; } len = uniqueint(GETARR(res), val->size); if (len != val->size) { /* * there is a collision of hash-function; len is always less than * val->size */ len = CALCGTSIZE(ARRKEY, len); res = (GISTTYPE *) repalloc((void *) res, len); SET_VARSIZE(res, len); } /* make signature, if array is too long */ if (VARSIZE(res) > TOAST_INDEX_TARGET) { GISTTYPE *ressign; len = CALCGTSIZE(SIGNKEY, 0); ressign = (GISTTYPE *) palloc(len); SET_VARSIZE(ressign, len); ressign->flag = SIGNKEY; makesign(GETSIGN(ressign), res); res = ressign; } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { int4 i, len; GISTTYPE *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); LOOPBYTE( if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); );
/*
 * ghstore_compress
 *		GiST compress method for hstore keys.
 *
 * Argument 0 is the GISTENTRY to compress; retval starts out as that entry.
 *
 * For a leaf entry a zero-filled GISTTYPE of size CALCGTSIZE(0) is allocated
 * and, for every hstore pair, the crc32_sz() hash of the key -- and of the
 * value, when it is not null -- is folded into the signature with HASH().
 *
 * For a non-leaf key that is not flagged all-true, the signature bytes are
 * scanned: if any byte differs from 0xff the entry is returned unchanged,
 * otherwise it is replaced by a key carrying only the ALLISTRUE flag.
 *
 * NOTE(review): the final return statement and the closing brace of this
 * function are not present in the text here -- confirm against the complete
 * file.
 */
Datum
ghstore_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval = entry;

	if (entry->leafkey)
	{
		GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
		/* kept only to tell whether detoasting produced a separate copy */
		HStore	   *toastedval = (HStore *) DatumGetPointer(entry->key);
		HStore	   *val = (HStore *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
		HEntry	   *ptr = ARRPTR(val);
		char	   *words = STRPTR(val);

		SET_VARSIZE(res, CALCGTSIZE(0));

		/* walk all val->size entries of the hstore */
		while (ptr - ARRPTR(val) < val->size)
		{
			int			h;

			/* the key text starts at pos and is keylen bytes long */
			h = crc32_sz((char *) (words + ptr->pos), ptr->keylen);
			HASH(GETSIGN(res), h);
			if (!ptr->valisnull)
			{
				/* the value text follows the key immediately */
				h = crc32_sz((char *) (words + ptr->pos + ptr->keylen), ptr->vallen);
				HASH(GETSIGN(res), h);
			}
			ptr++;
		}

		/* release the detoasted copy, if one was made */
		if (val != toastedval)
			pfree(val);

		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset,
					  FALSE);
	}
	else if (!ISALLTRUE(DatumGetPointer(entry->key)))
	{
		int4		i;
		GISTTYPE   *res;
		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));

		/* keep the entry as it is unless every signature byte is 0xff */
		LOOPBYTE
		{
			if ((sign[i] & 0xff) != 0xff)
				PG_RETURN_POINTER(retval);
		}

		/* every byte is 0xff: replace the key with a bare ALLISTRUE marker */
		res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
		SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
		res->flag = ALLISTRUE;

		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset,
					  FALSE);
	}
/*
 * Fetch dictionary cache entry
 *
 * Returns the cache entry for text search dictionary dictId, building or
 * rebuilding it when there is none or the existing one is marked invalid.
 * The hash table is created, and the invalidation callbacks registered, on
 * the first call.  Raises an error if the dictionary or its template cannot
 * be found in the system caches, if the dictionary has no template, or if
 * the template has no lexize method.
 *
 * The entry returned is also remembered in lastUsedDictionary, which serves
 * as a one-element cache in front of the hash table.
 */
TSDictionaryCacheEntry *
lookup_ts_dictionary_cache(Oid dictId)
{
	TSDictionaryCacheEntry *entry;

	if (TSDictionaryCacheHash == NULL)
	{
		/* First time through: initialize the hash table */
		HASHCTL		ctl;

		MemSet(&ctl, 0, sizeof(ctl));
		ctl.keysize = sizeof(Oid);
		ctl.entrysize = sizeof(TSDictionaryCacheEntry);
		TSDictionaryCacheHash = hash_create("Tsearch dictionary cache", 8,
											&ctl, HASH_ELEM | HASH_BLOBS);
		/* Flush cache on pg_ts_dict and pg_ts_template changes */
		CacheRegisterSyscacheCallback(TSDICTOID, InvalidateTSCacheCallBack,
									  PointerGetDatum(TSDictionaryCacheHash));
		CacheRegisterSyscacheCallback(TSTEMPLATEOID, InvalidateTSCacheCallBack,
									  PointerGetDatum(TSDictionaryCacheHash));

		/* Also make sure CacheMemoryContext exists */
		if (!CacheMemoryContext)
			CreateCacheMemoryContext();
	}

	/* Check single-entry cache */
	if (lastUsedDictionary && lastUsedDictionary->dictId == dictId &&
		lastUsedDictionary->isvalid)
		return lastUsedDictionary;

	/* Try to look up an existing entry */
	entry = (TSDictionaryCacheEntry *) hash_search(TSDictionaryCacheHash,
												   (void *) &dictId,
												   HASH_FIND, NULL);
	if (entry == NULL || !entry->isvalid)
	{
		/*
		 * If we didn't find one, we want to make one. But first look up the
		 * object to be sure the OID is real.
		 */
		HeapTuple	tpdict,
					tptmpl;
		Form_pg_ts_dict dict;
		Form_pg_ts_template ctemplate;
		MemoryContext saveCtx;

		tpdict = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId));
		if (!HeapTupleIsValid(tpdict))
			elog(ERROR, "cache lookup failed for text search dictionary %u",
				 dictId);
		dict = (Form_pg_ts_dict) GETSTRUCT(tpdict);

		/*
		 * Sanity checks
		 */
		if (!OidIsValid(dict->dicttemplate))
			elog(ERROR, "text search dictionary %u has no template", dictId);

		/*
		 * Retrieve dictionary's template
		 */
		tptmpl = SearchSysCache1(TSTEMPLATEOID,
								 ObjectIdGetDatum(dict->dicttemplate));
		if (!HeapTupleIsValid(tptmpl))
			elog(ERROR, "cache lookup failed for text search template %u",
				 dict->dicttemplate);
		ctemplate = (Form_pg_ts_template) GETSTRUCT(tptmpl);

		/*
		 * Sanity checks.  Report the template's own OID: tmpllexize is the
		 * invalid OID whenever this error fires, so printing it would say
		 * nothing about which template is broken.
		 */
		if (!OidIsValid(ctemplate->tmpllexize))
			elog(ERROR, "text search template %u has no lexize method",
				 dict->dicttemplate);

		if (entry == NULL)
		{
			bool		found;

			/* Now make the cache entry */
			entry = (TSDictionaryCacheEntry *)
				hash_search(TSDictionaryCacheHash,
							(void *) &dictId,
							HASH_ENTER, &found);
			Assert(!found);		/* it wasn't there a moment ago */

			/* Create private memory context the first time through */
			saveCtx = AllocSetContextCreate(CacheMemoryContext,
											NameStr(dict->dictname),
											ALLOCSET_SMALL_MINSIZE,
											ALLOCSET_SMALL_INITSIZE,
											ALLOCSET_SMALL_MAXSIZE);
		}
		else
		{
			/* Clear the existing entry's private context */
			saveCtx = entry->dictCtx;
			MemoryContextResetAndDeleteChildren(saveCtx);
		}

		/* Start from a clean slate; isvalid stays false until the end */
		MemSet(entry, 0, sizeof(TSDictionaryCacheEntry));
		entry->dictId = dictId;
		entry->dictCtx = saveCtx;

		entry->lexizeOid = ctemplate->tmpllexize;

		if (OidIsValid(ctemplate->tmplinit))
		{
			List	   *dictoptions;
			Datum		opt;
			bool		isnull;
			MemoryContext oldcontext;

			/*
			 * Init method runs in dictionary's private memory context, and we
			 * make sure the options are stored there too
			 */
			oldcontext = MemoryContextSwitchTo(entry->dictCtx);

			opt = SysCacheGetAttr(TSDICTOID, tpdict,
								  Anum_pg_ts_dict_dictinitoption,
								  &isnull);
			if (isnull)
				dictoptions = NIL;
			else
				dictoptions = deserialize_deflist(opt);

			entry->dictData =
				DatumGetPointer(OidFunctionCall1(ctemplate->tmplinit,
												 PointerGetDatum(dictoptions)));

			MemoryContextSwitchTo(oldcontext);
		}

		ReleaseSysCache(tptmpl);
		ReleaseSysCache(tpdict);

		fmgr_info_cxt(entry->lexizeOid, &entry->lexize, entry->dictCtx);

		entry->isvalid = true;
	}

	lastUsedDictionary = entry;

	return entry;
}
/*
** The GiST PickSplit method for segments
** We use Guttman's poly time split algorithm
**
** entryvec holds the entries to distribute; v receives the result.  The
** function fills v->spl_left / v->spl_right (freshly palloc'd offset arrays,
** each terminated by a FirstOffsetNumber sentinel), their counts
** v->spl_nleft / v->spl_nright, and the union segments v->spl_ldatum /
** v->spl_rdatum.  It returns v.
*/
GIST_SPLITVEC *
gseg_picksplit(GistEntryVector *entryvec,
			   GIST_SPLITVEC *v)
{
	OffsetNumber i,
				j;
	SEG		   *datum_alpha,
			   *datum_beta;
	SEG		   *datum_l,
			   *datum_r;
	SEG		   *union_d,
			   *union_dl,
			   *union_dr;
	SEG		   *inter_d;
	bool		firsttime;
	float		size_alpha,
				size_beta,
				size_union,
				size_inter;
	float		size_waste,
				waste;
	float		size_l,
				size_r;
	int			nbytes;
	OffsetNumber seed_1 = 1,
				seed_2 = 2;
	OffsetNumber *left,
			   *right;
	OffsetNumber maxoff;

#ifdef GIST_DEBUG
	fprintf(stderr, "picksplit\n");
#endif

	maxoff = entryvec->n - 2;
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	/*
	 * Seed selection: examine every pair (i, j) with i < j <= maxoff and
	 * keep the pair whose union wastes the most space.
	 */
	firsttime = true;
	waste = 0.0;

	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
	{
		datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
		{
			datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key);

			/* compute the wasted space by unioning these guys */
			/* size_waste = size_union - size_inter; */
			union_d = seg_union(datum_alpha, datum_beta);
			rt_seg_size(union_d, &size_union);
			inter_d = seg_inter(datum_alpha, datum_beta);
			rt_seg_size(inter_d, &size_inter);
			size_waste = size_union - size_inter;

			/*
			 * are these a more promising split than what we've already seen?
			 */
			if (size_waste > waste || firsttime)
			{
				waste = size_waste;
				seed_1 = i;
				seed_2 = j;
				firsttime = false;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;

	/* seg_union(x, x) is used to obtain a separate segment equal to x */
	datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
	datum_l = seg_union(datum_alpha, datum_alpha);
	rt_seg_size(datum_l, &size_l);
	datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
	datum_r = seg_union(datum_beta, datum_beta);
	rt_seg_size(datum_r, &size_r);

	/*
	 * Now split up the regions between the two seeds.  An important property
	 * of this split algorithm is that the split vector v has the indices of
	 * items to be split in order in its left and right vectors.  We exploit
	 * this property by doing a merge in the code that actually splits the
	 * page.
	 *
	 * For efficiency, we also place the new index tuple in this loop. This is
	 * handled at the very end, when we have placed all the existing tuples
	 * and i == maxoff + 1.
	 */

	maxoff = OffsetNumberNext(maxoff);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		/*
		 * If we've already decided where to place this item, just put it on
		 * the corresponding list.  Otherwise, we need to figure out which
		 * page needs the least enlargement in order to store the item.
		 */

		if (i == seed_1)
		{
			*left++ = i;
			v->spl_nleft++;
			continue;
		}
		else if (i == seed_2)
		{
			*right++ = i;
			v->spl_nright++;
			continue;
		}

		/* okay, which page needs least enlargement? */
		datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
		union_dl = seg_union(datum_l, datum_alpha);
		union_dr = seg_union(datum_r, datum_alpha);
		rt_seg_size(union_dl, &size_alpha);
		rt_seg_size(union_dr, &size_beta);

		/* pick which page to add it to */
		if (size_alpha - size_l < size_beta - size_r)
		{
			datum_l = union_dl;
			size_l = size_alpha;
			*left++ = i;
			v->spl_nleft++;
		}
		else
		{
			datum_r = union_dr;
			/* size_beta is the size of union_dr; size_alpha belongs to the left */
			size_r = size_beta;
			*right++ = i;
			v->spl_nright++;
		}
	}
	*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */

	v->spl_ldatum = PointerGetDatum(datum_l);
	v->spl_rdatum = PointerGetDatum(datum_r);

	return v;
}
/*
 *	compute_tsvector_stats() -- compute statistics for a tsvector column
 *
 *	Produces the statistics used to estimate the selectivity of @@
 *	operations, plus the fraction of non-null rows and the average width.
 *
 *	Most datatypes get a list of most common values.  For tsvectors that
 *	notion is a bit bogus: two rows will hardly ever hold the same tsvector.
 *	We therefore collect the most common *lexemes* instead, which lets @@
 *	selectivity be estimated much better.  For the same reason the column is
 *	assumed to be unique when the number of distinct values is reported.
 *
 *	The algorithm is Lossy Counting, from "Approximate frequency counts over
 *	data streams" by G. S. Manku and R. Motwani, Proceedings of the 28th
 *	International Conference on Very Large Data Bases, Hong Kong, China,
 *	August 2002, section 4.2.  The paper is available at
 *	http://www.vldb.org/conf/2002/S10P03.pdf
 *
 *	Lossy Counting (LC) in short:
 *	D is a set of triples (e, f, d): e is an element value, f its frequency
 *	(occurrence count) and d the maximum error in f.  D starts out empty and
 *	the elements are processed in batches of size w (the "bucket size").
 *	The current batch number b_current starts at 1.  For each element e,
 *	either its f is incremented (e is already in D) or the new triple
 *	(e, 1, b_current - 1) is inserted.  After every batch D is pruned by
 *	removing all elements with f + d <= b_current.  At the end the elements
 *	with the largest f are gathered.  The LC paper proves error bounds on f
 *	that depend on w, and shows that the table never needs more than a few
 *	times w entries.
 *
 *	D is kept in a hashtable, and the bucket width is statistics_target * 10,
 *	where 10 is an arbitrarily chosen constant meant to approximate the
 *	number of lexemes in a single tsvector.
 */
static void
compute_tsvector_stats(VacAttrStats *stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			num_mcelem;
	int			nulls_seen = 0;
	double		width_sum = 0;

	/* D from the LC algorithm, and the tools to build and walk it */
	HTAB	   *track_tab;
	HASHCTL		track_ctl;
	HASH_SEQ_STATUS track_scan;

	int			b_current;		/* current bucket number of LC */
	int			bucket_width;	/* 'w' of LC */
	int			rowno;
	int			lexeme_ordinal;
	LexemeHashKey probe;
	TrackItem  *tracked;

	/* The MCELEM array should hold statistics_target * 10 lexemes */
	num_mcelem = stats->attr->attstattarget * 10;

	/*
	 * The bucket width equals the target number of result lexemes.  That is
	 * probably about right, though it might need scaling up or down a bit.
	 */
	bucket_width = num_mcelem;

	/*
	 * Build the hashtable.  It lives in local memory, so its initial size is
	 * not critical, and neither locking nor memory management needs any
	 * special attention.
	 */
	MemSet(&track_ctl, 0, sizeof(track_ctl));
	track_ctl.keysize = sizeof(LexemeHashKey);
	track_ctl.entrysize = sizeof(TrackItem);
	track_ctl.hash = lexeme_hash;
	track_ctl.match = lexeme_match;
	track_ctl.hcxt = CurrentMemoryContext;
	track_tab = hash_create("Analyzed lexemes table",
							bucket_width * 4,
							&track_ctl,
							HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);

	/* Counters start at one */
	b_current = 1;
	lexeme_ordinal = 1;

	/* Visit every sampled tsvector */
	for (rowno = 0; rowno < samplerows; rowno++)
	{
		Datum		value;
		bool		isnull;
		TSVector	vector;
		WordEntry  *entries;
		char	   *strings;
		int			j;

		vacuum_delay_point();

		value = fetchfunc(stats, rowno, &isnull);

		/* Nulls are only counted */
		if (isnull)
		{
			nulls_seen++;
			continue;
		}

		/*
		 * Accumulate the width for the average-width figure.  A tsvector is
		 * known to be varlena, and -- as in the regular
		 * compute_minimal_stats function -- the toasted width is what gets
		 * counted.
		 */
		width_sum += VARSIZE_ANY(DatumGetPointer(value));

		/* Detoast the tsvector if that is needed */
		vector = DatumGetTSVector(value);

		/*
		 * Feed all lexemes of this tsvector into the tracking hashtable.
		 * Note: hashtable entries point into the (detoasted) tsvector, so
		 * that storage must stay around until we are finished.
		 */
		strings = STRPTR(vector);
		entries = ARRPTR(vector);

		for (j = 0; j < vector->size; j++)
		{
			WordEntry  *we = &entries[j];
			bool		found;

			/* Build the hash key for this lexeme */
			probe.lexeme = strings + we->pos;
			probe.length = we->len;

			/* Find the lexeme in the hashtable, entering it if it is new */
			tracked = (TrackItem *) hash_search(track_tab,
												(const void *) &probe,
												HASH_ENTER, &found);

			if (!found)
			{
				/* First sighting: set up the new tracking element */
				tracked->frequency = 1;
				tracked->delta = b_current - 1;
			}
			else
			{
				/* Already tracked: just count it */
				tracked->frequency++;
			}

			/* D gets pruned whenever a bucket has been completed */
			if (lexeme_ordinal % bucket_width == 0)
			{
				prune_lexemes_hashtable(track_tab, b_current);
				b_current++;
			}

			/* On to the next lexeme */
			lexeme_ordinal++;
		}
	}

	if (nulls_seen >= samplerows)
	{
		/* Nothing but nulls was found; treat the column as entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		stats->stawidth = 0;	/* "unknown" */
		stats->stadistinct = 0.0;		/* "unknown" */
	}
	else
	{
		/* Real stats are possible: at least one value was not null */
		int			nonnull_cnt = samplerows - nulls_seen;
		int			i;
		TrackItem **sort_table;
		int			track_len;

		stats->stats_valid = true;
		/* The simple ones first: null fraction and average width */
		stats->stanullfrac = (double) nulls_seen / (double) samplerows;
		stats->stawidth = width_sum / (double) nonnull_cnt;

		/* The column is assumed to be unique (see the notes above) */
		stats->stadistinct = -1.0;

		/*
		 * Find the top-N lexemes the simple way: copy the item pointers out
		 * of the hashtable into an array and qsort() it.
		 */
		track_len = hash_get_num_entries(track_tab);

		sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * track_len);

		hash_seq_init(&track_scan, track_tab);
		for (i = 0;
			 (tracked = (TrackItem *) hash_seq_search(&track_scan)) != NULL;
			 i++)
			sort_table[i] = tracked;
		Assert(i == track_len);

		qsort(sort_table, track_len, sizeof(TrackItem *),
			  trackitem_compare_frequencies_desc);

		/* Drop the trailing items that occurred only once */
		while (track_len > 0 && sort_table[track_len - 1]->frequency <= 1)
			track_len--;

		/* Cannot store more lexemes than survived */
		if (num_mcelem > track_len)
			num_mcelem = track_len;

		/* Build the MCELEM slot entry */
		if (num_mcelem > 0)
		{
			MemoryContext old_context;
			Datum	   *mcelem_values;
			float4	   *mcelem_freqs;
			int			minfreq;
			int			maxfreq;

			/*
			 * Remember the smallest and largest frequency that will be
			 * stored; this must happen while the array is still ordered by
			 * frequency.
			 */
			minfreq = sort_table[num_mcelem - 1]->frequency;
			maxfreq = sort_table[0]->frequency;

			/*
			 * The statistics are stored sorted on the lexeme value, comparing
			 * length first and bytes second.  Length goes first because any
			 * consistent ordering will do, and it gives a chance to skip a
			 * strncmp() call.
			 *
			 * Scalar statistics are sorted on frequency instead.  Here the
			 * typical use is looking up the frequency of one specific value
			 * among the most common elements, and presorted values allow a
			 * binary search for that.  See ts_selfuncs.c for a real usage
			 * scenario.
			 */
			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
				  trackitem_compare_lexemes);

			/* The values to be kept have to be copied into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);

			/*
			 * With the array sorted on lexeme value, the minimal and maximal
			 * frequency could only be found by scanning everything.  To avoid
			 * that, mcelem_freqs carries them in two extra trailing cells.
			 */
			mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
			mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));

			for (i = 0; i < num_mcelem; i++)
			{
				TrackItem  *cur = sort_table[i];

				mcelem_values[i] =
					PointerGetDatum(cstring_to_text_with_len(cur->key.lexeme,
															 cur->key.length));
				mcelem_freqs[i] = (double) cur->frequency / (double) nonnull_cnt;
			}
			mcelem_freqs[num_mcelem] = (double) minfreq / (double) nonnull_cnt;
			mcelem_freqs[num_mcelem + 1] = (double) maxfreq / (double) nonnull_cnt;
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCELEM;
			stats->staop[0] = TextEqualOperator;
			stats->stanumbers[0] = mcelem_freqs;
			/* Two more numbers than values: the extra frequency cells */
			stats->numnumbers[0] = num_mcelem + 2;
			stats->stavalues[0] = mcelem_values;
			stats->numvalues[0] = num_mcelem;
			/* The stored values are of type text */
			stats->statypid[0] = TEXTOID;
			stats->statyplen[0] = -1;	/* typlen, -1 for varlena */
			stats->statypbyval[0] = false;
			stats->statypalign[0] = 'i';
		}
	}

	/*
	 * None of the temporary palloc's needs to be cleaned up here.  The
	 * hashtable should go away as well, since it used a child memory context.
	 */
}
/*
 * LexizeExec
 *		Run the pending lexemes held in *ld through the configured
 *		dictionaries and return the next non-NULL dictionary result, or NULL
 *		when no pending lexeme yields one.
 *
 * ld->curDictId selects between two modes:
 *
 *	- InvalidOid ("usual" mode): each lexeme at ld->towork.head is offered to
 *	  the dictionaries mapped to its type, starting at index ld->posDict.
 *	- a valid OID ("multiword" mode): that dictionary has asked for more
 *	  input (dictState.getnext) and is fed the lexemes starting at
 *	  ld->curSub.
 *
 * Mode switches are carried out by updating *ld and calling LexizeExec
 * again.
 *
 * correspondLexem is only handed on to setCorrLex() (and to the recursive
 * calls); callers in this file pass either NULL or the address of a
 * ParsedLex pointer.
 *
 * NOTE(review): RemoveHead, setNewTmpRes, moveToWaste and setCorrLex are
 * defined elsewhere; their exact effect on *ld is not visible here.
 */
static TSLexeme *
LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
{
	int			i;
	ListDictionary *map;
	TSDictionaryCacheEntry *dict;
	TSLexeme   *res;

	if (ld->curDictId == InvalidOid)
	{
		/*
		 * usual mode: dictionary wants only one word, but we should keep in
		 * mind that we should go through all stack
		 */

		while (ld->towork.head)
		{
			ParsedLex  *curVal = ld->towork.head;
			/* current text to lexize; a filter dictionary may replace it */
			char	   *curValLemm = curVal->lemm;
			int			curValLenLemm = curVal->lenlemm;

			/* only address arithmetic here; map is dereferenced after the range check */
			map = ld->cfg->map + curVal->type;

			if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
			{
				/* skip this type of lexeme */
				RemoveHead(ld);
				continue;
			}

			for (i = ld->posDict; i < map->len; i++)
			{
				dict = lookup_ts_dictionary_cache(map->dictIds[i]);

				/* start each dictionary call with a clean state */
				ld->dictState.isend = ld->dictState.getnext = false;
				ld->dictState.private_state = NULL;
				res = (TSLexeme *) DatumGetPointer(FunctionCall4(
															 &(dict->lexize),
											 PointerGetDatum(dict->dictData),
											   PointerGetDatum(curValLemm),
											  Int32GetDatum(curValLenLemm),
											  PointerGetDatum(&ld->dictState)
																 ));

				if (ld->dictState.getnext)
				{
					/*
					 * dictionary wants next word, so setup and store current
					 * position and go to multiword mode
					 */

					ld->curDictId = DatumGetObjectId(map->dictIds[i]);
					ld->posDict = i + 1;
					ld->curSub = curVal->next;
					if (res)
						setNewTmpRes(ld, curVal, res);
					return LexizeExec(ld, correspondLexem);
				}

				if (!res)		/* dictionary doesn't know this lexeme */
					continue;

				if (res->flags & TSL_FILTER)
				{
					/* pass the filtered text on to the following dictionaries */
					curValLemm = res->lexeme;
					curValLenLemm = strlen(res->lexeme);
					continue;
				}

				RemoveHead(ld);
				setCorrLex(ld, correspondLexem);
				return res;
			}

			/* no dictionary produced a result for this lexeme */
			RemoveHead(ld);
		}
	}
	else
	{							/* curDictId is valid */
		dict = lookup_ts_dictionary_cache(ld->curDictId);

		/*
		 * Dictionary ld->curDictId asks us about following words
		 */

		while (ld->curSub)
		{
			ParsedLex  *curVal = ld->curSub;

			map = ld->cfg->map + curVal->type;

			if (curVal->type != 0)
			{
				bool		dictExists = false;

				if (curVal->type >= ld->cfg->lenmap || map->len == 0)
				{
					/* skip this type of lexeme */
					ld->curSub = curVal->next;
					continue;
				}

				/*
				 * We should be sure that current type of lexeme is recognized
				 * by our dictionary: we just check whether it exists in the
				 * list of dictionaries
				 */
				for (i = 0; i < map->len && !dictExists; i++)
					if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
						dictExists = true;

				if (!dictExists)
				{
					/*
					 * Dictionary can't work with current type of lexeme,
					 * return to basic mode and redo all stored lexemes
					 */
					ld->curDictId = InvalidOid;
					return LexizeExec(ld, correspondLexem);
				}
			}

			/* a lexeme of type 0 is reported to the dictionary as end of input */
			ld->dictState.isend = (curVal->type == 0) ? true : false;
			ld->dictState.getnext = false;

			res = (TSLexeme *) DatumGetPointer(FunctionCall4(
															 &(dict->lexize),
											 PointerGetDatum(dict->dictData),
											   PointerGetDatum(curVal->lemm),
											  Int32GetDatum(curVal->lenlemm),
											  PointerGetDatum(&ld->dictState)
															 ));

			if (ld->dictState.getnext)
			{
				/* Dictionary wants one more */
				ld->curSub = curVal->next;
				if (res)
					setNewTmpRes(ld, curVal, res);
				continue;
			}

			if (res || ld->tmpRes)
			{
				/*
				 * Dictionary normalizes lexemes, so we remove from stack all
				 * used lexemes, return to basic mode and redo end of stack
				 * (if it exists)
				 */
				if (res)
				{
					moveToWaste(ld, ld->curSub);
				}
				else
				{
					/* fall back on the result stored by setNewTmpRes earlier */
					res = ld->tmpRes;
					moveToWaste(ld, ld->lastRes);
				}

				/* reset to initial state */
				ld->curDictId = InvalidOid;
				ld->posDict = 0;
				ld->lastRes = NULL;
				ld->tmpRes = NULL;
				setCorrLex(ld, correspondLexem);
				return res;
			}

			/*
			 * Dict doesn't want the next lexeme and didn't recognize
			 * anything, redo from ld->towork.head
			 */
			ld->curDictId = InvalidOid;
			return LexizeExec(ld, correspondLexem);
		}
	}

	setCorrLex(ld, correspondLexem);
	return NULL;
}
Datum _numeric_weighted_mean_intermediate(PG_FUNCTION_ARGS) { WeightedMeanInternalState *state; Datum value, weight, temp_total, old_sum, old_weight; MemoryContext aggcontext, oldcontext; if (!AggCheckCallContext(fcinfo, &aggcontext)) /* cannot be called directly because of internal-type argument */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("_numeric_weighted_mean_intermediate called in non-aggregate context"))); if (PG_ARGISNULL(0)) { oldcontext = MemoryContextSwitchTo(aggcontext); state = (WeightedMeanInternalState *) palloc(sizeof(WeightedMeanInternalState)); state->running_sum = make_numeric(0); state->running_weight = make_numeric(0); MemoryContextSwitchTo(oldcontext); } else state = (WeightedMeanInternalState *) PG_GETARG_POINTER(0); /* * We're non-strict, so we MUST check args for nullity ourselves before * using them. To preserve the behaviour of null inputs, we skip updating * on them. */ if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) PG_RETURN_POINTER(state); /* * We fetch and process the input in the shortlived calling context to * avoid leaking memory in aggcontext per cycle. We force the input to be * detoasted here, too, in the shortlived context. (PG_GETARG_DATUM does * not detoast, but PG_GETARG_NUMERIC does.) */ value = NumericGetDatum(PG_GETARG_NUMERIC(1)); weight = NumericGetDatum(PG_GETARG_NUMERIC(2)); temp_total = DirectFunctionCall2(numeric_mul, value, weight); /* * The new running totals must be allocated in the long-lived context. We * rely on the numeric_* functions to clean up after themselves (which they * currently do, but only if the input is already detoasted); we could play * safe and copy only the final results into aggcontext, but this turns out * to have a measurable performance hit. 
*/ oldcontext = MemoryContextSwitchTo(aggcontext); old_sum = state->running_sum; old_weight = state->running_weight; state->running_sum = DirectFunctionCall2(numeric_add, state->running_sum, temp_total); state->running_weight = DirectFunctionCall2(numeric_add, state->running_weight, weight); pfree(DatumGetPointer(old_sum)); pfree(DatumGetPointer(old_weight)); MemoryContextSwitchTo(oldcontext); PG_RETURN_POINTER(state); }
/*
 * Parse string and lexize words.
 *
 * The text in buf (buflen bytes) is tokenized with the parser of text search
 * configuration cfgId, every token is run through LexizeExec(), and the
 * resulting lexemes are appended to prs->words, which is doubled in size
 * whenever it is full.
 *
 * prs will be filled in.
 */
void
parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
{
	TSConfigCacheEntry *cfg;
	TSParserCacheEntry *prsobj;
	void	   *prsdata;
	LexizeData	ldata;
	TSLexeme   *norms;
	char	   *lemm = NULL;
	int			lenlemm;
	int			type;

	cfg = lookup_ts_config_cache(cfgId);
	prsobj = lookup_ts_parser_cache(cfg->prsId);

	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
													 PointerGetDatum(buf),
													 Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		/* fetch the next token; a type <= 0 ends the outer loop */
		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
										   PointerGetDatum(prsdata),
										   PointerGetDatum(&lemm),
										   PointerGetDatum(&lenlemm)));

		if (type > 0 && lenlemm >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
			continue;
#else
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
#endif
		}

		LexizeAddLemm(&ldata, type, lemm, lenlemm);

		/* drain every result array the dictionaries have ready */
		for (;;)
		{
			TSLexeme   *lex;

			norms = LexizeExec(&ldata, NULL);
			if (norms == NULL)
				break;

			prs->pos++;			/* set pos */

			/* the array ends at the element whose lexeme is NULL */
			for (lex = norms; lex->lexeme; lex++)
			{
				ParsedWord *slot;

				/* grow the output array before taking a pointer into it */
				if (prs->curwords == prs->lenwords)
				{
					prs->lenwords *= 2;
					prs->words = (ParsedWord *) repalloc((void *) prs->words,
										prs->lenwords * sizeof(ParsedWord));
				}

				if (lex->flags & TSL_ADDPOS)
					prs->pos++;

				slot = &prs->words[prs->curwords];
				slot->len = strlen(lex->lexeme);
				slot->word = lex->lexeme;
				slot->nvariant = lex->nvariant;
				slot->flags = lex->flags & TSL_PREFIX;
				slot->alen = 0;
				slot->pos.pos = LIMITPOS(prs->pos);

				prs->curwords++;
			}

			/* only the array is freed; the lexeme strings are now in prs */
			pfree(norms);
		}
	} while (type > 0);

	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
/*
 * ginNewScanKey
 *		Build the GIN scan keys (so->keys) and the entry array (so->entries)
 *		for a scan from the generic scan keys in scan->keyData.
 *
 * so->isVoidRes is set when the query is found to be unsatisfiable.  If no
 * regular key results and the query is not void, one EVERYTHING key is
 * generated to drive a full-index scan.  An error is raised when the query
 * needs null or placeholder entries but the index reports version 0.
 */
void
ginNewScanKey(IndexScanDesc scan)
{
	GinScanOpaque so = (GinScanOpaque) scan->opaque;
	ScanKey		inputKeys = scan->keyData;
	bool		hasNullQuery = false;
	int			keyno;

	/* with no scan keys given, room is still needed for one EVERYTHING key */
	so->keys = (GinScanKey)
		palloc(Max(scan->numberOfKeys, 1) * sizeof(GinScanKeyData));
	so->nkeys = 0;

	/* the array of GinScanEntry pointers starts at 32 slots and can grow */
	so->totalentries = 0;
	so->allocentries = 32;
	so->entries = (GinScanEntry *)
		palloc0(so->allocentries * sizeof(GinScanEntry));

	so->isVoidRes = false;

	for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
	{
		ScanKey		skey = inputKeys + keyno;
		Datum	   *queryValues;
		int32		nQueryValues = 0;
		bool	   *partial_matches = NULL;
		Pointer    *extra_data = NULL;
		bool	   *nullFlags = NULL;
		int32		searchMode = GIN_SEARCH_MODE_DEFAULT;

		/*
		 * GIN-indexable operators are assumed to be strict, so a null query
		 * argument makes the whole query unsatisfiable.
		 */
		if (skey->sk_flags & SK_ISNULL)
		{
			so->isVoidRes = true;
			break;
		}

		/* Let the opclass's extractQueryFn break the query into keys */
		queryValues = (Datum *)
			DatumGetPointer(FunctionCall7(&so->ginstate.extractQueryFn[skey->sk_attno - 1],
										  skey->sk_argument,
										  PointerGetDatum(&nQueryValues),
										  UInt16GetDatum(skey->sk_strategy),
										  PointerGetDatum(&partial_matches),
										  PointerGetDatum(&extra_data),
										  PointerGetDatum(&nullFlags),
										  PointerGetDatum(&searchMode)));

		/*
		 * A bogus searchMode is treated as GIN_SEARCH_MODE_ALL.  Note in
		 * particular that extractQueryFn is not allowed to pick
		 * GIN_SEARCH_MODE_EVERYTHING.
		 */
		if (searchMode < GIN_SEARCH_MODE_DEFAULT ||
			searchMode > GIN_SEARCH_MODE_ALL)
			searchMode = GIN_SEARCH_MODE_ALL;

		/* Any non-default mode needs placeholders to exist in the index */
		if (searchMode != GIN_SEARCH_MODE_DEFAULT)
			hasNullQuery = true;

		if (queryValues == NULL || nQueryValues <= 0)
		{
			/* In default mode, no keys means an unsatisfiable query */
			if (searchMode == GIN_SEARCH_MODE_DEFAULT)
			{
				so->isVoidRes = true;
				break;
			}
			nQueryValues = 0;	/* ensure sane value */
		}

		/*
		 * If extractQueryFn supplied a nullFlags array, force every value in
		 * it to be exactly 0 or 1, which keeps it binary compatible with the
		 * GinNullCategory representation, and note whether any null key is
		 * present.  If it supplied none, make one that says everything is
		 * non-null.
		 */
		if (nullFlags != NULL)
		{
			int32		j;

			for (j = 0; j < nQueryValues; j++)
			{
				if (nullFlags[j])
				{
					nullFlags[j] = true;	/* not any other nonzero value */
					hasNullQuery = true;
				}
			}
		}
		else
			nullFlags = (bool *) palloc0(nQueryValues * sizeof(bool));

		/* from here on the nullFlags serve as category codes */
		ginFillScanKey(so, skey->sk_attno,
					   skey->sk_strategy, searchMode,
					   skey->sk_argument, nQueryValues,
					   queryValues, (GinNullCategory *) nullFlags,
					   partial_matches, extra_data);
	}

	/*
	 * Without any regular scan key, an EVERYTHING scankey is generated to
	 * drive a full-index scan.
	 */
	if (so->nkeys == 0 && !so->isVoidRes)
	{
		hasNullQuery = true;
		ginFillScanKey(so, FirstOffsetNumber,
					   InvalidStrategy, GIN_SEARCH_MODE_EVERYTHING,
					   (Datum) 0, 0,
					   NULL, NULL, NULL, NULL);
	}

	/*
	 * A version 0 index may lack null and placeholder entries, which makes
	 * searches for nulls and full-index scans unreliable.  Refuse to run
	 * such a query against it.
	 */
	if (hasNullQuery && !so->isVoidRes)
	{
		GinStatsData ginStats;

		ginGetStats(scan->indexRelation, &ginStats);
		if (ginStats.ginVersion < 1)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("old GIN indexes do not support whole-index scans nor searches for nulls"),
					 errhint("To fix this, do REINDEX INDEX \"%s\".",
							 RelationGetRelationName(scan->indexRelation))));
	}

	pgstat_count_index_scan(scan->indexRelation);
}
/*
 * hlparsetext
 *		Tokenize buf (buflen bytes) with the parser of text search
 *		configuration cfgId, push each token through the lexize machinery,
 *		and record every result in prs via addHLParsedLex().
 *
 * Tokens are fetched until the parser's token function returns a type that
 * is not positive; that final call is still fed to LexizeAddLemm() before
 * the loop ends.  A token of MAXSTRLEN bytes or more raises an ERROR, or is
 * skipped with a NOTICE when IGNORE_LONGLEXEME is defined.
 */
void
hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
{
	TSConfigCacheEntry *cfg;
	TSParserCacheEntry *parser;
	void	   *parserState;
	LexizeData	ldata;
	ParsedLex  *pending;
	TSLexeme   *normalized;
	char	   *token = NULL;
	int			tokType;
	int			tokLen;

	cfg = lookup_ts_config_cache(cfgId);
	parser = lookup_ts_parser_cache(cfg->prsId);

	parserState = (void *) DatumGetPointer(FunctionCall2(&(parser->prsstart),
														 PointerGetDatum(buf),
														 Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		tokType = DatumGetInt32(FunctionCall3(&(parser->prstoken),
											  PointerGetDatum(parserState),
											  PointerGetDatum(&token),
											  PointerGetDatum(&tokLen)));

		if (tokType > 0 && tokLen >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
			continue;
#else
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
#endif
		}

		LexizeAddLemm(&ldata, tokType, token, tokLen);

		/*
		 * Drain everything the lexizer has ready.  The call that yields no
		 * lexemes (NULL) is recorded as well, and then ends the draining.
		 */
		for (;;)
		{
			normalized = LexizeExec(&ldata, &pending);
			addHLParsedLex(prs, query, pending, normalized);
			if (normalized == NULL)
				break;
		}
	} while (tokType > 0);

	FunctionCall1(&(parser->prsend), PointerGetDatum(parserState));
}
/* ---------------------------------------------------------------- * ProcedureCreate * * Note: allParameterTypes, parameterModes, parameterNames, and proconfig * are either arrays of the proper types or NULL. We declare them Datum, * not "ArrayType *", to avoid importing array.h into pg_proc_fn.h. * ---------------------------------------------------------------- */ Oid ProcedureCreate(const char *procedureName, Oid procNamespace, bool replace, bool returnsSet, Oid returnType, Oid languageObjectId, Oid languageValidator, const char *prosrc, const char *probin, bool isAgg, bool isWindowFunc, bool security_definer, bool isStrict, char volatility, oidvector *parameterTypes, Datum allParameterTypes, Datum parameterModes, Datum parameterNames, List *parameterDefaults, Datum proconfig, float4 procost, float4 prorows) { Oid retval; int parameterCount; int allParamCount; Oid *allParams; bool genericInParam = false; bool genericOutParam = false; bool internalInParam = false; bool internalOutParam = false; Oid variadicType = InvalidOid; Oid proowner = GetUserId(); Acl *proacl = NULL; Relation rel; HeapTuple tup; HeapTuple oldtup; bool nulls[Natts_pg_proc]; Datum values[Natts_pg_proc]; bool replaces[Natts_pg_proc]; Oid relid; NameData procname; TupleDesc tupDesc; bool is_update; ObjectAddress myself, referenced; int i; /* * sanity checks */ Assert(PointerIsValid(prosrc)); parameterCount = parameterTypes->dim1; if (parameterCount < 0 || parameterCount > FUNC_MAX_ARGS) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_ARGUMENTS), errmsg_plural("functions cannot have more than %d argument", "functions cannot have more than %d arguments", FUNC_MAX_ARGS, FUNC_MAX_ARGS))); /* note: the above is correct, we do NOT count output arguments */ if (allParameterTypes != PointerGetDatum(NULL)) { /* * We expect the array to be a 1-D OID array; verify that. We don't * need to use deconstruct_array() since the array data is just going * to look like a C array of OID values. 
*/ ArrayType *allParamArray = (ArrayType *) DatumGetPointer(allParameterTypes); allParamCount = ARR_DIMS(allParamArray)[0]; if (ARR_NDIM(allParamArray) != 1 || allParamCount <= 0 || ARR_HASNULL(allParamArray) || ARR_ELEMTYPE(allParamArray) != OIDOID) elog(ERROR, "allParameterTypes is not a 1-D Oid array"); allParams = (Oid *) ARR_DATA_PTR(allParamArray); Assert(allParamCount >= parameterCount); /* we assume caller got the contents right */ } else { allParamCount = parameterCount; allParams = parameterTypes->values; } /* * Do not allow polymorphic return type unless at least one input argument * is polymorphic. Also, do not allow return type INTERNAL unless at * least one input argument is INTERNAL. */ for (i = 0; i < parameterCount; i++) { switch (parameterTypes->values[i]) { case ANYARRAYOID: case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: genericInParam = true; break; case INTERNALOID: internalInParam = true; break; } } if (allParameterTypes != PointerGetDatum(NULL)) { for (i = 0; i < allParamCount; i++) { /* * We don't bother to distinguish input and output params here, so * if there is, say, just an input INTERNAL param then we will * still set internalOutParam. This is OK since we don't really * care. 
*/ switch (allParams[i]) { case ANYARRAYOID: case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: genericOutParam = true; break; case INTERNALOID: internalOutParam = true; break; } } } if ((IsPolymorphicType(returnType) || genericOutParam) && !genericInParam) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot determine result data type"), errdetail("A function returning a polymorphic type must have at least one polymorphic argument."))); if ((returnType == INTERNALOID || internalOutParam) && !internalInParam) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("unsafe use of pseudo-type \"internal\""), errdetail("A function returning \"internal\" must have at least one \"internal\" argument."))); /* * don't allow functions of complex types that have the same name as * existing attributes of the type */ if (parameterCount == 1 && OidIsValid(parameterTypes->values[0]) && (relid = typeidTypeRelid(parameterTypes->values[0])) != InvalidOid && get_attnum(relid, procedureName) != InvalidAttrNumber) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_COLUMN), errmsg("\"%s\" is already an attribute of type %s", procedureName, format_type_be(parameterTypes->values[0])))); if (parameterModes != PointerGetDatum(NULL)) { /* * We expect the array to be a 1-D CHAR array; verify that. We don't * need to use deconstruct_array() since the array data is just going * to look like a C array of char values. */ ArrayType *modesArray = (ArrayType *) DatumGetPointer(parameterModes); char *modes; if (ARR_NDIM(modesArray) != 1 || ARR_DIMS(modesArray)[0] != allParamCount || ARR_HASNULL(modesArray) || ARR_ELEMTYPE(modesArray) != CHAROID) elog(ERROR, "parameterModes is not a 1-D char array"); modes = (char *) ARR_DATA_PTR(modesArray); /* * Only the last input parameter can be variadic; if it is, save its * element type. Errors here are just elog since caller should have * checked this already. 
*/ for (i = 0; i < allParamCount; i++) { switch (modes[i]) { case PROARGMODE_IN: case PROARGMODE_INOUT: if (OidIsValid(variadicType)) elog(ERROR, "variadic parameter must be last"); break; case PROARGMODE_OUT: case PROARGMODE_TABLE: /* okay */ break; case PROARGMODE_VARIADIC: if (OidIsValid(variadicType)) elog(ERROR, "variadic parameter must be last"); switch (allParams[i]) { case ANYOID: variadicType = ANYOID; break; case ANYARRAYOID: variadicType = ANYELEMENTOID; break; default: variadicType = get_element_type(allParams[i]); if (!OidIsValid(variadicType)) elog(ERROR, "variadic parameter is not an array"); break; } break; default: elog(ERROR, "invalid parameter mode '%c'", modes[i]); break; } } } /* * All seems OK; prepare the data to be inserted into pg_proc. */ for (i = 0; i < Natts_pg_proc; ++i) { nulls[i] = false; values[i] = (Datum) 0; replaces[i] = true; } namestrcpy(&procname, procedureName); values[Anum_pg_proc_proname - 1] = NameGetDatum(&procname); values[Anum_pg_proc_pronamespace - 1] = ObjectIdGetDatum(procNamespace); values[Anum_pg_proc_proowner - 1] = ObjectIdGetDatum(proowner); values[Anum_pg_proc_prolang - 1] = ObjectIdGetDatum(languageObjectId); values[Anum_pg_proc_procost - 1] = Float4GetDatum(procost); values[Anum_pg_proc_prorows - 1] = Float4GetDatum(prorows); values[Anum_pg_proc_provariadic - 1] = ObjectIdGetDatum(variadicType); values[Anum_pg_proc_proisagg - 1] = BoolGetDatum(isAgg); values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(isWindowFunc); values[Anum_pg_proc_prosecdef - 1] = BoolGetDatum(security_definer); values[Anum_pg_proc_proisstrict - 1] = BoolGetDatum(isStrict); values[Anum_pg_proc_proretset - 1] = BoolGetDatum(returnsSet); values[Anum_pg_proc_provolatile - 1] = CharGetDatum(volatility); values[Anum_pg_proc_pronargs - 1] = UInt16GetDatum(parameterCount); values[Anum_pg_proc_pronargdefaults - 1] = UInt16GetDatum(list_length(parameterDefaults)); values[Anum_pg_proc_prorettype - 1] = ObjectIdGetDatum(returnType); 
values[Anum_pg_proc_proargtypes - 1] = PointerGetDatum(parameterTypes); if (allParameterTypes != PointerGetDatum(NULL)) values[Anum_pg_proc_proallargtypes - 1] = allParameterTypes; else nulls[Anum_pg_proc_proallargtypes - 1] = true; if (parameterModes != PointerGetDatum(NULL)) values[Anum_pg_proc_proargmodes - 1] = parameterModes; else nulls[Anum_pg_proc_proargmodes - 1] = true; if (parameterNames != PointerGetDatum(NULL)) values[Anum_pg_proc_proargnames - 1] = parameterNames; else nulls[Anum_pg_proc_proargnames - 1] = true; if (parameterDefaults != NIL) values[Anum_pg_proc_proargdefaults - 1] = CStringGetTextDatum(nodeToString(parameterDefaults)); else nulls[Anum_pg_proc_proargdefaults - 1] = true; values[Anum_pg_proc_prosrc - 1] = CStringGetTextDatum(prosrc); if (probin) values[Anum_pg_proc_probin - 1] = CStringGetTextDatum(probin); else nulls[Anum_pg_proc_probin - 1] = true; if (proconfig != PointerGetDatum(NULL)) values[Anum_pg_proc_proconfig - 1] = proconfig; else nulls[Anum_pg_proc_proconfig - 1] = true; /* proacl will be determined later */ rel = heap_open(ProcedureRelationId, RowExclusiveLock); tupDesc = RelationGetDescr(rel); /* Check for pre-existing definition */ oldtup = SearchSysCache3(PROCNAMEARGSNSP, PointerGetDatum(procedureName), PointerGetDatum(parameterTypes), ObjectIdGetDatum(procNamespace)); if (HeapTupleIsValid(oldtup)) { /* There is one; okay to replace it? */ Form_pg_proc oldproc = (Form_pg_proc) GETSTRUCT(oldtup); Datum proargnames; bool isnull; if (!replace) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_FUNCTION), errmsg("function \"%s\" already exists with same argument types", procedureName))); if (!pg_proc_ownercheck(HeapTupleGetOid(oldtup), proowner)) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PROC, procedureName); /* * Not okay to change the return type of the existing proc, since * existing rules, views, etc may depend on the return type. 
*/ if (returnType != oldproc->prorettype || returnsSet != oldproc->proretset) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot change return type of existing function"), errhint("Use DROP FUNCTION first."))); /* * If it returns RECORD, check for possible change of record type * implied by OUT parameters */ if (returnType == RECORDOID) { TupleDesc olddesc; TupleDesc newdesc; olddesc = build_function_result_tupdesc_t(oldtup); newdesc = build_function_result_tupdesc_d(allParameterTypes, parameterModes, parameterNames); if (olddesc == NULL && newdesc == NULL) /* ok, both are runtime-defined RECORDs */ ; else if (olddesc == NULL || newdesc == NULL || !equalTupleDescs(olddesc, newdesc)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot change return type of existing function"), errdetail("Row type defined by OUT parameters is different."), errhint("Use DROP FUNCTION first."))); } /* * If there were any named input parameters, check to make sure the * names have not been changed, as this could break existing calls. We * allow adding names to formerly unnamed parameters, though. 
*/ proargnames = SysCacheGetAttr(PROCNAMEARGSNSP, oldtup, Anum_pg_proc_proargnames, &isnull); if (!isnull) { Datum proargmodes; char **old_arg_names; char **new_arg_names; int n_old_arg_names; int n_new_arg_names; int j; proargmodes = SysCacheGetAttr(PROCNAMEARGSNSP, oldtup, Anum_pg_proc_proargmodes, &isnull); if (isnull) proargmodes = PointerGetDatum(NULL); /* just to be sure */ n_old_arg_names = get_func_input_arg_names(proargnames, proargmodes, &old_arg_names); n_new_arg_names = get_func_input_arg_names(parameterNames, parameterModes, &new_arg_names); for (j = 0; j < n_old_arg_names; j++) { if (old_arg_names[j] == NULL) continue; if (j >= n_new_arg_names || new_arg_names[j] == NULL || strcmp(old_arg_names[j], new_arg_names[j]) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot change name of input parameter \"%s\"", old_arg_names[j]), errhint("Use DROP FUNCTION first."))); } } /* * If there are existing defaults, check compatibility: redefinition * must not remove any defaults nor change their types. (Removing a * default might cause a function to fail to satisfy an existing call. * Changing type would only be possible if the associated parameter is * polymorphic, and in such cases a change of default type might alter * the resolved output type of existing calls.) 
*/ if (oldproc->pronargdefaults != 0) { Datum proargdefaults; List *oldDefaults; ListCell *oldlc; ListCell *newlc; if (list_length(parameterDefaults) < oldproc->pronargdefaults) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot remove parameter defaults from existing function"), errhint("Use DROP FUNCTION first."))); proargdefaults = SysCacheGetAttr(PROCNAMEARGSNSP, oldtup, Anum_pg_proc_proargdefaults, &isnull); Assert(!isnull); oldDefaults = (List *) stringToNode(TextDatumGetCString(proargdefaults)); Assert(IsA(oldDefaults, List)); Assert(list_length(oldDefaults) == oldproc->pronargdefaults); /* new list can have more defaults than old, advance over 'em */ newlc = list_head(parameterDefaults); for (i = list_length(parameterDefaults) - oldproc->pronargdefaults; i > 0; i--) newlc = lnext(newlc); foreach(oldlc, oldDefaults) { Node *oldDef = (Node *) lfirst(oldlc); Node *newDef = (Node *) lfirst(newlc); if (exprType(oldDef) != exprType(newDef)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot change data type of existing parameter default value"), errhint("Use DROP FUNCTION first."))); newlc = lnext(newlc); } }
/*
 * Coerce a Datum holding a TupleDesc pointer into a jvalue: the pointer is
 * passed to TupleDesc_create() and the result is stored in the jvalue's
 * object member (.l).  The "self" argument is not used.
 */
static jvalue
_TupleDesc_coerceDatum(Type self, Datum arg)
{
	TupleDesc	desc = (TupleDesc) DatumGetPointer(arg);
	jvalue		wrapped;

	wrapped.l = TupleDesc_create(desc);
	return wrapped;
}
/*
 * compileTheLexeme
 *		Rebuild d->wrds from the raw sample words it currently holds.
 *
 * Each sample word is either the stop-word marker "?" or is run through the
 * subdictionary's lexize function; whatever that yields is appended to a new
 * array through addCompiledLexeme().  The old words (lexeme and entries) and
 * the old array are freed.  If more than one word results, the new array is
 * sorted with cmpTheLexeme, entries that compare equal under cmpLexeme are
 * merged in place, and the array is shrunk to the surviving count.
 *
 * Raises ERROR if the subdictionary does not recognize a sample word, or
 * reports it as a stop word.
 */
static void
compileTheLexeme(DictThesaurus *d)
{
	int			i,
				nnw = 0,		/* number of compiled words so far */
				tnm = 16;		/* allocated size of newwrds, in elements */
	TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
			   *ptrwrds;

	for (i = 0; i < d->nwrds; i++)
	{
		TSLexeme   *ptr;

		if (strcmp(d->wrds[i].lexeme, "?") == 0)		/* Is stop word marker? */
			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
		else
		{
			/* ask the subdictionary to normalize the sample word */
			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
									   PointerGetDatum(d->subdict->dictData),
										  PointerGetDatum(d->wrds[i].lexeme),
									Int32GetDatum(strlen(d->wrds[i].lexeme)),
													 PointerGetDatum(NULL)));

			if (!ptr)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
								d->wrds[i].lexeme,
								d->wrds[i].entries->idsubst + 1)));
			else if (!(ptr->lexeme))
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
								d->wrds[i].lexeme,
								d->wrds[i].entries->idsubst + 1),
						 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
			else
			{
				/*
				 * The result is an array terminated by an element whose
				 * lexeme is NULL.  Handle it one variant (run of equal
				 * nvariant values) at a time.
				 */
				while (ptr->lexeme)
				{
					TSLexeme   *remptr = ptr + 1;
					int			tnvar = 1;
					int			curvar = ptr->nvariant;

					/* compute n words in one variant */
					while (remptr->lexeme)
					{
						if (remptr->nvariant != (remptr - 1)->nvariant)
							break;
						tnvar++;
						remptr++;
					}

					/* add every lexeme of this variant, tagged with tnvar */
					remptr = ptr;
					while (remptr->lexeme && remptr->nvariant == curvar)
					{
						newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
						remptr++;
					}

					/* continue with the first lexeme of the next variant */
					ptr = remptr;
				}
			}
		}

		/* the raw sample word is no longer needed */
		pfree(d->wrds[i].lexeme);
		pfree(d->wrds[i].entries);
	}

	/* swap in the compiled array */
	if (d->wrds)
		pfree(d->wrds);
	d->wrds = newwrds;
	d->nwrds = nnw;
	d->ntwrds = tnm;

	if (d->nwrds > 1)
	{
		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

		/*
		 * uniq: newwrds points at the last kept element, ptrwrds at the
		 * element being examined.
		 */
		newwrds = d->wrds;
		ptrwrds = d->wrds + 1;
		while (ptrwrds - d->wrds < d->nwrds)
		{
			if (cmpLexeme(ptrwrds, newwrds) == 0)
			{
				/*
				 * Same lexeme as the kept element.  If cmpLexemeInfo reports
				 * a difference, push this element's entry onto the front of
				 * the kept element's entry list; otherwise discard it.
				 */
				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
				{
					ptrwrds->entries->nextentry = newwrds->entries;
					newwrds->entries = ptrwrds->entries;
				}
				else
					pfree(ptrwrds->entries);

				if (ptrwrds->lexeme)
					pfree(ptrwrds->lexeme);
			}
			else
			{
				/* different lexeme: keep it in the next free slot */
				newwrds++;
				*newwrds = *ptrwrds;
			}

			ptrwrds++;
		}

		/* shrink the array to the number of elements kept */
		d->nwrds = newwrds - d->wrds + 1;
		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
	}
}
/* ----------------
 *		printtup --- print a tuple in protocol 3.0
 *
 * Emits one 'D' (DataRow) message: a 2-byte column count followed, for each
 * column, by a 4-byte length and the value bytes.  A null column is sent as
 * length -1 with no data.  Each value is rendered with the column's text
 * output function when its format code is 0, and with its binary send
 * function otherwise.
 * ----------------
 */
static void
printtup(TupleTableSlot *slot, DestReceiver *self)
{
	DR_printtup *receiver = (DR_printtup *) self;
	TupleDesc	desc = slot->tts_tupleDescriptor;
	int			ncols = desc->natts;
	StringInfoData msg;
	int			col;

	/* (re)build the per-column lookup info when the descriptor changed */
	if (receiver->attrinfo != desc || receiver->nattrs != ncols)
		printtup_prepare_info(receiver, desc, ncols);

	/* all columns must be available in tts_values / tts_isnull */
	slot_getallattrs(slot);

	/* start the DataRow message with the column count */
	pq_beginmessage(&msg, 'D');
	pq_sendint(&msg, ncols, 2);

	for (col = 0; col < ncols; col++)
	{
		PrinttupAttrInfo *info = &receiver->myinfo[col];
		Datum		stored;
		Datum		usable;

		if (slot->tts_isnull[col])
		{
			pq_sendint(&msg, -1, 4);
			continue;
		}

		/*
		 * Detoast varlena values up front, so that the type's output
		 * routine does not leak a detoasted copy of its own.
		 */
		stored = slot->tts_values[col];
		usable = info->typisvarlena
			? PointerGetDatum(PG_DETOAST_DATUM(stored))
			: stored;

		if (info->format != 0)
		{
			/* binary format */
			bytea	   *sendbuf = SendFunctionCall(&info->finfo, usable);
			int			payload = VARSIZE(sendbuf) - VARHDRSZ;

			pq_sendint(&msg, payload, 4);
			pq_sendbytes(&msg, VARDATA(sendbuf), payload);
			pfree(sendbuf);
		}
		else
		{
			/* text format */
			char	   *text = OutputFunctionCall(&info->finfo, usable);

			pq_sendcountedtext(&msg, text, strlen(text), false);
			pfree(text);
		}

		/* release the detoasted copy, if one was made */
		if (usable != stored)
			pfree(DatumGetPointer(usable));
	}

	pq_endmessage(&msg);
}
/*
 * gbt_num_compress
 *		Build the index key for a numeric-type GiST entry.
 *
 * A non-leaf entry is returned unchanged.  For a leaf entry, a zeroed key of
 * tinfo->indexsize bytes is allocated and the value (tinfo->size bytes) is
 * copied into it twice, at offset 0 and at offset tinfo->size; the key is
 * then wrapped in a newly allocated GISTENTRY.  The incoming retval pointer
 * is never read.
 */
GISTENTRY *
gbt_num_compress(GISTENTRY *retval, GISTENTRY *entry, const gbtree_ninfo *tinfo)
{
	/* scratch space for values that arrive packed inside the Datum */
	union
	{
		int16		i2;
		int32		i4;
		int64		i8;
		float4		f4;
		float8		f8;
		DateADT		dt;
		TimeADT		tm;
		Timestamp	ts;
		Cash		ch;
	}			scratch;
	GBT_NUMKEY *key;
	void	   *src = NULL;
	GISTENTRY  *result;

	if (!entry->leafkey)
		return entry;

	key = (GBT_NUMKEY *) palloc0(tinfo->indexsize);

	/* find the bytes of the value, according to the declared key type */
	switch (tinfo->t)
	{
		case gbt_t_int2:
			scratch.i2 = DatumGetInt16(entry->key);
			src = &scratch.i2;
			break;
		case gbt_t_int4:
			scratch.i4 = DatumGetInt32(entry->key);
			src = &scratch.i4;
			break;
		case gbt_t_int8:
			scratch.i8 = DatumGetInt64(entry->key);
			src = &scratch.i8;
			break;
		case gbt_t_oid:
			scratch.i4 = DatumGetObjectId(entry->key);
			src = &scratch.i4;
			break;
		case gbt_t_float4:
			scratch.f4 = DatumGetFloat4(entry->key);
			src = &scratch.f4;
			break;
		case gbt_t_float8:
			scratch.f8 = DatumGetFloat8(entry->key);
			src = &scratch.f8;
			break;
		case gbt_t_date:
			scratch.dt = DatumGetDateADT(entry->key);
			src = &scratch.dt;
			break;
		case gbt_t_time:
			scratch.tm = DatumGetTimeADT(entry->key);
			src = &scratch.tm;
			break;
		case gbt_t_ts:
			scratch.ts = DatumGetTimestamp(entry->key);
			src = &scratch.ts;
			break;
		case gbt_t_cash:
			scratch.ch = DatumGetCash(entry->key);
			src = &scratch.ch;
			break;
		default:
			/* any other type: the Datum is taken as a pointer to the value */
			src = DatumGetPointer(entry->key);
	}

	Assert(tinfo->indexsize >= 2 * tinfo->size);

	/* the value is stored twice, back to back */
	memcpy((void *) key, src, tinfo->size);
	memcpy((void *) (key + tinfo->size), src, tinfo->size);

	result = palloc(sizeof(GISTENTRY));
	gistentryinit(*result, PointerGetDatum(key), entry->rel, entry->page,
				  entry->offset, FALSE);

	return result;
}
/* ----------------
 *		printtup_internal_20 --- print a binary tuple in protocol 2.0
 *
 * The message type is 'B' rather than 'D', marking a tuple in internal
 * (binary) form.  The message carries a bitmap with one bit per column
 * (most significant bit first, set when the column is not null), followed
 * by a 4-byte length and the binary send-function output of every non-null
 * column.  Null columns contribute nothing beyond their bitmap bit.
 * ----------------
 */
static void
printtup_internal_20(TupleTableSlot *slot, DestReceiver *self)
{
	DR_printtup *receiver = (DR_printtup *) self;
	TupleDesc	desc = slot->tts_tupleDescriptor;
	int			ncols = desc->natts;
	StringInfoData msg;
	int			col;
	int			bitmapByte;
	int			bit;

	/* (re)build the per-column lookup info when the descriptor changed */
	if (receiver->attrinfo != desc || receiver->nattrs != ncols)
		printtup_prepare_info(receiver, desc, ncols);

	/* all columns must be available in tts_values / tts_isnull */
	slot_getallattrs(slot);

	/* announce a new tuple in binary style */
	pq_beginmessage(&msg, 'B');

	/* not-null bitmap, emitted one byte at a time */
	bitmapByte = 0;
	bit = 0x80;
	for (col = 0; col < ncols; col++)
	{
		if (!slot->tts_isnull[col])
			bitmapByte |= bit;

		bit >>= 1;
		if (bit == 0)
		{
			/* byte is full: send it and start the next one */
			pq_sendint(&msg, bitmapByte, 1);
			bitmapByte = 0;
			bit = 0x80;
		}
	}
	if (bit != 0x80)
		pq_sendint(&msg, bitmapByte, 1);	/* trailing partial byte */

	/* column values */
	for (col = 0; col < ncols; col++)
	{
		PrinttupAttrInfo *info = &receiver->myinfo[col];
		Datum		stored;
		Datum		usable;
		bytea	   *sendbuf;
		int			payload;

		if (slot->tts_isnull[col])
			continue;

		Assert(info->format == 1);

		/*
		 * Detoast varlena values up front, so that the type's send routine
		 * does not leak a detoasted copy of its own.
		 */
		stored = slot->tts_values[col];
		usable = info->typisvarlena
			? PointerGetDatum(PG_DETOAST_DATUM(stored))
			: stored;

		sendbuf = SendFunctionCall(&info->finfo, usable);

		/* the send function's result is assumed not to be toasted */
		payload = VARSIZE(sendbuf) - VARHDRSZ;
		pq_sendint(&msg, payload, 4);
		pq_sendbytes(&msg, VARDATA(sendbuf), payload);
		pfree(sendbuf);

		/* release the detoasted copy, if one was made */
		if (usable != stored)
			pfree(DatumGetPointer(usable));
	}

	pq_endmessage(&msg);
}
/*
 * Add an attribute to the hash calculation.
 *
 * For the given type, this picks a byte range (buf, len) that represents the
 * datum and passes it to hashFn exactly once.  Depending on the type, the
 * range is a local normalized copy (e.g. integers widened to int64, float
 * zero canonicalized, invalid time values replaced by a constant) or points
 * directly into the datum's own storage.  Any temporary copy recorded in
 * "tofree" is released after hashFn returns.
 *
 * **IMPORTANT: any new hard coded support for a data type in here
 * must be added to isGreenplumDbHashable() below!
 *
 * Note that the caller should provide the base type if the datum is
 * of a domain type. It is quite expensive to call get_typtype() and
 * getBaseType() here since this function gets called a lot for the
 * same set of Datums.
 *
 * Raises ERROR for any type that has no case in the switch below.
 *
 * @param datum the value to hash.
 * @param type type OID that selects how datum is interpreted.
 * @param hashFn called to update the hash value.
 * @param clientData passed to hashFn.
 */
void
hashDatum(Datum datum, Oid type, datumHashFunction hashFn, void *clientData)
{

	void	   *buf = NULL;		/* pointer to the data */
	size_t		len = 0;		/* length for the data buffer */

	int64		intbuf;			/* an 8 byte buffer for all integer sizes */

	float4		buf_f4;
	float8		buf_f8;
	Timestamp	tsbuf;			/* timestamp data type is either a double or
								 * int8 (determined in compile time) */
	TimestampTz tstzbuf;
	DateADT		datebuf;
	TimeADT		timebuf;
	TimeTzADT  *timetzptr;
	Interval   *intervalptr;
	AbsoluteTime abstime_buf;
	RelativeTime reltime_buf;
	TimeInterval tinterval;
	AbsoluteTime tinterval_len;

	Numeric		num;
	bool		bool_buf;
	char        char_buf;
	Name		namebuf;

	ArrayType  *arrbuf;
	inet		 *inetptr; /* inet/cidr */
	unsigned char inet_hkey[sizeof(inet_struct)];
	macaddr		*macptr; /* MAC address */

	VarBit		*vbitptr;

	int2vector *i2vec_buf;
	oidvector  *oidvec_buf;

	Cash		cash_buf;
	AclItem	   *aclitem_ptr;
	uint32		aclitem_buf;

	/*
	 * special case buffers
	 */
	uint32		nanbuf;			/* constant hashed in place of a NaN numeric */
	uint32		invalidbuf;		/* constant hashed in place of invalid times */

	void *tofree = NULL;		/* temporary copy to pfree after hashing */

	/*
	 * Select the hash to be performed according to the field type we are adding to the
	 * hash.
	 */
	switch (type)
	{
		/*
		 * ======= NUMERIC TYPES ========
		 */
		case INT2OID:			/* -32 thousand to 32 thousand, 2-byte storage */
			intbuf = (int64) DatumGetInt16(datum);		/* cast to 8 byte before
														 * hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT4OID:			/* -2 billion to 2 billion integer, 4-byte
								 * storage */
			intbuf = (int64) DatumGetInt32(datum);		/* cast to 8 byte before
														 * hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case INT8OID:			/* ~18 digit integer, 8-byte storage */
			intbuf = DatumGetInt64(datum);		/* already 8 bytes; hashed the
												 * same way as INT2/INT4 */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

		case FLOAT4OID: /* single-precision floating point number,
								 * 4-byte storage */
			buf_f4 = DatumGetFloat4(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal.  We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f4 == (float4) 0)
				buf_f4 = 0.0;

			buf = &buf_f4;
			len = sizeof(buf_f4);
			break;

		case FLOAT8OID: /* double-precision floating point number,
								 * 8-byte storage */
			buf_f8 = DatumGetFloat8(datum);

			/*
			 * On IEEE-float machines, minus zero and zero have different bit
			 * patterns but should compare as equal.  We must ensure that they
			 * have the same hash value, which is most easily done this way:
			 */
			if (buf_f8 == (float8) 0)
				buf_f8 = 0.0;

			buf = &buf_f8;
			len = sizeof(buf_f8);
			break;

		case NUMERICOID:

			num = DatumGetNumeric(datum);

			if (NUMERIC_IS_NAN(num))
			{
				/* every NaN hashes to the same constant */
				nanbuf = NAN_VAL;
				buf = &nanbuf;
				len = sizeof(nanbuf);
			}
			else
				/* not a nan */
			{
				/* hash only the bytes after the numeric header */
				buf = num->n_data;
				len = (VARSIZE(num) - NUMERIC_HDRSZ);
			}

            /*
             * If we did a pg_detoast_datum, we need to remember to pfree,
             * or we will leak memory.  Because of the 1-byte varlena header stuff.
             */
            if (num != DatumGetPointer(datum))
                tofree = num;

			break;

		/*
		 * ====== CHARACTER TYPES =======
		 */
		case CHAROID:			/* char(1), single character */
			char_buf = DatumGetChar(datum);
			buf = &char_buf;
			len = 1;
			break;

		case BPCHAROID: /* char(n), blank-padded string, fixed storage */
		case TEXTOID:   /* text */
		case VARCHAROID: /* varchar */
		case BYTEAOID:   /* bytea */
			{
				int tmplen;

				/* sets buf and tmplen, and tofree if a copy had to be made */
				varattrib_untoast_ptr_len(datum, (char **) &buf, &tmplen, &tofree);
				/* adjust length to not include trailing blanks */
				if (type != BYTEAOID && tmplen > 1)
					tmplen = ignoreblanks((char *) buf, tmplen);

				len = tmplen;
				break;
			}

		case NAMEOID:
			namebuf = DatumGetName(datum);
			len = NAMEDATALEN;
			buf = NameStr(*namebuf);

			/* adjust length to not include trailing blanks */
			if (len > 1)
				len = ignoreblanks((char *) buf, len);
			break;

		/*
		 * ====== OBJECT IDENTIFIER TYPES ======
		 */
		case OIDOID:				/* object identifier(oid), maximum 4 billion */
		case REGPROCOID:			/* function name */
		case REGPROCEDUREOID:		/* function name with argument types */
		case REGOPEROID:			/* operator name */
		case REGOPERATOROID:		/* operator with argument types */
		case REGCLASSOID:			/* relation name */
		case REGTYPEOID:			/* data type name */
			intbuf = (int64) DatumGetUInt32(datum);	/* cast to 8 byte before hashing */
			buf = &intbuf;
			len = sizeof(intbuf);
			break;

        case TIDOID:                /* tuple id (6 bytes) */
            buf = DatumGetPointer(datum);
            len = SizeOfIptrData;
            break;

		/*
		 * ====== DATE/TIME TYPES ======
		 */
		case TIMESTAMPOID:		/* date and time */
			tsbuf = DatumGetTimestamp(datum);
			buf = &tsbuf;
			len = sizeof(tsbuf);
			break;

		case TIMESTAMPTZOID:	/* date and time with time zone */
			tstzbuf = DatumGetTimestampTz(datum);
			buf = &tstzbuf;
			len = sizeof(tstzbuf);
			break;

		case DATEOID:			/* ANSI SQL date */
			datebuf = DatumGetDateADT(datum);
			buf = &datebuf;
			len = sizeof(datebuf);
			break;

		case TIMEOID:			/* hh:mm:ss, ANSI SQL time */
			timebuf = DatumGetTimeADT(datum);
			buf = &timebuf;
			len = sizeof(timebuf);
			break;

		case TIMETZOID: /* time with time zone */

			/*
			 * will not compare to TIMEOID on equal values.
			 * Postgres never attempts to compare the two as well.
			 */
			timetzptr = DatumGetTimeTzADTP(datum);
			buf = (unsigned char *) timetzptr;

			/*
			 * Specify hash length as sizeof(double) + sizeof(int4), not as
			 * sizeof(TimeTzADT), so that any garbage pad bytes in the structure
			 * won't be included in the hash!
			 */
			len = sizeof(timetzptr->time) + sizeof(timetzptr->zone);
			break;

		case INTERVALOID:		/* @ <number> <units>, time interval */
			intervalptr = DatumGetIntervalP(datum);
			buf = (unsigned char *) intervalptr;

			/*
			 * Specify hash length as sizeof(double) + sizeof(int4), not as
			 * sizeof(Interval), so that any garbage pad bytes in the structure
			 * won't be included in the hash!
			 *
			 * NOTE(review): this hashes the first
			 * sizeof(time) + sizeof(month) bytes of the struct, which covers
			 * the month field only if it directly follows time.  Confirm
			 * against the Interval definition; changing the length would
			 * change hash values for existing data.
			 */
			len = sizeof(intervalptr->time) + sizeof(intervalptr->month);
			break;

		case ABSTIMEOID:

			abstime_buf = DatumGetAbsoluteTime(datum);

			if (abstime_buf == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(abstime_buf);
				buf = &abstime_buf;
			}

			break;

		case RELTIMEOID:

			reltime_buf = DatumGetRelativeTime(datum);

			if (reltime_buf == INVALID_RELTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				len = sizeof(reltime_buf);
				buf = &reltime_buf;
			}

			break;

		case TINTERVALOID:

			tinterval = DatumGetTimeInterval(datum);

			/*
			 * check if a valid interval. the '0' status code
			 * stands for T_INTERVAL_INVAL which is defined in
			 * nabstime.c. We use the actual value instead
			 * of defining it again here.
			 */
			if(tinterval->status == 0 ||
			   tinterval->data[0] == INVALID_ABSTIME ||
			   tinterval->data[1] == INVALID_ABSTIME)
			{
				/* hash to a constant value */
				invalidbuf = INVALID_VAL;
				len = sizeof(invalidbuf);
				buf = &invalidbuf;
			}
			else
			{
				/* normalize on length of the time interval */
				tinterval_len = tinterval->data[1] -  tinterval->data[0];
				len = sizeof(tinterval_len);
				buf = &tinterval_len;
			}

			break;

		/*
		 * ======= NETWORK TYPES ========
		 */
		case INETOID:
		case CIDROID:

			inetptr = DatumGetInetP(datum);
			len = inet_getkey(inetptr, inet_hkey, sizeof(inet_hkey)); /* fill-in inet_key & get len */
			buf = inet_hkey;
			break;

		case MACADDROID:

			macptr = DatumGetMacaddrP(datum);
			len = sizeof(macaddr);
			buf = (unsigned char *) macptr;
			break;

		/*
		 * ======== BIT STRINGS ========
		 */
		case BITOID:
		case VARBITOID:

			/*
			 * Note that these are essentially strings.
			 * we don't need to worry about '10' and '010'
			 * to compare, b/c they will not, by design.
			 * (see SQL standard, and varbit.c)
			 */
			vbitptr = DatumGetVarBitP(datum);
			len = VARBITBYTES(vbitptr);
			buf = (char *) VARBITS(vbitptr);
			break;

		/*
		 * ======= other types =======
		 */
		case BOOLOID:			/* boolean, 'true'/'false' */
			bool_buf = DatumGetBool(datum);
			buf = &bool_buf;
			len = sizeof(bool_buf);
			break;

		/*
		 * We prepare the hash key for aclitems just like postgresql does.
		 * (see code and comment in acl.c: hash_aclitem() ).
		 */
		case ACLITEMOID:
			aclitem_ptr = DatumGetAclItemP(datum);
			aclitem_buf = (uint32) (aclitem_ptr->ai_privs + aclitem_ptr->ai_grantee + aclitem_ptr->ai_grantor);
			buf = &aclitem_buf;
			len = sizeof(aclitem_buf);
			break;

		/*
		 * ANYARRAY is a pseudo-type. We use it to include
		 * any of the array types (OIDs 1007-1033 in pg_type.h).
		 * caller needs to be sure the type is ANYARRAYOID
		 * before calling cdbhash on an array (INSERT and COPY do so).
		 */
		case ANYARRAYOID:

			arrbuf = DatumGetArrayTypeP(datum);
			/* hash everything after the varlena length header */
			len = VARSIZE(arrbuf) - VARHDRSZ;
			buf = VARDATA(arrbuf);
			break;

		case INT2VECTOROID:
			i2vec_buf = (int2vector *) DatumGetPointer(datum);
			/* hash only the element values: dim1 elements */
			len = i2vec_buf->dim1 * sizeof(int2);
			buf = (void *)i2vec_buf->values;
			break;

		case OIDVECTOROID:
			oidvec_buf = (oidvector *) DatumGetPointer(datum);
			/* hash only the element values: dim1 elements */
			len = oidvec_buf->dim1 * sizeof(Oid);
			buf = oidvec_buf->values;
			break;

		case CASHOID: /* cash is stored in int32 internally */
			/*
			 * NOTE(review): the value is fetched through a pointer and
			 * sizeof(Cash) bytes are hashed, i.e. Cash is handled as a
			 * pass-by-reference value here; the "int32" remark above does
			 * not match that -- confirm against the Cash typedef.
			 */
			cash_buf = (* (Cash *)DatumGetPointer(datum));
			len = sizeof(Cash);
			buf = &cash_buf;
			break;

		default:
			ereport(ERROR,
					(errcode(ERRCODE_CDB_FEATURE_NOT_YET),
					 errmsg("Type %u is not hashable.", type)));

	}							/* switch(type) */

	/* do the hash using the selected algorithm */
	hashFn(clientData, buf, len);
	/* buf may point into tofree, so release it only after hashing */
	if(tofree)
		pfree(tofree);
}
/* * heap_form_tuple * construct a tuple from the given values[] and isnull[] arrays, * which are of the length indicated by tupleDescriptor->natts * * The result is allocated in the current memory context. */ HeapTuple heaptuple_form_to(TupleDesc tupleDescriptor, Datum *values, bool *isnull, HeapTuple dst, uint32 *dstlen) { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ Size actual_len; Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; int numberOfAttributes = tupleDescriptor->natts; int i; if (numberOfAttributes > MaxTupleAttributeNumber) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_COLUMNS), errmsg("number of columns (%d) exceeds limit (%d)", numberOfAttributes, MaxTupleAttributeNumber))); /* * Check for nulls and embedded tuples; expand any toasted attributes in * embedded tuples. This preserves the invariant that toasting can only * go one level deep. * * We can skip calling toast_flatten_tuple_attribute() if the attribute * couldn't possibly be of composite type. All composite datums are * varlena and have alignment 'd'; furthermore they aren't arrays. Also, * if an attribute is already toasted, it must have been sent to disk * already and so cannot contain toasted attributes. 
*/ for (i = 0; i < numberOfAttributes; i++) { if (isnull[i]) hasnull = true; else if (att[i]->attlen == -1 && att[i]->attalign == 'd' && att[i]->attndims == 0 && !VARATT_IS_EXTENDED(DatumGetPointer(values[i]))) { values[i] = toast_flatten_tuple_attribute(values[i], att[i]->atttypid, att[i]->atttypmod); } } /* * Determine total space needed */ len = offsetof(HeapTupleHeaderData, t_bits); if (hasnull) len += BITMAPLEN(numberOfAttributes); if (tupleDescriptor->tdhasoid) len += sizeof(Oid); hoff = len = MAXALIGN(len); /* align user data safely */ data_len = heap_compute_data_size(tupleDescriptor, values, isnull); len += data_len; if (dstlen && (*dstlen) < (HEAPTUPLESIZE + len)) { *dstlen = HEAPTUPLESIZE + len; return NULL; } if (dstlen) { *dstlen = HEAPTUPLESIZE + len; tuple = dst; memset(tuple, 0, HEAPTUPLESIZE + len); } else tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len); /* * Allocate and zero the space needed. Note that the tuple body and * HeapTupleData management structure are allocated in one chunk. */ tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); /* * And fill in the information. Note we fill the Datum fields even though * this tuple may never become a Datum. */ tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); HeapTupleHeaderSetTypMod(td, tupleDescriptor->tdtypmod); HeapTupleHeaderSetNatts(td, numberOfAttributes); td->t_hoff = hoff; if (tupleDescriptor->tdhasoid) /* else leave infomask = 0 */ td->t_infomask = HEAP_HASOID; actual_len = heap_fill_tuple(tupleDescriptor, values, isnull, (char *) td + hoff, data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); Assert(data_len == actual_len); Assert(!is_heaptuple_memtuple(tuple)); return tuple; }