/*-------------------------------------------------------------------------
 * datumIsEqual
 *
 * Report whether two datums have identical binary images.
 *
 * NOTE: XXX!
 * This is a raw byte comparison, not a type-aware equality test.  Two
 * values that the datatype would consider equal but that are stored with
 * different bit patterns (think of the two zeroes in one's complement
 * arithmetic) compare as unequal here.  The result is also unlikely to be
 * what you want if either datum has been "toasted".
 *-------------------------------------------------------------------------
 */
bool
datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
{
    Size        len1;
    Size        len2;

    if (typByVal)
    {
        /*
         * Compare the Datum words themselves.  Looking at only "typLen"
         * bytes would not work, since we do not know where inside the
         * Datum those bytes live; instead we rely on each datatype filling
         * the unused bits of a Datum consistently.
         */
        return value1 == value2;
    }

    /* Pass-by-reference: the images must have the same length ... */
    len1 = datumGetSize(value1, typByVal, typLen);
    len2 = datumGetSize(value2, typByVal, typLen);
    if (len1 != len2)
        return false;

    /* ... and the same contents. */
    return memcmp((char *) DatumGetPointer(value1),
                  (char *) DatumGetPointer(value2),
                  len1) == 0;
}
/*
 * Append the binary image of a Datum to "str".
 *
 * Pass-by-value: the sizeof(Datum) bytes of the Datum word are appended.
 * Pass-by-reference: a Size length word is appended, followed by that many
 * bytes of the referenced data.  An invalid pointer is written as a length
 * word of zero with no payload.
 */
static void
_outDatum(StringInfo str, Datum value, int typlen, bool typbyval)
{
    char       *data;
    Size        nbytes;

    if (typbyval)
    {
        appendBinaryStringInfo(str, (char *) (&value), sizeof(Datum));
        return;
    }

    data = (char *) DatumGetPointer(value);

    /* Only ask for the size when there is something to measure. */
    nbytes = PointerIsValid(data) ? datumGetSize(value, typbyval, typlen) : 0;

    appendBinaryStringInfo(str, (char *) &nbytes, sizeof(Size));
    if (PointerIsValid(data))
        appendBinaryStringInfo(str, data, nbytes);
}
/*
 * ConvertDatumToBytes appends the payload bytes of the given datum to the
 * given datum string.
 *
 * Three storage classes are handled:
 *   - pass-by-value: the first datumGetSize() bytes of the Datum word;
 *   - varlena (typlen == -1): the data following the varlena header,
 *     VARSIZE_ANY_EXHDR bytes long;
 *   - any other pass-by-reference type (fixed length or cstring): the
 *     bytes the Datum points at, with no header to skip.
 *
 * NOTE(review): varlena inputs are presumably already detoasted by the
 * caller; this function does not detoast -- confirm against callers.
 */
static void
ConvertDatumToBytes(Datum datum, TypeCacheEntry *datumTypeCacheEntry,
                    StringInfo datumString)
{
    int16       datumTypeLength = datumTypeCacheEntry->typlen;
    bool        datumTypeByValue = datumTypeCacheEntry->typbyval;
    Size        datumSize = 0;
    char       *datumBytes = NULL;

    if (datumTypeLength == -1)
    {
        datumSize = VARSIZE_ANY_EXHDR(DatumGetPointer(datum));
    }
    else
    {
        datumSize = datumGetSize(datum, datumTypeByValue, datumTypeLength);
    }

    if (datumTypeByValue)
    {
        datumBytes = (char *) &datum;
    }
    else if (datumTypeLength == -1)
    {
        /* only varlena values carry a header that must be skipped */
        datumBytes = VARDATA_ANY(DatumGetPointer(datum));
    }
    else
    {
        /*
         * Fixed-length and cstring pass-by-reference values have no varlena
         * header.  Applying VARDATA_ANY to them would interpret the first
         * data byte as a header and skew the pointer, reading past the end
         * of the value.
         */
        datumBytes = (char *) DatumGetPointer(datum);
    }

    appendBinaryStringInfo(datumString, datumBytes, datumSize);
}
/*-------------------------------------------------------------------------
 * datumSerialize
 *
 * Write a possibly-NULL datum into storage supplied by the caller.
 *
 * Layout: an int header word comes first.  It holds -2 for a NULL, -1 for
 * a pass-by-value datum, or otherwise the byte length of a
 * pass-by-reference datum.  A NULL has no payload; a pass-by-value datum is
 * followed by sizeof(Datum) bytes; a pass-by-reference datum is followed by
 * as many bytes as the header says.
 *
 * The caller must make sure the destination is large enough; use
 * datumEstimateSpace() to learn how much room is required.  On return,
 * *start_address points just past the last byte written.
 *-------------------------------------------------------------------------
 */
void
datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
               char **start_address)
{
    char       *cursor = *start_address;
    int         header;

    /* Decide what the header word says. */
    if (isnull)
        header = -2;
    else if (typByVal)
        header = -1;
    else
        header = datumGetSize(value, typByVal, typLen);

    /* Emit the header. */
    memcpy(cursor, &header, sizeof(int));
    cursor += sizeof(int);

    /* Emit the payload, if there is one. */
    if (!isnull)
    {
        if (typByVal)
        {
            memcpy(cursor, &value, sizeof(Datum));
            cursor += sizeof(Datum);
        }
        else
        {
            memcpy(cursor, DatumGetPointer(value), header);
            cursor += header;
        }
    }

    *start_address = cursor;
}
/* * Returns the estimate count of the item */ Datum cmsketch_count(PG_FUNCTION_ARGS) { CountMinSketch *cms; Datum elem = PG_GETARG_DATUM(1); uint32_t count = false; Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 1); TypeCacheEntry *typ; Size size; if (val_type == InvalidOid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not determine input data type"))); if (PG_ARGISNULL(0)) PG_RETURN_INT32(count); cms = (CountMinSketch *) PG_GETARG_VARLENA_P(0); typ = lookup_type_cache(val_type, 0); size = datumGetSize(elem, typ->typbyval, typ->typlen); if (typ->typbyval) count = CountMinSketchEstimateCount(cms, (char *) &elem, size); else count = CountMinSketchEstimateCount(cms, DatumGetPointer(elem), size); PG_RETURN_INT32(count); }
Datum orafce_dump(PG_FUNCTION_ARGS) { Oid valtype = get_fn_expr_argtype(fcinfo->flinfo, 0); List *args; int16 typlen; bool typbyval; Size length; Datum value; int format; StringInfoData str; if (!fcinfo->flinfo || !fcinfo->flinfo->fn_expr) elog(ERROR, "function is called from invalid context"); if (PG_ARGISNULL(0)) elog(ERROR, "argument is NULL"); value = PG_GETARG_DATUM(0); format = PG_GETARG_IF_EXISTS(1, INT32, 10); args = ((FuncExpr *) fcinfo->flinfo->fn_expr)->args; valtype = exprType((Node *) list_nth(args, 0)); get_typlenbyval(valtype, &typlen, &typbyval); length = datumGetSize(value, typbyval, typlen); initStringInfo(&str); appendStringInfo(&str, "Typ=%d Len=%d: ", valtype, (int) length); if (!typbyval) { appendDatum(&str, DatumGetPointer(value), length, format); } else if (length <= 1) { char v = DatumGetChar(value); appendDatum(&str, &v, sizeof(char), format); } else if (length <= 2) { int16 v = DatumGetInt16(value); appendDatum(&str, &v, sizeof(int16), format); } else if (length <= 4) { int32 v = DatumGetInt32(value); appendDatum(&str, &v, sizeof(int32), format); } else { int64 v = DatumGetInt64(value); appendDatum(&str, &v, sizeof(int64), format); } PG_RETURN_TEXT_P(cstring_to_text(str.data)); }
/*-------------------------------------------------------------------------
 * datumCopy
 *
 * Produce a copy of a non-NULL datum.
 *
 * Pass-by-value datums are simply returned.  For pass-by-reference types
 * the copy lives in memory obtained with palloc().
 *
 * A reference to an expanded object is flattened into palloc'd memory
 * rather than copied as a pointer.  One of the main uses of this function
 * is rescuing a datum from a transient memory context that is about to be
 * destroyed, and the expanded object probably lives in a child context
 * that will disappear with it.  Many callers also assume that the result
 * is a single pfree-able chunk.
 *-------------------------------------------------------------------------
 */
Datum
datumCopy(Datum value, bool typByVal, int typLen)
{
    struct varlena *vl;
    Size        nbytes;
    char       *copy;

    if (typByVal)
        return value;

    if (typLen != -1)
    {
        /* Pass by reference, but not varlena, so not toasted */
        nbytes = datumGetSize(value, typByVal, typLen);
        copy = (char *) palloc(nbytes);
        memcpy(copy, DatumGetPointer(value), nbytes);
        return PointerGetDatum(copy);
    }

    /* From here on we are dealing with a varlena datatype */
    vl = (struct varlena *) DatumGetPointer(value);
    if (!vl)
        return PointerGetDatum(NULL);

    if (VARATT_IS_EXTERNAL_EXPANDED(vl))
    {
        /* Flatten into the caller's memory context */
        ExpandedObjectHeader *eoh = DatumGetEOHP(value);

        nbytes = EOH_get_flat_size(eoh);
        copy = (char *) palloc(nbytes);
        EOH_flatten_into(eoh, (void *) copy, nbytes);
    }
    else
    {
        /* Otherwise, just copy the varlena datum verbatim */
        nbytes = (Size) VARSIZE_ANY(vl);
        copy = (char *) palloc(nbytes);
        memcpy(copy, vl, nbytes);
    }

    return PointerGetDatum(copy);
}
/*-------------------------------------------------------------------------
 * datumEstimateSpace
 *
 * Report how many bytes datumSerialize will need for a given Datum: the
 * int header word, plus sizeof(Datum) for a non-NULL pass-by-value datum
 * or the datum's size for a non-NULL pass-by-reference datum.
 *-------------------------------------------------------------------------
 */
Size
datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
{
    /* A NULL is represented by the header word alone. */
    if (isnull)
        return sizeof(int);

    /* no need to use add_size below, these sums can't overflow */
    if (typByVal)
        return sizeof(int) + sizeof(Datum);

    return sizeof(int) + datumGetSize(value, typByVal, typLen);
}
/*
 * Add one occurrence of "elem" to the count-min sketch and return the
 * sketch.
 *
 * The element's type information is taken from the TypeCacheEntry stored
 * in fcinfo->flinfo->fn_extra.  A zero Datum of a pass-by-reference type
 * is skipped, leaving the sketch untouched.
 */
static CountMinSketch *
cmsketch_add_datum(FunctionCallInfo fcinfo, CountMinSketch *cms, Datum elem)
{
    TypeCacheEntry *entry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
    char       *bytes;
    Size        nbytes;

    /* Nothing to hash for a null pointer. */
    if (!(entry->typbyval || elem))
        return cms;

    nbytes = datumGetSize(elem, entry->typbyval, entry->typlen);

    /* By-value items are hashed from the Datum word, others from the data */
    bytes = entry->typbyval ? (char *) &elem : (char *) DatumGetPointer(elem);
    CountMinSketchAdd(cms, bytes, nbytes, 1);

    return cms;
}
/*-------------------------------------------------------------------------
 * datumSerialize
 *
 * Write a possibly-NULL datum into storage supplied by the caller.
 *
 * Note: "expanded" objects are flattened so that the output is
 * self-contained, whereas other kinds of toast pointers are transferred
 * as-is.  The intended use is handing the value to another process of the
 * same database server: that process cannot reach an "expanded" object in
 * this process's memory, but we assume it can dereference the same TOAST
 * pointers that this one can.
 *
 * Layout: an int header word comes first.  It holds -2 for a NULL, -1 for
 * a pass-by-value datum, or otherwise the byte length of a
 * pass-by-reference datum.  A NULL has no payload; a pass-by-value datum is
 * followed by sizeof(Datum) bytes; a pass-by-reference datum is followed by
 * as many bytes as the header says.
 *
 * The caller must make sure the destination is large enough; use
 * datumEstimateSpace() to learn how much room is required.  On return,
 * *start_address points just past the last byte written.
 *-------------------------------------------------------------------------
 */
void
datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
               char **start_address)
{
    ExpandedObjectHeader *expanded = NULL;
    int         header;

    /* Work out the header word. */
    if (isnull)
        header = -2;
    else if (typByVal)
        header = -1;
    else if (typLen == -1 &&
             VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
    {
        /* The payload will be the flattened form of the object. */
        expanded = DatumGetEOHP(value);
        header = EOH_get_flat_size(expanded);
    }
    else
        header = datumGetSize(value, typByVal, typLen);

    /* Write the header word. */
    memcpy(*start_address, &header, sizeof(int));
    *start_address += sizeof(int);

    /* A NULL has no payload. */
    if (isnull)
        return;

    if (typByVal)
    {
        memcpy(*start_address, &value, sizeof(Datum));
        *start_address += sizeof(Datum);
        return;
    }

    /* Pass-by-reference: flatten or copy "header" bytes. */
    if (expanded)
        EOH_flatten_into(expanded, (void *) *start_address, header);
    else
        memcpy(*start_address, DatumGetPointer(value), header);
    *start_address += header;
}
/*-------------------------------------------------------------------------
 * datumEstimateSpace
 *
 * Report how many bytes datumSerialize will need for a given Datum: the
 * int header word plus the size of the payload, if any.
 *-------------------------------------------------------------------------
 */
Size
datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
{
    Size        payload = 0;

    if (!isnull)
    {
        if (typByVal)
            payload = sizeof(Datum);
        else if (typLen == -1 &&
                 VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
        {
            /*
             * An expanded object is counted at the size of its flattened
             * form rather than at the size of the pointer to it.
             */
            payload = EOH_get_flat_size(DatumGetEOHP(value));
        }
        else
            payload = datumGetSize(value, typByVal, typLen);
    }

    /* no need to use add_size, can't overflow */
    return sizeof(int) + payload;
}
/*-------------------------------------------------------------------------
 * datumEstimateSpace
 *
 * Compute the amount of space that datumSerialize will require for a
 * particular Datum: the int header word, plus sizeof(Datum) for a non-NULL
 * pass-by-value datum, the flattened size for an expanded object, or the
 * datum's size for any other non-NULL pass-by-reference datum.
 *-------------------------------------------------------------------------
 */
Size
datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
{
    Size        sz = sizeof(int);

    if (!isnull)
    {
        /* no need to use add_size, can't overflow */
        if (typByVal)
            sz += sizeof(Datum);
        else if (typLen == -1 &&
                 VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
        {
            /*
             * Only a varlena value can be an expanded-object pointer, so
             * typLen must be checked before the header is inspected.  For a
             * fixed-length or cstring pass-by-reference value the first
             * byte is plain data, and treating it as a varlena header could
             * misclassify the value and chase a bogus pointer.
             */
            ExpandedObjectHeader *eoh = DatumGetEOHP(value);

            sz += EOH_get_flat_size(eoh);
        }
        else
            sz += datumGetSize(value, typByVal, typLen);
    }

    return sz;
}
/*-------------------------------------------------------------------------
 * datumCopy
 *
 * Produce a copy of a datum.
 *
 * A pass-by-value datum is returned unchanged.  For a pass-by-reference
 * datum the referenced bytes are duplicated into memory obtained with
 * palloc(); a null pointer is returned as a null pointer.
 *-------------------------------------------------------------------------
 */
Datum
datumCopy(Datum value, bool typByVal, int typLen)
{
    Size        nbytes;
    char       *copy;

    if (typByVal)
        return value;

    /* There is nothing to duplicate behind a null pointer. */
    if (DatumGetPointer(value) == NULL)
        return PointerGetDatum(NULL);

    nbytes = datumGetSize(value, typByVal, typLen);
    copy = (char *) palloc(nbytes);
    memcpy(copy, DatumGetPointer(value), nbytes);

    return PointerGetDatum(copy);
}
/*
 * Copy a datum of the accumulator's first index attribute.
 *
 * This does essentially what datumCopy() does, but the code is duplicated
 * here so that the datum size is computed only once: it is needed both for
 * the copy and for the accumulator's allocatedMemory bookkeeping.
 */
static Datum
getDatumCopy(BuildAccumulator *accum, Datum value)
{
    Form_pg_attribute *attrs = accum->ginstate->tupdesc->attrs;
    Size        nbytes;
    char       *copy;

    /* By-value datums need neither a copy nor any accounting. */
    if (attrs[0]->attbyval)
        return value;

    nbytes = datumGetSize(value, false, attrs[0]->attlen);
    copy = (char *) palloc(nbytes);
    memcpy(copy, DatumGetPointer(value), nbytes);

    accum->allocatedMemory += nbytes;

    return PointerGetDatum(copy);
}
/* * Compose and dispatch the MPPEXEC commands corresponding to a plan tree * within a complete parallel plan. (A plan tree will correspond either * to an initPlan or to the main plan.) * * If cancelOnError is true, then any dispatching error, a cancellation * request from the client, or an error from any of the associated QEs, * may cause the unfinished portion of the plan to be abandoned or canceled; * and in the event this occurs before all gangs have been dispatched, this * function does not return, but waits for all QEs to stop and exits to * the caller's error catcher via ereport(ERROR,...).Otherwise this * function returns normally and errors are not reported until later. * * If cancelOnError is false, the plan is to be dispatched as fully as * possible and the QEs allowed to proceed regardless of cancellation * requests, errors or connection failures from other QEs, etc. * * The CdbDispatchResults objects allocated for the plan are returned * in *pPrimaryResults. The caller, after calling * CdbCheckDispatchResult(), can examine the CdbDispatchResults * objects, can keep them as long as needed, and ultimately must free * them with cdbdisp_destroyDispatcherState() prior to deallocation of * the caller's memory context. Callers should use PG_TRY/PG_CATCH to * ensure proper cleanup. * * To wait for completion, check for errors, and clean up, it is * suggested that the caller use cdbdisp_finishCommand(). * * Note that the slice tree dispatched is the one specified in the EState * of the argument QueryDesc as es_cur__slice. * * Note that the QueryDesc params must include PARAM_EXEC_REMOTE parameters * containing the values of any initplans required by the slice to be run. * (This is handled by calls to addRemoteExecParamsToParamList() from the * functions preprocess_initplans() and ExecutorRun().) * * Each QE receives its assignment as a message of type 'M' in PostgresMain(). * The message is deserialized and processed by exec_mpp_query() in postgres.c. 
*/ void cdbdisp_dispatchPlan(struct QueryDesc *queryDesc, bool planRequiresTxn, bool cancelOnError, struct CdbDispatcherState *ds) { char *splan, *sddesc, *sparams; int splan_len, splan_len_uncompressed, sddesc_len, sparams_len; SliceTable *sliceTbl; int rootIdx; int oldLocalSlice; PlannedStmt *stmt; bool is_SRI; DispatchCommandQueryParms queryParms; CdbComponentDatabaseInfo *qdinfo; ds->primaryResults = NULL; ds->dispatchThreads = NULL; Assert(Gp_role == GP_ROLE_DISPATCH); Assert(queryDesc != NULL && queryDesc->estate != NULL); /* * Later we'll need to operate with the slice table provided via the * EState structure in the argument QueryDesc. Cache this information * locally and assert our expectations about it. */ sliceTbl = queryDesc->estate->es_sliceTable; rootIdx = RootSliceIndex(queryDesc->estate); Assert(sliceTbl != NULL); Assert(rootIdx == 0 || (rootIdx > sliceTbl->nMotions && rootIdx <= sliceTbl->nMotions + sliceTbl->nInitPlans)); /* * Keep old value so we can restore it. We use this field as a parameter. */ oldLocalSlice = sliceTbl->localSlice; /* * This function is called only for planned statements. */ stmt = queryDesc->plannedstmt; Assert(stmt); /* * Let's evaluate STABLE functions now, so we get consistent values on the QEs * * Also, if this is a single-row INSERT statement, let's evaluate * nextval() and currval() now, so that we get the QD's values, and a * consistent value for everyone * */ is_SRI = false; if (queryDesc->operation == CMD_INSERT) { Assert(stmt->commandType == CMD_INSERT); /* * We might look for constant input relation (instead of SRI), but I'm afraid * * that wouldn't scale. 
*/ is_SRI = IsA(stmt->planTree, Result) && stmt->planTree->lefttree == NULL; } if (!is_SRI) clear_relsize_cache(); if (queryDesc->operation == CMD_INSERT || queryDesc->operation == CMD_SELECT || queryDesc->operation == CMD_UPDATE || queryDesc->operation == CMD_DELETE) { MemoryContext oldContext; oldContext = CurrentMemoryContext; if (stmt->qdContext) { oldContext = MemoryContextSwitchTo(stmt->qdContext); } else /* * memory context of plan tree should not change */ { MemoryContext mc = GetMemoryChunkContext(stmt->planTree); oldContext = MemoryContextSwitchTo(mc); } stmt->planTree = (Plan *) exec_make_plan_constant(stmt, is_SRI); MemoryContextSwitchTo(oldContext); } /* * Cursor queries and bind/execute path queries don't run on the * writer-gang QEs; but they require snapshot-synchronization to * get started. * * initPlans, and other work (see the function pre-evaluation * above) may advance the snapshot "segmateSync" value, so we're * best off setting the shared-snapshot-ready value here. This * will dispatch to the writer gang and force it to set its * snapshot; we'll then be able to serialize the same snapshot * version (see qdSerializeDtxContextInfo() below). */ if (queryDesc->extended_query) { verify_shared_snapshot_ready(); } /* * serialized plan tree. Note that we're called for a single * slice tree (corresponding to an initPlan or the main plan), so the * parameters are fixed and we can include them in the prefix. 
*/ splan = serializeNode((Node *) queryDesc->plannedstmt, &splan_len, &splan_len_uncompressed); uint64 plan_size_in_kb = ((uint64) splan_len_uncompressed) / (uint64) 1024; if (0 < gp_max_plan_size && plan_size_in_kb > gp_max_plan_size) { ereport(ERROR, (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), (errmsg("Query plan size limit exceeded, current size: " UINT64_FORMAT "KB, max allowed size: %dKB", plan_size_in_kb, gp_max_plan_size), errhint("Size controlled by gp_max_plan_size")))); } Assert(splan != NULL && splan_len > 0 && splan_len_uncompressed > 0); if (queryDesc->params != NULL && queryDesc->params->numParams > 0) { ParamListInfoData *pli; ParamExternData *pxd; StringInfoData parambuf; Size length; int plioff; int32 iparam; /* * Allocate buffer for params */ initStringInfo(¶mbuf); /* * Copy ParamListInfoData header and ParamExternData array */ pli = queryDesc->params; length = (char *) &pli->params[pli->numParams] - (char *) pli; plioff = parambuf.len; Assert(plioff == MAXALIGN(plioff)); appendBinaryStringInfo(¶mbuf, pli, length); /* * Copy pass-by-reference param values. */ for (iparam = 0; iparam < queryDesc->params->numParams; iparam++) { int16 typlen; bool typbyval; /* * Recompute pli each time in case parambuf.data is repalloc'ed */ pli = (ParamListInfoData *) (parambuf.data + plioff); pxd = &pli->params[iparam]; if (pxd->ptype == InvalidOid) continue; /* * Does pxd->value contain the value itself, or a pointer? */ get_typlenbyval(pxd->ptype, &typlen, &typbyval); if (!typbyval) { char *s = DatumGetPointer(pxd->value); if (pxd->isnull || !PointerIsValid(s)) { pxd->isnull = true; pxd->value = 0; } else { length = datumGetSize(pxd->value, typbyval, typlen); /* * We *must* set this before we * append. Appending may realloc, which will * invalidate our pxd ptr. 
(obviously we could * append first if we recalculate pxd from the new * base address) */ pxd->value = Int32GetDatum(length); appendBinaryStringInfo(¶mbuf, &iparam, sizeof(iparam)); appendBinaryStringInfo(¶mbuf, s, length); } } } sparams = parambuf.data; sparams_len = parambuf.len; } else { sparams = NULL; sparams_len = 0; } sddesc = serializeNode((Node *) queryDesc->ddesc, &sddesc_len, NULL /*uncompressed_size */ ); MemSet(&queryParms, 0, sizeof(queryParms)); queryParms.strCommand = queryDesc->sourceText; queryParms.serializedQuerytree = NULL; queryParms.serializedQuerytreelen = 0; queryParms.serializedPlantree = splan; queryParms.serializedPlantreelen = splan_len; queryParms.serializedParams = sparams; queryParms.serializedParamslen = sparams_len; queryParms.serializedQueryDispatchDesc = sddesc; queryParms.serializedQueryDispatchDesclen = sddesc_len; queryParms.rootIdx = rootIdx; /* * sequence server info */ qdinfo = &(getComponentDatabases()->entry_db_info[0]); Assert(qdinfo != NULL && qdinfo->hostip != NULL); queryParms.seqServerHost = pstrdup(qdinfo->hostip); queryParms.seqServerHostlen = strlen(qdinfo->hostip) + 1; queryParms.seqServerPort = seqServerCtl->seqServerPort; /* * serialized a version of our snapshot */ /* * Generate our transction isolations. We generally want Plan * based dispatch to be in a global transaction. The executor gets * to decide if the special circumstances exist which allow us to * dispatch without starting a global xact. */ queryParms.serializedDtxContextInfo = qdSerializeDtxContextInfo(&queryParms.serializedDtxContextInfolen, true /* wantSnapshot */ , queryDesc->extended_query, mppTxnOptions(planRequiresTxn), "cdbdisp_dispatchPlan"); cdbdisp_dispatchX(&queryParms, cancelOnError, sliceTbl, ds); sliceTbl->localSlice = oldLocalSlice; }
/*
 * set_kv
 *
 * Fill in a KeyValue with the given key and value, allocating it if "kv"
 * is NULL or resizing it if its current VARSIZE differs from the size now
 * required.  Returns the (possibly relocated) KeyValue.
 *
 * Pass-by-reference keys and values are copied into the space directly
 * after the KeyValue struct, key first, and kv->key / kv->value are set to
 * point at those copies.  Pass-by-value items are stored as-is.  NULL items
 * are flagged through KV_SET_KEY_NULL / KV_SET_VALUE_NULL.
 */
static KeyValue *
set_kv(KeyedAggState *state, KeyValue *kv, Datum key, bool key_null, Datum value, bool value_null)
{
    bool        copy_key = !state->key_type->typbyval && !key_null;
    bool        copy_value = !state->value_type->typbyval && !value_null;
    Size        key_bytes = 0;
    Size        value_bytes = 0;
    Size        total;
    char       *cursor;

    /* Only by-reference, non-NULL items occupy trailing space. */
    if (copy_key)
        key_bytes = datumGetSize(key, state->key_type->typbyval, state->key_type->typlen);
    if (copy_value)
        value_bytes = datumGetSize(value, state->value_type->typbyval, state->value_type->typlen);

    total = sizeof(KeyValue) + key_bytes + value_bytes;

    if (kv == NULL)
        kv = palloc0(total);
    else if (VARSIZE(kv) != total)
        kv = repalloc(kv, total);

    kv->klen = key_bytes;
    kv->vlen = value_bytes;

    /* Trailing data starts right behind the struct. */
    cursor = (char *) kv + sizeof(KeyValue);

    if (key_null)
    {
        KV_SET_KEY_NULL(kv);
    }
    else if (copy_key)
    {
        kv->key = (Datum) cursor;
        memcpy(cursor, (char *) key, kv->klen);
        cursor += kv->klen;
    }
    else
    {
        kv->key = key;
    }

    if (value_null)
    {
        KV_SET_VALUE_NULL(kv);
    }
    else if (copy_value)
    {
        kv->value = (Datum) cursor;
        memcpy(cursor, (char *) value, kv->vlen);
        cursor += kv->vlen;
    }
    else
    {
        kv->value = value;
    }

    SET_VARSIZE(kv, total);
    kv->key_type = state->key_type->type_id;
    kv->value_type = state->value_type->type_id;

    return kv;
}