/* * Get status of resource groups */ Datum pg_resgroup_get_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; ResGroupStatCtx *ctx; if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; TupleDesc tupdesc; int nattr = 8; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(nattr, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "groupid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "num_running", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "num_queueing", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "num_queued", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "num_executed", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "total_queue_duration", INTERVALOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "cpu_usage", JSONOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "memory_usage", JSONOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); if (IsResGroupActivated()) { Relation pg_resgroup_rel; SysScanDesc sscan; HeapTuple tuple; Oid inGroupId = PG_GETARG_OID(0); int ctxsize = sizeof(ResGroupStatCtx) + sizeof(ResGroupStat) * (MaxResourceGroups - 1); (void) inGroupId; funcctx->user_fctx = palloc(ctxsize); ctx = (ResGroupStatCtx *) funcctx->user_fctx; /* * others may be creating/dropping resource group concurrently, * block until creating/dropping finish to avoid inconsistent * resource group metadata */ pg_resgroup_rel = heap_open(ResGroupRelationId, ExclusiveLock); sscan = systable_beginscan(pg_resgroup_rel, InvalidOid, false, NULL, 0, NULL); while (HeapTupleIsValid(tuple = systable_getnext(sscan))) { Oid oid = ObjectIdGetDatum(HeapTupleGetOid(tuple)); if (inGroupId == InvalidOid || inGroupId == oid) { Assert(funcctx->max_calls < MaxResourceGroups); ctx->groups[funcctx->max_calls].cpuUsage = makeStringInfo(); ctx->groups[funcctx->max_calls].memUsage = makeStringInfo(); ctx->groups[funcctx->max_calls++].groupId = oid; if (inGroupId != InvalidOid) break; } } systable_endscan(sscan); ctx->nGroups = funcctx->max_calls; qsort(ctx->groups, ctx->nGroups, sizeof(ctx->groups[0]), compareRow); if (ctx->nGroups > 0) getResUsage(ctx, inGroupId); heap_close(pg_resgroup_rel, ExclusiveLock); } MemoryContextSwitchTo(oldcontext); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); ctx = (ResGroupStatCtx *) funcctx->user_fctx; if (funcctx->call_cntr < funcctx->max_calls) { /* for each row */ Datum values[8]; bool nulls[8]; HeapTuple tuple; Oid groupId; char statVal[MAXDATELEN + 1]; ResGroupStat *row = &ctx->groups[funcctx->call_cntr]; MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); MemSet(statVal, 0, sizeof(statVal)); values[0] = row->groupId; groupId = DatumGetObjectId(values[0]); if (Gp_role == GP_ROLE_DISPATCH) { values[1] = ResGroupGetStat(groupId, RES_GROUP_STAT_NRUNNING); values[2] = ResGroupGetStat(groupId, RES_GROUP_STAT_NQUEUEING); values[3] = ResGroupGetStat(groupId, RES_GROUP_STAT_TOTAL_QUEUED); values[4] = ResGroupGetStat(groupId, RES_GROUP_STAT_TOTAL_EXECUTED); values[5] = ResGroupGetStat(groupId, RES_GROUP_STAT_TOTAL_QUEUE_TIME); } else { nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; } values[6] = CStringGetTextDatum(row->cpuUsage->data); values[7] = CStringGetTextDatum(row->memUsage->data); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } else { /* nothing left */ SRF_RETURN_DONE(funcctx); } }
/* * stat a file */ Datum pg_stat_file(PG_FUNCTION_ARGS) { text *filename_t = PG_GETARG_TEXT_P(0); char *filename; struct stat fst; Datum values[6]; bool isnull[6]; HeapTuple tuple; TupleDesc tupdesc; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to get file information")))); filename = convert_and_check_filename(filename_t); if (stat(filename, &fst) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", filename))); /* * This record type had better match the output parameters declared for me * in pg_proc.h. */ tupdesc = CreateTemplateTupleDesc(6, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "size", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "access", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "change", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "creation", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "isdir", BOOLOID, -1, 0); BlessTupleDesc(tupdesc); memset(isnull, false, sizeof(isnull)); values[0] = Int64GetDatum((int64) fst.st_size); values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime)); values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime)); /* Unix has file status change time, while Win32 has creation time */ #if !defined(WIN32) && !defined(__CYGWIN__) values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); isnull[4] = true; #else isnull[3] = true; values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime)); #endif values[5] = BoolGetDatum(S_ISDIR(fst.st_mode)); tuple = heap_form_tuple(tupdesc, values, isnull); pfree(filename); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); }
Datum pg_stat_get_activity(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; TupleDesc tupdesc; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(13, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "datid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "procpid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "usesysid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "application_name", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "current_query", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "waiting", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "act_start", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "query_start", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "backend_start", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "client_addr", INETOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "client_port", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "sess_id", INT4OID, -1, 0); /* GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 13, "waiting_resource", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = palloc0(sizeof(int)); if (PG_ARGISNULL(0)) { /* Get all backends */ funcctx->max_calls = pgstat_fetch_stat_numbackends(); } else { /* * Get one backend - locate by pid. * * We lookup the backend early, so we can return zero rows if it * doesn't exist, instead of returning a single row full of NULLs. */ int pid = PG_GETARG_INT32(0); int i; int n = pgstat_fetch_stat_numbackends(); for (i = 1; i <= n; i++) { PgBackendStatus *be = pgstat_fetch_stat_beentry(i); if (be) { if (be->st_procpid == pid) { *(int *) (funcctx->user_fctx) = i; break; } } } if (*(int *) (funcctx->user_fctx) == 0) /* Pid not found, return zero rows */ funcctx->max_calls = 0; else funcctx->max_calls = 1; } MemoryContextSwitchTo(oldcontext); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); if (funcctx->call_cntr < funcctx->max_calls) { /* for each row */ Datum values[13]; bool nulls[13]; HeapTuple tuple; PgBackendStatus *beentry; SockAddr zero_clientaddr; MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); if (*(int *) (funcctx->user_fctx) > 0) { /* Get specific pid slot */ beentry = pgstat_fetch_stat_beentry(*(int *) (funcctx->user_fctx)); } else { /* Get the next one in the list */ beentry = pgstat_fetch_stat_beentry(funcctx->call_cntr + 1); /* 1-based index */ } if (!beentry) { int i; for (i = 0; i < sizeof(nulls) / sizeof(nulls[0]); i++) nulls[i] = true; nulls[4] = false; values[4] = CStringGetTextDatum("<backend information not available>"); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } /* Values available to all callers */ values[0] = ObjectIdGetDatum(beentry->st_databaseid); values[1] = Int32GetDatum(beentry->st_procpid); values[2] = ObjectIdGetDatum(beentry->st_userid); if (beentry->st_appname) values[3] = CStringGetTextDatum(beentry->st_appname); else nulls[3] = true; /* Values only available to same user or superuser */ if (superuser() || beentry->st_userid == GetUserId()) { if (*(beentry->st_activity) == '\0') { values[4] = CStringGetTextDatum("<command string not enabled>"); } else { values[4] = CStringGetTextDatum(beentry->st_activity); } values[5] = BoolGetDatum(beentry->st_waiting); if (beentry->st_xact_start_timestamp != 0) values[6] = TimestampTzGetDatum(beentry->st_xact_start_timestamp); else nulls[6] = true; if (beentry->st_activity_start_timestamp != 0) values[7] = TimestampTzGetDatum(beentry->st_activity_start_timestamp); else nulls[7] = true; if (beentry->st_proc_start_timestamp != 0) values[8] = TimestampTzGetDatum(beentry->st_proc_start_timestamp); else nulls[8] = true; /* A zeroed client addr means we don't know */ memset(&zero_clientaddr, 0, sizeof(zero_clientaddr)); if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr, sizeof(zero_clientaddr) == 0)) { nulls[9] = true; nulls[10] = true; } else { if (beentry->st_clientaddr.addr.ss_family == AF_INET #ifdef HAVE_IPV6 || beentry->st_clientaddr.addr.ss_family == AF_INET6 #endif ) { char remote_host[NI_MAXHOST]; char remote_port[NI_MAXSERV]; int ret; remote_host[0] = '\0'; remote_port[0] = '\0'; ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr, beentry->st_clientaddr.salen, remote_host, sizeof(remote_host), remote_port, sizeof(remote_port), NI_NUMERICHOST | NI_NUMERICSERV); if (ret) { nulls[9] = true; nulls[10] = true; } else { clean_ipv6_addr(beentry->st_clientaddr.addr.ss_family, remote_host); values[9] = DirectFunctionCall1(inet_in, CStringGetDatum(remote_host)); values[10] = Int32GetDatum(atoi(remote_port)); } } else if (beentry->st_clientaddr.addr.ss_family == AF_UNIX) { /* * Unix sockets always reports NULL for host and -1 for * port, so it's possible to tell the difference to * connections we have no permissions to view, or with * errors. */ nulls[9] = true; values[10] = DatumGetInt32(-1); } else { /* Unknown address type, should never happen */ nulls[9] = true; nulls[10] = true; } } values[12] = BoolGetDatum(beentry->st_waiting_resource); } else { /* No permissions to view data about this session */ values[4] = CStringGetTextDatum("<insufficient privilege>"); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; nulls[10] = true; nulls[12] = true; } values[11] = Int32GetDatum(beentry->st_session_id); /* GPDB */ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } else { /* nothing left */ SRF_RETURN_DONE(funcctx); } }
/* * BuildDescForRelation * * Given a relation schema (list of ColumnDef nodes), build a TupleDesc. * * Note: the default assumption is no OIDs; caller may modify the returned * TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in * later on. */ TupleDesc BuildDescForRelation(List *schema) { int natts; AttrNumber attnum; ListCell *l; TupleDesc desc; bool has_not_null; char *attname; Oid atttypid; int32 atttypmod; Oid attcollation; int attdim; /* * allocate a new tuple descriptor */ natts = list_length(schema); desc = CreateTemplateTupleDesc(natts, false); has_not_null = false; attnum = 0; foreach(l, schema) { ColumnDef *entry = lfirst(l); AclResult aclresult; /* * for each entry in the list, get the name and type information from * the list and have TupleDescInitEntry fill in the attribute * information we need. */ attnum++; attname = entry->colname; typenameTypeIdAndMod(NULL, entry->typeName, &atttypid, &atttypmod); aclresult = pg_type_aclcheck(atttypid, GetUserId(), ACL_USAGE); if (aclresult != ACLCHECK_OK) aclcheck_error_type(aclresult, atttypid); attcollation = GetColumnDefCollation(NULL, entry, atttypid); attdim = list_length(entry->typeName->arrayBounds); if (entry->typeName->setof) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("column \"%s\" cannot be declared SETOF", attname))); TupleDescInitEntry(desc, attnum, attname, atttypid, atttypmod, attdim); /* Override TupleDescInitEntry's settings as requested */ TupleDescInitEntryCollation(desc, attnum, attcollation); if (entry->storage) desc->attrs[attnum - 1]->attstorage = entry->storage; /* Fill in additional stuff not handled by TupleDescInitEntry */ desc->attrs[attnum - 1]->attnotnull = entry->is_not_null; has_not_null |= entry->is_not_null; desc->attrs[attnum - 1]->attislocal = entry->is_local; desc->attrs[attnum - 1]->attinhcount = entry->inhcount; }
Datum gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3); /* nkeys and norderbys arguments are ignored */ GISTScanOpaque so = (GISTScanOpaque) scan->opaque; bool first_time; int i; MemoryContext oldCxt; /* rescan an existing indexscan --- reset state */ /* * The first time through, we create the search queue in the scanCxt. * Subsequent times through, we create the queue in a separate queueCxt, * which is created on the second call and reset on later calls. Thus, in * the common case where a scan is only rescan'd once, we just put the * queue in scanCxt and don't pay the overhead of making a second memory * context. If we do rescan more than once, the first RBTree is just left * for dead until end of scan; this small wastage seems worth the savings * in the common case. */ if (so->queue == NULL) { /* first time through */ Assert(so->queueCxt == so->giststate->scanCxt); first_time = true; } else if (so->queueCxt == so->giststate->scanCxt) { /* second time through */ so->queueCxt = AllocSetContextCreate(so->giststate->scanCxt, "GiST queue context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); first_time = false; } else { /* third or later time through */ MemoryContextReset(so->queueCxt); first_time = false; } /* * If we're doing an index-only scan, on the first call, also initialize * a tuple descriptor to represent the returned index tuples and create a * memory context to hold them during the scan. */ if (scan->xs_want_itup && !scan->xs_itupdesc) { int natts; int attno; /* * The storage type of the index can be different from the original * datatype being indexed, so we cannot just grab the index's tuple * descriptor. Instead, construct a descriptor with the original data * types. */ natts = RelationGetNumberOfAttributes(scan->indexRelation); so->giststate->fetchTupdesc = CreateTemplateTupleDesc(natts, false); for (attno = 1; attno <= natts; attno++) { TupleDescInitEntry(so->giststate->fetchTupdesc, attno, NULL, scan->indexRelation->rd_opcintype[attno - 1], -1, 0); } scan->xs_itupdesc = so->giststate->fetchTupdesc; so->pageDataCxt = AllocSetContextCreate(so->giststate->scanCxt, "GiST page data context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); } /* create new, empty RBTree for search queue */ oldCxt = MemoryContextSwitchTo(so->queueCxt); so->queue = pairingheap_allocate(pairingheap_GISTSearchItem_cmp, scan); MemoryContextSwitchTo(oldCxt); so->firstCall = true; /* Update scan key, if a new one is given */ if (key && scan->numberOfKeys > 0) { void **fn_extras = NULL; /* * If this isn't the first time through, preserve the fn_extra * pointers, so that if the consistentFns are using them to cache * data, that data is not leaked across a rescan. */ if (!first_time) { fn_extras = (void **) palloc(scan->numberOfKeys * sizeof(void *)); for (i = 0; i < scan->numberOfKeys; i++) fn_extras[i] = scan->keyData[i].sk_func.fn_extra; } memmove(scan->keyData, key, scan->numberOfKeys * sizeof(ScanKeyData)); /* * Modify the scan key so that the Consistent method is called for all * comparisons. The original operator is passed to the Consistent * function in the form of its strategy number, which is available * from the sk_strategy field, and its subtype from the sk_subtype * field. * * Next, if any of keys is a NULL and that key is not marked with * SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we * assume all indexable operators are strict). */ so->qual_ok = true; for (i = 0; i < scan->numberOfKeys; i++) { ScanKey skey = scan->keyData + i; fmgr_info_copy(&(skey->sk_func), &(so->giststate->consistentFn[skey->sk_attno - 1]), so->giststate->scanCxt); /* Restore prior fn_extra pointers, if not first time */ if (!first_time) skey->sk_func.fn_extra = fn_extras[i]; if (skey->sk_flags & SK_ISNULL) { if (!(skey->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL))) so->qual_ok = false; } } if (!first_time) pfree(fn_extras); } /* Update order-by key, if a new one is given */ if (orderbys && scan->numberOfOrderBys > 0) { void **fn_extras = NULL; /* As above, preserve fn_extra if not first time through */ if (!first_time) { fn_extras = (void **) palloc(scan->numberOfOrderBys * sizeof(void *)); for (i = 0; i < scan->numberOfOrderBys; i++) fn_extras[i] = scan->orderByData[i].sk_func.fn_extra; } memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); so->orderByTypes = (Oid *) palloc(scan->numberOfOrderBys * sizeof(Oid)); /* * Modify the order-by key so that the Distance method is called for * all comparisons. The original operator is passed to the Distance * function in the form of its strategy number, which is available * from the sk_strategy field, and its subtype from the sk_subtype * field. */ for (i = 0; i < scan->numberOfOrderBys; i++) { ScanKey skey = scan->orderByData + i; FmgrInfo *finfo = &(so->giststate->distanceFn[skey->sk_attno - 1]); /* Check we actually have a distance function ... */ if (!OidIsValid(finfo->fn_oid)) elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", GIST_DISTANCE_PROC, skey->sk_attno, RelationGetRelationName(scan->indexRelation)); /* * Look up the datatype returned by the original ordering operator. * GiST always uses a float8 for the distance function, but the * ordering operator could be anything else. * * XXX: The distance function is only allowed to be lossy if the * ordering operator's result type is float4 or float8. Otherwise * we don't know how to return the distance to the executor. But * we cannot check that here, as we won't know if the distance * function is lossy until it returns *recheck = true for the * first time. */ so->orderByTypes[i] = get_func_rettype(skey->sk_func.fn_oid); fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt); /* Restore prior fn_extra pointers, if not first time */ if (!first_time) skey->sk_func.fn_extra = fn_extras[i]; } if (!first_time) pfree(fn_extras); } PG_RETURN_VOID(); }
/* * TypeGetTupleDesc * * Given a type Oid, build a TupleDesc. (In most cases you should be * using get_call_result_type or one of its siblings instead of this * routine, so that you can handle OUT parameters, RECORD result type, * and polymorphic results.) * * If the type is composite, *and* a colaliases List is provided, *and* * the List is of natts length, use the aliases instead of the relation * attnames. (NB: this usage is deprecated since it may result in * creation of unnecessary transient record types.) * * If the type is a base type, a single item alias List is required. */ TupleDesc TypeGetTupleDesc(Oid typeoid, List *colaliases) { TypeFuncClass functypclass = get_type_func_class(typeoid); TupleDesc tupdesc = NULL; /* * Build a suitable tupledesc representing the output rows */ if (functypclass == TYPEFUNC_COMPOSITE) { /* Composite data type, e.g. a table's row type */ tupdesc = lookup_rowtype_tupdesc_copy(typeoid, -1); if (colaliases != NIL) { int natts = tupdesc->natts; int varattno; /* does the list length match the number of attributes? */ if (list_length(colaliases) != natts) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("number of aliases does not match number of columns"))); /* OK, use the aliases instead */ for (varattno = 0; varattno < natts; varattno++) { char *label = strVal(list_nth(colaliases, varattno)); if (label != NULL) namestrcpy(&(tupdesc->attrs[varattno]->attname), label); } /* The tuple type is now an anonymous record type */ tupdesc->tdtypeid = RECORDOID; tupdesc->tdtypmod = -1; } } else if (functypclass == TYPEFUNC_SCALAR) { /* Base data type, i.e. scalar */ char *attname; /* the alias list is required for base types */ if (colaliases == NIL) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("no column alias was provided"))); /* the alias list length must be 1 */ if (list_length(colaliases) != 1) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("number of aliases does not match number of columns"))); /* OK, get the column alias */ attname = strVal(linitial(colaliases)); tupdesc = CreateTemplateTupleDesc(1, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, attname, typeoid, -1, 0); } else if (functypclass == TYPEFUNC_RECORD) { /* XXX can't support this because typmod wasn't passed in ... */ ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("could not determine row description for function returning record"))); } else { /* crummy error message, but parser should have caught this */ elog(ERROR, "function in FROM has unsupported return type"); } return tupdesc; }
/* ---------------------------------------------------------------- * ExecInitFunctionScan * ---------------------------------------------------------------- */ FunctionScanState * ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags) { FunctionScanState *scanstate; RangeTblEntry *rte; Oid funcrettype; TypeFuncClass functypclass; TupleDesc tupdesc = NULL; /* * FunctionScan should not have any children. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create new ScanState for node */ scanstate = makeNode(FunctionScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); #define FUNCTIONSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); /* Check if targetlist or qual contains a var node referencing the ctid column */ scanstate->cdb_want_ctid = contain_ctid_var_reference(&node->scan); ItemPointerSet(&scanstate->cdb_fake_ctid, 0, 0); ItemPointerSet(&scanstate->cdb_mark_ctid, 0, 0); /* * get info about function */ rte = rt_fetch(node->scan.scanrelid, estate->es_range_table); Assert(rte->rtekind == RTE_FUNCTION); /* * Now determine if the function returns a simple or composite type, and * build an appropriate tupdesc. */ functypclass = get_expr_result_type(rte->funcexpr, &funcrettype, &tupdesc); if (functypclass == TYPEFUNC_COMPOSITE) { /* Composite data type, e.g. a table's row type */ Assert(tupdesc); /* Must copy it out of typcache for safety */ tupdesc = CreateTupleDescCopy(tupdesc); } else if (functypclass == TYPEFUNC_SCALAR) { /* Base data type, i.e. scalar */ char *attname = strVal(linitial(rte->eref->colnames)); tupdesc = CreateTemplateTupleDesc(1, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, attname, funcrettype, -1, 0); } else if (functypclass == TYPEFUNC_RECORD) { tupdesc = BuildDescFromLists(rte->eref->colnames, rte->funccoltypes, rte->funccoltypmods); } else { /* crummy error message, but parser should have caught this */ elog(ERROR, "function in FROM has unsupported return type"); } /* * For RECORD results, make sure a typmod has been assigned. (The * function should do this for itself, but let's cover things in case it * doesn't.) */ BlessTupleDesc(tupdesc); scanstate->tupdesc = tupdesc; ExecAssignScanType(&scanstate->ss, tupdesc); /* * Other node-specific setup */ scanstate->tuplestorestate = NULL; scanstate->funcexpr = ExecInitExpr((Expr *) rte->funcexpr, (PlanState *) scanstate); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); initGpmonPktForFunctionScan((Plan *)node, &scanstate->ss.ps.gpmon_pkt, estate); if (gp_resqueue_memory_policy != RESQUEUE_MEMORY_POLICY_NONE) { SPI_ReserveMemory(((Plan *)node)->operatorMemKB * 1024L); } return scanstate; }
/* * Given the result tuple descriptor for a function with OUT parameters, * replace any polymorphic columns (ANYELEMENT etc) with correct data types * deduced from the input arguments. Returns TRUE if able to deduce all types, * FALSE if not. */ static bool resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args, Node *call_expr) { int natts = tupdesc->natts; int nargs = declared_args->dim1; bool have_anyelement_result = false; bool have_anyarray_result = false; bool have_anynonarray = false; bool have_anyenum = false; Oid anyelement_type = InvalidOid; Oid anyarray_type = InvalidOid; int i; /* See if there are any polymorphic outputs; quick out if not */ for (i = 0; i < natts; i++) { switch (tupdesc->attrs[i]->atttypid) { case ANYELEMENTOID: have_anyelement_result = true; break; case ANYARRAYOID: have_anyarray_result = true; break; case ANYNONARRAYOID: have_anyelement_result = true; have_anynonarray = true; break; case ANYENUMOID: have_anyelement_result = true; have_anyenum = true; break; default: break; } } if (!have_anyelement_result && !have_anyarray_result) return true; /* * Otherwise, extract actual datatype(s) from input arguments. (We assume * the parser already validated consistency of the arguments.) */ if (!call_expr) return false; /* no hope */ for (i = 0; i < nargs; i++) { switch (declared_args->values[i]) { case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: if (!OidIsValid(anyelement_type)) anyelement_type = get_call_expr_argtype(call_expr, i); break; case ANYARRAYOID: if (!OidIsValid(anyarray_type)) anyarray_type = get_call_expr_argtype(call_expr, i); break; default: break; } } /* If nothing found, parser messed up */ if (!OidIsValid(anyelement_type) && !OidIsValid(anyarray_type)) return false; /* If needed, deduce one polymorphic type from the other */ if (have_anyelement_result && !OidIsValid(anyelement_type)) anyelement_type = resolve_generic_type(ANYELEMENTOID, anyarray_type, ANYARRAYOID); if (have_anyarray_result && !OidIsValid(anyarray_type)) anyarray_type = resolve_generic_type(ANYARRAYOID, anyelement_type, ANYELEMENTOID); /* Enforce ANYNONARRAY if needed */ if (have_anynonarray && type_is_array(anyelement_type)) return false; /* Enforce ANYENUM if needed */ if (have_anyenum && !type_is_enum(anyelement_type)) return false; /* And finally replace the tuple column types as needed */ for (i = 0; i < natts; i++) { switch (tupdesc->attrs[i]->atttypid) { case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: TupleDescInitEntry(tupdesc, i + 1, NameStr(tupdesc->attrs[i]->attname), anyelement_type, -1, 0); break; case ANYARRAYOID: TupleDescInitEntry(tupdesc, i + 1, NameStr(tupdesc->attrs[i]->attname), anyarray_type, -1, 0); break; default: break; } } return true; }
/* * create_toast_table --- internal workhorse * * rel is already opened and exclusive-locked * toastOid and toastIndexOid are normally InvalidOid, but during * bootstrap they can be nonzero to specify hand-assigned OIDs */ static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, bool is_part_child) { Oid relOid = RelationGetRelid(rel); HeapTuple reltup; TupleDesc tupdesc; bool shared_relation; Relation class_rel; Oid toast_relid; Oid toast_idxid; Oid namespaceid; char toast_relname[NAMEDATALEN]; char toast_idxname[NAMEDATALEN]; IndexInfo *indexInfo; Oid classObjectId[2]; int16 coloptions[2]; ObjectAddress baseobject, toastobject; /* * Is it already toasted? */ if (rel->rd_rel->reltoastrelid != InvalidOid) return false; /* * Check to see whether the table actually needs a TOAST table. */ if (!RelationNeedsToastTable(rel)) return false; /* * Toast table is shared if and only if its parent is. * * We cannot allow toasting a shared relation after initdb (because * there's no way to mark it toasted in other databases' pg_class). */ shared_relation = rel->rd_rel->relisshared; if (shared_relation && !IsBootstrapProcessingMode()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("shared tables cannot be toasted after initdb"))); /* * Create the toast table and its index */ snprintf(toast_relname, sizeof(toast_relname), "pg_toast_%u", relOid); snprintf(toast_idxname, sizeof(toast_idxname), "pg_toast_%u_index", relOid); /* this is pretty painful... need a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(3, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "chunk_id", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "chunk_seq", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "chunk_data", BYTEAOID, -1, 0); /* * Ensure that the toast table doesn't itself get toasted, or we'll be * toast :-(. This is essential for chunk_data because type bytea is * toastable; hit the other two just to be sure. */ tupdesc->attrs[0]->attstorage = 'p'; tupdesc->attrs[1]->attstorage = 'p'; tupdesc->attrs[2]->attstorage = 'p'; /* * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. */ if (rel->rd_istemp) namespaceid = GetTempToastNamespace(); else namespaceid = PG_TOAST_NAMESPACE; /* * XXX would it make sense to apply the master's reloptions to the toast * table? Or maybe some toast-specific reloptions? */ toast_relid = heap_create_with_catalog(toast_relname, namespaceid, rel->rd_rel->reltablespace, toastOid, rel->rd_rel->relowner, tupdesc, /* relam */ InvalidOid, RELKIND_TOASTVALUE, RELSTORAGE_HEAP, shared_relation, true, /* bufferPoolBulkLoad */ false, 0, ONCOMMIT_NOOP, NULL, /* CDB POLICY */ (Datum) 0, true, /* valid_opts */ false, /* persistentTid */ NULL, /* persistentSerialNum */ NULL); /* make the toast relation visible, else index creation will fail */ CommandCounterIncrement(); /* * Create unique index on chunk_id, chunk_seq. * * NOTE: the normal TOAST access routines could actually function with a * single-column index on chunk_id only. However, the slice access * routines use both columns for faster access to an individual chunk. In * addition, we want it to be unique as a check against the possibility of * duplicate TOAST chunk OIDs. The index might also be a little more * efficient this way, since btree isn't all that happy with large numbers * of equal keys. */ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 2; indexInfo->ii_KeyAttrNumbers[0] = 1; indexInfo->ii_KeyAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NIL; indexInfo->ii_Unique = true; indexInfo->ii_ReadyForInserts = true; indexInfo->ii_Concurrent = false; indexInfo->ii_BrokenHotChain = false; classObjectId[0] = OID_BTREE_OPS_OID; classObjectId[1] = INT4_BTREE_OPS_OID; coloptions[0] = 0; coloptions[1] = 0; toast_idxid = index_create(toast_relid, toast_idxname, toastIndexOid, indexInfo, BTREE_AM_OID, rel->rd_rel->reltablespace, classObjectId, coloptions, (Datum) 0, true, false, true, false, false, NULL); /* * If this is a partitioned child, we can unlock since the master is * already locked. */ if (is_part_child) { UnlockRelationOid(toast_relid, ShareLock); UnlockRelationOid(toast_idxid, AccessExclusiveLock); } /* * Store the toast table's OID in the parent relation's pg_class row */ class_rel = heap_open(RelationRelationId, RowExclusiveLock); reltup = SearchSysCacheCopy(RELOID, ObjectIdGetDatum(relOid), 0, 0, 0); if (!HeapTupleIsValid(reltup)) elog(ERROR, "cache lookup failed for relation %u", relOid); ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid; if (!IsBootstrapProcessingMode()) { /* normal case, use a transactional update */ simple_heap_update(class_rel, &reltup->t_self, reltup); /* Keep catalog indexes current */ CatalogUpdateIndexes(class_rel, reltup); } else { /* While bootstrapping, we cannot UPDATE, so overwrite in-place */ heap_inplace_update(class_rel, reltup); } heap_freetuple(reltup); heap_close(class_rel, RowExclusiveLock); /* * Register dependency from the toast table to the master, so that the * toast table will be deleted if the master is. Skip this in bootstrap * mode. */ if (!IsBootstrapProcessingMode()) { baseobject.classId = RelationRelationId; baseobject.objectId = relOid; baseobject.objectSubId = 0; toastobject.classId = RelationRelationId; toastobject.objectId = toast_relid; toastobject.objectSubId = 0; recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); } /* * Make changes visible */ CommandCounterIncrement(); return true; }
Datum pg_control_init(PG_FUNCTION_ARGS) { Datum values[12]; bool nulls[12]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; bool crc_ok; /* * Construct a tuple descriptor for the result row. This must match this * function's pg_proc entry! */ tupdesc = CreateTemplateTupleDesc(12); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "max_data_alignment", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database_block_size", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "blocks_per_segment", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_block_size", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "bytes_per_wal_segment", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "max_identifier_length", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "max_index_columns", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "max_toast_chunk_size", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "large_object_chunk_size", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "float4_pass_by_value", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "float8_pass_by_value", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "data_page_checksum_version", INT4OID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); /* read the control file */ ControlFile = get_controlfile(DataDir, NULL, &crc_ok); if (!crc_ok) ereport(ERROR, (errmsg("calculated CRC checksum does not match value stored in file"))); values[0] = Int32GetDatum(ControlFile->maxAlign); nulls[0] = false; values[1] = Int32GetDatum(ControlFile->blcksz); nulls[1] = false; values[2] = Int32GetDatum(ControlFile->relseg_size); nulls[2] = false; values[3] = Int32GetDatum(ControlFile->xlog_blcksz); nulls[3] = false; values[4] = Int32GetDatum(ControlFile->xlog_seg_size); nulls[4] = false; values[5] = Int32GetDatum(ControlFile->nameDataLen); nulls[5] = false; values[6] = Int32GetDatum(ControlFile->indexMaxKeys); nulls[6] = false; values[7] = Int32GetDatum(ControlFile->toast_max_chunk_size); nulls[7] = false; values[8] = Int32GetDatum(ControlFile->loblksize); nulls[8] = false; values[9] = BoolGetDatum(ControlFile->float4ByVal); nulls[9] = false; values[10] = BoolGetDatum(ControlFile->float8ByVal); nulls[10] = false; values[11] = Int32GetDatum(ControlFile->data_checksum_version); nulls[11] = false; htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); }
Datum pg_control_checkpoint(PG_FUNCTION_ARGS) { Datum values[19]; bool nulls[19]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; XLogSegNo segno; char xlogfilename[MAXFNAMELEN]; bool crc_ok; /* * Construct a tuple descriptor for the result row. This must match this * function's pg_proc entry! */ tupdesc = CreateTemplateTupleDesc(18); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "checkpoint_lsn", LSNOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "redo_lsn", LSNOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_wal_file", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "timeline_id", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "prev_timeline_id", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "full_page_writes", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "next_xid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_oid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "next_multixact_id", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "next_multi_offset", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "oldest_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "oldest_xid_dbid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "oldest_active_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "oldest_multi_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "oldest_multi_dbid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "oldest_commit_ts_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 17, "newest_commit_ts_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TIMESTAMPTZOID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); /* Read the control file. */ ControlFile = get_controlfile(DataDir, NULL, &crc_ok); if (!crc_ok) ereport(ERROR, (errmsg("calculated CRC checksum does not match value stored in file"))); /* * Calculate name of the WAL file containing the latest checkpoint's REDO * start point. */ XLByteToSeg(ControlFile->checkPointCopy.redo, segno, wal_segment_size); XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, segno, wal_segment_size); /* Populate the values and null arrays */ values[0] = LSNGetDatum(ControlFile->checkPoint); nulls[0] = false; values[1] = LSNGetDatum(ControlFile->checkPointCopy.redo); nulls[1] = false; values[2] = CStringGetTextDatum(xlogfilename); nulls[2] = false; values[3] = Int32GetDatum(ControlFile->checkPointCopy.ThisTimeLineID); nulls[3] = false; values[4] = Int32GetDatum(ControlFile->checkPointCopy.PrevTimeLineID); nulls[4] = false; values[5] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); nulls[5] = false; values[6] = CStringGetTextDatum(psprintf("%u:%u", ControlFile->checkPointCopy.nextXidEpoch, ControlFile->checkPointCopy.nextXid)); nulls[6] = false; values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); nulls[7] = false; values[8] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMulti); nulls[8] = false; values[9] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMultiOffset); nulls[9] = false; values[10] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestXid); nulls[10] = false; values[11] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestXidDB); nulls[11] = false; values[12] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestActiveXid); nulls[12] = false; values[13] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestMulti); nulls[13] = false; values[14] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestMultiDB); nulls[14] = false; values[15] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestCommitTsXid); nulls[15] = false; values[16] = TransactionIdGetDatum(ControlFile->checkPointCopy.newestCommitTsXid); nulls[16] = false; values[17] = TimestampTzGetDatum( time_t_to_timestamptz(ControlFile->checkPointCopy.time)); nulls[17] = false; htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); }
Datum pg_stat_get_archiver(PG_FUNCTION_ARGS) { TupleDesc tupdesc; Datum values[7]; bool nulls[7]; PgStat_ArchiverStats *archiver_stats; /* Initialise values and NULL flags arrays */ MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); /* Initialise attributes information in the tuple descriptor */ tupdesc = CreateTemplateTupleDesc(7, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "last_archived_time", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "failed_count", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "last_failed_wal", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); /* Get statistics about the archiver process */ archiver_stats = pgstat_fetch_stat_archiver(); /* Fill values and NULLs */ values[0] = Int64GetDatum(archiver_stats->archived_count); if (*(archiver_stats->last_archived_wal) == '\0') nulls[1] = true; else values[1] = CStringGetTextDatum(archiver_stats->last_archived_wal); if (archiver_stats->last_archived_timestamp == 0) nulls[2] = true; else values[2] = TimestampTzGetDatum(archiver_stats->last_archived_timestamp); values[3] = Int64GetDatum(archiver_stats->failed_count); if (*(archiver_stats->last_failed_wal) == '\0') nulls[4] = true; else values[4] = CStringGetTextDatum(archiver_stats->last_failed_wal); if (archiver_stats->last_failed_timestamp == 0) nulls[5] = true; else values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp); if (archiver_stats->stat_reset_timestamp == 0) nulls[6] = true; else values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum( heap_form_tuple(tupdesc, values, nulls))); }
void AlterTableCreateAoVisimapTable(Oid relOid, bool is_part_child) { Relation rel; IndexInfo *indexInfo; TupleDesc tupdesc; Oid classObjectId[2]; int16 coloptions[2]; elogif(Debug_appendonly_print_visimap, LOG, "Create visimap for relation %d", relOid); /* * Grab an exclusive lock on the target table, which we will NOT release * until end of transaction. (This is probably redundant in all present * uses...) */ if (is_part_child) rel = heap_open(relOid, NoLock); else rel = heap_open(relOid, AccessExclusiveLock); if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel)) { heap_close(rel, NoLock); return; } /* Create a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(Natts_pg_aovisimap, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "first_row_no", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "visimap", BYTEAOID, -1, 0); /* * We don't want any toast columns here. */ tupdesc->attrs[0]->attstorage = 'p'; tupdesc->attrs[1]->attstorage = 'p'; tupdesc->attrs[2]->attstorage = 'p'; /* * Create index on segno, first_row_no. */ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 2; indexInfo->ii_KeyAttrNumbers[0] = 1; indexInfo->ii_KeyAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NIL; indexInfo->ii_Unique = true; indexInfo->ii_Concurrent = false; classObjectId[0] = INT4_BTREE_OPS_OID; classObjectId[1] = INT8_BTREE_OPS_OID; coloptions[0] = 0; coloptions[1] = 0; (void) CreateAOAuxiliaryTable(rel, "pg_aovisimap", RELKIND_AOVISIMAP, tupdesc, indexInfo, classObjectId, coloptions); heap_close(rel, NoLock); }
/* * Get status of resource groups in key-value style */ Datum pg_resgroup_get_status_kv(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; StringInfoData str; bool do_dump; do_dump = (strncmp(text_to_cstring(PG_GETARG_TEXT_P(0)), "dump", 4) == 0); if (do_dump) { /* Only super user can call this function with para=dump. */ if (!superuser()) { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only superusers can call this function"))); } initStringInfo(&str); /* dump info in QD and collect info from QEs to form str.*/ dumpResGroupInfo(&str); } if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; TupleDesc tupdesc; int nattr = 3; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(nattr, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "groupid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prop", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "value", TEXTOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->max_calls = do_dump ? 1 : 0; MemoryContextSwitchTo(oldcontext); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); if (funcctx->call_cntr < funcctx->max_calls) { if (do_dump) { Datum values[3]; bool nulls[3]; HeapTuple tuple; MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); nulls[0] = nulls[1] = true; values[2] = CStringGetTextDatum(str.data); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } else { SRF_RETURN_DONE(funcctx); } } else { /* nothing left */ SRF_RETURN_DONE(funcctx); } }
/* ---------------------------------------------------------------- * ExecInitFunctionScan * ---------------------------------------------------------------- */ FunctionScanState * ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags) { FunctionScanState *scanstate; Oid funcrettype; TypeFuncClass functypclass; TupleDesc tupdesc = NULL; /* check for unsupported flags */ Assert(!(eflags & EXEC_FLAG_MARK)); /* * FunctionScan should not have any children. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create new ScanState for node */ scanstate = makeNode(FunctionScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->eflags = eflags; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); /* * Now determine if the function returns a simple or composite type, and * build an appropriate tupdesc. */ functypclass = get_expr_result_type(node->funcexpr, &funcrettype, &tupdesc); if (functypclass == TYPEFUNC_COMPOSITE) { /* Composite data type, e.g. a table's row type */ Assert(tupdesc); /* Must copy it out of typcache for safety */ tupdesc = CreateTupleDescCopy(tupdesc); } else if (functypclass == TYPEFUNC_SCALAR) { /* Base data type, i.e. scalar */ char *attname = strVal(linitial(node->funccolnames)); tupdesc = CreateTemplateTupleDesc(1, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, attname, funcrettype, -1, 0); } else if (functypclass == TYPEFUNC_RECORD) { tupdesc = BuildDescFromLists(node->funccolnames, node->funccoltypes, node->funccoltypmods); } else { /* crummy error message, but parser should have caught this */ elog(ERROR, "function in FROM has unsupported return type"); } /* * For RECORD results, make sure a typmod has been assigned. (The * function should do this for itself, but let's cover things in case it * doesn't.) */ BlessTupleDesc(tupdesc); scanstate->tupdesc = tupdesc; ExecAssignScanType(&scanstate->ss, tupdesc); /* * Other node-specific setup */ scanstate->tuplestorestate = NULL; scanstate->funcexpr = ExecInitExpr((Expr *) node->funcexpr, (PlanState *) scanstate); scanstate->ss.ps.ps_TupFromTlist = false; /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); return scanstate; }
void AlterTableCreateAoBlkdirTable(Oid relOid, bool is_part_child) { Relation rel; TupleDesc tupdesc; IndexInfo *indexInfo; Oid classObjectId[3]; int16 coloptions[3]; /* * Grab an exclusive lock on the target table, which we will NOT release * until end of transaction. (This is probably redundant in all present * uses...) */ if (is_part_child) rel = heap_open(relOid, NoLock); else rel = heap_open(relOid, AccessExclusiveLock); if (!RelationIsAoRows(rel) && !RelationIsAoCols(rel)) { heap_close(rel, NoLock); return; } /* Create a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(4, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segno", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "columngroup_no", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "first_row_no", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "minipage", VARBITOID, -1, 0); /* * We don't want any toast columns here. */ tupdesc->attrs[0]->attstorage = 'p'; tupdesc->attrs[1]->attstorage = 'p'; tupdesc->attrs[2]->attstorage = 'p'; /* TODO (dmeister): In the next line, the index should have been 3. * Therefore the minipage might be toasted. */ tupdesc->attrs[2]->attstorage = 'p'; /* * Create index on segno, first_row_no. */ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 3; indexInfo->ii_KeyAttrNumbers[0] = 1; indexInfo->ii_KeyAttrNumbers[1] = 2; indexInfo->ii_KeyAttrNumbers[2] = 3; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NIL; indexInfo->ii_Unique = true; indexInfo->ii_Concurrent = false; classObjectId[0] = INT4_BTREE_OPS_OID; classObjectId[1] = INT4_BTREE_OPS_OID; classObjectId[2] = INT8_BTREE_OPS_OID; coloptions[0] = 0; coloptions[1] = 0; coloptions[2] = 0; (void) CreateAOAuxiliaryTable(rel, "pg_aoblkdir", RELKIND_AOBLOCKDIR, tupdesc, indexInfo, classObjectId, coloptions); heap_close(rel, NoLock); }
/* * build_function_result_tupdesc_d * * Build a RECORD function's tupledesc from the pg_proc proallargtypes, * proargmodes, and proargnames arrays. This is split out for the * convenience of ProcedureCreate, which needs to be able to compute the * tupledesc before actually creating the function. * * Returns NULL if there are not at least two OUT or INOUT arguments. */ TupleDesc build_function_result_tupdesc_d(Datum proallargtypes, Datum proargmodes, Datum proargnames) { TupleDesc desc; ArrayType *arr; int numargs; Oid *argtypes; char *argmodes; Datum *argnames = NULL; Oid *outargtypes; char **outargnames; int numoutargs; int nargnames; int i; /* Can't have output args if columns are null */ if (proallargtypes == PointerGetDatum(NULL) || proargmodes == PointerGetDatum(NULL)) return NULL; /* * We expect the arrays to be 1-D arrays of the right types; verify that. * For the OID and char arrays, we don't need to use deconstruct_array() * since the array data is just going to look like a C array of values. */ arr = DatumGetArrayTypeP(proallargtypes); /* ensure not toasted */ numargs = ARR_DIMS(arr)[0]; if (ARR_NDIM(arr) != 1 || numargs < 0 || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != OIDOID) elog(ERROR, "proallargtypes is not a 1-D Oid array"); argtypes = (Oid *) ARR_DATA_PTR(arr); arr = DatumGetArrayTypeP(proargmodes); /* ensure not toasted */ if (ARR_NDIM(arr) != 1 || ARR_DIMS(arr)[0] != numargs || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != CHAROID) elog(ERROR, "proargmodes is not a 1-D char array"); argmodes = (char *) ARR_DATA_PTR(arr); if (proargnames != PointerGetDatum(NULL)) { arr = DatumGetArrayTypeP(proargnames); /* ensure not toasted */ if (ARR_NDIM(arr) != 1 || ARR_DIMS(arr)[0] != numargs || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != TEXTOID) elog(ERROR, "proargnames is not a 1-D text array"); deconstruct_array(arr, TEXTOID, -1, false, 'i', &argnames, NULL, &nargnames); Assert(nargnames == numargs); } /* zero elements probably shouldn't happen, but handle it gracefully */ if (numargs <= 0) return NULL; /* extract output-argument types and names */ outargtypes = (Oid *) palloc(numargs * sizeof(Oid)); outargnames = (char **) palloc(numargs * sizeof(char *)); numoutargs = 0; for (i = 0; i < numargs; i++) { char *pname; if (argmodes[i] == PROARGMODE_IN || argmodes[i] == PROARGMODE_VARIADIC) continue; Assert(argmodes[i] == PROARGMODE_OUT || argmodes[i] == PROARGMODE_INOUT || argmodes[i] == PROARGMODE_TABLE); outargtypes[numoutargs] = argtypes[i]; if (argnames) pname = TextDatumGetCString(argnames[i]); else pname = NULL; if (pname == NULL || pname[0] == '\0') { /* Parameter is not named, so gin up a column name */ pname = (char *) palloc(32); snprintf(pname, 32, "column%d", numoutargs + 1); } outargnames[numoutargs] = pname; numoutargs++; } /* * If there is no output argument, or only one, the function does not * return tuples. */ if (numoutargs < 2) return NULL; desc = CreateTemplateTupleDesc(numoutargs, false); for (i = 0; i < numoutargs; i++) { TupleDescInitEntry(desc, i + 1, outargnames[i], outargtypes[i], -1, 0); } return desc; }
Datum pg_logdir_ls(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; struct dirent *de; directory_fctx *fctx; bool prefix_is_gpdb = true; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("only superuser can list the log directory")))); if (strcmp(Log_filename, "gpdb-%Y-%m-%d_%H%M%S.csv") != 0 && strcmp(Log_filename, "gpdb-%Y-%m-%d_%H%M%S.log") != 0 && strcmp(Log_filename, "postgresql-%Y-%m-%d_%H%M%S.log") != 0 ) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errmsg("the log_filename parameter must equal 'gpdb-%%Y-%%m-%%d_%%H%%M%%S.csv'")))); if (strncmp(Log_filename, "gpdb", 4) != 0) prefix_is_gpdb = false; if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; TupleDesc tupdesc; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); fctx = palloc(sizeof(directory_fctx)); tupdesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "starttime", TIMESTAMPOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "filename", TEXTOID, -1, 0); funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); fctx->location = pstrdup(Log_directory); fctx->dirdesc = AllocateDir(fctx->location); if (!fctx->dirdesc) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read directory \"%s\": %m", fctx->location))); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); fctx = (directory_fctx *) funcctx->user_fctx; while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL) { char *values[2]; HeapTuple tuple; char timestampbuf[32]; char *field[MAXDATEFIELDS]; char lowstr[MAXDATELEN + 1]; int dtype; int nf, ftype[MAXDATEFIELDS]; fsec_t fsec; int tz = 0; struct pg_tm date; if (prefix_is_gpdb) { int end = 17; /* * Default format: gpdb-YYYY-MM-DD_HHMMSS.log or gpdb-YYYY-MM-DD_HHMMSS.csv */ if (strlen(de->d_name) != 26 || strncmp(de->d_name, "gpdb-", 5) != 0 || de->d_name[15] != '_' || (strcmp(de->d_name + 22, ".log") != 0 && strcmp(de->d_name + 22, ".csv") != 0)) { /* * Not our normal format. Maybe old format without TIME fields? */ if (strlen(de->d_name) != 26 || strncmp(de->d_name, "gpdb-", 5) != 0 || de->d_name[15] != '_' || (strcmp(de->d_name + 22, ".log") != 0 && strcmp(de->d_name + 22, ".csv") != 0)) continue; end = 10; } /* extract timestamp portion of filename */ strcpy(timestampbuf, de->d_name + 5); timestampbuf[end] = '\0'; } else { /* * Default format: postgresql-YYYY-MM-DD_HHMMSS.log */ if (strlen(de->d_name) != 32 || strncmp(de->d_name, "postgresql-", 11) != 0 || de->d_name[21] != '_' || strcmp(de->d_name + 28, ".log") != 0) continue; /* extract timestamp portion of filename */ strcpy(timestampbuf, de->d_name + 11); timestampbuf[17] = '\0'; } /* parse and decode expected timestamp to verify it's OK format */ if (ParseDateTime(timestampbuf, lowstr, MAXDATELEN, field, ftype, MAXDATEFIELDS, &nf)) continue; if (DecodeDateTime(field, ftype, nf, &dtype, &date, &fsec, &tz)) continue; /* Seems the timestamp is OK; prepare and return tuple */ values[0] = timestampbuf; values[1] = palloc(strlen(fctx->location) + strlen(de->d_name) + 2); sprintf(values[1], "%s/%s", fctx->location, de->d_name); tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } FreeDir(fctx->dirdesc); SRF_RETURN_DONE(funcctx); }
/* * Given the result tuple descriptor for a function with OUT parameters, * replace any polymorphic columns (ANYELEMENT etc) with correct data types * deduced from the input arguments. Returns TRUE if able to deduce all types, * FALSE if not. */ static bool resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args, Node *call_expr) { int natts = tupdesc->natts; int nargs = declared_args->dim1; bool have_anyelement_result = false; bool have_anyarray_result = false; bool have_anynonarray = false; bool have_anyenum = false; Oid anyelement_type = InvalidOid; Oid anyarray_type = InvalidOid; Oid anycollation; int i; /* See if there are any polymorphic outputs; quick out if not */ for (i = 0; i < natts; i++) { switch (tupdesc->attrs[i]->atttypid) { case ANYELEMENTOID: have_anyelement_result = true; break; case ANYARRAYOID: have_anyarray_result = true; break; case ANYNONARRAYOID: have_anyelement_result = true; have_anynonarray = true; break; case ANYENUMOID: have_anyelement_result = true; have_anyenum = true; break; default: break; } } if (!have_anyelement_result && !have_anyarray_result) return true; /* * Otherwise, extract actual datatype(s) from input arguments. (We assume * the parser already validated consistency of the arguments.) */ if (!call_expr) return false; /* no hope */ for (i = 0; i < nargs; i++) { switch (declared_args->values[i]) { case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: if (!OidIsValid(anyelement_type)) anyelement_type = get_call_expr_argtype(call_expr, i); break; case ANYARRAYOID: if (!OidIsValid(anyarray_type)) anyarray_type = get_call_expr_argtype(call_expr, i); break; default: break; } } /* If nothing found, parser messed up */ if (!OidIsValid(anyelement_type) && !OidIsValid(anyarray_type)) return false; /* If needed, deduce one polymorphic type from the other */ if (have_anyelement_result && !OidIsValid(anyelement_type)) anyelement_type = resolve_generic_type(ANYELEMENTOID, anyarray_type, ANYARRAYOID); if (have_anyarray_result && !OidIsValid(anyarray_type)) anyarray_type = resolve_generic_type(ANYARRAYOID, anyelement_type, ANYELEMENTOID); /* Enforce ANYNONARRAY if needed */ if (have_anynonarray && type_is_array(anyelement_type)) return false; /* Enforce ANYENUM if needed */ if (have_anyenum && !type_is_enum(anyelement_type)) return false; /* * Identify the collation to use for polymorphic OUT parameters. (It'll * necessarily be the same for both anyelement and anyarray.) */ anycollation = get_typcollation(OidIsValid(anyelement_type) ? anyelement_type : anyarray_type); if (OidIsValid(anycollation)) { /* * The types are collatable, so consider whether to use a nondefault * collation. We do so if we can identify the input collation used * for the function. */ Oid inputcollation = exprInputCollation(call_expr); if (OidIsValid(inputcollation)) anycollation = inputcollation; } /* And finally replace the tuple column types as needed */ for (i = 0; i < natts; i++) { switch (tupdesc->attrs[i]->atttypid) { case ANYELEMENTOID: case ANYNONARRAYOID: case ANYENUMOID: TupleDescInitEntry(tupdesc, i + 1, NameStr(tupdesc->attrs[i]->attname), anyelement_type, -1, 0); TupleDescInitEntryCollation(tupdesc, i + 1, anycollation); break; case ANYARRAYOID: TupleDescInitEntry(tupdesc, i + 1, NameStr(tupdesc->attrs[i]->attname), anyarray_type, -1, 0); TupleDescInitEntryCollation(tupdesc, i + 1, anycollation); break; default: break; } } return true; }
Datum readindex(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; readindexinfo *info; Relation irel = NULL; Relation hrel = NULL; MIRROREDLOCK_BUFMGR_DECLARE; if (SRF_IS_FIRSTCALL()) { Oid irelid = PG_GETARG_OID(0); TupleDesc tupdesc; MemoryContext oldcontext; AttrNumber outattnum; TupleDesc itupdesc; int i; AttrNumber attno; irel = index_open(irelid, AccessShareLock); itupdesc = RelationGetDescr(irel); outattnum = FIXED_COLUMN + itupdesc->natts; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(outattnum, false); attno = 1; TupleDescInitEntry(tupdesc, attno++, "ictid", TIDOID, -1, 0); TupleDescInitEntry(tupdesc, attno++, "hctid", TIDOID, -1, 0); TupleDescInitEntry(tupdesc, attno++, "aotid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, attno++, "istatus", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, attno++, "hstatus", TEXTOID, -1, 0); for (i = 0; i < itupdesc->natts; i++) { Form_pg_attribute attr = itupdesc->attrs[i]; TupleDescInitEntry(tupdesc, attno++, NameStr(attr->attname), attr->atttypid, attr->atttypmod, 0); } funcctx->tuple_desc = BlessTupleDesc(tupdesc); info = (readindexinfo *) palloc(sizeof(readindexinfo)); funcctx->user_fctx = (void *) info; info->outattnum = outattnum; info->ireloid = irelid; hrel = relation_open(irel->rd_index->indrelid, AccessShareLock); if (hrel->rd_rel != NULL && (hrel->rd_rel->relstorage == 'a' || hrel->rd_rel->relstorage == 'c')) { relation_close(hrel, AccessShareLock); hrel = NULL; info->hreloid = InvalidOid; } else info->hreloid = irel->rd_index->indrelid; info->num_pages = RelationGetNumberOfBlocks(irel); info->blkno = BTREE_METAPAGE + 1; info->page = NULL; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); info = (readindexinfo *) funcctx->user_fctx; /* * Open the relations (on first call, we did that above already). * We unfortunately have to look up the relcache entry on every call, * because if we store it in the cross-call context, we won't get a * chance to release it if the function isn't run to completion, * e.g. because of a LIMIT clause. We only lock the relation on the * first call, and keep the lock until completion, however. */ if (!irel) irel = index_open(info->ireloid, NoLock); if (!hrel && info->hreloid != InvalidOid) hrel = heap_open(info->hreloid, NoLock); while (info->blkno < info->num_pages) { Datum values[255]; bool nulls[255]; ItemPointerData itid; HeapTuple tuple; Datum result; if (info->page == NULL) { Buffer buf; /* * Make copy of the page, because we cannot hold a buffer pin * across calls (we wouldn't have a chance to release it, if the * function isn't run to completion.) */ info->page = palloc(BLCKSZ); MIRROREDLOCK_BUFMGR_LOCK; buf = ReadBuffer(irel, info->blkno); memcpy(info->page, BufferGetPage(buf), BLCKSZ); ReleaseBuffer(buf); MIRROREDLOCK_BUFMGR_UNLOCK; info->opaque = (BTPageOpaque) PageGetSpecialPointer(info->page); info->minoff = P_FIRSTDATAKEY(info->opaque); info->maxoff = PageGetMaxOffsetNumber(info->page); info->offnum = info->minoff; } if (!P_ISLEAF(info->opaque) || info->offnum > info->maxoff) { pfree(info->page); info->page = NULL; info->blkno++; continue; } MemSet(nulls, false, info->outattnum * sizeof(bool)); ItemPointerSet(&itid, info->blkno, info->offnum); values[0] = ItemPointerGetDatum(&itid); readindextuple(info, irel, hrel, values, nulls); info->offnum = OffsetNumberNext(info->offnum); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); if (hrel != NULL) heap_close(hrel, NoLock); index_close(irel, NoLock); SRF_RETURN_NEXT(funcctx, result); } if (hrel != NULL) heap_close(hrel, AccessShareLock); index_close(irel, AccessShareLock); SRF_RETURN_DONE(funcctx); }
/* * create_toast_table --- internal workhorse * * rel is already opened and locked * toastOid and toastIndexOid are normally InvalidOid, but during * bootstrap they can be nonzero to specify hand-assigned OIDs */ static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions, LOCKMODE lockmode, bool check) { Oid relOid = RelationGetRelid(rel); HeapTuple reltup; TupleDesc tupdesc; bool shared_relation; bool mapped_relation; Relation toast_rel; Relation class_rel; Oid toast_relid; Oid toast_typid = InvalidOid; Oid namespaceid; char toast_relname[NAMEDATALEN]; char toast_idxname[NAMEDATALEN]; IndexInfo *indexInfo; Oid collationObjectId[2]; Oid classObjectId[2]; int16 coloptions[2]; ObjectAddress baseobject, toastobject; /* * Toast table is shared if and only if its parent is. * * We cannot allow toasting a shared relation after initdb (because * there's no way to mark it toasted in other databases' pg_class). */ shared_relation = rel->rd_rel->relisshared; if (shared_relation && !IsBootstrapProcessingMode()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("shared tables cannot be toasted after initdb"))); /* It's mapped if and only if its parent is, too */ mapped_relation = RelationIsMapped(rel); /* * Is it already toasted? */ if (rel->rd_rel->reltoastrelid != InvalidOid) return false; /* * Check to see whether the table actually needs a TOAST table. */ if (!IsBinaryUpgrade) { /* Normal mode, normal check */ if (!needs_toast_table(rel)) return false; } else { /* * In binary-upgrade mode, create a TOAST table if and only if * pg_upgrade told us to (ie, a TOAST table OID has been provided). * * This indicates that the old cluster had a TOAST table for the * current table. We must create a TOAST table to receive the old * TOAST file, even if the table seems not to need one. * * Contrariwise, if the old cluster did not have a TOAST table, we * should be able to get along without one even if the new version's * needs_toast_table rules suggest we should have one. There is a lot * of daylight between where we will create a TOAST table and where * one is really necessary to avoid failures, so small cross-version * differences in the when-to-create heuristic shouldn't be a problem. * If we tried to create a TOAST table anyway, we would have the * problem that it might take up an OID that will conflict with some * old-cluster table we haven't seen yet. */ if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) || !OidIsValid(binary_upgrade_next_toast_pg_type_oid)) return false; } /* * If requested check lockmode is sufficient. This is a cross check in * case of errors or conflicting decisions in earlier code. */ if (check && lockmode != AccessExclusiveLock) elog(ERROR, "AccessExclusiveLock required to add toast table."); /* * Create the toast table and its index */ snprintf(toast_relname, sizeof(toast_relname), "pg_toast_%u", relOid); snprintf(toast_idxname, sizeof(toast_idxname), "pg_toast_%u_index", relOid); /* this is pretty painful... need a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(3); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "chunk_id", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "chunk_seq", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "chunk_data", BYTEAOID, -1, 0); /* * Ensure that the toast table doesn't itself get toasted, or we'll be * toast :-(. This is essential for chunk_data because type bytea is * toastable; hit the other two just to be sure. */ TupleDescAttr(tupdesc, 0)->attstorage = 'p'; TupleDescAttr(tupdesc, 1)->attstorage = 'p'; TupleDescAttr(tupdesc, 2)->attstorage = 'p'; /* * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. */ if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace)) namespaceid = GetTempToastNamespace(); else namespaceid = PG_TOAST_NAMESPACE; /* * Use binary-upgrade override for pg_type.oid, if supplied. We might be * in the post-schema-restore phase where we are doing ALTER TABLE to * create TOAST tables that didn't exist in the old cluster. */ if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid)) { toast_typid = binary_upgrade_next_toast_pg_type_oid; binary_upgrade_next_toast_pg_type_oid = InvalidOid; } toast_relid = heap_create_with_catalog(toast_relname, namespaceid, rel->rd_rel->reltablespace, toastOid, toast_typid, InvalidOid, rel->rd_rel->relowner, tupdesc, NIL, RELKIND_TOASTVALUE, rel->rd_rel->relpersistence, shared_relation, mapped_relation, ONCOMMIT_NOOP, reloptions, false, true, true, InvalidOid, NULL); Assert(toast_relid != InvalidOid); /* make the toast relation visible, else heap_open will fail */ CommandCounterIncrement(); /* ShareLock is not really needed here, but take it anyway */ toast_rel = heap_open(toast_relid, ShareLock); /* * Create unique index on chunk_id, chunk_seq. * * NOTE: the normal TOAST access routines could actually function with a * single-column index on chunk_id only. However, the slice access * routines use both columns for faster access to an individual chunk. In * addition, we want it to be unique as a check against the possibility of * duplicate TOAST chunk OIDs. The index might also be a little more * efficient this way, since btree isn't all that happy with large numbers * of equal keys. */ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 2; indexInfo->ii_NumIndexKeyAttrs = 2; indexInfo->ii_IndexAttrNumbers[0] = 1; indexInfo->ii_IndexAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NULL; indexInfo->ii_ExclusionOps = NULL; indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; indexInfo->ii_Unique = true; indexInfo->ii_ReadyForInserts = true; indexInfo->ii_Concurrent = false; indexInfo->ii_BrokenHotChain = false; indexInfo->ii_ParallelWorkers = 0; indexInfo->ii_Am = BTREE_AM_OID; indexInfo->ii_AmCache = NULL; indexInfo->ii_Context = CurrentMemoryContext; collationObjectId[0] = InvalidOid; collationObjectId[1] = InvalidOid; classObjectId[0] = OID_BTREE_OPS_OID; classObjectId[1] = INT4_BTREE_OPS_OID; coloptions[0] = 0; coloptions[1] = 0; index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid, InvalidOid, InvalidOid, indexInfo, list_make2("chunk_id", "chunk_seq"), BTREE_AM_OID, rel->rd_rel->reltablespace, collationObjectId, classObjectId, coloptions, (Datum) 0, INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL); heap_close(toast_rel, NoLock); /* * Store the toast table's OID in the parent relation's pg_class row */ class_rel = heap_open(RelationRelationId, RowExclusiveLock); reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid)); if (!HeapTupleIsValid(reltup)) elog(ERROR, "cache lookup failed for relation %u", relOid); ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid; if (!IsBootstrapProcessingMode()) { /* normal case, use a transactional update */ CatalogTupleUpdate(class_rel, &reltup->t_self, reltup); } else { /* While bootstrapping, we cannot UPDATE, so overwrite in-place */ heap_inplace_update(class_rel, reltup); } heap_freetuple(reltup); heap_close(class_rel, RowExclusiveLock); /* * Register dependency from the toast table to the master, so that the * toast table will be deleted if the master is. Skip this in bootstrap * mode. */ if (!IsBootstrapProcessingMode()) { baseobject.classId = RelationRelationId; baseobject.objectId = relOid; baseobject.objectSubId = 0; toastobject.classId = RelationRelationId; toastobject.objectId = toast_relid; toastobject.objectSubId = 0; recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); } /* * Make changes visible */ CommandCounterIncrement(); return true; }
/* Function to return the list of grammar keywords */ Datum pg_get_keywords(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; TupleDesc tupdesc; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(3, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "catcode", CHAROID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "catdesc", TEXTOID, -1, 0); funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); if (funcctx->call_cntr < NumScanKeywords) { char *values[3]; HeapTuple tuple; /* cast-away-const is ugly but alternatives aren't much better */ values[0] = (char *) ScanKeywords[funcctx->call_cntr].name; switch (ScanKeywords[funcctx->call_cntr].category) { case UNRESERVED_KEYWORD: values[1] = "U"; values[2] = _("unreserved"); break; case COL_NAME_KEYWORD: values[1] = "C"; values[2] = _("unreserved (cannot be function or type name)"); break; case TYPE_FUNC_NAME_KEYWORD: values[1] = "T"; values[2] = _("reserved (can be function or type name)"); break; case RESERVED_KEYWORD: values[1] = "R"; values[2] = _("reserved"); break; default: /* shouldn't be possible */ values[1] = NULL; values[2] = NULL; break; } tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } SRF_RETURN_DONE(funcctx); }
/* * pgdatabasev - produce a view of pgdatabase to include transient state */ Datum gp_pgdatabase__(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Working_State *mystatus; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function * calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_prepared_xacts view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(5, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "dbid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "isprimary", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "content", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "valid", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "definedprimary", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and * send out as a result set. */ mystatus = (Working_State *) palloc(sizeof(Working_State)); funcctx->user_fctx = (void *) mystatus; mystatus->master = GetMasterSegment(); mystatus->standby = GetStandbySegment(); mystatus->segments = GetSegmentList(); mystatus->idx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (Working_State *) funcctx->user_fctx; while (mystatus->master || mystatus->standby || (mystatus->idx < list_length(mystatus->segments))) { Datum values[6]; bool nulls[6]; HeapTuple tuple; Datum result; Segment *current = NULL; if (mystatus->master) { current = mystatus->master; mystatus->master = NULL; } else if (mystatus->standby) { current = mystatus->standby; mystatus->standby = NULL; } else { current = list_nth(mystatus->segments, mystatus->idx); mystatus->idx++; } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); //values[0] = UInt16GetDatum(current->dbid); values[1] = current->standby ? false : true;; values[2] = UInt16GetDatum(current->segindex); values[3] = BoolGetDatum(true); values[4] = values[1]; tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
/* * BuildDescForRelation * * Given a relation schema (list of ColumnDef nodes), build a TupleDesc. * * Note: the default assumption is no OIDs; caller may modify the returned * TupleDesc if it wants OIDs. Also, tdtypeid will need to be filled in * later on. */ TupleDesc BuildDescForRelation(List *schema) { int natts; AttrNumber attnum; ListCell *l; TupleDesc desc; AttrDefault *attrdef = NULL; TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr)); char *attname; int32 atttypmod; int attdim; int ndef = 0; /* * allocate a new tuple descriptor */ natts = list_length(schema); desc = CreateTemplateTupleDesc(natts, false); constr->has_not_null = false; attnum = 0; foreach(l, schema) { ColumnDef *entry = lfirst(l); /* * for each entry in the list, get the name and type information from * the list and have TupleDescInitEntry fill in the attribute * information we need. */ attnum++; attname = entry->colname; atttypmod = entry->typname->typmod; attdim = list_length(entry->typname->arrayBounds); if (entry->typname->setof) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("column \"%s\" cannot be declared SETOF", attname))); TupleDescInitEntry(desc, attnum, attname, typenameTypeId(NULL, entry->typname), atttypmod, attdim); /* Fill in additional stuff not handled by TupleDescInitEntry */ if (entry->is_not_null) constr->has_not_null = true; desc->attrs[attnum - 1]->attnotnull = entry->is_not_null; /* * Note we copy only pre-cooked default expressions. Digestion of raw * ones is someone else's problem. */ if (entry->cooked_default != NULL) { if (attrdef == NULL) attrdef = (AttrDefault *) palloc(natts * sizeof(AttrDefault)); attrdef[ndef].adnum = attnum; attrdef[ndef].adbin = pstrdup(entry->cooked_default); ndef++; desc->attrs[attnum - 1]->atthasdef = true; } desc->attrs[attnum - 1]->attislocal = entry->is_local; desc->attrs[attnum - 1]->attinhcount = entry->inhcount; }
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(16, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; /* * This loop returns all the local lock data from the segment we are running on. */ while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[16]; bool nulls[16]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_ADVISORY) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: case LOCKTAG_RELATION_RESYNCHRONIZE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TRANSACTION: values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_RESOURCE_QUEUE: values[1] = ObjectIdGetDatum(proc->databaseId); values[7] = ObjectIdGetDatum(lock->tag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); values[8] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; break; } values[9] = TransactionIdGetDatum(proc->xid); if (proc->pid != 0) values[10] = Int32GetDatum(proc->pid); else nulls[10] = true; values[11] = DirectFunctionCall1(textin, CStringGetDatum((char *) GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[12] = BoolGetDatum(granted); values[13] = Int32GetDatum(proc->mppSessionId); values[14] = Int32GetDatum(proc->mppIsWriter); values[15] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. */ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[16]; bool nulls[16]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. * The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5))); values[6] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,10))); values[11] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow,11)); values[12] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,12),"t",1)==0); values[13] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13))); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); /* * Copy the null info over. It should all match properly. */ for (i=0; i<16; i++) { nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i); } tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * if we dispatched to the segDBs, free up the memory holding the result sets. * Otherwise we might leak this memory each time we got called (does it automatically * get freed by the pool being deleted? Probably, but this is safer). */ if (mystatus->segresults != NULL) { int i; for (i = 0; i < mystatus->numsegresults; i++) PQclear(mystatus->segresults[i]); free(mystatus->segresults); } SRF_RETURN_DONE(funcctx); }
/* * master_get_local_first_candidate_nodes returns a set of candidate host names * and port numbers on which to place new shards. The function makes sure to * always allocate the first candidate node as the node the caller is connecting * from; and allocates additional nodes until the shard replication factor is * met. The function errors if the caller's remote node name is not found in the * membership list, or if the number of available nodes falls short of the * replication factor. */ Datum master_get_local_first_candidate_nodes(PG_FUNCTION_ARGS) { FuncCallContext *functionContext = NULL; uint32 desiredNodeCount = 0; uint32 currentNodeCount = 0; if (SRF_IS_FIRSTCALL()) { MemoryContext oldContext = NULL; TupleDesc tupleDescriptor = NULL; uint32 liveNodeCount = 0; bool hasOid = false; /* create a function context for cross-call persistence */ functionContext = SRF_FIRSTCALL_INIT(); /* switch to memory context appropriate for multiple function calls */ oldContext = MemoryContextSwitchTo(functionContext->multi_call_memory_ctx); functionContext->user_fctx = NIL; functionContext->max_calls = ShardReplicationFactor; /* if enough live nodes, return an extra candidate node as backup */ liveNodeCount = WorkerGetLiveNodeCount(); if (liveNodeCount > ShardReplicationFactor) { functionContext->max_calls = ShardReplicationFactor + 1; } /* * This tuple descriptor must match the output parameters declared for * the function in pg_proc. */ tupleDescriptor = CreateTemplateTupleDesc(CANDIDATE_NODE_FIELDS, hasOid); TupleDescInitEntry(tupleDescriptor, (AttrNumber) 1, "node_name", TEXTOID, -1, 0); TupleDescInitEntry(tupleDescriptor, (AttrNumber) 2, "node_port", INT8OID, -1, 0); functionContext->tuple_desc = BlessTupleDesc(tupleDescriptor); MemoryContextSwitchTo(oldContext); } functionContext = SRF_PERCALL_SETUP(); desiredNodeCount = functionContext->max_calls; currentNodeCount = functionContext->call_cntr; if (currentNodeCount < desiredNodeCount) { MemoryContext oldContext = NULL; List *currentNodeList = NIL; WorkerNode *candidateNode = NULL; Datum candidateDatum = 0; /* switch to memory context appropriate for multiple function calls */ oldContext = MemoryContextSwitchTo(functionContext->multi_call_memory_ctx); currentNodeList = functionContext->user_fctx; candidateNode = WorkerGetLocalFirstCandidateNode(currentNodeList); if (candidateNode == NULL) { ereport(ERROR, (errmsg("could only find %u of %u required nodes", currentNodeCount, desiredNodeCount))); } currentNodeList = lappend(currentNodeList, candidateNode); functionContext->user_fctx = currentNodeList; MemoryContextSwitchTo(oldContext); candidateDatum = WorkerNodeGetDatum(candidateNode, functionContext->tuple_desc); SRF_RETURN_NEXT(functionContext, candidateDatum); } else { SRF_RETURN_DONE(functionContext); } }
/* * pg_prepared_xact * Produce a view with one row per prepared transaction. * * This function is here so we don't have to export the * GlobalTransactionData struct definition. */ Datum pg_prepared_xact(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Working_State *status; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * Switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_prepared_xacts view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(5, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepared", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "ownerid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid", OIDOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the 2PC status information that we will format and send * out as a result set. */ status = (Working_State *) palloc(sizeof(Working_State)); funcctx->user_fctx = (void *) status; status->ngxacts = GetPreparedTransactionList(&status->array); status->currIdx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); status = (Working_State *) funcctx->user_fctx; while (status->array != NULL && status->currIdx < status->ngxacts) { GlobalTransaction gxact = &status->array[status->currIdx++]; Datum values[5]; bool nulls[5]; HeapTuple tuple; Datum result; if (!gxact->valid) continue; /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); values[0] = TransactionIdGetDatum(gxact->proc.xid); values[1] = CStringGetTextDatum(gxact->gid); values[2] = TimestampTzGetDatum(gxact->prepared_at); values[3] = ObjectIdGetDatum(gxact->owner); values[4] = ObjectIdGetDatum(gxact->proc.databaseId); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
/* * master_get_active_worker_nodes returns a set of active worker host names and * port numbers in deterministic order. Currently we assume that all worker * nodes in pg_worker_list.conf are active. */ Datum master_get_active_worker_nodes(PG_FUNCTION_ARGS) { FuncCallContext *functionContext = NULL; uint32 workerNodeIndex = 0; uint32 workerNodeCount = 0; if (SRF_IS_FIRSTCALL()) { MemoryContext oldContext = NULL; List *workerNodeList = NIL; uint32 workerNodeCount = 0; TupleDesc tupleDescriptor = NULL; bool hasOid = false; /* create a function context for cross-call persistence */ functionContext = SRF_FIRSTCALL_INIT(); /* switch to memory context appropriate for multiple function calls */ oldContext = MemoryContextSwitchTo(functionContext->multi_call_memory_ctx); workerNodeList = WorkerNodeList(); workerNodeCount = (uint32) list_length(workerNodeList); functionContext->user_fctx = workerNodeList; functionContext->max_calls = workerNodeCount; /* * This tuple descriptor must match the output parameters declared for * the function in pg_proc. */ tupleDescriptor = CreateTemplateTupleDesc(WORKER_NODE_FIELDS, hasOid); TupleDescInitEntry(tupleDescriptor, (AttrNumber) 1, "node_name", TEXTOID, -1, 0); TupleDescInitEntry(tupleDescriptor, (AttrNumber) 2, "node_port", INT8OID, -1, 0); functionContext->tuple_desc = BlessTupleDesc(tupleDescriptor); MemoryContextSwitchTo(oldContext); } functionContext = SRF_PERCALL_SETUP(); workerNodeIndex = functionContext->call_cntr; workerNodeCount = functionContext->max_calls; if (workerNodeIndex < workerNodeCount) { List *workerNodeList = functionContext->user_fctx; WorkerNode *workerNode = list_nth(workerNodeList, workerNodeIndex); Datum workerNodeDatum = WorkerNodeGetDatum(workerNode, functionContext->tuple_desc); SRF_RETURN_NEXT(functionContext, workerNodeDatum); } else { SRF_RETURN_DONE(functionContext); } }
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; PredicateLockData *predLockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->predLockData = GetPredicateLockStatusData(); mystatus->predLockIdx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; while (mystatus->currIdx < lockData->nelements) { bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; LockInstanceData *instance; instance = &(lockData->locks[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (instance->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (instance->holdMask & LOCKBIT_ON(mode)) { granted = true; instance->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (instance->waitLockMode != NoLock) { /* Yes, so report it with proper mode */ mode = instance->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE) locktypename = LockTagTypeNames[instance->locktag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) instance->locktag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); values[4] = UInt16GetDatum(instance->locktag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TRANSACTION: values[6] = TransactionIdGetDatum(instance->locktag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, instance->locktag.locktag_field2); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3); values[9] = Int16GetDatum(instance->locktag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; break; } values[10] = VXIDGetDatum(instance->backend, instance->lxid); if (instance->pid != 0) values[11] = Int32GetDatum(instance->pid); else nulls[11] = true; values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[13] = BoolGetDatum(granted); values[14] = BoolGetDatum(instance->fastpath); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * Have returned all regular locks. Now start on the SIREAD predicate * locks. */ predLockData = mystatus->predLockData; if (mystatus->predLockIdx < predLockData->nelements) { PredicateLockTargetType lockType; PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]); SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]); Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; mystatus->predLockIdx++; /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* lock type */ lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag); values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); /* lock target */ values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); if (lockType == PREDLOCKTAG_TUPLE) values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); else nulls[4] = true; if ((lockType == PREDLOCKTAG_TUPLE) || (lockType == PREDLOCKTAG_PAGE)) values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); else nulls[3] = true; /* these fields are targets for other types of locks */ nulls[5] = true; /* virtualxid */ nulls[6] = true; /* transactionid */ nulls[7] = true; /* classid */ nulls[8] = true; /* objid */ nulls[9] = true; /* objsubid */ /* lock holder */ values[10] = VXIDGetDatum(xact->vxid.backendId, xact->vxid.localTransactionId); if (xact->pid != 0) values[11] = Int32GetDatum(xact->pid); else nulls[11] = true; /* * Lock mode. Currently all predicate locks are SIReadLocks, which are * always held (never waiting) and have no fast path */ values[12] = CStringGetTextDatum("SIReadLock"); values[13] = BoolGetDatum(true); values[14] = BoolGetDatum(false); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
/* * ExecMakeTableFunctionResult * * Evaluate a table function, producing a materialized result in a Tuplestore * object. * * This is used by nodeFunctionscan.c. */ Tuplestorestate * ExecMakeTableFunctionResult(SetExprState *setexpr, ExprContext *econtext, MemoryContext argContext, TupleDesc expectedDesc, bool randomAccess) { Tuplestorestate *tupstore = NULL; TupleDesc tupdesc = NULL; Oid funcrettype; bool returnsTuple; bool returnsSet = false; FunctionCallInfoData fcinfo; PgStat_FunctionCallUsage fcusage; ReturnSetInfo rsinfo; HeapTupleData tmptup; MemoryContext callerContext; MemoryContext oldcontext; bool first_time = true; callerContext = CurrentMemoryContext; funcrettype = exprType((Node *) setexpr->expr); returnsTuple = type_is_rowtype(funcrettype); /* * Prepare a resultinfo node for communication. We always do this even if * not expecting a set result, so that we can pass expectedDesc. In the * generic-expression case, the expression doesn't actually get to see the * resultinfo, but set it up anyway because we use some of the fields as * our own state variables. */ rsinfo.type = T_ReturnSetInfo; rsinfo.econtext = econtext; rsinfo.expectedDesc = expectedDesc; rsinfo.allowedModes = (int) (SFRM_ValuePerCall | SFRM_Materialize | SFRM_Materialize_Preferred); if (randomAccess) rsinfo.allowedModes |= (int) SFRM_Materialize_Random; rsinfo.returnMode = SFRM_ValuePerCall; /* isDone is filled below */ rsinfo.setResult = NULL; rsinfo.setDesc = NULL; /* * Normally the passed expression tree will be a SetExprState, since the * grammar only allows a function call at the top level of a table * function reference. However, if the function doesn't return set then * the planner might have replaced the function call via constant-folding * or inlining. So if we see any other kind of expression node, execute * it via the general ExecEvalExpr() code; the only difference is that we * don't get a chance to pass a special ReturnSetInfo to any functions * buried in the expression. */ if (!setexpr->elidedFuncState) { /* * This path is similar to ExecMakeFunctionResultSet. */ returnsSet = setexpr->funcReturnsSet; InitFunctionCallInfoData(fcinfo, &(setexpr->func), list_length(setexpr->args), setexpr->fcinfo_data.fncollation, NULL, (Node *) &rsinfo); /* * Evaluate the function's argument list. * * We can't do this in the per-tuple context: the argument values * would disappear when we reset that context in the inner loop. And * the caller's CurrentMemoryContext is typically a query-lifespan * context, so we don't want to leak memory there. We require the * caller to pass a separate memory context that can be used for this, * and can be reset each time through to avoid bloat. */ MemoryContextReset(argContext); oldcontext = MemoryContextSwitchTo(argContext); ExecEvalFuncArgs(&fcinfo, setexpr->args, econtext); MemoryContextSwitchTo(oldcontext); /* * If function is strict, and there are any NULL arguments, skip * calling the function and act like it returned NULL (or an empty * set, in the returns-set case). */ if (setexpr->func.fn_strict) { int i; for (i = 0; i < fcinfo.nargs; i++) { if (fcinfo.argnull[i]) goto no_function_result; } } } else { /* Treat setexpr as a generic expression */ InitFunctionCallInfoData(fcinfo, NULL, 0, InvalidOid, NULL, NULL); } /* * Switch to short-lived context for calling the function or expression. */ MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); /* * Loop to handle the ValuePerCall protocol (which is also the same * behavior needed in the generic ExecEvalExpr path). */ for (;;) { Datum result; CHECK_FOR_INTERRUPTS(); /* * reset per-tuple memory context before each call of the function or * expression. This cleans up any local memory the function may leak * when called. */ ResetExprContext(econtext); /* Call the function or expression one time */ if (!setexpr->elidedFuncState) { pgstat_init_function_usage(&fcinfo, &fcusage); fcinfo.isnull = false; rsinfo.isDone = ExprSingleResult; result = FunctionCallInvoke(&fcinfo); pgstat_end_function_usage(&fcusage, rsinfo.isDone != ExprMultipleResult); } else { result = ExecEvalExpr(setexpr->elidedFuncState, econtext, &fcinfo.isnull); rsinfo.isDone = ExprSingleResult; } /* Which protocol does function want to use? */ if (rsinfo.returnMode == SFRM_ValuePerCall) { /* * Check for end of result set. */ if (rsinfo.isDone == ExprEndResult) break; /* * If first time through, build tuplestore for result. For a * scalar function result type, also make a suitable tupdesc. */ if (first_time) { oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); rsinfo.setResult = tupstore; if (!returnsTuple) { tupdesc = CreateTemplateTupleDesc(1, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "column", funcrettype, -1, 0); rsinfo.setDesc = tupdesc; } MemoryContextSwitchTo(oldcontext); } /* * Store current resultset item. */ if (returnsTuple) { if (!fcinfo.isnull) { HeapTupleHeader td = DatumGetHeapTupleHeader(result); if (tupdesc == NULL) { /* * This is the first non-NULL result from the * function. Use the type info embedded in the * rowtype Datum to look up the needed tupdesc. Make * a copy for the query. */ oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); tupdesc = lookup_rowtype_tupdesc_copy(HeapTupleHeaderGetTypeId(td), HeapTupleHeaderGetTypMod(td)); rsinfo.setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); } else { /* * Verify all later returned rows have same subtype; * necessary in case the type is RECORD. */ if (HeapTupleHeaderGetTypeId(td) != tupdesc->tdtypeid || HeapTupleHeaderGetTypMod(td) != tupdesc->tdtypmod) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("rows returned by function are not all of the same row type"))); } /* * tuplestore_puttuple needs a HeapTuple not a bare * HeapTupleHeader, but it doesn't need all the fields. */ tmptup.t_len = HeapTupleHeaderGetDatumLength(td); tmptup.t_data = td; tuplestore_puttuple(tupstore, &tmptup); } else { /* * NULL result from a tuple-returning function; expand it * to a row of all nulls. We rely on the expectedDesc to * form such rows. (Note: this would be problematic if * tuplestore_putvalues saved the tdtypeid/tdtypmod from * the provided descriptor, since that might not match * what we get from the function itself. But it doesn't.) */ int natts = expectedDesc->natts; bool *nullflags; nullflags = (bool *) palloc(natts * sizeof(bool)); memset(nullflags, true, natts * sizeof(bool)); tuplestore_putvalues(tupstore, expectedDesc, NULL, nullflags); } } else { /* Scalar-type case: just store the function result */ tuplestore_putvalues(tupstore, tupdesc, &result, &fcinfo.isnull); } /* * Are we done? */ if (rsinfo.isDone != ExprMultipleResult) break; } else if (rsinfo.returnMode == SFRM_Materialize) { /* check we're on the same page as the function author */ if (!first_time || rsinfo.isDone != ExprSingleResult) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_SRF_PROTOCOL_VIOLATED), errmsg("table-function protocol for materialize mode was not followed"))); /* Done evaluating the set result */ break; } else ereport(ERROR, (errcode(ERRCODE_E_R_I_E_SRF_PROTOCOL_VIOLATED), errmsg("unrecognized table-function returnMode: %d", (int) rsinfo.returnMode))); first_time = false; } no_function_result: /* * If we got nothing from the function (ie, an empty-set or NULL result), * we have to create the tuplestore to return, and if it's a * non-set-returning function then insert a single all-nulls row. As * above, we depend on the expectedDesc to manufacture the dummy row. */ if (rsinfo.setResult == NULL) { MemoryContextSwitchTo(econtext->ecxt_per_query_memory); tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); rsinfo.setResult = tupstore; if (!returnsSet) { int natts = expectedDesc->natts; bool *nullflags; MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); nullflags = (bool *) palloc(natts * sizeof(bool)); memset(nullflags, true, natts * sizeof(bool)); tuplestore_putvalues(tupstore, expectedDesc, NULL, nullflags); } } /* * If function provided a tupdesc, cross-check it. We only really need to * do this for functions returning RECORD, but might as well do it always. */ if (rsinfo.setDesc) { tupledesc_match(expectedDesc, rsinfo.setDesc); /* * If it is a dynamically-allocated TupleDesc, free it: it is * typically allocated in a per-query context, so we must avoid * leaking it across multiple usages. */ if (rsinfo.setDesc->tdrefcount == -1) FreeTupleDesc(rsinfo.setDesc); } MemoryContextSwitchTo(callerContext); /* All done, pass back the tuplestore */ return rsinfo.setResult; }