/*
 * This function implements a binary logic consistency check, using a ternary
 * logic consistent function provided by the opclass. GIN_MAYBE return value
 * is interpreted as true with recheck flag.
 */
static bool
shimBoolConsistentFn(GinScanKey key)
{
    GinTernaryValue result;

    result = DatumGetGinTernaryValue(FunctionCall7Coll(
                                         key->triConsistentFmgrInfo,
                                         key->collation,
                                         PointerGetDatum(key->entryRes),
                                         UInt16GetDatum(key->strategy),
                                         key->query,
                                         UInt32GetDatum(key->nuserentries),
                                         PointerGetDatum(key->extra_data),
                                         PointerGetDatum(key->queryValues),
                                         PointerGetDatum(key->queryCategories)));
    if (result == GIN_MAYBE)
    {
        key->recheckCurItem = true;
        return true;
    }
    else
    {
        key->recheckCurItem = false;
        return result;
    }
}
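/*
 * Illustrative aside (not part of the original source): a minimal,
 * self-contained sketch of the ternary-to-binary mapping described above.
 * MAYBE is reported as a match that must be rechecked against the heap
 * tuple; TRUE and FALSE pass through with recheck off. The enum and helper
 * names here are hypothetical stand-ins for GinTernaryValue and the shim.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum { TRI_FALSE = 0, TRI_TRUE = 1, TRI_MAYBE = 2 } TriValue;

static bool
tri_to_bool(TriValue v, bool *recheck)
{
    if (v == TRI_MAYBE)
    {
        *recheck = true;        /* lossy answer: caller must recheck */
        return true;
    }
    *recheck = false;           /* exact answer */
    return (v == TRI_TRUE);
}

int
main(void)
{
    TriValue    vals[] = {TRI_FALSE, TRI_TRUE, TRI_MAYBE};
    int         i;

    for (i = 0; i < 3; i++)
    {
        bool        recheck;
        bool        match = tri_to_bool(vals[i], &recheck);

        printf("ternary=%d -> match=%d recheck=%d\n", vals[i], match, recheck);
    }
    return 0;
}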
/* * hash_any() -- hash a variable-length key into a 32-bit value * k : the key (the unaligned variable-length array of bytes) * len : the length of the key, counting by bytes * * Returns a uint32 value. Every bit of the key affects every bit of * the return value. Every 1-bit and 2-bit delta achieves avalanche. * About 6*len+35 instructions. The best hash table sizes are powers * of 2. There is no need to do mod a prime (mod is sooo slow!). * If you need less than 32 bits, use a bitmask. */ Datum hash_any(register const unsigned char *k, register int keylen) { register uint32 a, b, c, len; /* Set up the internal state */ len = keylen; a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ c = 3923095; /* initialize with an arbitrary value */ /* handle most of the key */ while (len >= 12) { a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); mix(a, b, c); k += 12; len -= 12; } /* handle the last 11 bytes */ c += keylen; switch (len) /* all the case statements fall through */ { case 11: c += ((uint32) k[10] << 24); case 10: c += ((uint32) k[9] << 16); case 9: c += ((uint32) k[8] << 8); /* the first byte of c is reserved for the length */ case 8: b += ((uint32) k[7] << 24); case 7: b += ((uint32) k[6] << 16); case 6: b += ((uint32) k[5] << 8); case 5: b += k[4]; case 4: a += ((uint32) k[3] << 24); case 3: a += ((uint32) k[2] << 16); case 2: a += ((uint32) k[1] << 8); case 1: a += k[0]; /* case 0: nothing left to add */ } mix(a, b, c); /* report the result */ return UInt32GetDatum(c); }
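/*
 * Illustrative aside (not part of the original source): the header comment
 * above recommends power-of-two table sizes and a bitmask instead of a
 * modulo when fewer than 32 bits are needed. A minimal standalone sketch of
 * that reduction:
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint32_t    hash = 0x9e3779b9u;             /* any 32-bit hash value */
    uint32_t    nbuckets = 1024;                /* must be a power of 2 */
    uint32_t    bucket = hash & (nbuckets - 1); /* same result as hash % nbuckets */

    printf("hash %08x -> bucket %u of %u\n", hash, bucket, nbuckets);
    return 0;
}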
/* * Compute an xlog file name and decimal byte offset given a WAL location, * such as is returned by pg_stop_backup() or pg_xlog_switch(). * * Note that a location exactly at a segment boundary is taken to be in * the previous segment. This is usually the right thing, since the * expected usage is to determine which xlog file(s) are ready to archive. */ Datum pg_xlogfile_name_offset(PG_FUNCTION_ARGS) { XLogSegNo xlogsegno; uint32 xrecoff; XLogRecPtr locationpoint = PG_GETARG_LSN(0); char xlogfilename[MAXFNAMELEN]; Datum values[2]; bool isnull[2]; TupleDesc resultTupleDesc; HeapTuple resultHeapTuple; Datum result; if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("pg_xlogfile_name_offset() cannot be executed during recovery."))); /* * Construct a tuple descriptor for the result row. This must match this * function's pg_proc entry! */ resultTupleDesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name", TEXTOID, -1, 0); TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset", INT4OID, -1, 0); resultTupleDesc = BlessTupleDesc(resultTupleDesc); /* * xlogfilename */ XLByteToPrevSeg(locationpoint, xlogsegno); XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno); values[0] = CStringGetTextDatum(xlogfilename); isnull[0] = false; /* * offset */ xrecoff = locationpoint % XLogSegSize; values[1] = UInt32GetDatum(xrecoff); isnull[1] = false; /* * Tuple jam: Having first prepared your Datums, then squash together */ resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull); result = HeapTupleGetDatum(resultHeapTuple); PG_RETURN_DATUM(result); }
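/*
 * Illustrative aside (not part of the original source): a standalone sketch
 * of the arithmetic behind the function above, assuming the common 16MB WAL
 * segment size. XLByteToPrevSeg attributes a location exactly on a segment
 * boundary to the *previous* segment, while the reported offset is simply
 * the location modulo the segment size.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const uint64_t seg_size = 16 * 1024 * 1024; /* 16MB segments (assumed) */
    uint64_t    lsn = 3 * seg_size;             /* exactly on a boundary */
    uint64_t    segno = (lsn - 1) / seg_size;   /* previous segment: 2, not 3 */
    uint32_t    offset = (uint32_t) (lsn % seg_size);   /* 0 */

    printf("segno=%" PRIu64 " offset=%u\n", segno, offset);
    return 0;
}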
void
worker_main(Datum arg)
{
    int         ret;
    StringInfoData buf;
    uint32      segment = UInt32GetDatum(arg);

    /* Setup signal handlers */
    pqsignal(SIGHUP, worker_sighup);
    pqsignal(SIGTERM, worker_sigterm);

    /* Allow signals */
    BackgroundWorkerUnblockSignals();

    initialize_worker(segment);

    /* Connect to the database */
    BackgroundWorkerInitializeConnection(job->datname, job->rolname);

    elog(LOG, "%s initialized running job id %d",
         MyBgworkerEntry->bgw_name, job->job_id);
    pgstat_report_appname(MyBgworkerEntry->bgw_name);

    /* Initialize the query text */
    initStringInfo(&buf);
    appendStringInfo(&buf,
                     "SELECT * FROM %s.%s(%d, NULL)",
                     job_run_function.schema,
                     job_run_function.name,
                     job->job_id);

    /* Initialize the SPI subsystem */
    SetCurrentStatementStartTimestamp();
    StartTransactionCommand();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());
    pgstat_report_activity(STATE_RUNNING, buf.data);

    SetCurrentStatementStartTimestamp();

    /* And run the query */
    ret = SPI_execute(buf.data, true, 0);
    if (ret < 0)
        elog(FATAL, "errors while executing %s", buf.data);

    /* Commit the transaction */
    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();
    pgstat_report_activity(STATE_IDLE, NULL);

    proc_exit(0);
}
/*
 * A helper function for calling a native ternary logic consistent function.
 */
static GinTernaryValue
directTriConsistentFn(GinScanKey key)
{
    return DatumGetGinTernaryValue(FunctionCall7Coll(
                                       key->triConsistentFmgrInfo,
                                       key->collation,
                                       PointerGetDatum(key->entryRes),
                                       UInt16GetDatum(key->strategy),
                                       key->query,
                                       UInt32GetDatum(key->nuserentries),
                                       PointerGetDatum(key->extra_data),
                                       PointerGetDatum(key->queryValues),
                                       PointerGetDatum(key->queryCategories)));
}
/*
 * _hash_convert_tuple - convert raw index data to hash key
 *
 * Inputs: values and isnull arrays for the user data column(s)
 * Outputs: values and isnull arrays for the index tuple, suitable for
 * passing to index_form_tuple().
 *
 * Returns true if successful, false if not (because there are null values).
 * On a false result, the given data need not be indexed.
 *
 * Note: callers know that the index-column arrays are always of length 1.
 * In principle, there could be more than one input column, though we do not
 * currently support that.
 */
bool
_hash_convert_tuple(Relation index,
                    Datum *user_values, bool *user_isnull,
                    Datum *index_values, bool *index_isnull)
{
    uint32      hashkey;

    /*
     * We do not insert null values into hash indexes.  This is okay because
     * the only supported search operator is '=', and we assume it is strict.
     */
    if (user_isnull[0])
        return false;

    hashkey = _hash_datum2hashkey(index, user_values[0]);
    index_values[0] = UInt32GetDatum(hashkey);
    index_isnull[0] = false;
    return true;
}
/*
 * _hash_form_tuple - form an index tuple containing hash code only
 */
IndexTuple
_hash_form_tuple(Relation index, Datum *values, bool *isnull)
{
    IndexTuple  itup;
    uint32      hashkey;
    Datum       hashkeydatum;
    TupleDesc   hashdesc;

    if (isnull[0])
        hashkeydatum = (Datum) 0;
    else
    {
        hashkey = _hash_datum2hashkey(index, values[0]);
        hashkeydatum = UInt32GetDatum(hashkey);
    }
    hashdesc = RelationGetDescr(index);
    Assert(hashdesc->natts == 1);
    itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
    return itup;
}
/*
 * A helper function for calling a regular, binary logic, consistent function.
 */
static bool
directBoolConsistentFn(GinScanKey key)
{
    /*
     * Initialize recheckCurItem in case the consistentFn doesn't know it
     * should set it.  The safe assumption in that case is to force recheck.
     */
    key->recheckCurItem = true;

    return DatumGetBool(FunctionCall8Coll(key->consistentFmgrInfo,
                                          key->collation,
                                          PointerGetDatum(key->entryRes),
                                          UInt16GetDatum(key->strategy),
                                          key->query,
                                          UInt32GetDatum(key->nuserentries),
                                          PointerGetDatum(key->extra_data),
                                          PointerGetDatum(&key->recheckCurItem),
                                          PointerGetDatum(key->queryValues),
                                          PointerGetDatum(key->queryCategories)));
}
/* Append a list of nodes from the jsonpath (jsonb_path_ops). */
static List *
jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
                              JsonbValue *scalar, List *nodes)
{
    if (scalar)
    {
        /* append path hash node for equality queries */
        uint32      hash = path.hash;

        JsonbHashScalarValue(scalar, &hash);

        return lappend(nodes, make_jsp_entry_node(UInt32GetDatum(hash)));
    }
    else
    {
        /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */
        return nodes;
    }
}
Datum query_histogram(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; TupleDesc tupdesc; AttInMetadata *attinmeta; histogram_data* data; /* init on the first call */ if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); data = query_hist_get_data(PG_GETARG_BOOL(0)); /* init (open file, etc.), maybe read all the data in memory * so that the file is not kept open for a long time */ funcctx->user_fctx = data; funcctx->max_calls = data->bins_count; if (data->bins_count > 0) { funcctx->max_calls = data->bins_count + 1; } /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("function returning record called in context " "that cannot accept type record"))); /* * generate attribute metadata needed later to produce tuples from raw * C strings */ attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->attinmeta = attinmeta; funcctx->tuple_desc = tupdesc; /* switch back to the old context */ MemoryContextSwitchTo(oldcontext); } /* init the context */ funcctx = SRF_PERCALL_SETUP(); /* check if we have more data */ if (funcctx->max_calls > funcctx->call_cntr) { HeapTuple tuple; Datum result; Datum values[6]; bool nulls[6]; int binIdx; binIdx = funcctx->call_cntr; data = (histogram_data*)funcctx->user_fctx; memset(nulls, 0, sizeof(nulls)); if (data->histogram_type == HISTOGRAM_LINEAR) { values[0] = UInt32GetDatum(binIdx * data->bins_width); if (funcctx->max_calls - 1 == funcctx->call_cntr) { values[1] = UInt32GetDatum(0); nulls[1] = TRUE; } else { values[1] = UInt32GetDatum((binIdx+1)* data->bins_width); } } else { if (funcctx->call_cntr == 0) { values[0] = UInt32GetDatum(0); } else { values[0] = UInt32GetDatum(pow(2,binIdx-1) * data->bins_width); } if (funcctx->max_calls - 1 == funcctx->call_cntr) { values[1] = UInt32GetDatum(0); nulls[1] = TRUE; } else { values[1] = UInt32GetDatum(pow(2,binIdx) * data->bins_width); } } values[2] = Int64GetDatum(data->count_data[binIdx]); if (data->total_count > 0) { values[3] = Float4GetDatum(100.0*data->count_data[binIdx] / data->total_count); } else { values[3] = Float4GetDatum(0); } values[4] = Float8GetDatum(data->time_data[binIdx]); if (data->total_time > 0) { values[5] = Float4GetDatum(100*data->time_data[binIdx] / data->total_time); } else { values[5] = Float4GetDatum(0); } /* Build and return the tuple. */ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); /* make the tuple into a datum */ result = HeapTupleGetDatum(tuple); /* Here we want to return another item: */ SRF_RETURN_NEXT(funcctx, result); } else { /* Here we are done returning items and just need to clean up: */ SRF_RETURN_DONE(funcctx); } }
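/*
 * Illustrative aside (not part of the original source): the bin-boundary
 * arithmetic used above, for both histogram types. The lower bound of bin b
 * is b*width (linear) or 2^(b-1)*width with bin 0 starting at 0 (log), and
 * the last bin is open-ended, which the SRF reports as a NULL upper bound.
 * The width and bin count below are made-up demonstration values, and bit
 * shifts stand in for the pow() calls above (same result for small b).
 */
#include <stdio.h>

int
main(void)
{
    int         bins_width = 10;
    int         bins_count = 5;
    int         b;

    for (b = 0; b < bins_count; b++)
    {
        int         lin_lo = b * bins_width;
        int         lin_hi = (b + 1) * bins_width;
        int         log_lo = (b == 0) ? 0 : (1 << (b - 1)) * bins_width;
        int         log_hi = (1 << b) * bins_width;

        printf("bin %d: linear [%d, %d)   log [%d, %d)\n",
               b, lin_lo, lin_hi, log_lo, log_hi);
    }
    return 0;
}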
Datum
worker_test(PG_FUNCTION_ARGS)
{
    int         i,
                nworkers;
    dsm_segment *seg;
    test_shm_mq_header *hdr;
    MemoryContext oldcontext;
    worker_state *wstate;

    nworkers = PG_GETARG_INT32(0);

#if PG_VERSION_NUM >= 90500
    seg = dsm_create(sizeof(test_shm_mq_header), 0);
#else
    seg = dsm_create(sizeof(test_shm_mq_header));
#endif

    hdr = dsm_segment_address(seg);
    printf("begin worker_test: %d, %p\n", dsm_segment_handle(seg), hdr);
    MemSet(hdr, 0, sizeof(test_shm_mq_header));
    SpinLockInit(&hdr->mutex);
    strncpy(hdr->dbname, get_database_name(MyDatabaseId), sizeof(hdr->dbname));

    oldcontext = MemoryContextSwitchTo(CurTransactionContext);
    wstate = MemoryContextAlloc(TopTransactionContext,
                                offsetof(worker_state, handle) +
                                sizeof(BackgroundWorkerHandle *) * nworkers);
    MemSet(wstate, 0, offsetof(worker_state, handle) +
           sizeof(BackgroundWorkerHandle *) * nworkers);
    on_dsm_detach(seg, cleanup_background_workers, PointerGetDatum(wstate));

    for (i = 0; i < nworkers; i++)
    {
        BackgroundWorker worker;

        MemSet(&worker, 0, sizeof(worker));
        worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
            BGWORKER_BACKEND_DATABASE_CONNECTION;
        worker.bgw_start_time = BgWorkerStart_ConsistentState;
        worker.bgw_restart_time = BGW_NEVER_RESTART;
        worker.bgw_main = NULL;     /* new worker might not have library loaded */
        sprintf(worker.bgw_library_name, "worker_test");
        sprintf(worker.bgw_function_name, "worker_test_main");
        snprintf(worker.bgw_name, BGW_MAXLEN, "worker_test %d", i);
        worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
        worker.bgw_notify_pid = MyProcPid;

        if (!RegisterDynamicBackgroundWorker(&worker, &wstate->handle[i]))
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                     errmsg("could not register background process"),
                     errhint("You may need to increase max_worker_processes.")));
        ++wstate->nworkers;
    }

    for (i = 0; i < nworkers; i++)
    {
        BgwHandleStatus status;
        pid_t       pid;

        status = WaitForBackgroundWorkerStartup(wstate->handle[i], &pid);
        if (status == BGWH_STOPPED)
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                     errmsg("could not start background process"),
                     errhint("More details may be available in the server log.")));
        if (status == BGWH_POSTMASTER_DIED)
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                     errmsg("cannot start background processes without postmaster"),
                     errhint("Kill all remaining database processes and restart the database.")));
        Assert(status == BGWH_STARTED);
    }

    wait_for_workers_to_become_ready(wstate, hdr);

    cancel_on_dsm_detach(seg, cleanup_background_workers,
                         PointerGetDatum(wstate));
    dsm_detach(seg);
    pfree(wstate);
    MemoryContextSwitchTo(oldcontext);

    PG_RETURN_VOID();
}
Datum pg_freespacemap_pages(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; MemoryContext oldcontext; FreeSpacePagesContext *fctx; /* User function context. */ TupleDesc tupledesc; HeapTuple tuple; FSMHeader *FreeSpaceMap; /* FSM main structure. */ FSMRelation *fsmrel; /* Individual relation. */ if (SRF_IS_FIRSTCALL()) { int i; int nchunks; /* Size of freespace.c's arena. */ int numPages; /* Max possible no. of pages in map. */ int nPages; /* Mapped pages for a relation. */ /* * Get the free space map data structure. */ FreeSpaceMap = GetFreeSpaceMap(); /* this must match calculation in InitFreeSpaceMap(): */ nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; /* Worst case (lots of indexes) could have this many pages: */ numPages = nchunks * INDEXCHUNKPAGES; funcctx = SRF_FIRSTCALL_INIT(); /* Switch context when allocating stuff to be used in later calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* * Create a function context for cross-call persistence. */ fctx = (FreeSpacePagesContext *) palloc(sizeof(FreeSpacePagesContext)); funcctx->user_fctx = fctx; /* Construct a tuple descriptor for the result rows. */ tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false); TupleDescInitEntry(tupledesc, (AttrNumber) 1, "reltablespace", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reldatabase", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 4, "relblocknumber", INT8OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 5, "bytes", INT4OID, -1, 0); fctx->tupdesc = BlessTupleDesc(tupledesc); /* * Allocate numPages worth of FreeSpacePagesRec records, this is an * upper bound. */ fctx->record = (FreeSpacePagesRec *) palloc(sizeof(FreeSpacePagesRec) * numPages); /* Return to original context when allocating transient memory */ MemoryContextSwitchTo(oldcontext); /* * Lock free space map and scan though all the relations. For each * relation, gets all its mapped pages. */ LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); i = 0; for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage) { if (fsmrel->isIndex) { /* Index relation. */ IndexFSMPageData *page; page = (IndexFSMPageData *) (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page); fctx->record[i].bytes = 0; fctx->record[i].isindex = true; page++; i++; } } else { /* Heap relation. */ FSMPageData *page; page = (FSMPageData *) (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); for (nPages = 0; nPages < fsmrel->storedPages; nPages++) { fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].relblocknumber = FSMPageGetPageNum(page); fctx->record[i].bytes = FSMPageGetSpace(page); fctx->record[i].isindex = false; page++; i++; } } } /* Release free space map. */ LWLockRelease(FreeSpaceLock); /* Set the real no. of calls as we know it now! 
*/ Assert(i <= numPages); funcctx->max_calls = i; } funcctx = SRF_PERCALL_SETUP(); /* Get the saved state */ fctx = funcctx->user_fctx; if (funcctx->call_cntr < funcctx->max_calls) { int i = funcctx->call_cntr; FreeSpacePagesRec *record = &fctx->record[i]; Datum values[NUM_FREESPACE_PAGES_ELEM]; bool nulls[NUM_FREESPACE_PAGES_ELEM]; values[0] = ObjectIdGetDatum(record->reltablespace); nulls[0] = false; values[1] = ObjectIdGetDatum(record->reldatabase); nulls[1] = false; values[2] = ObjectIdGetDatum(record->relfilenode); nulls[2] = false; values[3] = Int64GetDatum((int64) record->relblocknumber); nulls[3] = false; /* * Set (free) bytes to NULL for an index relation. */ if (record->isindex) { nulls[4] = true; } else { values[4] = UInt32GetDatum(record->bytes); nulls[4] = false; } /* Build and return the tuple. */ tuple = heap_form_tuple(fctx->tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } else SRF_RETURN_DONE(funcctx); }
Datum
connection_limits(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    TupleDesc   tupdesc;
    AttInMetadata *attinmeta;

    /* init on the first call */
    if (SRF_IS_FIRSTCALL())
    {
        MemoryContext oldcontext;

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

        reset_rules();
        check_all_rules();

        /* number of rules */
        funcctx->max_calls = rules->n_rules;

        /* Build a tuple descriptor for our result type */
        if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("function returning record called in context "
                            "that cannot accept type record")));

        /*
         * generate attribute metadata needed later to produce tuples from raw
         * C strings
         */
        attinmeta = TupleDescGetAttInMetadata(tupdesc);
        funcctx->attinmeta = attinmeta;
        funcctx->tuple_desc = tupdesc;

        /* switch back to the old context */
        MemoryContextSwitchTo(oldcontext);
    }

    /* init the context */
    funcctx = SRF_PERCALL_SETUP();

    /* check if we have more data */
    if (funcctx->max_calls > funcctx->call_cntr)
    {
        HeapTuple   tuple;
        Datum       result;
        Datum       values[6];
        bool        nulls[6];

        rule_t     *rule = &(rules->rules[funcctx->call_cntr]);

        memset(nulls, 0, sizeof(nulls));

        /* rule line */
        values[0] = UInt32GetDatum(rule->line);

        /* database */
        if (rule->fields & CHECK_DBNAME)
            values[1] = CStringGetTextDatum(rule->database);
        else
            nulls[1] = TRUE;

        /* username */
        if (rule->fields & CHECK_USER)
            values[2] = CStringGetTextDatum(rule->user);
        else
            nulls[2] = TRUE;

        /* hostname or IP address */
        if (rule->fields & CHECK_HOST)
            values[3] = CStringGetTextDatum(rule->hostname);
        else if (rule->fields & CHECK_IP)
        {
            char        buffer[256];

            memset(buffer, 0, 256);
            format_address(buffer, 256, (struct sockaddr *) &rule->ip,
                           (struct sockaddr *) &rule->mask);
            values[3] = CStringGetTextDatum(buffer);
        }
        else
            nulls[3] = TRUE;

        /* count and limit */
        values[4] = UInt32GetDatum(rule->count);
        values[5] = UInt32GetDatum(rule->limit);

        /* Build and return the tuple. */
        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);

        /* make the tuple into a datum */
        result = HeapTupleGetDatum(tuple);

        /* Here we want to return another item: */
        SRF_RETURN_NEXT(funcctx, result);
    }
    else
    {
        /* release the ProcArray lock acquired on the first call */
        LWLockRelease(ProcArrayLock);

        /* Here we are done returning items and just need to clean up: */
        SRF_RETURN_DONE(funcctx);
    }
}
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; PredicateLockData *predLockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->predLockData = GetPredicateLockStatusData(); mystatus->predLockIdx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; while (mystatus->currIdx < lockData->nelements) { bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; LockInstanceData *instance; instance = &(lockData->locks[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (instance->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (instance->holdMask & LOCKBIT_ON(mode)) { granted = true; instance->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (instance->waitLockMode != NoLock) { /* Yes, so report it with proper mode */ mode = instance->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE) locktypename = LockTagTypeNames[instance->locktag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) instance->locktag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); values[4] = UInt16GetDatum(instance->locktag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TRANSACTION: values[6] = TransactionIdGetDatum(instance->locktag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, instance->locktag.locktag_field2); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3); values[9] = Int16GetDatum(instance->locktag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; break; } values[10] = VXIDGetDatum(instance->backend, instance->lxid); if (instance->pid != 0) values[11] = Int32GetDatum(instance->pid); else nulls[11] = true; values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[13] = BoolGetDatum(granted); values[14] = BoolGetDatum(instance->fastpath); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * Have returned all regular locks. Now start on the SIREAD predicate * locks. */ predLockData = mystatus->predLockData; if (mystatus->predLockIdx < predLockData->nelements) { PredicateLockTargetType lockType; PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]); SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]); Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; mystatus->predLockIdx++; /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* lock type */ lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag); values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); /* lock target */ values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); if (lockType == PREDLOCKTAG_TUPLE) values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); else nulls[4] = true; if ((lockType == PREDLOCKTAG_TUPLE) || (lockType == PREDLOCKTAG_PAGE)) values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); else nulls[3] = true; /* these fields are targets for other types of locks */ nulls[5] = true; /* virtualxid */ nulls[6] = true; /* transactionid */ nulls[7] = true; /* classid */ nulls[8] = true; /* objid */ nulls[9] = true; /* objsubid */ /* lock holder */ values[10] = VXIDGetDatum(xact->vxid.backendId, xact->vxid.localTransactionId); if (xact->pid != 0) values[11] = Int32GetDatum(xact->pid); else nulls[11] = true; /* * Lock mode. Currently all predicate locks are SIReadLocks, which are * always held (never waiting) and have no fast path */ values[12] = CStringGetTextDatum("SIReadLock"); values[13] = BoolGetDatum(true); values[14] = BoolGetDatum(false); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
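/*
 * Illustrative aside (not part of the original source): the holdMask loop in
 * pg_lock_status() above reports one held lock mode per call and clears its
 * bit so the same mode is not reported twice. A minimal standalone sketch of
 * that destructive bit iteration (mode numbers here are arbitrary):
 */
#include <stdio.h>

#define LOCKBIT(mode) (1 << (mode))

int
main(void)
{
    int         holdMask = LOCKBIT(1) | LOCKBIT(5) | LOCKBIT(7);

    while (holdMask != 0)
    {
        int         mode;

        for (mode = 0; mode < 10; mode++)
        {
            if (holdMask & LOCKBIT(mode))
            {
                printf("emit one row for held mode %d\n", mode);
                holdMask &= ~LOCKBIT(mode);     /* don't report it again */
                break;
            }
        }
    }
    return 0;
}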
Datum heap_page_items(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); heap_page_items_state *inter_call_data = NULL; FuncCallContext *fctx; int raw_page_size; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); raw_page_size = VARSIZE(raw_page) - VARHDRSZ; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext mctx; if (raw_page_size < SizeOfPageHeaderData) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page too small (%d bytes)", raw_page_size))); fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); inter_call_data = palloc(sizeof(heap_page_items_state)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; inter_call_data->page = VARDATA(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); inter_call_data = fctx->user_fctx; if (fctx->call_cntr < fctx->max_calls) { Page page = inter_call_data->page; HeapTuple resultTuple; Datum result; ItemId id; Datum values[13]; bool nulls[13]; uint16 lp_offset; uint16 lp_flags; uint16 lp_len; memset(nulls, 0, sizeof(nulls)); /* Extract information from the line pointer */ id = PageGetItemId(page, inter_call_data->offset); lp_offset = ItemIdGetOffset(id); lp_flags = ItemIdGetFlags(id); lp_len = ItemIdGetLength(id); values[0] = UInt16GetDatum(inter_call_data->offset); values[1] = UInt16GetDatum(lp_offset); values[2] = UInt16GetDatum(lp_flags); values[3] = UInt16GetDatum(lp_len); /* * We do just enough validity checking to make sure we don't reference * data outside the page passed to us. The page could be corrupt in * many other ways, but at least we won't crash. */ if (ItemIdHasStorage(id) && lp_len >= sizeof(HeapTupleHeader) && lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { HeapTupleHeader tuphdr; int bits_len; /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); values[4] = UInt32GetDatum(HeapTupleHeaderGetXmin(tuphdr)); values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); /* shared with xvac */ values[7] = PointerGetDatum(&tuphdr->t_ctid); values[8] = UInt32GetDatum(tuphdr->t_infomask2); values[9] = UInt32GetDatum(tuphdr->t_infomask); values[10] = UInt8GetDatum(tuphdr->t_hoff); /* * We already checked that the item as is completely within the * raw page passed to us, with the length given in the line * pointer.. Let's check that t_hoff doesn't point over lp_len, * before using it to access t_bits and oid. */ if (tuphdr->t_hoff >= sizeof(HeapTupleHeader) && tuphdr->t_hoff <= lp_len) { if (tuphdr->t_infomask & HEAP_HASNULL) { bits_len = tuphdr->t_hoff - (((char *) tuphdr->t_bits) -((char *) tuphdr)); values[11] = CStringGetTextDatum( bits_to_text(tuphdr->t_bits, bits_len * 8)); } else nulls[11] = true; if (tuphdr->t_infomask & HEAP_HASOID) values[12] = HeapTupleHeaderGetOid(tuphdr); else nulls[12] = true; } else { nulls[11] = true; nulls[12] = true; } } else { /* * The line pointer is not used, or it's invalid. Set the rest of * the fields to NULL */ int i; for (i = 4; i <= 12; i++) nulls[i] = true; } /* Build and return the result tuple. 
*/ resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls); result = HeapTupleGetDatum(resultTuple); inter_call_data->offset++; SRF_RETURN_NEXT(fctx, result); } else SRF_RETURN_DONE(fctx); }
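/*
 * Illustrative aside (not part of the original source): the sanity checks
 * heap_page_items() applies before dereferencing a line pointer, reduced to
 * plain C. MAXALIGN is assumed to be 8-byte alignment and 23 bytes is used
 * as an approximate minimum tuple-header length; both are platform-dependent
 * in the real server.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAXALIGN8(x) (((uintptr_t) (x) + 7) & ~((uintptr_t) 7))

static bool
lp_looks_sane(uint32_t lp_off, uint32_t lp_len, uint32_t page_size,
              uint32_t min_header_len)
{
    return lp_len >= min_header_len &&
        lp_off == MAXALIGN8(lp_off) &&          /* properly aligned */
        lp_off + lp_len <= page_size;           /* stays inside the page */
}

int
main(void)
{
    printf("%d\n", lp_looks_sane(8160, 28, 8192, 23));     /* 1: plausible */
    printf("%d\n", lp_looks_sane(8161, 28, 8192, 23));     /* 0: misaligned */
    printf("%d\n", lp_looks_sane(8176, 28, 8192, 23));     /* 0: overruns page */
    return 0;
}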
Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB_P(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); int total = 2 * JB_ROOT_COUNT(jb); JsonbIterator *it; JsonbValue v; JsonbIteratorToken r; PathHashStack tail; PathHashStack *stack; int i = 0; Datum *entries; /* If the root level is empty, we certainly have no keys */ if (total == 0) { *nentries = 0; PG_RETURN_POINTER(NULL); } /* Otherwise, use 2 * root count as initial estimate of result size */ entries = (Datum *) palloc(sizeof(Datum) * total); /* We keep a stack of partial hashes corresponding to parent key levels */ tail.parent = NULL; tail.hash = 0; stack = &tail; it = JsonbIteratorInit(&jb->root); while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) { PathHashStack *parent; /* Since we recurse into the object, we might need more space */ if (i >= total) { total *= 2; entries = (Datum *) repalloc(entries, sizeof(Datum) * total); } switch (r) { case WJB_BEGIN_ARRAY: case WJB_BEGIN_OBJECT: /* Push a stack level for this object */ parent = stack; stack = (PathHashStack *) palloc(sizeof(PathHashStack)); /* * We pass forward hashes from outer nesting levels so that * the hashes for nested values will include outer keys as * well as their own keys. * * Nesting an array within another array will not alter * innermost scalar element hash values, but that seems * inconsequential. */ stack->hash = parent->hash; stack->parent = parent; break; case WJB_KEY: /* mix this key into the current outer hash */ JsonbHashScalarValue(&v, &stack->hash); /* hash is now ready to incorporate the value */ break; case WJB_ELEM: case WJB_VALUE: /* mix the element or value's hash into the prepared hash */ JsonbHashScalarValue(&v, &stack->hash); /* and emit an index entry */ entries[i++] = UInt32GetDatum(stack->hash); /* reset hash for next key, value, or sub-object */ stack->hash = stack->parent->hash; break; case WJB_END_ARRAY: case WJB_END_OBJECT: /* Pop the stack */ parent = stack->parent; pfree(stack); stack = parent; /* reset hash for next key, value, or sub-object */ if (stack->parent) stack->hash = stack->parent->hash; else stack->hash = 0; break; default: elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); } } *nentries = i; PG_RETURN_POINTER(entries); }
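/*
 * Illustrative aside (not part of the original source): a deliberately
 * simplified, standalone sketch of the stack-of-partial-hashes idea used
 * above. Each nesting level starts from its parent's hash, a key is mixed in
 * before its value, and the per-level hash is reset afterwards so sibling
 * keys do not contaminate each other. mix_into() is a toy stand-in for
 * JsonbHashScalarValue, so the numbers do not match real GIN entries.
 */
#include <stdint.h>
#include <stdio.h>

static void
mix_into(uint32_t *hash, const char *s)
{
    for (; *s; s++)
        *hash = *hash * 31 + (unsigned char) *s;    /* toy combiner */
}

int
main(void)
{
    /* emulate extracting path entries for {"a": {"b": 1, "c": 2}} */
    uint32_t    root = 0;
    uint32_t    level1;
    uint32_t    entry;

    mix_into(&root, "a");           /* key "a" mixed into the outer hash */
    level1 = root;                  /* push: the nested object inherits it */

    entry = level1;
    mix_into(&entry, "b");
    mix_into(&entry, "1");
    printf("entry for a.b=1 : %u\n", entry);

    entry = level1;                 /* reset to the parent hash */
    mix_into(&entry, "c");
    mix_into(&entry, "2");
    printf("entry for a.c=2 : %u\n", entry);
    return 0;
}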
/* * Register background workers. */ static worker_state * setup_background_workers(int nworkers, dsm_segment *seg) { MemoryContext oldcontext; BackgroundWorker worker; worker_state *wstate; int i; /* * We need the worker_state object and the background worker handles to * which it points to be allocated in CurTransactionContext rather than * ExprContext; otherwise, they'll be destroyed before the on_dsm_detach * hooks run. */ oldcontext = MemoryContextSwitchTo(CurTransactionContext); /* Create worker state object. */ wstate = MemoryContextAlloc(TopTransactionContext, offsetof(worker_state, handle) + sizeof(BackgroundWorkerHandle *) * nworkers); wstate->nworkers = 0; /* * Arrange to kill all the workers if we abort before all workers are * finished hooking themselves up to the dynamic shared memory segment. * * If we die after all the workers have finished hooking themselves up to * the dynamic shared memory segment, we'll mark the two queues to which * we're directly connected as detached, and the worker(s) connected to * those queues will exit, marking any other queues to which they are * connected as detached. This will cause any as-yet-unaware workers * connected to those queues to exit in their turn, and so on, until * everybody exits. * * But suppose the workers which are supposed to connect to the queues to * which we're directly attached exit due to some error before they * actually attach the queues. The remaining workers will have no way of * knowing this. From their perspective, they're still waiting for those * workers to start, when in fact they've already died. */ on_dsm_detach(seg, cleanup_background_workers, PointerGetDatum(wstate)); /* Configure a worker. */ worker.bgw_flags = BGWORKER_SHMEM_ACCESS; worker.bgw_start_time = BgWorkerStart_ConsistentState; worker.bgw_restart_time = BGW_NEVER_RESTART; worker.bgw_main = NULL; /* new worker might not have library loaded */ sprintf(worker.bgw_library_name, "test_shm_mq"); sprintf(worker.bgw_function_name, "test_shm_mq_main"); snprintf(worker.bgw_name, BGW_MAXLEN, "test_shm_mq"); worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg)); /* set bgw_notify_pid, so we can detect if the worker stops */ worker.bgw_notify_pid = MyProcPid; /* Register the workers. */ for (i = 0; i < nworkers; ++i) { if (!RegisterDynamicBackgroundWorker(&worker, &wstate->handle[i])) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("could not register background process"), errhint("You may need to increase max_worker_processes."))); ++wstate->nworkers; } /* All done. */ MemoryContextSwitchTo(oldcontext); return wstate; }
/* * Returns command progress information for the named command. */ Datum pg_stat_get_progress_info(PG_FUNCTION_ARGS) { #define PG_STAT_GET_PROGRESS_COLS PGSTAT_NUM_PROGRESS_PARAM + 3 int num_backends = pgstat_fetch_stat_numbackends(); int curr_backend; char *cmd = text_to_cstring(PG_GETARG_TEXT_PP(0)); ProgressCommandType cmdtype; TupleDesc tupdesc; Tuplestorestate *tupstore; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; MemoryContext per_query_ctx; MemoryContext oldcontext; /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not " \ "allowed in this context"))); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Translate command name into command type code. */ if (pg_strcasecmp(cmd, "VACUUM") == 0) cmdtype = PROGRESS_COMMAND_VACUUM; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid command name: \"%s\"", cmd))); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); /* 1-based index */ for (curr_backend = 1; curr_backend <= num_backends; curr_backend++) { LocalPgBackendStatus *local_beentry; PgBackendStatus *beentry; Datum values[PG_STAT_GET_PROGRESS_COLS]; bool nulls[PG_STAT_GET_PROGRESS_COLS]; int i; MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); local_beentry = pgstat_fetch_stat_local_beentry(curr_backend); if (!local_beentry) continue; beentry = &local_beentry->backendStatus; /* * Report values for only those backends which are running the given * command. */ if (!beentry || beentry->st_progress_command != cmdtype) continue; /* Value available to all callers */ values[0] = Int32GetDatum(beentry->st_procpid); values[1] = ObjectIdGetDatum(beentry->st_databaseid); /* show rest of the values including relid only to role members */ if (has_privs_of_role(GetUserId(), beentry->st_userid)) { values[2] = ObjectIdGetDatum(beentry->st_progress_command_target); for(i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++) values[i+3] = UInt32GetDatum(beentry->st_progress_param[i]); } else { nulls[2] = true; for (i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++) nulls[i+3] = true; } tuplestore_putvalues(tupstore, tupdesc, values, nulls); } /* clean up and return the tuplestore */ tuplestore_donestoring(tupstore); return (Datum) 0; }
Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); int total = 2 * JB_ROOT_COUNT(jb); JsonbIterator *it; JsonbValue v; JsonbIteratorToken r; PathHashStack tail; PathHashStack *stack; int i = 0; Datum *entries; /* If the root level is empty, we certainly have no keys */ if (total == 0) { *nentries = 0; PG_RETURN_POINTER(NULL); } /* Otherwise, use 2 * root count as initial estimate of result size */ entries = (Datum *) palloc(sizeof(Datum) * total); /* We keep a stack of partial hashes corresponding to parent key levels */ tail.parent = NULL; tail.hash = 0; stack = &tail; it = JsonbIteratorInit(&jb->root); while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) { PathHashStack *parent; /* Since we recurse into the object, we might need more space */ if (i >= total) { total *= 2; entries = (Datum *) repalloc(entries, sizeof(Datum) * total); } switch (r) { case WJB_BEGIN_ARRAY: case WJB_BEGIN_OBJECT: /* Push a stack level for this object */ parent = stack; stack = (PathHashStack *) palloc(sizeof(PathHashStack)); if (parent->parent) { /* * We pass forward hashes from previous container nesting * levels so that nested arrays with an outermost nested * object will have element hashes mixed with the * outermost key. It's also somewhat useful to have * nested objects' innermost values have hashes that are a * function of not just their own key, but outer keys too. * * Nesting an array within another array will not alter * innermost scalar element hash values, but that seems * inconsequential. */ stack->hash = parent->hash; } else { /* * At the outermost level, initialize hash with container * type proxy value. Note that this makes JB_FARRAY and * JB_FOBJECT part of the on-disk representation, but they * are that in the base jsonb object storage already. */ stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT; } stack->parent = parent; break; case WJB_KEY: /* initialize hash from parent */ stack->hash = stack->parent->hash; /* and mix in this key */ JsonbHashScalarValue(&v, &stack->hash); /* hash is now ready to incorporate the value */ break; case WJB_ELEM: /* array elements use parent hash mixed with element's hash */ stack->hash = stack->parent->hash; /* FALL THRU */ case WJB_VALUE: /* mix the element or value's hash into the prepared hash */ JsonbHashScalarValue(&v, &stack->hash); /* and emit an index entry */ entries[i++] = UInt32GetDatum(stack->hash); /* Note: we assume we'll see KEY before another VALUE */ break; case WJB_END_ARRAY: case WJB_END_OBJECT: /* Pop the stack */ parent = stack->parent; pfree(stack); stack = parent; break; default: elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); } } *nentries = i; PG_RETURN_POINTER(entries); }
/* * pgdatabasev - produce a view of gp_distributed_xacts to include transient state */ Datum gp_distributed_xacts__(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; TMGALLXACTSTATUS *allDistributedXactStatus; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match gp_distributed_xacts view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(5, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "distributed_xid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "distributed_id", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "state", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "gp_session_id", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "xmin_distributed_snapshot", XIDOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ getAllDistributedXactStatus(&allDistributedXactStatus); funcctx->user_fctx = (void *) allDistributedXactStatus; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); allDistributedXactStatus = (TMGALLXACTSTATUS *) funcctx->user_fctx; while (true) { TMGXACTSTATUS *distributedXactStatus; Datum values[6]; bool nulls[6]; HeapTuple tuple; Datum result; if (!getNextDistributedXactStatus(allDistributedXactStatus, &distributedXactStatus)) break; /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); values[0] = TransactionIdGetDatum(distributedXactStatus->gxid); values[1] = CStringGetTextDatum(distributedXactStatus->gid); values[2] = CStringGetTextDatum(DtxStateToString(distributedXactStatus->state)); values[3] = UInt32GetDatum(distributedXactStatus->sessionId); values[4] = TransactionIdGetDatum(distributedXactStatus->xminDistributedSnapshot); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
/* * Compute an xlog file name and decimal byte offset given a WAL location, * such as is returned by pg_stop_backup() or pg_xlog_switch(). * * Note that a location exactly at a segment boundary is taken to be in * the previous segment. This is usually the right thing, since the * expected usage is to determine which xlog file(s) are ready to archive. */ Datum pg_xlogfile_name_offset(PG_FUNCTION_ARGS) { text *location = PG_GETARG_TEXT_P(0); char *locationstr; unsigned int uxlogid; unsigned int uxrecoff; uint32 xlogid; uint32 xlogseg; uint32 xrecoff; XLogRecPtr locationpoint; char xlogfilename[MAXFNAMELEN]; Datum values[2]; bool isnull[2]; TupleDesc resultTupleDesc; HeapTuple resultHeapTuple; Datum result; if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("pg_xlogfile_name_offset() cannot be executed during recovery."))); /* * Read input and parse */ locationstr = text_to_cstring(location); if (sscanf(locationstr, "%X/%X", &uxlogid, &uxrecoff) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not parse transaction log location \"%s\"", locationstr))); locationpoint.xlogid = uxlogid; locationpoint.xrecoff = uxrecoff; /* * Construct a tuple descriptor for the result row. This must match this * function's pg_proc entry! */ resultTupleDesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name", TEXTOID, -1, 0); TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset", INT4OID, -1, 0); resultTupleDesc = BlessTupleDesc(resultTupleDesc); /* * xlogfilename */ XLByteToPrevSeg(locationpoint, xlogid, xlogseg); XLogFileName(xlogfilename, ThisTimeLineID, xlogid, xlogseg); values[0] = CStringGetTextDatum(xlogfilename); isnull[0] = false; /* * offset */ xrecoff = locationpoint.xrecoff - xlogseg * XLogSegSize; values[1] = UInt32GetDatum(xrecoff); isnull[1] = false; /* * Tuple jam: Having first prepared your Datums, then squash together */ resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull); result = HeapTupleGetDatum(resultHeapTuple); PG_RETURN_DATUM(result); }
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(16, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; /* * Seeing the locks just from the masterDB isn't enough to know what is locked, * or if there is a deadlock. That's because the segDBs also take locks. * Some locks show up only on the master, some only on the segDBs, and some on both. * * So, let's collect the lock information from all the segDBs. Sure, this means * there are a lot more rows coming back from pg_locks than before, since most locks * on the segDBs happen across all the segDBs at the same time. But not always, * so let's play it safe and get them all. */ if (Gp_role == GP_ROLE_DISPATCH) { int resultCount = 0; struct pg_result **results = NULL; StringInfoData buffer; StringInfoData errbuf; int i; initStringInfo(&buffer); /* * This query has to match the tupledesc we just made above. */ appendStringInfo(&buffer, "SELECT * FROM pg_lock_status() L " " (locktype text, database oid, relation oid, page int4, tuple int2," " transactionid xid, classid oid, objid oid, objsubid int2," " transaction xid, pid int4, mode text, granted boolean, " " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) "); initStringInfo(&errbuf); /* * Why dispatch something here, rather than do a UNION ALL in pg_locks view, and * a join to gp_dist_random('gp_id')? There are several important reasons. 
* * The union all method is much slower, and requires taking locks on gp_id. * More importantly, applications such as pgAdmin do queries of this view that * involve a correlated subqueries joining to other catalog tables, * which works if we do it this way, but fails * if the view includes the union all. That completely breaks the server status * display in pgAdmin. * * Why dispatch this way, rather than via SPI? There are several advantages. * First, it's easy to get "writer gang is busy" errors if we use SPI. * * Second, this should be much faster, as it doesn't require setting up * the interconnect, and doesn't need to touch any actual data tables to be * able to get the gp_segment_id. * * The downside is we get n result sets, where n == number of segDBs. * * It would be better yet if we sent a plan tree rather than a text string, * so the segDBs don't need to parse it. That would also avoid taking any relation locks * on the segDB to get this info (normally need to get an accessShareLock on pg_locks on the segDB * to make sure it doesn't go away during parsing). But the only safe way I know to do this * is to hand-build the plan tree, and I'm to lazy to do it right now. It's just a matter of * building a function scan node, and filling it in with our result set info (from the tupledesc). * * One thing to note: it's OK to join pg_locks with any catalog table or master-only table, * but joining to a distributed table will result in "writer gang busy: possible attempt to * execute volatile function in unsupported context" errors, because * the scan of the distributed table might already be running on the writer gang * when we want to dispatch this. * * This could be fixed by allocating a reader gang and dispatching to that, but the cost * of setting up a new gang is high, and I've never seen anyone need to join this to a * distributed table. * */ results = cdbdisp_dispatchRMCommand(buffer.data, true, &errbuf, &resultCount); if (errbuf.len > 0) ereport(ERROR, (errmsg("pg_lock internal error (gathered %d results from cmd '%s')", resultCount, buffer.data), errdetail("%s", errbuf.data))); /* * I don't think resultCount can ever be zero if errbuf isn't set. * But check to be sure. */ if (resultCount == 0) elog(ERROR, "pg_locks didn't get back any data from the segDBs"); for (i = 0; i < resultCount; i++) { /* * Any error here should have propagated into errbuf, so we shouldn't * ever see anything other that tuples_ok here. But, check to be * sure. */ if (PQresultStatus(results[i]) != PGRES_TUPLES_OK) { elog(ERROR,"pg_locks: resultStatus not tuples_Ok"); } else { /* * numSegLocks needs to be the total size we are returning to * the application. At the start of this loop, it has the count * for the masterDB locks. Add each of the segDB lock counts. */ mystatus->numSegLocks += PQntuples(results[i]); } } pfree(errbuf.data); mystatus->numsegresults = resultCount; /* * cdbdisp_dispatchRMCommand copies the result sets into our memory, which * will still exist on the subsequent calls. */ mystatus->segresults = results; MemoryContextSwitchTo(oldcontext); } } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; /* * This loop returns all the local lock data from the segment we are running on. 
*/ while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[16]; bool nulls[16]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_ADVISORY) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: case LOCKTAG_RELATION_RESYNCHRONIZE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TRANSACTION: values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_RESOURCE_QUEUE: values[1] = ObjectIdGetDatum(proc->databaseId); values[7] = ObjectIdGetDatum(lock->tag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = 
ObjectIdGetDatum(lock->tag.locktag_field3); values[8] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; break; } values[9] = TransactionIdGetDatum(proc->xid); if (proc->pid != 0) values[10] = Int32GetDatum(proc->pid); else nulls[10] = true; values[11] = DirectFunctionCall1(textin, CStringGetDatum((char *) GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[12] = BoolGetDatum(granted); values[13] = Int32GetDatum(proc->mppSessionId); values[14] = Int32GetDatum(proc->mppIsWriter); values[15] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. */ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[16]; bool nulls[16]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. 
* The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5))); values[6] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,10))); values[11] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow,11)); values[12] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,12),"t",1)==0); values[13] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13))); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); /* * Copy the null info over. It should all match properly. */ for (i=0; i<16; i++) { nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i); } tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * if we dispatched to the segDBs, free up the memory holding the result sets. * Otherwise we might leak this memory each time we got called (does it automatically * get freed by the pool being deleted? Probably, but this is safer). */ if (mystatus->segresults != NULL) { int i; for (i = 0; i < mystatus->numsegresults; i++) PQclear(mystatus->segresults[i]); free(mystatus->segresults); } SRF_RETURN_DONE(funcctx); }
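/*
 * Illustrative sketch (not part of the original source): the loop above maps a
 * single running row index onto (result set, row) coordinates across the
 * per-segDB PGresult array.  A standalone version of just that arithmetic,
 * using a hypothetical helper name and only documented libpq calls, could look
 * like this:
 */
static bool
locate_seg_row(PGresult **results, int nresults, int globalrow,
			   int *resultset, int *row)
{
	int			rs = 0;

	/* Skip whole result sets until the remaining index falls inside one. */
	while (rs < nresults && globalrow >= PQntuples(results[rs]))
	{
		globalrow -= PQntuples(results[rs]);
		rs++;
	}

	if (rs >= nresults)
		return false;			/* ran past the last result set: nothing left */

	*resultset = rs;
	*row = globalrow;
	return true;
}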
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(16, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; /* * This loop returns all the local lock data from the segment we are running on. */ while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[16]; bool nulls[16]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. 
*/ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_ADVISORY) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: case LOCKTAG_RELATION_RESYNCHRONIZE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TRANSACTION: values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_RESOURCE_QUEUE: values[1] = ObjectIdGetDatum(proc->databaseId); values[7] = ObjectIdGetDatum(lock->tag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); values[8] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; break; } values[9] = TransactionIdGetDatum(proc->xid); if (proc->pid != 0) values[10] = Int32GetDatum(proc->pid); else nulls[10] = true; values[11] = DirectFunctionCall1(textin, CStringGetDatum((char *) GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[12] = BoolGetDatum(granted); values[13] = Int32GetDatum(proc->mppSessionId); values[14] = Int32GetDatum(proc->mppIsWriter); values[15] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. 
*/ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[16]; bool nulls[16]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. * The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5))); values[6] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,10))); values[11] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow,11)); values[12] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,12),"t",1)==0); values[13] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13))); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); /* * Copy the null info over. It should all match properly. 
*/ for (i=0; i<16; i++) { nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i); } tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * if we dispatched to the segDBs, free up the memory holding the result sets. * Otherwise we might leak this memory each time we got called (does it automatically * get freed by the pool being deleted? Probably, but this is safer). */ if (mystatus->segresults != NULL) { int i; for (i = 0; i < mystatus->numsegresults; i++) PQclear(mystatus->segresults[i]); free(mystatus->segresults); } SRF_RETURN_DONE(funcctx); }
/* * Function returning all workfile cache entries for one segment */ Datum gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; int32 *crtIndexPtr; if (SRF_IS_FIRSTCALL()) { /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* Switch to memory context appropriate for multiple function calls */ MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* * Build a tuple descriptor for our result type * The number and type of attributes have to match the definition of the * view gp_workfile_mgr_cache_entries */ TupleDesc tupdesc = CreateTemplateTupleDesc(NUM_CACHE_ENTRIES_ELEM, false); Assert(NUM_CACHE_ENTRIES_ELEM == 12); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "path", TEXTOID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hash", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "size", INT8OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "state", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "workmem", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "optype", TEXTOID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "slice", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "sessionid", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "commandid", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "query_start", TIMESTAMPTZOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "numfiles", INT4OID, -1 /* typmod */, 0 /* attdim */); funcctx->tuple_desc = BlessTupleDesc(tupdesc); crtIndexPtr = (int32 *) palloc(sizeof(*crtIndexPtr)); *crtIndexPtr = 0; funcctx->user_fctx = crtIndexPtr; MemoryContextSwitchTo(oldcontext); } Cache *cache = workfile_mgr_get_cache(); funcctx = SRF_PERCALL_SETUP(); crtIndexPtr = (int32 *) funcctx->user_fctx; while (true) { CacheEntry *crtEntry = next_entry_to_list(cache, crtIndexPtr); if (!crtEntry) { /* Reached the end of the entry array, we're done */ SRF_RETURN_DONE(funcctx); } Datum values[NUM_CACHE_ENTRIES_ELEM]; bool nulls[NUM_CACHE_ENTRIES_ELEM]; MemSet(nulls, 0, sizeof(nulls)); workfile_set *work_set = CACHE_ENTRY_PAYLOAD(crtEntry); char work_set_path[MAXPGPATH] = ""; char *work_set_operator_name = NULL; /* * Lock entry in order to read its payload * Don't call any functions that can get interrupted or * that palloc memory while holding this lock. */ Cache_LockEntry(cache, crtEntry); if (!should_list_entry(crtEntry)) { Cache_UnlockEntry(cache, crtEntry); continue; } values[0] = Int32GetDatum(GpIdentity.segindex); strlcpy(work_set_path, work_set->path, MAXPGPATH); values[2] = UInt32GetDatum(crtEntry->hashvalue); int64 work_set_size = work_set->size; if (crtEntry->state == CACHE_ENTRY_ACQUIRED) { /* * work_set->size is not updated until the entry is cached. * For in-progress queries, the up-to-date size is stored in * work_set->in_progress_size. 
*/ work_set_size = work_set->in_progress_size; } values[3] = Int64GetDatum(work_set_size); values[4] = UInt32GetDatum(crtEntry->state); values[5] = UInt32GetDatum(work_set->metadata.operator_work_mem); work_set_operator_name = gp_workfile_operator_name(work_set->node_type); values[7] = UInt32GetDatum(work_set->slice_id); values[8] = UInt32GetDatum(work_set->session_id); values[9] = UInt32GetDatum(work_set->command_count); values[10] = TimestampTzGetDatum(work_set->session_start_time); values[11] = UInt32GetDatum(work_set->no_files); /* Done reading from the payload of the entry, release lock */ Cache_UnlockEntry(cache, crtEntry); /* * Fill in the rest of the entries of the tuple with data copied * from the descriptor. * CStringGetTextDatum calls palloc so we cannot do this while * holding the lock above. */ values[1] = CStringGetTextDatum(work_set_path); values[6] = CStringGetTextDatum(work_set_operator_name); HeapTuple tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); Datum result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } }
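/*
 * Illustrative sketch (not part of the original source): the copy-then-convert
 * pattern the function above relies on, reduced to a single field.  The helper
 * name is hypothetical; the cache/entry accessors are the ones used above.  The
 * key point is that nothing that can palloc or throw an error runs while the
 * shared entry is locked.
 */
static Datum
workfile_entry_path_datum(Cache *cache, CacheEntry *entry)
{
	char		path[MAXPGPATH];
	workfile_set *work_set;

	Cache_LockEntry(cache, entry);
	work_set = CACHE_ENTRY_PAYLOAD(entry);
	/* Plain byte copy only; no allocation, no error reporting under the lock. */
	strlcpy(path, work_set->path, MAXPGPATH);
	Cache_UnlockEntry(cache, entry);

	/* CStringGetTextDatum pallocs, so it runs only after the unlock. */
	return CStringGetTextDatum(path);
}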
/* ------------------------------------------------------ * pgstatginindex() * * Usage: SELECT * FROM pgstatginindex('ginindex'); * ------------------------------------------------------ */ Datum pgstatginindex(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; Buffer buffer; Page page; GinMetaPageData *metadata; GinIndexStat stats; HeapTuple tuple; TupleDesc tupleDesc; Datum values[3]; bool nulls[3] = {false, false, false}; Datum result; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); rel = relation_open(relid, AccessShareLock); if (!IS_INDEX(rel) || !IS_GIN(rel)) elog(ERROR, "relation \"%s\" is not a GIN index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* * Read metapage */ buffer = ReadBuffer(rel, GIN_METAPAGE_BLKNO); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); metadata = GinPageGetMeta(page); stats.version = metadata->ginVersion; stats.pending_pages = metadata->nPendingPages; stats.pending_tuples = metadata->nPendingHeapTuples; UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); /* * Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); values[0] = Int32GetDatum(stats.version); values[1] = UInt32GetDatum(stats.pending_pages); values[2] = Int64GetDatum(stats.pending_tuples); /* * Build and return the tuple */ tuple = heap_form_tuple(tupleDesc, values, nulls); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }
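/*
 * Note (added): get_call_result_type() above can only report TYPEFUNC_COMPOSITE
 * if the SQL-level function is declared with a composite result.  A declaration
 * matching the three columns filled in here might look like this (hypothetical,
 * not taken from this source):
 *
 *	CREATE FUNCTION pgstatginindex(IN relname regclass,
 *		OUT version int4,
 *		OUT pending_pages int4,
 *		OUT pending_tuples bigint)
 *	AS 'MODULE_PATHNAME', 'pgstatginindex'
 *	LANGUAGE C STRICT;
 */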
/* * Unit test function to test the routines added for * Delta Compression */ void test__DeltaCompression__Core(void **state) { Delta_Compression_status status = DELTA_COMPRESSION_NOT_APPLIED; DatumStreamTypeInfo typeInfo; Datum d; DatumStreamBlockWrite* dsw = malloc(sizeof(DatumStreamBlockWrite)); memset(dsw, 0, sizeof(DatumStreamBlockWrite)); /* Creating the DatumStreamBlockWrite structure to be used for below iterations */ strncpy(dsw->eyecatcher, DatumStreamBlockWrite_Eyecatcher, DatumStreamBlockWrite_EyecatcherLen); dsw->datumStreamVersion = DatumStreamVersion_Dense_Enhanced; dsw->rle_want_compression = true; dsw->delta_want_compression = true; dsw->typeInfo = &typeInfo; dsw->maxDataBlockSize = 32768; dsw->maxDatumPerBlock = 32768 / 64; dsw->datum_buffer_size = dsw->maxDataBlockSize; dsw->datum_buffer = malloc(dsw->datum_buffer_size); dsw->datum_afterp = dsw->datum_buffer + dsw->datum_buffer_size; dsw->null_bitmap_buffer_size = 64; dsw->null_bitmap_buffer = malloc(dsw->null_bitmap_buffer_size); dsw->rle_compress_bitmap_buffer_size = 16; dsw->rle_compress_bitmap_buffer = malloc(dsw->rle_compress_bitmap_buffer_size); dsw->rle_repeatcounts_maxcount = 16; dsw->rle_repeatcounts = malloc(dsw->rle_repeatcounts_maxcount * Int32Compress_MaxByteLen); dsw->delta_bitmap_buffer_size = 16; dsw->delta_bitmap_buffer = malloc(dsw->delta_bitmap_buffer_size); dsw->deltas_maxcount = 16; dsw->deltas = malloc(dsw->deltas_maxcount * Int32Compress_MaxByteLen); dsw->delta_sign = malloc(dsw->deltas_maxcount * sizeof(bool)); DatumStreamBlockWrite_GetReady(dsw); /* For unit testing using this type object */ typeInfo.datumlen = 4; typeInfo.typid = INT4OID; typeInfo.byval = true; /* * First value will always be physically stored */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(32)); assert_int_equal(status, DELTA_COMPRESSION_NOT_APPLIED); assert_false(dsw->delta_has_compression); assert_true(dsw->not_first_datum); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 0); assert_int_equal(dsw->delta_bitmap.bitCount, 0); /* Since this is a physical datum, test the routines that process it */ DatumStreamBlockWrite_DenseIncrItem(dsw, 0, 4); DatumStreamBlockWrite_DeltaMaintain(dsw, UInt32GetDatum(32)); assert_int_equal(dsw->physical_datum_count, 1); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 0); assert_int_equal(dsw->delta_bitmap.bitCount, 0); /* * Second value, this must be processed as delta */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(33)); assert_int_equal(status, DELTA_COMPRESSION_OK); assert_true(dsw->delta_has_compression); assert_int_equal((*(uint32 *) (&dsw->compare_item)), 33); /* delta value checks */ assert_int_equal(dsw->deltas_count, 1); assert_int_equal(dsw->delta_sign[dsw->deltas_count-1], true); assert_int_equal(dsw->deltas[dsw->deltas_count-1], 1); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 1); assert_int_equal(dsw->delta_bitmap.bitCount,2); assert_true(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); /* * Third value, this must be processed as positive delta case */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(53)); assert_int_equal(status, DELTA_COMPRESSION_OK); assert_true(dsw->delta_has_compression); /* delta value checks */ assert_int_equal((*(uint32 *) (&dsw->compare_item)), 53); assert_int_equal(dsw->deltas_count, 2); assert_int_equal(dsw->delta_sign[dsw->deltas_count-1], true);
assert_int_equal(dsw->deltas[dsw->deltas_count-1], 53 - 33); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 2); assert_int_equal(dsw->delta_bitmap.bitCount,3); assert_true(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); /* * Fourth value, this must be processed as negative delta case */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(23)); assert_int_equal(status, DELTA_COMPRESSION_OK); assert_true(dsw->delta_has_compression); /* delta value checks */ assert_int_equal((*(uint32 *) (&dsw->compare_item)), 23); assert_int_equal(dsw->deltas_count, 3); assert_int_equal(dsw->delta_sign[dsw->deltas_count-1], false); assert_int_equal(dsw->deltas[dsw->deltas_count-1], 53 - 23); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 3); assert_int_equal(dsw->delta_bitmap.bitCount,4); assert_true(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); /* * Fifth value, this exceeds the maximum supported delta, so delta compression must not be applied */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(23 + MAX_DELTA_SUPPORTED_DELTA_COMPRESSION + 1)); assert_int_equal(status, DELTA_COMPRESSION_NOT_APPLIED); assert_true(dsw->delta_has_compression); /* delta value checks */ assert_int_equal((*(uint32 *) (&dsw->compare_item)), 23); assert_int_equal(dsw->deltas_count, 3); assert_int_equal(dsw->delta_sign[dsw->deltas_count-1], false); assert_int_equal(dsw->deltas[dsw->deltas_count-1], 53 - 23); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 3); assert_int_equal(dsw->delta_bitmap.bitCount,4); assert_true(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); assert_true(dsw->not_first_datum); /* Again, since this is a physical datum, test the routines that process it */ DatumStreamBlockWrite_DenseIncrItem(dsw, 0, 4); DatumStreamBlockWrite_DeltaMaintain(dsw, UInt32GetDatum(23 + MAX_DELTA_SUPPORTED_DELTA_COMPRESSION + 1)); assert_int_equal(dsw->physical_datum_count, 2); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 3); assert_int_equal(dsw->delta_bitmap.bitCount, 5); assert_false(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); /* * Sixth value, this must again be processed as a delta (negative, relative to the previous physically stored value) */ status = DatumStreamBlockWrite_PerformDeltaCompression(dsw, UInt32GetDatum(63)); assert_int_equal(status, DELTA_COMPRESSION_OK); assert_true(dsw->delta_has_compression); /* delta value checks */ assert_int_equal((*(uint32 *) (&dsw->compare_item)), 63); assert_int_equal(dsw->deltas_count, 4); assert_int_equal(dsw->delta_sign[dsw->deltas_count-1], false); assert_int_equal(dsw->deltas[dsw->deltas_count-1], 23 + MAX_DELTA_SUPPORTED_DELTA_COMPRESSION + 1 - 63); /* corresponding bitmap checks */ assert_int_equal(dsw->delta_bitmap.bitOnCount, 4); assert_int_equal(dsw->delta_bitmap.bitCount,6); assert_true(DatumStreamBitMapWrite_CurrentIsOn(&dsw->delta_bitmap)); free(dsw->datum_buffer); free(dsw->null_bitmap_buffer); free(dsw->rle_compress_bitmap_buffer); free(dsw->rle_repeatcounts); free(dsw->delta_bitmap_buffer); free(dsw->deltas); free(dsw->delta_sign); free(dsw); }
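/*
 * Illustrative sketch (not part of the original source): the decision logic the
 * test above exercises, reduced to plain C.  The names here are hypothetical
 * simplifications of the DatumStreamBlockWrite fields; in the real code the
 * comparison value is refreshed by the separate
 * DatumStreamBlockWrite_DeltaMaintain() step after a physical store, which this
 * sketch folds into one function.
 */
typedef struct SimpleDeltaState
{
	bool		have_prev;		/* has a first value been seen? */
	uint32		prev;			/* value the next delta is computed against */
} SimpleDeltaState;

/*
 * Returns true and sets *delta/*positive if 'val' can be stored as a delta;
 * returns false if it must be stored physically.
 */
static bool
simple_delta_encode(SimpleDeltaState *st, uint32 val,
					uint32 *delta, bool *positive)
{
	uint32		diff;

	if (!st->have_prev)
	{
		/* First value is always stored physically. */
		st->have_prev = true;
		st->prev = val;
		return false;
	}

	*positive = (val >= st->prev);
	diff = *positive ? (val - st->prev) : (st->prev - val);

	/* Either way, later deltas are computed against this value. */
	st->prev = val;

	if (diff > MAX_DELTA_SUPPORTED_DELTA_COMPRESSION)
		return false;			/* delta too large; store physically */

	*delta = diff;
	return true;
}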
/* * pgmpc_status * * Show current song and status. */ Datum pgmpc_status(PG_FUNCTION_ARGS) { #define PGMPC_STATUS_COLUMNS 7 Datum values[PGMPC_STATUS_COLUMNS]; bool nulls[PGMPC_STATUS_COLUMNS]; TupleDesc tupdesc; HeapTuple tuple; Datum result; if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); pgmpc_init(); /* Initialize the values of return tuple */ MemSet(values, 0, sizeof(values)); MemSet(nulls, true, sizeof(nulls)); /* * Send all necessary commands at once to avoid unnecessary round * trips. The following information is obtained in an asynchronous way: * - current status of server * - current song running on the server */ if (!mpd_command_list_begin(mpd_conn, true) || !mpd_send_status(mpd_conn) || !mpd_send_current_song(mpd_conn) || !mpd_command_list_end(mpd_conn)) pgmpc_print_error(); /* Obtain status from server and check it */ mpd_status = mpd_recv_status(mpd_conn); if (mpd_status == NULL) pgmpc_print_error(); /* Show current song if any */ if (mpd_status_get_state(mpd_status) == MPD_STATE_PLAY || mpd_status_get_state(mpd_status) == MPD_STATE_PAUSE) { struct mpd_song *song; /* There should be a next response, in this case a song */ if (!mpd_response_next(mpd_conn)) pgmpc_print_error(); /* And now get it */ song = mpd_recv_song(mpd_conn); if (song != NULL) { /* Get information about the current song */ const char *title = mpd_song_get_tag(song, MPD_TAG_TITLE, 0); const char *artist = mpd_song_get_tag(song, MPD_TAG_ARTIST, 0); const char *album = mpd_song_get_tag(song, MPD_TAG_ALBUM, 0); unsigned int elapsed_time = mpd_status_get_elapsed_time(mpd_status); unsigned int total_time = mpd_status_get_total_time(mpd_status); int song_pos = mpd_status_get_song_pos(mpd_status) + 1; int volume = mpd_status_get_volume(mpd_status); /* Build tuple using this information */ if (title) { nulls[0] = false; values[0] = CStringGetTextDatum(title); } else nulls[0] = true; if (artist) { nulls[1] = false; values[1] = CStringGetTextDatum(artist); } else nulls[1] = true; if (album) { nulls[2] = false; values[2] = CStringGetTextDatum(album); } else nulls[2] = true; nulls[3] = false; values[3] = UInt32GetDatum(elapsed_time); nulls[4] = false; values[4] = UInt32GetDatum(total_time); nulls[5] = false; values[5] = Int32GetDatum(song_pos); nulls[6] = false; values[6] = Int32GetDatum(volume); /* Song data is no longer needed */ mpd_song_free(song); } if (!mpd_response_finish(mpd_conn)) pgmpc_print_error(); } /* Cleanup MPD status */ pgmpc_reset(); /* Form result and return it */ tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }
/* * Extract all item values from a BRIN index page * * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass); */ Datum brin_page_items(PG_FUNCTION_ARGS) { brin_page_state *state; FuncCallContext *fctx; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); if (SRF_IS_FIRSTCALL()) { bytea *raw_page = PG_GETARG_BYTEA_P(0); Oid indexRelid = PG_GETARG_OID(1); Page page; TupleDesc tupdesc; MemoryContext mctx; Relation indexRel; AttrNumber attno; /* minimally verify the page we got */ page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular"); /* create a function context for cross-call persistence */ fctx = SRF_FIRSTCALL_INIT(); /* switch to memory context appropriate for multiple function calls */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); indexRel = index_open(indexRelid, AccessShareLock); state = palloc(offsetof(brin_page_state, columns) + sizeof(brin_column_state) * RelationGetDescr(indexRel)->natts); state->bdesc = brin_build_desc(indexRel); state->page = page; state->offset = FirstOffsetNumber; state->unusedItem = false; state->done = false; state->dtup = NULL; /* * Initialize output functions for all indexed datatypes; simplifies * calling them later. */ for (attno = 1; attno <= state->bdesc->bd_tupdesc->natts; attno++) { Oid output; bool isVarlena; BrinOpcInfo *opcinfo; int i; brin_column_state *column; opcinfo = state->bdesc->bd_info[attno - 1]; column = palloc(offsetof(brin_column_state, outputFn) + sizeof(FmgrInfo) * opcinfo->oi_nstored); column->nstored = opcinfo->oi_nstored; for (i = 0; i < opcinfo->oi_nstored; i++) { getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena); fmgr_info(output, &column->outputFn[i]); } state->columns[attno - 1] = column; } index_close(indexRel, AccessShareLock); fctx->user_fctx = state; fctx->tuple_desc = BlessTupleDesc(tupdesc); MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); state = fctx->user_fctx; if (!state->done) { HeapTuple result; Datum values[7]; bool nulls[7]; /* * This loop is called once for every attribute of every tuple in the * page. At the start of a tuple, we get a NULL dtup; that's our * signal for obtaining and decoding the next one. If that's not the * case, we output the next attribute. 
*/ if (state->dtup == NULL) { BrinTuple *tup; MemoryContext mctx; ItemId itemId; /* deformed tuple must live across calls */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); /* verify item status: if there's no data, we can't decode */ itemId = PageGetItemId(state->page, state->offset); if (ItemIdIsUsed(itemId)) { tup = (BrinTuple *) PageGetItem(state->page, PageGetItemId(state->page, state->offset)); state->dtup = brin_deform_tuple(state->bdesc, tup); state->attno = 1; state->unusedItem = false; } else state->unusedItem = true; MemoryContextSwitchTo(mctx); } else state->attno++; MemSet(nulls, 0, sizeof(nulls)); if (state->unusedItem) { values[0] = UInt16GetDatum(state->offset); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; } else { int att = state->attno - 1; values[0] = UInt16GetDatum(state->offset); values[1] = UInt32GetDatum(state->dtup->bt_blkno); values[2] = UInt16GetDatum(state->attno); values[3] = BoolGetDatum(state->dtup->bt_columns[att].bv_allnulls); values[4] = BoolGetDatum(state->dtup->bt_columns[att].bv_hasnulls); values[5] = BoolGetDatum(state->dtup->bt_placeholder); if (!state->dtup->bt_columns[att].bv_allnulls) { BrinValues *bvalues = &state->dtup->bt_columns[att]; StringInfoData s; bool first; int i; initStringInfo(&s); appendStringInfoChar(&s, '{'); first = true; for (i = 0; i < state->columns[att]->nstored; i++) { char *val; if (!first) appendStringInfoString(&s, " .. "); first = false; val = OutputFunctionCall(&state->columns[att]->outputFn[i], bvalues->bv_values[i]); appendStringInfoString(&s, val); pfree(val); } appendStringInfoChar(&s, '}'); values[6] = CStringGetTextDatum(s.data); pfree(s.data); } else { nulls[6] = true; } } result = heap_form_tuple(fctx->tuple_desc, values, nulls); /* * If the item was unused, jump straight to the next one; otherwise, * the only cleanup needed here is to set our signal to go to the next * tuple in the following iteration, by freeing the current one. */ if (state->unusedItem) state->offset = OffsetNumberNext(state->offset); else if (state->attno >= state->bdesc->bd_tupdesc->natts) { pfree(state->dtup); state->dtup = NULL; state->offset = OffsetNumberNext(state->offset); } /* * If we're beyond the end of the page, set flag to end the function * in the following iteration. */ if (state->offset > PageGetMaxOffsetNumber(state->page)) state->done = true; SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(result)); } brin_free_desc(state->bdesc); SRF_RETURN_DONE(fctx); }
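/*
 * Illustrative sketch (not part of the original source): the bare value-per-call
 * SRF skeleton that brin_page_items() above (and several other functions in this
 * collection) follows.  build_my_state(), build_my_tupdesc(), have_more_rows()
 * and make_next_tuple() are hypothetical placeholders for the per-function work.
 */
PG_FUNCTION_INFO_V1(my_srf);

Datum
my_srf(PG_FUNCTION_ARGS)
{
	FuncCallContext *fctx;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcxt;

		fctx = SRF_FIRSTCALL_INIT();

		/* State that must survive across calls lives in this context. */
		oldcxt = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
		fctx->user_fctx = build_my_state(fcinfo);
		fctx->tuple_desc = BlessTupleDesc(build_my_tupdesc());
		MemoryContextSwitchTo(oldcxt);
	}

	fctx = SRF_PERCALL_SETUP();

	if (have_more_rows(fctx->user_fctx))
	{
		HeapTuple	tuple = make_next_tuple(fctx->user_fctx, fctx->tuple_desc);

		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(tuple));
	}

	SRF_RETURN_DONE(fctx);
}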
/* * Extract all item values from a BRIN index page * * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass); */ Datum brin_page_items(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); Oid indexRelid = PG_GETARG_OID(1); ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupdesc; MemoryContext oldcontext; Tuplestorestate *tupstore; Relation indexRel; brin_column_state **columns; BrinDesc *bdesc; BrinMemTuple *dtup; Page page; OffsetNumber offset; AttrNumber attno; bool unusedItem; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize) || rsinfo->expectedDesc == NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not allowed in this context"))); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Build tuplestore to hold the result rows */ oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); indexRel = index_open(indexRelid, AccessShareLock); bdesc = brin_build_desc(indexRel); /* minimally verify the page we got */ page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular"); /* * Initialize output functions for all indexed datatypes; simplifies * calling them later. */ columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts); for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++) { Oid output; bool isVarlena; BrinOpcInfo *opcinfo; int i; brin_column_state *column; opcinfo = bdesc->bd_info[attno - 1]; column = palloc(offsetof(brin_column_state, outputFn) + sizeof(FmgrInfo) * opcinfo->oi_nstored); column->nstored = opcinfo->oi_nstored; for (i = 0; i < opcinfo->oi_nstored; i++) { getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena); fmgr_info(output, &column->outputFn[i]); } columns[attno - 1] = column; } offset = FirstOffsetNumber; unusedItem = false; dtup = NULL; for (;;) { Datum values[7]; bool nulls[7]; /* * This loop is called once for every attribute of every tuple in the * page. At the start of a tuple, we get a NULL dtup; that's our * signal for obtaining and decoding the next one. If that's not the * case, we output the next attribute. 
*/ if (dtup == NULL) { ItemId itemId; /* verify item status: if there's no data, we can't decode */ itemId = PageGetItemId(page, offset); if (ItemIdIsUsed(itemId)) { dtup = brin_deform_tuple(bdesc, (BrinTuple *) PageGetItem(page, itemId)); attno = 1; unusedItem = false; } else unusedItem = true; } else attno++; MemSet(nulls, 0, sizeof(nulls)); if (unusedItem) { values[0] = UInt16GetDatum(offset); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; } else { int att = attno - 1; values[0] = UInt16GetDatum(offset); values[1] = UInt32GetDatum(dtup->bt_blkno); values[2] = UInt16GetDatum(attno); values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls); values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls); values[5] = BoolGetDatum(dtup->bt_placeholder); if (!dtup->bt_columns[att].bv_allnulls) { BrinValues *bvalues = &dtup->bt_columns[att]; StringInfoData s; bool first; int i; initStringInfo(&s); appendStringInfoChar(&s, '{'); first = true; for (i = 0; i < columns[att]->nstored; i++) { char *val; if (!first) appendStringInfoString(&s, " .. "); first = false; val = OutputFunctionCall(&columns[att]->outputFn[i], bvalues->bv_values[i]); appendStringInfoString(&s, val); pfree(val); } appendStringInfoChar(&s, '}'); values[6] = CStringGetTextDatum(s.data); pfree(s.data); } else { nulls[6] = true; } } tuplestore_putvalues(tupstore, tupdesc, values, nulls); /* * If the item was unused, jump straight to the next one; otherwise, * the only cleanup needed here is to set our signal to go to the next * tuple in the following iteration, by freeing the current one. */ if (unusedItem) offset = OffsetNumberNext(offset); else if (attno >= bdesc->bd_tupdesc->natts) { pfree(dtup); dtup = NULL; offset = OffsetNumberNext(offset); } /* * If we're beyond the end of the page, we're done. */ if (offset > PageGetMaxOffsetNumber(page)) break; } /* clean up and return the tuplestore */ brin_free_desc(bdesc); tuplestore_donestoring(tupstore); index_close(indexRel, AccessShareLock); return (Datum) 0; }
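/*
 * Illustrative sketch (not part of the original source): the matching skeleton
 * for the materialize-mode style used by the second brin_page_items() variant
 * above.  compute_next_row() is a hypothetical stand-in for the per-row work;
 * everything else follows the calls already used in that function.
 */
PG_FUNCTION_INFO_V1(my_materialized_srf);

Datum
my_materialized_srf(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	TupleDesc	tupdesc;
	Tuplestorestate *tupstore;
	MemoryContext oldcxt;
	Datum		values[7];
	bool		nulls[7];

	/* Caller must be able to accept a materialized result set. */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo) ||
		!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not allowed in this context")));

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	/* The tuplestore must live in the per-query context, not the per-call one. */
	oldcxt = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
	tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = tupstore;
	rsinfo->setDesc = tupdesc;
	MemoryContextSwitchTo(oldcxt);

	/* Push every row into the tuplestore; no per-call state machine needed. */
	while (compute_next_row(values, nulls))
		tuplestore_putvalues(tupstore, tupdesc, values, nulls);

	return (Datum) 0;
}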
Datum pg_freespacemap_relations(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; MemoryContext oldcontext; FreeSpaceRelationsContext *fctx; /* User function context. */ TupleDesc tupledesc; HeapTuple tuple; FSMHeader *FreeSpaceMap; /* FSM main structure. */ FSMRelation *fsmrel; /* Individual relation. */ if (SRF_IS_FIRSTCALL()) { int i; int numRelations; /* Max no. of Relations in map. */ /* * Get the free space map data structure. */ FreeSpaceMap = GetFreeSpaceMap(); numRelations = MaxFSMRelations; funcctx = SRF_FIRSTCALL_INIT(); /* Switch context when allocating stuff to be used in later calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* * Create a function context for cross-call persistence. */ fctx = (FreeSpaceRelationsContext *) palloc(sizeof(FreeSpaceRelationsContext)); funcctx->user_fctx = fctx; /* Construct a tuple descriptor for the result rows. */ tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_RELATIONS_ELEM, false); TupleDescInitEntry(tupledesc, (AttrNumber) 1, "reltablespace", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reldatabase", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 4, "avgrequest", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 5, "interestingpages", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 6, "storedpages", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 7, "nextpage", INT4OID, -1, 0); fctx->tupdesc = BlessTupleDesc(tupledesc); /* * Allocate numRelations worth of FreeSpaceRelationsRec records; this * is also an upper bound. */ fctx->record = (FreeSpaceRelationsRec *) palloc(sizeof(FreeSpaceRelationsRec) * numRelations); /* Return to original context when allocating transient memory */ MemoryContextSwitchTo(oldcontext); /* * Lock the free space map and scan through all the relations. */ LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); i = 0; for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage) { fctx->record[i].reltablespace = fsmrel->key.spcNode; fctx->record[i].reldatabase = fsmrel->key.dbNode; fctx->record[i].relfilenode = fsmrel->key.relNode; fctx->record[i].avgrequest = (int64) fsmrel->avgRequest; fctx->record[i].interestingpages = fsmrel->interestingPages; fctx->record[i].storedpages = fsmrel->storedPages; fctx->record[i].nextpage = fsmrel->nextPage; fctx->record[i].isindex = fsmrel->isIndex; i++; } /* Release free space map. */ LWLockRelease(FreeSpaceLock); /* Set the real no. of calls as we know it now! */ Assert(i <= numRelations); funcctx->max_calls = i; } funcctx = SRF_PERCALL_SETUP(); /* Get the saved state */ fctx = funcctx->user_fctx; if (funcctx->call_cntr < funcctx->max_calls) { int i = funcctx->call_cntr; FreeSpaceRelationsRec *record = &fctx->record[i]; Datum values[NUM_FREESPACE_RELATIONS_ELEM]; bool nulls[NUM_FREESPACE_RELATIONS_ELEM]; values[0] = ObjectIdGetDatum(record->reltablespace); nulls[0] = false; values[1] = ObjectIdGetDatum(record->reldatabase); nulls[1] = false; values[2] = ObjectIdGetDatum(record->relfilenode); nulls[2] = false; /* * avgrequest isn't meaningful for an index */ if (record->isindex) { nulls[3] = true; } else { values[3] = UInt32GetDatum(record->avgrequest); nulls[3] = false; } values[4] = Int32GetDatum(record->interestingpages); nulls[4] = false; values[5] = Int32GetDatum(record->storedpages); nulls[5] = false; values[6] = Int32GetDatum(record->nextpage); nulls[6] = false; /* Build and return the tuple.
*/ tuple = heap_form_tuple(fctx->tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } else SRF_RETURN_DONE(funcctx); }