/* * get_all_brokers * * Return a list of all brokers in pipeline_kafka_brokers */ static List * get_all_brokers(void) { HeapTuple tup = NULL; HeapScanDesc scan; Relation brokers = open_pipeline_kafka_brokers(); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(brokers)); List *result = NIL; scan = heap_beginscan(brokers, GetTransactionSnapshot(), 0, NULL); while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { char *host; Datum d; bool isnull; ExecStoreTuple(tup, slot, InvalidBuffer, false); d = slot_getattr(slot, BROKER_ATTR_HOST, &isnull); host = TextDatumGetCString(d); result = lappend(result, host); } ExecDropSingleTupleTableSlot(slot); heap_endscan(scan); heap_close(brokers, NoLock); return result; }
void SpoolerInsert(Spooler *self, HeapTuple tuple) { /* Spool keys in the tuple */ ExecStoreTuple(tuple, self->slot, InvalidBuffer, false); IndexSpoolInsert(self->spools, self->slot, &(tuple->t_self), self->estate, true); BULKLOAD_PROFILE(&prof_writer_index); }
/* ---------------------------------------------------------------- * FunctionNext * * This is a workhorse for ExecFunctionScan * ---------------------------------------------------------------- */ static TupleTableSlot * FunctionNext(FunctionScanState *node) { TupleTableSlot *slot; EState *estate; ScanDirection direction; Tuplestorestate *tuplestorestate; bool should_free; HeapTuple heapTuple; /* * get information from the estate and scan state */ estate = node->ss.ps.state; direction = estate->es_direction; tuplestorestate = node->tuplestorestate; /* * If first time through, read all tuples from function and put them * in a tuplestore. Subsequent calls just fetch tuples from * tuplestore. */ if (tuplestorestate == NULL) { ExprContext *econtext = node->ss.ps.ps_ExprContext; TupleDesc funcTupdesc; node->tuplestorestate = tuplestorestate = ExecMakeTableFunctionResult(node->funcexpr, econtext, node->tupdesc, &funcTupdesc); /* * If function provided a tupdesc, cross-check it. We only really * need to do this for functions returning RECORD, but might as * well do it always. */ if (funcTupdesc && !tupledesc_match(node->tupdesc, funcTupdesc)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("query-specified return row and actual function return row do not match"))); } /* * Get the next tuple from tuplestore. Return NULL if no more tuples. */ heapTuple = tuplestore_getheaptuple(tuplestorestate, ScanDirectionIsForward(direction), &should_free); slot = node->ss.ss_ScanTupleSlot; return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); }
/* ---------------------------------------------------------------- * FunctionNext * * This is a workhorse for ExecFunctionScan * ---------------------------------------------------------------- */ static TupleTableSlot * FunctionNext(FunctionScanState *node) { TupleTableSlot *slot; EState *estate; ScanDirection direction; Tuplestorestate *tuplestorestate; bool should_free; HeapTuple heapTuple; /* * get information from the estate and scan state */ estate = node->ss.ps.state; direction = estate->es_direction; tuplestorestate = node->tuplestorestate; /* * If first time through, read all tuples from function and put them in a * tuplestore. Subsequent calls just fetch tuples from tuplestore. */ if (tuplestorestate == NULL) { ExprContext *econtext = node->ss.ps.ps_ExprContext; TupleDesc funcTupdesc; node->tuplestorestate = tuplestorestate = ExecMakeTableFunctionResult(node->funcexpr, econtext, node->tupdesc, &funcTupdesc); /* * If function provided a tupdesc, cross-check it. We only really * need to do this for functions returning RECORD, but might as well * do it always. */ if (funcTupdesc) tupledesc_match(node->tupdesc, funcTupdesc); } /* * Get the next tuple from tuplestore. Return NULL if no more tuples. */ heapTuple = tuplestore_getheaptuple(tuplestorestate, ScanDirectionIsForward(direction), &should_free); slot = node->ss.ss_ScanTupleSlot; if (heapTuple) return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); else return ExecClearTuple(slot); }
/* ---------------------------------------------------------------- * SeqNext * * This is a workhorse for ExecSeqScan * ---------------------------------------------------------------- */ static TupleTableSlot * SeqNext(SeqScanState *node) { HeapTuple tuple; HeapScanDesc scandesc; EState *estate; ScanDirection direction; TupleTableSlot *slot; /* * get information from the estate and scan state */ scandesc = node->ss.ss_currentScanDesc; estate = node->ss.ps.state; direction = estate->es_direction; slot = node->ss.ss_ScanTupleSlot; if (scandesc == NULL) { /* * We reach here if the scan is not parallel, or if we're executing a * scan that was intended to be parallel serially. */ scandesc = heap_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, 0, NULL); node->ss.ss_currentScanDesc = scandesc; } /* * get the next tuple from the table */ tuple = heap_getnext(scandesc, direction); /* * save the tuple and the buffer returned to us by the access methods in * our scan tuple slot and return the slot. Note: we pass 'false' because * tuples returned by heap_getnext() are pointers onto disk pages and were * not created with palloc() and so should not be pfree()'d. Note also * that ExecStoreTuple will increment the refcount of the buffer; the * refcount will not be dropped until the tuple table slot is cleared. */ if (tuple) ExecStoreTuple(tuple, /* tuple to store */ slot, /* slot to store in */ scandesc->rs_cbuf, /* buffer associated with this * tuple */ false); /* don't pfree this pointer */ else ExecClearTuple(slot); return slot; }
/* * load_consumer_offsets * * Load all offsets for all of this consumer's partitions */ static void load_consumer_offsets(KafkaConsumer *consumer, struct rd_kafka_metadata_topic *meta, int64_t offset) { MemoryContext old; ScanKeyData skey[1]; HeapTuple tup = NULL; HeapScanDesc scan; Relation offsets = open_pipeline_kafka_offsets(); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(offsets)); int i; ScanKeyInit(&skey[0], OFFSETS_ATTR_CONSUMER, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(consumer->id)); scan = heap_beginscan(offsets, GetTransactionSnapshot(), 1, skey); old = MemoryContextSwitchTo(CacheMemoryContext); consumer->offsets = palloc0(meta->partition_cnt * sizeof(int64_t)); MemoryContextSwitchTo(old); /* by default, begin consuming from the end of a stream */ for (i = 0; i < meta->partition_cnt; i++) consumer->offsets[i] = offset; consumer->num_partitions = meta->partition_cnt; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { Datum d; bool isnull; int partition; ExecStoreTuple(tup, slot, InvalidBuffer, false); d = slot_getattr(slot, OFFSETS_ATTR_PARTITION, &isnull); partition = DatumGetInt32(d); if(partition > consumer->num_partitions) elog(ERROR, "invalid partition id: %d", partition); if (offset == RD_KAFKA_OFFSET_NULL) { d = slot_getattr(slot, OFFSETS_ATTR_OFFSET, &isnull); if (isnull) offset = RD_KAFKA_OFFSET_END; else offset = DatumGetInt64(d); } consumer->offsets[partition] = DatumGetInt64(offset); } ExecDropSingleTupleTableSlot(slot); heap_endscan(scan); heap_close(offsets, RowExclusiveLock); }
/* * ExecRecFetch -- fetch next potential tuple * * This routine is concerned with substituting a test tuple if we are * inside an EvalPlanQual recheck. If we aren't, just execute * the access method's next-tuple routine. * * This method is identical to ExecScanFetch, we just want to avoid * possible duplicate function issues. */ static inline TupleTableSlot * ExecRecFetch(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd) { EState *estate = node->ps.state; if (estate->es_epqTuple != NULL) { /* * We are inside an EvalPlanQual recheck. Return the test tuple if * one is available, after rechecking any access-method-specific * conditions. */ Index scanrelid = ((Scan *) node->ps.plan)->scanrelid; Assert(scanrelid > 0); if (estate->es_epqTupleSet[scanrelid - 1]) { TupleTableSlot *slot = node->ss_ScanTupleSlot; /* Return empty slot if we already returned a tuple */ if (estate->es_epqScanDone[scanrelid - 1]) return ExecClearTuple(slot); /* Else mark to remember that we shouldn't return more */ estate->es_epqScanDone[scanrelid - 1] = true; /* Return empty slot if we haven't got a test tuple */ if (estate->es_epqTuple[scanrelid - 1] == NULL) return ExecClearTuple(slot); /* Store test tuple in the plan node's scan slot */ ExecStoreTuple(estate->es_epqTuple[scanrelid - 1], slot, InvalidBuffer, false); /* Check if it meets the access-method conditions */ if (!(*recheckMtd) (node, slot)) ExecClearTuple(slot); /* would not be returned by scan */ return slot; } } /* * Run the node-type-specific access method function to get the next tuple */ return (*accessMtd) (node); }
/* * Read the next tuple. We might fetch a tuple from one of the tuple queues * using gather_readnext, or if no tuple queue contains a tuple and the * single_copy flag is not set, we might generate one locally instead. */ static TupleTableSlot * gather_getnext(GatherState *gatherstate) { PlanState *outerPlan = outerPlanState(gatherstate); TupleTableSlot *outerTupleSlot; TupleTableSlot *fslot = gatherstate->funnel_slot; HeapTuple tup; while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally) { CHECK_FOR_INTERRUPTS(); if (gatherstate->nreaders > 0) { tup = gather_readnext(gatherstate); if (HeapTupleIsValid(tup)) { ExecStoreTuple(tup, /* tuple to store */ fslot, /* slot in which to store the tuple */ InvalidBuffer, /* buffer associated with this * tuple */ true); /* pfree tuple when done with it */ return fslot; } } if (gatherstate->need_to_scan_locally) { EState *estate = gatherstate->ps.state; /* Install our DSA area while executing the plan. */ estate->es_query_dsa = gatherstate->pei ? gatherstate->pei->area : NULL; outerTupleSlot = ExecProcNode(outerPlan); estate->es_query_dsa = NULL; if (!TupIsNull(outerTupleSlot)) return outerTupleSlot; gatherstate->need_to_scan_locally = false; } } return ExecClearTuple(fslot); }
/* -------------------------------- * ExecCopySlot * Copy the source slot's contents into the destination slot. * * The destination acquires a private copy that will not go away * if the source is cleared. * * The caller must ensure the slots have compatible tupdescs. * -------------------------------- */ TupleTableSlot * ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot) { HeapTuple newTuple; MemoryContext oldContext; /* * There might be ways to optimize this when the source is virtual, but * for now just always build a physical copy. Make sure it is in the * right context. */ oldContext = MemoryContextSwitchTo(dstslot->tts_mcxt); newTuple = ExecCopySlotTuple(srcslot); MemoryContextSwitchTo(oldContext); return ExecStoreTuple(newTuple, dstslot, InvalidBuffer, true); }
/* * CopyIntoStream * * COPY events to a stream from an input source */ void CopyIntoStream(Relation rel, TupleDesc desc, HeapTuple *tuples, int ntuples) { bool snap = ActiveSnapshotSet(); ResultRelInfo rinfo; StreamInsertState *sis; MemSet(&rinfo, 0, sizeof(ResultRelInfo)); rinfo.ri_RangeTableIndex = 1; /* dummy */ rinfo.ri_TrigDesc = NULL; rinfo.ri_RelationDesc = rel; if (snap) PopActiveSnapshot(); BeginStreamModify(NULL, &rinfo, list_make1(desc), 0, 0); sis = (StreamInsertState *) rinfo.ri_FdwState; Assert(sis); if (sis->queries) { TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel)); int i; for (i = 0; i < ntuples; i++) { ExecStoreTuple(tuples[i], slot, InvalidBuffer, false); ExecStreamInsert(NULL, &rinfo, slot, NULL); ExecClearTuple(slot); } ExecDropSingleTupleTableSlot(slot); Assert(sis->ntups == ntuples); pgstat_increment_cq_write(ntuples, sis->nbytes); } EndStreamModify(NULL, &rinfo); if (snap) PushActiveSnapshot(GetTransactionSnapshot()); }
/* * Read the next tuple. We might fetch a tuple from one of the tuple queues * using gather_readnext, or if no tuple queue contains a tuple and the * single_copy flag is not set, we might generate one locally instead. */ static TupleTableSlot * gather_getnext(GatherState *gatherstate) { PlanState *outerPlan = outerPlanState(gatherstate); TupleTableSlot *outerTupleSlot; TupleTableSlot *fslot = gatherstate->funnel_slot; HeapTuple tup; while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally) { if (gatherstate->reader != NULL) { tup = gather_readnext(gatherstate); if (HeapTupleIsValid(tup)) { ExecStoreTuple(tup, /* tuple to store */ fslot, /* slot in which to store the tuple */ InvalidBuffer, /* buffer associated with this * tuple */ true); /* pfree this pointer if not from heap */ return fslot; } } if (gatherstate->need_to_scan_locally) { outerTupleSlot = ExecProcNode(outerPlan); if (!TupIsNull(outerTupleSlot)) return outerTupleSlot; gatherstate->need_to_scan_locally = false; } } return ExecClearTuple(fslot); }
HeapTuple CheckerConstraints(Checker *checker, HeapTuple tuple, int *parsing_field) { if (checker->has_constraints) { *parsing_field = 0; /* Place tuple in tuple slot */ ExecStoreTuple(tuple, checker->slot, InvalidBuffer, false); /* Check the constraints of the tuple */ ExecConstraints(checker->resultRelInfo, checker->slot, checker->estate); } else if (checker->has_not_null && HeapTupleHasNulls(tuple)) { /* * Even if CHECK_CONSTRAINTS is not specified, check NOT NULL constraint */ TupleDesc desc = checker->desc; int i; for (i = 0; i < desc->natts; i++) { if (desc->attrs[i]->attnotnull && att_isnull(i, tuple->t_data->t_bits)) { *parsing_field = i + 1; /* 1 origin */ ereport(ERROR, (errcode(ERRCODE_NOT_NULL_VIOLATION), errmsg("null value in column \"%s\" violates not-null constraint", NameStr(desc->attrs[i]->attname)))); } } } return tuple; }
/* -------------------------------- * ExecMaterializeSlot * Force a slot into the "materialized" state. * * This causes the slot's tuple to be a local copy not dependent on * any external storage. A pointer to the contained tuple is returned. * * A typical use for this operation is to prepare a computed tuple * for being stored on disk. The original data may or may not be * virtual, but in any case we need a private copy for heap_insert * to scribble on. * -------------------------------- */ HeapTuple ExecMaterializeSlot(TupleTableSlot *slot) { HeapTuple newTuple; MemoryContext oldContext; /* * sanity checks */ Assert(slot != NULL); Assert(!slot->tts_isempty); /* * If we have a physical tuple, and it's locally palloc'd, we have nothing * to do. */ if (slot->tts_tuple && slot->tts_shouldFree) return slot->tts_tuple; /* * Otherwise, copy or build a tuple, and then store it as the new slot * value. (Note: tts_nvalid will be reset to zero here. There are cases * in which this could be optimized but it's probably not worth worrying * about.) * * We may be called in a context that is shorter-lived than the tuple * slot, but we have to ensure that the materialized tuple will survive * anyway. */ oldContext = MemoryContextSwitchTo(slot->tts_mcxt); newTuple = ExecCopySlotTuple(slot); MemoryContextSwitchTo(oldContext); ExecStoreTuple(newTuple, slot, InvalidBuffer, true); return slot->tts_tuple; }
/* * save_consumer_state * * Saves the given consumer's state to pipeline_kafka_consumers */ static void save_consumer_state(KafkaConsumer *consumer, int partition_group) { ScanKeyData skey[1]; HeapTuple tup = NULL; HeapScanDesc scan; Relation offsets = open_pipeline_kafka_offsets(); Datum values[OFFSETS_RELATION_NATTS]; bool nulls[OFFSETS_RELATION_NATTS]; bool replace[OFFSETS_RELATION_NATTS]; bool updated[consumer->num_partitions]; TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(offsets)); int partition; MemSet(updated, false, sizeof(updated)); ScanKeyInit(&skey[0], OFFSETS_ATTR_CONSUMER, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(consumer->id)); scan = heap_beginscan(offsets, GetTransactionSnapshot(), 1, skey); /* update any existing offset rows */ while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { Datum d; bool isnull; int partition; HeapTuple modified; ExecStoreTuple(tup, slot, InvalidBuffer, false); d = slot_getattr(slot, OFFSETS_ATTR_PARTITION, &isnull); partition = DatumGetInt32(d); /* we only want to update the offsets we're responsible for */ if (partition % consumer->parallelism != partition_group) continue; MemSet(nulls, false, sizeof(nulls)); MemSet(replace, false, sizeof(nulls)); values[OFFSETS_ATTR_OFFSET - 1] = Int64GetDatum(consumer->offsets[partition]); replace[OFFSETS_ATTR_OFFSET - 1] = true; updated[partition] = true; modified = heap_modify_tuple(tup, RelationGetDescr(offsets), values, nulls, replace); simple_heap_update(offsets, &modified->t_self, modified); } heap_endscan(scan); /* now insert any offset rows that didn't already exist */ for (partition = 0; partition < consumer->num_partitions; partition++) { if (updated[partition]) continue; if (partition % consumer->parallelism != partition_group) continue; values[OFFSETS_ATTR_CONSUMER - 1] = ObjectIdGetDatum(consumer->id); values[OFFSETS_ATTR_PARTITION - 1] = Int32GetDatum(partition); values[OFFSETS_ATTR_OFFSET - 1] = Int64GetDatum(consumer->offsets[partition]); MemSet(nulls, false, sizeof(nulls)); tup = heap_form_tuple(RelationGetDescr(offsets), values, nulls); simple_heap_insert(offsets, tup); } ExecDropSingleTupleTableSlot(slot); heap_close(offsets, NoLock); }
/* ---------------------------------------------------------------- * SeqNext * * This is a workhorse for ExecSeqScan * ---------------------------------------------------------------- */ static TupleTableSlot * SeqNext(SeqScanState *node) { HeapTuple tuple; HeapScanDesc scandesc; Index scanrelid; EState *estate; ScanDirection direction; TupleTableSlot *slot; /* * get information from the estate and scan state */ estate = node->ps.state; scandesc = node->ss_currentScanDesc; scanrelid = ((SeqScan *) node->ps.plan)->scanrelid; direction = estate->es_direction; slot = node->ss_ScanTupleSlot; /* * Check if we are evaluating PlanQual for tuple of this relation. * Additional checking is not good, but no other way for now. We could * introduce new nodes for this case and handle SeqScan --> NewNode * switching in Init/ReScan plan... */ if (estate->es_evTuple != NULL && estate->es_evTuple[scanrelid - 1] != NULL) { if (estate->es_evTupleNull[scanrelid - 1]) return ExecClearTuple(slot); ExecStoreTuple(estate->es_evTuple[scanrelid - 1], slot, InvalidBuffer, false); /* * Note that unlike IndexScan, SeqScan never use keys in * heap_beginscan (and this is very bad) - so, here we do not check * are keys ok or not. */ /* Flag for the next call that no more tuples */ estate->es_evTupleNull[scanrelid - 1] = true; return slot; } /* * get the next tuple from the access methods */ scandesc->estate=estate; scandesc->slot=slot; scandesc->qual=node->ps.qual; tuple = heap_getnext(scandesc, direction); /* * save the tuple and the buffer returned to us by the access methods in * our scan tuple slot and return the slot. Note: we pass 'false' because * tuples returned by heap_getnext() are pointers onto disk pages and were * not created with palloc() and so should not be pfree()'d. Note also * that ExecStoreTuple will increment the refcount of the buffer; the * refcount will not be dropped until the tuple table slot is cleared. */ if (tuple) ExecStoreTuple(tuple, /* tuple to store */ slot, /* slot to store in */ scandesc->rs_cbuf, /* buffer associated with this * tuple */ false); /* don't pfree this pointer */ else ExecClearTuple(slot); return slot; }
/* ---------------------------------------------------------------- * ExecSort * * Sorts tuples from the outer subtree of the node using tuplesort, * which saves the results in a temporary file or memory. After the * initial call, returns a tuple from the file with each call. * * Conditions: * -- none. * * Initial States: * -- the outer child is prepared to return the first tuple. * ---------------------------------------------------------------- */ TupleTableSlot * ExecSort(SortState *node) { EState *estate; ScanDirection dir; Tuplesortstate *tuplesortstate; HeapTuple heapTuple; TupleTableSlot *slot; bool should_free; /* * get state info from node */ SO1_printf("ExecSort: %s\n", "entering routine"); estate = node->ss.ps.state; dir = estate->es_direction; tuplesortstate = (Tuplesortstate *) node->tuplesortstate; /* * If first time through, read all tuples from outer plan and pass them to * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. */ if (!node->sort_Done) { Sort *plannode = (Sort *) node->ss.ps.plan; PlanState *outerNode; TupleDesc tupDesc; SO1_printf("ExecSort: %s\n", "sorting subplan"); /* * Want to scan subplan in the forward direction while creating the * sorted data. */ estate->es_direction = ForwardScanDirection; /* * Initialize tuplesort module. */ SO1_printf("ExecSort: %s\n", "calling tuplesort_begin"); outerNode = outerPlanState(node); tupDesc = ExecGetResultType(outerNode); tuplesortstate = tuplesort_begin_heap(tupDesc, plannode->numCols, plannode->sortOperators, plannode->sortColIdx, work_mem, node->randomAccess); node->tuplesortstate = (void *) tuplesortstate; /* * Scan the subplan and feed all the tuples to tuplesort. */ for (;;) { slot = ExecProcNode(outerNode); if (TupIsNull(slot)) break; tuplesort_puttuple(tuplesortstate, (void *) ExecFetchSlotTuple(slot)); } /* * Complete the sort. */ tuplesort_performsort(tuplesortstate); /* * restore to user specified direction */ estate->es_direction = dir; /* * finally set the sorted flag to true */ node->sort_Done = true; SO1_printf("ExecSort: %s\n", "sorting done"); } SO1_printf("ExecSort: %s\n", "retrieving tuple from tuplesort"); /* * Get the first or next tuple from tuplesort. Returns NULL if no more * tuples. */ heapTuple = tuplesort_getheaptuple(tuplesortstate, ScanDirectionIsForward(dir), &should_free); slot = node->ss.ps.ps_ResultTupleSlot; if (heapTuple) return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); else return ExecClearTuple(slot); }
Datum pipeline_stream_insert(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; Trigger *trig = trigdata->tg_trigger; HeapTuple tup; List *fdw_private; int i; ResultRelInfo rinfo; if (trig->tgnargs < 1) elog(ERROR, "pipeline_stream_insert: must be provided a stream name"); /* make sure it's called as a trigger */ if (!CALLED_AS_TRIGGER(fcinfo)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called as trigger"))); /* and that it's called on update or insert */ if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) && !TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called on insert or update"))); /* and that it's called for each row */ if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called for each row"))); /* and that it's called after insert or update */ if (!TRIGGER_FIRED_AFTER(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called after insert or update"))); if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) tup = trigdata->tg_newtuple; else tup = trigdata->tg_trigtuple; fdw_private = list_make1(RelationGetDescr(trigdata->tg_relation)); MemSet(&rinfo, 0, sizeof(ResultRelInfo)); rinfo.ri_RangeTableIndex = 1; /* dummy */ rinfo.ri_TrigDesc = NULL; for (i = 0; i < trig->tgnargs; i++) { RangeVar *stream; Relation rel; StreamInsertState *sis; stream = makeRangeVarFromNameList(textToQualifiedNameList(cstring_to_text(trig->tgargs[i]))); rel = heap_openrv(stream, AccessShareLock); rinfo.ri_RelationDesc = rel; BeginStreamModify(NULL, &rinfo, fdw_private, 0, 0); sis = (StreamInsertState *) rinfo.ri_FdwState; Assert(sis); if (sis->queries) { TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel)); ExecStoreTuple(tup, slot, InvalidBuffer, false); ExecStreamInsert(NULL, &rinfo, slot, NULL); ExecClearTuple(slot); ExecDropSingleTupleTableSlot(slot); pgstat_report_streamstat(true); } EndStreamModify(NULL, &rinfo); heap_close(rel, AccessShareLock); } return PointerGetDatum(tup); }
/* * Search the relation 'rel' for tuple using the index. * * If a matching tuple is found, lock it with lockmode, fill the slot with its * contents, and return true. Return false otherwise. */ bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; TransactionId xwait; Relation idxrel; bool found; /* Open the index. */ idxrel = index_open(idxoid, RowExclusiveLock); /* Start an index scan. */ InitDirtySnapshot(snap); scan = index_beginscan(rel, idxrel, &snap, RelationGetNumberOfAttributes(idxrel), 0); /* Build scan key. */ build_replindex_scan_key(skey, rel, idxrel, searchslot); retry: found = false; index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0); /* Try to find the tuple */ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) { found = true; ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; /* * If the tuple is locked, wait for locking transaction to finish and * retry. */ if (TransactionIdIsValid(xwait)) { XactLockTableWait(xwait, NULL, NULL, XLTW_None); goto retry; } } /* Found tuple, try to lock it in the lockmode. */ if (found) { Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; HeapTupleData locktup; ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), lockmode, LockWaitBlock, false /* don't follow updates */ , &buf, &hufd); /* the tuple slot already has the buffer pinned */ ReleaseBuffer(buf); PopActiveSnapshot(); switch (res) { case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: /* XXX: Improve handling here */ ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); default: elog(ERROR, "unexpected heap_lock_tuple status: %u", res); break; } } index_endscan(scan); /* Don't release lock until commit. */ index_close(idxrel, NoLock); return found; }
/* * Handle UPDATE message. * * TODO: FDW support */ static void apply_handle_update(StringInfo s) { LogicalRepRelMapEntry *rel; LogicalRepRelId relid; Oid idxoid; EState *estate; EPQState epqstate; LogicalRepTupleData oldtup; LogicalRepTupleData newtup; bool has_oldtup; TupleTableSlot *localslot; TupleTableSlot *remoteslot; bool found; MemoryContext oldctx; ensure_transaction(); relid = logicalrep_read_update(s, &has_oldtup, &oldtup, &newtup); rel = logicalrep_rel_open(relid, RowExclusiveLock); if (!should_apply_changes_for_rel(rel)) { /* * The relation can't become interesting in the middle of the * transaction so it's safe to unlock it. */ logicalrep_rel_close(rel, RowExclusiveLock); return; } /* Check if we can do the update. */ check_relation_updatable(rel); /* Initialize the executor state. */ estate = create_estate_for_relation(rel); remoteslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel)); localslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel)); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1); PushActiveSnapshot(GetTransactionSnapshot()); ExecOpenIndices(estate->es_result_relation_info, false); /* Build the search tuple. */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); slot_store_cstrings(remoteslot, rel, has_oldtup ? oldtup.values : newtup.values); MemoryContextSwitchTo(oldctx); /* * Try to find tuple using either replica identity index, primary key or * if needed, sequential scan. */ idxoid = GetRelationIdentityOrPK(rel->localrel); Assert(OidIsValid(idxoid) || (rel->remoterel.replident == REPLICA_IDENTITY_FULL && has_oldtup)); if (OidIsValid(idxoid)) found = RelationFindReplTupleByIndex(rel->localrel, idxoid, LockTupleExclusive, remoteslot, localslot); else found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive, remoteslot, localslot); ExecClearTuple(remoteslot); /* * Tuple found. * * Note this will fail if there are other conflicting unique indexes. */ if (found) { /* Process and store remote tuple in the slot */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); ExecStoreTuple(localslot->tts_tuple, remoteslot, InvalidBuffer, false); slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed); MemoryContextSwitchTo(oldctx); EvalPlanQualSetSlot(&epqstate, remoteslot); /* Do the actual update. */ ExecSimpleRelationUpdate(estate, &epqstate, localslot, remoteslot); } else { /* * The tuple to be updated could not be found. * * TODO what to do here, change the log level to LOG perhaps? */ elog(DEBUG1, "logical replication did not find row for update " "in replication target relation \"%s\"", RelationGetRelationName(rel->localrel)); } /* Cleanup. */ ExecCloseIndices(estate->es_result_relation_info); PopActiveSnapshot(); /* Handle queued AFTER triggers. */ AfterTriggerEndQuery(estate); EvalPlanQualEnd(&epqstate); ExecResetTupleTable(estate->es_tupleTable, false); FreeExecutorState(estate); logicalrep_rel_close(rel, NoLock); CommandCounterIncrement(); }
/* * Search the relation 'rel' for tuple using the sequential scan. * * If a matching tuple is found, lock it with lockmode, fill the slot with its * contents, and return true. Return false otherwise. * * Note that this stops on the first matching tuple. * * This can obviously be quite slow on tables that have more than few rows. */ bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; HeapScanDesc scan; SnapshotData snap; TransactionId xwait; bool found; TupleDesc desc = RelationGetDescr(rel); Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor)); /* Start an index scan. */ InitDirtySnapshot(snap); scan = heap_beginscan(rel, &snap, 0, NULL); retry: found = false; heap_rescan(scan, NULL); /* Try to find the tuple */ while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { if (!tuple_equals_slot(desc, scantuple, searchslot)) continue; found = true; ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; /* * If the tuple is locked, wait for locking transaction to finish and * retry. */ if (TransactionIdIsValid(xwait)) { XactLockTableWait(xwait, NULL, NULL, XLTW_None); goto retry; } } /* Found tuple, try to lock it in the lockmode. */ if (found) { Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; HeapTupleData locktup; ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), lockmode, LockWaitBlock, false /* don't follow updates */ , &buf, &hufd); /* the tuple slot already has the buffer pinned */ ReleaseBuffer(buf); PopActiveSnapshot(); switch (res) { case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: /* XXX: Improve handling here */ ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); default: elog(ERROR, "unexpected heap_lock_tuple status: %u", res); break; } } heap_endscan(scan); return found; }
/* ---------------------------------------------------------------- * ExecProcAppend * * Handles the iteration over the multiple scans. * * NOTE: Can't call this ExecAppend, that name is used in execMain. * ---------------------------------------------------------------- */ TupleTableSlot * ExecProcAppend(AppendState *node) { EState *estate; int whichplan; PlanState *subnode; TupleTableSlot *result; TupleTableSlot *result_slot; ScanDirection direction; /* * get information from the node */ estate = node->ps.state; direction = estate->es_direction; whichplan = node->as_whichplan; result_slot = node->ps.ps_ResultTupleSlot; /* * figure out which subplan we are currently processing */ subnode = node->appendplans[whichplan]; /* * get a tuple from the subplan */ result = ExecProcNode(subnode); if (!TupIsNull(result)) { /* * if the subplan gave us something then place a copy of whatever * we get into our result slot and return it. * * Note we rely on the subplan to retain ownership of the tuple for * as long as we need it --- we don't copy it. */ return ExecStoreTuple(result->val, result_slot, InvalidBuffer, false); } else { /* * .. go on to the "next" subplan in the appropriate direction and * try processing again (recursively) */ if (ScanDirectionIsForward(direction)) node->as_whichplan++; else node->as_whichplan--; /* * return something from next node or an empty slot if all of our * subplans have been exhausted. */ if (exec_append_initialize_next(node)) { ExecSetSlotDescriptorIsNew(result_slot, true); return ExecProcAppend(node); } else return ExecClearTuple(result_slot); } }
/* ---------------------------------------------------------------- * BitmapHeapNext * * Retrieve next tuple from the BitmapHeapScan node's currentRelation * ---------------------------------------------------------------- */ static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node) { EState *estate; ExprContext *econtext; HeapScanDesc scan; Index scanrelid; TIDBitmap *tbm; TBMIterateResult *tbmres; OffsetNumber targoffset; TupleTableSlot *slot; /* * extract necessary information from index scan node */ estate = node->ss.ps.state; econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; scan = node->ss.ss_currentScanDesc; scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid; tbm = node->tbm; tbmres = node->tbmres; /* * Check if we are evaluating PlanQual for tuple of this relation. * Additional checking is not good, but no other way for now. We could * introduce new nodes for this case and handle IndexScan --> NewNode * switching in Init/ReScan plan... */ if (estate->es_evTuple != NULL && estate->es_evTuple[scanrelid - 1] != NULL) { if (estate->es_evTupleNull[scanrelid - 1]) return ExecClearTuple(slot); ExecStoreTuple(estate->es_evTuple[scanrelid - 1], slot, InvalidBuffer, false); /* Does the tuple meet the original qual conditions? */ econtext->ecxt_scantuple = slot; ResetExprContext(econtext); if (!ExecQual(node->bitmapqualorig, econtext, false)) ExecClearTuple(slot); /* would not be returned by scan */ /* Flag for the next call that no more tuples */ estate->es_evTupleNull[scanrelid - 1] = true; return slot; } /* * If we haven't yet performed the underlying index scan, do it, and * prepare the bitmap to be iterated over. */ if (tbm == NULL) { tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); if (!tbm || !IsA(tbm, TIDBitmap)) elog(ERROR, "unrecognized result from subplan"); node->tbm = tbm; node->tbmres = tbmres = NULL; tbm_begin_iterate(tbm); } for (;;) { Page dp; ItemId lp; /* * Get next page of results if needed */ if (tbmres == NULL) { node->tbmres = tbmres = tbm_iterate(tbm); if (tbmres == NULL) { /* no more entries in the bitmap */ break; } /* * Ignore any claimed entries past what we think is the end of the * relation. (This is probably not necessary given that we got at * least AccessShareLock on the table before performing any of the * indexscans, but let's be safe.) */ if (tbmres->blockno >= scan->rs_nblocks) { node->tbmres = tbmres = NULL; continue; } /* * Fetch the current heap page and identify candidate tuples. */ bitgetpage(scan, tbmres); /* * Set rs_cindex to first slot to examine */ scan->rs_cindex = 0; } else { /* * Continuing in previously obtained page; advance rs_cindex */ scan->rs_cindex++; } /* * Out of range? If so, nothing more to look at on this page */ if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) { node->tbmres = tbmres = NULL; continue; } /* * Okay to fetch the tuple */ targoffset = scan->rs_vistuples[scan->rs_cindex]; dp = (Page) BufferGetPage(scan->rs_cbuf); lp = PageGetItemId(dp, targoffset); Assert(ItemIdIsNormal(lp)); scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); scan->rs_ctup.t_len = ItemIdGetLength(lp); ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); pgstat_count_heap_fetch(scan->rs_rd); /* * Set up the result slot to point to this tuple. Note that the slot * acquires a pin on the buffer. */ ExecStoreTuple(&scan->rs_ctup, slot, scan->rs_cbuf, false); /* * If we are using lossy info, we have to recheck the qual conditions * at every tuple. */ if (tbmres->ntuples < 0) { econtext->ecxt_scantuple = slot; ResetExprContext(econtext); if (!ExecQual(node->bitmapqualorig, econtext, false)) { /* Fails recheck, so drop it and loop back for another */ ExecClearTuple(slot); continue; } } /* OK to return this tuple */ return slot; } /* * if we get here it means we are at the end of the scan.. */ return ExecClearTuple(slot); }
/* * ExecScanFetch -- fetch next potential tuple * * This routine is concerned with substituting a test tuple if we are * inside an EvalPlanQual recheck. If we aren't, just execute * the access method's next-tuple routine. */ static inline TupleTableSlot * ExecScanFetch(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd) { EState *estate = node->ps.state; if (estate->es_epqTuple != NULL) { /* * We are inside an EvalPlanQual recheck. Return the test tuple if * one is available, after rechecking any access-method-specific * conditions. */ Index scanrelid = ((Scan *) node->ps.plan)->scanrelid; if (scanrelid == 0) { TupleTableSlot *slot = node->ss_ScanTupleSlot; /* * This is a ForeignScan or CustomScan which has pushed down a * join to the remote side. The recheck method is responsible not * only for rechecking the scan/join quals but also for storing * the correct tuple in the slot. */ if (!(*recheckMtd) (node, slot)) ExecClearTuple(slot); /* would not be returned by scan */ return slot; } else if (estate->es_epqTupleSet[scanrelid - 1]) { TupleTableSlot *slot = node->ss_ScanTupleSlot; /* Return empty slot if we already returned a tuple */ if (estate->es_epqScanDone[scanrelid - 1]) return ExecClearTuple(slot); /* Else mark to remember that we shouldn't return more */ estate->es_epqScanDone[scanrelid - 1] = true; /* Return empty slot if we haven't got a test tuple */ if (estate->es_epqTuple[scanrelid - 1] == NULL) return ExecClearTuple(slot); /* Store test tuple in the plan node's scan slot */ ExecStoreTuple(estate->es_epqTuple[scanrelid - 1], slot, InvalidBuffer, false); /* Check if it meets the access-method conditions */ if (!(*recheckMtd) (node, slot)) ExecClearTuple(slot); /* would not be returned by scan */ return slot; } } /* * Run the node-type-specific access method function to get the next tuple */ return (*accessMtd) (node); }
/* * ExecSetOp for non-hashed case */ static TupleTableSlot * setop_retrieve_direct(SetOpState *setopstate) { SetOp *node = (SetOp *) setopstate->ps.plan; PlanState *outerPlan; SetOpStatePerGroup pergroup; TupleTableSlot *outerslot; TupleTableSlot *resultTupleSlot; /* * get state info from node */ outerPlan = outerPlanState(setopstate); pergroup = setopstate->pergroup; resultTupleSlot = setopstate->ps.ps_ResultTupleSlot; /* * We loop retrieving groups until we find one we should return */ while (!setopstate->setop_done) { /* * If we don't already have the first tuple of the new group, fetch it * from the outer plan. */ if (setopstate->grp_firstTuple == NULL) { outerslot = ExecProcNode(outerPlan); if (!TupIsNull(outerslot)) { /* Make a copy of the first input tuple */ setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot); } else { /* outer plan produced no tuples at all */ setopstate->setop_done = true; return NULL; } } /* * Store the copied first input tuple in the tuple table slot reserved * for it. The tuple will be deleted when it is cleared from the * slot. */ ExecStoreTuple(setopstate->grp_firstTuple, resultTupleSlot, InvalidBuffer, true); setopstate->grp_firstTuple = NULL; /* don't keep two pointers */ /* Initialize working state for a new input tuple group */ initialize_counts(pergroup); /* Count the first input tuple */ advance_counts(pergroup, fetch_tuple_flag(setopstate, resultTupleSlot)); /* * Scan the outer plan until we exhaust it or cross a group boundary. */ for (;;) { outerslot = ExecProcNode(outerPlan); if (TupIsNull(outerslot)) { /* no more outer-plan tuples available */ setopstate->setop_done = true; break; } /* * Check whether we've crossed a group boundary. */ if (!execTuplesMatch(resultTupleSlot, outerslot, node->numCols, node->dupColIdx, setopstate->eqfunctions, setopstate->tempContext)) { /* * Save the first input tuple of the next group. */ setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot); break; } /* Still in same group, so count this tuple */ advance_counts(pergroup, fetch_tuple_flag(setopstate, outerslot)); } /* * Done scanning input tuple group. See if we should emit any copies * of result tuple, and if so return the first copy. */ set_output_count(setopstate, pergroup); if (setopstate->numOutput > 0) { setopstate->numOutput--; return resultTupleSlot; } } /* No more groups */ ExecClearTuple(resultTupleSlot); return NULL; }
/* * load_consumer_state * * Read consumer state from pipeline_kafka_consumers into the given struct */ static void load_consumer_state(Oid worker_id, KafkaConsumer *consumer) { ScanKeyData skey[1]; HeapTuple tup = NULL; HeapScanDesc scan; Relation consumers = open_pipeline_kafka_consumers(); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(consumers)); Datum d; bool isnull; text *qualified; MemoryContext old; MemSet(consumer, 0, sizeof(KafkaConsumer)); ScanKeyInit(&skey[0], -2, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(worker_id)); scan = heap_beginscan(consumers, GetTransactionSnapshot(), 1, skey); tup = heap_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) elog(ERROR, "kafka consumer %d not found", worker_id); ExecStoreTuple(tup, slot, InvalidBuffer, false); consumer->id = HeapTupleGetOid(tup); d = slot_getattr(slot, CONSUMER_ATTR_RELATION, &isnull); /* we don't want anything that's palloc'd to get freed when we commit */ old = MemoryContextSwitchTo(CacheMemoryContext); /* target relation */ qualified = (text *) DatumGetPointer(d); consumer->rel = makeRangeVarFromNameList(textToQualifiedNameList(qualified)); /* topic */ d = slot_getattr(slot, CONSUMER_ATTR_TOPIC, &isnull); consumer->topic = TextDatumGetCString(d); /* format */ d = slot_getattr(slot, CONSUMER_ATTR_FORMAT, &isnull); consumer->format = TextDatumGetCString(d); /* delimiter */ d = slot_getattr(slot, CONSUMER_ATTR_DELIMITER, &isnull); if (!isnull) consumer->delimiter = TextDatumGetCString(d); else consumer->delimiter = NULL; /* quote character */ d = slot_getattr(slot, CONSUMER_ATTR_QUOTE, &isnull); if (!isnull) consumer->quote = TextDatumGetCString(d); else consumer->quote = NULL; /* escape character */ d = slot_getattr(slot, CONSUMER_ATTR_ESCAPE, &isnull); if (!isnull) consumer->escape = TextDatumGetCString(d); else consumer->escape = NULL; /* now load all brokers */ consumer->brokers = get_all_brokers(); MemoryContextSwitchTo(old); d = slot_getattr(slot, CONSUMER_ATTR_PARALLELISM, &isnull); consumer->parallelism = DatumGetInt32(d); /* batch size */ d = slot_getattr(slot, CONSUMER_ATTR_BATCH_SIZE, &isnull); consumer->batch_size = DatumGetInt32(d); ExecDropSingleTupleTableSlot(slot); heap_endscan(scan); heap_close(consumers, NoLock); }
/* * unique_key_recheck - trigger function to do a deferred uniqueness check. * * This now also does deferred exclusion-constraint checks, so the name is * somewhat historical. * * This is invoked as an AFTER ROW trigger for both INSERT and UPDATE, * for any rows recorded as potentially violating a deferrable unique * or exclusion constraint. * * This may be an end-of-statement check, a commit-time check, or a * check triggered by a SET CONSTRAINTS command. */ Datum unique_key_recheck(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; const char *funcname = "unique_key_recheck"; HeapTuple new_row; ItemPointerData tmptid; Relation indexRel; IndexInfo *indexInfo; EState *estate; ExprContext *econtext; TupleTableSlot *slot; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; /* * Make sure this is being called as an AFTER ROW trigger. Note: * translatable error strings are shared with ri_triggers.c, so resist the * temptation to fold the function name into them. */ if (!CALLED_AS_TRIGGER(fcinfo)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" was not called by trigger manager", funcname))); if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" must be fired AFTER ROW", funcname))); /* * Get the new data that was inserted/updated. */ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) new_row = trigdata->tg_trigtuple; else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) new_row = trigdata->tg_newtuple; else { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" must be fired for INSERT or UPDATE", funcname))); new_row = NULL; /* keep compiler quiet */ } /* * If the new_row is now dead (ie, inserted and then deleted within our * transaction), we can skip the check. However, we have to be careful, * because this trigger gets queued only in response to index insertions; * which means it does not get queued for HOT updates. The row we are * called for might now be dead, but have a live HOT child, in which case * we still need to make the check. Therefore we have to use * heap_hot_search, not just HeapTupleSatisfiesVisibility as is done in * the comparable test in RI_FKey_check. * * This might look like just an optimization, because the index AM will * make this identical test before throwing an error. But it's actually * needed for correctness, because the index AM will also throw an error * if it doesn't find the index entry for the row. If the row's dead then * it's possible the index entry has also been marked dead, and even * removed. */ tmptid = new_row->t_self; if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL)) { /* * All rows in the HOT chain are dead, so skip the check. */ return PointerGetDatum(NULL); } /* * Open the index, acquiring a RowExclusiveLock, just as if we were going * to update it. (This protects against possible changes of the index * schema, not against concurrent updates.) */ indexRel = index_open(trigdata->tg_trigger->tgconstrindid, RowExclusiveLock); indexInfo = BuildIndexInfo(indexRel); /* * The heap tuple must be put into a slot for FormIndexDatum. */ slot = MakeSingleTupleTableSlot(RelationGetDescr(trigdata->tg_relation)); ExecStoreTuple(new_row, slot, InvalidBuffer, false); /* * Typically the index won't have expressions, but if it does we need an * EState to evaluate them. We need it for exclusion constraints too, * even if they are just on simple columns. */ if (indexInfo->ii_Expressions != NIL || indexInfo->ii_ExclusionOps != NULL) { estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); econtext->ecxt_scantuple = slot; } else estate = NULL; /* * Form the index values and isnull flags for the index entry that we need * to check. * * Note: if the index uses functions that are not as immutable as they are * supposed to be, this could produce an index tuple different from the * original. The index AM can catch such errors by verifying that it * finds a matching index entry with the tuple's TID. For exclusion * constraints we check this in check_exclusion_constraint(). */ FormIndexDatum(indexInfo, slot, estate, values, isnull); /* * Now do the appropriate check. */ if (indexInfo->ii_ExclusionOps == NULL) { /* * Note: this is not a real insert; it is a check that the index entry * that has already been inserted is unique. */ index_insert(indexRel, values, isnull, &(new_row->t_self), trigdata->tg_relation, UNIQUE_CHECK_EXISTING); } else { /* * For exclusion constraints we just do the normal check, but now it's * okay to throw error. */ check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, &(new_row->t_self), values, isnull, estate, false, false); } /* * If that worked, then this index entry is unique or non-excluded, and we * are done. */ if (estate != NULL) FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); index_close(indexRel, RowExclusiveLock); return PointerGetDatum(NULL); }
/* ---------------------------------------------------------------- * ExecMaterial * * As long as we are at the end of the data collected in the tuplestore, * we collect one new row from the subplan on each call, and stash it * aside in the tuplestore before returning it. The tuplestore is * only read if we are asked to scan backwards, rescan, or mark/restore. * * ---------------------------------------------------------------- */ TupleTableSlot * /* result tuple from subplan */ ExecMaterial(MaterialState *node) { EState *estate; ScanDirection dir; bool forward; Tuplestorestate *tuplestorestate; HeapTuple heapTuple = NULL; bool should_free = false; bool eof_tuplestore; TupleTableSlot *slot; /* * get state info from node */ estate = node->ss.ps.state; dir = estate->es_direction; forward = ScanDirectionIsForward(dir); tuplestorestate = (Tuplestorestate *) node->tuplestorestate; /* * If first time through, and we need a tuplestore, initialize it. */ if (tuplestorestate == NULL && node->randomAccess) { tuplestorestate = tuplestore_begin_heap(true, false, work_mem); node->tuplestorestate = (void *) tuplestorestate; } /* * If we are not at the end of the tuplestore, or are going backwards, try * to fetch a tuple from tuplestore. */ eof_tuplestore = (tuplestorestate == NULL) || tuplestore_ateof(tuplestorestate); if (!forward && eof_tuplestore) { if (!node->eof_underlying) { /* * When reversing direction at tuplestore EOF, the first * getheaptuple call will fetch the last-added tuple; but we want * to return the one before that, if possible. So do an extra * fetch. */ heapTuple = tuplestore_getheaptuple(tuplestorestate, forward, &should_free); if (heapTuple == NULL) return NULL; /* the tuplestore must be empty */ if (should_free) heap_freetuple(heapTuple); } eof_tuplestore = false; } if (!eof_tuplestore) { heapTuple = tuplestore_getheaptuple(tuplestorestate, forward, &should_free); if (heapTuple == NULL && forward) eof_tuplestore = true; } /* * If necessary, try to fetch another row from the subplan. * * Note: the eof_underlying state variable exists to short-circuit further * subplan calls. It's not optional, unfortunately, because some plan * node types are not robust about being called again when they've already * returned NULL. */ if (eof_tuplestore && !node->eof_underlying) { PlanState *outerNode; TupleTableSlot *outerslot; /* * We can only get here with forward==true, so no need to worry about * which direction the subplan will go. */ outerNode = outerPlanState(node); outerslot = ExecProcNode(outerNode); if (TupIsNull(outerslot)) { node->eof_underlying = true; return NULL; } heapTuple = ExecFetchSlotTuple(outerslot); should_free = false; /* * Append returned tuple to tuplestore, too. NOTE: because the * tuplestore is certainly in EOF state, its read position will move * forward over the added tuple. This is what we want. */ if (tuplestorestate) tuplestore_puttuple(tuplestorestate, (void *) heapTuple); } /* * Return the obtained tuple, if any. */ slot = (TupleTableSlot *) node->ss.ps.ps_ResultTupleSlot; if (heapTuple) return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free); else return ExecClearTuple(slot); }
/* * CatalogIndexInsert - insert index entries for one catalog tuple * * This should be called for each inserted or updated catalog tuple. * * This is effectively a cut-down version of ExecInsertIndexTuples. */ void CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) { int i; int numIndexes; RelationPtr relationDescs; Relation heapRelation; TupleTableSlot *slot; IndexInfo **indexInfoArray; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; /* HOT update does not require index inserts */ if (HeapTupleIsHeapOnly(heapTuple)) return; /* * Get information from the state structure. Fall out if nothing to do. */ numIndexes = indstate->ri_NumIndices; if (numIndexes == 0) return; relationDescs = indstate->ri_IndexRelationDescs; indexInfoArray = indstate->ri_IndexRelationInfo; heapRelation = indstate->ri_RelationDesc; /* Need a slot to hold the tuple being examined */ slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation)); ExecStoreTuple(heapTuple, slot, InvalidBuffer, false); /* * for each index, form and insert the index tuple */ for (i = 0; i < numIndexes; i++) { IndexInfo *indexInfo; indexInfo = indexInfoArray[i]; /* If the index is marked as read-only, ignore it */ if (!indexInfo->ii_ReadyForInserts) continue; /* * Expressional and partial indexes on system catalogs are not * supported, nor exclusion constraints, nor deferred uniqueness */ Assert(indexInfo->ii_Expressions == NIL); Assert(indexInfo->ii_Predicate == NIL); Assert(indexInfo->ii_ExclusionOps == NULL); Assert(relationDescs[i]->rd_index->indimmediate); /* * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. */ FormIndexDatum(indexInfo, slot, NULL, /* no expression eval to do */ values, isnull); /* * The index AM does the rest. */ index_insert(relationDescs[i], /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, relationDescs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); } ExecDropSingleTupleTableSlot(slot); }
/* ---------------------------------------------------------------- * IndexNext * * Retrieve a tuple from the IndexScan node's currentRelation * using the index specified in the IndexScanState information. * ---------------------------------------------------------------- */ static TupleTableSlot * IndexNext(IndexScanState *node) { EState *estate; ExprContext *econtext; ScanDirection direction; IndexScanDesc scandesc; HeapTuple tuple; TupleTableSlot *slot; /* * extract necessary information from index scan node */ estate = node->ss.ps.state; direction = estate->es_direction; /* flip direction if this is an overall backward scan */ if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir)) { if (ScanDirectionIsForward(direction)) direction = BackwardScanDirection; else if (ScanDirectionIsBackward(direction)) direction = ForwardScanDirection; } scandesc = node->iss_ScanDesc; econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; /* * ok, now that we have what we need, fetch the next tuple. */ while ((tuple = index_getnext(scandesc, direction)) != NULL) { /* * Store the scanned tuple in the scan tuple slot of the scan state. * Note: we pass 'false' because tuples returned by amgetnext are * pointers onto disk pages and must not be pfree()'d. */ ExecStoreTuple(tuple, /* tuple to store */ slot, /* slot to store in */ scandesc->xs_cbuf, /* buffer containing tuple */ false); /* don't pfree */ /* * If the index was lossy, we have to recheck the index quals using * the real tuple. */ if (scandesc->xs_recheck) { econtext->ecxt_scantuple = slot; ResetExprContext(econtext); if (!ExecQual(node->indexqualorig, econtext, false)) continue; /* nope, so ask index for another one */ } return slot; } /* * if we get here it means the index scan failed so we are at the end of * the scan.. */ return ExecClearTuple(slot); }
/* ---------------------------------------------------------------- * BitmapHeapNext * * Retrieve next tuple from the BitmapHeapScan node's currentRelation * ---------------------------------------------------------------- */ static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node) { ExprContext *econtext; HeapScanDesc scan; TIDBitmap *tbm; TBMIterator *tbmiterator; TBMIterateResult *tbmres; TBMIterator *prefetch_iterator; OffsetNumber targoffset; TupleTableSlot *slot; /* * extract necessary information from index scan node */ econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; scan = node->ss.ss_currentScanDesc; tbm = node->tbm; tbmiterator = node->tbmiterator; tbmres = node->tbmres; prefetch_iterator = node->prefetch_iterator; /* * If we haven't yet performed the underlying index scan, do it, and begin * the iteration over the bitmap. * * For prefetching, we use *two* iterators, one for the pages we are * actually scanning and another that runs ahead of the first for * prefetching. node->prefetch_pages tracks exactly how many pages ahead * the prefetch iterator is. Also, node->prefetch_target tracks the * desired prefetch distance, which starts small and increases up to the * GUC-controlled maximum, target_prefetch_pages. This is to avoid doing * a lot of prefetching in a scan that stops after a few tuples because of * a LIMIT. */ if (tbm == NULL) { tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); if (!tbm || !IsA(tbm, TIDBitmap)) elog(ERROR, "unrecognized result from subplan"); node->tbm = tbm; node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm); node->tbmres = tbmres = NULL; #ifdef USE_PREFETCH if (target_prefetch_pages > 0) { node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm); node->prefetch_pages = 0; node->prefetch_target = -1; } #endif /* USE_PREFETCH */ } for (;;) { Page dp; ItemId lp; /* * Get next page of results if needed */ if (tbmres == NULL) { node->tbmres = tbmres = tbm_iterate(tbmiterator); if (tbmres == NULL) { /* no more entries in the bitmap */ break; } #ifdef USE_PREFETCH if (node->prefetch_pages > 0) { /* The main iterator has closed the distance by one page */ node->prefetch_pages--; } else if (prefetch_iterator) { /* Do not let the prefetch iterator get behind the main one */ TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) elog(ERROR, "prefetch and main iterators are out of sync"); } #endif /* USE_PREFETCH */ /* * Ignore any claimed entries past what we think is the end of the * relation. (This is probably not necessary given that we got at * least AccessShareLock on the table before performing any of the * indexscans, but let's be safe.) */ if (tbmres->blockno >= scan->rs_nblocks) { node->tbmres = tbmres = NULL; continue; } /* * Fetch the current heap page and identify candidate tuples. */ bitgetpage(scan, tbmres); /* * Set rs_cindex to first slot to examine */ scan->rs_cindex = 0; #ifdef USE_PREFETCH /* * Increase prefetch target if it's not yet at the max. Note that * we will increase it to zero after fetching the very first * page/tuple, then to one after the second tuple is fetched, then * it doubles as later pages are fetched. */ if (node->prefetch_target >= target_prefetch_pages) /* don't increase any further */ ; else if (node->prefetch_target >= target_prefetch_pages / 2) node->prefetch_target = target_prefetch_pages; else if (node->prefetch_target > 0) node->prefetch_target *= 2; else node->prefetch_target++; #endif /* USE_PREFETCH */ } else { /* * Continuing in previously obtained page; advance rs_cindex */ scan->rs_cindex++; #ifdef USE_PREFETCH /* * Try to prefetch at least a few pages even before we get to the * second page if we don't stop reading after the first tuple. */ if (node->prefetch_target < target_prefetch_pages) node->prefetch_target++; #endif /* USE_PREFETCH */ } /* * Out of range? If so, nothing more to look at on this page */ if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) { node->tbmres = tbmres = NULL; continue; } #ifdef USE_PREFETCH /* * We issue prefetch requests *after* fetching the current page to try * to avoid having prefetching interfere with the main I/O. Also, this * should happen only when we have determined there is still something * to do on the current page, else we may uselessly prefetch the same * page we are just about to request for real. */ if (prefetch_iterator) { while (node->prefetch_pages < node->prefetch_target) { TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); if (tbmpre == NULL) { /* No more pages to prefetch */ tbm_end_iterate(prefetch_iterator); node->prefetch_iterator = prefetch_iterator = NULL; break; } node->prefetch_pages++; PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); } } #endif /* USE_PREFETCH */ /* * Okay to fetch the tuple */ targoffset = scan->rs_vistuples[scan->rs_cindex]; dp = (Page) BufferGetPage(scan->rs_cbuf); lp = PageGetItemId(dp, targoffset); Assert(ItemIdIsNormal(lp)); scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); scan->rs_ctup.t_len = ItemIdGetLength(lp); ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); pgstat_count_heap_fetch(scan->rs_rd); /* * Set up the result slot to point to this tuple. Note that the slot * acquires a pin on the buffer. */ ExecStoreTuple(&scan->rs_ctup, slot, scan->rs_cbuf, false); /* * If we are using lossy info, we have to recheck the qual conditions * at every tuple. */ if (tbmres->recheck) { econtext->ecxt_scantuple = slot; ResetExprContext(econtext); if (!ExecQual(node->bitmapqualorig, econtext, false)) { /* Fails recheck, so drop it and loop back for another */ ExecClearTuple(slot); continue; } } /* OK to return this tuple */ return slot; } /* * if we get here it means we are at the end of the scan.. */ return ExecClearTuple(slot); }