Datum funny_dup17(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; TransactionId *xid; int *level; bool *recursion; Relation rel; TupleDesc tupdesc; HeapTuple tuple; char *query, *fieldval, *fieldtype; char *when; uint64 inserted; int selected = 0; int ret; if (!CALLED_AS_TRIGGER(fcinfo)) elog(ERROR, "funny_dup17: not fired by trigger manager"); tuple = trigdata->tg_trigtuple; rel = trigdata->tg_relation; tupdesc = rel->rd_att; if (TRIGGER_FIRED_BEFORE(trigdata->tg_event)) { xid = &fd17b_xid; level = &fd17b_level; recursion = &fd17b_recursion; when = "BEFORE"; } else { xid = &fd17a_xid; level = &fd17a_level; recursion = &fd17a_recursion; when = "AFTER "; } if (!TransactionIdIsCurrentTransactionId(*xid)) { *xid = GetCurrentTransactionId(); *level = 0; *recursion = true; } if (*level == 17) { *recursion = false; return PointerGetDatum(tuple); } if (!(*recursion)) return PointerGetDatum(tuple); (*level)++; SPI_connect(); fieldval = SPI_getvalue(tuple, tupdesc, 1); fieldtype = SPI_gettype(tupdesc, 1); query = (char *) palloc(100 + NAMEDATALEN * 3 + strlen(fieldval) + strlen(fieldtype)); sprintf(query, "insert into %s select * from %s where %s = '%s'::%s", SPI_getrelname(rel), SPI_getrelname(rel), SPI_fname(tupdesc, 1), fieldval, fieldtype); if ((ret = SPI_exec(query, 0)) < 0) elog(ERROR, "funny_dup17 (fired %s) on level %3d: SPI_exec (insert ...) returned %d", when, *level, ret); inserted = SPI_processed; sprintf(query, "select count (*) from %s where %s = '%s'::%s", SPI_getrelname(rel), SPI_fname(tupdesc, 1), fieldval, fieldtype); if ((ret = SPI_exec(query, 0)) < 0) elog(ERROR, "funny_dup17 (fired %s) on level %3d: SPI_exec (select ...) returned %d", when, *level, ret); if (SPI_processed > 0) { selected = DatumGetInt32(DirectFunctionCall1(int4in, CStringGetDatum(SPI_getvalue( SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1 )))); } elog(DEBUG4, "funny_dup17 (fired %s) on level %3d: " UINT64_FORMAT "/%d tuples inserted/selected", when, *level, inserted, selected); SPI_finish(); (*level)--; if (*level == 0) *xid = InvalidTransactionId; return PointerGetDatum(tuple); }
/* * ProcSleep -- put a process to sleep * * P() on the semaphore should put us to sleep. The process * semaphore is cleared by default, so the first time we try * to acquire it, we sleep. * * ASSUME: that no one will fiddle with the queue until after * we release the spin lock. * * NOTES: The process queue is now a priority queue for locking. */ int ProcSleep(PROC_QUEUE *queue, SPINLOCK spinlock, int token, int prio, LOCK *lock) { int i; PROC *proc; #ifndef WIN32 /* figure this out later */ struct itimerval timeval, dummy; #endif /* WIN32 */ proc = (PROC *) MAKE_PTR(queue->links.prev); for (i=0;i<queue->size;i++) { if (proc->prio < prio) proc = (PROC *) MAKE_PTR(proc->links.prev); else break; } MyProc->token = token; MyProc->waitLock = lock; /* ------------------- * currently, we only need this for the ProcWakeup routines * ------------------- */ TransactionIdStore((TransactionId) GetCurrentTransactionId(), &MyProc->xid); /* ------------------- * assume that these two operations are atomic (because * of the spinlock). * ------------------- */ SHMQueueInsertTL(&(proc->links),&(MyProc->links)); queue->size++; SpinRelease(spinlock); /* -------------- * Postgres does not have any deadlock detection code and for this * reason we must set a timer to wake up the process in the event of * a deadlock. For now the timer is set for 1 minute and we assume that * any process which sleeps for this amount of time is deadlocked and will * receive a SIGALRM signal. The handler should release the processes * semaphore and abort the current transaction. * * Need to zero out struct to set the interval and the micro seconds fields * to 0. * -------------- */ #ifndef WIN32 memset(&timeval, 0, sizeof(struct itimerval)); timeval.it_value.tv_sec = DEADLOCK_TIMEOUT; if (setitimer(ITIMER_REAL, &timeval, &dummy)) elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); #endif /* WIN32 */ /* -------------- * if someone wakes us between SpinRelease and IpcSemaphoreLock, * IpcSemaphoreLock will not block. The wakeup is "saved" by * the semaphore implementation. * -------------- */ IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock); /* --------------- * We were awoken before a timeout - now disable the timer * --------------- */ #ifndef WIN32 timeval.it_value.tv_sec = 0; if (setitimer(ITIMER_REAL, &timeval, &dummy)) elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup"); #endif /* WIN32 */ /* ---------------- * We were assumed to be in a critical section when we went * to sleep. * ---------------- */ SpinAcquire(spinlock); return(MyProc->errType); }
static void grab_ExecutorEnd(QueryDesc * queryDesc) { Datum values[10]; bool nulls[10] = {false, false, false, false, false, false, false, false, false, false}; Relation dump_heap; RangeVar *dump_table_rv; HeapTuple tuple; Oid namespaceId; /* lookup schema */ namespaceId = GetSysCacheOid1(NAMESPACENAME, CStringGetDatum(EXTENSION_SCHEMA)); if (OidIsValid(namespaceId)) { /* lookup table */ if (OidIsValid(get_relname_relid(EXTENSION_LOG_TABLE, namespaceId))) { /* get table heap */ dump_table_rv = makeRangeVar(EXTENSION_SCHEMA, EXTENSION_LOG_TABLE, -1); dump_heap = heap_openrv(dump_table_rv, RowExclusiveLock); /* transaction info */ values[0] = Int32GetDatum(GetCurrentTransactionId()); values[1] = Int32GetDatum(GetCurrentCommandId(false)); values[2] = Int32GetDatum(MyProcPid); values[3] = Int32GetDatum(GetUserId()); /* query timing */ if (queryDesc->totaltime != NULL) { InstrEndLoop(queryDesc->totaltime); values[4] = TimestampGetDatum( TimestampTzPlusMilliseconds(GetCurrentTimestamp(), (queryDesc->totaltime->total * -1000.0))); values[5] = Float8GetDatum(queryDesc->totaltime->total); } else { nulls[4] = true; nulls[5] = true; } /* query command type */ values[6] = Int32GetDatum(queryDesc->operation); /* query text */ values[7] = CStringGetDatum( cstring_to_text(queryDesc->sourceText)); /* query params */ if (queryDesc->params != NULL) { int numParams = queryDesc->params->numParams; Oid out_func_oid, ptype; Datum pvalue; bool isvarlena; FmgrInfo *out_functions; bool arr_nulls[numParams]; size_t arr_nelems = (size_t) numParams; Datum *arr_val_elems = palloc(sizeof(Datum) * arr_nelems); Datum *arr_typ_elems = palloc(sizeof(Datum) * arr_nelems); char elem_val_byval, elem_val_align, elem_typ_byval, elem_typ_align; int16 elem_val_len, elem_typ_len; int elem_dims[1], elem_lbs[1]; int paramno; /* init */ out_functions = (FmgrInfo *) palloc( (numParams) * sizeof(FmgrInfo)); get_typlenbyvalalign(TEXTOID, &elem_val_len, &elem_val_byval, &elem_val_align); get_typlenbyvalalign(REGTYPEOID, &elem_typ_len, &elem_typ_byval, &elem_typ_align); elem_dims[0] = arr_nelems; elem_lbs[0] = 1; for (paramno = 0; paramno < numParams; paramno++) { pvalue = queryDesc->params->params[paramno].value; ptype = queryDesc->params->params[paramno].ptype; getTypeOutputInfo(ptype, &out_func_oid, &isvarlena); fmgr_info(out_func_oid, &out_functions[paramno]); arr_typ_elems[paramno] = ptype; arr_nulls[paramno] = true; if (!queryDesc->params->params[paramno].isnull) { arr_nulls[paramno] = false; arr_val_elems[paramno] = PointerGetDatum( cstring_to_text( OutputFunctionCall(&out_functions[paramno], pvalue))); } } values[8] = PointerGetDatum( construct_md_array( arr_val_elems, arr_nulls, 1, elem_dims, elem_lbs, TEXTOID, elem_val_len, elem_val_byval, elem_val_align)); values[9] = PointerGetDatum( construct_array( arr_typ_elems, arr_nelems, REGTYPEOID, elem_typ_len, elem_typ_byval, elem_typ_align)); pfree(out_functions); pfree(arr_val_elems); } else { nulls[8] = true; nulls[9] = true; } /* insert */ tuple = heap_form_tuple(dump_heap->rd_att, values, nulls); simple_heap_insert(dump_heap, tuple); heap_close(dump_heap, RowExclusiveLock); } } if (prev_ExecutorEnd) prev_ExecutorEnd(queryDesc); else standard_ExecutorEnd(queryDesc); }
/* Merge local and global snapshots. * Produce most restricted (conservative) snapshot which treate transaction as in-progress if is is marked as in-progress * either in local, either in global snapshots */ static void DtmMergeWithGlobalSnapshot(Snapshot dst) { int i, j, n; TransactionId xid; Snapshot src = &DtmSnapshot; if (!(TransactionIdIsValid(src->xmin) && TransactionIdIsValid(src->xmax))) { PgGetSnapshotData(dst); return; } GetLocalSnapshot: /* * Check that global and local snapshots are consistent: transactions marked as completed in global snapshot * should be completed locally */ dst = PgGetSnapshotData(dst); for (i = 0; i < dst->xcnt; i++) if (TransactionIdIsInDoubt(dst->xip[i])) goto GetLocalSnapshot; for (xid = dst->xmax; xid < src->xmax; xid++) if (TransactionIdIsInDoubt(xid)) goto GetLocalSnapshot; GetCurrentTransactionId(); DumpSnapshot(dst, "local"); DumpSnapshot(src, "DTM"); if (src->xmax < dst->xmax) dst->xmax = src->xmax; if (src->xmin < dst->xmin) dst->xmin = src->xmin; Assert(src->subxcnt == 0); if (src->xcnt + dst->subxcnt + dst->xcnt <= GetMaxSnapshotXidCount()) { Assert(dst->subxcnt == 0); memcpy(dst->xip + dst->xcnt, src->xip, src->xcnt*sizeof(TransactionId)); n = dst->xcnt + src->xcnt; qsort(dst->xip, n, sizeof(TransactionId), xidComparator); xid = InvalidTransactionId; for (i = 0, j = 0; i < n && dst->xip[i] < dst->xmax; i++) if (dst->xip[i] != xid) dst->xip[j++] = xid = dst->xip[i]; dst->xcnt = j; } else { Assert(src->xcnt + dst->subxcnt + dst->xcnt <= GetMaxSnapshotSubxidCount()); memcpy(dst->subxip + dst->subxcnt, dst->xip, dst->xcnt*sizeof(TransactionId)); memcpy(dst->subxip + dst->subxcnt + dst->xcnt, src->xip, src->xcnt*sizeof(TransactionId)); n = dst->xcnt + dst->subxcnt + src->xcnt; qsort(dst->subxip, n, sizeof(TransactionId), xidComparator); xid = InvalidTransactionId; for (i = 0, j = 0; i < n && dst->subxip[i] < dst->xmax; i++) if (dst->subxip[i] != xid) dst->subxip[j++] = xid = dst->subxip[i]; dst->subxcnt = j; dst->xcnt = 0; } DumpSnapshot(dst, "merged"); }
/* * RecordTransactionCommit */ void RecordTransactionCommit(void) { /* * If we made neither any XLOG entries nor any temp-rel updates, we * can omit recording the transaction commit at all. */ if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate) { TransactionId xid = GetCurrentTransactionId(); bool madeTCentries; XLogRecPtr recptr; /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); START_CRIT_SECTION(); /* * If our transaction made any transaction-controlled XLOG entries, * we need to lock out checkpoint start between writing our XLOG * record and updating pg_clog. Otherwise it is possible for the * checkpoint to set REDO after the XLOG record but fail to flush the * pg_clog update to disk, leading to loss of the transaction commit * if we crash a little later. Slightly klugy fix for problem * discovered 2004-08-10. * * (If it made no transaction-controlled XLOG entries, its XID * appears nowhere in permanent storage, so no one else will ever care * if it committed; so it doesn't matter if we lose the commit flag.) * * Note we only need a shared lock. */ madeTCentries = (MyLastRecPtr.xrecoff != 0); if (madeTCentries) LWLockAcquire(CheckpointStartLock, LW_SHARED); /* * We only need to log the commit in XLOG if the transaction made * any transaction-controlled XLOG entries. */ if (madeTCentries) { /* Need to emit a commit record */ XLogRecData rdata; xl_xact_commit xlrec; xlrec.xtime = time(NULL); rdata.buffer = InvalidBuffer; rdata.data = (char *) (&xlrec); rdata.len = SizeOfXactCommit; rdata.next = NULL; /* * XXX SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP */ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, &rdata); } else { /* Just flush through last record written by me */ recptr = ProcLastRecEnd; } /* * We must flush our XLOG entries to disk if we made any XLOG * entries, whether in or out of transaction control. For * example, if we reported a nextval() result to the client, this * ensures that any XLOG record generated by nextval will hit the * disk before we report the transaction committed. */ if (MyXactMadeXLogEntry) { /* * Sleep before flush! So we can flush more than one commit * records per single fsync. (The idea is some other backend * may do the XLogFlush while we're sleeping. This needs work * still, because on most Unixen, the minimum select() delay * is 10msec or more, which is way too long.) * * We do not sleep if enableFsync is not turned on, nor if there * are fewer than CommitSiblings other backends with active * transactions. */ if (CommitDelay > 0 && enableFsync && CountActiveBackends() >= CommitSiblings) { struct timeval delay; delay.tv_sec = 0; delay.tv_usec = CommitDelay; (void) select(0, NULL, NULL, NULL, &delay); } XLogFlush(recptr); } /* * We must mark the transaction committed in clog if its XID * appears either in permanent rels or in local temporary rels. We * test this by seeing if we made transaction-controlled entries * *OR* local-rel tuple updates. Note that if we made only the * latter, we have not emitted an XLOG record for our commit, and * so in the event of a crash the clog update might be lost. This * is okay because no one else will ever care whether we * committed. */ if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) TransactionIdCommit(xid); /* Unlock checkpoint lock if we acquired it */ if (madeTCentries) LWLockRelease(CheckpointStartLock); END_CRIT_SECTION(); } /* Break the chain of back-links in the XLOG records I output */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; MyXactMadeTempRelUpdate = false; /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; }
/* * RecordTransactionAbort */ static void RecordTransactionAbort(void) { /* * If we made neither any transaction-controlled XLOG entries nor any * temp-rel updates, we can omit recording the transaction abort at * all. No one will ever care that it aborted. */ if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) { TransactionId xid = GetCurrentTransactionId(); /* * Catch the scenario where we aborted partway through * RecordTransactionCommit ... */ if (TransactionIdDidCommit(xid)) elog(PANIC, "cannot abort transaction %u, it was already committed", xid); START_CRIT_SECTION(); /* * We only need to log the abort in XLOG if the transaction made * any transaction-controlled XLOG entries. (Otherwise, its XID * appears nowhere in permanent storage, so no one else will ever * care if it committed.) We do not flush XLOG to disk in any * case, since the default assumption after a crash would be that * we aborted, anyway. * For the same reason, we don't need to worry about interlocking * against checkpoint start. */ if (MyLastRecPtr.xrecoff != 0) { XLogRecData rdata; xl_xact_abort xlrec; XLogRecPtr recptr; xlrec.xtime = time(NULL); rdata.buffer = InvalidBuffer; rdata.data = (char *) (&xlrec); rdata.len = SizeOfXactAbort; rdata.next = NULL; /* * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP */ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata); } /* * Mark the transaction aborted in clog. This is not absolutely * necessary but we may as well do it while we are here. */ TransactionIdAbort(xid); END_CRIT_SECTION(); } /* Break the chain of back-links in the XLOG records I output */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; MyXactMadeTempRelUpdate = false; /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; }
static void saveSequenceUpdate(Oid relid, int64 nextValue, bool iscalled) { Oid insertArgTypes[2] = {NAMEOID, INT4OID}; Oid insertDataArgTypes[1] = {NAMEOID}; void *insertPlan; void *insertDataPlan; Datum insertDatum[2]; Datum insertDataDatum[1]; char nextSequenceText[64]; const char *insertQuery = "INSERT INTO dbmirror_Pending (TableName,Op,XID) VALUES" \ "($1,'s',$2)"; const char *insertDataQuery = "INSERT INTO dbmirror_PendingData(SeqId,IsKey,Data) VALUES " \ "(currval('dbmirror_pending_seqid_seq'),'t',$1)"; if (SPI_connect() < 0) ereport(ERROR, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("dbmirror:savesequenceupdate could not connect to SPI"))); insertPlan = SPI_prepare(insertQuery, 2, insertArgTypes); insertDataPlan = SPI_prepare(insertDataQuery, 1, insertDataArgTypes); if (insertPlan == NULL || insertDataPlan == NULL) ereport(ERROR, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("dbmirror:savesequenceupdate error creating plan"))); insertDatum[0] = PointerGetDatum(get_rel_name(relid)); insertDatum[1] = Int32GetDatum(GetCurrentTransactionId()); snprintf(nextSequenceText, sizeof(nextSequenceText), INT64_FORMAT ",'%c'", nextValue, iscalled ? 't' : 'f'); /* * note type cheat here: we prepare a C string and then claim it is a * NAME, which the system will coerce to varchar for us. */ insertDataDatum[0] = PointerGetDatum(nextSequenceText); debug_msg2("dbmirror:savesequenceupdate: Setting value as %s", nextSequenceText); debug_msg("dbmirror:About to execute insert query"); if (SPI_execp(insertPlan, insertDatum, NULL, 1) != SPI_OK_INSERT) ereport(ERROR, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("error inserting row in dbmirror_Pending"))); if (SPI_execp(insertDataPlan, insertDataDatum, NULL, 1) != SPI_OK_INSERT) ereport(ERROR, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg("error inserting row in dbmirror_PendingData"))); debug_msg("dbmirror:Insert query finished"); SPI_pfree(insertPlan); SPI_pfree(insertDataPlan); SPI_finish(); }
/***************************************************************************** * Constructs and executes an SQL query to write a record of this tuple change * to the pending table. *****************************************************************************/ int storePending(char *cpTableName, HeapTuple tBeforeTuple, HeapTuple tAfterTuple, TupleDesc tTupDesc, Oid tableOid, char cOp) { char *cpQueryBase = "INSERT INTO dbmirror_pending (TableName,Op,XID) VALUES ($1,$2,$3)"; int iResult = 0; HeapTuple tCurTuple; char nulls[3] = " "; /* Points the current tuple(before or after) */ Datum saPlanData[3]; Oid taPlanArgTypes[4] = {NAMEOID, CHAROID, INT4OID}; void *vpPlan; tCurTuple = tBeforeTuple ? tBeforeTuple : tAfterTuple; vpPlan = SPI_prepare(cpQueryBase, 3, taPlanArgTypes); if (vpPlan == NULL) ereport(ERROR, (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), errmsg("dbmirror:storePending error creating plan"))); saPlanData[0] = PointerGetDatum(cpTableName); saPlanData[1] = CharGetDatum(cOp); saPlanData[2] = Int32GetDatum(GetCurrentTransactionId()); iResult = SPI_execp(vpPlan, saPlanData, nulls, 1); if (iResult < 0) elog(NOTICE, "storedPending fired (%s) returned %d", cpQueryBase, iResult); debug_msg("dbmirror:storePending row successfully stored in pending table"); if (cOp == 'd') { /** * This is a record of a delete operation. * Just store the key data. */ iResult = storeKeyInfo(cpTableName, tBeforeTuple, tTupDesc, tableOid); } else if (cOp == 'i') { /** * An Insert operation. * Store all data */ iResult = storeData(cpTableName, tAfterTuple, tTupDesc, tableOid, TRUE); } else { /* op must be an update. */ iResult = storeKeyInfo(cpTableName, tBeforeTuple, tTupDesc, tableOid); iResult = iResult ? iResult : storeData(cpTableName, tAfterTuple, tTupDesc, tableOid, TRUE); } debug_msg("dbmirror:storePending done storing keyinfo"); return iResult; }
/* * PersistHoldablePortal * * Prepare the specified Portal for access outside of the current * transaction. When this function returns, all future accesses to the * portal must be done via the Tuplestore (not by invoking the * executor). */ void PersistHoldablePortal(Portal portal) { QueryDesc *queryDesc = PortalGetQueryDesc(portal); MemoryContext savePortalContext; MemoryContext saveQueryContext; MemoryContext oldcxt; /* * If we're preserving a holdable portal, we had better be inside the * transaction that originally created it. */ Assert(portal->createXact == GetCurrentTransactionId()); Assert(queryDesc != NULL); Assert(portal->portalReady); Assert(!portal->portalDone); /* * Caller must have created the tuplestore already. */ Assert(portal->holdContext != NULL); Assert(portal->holdStore != NULL); /* * Before closing down the executor, we must copy the tupdesc into * long-term memory, since it was created in executor memory. */ oldcxt = MemoryContextSwitchTo(portal->holdContext); portal->tupDesc = CreateTupleDescCopy(portal->tupDesc); MemoryContextSwitchTo(oldcxt); /* * Check for improper portal use, and mark portal active. */ if (portal->portalActive) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("portal \"%s\" already active", portal->name))); portal->portalActive = true; /* * Set global portal context pointers. */ savePortalContext = PortalContext; PortalContext = PortalGetHeapMemory(portal); saveQueryContext = QueryContext; QueryContext = portal->queryContext; MemoryContextSwitchTo(PortalContext); /* * Rewind the executor: we need to store the entire result set in the * tuplestore, so that subsequent backward FETCHs can be processed. */ ExecutorRewind(queryDesc); /* Change the destination to output to the tuplestore */ queryDesc->dest = CreateDestReceiver(Tuplestore, portal); /* Fetch the result set into the tuplestore */ ExecutorRun(queryDesc, ForwardScanDirection, 0L); (*queryDesc->dest->rDestroy) (queryDesc->dest); queryDesc->dest = NULL; /* * Now shut down the inner executor. */ portal->queryDesc = NULL; /* prevent double shutdown */ ExecutorEnd(queryDesc); /* Mark portal not active */ portal->portalActive = false; PortalContext = savePortalContext; QueryContext = saveQueryContext; /* * Reset the position in the result set: ideally, this could be * implemented by just skipping straight to the tuple # that we need * to be at, but the tuplestore API doesn't support that. So we start * at the beginning of the tuplestore and iterate through it until we * reach where we need to be. FIXME someday? */ MemoryContextSwitchTo(portal->holdContext); if (!portal->atEnd) { long store_pos; if (portal->posOverflow) /* oops, cannot trust portalPos */ ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not reposition held cursor"))); tuplestore_rescan(portal->holdStore); for (store_pos = 0; store_pos < portal->portalPos; store_pos++) { HeapTuple tup; bool should_free; tup = tuplestore_gettuple(portal->holdStore, true, &should_free); if (tup == NULL) elog(ERROR, "unexpected end of tuple stream"); if (should_free) pfree(tup); } } MemoryContextSwitchTo(oldcxt); /* * We can now release any subsidiary memory of the portal's heap * context; we'll never use it again. The executor already dropped * its context, but this will clean up anything that glommed onto the * portal's heap via PortalContext. */ MemoryContextDeleteChildren(PortalGetHeapMemory(portal)); }
/** * @brief Initialize a DirectWriter */ static void DirectWriterInit(DirectWriter *self) { LoadStatus *ls; /* * Set defaults to unspecified parameters. */ if (self->base.max_dup_errors < -1) self->base.max_dup_errors = DEFAULT_MAX_DUP_ERRORS; self->base.rel = heap_open(self->base.relid, AccessExclusiveLock); VerifyTarget(self->base.rel, self->base.max_dup_errors); self->base.desc = RelationGetDescr(self->base.rel); SpoolerOpen(&self->spooler, self->base.rel, false, self->base.on_duplicate, self->base.max_dup_errors, self->base.dup_badfile); self->base.context = GetPerTupleMemoryContext(self->spooler.estate); /* Verify DataDir/pg_bulkload directory */ ValidateLSFDirectory(BULKLOAD_LSF_DIR); /* Initialize first block */ PageInit(GetCurrentPage(self), BLCKSZ, 0); PageSetTLI(GetCurrentPage(self), ThisTimeLineID); /* Obtain transaction ID and command ID. */ self->xid = GetCurrentTransactionId(); self->cid = GetCurrentCommandId(true); /* * Initialize load status information */ ls = &self->ls; ls->ls.relid = self->base.relid; ls->ls.rnode = self->base.rel->rd_node; ls->ls.exist_cnt = RelationGetNumberOfBlocks(self->base.rel); ls->ls.create_cnt = 0; /* * Create a load status file and write the initial status for it. * At the time, if we find any existing load status files, exit with * error because recovery process haven't been executed after failing * load to the same table. */ BULKLOAD_LSF_PATH(self->lsf_path, ls); self->lsf_fd = BasicOpenFile(self->lsf_path, O_CREAT | O_EXCL | O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (self->lsf_fd == -1) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create loadstatus file \"%s\": %m", self->lsf_path))); if (write(self->lsf_fd, ls, sizeof(LoadStatus)) != sizeof(LoadStatus) || pg_fsync(self->lsf_fd) != 0) { UnlinkLSF(self); ereport(ERROR, (errcode_for_file_access(), errmsg("could not write loadstatus file \"%s\": %m", self->lsf_path))); } self->base.tchecker = CreateTupleChecker(self->base.desc); self->base.tchecker->checker = (CheckerTupleProc) CoercionCheckerTuple; }
/* ---------------------------------------------------------------- * ExecInsert * * INSERTs have to add the tuple into * the base relation and insert appropriate tuples into the * index relations. * Insert can be part of an update operation when * there is a preceding SplitUpdate node. * ---------------------------------------------------------------- */ void ExecInsert(TupleTableSlot *slot, DestReceiver *dest, EState *estate, PlanGenerator planGen, bool isUpdate) { void *tuple = NULL; ResultRelInfo *resultRelInfo = NULL; Relation resultRelationDesc = NULL; Oid newId = InvalidOid; TupleTableSlot *partslot = NULL; AOTupleId aoTupleId = AOTUPLEID_INIT; bool rel_is_heap = false; bool rel_is_aorows = false; bool rel_is_aocols = false; bool rel_is_external = false; /* * get information on the (current) result relation */ if (estate->es_result_partitions) { resultRelInfo = slot_get_partition(slot, estate); /* Check whether the user provided the correct leaf part only if required */ if (!dml_ignore_target_partition_check) { Assert(NULL != estate->es_result_partitions->part && NULL != resultRelInfo->ri_RelationDesc); List *resultRelations = estate->es_plannedstmt->resultRelations; /* * Only inheritance can generate multiple result relations and inheritance * is not compatible with partitions. As we are in inserting in partitioned * table, we should not have more than one resultRelation */ Assert(list_length(resultRelations) == 1); /* We only have one resultRelations entry where the user originally intended to insert */ int rteIdxForUserRel = linitial_int(resultRelations); Assert (rteIdxForUserRel > 0); Oid userProvidedRel = InvalidOid; if (1 == rteIdxForUserRel) { /* Optimization for typical case */ userProvidedRel = ((RangeTblEntry *) estate->es_plannedstmt->rtable->head->data.ptr_value)->relid; } else { userProvidedRel = getrelid(rteIdxForUserRel, estate->es_plannedstmt->rtable); } /* Error out if user provides a leaf partition that does not match with our calculated partition */ if (userProvidedRel != estate->es_result_partitions->part->parrelid && userProvidedRel != resultRelInfo->ri_RelationDesc->rd_id) { ereport(ERROR, (errcode(ERRCODE_CHECK_VIOLATION), errmsg("Trying to insert row into wrong partition"), errdetail("Expected partition: %s, provided partition: %s", resultRelInfo->ri_RelationDesc->rd_rel->relname.data, estate->es_result_relation_info->ri_RelationDesc->rd_rel->relname.data))); } } estate->es_result_relation_info = resultRelInfo; } else { resultRelInfo = estate->es_result_relation_info; } Assert (!resultRelInfo->ri_projectReturning); resultRelationDesc = resultRelInfo->ri_RelationDesc; rel_is_heap = RelationIsHeap(resultRelationDesc); rel_is_aocols = RelationIsAoCols(resultRelationDesc); rel_is_aorows = RelationIsAoRows(resultRelationDesc); rel_is_external = RelationIsExternal(resultRelationDesc); partslot = reconstructMatchingTupleSlot(slot, resultRelInfo); if (rel_is_heap) { tuple = ExecFetchSlotHeapTuple(partslot); } else if (rel_is_aorows) { tuple = ExecFetchSlotMemTuple(partslot, false); } else if (rel_is_external) { if (estate->es_result_partitions && estate->es_result_partitions->part->parrelid != 0) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Insert into external partitions not supported."))); return; } else { tuple = ExecFetchSlotHeapTuple(partslot); } } else { Assert(rel_is_aocols); tuple = ExecFetchSlotMemTuple(partslot, true); } Assert(partslot != NULL && tuple != NULL); /* Execute triggers in Planner-generated plans */ if (planGen == PLANGEN_PLANNER) { /* BEFORE ROW INSERT Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0) { HeapTuple newtuple; /* NYI */ if(rel_is_aocols) elog(ERROR, "triggers are not supported on tables that use column-oriented storage"); newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple); if (newtuple == NULL) /* "do nothing" */ { return; } if (newtuple != tuple) /* modified by Trigger(s) */ { /* * Put the modified tuple into a slot for convenience of routines * below. We assume the tuple was allocated in per-tuple memory * context, and therefore will go away by itself. The tuple table * slot should not try to clear it. */ TupleTableSlot *newslot = estate->es_trig_tuple_slot; if (newslot->tts_tupleDescriptor != partslot->tts_tupleDescriptor) ExecSetSlotDescriptor(newslot, partslot->tts_tupleDescriptor); ExecStoreGenericTuple(newtuple, newslot, false); newslot->tts_tableOid = partslot->tts_tableOid; /* for constraints */ tuple = newtuple; partslot = newslot; } } } /* * Check the constraints of the tuple */ if (resultRelationDesc->rd_att->constr && planGen == PLANGEN_PLANNER) { ExecConstraints(resultRelInfo, partslot, estate); } /* * insert the tuple * * Note: heap_insert returns the tid (location) of the new tuple in the * t_self field. * * NOTE: for append-only relations we use the append-only access methods. */ if (rel_is_aorows) { if (resultRelInfo->ri_aoInsertDesc == NULL) { /* Set the pre-assigned fileseg number to insert into */ ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_aoInsertDesc = appendonly_insert_init(resultRelationDesc, ActiveSnapshot, resultRelInfo->ri_aosegno, false); } appendonly_insert(resultRelInfo->ri_aoInsertDesc, tuple, &newId, &aoTupleId); } else if (rel_is_aocols) { if (resultRelInfo->ri_aocsInsertDesc == NULL) { ResultRelInfoSetSegno(resultRelInfo, estate->es_result_aosegnos); resultRelInfo->ri_aocsInsertDesc = aocs_insert_init(resultRelationDesc, resultRelInfo->ri_aosegno, false); } newId = aocs_insert(resultRelInfo->ri_aocsInsertDesc, partslot); aoTupleId = *((AOTupleId*)slot_get_ctid(partslot)); } else if (rel_is_external) { /* Writable external table */ if (resultRelInfo->ri_extInsertDesc == NULL) resultRelInfo->ri_extInsertDesc = external_insert_init(resultRelationDesc); newId = external_insert(resultRelInfo->ri_extInsertDesc, tuple); } else { Insist(rel_is_heap); newId = heap_insert(resultRelationDesc, tuple, estate->es_snapshot->curcid, true, true, GetCurrentTransactionId()); } IncrAppended(); (estate->es_processed)++; (resultRelInfo->ri_aoprocessed)++; estate->es_lastoid = newId; partslot->tts_tableOid = RelationGetRelid(resultRelationDesc); if (rel_is_aorows || rel_is_aocols) { /* * insert index entries for AO Row-Store tuple */ if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(partslot, (ItemPointer)&aoTupleId, estate, false); } else { /* Use parttuple for index update in case this is an indexed heap table. */ TupleTableSlot *xslot = partslot; void *xtuple = tuple; setLastTid(&(((HeapTuple) xtuple)->t_self)); /* * insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) ExecInsertIndexTuples(xslot, &(((HeapTuple) xtuple)->t_self), estate, false); } if (planGen == PLANGEN_PLANNER) { /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple); } }