コード例 #1
0
ファイル: portalcmds.c プロジェクト: dreamsxin/postgresql-1
/*
 * PersistHoldablePortal
 *
 * Prepare the specified Portal for access outside of the current
 * transaction. When this function returns, all future accesses to the
 * portal must be done via the Tuplestore (not by invoking the
 * executor).
 */
void
PersistHoldablePortal(Portal portal)
{
	QueryDesc  *queryDesc = PortalGetQueryDesc(portal);
	Portal		saveActivePortal;
	ResourceOwner saveResourceOwner;
	MemoryContext savePortalContext;
	MemoryContext oldcxt;

	/*
	 * If we're preserving a holdable portal, we had better be inside the
	 * transaction that originally created it.
	 */
	Assert(portal->createSubid != InvalidSubTransactionId);
	Assert(queryDesc != NULL);

	/*
	 * Caller must have created the tuplestore already ... but not a snapshot.
	 */
	Assert(portal->holdContext != NULL);
	Assert(portal->holdStore != NULL);
	Assert(portal->holdSnapshot == NULL);

	/*
	 * Before closing down the executor, we must copy the tupdesc into
	 * long-term memory, since it was created in executor memory.
	 */
	oldcxt = MemoryContextSwitchTo(portal->holdContext);

	portal->tupDesc = CreateTupleDescCopy(portal->tupDesc);

	MemoryContextSwitchTo(oldcxt);

	/*
	 * Check for improper portal use, and mark portal active.
	 */
	MarkPortalActive(portal);

	/*
	 * Set up global portal context pointers.
	 */
	saveActivePortal = ActivePortal;
	saveResourceOwner = CurrentResourceOwner;
	savePortalContext = PortalContext;
	PG_TRY();
	{
		ActivePortal = portal;
		if (portal->resowner)
			CurrentResourceOwner = portal->resowner;
		PortalContext = PortalGetHeapMemory(portal);

		MemoryContextSwitchTo(PortalContext);

		PushActiveSnapshot(queryDesc->snapshot);

		/*
		 * Rewind the executor: we need to store the entire result set in the
		 * tuplestore, so that subsequent backward FETCHs can be processed.
		 */
		ExecutorRewind(queryDesc);

		/*
		 * Change the destination to output to the tuplestore.  Note we tell
		 * the tuplestore receiver to detoast all data passed through it; this
		 * makes it safe to not keep a snapshot associated with the data.
		 */
		queryDesc->dest = CreateDestReceiver(DestTuplestore);
		SetTuplestoreDestReceiverParams(queryDesc->dest,
										portal->holdStore,
										portal->holdContext,
										true);

		/* Fetch the result set into the tuplestore */
		ExecutorRun(queryDesc, ForwardScanDirection, 0L, false);

		(*queryDesc->dest->rDestroy) (queryDesc->dest);
		queryDesc->dest = NULL;

		/*
		 * Now shut down the inner executor.
		 */
		portal->queryDesc = NULL;		/* prevent double shutdown */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);

		/*
		 * Set the position in the result set.
		 */
		MemoryContextSwitchTo(portal->holdContext);

		if (portal->atEnd)
		{
			/*
			 * Just force the tuplestore forward to its end.  The size of the
			 * skip request here is arbitrary.
			 */
			while (tuplestore_skiptuples(portal->holdStore, 1000000, true))
				 /* continue */ ;
		}
		else
		{
			tuplestore_rescan(portal->holdStore);

			if (!tuplestore_skiptuples(portal->holdStore,
									   portal->portalPos,
									   true))
				elog(ERROR, "unexpected end of tuple stream");
		}
	}
	PG_CATCH();
	{
		/* Uncaught error while executing portal: mark it dead */
		MarkPortalFailed(portal);

		/* Restore global vars and propagate error */
		ActivePortal = saveActivePortal;
		CurrentResourceOwner = saveResourceOwner;
		PortalContext = savePortalContext;

		PG_RE_THROW();
	}
	PG_END_TRY();

	MemoryContextSwitchTo(oldcxt);

	/* Mark portal not active */
	portal->status = PORTAL_READY;

	ActivePortal = saveActivePortal;
	CurrentResourceOwner = saveResourceOwner;
	PortalContext = savePortalContext;

	PopActiveSnapshot();

	/*
	 * We can now release any subsidiary memory of the portal's heap context;
	 * we'll never use it again.  The executor already dropped its context,
	 * but this will clean up anything that glommed onto the portal's heap via
	 * PortalContext.
	 */
	MemoryContextDeleteChildren(PortalGetHeapMemory(portal));
}
コード例 #2
0
ファイル: bgwriter.c プロジェクト: PJMODOS/postgres
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;
	bool		prev_hibernate;

	/*
	 * Properly accept or ignore signals the postmaster might send us.
	 *
	 * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1
	 * handler is still needed for latch wakeups.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, ReqShutdownHandler);		/* shutdown */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN);

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * We just started, assume there has been either a shutdown or
	 * end-of-recovery snapshot.
	 */
	last_snapshot_ts = GetCurrentTimestamp();

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Reset hibernation state after any error.
	 */
	prev_hibernate = false;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		can_hibernate;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do one cycle of dirty-buffer writing.
		 */
		can_hibernate = BgBufferSync();

		/*
		 * Send off activity statistics to the stats collector
		 */
		pgstat_send_bgwriter();

		if (FirstCallSinceLastCheckpoint())
		{
			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();
		}

		/*
		 * Log a new xl_running_xacts every now and then so replication can
		 * get into a consistent state faster (think of suboverflowed
		 * snapshots) and clean up resources (locks, KnownXids*) more
		 * frequently. The costs of this are relatively low, so doing it 4
		 * times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
		 *
		 * We assume the interval for writing xl_running_xacts is
		 * significantly bigger than BgWriterDelay, so we don't complicate the
		 * overall timeout handling but just assume we're going to get called
		 * often enough even if hibernation mode is active. It's not that
		 * important that log_snap_interval_ms is met strictly. To make sure
		 * we're not waking the disk up unneccesarily on an idle system we
		 * check whether there has been any WAL inserted since the last time
		 * we've logged a running xacts.
		 *
		 * We do this logging in the bgwriter as its the only process thats
		 * run regularly and returns to its mainloop all the time. E.g.
		 * Checkpointer, when active, is barely ever in its mainloop and thus
		 * makes it hard to log regularly.
		 */
		if (XLogStandbyInfoActive() && !RecoveryInProgress())
		{
			TimestampTz timeout = 0;
			TimestampTz now = GetCurrentTimestamp();

			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
												  LOG_SNAPSHOT_INTERVAL_MS);

			/*
			 * only log if enough time has passed and some xlog record has
			 * been inserted.
			 */
			if (now >= timeout &&
				last_snapshot_lsn != GetXLogInsertRecPtr())
			{
				last_snapshot_lsn = LogStandbySnapshot();
				last_snapshot_ts = now;
			}
		}

		/*
		 * Sleep until we are signaled or BgWriterDelay has elapsed.
		 *
		 * Note: the feedback control loop in BgBufferSync() expects that we
		 * will call it every BgWriterDelay msec.  While it's not critical for
		 * correctness that that be exact, the feedback loop might misbehave
		 * if we stray too far from that.  Hence, avoid loading this process
		 * down with latch events that are likely to happen frequently during
		 * normal operation.
		 */
		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   BgWriterDelay /* ms */ );

		/*
		 * If no latch event and BgBufferSync says nothing's happening, extend
		 * the sleep in "hibernation" mode, where we sleep for much longer
		 * than bgwriter_delay says.  Fewer wakeups save electricity.  When a
		 * backend starts using buffers again, it will wake us up by setting
		 * our latch.  Because the extra sleep will persist only as long as no
		 * buffer allocations happen, this should not distort the behavior of
		 * BgBufferSync's control loop too badly; essentially, it will think
		 * that the system-wide idle interval didn't exist.
		 *
		 * There is a race condition here, in that a backend might allocate a
		 * buffer between the time BgBufferSync saw the alloc count as zero
		 * and the time we call StrategyNotifyBgWriter.  While it's not
		 * critical that we not hibernate anyway, we try to reduce the odds of
		 * that by only hibernating when BgBufferSync says nothing's happening
		 * for two consecutive cycles.  Also, we mitigate any possible
		 * consequences of a missed wakeup by not hibernating forever.
		 */
		if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
		{
			/* Ask for notification at next buffer allocation */
			StrategyNotifyBgWriter(MyProc->pgprocno);
			/* Sleep ... */
			rc = WaitLatch(MyLatch,
						   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
						   BgWriterDelay * HIBERNATE_FACTOR);
			/* Reset the notification request in case we timed out */
			StrategyNotifyBgWriter(-1);
		}

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);

		prev_hibernate = can_hibernate;
	}
}
コード例 #3
0
/*
 * load_last_minipage
 *
 * Search through the block directory btree to find the last row that
 * contains the last minipage.
 */
static void
load_last_minipage(AppendOnlyBlockDirectory *blockDirectory,
				   int64 lastSequence,
				   int columnGroupNo)
{
	Relation blkdirRel = blockDirectory->blkdirRel;
	Relation blkdirIdx = blockDirectory->blkdirIdx;
	TupleDesc idxTupleDesc;
	TupleDesc heapTupleDesc;
	IndexScanDesc idxScanDesc;
	HeapTuple tuple = NULL;
	MemoryContext oldcxt;
	int numScanKeys = blockDirectory->numScanKeys;
	ScanKey scanKeys = blockDirectory->scanKeys;
	
#ifdef USE_ASSERT_CHECKING
	StrategyNumber *strategyNumbers = blockDirectory->strategyNumbers;
#endif /* USE_ASSERT_CHECKING */
	
	Assert(blockDirectory->aoRel != NULL);
	Assert(blockDirectory->blkdirRel != NULL);
	Assert(blockDirectory->blkdirIdx != NULL);

	oldcxt = MemoryContextSwitchTo(blockDirectory->memoryContext);
	
	heapTupleDesc = RelationGetDescr(blkdirRel);
	idxTupleDesc = RelationGetDescr(blkdirIdx);

	Assert(numScanKeys == 3);
	Assert(blockDirectory->currentSegmentFileInfo != NULL);

	/* Setup the scan keys for the scan. */
	Assert(scanKeys != NULL);
	Assert(strategyNumbers != NULL);
	if (lastSequence == 0)
		lastSequence = 1;
	
	scanKeys[0].sk_argument =
		Int32GetDatum(blockDirectory->currentSegmentFileNum);
	scanKeys[1].sk_argument = Int32GetDatum(columnGroupNo);
	scanKeys[2].sk_argument = Int64GetDatum(lastSequence);

	/*
	 * Search the btree to find the entry in the block directory
	 * that contains the last minipage.
	 */
	idxScanDesc = index_beginscan(blkdirRel, blkdirIdx,
								  blockDirectory->appendOnlyMetaDataSnapshot,
								  numScanKeys, scanKeys);
	
	tuple = index_getnext(idxScanDesc, BackwardScanDirection);
	if (tuple != NULL)
	{
		extract_minipage(blockDirectory,
						 tuple,
						 heapTupleDesc,
						 columnGroupNo);
	}
	
	index_endscan(idxScanDesc);

	MemoryContextSwitchTo(oldcxt);

	if (Debug_appendonly_print_blockdirectory)
		ereport(LOG,
				(errmsg("Append-only block directory load last minipage: "
						"(columnGroupNo, lastSequence, nEntries) = (%d, " INT64_FORMAT ", %u)",
						columnGroupNo, lastSequence,
						blockDirectory->minipages[columnGroupNo].numMinipageEntries)));
	
}
コード例 #4
0
ファイル: misc.c プロジェクト: jarulraj/postgres-cpp
/* Function to return the list of grammar keywords */
Datum
pg_get_keywords(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;
		TupleDesc	tupdesc;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		tupdesc = CreateTemplateTupleDesc(3, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "catcode",
						   CHAROID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "catdesc",
						   TEXTOID, -1, 0);

		funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();

	if (funcctx->call_cntr < NumScanKeywords)
	{
		char	   *values[3];
		HeapTuple	tuple;

		/* cast-away-const is ugly but alternatives aren't much better */
		values[0] = (char *) ScanKeywords[funcctx->call_cntr].name;

		switch (ScanKeywords[funcctx->call_cntr].category)
		{
			case UNRESERVED_KEYWORD:
				values[1] = "U";
				values[2] = _("unreserved");
				break;
			case COL_NAME_KEYWORD:
				values[1] = "C";
				values[2] = _("unreserved (cannot be function or type name)");
				break;
			case TYPE_FUNC_NAME_KEYWORD:
				values[1] = "T";
				values[2] = _("reserved (can be function or type name)");
				break;
			case RESERVED_KEYWORD:
				values[1] = "R";
				values[2] = _("reserved");
				break;
			default:			/* shouldn't be possible */
				values[1] = NULL;
				values[2] = NULL;
				break;
		}

		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);

		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
	}

	SRF_RETURN_DONE(funcctx);
}
コード例 #5
0
ファイル: ginvacuum.c プロジェクト: DBInsight/postgres
static bool
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
{
	Buffer		buffer;
	Page		page;
	bool		hasVoidPage = FALSE;
	MemoryContext oldCxt;

	buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, gvs->strategy);
	page = BufferGetPage(buffer);

	/*
	 * We should be sure that we don't concurrent with inserts, insert process
	 * never release root page until end (but it can unlock it and lock
	 * again). New scan can't start but previously started ones work
	 * concurrently.
	 */
	if (isRoot)
		LockBufferForCleanup(buffer);
	else
		LockBuffer(buffer, GIN_EXCLUSIVE);

	Assert(GinPageIsData(page));

	if (GinPageIsLeaf(page))
	{
		oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
		ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
		MemoryContextSwitchTo(oldCxt);
		MemoryContextReset(gvs->tmpCxt);

		/* if root is a leaf page, we don't desire further processing */
		if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
			hasVoidPage = TRUE;
	}
	else
	{
		OffsetNumber i;
		bool		isChildHasVoid = FALSE;

		for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
		{
			PostingItem *pitem = GinDataPageGetPostingItem(page, i);

			if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
				isChildHasVoid = TRUE;
		}

		if (isChildHasVoid)
			hasVoidPage = TRUE;
	}

	/*
	 * if we have root and there are empty pages in tree, then we don't
	 * release lock to go further processing and guarantee that tree is unused
	 */
	if (!(isRoot && hasVoidPage))
	{
		UnlockReleaseBuffer(buffer);
	}
	else
	{
		Assert(rootBuffer);
		*rootBuffer = buffer;
	}

	return hasVoidPage;
}
コード例 #6
0
ファイル: others.c プロジェクト: KinoSun/orafce
/*
 * decode(lhs, [rhs, ret], ..., [default])
 */
Datum
ora_decode(PG_FUNCTION_ARGS)
{
	int		nargs;
	int		i;
	int		retarg;

	/* default value is last arg or NULL. */
	nargs = PG_NARGS();
	if (nargs % 2 == 0)
	{
		retarg = nargs - 1;
		nargs -= 1;		/* ignore the last argument */
	}
	else
		retarg = -1;	/* NULL */

	if (PG_ARGISNULL(0))
	{
		for (i = 1; i < nargs; i += 2)
		{
			if (PG_ARGISNULL(i))
			{
				retarg = i + 1;
				break;
			}
		}
	}
	else
	{
		FmgrInfo   *eq;
		Oid		collation = PG_GET_COLLATION();

		/*
		 * On first call, get the input type's operator '=' and save at
		 * fn_extra.
		 */
		if (fcinfo->flinfo->fn_extra == NULL)
		{
			MemoryContext	oldctx;
			Oid				typid = get_fn_expr_argtype(fcinfo->flinfo, 0);
			Oid				eqoid = equality_oper_funcid(typid);

			oldctx = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
			eq = palloc(sizeof(FmgrInfo));
			fmgr_info(eqoid, eq);
			MemoryContextSwitchTo(oldctx);

			fcinfo->flinfo->fn_extra = eq;
		}
		else
			eq = fcinfo->flinfo->fn_extra;

		for (i = 1; i < nargs; i += 2)
		{
			FunctionCallInfoData	func;
			Datum					result;

			if (PG_ARGISNULL(i))
				continue;

			InitFunctionCallInfoData(func, eq, 2, collation, NULL, NULL);

			func.arg[0] = PG_GETARG_DATUM(0);
			func.arg[1] = PG_GETARG_DATUM(i);
			func.argnull[0] = false;
			func.argnull[1] = false;
			result = FunctionCallInvoke(&func);

			if (!func.isnull && DatumGetBool(result))
			{
				retarg = i + 1;
				break;
			}
		}
	}

	if (retarg < 0 || PG_ARGISNULL(retarg))
		PG_RETURN_NULL();
	else
		PG_RETURN_DATUM(PG_GETARG_DATUM(retarg));
}
コード例 #7
0
ファイル: util.c プロジェクト: i0seph/pgsampler
/* 
 * This function receives a command names and a query and it produces a protocol valid command string
 *   which can be send to the server based on the format of the result set.  It handles memory contexts
 *   appropriately, returning a pointer to the command string in the current context when called.
 *
 */
char* exec_to_command(const char* command, char* q) {
  StringInfoData resultbuf;
  char* result;
  int i, j, processed, retval;
  SPITupleTable *coltuptable;
  MemoryContext pre_context;
  MemoryContext spi_conn_context;
  
  // elog(LOG, "%s", command); //prints the current command running
  
  pre_context = CurrentMemoryContext;
  
  SetCurrentStatementStartTimestamp();
  StartTransactionCommand();
  SPI_connect();
  PushActiveSnapshot(GetTransactionSnapshot());
  
  retval = SPI_execute(q, false, 0);
  if (retval != SPI_OK_SELECT) {
    elog(LOG, "Database SELECT execution failed: %d", retval);
    SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		result = pstrdup("");
		return result;
  }  
  
  processed = SPI_processed;
  coltuptable = SPI_tuptable;
  
  initStringInfo(&resultbuf);

  appendStringInfoString(&resultbuf, command);
  appendStringInfoString(&resultbuf, ";"); //artisinal semicolon

  
  if (coltuptable != NULL) {
    for(i = 0; i < processed; i++) {
      for(j = 1; j <= coltuptable->tupdesc->natts; j++) {
  
  	    if (SPI_getvalue(coltuptable->vals[i], coltuptable->tupdesc, j) != NULL) {
    	    appendStringInfoString(&resultbuf, SPI_getvalue(coltuptable->vals[i], coltuptable->tupdesc, j));
    	  }
		    appendStringInfo(&resultbuf, FIELD_DELIMIT);
  		  
      }
      
      appendStringInfo(&resultbuf,REC_DELIMIT);
  
    }
  }
  

  appendStringInfo(&resultbuf, CDELIMIT);
  
  spi_conn_context = MemoryContextSwitchTo(pre_context);
  result = pstrdup(resultbuf.data);
  MemoryContextSwitchTo(spi_conn_context);

  SPI_finish();
  PopActiveSnapshot();
  CommitTransactionCommand();
  return result;
}
コード例 #8
0
/*
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.
 *
 * If isnew is NULL, we do not create new entries; we return NULL if no
 * match is found.
 *
 * If isnew isn't NULL, then a new entry is created if no existing entry
 * matches.  On return, *isnew is true if the entry is newly created,
 * false if it existed already.  Any extra space in a new entry has been
 * zeroed.
 */
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
					 bool *isnew)
{
	TupleHashEntry entry;
	MemoryContext oldContext;
	TupleHashTable saveCurHT;
	TupleHashEntryData dummy;
	bool		found;

	/* If first time through, clone the input slot to make table slot */
	if (hashtable->tableslot == NULL)
	{
		TupleDesc	tupdesc;

		oldContext = MemoryContextSwitchTo(hashtable->tablecxt);

		/*
		 * We copy the input tuple descriptor just for safety --- we assume
		 * all input tuples will have equivalent descriptors.
		 */
		tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
		hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
		MemoryContextSwitchTo(oldContext);
	}

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	/*
	 * Set up data needed by hash and match functions
	 *
	 * We save and restore CurTupleHashTable just in case someone manages to
	 * invoke this code re-entrantly.
	 */
	hashtable->inputslot = slot;
	saveCurHT = CurTupleHashTable;
	CurTupleHashTable = hashtable;

	/* Search the hash table */
	dummy.firstTuple = NULL;	/* flag to reference inputslot */
	entry = (TupleHashEntry) hash_search(hashtable->hashtab,
										 &dummy,
										 isnew ? HASH_ENTER : HASH_FIND,
										 &found);

	if (isnew)
	{
		if (found)
		{
			/* found pre-existing entry */
			*isnew = false;
		}
		else
		{
			/*
			 * created new entry
			 *
			 * Zero any caller-requested space in the entry.  (This zaps the
			 * "key data" dynahash.c copied into the new entry, but we don't
			 * care since we're about to overwrite it anyway.)
			 */
			MemSet(entry, 0, hashtable->entrysize);

			/* Copy the first tuple into the table context */
			MemoryContextSwitchTo(hashtable->tablecxt);
			entry->firstTuple = ExecCopySlotMinimalTuple(slot);

			*isnew = true;
		}
	}

	CurTupleHashTable = saveCurHT;

	MemoryContextSwitchTo(oldContext);

	return entry;
}
コード例 #9
0
ファイル: ts_cfg.c プロジェクト: merlintang/sgb
void
init_cfg(Oid id, TSCfgInfo * cfg)
{
    Oid			arg[2];
    bool		isnull;
    Datum		pars[2];
    int			stat,
                i,
                j;
    text	   *ptr;
    text	   *prsname = NULL;
    char	   *nsp = get_namespace(TSNSP_FunctionOid);
    char		buf[1024];
    MemoryContext oldcontext;
    void	   *plan;

    arg[0] = OIDOID;
    arg[1] = OIDOID;
    pars[0] = ObjectIdGetDatum(id);
    pars[1] = ObjectIdGetDatum(id);

    memset(cfg, 0, sizeof(TSCfgInfo));
    SPI_connect();

    sprintf(buf, "select prs_name from %s.pg_ts_cfg where oid = $1", nsp);
    plan = SPI_prepare(buf, 1, arg);
    if (!plan)
        ts_error(ERROR, "SPI_prepare() failed");

    stat = SPI_execp(plan, pars, " ", 1);
    if (stat < 0)
        ts_error(ERROR, "SPI_execp return %d", stat);
    if (SPI_processed > 0)
    {
        prsname = (text *) DatumGetPointer(
                      SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
                  );
        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
        prsname = ptextdup(prsname);
        MemoryContextSwitchTo(oldcontext);

        cfg->id = id;
    }
    else
        ts_error(ERROR, "No tsearch cfg with id %d", id);

    SPI_freeplan(plan);

    arg[0] = TEXTOID;
    sprintf(buf, "select lt.tokid, map.dict_name from %s.pg_ts_cfgmap as map, %s.pg_ts_cfg as cfg, %s.token_type( $1 ) as lt where lt.alias =  map.tok_alias and map.ts_name = cfg.ts_name and cfg.oid= $2 order by lt.tokid desc;", nsp, nsp, nsp);
    plan = SPI_prepare(buf, 2, arg);
    if (!plan)
        ts_error(ERROR, "SPI_prepare() failed");

    pars[0] = PointerGetDatum(prsname);
    stat = SPI_execp(plan, pars, " ", 0);
    if (stat < 0)
        ts_error(ERROR, "SPI_execp return %d", stat);
    if (SPI_processed <= 0)
        ts_error(ERROR, "No parser with id %d", id);

    for (i = 0; i < SPI_processed; i++)
    {
        int			lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
        ArrayType  *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
        ArrayType  *a;

        if (!cfg->map)
        {
            cfg->len = lexid + 1;
            cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
            if (!cfg->map)
                ereport(ERROR,
                        (errcode(ERRCODE_OUT_OF_MEMORY),
                         errmsg("out of memory")));
            memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
        }

        if (isnull)
            continue;

        a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));

        if (ARR_NDIM(a) != 1)
            ts_error(ERROR, "Wrong dimension");
        if (ARRNELEMS(a) < 1)
            continue;
        if (ARR_HASNULL(a))
            ts_error(ERROR, "Array must not contain nulls");

        cfg->map[lexid].len = ARRNELEMS(a);
        cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
        if (!cfg->map[lexid].dict_id)
            ts_error(ERROR, "No memory");

        memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
        ptr = (text *) ARR_DATA_PTR(a);
        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
        for (j = 0; j < cfg->map[lexid].len; j++)
        {
            cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
            ptr = NEXTVAL(ptr);
        }
        MemoryContextSwitchTo(oldcontext);

        if (a != toasted_a)
            pfree(a);
    }

    SPI_freeplan(plan);
    SPI_finish();
    cfg->prs_id = name2id_prs(prsname);
    pfree(prsname);
    pfree(nsp);
    for (i = 0; i < cfg->len; i++)
    {
        for (j = 0; j < cfg->map[i].len; j++)
        {
            ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
            cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
            pfree(ptr);
        }
    }
}
コード例 #10
0
ファイル: rewriteheap.c プロジェクト: 42penguins/postgres
/*
 * Add a tuple to the new heap.
 *
 * Visibility information is copied from the original tuple, except that
 * we "freeze" very-old tuples.  Note that since we scribble on new_tuple,
 * it had better be temp storage not a pointer to the original tuple.
 *
 * state		opaque state as returned by begin_heap_rewrite
 * old_tuple	original tuple in the old heap
 * new_tuple	new, rewritten tuple to be inserted to new heap
 */
void
rewrite_heap_tuple(RewriteState state,
				   HeapTuple old_tuple, HeapTuple new_tuple)
{
	MemoryContext old_cxt;
	ItemPointerData old_tid;
	TidHashKey	hashkey;
	bool		found;
	bool		free_new;

	old_cxt = MemoryContextSwitchTo(state->rs_cxt);

	/*
	 * Copy the original tuple's visibility information into new_tuple.
	 *
	 * XXX we might later need to copy some t_infomask2 bits, too? Right now,
	 * we intentionally clear the HOT status bits.
	 */
	memcpy(&new_tuple->t_data->t_choice.t_heap,
		   &old_tuple->t_data->t_choice.t_heap,
		   sizeof(HeapTupleFields));

	new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
	new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
	new_tuple->t_data->t_infomask |=
		old_tuple->t_data->t_infomask & HEAP_XACT_MASK;

	/*
	 * While we have our hands on the tuple, we may as well freeze any
	 * very-old xmin or xmax, so that future VACUUM effort can be saved.
	 */
	heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
					  state->rs_cutoff_multi);

	/*
	 * Invalid ctid means that ctid should point to the tuple itself. We'll
	 * override it later if the tuple is part of an update chain.
	 */
	ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);

	/*
	 * If the tuple has been updated, check the old-to-new mapping hash table.
	 */
	if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
		  HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
		!(ItemPointerEquals(&(old_tuple->t_self),
							&(old_tuple->t_data->t_ctid))))
	{
		OldToNewMapping mapping;

		memset(&hashkey, 0, sizeof(hashkey));
		hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
		hashkey.tid = old_tuple->t_data->t_ctid;

		mapping = (OldToNewMapping)
			hash_search(state->rs_old_new_tid_map, &hashkey,
						HASH_FIND, NULL);

		if (mapping != NULL)
		{
			/*
			 * We've already copied the tuple that t_ctid points to, so we can
			 * set the ctid of this tuple to point to the new location, and
			 * insert it right away.
			 */
			new_tuple->t_data->t_ctid = mapping->new_tid;

			/* We don't need the mapping entry anymore */
			hash_search(state->rs_old_new_tid_map, &hashkey,
						HASH_REMOVE, &found);
			Assert(found);
		}
		else
		{
			/*
			 * We haven't seen the tuple t_ctid points to yet. Stash this
			 * tuple into unresolved_tups to be written later.
			 */
			UnresolvedTup unresolved;

			unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
									 HASH_ENTER, &found);
			Assert(!found);

			unresolved->old_tid = old_tuple->t_self;
			unresolved->tuple = heap_copytuple(new_tuple);

			/*
			 * We can't do anything more now, since we don't know where the
			 * tuple will be written.
			 */
			MemoryContextSwitchTo(old_cxt);
			return;
		}
	}

	/*
	 * Now we will write the tuple, and then check to see if it is the B tuple
	 * in any new or known pair.  When we resolve a known pair, we will be
	 * able to write that pair's A tuple, and then we have to check if it
	 * resolves some other pair.  Hence, we need a loop here.
	 */
	old_tid = old_tuple->t_self;
	free_new = false;

	for (;;)
	{
		ItemPointerData new_tid;

		/* Insert the tuple and find out where it's put in new_heap */
		raw_heap_insert(state, new_tuple);
		new_tid = new_tuple->t_self;

		/*
		 * If the tuple is the updated version of a row, and the prior version
		 * wouldn't be DEAD yet, then we need to either resolve the prior
		 * version (if it's waiting in rs_unresolved_tups), or make an entry
		 * in rs_old_new_tid_map (so we can resolve it when we do see it). The
		 * previous tuple's xmax would equal this one's xmin, so it's
		 * RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
		 */
		if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
			!TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
								   state->rs_oldest_xmin))
		{
			/*
			 * Okay, this is B in an update pair.  See if we've seen A.
			 */
			UnresolvedTup unresolved;

			memset(&hashkey, 0, sizeof(hashkey));
			hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
			hashkey.tid = old_tid;

			unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
									 HASH_FIND, NULL);

			if (unresolved != NULL)
			{
				/*
				 * We have seen and memorized the previous tuple already. Now
				 * that we know where we inserted the tuple its t_ctid points
				 * to, fix its t_ctid and insert it to the new heap.
				 */
				if (free_new)
					heap_freetuple(new_tuple);
				new_tuple = unresolved->tuple;
				free_new = true;
				old_tid = unresolved->old_tid;
				new_tuple->t_data->t_ctid = new_tid;

				/*
				 * We don't need the hash entry anymore, but don't free its
				 * tuple just yet.
				 */
				hash_search(state->rs_unresolved_tups, &hashkey,
							HASH_REMOVE, &found);
				Assert(found);

				/* loop back to insert the previous tuple in the chain */
				continue;
			}
			else
			{
				/*
				 * Remember the new tid of this tuple. We'll use it to set the
				 * ctid when we find the previous tuple in the chain.
				 */
				OldToNewMapping mapping;

				mapping = hash_search(state->rs_old_new_tid_map, &hashkey,
									  HASH_ENTER, &found);
				Assert(!found);

				mapping->new_tid = new_tid;
			}
		}

		/* Done with this (chain of) tuples, for now */
		if (free_new)
			heap_freetuple(new_tuple);
		break;
	}

	MemoryContextSwitchTo(old_cxt);
}
コード例 #11
0
/*
 * Main entry point for walwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;
	int			left_till_hibernate;
	bool		hibernating;

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, walwriter_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Reset hibernation state after any error.
	 */
	left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
	hibernating = false;
	SetWalWriterSleeping(false);

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->walwriterLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		cur_timeout;
		int			rc;

		/*
		 * Advertise whether we might hibernate in this cycle.  We do this
		 * before resetting the latch to ensure that any async commits will
		 * see the flag set if they might possibly need to wake us up, and
		 * that we won't miss any signal they send us.  (If we discover work
		 * to do in the last cycle before we would hibernate, the global flag
		 * will be set unnecessarily, but little harm is done.)  But avoid
		 * touching the global flag if it doesn't need to change.
		 */
		if (hibernating != (left_till_hibernate <= 1))
		{
			hibernating = (left_till_hibernate <= 1);
			SetWalWriterSleeping(hibernating);
		}

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for; then, if XLogBackgroundFlush() found useful
		 * work to do, reset hibernation counter.
		 */
		if (XLogBackgroundFlush())
			left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
		else if (left_till_hibernate > 0)
			left_till_hibernate--;

		/*
		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
		 * haven't done anything useful for quite some time, lengthen the
		 * sleep time so as to reduce the server's idle power consumption.
		 */
		if (left_till_hibernate > 0)
			cur_timeout = WalWriterDelay;		/* in ms */
		else
			cur_timeout = WalWriterDelay * HIBERNATE_FACTOR;

		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}
コード例 #12
0
ファイル: execWorkfile.c プロジェクト: AnLingm/gpdb
/*
 * Opens an existing work file with the specified name, the file type,
 * and the compression type.
 *
 * If this fails, NULL is returned.
 */
ExecWorkFile *
ExecWorkFile_Open(const char *fileName,
					ExecWorkFileType fileType,
					bool delOnClose,
					int compressType)
{
	ExecWorkFile *workfile = NULL;
	void *file = NULL;
	int64 file_size = 0;

	/*
	 * Create ExecWorkFile in the TopMemoryContext since this memory context
	 * is still available when calling the transaction callback at the
	 * time when the transaction aborts.
	 */
	MemoryContext oldContext = MemoryContextSwitchTo(TopMemoryContext);

	switch(fileType)
	{
		case BUFFILE:
			file = (void *)BufFileOpenFile(fileName,
					false, /* Create */
					delOnClose,
					true  /* interXact */ );
			if (!file)
			{
				elog(ERROR, "could not open temporary file \"%s\": %m", fileName);
			}
			BufFileSetWorkfile(file);
			file_size = BufFileGetSize(file);

			break;
		case BFZ:
			file = (void *)bfz_open(fileName, delOnClose, compressType);
			if (!file)
			{
				elog(ERROR, "could not open temporary file \"%s\": %m", fileName);
			}
			file_size = bfz_totalbytes((bfz_t *)file);
			break;
		default:
			ereport(LOG,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("invalid work file type: %d", fileType)));
			Assert(false);
	}

	/* Failed opening existing workfile. Inform the caller */
	if (NULL == file)
	{
		MemoryContextSwitchTo(oldContext);
		return NULL;
	}

	workfile = palloc0(sizeof(ExecWorkFile));

	workfile->fileType = fileType;
	workfile->compressType = compressType;
	workfile->file = file;
	workfile->fileName = pstrdup(fileName);
	workfile->size = file_size;
	ExecWorkFile_SetFlags(workfile, delOnClose, false /* created */);

	MemoryContextSwitchTo(oldContext);

	return workfile;
}
コード例 #13
0
ファイル: checkpointer.c プロジェクト: pguyot/postgres
/*
 * Main entry point for checkpointer process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
CheckpointerMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext checkpointer_context;

	BgWriterShmem->checkpointer_pid = MyProcPid;
	am_checkpointer = true;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(checkpointer probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.	We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 *
	 * SIGUSR1 is presently unused; keep it spare in case someday we want this
	 * process to participate in ProcSignal signalling.
	 */
	pqsignal(SIGHUP, ChkptSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);	/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN);				/* ignore SIGTERM */
	pqsignal(SIGQUIT, chkpt_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	checkpointer_context = AllocSetContextCreate(TopMemoryContext,
											 "Checkpointer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(checkpointer_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in checkpointer, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_failed++;
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(checkpointer_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(checkpointer_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Use the recovery target timeline ID during recovery
	 */
	if (RecoveryInProgress())
		ThisTimeLineID = GetRecoveryTargetTLI();

	/* Do this once before starting the loop, then just at SIGHUP time. */
	SyncRepUpdateSyncStandbysDefined();

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		pg_time_t	now;
		int			elapsed_secs;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			/* update global shmem state for sync rep */
			SyncRepUpdateSyncStandbysDefined();
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			/* Normal exit from the checkpointer is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested.
		 */
		if (do_checkpoint)
		{
			bool		ckpt_performed = false;
			bool		do_restartpoint;

			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			/*
			 * Check if we should perform a checkpoint or a restartpoint. As a
			 * side-effect, RecoveryInProgress() initializes TimeLineID if
			 * it's not set yet.
			 */
			do_restartpoint = RecoveryInProgress();

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			flags |= bgs->ckpt_flags;
			bgs->ckpt_flags = 0;
			bgs->ckpt_started++;
			SpinLockRelease(&bgs->ckpt_lck);

			/*
			 * The end-of-recovery checkpoint is a real checkpoint that's
			 * performed while we're still in recovery.
			 */
			if (flags & CHECKPOINT_END_OF_RECOVERY)
				do_restartpoint = false;

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if (!do_restartpoint &&
				(flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
				"checkpoints are occurring too frequently (%d seconds apart)",
									   elapsed_secs,
									   elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize checkpointer-private variables used during checkpoint.
			 */
			ckpt_active = true;
			if (!do_restartpoint)
				ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			if (!do_restartpoint)
			{
				CreateCheckPoint(flags);
				ckpt_performed = true;
			}
			else
				ckpt_performed = CreateRestartPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.	This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			if (ckpt_performed)
			{
				/*
				 * Note we record the checkpoint start time not end time as
				 * last_checkpoint_time.  This is so that time-driven
				 * checkpoints happen at a predictable spacing.
				 */
				last_checkpoint_time = now;
			}
			else
			{
				/*
				 * We were not able to perform the restartpoint (checkpoints
				 * throw an ERROR in case of error).  Most likely because we
				 * have not received any new checkpoint WAL records since the
				 * last restartpoint. Try again in 15 s.
				 */
				last_checkpoint_time = now - CheckPointTimeout + 15;
			}

			ckpt_active = false;
		}

		/*
		 * Send off activity statistics to the stats collector
		 */
		pgstat_send_bgwriter();

		/*
		 * Nap for a while and then loop again. Later patches will replace
		 * this with a latch loop. Keep it simple now for clarity.
		 * Relatively long sleep because the bgwriter does cleanup now.
		 */
		pg_usleep(500000L);

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();
	}
}
コード例 #14
0
ファイル: portalcmds.c プロジェクト: dreamsxin/postgresql-1
/*
 * PerformCursorOpen
 *		Execute SQL DECLARE CURSOR command.
 */
void
PerformCursorOpen(DeclareCursorStmt *cstmt, ParamListInfo params,
				  const char *queryString, bool isTopLevel)
{
	Query	   *query = castNode(Query, cstmt->query);
	List	   *rewritten;
	PlannedStmt *plan;
	Portal		portal;
	MemoryContext oldContext;

	/*
	 * Disallow empty-string cursor name (conflicts with protocol-level
	 * unnamed portal).
	 */
	if (!cstmt->portalname || cstmt->portalname[0] == '\0')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_CURSOR_NAME),
				 errmsg("invalid cursor name: must not be empty")));

	/*
	 * If this is a non-holdable cursor, we require that this statement has
	 * been executed inside a transaction block (or else, it would have no
	 * user-visible effect).
	 */
	if (!(cstmt->options & CURSOR_OPT_HOLD))
		RequireTransactionChain(isTopLevel, "DECLARE CURSOR");

	/*
	 * Parse analysis was done already, but we still have to run the rule
	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
	 * came straight from the parser, or suitable locks were acquired by
	 * plancache.c.
	 *
	 * Because the rewriter and planner tend to scribble on the input, we make
	 * a preliminary copy of the source querytree.  This prevents problems in
	 * the case that the DECLARE CURSOR is in a portal or plpgsql function and
	 * is executed repeatedly.  (See also the same hack in EXPLAIN and
	 * PREPARE.)  XXX FIXME someday.
	 */
	rewritten = QueryRewrite((Query *) copyObject(query));

	/* SELECT should never rewrite to more or less than one query */
	if (list_length(rewritten) != 1)
		elog(ERROR, "non-SELECT statement in DECLARE CURSOR");

	query = castNode(Query, linitial(rewritten));

	if (query->commandType != CMD_SELECT)
		elog(ERROR, "non-SELECT statement in DECLARE CURSOR");

	/* Plan the query, applying the specified options */
	plan = pg_plan_query(query, cstmt->options, params);

	/*
	 * Create a portal and copy the plan and queryString into its memory.
	 */
	portal = CreatePortal(cstmt->portalname, false, false);

	oldContext = MemoryContextSwitchTo(PortalGetHeapMemory(portal));

	plan = copyObject(plan);

	queryString = pstrdup(queryString);

	PortalDefineQuery(portal,
					  NULL,
					  queryString,
					  "SELECT", /* cursor's query is always a SELECT */
					  list_make1(plan),
					  NULL);

	/*----------
	 * Also copy the outer portal's parameter list into the inner portal's
	 * memory context.  We want to pass down the parameter values in case we
	 * had a command like
	 *		DECLARE c CURSOR FOR SELECT ... WHERE foo = $1
	 * This will have been parsed using the outer parameter set and the
	 * parameter value needs to be preserved for use when the cursor is
	 * executed.
	 *----------
	 */
	params = copyParamList(params);

	MemoryContextSwitchTo(oldContext);

	/*
	 * Set up options for portal.
	 *
	 * If the user didn't specify a SCROLL type, allow or disallow scrolling
	 * based on whether it would require any additional runtime overhead to do
	 * so.  Also, we disallow scrolling for FOR UPDATE cursors.
	 */
	portal->cursorOptions = cstmt->options;
	if (!(portal->cursorOptions & (CURSOR_OPT_SCROLL | CURSOR_OPT_NO_SCROLL)))
	{
		if (plan->rowMarks == NIL &&
			ExecSupportsBackwardScan(plan->planTree))
			portal->cursorOptions |= CURSOR_OPT_SCROLL;
		else
			portal->cursorOptions |= CURSOR_OPT_NO_SCROLL;
	}

	/*
	 * Start execution, inserting parameters if any.
	 */
	PortalStart(portal, params, 0, GetActiveSnapshot());

	Assert(portal->strategy == PORTAL_ONE_SELECT);

	/*
	 * We're done; the query won't actually be run until PerformPortalFetch is
	 * called.
	 */
}
コード例 #15
0
ファイル: brin.c プロジェクト: JiannengSun/postgres
/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of scan
 * keys.
 *
 * XXX see _bt_first on what to do about sk_subtype.
 */
Datum
bringetbitmap(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
	Relation	idxRel = scan->indexRelation;
	Buffer		buf = InvalidBuffer;
	BrinDesc   *bdesc;
	Oid			heapOid;
	Relation	heapRel;
	BrinOpaque *opaque;
	BlockNumber nblocks;
	BlockNumber heapBlk;
	int			totalpages = 0;
	int			keyno;
	FmgrInfo   *consistentFn;
	MemoryContext oldcxt;
	MemoryContext perRangeCxt;

	opaque = (BrinOpaque *) scan->opaque;
	bdesc = opaque->bo_bdesc;
	pgstat_count_index_scan(idxRel);

	/*
	 * We need to know the size of the table so that we know how long to
	 * iterate on the revmap.
	 */
	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
	heapRel = heap_open(heapOid, AccessShareLock);
	nblocks = RelationGetNumberOfBlocks(heapRel);
	heap_close(heapRel, AccessShareLock);

	/*
	 * Obtain consistent functions for all indexed column.  Maybe it'd be
	 * possible to do this lazily only the first time we see a scan key that
	 * involves each particular attribute.
	 */
	consistentFn = palloc(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
	{
		FmgrInfo   *tmp;

		tmp = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_CONSISTENT);
		fmgr_info_copy(&consistentFn[keyno], tmp, CurrentMemoryContext);
	}

	/*
	 * Setup and use a per-range memory context, which is reset every time we
	 * loop below.  This avoids having to free the tuples within the loop.
	 */
	perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
										"bringetbitmap cxt",
										ALLOCSET_DEFAULT_MINSIZE,
										ALLOCSET_DEFAULT_INITSIZE,
										ALLOCSET_DEFAULT_MAXSIZE);
	oldcxt = MemoryContextSwitchTo(perRangeCxt);

	/*
	 * Now scan the revmap.  We start by querying for heap page 0,
	 * incrementing by the number of pages per range; this gives us a full
	 * view of the table.
	 */
	for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
	{
		bool		addrange;
		BrinTuple  *tup;
		OffsetNumber off;
		Size		size;

		CHECK_FOR_INTERRUPTS();

		MemoryContextResetAndDeleteChildren(perRangeCxt);

		tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
									   &off, &size, BUFFER_LOCK_SHARE);
		if (tup)
		{
			tup = brin_copy_tuple(tup, size);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		/*
		 * For page ranges with no indexed tuple, we must return the whole
		 * range; otherwise, compare it to the scan keys.
		 */
		if (tup == NULL)
		{
			addrange = true;
		}
		else
		{
			BrinMemTuple *dtup;
			int			keyno;

			dtup = brin_deform_tuple(bdesc, tup);
			if (dtup->bt_placeholder)
			{
				/*
				 * Placeholder tuples are always returned, regardless of the
				 * values stored in them.
				 */
				addrange = true;
			}
			else
			{
				/*
				 * Compare scan keys with summary values stored for the range.
				 * If scan keys are matched, the page range must be added to
				 * the bitmap.  We initially assume the range needs to be
				 * added; in particular this serves the case where there are
				 * no keys.
				 */
				addrange = true;
				for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
				{
					ScanKey		key = &scan->keyData[keyno];
					AttrNumber	keyattno = key->sk_attno;
					BrinValues *bval = &dtup->bt_columns[keyattno - 1];
					Datum		add;

					/*
					 * The collation of the scan key must match the collation
					 * used in the index column (but only if the search is not
					 * IS NULL/ IS NOT NULL).  Otherwise we shouldn't be using
					 * this index ...
					 */
					Assert((key->sk_flags & SK_ISNULL) ||
						   (key->sk_collation ==
					   bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation));

					/*
					 * Check whether the scan key is consistent with the page
					 * range values; if so, have the pages in the range added
					 * to the output bitmap.
					 *
					 * When there are multiple scan keys, failure to meet the
					 * criteria for a single one of them is enough to discard
					 * the range as a whole, so break out of the loop as soon
					 * as a false return value is obtained.
					 */
					add = FunctionCall3Coll(&consistentFn[keyattno - 1],
											key->sk_collation,
											PointerGetDatum(bdesc),
											PointerGetDatum(bval),
											PointerGetDatum(key));
					addrange = DatumGetBool(add);
					if (!addrange)
						break;
				}
			}
		}

		/* add the pages in the range to the output bitmap, if needed */
		if (addrange)
		{
			BlockNumber pageno;

			for (pageno = heapBlk;
				 pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
				 pageno++)
			{
				MemoryContextSwitchTo(oldcxt);
				tbm_add_page(tbm, pageno);
				totalpages++;
				MemoryContextSwitchTo(perRangeCxt);
			}
		}
	}

	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(perRangeCxt);

	if (buf != InvalidBuffer)
		ReleaseBuffer(buf);

	/*
	 * XXX We have an approximation of the number of *pages* that our scan
	 * returns, but we don't have a precise idea of the number of heap tuples
	 * involved.
	 */
	PG_RETURN_INT64(totalpages * 10);
}
コード例 #16
0
ファイル: combocid.c プロジェクト: phan-pivotal/gpdb
void
dumpSharedComboCommandId(TransactionId xmin, CommandId cmin, CommandId cmax, CommandId combocid)
{
	/*
	 * In any given segment, there are many readers, but only one writer. The
	 * combo cid file information is stored in the MyProc of the writer process,
	 * and is referenced by reader process via lockHolderProcPtr.  The writer
	 * will setup and/or dump combocids to a combo cid file when appropriate.
	 * The writer keeps track of the number of entries in the combo cid file in
	 * MyProc->combocid_map_count. Readers reference the count via
	 * lockHolderProcPtr->combocid_map_count.
	 *
	 * Since combo cid file entries are always appended to the end of a combo
	 * cid file and because there is only one writer, it is not necessary to
	 * lock the combo cid file during reading or writing. A new combo cid will
	 * not become visable to the reader until the combocid_map_count variable
	 * has been incremented.
	 */

	ComboCidEntryData entry;

	Assert(Gp_role != GP_ROLE_EXECUTE || Gp_is_writer);

	if (combocid_map == NULL)
	{
		/* This is the first time a combo cid is to be written by this writer. */
		MemoryContext oldCtx;
		char			path[MAXPGPATH];

		MyProc->combocid_map_count = 0;

		ComboCidMapName(path, gp_session_id, MyProc->pid);

		/* open our file, as appropriate: this will throw an error if the create-fails. */
		oldCtx = MemoryContextSwitchTo(TopMemoryContext);

		/*
		 * XXX: We could probably close and delete the file at the end of
		 * transaction.  We would then need to keep combocid_map_count
		 * synchronized with open files at (sub-) xact boundaries.
		 */
		combocid_map = BufFileCreateTemp_ReaderWriter(path, true, true);
		MemoryContextSwitchTo(oldCtx);
	}
	Assert(combocid_map != NULL);

	/* Seek to the end: BufFileSeek() doesn't support SEEK_END! */

	/* build our entry */
	memset(&entry, 0, sizeof(entry));
	entry.key.cmin = cmin;
	entry.key.cmax = cmax;
	entry.key.xmin = xmin;
	entry.combocid = combocid;

	/* write our entry */
	if (BufFileWrite(combocid_map, &entry, sizeof(entry)) != sizeof(entry))
	{
		elog(ERROR, "Combocid map I/O error!");
	}

	/* flush our output */
	BufFileFlush(combocid_map);

	/* Increment combocid count to make new combocid visible to Readers */
	MyProc->combocid_map_count += 1;
}
コード例 #17
0
ファイル: brin.c プロジェクト: JiannengSun/postgres
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do.
 */
Datum
brininsert(PG_FUNCTION_ARGS)
{
	Relation	idxRel = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *nulls = (bool *) PG_GETARG_POINTER(2);
	ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3);

	/* we ignore the rest of our arguments */
	BlockNumber pagesPerRange;
	BrinDesc   *bdesc = NULL;
	BrinRevmap *revmap;
	Buffer		buf = InvalidBuffer;
	MemoryContext tupcxt = NULL;
	MemoryContext oldcxt = NULL;

	revmap = brinRevmapInitialize(idxRel, &pagesPerRange);

	for (;;)
	{
		bool		need_insert = false;
		OffsetNumber off;
		BrinTuple  *brtup;
		BrinMemTuple *dtup;
		BlockNumber heapBlk;
		int			keyno;
#ifdef USE_ASSERT_CHECKING
		BrinTuple  *tmptup;
		BrinMemTuple *tmpdtup;
		Size 		tmpsiz;
#endif

		CHECK_FOR_INTERRUPTS();

		heapBlk = ItemPointerGetBlockNumber(heaptid);
		/* normalize the block number to be the first block in the range */
		heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
		brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
										 BUFFER_LOCK_SHARE);

		/* if range is unsummarized, there's nothing to do */
		if (!brtup)
			break;

		/* First time through? */
		if (bdesc == NULL)
		{
			bdesc = brin_build_desc(idxRel);
			tupcxt = AllocSetContextCreate(CurrentMemoryContext,
										   "brininsert cxt",
										   ALLOCSET_DEFAULT_MINSIZE,
										   ALLOCSET_DEFAULT_INITSIZE,
										   ALLOCSET_DEFAULT_MAXSIZE);
			oldcxt = MemoryContextSwitchTo(tupcxt);
		}

		dtup = brin_deform_tuple(bdesc, brtup);

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * When assertions are enabled, we use this as an opportunity to
			 * test the "union" method, which would otherwise be used very
			 * rarely: first create a placeholder tuple, and addValue the
			 * value we just got into it.  Then union the existing index tuple
			 * with the updated placeholder tuple.  The tuple resulting from
			 * that union should be identical to the one resulting from the
			 * regular operation (straight addValue) below.
			 *
			 * Here we create the tuple to compare with; the actual comparison
			 * is below.
			 */
			tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz);
			tmpdtup = brin_deform_tuple(bdesc, tmptup);
			for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
			{
				BrinValues *bval;
				FmgrInfo   *addValue;

				bval = &tmpdtup->bt_columns[keyno];
				addValue = index_getprocinfo(idxRel, keyno + 1,
											 BRIN_PROCNUM_ADDVALUE);
				FunctionCall4Coll(addValue,
								  idxRel->rd_indcollation[keyno],
								  PointerGetDatum(bdesc),
								  PointerGetDatum(bval),
								  values[keyno],
								  nulls[keyno]);
			}

			union_tuples(bdesc, tmpdtup, brtup);

			tmpdtup->bt_placeholder = dtup->bt_placeholder;
			tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz);
		}
#endif

		/*
		 * Compare the key values of the new tuple to the stored index values;
		 * our deformed tuple will get updated if the new tuple doesn't fit
		 * the original range (note this means we can't break out of the loop
		 * early). Make a note of whether this happens, so that we know to
		 * insert the modified tuple later.
		 */
		for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
		{
			Datum		result;
			BrinValues *bval;
			FmgrInfo   *addValue;

			bval = &dtup->bt_columns[keyno];
			addValue = index_getprocinfo(idxRel, keyno + 1,
										 BRIN_PROCNUM_ADDVALUE);
			result = FunctionCall4Coll(addValue,
									   idxRel->rd_indcollation[keyno],
									   PointerGetDatum(bdesc),
									   PointerGetDatum(bval),
									   values[keyno],
									   nulls[keyno]);
			/* if that returned true, we need to insert the updated tuple */
			need_insert |= DatumGetBool(result);
		}

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Now we can compare the tuple produced by the union function
			 * with the one from plain addValue.
			 */
			BrinTuple  *cmptup;
			Size		cmpsz;

			cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz);
			Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz));
		}
#endif

		if (!need_insert)
		{
			/*
			 * The tuple is consistent with the new values, so there's nothing
			 * to do.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
		else
		{
			Page		page = BufferGetPage(buf);
			ItemId		lp = PageGetItemId(page, off);
			Size		origsz;
			BrinTuple  *origtup;
			Size		newsz;
			BrinTuple  *newtup;
			bool		samepage;

			/*
			 * Make a copy of the old tuple, so that we can compare it after
			 * re-acquiring the lock.
			 */
			origsz = ItemIdGetLength(lp);
			origtup = brin_copy_tuple(brtup, origsz);

			/*
			 * Before releasing the lock, check if we can attempt a same-page
			 * update.  Another process could insert a tuple concurrently in
			 * the same page though, so downstream we must be prepared to cope
			 * if this turns out to not be possible after all.
			 */
			newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
			samepage = brin_can_do_samepage_update(buf, origsz, newsz);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/*
			 * Try to update the tuple.  If this doesn't work for whatever
			 * reason, we need to restart from the top; the revmap might be
			 * pointing at a different tuple for this block now, so we need to
			 * recompute to ensure both our new heap tuple and the other
			 * inserter's are covered by the combined tuple.  It might be that
			 * we don't need to update at all.
			 */
			if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
							   buf, off, origtup, origsz, newtup, newsz,
							   samepage))
			{
				/* no luck; start over */
				MemoryContextResetAndDeleteChildren(tupcxt);
				continue;
			}
		}

		/* success! */
		break;
	}

	brinRevmapTerminate(revmap);
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	if (bdesc != NULL)
	{
		brin_free_desc(bdesc);
		MemoryContextSwitchTo(oldcxt);
		MemoryContextDelete(tupcxt);
	}

	return BoolGetDatum(false);
}
コード例 #18
0
ファイル: combocid.c プロジェクト: phan-pivotal/gpdb
void
loadSharedComboCommandId(TransactionId xmin, CommandId combocid, CommandId *cmin, CommandId *cmax)
{
	bool		found = false;
	ComboCidEntryData entry;
	int			i;

	Assert(Gp_role == GP_ROLE_EXECUTE);
	Assert(!Gp_is_writer);
	Assert(cmin != NULL);
	Assert(cmax != NULL);

	if (lockHolderProcPtr == NULL)
	{
		/* get lockholder! */
		elog(ERROR, "loadSharedComboCommandId: NO LOCK HOLDER POINTER.");
	}

	if (combocid_map == NULL)
	{
		MemoryContext oldCtx;
		char			path[MAXPGPATH];

		ComboCidMapName(path, gp_session_id, lockHolderProcPtr->pid);
		/* open our file, as appropriate: this will throw an error if the create-fails. */
		oldCtx = MemoryContextSwitchTo(TopMemoryContext);
		combocid_map = BufFileCreateTemp_ReaderWriter(path, false, true);
		MemoryContextSwitchTo(oldCtx);
	}
	Assert(combocid_map != NULL);

	/* Seek to the beginning to start our search ? */
	if (BufFileSeek(combocid_map, 0 /* fileno */, 0 /* offset */, SEEK_SET) != 0)
	{
		elog(ERROR, "loadSharedComboCommandId: seek to beginning failed.");
	}

	/*
	 * Read this entry in ...
	 *
	 * We're going to read in the entire table, caching all occurrences of
	 * our xmin.
	 */
	for (i = 0; i < lockHolderProcPtr->combocid_map_count; i++)
	{
		if (BufFileRead(combocid_map, &entry, sizeof(ComboCidEntryData)) != sizeof(ComboCidEntryData))
		{
			elog(ERROR, "loadSharedComboCommandId: read failed I/O error.");
		}

		if (entry.key.xmin == xmin)
		{
			bool		cached = false;
			readerComboCidKeyData reader_key;
			readerComboCidEntryData *reader_entry;

			memset(&reader_key, 0, sizeof(reader_key));
			reader_key.writer_pid = lockHolderProcPtr->pid;
			reader_key.xmin = entry.key.xmin;
			reader_key.session = gp_session_id;
			reader_key.combocid = entry.combocid;

			reader_entry = (readerComboCidEntryData *)
				hash_search(readerComboHash, &reader_key, HASH_ENTER, &cached);

			if (!cached)
			{
				reader_entry->cmin = entry.key.cmin;
				reader_entry->cmax = entry.key.cmax;
			}

			/*
			 * This was our entry -- we're going to continue our scan,
			 * to pull in any additional entries for our xmin
			 */
			if (entry.combocid == combocid)
			{
				*cmin = entry.key.cmin;
				*cmax = entry.key.cmax;
				found = true;
			}
		}
	}

	if (!found)
	{
		elog(ERROR, "loadSharedComboCommandId: no combocid entry found for %u/%u", xmin, combocid);
	}
}
コード例 #19
0
ファイル: prepare.c プロジェクト: sunyangkobe/cscd43
/*
 * Implements the 'EXECUTE' utility statement.
 */
void
ExecuteQuery(ExecuteStmt *stmt, DestReceiver *dest)
{
	PreparedStatement *entry;
	char	   *query_string;
	List	   *query_list,
			   *plan_list;
	MemoryContext qcontext;
	ParamListInfo paramLI = NULL;
	EState	   *estate = NULL;
	Portal		portal;

	/* Look it up in the hash table */
	entry = FetchPreparedStatement(stmt->name, true);

	query_string = entry->query_string;
	query_list = entry->query_list;
	plan_list = entry->plan_list;
	qcontext = entry->context;

	Assert(length(query_list) == length(plan_list));

	/* Evaluate parameters, if any */
	if (entry->argtype_list != NIL)
	{
		/*
		 * Need an EState to evaluate parameters; must not delete it till
		 * end of query, in case parameters are pass-by-reference.
		 */
		estate = CreateExecutorState();
		paramLI = EvaluateParams(estate, stmt->params, entry->argtype_list);
	}

	/*
	 * Create a new portal to run the query in
	 */
	portal = CreateNewPortal();

	/*
	 * For CREATE TABLE / AS EXECUTE, make a copy of the stored query so
	 * that we can modify its destination (yech, but this has always been
	 * ugly).  For regular EXECUTE we can just use the stored query where
	 * it sits, since the executor is read-only.
	 */
	if (stmt->into)
	{
		MemoryContext oldContext;
		Query	   *query;

		oldContext = MemoryContextSwitchTo(PortalGetHeapMemory(portal));

		if (query_string)
			query_string = pstrdup(query_string);
		query_list = copyObject(query_list);
		plan_list = copyObject(plan_list);
		qcontext = PortalGetHeapMemory(portal);

		if (length(query_list) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("prepared statement is not a SELECT")));
		query = (Query *) lfirst(query_list);
		if (query->commandType != CMD_SELECT)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("prepared statement is not a SELECT")));
		query->into = copyObject(stmt->into);

		MemoryContextSwitchTo(oldContext);
	}

	PortalDefineQuery(portal,
					  query_string,
					  entry->commandTag,
					  query_list,
					  plan_list,
					  qcontext);

	/*
	 * Run the portal to completion.
	 */
	PortalStart(portal, paramLI);

	(void) PortalRun(portal, FETCH_ALL, dest, dest, NULL);

	PortalDrop(portal, false);

	if (estate)
		FreeExecutorState(estate);

	/* No need to pfree other memory, MemoryContext will be reset */
}
コード例 #20
0
ファイル: nodeValuesscan.c プロジェクト: markwkm/postgres
/* ----------------------------------------------------------------
 *		ValuesNext
 *
 *		This is a workhorse for ExecValuesScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ValuesNext(ValuesScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	List	   *exprlist;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	slot = node->ss.ss_ScanTupleSlot;
	econtext = node->rowcontext;

	/*
	 * Get the next tuple. Return NULL if no more tuples.
	 */
	if (ScanDirectionIsForward(direction))
	{
		if (node->curr_idx < node->array_len)
			node->curr_idx++;
		if (node->curr_idx < node->array_len)
			exprlist = node->exprlists[node->curr_idx];
		else
			exprlist = NIL;
	}
	else
	{
		if (node->curr_idx >= 0)
			node->curr_idx--;
		if (node->curr_idx >= 0)
			exprlist = node->exprlists[node->curr_idx];
		else
			exprlist = NIL;
	}

	/*
	 * Always clear the result slot; this is appropriate if we are at the end
	 * of the data, and if we're not, we still need it as the first step of
	 * the store-virtual-tuple protocol.  It seems wise to clear the slot
	 * before we reset the context it might have pointers into.
	 */
	ExecClearTuple(slot);

	if (exprlist)
	{
		MemoryContext oldContext;
		List	   *exprstatelist;
		Datum	   *values;
		bool	   *isnull;
		ListCell   *lc;
		int			resind;

		/*
		 * Get rid of any prior cycle's leftovers.  We use ReScanExprContext
		 * not just ResetExprContext because we want any registered shutdown
		 * callbacks to be called.
		 */
		ReScanExprContext(econtext);

		/*
		 * Build the expression eval state in the econtext's per-tuple memory.
		 * This is a tad unusual, but we want to delete the eval state again
		 * when we move to the next row, to avoid growth of memory
		 * requirements over a long values list.
		 */
		oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		/*
		 * Pass NULL, not my plan node, because we don't want anything in this
		 * transient state linking into permanent state.  The only possibility
		 * is a SubPlan, and there shouldn't be any (any subselects in the
		 * VALUES list should be InitPlans).
		 */
		exprstatelist = (List *) ExecInitExpr((Expr *) exprlist, NULL);

		/* parser should have checked all sublists are the same length */
		Assert(list_length(exprstatelist) == slot->tts_tupleDescriptor->natts);

		/*
		 * Compute the expressions and build a virtual result tuple. We
		 * already did ExecClearTuple(slot).
		 */
		values = slot->tts_values;
		isnull = slot->tts_isnull;

		resind = 0;
		foreach(lc, exprstatelist)
		{
			ExprState  *estate = (ExprState *) lfirst(lc);

			values[resind] = ExecEvalExpr(estate,
										  econtext,
										  &isnull[resind],
										  NULL);
			resind++;
		}

		MemoryContextSwitchTo(oldContext);

		/*
		 * And return the virtual tuple.
		 */
		ExecStoreVirtualTuple(slot);
	}
コード例 #21
0
ファイル: misc.c プロジェクト: jarulraj/postgres-cpp
Datum
pg_tablespace_databases(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	struct dirent *de;
	ts_db_fctx *fctx;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;
		Oid			tablespaceOid = PG_GETARG_OID(0);

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		fctx = palloc(sizeof(ts_db_fctx));

		if (tablespaceOid == GLOBALTABLESPACE_OID)
		{
			fctx->dirdesc = NULL;
			ereport(WARNING,
					(errmsg("global tablespace never has databases")));
		}
		else
		{
			if (tablespaceOid == DEFAULTTABLESPACE_OID)
				fctx->location = psprintf("base");
			else
				fctx->location = psprintf("pg_tblspc/%u/%s", tablespaceOid,
										  TABLESPACE_VERSION_DIRECTORY);

			fctx->dirdesc = AllocateDir(fctx->location);

			if (!fctx->dirdesc)
			{
				/* the only expected error is ENOENT */
				if (errno != ENOENT)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open directory \"%s\": %m",
									fctx->location)));
				ereport(WARNING,
					  (errmsg("%u is not a tablespace OID", tablespaceOid)));
			}
		}
		funcctx->user_fctx = fctx;
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	fctx = (ts_db_fctx *) funcctx->user_fctx;

	if (!fctx->dirdesc)			/* not a tablespace */
		SRF_RETURN_DONE(funcctx);

	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
	{
		char	   *subdir;
		DIR		   *dirdesc;
		Oid			datOid = atooid(de->d_name);

		/* this test skips . and .., but is awfully weak */
		if (!datOid)
			continue;

		/* if database subdir is empty, don't report tablespace as used */

		subdir = psprintf("%s/%s", fctx->location, de->d_name);
		dirdesc = AllocateDir(subdir);
		while ((de = ReadDir(dirdesc, subdir)) != NULL)
		{
			if (strcmp(de->d_name, ".") != 0 && strcmp(de->d_name, "..") != 0)
				break;
		}
		FreeDir(dirdesc);
		pfree(subdir);

		if (!de)
			continue;			/* indeed, nothing in it */

		SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(datOid));
	}

	FreeDir(fctx->dirdesc);
	SRF_RETURN_DONE(funcctx);
}
コード例 #22
0
ファイル: service.c プロジェクト: BALDELab/incubator-hawq
static void
ServiceListenLoop(ServiceCtrl *serviceCtrl)
{
	ServiceConfig *serviceConfig = (ServiceConfig*)serviceCtrl->serviceConfig;

	uint8 		*inputBuff;
	int			n,
				highsock = 0,
				newsockfd;
	mpp_fd_set	rset,
				rrset;

	struct sockaddr_in addr;
	socklen_t	addrlen;

	List   *connectedSockets = NIL;
	ListCell   *cell;

	Assert(TopMemoryContext != NULL);
	MemoryContextSwitchTo(TopMemoryContext);
	Assert(CurrentMemoryContext == TopMemoryContext);

	/*
	 * Setup scratch buffer.
	 */
	inputBuff = palloc(serviceConfig->requestLen);

	MPP_FD_ZERO(&rset);
	MPP_FD_SET(serviceCtrl->listenerFd, &rset);
	highsock = serviceCtrl->listenerFd + 1;

	/* we'll handle many incoming sockets but keep the sockets in blocking
	 * mode since we are dealing with very small messages.
	 */
	while(true)
	{
		struct timeval shutdownTimeout = {1,0};		// 1 second.
			// Use local variable since select modifies
			// the timeout parameter with remaining time.

		CHECK_FOR_INTERRUPTS();

		if (serviceConfig->ServiceShutdownRequested())
		{
			if (serviceConfig->ServiceShutdown != NULL)
			{
				serviceConfig->ServiceShutdown();
			}
			break;
		}

		/* no need to live on if postmaster has died */
		if (!PostmasterIsAlive(true))
		{
			if (serviceConfig->ServicePostmasterDied != NULL)
			{
				serviceConfig->ServicePostmasterDied();
			}
			else
			{
				ereport(LOG,
						(errmsg("exiting because postmaster has died")));
				proc_exit(1);
			}
		}

		memcpy(&rrset, &rset, sizeof(mpp_fd_set));

		n = select(highsock + 1, (fd_set *)&rrset, NULL, NULL, &shutdownTimeout);
		if (n == 0 || (n < 0 && errno == EINTR))
		{
			/* intr or timeout: Have we been here too long ? */
			continue;
		}

		if (n < 0)
		{
			/* this may be a little severe, but if we error on select()
			 * we'll just go ahead and blow up.  This will result in the
			 * postmaster re-spawning a new process.
			 */
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("'%s': error during select() call (error:%d).",
			     			        serviceConfig->title, errno)));
			break;
		}

		/* is it someone tickling our listener port? */
		if (MPP_FD_ISSET(serviceCtrl->listenerFd, &rrset))
		{
            addrlen = sizeof(addr);
			if ((newsockfd = accept(serviceCtrl->listenerFd,
				                    (struct sockaddr *) & addr,
				                    &addrlen)) < 0)
			{
				/*
				 * TODO: would be nice to read the errno and try and provide
				 * more useful info as to why this happened.
				 */
				ereport(NOTICE, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 	 errmsg("'%s': error from client connection: %s)",
				     				    serviceConfig->title,
				     				    strerror(errno))));
			}

			/* make socket non-blocking BEFORE we connect. */
			if (!pg_set_noblock(newsockfd))
			{
				/*
				 * TODO: would be nice to read the errno and try and provide
				 * more useful info as to why this happened.
				 */
				ereport(NOTICE, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 	 errmsg("'%s': could not set outbound socket to non-blocking mode: %s",
									    serviceConfig->title,
									    strerror(errno))));
			}

			if (newsockfd > highsock)
				highsock = newsockfd + 1;

			MPP_FD_SET(newsockfd, &rset);

			/*
			 * Read connection message.
			 */
			// UNDONE: temporarily turn off new connection flag...
			if( !ServiceProcessRequest(serviceCtrl, newsockfd, inputBuff, false))
			{
				/* close it down */
				MPP_FD_CLR( newsockfd, &rset);
				shutdown(newsockfd, SHUT_WR);
				close(newsockfd);
			}
			else
			{
				connectedSockets = lappend_int(connectedSockets, newsockfd);
			}

		}


		/* loop through all of our established sockets */
		cell = list_head(connectedSockets);
		while (cell != NULL)
		{
			int	fd = lfirst_int(cell);

			/* get the next cell ready before we delete */
			cell = lnext(cell);

			if (MPP_FD_ISSET(fd, &rrset))
			{
				if( !ServiceProcessRequest(serviceCtrl, fd, inputBuff, false))
				{
					/* close it down */
					MPP_FD_CLR( fd, &rset);
					connectedSockets = list_delete_int(connectedSockets, fd);
					shutdown(fd, SHUT_WR);
					close(fd);
				}
			}
		}
	}

	ereport(LOG,
			(errmsg("normal shutdown")));
	proc_exit(0);

}
コード例 #23
0
/*
 *	compute_tsvector_stats() -- compute statistics for a tsvector column
 *
 *	This functions computes statistics that are useful for determining @@
 *	operations' selectivity, along with the fraction of non-null rows and
 *	average width.
 *
 *	Instead of finding the most common values, as we do for most datatypes,
 *	we're looking for the most common lexemes. This is more useful, because
 *	there most probably won't be any two rows with the same tsvector and thus
 *	the notion of a MCV is a bit bogus with this datatype. With a list of the
 *	most common lexemes we can do a better job at figuring out @@ selectivity.
 *
 *	For the same reasons we assume that tsvector columns are unique when
 *	determining the number of distinct values.
 *
 *	The algorithm used is Lossy Counting, as proposed in the paper "Approximate
 *	frequency counts over data streams" by G. S. Manku and R. Motwani, in
 *	Proceedings of the 28th International Conference on Very Large Data Bases,
 *	Hong Kong, China, August 2002, section 4.2. The paper is available at
 *	http://www.vldb.org/conf/2002/S10P03.pdf
 *
 *	The Lossy Counting (aka LC) algorithm goes like this:
 *	Let s be the threshold frequency for an item (the minimum frequency we
 *	are interested in) and epsilon the error margin for the frequency. Let D
 *	be a set of triples (e, f, delta), where e is an element value, f is that
 *	element's frequency (actually, its current occurrence count) and delta is
 *	the maximum error in f. We start with D empty and process the elements in
 *	batches of size w. (The batch size is also known as "bucket size" and is
 *	equal to 1/epsilon.) Let the current batch number be b_current, starting
 *	with 1. For each element e we either increment its f count, if it's
 *	already in D, or insert a new triple into D with values (e, 1, b_current
 *	- 1). After processing each batch we prune D, by removing from it all
 *	elements with f + delta <= b_current.  After the algorithm finishes we
 *	suppress all elements from D that do not satisfy f >= (s - epsilon) * N,
 *	where N is the total number of elements in the input.  We emit the
 *	remaining elements with estimated frequency f/N.  The LC paper proves
 *	that this algorithm finds all elements with true frequency at least s,
 *	and that no frequency is overestimated or is underestimated by more than
 *	epsilon.  Furthermore, given reasonable assumptions about the input
 *	distribution, the required table size is no more than about 7 times w.
 *
 *	We set s to be the estimated frequency of the K'th word in a natural
 *	language's frequency table, where K is the target number of entries in
 *	the MCELEM array plus an arbitrary constant, meant to reflect the fact
 *	that the most common words in any language would usually be stopwords
 *	so we will not actually see them in the input.  We assume that the
 *	distribution of word frequencies (including the stopwords) follows Zipf's
 *	law with an exponent of 1.
 *
 *	Assuming Zipfian distribution, the frequency of the K'th word is equal
 *	to 1/(K * H(W)) where H(n) is 1/2 + 1/3 + ... + 1/n and W is the number of
 *	words in the language.  Putting W as one million, we get roughly 0.07/K.
 *	Assuming top 10 words are stopwords gives s = 0.07/(K + 10).  We set
 *	epsilon = s/10, which gives bucket width w = (K + 10)/0.007 and
 *	maximum expected hashtable size of about 1000 * (K + 10).
 *
 *	Note: in the above discussion, s, epsilon, and f/N are in terms of a
 *	lexeme's frequency as a fraction of all lexemes seen in the input.
 *	However, what we actually want to store in the finished pg_statistic
 *	entry is each lexeme's frequency as a fraction of all rows that it occurs
 *	in.  Assuming that the input tsvectors are correctly constructed, no
 *	lexeme occurs more than once per tsvector, so the final count f is a
 *	correct estimate of the number of input tsvectors it occurs in, and we
 *	need only change the divisor from N to nonnull_cnt to get the number we
 *	want.
 */
static void
compute_tsvector_stats(VacAttrStats *stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			num_mcelem;
	int			null_cnt = 0;
	double		total_width = 0;

	/* This is D from the LC algorithm. */
	HTAB	   *lexemes_tab;
	HASHCTL		hash_ctl;
	HASH_SEQ_STATUS scan_status;

	/* This is the current bucket number from the LC algorithm */
	int			b_current;

	/* This is 'w' from the LC algorithm */
	int			bucket_width;
	int			vector_no,
				lexeme_no;
	LexemeHashKey hash_key;
	TrackItem  *item;

	/*
	 * We want statistics_target * 10 lexemes in the MCELEM array.  This
	 * multiplier is pretty arbitrary, but is meant to reflect the fact that
	 * the number of individual lexeme values tracked in pg_statistic ought to
	 * be more than the number of values for a simple scalar column.
	 */
	num_mcelem = stats->attr->attstattarget * 10;

	/*
	 * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
	 * comment above.
	 */
	bucket_width = (num_mcelem + 10) * 1000 / 7;

	/*
	 * Create the hashtable. It will be in local memory, so we don't need to
	 * worry about overflowing the initial size. Also we don't need to pay any
	 * attention to locking and memory management.
	 */
	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(LexemeHashKey);
	hash_ctl.entrysize = sizeof(TrackItem);
	hash_ctl.hash = lexeme_hash;
	hash_ctl.match = lexeme_match;
	hash_ctl.hcxt = CurrentMemoryContext;
	lexemes_tab = hash_create("Analyzed lexemes table",
							  num_mcelem,
							  &hash_ctl,
					HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);

	/* Initialize counters. */
	b_current = 1;
	lexeme_no = 0;

	/* Loop over the tsvectors. */
	for (vector_no = 0; vector_no < samplerows; vector_no++)
	{
		Datum		value;
		bool		isnull;
		TSVector	vector;
		WordEntry  *curentryptr;
		char	   *lexemesptr;
		int			j;

		vacuum_delay_point();

		value = fetchfunc(stats, vector_no, &isnull);

		/*
		 * Check for null/nonnull.
		 */
		if (isnull)
		{
			null_cnt++;
			continue;
		}

		/*
		 * Add up widths for average-width calculation.  Since it's a
		 * tsvector, we know it's varlena.  As in the regular
		 * compute_minimal_stats function, we use the toasted width for this
		 * calculation.
		 */
		total_width += VARSIZE_ANY(DatumGetPointer(value));

		/*
		 * Now detoast the tsvector if needed.
		 */
		vector = DatumGetTSVector(value);

		/*
		 * We loop through the lexemes in the tsvector and add them to our
		 * tracking hashtable.  Note: the hashtable entries will point into
		 * the (detoasted) tsvector value, therefore we cannot free that
		 * storage until we're done.
		 */
		lexemesptr = STRPTR(vector);
		curentryptr = ARRPTR(vector);
		for (j = 0; j < vector->size; j++)
		{
			bool		found;

			/* Construct a hash key */
			hash_key.lexeme = lexemesptr + curentryptr->pos;
			hash_key.length = curentryptr->len;

			/* Lookup current lexeme in hashtable, adding it if new */
			item = (TrackItem *) hash_search(lexemes_tab,
											 (const void *) &hash_key,
											 HASH_ENTER, &found);

			if (found)
			{
				/* The lexeme is already on the tracking list */
				item->frequency++;
			}
			else
			{
				/* Initialize new tracking list element */
				item->frequency = 1;
				item->delta = b_current - 1;
			}

			/* lexeme_no is the number of elements processed (ie N) */
			lexeme_no++;

			/* We prune the D structure after processing each bucket */
			if (lexeme_no % bucket_width == 0)
			{
				prune_lexemes_hashtable(lexemes_tab, b_current);
				b_current++;
			}

			/* Advance to the next WordEntry in the tsvector */
			curentryptr++;
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (null_cnt < samplerows)
	{
		int			nonnull_cnt = samplerows - null_cnt;
		int			i;
		TrackItem **sort_table;
		int			track_len;
		int			cutoff_freq;
		int			minfreq,
					maxfreq;

		stats->stats_valid = true;
		/* Do the simple null-frac and average width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		stats->stawidth = total_width / (double) nonnull_cnt;

		/* Assume it's a unique column (see notes above) */
		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);

		/*
		 * Construct an array of the interesting hashtable items, that is,
		 * those meeting the cutoff frequency (s - epsilon)*N.  Also identify
		 * the minimum and maximum frequencies among these items.
		 *
		 * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
		 * frequency is 9*N / bucket_width.
		 */
		cutoff_freq = 9 * lexeme_no / bucket_width;

		i = hash_get_num_entries(lexemes_tab);	/* surely enough space */
		sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);

		hash_seq_init(&scan_status, lexemes_tab);
		track_len = 0;
		minfreq = lexeme_no;
		maxfreq = 0;
		while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
		{
			if (item->frequency > cutoff_freq)
			{
				sort_table[track_len++] = item;
				minfreq = Min(minfreq, item->frequency);
				maxfreq = Max(maxfreq, item->frequency);
			}
		}
		Assert(track_len <= i);

		/* emit some statistics for debug purposes */
		elog(DEBUG3, "tsvector_stats: target # mces = %d, bucket width = %d, "
			 "# lexemes = %d, hashtable size = %d, usable entries = %d",
			 num_mcelem, bucket_width, lexeme_no, i, track_len);

		/*
		 * If we obtained more lexemes than we really want, get rid of those
		 * with least frequencies.  The easiest way is to qsort the array into
		 * descending frequency order and truncate the array.
		 */
		if (num_mcelem < track_len)
		{
			qsort(sort_table, track_len, sizeof(TrackItem *),
				  trackitem_compare_frequencies_desc);
			/* reset minfreq to the smallest frequency we're keeping */
			minfreq = sort_table[num_mcelem - 1]->frequency;
		}
		else
			num_mcelem = track_len;

		/* Generate MCELEM slot entry */
		if (num_mcelem > 0)
		{
			MemoryContext old_context;
			Datum	   *mcelem_values;
			float4	   *mcelem_freqs;

			/*
			 * We want to store statistics sorted on the lexeme value using
			 * first length, then byte-for-byte comparison. The reason for
			 * doing length comparison first is that we don't care about the
			 * ordering so long as it's consistent, and comparing lengths
			 * first gives us a chance to avoid a strncmp() call.
			 *
			 * This is different from what we do with scalar statistics --
			 * they get sorted on frequencies. The rationale is that we
			 * usually search through most common elements looking for a
			 * specific value, so we can grab its frequency.  When values are
			 * presorted we can employ binary search for that.  See
			 * ts_selfuncs.c for a real usage scenario.
			 */
			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
				  trackitem_compare_lexemes);

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);

			/*
			 * We sorted statistics on the lexeme value, but we want to be
			 * able to find out the minimal and maximal frequency without
			 * going through all the values.  We keep those two extra
			 * frequencies in two extra cells in mcelem_freqs.
			 *
			 * (Note: the MCELEM statistics slot definition allows for a third
			 * extra number containing the frequency of nulls, but we don't
			 * create that for a tsvector column, since null elements aren't
			 * possible.)
			 */
			mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
			mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));

			/*
			 * See comments above about use of nonnull_cnt as the divisor for
			 * the final frequency estimates.
			 */
			for (i = 0; i < num_mcelem; i++)
			{
				TrackItem  *item = sort_table[i];

				mcelem_values[i] =
					PointerGetDatum(cstring_to_text_with_len(item->key.lexeme,
														  item->key.length));
				mcelem_freqs[i] = (double) item->frequency / (double) nonnull_cnt;
			}
			mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
			mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCELEM;
			stats->staop[0] = TextEqualOperator;
			stats->stanumbers[0] = mcelem_freqs;
			/* See above comment about two extra frequency fields */
			stats->numnumbers[0] = num_mcelem + 2;
			stats->stavalues[0] = mcelem_values;
			stats->numvalues[0] = num_mcelem;
			/* We are storing text values */
			stats->statypid[0] = TEXTOID;
			stats->statyplen[0] = -1;	/* typlen, -1 for varlena */
			stats->statypbyval[0] = false;
			stats->statypalign[0] = 'i';
		}
	}
	else
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		stats->stawidth = 0;	/* "unknown" */
		stats->stadistinct = 0.0;		/* "unknown" */
	}

	/*
	 * We don't need to bother cleaning up any of our temporary palloc's. The
	 * hashtable should also go away, as it used a child memory context.
	 */
}
コード例 #24
0
ファイル: heapfuncs.c プロジェクト: hasegeli/postgres
Datum
heap_page_items(PG_FUNCTION_ARGS)
{
	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
	heap_page_items_state *inter_call_data = NULL;
	FuncCallContext *fctx;
	int			raw_page_size;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use raw page functions"))));

	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext mctx;

		if (raw_page_size < SizeOfPageHeaderData)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("input page too small (%d bytes)", raw_page_size)));

		fctx = SRF_FIRSTCALL_INIT();
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		inter_call_data = palloc(sizeof(heap_page_items_state));

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		inter_call_data->tupd = tupdesc;

		inter_call_data->offset = FirstOffsetNumber;
		inter_call_data->page = VARDATA(raw_page);

		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
		fctx->user_fctx = inter_call_data;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	inter_call_data = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		Page		page = inter_call_data->page;
		HeapTuple	resultTuple;
		Datum		result;
		ItemId		id;
		Datum		values[14];
		bool		nulls[14];
		uint16		lp_offset;
		uint16		lp_flags;
		uint16		lp_len;

		memset(nulls, 0, sizeof(nulls));

		/* Extract information from the line pointer */

		id = PageGetItemId(page, inter_call_data->offset);

		lp_offset = ItemIdGetOffset(id);
		lp_flags = ItemIdGetFlags(id);
		lp_len = ItemIdGetLength(id);

		values[0] = UInt16GetDatum(inter_call_data->offset);
		values[1] = UInt16GetDatum(lp_offset);
		values[2] = UInt16GetDatum(lp_flags);
		values[3] = UInt16GetDatum(lp_len);

		/*
		 * We do just enough validity checking to make sure we don't reference
		 * data outside the page passed to us. The page could be corrupt in
		 * many other ways, but at least we won't crash.
		 */
		if (ItemIdHasStorage(id) &&
			lp_len >= MinHeapTupleSize &&
			lp_offset == MAXALIGN(lp_offset) &&
			lp_offset + lp_len <= raw_page_size)
		{
			HeapTupleHeader tuphdr;
			bytea	   *tuple_data_bytea;
			int			tuple_data_len;

			/* Extract information from the tuple header */

			tuphdr = (HeapTupleHeader) PageGetItem(page, id);

			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
			/* shared with xvac */
			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
			values[7] = PointerGetDatum(&tuphdr->t_ctid);
			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
			values[9] = UInt32GetDatum(tuphdr->t_infomask);
			values[10] = UInt8GetDatum(tuphdr->t_hoff);

			/* Copy raw tuple data into bytea attribute */
			tuple_data_len = lp_len - tuphdr->t_hoff;
			tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
			SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
			memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
				   tuple_data_len);
			values[13] = PointerGetDatum(tuple_data_bytea);

			/*
			 * We already checked that the item is completely within the raw
			 * page passed to us, with the length given in the line pointer.
			 * Let's check that t_hoff doesn't point over lp_len, before using
			 * it to access t_bits and oid.
			 */
			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
				tuphdr->t_hoff <= lp_len &&
				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
			{
				if (tuphdr->t_infomask & HEAP_HASNULL)
				{
					int			bits_len;

					bits_len =
						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
					values[11] = CStringGetTextDatum(
													 bits_to_text(tuphdr->t_bits, bits_len));
				}
				else
					nulls[11] = true;

				if (tuphdr->t_infomask & HEAP_HASOID_OLD)
					values[12] = HeapTupleHeaderGetOidOld(tuphdr);
				else
					nulls[12] = true;
			}
			else
			{
				nulls[11] = true;
				nulls[12] = true;
			}
		}
		else
		{
			/*
			 * The line pointer is not used, or it's invalid. Set the rest of
			 * the fields to NULL
			 */
			int			i;

			for (i = 4; i <= 13; i++)
				nulls[i] = true;
		}

		/* Build and return the result tuple. */
		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
		result = HeapTupleGetDatum(resultTuple);

		inter_call_data->offset++;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
		SRF_RETURN_DONE(fctx);
}
コード例 #25
0
ファイル: zdbutils.c プロジェクト: zombodb/zombodb
/*
 * Copied from Postgres' src/backend/optimizer/util/clauses.c
 *
 * evaluate_expr: pre-evaluate a constant expression
 *
 * We use the executor's routine ExecEvalExpr() to avoid duplication of
 * code and ensure we get the same result as the executor would get.
 */
Expr *
evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
              Oid result_collation)
{
    EState	   *estate;
    ExprState  *exprstate;
    MemoryContext oldcontext;
    Datum		const_val;
    bool		const_is_null;
    int16		resultTypLen;
    bool		resultTypByVal;

    /*
     * To use the executor, we need an EState.
     */
    estate = CreateExecutorState();

    /* We can use the estate's working context to avoid memory leaks. */
    oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

    /* Make sure any opfuncids are filled in. */
    fix_opfuncids((Node *) expr);

    /*
     * Prepare expr for execution.  (Note: we can't use ExecPrepareExpr
     * because it'd result in recursively invoking eval_const_expressions.)
     */
    exprstate = ExecInitExpr(expr, NULL);

    /*
     * And evaluate it.
     *
     * It is OK to use a default econtext because none of the ExecEvalExpr()
     * code used in this situation will use econtext.  That might seem
     * fortuitous, but it's not so unreasonable --- a constant expression does
     * not depend on context, by definition, n'est ce pas?
     */
    const_val = ExecEvalExprSwitchContext(exprstate,
                                          GetPerTupleExprContext(estate),
                                          &const_is_null, NULL);

    /* Get info needed about result datatype */
    get_typlenbyval(result_type, &resultTypLen, &resultTypByVal);

    /* Get back to outer memory context */
    MemoryContextSwitchTo(oldcontext);

    /*
     * Must copy result out of sub-context used by expression eval.
     *
     * Also, if it's varlena, forcibly detoast it.  This protects us against
     * storing TOAST pointers into plans that might outlive the referenced
     * data.  (makeConst would handle detoasting anyway, but it's worth a few
     * extra lines here so that we can do the copy and detoast in one step.)
     */
    if (!const_is_null)
    {
        if (resultTypLen == -1)
            const_val = PointerGetDatum(PG_DETOAST_DATUM_COPY(const_val));
        else
            const_val = datumCopy(const_val, resultTypByVal, resultTypLen);
    }

    /* Release all the junk we just created */
    FreeExecutorState(estate);

    /*
     * Make the constant result node.
     */
    return (Expr *) makeConst(result_type, result_typmod, result_collation,
                              resultTypLen,
                              const_val, const_is_null,
                              resultTypByVal);
}
コード例 #26
0
ファイル: nodeHash.c プロジェクト: bilun167/cs3223
	 */
	hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
											   "HashTableContext",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
												"HashBatchContext",
												ALLOCSET_DEFAULT_MINSIZE,
												ALLOCSET_DEFAULT_INITSIZE,
												ALLOCSET_DEFAULT_MAXSIZE);

	/* Allocate data that will live for the life of the hashjoin */

	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

	if (nbatch > 1)
	{
		/*
		 * allocate and initialize the file arrays in hashCxt
		 */
		hashtable->innerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));
		hashtable->outerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));
		/* The files will not be opened until needed... */
		/* ... but make sure we have temp tablespaces established for them */
		PrepareTempTablespaces();
	}
コード例 #27
0
ファイル: xpath.c プロジェクト: kevinston/postgres
Datum
xpath_table(PG_FUNCTION_ARGS)
{
	/* Function parameters */
	char	   *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
	char	   *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
	char	   *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
	char	   *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
	char	   *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));

	/* SPI (input tuple) support */
	SPITupleTable *tuptable;
	HeapTuple	spi_tuple;
	TupleDesc	spi_tupdesc;

	/* Output tuple (tuplestore) support */
	Tuplestorestate *tupstore = NULL;
	TupleDesc	ret_tupdesc;
	HeapTuple	ret_tuple;

	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	AttInMetadata *attinmeta;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;

	char	  **values;
	xmlChar   **xpaths;
	char	   *pos;
	const char *pathsep = "|";

	int			numpaths;
	int			ret;
	int			proc;
	int			i;
	int			j;
	int			rownr;			/* For issuing multiple rows from one original
								 * document */
	bool		had_values;		/* To determine end of nodeset results */
	StringInfoData query_buf;
	PgXmlErrorContext *xmlerrcxt;
	volatile xmlDocPtr doctree = NULL;

	/* We only have a valid tuple description in table function mode */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (rsinfo->expectedDesc == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("xpath_table must be called as a table function")));

	/*
	 * We want to materialise because it means that we don't have to carry
	 * libxml2 parser state between invocations of this function
	 */
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
			   errmsg("xpath_table requires Materialize mode, but it is not "
					  "allowed in this context")));

	/*
	 * The tuplestore must exist in a higher context than this function call
	 * (per_query_ctx is used)
	 */
	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/*
	 * Create the tuplestore - work_mem is the max in-memory size before a
	 * file is created on disk to hold it.
	 */
	tupstore =
		tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
							  false, work_mem);

	MemoryContextSwitchTo(oldcontext);

	/* get the requested return tuple description */
	ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);

	/* must have at least one output column (for the pkey) */
	if (ret_tupdesc->natts < 1)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("xpath_table must have at least one output column")));

	/*
	 * At the moment we assume that the returned attributes make sense for the
	 * XPath specififed (i.e. we trust the caller). It's not fatal if they get
	 * it wrong - the input function for the column type will raise an error
	 * if the path result can't be converted into the correct binary
	 * representation.
	 */

	attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);

	/* Set return mode and allocate value space. */
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setDesc = ret_tupdesc;

	values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
	xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));

	/*
	 * Split XPaths. xpathset is a writable CString.
	 *
	 * Note that we stop splitting once we've done all needed for tupdesc
	 */
	numpaths = 0;
	pos = xpathset;
	while (numpaths < (ret_tupdesc->natts - 1))
	{
		xpaths[numpaths++] = (xmlChar *) pos;
		pos = strstr(pos, pathsep);
		if (pos != NULL)
		{
			*pos = '\0';
			pos++;
		}
		else
			break;
	}

	/* Now build query */
	initStringInfo(&query_buf);

	/* Build initial sql statement */
	appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
					 pkeyfield,
					 xmlfield,
					 relname,
					 condition);

	if ((ret = SPI_connect()) < 0)
		elog(ERROR, "xpath_table: SPI_connect returned %d", ret);

	if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
		elog(ERROR, "xpath_table: SPI execution failed for query %s",
			 query_buf.data);

	proc = SPI_processed;
	/* elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */
	tuptable = SPI_tuptable;
	spi_tupdesc = tuptable->tupdesc;

	/* Switch out of SPI context */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Check that SPI returned correct result. If you put a comma into one of
	 * the function parameters, this will catch it when the SPI query returns
	 * e.g. 3 columns.
	 */
	if (spi_tupdesc->natts != 2)
	{
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						errmsg("expression returning multiple columns is not valid in parameter list"),
						errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
	}

	/*
	 * Setup the parser.  This should happen after we are done evaluating the
	 * query, in case it calls functions that set up libxml differently.
	 */
	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);

	PG_TRY();
	{
		/* For each row i.e. document returned from SPI */
		for (i = 0; i < proc; i++)
		{
			char	   *pkey;
			char	   *xmldoc;
			xmlXPathContextPtr ctxt;
			xmlXPathObjectPtr res;
			xmlChar    *resstr;
			xmlXPathCompExprPtr comppath;

			/* Extract the row data as C Strings */
			spi_tuple = tuptable->vals[i];
			pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
			xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);

			/*
			 * Clear the values array, so that not-well-formed documents
			 * return NULL in all columns.	Note that this also means that
			 * spare columns will be NULL.
			 */
			for (j = 0; j < ret_tupdesc->natts; j++)
				values[j] = NULL;

			/* Insert primary key */
			values[0] = pkey;

			/* Parse the document */
			if (xmldoc)
				doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
			else	/* treat NULL as not well-formed */
				doctree = NULL;

			if (doctree == NULL)
			{
				/* not well-formed, so output all-NULL tuple */
				ret_tuple = BuildTupleFromCStrings(attinmeta, values);
				tuplestore_puttuple(tupstore, ret_tuple);
				heap_freetuple(ret_tuple);
			}
			else
			{
				/* New loop here - we have to deal with nodeset results */
				rownr = 0;

				do
				{
					/* Now evaluate the set of xpaths. */
					had_values = false;
					for (j = 0; j < numpaths; j++)
					{
						ctxt = xmlXPathNewContext(doctree);
						ctxt->node = xmlDocGetRootElement(doctree);

						/* compile the path */
						comppath = xmlXPathCompile(xpaths[j]);
						if (comppath == NULL)
							xml_ereport(xmlerrcxt, ERROR,
										ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
										"XPath Syntax Error");

						/* Now evaluate the path expression. */
						res = xmlXPathCompiledEval(comppath, ctxt);
						xmlXPathFreeCompExpr(comppath);

						if (res != NULL)
						{
							switch (res->type)
							{
								case XPATH_NODESET:
									/* We see if this nodeset has enough nodes */
									if (res->nodesetval != NULL &&
										rownr < res->nodesetval->nodeNr)
									{
										resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
										had_values = true;
									}
									else
										resstr = NULL;

									break;

								case XPATH_STRING:
									resstr = xmlStrdup(res->stringval);
									break;

								default:
									elog(NOTICE, "unsupported XQuery result: %d", res->type);
									resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
							}

							/*
							 * Insert this into the appropriate column in the
							 * result tuple.
							 */
							values[j + 1] = (char *) resstr;
						}
						xmlXPathFreeContext(ctxt);
					}

					/* Now add the tuple to the output, if there is one. */
					if (had_values)
					{
						ret_tuple = BuildTupleFromCStrings(attinmeta, values);
						tuplestore_puttuple(tupstore, ret_tuple);
						heap_freetuple(ret_tuple);
					}

					rownr++;
				} while (had_values);
			}

			if (doctree != NULL)
				xmlFreeDoc(doctree);
			doctree = NULL;

			if (pkey)
				pfree(pkey);
			if (xmldoc)
				pfree(xmldoc);
		}
	}
	PG_CATCH();
	{
		if (doctree != NULL)
			xmlFreeDoc(doctree);

		pg_xml_done(xmlerrcxt, true);

		PG_RE_THROW();
	}
	PG_END_TRY();

	if (doctree != NULL)
		xmlFreeDoc(doctree);

	pg_xml_done(xmlerrcxt, false);

	tuplestore_donestoring(tupstore);

	SPI_finish();

	rsinfo->setResult = tupstore;

	/*
	 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
	 * tuples are in our tuplestore and passed back through rsinfo->setResult.
	 * rsinfo->setDesc is set to the tuple description that we actually used
	 * to build our tuples with, so the caller can verify we did what it was
	 * expecting.
	 */
	return (Datum) 0;
}
コード例 #28
0
ファイル: vacuum.c プロジェクト: bwright/postgres
/*
 * Primary entry point for VACUUM and ANALYZE commands.
 *
 * relid is normally InvalidOid; if it is not, then it provides the relation
 * OID to be processed, and vacstmt->relation is ignored.  (The non-invalid
 * case is currently only used by autovacuum.)
 *
 * do_toast is passed as FALSE by autovacuum, because it processes TOAST
 * tables separately.
 *
 * for_wraparound is used by autovacuum to let us know when it's forcing
 * a vacuum for wraparound, which should not be auto-canceled.
 *
 * bstrategy is normally given as NULL, but in autovacuum it can be passed
 * in to use the same buffer strategy object across multiple vacuum() calls.
 *
 * isTopLevel should be passed down from ProcessUtility.
 *
 * It is the caller's responsibility that vacstmt and bstrategy
 * (if given) be allocated in a memory context that won't disappear
 * at transaction commit.
 */
void
vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
	   BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
{
	const char *stmttype;
	volatile bool in_outer_xact,
				use_own_xacts;
	List	   *relations;

	/* sanity checks on options */
	Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
	Assert((vacstmt->options & VACOPT_VACUUM) ||
		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
	Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);

	stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";

	/*
	 * We cannot run VACUUM inside a user transaction block; if we were inside
	 * a transaction, then our commit- and start-transaction-command calls
	 * would not have the intended effect!	There are numerous other subtle
	 * dependencies on this, too.
	 *
	 * ANALYZE (without VACUUM) can run either way.
	 */
	if (vacstmt->options & VACOPT_VACUUM)
	{
		PreventTransactionChain(isTopLevel, stmttype);
		in_outer_xact = false;
	}
	else
		in_outer_xact = IsInTransactionChain(isTopLevel);

	/*
	 * Send info about dead objects to the statistics collector, unless we are
	 * in autovacuum --- autovacuum.c does this for itself.
	 */
	if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
		pgstat_vacuum_stat();

	/*
	 * Create special memory context for cross-transaction storage.
	 *
	 * Since it is a child of PortalContext, it will go away eventually even
	 * if we suffer an error; there's no need for special abort cleanup logic.
	 */
	vac_context = AllocSetContextCreate(PortalContext,
										"Vacuum",
										ALLOCSET_DEFAULT_MINSIZE,
										ALLOCSET_DEFAULT_INITSIZE,
										ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * If caller didn't give us a buffer strategy object, make one in the
	 * cross-transaction memory context.
	 */
	if (bstrategy == NULL)
	{
		MemoryContext old_context = MemoryContextSwitchTo(vac_context);

		bstrategy = GetAccessStrategy(BAS_VACUUM);
		MemoryContextSwitchTo(old_context);
	}
	vac_strategy = bstrategy;

	/*
	 * Build list of relations to process, unless caller gave us one. (If we
	 * build one, we put it in vac_context for safekeeping.)
	 */
	relations = get_rel_oids(relid, vacstmt->relation);

	/*
	 * Decide whether we need to start/commit our own transactions.
	 *
	 * For VACUUM (with or without ANALYZE): always do so, so that we can
	 * release locks as soon as possible.  (We could possibly use the outer
	 * transaction for a one-table VACUUM, but handling TOAST tables would be
	 * problematic.)
	 *
	 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
	 * start/commit our own transactions.  Also, there's no need to do so if
	 * only processing one relation.  For multiple relations when not within a
	 * transaction block, and also in an autovacuum worker, use own
	 * transactions so we can release locks sooner.
	 */
	if (vacstmt->options & VACOPT_VACUUM)
		use_own_xacts = true;
	else
	{
		Assert(vacstmt->options & VACOPT_ANALYZE);
		if (IsAutoVacuumWorkerProcess())
			use_own_xacts = true;
		else if (in_outer_xact)
			use_own_xacts = false;
		else if (list_length(relations) > 1)
			use_own_xacts = true;
		else
			use_own_xacts = false;
	}

	/*
	 * vacuum_rel expects to be entered with no transaction active; it will
	 * start and commit its own transaction.  But we are called by an SQL
	 * command, and so we are executing inside a transaction already. We
	 * commit the transaction started in PostgresMain() here, and start
	 * another one before exiting to match the commit waiting for us back in
	 * PostgresMain().
	 */
	if (use_own_xacts)
	{
		/* ActiveSnapshot is not set by autovacuum */
		if (ActiveSnapshotSet())
			PopActiveSnapshot();

		/* matches the StartTransaction in PostgresMain() */
		CommitTransactionCommand();
	}

	/* Turn vacuum cost accounting on or off */
	PG_TRY();
	{
		ListCell   *cur;

		VacuumCostActive = (VacuumCostDelay > 0);
		VacuumCostBalance = 0;
		VacuumPageHit = 0;
		VacuumPageMiss = 0;
		VacuumPageDirty = 0;

		/*
		 * Loop to process each selected relation.
		 */
		foreach(cur, relations)
		{
			Oid			relid = lfirst_oid(cur);

			if (vacstmt->options & VACOPT_VACUUM)
			{
				if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
					continue;
			}

			if (vacstmt->options & VACOPT_ANALYZE)
			{
				/*
				 * If using separate xacts, start one for analyze. Otherwise,
				 * we can use the outer transaction.
				 */
				if (use_own_xacts)
				{
					StartTransactionCommand();
					/* functions in indexes may want a snapshot set */
					PushActiveSnapshot(GetTransactionSnapshot());
				}

				analyze_rel(relid, vacstmt, vac_strategy);

				if (use_own_xacts)
				{
					PopActiveSnapshot();
					CommitTransactionCommand();
				}
			}
		}
	}
コード例 #29
0
/*
 * write_minipage
 *
 * Write the in-memory minipage to the block directory relation.
 */
static void
write_minipage(AppendOnlyBlockDirectory *blockDirectory,
			   int columnGroupNo)
{
	HeapTuple tuple;
	MemoryContext oldcxt;
	Datum *values = blockDirectory->values;
	bool *nulls = blockDirectory->nulls;
	Relation blkdirRel = blockDirectory->blkdirRel;
	TupleDesc heapTupleDesc = RelationGetDescr(blkdirRel);
	MinipagePerColumnGroup *minipageInfo =
		&blockDirectory->minipages[columnGroupNo];
	
	Assert(minipageInfo->numMinipageEntries > 0);

	oldcxt = MemoryContextSwitchTo(blockDirectory->memoryContext);
	
	Assert(blkdirRel != NULL);
	
	values[Anum_pg_aoblkdir_segno - 1] =
		Int32GetDatum(blockDirectory->currentSegmentFileNum);
	nulls[Anum_pg_aoblkdir_segno - 1] = false;

	values[Anum_pg_aoblkdir_columngroupno - 1] =
		Int32GetDatum(columnGroupNo);
	nulls[Anum_pg_aoblkdir_columngroupno - 1] = false;

	values[Anum_pg_aoblkdir_firstrownum - 1] =
		Int64GetDatum(minipageInfo->minipage->entry[0].firstRowNum);
	nulls[Anum_pg_aoblkdir_firstrownum - 1] = false;

	SET_VARSIZE(minipageInfo->minipage,
				minipage_size(minipageInfo->numMinipageEntries));
	minipageInfo->minipage->nEntry = minipageInfo->numMinipageEntries;
	values[Anum_pg_aoblkdir_minipage - 1] =
		PointerGetDatum(minipageInfo->minipage);
	nulls[Anum_pg_aoblkdir_minipage - 1] = false;
	
	tuple = heaptuple_form_to(heapTupleDesc,
							  values,
							  nulls,
							  NULL,
							  NULL);
	
	/*
	 * Write out the minipage to the block directory relation.
	 * If this minipage is already in the relation, we update
	 * the row. Otherwise, a new row is inserted.
	 */
	if (ItemPointerIsValid(&minipageInfo->tupleTid))
	{
		if (Debug_appendonly_print_blockdirectory)
			ereport(LOG,
					(errmsg("Append-only block directory update a minipage: "
							"(segno, columnGroupNo, nEntries, firstRowNum) = "
							"(%d, %d, %u, " INT64_FORMAT ")",
							blockDirectory->currentSegmentFileNum,
							columnGroupNo, minipageInfo->numMinipageEntries,
							minipageInfo->minipage->entry[0].firstRowNum)));

		simple_heap_update(blkdirRel, &minipageInfo->tupleTid, tuple);
	}
	else
	{
		if (Debug_appendonly_print_blockdirectory)
			ereport(LOG,
					(errmsg("Append-only block directory insert a minipage: "
							"(segno, columnGroupNo, nEntries, firstRowNum) = "
							"(%d, %d, %u, " INT64_FORMAT ")",
							blockDirectory->currentSegmentFileNum,
							columnGroupNo, minipageInfo->numMinipageEntries,
							minipageInfo->minipage->entry[0].firstRowNum)));

		simple_heap_insert(blkdirRel, tuple);
	}
	
	CatalogUpdateIndexes(blkdirRel, tuple);
	
	heap_freetuple(tuple);
	
	MemoryContextSwitchTo(oldcxt);
}
コード例 #30
0
ファイル: unaccent.c プロジェクト: amulsul/postgres
/*
 * initSuffixTree  - create suffix tree from file. Function converts
 * UTF8-encoded file into current encoding.
 */
static SuffixChar *
initSuffixTree(char *filename)
{
	SuffixChar *volatile rootSuffixTree = NULL;
	MemoryContext ccxt = CurrentMemoryContext;
	tsearch_readline_state trst;
	volatile bool skip;

	filename = get_tsearch_config_filename(filename, "rules");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open unaccent file \"%s\": %m",
						filename)));

	do
	{
		/*
		 * pg_do_encoding_conversion() (called by tsearch_readline()) will
		 * emit exception if it finds untranslatable characters in current
		 * locale. We just skip such lines, continuing with the next.
		 */
		skip = true;

		PG_TRY();
		{
			char	   *line;

			while ((line = tsearch_readline(&trst)) != NULL)
			{
				/*
				 * The format of each line must be "src trg" where src and trg
				 * are sequences of one or more non-whitespace characters,
				 * separated by whitespace.  Whitespace at start or end of
				 * line is ignored.
				 */
				int			state;
				char	   *ptr;
				char	   *src = NULL;
				char	   *trg = NULL;
				int			ptrlen;
				int			srclen = 0;
				int			trglen = 0;

				state = 0;
				for (ptr = line; *ptr; ptr += ptrlen)
				{
					ptrlen = pg_mblen(ptr);
					/* ignore whitespace, but end src or trg */
					if (t_isspace(ptr))
					{
						if (state == 1)
							state = 2;
						else if (state == 3)
							state = 4;
						continue;
					}
					switch (state)
					{
						case 0:
							/* start of src */
							src = ptr;
							srclen = ptrlen;
							state = 1;
							break;
						case 1:
							/* continue src */
							srclen += ptrlen;
							break;
						case 2:
							/* start of trg */
							trg = ptr;
							trglen = ptrlen;
							state = 3;
							break;
						case 3:
							/* continue trg */
							trglen += ptrlen;
							break;
						default:
							/* bogus line format */
							state = -1;
							break;
					}
				}

				if (state >= 3)
					rootSuffixTree = placeChar(rootSuffixTree,
											   (unsigned char *) src, srclen,
											   trg, trglen);

				pfree(line);
			}
			skip = false;
		}
		PG_CATCH();
		{
			ErrorData  *errdata;
			MemoryContext ecxt;

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();
			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
			{
				FlushErrorState();
			}
			else
			{
				MemoryContextSwitchTo(ecxt);
				PG_RE_THROW();
			}
		}
		PG_END_TRY();
	}
	while (skip);

	tsearch_readline_end(&trst);

	return rootSuffixTree;
}