Example 1
/* ----------------------------------------------------------------
 *		ExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecHash(HashState *node)
{
	EState	   *estate;
	PlanState  *outerNode;
	List	   *hashkeys;
	HashJoinTable hashtable;
	TupleTableSlot *slot;
	ExprContext *econtext;
	int			nbatch;
	int			i;

	/*
	 * get state info from node
	 */
	estate = node->ps.state;
	outerNode = outerPlanState(node);

	hashtable = node->hashtable;
	nbatch = hashtable->nbatch;

	if (nbatch > 0)
	{
		/*
		 * Open temp files for inner batches, if needed. Note that file
		 * buffers are palloc'd in regular executor context.
		 */
		for (i = 0; i < nbatch; i++)
			hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
	}

	/*
	 * set expression context
	 */
	hashkeys = node->hashkeys;
	econtext = node->ps.ps_ExprContext;

	/*
	 * get all inner tuples and insert into the hash table (or temp files)
	 */
	for (;;)
	{
		slot = ExecProcNode(outerNode);
		if (TupIsNull(slot))
			break;
		hashtable->hashNonEmpty = true;
		econtext->ecxt_innertuple = slot;
		ExecHashTableInsert(hashtable, econtext, hashkeys);
		ExecClearTuple(slot);
	}

	/*
	 * Return the slot so that we have the tuple descriptor when we need
	 * to save/restore them.  -Jeff 11 July 1991
	 */
	return slot;
}
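
All of these examples revolve around BufFileCreateTemp(). As a point of reference, here is a minimal sketch of the temp-file round trip the batch files above rely on, assuming a backend context; the BufFile read/write signatures follow the older releases this excerpt comes from and have changed in later versions.

#include "postgres.h"
#include "storage/buffile.h"

/*
 * Illustrative sketch only: write one value to a transaction-local temp
 * file, rewind, and read it back.  BufFileCreateTemp(false) creates a file
 * that goes away at end of transaction; true would keep it across
 * transactions (interXact).
 */
static void
buffile_roundtrip_sketch(void)
{
	BufFile    *file = BufFileCreateTemp(false);
	int			value = 42;
	int			copy = 0;

	if (BufFileWrite(file, &value, sizeof(value)) != sizeof(value))
		elog(ERROR, "could not write to temporary file");

	/* rewind: logical file 0, offset 0 */
	if (BufFileSeek(file, 0, 0L, SEEK_SET) != 0)
		elog(ERROR, "could not rewind temporary file");

	if (BufFileRead(file, &copy, sizeof(copy)) != sizeof(copy))
		elog(ERROR, "could not read back from temporary file");

	BufFileClose(file);
}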
Example 2
/*
 * Create a set of logical tapes in a temporary underlying file.
 *
 * Each tape is initialized in write state.
 */
LogicalTapeSet *
LogicalTapeSetCreate(int ntapes)
{
	LogicalTapeSet *lts;
	LogicalTape *lt;
	int			i;

	/*
	 * Create top-level struct including per-tape LogicalTape structs. First
	 * LogicalTape struct is already counted in sizeof(LogicalTapeSet).
	 */
	Assert(ntapes > 0);
	lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) +
									(ntapes - 1) * sizeof(LogicalTape));
	lts->pfile = BufFileCreateTemp(false);
	lts->nFileBlocks = 0L;
	lts->forgetFreeSpace = false;
	lts->blocksSorted = true;	/* a zero-length array is sorted ... */
	lts->freeBlocksLen = 32;	/* reasonable initial guess */
	lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
	lts->nFreeBlocks = 0;
	lts->nTapes = ntapes;

	/*
	 * Initialize per-tape structs.  Note we allocate the I/O buffer and
	 * first-level indirect block for a tape only when it is first actually
	 * written to.	This avoids wasting memory space when tuplesort.c
	 * overestimates the number of tapes needed.
	 */
	for (i = 0; i < ntapes; i++)
	{
		lt = &lts->tapes[i];
		lt->indirect = NULL;
		lt->writing = true;
		lt->frozen = false;
		lt->dirty = false;
		lt->numFullBlocks = 0L;
		lt->lastBlockBytes = 0;
		lt->buffer = NULL;
		lt->curBlockNumber = 0L;
		lt->pos = 0;
		lt->nbytes = 0;
	}
	return lts;
}
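
A hedged usage sketch for the tape set created above: write to one tape, rewind it for reading, and read the data back. The rewind entry point shown here matches the older API of this excerpt; later releases split it into LogicalTapeRewindForRead/ForWrite.

#include "postgres.h"
#include "utils/logtape.h"

/*
 * Illustrative sketch only: round-trip one value through tape 0 of a
 * two-tape set backed by a single temp BufFile.
 */
static void
logtape_roundtrip_sketch(void)
{
	LogicalTapeSet *lts = LogicalTapeSetCreate(2);
	long		value = 123L;
	long		copy = 0L;

	LogicalTapeWrite(lts, 0, &value, sizeof(value));

	/* switch tape 0 from write state to read state */
	LogicalTapeRewind(lts, 0, false);

	if (LogicalTapeRead(lts, 0, &copy, sizeof(copy)) != sizeof(copy))
		elog(ERROR, "unexpected end of logical tape");

	LogicalTapeSetClose(lts);
}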
Example 3
/*
 * Open many bfz files to simulate running out of file handles.
 * file_type values:
 *   0 - bfz, no compression
 *   1 - bfz, zlib compression
 *   2 - buffile
 */
static void
open_many_files(int file_type)
{
	char file_name[MAXPGPATH];
	int iter = 0;

	while (true)
	{
		CHECK_FOR_INTERRUPTS();
		snprintf(file_name, MAXPGPATH, "fake_file_%d", iter);
		switch (file_type)
		{
		case 0:
		case 1:
			;
#if USE_ASSERT_CHECKING
			bfz_t *bfz_file =
#endif
			bfz_create(file_name, true /* delOnClose */, file_type);
			Assert(NULL != bfz_file);
			break;

		case 2:
			;
#if USE_ASSERT_CHECKING
			BufFile *buf_file =
#endif
			BufFileCreateTemp(file_name, false /* interXact */ );
			Assert(NULL != buf_file);
			break;

		default:
			Assert(false && "argument for fault type not supported");
		}

		iter++;
	}

	return;
}
Example 4
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *readptr;
	int			i;
	ResourceOwner oldowner;

	state->tuples++;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->current = state->memtupcount;
				}
			}

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				(void) grow_memtuples(state);
				Assert(state->memtupcount < state->memtupsize);
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			state->myfile = BufFileCreateTemp(state->interXact);

			CurrentResourceOwner = oldowner;

			/*
			 * Freeze the decision about whether trailing length words will be
			 * used.  We can't change this choice once data is on tape, even
			 * though callers might drop the requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;
		case TSS_WRITEFILE:

			/*
			 * Update read pointers as needed; see API spec above. Note:
			 * BufFileTell is quite cheap, so not worth trying to avoid
			 * multiple calls.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					BufFileTell(state->myfile,
								&readptr->file,
								&readptr->offset);
				}
			}

			WRITETUP(state, tuple);
			break;
		case TSS_READFILE:

			/*
			 * Switch from reading to writing.
			 */
			if (!state->readptrs[state->activeptr].eof_reached)
				BufFileTell(state->myfile,
							&state->readptrs[state->activeptr].file,
							&state->readptrs[state->activeptr].offset);
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
				 errmsg("could not seek in tuplestore temporary file: %m")));
			state->status = TSS_WRITEFILE;

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->file = state->writepos_file;
					readptr->offset = state->writepos_offset;
				}
			}

			WRITETUP(state, tuple);
			break;
		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
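
From the caller's perspective the spill to a BufFile temp file above is invisible; everything goes through the public tuplestore API. A hedged sketch of that usage follows; the copy argument of tuplestore_gettupleslot() is not present in every release, so treat the signatures as approximate.

#include "postgres.h"
#include "executor/tuptable.h"
#include "utils/tuplestore.h"

/*
 * Illustrative sketch only: put a tuple into a tuplestore and scan it back.
 * Once the 1024 kB budget is exceeded, tuplestore_puttuple_common()
 * transparently switches to the BufFile temp file as shown above.
 */
static void
tuplestore_usage_sketch(TupleTableSlot *slot_in, TupleTableSlot *slot_out)
{
	Tuplestorestate *ts;

	ts = tuplestore_begin_heap(true /* randomAccess */ ,
							   false /* interXact */ ,
							   1024 /* maxKBytes */ );

	tuplestore_puttupleslot(ts, slot_in);

	tuplestore_rescan(ts);
	while (tuplestore_gettupleslot(ts, true /* forward */ , true /* copy */ ,
								   slot_out))
	{
		/* one stored tuple per iteration */
	}

	tuplestore_end(ts);
}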
Example 5
/*
 * Initialize GiST build buffers.
 */
GISTBuildBuffers *
gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
{
	GISTBuildBuffers *gfbb;
	HASHCTL		hashCtl;

	gfbb = palloc(sizeof(GISTBuildBuffers));
	gfbb->pagesPerBuffer = pagesPerBuffer;
	gfbb->levelStep = levelStep;

	/*
	 * Create a temporary file to hold buffer pages that are swapped out of
	 * memory.
	 */
	gfbb->pfile = BufFileCreateTemp(false);
	gfbb->nFileBlocks = 0;

	/* Initialize free page management. */
	gfbb->nFreeBlocks = 0;
	gfbb->freeBlocksLen = 32;
	gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));

	/*
	 * The current memory context will be used for all in-memory data
	 * structures of the buffers that persist throughout the buffering build.
	 */
	gfbb->context = CurrentMemoryContext;

	/*
	 * The nodeBuffersTab hash associates index blocks with their node
	 * buffers.
	 */
	hashCtl.keysize = sizeof(BlockNumber);
	hashCtl.entrysize = sizeof(GISTNodeBuffer);
	hashCtl.hcxt = CurrentMemoryContext;
	hashCtl.hash = tag_hash;
	hashCtl.match = memcmp;
	gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
									   1024,
									   &hashCtl,
									   HASH_ELEM | HASH_CONTEXT
									   | HASH_FUNCTION | HASH_COMPARE);

	gfbb->bufferEmptyingQueue = NIL;

	/*
	 * Per-level lists of node buffers, used by the final buffer-emptying
	 * pass.  Node buffers are added to these lists when they are created.
	 */
	gfbb->buffersOnLevelsLen = 1;
	gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
											 gfbb->buffersOnLevelsLen);
	gfbb->buffersOnLevels[0] = NIL;

	/*
	 * Block numbers of node buffers whose last pages are currently loaded
	 * into main memory.
	 */
	gfbb->loadedBuffersLen = 32;
	gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
												   sizeof(GISTNodeBuffer *));
	gfbb->loadedBuffersCount = 0;

	gfbb->rootlevel = maxLevel;

	return gfbb;
}
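
The dynahash table configured above is keyed by block number and looked up whenever a node buffer is needed (the real lookup lives in gistGetNodeBuffer()). A hedged sketch of that find-or-create pattern, with a hypothetical entry type standing in for GISTNodeBuffer:

#include "postgres.h"
#include "storage/block.h"
#include "utils/hsearch.h"

/*
 * Illustrative sketch only: dynahash find-or-create keyed by BlockNumber,
 * matching the keysize/HASH_ELEM setup in gistInitBuildBuffers().  The key
 * must be the first field of the entry struct.
 */
typedef struct SketchNodeBuffer
{
	BlockNumber nodeBlocknum;	/* hash key */
	int			blocksCount;	/* hypothetical payload */
} SketchNodeBuffer;

static SketchNodeBuffer *
sketch_get_node_buffer(HTAB *tab, BlockNumber blkno)
{
	SketchNodeBuffer *entry;
	bool		found;

	entry = (SketchNodeBuffer *) hash_search(tab, &blkno, HASH_ENTER, &found);
	if (!found)
		entry->blocksCount = 0; /* first use: initialize non-key fields */
	return entry;
}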
Example 6
/*
 * Create a set of logical tapes in a temporary underlying file.
 *
 * Each tape is initialized in write state.  Serial callers pass ntapes,
 * NULL argument for shared, and -1 for worker.  Parallel worker callers
 * pass ntapes, a shared file handle, NULL shared argument, and their own
 * worker number.  Leader callers, which claim shared worker tapes here,
 * must supply non-sentinel values for all arguments except worker number,
 * which should be -1.
 *
 * Leader caller is passing back an array of metadata each worker captured
 * when LogicalTapeFreeze() was called for their final result tapes.  Passed
 * tapes array is actually sized ntapes - 1, because it includes only
 * worker tapes, whereas leader requires its own leader tape.  Note that we
 * rely on the assumption that reclaimed worker tapes will only be read
 * from once by leader, and never written to again (tapes are initialized
 * for writing, but that's only to be consistent).  Leader may not write to
 * its own tape purely due to a restriction in the shared buffile
 * infrastructure that may be lifted in the future.
 */
LogicalTapeSet *
LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
					 int worker)
{
	LogicalTapeSet *lts;
	LogicalTape *lt;
	int			i;

	/*
	 * Create top-level struct including per-tape LogicalTape structs.
	 */
	Assert(ntapes > 0);
	lts = (LogicalTapeSet *) palloc(offsetof(LogicalTapeSet, tapes) +
									ntapes * sizeof(LogicalTape));
	lts->nBlocksAllocated = 0L;
	lts->nBlocksWritten = 0L;
	lts->nHoleBlocks = 0L;
	lts->forgetFreeSpace = false;
	lts->blocksSorted = true;	/* a zero-length array is sorted ... */
	lts->freeBlocksLen = 32;	/* reasonable initial guess */
	lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
	lts->nFreeBlocks = 0;
	lts->nTapes = ntapes;

	/*
	 * Initialize per-tape structs.  Note we allocate the I/O buffer and the
	 * first block for a tape only when it is first actually written to.  This
	 * avoids wasting memory space when tuplesort.c overestimates the number
	 * of tapes needed.
	 */
	for (i = 0; i < ntapes; i++)
	{
		lt = &lts->tapes[i];
		lt->writing = true;
		lt->frozen = false;
		lt->dirty = false;
		lt->firstBlockNumber = -1L;
		lt->curBlockNumber = -1L;
		lt->nextBlockNumber = -1L;
		lt->offsetBlockNumber = 0L;
		lt->buffer = NULL;
		lt->buffer_size = 0;
		/* palloc() larger than MaxAllocSize would fail */
		lt->max_size = MaxAllocSize;
		lt->pos = 0;
		lt->nbytes = 0;
	}

	/*
	 * Create temp BufFile storage as required.
	 *
	 * Leader concatenates worker tapes, which requires special adjustment to
	 * final tapeset data.  Things are simpler for the worker case and the
	 * serial case, though.  They are generally very similar -- workers use a
	 * shared fileset, whereas serial sorts use a conventional serial BufFile.
	 */
	if (shared)
		ltsConcatWorkerTapes(lts, shared, fileset);
	else if (fileset)
	{
		char		filename[MAXPGPATH];

		pg_itoa(worker, filename);
		lts->pfile = BufFileCreateShared(fileset, filename);
	}
	else
		lts->pfile = BufFileCreateTemp(false);

	return lts;
}
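
The calling conventions spelled out in the header comment amount to three invocation shapes. A brief sketch, with variable names invented for illustration:

#include "postgres.h"
#include "storage/sharedfileset.h"
#include "utils/logtape.h"

/*
 * Illustrative sketch only: the three ways LogicalTapeSetCreate() is
 * called, per the header comment above.  worker_shared, worker_fileset,
 * worker_number and nworkers are hypothetical.
 */
static void
tapeset_invocation_sketch(TapeShare *worker_shared,
						  SharedFileSet *worker_fileset,
						  int worker_number, int nworkers)
{
	LogicalTapeSet *serial_lts;
	LogicalTapeSet *worker_lts;
	LogicalTapeSet *leader_lts;

	/* serial sort: no shared state, sentinel worker number */
	serial_lts = LogicalTapeSetCreate(7, NULL, NULL, -1);

	/* parallel worker: shared fileset, NULL shared array, own worker number */
	worker_lts = LogicalTapeSetCreate(7, NULL, worker_fileset, worker_number);

	/* leader: claims the workers' frozen result tapes plus its own tape */
	leader_lts = LogicalTapeSetCreate(nworkers + 1, worker_shared,
									  worker_fileset, -1);

	(void) serial_lts;
	(void) worker_lts;
	(void) leader_lts;
}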
Example 7
static void
tuplestore_puttuple_common(Tuplestorestate *state, TuplestorePos *pos, void *tuple)
{
	ResourceOwner oldowner;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				/*
				 * See grow_memtuples() in tuplesort.c for the rationale
				 * behind these two tests.
				 */
				if (state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
					(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
				{
					FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
					state->memtupsize *= 2;
					state->memtuples = (void **)
						repalloc(state->memtuples,
								 state->memtupsize * sizeof(void *));
					USEMEM(state, GetMemoryChunkSpace(state->memtuples));
				}
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/* If eof_reached, keep read position in sync */
			if (pos->eof_reached)
				pos->current = state->memtupcount;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			{
				char tmpprefix[50];
				snprintf(tmpprefix, 50, "slice%d_tuplestore", currentSliceId);
				state->myfile = BufFileCreateTemp(tmpprefix, state->interXact);
			}

			CurrentResourceOwner = oldowner;

			state->status = TSS_WRITEFILE;
			dumptuples(state, pos);
			break;
		case TSS_WRITEFILE:
			WRITETUP(state, pos, tuple);
			break;
		case TSS_READFILE:

			/*
			 * Switch from reading to writing.
			 */
			if (!pos->eof_reached)
				BufFileTell(state->myfile,
							&pos->readpos_offset);
			if (BufFileSeek(state->myfile,
							pos->writepos_offset,
							SEEK_SET) != 0)
				elog(ERROR, "seek to EOF failed");
			state->status = TSS_WRITEFILE;
			WRITETUP(state, pos, tuple);
			break;
		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
Example 8
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *readptr;
	int			i;
	ResourceOwner oldowner;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->current = state->memtupcount;
				}
			}

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				/*
				 * See grow_memtuples() in tuplesort.c for the rationale
				 * behind these two tests.
				 */
				if (state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
					(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
				{
					FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
					state->memtupsize *= 2;
					state->memtuples = (void **)
						repalloc(state->memtuples,
								 state->memtupsize * sizeof(void *));
					USEMEM(state, GetMemoryChunkSpace(state->memtuples));
					if (LACKMEM(state))
						elog(ERROR, "unexpected out-of-memory situation in tuplestore");
				}
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			{
				char		tmpprefix[50];
				snprintf(tmpprefix, 50, "slice%d_tuplestore", currentSliceId);
				state->myfile = BufFileCreateTemp(tmpprefix, state->interXact);
			}

			CurrentResourceOwner = oldowner;

			/*
			 * Freeze the decision about whether trailing length words will be
			 * used.  We can't change this choice once data is on tape, even
			 * though callers might drop the requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;
		case TSS_WRITEFILE:

			/*
			 * Update read pointers as needed; see API spec above. Note:
			 * BufFileTell is quite cheap, so not worth trying to avoid
			 * multiple calls.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					BufFileTell(state->myfile,
								&readptr->file,
								&readptr->offset);
				}
			}

			WRITETUP(state, tuple);
			break;
		case TSS_READFILE:

			/*
			 * Switch from reading to writing.
			 */
			if (!state->readptrs[state->activeptr].eof_reached)
				BufFileTell(state->myfile,
							&state->readptrs[state->activeptr].file,
							&state->readptrs[state->activeptr].offset);
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				elog(ERROR, "tuplestore seek to EOF failed");
			state->status = TSS_WRITEFILE;

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->file = state->writepos_file;
					readptr->offset = state->writepos_offset;
				}
			}

			WRITETUP(state, tuple);
			break;
		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
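
The doubling guard that appears in the last two variants applies the same two tests before growing the in-memory array; restated as a standalone predicate (a sketch, with the relevant Tuplestorestate fields passed in as plain values):

#include "postgres.h"
#include "utils/memutils.h"		/* MaxAllocSize */

/*
 * Illustrative sketch only: may the memtuples array be doubled?  Mirrors
 * the two tests used above: the spare memory budget must cover another
 * copy of the pointer array, and the doubled element count must stay
 * under palloc's MaxAllocSize limit.
 */
static bool
can_double_memtuples(long availMem, int memtupsize)
{
	if (availMem <= (long) (memtupsize * sizeof(void *)))
		return false;
	if ((Size) (memtupsize * 2) >= MaxAllocSize / sizeof(void *))
		return false;
	return true;
}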