Exemple #1
0
/*
 * tuplestore_restorepos_pos
 *		Return the read position held in *pos to its last saved mark.
 *
 * state: the tuplestore; must have been set up with EXEC_FLAG_MARK
 *		(asserted).
 * pos: the position to restore.  Its eof_reached flag is cleared in every
 *		valid state.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
void
tuplestore_restorepos_pos(Tuplestorestate *state, TuplestorePos *pos)
{
	Assert(state->eflags & EXEC_FLAG_MARK);

	if (state->status == TSS_INMEM)
	{
		/* in memory, the mark is simply an array index */
		pos->eof_reached = false;
		pos->current = pos->markpos_current;
	}
	else if (state->status == TSS_WRITEFILE)
	{
		/* only the remembered read offset changes; no seek is issued here */
		pos->eof_reached = false;
		pos->readpos_offset = pos->markpos_offset;
	}
	else if (state->status == TSS_READFILE)
	{
		/* the file is being read, so move its position to the mark now */
		pos->eof_reached = false;
		if (BufFileSeek(state->myfile, pos->markpos_offset, SEEK_SET) != 0)
			elog(ERROR, "tuplestore_restorepos failed");
	}
	else
	{
		elog(ERROR, "invalid tuplestore state");
	}
}
/*
 * tuplestore_rescan
 *		Reset the store's active read pointer to the first tuple.
 *
 * The active pointer must carry EXEC_FLAG_REWIND and the store must not
 * have been truncated; both conditions are asserted.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
void
tuplestore_rescan(Tuplestorestate *state)
{
	TSReadPointer *readptr = &state->readptrs[state->activeptr];

	Assert(readptr->eflags & EXEC_FLAG_REWIND);
	Assert(!state->truncated);

	if (state->status == TSS_INMEM)
	{
		/* in memory: restart at array index zero */
		readptr->eof_reached = false;
		readptr->current = 0;
	}
	else if (state->status == TSS_WRITEFILE)
	{
		/* only the remembered read position is reset; no seek happens here */
		readptr->eof_reached = false;
		readptr->file = 0;
		readptr->offset = 0L;
	}
	else if (state->status == TSS_READFILE)
	{
		/* the file is being read, so reposition it right away */
		readptr->eof_reached = false;
		if (BufFileSeek(state->myfile, 0, 0L, SEEK_SET) != 0)
			elog(ERROR, "tuplestore seek to start failed");
	}
	else
	{
		elog(ERROR, "invalid tuplestore state");
	}
}
Exemple #3
0
/*
 * tuplestore_rescan_pos
 *		Rewind the read position held in *pos to the start of the store.
 *
 * state: the tuplestore; must have been set up with EXEC_FLAG_REWIND
 *		(asserted).
 * pos: the position to rewind.  Its eof_reached flag is cleared in every
 *		valid state.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
void
tuplestore_rescan_pos(Tuplestorestate *state, TuplestorePos *pos)
{
	Assert(state->eflags & EXEC_FLAG_REWIND);

	if (state->status == TSS_INMEM)
	{
		/* in memory: restart at array index zero */
		pos->eof_reached = false;
		pos->current = 0;
	}
	else if (state->status == TSS_WRITEFILE)
	{
		/* only the remembered read offset is reset; no seek happens here */
		pos->eof_reached = false;
		pos->readpos_offset = 0L;
	}
	else if (state->status == TSS_READFILE)
	{
		/* the file is being read, so reposition it right away */
		pos->eof_reached = false;
		if (BufFileSeek(state->myfile, 0L /* offset */, SEEK_SET) != 0)
			elog(ERROR, "seek to start failed");
	}
	else
	{
		elog(ERROR, "invalid tuplestore state");
	}
}
Exemple #4
0
/*
 * tuplestore_rescan
 *		Reset the store's active read pointer to the first tuple.
 *
 * The active pointer must carry EXEC_FLAG_REWIND and the store must not
 * have been truncated; both conditions are asserted.
 *
 * A failed seek is reported through ereport() with the file-access error
 * code; an unrecognized store state is reported through elog().
 */
void
tuplestore_rescan(Tuplestorestate *state)
{
	TSReadPointer *readptr = &state->readptrs[state->activeptr];

	Assert(readptr->eflags & EXEC_FLAG_REWIND);
	Assert(!state->truncated);

	/* Everything still in memory: restart at array index zero. */
	if (state->status == TSS_INMEM)
	{
		readptr->eof_reached = false;
		readptr->current = 0;
		return;
	}

	/* Only the remembered read position is reset; no seek happens here. */
	if (state->status == TSS_WRITEFILE)
	{
		readptr->eof_reached = false;
		readptr->file = 0;
		readptr->offset = 0L;
		return;
	}

	if (state->status != TSS_READFILE)
	{
		elog(ERROR, "invalid tuplestore state");
		return;
	}

	/* The file is being read, so reposition it right away. */
	readptr->eof_reached = false;
	if (BufFileSeek(state->myfile, 0, 0L, SEEK_SET) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not seek in tuplestore temporary file: %m")));
}
Exemple #5
0
/*
 * BufFileSeekBlock --- seek to the start of a numbered block
 *
 * Does an absolute (SEEK_SET) seek to the beginning of block "blknum",
 * where blocks are BLCKSZ bytes each.  The block number is split into a
 * quotient and remainder by BUFFILE_SEG_SIZE: the quotient is passed to
 * BufFileSeek as the file number, and the remainder times BLCKSZ as the
 * byte offset.
 *
 * Because blknum is a long, callers cannot address files larger than
 * BLCKSZ * LONG_MAX bytes.
 *
 * The return value is whatever BufFileSeek returns: 0 if OK, EOF if not.
 * The logical position is not moved if an impossible seek is attempted.
 */
int
BufFileSeekBlock(BufFile *file, long blknum)
{
	int			segno = (int) (blknum / BUFFILE_SEG_SIZE);
	off_t		segoff = (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ;

	return BufFileSeek(file, segno, segoff, SEEK_SET);
}
Exemple #6
0
/*
 * BufFileSeekBlock --- seek to the start of a numbered block
 *
 * Does an absolute (SEEK_SET) seek to the beginning of block "blknum",
 * where blocks are BLCKSZ bytes each.  The block number is split into a
 * quotient and remainder by RELSEG_SIZE: the quotient is passed to
 * BufFileSeek as the file number, and the remainder times BLCKSZ as the
 * byte offset.
 *
 * Because blknum is a long, callers cannot address files larger than
 * BLCKSZ * LONG_MAX bytes.
 *
 * The return value is whatever BufFileSeek returns: 0 if OK, EOF if not.
 * The logical position is not moved if an impossible seek is attempted.
 */
int
BufFileSeekBlock(BufFile *file, long blknum)
{
	int			segno = (int) (blknum / RELSEG_SIZE);
	long		segoff = (blknum % RELSEG_SIZE) * BLCKSZ;

	return BufFileSeek(file, segno, segoff, SEEK_SET);
}
/*
 * ExecWorkFile_Rewind
 *    Move the work file's position back to the beginning.
 *
 * For a BUFFILE work file the result says whether the seek to offset 0
 * succeeded.  For a BFZ work file the append phase is ended, the recorded
 * size is adjusted to what bfz_append_end() reported, a scan is started,
 * and true is returned.  Any other file type is reported via insist_log().
 */
bool
ExecWorkFile_Rewind(ExecWorkFile *workfile)
{
	int64		bfz_size = 0;

	Assert(workfile != NULL);

	if (workfile->fileType == BUFFILE)
	{
		/* BufFileSeek returns 0 if everything went OK */
		return (BufFileSeek((BufFile *) workfile->file, 0L /* offset */, SEEK_SET) == 0);
	}

	if (workfile->fileType == BFZ)
	{
		bfz_size = bfz_append_end((bfz_t *) workfile->file);
		ExecWorkFile_AdjustBFZSize(workfile, bfz_size);
		bfz_scan_begin((bfz_t *) workfile->file);
	}
	else
	{
		insist_log(false, "invalid work file type: %d", workfile->fileType);
	}

	return true;
}
Exemple #8
0
/*
 * BufFileSeekBlock --- seek to the start of a numbered block
 *
 * Does an absolute (SEEK_SET) seek to byte offset blknum * BLCKSZ, i.e. the
 * beginning of the blknum'th BLCKSZ-sized block of the file.  The product
 * is computed without an overflow check.
 *
 * The return value is whatever BufFileSeek returns: 0 if OK, EOF if not.
 * The logical position is not moved if an impossible seek is attempted.
 */
int
BufFileSeekBlock(BufFile *file, int64 blknum)
{
	int64		byte_offset = blknum * BLCKSZ;

	return BufFileSeek(file, byte_offset, SEEK_SET);
}
Exemple #9
0
/*
 * tuplestore_gettuple
 *		Fetch the next tuple in either forward or back direction, using the
 *		store's currently active read pointer.
 *
 * Returns NULL if no more tuples.  If should_free is set, the
 * caller must pfree the returned tuple when done with it.
 *
 * Backward scan is only allowed if randomAccess was set true or
 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
 *
 * state: the tuplestore to read from.
 * forward: true to step forward, false to step backward.
 * should_free: output flag; set to false when the tuple comes straight out
 *		of the in-memory array and to true when it was read from the temp
 *		file.  It is not written on the early "already at EOF" return in the
 *		TSS_WRITEFILE case.
 *
 * A store in TSS_WRITEFILE state is switched to TSS_READFILE as a side
 * effect.  Seek failures that are not interpreted as "start of file" are
 * reported with ereport(ERROR).
 */
static void *
tuplestore_gettuple(Tuplestorestate *state, bool forward,
					bool *should_free)
{
	TSReadPointer *readptr = &state->readptrs[state->activeptr];
	unsigned int tuplen;
	void	   *tup;

	Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));

	switch (state->status)
	{
		case TSS_INMEM:
			/* the array entry itself is returned, so caller must not free it */
			*should_free = false;
			if (forward)
			{
				if (readptr->eof_reached)
					return NULL;
				if (readptr->current < state->memtupcount)
				{
					/* We have another tuple, so return it */
					return state->memtuples[readptr->current++];
				}
				readptr->eof_reached = true;
				return NULL;
			}
			else
			{
				/*
				 * if all tuples are fetched already then we return last
				 * tuple, else tuple before last returned.
				 */
				if (readptr->eof_reached)
				{
					readptr->current = state->memtupcount;
					readptr->eof_reached = false;
				}
				else
				{
					/* nothing before the first still-present tuple */
					if (readptr->current <= state->memtupdeleted)
					{
						Assert(!state->truncated);
						return NULL;
					}
					readptr->current--; /* last returned tuple */
				}
				if (readptr->current <= state->memtupdeleted)
				{
					Assert(!state->truncated);
					return NULL;
				}
				/* current is left pointing just past the tuple returned */
				return state->memtuples[readptr->current - 1];
			}
			break;

		case TSS_WRITEFILE:
			/* Skip state change if we'll just return NULL */
			if (readptr->eof_reached && forward)
				return NULL;

			/*
			 * Switch from writing to reading.
			 */
			/* remember where writing stopped, so it can be resumed later */
			BufFileTell(state->myfile,
						&state->writepos_file, &state->writepos_offset);
			/* at EOF the file is left where it is, i.e. at the write position */
			if (!readptr->eof_reached)
				if (BufFileSeek(state->myfile,
								readptr->file, readptr->offset,
								SEEK_SET) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not seek in tuplestore temporary file: %m")));
			state->status = TSS_READFILE;
			/* FALL THRU into READFILE case */

		case TSS_READFILE:
			/* tuples obtained through READTUP are the caller's to free */
			*should_free = true;
			if (forward)
			{
				/* a zero length word is treated as end of data */
				if ((tuplen = getlen(state, true)) != 0)
				{
					tup = READTUP(state, tuplen);
					return tup;
				}
				else
				{
					readptr->eof_reached = true;
					return NULL;
				}
			}

			/*
			 * Backward.
			 *
			 * if all tuples are fetched already then we return last tuple,
			 * else tuple before last returned.
			 *
			 * Back up to fetch previously-returned tuple's ending length
			 * word. If seek fails, assume we are at start of file.
			 */
			if (BufFileSeek(state->myfile, 0, -(long) sizeof(unsigned int),
							SEEK_CUR) != 0)
			{
				/* even a failed backwards fetch gets you out of eof state */
				readptr->eof_reached = false;
				Assert(!state->truncated);
				return NULL;
			}
			tuplen = getlen(state, false);

			if (readptr->eof_reached)
			{
				readptr->eof_reached = false;
				/* We will return the tuple returned before returning NULL */
			}
			else
			{
				/*
				 * Back up to get ending length word of tuple before it.
				 */
				if (BufFileSeek(state->myfile, 0,
								-(long) (tuplen + 2 * sizeof(unsigned int)),
								SEEK_CUR) != 0)
				{
					/*
					 * If that fails, presumably the prev tuple is the first
					 * in the file.  Back up so that it becomes next to read
					 * in forward direction (not obviously right, but that is
					 * what in-memory case does).
					 */
					if (BufFileSeek(state->myfile, 0,
									-(long) (tuplen + sizeof(unsigned int)),
									SEEK_CUR) != 0)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not seek in tuplestore temporary file: %m")));
					Assert(!state->truncated);
					return NULL;
				}
				tuplen = getlen(state, false);
			}

			/*
			 * Now we have the length of the prior tuple, back up and read it.
			 * Note: READTUP expects we are positioned after the initial
			 * length word of the tuple, so back up to that point.
			 */
			if (BufFileSeek(state->myfile, 0,
							-(long) tuplen,
							SEEK_CUR) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
				 errmsg("could not seek in tuplestore temporary file: %m")));
			tup = READTUP(state, tuplen);
			return tup;

		default:
			elog(ERROR, "invalid tuplestore state");
			return NULL;		/* keep compiler quiet */
	}
}
Exemple #10
0
/*
 * tuplestore_puttuple_common
 *		Append one tuple to the store, spilling to a temp file when the
 *		in-memory array can no longer hold the data.
 *
 * state: the tuplestore to append to; state->tuples is incremented on every
 *		call.
 * tuple: the tuple to store.  It is placed in the in-memory array or handed
 *		to WRITETUP, depending on the store's status.
 *
 * Read pointers other than the active one that were in eof_reached state
 * are taken out of it and positioned at the spot where the new tuple goes.
 * A store in TSS_READFILE state is switched back to TSS_WRITEFILE as a side
 * effect.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *readptr;
	int			i;
	ResourceOwner oldowner;

	state->tuples++;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* memtupcount is the index the new tuple is stored at */
					readptr->eof_reached = false;
					readptr->current = state->memtupcount;
				}
			}

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				(void) grow_memtuples(state);
				Assert(state->memtupcount < state->memtupsize);
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			state->myfile = BufFileCreateTemp(state->interXact);

			/* put back the caller's resource owner now the file exists */
			CurrentResourceOwner = oldowner;

			/*
			 * Freeze the decision about whether trailing length words will be
			 * used.  We can't change this choice once data is on tape, even
			 * though callers might drop the requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;
		case TSS_WRITEFILE:

			/*
			 * Update read pointers as needed; see API spec above. Note:
			 * BufFileTell is quite cheap, so not worth trying to avoid
			 * multiple calls.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* record the file position before the tuple is written */
					readptr->eof_reached = false;
					BufFileTell(state->myfile,
								&readptr->file,
								&readptr->offset);
				}
			}

			WRITETUP(state, tuple);
			break;
		case TSS_READFILE:

			/*
			 * Switch from reading to writing.
			 */
			/* save the active pointer's read position, unless it is at EOF */
			if (!state->readptrs[state->activeptr].eof_reached)
				BufFileTell(state->myfile,
							&state->readptrs[state->activeptr].file,
							&state->readptrs[state->activeptr].offset);
			/* go back to where writing previously stopped */
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
				 errmsg("could not seek in tuplestore temporary file: %m")));
			state->status = TSS_WRITEFILE;

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* the saved write position is where the new tuple lands */
					readptr->eof_reached = false;
					readptr->file = state->writepos_file;
					readptr->offset = state->writepos_offset;
				}
			}

			WRITETUP(state, tuple);
			break;
		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
Exemple #11
0
/*
 * tuplestore_select_read_pointer
 *		Make read pointer number "ptr" the store's active read pointer.
 *
 * ptr must be a valid index into the store's read pointer array (asserted).
 * Selecting the pointer that is already active is a no-op.
 *
 * Only the TSS_READFILE state needs real work, because there the temp
 * file's seek position stands for the active pointer's position: the
 * outgoing pointer's position is saved and the file is moved to the
 * incoming pointer's position.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
void
tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
{
	TSReadPointer *incoming;
	TSReadPointer *outgoing;

	Assert(ptr >= 0 && ptr < state->readptrcount);

	/* Nothing to do when the requested pointer is already the active one */
	if (ptr == state->activeptr)
		return;

	incoming = &state->readptrs[ptr];
	outgoing = &state->readptrs[state->activeptr];

	if (state->status == TSS_READFILE)
	{
		/*
		 * Record where the outgoing pointer stopped reading.  A pointer in
		 * eof_reached state keeps whatever it already holds.
		 */
		if (!outgoing->eof_reached)
			BufFileTell(state->myfile, &outgoing->file, &outgoing->offset);

		/*
		 * Move the file to the incoming pointer's logical position.  For a
		 * pointer in eof_reached state that is the end of the data, which
		 * is available as the saved write position.
		 */
		if (incoming->eof_reached)
		{
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in tuplestore temporary file: %m")));
		}
		else
		{
			if (BufFileSeek(state->myfile,
							incoming->file, incoming->offset,
							SEEK_SET) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in tuplestore temporary file: %m")));
		}
	}
	else if (state->status != TSS_INMEM && state->status != TSS_WRITEFILE)
	{
		/* TSS_INMEM and TSS_WRITEFILE need no work; anything else is bogus */
		elog(ERROR, "invalid tuplestore state");
	}

	state->activeptr = ptr;
}
Exemple #12
0
/*
 * tuplestore_copy_read_pointer
 *		Overwrite read pointer "destptr" with the state of read pointer
 *		"srcptr".
 *
 * Both indexes must be valid read pointer numbers (asserted).  Copying a
 * pointer onto itself does nothing.
 *
 * If the two pointers carried different eflags, the store-wide eflags are
 * recomputed as the OR of all read pointers' eflags after the copy.
 *
 * Raises ERROR if the temp file cannot be repositioned, or if the store is
 * in an unrecognized state.
 */
void
tuplestore_copy_read_pointer(Tuplestorestate *state,
							 int srcptr, int destptr)
{
	TSReadPointer *src = &state->readptrs[srcptr];
	TSReadPointer *dst = &state->readptrs[destptr];
	int			prev_dst_eflags;

	Assert(srcptr >= 0 && srcptr < state->readptrcount);
	Assert(destptr >= 0 && destptr < state->readptrcount);

	/* Source and destination are the same pointer: nothing to copy */
	if (srcptr == destptr)
		return;

	/* Copy first, then decide whether the overall eflags need refreshing */
	prev_dst_eflags = dst->eflags;
	*dst = *src;

	if (prev_dst_eflags != src->eflags)
	{
		int			combined = state->readptrs[0].eflags;
		int			n;

		for (n = 1; n < state->readptrcount; n++)
			combined |= state->readptrs[n].eflags;
		state->eflags = combined;
	}

	if (state->status == TSS_READFILE)
	{
		/*
		 * While reading from the file, the active pointer's real position
		 * is the file's seek point rather than its stored fields.  So a
		 * copy into the active pointer needs a seek, and a copy out of the
		 * active pointer needs a tell, unless eof_reached is set.
		 */
		if (destptr == state->activeptr)
		{
			if (dst->eof_reached)
			{
				/* EOF position is the saved write position */
				if (BufFileSeek(state->myfile,
								state->writepos_file, state->writepos_offset,
								SEEK_SET) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not seek in tuplestore temporary file: %m")));
			}
			else
			{
				if (BufFileSeek(state->myfile,
								dst->file, dst->offset,
								SEEK_SET) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not seek in tuplestore temporary file: %m")));
			}
		}
		else if (srcptr == state->activeptr)
		{
			if (!dst->eof_reached)
				BufFileTell(state->myfile, &dst->file, &dst->offset);
		}
	}
	else if (state->status != TSS_INMEM && state->status != TSS_WRITEFILE)
	{
		/* TSS_INMEM and TSS_WRITEFILE need no work; anything else is bogus */
		elog(ERROR, "invalid tuplestore state");
	}
}
/*
 * dumpSharedLocalSnapshot_forCursor
 *		Dump the shared local snapshot to a temp file, so that the readers
 *		can pick it up.
 *
 * The file is opened with BufFileCreateTemp_ReaderWriter(fname, true, false),
 * where fname is derived from the slot's QDxid, QDcid and segmateSync.
 *
 * Data is written in this order:
 *		count			running byte total of the fixed part (it counts
 *						itself); written as zero first, patched at the end
 *		total_subcnt
 *		pid, xid, cid, startTimestamp
 *		combocidcnt, combocids
 *		snapshot.xmin, snapshot.xmax, snapshot.xcnt
 *		snapshot.xip	(xcnt entries)
 *		snapshot.curcid
 *		subtransaction xids copied from subxip_file, when total_subcnt is
 *		larger than inmemory_subcnt
 *		subtransaction xids from src->subxids, when inmemory_subcnt > 0
 *
 * Any failed write, or a failed final seek, ends in elog(ERROR).  On success
 * the file is flushed and deliberately left open.
 */
void
dumpSharedLocalSnapshot_forCursor(void)
{
	SharedSnapshotSlot *src = NULL;
	char* fname = NULL;
	BufFile *f = NULL;
	Size count=0;
	TransactionId *xids = NULL;
	int64 sub_size;
	int64 size_read;
	ResourceOwner oldowner;
	MemoryContext oldcontext;

	Assert(Gp_role == GP_ROLE_DISPATCH || (Gp_role == GP_ROLE_EXECUTE && Gp_is_writer));
	Assert(SharedLocalSnapshotSlot != NULL);

	src = (SharedSnapshotSlot *)SharedLocalSnapshotSlot;
	fname = sharedLocalSnapshot_filename(src->QDxid, src->QDcid, src->segmateSync);

	/*
	 * Create our dump-file. Hold the reference to it in
	 * the transaction's resource owner, so that it lives as long
	 * as the cursor we're declaring.
	 */
	oldowner = CurrentResourceOwner;
	CurrentResourceOwner = TopTransactionResourceOwner;
	oldcontext = MemoryContextSwitchTo(TopTransactionContext);
	f = BufFileCreateTemp_ReaderWriter(fname, true, false);

	/*
	 * Remember our file, so that we can close it at end of transaction.
	 * The resource owner mechanism would do it for us as a backstop, but it
	 * produces warnings at commit if some files haven't been closed.
	 */
	shared_snapshot_files = lappend(shared_snapshot_files, f);
	MemoryContextSwitchTo(oldcontext);
	CurrentResourceOwner = oldowner;

	/* we have our file. */

#define FileWriteOK(file, ptr, size) (BufFileWrite(file, ptr, size) == size)

#define FileWriteFieldWithCount(count, file, field) \
    if (BufFileWrite((file), &(field), sizeof(field)) != sizeof(field)) break; \
    count += sizeof(field);

	/*
	 * The do { ... } while (0) wrapper exists so that any failed write can
	 * "break" straight to the common error report at the bottom.
	 */
	do
	{
		/* Write our length as zero. (we'll fix it later). */
		count = 0;

		/*
		 * We write two counts here: One is count of first part,
		 * second is size of subtransaction xids copied from
		 * SharedLocalSnapshotSlot. This can be a big number.
		 */
		FileWriteFieldWithCount(count, f, count);
		FileWriteFieldWithCount(count, f, src->total_subcnt);

		FileWriteFieldWithCount(count, f, src->pid);
		FileWriteFieldWithCount(count, f, src->xid);
		FileWriteFieldWithCount(count, f, src->cid);
		FileWriteFieldWithCount(count, f, src->startTimestamp);

		FileWriteFieldWithCount(count, f, src->combocidcnt);
		FileWriteFieldWithCount(count, f, src->combocids);
		FileWriteFieldWithCount(count, f, src->snapshot.xmin);
		FileWriteFieldWithCount(count, f, src->snapshot.xmax);
		FileWriteFieldWithCount(count, f, src->snapshot.xcnt);

		/*
		 * NOTE(review): this passes the address of the xip field itself.  If
		 * xip is a pointer rather than an embedded array, the pointer's own
		 * bytes are written instead of the xid array -- confirm against the
		 * snapshot struct definition.
		 */
		if (!FileWriteOK(f, &src->snapshot.xip, src->snapshot.xcnt * sizeof(TransactionId)))
			break;
		count += src->snapshot.xcnt * sizeof(TransactionId);

		FileWriteFieldWithCount(count, f, src->snapshot.curcid);

		/*
		 * THE STUFF IN THE SHARED LOCAL VERSION OF
		 * snapshot.distribSnapshotWithLocalMapping
		 * APPEARS TO *NEVER* BE USED, SO THERE IS
		 * NO POINT IN TRYING TO DUMP IT (IN FACT,
		 * IT'S ALLOCATION STRATEGY ISN'T SHMEM-FRIENDLY).
		 */

		/*
		 * THIS STUFF IS USED IN THE FILENAME
		 * SO THE READER ALREADY HAS IT.
		 *

		 dst->QDcid = src->QDcid;
		 dst->segmateSync = src->segmateSync;
		 dst->QDxid = src->QDxid;
		 dst->ready = src->ready;

		 *
		 */

		/*
		 * Subtransaction xids that did not fit in memory are copied over
		 * from subxip_file, one MAX_XIDBUF_SIZE-byte chunk at a time.
		 */
		if (src->total_subcnt > src->inmemory_subcnt)
		{
			Assert(subxip_file != 0);

			xids = palloc(MAX_XIDBUF_SIZE);

			FileSeek(subxip_file, 0, SEEK_SET);
			sub_size = (src->total_subcnt - src->inmemory_subcnt)
				    * sizeof(TransactionId);
			while (sub_size > 0)
			{
				size_read = (sub_size > MAX_XIDBUF_SIZE) ?
						MAX_XIDBUF_SIZE : sub_size;
				if (size_read != FileRead(subxip_file, (char *)xids,
							  size_read))
				{
					elog(ERROR,
					     "Error in reading subtransaction file.");
				}

				/*
				 * Write only the bytes just read.  Writing the whole
				 * remaining sub_size would read past the end of the
				 * MAX_XIDBUF_SIZE buffer whenever more than one chunk is
				 * left, and would put surplus data into the file.
				 */
				if (!FileWriteOK(f, xids, size_read))
				{
					break;
				}

				sub_size -= size_read;
			}

			pfree(xids);
			/* a nonzero remainder means the copy loop stopped on a failed write */
			if (sub_size != 0)
				break;
		}

		if (src->inmemory_subcnt > 0)
		{
			sub_size = src->inmemory_subcnt * sizeof(TransactionId);
			if (!FileWriteOK(f, src->subxids, sub_size))
			{
				break;
			}
		}

		/*
		 * Now update our length field: seek to beginning and overwrite
		 * our original zero-length. count does not include
		 * subtransaction ids.
		 */
		if (BufFileSeek(f, 0 /* offset */, SEEK_SET) != 0)
			break;

		if (!FileWriteOK(f, &count, sizeof(count)))
			break;

		/* now flush and close. */
		BufFileFlush(f);
		/*
		 * Temp files get deleted on close!
		 *
		 * BufFileClose(f);
		 */

		return;
	}
	while (0);

	elog(ERROR, "Failed to write shared snapshot to temp-file");
}
/*
 * readSharedLocalSnapshot_forCursor
 *		Load the snapshot that the writer dumped for a cursor and install it
 *		in *snapshot.
 *
 * snapshot: destination; its xip and subxip arrays must already be
 *		allocated (asserted).  xmin, xmax, xcnt, xip[] and curcid are
 *		overwritten from the file.
 *
 * Besides filling *snapshot, this replaces the backend's combo CID array
 * (comboCids / usedComboCids), refills subxbuf with the dumped
 * subtransaction xids, and finishes by calling
 * SetSharedTransactionId_reader() with the writer's xid and the snapshot's
 * curcid.
 *
 * Fields are consumed in the order dumpSharedLocalSnapshot_forCursor()
 * writes them.  Any failed read or seek, or a length mismatch, raises ERROR.
 */
void
readSharedLocalSnapshot_forCursor(Snapshot snapshot)
{
	BufFile *f;
	char *fname=NULL;
	Size count=0, sanity;
	uint8 *p, *buffer=NULL;

	pid_t writerPid;
	TransactionId localXid;
	CommandId localCid;
	TimestampTz localXactStartTimestamp;

	uint32 combocidcnt;
	ComboCidKeyData tmp_combocids[MaxComboCids];
	uint32 sub_size;
	uint32 read_size;
	int64 subcnt;
	TransactionId *subxids = NULL;

	Assert(Gp_role == GP_ROLE_EXECUTE);
	Assert(!Gp_is_writer);
	Assert(SharedLocalSnapshotSlot != NULL);
	Assert(snapshot->xip != NULL);
	Assert(snapshot->subxip != NULL);

	/*
	 * Open our dump-file, this will either return a valid file, or
	 * throw an error.
	 *
	 * NOTE: this is always run *after* the dump by the writer is
	 * guaranteed to have completed.
	 */
	fname = sharedLocalSnapshot_filename(QEDtxContextInfo.distributedXid,
		QEDtxContextInfo.curcid, QEDtxContextInfo.segmateSync);

	f = BufFileCreateTemp_ReaderWriter(fname, false, false);
	/* we have our file. */

#define FileReadOK(file, ptr, size) (BufFileRead(file, ptr, size) == size)

	/* Read the file-length info */
	if (!FileReadOK(f, &count, sizeof(count)))
		elog(ERROR, "Cursor snapshot: failed to read size");

	elog(DEBUG1, "Reading in cursor-snapshot %u bytes",
		     (unsigned int)count);

	buffer = palloc(count);

	/*
	 * Seek back to the beginning:
	 * We're going to read this all in one go, the size
	 * of this buffer should be more than a few hundred bytes.
	 */
	if (BufFileSeek(f, 0 /* offset */, SEEK_SET) != 0)
		elog(ERROR, "Cursor snapshot: failed to seek.");

	if (!FileReadOK(f, buffer, count))
		elog(ERROR, "Cursor snapshot: failed to read content");

	/* we've got the entire snapshot read into our buffer. */
	p = buffer;

	/* sanity check count */
	memcpy(&sanity, p, sizeof(sanity));
	if (sanity != count)
		elog(ERROR, "cursor snapshot failed sanity %u != %u",
			    (unsigned int)sanity, (unsigned int)count);
	p += sizeof(sanity);

	/* number of subtransaction xids stored after the fixed part */
	memcpy(&sub_size, p, sizeof(uint32));
	p += sizeof(uint32);

	/* see dumpSharedLocalSnapshot_forCursor() for the correct order here */

	memcpy(&writerPid, p, sizeof(writerPid));
	p += sizeof(writerPid);

	memcpy(&localXid, p, sizeof(localXid));
	p += sizeof(localXid);

	memcpy(&localCid, p, sizeof(localCid));
	p += sizeof(localCid);

	memcpy(&localXactStartTimestamp, p, sizeof(localXactStartTimestamp));
	p += sizeof(localXactStartTimestamp);

	memcpy(&combocidcnt, p, sizeof(combocidcnt));
	p += sizeof(combocidcnt);

	memcpy(tmp_combocids, p, sizeof(tmp_combocids));
	p += sizeof(tmp_combocids);

	/* handle the combocid stuff (same as in GetSnapshotData()) */
	if (usedComboCids != combocidcnt)
	{
		if (usedComboCids == 0)
		{
			MemoryContext oldCtx =  MemoryContextSwitchTo(TopTransactionContext);
			comboCids = palloc(combocidcnt * sizeof(ComboCidKeyData));
			MemoryContextSwitchTo(oldCtx);
		}
		else
		{
			/*
			 * repalloc() may move the chunk, so the returned pointer has to
			 * be stored back; otherwise comboCids would be left dangling.
			 */
			comboCids = repalloc(comboCids, combocidcnt * sizeof(ComboCidKeyData));
		}
	}

	/*
	 * NOTE(review): the copy length uses the unclamped combocidcnt, while
	 * tmp_combocids holds MaxComboCids entries and usedComboCids is clamped
	 * just below.  A file carrying combocidcnt > MaxComboCids would make
	 * this read past tmp_combocids -- confirm the writer can never emit
	 * that.
	 */
	memcpy(comboCids, tmp_combocids, combocidcnt * sizeof(ComboCidKeyData));
	usedComboCids = ((combocidcnt < MaxComboCids) ? combocidcnt : MaxComboCids);

	memcpy(&snapshot->xmin, p, sizeof(snapshot->xmin));
	p += sizeof(snapshot->xmin);

	memcpy(&snapshot->xmax, p, sizeof(snapshot->xmax));
	p += sizeof(snapshot->xmax);

	memcpy(&snapshot->xcnt, p, sizeof(snapshot->xcnt));
	p += sizeof(snapshot->xcnt);

	memcpy(snapshot->xip, p, snapshot->xcnt * sizeof(TransactionId));
	p += snapshot->xcnt * sizeof(TransactionId);

	/* zero out the slack in the xip-array */
	memset(snapshot->xip + snapshot->xcnt, 0, (xipEntryCount - snapshot->xcnt)*sizeof(TransactionId));

	memcpy(&snapshot->curcid, p, sizeof(snapshot->curcid));

	/* Now we're done with the buffer */
	pfree(buffer);

	/*
	 * Now read the subtransaction ids. This can be a big number, so cannot
	 * allocate memory all at once.
	 */
	/* from here on sub_size is a byte count, not an xid count */
	sub_size *= sizeof(TransactionId);

	ResetXidBuffer(&subxbuf);

	if (sub_size)
	{
		subxids = palloc(MAX_XIDBUF_SIZE);
	}

	/* The file position is just past the fixed part after the read above. */
	while (sub_size > 0)
	{
		read_size = sub_size > MAX_XIDBUF_SIZE ? MAX_XIDBUF_SIZE : sub_size;
		if (!FileReadOK(f, (char *)subxids, read_size))
		{
			elog(ERROR, "Error in Reading Subtransaction file.");
		}
		subcnt = read_size/sizeof(TransactionId);
		AddSortedToXidBuffer(&subxbuf, subxids, subcnt);
		sub_size -= read_size;
	}

	if (subxids)
	{
		pfree(subxids);
	}

	/* we're done with file. */
	BufFileClose(f);

	SetSharedTransactionId_reader(localXid, snapshot->curcid);

	return;
}
Exemple #15
0
/*
 * tuplestore_puttuple_common
 *		Append one tuple to the store, spilling to a temp file when the
 *		in-memory array can no longer hold the data.
 *
 * state: the tuplestore to append to.
 * tuple: the tuple to store.  It is placed in the in-memory array or handed
 *		to WRITETUP, depending on the store's status.
 *
 * Read pointers other than the active one that were in eof_reached state
 * are taken out of it and positioned at the spot where the new tuple goes.
 * A store in TSS_READFILE state is switched back to TSS_WRITEFILE as a side
 * effect.
 *
 * Raises ERROR if growing the array leaves the store short of memory, if
 * the temp file cannot be repositioned, or if the store is in an
 * unrecognized state.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *readptr;
	int			i;
	ResourceOwner oldowner;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* memtupcount is the index the new tuple is stored at */
					readptr->eof_reached = false;
					readptr->current = state->memtupcount;
				}
			}

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				/*
				 * See grow_memtuples() in tuplesort.c for the rationale
				 * behind these two tests.
				 */
				if (state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
					(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
				{
					/* swap the old array's accounted space for the doubled one */
					FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
					state->memtupsize *= 2;
					state->memtuples = (void **)
						repalloc(state->memtuples,
								 state->memtupsize * sizeof(void *));
					USEMEM(state, GetMemoryChunkSpace(state->memtuples));
					if (LACKMEM(state))
						elog(ERROR, "unexpected out-of-memory situation in tuplestore");
				}
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			/*
			 * The temp file name prefix carries the current slice id.
			 *
			 * NOTE(review): this declaration follows statements inside the
			 * case body; a compiler enforcing C89 declaration placement
			 * would reject it.  A nested block would avoid that.
			 */
			char tmpprefix[50];
			snprintf(tmpprefix, 50, "slice%d_tuplestore", currentSliceId);
			state->myfile = BufFileCreateTemp(tmpprefix, state->interXact);

			/* put back the caller's resource owner now the file exists */
			CurrentResourceOwner = oldowner;

			/*
			 * Freeze the decision about whether trailing length words will be
			 * used.  We can't change this choice once data is on tape, even
			 * though callers might drop the requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;
		case TSS_WRITEFILE:

			/*
			 * Update read pointers as needed; see API spec above. Note:
			 * BufFileTell is quite cheap, so not worth trying to avoid
			 * multiple calls.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* record the file position before the tuple is written */
					readptr->eof_reached = false;
					BufFileTell(state->myfile,
								&readptr->file,
								&readptr->offset);
				}
			}

			WRITETUP(state, tuple);
			break;
		case TSS_READFILE:

			/*
			 * Switch from reading to writing.
			 */
			/* save the active pointer's read position, unless it is at EOF */
			if (!state->readptrs[state->activeptr].eof_reached)
				BufFileTell(state->myfile,
							&state->readptrs[state->activeptr].file,
							&state->readptrs[state->activeptr].offset);
			/* go back to where writing previously stopped */
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				elog(ERROR, "tuplestore seek to EOF failed");
			state->status = TSS_WRITEFILE;

			/*
			 * Update read pointers as needed; see API spec above.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					/* the saved write position is where the new tuple lands */
					readptr->eof_reached = false;
					readptr->file = state->writepos_file;
					readptr->offset = state->writepos_offset;
				}
			}

			WRITETUP(state, tuple);
			break;
		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
Exemple #16
0
/*
 * tuplestore_puttuple_common
 *		Append one tuple to the store, spilling to a temp file when the
 *		in-memory array can no longer hold the data.
 *
 * state: the tuplestore to append to.
 * pos: the position record used with this store.  In memory, its "current"
 *		index is kept at the end of the array while eof_reached is set; on
 *		file, it supplies the saved read and write offsets.
 * tuple: the tuple to store.
 *
 * A store in TSS_READFILE state is switched back to TSS_WRITEFILE as a side
 * effect.  Raises ERROR if the temp file cannot be repositioned, or if the
 * store is in an unrecognized state.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, TuplestorePos *pos, void *tuple)
{
	if (state->status == TSS_INMEM)
	{
		ResourceOwner saved_owner;
		char		prefix[50];

		/*
		 * Try to double the array while one free slot is still left, so
		 * that the incoming tuple fits even if the array cannot grow; in
		 * that case we go to tape-based operation right after storing it.
		 * See grow_memtuples() in tuplesort.c for the rationale behind the
		 * second and third tests.
		 */
		if (state->memtupcount >= state->memtupsize - 1 &&
			state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
			(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
		{
			FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
			state->memtupsize *= 2;
			state->memtuples = (void **)
				repalloc(state->memtuples,
						 state->memtupsize * sizeof(void *));
			USEMEM(state, GetMemoryChunkSpace(state->memtuples));
		}

		/* Put the tuple into the next free array slot */
		state->memtuples[state->memtupcount++] = tuple;

		/* A reader sitting at EOF stays at the (new) end of the array */
		if (pos->eof_reached)
			pos->current = state->memtupcount;

		/* Finished, provided there are slots left and memory is not short */
		if (state->memtupcount < state->memtupsize && !LACKMEM(state))
			return;

		/*
		 * Otherwise switch to tape-based operation.  Make sure the temp
		 * file(s) are created in suitable temp tablespaces.
		 */
		PrepareTempTablespaces();

		/* create the file under the store's resource owner */
		saved_owner = CurrentResourceOwner;
		CurrentResourceOwner = state->resowner;

		snprintf(prefix, sizeof(prefix), "slice%d_tuplestore", currentSliceId);
		state->myfile = BufFileCreateTemp(prefix, state->interXact);

		CurrentResourceOwner = saved_owner;

		state->status = TSS_WRITEFILE;
		dumptuples(state, pos);
	}
	else if (state->status == TSS_WRITEFILE)
	{
		WRITETUP(state, pos, tuple);
	}
	else if (state->status == TSS_READFILE)
	{
		/*
		 * Leave read mode: save the read offset unless the reader is at
		 * EOF, then return to the saved write offset before writing.
		 */
		if (!pos->eof_reached)
			BufFileTell(state->myfile, &pos->readpos_offset);
		if (BufFileSeek(state->myfile, pos->writepos_offset, SEEK_SET) != 0)
			elog(ERROR, "seek to EOF failed");
		state->status = TSS_WRITEFILE;
		WRITETUP(state, pos, tuple);
	}
	else
	{
		elog(ERROR, "invalid tuplestore state");
	}
}
Exemple #17
0
/*
 * Fetch the next tuple in either forward or back direction.
 * Returns NULL if no more tuples.  If *should_free comes back true, the
 * caller must pfree the returned tuple when done with it.
 *
 * state:		the tuplestore; state->status selects the in-memory or the
 *				file-based code path.
 * pos:			read position to advance.  Its eof_reached flag is updated
 *				here, together with current (in-memory path) or
 *				writepos_offset (when switching from writing to reading).
 * forward:		true to fetch the next tuple, false to step backward.
 * should_free:	output flag; set to false on the in-memory path and to true
 *				on the file path.  It is left untouched when the
 *				TSS_WRITEFILE case returns NULL early, and on the
 *				invalid-state error path.
 *
 * Backward scan is only allowed if randomAccess was set true or
 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
 *
 * NOTE(review): on the file path a backward fetch first reaches
 * insist_log(false, ...).  Presumably that raises an error, which would make
 * the backward file code after it unreachable -- confirm against the
 * definition of insist_log.
 */
static void *
tuplestore_gettuple(Tuplestorestate *state, TuplestorePos *pos, bool forward,
					bool *should_free)
{
	uint32 tuplen;
	void	   *tup;

	/* Backward fetches require the store to have been set up for them */
	Assert(forward || (state->eflags & EXEC_FLAG_BACKWARD));

	switch (state->status)
	{
		case TSS_INMEM:
			/* Tuples are handed out straight from the memtuples array */
			*should_free = false;
			if (forward)
			{
				/* pos->current is the index of the next tuple to return */
				if (pos->current < state->memtupcount)
					return state->memtuples[pos->current++];
				pos->eof_reached = true;
				return NULL;
			}
			else
			{
				/* Nothing has been returned yet, so nothing to back up to */
				if (pos->current <= 0)
					return NULL;

				/*
				 * if all tuples are fetched already then we return last
				 * tuple, else - tuple before last returned.
				 */
				if (pos->eof_reached)
					pos->eof_reached = false;
				else
				{
					pos->current--;	/* last returned tuple */
					if (pos->current <= 0)
						return NULL;
				}
				/*
				 * current is one past the tuple being returned, so a
				 * following forward fetch returns the tuple after this one.
				 */
				return state->memtuples[pos->current - 1];
			}
			break;

		case TSS_WRITEFILE:
			/* Skip state change if we'll just return NULL */
			if (pos->eof_reached && forward)
				return NULL;

			/*
			 * Switch from writing to reading.
			 *
			 * Record the current file position as the write position, then
			 * move to the saved read position.  When eof_reached is set no
			 * seek is done and the file position stays where it is.
			 */
			BufFileTell(state->myfile,
						&pos->writepos_offset);
			if (!pos->eof_reached)
				if (BufFileSeek(state->myfile,
								pos->readpos_offset,
								SEEK_SET) != 0)
					elog(ERROR, "seek failed");
			state->status = TSS_READFILE;
			/* FALL THRU into READFILE case */

		case TSS_READFILE:
			/* Tuples on this path are produced by READTUP */
			*should_free = true;
			if (forward)
			{
				/* A zero length word from getlen is treated as end of data */
				if ((tuplen = getlen(state, pos, true)) != 0)
				{
					tup = READTUP(state, pos, tuplen);

					/* CDB XXX XXX XXX XXX */
					/* MPP-1347: EXPLAIN ANALYZE shows runaway memory usage.
					 * Readtup does a usemem, but the free happens in
					 * ExecStoreTuple.  Do a free so state->availMem
					 * doesn't go massively negative to screw up
					 * stats.  It would be better to interrogate the
					 * heap for actual memory usage than use this
					 * homemade accounting.
					 */
					FREEMEM(state, GetMemoryChunkSpace(tup)); 
					/* CDB XXX XXX XXX XXX */
					return tup;
				}
				else
				{
					pos->eof_reached = true;
					return NULL;
				}
			}

			/*
			 * Backward.
			 *
			 * if all tuples are fetched already then we return last tuple,
			 * else - tuple before last returned.
			 *
			 * Back up to fetch previously-returned tuple's ending length
			 * word. If seek fails, assume we are at start of file.
			 */

			/*
			 * NOTE(review): the condition is the constant false, so this
			 * always fires; presumably it reports an error and does not
			 * return, leaving the rest of this case as dead code -- confirm.
			 */
			insist_log(false, "Backward scanning of tuplestores are not supported at this time");

			if (BufFileSeek(state->myfile, -(long) sizeof(uint32) /* offset */,
							SEEK_CUR) != 0)
				return NULL;
			tuplen = getlen(state, pos, false);

			if (pos->eof_reached)
			{
				pos->eof_reached = false;
				/* We will return the tuple returned before returning NULL */
			}
			else
			{
				/*
				 * Back up to get ending length word of tuple before it.
				 */
				if (BufFileSeek(state->myfile,
								-(long) (tuplen + 2 * sizeof(uint32)) /* offset */,
								SEEK_CUR) != 0)
				{
					/*
					 * If that fails, presumably the prev tuple is the first
					 * in the file.  Back up so that it becomes next to read
					 * in forward direction (not obviously right, but that is
					 * what in-memory case does).
					 */
					if (BufFileSeek(state->myfile,
									-(long) (tuplen + sizeof(uint32)) /* offset */,
									SEEK_CUR) != 0)
						elog(ERROR, "bogus tuple length in backward scan");
					return NULL;
				}
				tuplen = getlen(state, pos, false);
			}

			/*
			 * Now we have the length of the prior tuple, back up and read it.
			 * Note: READTUP expects we are positioned after the initial
			 * length word of the tuple, so back up to that point.
			 */
			if (BufFileSeek(state->myfile,
							-(long) tuplen /* offset */,
							SEEK_CUR) != 0)
				elog(ERROR, "bogus tuple length in backward scan");
			/*
			 * Unlike the forward path above, no FREEMEM adjustment is made
			 * for the tuple read here.
			 */
			tup = READTUP(state, pos, tuplen);
			return tup;

		default:
			elog(ERROR, "invalid tuplestore state");
			return NULL;		/* keep compiler quiet */
	}
}
/*
 * ExecWorkFile_Seek
 *   Reposition a random-access work file.
 *
 *   workfile: file to reposition; must carry EXEC_WORKFILE_RANDOM_ACCESS.
 *   offset:   target offset, interpreted according to whence.
 *   whence:   SEEK_SET or SEEK_CUR; anything else is logged and rejected.
 *
 *   Returns the result of the underlying seek (0 if OK), or EOF when the
 *   request is rejected up front: bad whence, a seek past the recorded size
 *   on a file without EXEC_WORKFILE_CREATED, or a failed disk-space
 *   reservation.  The underlying seek is expected to leave the logical
 *   position unmoved when an impossible seek is attempted.
 */
int
ExecWorkFile_Seek(ExecWorkFile *workfile, uint64 offset, int whence)
{
	int			rc = 0;
	int64		grow_by = 0;	/* bytes the seek lands past workfile->size */
	bool		extends_file;

	Assert(workfile != NULL);
	Assert((workfile->flags & EXEC_WORKFILE_RANDOM_ACCESS) != 0);

	/* Work out how far past the recorded size the target lies, if at all */
	if (whence == SEEK_SET)
	{
		if (offset > workfile->size)
			grow_by = offset - workfile->size;
	}
	else if (whence == SEEK_CUR)
	{
		uint64		target = ExecWorkFile_Tell64(workfile) + offset;

		if (target > workfile->size)
			grow_by = target - workfile->size;
	}
	else
	{
		elog(LOG, "invalid whence: %d", whence);
		Assert(false);
		return EOF;
	}

	extends_file = (grow_by > 0);

	if (extends_file)
	{
		/*
		 * Seeking beyond the recorded size is only permitted on files that
		 * carry EXEC_WORKFILE_CREATED (i.e. files we created for writing).
		 */
		if ((workfile->flags & EXEC_WORKFILE_CREATED) == 0)
			return EOF;

		/* Reserve the extra disk space first; tell the caller if we can't */
		if (!WorkfileDiskspace_Reserve(grow_by))
			return EOF;
	}

	/* Perform the seek itself, dispatching on the backing file type */
	if (workfile->fileType == BUFFILE)
	{
		BufFile    *buffile = (BufFile *) workfile->file;

		rc = BufFileSeek(buffile, offset, whence);

		/* Refresh the recorded size when the seek was meant to extend it */
		if (extends_file)
			workfile->size = BufFileGetSize(buffile);
	}
	else
	{
		insist_log(false, "invalid work file type: %d", workfile->fileType);
	}

	/*
	 * Account for the reserved space.
	 * NOTE(review): this runs regardless of rc, i.e. also when the seek
	 * above reported failure -- confirm that is intended.
	 */
	if (extends_file)
	{
		WorkfileDiskspace_Commit(grow_by, grow_by, true /* update_query_size */);
		workfile_update_in_progress_size(workfile, grow_by);
	}

	return rc;
}
Exemple #19
0
/*
 * loadSharedComboCommandId
 *   Look up the cmin/cmax pair for (xmin, combocid) in the combo-cid map
 *   file named after the lock holder's pid and this session.
 *
 *   xmin, combocid: identify the entry being looked up.
 *   cmin, cmax:     outputs, filled from the matching entry; must be
 *                   non-NULL.
 *
 *   The whole file is scanned from the start.  Every entry whose xmin
 *   matches is entered into readerComboHash along the way, so the scan does
 *   not stop at the first hit.  Reports an ERROR when there is no lock
 *   holder pointer, on seek or read failure, or when no matching entry is
 *   found.
 */
void
loadSharedComboCommandId(TransactionId xmin, CommandId combocid, CommandId *cmin, CommandId *cmax)
{
	ComboCidEntryData rec;
	bool		matched = false;
	int			idx;

	Assert(Gp_role == GP_ROLE_EXECUTE);
	Assert(!Gp_is_writer);
	Assert(cmin != NULL);
	Assert(cmax != NULL);

	/* Without a lock holder we cannot name or size the map file */
	if (lockHolderProcPtr == NULL)
		elog(ERROR, "loadSharedComboCommandId: NO LOCK HOLDER POINTER.");

	/* Open the map file on first use */
	if (combocid_map == NULL)
	{
		char		mapname[MAXPGPATH];
		MemoryContext saved_cxt;

		ComboCidMapName(mapname, gp_session_id, lockHolderProcPtr->pid);

		/*
		 * The handle is created with TopMemoryContext current.  Per the
		 * original note, a failed create is reported as an error by the
		 * callee rather than returned here.
		 */
		saved_cxt = MemoryContextSwitchTo(TopMemoryContext);
		combocid_map = BufFileCreateTemp_ReaderWriter(mapname, false, true);
		MemoryContextSwitchTo(saved_cxt);
	}
	Assert(combocid_map != NULL);

	/* Every lookup scans from the beginning of the file */
	if (BufFileSeek(combocid_map, 0 /* fileno */, 0 /* offset */, SEEK_SET) != 0)
		elog(ERROR, "loadSharedComboCommandId: seek to beginning failed.");

	/*
	 * Walk the entire table, caching all occurrences of our xmin.  The
	 * entry count is re-read from the lock holder on every iteration.
	 */
	for (idx = 0; idx < lockHolderProcPtr->combocid_map_count; idx++)
	{
		readerComboCidKeyData lookup_key;
		readerComboCidEntryData *cache_slot;
		bool		already_cached = false;

		if (BufFileRead(combocid_map, &rec, sizeof(ComboCidEntryData)) != sizeof(ComboCidEntryData))
			elog(ERROR, "loadSharedComboCommandId: read failed I/O error.");

		/* Entries for other transactions are of no interest */
		if (rec.key.xmin != xmin)
			continue;

		/* Zero the key first so any padding bytes are zero as well */
		memset(&lookup_key, 0, sizeof(lookup_key));
		lookup_key.session = gp_session_id;
		lookup_key.writer_pid = lockHolderProcPtr->pid;
		lookup_key.xmin = rec.key.xmin;
		lookup_key.combocid = rec.combocid;

		cache_slot = (readerComboCidEntryData *)
			hash_search(readerComboHash, &lookup_key, HASH_ENTER, &already_cached);

		/* Fill in the payload only for a freshly created hash entry */
		if (!already_cached)
		{
			cache_slot->cmin = rec.key.cmin;
			cache_slot->cmax = rec.key.cmax;
		}

		/*
		 * This is the entry the caller asked for.  Keep scanning anyway so
		 * any further entries for our xmin are pulled in too.
		 */
		if (rec.combocid == combocid)
		{
			*cmin = rec.key.cmin;
			*cmax = rec.key.cmax;
			matched = true;
		}
	}

	if (!matched)
		elog(ERROR, "loadSharedComboCommandId: no combocid entry found for %u/%u", xmin, combocid);
}