/*
 * tuplestore_markpos_pos - remember the current read position of "pos"
 *
 * state: the tuplestore whose status and temp file are consulted
 * pos:   the position object whose mark fields are updated
 *
 * While the store is in memory the mark is the array index pos->current.
 * Once the store is on file the mark is a file offset: the saved
 * readpos_offset when we are in write mode and not at EOF, otherwise
 * whatever BufFileTell reports for the temp file right now.
 *
 * Raises ERROR for any other status value.
 */
void
tuplestore_markpos_pos(Tuplestorestate *state, TuplestorePos *pos)
{
	/* In-memory store: the mark is simply an array index */
	if (state->status == TSS_INMEM)
	{
		pos->markpos_current = pos->current;
		return;
	}

	/*
	 * The file's own seek position is the read position both while reading
	 * and, implicitly, while writing with the reader sitting at EOF.
	 */
	if (state->status == TSS_READFILE ||
		(state->status == TSS_WRITEFILE && pos->eof_reached))
	{
		BufFileTell(state->myfile, &pos->markpos_offset);
		return;
	}

	/* Writing, reader not at EOF: the read offset was saved earlier */
	if (state->status == TSS_WRITEFILE)
	{
		pos->markpos_offset = pos->readpos_offset;
		return;
	}

	elog(ERROR, "invalid tuplestore state");
}
/*
 * tuplestore_markpos_pos - remember the current read position of "pos"
 *
 * state: the tuplestore; its eflags must include EXEC_FLAG_MARK (asserted)
 * pos:   the position object whose mark fields are updated
 *
 * On-file stores record a file offset (taken from BufFileTell, or from the
 * saved readpos_offset when writing and not at EOF).  In-memory stores
 * record the array index pos->current and may then trim the store.
 *
 * Raises ERROR for an unrecognized status value.
 */
void
tuplestore_markpos_pos(Tuplestorestate *state, TuplestorePos *pos)
{
	Assert(state->eflags & EXEC_FLAG_MARK);

	switch (state->status)
	{
		case TSS_READFILE:
			/* The file's seek position is the read position */
			BufFileTell(state->myfile, &pos->markpos_offset);
			break;

		case TSS_WRITEFILE:
			if (!pos->eof_reached)
				pos->markpos_offset = pos->readpos_offset;
			else
			{
				/* At EOF the read position is implicit: ask the file */
				BufFileTell(state->myfile, &pos->markpos_offset);
			}
			break;

		case TSS_INMEM:
			pos->markpos_current = pos->current;

			/*
			 * When the caller needs neither backward scan nor rewind, nothing
			 * before the mark will ever be revisited, so the store can be
			 * truncated.
			 *
			 * It is tempting to discard everything before "current", which
			 * is the next tuple to be handed out.  But tuplestore_gettuple
			 * returns a direct pointer to our internal copy, so the caller
			 * probably still has the tuple just before "current" referenced
			 * in a slot.  That one must survive for now.
			 */
			if ((state->eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND)) == 0)
				tuplestore_trim(state, 1);
			break;

		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
/*
 * dumptuples - flush every in-memory tuple to the temp file
 *
 * state: the tuplestore whose memtuples array is written out and emptied
 * pos:   the position object whose file offsets are filled in
 *
 * While writing, the file offset reached just before array slot N is stored
 * into pos->readpos_offset when N equals pos->current, and into
 * pos->markpos_offset when N equals pos->markpos_current.  Without this a
 * dump would lose the current read position.  The check is also made for
 * N == memtupcount, i.e. for a position just past the last tuple.
 */
static void
dumptuples(Tuplestorestate *state, TuplestorePos *pos)
{
	int			slot = 0;
	int			finished;

	do
	{
		/* Translate array positions into file offsets as we pass them */
		if (pos->current == slot)
			BufFileTell(state->myfile, &pos->readpos_offset);
		if (pos->markpos_current == slot)
			BufFileTell(state->myfile, &pos->markpos_offset);

		finished = (slot >= state->memtupcount);
		if (!finished)
		{
			WRITETUP(state, pos, state->memtuples[slot]);
			slot++;
		}
	} while (!finished);

	/* Everything is on file now; the array is logically empty */
	state->memtupcount = 0;
}
/*
 * ExecWorkFile_Tell64
 *    Report the current file position indicator of a work file.
 *
 * workfile: must not be NULL (asserted).
 *
 * For a BUFFILE work file the value comes from BufFileTell; for a BFZ work
 * file it is whatever bfz_totalbytes returns.  Any other file type is
 * reported through insist_log; if that call returns, the result is 0.
 */
uint64
ExecWorkFile_Tell64(ExecWorkFile *workfile)
{
	uint64		position = 0;

	Assert(workfile != NULL);

	if (workfile->fileType == BUFFILE)
	{
		/* BufFileTell writes through an int64 pointer */
		BufFileTell((BufFile *) workfile->file, (int64 *) &position);
	}
	else if (workfile->fileType == BFZ)
	{
		position = bfz_totalbytes((bfz_t *) workfile->file);
	}
	else
	{
		insist_log(false, "invalid work file type: %d", workfile->fileType);
	}

	return position;
}
/*
 * dumptuples - flush every in-memory tuple to the temp file
 *
 * state: the tuplestore whose memtuples array is written out and emptied
 *
 * As the tuples are written, each read pointer that is not in eof_reached
 * state has its array position ("current") converted to file/offset form:
 * when the slot about to be written equals the pointer's current, the file
 * position reported by BufFileTell is stored in the pointer.  Pointers in
 * eof_reached state are left untouched.  The check is also made once for
 * the slot just past the last tuple.
 */
static void
dumptuples(Tuplestorestate *state)
{
	int			tupno = 0;

	for (;;)
	{
		int			p;

		for (p = 0; p < state->readptrcount; p++)
		{
			TSReadPointer *rp = &state->readptrs[p];

			/* Only non-EOF pointers sitting exactly here need an offset */
			if (rp->eof_reached || rp->current != tupno)
				continue;
			BufFileTell(state->myfile, &rp->file, &rp->offset);
		}

		if (tupno >= state->memtupcount)
			break;

		WRITETUP(state, state->memtuples[tupno]);
		tupno++;
	}

	/* Everything is on file now; the array is logically empty */
	state->memtupcount = 0;
}
/*
 * Fetch the next tuple in either forward or back direction, using the
 * store's active read pointer (state->readptrs[state->activeptr]).
 *
 * state:       the tuplestore to read from
 * forward:     true to fetch the next tuple, false to step backward
 * should_free: output; set to false when the returned pointer refers to the
 *              store's own in-memory array, true when the tuple was read
 *              from the temp file via READTUP
 *
 * Returns NULL if no more tuples.  If should_free is set, the
 * caller must pfree the returned tuple when done with it.
 *
 * Backward scan is only allowed if randomAccess was set true or
 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags(); here that is
 * enforced by asserting EXEC_FLAG_BACKWARD in the active pointer's eflags.
 *
 * Side effects: a store in TSS_WRITEFILE status is switched to TSS_READFILE
 * (saving the write position first) unless the call can return NULL right
 * away.  Raises ERROR if a required seek fails or the status is unknown.
 */
static void *
tuplestore_gettuple(Tuplestorestate *state, bool forward, bool *should_free)
{
	TSReadPointer *readptr = &state->readptrs[state->activeptr];
	unsigned int tuplen;
	void	   *tup;

	Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));

	switch (state->status)
	{
		case TSS_INMEM:
			/* Caller gets a pointer into memtuples[]; it must not free it */
			*should_free = false;
			if (forward)
			{
				if (readptr->eof_reached)
					return NULL;
				if (readptr->current < state->memtupcount)
				{
					/* We have another tuple, so return it */
					return state->memtuples[readptr->current++];
				}
				readptr->eof_reached = true;
				return NULL;
			}
			else
			{
				/*
				 * if all tuples are fetched already then we return last
				 * tuple, else tuple before last returned.
				 */
				if (readptr->eof_reached)
				{
					readptr->current = state->memtupcount;
					readptr->eof_reached = false;
				}
				else
				{
					/* Already at the oldest tuple still held: nothing behind */
					if (readptr->current <= state->memtupdeleted)
					{
						Assert(!state->truncated);
						return NULL;
					}
					readptr->current--; /* last returned tuple */
				}
				if (readptr->current <= state->memtupdeleted)
				{
					Assert(!state->truncated);
					return NULL;
				}
				/* "current" is the next slot to read, so the target is one before */
				return state->memtuples[readptr->current - 1];
			}
			break;

		case TSS_WRITEFILE:
			/* Skip state change if we'll just return NULL */
			if (readptr->eof_reached && forward)
				return NULL;

			/*
			 * Switch from writing to reading.  Remember where writing left
			 * off, then (unless reading from EOF) seek to the read pointer's
			 * saved file position.
			 */
			BufFileTell(state->myfile,
						&state->writepos_file, &state->writepos_offset);
			if (!readptr->eof_reached)
				if (BufFileSeek(state->myfile,
								readptr->file, readptr->offset,
								SEEK_SET) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not seek in tuplestore temporary file: %m")));
			state->status = TSS_READFILE;
			/* FALL THRU into READFILE case */

		case TSS_READFILE:
			/* Tuples read from file are handed to the caller to free */
			*should_free = true;
			if (forward)
			{
				/* A zero length word is treated as end of data */
				if ((tuplen = getlen(state, true)) != 0)
				{
					tup = READTUP(state, tuplen);
					return tup;
				}
				else
				{
					readptr->eof_reached = true;
					return NULL;
				}
			}

			/*
			 * Backward.
			 *
			 * if all tuples are fetched already then we return last tuple,
			 * else tuple before last returned.
			 *
			 * Back up to fetch previously-returned tuple's ending length
			 * word.  If seek fails, assume we are at start of file.
			 */
			if (BufFileSeek(state->myfile, 0, -(long) sizeof(unsigned int),
							SEEK_CUR) != 0)
			{
				/* even a failed backwards fetch gets you out of eof state */
				readptr->eof_reached = false;
				Assert(!state->truncated);
				return NULL;
			}
			tuplen = getlen(state, false);

			if (readptr->eof_reached)
			{
				readptr->eof_reached = false;
				/* We will return the tuple returned before returning NULL */
			}
			else
			{
				/*
				 * Back up to get ending length word of tuple before it.
				 */
				if (BufFileSeek(state->myfile, 0,
								-(long) (tuplen + 2 * sizeof(unsigned int)),
								SEEK_CUR) != 0)
				{
					/*
					 * If that fails, presumably the prev tuple is the first
					 * in the file.  Back up so that it becomes next to read
					 * in forward direction (not obviously right, but that is
					 * what in-memory case does).
					 */
					if (BufFileSeek(state->myfile, 0,
									-(long) (tuplen + sizeof(unsigned int)),
									SEEK_CUR) != 0)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not seek in tuplestore temporary file: %m")));
					Assert(!state->truncated);
					return NULL;
				}
				tuplen = getlen(state, false);
			}

			/*
			 * Now we have the length of the prior tuple, back up and read it.
			 * Note: READTUP expects we are positioned after the initial
			 * length word of the tuple, so back up to that point.
			 */
			if (BufFileSeek(state->myfile, 0,
							-(long) tuplen,
							SEEK_CUR) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in tuplestore temporary file: %m")));
			tup = READTUP(state, tuplen);
			return tup;

		default:
			elog(ERROR, "invalid tuplestore state");
			return NULL;		/* keep compiler quiet */
	}
}
/*
 * tuplestore_puttuple_common - append one already-formed tuple to the store
 *
 * state: the tuplestore being appended to
 * tuple: the tuple pointer to stash in memory or hand to WRITETUP
 *
 * In every status, each non-active read pointer that is in eof_reached
 * state is taken out of that state and positioned at the spot where the new
 * tuple is about to go, so that it will see the tuple on its next read.
 *
 * An in-memory store that runs out of array slots or memory is moved to a
 * temp file (status becomes TSS_WRITEFILE) and its contents dumped.  A
 * store being read from file is switched back to write mode first.
 *
 * Raises ERROR if the seek back to the write position fails or the status
 * is unknown.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *active;
	ResourceOwner saved_owner;
	int			n;

	state->tuples++;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Non-active pointers parked at EOF now point at the slot the
			 * new tuple will occupy.
			 */
			for (n = 0; n < state->readptrcount; n++)
			{
				TSReadPointer *rp = &state->readptrs[n];

				if (n == state->activeptr || !rp->eof_reached)
					continue;
				rp->eof_reached = false;
				rp->current = state->memtupcount;
			}

			/*
			 * Try to enlarge the array while one free slot still remains.
			 * If enlarging fails there is still room for this tuple, after
			 * which we fall over to tape-based operation below.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				(void) grow_memtuples(state);
				Assert(state->memtupcount < state->memtupsize);
			}

			/* Store the tuple in the next free array slot */
			state->memtuples[state->memtupcount] = tuple;
			state->memtupcount++;

			/* Still within both the array and the memory budget?  Done. */
			if (!(state->memtupcount >= state->memtupsize || LACKMEM(state)))
				return;

			/*
			 * Out of room: go to tape.  First make sure temp files land in
			 * suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* Create the file under the store's own resource owner */
			saved_owner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			state->myfile = BufFileCreateTemp(state->interXact);

			CurrentResourceOwner = saved_owner;

			/*
			 * Lock in whether trailing length words are written.  Once data
			 * is on tape this cannot change, even if callers later drop the
			 * backward-scan requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;

		case TSS_WRITEFILE:

			/*
			 * Non-active pointers parked at EOF get the current file
			 * position.  BufFileTell is cheap, so repeated calls are not
			 * worth avoiding.
			 */
			for (n = 0; n < state->readptrcount; n++)
			{
				TSReadPointer *rp = &state->readptrs[n];

				if (n == state->activeptr || !rp->eof_reached)
					continue;
				rp->eof_reached = false;
				BufFileTell(state->myfile, &rp->file, &rp->offset);
			}

			WRITETUP(state, tuple);
			break;

		case TSS_READFILE:

			/*
			 * Switch from reading to writing: save the active pointer's
			 * file position (unless it is at EOF), then seek to where
			 * writing left off.
			 */
			active = &state->readptrs[state->activeptr];
			if (!active->eof_reached)
				BufFileTell(state->myfile, &active->file, &active->offset);

			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in tuplestore temporary file: %m")));
			state->status = TSS_WRITEFILE;

			/* Non-active pointers parked at EOF get the write position */
			for (n = 0; n < state->readptrcount; n++)
			{
				TSReadPointer *rp = &state->readptrs[n];

				if (n == state->activeptr || !rp->eof_reached)
					continue;
				rp->eof_reached = false;
				rp->file = state->writepos_file;
				rp->offset = state->writepos_offset;
			}

			WRITETUP(state, tuple);
			break;

		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
/*
 * tuplestore_select_read_pointer - make the specified read pointer active
 *
 * state: the tuplestore
 * ptr:   index of the read pointer to activate; must be within
 *        [0, readptrcount) (asserted)
 *
 * Selecting the already-active pointer is a no-op.  Only a store in
 * TSS_READFILE status needs real work, because there the temp file's seek
 * position stands in for the active pointer's position.
 *
 * Raises ERROR if the seek fails or the status is unknown.
 */
void
tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
{
	TSReadPointer *incoming;
	TSReadPointer *outgoing;

	Assert(ptr >= 0 && ptr < state->readptrcount);

	/* Nothing to do when the requested pointer is already the active one */
	if (state->activeptr == ptr)
		return;

	incoming = &state->readptrs[ptr];
	outgoing = &state->readptrs[state->activeptr];

	if (state->status == TSS_READFILE)
	{
		int			seek_failed;

		/* Preserve the position of the pointer that is being deactivated */
		if (!outgoing->eof_reached)
			BufFileTell(state->myfile, &outgoing->file, &outgoing->offset);

		/*
		 * Move the file's seek position to the new pointer's logical
		 * position.  For a pointer at EOF that is the end of data, which is
		 * known from the saved write position.
		 */
		if (incoming->eof_reached)
			seek_failed = (BufFileSeek(state->myfile,
									   state->writepos_file,
									   state->writepos_offset,
									   SEEK_SET) != 0);
		else
			seek_failed = (BufFileSeek(state->myfile,
									   incoming->file,
									   incoming->offset,
									   SEEK_SET) != 0);

		if (seek_failed)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not seek in tuplestore temporary file: %m")));
	}
	else if (state->status != TSS_INMEM && state->status != TSS_WRITEFILE)
	{
		/* TSS_INMEM and TSS_WRITEFILE need no work; anything else is bogus */
		elog(ERROR, "invalid tuplestore state");
	}

	state->activeptr = ptr;
}
/*
 * tuplestore_copy_read_pointer - copy a read pointer's state to another
 *
 * state:   the tuplestore
 * srcptr:  index of the pointer to copy from
 * destptr: index of the pointer to overwrite
 *
 * Both indexes must be within [0, readptrcount) (asserted).  Copying a
 * pointer onto itself does nothing.  When the two pointers' eflags differ,
 * the store-wide eflags are recomputed as the OR of all pointers' eflags
 * after the copy.
 *
 * Raises ERROR if a required seek fails or the status is unknown.
 */
void
tuplestore_copy_read_pointer(Tuplestorestate *state,
							 int srcptr, int destptr)
{
	TSReadPointer *src = &state->readptrs[srcptr];
	TSReadPointer *dst = &state->readptrs[destptr];
	int			flags_differ;

	Assert(srcptr >= 0 && srcptr < state->readptrcount);
	Assert(destptr >= 0 && destptr < state->readptrcount);

	/* Self-assignment changes nothing */
	if (destptr == srcptr)
		return;

	/* Must be decided before the copy makes the two identical */
	flags_differ = (dst->eflags != src->eflags);

	*dst = *src;

	if (flags_differ)
	{
		/* The overall eflags may have changed: rebuild them from scratch */
		int			merged = state->readptrs[0].eflags;
		int			k = 1;

		while (k < state->readptrcount)
		{
			merged |= state->readptrs[k].eflags;
			k++;
		}
		state->eflags = merged;
	}

	if (state->status == TSS_READFILE)
	{
		/*
		 * The active pointer's true position is the file's seek point, not
		 * the values stored in the pointer.  So overwriting the active
		 * pointer requires a seek, and copying from the active pointer
		 * requires a tell -- except when eof_reached.
		 */
		if (state->activeptr == destptr)
		{
			int			seek_failed;

			if (dst->eof_reached)
				seek_failed = (BufFileSeek(state->myfile,
										   state->writepos_file,
										   state->writepos_offset,
										   SEEK_SET) != 0);
			else
				seek_failed = (BufFileSeek(state->myfile,
										   dst->file,
										   dst->offset,
										   SEEK_SET) != 0);

			if (seek_failed)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in tuplestore temporary file: %m")));
		}
		else if (state->activeptr == srcptr)
		{
			if (!dst->eof_reached)
				BufFileTell(state->myfile, &dst->file, &dst->offset);
		}
	}
	else if (state->status != TSS_INMEM && state->status != TSS_WRITEFILE)
	{
		/* TSS_INMEM and TSS_WRITEFILE need no work; anything else is bogus */
		elog(ERROR, "invalid tuplestore state");
	}
}
/*
 * Fetch the next tuple in either forward or back direction, relative to the
 * read position held in "pos".
 *
 * state:       the tuplestore to read from
 * pos:         the read position to use and advance
 * forward:     true to fetch the next tuple, false to step backward
 * should_free: output; set to false when the returned pointer refers to the
 *              store's own in-memory array, true when the tuple was read
 *              from the temp file via READTUP
 *
 * Returns NULL if no more tuples.  If should_free is set, the
 * caller must pfree the returned tuple when done with it.
 *
 * Backward scan is only allowed if randomAccess was set true or
 * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags(); here that is
 * enforced by asserting EXEC_FLAG_BACKWARD in state->eflags.
 *
 * Side effects: a store in TSS_WRITEFILE status is switched to TSS_READFILE
 * (saving the write position in pos first) unless the call can return NULL
 * right away.  Raises ERROR on seek failure or unknown status.
 */
static void *
tuplestore_gettuple(Tuplestorestate *state, TuplestorePos *pos,
					bool forward, bool *should_free)
{
	uint32		tuplen;
	void	   *tup;

	Assert(forward || (state->eflags & EXEC_FLAG_BACKWARD));

	switch (state->status)
	{
		case TSS_INMEM:
			/* Caller gets a pointer into memtuples[]; it must not free it */
			*should_free = false;
			if (forward)
			{
				if (pos->current < state->memtupcount)
					return state->memtuples[pos->current++];
				pos->eof_reached = true;
				return NULL;
			}
			else
			{
				/* Nothing has been read yet, so there is nothing behind us */
				if (pos->current <= 0)
					return NULL;

				/*
				 * if all tuples are fetched already then we return last
				 * tuple, else - tuple before last returned.
				 */
				if (pos->eof_reached)
					pos->eof_reached = false;
				else
				{
					pos->current--;		/* last returned tuple */
					if (pos->current <= 0)
						return NULL;
				}
				/* "current" is the next slot to read, so the target is one before */
				return state->memtuples[pos->current - 1];
			}
			break;

		case TSS_WRITEFILE:
			/* Skip state change if we'll just return NULL */
			if (pos->eof_reached && forward)
				return NULL;

			/*
			 * Switch from writing to reading.  Remember where writing left
			 * off, then (unless reading from EOF) seek to the saved read
			 * offset.
			 */
			BufFileTell(state->myfile, &pos->writepos_offset);
			if (!pos->eof_reached)
				if (BufFileSeek(state->myfile, pos->readpos_offset, SEEK_SET) != 0)
					elog(ERROR, "seek failed");
			state->status = TSS_READFILE;
			/* FALL THRU into READFILE case */

		case TSS_READFILE:
			/* Tuples read from file are handed to the caller to free */
			*should_free = true;
			if (forward)
			{
				/* A zero length word is treated as end of data */
				if ((tuplen = getlen(state, pos, true)) != 0)
				{
					tup = READTUP(state, pos, tuplen);

					/* CDB XXX XXX XXX XXX */
					/*
					 * MPP-1347: EXPLAIN ANALYZE shows runaway memory usage.
					 * Readtup does a usemem, but the free happens in
					 * ExecStoreTuple.  Do a free so state->availMem
					 * doesn't go massively negative to screw up
					 * stats.  It would be better to interrogate the
					 * heap for actual memory usage than use this
					 * homemade accounting.
					 */
					FREEMEM(state, GetMemoryChunkSpace(tup));
					/* CDB XXX XXX XXX XXX */
					return tup;
				}
				else
				{
					pos->eof_reached = true;
					return NULL;
				}
			}

			/*
			 * Backward.
			 *
			 * if all tuples are fetched already then we return last tuple,
			 * else - tuple before last returned.
			 *
			 * Back up to fetch previously-returned tuple's ending length
			 * word.  If seek fails, assume we are at start of file.
			 */

			/*
			 * NOTE(review): insist_log is called with a constant false
			 * condition, so on-file backward scans are rejected here.  The
			 * code below presumably never runs if insist_log does not return
			 * -- confirm against its definition.
			 */
			insist_log(false, "Backward scanning of tuplestores are not supported at this time");

			if (BufFileSeek(state->myfile, -(long) sizeof(uint32) /* offset */,
							SEEK_CUR) != 0)
				return NULL;
			tuplen = getlen(state, pos, false);

			if (pos->eof_reached)
			{
				pos->eof_reached = false;
				/* We will return the tuple returned before returning NULL */
			}
			else
			{
				/*
				 * Back up to get ending length word of tuple before it.
				 */
				if (BufFileSeek(state->myfile,
								-(long) (tuplen + 2 * sizeof(uint32)) /* offset */,
								SEEK_CUR) != 0)
				{
					/*
					 * If that fails, presumably the prev tuple is the first
					 * in the file.  Back up so that it becomes next to read
					 * in forward direction (not obviously right, but that is
					 * what in-memory case does).
					 */
					if (BufFileSeek(state->myfile,
									-(long) (tuplen + sizeof(uint32)) /* offset */,
									SEEK_CUR) != 0)
						elog(ERROR, "bogus tuple length in backward scan");
					return NULL;
				}
				tuplen = getlen(state, pos, false);
			}

			/*
			 * Now we have the length of the prior tuple, back up and read it.
			 * Note: READTUP expects we are positioned after the initial
			 * length word of the tuple, so back up to that point.
			 */
			if (BufFileSeek(state->myfile,
							-(long) tuplen /* offset */,
							SEEK_CUR) != 0)
				elog(ERROR, "bogus tuple length in backward scan");
			tup = READTUP(state, pos, tuplen);
			return tup;

		default:
			elog(ERROR, "invalid tuplestore state");
			return NULL;		/* keep compiler quiet */
	}
}
/*
 * tuplestore_puttuple_common - append one already-formed tuple to the store
 *
 * state: the tuplestore being appended to
 * pos:   the read/write position object kept in step with the store
 * tuple: the tuple pointer to stash in memory or hand to WRITETUP
 *
 * An in-memory store that runs out of array slots or memory is moved to a
 * temp file (status becomes TSS_WRITEFILE) and its contents dumped.  A
 * store being read from file is switched back to write mode first.
 *
 * Raises ERROR if the seek back to the write position fails or the status
 * is unknown.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, TuplestorePos *pos, void *tuple)
{
	ResourceOwner saved_owner;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Try to double the array while one free slot still remains.
			 * If that is not possible there is still room for this tuple,
			 * after which we fall over to tape-based operation below.
			 *
			 * The memory and size tests follow grow_memtuples() in
			 * tuplesort.c; see there for the rationale.
			 */
			if (state->memtupcount >= state->memtupsize - 1 &&
				state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
				(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
			{
				FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
				state->memtupsize *= 2;
				state->memtuples = (void **)
					repalloc(state->memtuples,
							 state->memtupsize * sizeof(void *));
				USEMEM(state, GetMemoryChunkSpace(state->memtuples));
			}

			/* Store the tuple in the next free array slot */
			state->memtuples[state->memtupcount] = tuple;
			state->memtupcount++;

			/* A reader parked at EOF stays parked just past the new tuple */
			if (pos->eof_reached)
				pos->current = state->memtupcount;

			/* Still within both the array and the memory budget?  Done. */
			if (!(state->memtupcount >= state->memtupsize || LACKMEM(state)))
				return;

			/*
			 * Out of room: go to tape.  First make sure temp files land in
			 * suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* Create the file under the store's own resource owner */
			saved_owner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			{
				char		tmpprefix[50];

				/* The temp file name prefix carries the current slice id */
				snprintf(tmpprefix, sizeof(tmpprefix),
						 "slice%d_tuplestore", currentSliceId);
				state->myfile = BufFileCreateTemp(tmpprefix, state->interXact);
			}

			CurrentResourceOwner = saved_owner;

			state->status = TSS_WRITEFILE;
			dumptuples(state, pos);
			break;

		case TSS_WRITEFILE:
			WRITETUP(state, pos, tuple);
			break;

		case TSS_READFILE:

			/*
			 * Switch from reading to writing: save the read offset (unless
			 * at EOF), then seek to where writing left off.
			 */
			if (!pos->eof_reached)
				BufFileTell(state->myfile, &pos->readpos_offset);

			if (BufFileSeek(state->myfile, pos->writepos_offset, SEEK_SET) != 0)
				elog(ERROR, "seek to EOF failed");

			state->status = TSS_WRITEFILE;
			WRITETUP(state, pos, tuple);
			break;

		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}
/*
 * tuplestore_puttuple_common - append one already-formed tuple to the store
 *
 * state: the tuplestore being appended to
 * tuple: the tuple pointer to stash in memory or hand to WRITETUP
 *
 * In every status, each non-active read pointer that is in eof_reached
 * state is taken out of that state and positioned at the spot where the new
 * tuple is about to go, so that it will see the tuple on its next read.
 *
 * An in-memory store that runs out of array slots or memory is moved to a
 * temp file (status becomes TSS_WRITEFILE) and its contents dumped.  A
 * store being read from file is switched back to write mode first.
 *
 * Raises ERROR if doubling the array leaves the store out of memory, if the
 * seek back to the write position fails, or if the status is unknown.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
	TSReadPointer *readptr;
	int			i;
	ResourceOwner oldowner;

	switch (state->status)
	{
		case TSS_INMEM:

			/*
			 * Update read pointers as needed: non-active pointers parked at
			 * EOF now point at the slot the new tuple will occupy.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->current = state->memtupcount;
				}
			}

			/*
			 * Grow the array as needed.  Note that we try to grow the array
			 * when there is still one free slot remaining --- if we fail,
			 * there'll still be room to store the incoming tuple, and then
			 * we'll switch to tape-based operation.
			 */
			if (state->memtupcount >= state->memtupsize - 1)
			{
				/*
				 * See grow_memtuples() in tuplesort.c for the rationale
				 * behind these two tests.
				 */
				if (state->availMem > (long) (state->memtupsize * sizeof(void *)) &&
					(Size) (state->memtupsize * 2) < MaxAllocSize / sizeof(void *))
				{
					FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
					state->memtupsize *= 2;
					state->memtuples = (void **)
						repalloc(state->memtuples,
								 state->memtupsize * sizeof(void *));
					USEMEM(state, GetMemoryChunkSpace(state->memtuples));
					if (LACKMEM(state))
						elog(ERROR, "unexpected out-of-memory situation in tuplestore");
				}
			}

			/* Stash the tuple in the in-memory array */
			state->memtuples[state->memtupcount++] = tuple;

			/*
			 * Done if we still fit in available memory and have array slots.
			 */
			if (state->memtupcount < state->memtupsize && !LACKMEM(state))
				return;

			/*
			 * Nope; time to switch to tape-based operation.  Make sure that
			 * the temp file(s) are created in suitable temp tablespaces.
			 */
			PrepareTempTablespaces();

			/* associate the file with the store's resource owner */
			oldowner = CurrentResourceOwner;
			CurrentResourceOwner = state->resowner;

			/*
			 * The prefix buffer lives in its own block so that its scope
			 * does not extend over the case labels that follow, and its
			 * size is taken from the array itself rather than repeated.
			 */
			{
				char		tmpprefix[50];

				snprintf(tmpprefix, sizeof(tmpprefix),
						 "slice%d_tuplestore", currentSliceId);
				state->myfile = BufFileCreateTemp(tmpprefix, state->interXact);
			}

			CurrentResourceOwner = oldowner;

			/*
			 * Freeze the decision about whether trailing length words will be
			 * used.  We can't change this choice once data is on tape, even
			 * though callers might drop the requirement.
			 */
			state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
			state->status = TSS_WRITEFILE;
			dumptuples(state);
			break;

		case TSS_WRITEFILE:

			/*
			 * Update read pointers as needed: non-active pointers parked at
			 * EOF get the current file position.  Note: BufFileTell is quite
			 * cheap, so not worth trying to avoid multiple calls.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					BufFileTell(state->myfile,
								&readptr->file, &readptr->offset);
				}
			}

			WRITETUP(state, tuple);
			break;

		case TSS_READFILE:

			/*
			 * Switch from reading to writing: save the active pointer's file
			 * position (unless it is at EOF), then seek to where writing
			 * left off.
			 */
			if (!state->readptrs[state->activeptr].eof_reached)
				BufFileTell(state->myfile,
							&state->readptrs[state->activeptr].file,
							&state->readptrs[state->activeptr].offset);
			if (BufFileSeek(state->myfile,
							state->writepos_file, state->writepos_offset,
							SEEK_SET) != 0)
				elog(ERROR, "tuplestore seek to EOF failed");
			state->status = TSS_WRITEFILE;

			/*
			 * Update read pointers as needed: non-active pointers parked at
			 * EOF get the saved write position.
			 */
			readptr = state->readptrs;
			for (i = 0; i < state->readptrcount; readptr++, i++)
			{
				if (readptr->eof_reached && i != state->activeptr)
				{
					readptr->eof_reached = false;
					readptr->file = state->writepos_file;
					readptr->offset = state->writepos_offset;
				}
			}

			WRITETUP(state, tuple);
			break;

		default:
			elog(ERROR, "invalid tuplestore state");
			break;
	}
}