/*
 * Create a multi-key heap from an array of entries
 *
 * entries:  the values to convert to a heap.  This array will be under
 *           mkheap's ownership.
 * alloc_sz: the allocation size of entries: that is, how much room the
 *           array has.
 * cnt:      the number of elements in entries which should be used to
 *           build the heap.
 * mkctxt:   description of the heap to build.
 *
 * If alloc_sz is zero then entries must be NULL.
 */
MKHeap *
mkheap_from_array(MKEntry *entries, int alloc_sz, int cnt, MKContext *mkctxt)
{
	MKHeap	   *heap = (MKHeap *) palloc(sizeof(MKHeap));

	Assert(mkctxt);
	Assert(alloc_sz >= cnt);
	AssertEquivalent(entries != NULL, cnt > 0);
	AssertEquivalent(!entries, cnt == 0);

	heap->mkctxt = mkctxt;
	heap->lvtops = palloc0(mkctxt->total_lv * sizeof(MKEntry));

	heap->readers = NULL;
	heap->nreader = 0;

	AssertImply(alloc_sz == 0, !entries);
	Assert(cnt >= 0 && cnt <= alloc_sz);

	heap->p = entries;
	heap->alloc_size = alloc_sz;
	heap->count = cnt;
	heap->maxentry = cnt;

#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		for (i = 0; i < cnt; ++i)
		{
			Assert(mke_get_lv(entries + i) == 0);
			Assert(mke_get_reader(entries + i) == 0);
		}
	}
#endif

	/*
	 * note: see NOTE ON UNIQUENESS CHECKING at the top of this file for
	 * information about why we don't check uniqueness here
	 */

	mk_prepare_array(entries, 0, cnt - 1, 0, mkctxt);
	mkheap_heapify(heap, true);
	return heap;
}
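/*
 * Usage sketch (illustrative only; `n`, `cnt`, `fill_entries`, and `ctxt`
 * are hypothetical stand-ins).  Because the heap takes ownership of the
 * array, the caller must not pfree it after this call:
 *
 *     MKEntry *arr = (MKEntry *) palloc0(n * sizeof(MKEntry));
 *
 *     fill_entries(arr, cnt);       // level and reader bits must stay 0,
 *                                   // per the asserts above
 *     MKHeap  *heap = mkheap_from_array(arr, n, cnt, ctxt);
 */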
/*
 * Update the size accounting when closing an existing persistent file that
 * we had opened for reading.  The file already existed, so there is no size
 * to adjust.
 */
static void
adjust_size_persistent_file_existing(workfile_set *work_set, int64 size)
{
#if USE_ASSERT_CHECKING
	bool		isCached = (NULL != work_set) && Cache_IsCached(CACHE_ENTRY_HEADER(work_set));
#endif

	AssertEquivalent((NULL != work_set), isCached);

	elog(gp_workfile_caching_loglevel, "closing existing persistent file, nothing to do");
	return;
}
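/*
 * Note: AssertEquivalent(a, b) checks that its two conditions have the same
 * truth value.  Here it verifies that we were handed a work_set exactly when
 * that set is resident in the workfile cache; isCached is declared only in
 * assert-enabled builds, matching the assert-only check that uses it.
 */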
/*
 * Rewind logical tape and switch from writing to reading or vice versa.
 *
 * Unless the tape has been "frozen" in read state, forWrite must be the
 * opposite of the previous tape state.
 */
void
LogicalTapeRewind(LogicalTapeSet *lts, LogicalTape *lt, bool forWrite)
{
	AssertEquivalent(lt->firstBlkNum == -1, lt->currPos.blkNum == -1);

	if (!forWrite)
	{
		if (lt->writing)
		{
			if (lt->firstBlkNum != -1)
			{
				Assert(lt->currBlk.next_blk == -1L);
				ltsWriteBlock(lts, lt->currPos.blkNum, &lt->currBlk);

				if (lt->currPos.blkNum != lt->firstBlkNum)
					ltsReadBlock(lts, lt->firstBlkNum, &lt->currBlk);
			}

			lt->currPos.blkNum = lt->firstBlkNum;
			lt->currPos.offset = 0;
			lt->writing = false;
		}
		else
		{
			/*
			 * This is only OK if tape is frozen; we rewind for (another)
			 * read pass.
			 */
			Assert(lt->frozen);

			if (lt->currPos.blkNum != lt->firstBlkNum)
				ltsReadBlock(lts, lt->firstBlkNum, &lt->currBlk);

			lt->currPos.blkNum = lt->firstBlkNum;
			lt->currPos.offset = 0;
		}
	}
	else
	{
		lt->firstBlkNum = -1L;
		lt->currBlk.prev_blk = -1L;
		lt->currBlk.next_blk = -1L;
		lt->currBlk.payload_tail = 0;

		lt->currPos.blkNum = -1L;
		lt->currPos.offset = 0;
		lt->writing = true;
	}
}
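/*
 * Typical lifecycle sketch (illustrative; assumes the companion
 * LogicalTapeWrite/LogicalTapeRead routines take the same (lts, lt, ...)
 * form as LogicalTapeRewind above):
 *
 *     LogicalTapeWrite(lts, lt, data, len);   // tape starts in write state
 *     LogicalTapeRewind(lts, lt, false);      // flush last block, switch to read
 *     while (LogicalTapeRead(lts, lt, buf, n) > 0)
 *         ... consume buf ...
 *     LogicalTapeRewind(lts, lt, true);       // discard contents, write afresh
 *
 * A second rewind-for-read without an intervening write is legal only on a
 * frozen tape, per the Assert(lt->frozen) above.
 */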
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the
 *		tuplestore, we collect one new row from the subplan on each
 *		call, and stash it aside in the tuplestore before returning
 *		it.  The tuplestore is only read if we are asked to scan
 *		backwards, rescan, or mark/restore.
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;
	NTupleStore *ts;
	NTupleStoreAccessor *tsa;
	bool		eof_tuplestore;
	TupleTableSlot *slot;
	Material   *ma;

	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);

	ts = node->ts_state->matstore;
	tsa = (NTupleStoreAccessor *) node->ts_pos;

	ma = (Material *) node->ss.ps.plan;
	Assert(IsA(ma, Material));

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (ts == NULL && (ma->share_type != SHARE_NOTSHARED || node->randomAccess))
	{
		/*
		 * For cross-slice material, we only run ExecMaterial on the driver
		 * slice.
		 */
		if (ma->share_type == SHARE_MATERIAL_XSLICE)
		{
			char		rwfile_prefix[100];

			if (ma->driver_slice != currentSliceId)
			{
				elog(LOG, "Material Exec on CrossSlice, current slice %d", currentSliceId);
				return NULL;
			}

			shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), ma->share_id);
			elog(LOG, "Material node creates shareinput rwfile %s", rwfile_prefix);

			ts = ntuplestore_create_readerwriter(rwfile_prefix,
												 PlanStateOperatorMemKB((PlanState *) node) * 1024,
												 true);
			tsa = ntuplestore_create_accessor(ts, true);
		}
		else
		{
			/* Non-shared Materialize node */
			bool		isWriter = true;
			workfile_set *work_set = NULL;

			if (gp_workfile_caching)
			{
				work_set = workfile_mgr_find_set(&node->ss.ps);

				if (NULL != work_set)
				{
					/*
					 * Reusing cached workfiles.  Tell subplan we won't be
					 * needing any tuples.
					 */
					elog(gp_workfile_caching_loglevel, "Materialize reusing cached workfiles, initiating Squelch walker");
					isWriter = false;
					ExecSquelchNode(outerPlanState(node));
					node->eof_underlying = true;
					node->cached_workfiles_found = true;
					if (node->ss.ps.instrument)
					{
						node->ss.ps.instrument->workfileReused = true;
					}
				}
			}

			if (NULL == work_set)
			{
				/*
				 * No work_set found.  This is because either:
				 *   a. workfile caching is enabled but we didn't find any
				 *      reusable set, or
				 *   b. workfile caching is disabled.
				 * Create a new empty workset.
				 */
				Assert(!node->cached_workfiles_found);

				/* Don't try to cache when running under a ShareInputScan node */
				bool		can_reuse = (ma->share_type == SHARE_NOTSHARED);

				work_set = workfile_mgr_create_set(BUFFILE, can_reuse, &node->ss.ps, NULL_SNAPSHOT);
				isWriter = true;
			}

			Assert(NULL != work_set);
			AssertEquivalent(node->cached_workfiles_found, !isWriter);

			ts = ntuplestore_create_workset(work_set, node->cached_workfiles_found,
											PlanStateOperatorMemKB((PlanState *) node) * 1024);
			tsa = ntuplestore_create_accessor(ts, isWriter);
		}

		Assert(ts && tsa);
		node->ts_state->matstore = ts;
		node->ts_pos = (void *) tsa;

		/* CDB: Offer extra info for EXPLAIN ANALYZE. */
		if (node->ss.ps.instrument)
		{
			/* Let the tuplestore share our Instrumentation object. */
			ntuplestore_setinstrument(ts, node->ss.ps.instrument);

			/* Request a callback at end of query. */
			node->ss.ps.cdbexplainfun = ExecMaterialExplainEnd;
		}

		/*
		 * MPP: If requested, fetch all rows from subplan and put them in the
		 * tuplestore.  This decouples a middle slice's receiving and sending
		 * Motion operators to neutralize a deadlock hazard.
		 * MPP TODO: Remove when a better solution is implemented.
		 *
		 * ShareInput: if the material node is used to share input, we need
		 * to fetch all rows and put them in the tuple store.
		 */
		while (ma->cdb_strict || ma->share_type != SHARE_NOTSHARED)
		{
			/*
			 * When reusing cached workfiles, we already have all the tuples,
			 * and we don't need to read anything from subplan.
			 */
			if (node->cached_workfiles_found)
			{
				break;
			}

			TupleTableSlot *outerslot = ExecProcNode(outerPlanState(node));

			if (TupIsNull(outerslot))
			{
				node->eof_underlying = true;

				if (ntuplestore_created_reusable_workfiles(ts))
				{
					ntuplestore_flush(ts);
					ntuplestore_mark_workset_complete(ts);
				}

				ntuplestore_acc_seek_bof(tsa);
				break;
			}

			Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN);
			ntuplestore_acc_put_tupleslot(tsa, outerslot);
		}

		CheckSendPlanStateGpmonPkt(&node->ss.ps);

		if (forward)
			ntuplestore_acc_seek_bof(tsa);
		else
			ntuplestore_acc_seek_eof(tsa);

		/* For share input, material does not need to return any tuple. */
		if (ma->share_type != SHARE_NOTSHARED)
		{
			Assert(ma->share_type == SHARE_MATERIAL || ma->share_type == SHARE_MATERIAL_XSLICE);

			/*
			 * If the material is shared across slices, notify consumers that
			 * it is ready.
			 */
			if (ma->share_type == SHARE_MATERIAL_XSLICE)
			{
				if (ma->driver_slice == currentSliceId)
				{
					ntuplestore_flush(ts);

					node->share_lk_ctxt = shareinput_writer_notifyready(ma->share_id, ma->nsharer_xslice,
																		estate->es_plannedstmt->planGen);
				}
			}
			return NULL;
		}
	}

	if (ma->share_type != SHARE_NOTSHARED)
		return NULL;

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;

	if (forward)
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, 1);
	else
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, -1);

	if (tsa != NULL && ntuplestore_acc_tell(tsa, NULL))
	{
		ntuplestore_acc_current_tupleslot(tsa, slot);
		if (!TupIsNull(slot))
		{
			Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
		}
		return slot;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit
	 * further subplan calls.  It's not optional, unfortunately, because some
	 * plan node types are not robust about being called again when they've
	 * already returned NULL.
	 *
	 * If reusing cached workfiles, there is no need to execute the subplan
	 * at all.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		Assert(!node->cached_workfiles_found && "we shouldn't get here when using cached workfiles");

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			if (ntuplestore_created_reusable_workfiles(ts))
			{
				ntuplestore_flush(ts);
				ntuplestore_mark_workset_complete(ts);
			}

			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeMaterial(node);
			}

			return NULL;
		}

		Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN);

		if (tsa)
			ntuplestore_acc_put_tupleslot(tsa, outerslot);

		/*
		 * And return a copy of the tuple.  (XXX couldn't we just return the
		 * outerslot?)
		 */
		Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return ExecCopySlot(slot, outerslot);
	}

	if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeMaterial(node);
	}

	/*
	 * Nothing left ...
	 */
	return NULL;
}
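/*
 * Flow summary (descriptive note): ExecMaterial returns through one of
 * three paths:
 *   1. Shared material (SHARE_MATERIAL/SHARE_MATERIAL_XSLICE): all rows are
 *      staged into the tuplestore up front and NULL is returned; consumers
 *      read the store through ShareInputScan nodes instead.
 *   2. The tuplestore already holds the next row (e.g. backward scan or
 *      rescan): it is returned directly from the store.
 *   3. Otherwise one row is pulled from the subplan, stashed in the store,
 *      and a copy is returned; eof_underlying guards against calling an
 *      already-exhausted subplan again.
 */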