/* * get_all_brokers * * Return a list of all brokers in pipeline_kafka_brokers */ static List * get_all_brokers(void) { HeapTuple tup = NULL; HeapScanDesc scan; Relation brokers = open_pipeline_kafka_brokers(); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(brokers)); List *result = NIL; scan = heap_beginscan(brokers, GetTransactionSnapshot(), 0, NULL); while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { char *host; Datum d; bool isnull; ExecStoreTuple(tup, slot, InvalidBuffer, false); d = slot_getattr(slot, BROKER_ATTR_HOST, &isnull); host = TextDatumGetCString(d); result = lappend(result, host); } ExecDropSingleTupleTableSlot(slot); heap_endscan(scan); heap_close(brokers, NoLock); return result; }
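/*
 * A minimal caller sketch (hypothetical helper, not part of the extension):
 * join the host strings returned by get_all_brokers() into a single
 * comma-separated string, e.g. for handing to rd_kafka_brokers_add().
 */
static char *
broker_list_to_csv(void)
{
	List	   *brokers = get_all_brokers();
	StringInfoData buf;
	ListCell   *lc;

	initStringInfo(&buf);
	foreach(lc, brokers)
	{
		char	   *host = (char *) lfirst(lc);

		if (buf.len > 0)
			appendStringInfoChar(&buf, ',');
		appendStringInfoString(&buf, host);
	}

	return buf.data;
}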
void CheckerTerm(Checker *checker) { if (checker->slot) ExecDropSingleTupleTableSlot(checker->slot); if (checker->estate) FreeExecutorState(checker->estate); }
/*
 * load_consumer_offsets
 *
 * Load all offsets for all of this consumer's partitions
 */
static void
load_consumer_offsets(KafkaConsumer *consumer, struct rd_kafka_metadata_topic *meta, int64_t offset)
{
	MemoryContext old;
	ScanKeyData skey[1];
	HeapTuple tup = NULL;
	HeapScanDesc scan;
	Relation offsets = open_pipeline_kafka_offsets();
	TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(offsets));
	int i;

	ScanKeyInit(&skey[0], OFFSETS_ATTR_CONSUMER, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(consumer->id));
	scan = heap_beginscan(offsets, GetTransactionSnapshot(), 1, skey);

	old = MemoryContextSwitchTo(CacheMemoryContext);
	consumer->offsets = palloc0(meta->partition_cnt * sizeof(int64_t));
	MemoryContextSwitchTo(old);

	/* by default, start every partition at the caller-supplied offset */
	for (i = 0; i < meta->partition_cnt; i++)
		consumer->offsets[i] = offset;

	consumer->num_partitions = meta->partition_cnt;

	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Datum d;
		bool isnull;
		int partition;
		int64_t part_offset = offset;

		ExecStoreTuple(tup, slot, InvalidBuffer, false);

		d = slot_getattr(slot, OFFSETS_ATTR_PARTITION, &isnull);
		partition = DatumGetInt32(d);

		/* partition ids are zero-based, so an id equal to the count is out of range */
		if (partition >= consumer->num_partitions)
			elog(ERROR, "invalid partition id: %d", partition);

		/*
		 * A caller-supplied offset wins; otherwise use this partition's saved
		 * offset, or the end of the topic if none has been saved yet.
		 */
		if (part_offset == RD_KAFKA_OFFSET_NULL)
		{
			d = slot_getattr(slot, OFFSETS_ATTR_OFFSET, &isnull);
			if (isnull)
				part_offset = RD_KAFKA_OFFSET_END;
			else
				part_offset = DatumGetInt64(d);
		}

		consumer->offsets[partition] = part_offset;
	}

	ExecDropSingleTupleTableSlot(slot);
	heap_endscan(scan);
	heap_close(offsets, RowExclusiveLock);
}
/* * CopyIntoStream * * COPY events to a stream from an input source */ void CopyIntoStream(Relation rel, TupleDesc desc, HeapTuple *tuples, int ntuples) { bool snap = ActiveSnapshotSet(); ResultRelInfo rinfo; StreamInsertState *sis; MemSet(&rinfo, 0, sizeof(ResultRelInfo)); rinfo.ri_RangeTableIndex = 1; /* dummy */ rinfo.ri_TrigDesc = NULL; rinfo.ri_RelationDesc = rel; if (snap) PopActiveSnapshot(); BeginStreamModify(NULL, &rinfo, list_make1(desc), 0, 0); sis = (StreamInsertState *) rinfo.ri_FdwState; Assert(sis); if (sis->queries) { TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel)); int i; for (i = 0; i < ntuples; i++) { ExecStoreTuple(tuples[i], slot, InvalidBuffer, false); ExecStreamInsert(NULL, &rinfo, slot, NULL); ExecClearTuple(slot); } ExecDropSingleTupleTableSlot(slot); Assert(sis->ntups == ntuples); pgstat_increment_cq_write(ntuples, sis->nbytes); } EndStreamModify(NULL, &rinfo); if (snap) PushActiveSnapshot(GetTransactionSnapshot()); }
/* * To perform that check simply start an index scan, create the necessary * slot, do the heap lookup, and shut everything down again. This could be * optimized, but is unlikely to matter from a performance POV. If there * frequently are live index pointers also matching a unique index key, the * CPU overhead of this routine is unlikely to matter. */ bool table_index_fetch_tuple_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead) { IndexFetchTableData *scan; TupleTableSlot *slot; bool call_again = false; bool found; slot = table_slot_create(rel, NULL); scan = table_index_fetch_begin(rel); found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again, all_dead); table_index_fetch_end(scan); ExecDropSingleTupleTableSlot(slot); return found; }
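/*
 * Hedged usage sketch (helper name is illustrative): given a TID harvested
 * from an index, ask whether it still points at a tuple visible under the
 * given snapshot; all_dead tells the caller that the whole HOT chain is dead.
 */
static bool
tid_is_visible(Relation heapRel, ItemPointer tid, Snapshot snapshot)
{
	bool		all_dead = false;

	return table_index_fetch_tuple_check(heapRel, tid, snapshot, &all_dead);
}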
void SpoolerClose(Spooler *self) { /* Merge indexes */ if (self->spools != NULL) IndexSpoolEnd(self, true); /* Terminate spooler. */ ExecDropSingleTupleTableSlot(self->slot); if (self->estate->es_result_relation_info) ExecCloseIndices(self->estate->es_result_relation_info); FreeExecutorState(self->estate); /* Close and release members. */ if (self->dup_fp != NULL && FreeFile(self->dup_fp) < 0) ereport(WARNING, (errcode_for_file_access(), errmsg("could not close duplicate bad file \"%s\": %m", self->dup_badfile))); if (self->dup_badfile != NULL) pfree(self->dup_badfile); }
/* * save_consumer_state * * Saves the given consumer's state to pipeline_kafka_consumers */ static void save_consumer_state(KafkaConsumer *consumer, int partition_group) { ScanKeyData skey[1]; HeapTuple tup = NULL; HeapScanDesc scan; Relation offsets = open_pipeline_kafka_offsets(); Datum values[OFFSETS_RELATION_NATTS]; bool nulls[OFFSETS_RELATION_NATTS]; bool replace[OFFSETS_RELATION_NATTS]; bool updated[consumer->num_partitions]; TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(offsets)); int partition; MemSet(updated, false, sizeof(updated)); ScanKeyInit(&skey[0], OFFSETS_ATTR_CONSUMER, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(consumer->id)); scan = heap_beginscan(offsets, GetTransactionSnapshot(), 1, skey); /* update any existing offset rows */ while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { Datum d; bool isnull; int partition; HeapTuple modified; ExecStoreTuple(tup, slot, InvalidBuffer, false); d = slot_getattr(slot, OFFSETS_ATTR_PARTITION, &isnull); partition = DatumGetInt32(d); /* we only want to update the offsets we're responsible for */ if (partition % consumer->parallelism != partition_group) continue; MemSet(nulls, false, sizeof(nulls)); MemSet(replace, false, sizeof(nulls)); values[OFFSETS_ATTR_OFFSET - 1] = Int64GetDatum(consumer->offsets[partition]); replace[OFFSETS_ATTR_OFFSET - 1] = true; updated[partition] = true; modified = heap_modify_tuple(tup, RelationGetDescr(offsets), values, nulls, replace); simple_heap_update(offsets, &modified->t_self, modified); } heap_endscan(scan); /* now insert any offset rows that didn't already exist */ for (partition = 0; partition < consumer->num_partitions; partition++) { if (updated[partition]) continue; if (partition % consumer->parallelism != partition_group) continue; values[OFFSETS_ATTR_CONSUMER - 1] = ObjectIdGetDatum(consumer->id); values[OFFSETS_ATTR_PARTITION - 1] = Int32GetDatum(partition); values[OFFSETS_ATTR_OFFSET - 1] = Int64GetDatum(consumer->offsets[partition]); MemSet(nulls, false, sizeof(nulls)); tup = heap_form_tuple(RelationGetDescr(offsets), values, nulls); simple_heap_insert(offsets, tup); } ExecDropSingleTupleTableSlot(slot); heap_close(offsets, NoLock); }
/* * load_consumer_state * * Read consumer state from pipeline_kafka_consumers into the given struct */ static void load_consumer_state(Oid worker_id, KafkaConsumer *consumer) { ScanKeyData skey[1]; HeapTuple tup = NULL; HeapScanDesc scan; Relation consumers = open_pipeline_kafka_consumers(); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(consumers)); Datum d; bool isnull; text *qualified; MemoryContext old; MemSet(consumer, 0, sizeof(KafkaConsumer)); ScanKeyInit(&skey[0], -2, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(worker_id)); scan = heap_beginscan(consumers, GetTransactionSnapshot(), 1, skey); tup = heap_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) elog(ERROR, "kafka consumer %d not found", worker_id); ExecStoreTuple(tup, slot, InvalidBuffer, false); consumer->id = HeapTupleGetOid(tup); d = slot_getattr(slot, CONSUMER_ATTR_RELATION, &isnull); /* we don't want anything that's palloc'd to get freed when we commit */ old = MemoryContextSwitchTo(CacheMemoryContext); /* target relation */ qualified = (text *) DatumGetPointer(d); consumer->rel = makeRangeVarFromNameList(textToQualifiedNameList(qualified)); /* topic */ d = slot_getattr(slot, CONSUMER_ATTR_TOPIC, &isnull); consumer->topic = TextDatumGetCString(d); /* format */ d = slot_getattr(slot, CONSUMER_ATTR_FORMAT, &isnull); consumer->format = TextDatumGetCString(d); /* delimiter */ d = slot_getattr(slot, CONSUMER_ATTR_DELIMITER, &isnull); if (!isnull) consumer->delimiter = TextDatumGetCString(d); else consumer->delimiter = NULL; /* quote character */ d = slot_getattr(slot, CONSUMER_ATTR_QUOTE, &isnull); if (!isnull) consumer->quote = TextDatumGetCString(d); else consumer->quote = NULL; /* escape character */ d = slot_getattr(slot, CONSUMER_ATTR_ESCAPE, &isnull); if (!isnull) consumer->escape = TextDatumGetCString(d); else consumer->escape = NULL; /* now load all brokers */ consumer->brokers = get_all_brokers(); MemoryContextSwitchTo(old); d = slot_getattr(slot, CONSUMER_ATTR_PARALLELISM, &isnull); consumer->parallelism = DatumGetInt32(d); /* batch size */ d = slot_getattr(slot, CONSUMER_ATTR_BATCH_SIZE, &isnull); consumer->batch_size = DatumGetInt32(d); ExecDropSingleTupleTableSlot(slot); heap_endscan(scan); heap_close(consumers, NoLock); }
Datum pipeline_stream_insert(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; Trigger *trig = trigdata->tg_trigger; HeapTuple tup; List *fdw_private; int i; ResultRelInfo rinfo; if (trig->tgnargs < 1) elog(ERROR, "pipeline_stream_insert: must be provided a stream name"); /* make sure it's called as a trigger */ if (!CALLED_AS_TRIGGER(fcinfo)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called as trigger"))); /* and that it's called on update or insert */ if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) && !TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called on insert or update"))); /* and that it's called for each row */ if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called for each row"))); /* and that it's called after insert or update */ if (!TRIGGER_FIRED_AFTER(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("pipeline_stream_insert: must be called after insert or update"))); if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) tup = trigdata->tg_newtuple; else tup = trigdata->tg_trigtuple; fdw_private = list_make1(RelationGetDescr(trigdata->tg_relation)); MemSet(&rinfo, 0, sizeof(ResultRelInfo)); rinfo.ri_RangeTableIndex = 1; /* dummy */ rinfo.ri_TrigDesc = NULL; for (i = 0; i < trig->tgnargs; i++) { RangeVar *stream; Relation rel; StreamInsertState *sis; stream = makeRangeVarFromNameList(textToQualifiedNameList(cstring_to_text(trig->tgargs[i]))); rel = heap_openrv(stream, AccessShareLock); rinfo.ri_RelationDesc = rel; BeginStreamModify(NULL, &rinfo, fdw_private, 0, 0); sis = (StreamInsertState *) rinfo.ri_FdwState; Assert(sis); if (sis->queries) { TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel)); ExecStoreTuple(tup, slot, InvalidBuffer, false); ExecStreamInsert(NULL, &rinfo, slot, NULL); ExecClearTuple(slot); ExecDropSingleTupleTableSlot(slot); pgstat_report_streamstat(true); } EndStreamModify(NULL, &rinfo); heap_close(rel, AccessShareLock); } return PointerGetDatum(tup); }
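/*
 * Usage sketch: pipeline_stream_insert() is attached as an AFTER ROW trigger,
 * with each trigger argument naming a target stream (object names below are
 * illustrative):
 *
 *   CREATE TRIGGER copy_to_stream
 *     AFTER INSERT OR UPDATE ON some_table
 *     FOR EACH ROW
 *     EXECUTE PROCEDURE pipeline_stream_insert('some_stream');
 */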
/* * CatalogIndexInsert - insert index entries for one catalog tuple * * This should be called for each inserted or updated catalog tuple. * * This is effectively a cut-down version of ExecInsertIndexTuples. */ void CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) { int i; int numIndexes; RelationPtr relationDescs; Relation heapRelation; TupleTableSlot *slot; IndexInfo **indexInfoArray; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; /* HOT update does not require index inserts */ if (HeapTupleIsHeapOnly(heapTuple)) return; /* * Get information from the state structure. Fall out if nothing to do. */ numIndexes = indstate->ri_NumIndices; if (numIndexes == 0) return; relationDescs = indstate->ri_IndexRelationDescs; indexInfoArray = indstate->ri_IndexRelationInfo; heapRelation = indstate->ri_RelationDesc; /* Need a slot to hold the tuple being examined */ slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation)); ExecStoreTuple(heapTuple, slot, InvalidBuffer, false); /* * for each index, form and insert the index tuple */ for (i = 0; i < numIndexes; i++) { IndexInfo *indexInfo; indexInfo = indexInfoArray[i]; /* If the index is marked as read-only, ignore it */ if (!indexInfo->ii_ReadyForInserts) continue; /* * Expressional and partial indexes on system catalogs are not * supported, nor exclusion constraints, nor deferred uniqueness */ Assert(indexInfo->ii_Expressions == NIL); Assert(indexInfo->ii_Predicate == NIL); Assert(indexInfo->ii_ExclusionOps == NULL); Assert(relationDescs[i]->rd_index->indimmediate); /* * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. */ FormIndexDatum(indexInfo, slot, NULL, /* no expression eval to do */ values, isnull); /* * The index AM does the rest. */ index_insert(relationDescs[i], /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, relationDescs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); } ExecDropSingleTupleTableSlot(slot); }
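/*
 * Typical call pattern (sketch; the helper and its arguments are placeholders):
 * insert a catalog tuple and keep the catalog's indexes in sync with it, the
 * same job CatalogUpdateIndexes() bundles into one call.
 */
static void
insert_catalog_tuple(Relation catrel, HeapTuple tup)
{
	CatalogIndexState indstate = CatalogOpenIndexes(catrel);

	simple_heap_insert(catrel, tup);
	CatalogIndexInsert(indstate, tup);
	CatalogCloseIndexes(indstate);
}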
/* * unique_key_recheck - trigger function to do a deferred uniqueness check. * * This now also does deferred exclusion-constraint checks, so the name is * somewhat historical. * * This is invoked as an AFTER ROW trigger for both INSERT and UPDATE, * for any rows recorded as potentially violating a deferrable unique * or exclusion constraint. * * This may be an end-of-statement check, a commit-time check, or a * check triggered by a SET CONSTRAINTS command. */ Datum unique_key_recheck(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; const char *funcname = "unique_key_recheck"; HeapTuple new_row; ItemPointerData tmptid; Relation indexRel; IndexInfo *indexInfo; EState *estate; ExprContext *econtext; TupleTableSlot *slot; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; /* * Make sure this is being called as an AFTER ROW trigger. Note: * translatable error strings are shared with ri_triggers.c, so resist the * temptation to fold the function name into them. */ if (!CALLED_AS_TRIGGER(fcinfo)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" was not called by trigger manager", funcname))); if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" must be fired AFTER ROW", funcname))); /* * Get the new data that was inserted/updated. */ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) new_row = trigdata->tg_trigtuple; else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) new_row = trigdata->tg_newtuple; else { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" must be fired for INSERT or UPDATE", funcname))); new_row = NULL; /* keep compiler quiet */ } /* * If the new_row is now dead (ie, inserted and then deleted within our * transaction), we can skip the check. However, we have to be careful, * because this trigger gets queued only in response to index insertions; * which means it does not get queued for HOT updates. The row we are * called for might now be dead, but have a live HOT child, in which case * we still need to make the check. Therefore we have to use * heap_hot_search, not just HeapTupleSatisfiesVisibility as is done in * the comparable test in RI_FKey_check. * * This might look like just an optimization, because the index AM will * make this identical test before throwing an error. But it's actually * needed for correctness, because the index AM will also throw an error * if it doesn't find the index entry for the row. If the row's dead then * it's possible the index entry has also been marked dead, and even * removed. */ tmptid = new_row->t_self; if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL)) { /* * All rows in the HOT chain are dead, so skip the check. */ return PointerGetDatum(NULL); } /* * Open the index, acquiring a RowExclusiveLock, just as if we were going * to update it. (This protects against possible changes of the index * schema, not against concurrent updates.) */ indexRel = index_open(trigdata->tg_trigger->tgconstrindid, RowExclusiveLock); indexInfo = BuildIndexInfo(indexRel); /* * The heap tuple must be put into a slot for FormIndexDatum. */ slot = MakeSingleTupleTableSlot(RelationGetDescr(trigdata->tg_relation)); ExecStoreTuple(new_row, slot, InvalidBuffer, false); /* * Typically the index won't have expressions, but if it does we need an * EState to evaluate them. 
We need it for exclusion constraints too, * even if they are just on simple columns. */ if (indexInfo->ii_Expressions != NIL || indexInfo->ii_ExclusionOps != NULL) { estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); econtext->ecxt_scantuple = slot; } else estate = NULL; /* * Form the index values and isnull flags for the index entry that we need * to check. * * Note: if the index uses functions that are not as immutable as they are * supposed to be, this could produce an index tuple different from the * original. The index AM can catch such errors by verifying that it * finds a matching index entry with the tuple's TID. For exclusion * constraints we check this in check_exclusion_constraint(). */ FormIndexDatum(indexInfo, slot, estate, values, isnull); /* * Now do the appropriate check. */ if (indexInfo->ii_ExclusionOps == NULL) { /* * Note: this is not a real insert; it is a check that the index entry * that has already been inserted is unique. */ index_insert(indexRel, values, isnull, &(new_row->t_self), trigdata->tg_relation, UNIQUE_CHECK_EXISTING); } else { /* * For exclusion constraints we just do the normal check, but now it's * okay to throw error. */ check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, &(new_row->t_self), values, isnull, estate, false, false); } /* * If that worked, then this index entry is unique or non-excluded, and we * are done. */ if (estate != NULL) FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); index_close(indexRel, RowExclusiveLock); return PointerGetDatum(NULL); }
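/*
 * Context sketch: this trigger is queued automatically for deferrable
 * unique/exclusion constraints, e.g.
 *
 *   ALTER TABLE t ADD CONSTRAINT t_id_key UNIQUE (id)
 *     DEFERRABLE INITIALLY DEFERRED;
 *
 * so the real uniqueness check runs at the end of the statement, at commit,
 * or when SET CONSTRAINTS forces it.
 */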
/* * ExecFilterRecommend * * This function just borrows a tuple descriptor from the RecView, * but we create the data ourselves through various means. */ static TupleTableSlot* ExecFilterRecommend(RecScanState *recnode, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd) { ExprContext *econtext; List *qual; ProjectionInfo *projInfo; ExprDoneCond isDone; TupleTableSlot *resultSlot; ScanState *node; AttributeInfo *attributes; node = recnode->subscan; attributes = (AttributeInfo*) recnode->attributes; /* * Fetch data from node */ qual = node->ps.qual; projInfo = node->ps.ps_ProjInfo; econtext = node->ps.ps_ExprContext; /* * Check to see if we're still projecting out tuples from a previous scan * tuple (because there is a function-returning-set in the projection * expressions). If so, try to project another one. */ if (node->ps.ps_TupFromTlist) { Assert(projInfo); /* can't get here if not projecting */ resultSlot = ExecProject(projInfo, &isDone); if (isDone == ExprMultipleResult) return resultSlot; /* Done with that source tuple... */ node->ps.ps_TupFromTlist = false; } /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. Note this can't happen * until we're done projecting out tuples from a scan tuple. */ ResetExprContext(econtext); /* * get a tuple from the access method. Loop until we obtain a tuple that * passes the qualification. */ for (;;) { TupleTableSlot *slot; int natts, i, userID, userindex, itemID, itemindex; CHECK_FOR_INTERRUPTS(); slot = recnode->ss.ps.ps_ResultTupleSlot; /* The first thing we need to do is initialize our recommender * model and other things, if we haven't done so already. */ if (!recnode->initialized) InitializeRecommender(recnode); /* * If we've exhausted our item list, then we're totally * finished. We set a flag for this. It's possible that * we'll be in the inner loop of a join, through poor * planning, so we'll reset the appropriate data in case * we have to do this again, though our JoinRecommend * should assure this doesn't happen. */ if (recnode->finished) { recnode->finished = false; recnode->userNum = 0; recnode->itemNum = 0; return NULL; } /* We're only going to fetch one tuple and store its tuple * descriptor. We can use this tuple descriptor to make as * many new tuples as we want. */ if (recnode->base_slot == NULL) { slot = ExecRecFetch(node, accessMtd, recheckMtd); recnode->base_slot = CreateTupleDescCopy(slot->tts_tupleDescriptor); } /* Create a new slot to operate on. */ slot = MakeSingleTupleTableSlot(recnode->base_slot); slot->tts_isempty = false; /* * place the current tuple into the expr context */ econtext->ecxt_scantuple = slot; /* Mark all slots as usable. */ natts = slot->tts_tupleDescriptor->natts; for (i = 0; i < natts; i++) { /* Mark slot. */ slot->tts_values[i] = Int32GetDatum(0); slot->tts_isnull[i] = false; slot->tts_nvalid++; } /* While we're here, record what tuple attributes * correspond to our key columns. This will save * us unnecessary strcmp functions. 
*/ if (recnode->useratt < 0) { for (i = 0; i < natts; i++) { char* col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; //printf("%s\n",col_name); if (strcmp(col_name,attributes->userkey) == 0) recnode->useratt = i; else if (strcmp(col_name,attributes->itemkey) == 0) recnode->itematt = i; else if (strcmp(col_name,attributes->eventval) == 0) recnode->eventatt = i; } } /* * We now have a problem: we need to create prediction structures * for a user before we do filtering, so that we can have a proper * item list. But we also need to filter before creating those * structures, so we don't end up taking forever with it. The * solution is to filter twice. */ userID = -1; itemID = -1; /* First, replace the user ID. */ userindex = recnode->userNum; userID = recnode->userList[userindex]; /* * We now have a blank tuple slot that we need to fill with data. * We have a working user ID, but not a valid item list. We'd like to * use the filter to determine if this is a good user, but we can't * do that without an item, in many cases. The solution is to add in * dummy items, then compare it against the filter. If a given user ID * doesn't make it past the filter with any item ID, then that user is * being filtered out, and we'll move on to the next. */ if (recnode->newUser) { recnode->fullItemNum = 0; itemindex = recnode->fullItemNum; itemID = recnode->fullItemList[itemindex]; slot->tts_values[recnode->useratt] = Int32GetDatum(userID); slot->tts_values[recnode->itematt] = Int32GetDatum(itemID); slot->tts_values[recnode->eventatt] = Int32GetDatum(-1); /* We have a preliminary slot - let's test it. */ while (qual && !ExecQual(qual, econtext, false)) { /* We failed the test. Try the next item. */ recnode->fullItemNum++; if (recnode->fullItemNum >= recnode->fullTotalItems) { /* If we've reached the last item, move onto the next user. * If we've reached the last user, we're done. */ InstrCountFiltered1(node, recnode->fullTotalItems); recnode->userNum++; recnode->newUser = true; recnode->fullItemNum = 0; if (recnode->userNum >= recnode->totalUsers) { recnode->userNum = 0; recnode->itemNum = 0; return NULL; } userindex = recnode->userNum; userID = recnode->userList[userindex]; } itemindex = recnode->fullItemNum; itemID = recnode->fullItemList[itemindex]; slot->tts_values[recnode->useratt] = Int32GetDatum(userID); slot->tts_values[recnode->itematt] = Int32GetDatum(itemID); } /* If we get here, then we found a user who will be actually * returned in the results. One quick reset here. */ recnode->fullItemNum = 0; } /* Mark the user ID and index. */ attributes->userID = userID; recnode->userindex = userindex; /* With the user ID determined, we need to investigate and see * if this is a new user. If so, attempt to create prediction * data structures, or report that this user is invalid. We have * to do this here, so we can establish the item list. */ if (recnode->newUser) { recnode->validUser = prepUserForRating(recnode,userID); recnode->newUser = false; } /* Now replace the item ID, if the user is valid. Otherwise, * leave the item ID as is, as it doesn't matter what it is. */ if (recnode->validUser) itemID = recnode->itemList[recnode->itemNum]; while (recnode->fullItemList[recnode->fullItemNum] < itemID) recnode->fullItemNum++; itemindex = recnode->fullItemNum; if (recnode->fullItemList[itemindex] > itemID) elog(ERROR, "critical item mismatch in ExecRecommend"); /* Plug in the data, marking those columns full. We also need to * mark the rating column with something temporary. 
*/ slot->tts_values[recnode->useratt] = Int32GetDatum(userID); slot->tts_values[recnode->itematt] = Int32GetDatum(itemID); slot->tts_values[recnode->eventatt] = Int32GetDatum(-1); /* It's possible our filter criteria involves the RecScore somehow. * If that's the case, we need to calculate it before we do the * qual filtering. Also, if we're doing a JoinRecommend, we should * not calculate the RecScore in this node. In the current version * of RecDB, an OP_NOFILTER shouldn't be allowed. */ if (attributes->opType == OP_NOFILTER) applyRecScore(recnode, slot, itemID, itemindex); /* Move onto the next item, for next time. If we're doing a RecJoin, * though, we'll move onto the next user instead. */ recnode->itemNum++; if (recnode->itemNum >= recnode->totalItems || attributes->opType == OP_JOIN || attributes->opType == OP_GENERATEJOIN) { /* If we've reached the last item, move onto the next user. * If we've reached the last user, we're done. */ recnode->userNum++; recnode->newUser = true; recnode->itemNum = 0; recnode->fullItemNum = 0; if (recnode->userNum >= recnode->totalUsers) recnode->finished = true; } /* * check that the current tuple satisfies the qual-clause * * check for non-nil qual here to avoid a function call to ExecQual() * when the qual is nil ... saves only a few cycles, but they add up * ... */ if (!qual || ExecQual(qual, econtext, false)) { /* * If this is an invalid user, then we'll skip this tuple, * adding one to the filter count. */ if (!recnode->validUser) { InstrCountFiltered1(node, 1); ResetExprContext(econtext); ExecDropSingleTupleTableSlot(slot); continue; } /* * Found a satisfactory scan tuple. This is usually when * we will calculate and apply the RecScore. */ if (attributes->opType == OP_FILTER || attributes->opType == OP_GENERATE) applyRecScore(recnode, slot, itemID, itemindex); if (projInfo) { /* * Form a projection tuple, store it in the result tuple slot * and return it --- unless we find we can project no tuples * from this scan tuple, in which case continue scan. */ resultSlot = ExecProject(projInfo, &isDone); if (isDone != ExprEndResult) { node->ps.ps_TupFromTlist = (isDone == ExprMultipleResult); return resultSlot; } } else { /* * Here, we aren't projecting, so just return scan tuple. */ return slot; } } else InstrCountFiltered1(node, 1); /* * Tuple fails qual, so free per-tuple memory and try again. */ ResetExprContext(econtext); ExecDropSingleTupleTableSlot(slot); } }
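/*
 * For context, a RecDB-style recommendation query that exercises this scan
 * node looks roughly like the following (clause spelling and method name per
 * the RecDB documentation; treat as illustrative):
 *
 *   SELECT * FROM ratings R
 *   RECOMMEND R.itemid TO R.userid ON R.ratingval USING ItemCosCF;
 */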
Datum
partition_insert_trigger(PG_FUNCTION_ARGS)
{
	TriggerData *trigdata = (TriggerData *) fcinfo->context;
	char	   *date_time;
	char		child_table[sizeof(TABLE) + sizeof("2012_09_10")] = TABLE;
	int			partition_field;
	Relation	child_table_id;
	Oid			child_table_oid;
	BulkInsertState bistate = GetBulkInsertState();
	TupleTableSlot *slot;
	EState	   *estate = CreateExecutorState();
	ResultRelInfo *resultRelInfo = makeNode(ResultRelInfo);
	List	   *recheckIndexes = NIL;

	/* make sure it's called as a trigger at all */
	if (!CALLED_AS_TRIGGER(fcinfo))
		elog(ERROR, "partition_insert_trigger: not called by trigger manager");

	/* sanity checks: must be fired BEFORE INSERT */
	if (!TRIGGER_FIRED_BY_INSERT(trigdata->tg_event) ||
		!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
		elog(ERROR, "partition_insert_trigger: not called before insert");

#ifdef DEBUG
	elog(INFO, "Trigger Called for: %s", SPI_getrelname(trigdata->tg_relation));
#endif

	/* get the attribute number of the partitioning column */
	partition_field = SPI_fnumber(trigdata->tg_relation->rd_att, PARTITION_COLUMN);

	/* get the value of the partitioning column for this row */
	date_time = SPI_getvalue(trigdata->tg_trigtuple, trigdata->tg_relation->rd_att, partition_field);

	/* make sure a date was specified */
	if (!date_time)
		elog(ERROR, "You cannot insert data without specifying a value for the column %s", PARTITION_COLUMN);

#ifdef DEBUG
	elog(INFO, "Trying to insert date_time=%s", date_time);
#endif

	/* append the date to the base name, e.g. child_table_2012_01_23 */
	strncpy(child_table + sizeof(TABLE) - 1, date_time, 4);		/* 2012 */
	child_table[sizeof(TABLE) + 3] = SEPARATOR;					/* 2012_ */
	strncpy(child_table + sizeof(TABLE) + 4, date_time + 5, 2);	/* 2012_01 */
	child_table[sizeof(TABLE) + 6] = SEPARATOR;					/* 2012_01_ */
	strncpy(child_table + sizeof(TABLE) + 7, date_time + 8, 2);	/* 2012_01_23 */

#ifdef DEBUG
	elog(INFO, "New table will be %s", child_table);
#endif

	pfree(date_time);

	/*
	 * If you care about triggers on the child tables, call
	 * ExecBRInsertTriggers here; we don't, so continue.
	 */

	/* get the OID of the child table we want to insert into */
	child_table_oid = RelnameGetRelid(child_table);
	if (child_table_oid == InvalidOid)
	{
		elog(INFO, "partition_insert_trigger: invalid child table %s, inserting data into main table %s",
			 child_table, TABLE);
		return PointerGetDatum(trigdata->tg_trigtuple);
	}

	/* get the relation descriptor of the child table */
	child_table_id = RelationIdGetRelation(child_table_oid);
	if (child_table_id == NULL)
	{
		elog(ERROR, "partition_insert_trigger: failed to locate relation for child table %s",
			 child_table);
		return PointerGetDatum(trigdata->tg_trigtuple);	/* not reached */
	}

	/* set up the ResultRelInfo */
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = child_table_id;

	/* set up the executor state */
	estate->es_result_relations = resultRelInfo;
	estate->es_num_result_relations = 1;
	estate->es_result_relation_info = resultRelInfo;

	/* set up a tuple slot holding the incoming tuple */
	slot = MakeSingleTupleTableSlot(trigdata->tg_relation->rd_att);
	ExecStoreTuple(trigdata->tg_trigtuple, slot, InvalidBuffer, false);

	/*
	 * heap_insert(child_table_id, trigdata->tg_trigtuple,
	 *			   GetCurrentCommandId(true), use_wal, bistate);
	 */
	simple_heap_insert(child_table_id, trigdata->tg_trigtuple);

	if (resultRelInfo->ri_NumIndices > 0)
		recheckIndexes = ExecInsertIndexTuples(slot, &(trigdata->tg_trigtuple->t_self), estate);
	/* not sure if this would work: CatalogUpdateIndexes(child_table_id, trigdata->tg_trigtuple) */

	/* free the used memory */
	list_free(recheckIndexes);
	ExecDropSingleTupleTableSlot(slot);	/* saw this somewhere :P */
	RelationClose(child_table_id);		/* must be called to release the relation */
	FreeBulkInsertState(bistate);		/* ? not sure if needed ? */

	/*
	 * Returning the tuple here would also insert it into the parent table:
	 * return PointerGetDatum(trigdata->tg_trigtuple);
	 *
	 * Return NULL instead; still have to figure out how to report a proper
	 * "X rows affected".
	 */
	return PointerGetDatum(NULL);
}
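/*
 * Hypothetical trigger definition (sketch): route rows inserted into the
 * parent table through partition_insert_trigger() before they land.
 *
 *   CREATE TRIGGER partition_insert
 *     BEFORE INSERT ON parent_table
 *     FOR EACH ROW
 *     EXECUTE PROCEDURE partition_insert_trigger();
 */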
/* * Assumes that the segment file lock is already held. * Assumes that the segment file should be compacted. */ static bool AOCSSegmentFileFullCompaction(Relation aorel, AOCSInsertDesc insertDesc, AOCSFileSegInfo *fsinfo, Snapshot snapshot) { const char *relname; AppendOnlyVisimap visiMap; AOCSScanDesc scanDesc; TupleDesc tupDesc; TupleTableSlot *slot; int compact_segno; int64 movedTupleCount = 0; ResultRelInfo *resultRelInfo; MemTupleBinding *mt_bind; EState *estate; bool *proj; int i; AOTupleId *aoTupleId; int64 tupleCount = 0; int64 tuplePerPage = INT_MAX; Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY); Assert(RelationIsAoCols(aorel)); Assert(insertDesc); compact_segno = fsinfo->segno; if (fsinfo->varblockcount > 0) { tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount; } relname = RelationGetRelationName(aorel); AppendOnlyVisimap_Init(&visiMap, aorel->rd_appendonly->visimaprelid, aorel->rd_appendonly->visimapidxid, ShareLock, snapshot); elogif(Debug_appendonly_print_compaction, LOG, "Compact AO segfile %d, relation %sd", compact_segno, relname); proj = palloc0(sizeof(bool) * RelationGetNumberOfAttributes(aorel)); for (i = 0; i < RelationGetNumberOfAttributes(aorel); ++i) { proj[i] = true; } scanDesc = aocs_beginrangescan(aorel, snapshot, snapshot, &compact_segno, 1, NULL, proj); tupDesc = RelationGetDescr(aorel); slot = MakeSingleTupleTableSlot(tupDesc); mt_bind = create_memtuple_binding(tupDesc); /* * We need a ResultRelInfo and an EState so we can use the regular * executor's index-entry-making machinery. */ estate = CreateExecutorState(); resultRelInfo = makeNode(ResultRelInfo); resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ resultRelInfo->ri_RelationDesc = aorel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ ExecOpenIndices(resultRelInfo); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; estate->es_result_relation_info = resultRelInfo; while (aocs_getnext(scanDesc, ForwardScanDirection, slot)) { CHECK_FOR_INTERRUPTS(); aoTupleId = (AOTupleId *) slot_get_ctid(slot); if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId)) { AOCSMoveTuple(slot, insertDesc, resultRelInfo, estate); movedTupleCount++; } else { /* Tuple is invisible and needs to be dropped */ AppendOnlyThrowAwayTuple(aorel, slot, mt_bind); } /* * Check for vacuum delay point after approximatly a var block */ tupleCount++; if (VacuumCostActive && tupleCount % tuplePerPage == 0) { vacuum_delay_point(); } } SetAOCSFileSegInfoState(aorel, compact_segno, AOSEG_STATE_AWAITING_DROP); AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno); /* Delete all mini pages of the segment files if block directory exists */ if (OidIsValid(aorel->rd_appendonly->blkdirrelid)) { AppendOnlyBlockDirectory_DeleteSegmentFile(aorel, snapshot, compact_segno, 0); } elogif(Debug_appendonly_print_compaction, LOG, "Finished compaction: " "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT, compact_segno, relname, movedTupleCount); AppendOnlyVisimap_Finish(&visiMap, NoLock); ExecCloseIndices(resultRelInfo); FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); destroy_memtuple_binding(mt_bind); aocs_endscan(scanDesc); pfree(proj); return true; }
/* * Assumes that the segment file lock is already held. * Assumes that the segment file should be compacted. * */ static void AppendOnlySegmentFileFullCompaction(Relation aorel, AppendOnlyEntry *aoEntry, AppendOnlyInsertDesc insertDesc, FileSegInfo* fsinfo) { const char* relname; AppendOnlyVisimap visiMap; AppendOnlyScanDesc scanDesc; TupleDesc tupDesc; MemTuple tuple; TupleTableSlot *slot; MemTupleBinding *mt_bind; int compact_segno; int64 movedTupleCount = 0; ResultRelInfo *resultRelInfo; EState *estate; AOTupleId *aoTupleId; int64 tupleCount = 0; int64 tuplePerPage = INT_MAX; Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY); Assert(RelationIsAoRows(aorel)); Assert(insertDesc); compact_segno = fsinfo->segno; if (fsinfo->varblockcount > 0) { tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount; } relname = RelationGetRelationName(aorel); AppendOnlyVisimap_Init(&visiMap, aoEntry->visimaprelid, aoEntry->visimapidxid, ShareUpdateExclusiveLock, SnapshotNow); elogif(Debug_appendonly_print_compaction, LOG, "Compact AO segno %d, relation %s, insert segno %d", compact_segno, relname, insertDesc->storageWrite.segmentFileNum); /* * Todo: We need to limit the scan to one file and we need to avoid to * lock the file again. * * We use SnapshotAny to get visible and invisible tuples. */ scanDesc = appendonly_beginrangescan(aorel, SnapshotAny, SnapshotNow, &compact_segno, 1, 0, NULL); tupDesc = RelationGetDescr(aorel); slot = MakeSingleTupleTableSlot(tupDesc); mt_bind = create_memtuple_binding(tupDesc); /* * We need a ResultRelInfo and an EState so we can use the regular * executor's index-entry-making machinery. */ estate = CreateExecutorState(); resultRelInfo = makeNode(ResultRelInfo); resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ resultRelInfo->ri_RelationDesc = aorel; resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ ExecOpenIndices(resultRelInfo); estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; estate->es_result_relation_info = resultRelInfo; /* * Go through all visible tuples and move them to a new segfile. */ while ((tuple = appendonly_getnext(scanDesc, ForwardScanDirection, slot)) != NULL) { /* Check interrupts as this may take time. */ CHECK_FOR_INTERRUPTS(); aoTupleId = (AOTupleId*)slot_get_ctid(slot); if (AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap, aoTupleId)) { AppendOnlyMoveTuple(tuple, slot, mt_bind, insertDesc, resultRelInfo, estate); movedTupleCount++; } else { /* Tuple is invisible and needs to be dropped */ AppendOnlyThrowAwayTuple(aorel, tuple, slot, mt_bind); } /* * Check for vacuum delay point after approximatly a var block */ tupleCount++; if (VacuumCostActive && tupleCount % tuplePerPage == 0) { vacuum_delay_point(); } } SetFileSegInfoState(aorel, aoEntry, compact_segno, AOSEG_STATE_AWAITING_DROP); AppendOnlyVisimap_DeleteSegmentFile(&visiMap, compact_segno); /* Delete all mini pages of the segment files if block directory exists */ if (OidIsValid(aoEntry->blkdirrelid)) { AppendOnlyBlockDirectory_DeleteSegmentFile( aoEntry, SnapshotNow, compact_segno, 0); } elogif(Debug_appendonly_print_compaction, LOG, "Finished compaction: " "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT, compact_segno, relname, movedTupleCount); AppendOnlyVisimap_Finish(&visiMap, NoLock); ExecCloseIndices(resultRelInfo); FreeExecutorState(estate); ExecDropSingleTupleTableSlot(slot); destroy_memtuple_binding(mt_bind); appendonly_endscan(scanDesc); }