static void PersistentStore_DoInsertTuple( PersistentStoreData *storeData, PersistentStoreSharedData *storeSharedData, Relation persistentRel, /* The persistent table relation. */ Datum *values, bool flushToXLog, /* When true, the XLOG record for this change will be flushed to disk. */ ItemPointer persistentTid) /* TID of the stored tuple. */ { bool *nulls; HeapTuple persistentTuple = NULL; XLogRecPtr xlogInsertEndLoc; /* * In order to keep the tuples the exact same size to enable direct reuse of * free tuples, we do not use NULLs. */ nulls = (bool*)palloc0(storeData->numAttributes * sizeof(bool)); /* * Form the tuple. */ persistentTuple = heap_form_tuple(persistentRel->rd_att, values, nulls); if (!HeapTupleIsValid(persistentTuple)) elog(ERROR, "Failed to build persistent tuple ('%s')", storeData->tableName); frozen_heap_insert( persistentRel, persistentTuple); if (Debug_persistent_store_print) elog(PersistentStore_DebugPrintLevel(), "PersistentStore_DoInsertTuple: new insert TID %s ('%s')", ItemPointerToString2(&persistentTuple->t_self), storeData->tableName); /* * Return the TID of the INSERT tuple. * Return the XLOG location of the INSERT tuple's XLOG record. */ *persistentTid = persistentTuple->t_self; xlogInsertEndLoc = XLogLastInsertEndLoc(); heap_freetuple(persistentTuple); if (flushToXLog) { XLogFlush(xlogInsertEndLoc); XLogRecPtr_Zero(&nowaitXLogEndLoc); } else nowaitXLogEndLoc = xlogInsertEndLoc; pfree(nulls); }
/*
 * _bitmap_insert_lov() -- insert a new entry into the given LOV heap and
 * its index.
 *
 * A heap tuple is formed from 'datum'/'nulls' using the heap's tuple
 * descriptor and inserted with frozen_heap_insert.  The index entry is then
 * built from the first (natts - 2) entries of 'datum' and 'nulls', and
 * points at the heap tuple that was just inserted.
 *
 * 'use_wal' is unused.
 */
void
_bitmap_insert_lov(Relation lovHeap, Relation lovIndex, Datum *datum,
				   bool *nulls, bool use_wal __attribute__((unused)))
{
	TupleDesc	heapDesc = RelationGetDescr(lovHeap);
	int			numIndexAttrs = heapDesc->natts - 2;
	HeapTuple	heapTuple;
	Datum	   *keyValues;
	bool	   *keyNulls;
	bool		inserted;

	/* First the heap: form the tuple and store it. */
	heapTuple = heap_form_tuple(heapDesc, datum, nulls);
	frozen_heap_insert(lovHeap, heapTuple);

	/*
	 * Then the index: only the leading columns take part, so copy just
	 * those out of the caller's arrays.
	 */
	keyValues = palloc0(numIndexAttrs * sizeof(Datum));
	keyNulls = palloc0(numIndexAttrs * sizeof(bool));
	memcpy(keyValues, datum, numIndexAttrs * sizeof(Datum));
	memcpy(keyNulls, nulls, numIndexAttrs * sizeof(bool));

	/*
	 * NOTE(review): the trailing 'true' is presumably the uniqueness-check
	 * flag of index_insert -- confirm against its declaration.
	 */
	inserted = index_insert(lovIndex, keyValues, keyNulls,
							&(heapTuple->t_self), lovHeap, true);

	pfree(keyValues);
	pfree(keyNulls);

	Assert(inserted);

	heap_freetuple(heapTuple);
}
/*
 * InsertInitialParquetSegnoEntry
 *
 * Adds an initial entry for segment file 'segno' into the pg_paqseg_* table
 * (aoEntry->segrelid) of this Parquet relation.  frozen_heap_insert is used
 * so the tuple is frozen on insert.  The new row carries the segment number
 * and zero for tupcount, eof and eofuncompressed.
 *
 * Also inserts a new entry into gp_fastsequence for this segment file.  This
 * happens for every segno; for segno 0 it is the only thing done and the
 * function returns right afterwards.
 *
 * Unless running with Gp_role == GP_ROLE_EXECUTE, the segment index
 * (aoEntry->segidxid) is opened for the duration of the insert and the
 * catalog indexes are updated for the new tuple.
 */
void
InsertInitialParquetSegnoEntry(AppendOnlyEntry *aoEntry, int segno)
{
	Relation	pg_parquetseg_rel;
	Relation	pg_parquetseg_idx;
	TupleDesc	pg_parquetseg_dsc;
	HeapTuple	pg_parquetseg_tuple = NULL;
	int			natts = 0;
	bool	   *nulls;
	Datum	   *values;
	bool		maintainIndexes;
	ItemPointerData tid;

	Assert(aoEntry != NULL);

	/* 'tid' is only handed to InsertFastSequenceEntry; it is not used here. */
	InsertFastSequenceEntry(aoEntry->segrelid,
							(int64) segno,
							0,
							&tid);

	if (segno == 0)
	{
		return;
	}

	pg_parquetseg_rel = heap_open(aoEntry->segrelid, RowExclusiveLock);

	pg_parquetseg_dsc = RelationGetDescr(pg_parquetseg_rel);
	natts = pg_parquetseg_dsc->natts;

	/*
	 * Zero-filled arrays: no attribute is NULL, and any column not set
	 * explicitly below holds a zero Datum.
	 */
	values = palloc0(sizeof(Datum) * natts);
	nulls = palloc0(sizeof(bool) * natts);

	maintainIndexes = (Gp_role != GP_ROLE_EXECUTE);

	if (maintainIndexes)
		pg_parquetseg_idx = index_open(aoEntry->segidxid, RowExclusiveLock);
	else
		pg_parquetseg_idx = NULL;

	values[Anum_pg_parquetseg_segno - 1] = Int32GetDatum(segno);
	values[Anum_pg_parquetseg_tupcount - 1] = Float8GetDatum(0);
	values[Anum_pg_parquetseg_eof - 1] = Float8GetDatum(0);
	values[Anum_pg_parquetseg_eofuncompressed - 1] = Float8GetDatum(0);

	/*
	 * form the tuple and insert it
	 */
	pg_parquetseg_tuple = heap_form_tuple(pg_parquetseg_dsc, values, nulls);
	if (!HeapTupleIsValid(pg_parquetseg_tuple))
		elog(ERROR, "failed to build Parquet file segment tuple");

	frozen_heap_insert(pg_parquetseg_rel, pg_parquetseg_tuple);

	if (maintainIndexes)
		CatalogUpdateIndexes(pg_parquetseg_rel, pg_parquetseg_tuple);

	/* The tuple and the scratch arrays are no longer needed. */
	heap_freetuple(pg_parquetseg_tuple);
	pfree(values);
	pfree(nulls);

	if (maintainIndexes)
		index_close(pg_parquetseg_idx, RowExclusiveLock);

	heap_close(pg_parquetseg_rel, RowExclusiveLock);
}
/*
 * InsertIntoErrorTable
 *
 * Store the information held in 'cdbsreh' as one row of the error table
 * cdbsreh->errtbl.  The row is built by FormErrorTuple and written with
 * frozen_heap_insert, i.e. error table rows are inserted frozen by design.
 */
void
InsertIntoErrorTable(CdbSreh *cdbsreh)
{
	HeapTuple	errorRow = FormErrorTuple(cdbsreh);

	/* insert the row in frozen form, then release our copy of it */
	frozen_heap_insert(cdbsreh->errtbl, errorRow);
	heap_freetuple(errorRow);
}
/* * insert or update the existing fast sequence number for (objid, objmod). * * If such an entry exists in the table, it is provided in oldTuple. This tuple * is updated with the new value. Otherwise, a new tuple is inserted into the * table. */ static void insert_or_update_fastsequence(Relation gp_fastsequence_rel, HeapTuple oldTuple, TupleDesc tupleDesc, Oid objid, int64 objmod, int64 newLastSequence) { Datum *values; bool *nulls; HeapTuple newTuple; values = palloc0(sizeof(Datum) * tupleDesc->natts); nulls = palloc0(sizeof(bool) * tupleDesc->natts); /* * If such a tuple does not exist, insert a new one. */ if (!HeapTupleIsValid(oldTuple)) { values[Anum_gp_fastsequence_objid - 1] = ObjectIdGetDatum(objid); values[Anum_gp_fastsequence_objmod - 1] = Int64GetDatum(objmod); values[Anum_gp_fastsequence_last_sequence - 1] = Int64GetDatum(newLastSequence); newTuple = heaptuple_form_to(tupleDesc, values, nulls, NULL, NULL); frozen_heap_insert(gp_fastsequence_rel, newTuple); CatalogUpdateIndexes(gp_fastsequence_rel, newTuple); heap_freetuple(newTuple); } else { #ifdef USE_ASSERT_CHECKING Oid oldObjid; int64 oldObjmod; bool isNull; oldObjid = heap_getattr(oldTuple, Anum_gp_fastsequence_objid, tupleDesc, &isNull); Assert(!isNull); oldObjmod = heap_getattr(oldTuple, Anum_gp_fastsequence_objmod, tupleDesc, &isNull); Assert(!isNull); Assert(oldObjid == objid && oldObjmod == objmod); #endif values[Anum_gp_fastsequence_objid - 1] = ObjectIdGetDatum(objid); values[Anum_gp_fastsequence_objmod - 1] = Int64GetDatum(objmod); values[Anum_gp_fastsequence_last_sequence - 1] = Int64GetDatum(newLastSequence); newTuple = heap_form_tuple(tupleDesc, values, nulls); newTuple->t_data->t_ctid = oldTuple->t_data->t_ctid; newTuple->t_self = oldTuple->t_self; if (tupleDesc->tdhasoid) HeapTupleSetOid(newTuple, HeapTupleGetOid(oldTuple)); heap_inplace_update(gp_fastsequence_rel, newTuple); heap_freetuple(newTuple); } pfree(values); pfree(nulls); }
/* ----------
 * toast_save_datum -
 *
 *	Save one single datum into the secondary relation and return
 *	a Datum reference for it.
 *
 *	rel			relation owning the datum; its reltoastrelid identifies the
 *				toast relation the chunks are written to
 *	value		the varlena datum to store; short-header, compressed and
 *				plain datums are handled
 *	isFrozen	when true the chunk tuples are written with
 *				frozen_heap_insert, otherwise with simple_heap_insert
 *
 *	The payload is cut into chunks of at most TOAST_MAX_CHUNK_SIZE bytes;
 *	each chunk becomes one toast tuple (value id, chunk sequence number,
 *	chunk data) plus one entry in the toast index.
 *
 *	Returns a pointer Datum to a palloc'd varattrib describing the stored
 *	value.
 * ----------
 */
static Datum
toast_save_datum(Relation rel, Datum value, bool isFrozen)
{
	Relation	toastrel;
	Relation	toastidx;
	HeapTuple	toasttup;
	TupleDesc	toasttupDesc;
	Datum		t_values[3];
	bool		t_isnull[3];
	varattrib  *result;
	struct
	{
		struct varlena hdr;
		char		data[TOAST_MAX_CHUNK_SIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			chunk_data;
	int32		chunk_size;
	int32		chunk_seq = 0;
	char	   *data_p;
	int32		data_todo;
	int32		rawsize,
				extsize;

	/*
	 * Open the toast relation and its index.  We can use the index to check
	 * uniqueness of the OID we assign to the toasted item, even though it has
	 * additional columns besides OID.
	 */
	toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);

	/*
	 * Create the varattrib reference
	 */
	result = (varattrib *) palloc(sizeof(varattrib));

	/*
	 * rawsize is the size of the datum that will result after decompression
	 * -- including the full header, so we have to adjust for short headers.
	 *
	 * extsize is the actual size of the data payload in the toast records
	 * without any headers.
	 */
	if (VARATT_IS_SHORT_D(value))
	{
		rawsize = VARSIZE_SHORT_D(value) - VARHDRSZ_SHORT + VARHDRSZ;
		extsize = VARSIZE_SHORT_D(value) - VARHDRSZ_SHORT;
		data_p = VARDATA_SHORT_D(value);
	}
	else if (VARATT_IS_COMPRESSED_D(value))
	{
		/* rawsize in a compressed datum is just the size of the payload */
		rawsize = ((varattrib *) DatumGetPointer(value))->va_compressed.va_rawsize + VARHDRSZ;
		extsize = VARSIZE_D(value) - VARHDRSZ;
		data_p = VARDATA_D(value);

		/*
		 * We used to set result->va_header |= VARATT_FLAG_COMPRESSED down
		 * below; we no longer do.  Both uncompressed branches produce
		 * rawsize == extsize + VARHDRSZ; presumably
		 * VARATT_EXTERNAL_IS_COMPRESSED relies on that relation not holding
		 * here (the assertions below check the outcome).
		 */
	}
	else
	{
		rawsize = VARSIZE_D(value);
		extsize = VARSIZE_D(value) - VARHDRSZ;
		data_p = VARDATA_D(value);
	}

	/* In every case the bytes left to store are exactly the payload. */
	data_todo = extsize;

	SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE);
	result->va_external.va_rawsize = rawsize;
	result->va_external.va_extsize = extsize;
	result->va_external.va_valueid = GetNewOidWithIndex(toastrel, toastidx);
	result->va_external.va_toastrelid = rel->rd_rel->reltoastrelid;

#ifdef USE_ASSERT_CHECKING
	/* The external pointer must report the same compression state as the input. */
	Assert( (VARATT_IS_COMPRESSED_D(value)||0) == (VARATT_EXTERNAL_IS_COMPRESSED(result)||0) );

	if (VARATT_IS_COMPRESSED_D(value))
	{
		Assert(VARATT_EXTERNAL_IS_COMPRESSED(result));
		elog(DEBUG4,
			 "saved toast datum, original varsize %u rawsize %u new extsize %u rawsize %u",
			 (unsigned int) VARSIZE_D(value),
			 (unsigned int) ((varattrib *) DatumGetPointer(value))->va_compressed.va_rawsize,
			 (unsigned int) result->va_external.va_extsize,
			 (unsigned int) result->va_external.va_rawsize);
	}
	else
	{
		Assert(!VARATT_EXTERNAL_IS_COMPRESSED(result));
		elog(DEBUG4,
			 "saved toast datum, original varsize %u new extsize %u rawsize %u",
			 (unsigned int) VARSIZE_D(value),
			 (unsigned int) result->va_external.va_extsize,
			 (unsigned int) result->va_external.va_rawsize);
	}
#endif

	/*
	 * Initialize constant parts of the tuple data
	 */
	t_values[0] = ObjectIdGetDatum(result->va_external.va_valueid);
	t_values[2] = PointerGetDatum(&chunk_data);
	t_isnull[0] = false;
	t_isnull[1] = false;
	t_isnull[2] = false;

	/*
	 * Split up the item into chunks
	 */
	while (data_todo > 0)
	{
		/*
		 * Calculate the size of this chunk
		 */
		chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);

		/*
		 * Build a tuple and store it
		 */
		t_values[1] = Int32GetDatum(chunk_seq++);
		SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
		memcpy(VARDATA(&chunk_data), data_p, chunk_size);
		toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
		if (!HeapTupleIsValid(toasttup))
			elog(ERROR, "failed to build TOAST tuple");

		if (!isFrozen)
		{
			/* the normal case. regular insert */
			simple_heap_insert(toastrel, toasttup);
		}
		else
		{
			/*
			 * insert and freeze the tuple. used for errtables and their
			 * related toast data
			 */
			frozen_heap_insert(toastrel, toasttup);
		}

		/*
		 * Create the index entry.  We cheat a little here by not using
		 * FormIndexDatum: this relies on the knowledge that the index columns
		 * are the same as the initial columns of the table.
		 *
		 * Note also that there had better not be any user-created index on
		 * the TOAST table, since we don't bother to update anything else.
		 */
		index_insert(toastidx, t_values, t_isnull,
					 &(toasttup->t_self),
					 toastrel, toastidx->rd_index->indisunique);

		/*
		 * Free memory
		 */
		heap_freetuple(toasttup);

		/*
		 * Move on to next chunk
		 */
		data_todo -= chunk_size;
		data_p += chunk_size;
	}

	/*
	 * Done - close toast relation
	 */
	index_close(toastidx, RowExclusiveLock);
	heap_close(toastrel, RowExclusiveLock);

	return PointerGetDatum(result);
}
/*
 * InsertFastSequenceEntry
 *
 * Make sure gp_fastsequence holds an entry for (objid, objmod) carrying
 * 'lastSequence'.
 *
 * The catalog is searched for an existing row with that key.  When none is
 * found a new tuple is formed, inserted with frozen_heap_insert and indexed.
 * When one is found it is handed to update_fastsequence together with
 * 'lastSequence'.
 */
void
InsertFastSequenceEntry(Oid objid, int64 objmod, int64 lastSequence)
{
	Relation	fastSeqRel;
	TupleDesc	fastSeqDesc;
	HeapTuple	entry = NULL;
	cqContext	cqc;

	/*
	 * Open and lock the gp_fastsequence catalog table.
	 */
	fastSeqRel = heap_open(FastSequenceRelationId, RowExclusiveLock);
	fastSeqDesc = RelationGetDescr(fastSeqRel);

	/* Look for an entry that is already there for this key. */
	entry = caql_getfirst(
			caql_addrel(cqclr(&cqc), fastSeqRel),
			cql("SELECT * FROM gp_fastsequence "
				" WHERE objid = :1 "
				" AND objmod = :2 "
				" FOR UPDATE ",
				ObjectIdGetDatum(objid),
				Int64GetDatum(objmod)));

	if (HeapTupleIsValid(entry))
	{
		/* Existing entry: let update_fastsequence deal with it. */
		update_fastsequence(fastSeqRel,
							entry,
							fastSeqDesc,
							objid,
							objmod,
							lastSequence);
	}
	else
	{
		/* No entry yet: build one from scratch and insert it frozen. */
		int			numAttrs = fastSeqDesc->natts;
		Datum	   *columnValues = palloc0(sizeof(Datum) * numAttrs);
		bool	   *columnNulls = palloc0(sizeof(bool) * numAttrs);

		columnValues[Anum_gp_fastsequence_objid - 1] = ObjectIdGetDatum(objid);
		columnValues[Anum_gp_fastsequence_objmod - 1] = Int64GetDatum(objmod);
		columnValues[Anum_gp_fastsequence_last_sequence - 1] = Int64GetDatum(lastSequence);

		entry = heaptuple_form_to(fastSeqDesc, columnValues, columnNulls, NULL, NULL);

		frozen_heap_insert(fastSeqRel, entry);
		CatalogUpdateIndexes(fastSeqRel, entry);

		pfree(columnValues);
		pfree(columnNulls);
	}

	/* Whichever tuple we ended up holding, we are done with it. */
	heap_freetuple(entry);

	/*
	 * gp_fastsequence table locking for AO inserts uses a bottom-up approach,
	 * meaning the locks are first acquired on the segments and later on the
	 * master.  Hence, it is essential that we release the lock here to avoid
	 * any form of master-segment resource deadlock.
	 *
	 * E.g. a transaction trying to reindex gp_fastsequence has acquired a
	 * lock on it on the master but is blocked on the segment, because another
	 * transaction -- an insert operation -- acquired a lock on the segment
	 * first and is now trying to acquire a lock on the master.  Deadlock!
	 */
	heap_close(fastSeqRel, RowExclusiveLock);
}
/*
 * PersistentBuild_BuildDb
 *
 * Build the persistent-table information for database 'dbOid':
 *
 *	1. populate gp_global_sequence with GpGlobalSequence_MaxSequenceTid
 *	   frozen zero-valued tuples if a scan finds it empty;
 *	2. look up the database's default tablespace;
 *	3. collect the database's information with DatabaseInfo_Collect and add
 *	   a "created" persistent database entry for every tablespace it reports
 *	   other than the global tablespace;
 *	4. populate gp_relation_node via PersistentBuild_PopulateGpRelationNode;
 *	5. run a checkpoint.
 *
 * gp_before_persistence_work is set for the duration of the work and reset
 * before the checkpoint.
 *
 * 'mirrored' is not referenced by this function.
 *
 * Returns the count filled in by PersistentBuild_PopulateGpRelationNode.
 */
static int64
PersistentBuild_BuildDb(
	Oid			dbOid,
	bool		mirrored)
{
	int64		count = 0;
	Relation	gp_global_sequence;
	Relation	pg_database;
	HeapTuple	tuple;
	HeapScanDesc scandesc;
	Form_pg_database form_pg_database;
	DatabaseInfo *info;
	Oid			defaultTablespace;
	int			t;
	bool		collectGpRelationNodeInfo,
				collectAppendOnlyCatalogSegmentInfo;

	/*
	 * Turn this on so we don't try to fetch persistence information from
	 * gp_relation_node for gp_relation_node and its index until we've done the
	 * assignment with PersistentRelation_AddCreated.
	 */
	gp_before_persistence_work = true;

	/*
	 * If the gp_global_sequence table hasn't been populated yet then we need
	 * to populate it before we can proceed with building the rest of the
	 * persistent tables.
	 */
	gp_global_sequence = heap_open(GpGlobalSequenceRelationId, RowExclusiveLock);
	scandesc = heap_beginscan(gp_global_sequence, SnapshotAny, 0, NULL);
	tuple = heap_getnext(scandesc, ForwardScanDirection);
	if (!HeapTupleIsValid(tuple))
	{
		TupleDesc	tupDesc;
		HeapTuple	seqTuple;
		Datum		values[Natts_gp_global_sequence];
		bool		nulls[Natts_gp_global_sequence];

		/* Insert N frozen tuples of value 0 */
		tupDesc = RelationGetDescr(gp_global_sequence);

		MemSet(nulls, false, sizeof(nulls));
		values[Anum_gp_global_sequence_sequence_num - 1] = Int64GetDatum(0);
		seqTuple = heap_form_tuple(tupDesc, values, nulls);

		if (!HeapTupleIsValid(seqTuple))
			elog(ERROR, "failed to build global sequence tuple");

		for (t = 0; t < GpGlobalSequence_MaxSequenceTid; t++)
			frozen_heap_insert(gp_global_sequence, seqTuple);

		/* The same in-memory tuple served every insert; release it now. */
		heap_freetuple(seqTuple);
	}
	heap_endscan(scandesc);
	heap_close(gp_global_sequence, RowExclusiveLock);

	/* Lookup the information for the current database */
	pg_database = heap_open(DatabaseRelationId, AccessShareLock);

	/* Fetch a private copy of the pg_database tuple */
	tuple = SearchSysCacheCopy(DATABASEOID,
							   ObjectIdGetDatum(dbOid),
							   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for database %u", dbOid);
	form_pg_database = (Form_pg_database) GETSTRUCT(tuple);

	defaultTablespace = form_pg_database->dattablespace;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_BuildDb: dbOid %u, '%s'",
			 dbOid,
			 form_pg_database->datname.data);

	/*
	 * Everything needed from the pg_database row has been read;
	 * form_pg_database points into the copy and must not be used past here.
	 */
	heap_freetuple(tuple);

	/*
	 * Special call here to scan the persistent meta-data structures so we are
	 * open for business and then we can add information.
	 */
	PersistentFileSysObj_BuildInitScan();

	/*
	 * In upgrade mode on the dispatcher or in utility mode, neither the
	 * gp_relation_node nor the append-only catalog segment information is
	 * collected.
	 */
	if (gp_upgrade_mode &&
		(Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_UTILITY))
	{
		collectGpRelationNodeInfo = false;
		collectAppendOnlyCatalogSegmentInfo = false;
	}
	else
	{
		collectGpRelationNodeInfo = true;
		collectAppendOnlyCatalogSegmentInfo = true;
	}

	info = DatabaseInfo_Collect(
							dbOid,
							defaultTablespace,
							collectGpRelationNodeInfo,
							collectAppendOnlyCatalogSegmentInfo,
							/* scanFileSystem */ true);

	/* Register the database directory in every non-global tablespace. */
	for (t = 0; t < info->tablespacesCount; t++)
	{
		Oid			tablespace = info->tablespaces[t];
		DbDirNode	dbDirNode;
		ItemPointerData persistentTid;

		if (tablespace == GLOBALTABLESPACE_OID)
			continue;

		dbDirNode.tablespace = tablespace;
		dbDirNode.database = dbOid;

		PersistentDatabase_AddCreated(
								&dbDirNode,
								&persistentTid,
								/* flushToXLog */ false);
	}

	PersistentBuild_PopulateGpRelationNode(
										info,
										defaultTablespace,
										&count);

	heap_close(pg_database, AccessShareLock);

	gp_before_persistence_work = false;

	/*
	 * Since we have written XLOG records with <persistentTid,
	 * persistentSerialNum> of zeroes because of the gp_before_persistence_work
	 * GUC, let's do a checkpoint to force out all buffer pool pages so we never
	 * try to redo those XLOG records in Crash Recovery.
	 */
	CreateCheckPoint(false, true);

	return count;
}