/*
 * UpdateParquetFileSegInfo
 *
 * Update the catalog entry describing one segment file of a parquet table:
 * store the new eof and uncompressed eof, and add tuples_added to the
 * tuple count already recorded for that segment.
 *
 *	parentrel			the parquet table
 *	aoEntry				supplies the OIDs of the segment relation and its index
 *	segno				number of the segment file whose entry is updated
 *	eof					new eof value to store (asserted to be >= 0)
 *	eof_uncompressed	new uncompressed eof value to store
 *	tuples_added		delta applied to the stored tuple count
 *
 * Raises an ERROR when, outside the dispatcher role, the segment-file lock
 * is not already held; when no entry exists for segno; or when the stored
 * tuple count is NULL.
 */
void
UpdateParquetFileSegInfo(Relation parentrel,
						 AppendOnlyEntry *aoEntry,
						 int segno,
						 int64 eof,
						 int64 eof_uncompressed,
						 int64 tuples_added)
{
	Relation	segrel;
	TupleDesc	segdesc;
	ScanKeyData	segkey;
	SysScanDesc	segscan;
	HeapTuple	oldtup;
	HeapTuple	newtup;
	Datum		oldcount;
	Datum		newcount;
	Datum	   *values;
	bool	   *nulls;
	bool	   *replace;
	bool		countIsNull;
	int			natts;

	/*
	 * Only eof gets an overflow sanity check.  tuples_added is deliberately
	 * left unchecked because it may arrive as a negative diff from
	 * gp_update_ao_master_stats.
	 */
	Assert(eof >= 0);

	Insist(Gp_role != GP_ROLE_EXECUTE);

	elog(DEBUG3, "UpdateParquetFileSegInfo called. segno = %d", segno);

	/*
	 * Unless we are the dispatcher, verify that the write-lock on the
	 * segment file is already ours: asking for it again must report that it
	 * was already held.
	 */
	if (Gp_role != GP_ROLE_DISPATCH &&
		LockRelationAppendOnlySegmentFile(&parentrel->rd_node,
										  segno,
										  AccessExclusiveLock,
										  /* dontWait */ false) != LOCKACQUIRE_ALREADY_HELD)
	{
		elog(ERROR, "Should already have the (transaction-scope) write-lock on Parquet segment file #%d, "
			 "relation %s", segno, RelationGetRelationName(parentrel));
	}

	/* Open the segment relation; rows are located through its index. */
	segrel = heap_open(aoEntry->segrelid, RowExclusiveLock);
	segdesc = segrel->rd_att;
	natts = segdesc->natts;

	/* Look up the row for this segment number. */
	ScanKeyInit(&segkey,
				(AttrNumber) Anum_pg_parquetseg_segno,
				BTEqualStrategyNumber,
				F_INT4EQ,
				Int32GetDatum(segno));

	segscan = systable_beginscan(segrel, aoEntry->segidxid, TRUE,
								 SnapshotNow, 1, &segkey);

	oldtup = systable_getnext(segscan);
	if (!HeapTupleIsValid(oldtup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("parquet table \"%s\" file segment \"%d\" entry "
						"does not exist",
						RelationGetRelationName(parentrel), segno)));

	/* Zero-filled work arrays: by default nothing is NULL or replaced. */
	values = palloc0(sizeof(Datum) * natts);
	nulls = palloc0(sizeof(bool) * natts);
	replace = palloc0(sizeof(bool) * natts);

	/* Fetch the tuple count stored so far; the delta is added to it. */
	oldcount = fastgetattr(oldtup,
						   Anum_pg_parquetseg_tupcount,
						   segdesc,
						   &countIsNull);
	if (countIsNull)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("got invalid pg_aoseg filetupcount value: NULL")));

	newcount = DirectFunctionCall2(float8pl,
								   oldcount,
								   Float8GetDatum((float8) tuples_added));

	/* Mark the three columns to replace and supply their new values. */
	replace[Anum_pg_parquetseg_eof - 1] = true;
	values[Anum_pg_parquetseg_eof - 1] = Float8GetDatum((float8) eof);

	replace[Anum_pg_parquetseg_tupcount - 1] = true;
	values[Anum_pg_parquetseg_tupcount - 1] = newcount;

	replace[Anum_pg_parquetseg_eofuncompressed - 1] = true;
	values[Anum_pg_parquetseg_eofuncompressed - 1] =
		Float8GetDatum((float8) eof_uncompressed);

	/* Build the modified row, store it, and update the indexes. */
	newtup = heap_modify_tuple(oldtup, segdesc, values, nulls, replace);

	simple_heap_update(segrel, &oldtup->t_self, newtup);
	CatalogUpdateIndexes(segrel, newtup);

	heap_freetuple(newtup);

	/* Finish the scan and release everything we allocated. */
	systable_endscan(segscan);
	heap_close(segrel, RowExclusiveLock);

	pfree(values);
	pfree(nulls);
	pfree(replace);
}
/*
 * inv_write
 *
 * Write nbytes bytes from buf into the large object described by obj_desc,
 * starting at obj_desc->offset.  The data is stored page by page
 * (LOBLKSIZE bytes per page): a page that already exists is rewritten with
 * the new bytes merged in, a page that does not exist is inserted.
 *
 * obj_desc->offset is advanced by the number of bytes written, which is also
 * the return value.  Returns 0 without touching anything when nbytes <= 0.
 *
 * Raises an ERROR if the descriptor was not opened for writing, if the large
 * object no longer exists, or if an existing page tuple contains a null.
 */
int
inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
{
	int			written = 0;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData skey[2];
	SysScanDesc scan;
	HeapTuple	curtup = NULL;
	Form_pg_largeobject curpage = NULL;
	bool		fetch_next = true;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *page = VARDATA(&workbuf.hdr);
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	/* enforce writability because snapshot is probably wrong otherwise */
	if ((obj_desc->flags & IFS_WRLOCK) == 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("large object %u was not opened for writing",
						obj_desc->id)));

	/* the target large object must still exist */
	if (!LargeObjectExists(obj_desc->id))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("large object %u was already dropped", obj_desc->id)));

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/* Scan the pages of this object from the first page we touch onward. */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	scan = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									  obj_desc->snapshot, 2, skey);

	for (; written < nbytes; pageno++)
	{
		bool		overwrite;
		int			off;
		int			chunk;
		int			pagelen;
		HeapTuple	newtup;

		/*
		 * Pull the next pre-existing page, if we consumed the previous one.
		 * Pages are expected in order, but page numbers may have gaps.
		 */
		if (fetch_next)
		{
			curtup = systable_getnext_ordered(scan, ForwardScanDirection);
			if (curtup != NULL)
			{
				if (HeapTupleHasNulls(curtup))	/* paranoia */
					elog(ERROR, "null field found in pg_largeobject");
				curpage = (Form_pg_largeobject) GETSTRUCT(curtup);
				Assert(curpage->pageno >= pageno);
			}
			fetch_next = false;
		}

		/* Is the fetched page the one being written, or a later one? */
		overwrite = (curpage != NULL && curpage->pageno == pageno);

		/* Position within the page and number of bytes that go into it. */
		off = (int) (obj_desc->offset % LOBLKSIZE);
		chunk = LOBLKSIZE - off;
		if (chunk > (nbytes - written))
			chunk = nbytes - written;

		if (overwrite)
		{
			bytea	   *olddata = &(curpage->data);
			bool		detoasted = false;
			int			oldlen;

			/* Start from the page's current contents. */
			if (VARATT_IS_EXTENDED(olddata))
			{
				olddata = (bytea *)
					heap_tuple_untoast_attr((struct varlena *) olddata);
				detoasted = true;
			}
			oldlen = getbytealen(olddata);
			Assert(oldlen <= LOBLKSIZE);
			memcpy(page, VARDATA(olddata), oldlen);
			if (detoasted)
				pfree(olddata);

			/* Zero the gap between the old end and the write position. */
			if (off > oldlen)
				MemSet(page + oldlen, 0, off - oldlen);

			/* The page keeps whichever end is further out. */
			pagelen = (oldlen >= off + chunk) ? oldlen : (off + chunk);
		}
		else
		{
			/* Brand new page: everything before the write position is zero. */
			if (off > 0)
				MemSet(page, 0, off);

			pagelen = off + chunk;
		}

		/* Merge in the caller's bytes and advance the write position. */
		memcpy(page + off, buf + written, chunk);
		written += chunk;
		obj_desc->offset += chunk;

		SET_VARSIZE(&workbuf.hdr, pagelen + VARHDRSZ);

		/* Store the page: update the existing tuple or insert a new one. */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));

		if (overwrite)
		{
			memset(replace, false, sizeof(replace));
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			replace[Anum_pg_largeobject_data - 1] = true;

			newtup = heap_modify_tuple(curtup, RelationGetDescr(lo_heap_r),
									   values, nulls, replace);
			simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);

			/* This pre-existing page is used up; fetch another next time. */
			curtup = NULL;
			curpage = NULL;
			fetch_next = true;
		}
		else
		{
			values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
			values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);

			newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
			simple_heap_insert(lo_heap_r, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);
		}
	}

	systable_endscan_ordered(scan);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that my tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();

	return written;
}
/*
 * ChooseConstraintName
 *		Pick a constraint name that does not collide with an existing one.
 *
 * The chosen name is unique within the given namespace.  Postgres itself
 * does not demand that, but the SQL spec does and some applications rely on
 * it, so default names that would conflict are avoided.
 *
 * name1, name2 and label are handed to makeObjectName(); unlike there, label
 * must not be NULL.  The first candidate uses label unchanged; every further
 * candidate appends a counter (1, 2, ...) to label until a free name turns
 * up.
 *
 * 'others' may list names (C strings) already chosen earlier in the current
 * command but not yet visible in the catalogs; those are avoided as well.
 *
 * Note: a collision is still theoretically possible if someone else picks
 * the same name concurrently.  That is unlikely to matter in practice,
 * particularly when an exclusive lock is held on the relation that name1
 * identifies.
 *
 * The result is a palloc'd string.
 */
char *
ChooseConstraintName(const char *name1, const char *name2,
					 const char *label, Oid namespaceid,
					 List *others)
{
	char		suffix[NAMEDATALEN];
	char	   *candidate = NULL;
	Relation	conrel;
	int			attempt;

	conrel = heap_open(ConstraintRelationId, AccessShareLock);

	for (attempt = 0;; attempt++)
	{
		ListCell   *lc;
		bool		taken = false;

		/* attempt 0 tries the bare label, later attempts add the counter */
		if (attempt == 0)
			StrNCpy(suffix, label, sizeof(suffix));
		else
			snprintf(suffix, sizeof(suffix), "%s%d", label, attempt);

		candidate = makeObjectName(name1, name2, suffix);

		/* Clash with a name already picked in this command? */
		foreach(lc, others)
		{
			if (strcmp((char *) lfirst(lc), candidate) == 0)
			{
				taken = true;
				break;
			}
		}

		/* If not, clash with a constraint already in the catalog? */
		if (!taken)
		{
			ScanKeyData skey[2];
			SysScanDesc scan;

			ScanKeyInit(&skey[0],
						Anum_pg_constraint_conname,
						BTEqualStrategyNumber, F_NAMEEQ,
						CStringGetDatum(candidate));

			ScanKeyInit(&skey[1],
						Anum_pg_constraint_connamespace,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(namespaceid));

			scan = systable_beginscan(conrel, ConstraintNameNspIndexId, true,
									  SnapshotNow, 2, skey);

			taken = (HeapTupleIsValid(systable_getnext(scan)));

			systable_endscan(scan);
		}

		if (!taken)
			break;

		/* conflict: discard this candidate and go around again */
		pfree(candidate);
	}

	heap_close(conrel, AccessShareLock);

	return candidate;
}
/*
 * checkSharedDependencies
 *
 * Check whether there are shared dependency entries for a given shared
 * object; return true if so.
 *
 * In addition, return a string containing a newline-separated list of object
 * descriptions that depend on the shared object, or NULL if none is found.
 * We actually return two such strings; the "detail" result is suitable for
 * returning to the client as an errdetail() string, and is limited in size.
 * The "detail_log" string is potentially much longer, and should be emitted
 * to the server log only.
 *
 * We can find three different kinds of dependencies: dependencies on objects
 * of the current database; dependencies on shared objects; and dependencies
 * on objects local to other databases.  We can (and do) provide descriptions
 * of the two former kinds of objects, but we can't do that for "remote"
 * objects, so we just provide a count of them.
 *
 * If we find a SHARED_DEPENDENCY_PIN entry, we can error out early.
 *
 * NOTE(review): in this file the text of the function stops right after the
 * scan loop below.  The code that would end the scan, close the relation,
 * fill *detail_msg / *detail_log_msg and return a value is not present here,
 * so the return contract described above cannot be checked against this
 * copy.
 */
bool
checkSharedDependencies(Oid classId, Oid objectId,
						char **detail_msg, char **detail_log_msg)
{
	Relation	sdepRel;
	ScanKeyData key[2];
	SysScanDesc scan;
	HeapTuple	tup;
	int			numReportedDeps = 0;	/* entries written to "descs" */
	int			numNotReportedDeps = 0; /* entries left out of "descs" */
	int			numNotReportedDbs = 0;	/* not used in the visible part */
	List	   *remDeps = NIL;	/* one remoteDep per other database seen */
	ListCell   *cell;
	ObjectAddress object;
	StringInfoData descs;		/* size-limited description list */
	StringInfoData alldescs;	/* complete description list */

	/*
	 * We limit the number of dependencies reported to the client to
	 * MAX_REPORTED_DEPS, since client software may not deal well with
	 * enormous error strings.  The server log always gets a full report.
	 */
#define MAX_REPORTED_DEPS 100

	initStringInfo(&descs);
	initStringInfo(&alldescs);

	sdepRel = heap_open(SharedDependRelationId, AccessShareLock);

	/* Find every entry whose referenced object is (classId, objectId). */
	ScanKeyInit(&key[0],
				Anum_pg_shdepend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classId));
	ScanKeyInit(&key[1],
				Anum_pg_shdepend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(objectId));

	scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_shdepend sdepForm = (Form_pg_shdepend) GETSTRUCT(tup);

		/*
		 * This case can be dispatched quickly: a pin entry means the object
		 * itself (not the dependent one) is reported in the error.
		 */
		if (sdepForm->deptype == SHARED_DEPENDENCY_PIN)
		{
			object.classId = classId;
			object.objectId = objectId;
			object.objectSubId = 0;
			ereport(ERROR,
					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
					 errmsg("cannot drop %s because it is required by the database system",
							getObjectDescription(&object))));
		}

		/* From here on "object" identifies the dependent object. */
		object.classId = sdepForm->classid;
		object.objectId = sdepForm->objid;
		object.objectSubId = sdepForm->objsubid;

		/*
		 * If it's a dependency local to this database or it's a shared
		 * object, describe it.
		 *
		 * If it's a remote dependency, keep track of it so we can report the
		 * number of them later.
		 */
		if (sdepForm->dbid == MyDatabaseId)
		{
			/* "descs" is capped at MAX_REPORTED_DEPS; "alldescs" is not */
			if (numReportedDeps < MAX_REPORTED_DEPS)
			{
				numReportedDeps++;
				storeObjectDescription(&descs, LOCAL_OBJECT, &object,
									   sdepForm->deptype, 0);
			}
			else
				numNotReportedDeps++;
			storeObjectDescription(&alldescs, LOCAL_OBJECT, &object,
								   sdepForm->deptype, 0);
		}
		else if (sdepForm->dbid == InvalidOid)
		{
			/* same capping rule as for local objects */
			if (numReportedDeps < MAX_REPORTED_DEPS)
			{
				numReportedDeps++;
				storeObjectDescription(&descs, SHARED_OBJECT, &object,
									   sdepForm->deptype, 0);
			}
			else
				numNotReportedDeps++;
			storeObjectDescription(&alldescs, SHARED_OBJECT, &object,
								   sdepForm->deptype, 0);
		}
		else
		{
			/* It's not local nor shared, so it must be remote. */
			remoteDep  *dep;
			bool		stored = false;

			/*
			 * XXX this info is kept on a simple List.  Maybe it's not good
			 * for performance, but using a hash table seems needlessly
			 * complex.  The expected number of databases is not high anyway,
			 * I suppose.
			 */
			foreach(cell, remDeps)
			{
				dep = lfirst(cell);
				if (dep->dbOid == sdepForm->dbid)
				{
					/* database already listed: just bump its counter */
					dep->count++;
					stored = true;
					break;
				}
			}
			if (!stored)
			{
				/* first dependency seen in this database: add a new entry */
				dep = (remoteDep *) palloc(sizeof(remoteDep));
				dep->dbOid = sdepForm->dbid;
				dep->count = 1;
				remDeps = lappend(remDeps, dep);
			}
		}
	}
/*
 * changeDependencyFor
 *		Re-point dependency record(s) at a different object of the same type.
 *
 * classId/objectId identify the referencing (dependent) object.
 * refClassId/oldRefObjectId identify the object currently referenced.
 * newRefObjectId is the object to reference instead; it must belong to
 * class refClassId.
 *
 * There are no objsubid parameters: every matching record is adjusted,
 * including any that reference subobjects.
 *
 * Returns the number of records changed.  A record that is deleted because
 * the new referenced object is pinned counts as changed.
 */
long
changeDependencyFor(Oid classId, Oid objectId,
					Oid refClassId, Oid oldRefObjectId,
					Oid newRefObjectId)
{
	long		nchanged = 0;
	Relation	depRel;
	ScanKeyData skey[2];
	SysScanDesc depscan;
	HeapTuple	deptup;
	ObjectAddress oldRef;
	ObjectAddress newRef;
	bool		newRefPinned;

	depRel = heap_open(DependRelationId, RowExclusiveLock);

	/*
	 * A pinned old object has no dependency entries pointing at it, so there
	 * would be nothing to adjust --- we can't cope in that case.  (Not worth
	 * extra code in current usage; it only means you can't rename stuff out
	 * of pg_catalog, which would likely be a bad move anyway.)
	 */
	oldRef.classId = refClassId;
	oldRef.objectId = oldRefObjectId;
	oldRef.objectSubId = 0;

	if (isObjectPinned(&oldRef, depRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
		errmsg("cannot remove dependency on %s because it is a system object",
			   getObjectDescription(&oldRef))));

	/*
	 * A pinned new object is fine, though: depending on it simply means the
	 * dependency entry gets deleted.
	 */
	newRef.classId = refClassId;
	newRef.objectId = newRefObjectId;
	newRef.objectSubId = 0;

	newRefPinned = isObjectPinned(&newRef, depRel);

	/* Walk all dependency records of the referencing object. */
	ScanKeyInit(&skey[0],
				Anum_pg_depend_classid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classId));
	ScanKeyInit(&skey[1],
				Anum_pg_depend_objid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(objectId));

	depscan = systable_beginscan(depRel, DependDependerIndexId, true,
								 NULL, 2, skey);

	for (;;)
	{
		Form_pg_depend depform;

		deptup = systable_getnext(depscan);
		if (!HeapTupleIsValid(deptup))
			break;

		depform = (Form_pg_depend) GETSTRUCT(deptup);

		/* skip records that reference something else */
		if (depform->refclassid != refClassId ||
			depform->refobjid != oldRefObjectId)
			continue;

		if (newRefPinned)
		{
			simple_heap_delete(depRel, &deptup->t_self);
		}
		else
		{
			/* work on a modifiable copy of the scanned tuple */
			HeapTuple	modtup = heap_copytuple(deptup);
			Form_pg_depend modform = (Form_pg_depend) GETSTRUCT(modtup);

			modform->refobjid = newRefObjectId;

			simple_heap_update(depRel, &modtup->t_self, modtup);
			CatalogUpdateIndexes(depRel, modtup);

			heap_freetuple(modtup);
		}

		nchanged++;
	}

	systable_endscan(depscan);

	heap_close(depRel, RowExclusiveLock);

	return nchanged;
}
/* ----------
 * toast_fetch_datum_slice -
 *
 *	Reconstruct a segment of a varattrib from the chunks saved
 *	in the toast relation
 *
 *	attr		external toast reference; its va_external fields supply the
 *				toast relation, the value id and the total stored size
 *	sliceoffset byte offset of the first byte wanted
 *	length		number of bytes wanted; a negative value, or a value reaching
 *				past the end of the datum, means "everything from
 *				sliceoffset to the end"
 *
 *	Returns a palloc'd varattrib holding the requested bytes.  A
 *	sliceoffset at or beyond the end of the datum yields an empty result.
 *	The compressed flag of attr is carried over to the result.
 *
 *	Raises an ERROR if a chunk is missing, out of sequence, or has an
 *	unexpected size.
 *
 *	NOTE(review): a negative sliceoffset is not handled anywhere in this
 *	function; callers are presumably expected to pass a value >= 0.
 * ----------
 */
static varattrib *
toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData toastkey[3];
	int			nscankeys;
	IndexScanDesc toastscan;
	HeapTuple	ttup;
	TupleDesc	toasttupDesc;
	varattrib  *result;
	int32		attrsize;
	int32		residx;
	int32		nextidx;
	int			numchunks;
	int			startchunk;
	int			endchunk;
	int32		startoffset;
	int32		endoffset;
	int			totalchunks;
	Pointer		chunk;
	bool		isnull;
	int32		chunksize;
	int32		chcpystrt;
	int32		chcpyend;

	attrsize = attr->va_content.va_external.va_extsize;
	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	/* A start position at or past the end selects nothing. */
	if (sliceoffset >= attrsize)
	{
		sliceoffset = 0;
		length = 0;
	}

	/*
	 * Clamp the request to the bytes that actually exist.  The comparison is
	 * written as "length > attrsize - sliceoffset" rather than
	 * "sliceoffset + length > attrsize": the sum of two int32 values can
	 * overflow for a very large requested length, which would let the
	 * request slip past the clamp.  The difference cannot overflow for the
	 * non-negative sliceoffset (below attrsize) we have at this point.
	 */
	if (length < 0 || length > attrsize - sliceoffset)
		length = attrsize - sliceoffset;

	result = (varattrib *) palloc(length + VARHDRSZ);
	VARATT_SIZEP(result) = length + VARHDRSZ;

	if (VARATT_IS_COMPRESSED(attr))
		VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;

	if (length == 0)
		return (result);		/* Can save a lot of work at this point! */

	/* Chunk numbers covered by the slice, and the offsets inside them. */
	startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
	endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
	numchunks = (endchunk - startchunk) + 1;

	startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
	endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 AccessShareLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid);

	/*
	 * Setup a scan key to fetch from the index. This is either two keys or
	 * three depending on the number of chunks.
	 */
	ScanKeyInit(&toastkey[0],
				(AttrNumber) 1,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Use equality condition for one chunk, a range condition otherwise:
	 */
	if (numchunks == 1)
	{
		ScanKeyInit(&toastkey[1],
					(AttrNumber) 2,
					BTEqualStrategyNumber, F_INT4EQ,
					Int32GetDatum(startchunk));
		nscankeys = 2;
	}
	else
	{
		ScanKeyInit(&toastkey[1],
					(AttrNumber) 2,
					BTGreaterEqualStrategyNumber, F_INT4GE,
					Int32GetDatum(startchunk));
		ScanKeyInit(&toastkey[2],
					(AttrNumber) 2,
					BTLessEqualStrategyNumber, F_INT4LE,
					Int32GetDatum(endchunk));
		nscankeys = 3;
	}

	/*
	 * Read the chunks by index
	 *
	 * The index is on (valueid, chunkidx) so they will come in order
	 */
	nextidx = startchunk;

	toastscan = index_beginscan(toastrel, toastidx, SnapshotToast,
								nscankeys, toastkey);
	while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
	{
		/*
		 * Have a chunk, extract the sequence number and the data
		 */
		residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
		Assert(!isnull);
		chunksize = VARATT_SIZE(chunk) - VARHDRSZ;

		/*
		 * Some checks on the data we've found: chunks must arrive strictly
		 * in sequence and stay inside the requested range.
		 */
		if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
				 residx, nextidx,
				 attr->va_content.va_external.va_valueid);

		/*
		 * Every chunk but the last one of the value must be full-sized; the
		 * last one must end exactly at the stored size.
		 */
		if (residx < totalchunks - 1)
		{
			if (chunksize != TOAST_MAX_CHUNK_SIZE)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		else
		{
			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}

		/*
		 * Copy the data into proper place in our result.  Only the first and
		 * last chunk of the slice may be copied partially.
		 */
		chcpystrt = 0;
		chcpyend = chunksize - 1;
		if (residx == startchunk)
			chcpystrt = startoffset;
		if (residx == endchunk)
			chcpyend = endoffset;

		memcpy(((char *) VARATT_DATA(result)) +
			   (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
			   VARATT_DATA(chunk) + chcpystrt,
			   (chcpyend - chcpystrt) + 1);

		nextidx++;
	}

	/*
	 * Final checks that we successfully fetched the datum
	 */
	if (nextidx != (endchunk + 1))
		elog(ERROR, "missing chunk number %d for toast value %u",
			 nextidx,
			 attr->va_content.va_external.va_valueid);

	/*
	 * End scan and close relations
	 */
	index_endscan(toastscan);
	index_close(toastidx);
	heap_close(toastrel, AccessShareLock);

	return result;
}
/*
 * GetNewSequenceRelationOid
 *		Get a sequence relation Oid and verify it is valid against
 *		the pg_class relation by doing an index lookup.  The caller
 *		should have a suitable lock on pg_class.
 *
 * relation must be pg_class (asserted).  Candidate OIDs are drawn from
 * GetNewSequenceRelationObjectId() until one passes all three checks:
 *	1. no row with that OID is found through pg_class's OID index,
 *	2. no relation file named after that OID can be opened,
 *	3. IsOidAcceptable() accepts it.
 *
 * Returns the first candidate that passes.
 */
Oid
GetNewSequenceRelationOid(Relation relation)
{
	Oid			newOid;
	Oid			oidIndex;
	Relation	indexrel;
	SnapshotData SnapshotDirty;
	IndexScanDesc scan;
	ScanKeyData key;
	bool		collides;
	RelFileNode rnode;
	char	   *rpath;
	int			fd;

	/*
	 * This should match RelationInitPhysicalAddr.  Only the tablespace and
	 * database parts are fixed here; relNode is filled in per candidate
	 * inside the loop.
	 */
	rnode.spcNode = relation->rd_rel->reltablespace ?
		relation->rd_rel->reltablespace : MyDatabaseTableSpace;
	rnode.dbNode = relation->rd_rel->relisshared ? InvalidOid : MyDatabaseId;

	/* We should only be using pg_class */
	Assert(RelationGetRelid(relation) == RelationRelationId);

	/* The relcache will cache the identity of the OID index for us */
	oidIndex = RelationGetOidIndex(relation);

	/* Use the OID index to find a nonconflicting OID */
	indexrel = index_open(oidIndex, AccessShareLock);

	InitDirtySnapshot(SnapshotDirty);

	/* Generate new sequence relation OIDs until we find one not in the table */
	do
	{
		CHECK_FOR_INTERRUPTS();

		newOid = GetNewSequenceRelationObjectId();

		ScanKeyInit(&key,
					(AttrNumber) 1,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(newOid));

		/*
		 * Probe the index with the dirty snapshot (presumably so that rows
		 * from transactions still in progress count as collisions too ---
		 * TODO confirm).
		 */
		scan = index_beginscan(relation, indexrel, &SnapshotDirty, 1, &key);

		collides = HeapTupleIsValid(index_getnext(scan, ForwardScanDirection));

		index_endscan(scan);

		if (!collides)
		{
			/*
			 * Check for existing file of same name.  The candidate must be
			 * stored in rnode first: without it relpath() would build the
			 * path from an uninitialized relNode and the probe would look at
			 * an arbitrary file name.
			 */
			rnode.relNode = newOid;
			rpath = relpath(rnode);
			fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);

			if (fd >= 0)
			{
				/* definite collision */
				gp_retry_close(fd);
				collides = true;
			}

			/*
			 * When the open fails we have a little bit of a dilemma: if errno
			 * is something other than ENOENT, should we declare a collision
			 * and loop? In particular one might think this advisable for,
			 * say, EPERM.  However there really shouldn't be any unreadable
			 * files in a tablespace directory, and if the EPERM is actually
			 * complaining that we can't read the directory itself, we'd be
			 * in an infinite loop.  In practice it seems best to go ahead
			 * regardless of the errno.  If there is a colliding file we will
			 * get an smgr failure when we attempt to create the new relation
			 * file.  So "collides" simply stays false here.
			 */

			/* don't accumulate one path string per retry */
			pfree(rpath);
		}

		/*
		 * Also check that the OID hasn't been pre-assigned for a different
		 * relation.
		 *
		 * We're a bit sloppy between OIDs and relfilenodes here; it would be
		 * OK to use a value that's been reserved for use as a type or
		 * relation OID here, as long as the relfilenode is free. But there's
		 * no harm in skipping over those too, so we don't bother to
		 * distinguish them.
		 */
		if (!collides && !IsOidAcceptable(newOid))
			collides = true;

	} while (collides);

	index_close(indexrel, AccessShareLock);

	return newOid;
}
/*
 * AlterTableSpaceOwner
 *		Change the owner of the tablespace called 'name' to newOwnerId.
 *
 * Raises an ERROR if no such tablespace exists.  When the tablespace is
 * already owned by newOwnerId nothing is changed and no permission check is
 * made.  Otherwise the current user must own the tablespace and must be
 * able to become the new owner; the owner column, the ACL (if it is not
 * NULL) and the owner dependency are then updated.
 */
void
AlterTableSpaceOwner(const char *name, Oid newOwnerId)
{
	Relation	spcrel;
	ScanKeyData namekey;
	HeapScanDesc spcscan;
	HeapTuple	oldtup;
	HeapTuple	newtup;
	Form_pg_tablespace spcForm;
	Datum		values[Natts_pg_tablespace];
	bool		nulls[Natts_pg_tablespace];
	bool		replace[Natts_pg_tablespace];
	Datum		oldAcl;
	bool		aclIsNull;

	/* Look the tablespace up by name in pg_tablespace */
	spcrel = heap_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&namekey,
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(name));
	spcscan = heap_beginscan(spcrel, SnapshotNow, 1, &namekey);

	oldtup = heap_getnext(spcscan, ForwardScanDirection);
	if (!HeapTupleIsValid(oldtup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist", name)));

	spcForm = (Form_pg_tablespace) GETSTRUCT(oldtup);

	/*
	 * Nothing to do when the owner would not change; the command is simply
	 * considered to have succeeded.  This is for dump restoration purposes.
	 */
	if (spcForm->spcowner == newOwnerId)
	{
		heap_endscan(spcscan);
		heap_close(spcrel, NoLock);
		return;
	}

	/* Must be owner of the existing object */
	if (!pg_tablespace_ownercheck(HeapTupleGetOid(oldtup), GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE,
					   name);

	/* Must be able to become new owner */
	check_is_member_of_role(GetUserId(), newOwnerId);

	/*
	 * Normally a create-permission check would follow here, but tablespaces
	 * have no such permission, so --- like rename tablespace --- we omit it.
	 *
	 * NOTE: Only superusers may create tablespaces to begin with and so
	 * initially only a superuser would be able to change its ownership
	 * anyway.
	 */

	/* By default no column is replaced and none is NULL */
	memset(nulls, false, sizeof(nulls));
	memset(replace, false, sizeof(replace));

	values[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(newOwnerId);
	replace[Anum_pg_tablespace_spcowner - 1] = true;

	/* The ACL needs rewriting for the new owner only if it is non-null */
	oldAcl = heap_getattr(oldtup,
						  Anum_pg_tablespace_spcacl,
						  RelationGetDescr(spcrel),
						  &aclIsNull);
	if (!aclIsNull)
	{
		Acl		   *ownerAcl;

		ownerAcl = aclnewowner(DatumGetAclP(oldAcl),
							   spcForm->spcowner, newOwnerId);
		values[Anum_pg_tablespace_spcacl - 1] = PointerGetDatum(ownerAcl);
		replace[Anum_pg_tablespace_spcacl - 1] = true;
	}

	newtup = heap_modify_tuple(oldtup, RelationGetDescr(spcrel),
							   values, nulls, replace);

	simple_heap_update(spcrel, &newtup->t_self, newtup);
	CatalogUpdateIndexes(spcrel, newtup);

	heap_freetuple(newtup);

	/* Update owner dependency reference */
	changeDependencyOnOwner(TableSpaceRelationId, HeapTupleGetOid(oldtup),
							newOwnerId);

	heap_endscan(spcscan);
	heap_close(spcrel, NoLock);
}
/*
 * AlterTableSpaceOptions
 *		Apply the option changes in 'stmt' to the named tablespace.
 *
 * The tablespace is looked up by stmt->tablespacename; an ERROR is raised
 * if it does not exist or the current user does not own it.  The stored
 * spcoptions value is combined with stmt->options by transformRelOptions()
 * (stmt->isReset is passed through), the result is checked with
 * tablespace_reloptions(), and the spcoptions column is rewritten --- set to
 * NULL when the combined result is empty.
 */
void
AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
{
	Relation	spcrel;
	ScanKeyData namekey;
	HeapScanDesc spcscan;
	HeapTuple	oldtup;
	HeapTuple	newtup;
	Datum		curOptions;
	Datum		mergedOptions;
	bool		curIsNull;
	Datum		values[Natts_pg_tablespace];
	bool		nulls[Natts_pg_tablespace];
	bool		replace[Natts_pg_tablespace];

	/* Look the tablespace up by name in pg_tablespace */
	spcrel = heap_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&namekey,
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->tablespacename));
	spcscan = heap_beginscan(spcrel, SnapshotNow, 1, &namekey);

	oldtup = heap_getnext(spcscan, ForwardScanDirection);
	if (!HeapTupleIsValid(oldtup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist",
						stmt->tablespacename)));

	/* Must be owner of the existing object */
	if (!pg_tablespace_ownercheck(HeapTupleGetOid(oldtup), GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE,
					   stmt->tablespacename);

	/* Work out the proposed new spcoptions (text array) */
	curOptions = heap_getattr(oldtup,
							  Anum_pg_tablespace_spcoptions,
							  RelationGetDescr(spcrel),
							  &curIsNull);
	if (curIsNull)
		curOptions = (Datum) 0;

	mergedOptions = transformRelOptions(curOptions,
										stmt->options, NULL, NULL, false,
										stmt->isReset);

	/* called for its checking only; the return value is not needed */
	(void) tablespace_reloptions(mergedOptions, true);

	/* Replace just the spcoptions column, storing NULL for "no options" */
	memset(nulls, false, sizeof(nulls));
	memset(replace, false, sizeof(replace));

	if (mergedOptions == (Datum) 0)
		nulls[Anum_pg_tablespace_spcoptions - 1] = true;
	else
		values[Anum_pg_tablespace_spcoptions - 1] = mergedOptions;
	replace[Anum_pg_tablespace_spcoptions - 1] = true;

	newtup = heap_modify_tuple(oldtup, RelationGetDescr(spcrel),
							   values, nulls, replace);

	/* Store the new row and update the indexes */
	simple_heap_update(spcrel, &newtup->t_self, newtup);
	CatalogUpdateIndexes(spcrel, newtup);

	heap_freetuple(newtup);

	/* Done with the scan and the relation */
	heap_endscan(spcscan);
	heap_close(spcrel, NoLock);
}
/*
 * inv_write
 *
 * Write 'nbytes' bytes from 'buf' into the large object described by
 * 'obj_desc', starting at obj_desc->offset and advancing that offset as
 * data is written.  The data is stored one LOBLKSIZE page at a time:
 * pages that already exist are updated in place, missing pages are
 * inserted.
 *
 * Returns the number of bytes written (0 when nbytes <= 0).
 */
int
inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
	int			nwritten = 0;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData keys[2];
	IndexScanDesc scan;
	HeapTuple	oldtuple = NULL;
	Form_pg_largeobject olddata = NULL;
	bool		fetchNext = true;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];
	}			workbuf;
	char	   *pagebuf = VARATT_DATA(&workbuf.hdr);
	Datum		values[Natts_pg_largeobject];
	char		nulls[Natts_pg_largeobject];
	char		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/* Scan this object's pages starting at the first page we will touch. */
	ScanKeyInit(&keys[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));
	ScanKeyInit(&keys[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	scan = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 2, keys);

	while (nwritten < nbytes)
	{
		bool		overwrite;
		int			pagelen;
		int			pageoff;
		int			chunk;
		int			remaining;
		HeapTuple	newtup;

		/*
		 * Pull the next pre-existing page, if we consumed the previous one.
		 * The index scan is assumed to return pages in order, possibly with
		 * holes between them.
		 */
		if (fetchNext)
		{
			oldtuple = index_getnext(scan, ForwardScanDirection);
			if (oldtuple != NULL)
			{
				olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
				Assert(olddata->pageno >= pageno);
			}
			fetchNext = false;
		}

		/* Is the fetched page the one we are about to write, or a later one? */
		overwrite = (olddata != NULL && olddata->pageno == pageno);

		if (overwrite)
		{
			/* Start from the stored contents of the existing page. */
			bytea	   *stored = &(olddata->data);
			bool		detoasted = false;

			if (VARATT_IS_EXTENDED(stored))
			{
				stored = (bytea *) heap_tuple_untoast_attr((varattrib *) stored);
				detoasted = true;
			}
			pagelen = getbytealen(stored);
			Assert(pagelen <= LOBLKSIZE);
			memcpy(pagebuf, VARDATA(stored), pagelen);
			if (detoasted)
				pfree(stored);
		}
		else
		{
			/* Brand new page: nothing valid in the work buffer yet. */
			pagelen = 0;
		}

		/* Zero-fill any hole between the valid data and the write position. */
		pageoff = (int) (obj_desc->offset % LOBLKSIZE);
		if (pageoff > pagelen)
			MemSet(pagebuf + pagelen, 0, pageoff - pagelen);

		/* Copy in as much new data as fits on this page. */
		chunk = LOBLKSIZE - pageoff;
		remaining = nbytes - nwritten;
		if (chunk > remaining)
			chunk = remaining;
		memcpy(pagebuf + pageoff, buf + nwritten, chunk);
		nwritten += chunk;
		obj_desc->offset += chunk;

		/* The page is now valid up to the larger of old length and write end. */
		if (pageoff + chunk > pagelen)
			pagelen = pageoff + chunk;
		VARATT_SIZEP(&workbuf.hdr) = pagelen + VARHDRSZ;

		memset(values, 0, sizeof(values));
		memset(nulls, ' ', sizeof(nulls));

		if (overwrite)
		{
			/* Replace only the data column of the existing row. */
			memset(replace, ' ', sizeof(replace));
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			replace[Anum_pg_largeobject_data - 1] = 'r';
			newtup = heap_modifytuple(oldtuple, lo_heap_r,
									  values, nulls, replace);
			simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);

			/* This old page is used up; fetch another on the next pass. */
			oldtuple = NULL;
			olddata = NULL;
			fetchNext = true;
		}
		else
		{
			/* Insert a complete new row for this page. */
			values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
			values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls);
			simple_heap_insert(lo_heap_r, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);
		}

		pageno++;
	}

	index_endscan(scan);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that these tuple updates will be seen by
	 * later large-object operations in this transaction.
	 */
	CommandCounterIncrement();

	return nwritten;
}
/*
 * sepgsql_database_post_create
 *
 * This routine assigns a default security label on a newly defined
 * database, and checks the permissions needed for its creation.
 *
 * databaseId - OID of the database that was just created
 * dtemplate  - name of the source (template) database, or NULL, in which
 *              case "template1" is used
 */
void
sepgsql_database_post_create(Oid databaseId, const char *dtemplate)
{
	Relation	rel;
	ScanKeyData skey;
	SysScanDesc sscan;
	HeapTuple	tuple;
	char	   *tcontext;
	char	   *ncontext;
	ObjectAddress object;
	Form_pg_database datForm;
	StringInfoData audit_name;

	/*
	 * Oid of the source database is not saved in pg_database catalog, so we
	 * collect its identifier using contextual information. If NULL, its
	 * default is "template1" according to createdb().
	 */
	if (!dtemplate)
		dtemplate = "template1";

	object.classId = DatabaseRelationId;
	object.objectId = get_database_oid(dtemplate, false);
	object.objectSubId = 0;

	tcontext = sepgsql_get_label(object.classId,
								 object.objectId,
								 object.objectSubId);

	/*
	 * check db_database:{getattr} permission
	 */
	initStringInfo(&audit_name);
	appendStringInfo(&audit_name, "%s", quote_identifier(dtemplate));
	sepgsql_avc_check_perms_label(tcontext,
								  SEPG_CLASS_DB_DATABASE,
								  SEPG_DB_DATABASE__GETATTR,
								  audit_name.data,
								  true);

	/*
	 * Compute a default security label of the newly created database based on
	 * a pair of security label of client and source database.
	 *
	 * XXX - upcoming version of libselinux supports to take object name to
	 * handle special treatment on default security label.
	 */
	rel = heap_open(DatabaseRelationId, AccessShareLock);

	ScanKeyInit(&skey,
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(databaseId));

	sscan = systable_beginscan(rel, DatabaseOidIndexId, true,
							   SnapshotSelf, 1, &skey);
	tuple = systable_getnext(sscan);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for database %u", databaseId);

	datForm = (Form_pg_database) GETSTRUCT(tuple);

	/* datForm points into the scan tuple, so use it before ending the scan */
	ncontext = sepgsql_compute_create(sepgsql_get_client_label(),
									  tcontext,
									  SEPG_CLASS_DB_DATABASE,
									  NameStr(datForm->datname));

	/*
	 * check db_database:{create} permission
	 */
	resetStringInfo(&audit_name);
	appendStringInfo(&audit_name, "%s",
					 quote_identifier(NameStr(datForm->datname)));
	sepgsql_avc_check_perms_label(ncontext,
								  SEPG_CLASS_DB_DATABASE,
								  SEPG_DB_DATABASE__CREATE,
								  audit_name.data,
								  true);

	systable_endscan(sscan);
	heap_close(rel, AccessShareLock);

	/*
	 * Assign the default security label on the new database
	 */
	object.classId = DatabaseRelationId;
	object.objectId = databaseId;
	object.objectSubId = 0;

	SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ncontext);

	/*
	 * Release everything allocated here, including the audit-name buffer,
	 * which was previously left behind while the two labels were freed.
	 */
	pfree(audit_name.data);
	pfree(ncontext);
	pfree(tcontext);
}
/*
 * find_inheritance_children
 *
 * Build the list of OIDs of relations that inherit *directly* from the
 * relation 'parentrelId'.
 *
 * Each child is locked with 'lockmode' (the parent itself is not; the
 * caller is expected to have locked it already).  With lockmode == NoLock
 * nothing is locked, and the caller has to cope with children being
 * dropped concurrently.
 *
 * Returns NIL when has_subclass() says the parent never had a child.
 */
List *
find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
{
	List	   *children = NIL;
	Relation	inhRel;
	SysScanDesc scan;
	ScanKeyData parentKey[1];
	HeapTuple	tuple;
	Oid		   *childOids;
	int			capacity;
	int			count;
	int			idx;

	/* Cheap exit: pg_class says there has never been a subclass. */
	if (!has_subclass(parentrelId))
		return NIL;

	/* Collect the child OIDs from pg_inherits into a growable array. */
	capacity = 32;
	count = 0;
	childOids = (Oid *) palloc(capacity * sizeof(Oid));

	inhRel = heap_open(InheritsRelationId, AccessShareLock);

	ScanKeyInit(&parentKey[0],
				Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(parentrelId));

	scan = systable_beginscan(inhRel, InheritsParentIndexId, true,
							  NULL, 1, parentKey);

	for (tuple = systable_getnext(scan);
		 tuple != NULL;
		 tuple = systable_getnext(scan))
	{
		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);

		/* Double the array whenever it is full. */
		if (count >= capacity)
		{
			capacity *= 2;
			childOids = (Oid *) repalloc(childOids, capacity * sizeof(Oid));
		}
		childOids[count] = inhForm->inhrelid;
		count++;
	}

	systable_endscan(scan);

	heap_close(inhRel, AccessShareLock);

	/*
	 * Sort by OID when there is more than one child, so that every backend
	 * takes the child locks in the same order regardless of the order the
	 * index scan happened to return them; this avoids needless deadlocks.
	 */
	if (count > 1)
		qsort(childOids, count, sizeof(Oid), oid_cmp);

	/* Lock each child (if requested) and append the survivors. */
	for (idx = 0; idx < count; idx++)
	{
		Oid			childOid = childOids[idx];
		bool		keep = true;

		if (lockmode != NoLock)
		{
			/* Take the lock to synchronize against a concurrent drop. */
			LockRelationOid(childOid, lockmode);

			/*
			 * With the lock held, re-check that the relation still exists.
			 * If it does not, assume it was dropped while we were waiting,
			 * give the useless lock back and skip it.
			 */
			keep = SearchSysCacheExists1(RELOID, ObjectIdGetDatum(childOid));
			if (!keep)
				UnlockRelationOid(childOid, lockmode);
		}

		if (keep)
			children = lappend_oid(children, childOid);
	}

	pfree(childOids);

	return children;
}
/*
 * TableDDLCommandList takes in a relationId, and returns the list of DDL
 * commands needed to reconstruct the relation. These DDL commands are all
 * palloced; and include the table's schema definition, optional column
 * storage and statistics definitions, and index and constraint definitions.
 *
 * The list is ordered: schema definition first, then the column options
 * statement (when there is one), then one statement per index or
 * constraint, each immediately followed by its cluster statement when the
 * table is clustered on that index.
 */
List *
TableDDLCommandList(Oid relationId)
{
	List	   *tableDDLCommandList = NIL;
	char	   *tableSchemaDef = NULL;
	char	   *tableColumnOptionsDef = NULL;
	Relation	pgIndex = NULL;
	SysScanDesc scanDescriptor = NULL;
	ScanKeyData scanKey[1];
	int			scanKeyCount = 1;
	HeapTuple	heapTuple = NULL;

	/* fetch table schema and column option definitions */
	tableSchemaDef = pg_shard_get_tableschemadef_string(relationId);
	tableColumnOptionsDef = pg_shard_get_tablecolumnoptionsdef_string(relationId);

	tableDDLCommandList = lappend(tableDDLCommandList, tableSchemaDef);
	if (tableColumnOptionsDef != NULL)
	{
		tableDDLCommandList = lappend(tableDDLCommandList, tableColumnOptionsDef);
	}

	/* open system catalog and scan all indexes that belong to this table */
	pgIndex = heap_open(IndexRelationId, AccessShareLock);

	ScanKeyInit(&scanKey[0], Anum_pg_index_indrelid,
				BTEqualStrategyNumber, F_OIDEQ, relationId);

	scanDescriptor = systable_beginscan(pgIndex,
										IndexIndrelidIndexId, true, /* indexOK */
										SnapshotSelf, scanKeyCount, scanKey);

	for (heapTuple = systable_getnext(scanDescriptor);
		 HeapTupleIsValid(heapTuple);
		 heapTuple = systable_getnext(scanDescriptor))
	{
		Form_pg_index indexForm = (Form_pg_index) GETSTRUCT(heapTuple);
		Oid			indexId = indexForm->indexrelid;
		Oid			constraintId = InvalidOid;
		bool		isConstraint = false;
		char	   *statementDef = NULL;

		/*
		 * A primary key index is always created by a constraint statement.
		 * A unique key index is created by a constraint if and only if the
		 * index has a corresponding constraint entry in pg_depend. Any other
		 * index form is never associated with a constraint.
		 *
		 * The constraint is looked up once here and reused below, instead of
		 * being fetched a second time when the statement is generated.
		 */
		if (indexForm->indisprimary)
		{
			constraintId = get_index_constraint(indexId);
			isConstraint = true;
		}
		else if (indexForm->indisunique)
		{
			constraintId = get_index_constraint(indexId);
			isConstraint = OidIsValid(constraintId);
		}

		/* get the corresponding constraint or index statement */
		if (isConstraint)
		{
			Assert(constraintId != InvalidOid);

			statementDef = pg_get_constraintdef_command(constraintId);
		}
		else
		{
			statementDef = pg_get_indexdef_string(indexId);
		}

		/* append found constraint or index definition to the list */
		tableDDLCommandList = lappend(tableDDLCommandList, statementDef);

		/* if table is clustered on this index, append definition to the list */
		if (indexForm->indisclustered)
		{
			char	   *clusteredDef = pg_shard_get_indexclusterdef_string(indexId);

			Assert(clusteredDef != NULL);

			tableDDLCommandList = lappend(tableDDLCommandList, clusteredDef);
		}
	}

	/* clean up scan and close system catalog */
	systable_endscan(scanDescriptor);
	heap_close(pgIndex, AccessShareLock);

	return tableDDLCommandList;
}
/*
 * GetParquetFileSegInfo
 *
 * Get the catalog entry for segment file 'segno' of a parquet relation from
 * the segment relation identified by aoEntry->segrelid.
 *
 * Returns a palloc'd ParquetFileSegInfo, or NULL when the segment file has
 * no entry.  Raises an error when the stored eof or tupcount is NULL, or
 * when the stored eof is negative.
 *
 * If a caller intends to append to this file segment entry they must
 * already hold a relation Append-Only segment file (transaction-scope) lock (tag
 * LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE) in order to guarantee
 * stability of the pg_aoseg information on this segment file and exclusive right
 * to append data to the segment file.
 *
 * NOTE(review): 'parquetMetaDataSnapshot' is accepted but not used; the scan
 * below runs under SnapshotNow.  Confirm with callers whether that is
 * intended before changing it.
 */
ParquetFileSegInfo *
GetParquetFileSegInfo(Relation parentrel, AppendOnlyEntry *aoEntry,
					  Snapshot parquetMetaDataSnapshot, int segno)
{
	Relation	pg_parquetseg_rel;
	TupleDesc	pg_parquetseg_dsc;
	HeapTuple	tuple;
	ScanKeyData key[1];
	SysScanDesc parquetscan;
	Datum		eof,
				eof_uncompressed,
				tupcount;
	bool		isNull;
	bool		indexOK;
	Oid			indexid;
	ParquetFileSegInfo *fsinfo;

	/*
	 * Check the parquet segment relation to be certain the parquet table
	 * segment file is there.
	 */
	pg_parquetseg_rel = heap_open(aoEntry->segrelid, AccessShareLock);
	pg_parquetseg_dsc = RelationGetDescr(pg_parquetseg_rel);

	/* In the executor role the index is not used for the lookup. */
	if (Gp_role == GP_ROLE_EXECUTE)
	{
		indexOK = FALSE;
		indexid = InvalidOid;
	}
	else
	{
		indexOK = TRUE;
		indexid = aoEntry->segidxid;
	}

	/*
	 * Setup a scan key to fetch from the index by segno.
	 */
	ScanKeyInit(&key[0], (AttrNumber) Anum_pg_parquetseg_segno,
				BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(segno));

	parquetscan = systable_beginscan(pg_parquetseg_rel, indexid, indexOK,
									 SnapshotNow, 1, &key[0]);

	tuple = systable_getnext(parquetscan);

	if (!HeapTupleIsValid(tuple))
	{
		/* This segment file does not have an entry. */
		systable_endscan(parquetscan);
		heap_close(pg_parquetseg_rel, AccessShareLock);
		return NULL;
	}

	/*
	 * Read every attribute straight from the scan tuple and convert it to a
	 * plain int64 while the scan is still open.  The tuple used to be
	 * duplicated with heap_copytuple() and the copy was never freed; reading
	 * in place needs no copy, so nothing is left behind.
	 */
	fsinfo = (ParquetFileSegInfo *) palloc0(sizeof(ParquetFileSegInfo));

	/* get the eof */
	eof = fastgetattr(tuple, Anum_pg_parquetseg_eof,
					  pg_parquetseg_dsc, &isNull);
	if (isNull)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("got invalid eof value: NULL")));
	fsinfo->eof = (int64) DatumGetFloat8(eof);

	/* get the tupcount */
	tupcount = fastgetattr(tuple, Anum_pg_parquetseg_tupcount,
						   pg_parquetseg_dsc, &isNull);
	if (isNull)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("got invalid tupcount value: NULL")));
	fsinfo->tupcount = (int64) DatumGetFloat8(tupcount);

	/* get the uncompressed eof */
	eof_uncompressed = fastgetattr(tuple, Anum_pg_parquetseg_eofuncompressed,
								   pg_parquetseg_dsc, &isNull);
	if (isNull)
	{
		/*
		 * NULL is allowed. Tables that were created before the release of the
		 * eof_uncompressed catalog column will have a NULL instead of a value.
		 */
		fsinfo->eof_uncompressed = InvalidUncompressedEof;
	}
	else
	{
		fsinfo->eof_uncompressed = (int64) DatumGetFloat8(eof_uncompressed);
	}

	fsinfo->segno = segno;
	ItemPointerSetInvalid(&fsinfo->sequence_tid);

	/* All values are extracted; the scan tuple is no longer needed. */
	systable_endscan(parquetscan);

	if (fsinfo->eof < 0)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("invalid eof " INT64_FORMAT " for relation %s",
						fsinfo->eof, RelationGetRelationName(parentrel))));

	/* Close the segment catalog. */
	heap_close(pg_parquetseg_rel, AccessShareLock);

	return fsinfo;
}
/* * regtypein - converts "typename" to type OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_type entry. * * In bootstrap mode the name must just equal some existing name in pg_type. * In normal mode the type name can be specified using the full type syntax * recognized by the parser; for example, DOUBLE PRECISION and INTEGER[] will * work and be translated to the correct type names. (We ignore any typmod * info generated by the parser, however.) */ Datum regtypein(PG_FUNCTION_ARGS) { char *typ_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; int32 typmod; /* '-' ? */ if (strcmp(typ_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (typ_name_or_oid[0] >= '0' && typ_name_or_oid[0] <= '9' && strspn(typ_name_or_oid, "0123456789") == strlen(typ_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(typ_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a type name, possibly schema-qualified or decorated */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_type for a match. This is needed for initializing other * system catalogs (pg_namespace may not exist yet, and certainly there * are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_type_typname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(typ_name_or_oid)); hdesc = heap_open(TypeRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, TypeNameNspIndexId, true, SnapshotNow, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("type \"%s\" does not exist", typ_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: invoke the full parser to deal with special cases such as * array syntax. */ parseTypeString(typ_name_or_oid, &result, &typmod); PG_RETURN_OID(result); }
/*
 * Drop a table space
 *
 * Looks up the pg_tablespace row named by stmt->tablespacename, deletes it
 * together with its shared comments, security labels and owner dependency,
 * removes the on-disk directories, and writes an XLOG_TBLSPC_DROP record.
 *
 * When no such tablespace exists this is an error, unless stmt->missing_ok
 * is set, in which case a NOTICE is emitted and the function returns.
 *
 * Be careful to check that the tablespace is empty: if the directories
 * still cannot be removed after a forced checkpoint, the command fails with
 * "tablespace is not empty".
 *
 * On platforms built without HAVE_SYMLINK the function only raises
 * ERRCODE_FEATURE_NOT_SUPPORTED.
 */
void
DropTableSpace(DropTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
	char	   *tablespacename = stmt->tablespacename;
	HeapScanDesc scandesc;
	Relation	rel;
	HeapTuple	tuple;
	ScanKeyData entry[1];
	Oid			tablespaceoid;

	/*
	 * Find the target tuple
	 */
	rel = heap_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(tablespacename));
	scandesc = heap_beginscan(rel, SnapshotNow, 1, entry);
	tuple = heap_getnext(scandesc, ForwardScanDirection);

	if (!HeapTupleIsValid(tuple))
	{
		if (!stmt->missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							tablespacename)));
		}
		else
		{
			ereport(NOTICE,
					(errmsg("tablespace \"%s\" does not exist, skipping",
							tablespacename)));
			/* Nothing to drop: end the scan and close the catalog before returning */
			heap_endscan(scandesc);
			heap_close(rel, NoLock);
		}
		return;
	}

	tablespaceoid = HeapTupleGetOid(tuple);

	/* Must be tablespace owner */
	if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE,
					   tablespacename);

	/* Disallow drop of the standard tablespaces, even by superuser */
	if (tablespaceoid == GLOBALTABLESPACE_OID ||
		tablespaceoid == DEFAULTTABLESPACE_OID)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_TABLESPACE,
					   tablespacename);

	/* DROP hook for the tablespace being removed (only if a hook is installed) */
	if (object_access_hook)
	{
		ObjectAccessDrop drop_arg;

		/* the hook receives a zeroed argument struct */
		memset(&drop_arg, 0, sizeof(ObjectAccessDrop));
		InvokeObjectAccessHook(OAT_DROP, TableSpaceRelationId,
							   tablespaceoid, 0, &drop_arg);
	}

	/*
	 * Remove the pg_tablespace tuple (this will roll back if we fail below)
	 */
	simple_heap_delete(rel, &tuple->t_self);

	/* the scan tuple is not referenced past this point */
	heap_endscan(scandesc);

	/*
	 * Remove any comments or security labels on this tablespace.
	 */
	DeleteSharedComments(tablespaceoid, TableSpaceRelationId);
	DeleteSharedSecurityLabel(tablespaceoid, TableSpaceRelationId);

	/*
	 * Remove dependency on owner.
	 */
	deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid, 0);

	/*
	 * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
	 * is running concurrently.  It is held until after ForceSyncCommit()
	 * below.
	 */
	LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

	/*
	 * Try to remove the physical infrastructure.
	 */
	if (!destroy_tablespace_directories(tablespaceoid, false))
	{
		/*
		 * Not all files deleted?  However, there can be lingering empty files
		 * in the directories, left behind by for example DROP TABLE, that
		 * have been scheduled for deletion at next checkpoint (see comments
		 * in mdunlink() for details).  We could just delete them immediately,
		 * but we can't tell them apart from important data files that we
		 * mustn't delete.  So instead, we force a checkpoint which will clean
		 * out any lingering files, and try again.
		 */
		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
		if (!destroy_tablespace_directories(tablespaceoid, false))
		{
			/* Still not empty, the files must be important then */
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("tablespace \"%s\" is not empty",
							tablespacename)));
		}
	}

	/* Record the filesystem change in XLOG */
	{
		xl_tblspc_drop_rec xlrec;
		XLogRecData rdata[1];

		/* the record payload is just the OID of the dropped tablespace */
		xlrec.ts_id = tablespaceoid;
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = sizeof(xl_tblspc_drop_rec);
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = NULL;

		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
	}

	/*
	 * Note: because we checked that the tablespace was empty, there should be
	 * no need to worry about flushing shared buffers or free space map
	 * entries for relations in the tablespace.
	 */

	/*
	 * Force synchronous commit, to minimize the window between removing the
	 * files on-disk and marking the transaction committed.  It's not great
	 * that there is any window at all, but definitely we don't want to make
	 * it larger than necessary.
	 */
	ForceSyncCommit();

	/*
	 * Allow TablespaceCreateDbspace again.
	 */
	LWLockRelease(TablespaceCreateLock);

	/* We keep the lock on pg_tablespace until commit */
	heap_close(rel, NoLock);
#else							/* !HAVE_SYMLINK */
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("tablespaces are not supported on this platform")));
#endif   /* HAVE_SYMLINK */
}
/* ----------
 * toast_fetch_datum -
 *
 *	Rebuild an in-memory varattrib from the chunks stored for it in the
 *	toast relation.
 *
 *	'attr' supplies the toast relation OID, the value id and the external
 *	size.  The result is palloc'd and carries the compressed flag when
 *	'attr' does.  Any missing, misnumbered or wrongly sized chunk is
 *	reported with elog(ERROR).
 * ----------
 */
static varattrib *
toast_fetch_datum(varattrib *attr)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData valueKey;
	IndexScanDesc scan;
	HeapTuple	chunkTuple;
	TupleDesc	chunkDesc;
	varattrib  *result;
	int32		totalSize;
	int32		numchunks;
	int32		expectedSeq;

	totalSize = attr->va_content.va_external.va_extsize;
	numchunks = ((totalSize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	/* Allocate the result and stamp its header. */
	result = (varattrib *) palloc(totalSize + VARHDRSZ);
	VARATT_SIZEP(result) = totalSize + VARHDRSZ;
	if (VARATT_IS_COMPRESSED(attr))
		VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 AccessShareLock);
	chunkDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid);

	/*
	 * Setup a scan key to fetch from the index by va_valueid
	 */
	ScanKeyInit(&valueKey,
				(AttrNumber) 1,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Read the chunks by index
	 *
	 * Note that because the index is actually on (valueid, chunkidx) we will
	 * see the chunks in chunkidx order, even though we didn't explicitly ask
	 * for it.
	 */
	scan = index_beginscan(toastrel, toastidx, SnapshotToast, 1, &valueKey);

	for (expectedSeq = 0;
		 (chunkTuple = index_getnext(scan, ForwardScanDirection)) != NULL;
		 expectedSeq++)
	{
		int32		seq;
		int32		chunksize;
		int32		wantedSize;
		Pointer		chunk;
		bool		isnull;

		/*
		 * Have a chunk, extract the sequence number and the data
		 */
		seq = DatumGetInt32(fastgetattr(chunkTuple, 2, chunkDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(fastgetattr(chunkTuple, 3, chunkDesc, &isnull));
		Assert(!isnull);
		chunksize = VARATT_SIZE(chunk) - VARHDRSZ;

		/*
		 * Some checks on the data we've found: chunks must arrive in
		 * sequence and must not run past the expected count.
		 */
		if (seq != expectedSeq)
			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
				 seq, expectedSeq,
				 attr->va_content.va_external.va_valueid);
		if (seq >= numchunks)
			elog(ERROR, "unexpected chunk number %d for toast value %u",
				 seq,
				 attr->va_content.va_external.va_valueid);

		/*
		 * Every chunk but the last must be full-sized; the last one holds
		 * whatever remains of the value.
		 */
		if (seq < numchunks - 1)
			wantedSize = TOAST_MAX_CHUNK_SIZE;
		else
			wantedSize = totalSize - seq * TOAST_MAX_CHUNK_SIZE;
		if (chunksize != wantedSize)
			elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
				 chunksize, seq,
				 attr->va_content.va_external.va_valueid);

		/*
		 * Copy the data into proper place in our result
		 */
		memcpy(((char *) VARATT_DATA(result)) + seq * TOAST_MAX_CHUNK_SIZE,
			   VARATT_DATA(chunk),
			   chunksize);
	}

	/*
	 * Final checks that we successfully fetched the datum
	 */
	if (expectedSeq != numchunks)
		elog(ERROR, "missing chunk number %d for toast value %u",
			 expectedSeq,
			 attr->va_content.va_external.va_valueid);

	/*
	 * End scan and close relations
	 */
	index_endscan(scan);
	index_close(toastidx);
	heap_close(toastrel, AccessShareLock);

	return result;
}
/* * Rename a tablespace */ void RenameTableSpace(const char *oldname, const char *newname) { Relation rel; ScanKeyData entry[1]; HeapScanDesc scan; HeapTuple tup; HeapTuple newtuple; Form_pg_tablespace newform; /* Search pg_tablespace */ rel = heap_open(TableSpaceRelationId, RowExclusiveLock); ScanKeyInit(&entry[0], Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(oldname)); scan = heap_beginscan(rel, SnapshotNow, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace \"%s\" does not exist", oldname))); newtuple = heap_copytuple(tup); newform = (Form_pg_tablespace) GETSTRUCT(newtuple); heap_endscan(scan); /* Must be owner */ if (!pg_tablespace_ownercheck(HeapTupleGetOid(newtuple), GetUserId())) aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_TABLESPACE, oldname); /* Validate new name */ if (!allowSystemTableMods && IsReservedName(newname)) ereport(ERROR, (errcode(ERRCODE_RESERVED_NAME), errmsg("unacceptable tablespace name \"%s\"", newname), errdetail("The prefix \"pg_\" is reserved for system tablespaces."))); /* Make sure the new name doesn't exist */ ScanKeyInit(&entry[0], Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(newname)); scan = heap_beginscan(rel, SnapshotNow, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("tablespace \"%s\" already exists", newname))); heap_endscan(scan); /* OK, update the entry */ namestrcpy(&(newform->spcname), newname); simple_heap_update(rel, &newtuple->t_self, newtuple); CatalogUpdateIndexes(rel, newtuple); heap_close(rel, NoLock); }
/*
 * CreateSharedComments --
 *
 * Set the comment for the shared object identified by (oid, classoid) in
 * pg_shdescription.
 *
 * With a non-empty 'comment', the existing row for that key is replaced,
 * or a new row is inserted when there is none.  With a NULL or empty
 * 'comment', any existing row for the key is deleted instead.
 */
void
CreateSharedComments(Oid oid, Oid classoid, char *comment)
{
	Relation	shdescRel;
	ScanKeyData keys[2];
	SysScanDesc scan;
	HeapTuple	existing;
	HeapTuple	written = NULL;
	Datum		values[Natts_pg_shdescription];
	bool		nulls[Natts_pg_shdescription];
	bool		replaces[Natts_pg_shdescription];
	int			col;

	/* An empty string is handled exactly like NULL. */
	if (comment != NULL && comment[0] == '\0')
		comment = NULL;

	/* When there is something to store, prepare a full row image. */
	if (comment != NULL)
	{
		col = 0;
		while (col < Natts_pg_shdescription)
		{
			nulls[col] = false;
			replaces[col] = true;
			col++;
		}
		values[Anum_pg_shdescription_objoid - 1] = ObjectIdGetDatum(oid);
		values[Anum_pg_shdescription_classoid - 1] = ObjectIdGetDatum(classoid);
		values[Anum_pg_shdescription_description - 1] = CStringGetTextDatum(comment);
	}

	/* Look for an existing row with the same key through the index. */
	ScanKeyInit(&keys[0],
				Anum_pg_shdescription_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&keys[1],
				Anum_pg_shdescription_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));

	shdescRel = heap_open(SharedDescriptionRelationId, RowExclusiveLock);

	scan = systable_beginscan(shdescRel, SharedDescriptionObjIndexId, true,
							  SnapshotNow, 2, keys);

	/* Only the first match is examined; there is assumed to be at most one. */
	existing = systable_getnext(scan);
	if (existing != NULL)
	{
		if (comment != NULL)
		{
			written = heap_modify_tuple(existing, RelationGetDescr(shdescRel),
										values, nulls, replaces);
			simple_heap_update(shdescRel, &existing->t_self, written);
		}
		else
			simple_heap_delete(shdescRel, &existing->t_self);
	}

	systable_endscan(scan);

	/* No old row was updated but there is a comment to store: insert one. */
	if (written == NULL && comment != NULL)
	{
		written = heap_form_tuple(RelationGetDescr(shdescRel),
								  values, nulls);
		simple_heap_insert(shdescRel, written);
	}

	/* Whatever row was written needs its index entries, then can be freed. */
	if (written != NULL)
	{
		CatalogUpdateIndexes(shdescRel, written);
		heap_freetuple(written);
	}

	/* Close the relation without releasing the lock. */
	heap_close(shdescRel, NoLock);
}
/* * Look to see if we have template information for the given language name. */ static PLTemplate * find_language_template(const char *languageName) { PLTemplate *result; Relation rel; SysScanDesc scan; ScanKeyData key; HeapTuple tup; rel = heap_open(PLTemplateRelationId, AccessShareLock); ScanKeyInit(&key, Anum_pg_pltemplate_tmplname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(languageName)); scan = systable_beginscan(rel, PLTemplateNameIndexId, true, NULL, 1, &key); tup = systable_getnext(scan); if (HeapTupleIsValid(tup)) { Form_pg_pltemplate tmpl = (Form_pg_pltemplate) GETSTRUCT(tup); Datum datum; bool isnull; result = (PLTemplate *) palloc0(sizeof(PLTemplate)); result->tmpltrusted = tmpl->tmpltrusted; result->tmpldbacreate = tmpl->tmpldbacreate; /* Remaining fields are variable-width so we need heap_getattr */ datum = heap_getattr(tup, Anum_pg_pltemplate_tmplhandler, RelationGetDescr(rel), &isnull); if (!isnull) result->tmplhandler = TextDatumGetCString(datum); datum = heap_getattr(tup, Anum_pg_pltemplate_tmplinline, RelationGetDescr(rel), &isnull); if (!isnull) result->tmplinline = TextDatumGetCString(datum); datum = heap_getattr(tup, Anum_pg_pltemplate_tmplvalidator, RelationGetDescr(rel), &isnull); if (!isnull) result->tmplvalidator = TextDatumGetCString(datum); datum = heap_getattr(tup, Anum_pg_pltemplate_tmpllibrary, RelationGetDescr(rel), &isnull); if (!isnull) result->tmpllibrary = TextDatumGetCString(datum); /* Ignore template if handler or library info is missing */ if (!result->tmplhandler || !result->tmpllibrary) result = NULL; } else result = NULL; systable_endscan(scan); heap_close(rel, AccessShareLock); return result; }
/* * shdepChangeDep * * Update shared dependency records to account for an updated referenced * object. This is an internal workhorse for operations such as changing * an object's owner. * * There must be no more than one existing entry for the given dependent * object and dependency type! So in practice this can only be used for * updating SHARED_DEPENDENCY_OWNER entries, which should have that property. * * If there is no previous entry, we assume it was referencing a PINned * object, so we create a new entry. If the new referenced object is * PINned, we don't create an entry (and drop the old one, if any). * * sdepRel must be the pg_shdepend relation, already opened and suitably * locked. */ static void shdepChangeDep(Relation sdepRel, Oid classid, Oid objid, int32 objsubid, Oid refclassid, Oid refobjid, SharedDependencyType deptype) { Oid dbid = classIdGetDbId(classid); HeapTuple oldtup = NULL; HeapTuple scantup; ScanKeyData key[4]; SysScanDesc scan; /* * Make sure the new referenced object doesn't go away while we record the * dependency. 
*/ shdepLockAndCheckObject(refclassid, refobjid); /* * Look for a previous entry */ ScanKeyInit(&key[0], Anum_pg_shdepend_dbid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(dbid)); ScanKeyInit(&key[1], Anum_pg_shdepend_classid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(classid)); ScanKeyInit(&key[2], Anum_pg_shdepend_objid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(objid)); ScanKeyInit(&key[3], Anum_pg_shdepend_objsubid, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(objsubid)); scan = systable_beginscan(sdepRel, SharedDependDependerIndexId, true, NULL, 4, key); while ((scantup = systable_getnext(scan)) != NULL) { /* Ignore if not of the target dependency type */ if (((Form_pg_shdepend) GETSTRUCT(scantup))->deptype != deptype) continue; /* Caller screwed up if multiple matches */ if (oldtup) elog(ERROR, "multiple pg_shdepend entries for object %u/%u/%d deptype %c", classid, objid, objsubid, deptype); oldtup = heap_copytuple(scantup); } systable_endscan(scan); if (isSharedObjectPinned(refclassid, refobjid, sdepRel)) { /* No new entry needed, so just delete existing entry if any */ if (oldtup) simple_heap_delete(sdepRel, &oldtup->t_self); } else if (oldtup) { /* Need to update existing entry */ Form_pg_shdepend shForm = (Form_pg_shdepend) GETSTRUCT(oldtup); /* Since oldtup is a copy, we can just modify it in-memory */ shForm->refclassid = refclassid; shForm->refobjid = refobjid; simple_heap_update(sdepRel, &oldtup->t_self, oldtup); /* keep indexes current */ CatalogUpdateIndexes(sdepRel, oldtup); } else { /* Need to insert new entry */ Datum values[Natts_pg_shdepend]; bool nulls[Natts_pg_shdepend]; memset(nulls, false, sizeof(nulls)); values[Anum_pg_shdepend_dbid - 1] = ObjectIdGetDatum(dbid); values[Anum_pg_shdepend_classid - 1] = ObjectIdGetDatum(classid); values[Anum_pg_shdepend_objid - 1] = ObjectIdGetDatum(objid); values[Anum_pg_shdepend_objsubid - 1] = Int32GetDatum(objsubid); values[Anum_pg_shdepend_refclassid - 1] = 
ObjectIdGetDatum(refclassid); values[Anum_pg_shdepend_refobjid - 1] = ObjectIdGetDatum(refobjid); values[Anum_pg_shdepend_deptype - 1] = CharGetDatum(deptype); /* * we are reusing oldtup just to avoid declaring a new variable, but * it's certainly a new tuple */ oldtup = heap_form_tuple(RelationGetDescr(sdepRel), values, nulls); simple_heap_insert(sdepRel, oldtup); /* keep indexes current */ CatalogUpdateIndexes(sdepRel, oldtup); } if (oldtup) heap_freetuple(oldtup); }
/*
 * sepgsql_schema_post_create
 *
 * Post-creation hook for schemas: computes the default security label for
 * the schema identified by namespaceId, checks db_schema:{create} against
 * that label, and then attaches the label to the schema.
 */
void
sepgsql_schema_post_create(Oid namespaceId)
{
	Relation	nspRel;
	ScanKeyData scanKey;
	SysScanDesc scanDesc;
	HeapTuple	nspTuple;
	Form_pg_namespace nspForm;
	const char *labelName;
	char	   *scontext;
	char	   *tcontext;
	char	   *ncontext;
	ObjectAddress object;
	StringInfoData audit_name;

	/*
	 * Compute a default security label when we create a new schema object
	 * under the working database.
	 *
	 * XXX - an upcoming version of libselinux supports taking the object
	 * name into account when computing the default security label, e.g. to
	 * give the "pg_temp" schema a special label.
	 *
	 * The pg_namespace row is fetched with SnapshotSelf.
	 */
	nspRel = heap_open(NamespaceRelationId, AccessShareLock);

	ScanKeyInit(&scanKey,
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(namespaceId));

	scanDesc = systable_beginscan(nspRel, NamespaceOidIndexId, true,
								  SnapshotSelf, 1, &scanKey);
	nspTuple = systable_getnext(scanDesc);
	if (!HeapTupleIsValid(nspTuple))
		elog(ERROR, "catalog lookup failed for namespace %u", namespaceId);

	nspForm = (Form_pg_namespace) GETSTRUCT(nspTuple);

	/*
	 * Per-backend temporary schemas are collapsed onto a generic name so
	 * that they all get labeled the same way.
	 */
	labelName = NameStr(nspForm->nspname);
	if (strncmp(labelName, "pg_temp_", sizeof("pg_temp_") - 1) == 0)
		labelName = "pg_temp";
	else if (strncmp(labelName, "pg_toast_temp_",
					 sizeof("pg_toast_temp_") - 1) == 0)
		labelName = "pg_toast_temp";

	/* The database's label is the target context; the client is the subject */
	tcontext = sepgsql_get_label(DatabaseRelationId, MyDatabaseId, 0);
	scontext = sepgsql_get_client_label();
	ncontext = sepgsql_compute_create(scontext,
									  tcontext,
									  SEPG_CLASS_DB_SCHEMA,
									  labelName);

	/*
	 * check db_schema:{create}
	 */
	initStringInfo(&audit_name);
	appendStringInfo(&audit_name, "%s", quote_identifier(labelName));
	sepgsql_avc_check_perms_label(ncontext,
								  SEPG_CLASS_DB_SCHEMA,
								  SEPG_DB_SCHEMA__CREATE,
								  audit_name.data,
								  true);

	/* labelName may point into the tuple, so the scan ends only now */
	systable_endscan(scanDesc);
	heap_close(nspRel, AccessShareLock);

	/*
	 * Assign the default security label on the new schema
	 */
	object.classId = NamespaceRelationId;
	object.objectId = namespaceId;
	object.objectSubId = 0;
	SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ncontext);

	pfree(ncontext);
	pfree(tcontext);
}
/* * Setup a ScanKey for a search in the relation 'rel' for a tuple 'key' that * is setup to match 'rel' (*NOT* idxrel!). * * Returns whether any column contains NULLs. * * This is not generic routine, it expects the idxrel to be replication * identity of a rel and meet all limitations associated with that. */ static bool build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, TupleTableSlot *searchslot) { int attoff; bool isnull; Datum indclassDatum; oidvector *opclass; int2vector *indkey = &idxrel->rd_index->indkey; bool hasnulls = false; Assert(RelationGetReplicaIndex(rel) == RelationGetRelid(idxrel)); indclassDatum = SysCacheGetAttr(INDEXRELID, idxrel->rd_indextuple, Anum_pg_index_indclass, &isnull); Assert(!isnull); opclass = (oidvector *) DatumGetPointer(indclassDatum); /* Build scankey for every attribute in the index. */ for (attoff = 0; attoff < RelationGetNumberOfAttributes(idxrel); attoff++) { Oid operator; Oid opfamily; RegProcedure regop; int pkattno = attoff + 1; int mainattno = indkey->values[attoff]; Oid optype = get_opclass_input_type(opclass->values[attoff]); /* * Load the operator info. We need this to get the equality operator * function for the scan key. */ opfamily = get_opclass_family(opclass->values[attoff]); operator = get_opfamily_member(opfamily, optype, optype, BTEqualStrategyNumber); if (!OidIsValid(operator)) elog(ERROR, "could not find member %d(%u,%u) of opfamily %u", BTEqualStrategyNumber, optype, optype, opfamily); regop = get_opcode(operator); /* Initialize the scankey. */ ScanKeyInit(&skey[attoff], pkattno, BTEqualStrategyNumber, regop, searchslot->tts_values[mainattno - 1]); /* Check for null value. */ if (searchslot->tts_isnull[mainattno - 1]) { hasnulls = true; skey[attoff].sk_flags |= SK_ISNULL; } } return hasnulls; }
/*
 * sepgsql_relation_post_create
 *
 * Post-creation hook for relations and their attributes.  Tables, sequences
 * and views receive a default security label; for regular tables every
 * column (system columns included) is labeled as well.  Other relation
 * kinds are left unlabeled.
 */
void
sepgsql_relation_post_create(Oid relOid)
{
	Relation	classRel;
	ScanKeyData scanKey;
	SysScanDesc scanDesc;
	HeapTuple	classTuple;
	Form_pg_class classForm;
	ObjectAddress object;
	uint16		tclass;
	char	   *scontext;		/* subject */
	char	   *tcontext;		/* schema */
	char	   *rcontext;		/* relation */
	char	   *ccontext;		/* column */

	/*
	 * Fetch catalog record of the new relation. Because pg_class entry is
	 * not visible right now, we need to scan the catalog using SnapshotSelf.
	 */
	classRel = heap_open(RelationRelationId, AccessShareLock);

	ScanKeyInit(&scanKey,
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relOid));

	scanDesc = systable_beginscan(classRel, ClassOidIndexId, true,
								  SnapshotSelf, 1, &scanKey);

	classTuple = systable_getnext(scanDesc);
	if (!HeapTupleIsValid(classTuple))
		elog(ERROR, "catalog lookup failed for relation %u", relOid);

	classForm = (Form_pg_class) GETSTRUCT(classTuple);

	/* Map the relation kind onto its SELinux object class */
	switch (classForm->relkind)
	{
		case RELKIND_RELATION:
			tclass = SEPG_CLASS_DB_TABLE;
			break;
		case RELKIND_SEQUENCE:
			tclass = SEPG_CLASS_DB_SEQUENCE;
			break;
		case RELKIND_VIEW:
			tclass = SEPG_CLASS_DB_VIEW;
			break;
		default:
			/* No need to assign individual labels */
			goto out;
	}

	/*
	 * Compute a default security label when we create a new relation object
	 * under the specified namespace.
	 */
	scontext = sepgsql_get_client_label();
	tcontext = sepgsql_get_label(NamespaceRelationId,
								 classForm->relnamespace, 0);
	rcontext = sepgsql_compute_create(scontext, tcontext, tclass);

	/*
	 * Assign the default security label on the new relation
	 */
	object.classId = RelationRelationId;
	object.objectId = relOid;
	object.objectSubId = 0;
	SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, rcontext);

	/*
	 * We also assign a default security label on columns of the new regular
	 * tables.
	 */
	if (classForm->relkind == RELKIND_RELATION)
	{
		AttrNumber	attno;

		ccontext = sepgsql_compute_create(scontext, rcontext,
										  SEPG_CLASS_DB_COLUMN);

		/* Walk system attributes (negative numbers) and user columns */
		attno = FirstLowInvalidHeapAttributeNumber + 1;
		while (attno <= classForm->relnatts)
		{
			bool		skip;

			/* Attribute number zero is not a column; oid only if present */
			skip = (attno == InvalidAttrNumber) ||
				(attno == ObjectIdAttributeNumber && !classForm->relhasoids);

			if (!skip)
			{
				object.classId = RelationRelationId;
				object.objectId = relOid;
				object.objectSubId = attno;
				SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ccontext);
			}

			attno++;
		}
		pfree(ccontext);
	}
	pfree(rcontext);

out:
	systable_endscan(scanDesc);
	heap_close(classRel, AccessShareLock);
}
/*
 * inv_read - read up to nbytes from a large object into buf, starting at the
 * descriptor's current offset.
 *
 * Pages are fetched in index order starting at the page that contains the
 * current offset.  Missing pages ("holes") and short pages followed by later
 * pages read out as zeroes.  obj_desc->offset is advanced by the number of
 * bytes delivered, which is also the return value; it is less than nbytes
 * when the stored data runs out first.  A non-positive nbytes returns 0
 * without touching the large object.
 */
int
inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
	int			nread = 0;
	int			chunk;
	int			startoff;
	int			datalen;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	uint32		pagestart;
	ScanKeyData skey[2];
	SysScanDesc scan;
	HeapTuple	pagetup;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	/* All pages of this large object at or after the starting page */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	scan = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									  obj_desc->snapshot, 2, skey);

	/* Stop as soon as the request is satisfied or the pages run out */
	while (nread < nbytes &&
		   (pagetup = systable_getnext_ordered(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_largeobject page;
		bytea	   *payload;
		bool		detoasted;

		if (HeapTupleHasNulls(pagetup))		/* paranoia */
			elog(ERROR, "null field found in pg_largeobject");
		page = (Form_pg_largeobject) GETSTRUCT(pagetup);

		/*
		 * The index scan is expected to deliver pages in order, but pages
		 * can be missing where the LO has unwritten "holes".  Any gap
		 * between the current offset and the start of this page is read
		 * out as zeroes.
		 */
		pagestart = ((uint32) page->pageno) * LOBLKSIZE;
		if (pagestart > obj_desc->offset)
		{
			chunk = pagestart - obj_desc->offset;
			if (chunk > (nbytes - nread))
				chunk = nbytes - nread;
			MemSet(buf + nread, 0, chunk);
			nread += chunk;
			obj_desc->offset += chunk;
		}

		/* Zero fill may already have satisfied the request */
		if (nread >= nbytes)
			continue;

		Assert(obj_desc->offset >= pagestart);
		startoff = (int) (obj_desc->offset - pagestart);
		Assert(startoff >= 0 && startoff < LOBLKSIZE);

		payload = &(page->data);	/* see note at top of file */
		detoasted = false;
		if (VARATT_IS_EXTENDED(payload))
		{
			payload = (bytea *)
				heap_tuple_untoast_attr((struct varlena *) payload);
			detoasted = true;
		}

		/* Copy whatever this page holds beyond the starting offset */
		datalen = getbytealen(payload);
		if (datalen > startoff)
		{
			chunk = datalen - startoff;
			if (chunk > (nbytes - nread))
				chunk = nbytes - nread;
			memcpy(buf + nread, VARDATA(payload) + startoff, chunk);
			nread += chunk;
			obj_desc->offset += chunk;
		}

		if (detoasted)
			pfree(payload);
	}

	systable_endscan_ordered(scan);

	return nread;
}
/* * regprocin - converts "proname" to proc OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_proc entry. */ Datum regprocin(PG_FUNCTION_ARGS) { char *pro_name_or_oid = PG_GETARG_CSTRING(0); RegProcedure result = InvalidOid; List *names; FuncCandidateList clist; /* '-' ? */ if (strcmp(pro_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (pro_name_or_oid[0] >= '0' && pro_name_or_oid[0] <= '9' && strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(pro_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_proc for a unique match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, and * certainly there are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { int matches = 0; Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_proc_proname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(pro_name_or_oid)); hdesc = heap_open(ProcedureRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ProcedureNameArgsNspIndexId, true, SnapshotNow, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { result = (RegProcedure) HeapTupleGetOid(tuple); if (++matches > 1) break; } systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); if (matches == 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (matches > 1) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_proc entries in the current search path. */ names = stringToQualifiedNameList(pro_name_or_oid); clist = FuncnameGetCandidates(names, -1, NIL, false, false); if (clist == NULL) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (clist->next != NULL) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); result = clist->oid; PG_RETURN_OID(result); }
/*
 * inv_truncate - set the end of a large object's data at byte offset 'len'.
 *
 * The page that contains the truncation point (page number len / LOBLKSIZE)
 * is rewritten, or created if it does not exist, so that it holds exactly
 * len % LOBLKSIZE bytes; any part of that page not covered by old data is
 * zero-filled.  Every page found after it is deleted.
 *
 * Raises an error if the descriptor was not opened for writing (IFS_WRLOCK
 * not set) or if the large object no longer exists.  Finishes with
 * CommandCounterIncrement() so later large-object operations in the same
 * transaction see the changes.
 */
void
inv_truncate(LargeObjectDesc *obj_desc, int len)
{
	/* number of the page in which the truncation point falls */
	int32		pageno = (int32) (len / LOBLKSIZE);
	int			off;
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	Form_pg_largeobject olddata;
	/* scratch bytea (header + one page of data) used to build the new page */
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *workb = VARDATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));

	/* enforce writability because snapshot is probably wrong otherwise */
	if ((obj_desc->flags & IFS_WRLOCK) == 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("large object %u was not opened for writing",
						obj_desc->id)));

	/* check existence of the target largeobject */
	if (!LargeObjectExists(obj_desc->id))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("large object %u was already dropped", obj_desc->id)));

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/*
	 * Set up to find all pages with desired loid and pageno >= target
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	/*
	 * If possible, get the page the truncation point is in. The truncation
	 * point may be beyond the end of the LO or in a hole.
	 *
	 * olddata stays NULL when the scan finds no page at or after pageno.
	 */
	olddata = NULL;
	if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
	{
		if (HeapTupleHasNulls(oldtuple))		/* paranoia */
			elog(ERROR, "null field found in pg_largeobject");
		olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
		Assert(olddata->pageno >= pageno);
	}

	/*
	 * If we found the page of the truncation point we need to truncate the
	 * data in it.  Otherwise if we're in a hole, we need to create a page to
	 * mark the end of data.
	 */
	if (olddata != NULL && olddata->pageno == pageno)
	{
		/* First, load old data into workbuf */
		bytea	   *datafield = &(olddata->data);		/* see note at top of
														 * file */
		bool		pfreeit = false;
		int			pagelen;

		/* work on a detoasted copy if the stored value is extended */
		if (VARATT_IS_EXTENDED(datafield))
		{
			datafield = (bytea *)
				heap_tuple_untoast_attr((struct varlena *) datafield);
			pfreeit = true;
		}
		pagelen = getbytealen(datafield);
		Assert(pagelen <= LOBLKSIZE);
		memcpy(workb, VARDATA(datafield), pagelen);
		if (pfreeit)
			pfree(datafield);

		/*
		 * Fill any hole: when the old page is shorter than the truncation
		 * offset within the page, the bytes in between become zeroes.
		 */
		off = len % LOBLKSIZE;
		if (off > pagelen)
			MemSet(workb + pagelen, 0, off - pagelen);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert updated tuple; only the data column is replaced
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		memset(replace, false, sizeof(replace));
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		replace[Anum_pg_largeobject_data - 1] = true;
		newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
								   values, nulls, replace);
		simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}
	else
	{
		/*
		 * If the first page we found was after the truncation point, we're in
		 * a hole that we'll fill, but we need to delete the later page
		 * because the loop below won't visit it again.
		 */
		if (olddata != NULL)
		{
			Assert(olddata->pageno > pageno);
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}

		/*
		 * Write a brand new page.
		 *
		 * Fill the hole up to the truncation point
		 */
		off = len % LOBLKSIZE;
		if (off > 0)
			MemSet(workb, 0, off);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert new tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
		values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
		simple_heap_insert(lo_heap_r, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}

	/*
	 * Delete any pages after the truncation point.  If the initial search
	 * didn't find a page, then of course there's nothing more to do.
	 */
	if (olddata != NULL)
	{
		while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
		{
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}
	}

	systable_endscan_ordered(sd);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();
}
/* * regclassin - converts "classname" to class OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_class entry. */ Datum regclassin(PG_FUNCTION_ARGS) { char *class_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; List *names; /* '-' ? */ if (strcmp(class_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (class_name_or_oid[0] >= '0' && class_name_or_oid[0] <= '9' && strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(class_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_class for a match. This is needed for initializing * other system catalogs (pg_namespace may not exist yet, and certainly * there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_class_relname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(class_name_or_oid)); hdesc = heap_open(RelationRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ClassNameNspIndexId, true, SnapshotNow, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("relation \"%s\" does not exist", class_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_class entries in the current search path. */ names = stringToQualifiedNameList(class_name_or_oid); /* We might not even have permissions on this relation; don't lock it. 
*/ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, false); PG_RETURN_OID(result); }
/* * Determine whether a relation can be proven functionally dependent on * a set of grouping columns. If so, return TRUE and add the pg_constraint * OIDs of the constraints needed for the proof to the *constraintDeps list. * * grouping_columns is a list of grouping expressions, in which columns of * the rel of interest are Vars with the indicated varno/varlevelsup. * * Currently we only check to see if the rel has a primary key that is a * subset of the grouping_columns. We could also use plain unique constraints * if all their columns are known not null, but there's a problem: we need * to be able to represent the not-null-ness as part of the constraints added * to *constraintDeps. FIXME whenever not-null constraints get represented * in pg_constraint. */ bool check_functional_grouping(Oid relid, Index varno, Index varlevelsup, List *grouping_columns, List **constraintDeps) { bool result = false; Relation pg_constraint; HeapTuple tuple; SysScanDesc scan; ScanKeyData skey[1]; /* Scan pg_constraint for constraints of the target rel */ pg_constraint = heap_open(ConstraintRelationId, AccessShareLock); ScanKeyInit(&skey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true, SnapshotNow, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); Datum adatum; bool isNull; ArrayType *arr; int16 *attnums; int numkeys; int i; bool found_col; /* Only PK constraints are of interest for now, see comment above */ if (con->contype != CONSTRAINT_PRIMARY) continue; /* Constraint must be non-deferrable */ if (con->condeferrable) continue; /* Extract the conkey array, ie, attnums of PK's columns */ adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, RelationGetDescr(pg_constraint), &isNull); if (isNull) elog(ERROR, "null conkey for constraint %u", HeapTupleGetOid(tuple)); arr = 
DatumGetArrayTypeP(adatum); /* ensure not toasted */ numkeys = ARR_DIMS(arr)[0]; if (ARR_NDIM(arr) != 1 || numkeys < 0 || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != INT2OID) elog(ERROR, "conkey is not a 1-D smallint array"); attnums = (int16 *) ARR_DATA_PTR(arr); found_col = false; for (i = 0; i < numkeys; i++) { AttrNumber attnum = attnums[i]; ListCell *gl; found_col = false; foreach(gl, grouping_columns) { Var *gvar = (Var *) lfirst(gl); if (IsA(gvar, Var) && gvar->varno == varno && gvar->varlevelsup == varlevelsup && gvar->varattno == attnum) { found_col = true; break; } } if (!found_col) break; } if (found_col) { /* The PK is a subset of grouping_columns, so we win */ *constraintDeps = lappend_oid(*constraintDeps, HeapTupleGetOid(tuple)); result = true; break; } }
/*
 * Test whether an object exists.
 *
 * A sub-object (objectSubId != 0; currently only table columns) is looked up
 * in the ATTNUM syscache and exists only if a pg_attribute entry is found
 * and that entry is not marked dropped.
 *
 * Any other object is looked up by OID, through the syscache for its catalog
 * when one is listed below, otherwise through an index scan on the catalog's
 * OID index.  An unrecognized classId raises an error.
 *
 * Returns true if the object exists, false otherwise.
 */
static bool
object_exists(ObjectAddress address)
{
	int			cache = -1;
	Oid			indexoid = InvalidOid;
	Relation	rel;
	ScanKeyData skey[1];
	SysScanDesc sd;
	bool		found;

	/* Sub-objects require special treatment. */
	if (address.objectSubId != 0)
	{
		HeapTuple	atttup;

		/* Currently, attributes are the only sub-objects. */
		Assert(address.classId == RelationRelationId);
		atttup = SearchSysCache2(ATTNUM,
								 ObjectIdGetDatum(address.objectId),
								 Int16GetDatum(address.objectSubId));
		if (!HeapTupleIsValid(atttup))
			found = false;
		else
		{
			/*
			 * A dropped column keeps its pg_attribute row, so finding the
			 * row is not enough: the column exists only if it has not been
			 * dropped.
			 */
			found = !((Form_pg_attribute) GETSTRUCT(atttup))->attisdropped;
			ReleaseSysCache(atttup);
		}
		return found;
	}

	/*
	 * For object types that have a relevant syscache, we use it; for
	 * everything else, we'll have to do an index-scan.  This switch sets
	 * either the cache to be used for the syscache lookup, or the index to
	 * be used for the index scan.
	 */
	switch (address.classId)
	{
		case RelationRelationId:
			cache = RELOID;
			break;
		case RewriteRelationId:
			indexoid = RewriteOidIndexId;
			break;
		case TriggerRelationId:
			indexoid = TriggerOidIndexId;
			break;
		case ConstraintRelationId:
			cache = CONSTROID;
			break;
		case DatabaseRelationId:
			cache = DATABASEOID;
			break;
		case TableSpaceRelationId:
			cache = TABLESPACEOID;
			break;
		case AuthIdRelationId:
			cache = AUTHOID;
			break;
		case NamespaceRelationId:
			cache = NAMESPACEOID;
			break;
		case LanguageRelationId:
			cache = LANGOID;
			break;
		case TypeRelationId:
			cache = TYPEOID;
			break;
		case ProcedureRelationId:
			cache = PROCOID;
			break;
		case OperatorRelationId:
			cache = OPEROID;
			break;
		case CollationRelationId:
			cache = COLLOID;
			break;
		case ConversionRelationId:
			cache = CONVOID;
			break;
		case OperatorClassRelationId:
			cache = CLAOID;
			break;
		case OperatorFamilyRelationId:
			cache = OPFAMILYOID;
			break;
		case LargeObjectRelationId:

			/*
			 * Weird backward compatibility hack: ObjectAddress notation uses
			 * LargeObjectRelationId for large objects, but since PostgreSQL
			 * 9.0, the relevant catalog is actually
			 * LargeObjectMetadataRelationId.
			 */
			address.classId = LargeObjectMetadataRelationId;
			indexoid = LargeObjectMetadataOidIndexId;
			break;
		case CastRelationId:
			indexoid = CastOidIndexId;
			break;
		case TSParserRelationId:
			cache = TSPARSEROID;
			break;
		case TSDictionaryRelationId:
			cache = TSDICTOID;
			break;
		case TSTemplateRelationId:
			cache = TSTEMPLATEOID;
			break;
		case TSConfigRelationId:
			cache = TSCONFIGOID;
			break;
		case ExtensionRelationId:
			indexoid = ExtensionOidIndexId;
			break;
		default:
			elog(ERROR, "unrecognized classid: %u", address.classId);
	}

	/* Found a syscache? */
	if (cache != -1)
		return SearchSysCacheExists1(cache, ObjectIdGetDatum(address.objectId));

	/* No syscache, so examine the table directly. */
	Assert(OidIsValid(indexoid));
	ScanKeyInit(&skey[0],
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(address.objectId));
	rel = heap_open(address.classId, AccessShareLock);
	sd = systable_beginscan(rel, indexoid, true, SnapshotNow, 1, skey);
	found = HeapTupleIsValid(systable_getnext(sd));
	systable_endscan(sd);
	heap_close(rel, AccessShareLock);
	return found;
}