/* * inv_open -- access an existing large object. * * Returns: * large object descriptor, appropriately filled in. */ LargeObjectDesc * inv_open(Oid lobjId, int flags) { LargeObjectDesc *retval; if (!LargeObjectExists(lobjId)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u does not exist", lobjId))); retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = lobjId; retval->subid = GetCurrentSubTransactionId(); retval->offset = 0; if (flags & INV_WRITE) retval->flags = IFS_WRLOCK | IFS_RDLOCK; else if (flags & INV_READ) retval->flags = IFS_RDLOCK; else elog(ERROR, "invalid flags: %d", flags); return retval; }
/* * inv_create -- create a new large object. * * Arguments: * flags * * Returns: * large object descriptor, appropriately filled in. */ LargeObjectDesc * inv_create(int flags) { Oid file_oid; LargeObjectDesc *retval; /* * Allocate an OID to be the LO's identifier. */ file_oid = newoid(); /* Check for duplicate (shouldn't happen) */ if (LargeObjectExists(file_oid)) elog(ERROR, "large object %u already exists", file_oid); /* * Create the LO by writing an empty first page for it in * pg_largeobject */ LargeObjectCreate(file_oid); /* * Advance command counter so that new tuple will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); /* * Prepare LargeObjectDesc data structure for accessing LO */ retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = file_oid; retval->subid = GetCurrentSubTransactionId(); retval->offset = 0; if (flags & INV_WRITE) retval->flags = IFS_WRLOCK | IFS_RDLOCK; else if (flags & INV_READ) retval->flags = IFS_RDLOCK; else elog(ERROR, "invalid flags: %d", flags); return retval; }
/* * Translate an object name and arguments (as passed by the parser) to an * ObjectAddress. * * The returned object will be locked using the specified lockmode. If a * sub-object is looked up, the parent object will be locked instead. * * If the object is a relation or a child object of a relation (e.g. an * attribute or contraint), the relation is also opened and *relp receives * the open relcache entry pointer; otherwise, *relp is set to NULL. This * is a bit grotty but it makes life simpler, since the caller will * typically need the relcache entry too. Caller must close the relcache * entry when done with it. The relation is locked with the specified lockmode * if the target object is the relation itself or an attribute, but for other * child objects, only AccessShareLock is acquired on the relation. * * We don't currently provide a function to release the locks acquired here; * typically, the lock must be held until commit to guard against a concurrent * drop operation. */ ObjectAddress get_object_address(ObjectType objtype, List *objname, List *objargs, Relation *relp, LOCKMODE lockmode) { ObjectAddress address; Relation relation = NULL; /* Some kind of lock must be taken. */ Assert(lockmode != NoLock); switch (objtype) { case OBJECT_INDEX: case OBJECT_SEQUENCE: case OBJECT_TABLE: case OBJECT_VIEW: case OBJECT_FOREIGN_TABLE: relation = get_relation_by_qualified_name(objtype, objname, lockmode); address.classId = RelationRelationId; address.objectId = RelationGetRelid(relation); address.objectSubId = 0; break; case OBJECT_COLUMN: address = get_object_address_attribute(objtype, objname, &relation, lockmode); break; case OBJECT_RULE: case OBJECT_TRIGGER: case OBJECT_CONSTRAINT: address = get_object_address_relobject(objtype, objname, &relation); break; case OBJECT_DATABASE: case OBJECT_EXTENSION: case OBJECT_TABLESPACE: case OBJECT_ROLE: case OBJECT_SCHEMA: case OBJECT_LANGUAGE: case OBJECT_FDW: case OBJECT_FOREIGN_SERVER: address = get_object_address_unqualified(objtype, objname); break; case OBJECT_TYPE: case OBJECT_DOMAIN: address.classId = TypeRelationId; address.objectId = typenameTypeId(NULL, makeTypeNameFromNameList(objname)); address.objectSubId = 0; break; case OBJECT_AGGREGATE: address.classId = ProcedureRelationId; address.objectId = LookupAggNameTypeNames(objname, objargs, false); address.objectSubId = 0; break; case OBJECT_FUNCTION: address.classId = ProcedureRelationId; address.objectId = LookupFuncNameTypeNames(objname, objargs, false); address.objectSubId = 0; break; case OBJECT_OPERATOR: Assert(list_length(objargs) == 2); address.classId = OperatorRelationId; address.objectId = LookupOperNameTypeNames(NULL, objname, (TypeName *) linitial(objargs), (TypeName *) lsecond(objargs), false, -1); address.objectSubId = 0; break; case OBJECT_COLLATION: address.classId = CollationRelationId; address.objectId = get_collation_oid(objname, false); address.objectSubId = 0; break; case OBJECT_CONVERSION: address.classId = ConversionRelationId; address.objectId = get_conversion_oid(objname, false); address.objectSubId = 0; break; case OBJECT_OPCLASS: case OBJECT_OPFAMILY: address = get_object_address_opcf(objtype, objname, objargs); break; case OBJECT_LARGEOBJECT: Assert(list_length(objname) == 1); address.classId = LargeObjectRelationId; address.objectId = oidparse(linitial(objname)); address.objectSubId = 0; if (!LargeObjectExists(address.objectId)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u does not exist", address.objectId))); break; case OBJECT_CAST: { TypeName *sourcetype = (TypeName *) linitial(objname); TypeName *targettype = (TypeName *) linitial(objargs); Oid sourcetypeid = typenameTypeId(NULL, sourcetype); Oid targettypeid = typenameTypeId(NULL, targettype); address.classId = CastRelationId; address.objectId = get_cast_oid(sourcetypeid, targettypeid, false); address.objectSubId = 0; } break; case OBJECT_TSPARSER: address.classId = TSParserRelationId; address.objectId = get_ts_parser_oid(objname, false); address.objectSubId = 0; break; case OBJECT_TSDICTIONARY: address.classId = TSDictionaryRelationId; address.objectId = get_ts_dict_oid(objname, false); address.objectSubId = 0; break; case OBJECT_TSTEMPLATE: address.classId = TSTemplateRelationId; address.objectId = get_ts_template_oid(objname, false); address.objectSubId = 0; break; case OBJECT_TSCONFIGURATION: address.classId = TSConfigRelationId; address.objectId = get_ts_config_oid(objname, false); address.objectSubId = 0; break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); /* placate compiler, in case it thinks elog might return */ address.classId = InvalidOid; address.objectId = InvalidOid; address.objectSubId = 0; } /* * If we're dealing with a relation or attribute, then the relation is * already locked. If we're dealing with any other type of object, we need * to lock it and then verify that it still exists. */ if (address.classId != RelationRelationId) { if (IsSharedRelation(address.classId)) LockSharedObject(address.classId, address.objectId, 0, lockmode); else LockDatabaseObject(address.classId, address.objectId, 0, lockmode); /* Did it go away while we were waiting for the lock? */ if (!object_exists(address)) elog(ERROR, "cache lookup failed for class %u object %u subobj %d", address.classId, address.objectId, address.objectSubId); } /* Return the object address and the relation. */ *relp = relation; return address; }
void inv_truncate(LargeObjectDesc *obj_desc, int len) { int32 pageno = (int32) (len / LOBLKSIZE); int off; ScanKeyData skey[2]; SysScanDesc sd; HeapTuple oldtuple; Form_pg_largeobject olddata; struct { bytea hdr; char data[LOBLKSIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } workbuf; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; bool nulls[Natts_pg_largeobject]; bool replace[Natts_pg_largeobject]; CatalogIndexState indstate; Assert(PointerIsValid(obj_desc)); /* enforce writability because snapshot is probably wrong otherwise */ if ((obj_desc->flags & IFS_WRLOCK) == 0) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("large object %u was not opened for writing", obj_desc->id))); /* check existence of the target largeobject */ if (!LargeObjectExists(obj_desc->id)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u was already dropped", obj_desc->id))); open_lo_relation(); indstate = CatalogOpenIndexes(lo_heap_r); /* * Set up to find all pages with desired loid and pageno >= target */ ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); ScanKeyInit(&skey[1], Anum_pg_largeobject_pageno, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); sd = systable_beginscan_ordered(lo_heap_r, lo_index_r, obj_desc->snapshot, 2, skey); /* * If possible, get the page the truncation point is in. The truncation * point may be beyond the end of the LO or in a hole. */ olddata = NULL; if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { if (HeapTupleHasNulls(oldtuple)) /* paranoia */ elog(ERROR, "null field found in pg_largeobject"); olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple); Assert(olddata->pageno >= pageno); } /* * If we found the page of the truncation point we need to truncate the * data in it. Otherwise if we're in a hole, we need to create a page to * mark the end of data. */ if (olddata != NULL && olddata->pageno == pageno) { /* First, load old data into workbuf */ bytea *datafield = &(olddata->data); /* see note at top of * file */ bool pfreeit = false; int pagelen; if (VARATT_IS_EXTENDED(datafield)) { datafield = (bytea *) heap_tuple_untoast_attr((struct varlena *) datafield); pfreeit = true; } pagelen = getbytealen(datafield); Assert(pagelen <= LOBLKSIZE); memcpy(workb, VARDATA(datafield), pagelen); if (pfreeit) pfree(datafield); /* * Fill any hole */ off = len % LOBLKSIZE; if (off > pagelen) MemSet(workb + pagelen, 0, off - pagelen); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replace, false, sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = true; newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r), values, nulls, replace); simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } else { /* * If the first page we found was after the truncation point, we're in * a hole that we'll fill, but we need to delete the later page * because the loop below won't visit it again. */ if (olddata != NULL) { Assert(olddata->pageno > pageno); simple_heap_delete(lo_heap_r, &oldtuple->t_self); } /* * Write a brand new page. * * Fill the hole up to the truncation point */ off = len % LOBLKSIZE; if (off > 0) MemSet(workb, 0, off); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert new tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls); simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } /* * Delete any pages after the truncation point. If the initial search * didn't find a page, then of course there's nothing more to do. */ if (olddata != NULL) { while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { simple_heap_delete(lo_heap_r, &oldtuple->t_self); } } systable_endscan_ordered(sd); CatalogCloseIndexes(indstate); /* * Advance command counter so that tuple updates will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); }
int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes) { int nwritten = 0; int n; int off; int len; int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE); ScanKeyData skey[2]; SysScanDesc sd; HeapTuple oldtuple; Form_pg_largeobject olddata; bool neednextpage; bytea *datafield; bool pfreeit; struct { bytea hdr; char data[LOBLKSIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } workbuf; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; bool nulls[Natts_pg_largeobject]; bool replace[Natts_pg_largeobject]; CatalogIndexState indstate; Assert(PointerIsValid(obj_desc)); Assert(buf != NULL); /* enforce writability because snapshot is probably wrong otherwise */ if ((obj_desc->flags & IFS_WRLOCK) == 0) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("large object %u was not opened for writing", obj_desc->id))); /* check existence of the target largeobject */ if (!LargeObjectExists(obj_desc->id)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u was already dropped", obj_desc->id))); if (nbytes <= 0) return 0; open_lo_relation(); indstate = CatalogOpenIndexes(lo_heap_r); ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); ScanKeyInit(&skey[1], Anum_pg_largeobject_pageno, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); sd = systable_beginscan_ordered(lo_heap_r, lo_index_r, obj_desc->snapshot, 2, skey); oldtuple = NULL; olddata = NULL; neednextpage = true; while (nwritten < nbytes) { /* * If possible, get next pre-existing page of the LO. We expect the * indexscan will deliver these in order --- but there may be holes. */ if (neednextpage) { if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { if (HeapTupleHasNulls(oldtuple)) /* paranoia */ elog(ERROR, "null field found in pg_largeobject"); olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple); Assert(olddata->pageno >= pageno); } neednextpage = false; } /* * If we have a pre-existing page, see if it is the page we want to * write, or a later one. */ if (olddata != NULL && olddata->pageno == pageno) { /* * Update an existing page with fresh data. * * First, load old data into workbuf */ datafield = &(olddata->data); /* see note at top of file */ pfreeit = false; if (VARATT_IS_EXTENDED(datafield)) { datafield = (bytea *) heap_tuple_untoast_attr((struct varlena *) datafield); pfreeit = true; } len = getbytealen(datafield); Assert(len <= LOBLKSIZE); memcpy(workb, VARDATA(datafield), len); if (pfreeit) pfree(datafield); /* * Fill any hole */ off = (int) (obj_desc->offset % LOBLKSIZE); if (off > len) MemSet(workb + len, 0, off - len); /* * Insert appropriate portion of new data */ n = LOBLKSIZE - off; n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten); memcpy(workb + off, buf + nwritten, n); nwritten += n; obj_desc->offset += n; off += n; /* compute valid length of new page */ len = (len >= off) ? len : off; SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replace, false, sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = true; newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r), values, nulls, replace); simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); /* * We're done with this old page. */ oldtuple = NULL; olddata = NULL; neednextpage = true; } else { /* * Write a brand new page. * * First, fill any hole */ off = (int) (obj_desc->offset % LOBLKSIZE); if (off > 0) MemSet(workb, 0, off); /* * Insert appropriate portion of new data */ n = LOBLKSIZE - off; n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten); memcpy(workb + off, buf + nwritten, n); nwritten += n; obj_desc->offset += n; /* compute valid length of new page */ len = off + n; SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls); simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } pageno++; } systable_endscan_ordered(sd); CatalogCloseIndexes(indstate); /* * Advance command counter so that my tuple updates will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); return nwritten; }