/*
 *	inv_create -- create a new large object
 *
 *	Arguments:
 *	  lobjId - OID to use for new large object, or InvalidOid to pick one
 *
 *	Returns:
 *	  OID of new object
 *
 * If lobjId is not InvalidOid, then an error occurs if the OID is already
 * in use.
 */
Oid
inv_create(Oid lobjId)
{
    /*
     * Allocate an OID to be the LO's identifier, unless we were told what to
     * use.  We can use the index on pg_largeobject for checking OID
     * uniqueness, even though it has additional columns besides OID.
     */
    if (!OidIsValid(lobjId))
    {
        open_lo_relation();

        lobjId = GetNewOidWithIndex(lo_heap_r, lo_index_r);
    }

    /*
     * Create the LO by writing an empty first page for it in pg_largeobject
     * (will fail if duplicate)
     */
    LargeObjectCreate(lobjId);

    /*
     * Advance command counter to make new tuple visible to later operations.
     */
    CommandCounterIncrement();

    return lobjId;
}
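/*
 * Illustrative only: a minimal client-side sketch of how the functions in
 * this file are ultimately exercised through libpq's large-object API.
 * Not part of this translation unit (kept under #if 0); the empty
 * connection string and the lack of error handling are assumptions made
 * purely for the example.
 */
#if 0
#include <stdio.h>
#include <libpq-fe.h>
#include <libpq/libpq-fs.h>     /* INV_READ / INV_WRITE */

int
main(void)
{
    PGconn     *conn = PQconnectdb("");     /* rely on environment defaults */
    Oid         loid;
    int         fd;
    char        rbuf[16];

    if (PQstatus(conn) != CONNECTION_OK)
        return 1;

    /* large-object calls must run inside a transaction */
    PQclear(PQexec(conn, "BEGIN"));

    loid = lo_creat(conn, INV_READ | INV_WRITE);    /* served by inv_create() */
    fd = lo_open(conn, loid, INV_READ | INV_WRITE);
    lo_write(conn, fd, "hello", 5);                 /* served by inv_write() */
    lo_lseek(conn, fd, 0, SEEK_SET);
    lo_read(conn, fd, rbuf, sizeof(rbuf));          /* served by inv_read() */
    lo_close(conn, fd);

    PQclear(PQexec(conn, "COMMIT"));
    PQfinish(conn);
    return 0;
}
#endif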
/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 */
static uint32
inv_getsize(LargeObjectDesc *obj_desc)
{
    bool        found = false;
    uint32      lastbyte = 0;
    ScanKeyData skey[1];
    IndexScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    sd = index_beginscan(lo_heap_r, lo_index_r,
                         obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    while ((tuple = index_getnext(sd, BackwardScanDirection)) != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        found = true;
        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GETSTRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        pfreeit = false;
        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        lastbyte = data->pageno * LOBLKSIZE + getbytealen(datafield);
        if (pfreeit)
            pfree(datafield);
        break;
    }

    index_endscan(sd);

    if (!found)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u does not exist", obj_desc->id)));

    return lastbyte;
}
/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 */
static uint64
inv_getsize(LargeObjectDesc *obj_desc)
{
    uint64      lastbyte = 0;
    ScanKeyData skey[1];
    SysScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    tuple = systable_getnext_ordered(sd, BackwardScanDirection);
    if (HeapTupleIsValid(tuple))
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GETSTRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        pfreeit = false;
        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        lastbyte = (uint64) data->pageno * LOBLKSIZE + getbytealen(datafield);
        if (pfreeit)
            pfree(datafield);
    }

    systable_endscan_ordered(sd);

    return lastbyte;
}
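/*
 * Illustrative only: the size computation above reduces to simple paging
 * arithmetic.  This standalone sketch (hypothetical names; the block size
 * is assumed to be 2048 for the example) shows how the last stored page's
 * number and its byte count determine the reported size, i.e. the offset
 * of the last byte + 1.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_LOBLKSIZE 2048      /* assumption for the sketch */

static uint64_t
example_lo_size(int32_t last_pageno, uint32_t last_page_bytes)
{
    /* mirrors the lastbyte computation in inv_getsize() above */
    return (uint64_t) last_pageno * EXAMPLE_LOBLKSIZE + last_page_bytes;
}

int
main(void)
{
    /* a LO whose last stored page is #3 holding 100 bytes */
    printf("%llu\n",
           (unsigned long long) example_lo_size(3, 100));   /* prints 6244 */
    return 0;
}
#endif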
/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 */
static uint32
inv_getsize(struct lobj *obj_desc)
{
    uint32      lastbyte = 0;
    struct scankey skey[1];
    struct sys_scan *sd;
    struct heap_tuple *tuple;

    ASSERT(PTR_VALID(obj_desc));

    open_lo_relation();

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    tuple = systable_getnext_ordered(sd, BACKWARD_SCANDIR);
    if (HT_VALID(tuple))
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HT_HAS_NULLS(tuple))        /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GET_STRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        pfreeit = false;
        if (VLA_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct vla *) datafield);
            pfreeit = true;
        }
        lastbyte = data->pageno * LO_BLK_SIZE + getbytealen(datafield);
        if (pfreeit)
            pfree(datafield);
    }

    systable_endscan_ordered(sd);

    return lastbyte;
}
void
inv_truncate(LargeObjectDesc *obj_desc, int len)
{
    int32       pageno = (int32) (len / LOBLKSIZE);
    int         off;
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];    /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VARDATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!LargeObjectExists(obj_desc->id))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    /*
     * Set up to find all pages with desired loid and pageno >= target
     */
    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    /*
     * If possible, get the page the truncation point is in. The truncation
     * point may be beyond the end of the LO or in a hole.
     */
    olddata = NULL;
    if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
    {
        if (HeapTupleHasNulls(oldtuple))    /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
        Assert(olddata->pageno >= pageno);
    }

    /*
     * If we found the page of the truncation point we need to truncate the
     * data in it.  Otherwise if we're in a hole, we need to create a page to
     * mark the end of data.
     */
    if (olddata != NULL && olddata->pageno == pageno)
    {
        /* First, load old data into workbuf */
        bytea      *datafield = &(olddata->data);   /* see note at top of
                                                     * file */
        bool        pfreeit = false;
        int         pagelen;

        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        pagelen = getbytealen(datafield);
        Assert(pagelen <= LOBLKSIZE);
        memcpy(workb, VARDATA(datafield), pagelen);
        if (pfreeit)
            pfree(datafield);

        /*
         * Fill any hole
         */
        off = len % LOBLKSIZE;
        if (off > pagelen)
            MemSet(workb + pagelen, 0, off - pagelen);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert updated tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        memset(replace, false, sizeof(replace));
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        replace[Anum_pg_largeobject_data - 1] = true;
        newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
                                   values, nulls, replace);
        simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }
    else
    {
        /*
         * If the first page we found was after the truncation point, we're
         * in a hole that we'll fill, but we need to delete the later page
         * because the loop below won't visit it again.
         */
        if (olddata != NULL)
        {
            Assert(olddata->pageno > pageno);
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }

        /*
         * Write a brand new page.
         *
         * Fill the hole up to the truncation point
         */
        off = len % LOBLKSIZE;
        if (off > 0)
            MemSet(workb, 0, off);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert new tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
        values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
        simple_heap_insert(lo_heap_r, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }

    /*
     * Delete any pages after the truncation point.  If the initial search
     * didn't find a page, then of course there's nothing more to do.
     */
    if (olddata != NULL)
    {
        while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
        {
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }
    }

    systable_endscan_ordered(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();
}
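/*
 * Illustrative only: inv_truncate() maps the requested length to a page
 * number and an in-page offset; the page holding the truncation point is
 * rewritten to that offset and every later page is deleted.  A standalone
 * sketch of that mapping (hypothetical names; block size assumed):
 */
#if 0
#include <stdio.h>

#define EXAMPLE_LOBLKSIZE 2048      /* assumption for the sketch */

int
main(void)
{
    int     len = 5000;                         /* requested new length */
    int     pageno = len / EXAMPLE_LOBLKSIZE;   /* page containing the cut: 2 */
    int     off = len % EXAMPLE_LOBLKSIZE;      /* bytes kept in that page: 904 */

    printf("rewrite page %d to %d bytes, delete all pages > %d\n",
           pageno, off, pageno);
    return 0;
}
#endif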
int
inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
{
    int         nwritten = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LOBLKSIZE);
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    bool        neednextpage;
    bytea      *datafield;
    bool        pfreeit;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];    /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VARDATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));
    Assert(buf != NULL);

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!LargeObjectExists(obj_desc->id))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    oldtuple = NULL;
    olddata = NULL;
    neednextpage = true;

    while (nwritten < nbytes)
    {
        /*
         * If possible, get next pre-existing page of the LO.  We expect the
         * indexscan will deliver these in order --- but there may be holes.
         */
        if (neednextpage)
        {
            if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
            {
                if (HeapTupleHasNulls(oldtuple))    /* paranoia */
                    elog(ERROR, "null field found in pg_largeobject");
                olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
                Assert(olddata->pageno >= pageno);
            }
            neednextpage = false;
        }

        /*
         * If we have a pre-existing page, see if it is the page we want to
         * write, or a later one.
         */
        if (olddata != NULL && olddata->pageno == pageno)
        {
            /*
             * Update an existing page with fresh data.
             *
             * First, load old data into workbuf
             */
            datafield = &(olddata->data);   /* see note at top of file */
            pfreeit = false;
            if (VARATT_IS_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct varlena *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            Assert(len <= LOBLKSIZE);
            memcpy(workb, VARDATA(datafield), len);
            if (pfreeit)
                pfree(datafield);

            /*
             * Fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > len)
                MemSet(workb + len, 0, off - len);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            off += n;
            /* compute valid length of new page */
            len = (len >= off) ? len : off;
            SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            memset(replace, false, sizeof(replace));
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            replace[Anum_pg_largeobject_data - 1] = true;
            newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
                                       values, nulls, replace);
            simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);

            /*
             * We're done with this old page.
             */
            oldtuple = NULL;
            olddata = NULL;
            neednextpage = true;
        }
        else
        {
            /*
             * Write a brand new page.
             *
             * First, fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > 0)
                MemSet(workb, 0, off);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            /* compute valid length of new page */
            len = off + n;
            SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
            values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
            simple_heap_insert(lo_heap_r, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);
        }
        pageno++;
    }

    systable_endscan_ordered(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that my tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();

    return nwritten;
}
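/*
 * Illustrative only: the write loop above handles at most one page per
 * iteration.  This standalone sketch (hypothetical names; block size
 * assumed) mirrors just the chunking arithmetic: for a given seek position
 * and residual byte count it reports the in-page offset and how many bytes
 * land in the current page.
 */
#if 0
#include <stdio.h>

#define EXAMPLE_LOBLKSIZE 2048      /* assumption for the sketch */

int
main(void)
{
    long    offset = 3000;          /* current seek position */
    int     remaining = 5000;       /* bytes still to write */

    while (remaining > 0)
    {
        int     off = (int) (offset % EXAMPLE_LOBLKSIZE);
        int     n = EXAMPLE_LOBLKSIZE - off;

        if (n > remaining)
            n = remaining;
        printf("page %ld: write %d bytes at in-page offset %d\n",
               offset / EXAMPLE_LOBLKSIZE, n, off);
        offset += n;
        remaining -= n;
    }
    return 0;
}
#endif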
int
inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
    int         nread = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LOBLKSIZE);
    uint32      pageoff;
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));
    Assert(buf != NULL);

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    while ((tuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");

        data = (Form_pg_largeobject) GETSTRUCT(tuple);

        /*
         * We expect the indexscan will deliver pages in order.  However,
         * there may be missing pages if the LO contains unwritten "holes".
         * We want missing sections to read out as zeroes.
         */
        pageoff = ((uint32) data->pageno) * LOBLKSIZE;
        if (pageoff > obj_desc->offset)
        {
            n = pageoff - obj_desc->offset;
            n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
            MemSet(buf + nread, 0, n);
            nread += n;
            obj_desc->offset += n;
        }

        if (nread < nbytes)
        {
            Assert(obj_desc->offset >= pageoff);
            off = (int) (obj_desc->offset - pageoff);
            Assert(off >= 0 && off < LOBLKSIZE);

            datafield = &(data->data);  /* see note at top of file */
            pfreeit = false;
            if (VARATT_IS_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct varlena *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            if (len > off)
            {
                n = len - off;
                n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
                memcpy(buf + nread, VARDATA(datafield) + off, n);
                nread += n;
                obj_desc->offset += n;
            }
            if (pfreeit)
                pfree(datafield);
        }

        if (nread >= nbytes)
            break;
    }

    systable_endscan_ordered(sd);

    return nread;
}
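/*
 * Illustrative only: inv_read() zero-fills any gap between the current
 * offset and the next stored page before copying real data, so holes read
 * back as zeroes.  A standalone sketch of that decision (hypothetical
 * names; block size assumed):
 */
#if 0
#include <stdio.h>

#define EXAMPLE_LOBLKSIZE 2048      /* assumption for the sketch */

int
main(void)
{
    long    offset = 100;           /* reader's current position */
    int     next_pageno = 2;        /* first stored page the index scan returns */
    long    pageoff = (long) next_pageno * EXAMPLE_LOBLKSIZE;

    if (pageoff > offset)
        printf("return %ld zero bytes for the hole, then the page data\n",
               pageoff - offset);
    return 0;
}
#endif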
int
inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
    int         nwritten = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LOBLKSIZE);
    ScanKeyData skey[2];
    IndexScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    bool        neednextpage;
    bytea      *datafield;
    bool        pfreeit;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];
    }           workbuf;
    char       *workb = VARATT_DATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    char        nulls[Natts_pg_largeobject];
    char        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));
    Assert(buf != NULL);

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = index_beginscan(lo_heap_r, lo_index_r,
                         SnapshotNow, 2, skey);

    oldtuple = NULL;
    olddata = NULL;
    neednextpage = true;

    while (nwritten < nbytes)
    {
        /*
         * If possible, get next pre-existing page of the LO.  We assume the
         * indexscan will deliver these in order --- but there may be holes.
         */
        if (neednextpage)
        {
            if ((oldtuple = index_getnext(sd, ForwardScanDirection)) != NULL)
            {
                olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
                Assert(olddata->pageno >= pageno);
            }
            neednextpage = false;
        }

        /*
         * If we have a pre-existing page, see if it is the page we want to
         * write, or a later one.
         */
        if (olddata != NULL && olddata->pageno == pageno)
        {
            /*
             * Update an existing page with fresh data.
             *
             * First, load old data into workbuf
             */
            datafield = &(olddata->data);
            pfreeit = false;
            if (VARATT_IS_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((varattrib *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            Assert(len <= LOBLKSIZE);
            memcpy(workb, VARDATA(datafield), len);
            if (pfreeit)
                pfree(datafield);

            /*
             * Fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > len)
                MemSet(workb + len, 0, off - len);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            off += n;
            /* compute valid length of new page */
            len = (len >= off) ? len : off;
            VARATT_SIZEP(&workbuf.hdr) = len + VARHDRSZ;

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, ' ', sizeof(nulls));
            memset(replace, ' ', sizeof(replace));
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            replace[Anum_pg_largeobject_data - 1] = 'r';
            newtup = heap_modifytuple(oldtuple, lo_heap_r,
                                      values, nulls, replace);
            simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);

            /*
             * We're done with this old page.
             */
            oldtuple = NULL;
            olddata = NULL;
            neednextpage = true;
        }
        else
        {
            /*
             * Write a brand new page.
             *
             * First, fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > 0)
                MemSet(workb, 0, off);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            /* compute valid length of new page */
            len = off + n;
            VARATT_SIZEP(&workbuf.hdr) = len + VARHDRSZ;

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, ' ', sizeof(nulls));
            values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
            values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls);
            simple_heap_insert(lo_heap_r, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);
        }
        pageno++;
    }

    index_endscan(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that my tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();

    return nwritten;
}
void
inv_truncate(struct lobj *obj_desc, int len)
{
    int32       pageno = (int32) (len / LO_BLK_SIZE);
    int         off;
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *oldtuple;
    Form_pg_largeobject olddata;
    struct
    {
        bytea       hdr;
        char        data[LO_BLK_SIZE];  /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VLA_DATA(&workbuf.hdr);
    struct heap_tuple *newtup;
    datum_t     values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    ASSERT(PTR_VALID(obj_desc));

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(E_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!large_obj_exists(obj_desc->id))
        ereport(ERROR,
                (errcode(E_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    open_lo_relation();

    indstate = cat_open_indexes(lo_heap_r);

    /*
     * Set up to find all pages with desired loid and pageno >= target
     */
    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    /*
     * If possible, get the page the truncation point is in. The truncation
     * point may be beyond the end of the LO or in a hole.
     */
    olddata = NULL;
    if ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
    {
        if (HT_HAS_NULLS(oldtuple))     /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        olddata = (Form_pg_largeobject) GET_STRUCT(oldtuple);
        ASSERT(olddata->pageno >= pageno);
    }

    /*
     * If we found the page of the truncation point we need to truncate the
     * data in it.  Otherwise if we're in a hole, we need to create a page to
     * mark the end of data.
     */
    if (olddata != NULL && olddata->pageno == pageno)
    {
        /* First, load old data into workbuf */
        bytea      *datafield = &(olddata->data);   /* see note at top of file */
        bool        pfreeit = false;
        int         pagelen;

        if (VLA_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct vla *) datafield);
            pfreeit = true;
        }
        pagelen = getbytealen(datafield);
        ASSERT(pagelen <= LO_BLK_SIZE);
        memcpy(workb, VLA_DATA(datafield), pagelen);
        if (pfreeit)
            pfree(datafield);

        /*
         * Fill any hole
         */
        off = len % LO_BLK_SIZE;
        if (off > pagelen)
            pg_memset(workb + pagelen, 0, off - pagelen);

        /* compute length of new page */
        VLA_SET_SZ_STND(&workbuf.hdr, off + VAR_HDR_SZ);

        /*
         * Form and insert updated tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        memset(replace, false, sizeof(replace));
        values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
        replace[Anum_pg_largeobject_data - 1] = true;
        newtup = heap_modify_tuple(oldtuple, REL_DESC(lo_heap_r),
                                   values, nulls, replace);
        simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
        cat_index_insert(indstate, newtup);
        heap_free_tuple(newtup);
    }
    else
    {
        /*
         * If the first page we found was after the truncation point, we're
         * in a hole that we'll fill, but we need to delete the later page
         * because the loop below won't visit it again.
         */
        if (olddata != NULL)
        {
            ASSERT(olddata->pageno > pageno);
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }

        /*
         * Write a brand new page.
         *
         * Fill the hole up to the truncation point
         */
        off = len % LO_BLK_SIZE;
        if (off > 0)
            pg_memset(workb, 0, off);

        /* compute length of new page */
        VLA_SET_SZ_STND(&workbuf.hdr, off + VAR_HDR_SZ);

        /*
         * Form and insert new tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        values[Anum_pg_largeobject_loid - 1] = OID_TO_D(obj_desc->id);
        values[Anum_pg_largeobject_pageno - 1] = INT32_TO_D(pageno);
        values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
        newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
        simple_heap_insert(lo_heap_r, newtup);
        cat_index_insert(indstate, newtup);
        heap_free_tuple(newtup);
    }

    /*
     * Delete any pages after the truncation point.  If the initial search
     * didn't find a page, then of course there's nothing more to do.
     */
    if (olddata != NULL)
    {
        while ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
        {
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }
    }

    systable_endscan_ordered(sd);

    cat_close_indexes(indstate);

    /*
     * Advance command counter so that tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    cmd_count_incr();
}
int
inv_write(struct lobj *obj_desc, const char *buf, int nbytes)
{
    int         nwritten = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LO_BLK_SIZE);
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *oldtuple;
    Form_pg_largeobject olddata;
    bool        neednextpage;
    bytea      *datafield;
    bool        pfreeit;
    struct
    {
        bytea       hdr;
        char        data[LO_BLK_SIZE];  /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VLA_DATA(&workbuf.hdr);
    struct heap_tuple *newtup;
    datum_t     values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    ASSERT(PTR_VALID(obj_desc));
    ASSERT(buf != NULL);

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(E_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!large_obj_exists(obj_desc->id))
        ereport(ERROR,
                (errcode(E_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    indstate = cat_open_indexes(lo_heap_r);

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    oldtuple = NULL;
    olddata = NULL;
    neednextpage = true;

    while (nwritten < nbytes)
    {
        /*
         * If possible, get next pre-existing page of the LO.  We expect the
         * indexscan will deliver these in order --- but there may be holes.
         */
        if (neednextpage)
        {
            if ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
            {
                if (HT_HAS_NULLS(oldtuple))     /* paranoia */
                    elog(ERROR, "null field found in pg_largeobject");
                olddata = (Form_pg_largeobject) GET_STRUCT(oldtuple);
                ASSERT(olddata->pageno >= pageno);
            }
            neednextpage = false;
        }

        /*
         * If we have a pre-existing page, see if it is the page we want to
         * write, or a later one.
         */
        if (olddata != NULL && olddata->pageno == pageno)
        {
            /*
             * Update an existing page with fresh data.
             *
             * First, load old data into workbuf
             */
            datafield = &(olddata->data);   /* see note at top of file */
            pfreeit = false;
            if (VLA_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct vla *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            ASSERT(len <= LO_BLK_SIZE);
            memcpy(workb, VLA_DATA(datafield), len);
            if (pfreeit)
                pfree(datafield);

            /*
             * Fill any hole
             */
            off = (int) (obj_desc->offset % LO_BLK_SIZE);
            if (off > len)
                pg_memset(workb + len, 0, off - len);

            /*
             * Insert appropriate portion of new data
             */
            n = LO_BLK_SIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            off += n;
            /* compute valid length of new page */
            len = (len >= off) ? len : off;
            VLA_SET_SZ_STND(&workbuf.hdr, len + VAR_HDR_SZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            memset(replace, false, sizeof(replace));
            values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
            replace[Anum_pg_largeobject_data - 1] = true;
            newtup = heap_modify_tuple(oldtuple, REL_DESC(lo_heap_r),
                                       values, nulls, replace);
            simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
            cat_index_insert(indstate, newtup);
            heap_free_tuple(newtup);

            /*
             * We're done with this old page.
             */
            oldtuple = NULL;
            olddata = NULL;
            neednextpage = true;
        }
        else
        {
            /*
             * Write a brand new page.
             *
             * First, fill any hole
             */
            off = (int) (obj_desc->offset % LO_BLK_SIZE);
            if (off > 0)
                pg_memset(workb, 0, off);

            /*
             * Insert appropriate portion of new data
             */
            n = LO_BLK_SIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            /* compute valid length of new page */
            len = off + n;
            VLA_SET_SZ_STND(&workbuf.hdr, len + VAR_HDR_SZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            values[Anum_pg_largeobject_loid - 1] = OID_TO_D(obj_desc->id);
            values[Anum_pg_largeobject_pageno - 1] = INT32_TO_D(pageno);
            values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
            newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
            simple_heap_insert(lo_heap_r, newtup);
            cat_index_insert(indstate, newtup);
            heap_free_tuple(newtup);
        }
        pageno++;
    }

    systable_endscan_ordered(sd);

    cat_close_indexes(indstate);

    /*
     * Advance command counter so that my tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    cmd_count_incr();

    return nwritten;
}
int
inv_read(struct lobj *obj_desc, char *buf, int nbytes)
{
    int         nread = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LO_BLK_SIZE);
    uint32      pageoff;
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *tuple;

    ASSERT(PTR_VALID(obj_desc));
    ASSERT(buf != NULL);

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    while ((tuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HT_HAS_NULLS(tuple))        /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");

        data = (Form_pg_largeobject) GET_STRUCT(tuple);

        /*
         * We expect the indexscan will deliver pages in order.  However,
         * there may be missing pages if the LO contains unwritten "holes".
         * We want missing sections to read out as zeroes.
         */
        pageoff = ((uint32) data->pageno) * LO_BLK_SIZE;
        if (pageoff > obj_desc->offset)
        {
            n = pageoff - obj_desc->offset;
            n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
            pg_memset(buf + nread, 0, n);
            nread += n;
            obj_desc->offset += n;
        }

        if (nread < nbytes)
        {
            ASSERT(obj_desc->offset >= pageoff);
            off = (int) (obj_desc->offset - pageoff);
            ASSERT(off >= 0 && off < LO_BLK_SIZE);

            datafield = &(data->data);  /* see note at top of file */
            pfreeit = false;
            if (VLA_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct vla *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            if (len > off)
            {
                n = len - off;
                n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
                memcpy(buf + nread, VLA_DATA(datafield) + off, n);
                nread += n;
                obj_desc->offset += n;
            }
            if (pfreeit)
                pfree(datafield);
        }

        if (nread >= nbytes)
            break;
    }

    systable_endscan_ordered(sd);

    return nread;
}
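/*
 * Illustrative only: a minimal client-side sketch of driving truncation
 * from libpq, complementing the create/write/read sketch near the top of
 * this section.  Not part of this translation unit (kept under #if 0); the
 * connection and the large object OID are assumed to already exist.
 */
#if 0
#include <stdio.h>
#include <libpq-fe.h>
#include <libpq/libpq-fs.h>

static void
example_truncate_lo(PGconn *conn, Oid loid)
{
    int     fd;

    PQclear(PQexec(conn, "BEGIN"));

    fd = lo_open(conn, loid, INV_READ | INV_WRITE);
    lo_lseek(conn, fd, 0, SEEK_END);            /* position at end of data */
    printf("size before: %d\n", lo_tell(conn, fd));
    lo_truncate(conn, fd, 1024);                /* served by inv_truncate() */
    lo_close(conn, fd);

    PQclear(PQexec(conn, "COMMIT"));
}
#endif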