/* * Fetch parser cache entry */ TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId) { TSParserCacheEntry *entry; if (TSParserCacheHash == NULL) { /* First time through: initialize the hash table */ HASHCTL ctl; MemSet(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(TSParserCacheEntry); TSParserCacheHash = hash_create("Tsearch parser cache", 4, &ctl, HASH_ELEM | HASH_BLOBS); /* Flush cache on pg_ts_parser changes */ CacheRegisterSyscacheCallback(TSPARSEROID, InvalidateTSCacheCallBack, PointerGetDatum(TSParserCacheHash)); /* Also make sure CacheMemoryContext exists */ if (!CacheMemoryContext) CreateCacheMemoryContext(); } /* Check single-entry cache */ if (lastUsedParser && lastUsedParser->prsId == prsId && lastUsedParser->isvalid) return lastUsedParser; /* Try to look up an existing entry */ entry = (TSParserCacheEntry *) hash_search(TSParserCacheHash, (void *) &prsId, HASH_FIND, NULL); if (entry == NULL || !entry->isvalid) { /* * If we didn't find one, we want to make one. But first look up the * object to be sure the OID is real. */ HeapTuple tp; Form_pg_ts_parser prs; tp = SearchSysCache1(TSPARSEROID, ObjectIdGetDatum(prsId)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for text search parser %u", prsId); prs = (Form_pg_ts_parser) GETSTRUCT(tp); /* * Sanity checks */ if (!OidIsValid(prs->prsstart)) elog(ERROR, "text search parser %u has no prsstart method", prsId); if (!OidIsValid(prs->prstoken)) elog(ERROR, "text search parser %u has no prstoken method", prsId); if (!OidIsValid(prs->prsend)) elog(ERROR, "text search parser %u has no prsend method", prsId); if (entry == NULL) { bool found; /* Now make the cache entry */ entry = (TSParserCacheEntry *) hash_search(TSParserCacheHash, (void *) &prsId, HASH_ENTER, &found); Assert(!found); /* it wasn't there a moment ago */ } MemSet(entry, 0, sizeof(TSParserCacheEntry)); entry->prsId = prsId; entry->startOid = prs->prsstart; entry->tokenOid = prs->prstoken; entry->endOid = prs->prsend; entry->headlineOid = prs->prsheadline; entry->lextypeOid = prs->prslextype; ReleaseSysCache(tp); fmgr_info_cxt(entry->startOid, &entry->prsstart, CacheMemoryContext); fmgr_info_cxt(entry->tokenOid, &entry->prstoken, CacheMemoryContext); fmgr_info_cxt(entry->endOid, &entry->prsend, CacheMemoryContext); if (OidIsValid(entry->headlineOid)) fmgr_info_cxt(entry->headlineOid, &entry->prsheadline, CacheMemoryContext); entry->isvalid = true; } lastUsedParser = entry; return entry; }
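/*
 * A minimal sketch (an assumption, not the actual ts_cache.c code) of the
 * syscache-invalidation callback registered above as
 * InvalidateTSCacheCallBack.  It assumes the hash table passed via `arg`
 * holds TSParserCacheEntry structs, as in lookup_ts_parser_cache().  Entries
 * are only marked invalid, not removed, so pointers held by callers stay
 * usable; the single-entry cache check above tests isvalid and therefore
 * picks this up automatically.
 */
static void
InvalidateTSCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
{
    HTAB       *hash = (HTAB *) DatumGetPointer(arg);
    HASH_SEQ_STATUS status;
    TSParserCacheEntry *entry;

    hash_seq_init(&status, hash);
    while ((entry = (TSParserCacheEntry *) hash_seq_search(&status)) != NULL)
        entry->isvalid = false;
}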
int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes) { int nwritten = 0; int n; int off; int len; int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE); ScanKeyData skey[2]; SysScanDesc sd; HeapTuple oldtuple; Form_pg_largeobject olddata; bool neednextpage; bytea *datafield; bool pfreeit; struct { bytea hdr; char data[LOBLKSIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } workbuf; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; bool nulls[Natts_pg_largeobject]; bool replace[Natts_pg_largeobject]; CatalogIndexState indstate; Assert(PointerIsValid(obj_desc)); Assert(buf != NULL); /* enforce writability because snapshot is probably wrong otherwise */ Assert(obj_desc->flags & IFS_WRLOCK); if (nbytes <= 0) return 0; /* this addition can't overflow because nbytes is only int32 */ if ((nbytes + obj_desc->offset) > MAX_LARGE_OBJECT_SIZE) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid large object write request size: %d", nbytes))); open_lo_relation(); indstate = CatalogOpenIndexes(lo_heap_r); ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); ScanKeyInit(&skey[1], Anum_pg_largeobject_pageno, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); sd = systable_beginscan_ordered(lo_heap_r, lo_index_r, obj_desc->snapshot, 2, skey); oldtuple = NULL; olddata = NULL; neednextpage = true; while (nwritten < nbytes) { /* * If possible, get next pre-existing page of the LO. We expect the * indexscan will deliver these in order --- but there may be holes. */ if (neednextpage) { if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { if (HeapTupleHasNulls(oldtuple)) /* paranoia */ elog(ERROR, "null field found in pg_largeobject"); olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple); Assert(olddata->pageno >= pageno); } neednextpage = false; } /* * If we have a pre-existing page, see if it is the page we want to * write, or a later one. */ if (olddata != NULL && olddata->pageno == pageno) { /* * Update an existing page with fresh data. * * First, load old data into workbuf */ datafield = &(olddata->data); /* see note at top of file */ pfreeit = false; if (VARATT_IS_EXTENDED(datafield)) { datafield = (bytea *) heap_tuple_untoast_attr((struct varlena *) datafield); pfreeit = true; } len = getbytealen(datafield); Assert(len <= LOBLKSIZE); memcpy(workb, VARDATA(datafield), len); if (pfreeit) pfree(datafield); /* * Fill any hole */ off = (int) (obj_desc->offset % LOBLKSIZE); if (off > len) MemSet(workb + len, 0, off - len); /* * Insert appropriate portion of new data */ n = LOBLKSIZE - off; n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten); memcpy(workb + off, buf + nwritten, n); nwritten += n; obj_desc->offset += n; off += n; /* compute valid length of new page */ len = (len >= off) ? len : off; SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replace, false, sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = true; newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r), values, nulls, replace); simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); /* * We're done with this old page. 
*/ oldtuple = NULL; olddata = NULL; neednextpage = true; } else { /* * Write a brand new page. * * First, fill any hole */ off = (int) (obj_desc->offset % LOBLKSIZE); if (off > 0) MemSet(workb, 0, off); /* * Insert appropriate portion of new data */ n = LOBLKSIZE - off; n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten); memcpy(workb + off, buf + nwritten, n); nwritten += n; obj_desc->offset += n; /* compute valid length of new page */ len = off + n; SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls); simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } pageno++; } systable_endscan_ordered(sd); CatalogCloseIndexes(indstate); /* * Advance command counter so that my tuple updates will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); return nwritten; }
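/*
 * Illustrative sketch (not part of inv_api.c; the struct and function names
 * are hypothetical): how inv_write() above maps a byte offset within a large
 * object onto a pg_largeobject page number and an offset within that page.
 * LOBLKSIZE is the per-row payload size.  A write that starts past the valid
 * length of a page leaves a "hole", which inv_write() zero-fills before
 * copying the new data.
 */
typedef struct lo_position
{
    int32       pageno;     /* pg_largeobject.pageno to touch */
    int         off;        /* byte offset within that page */
} lo_position;

static lo_position
lo_locate(uint64 offset)
{
    lo_position pos;

    pos.pageno = (int32) (offset / LOBLKSIZE);
    pos.off = (int) (offset % LOBLKSIZE);
    return pos;
}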
/* * CreateConstraintEntry * Create a constraint table entry. * * Subsidiary records (such as triggers or indexes to implement the * constraint) are *not* created here. But we do make dependency links * from the constraint to the things it depends on. */ Oid CreateConstraintEntry(const char *constraintName, Oid constraintNamespace, char constraintType, bool isDeferrable, bool isDeferred, bool isValidated, Oid relId, const int16 *constraintKey, int constraintNKeys, Oid domainId, Oid indexRelId, Oid foreignRelId, const int16 *foreignKey, const Oid *pfEqOp, const Oid *ppEqOp, const Oid *ffEqOp, int foreignNKeys, char foreignUpdateType, char foreignDeleteType, char foreignMatchType, const Oid *exclOp, Node *conExpr, const char *conBin, const char *conSrc, bool conIsLocal, int conInhCount, bool conNoInherit) { Relation conDesc; Oid conOid; HeapTuple tup; bool nulls[Natts_pg_constraint]; Datum values[Natts_pg_constraint]; ArrayType *conkeyArray; ArrayType *confkeyArray; ArrayType *conpfeqopArray; ArrayType *conppeqopArray; ArrayType *conffeqopArray; ArrayType *conexclopArray; NameData cname; int i; ObjectAddress conobject; conDesc = heap_open(ConstraintRelationId, RowExclusiveLock); Assert(constraintName); namestrcpy(&cname, constraintName); /* * Convert C arrays into Postgres arrays. */ if (constraintNKeys > 0) { Datum *conkey; conkey = (Datum *) palloc(constraintNKeys * sizeof(Datum)); for (i = 0; i < constraintNKeys; i++) conkey[i] = Int16GetDatum(constraintKey[i]); conkeyArray = construct_array(conkey, constraintNKeys, INT2OID, 2, true, 's'); } else conkeyArray = NULL; if (foreignNKeys > 0) { Datum *fkdatums; fkdatums = (Datum *) palloc(foreignNKeys * sizeof(Datum)); for (i = 0; i < foreignNKeys; i++) fkdatums[i] = Int16GetDatum(foreignKey[i]); confkeyArray = construct_array(fkdatums, foreignNKeys, INT2OID, 2, true, 's'); for (i = 0; i < foreignNKeys; i++) fkdatums[i] = ObjectIdGetDatum(pfEqOp[i]); conpfeqopArray = construct_array(fkdatums, foreignNKeys, OIDOID, sizeof(Oid), true, 'i'); for (i = 0; i < foreignNKeys; i++) fkdatums[i] = ObjectIdGetDatum(ppEqOp[i]); conppeqopArray = construct_array(fkdatums, foreignNKeys, OIDOID, sizeof(Oid), true, 'i'); for (i = 0; i < foreignNKeys; i++) fkdatums[i] = ObjectIdGetDatum(ffEqOp[i]); conffeqopArray = construct_array(fkdatums, foreignNKeys, OIDOID, sizeof(Oid), true, 'i'); } else { confkeyArray = NULL; conpfeqopArray = NULL; conppeqopArray = NULL; conffeqopArray = NULL; } if (exclOp != NULL) { Datum *opdatums; opdatums = (Datum *) palloc(constraintNKeys * sizeof(Datum)); for (i = 0; i < constraintNKeys; i++) opdatums[i] = ObjectIdGetDatum(exclOp[i]); conexclopArray = construct_array(opdatums, constraintNKeys, OIDOID, sizeof(Oid), true, 'i'); } else conexclopArray = NULL; /* initialize nulls and values */ for (i = 0; i < Natts_pg_constraint; i++) { nulls[i] = false; values[i] = (Datum) NULL; } values[Anum_pg_constraint_conname - 1] = NameGetDatum(&cname); values[Anum_pg_constraint_connamespace - 1] = ObjectIdGetDatum(constraintNamespace); values[Anum_pg_constraint_contype - 1] = CharGetDatum(constraintType); values[Anum_pg_constraint_condeferrable - 1] = BoolGetDatum(isDeferrable); values[Anum_pg_constraint_condeferred - 1] = BoolGetDatum(isDeferred); values[Anum_pg_constraint_convalidated - 1] = BoolGetDatum(isValidated); values[Anum_pg_constraint_conrelid - 1] = ObjectIdGetDatum(relId); values[Anum_pg_constraint_contypid - 1] = ObjectIdGetDatum(domainId); values[Anum_pg_constraint_conindid - 1] = ObjectIdGetDatum(indexRelId); 
values[Anum_pg_constraint_confrelid - 1] = ObjectIdGetDatum(foreignRelId); values[Anum_pg_constraint_confupdtype - 1] = CharGetDatum(foreignUpdateType); values[Anum_pg_constraint_confdeltype - 1] = CharGetDatum(foreignDeleteType); values[Anum_pg_constraint_confmatchtype - 1] = CharGetDatum(foreignMatchType); values[Anum_pg_constraint_conislocal - 1] = BoolGetDatum(conIsLocal); values[Anum_pg_constraint_coninhcount - 1] = Int32GetDatum(conInhCount); values[Anum_pg_constraint_connoinherit - 1] = BoolGetDatum(conNoInherit); if (conkeyArray) values[Anum_pg_constraint_conkey - 1] = PointerGetDatum(conkeyArray); else nulls[Anum_pg_constraint_conkey - 1] = true; if (confkeyArray) values[Anum_pg_constraint_confkey - 1] = PointerGetDatum(confkeyArray); else nulls[Anum_pg_constraint_confkey - 1] = true; if (conpfeqopArray) values[Anum_pg_constraint_conpfeqop - 1] = PointerGetDatum(conpfeqopArray); else nulls[Anum_pg_constraint_conpfeqop - 1] = true; if (conppeqopArray) values[Anum_pg_constraint_conppeqop - 1] = PointerGetDatum(conppeqopArray); else nulls[Anum_pg_constraint_conppeqop - 1] = true; if (conffeqopArray) values[Anum_pg_constraint_conffeqop - 1] = PointerGetDatum(conffeqopArray); else nulls[Anum_pg_constraint_conffeqop - 1] = true; if (conexclopArray) values[Anum_pg_constraint_conexclop - 1] = PointerGetDatum(conexclopArray); else nulls[Anum_pg_constraint_conexclop - 1] = true; /* * initialize the binary form of the check constraint. */ if (conBin) values[Anum_pg_constraint_conbin - 1] = CStringGetTextDatum(conBin); else nulls[Anum_pg_constraint_conbin - 1] = true; /* * initialize the text form of the check constraint */ if (conSrc) values[Anum_pg_constraint_consrc - 1] = CStringGetTextDatum(conSrc); else nulls[Anum_pg_constraint_consrc - 1] = true; tup = heap_form_tuple(RelationGetDescr(conDesc), values, nulls); conOid = simple_heap_insert(conDesc, tup); /* update catalog indexes */ CatalogUpdateIndexes(conDesc, tup); conobject.classId = ConstraintRelationId; conobject.objectId = conOid; conobject.objectSubId = 0; heap_close(conDesc, RowExclusiveLock); if (OidIsValid(relId)) { /* * Register auto dependency from constraint to owning relation, or to * specific column(s) if any are mentioned. */ ObjectAddress relobject; relobject.classId = RelationRelationId; relobject.objectId = relId; if (constraintNKeys > 0) { for (i = 0; i < constraintNKeys; i++) { relobject.objectSubId = constraintKey[i]; recordDependencyOn(&conobject, &relobject, DEPENDENCY_AUTO); } } else { relobject.objectSubId = 0; recordDependencyOn(&conobject, &relobject, DEPENDENCY_AUTO); } } if (OidIsValid(domainId)) { /* * Register auto dependency from constraint to owning domain */ ObjectAddress domobject; domobject.classId = TypeRelationId; domobject.objectId = domainId; domobject.objectSubId = 0; recordDependencyOn(&conobject, &domobject, DEPENDENCY_AUTO); } if (OidIsValid(foreignRelId)) { /* * Register normal dependency from constraint to foreign relation, or * to specific column(s) if any are mentioned. 
*/ ObjectAddress relobject; relobject.classId = RelationRelationId; relobject.objectId = foreignRelId; if (foreignNKeys > 0) { for (i = 0; i < foreignNKeys; i++) { relobject.objectSubId = foreignKey[i]; recordDependencyOn(&conobject, &relobject, DEPENDENCY_NORMAL); } } else { relobject.objectSubId = 0; recordDependencyOn(&conobject, &relobject, DEPENDENCY_NORMAL); } } if (OidIsValid(indexRelId) && constraintType == CONSTRAINT_FOREIGN) { /* * Register normal dependency on the unique index that supports a * foreign-key constraint. (Note: for indexes associated with unique * or primary-key constraints, the dependency runs the other way, and * is not made here.) */ ObjectAddress relobject; relobject.classId = RelationRelationId; relobject.objectId = indexRelId; relobject.objectSubId = 0; recordDependencyOn(&conobject, &relobject, DEPENDENCY_NORMAL); } if (foreignNKeys > 0) { /* * Register normal dependencies on the equality operators that support * a foreign-key constraint. If the PK and FK types are the same then * all three operators for a column are the same; otherwise they are * different. */ ObjectAddress oprobject; oprobject.classId = OperatorRelationId; oprobject.objectSubId = 0; for (i = 0; i < foreignNKeys; i++) { oprobject.objectId = pfEqOp[i]; recordDependencyOn(&conobject, &oprobject, DEPENDENCY_NORMAL); if (ppEqOp[i] != pfEqOp[i]) { oprobject.objectId = ppEqOp[i]; recordDependencyOn(&conobject, &oprobject, DEPENDENCY_NORMAL); } if (ffEqOp[i] != pfEqOp[i]) { oprobject.objectId = ffEqOp[i]; recordDependencyOn(&conobject, &oprobject, DEPENDENCY_NORMAL); } } } /* * We don't bother to register dependencies on the exclusion operators of * an exclusion constraint. We assume they are members of the opclass * supporting the index, so there's an indirect dependency via that. (This * would be pretty dicey for cross-type operators, but exclusion operators * can never be cross-type.) */ if (conExpr != NULL) { /* * Register dependencies from constraint to objects mentioned in CHECK * expression. */ recordDependencyOnSingleRelExpr(&conobject, conExpr, relId, DEPENDENCY_NORMAL, DEPENDENCY_NORMAL); } /* Post creation hook for new constraint */ InvokeObjectAccessHook(OAT_POST_CREATE, ConstraintRelationId, conOid, 0, NULL); return conOid; }
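/*
 * Hypothetical caller sketch: how a simple table CHECK constraint might be
 * recorded with CreateConstraintEntry() as declared above.  The wrapper and
 * its parameter names (rel, ccname, expr, ccbin, ccsrc, attNos, keycount)
 * are assumptions made for illustration; the real callers live elsewhere in
 * the catalog code.
 */
static Oid
record_check_constraint(Relation rel, const char *ccname,
                        Node *expr, const char *ccbin, const char *ccsrc,
                        const int16 *attNos, int keycount)
{
    return CreateConstraintEntry(ccname,
                                 RelationGetNamespace(rel),
                                 CONSTRAINT_CHECK,
                                 false,         /* not deferrable */
                                 false,         /* not deferred */
                                 true,          /* validated */
                                 RelationGetRelid(rel),
                                 attNos,        /* columns used by the expr */
                                 keycount,
                                 InvalidOid,    /* not a domain constraint */
                                 InvalidOid,    /* no supporting index */
                                 InvalidOid,    /* no foreign relation */
                                 NULL, NULL, NULL, NULL, 0, /* no FK info */
                                 ' ', ' ', ' ', /* FK action/match unused */
                                 NULL,          /* no exclusion operators */
                                 expr,          /* parsed CHECK expression */
                                 ccbin,         /* nodeToString() form */
                                 ccsrc,         /* readable expression text */
                                 true,          /* conislocal */
                                 0,             /* coninhcount */
                                 false);        /* connoinherit */
}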
/*
 * tuple_data_split_internal
 *
 * Split raw tuple data taken directly from a page into an array of bytea
 * elements.  This routine checks for NULL attributes and creates array
 * elements accordingly.  This is a reimplementation of nocachegetattr()
 * in heaptuple.c simplified for educational purposes.
 */
static Datum
tuple_data_split_internal(Oid relid, char *tupdata,
                          uint16 tupdata_len, uint16 t_infomask,
                          uint16 t_infomask2, bits8 *t_bits,
                          bool do_detoast)
{
    ArrayBuildState *raw_attrs;
    int         nattrs;
    int         i;
    int         off = 0;
    Relation    rel;
    TupleDesc   tupdesc;

    /* Get tuple descriptor from relation OID */
    rel = relation_open(relid, AccessShareLock);
    tupdesc = RelationGetDescr(rel);

    raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
    nattrs = tupdesc->natts;

    if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));

    for (i = 0; i < nattrs; i++)
    {
        Form_pg_attribute attr;
        bool        is_null;
        bytea      *attr_data = NULL;

        attr = TupleDescAttr(tupdesc, i);

        /*
         * The tuple header can specify fewer attributes than the tuple
         * descriptor, because ALTER TABLE ADD COLUMN without a DEFAULT
         * clause does not actually rewrite the tuples in pages, so
         * attributes with numbers greater than (t_infomask2 &
         * HEAP_NATTS_MASK) must be treated as NULL.
         */
        if (i >= (t_infomask2 & HEAP_NATTS_MASK))
            is_null = true;
        else
            is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);

        if (!is_null)
        {
            int         len;

            if (attr->attlen == -1)
            {
                off = att_align_pointer(off, attr->attalign, -1,
                                        tupdata + off);

                /*
                 * VARSIZE_ANY raises an error if it cannot recognize the
                 * kind of external storage (via VARTAG_SIZE), so repeat
                 * that check here to produce a nicer error message.
                 */
                if (VARATT_IS_EXTERNAL(tupdata + off) &&
                    !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
                    !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
                    ereport(ERROR,
                            (errcode(ERRCODE_DATA_CORRUPTED),
                             errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));

                len = VARSIZE_ANY(tupdata + off);
            }
            else
            {
                off = att_align_nominal(off, attr->attalign);
                len = attr->attlen;
            }

            if (tupdata_len < off + len)
                ereport(ERROR,
                        (errcode(ERRCODE_DATA_CORRUPTED),
                         errmsg("unexpected end of tuple data")));

            if (attr->attlen == -1 && do_detoast)
                attr_data = DatumGetByteaPCopy(tupdata + off);
            else
            {
                attr_data = (bytea *) palloc(len + VARHDRSZ);
                SET_VARSIZE(attr_data, len + VARHDRSZ);
                memcpy(VARDATA(attr_data), tupdata + off, len);
            }

            off = att_addlength_pointer(off, attr->attlen, tupdata + off);
        }

        raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
                                     is_null, BYTEAOID, CurrentMemoryContext);

        if (attr_data)
            pfree(attr_data);
    }

    if (tupdata_len != off)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("end of tuple reached without looking at all its data")));

    relation_close(rel, AccessShareLock);

    return makeArrayResult(raw_attrs, CurrentMemoryContext);
}
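/*
 * Sketch of the null-bitmap test performed by att_isnull() above.  This is
 * not the backend macro itself, just an equivalent standalone function for
 * illustration: bit i of the bitmap is *set* when attribute i is not null,
 * so a clear bit means NULL.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
attribute_is_null(int attnum, const uint8_t *bits)
{
    return (bits[attnum >> 3] & (1 << (attnum & 0x07))) == 0;
}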
/* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values * with those of the new tuple. If the tuple values are not consistent with * the summary tuple, we need to update the index tuple. * * If the range is not currently summarized (i.e. the revmap returns NULL for * it), there's nothing to do. */ Datum brininsert(PG_FUNCTION_ARGS) { Relation idxRel = (Relation) PG_GETARG_POINTER(0); Datum *values = (Datum *) PG_GETARG_POINTER(1); bool *nulls = (bool *) PG_GETARG_POINTER(2); ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3); /* we ignore the rest of our arguments */ BlockNumber pagesPerRange; BrinDesc *bdesc = NULL; BrinRevmap *revmap; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = NULL; revmap = brinRevmapInitialize(idxRel, &pagesPerRange); for (;;) { bool need_insert = false; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; BlockNumber heapBlk; int keyno; #ifdef USE_ASSERT_CHECKING BrinTuple *tmptup; BrinMemTuple *tmpdtup; Size tmpsiz; #endif CHECK_FOR_INTERRUPTS(); heapBlk = ItemPointerGetBlockNumber(heaptid); /* normalize the block number to be the first block in the range */ heapBlk = (heapBlk / pagesPerRange) * pagesPerRange; brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE); /* if range is unsummarized, there's nothing to do */ if (!brtup) break; /* First time through? */ if (bdesc == NULL) { bdesc = brin_build_desc(idxRel); tupcxt = AllocSetContextCreate(CurrentMemoryContext, "brininsert cxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldcxt = MemoryContextSwitchTo(tupcxt); } dtup = brin_deform_tuple(bdesc, brtup); #ifdef USE_ASSERT_CHECKING { /* * When assertions are enabled, we use this as an opportunity to * test the "union" method, which would otherwise be used very * rarely: first create a placeholder tuple, and addValue the * value we just got into it. Then union the existing index tuple * with the updated placeholder tuple. The tuple resulting from * that union should be identical to the one resulting from the * regular operation (straight addValue) below. * * Here we create the tuple to compare with; the actual comparison * is below. */ tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz); tmpdtup = brin_deform_tuple(bdesc, tmptup); for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { BrinValues *bval; FmgrInfo *addValue; bval = &tmpdtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); } union_tuples(bdesc, tmpdtup, brtup); tmpdtup->bt_placeholder = dtup->bt_placeholder; tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz); } #endif /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit * the original range (note this means we can't break out of the loop * early). Make a note of whether this happens, so that we know to * insert the modified tuple later. 
*/ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { Datum result; BrinValues *bval; FmgrInfo *addValue; bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, idxRel->rd_indcollation[keyno], PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); } #ifdef USE_ASSERT_CHECKING { /* * Now we can compare the tuple produced by the union function * with the one from plain addValue. */ BrinTuple *cmptup; Size cmpsz; cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz); Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz)); } #endif if (!need_insert) { /* * The tuple is consistent with the new values, so there's nothing * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; BrinTuple *origtup; Size newsz; BrinTuple *newtup; bool samepage; /* * Make a copy of the old tuple, so that we can compare it after * re-acquiring the lock. */ origsz = ItemIdGetLength(lp); origtup = brin_copy_tuple(brtup, origsz); /* * Before releasing the lock, check if we can attempt a same-page * update. Another process could insert a tuple concurrently in * the same page though, so downstream we must be prepared to cope * if this turns out to not be possible after all. */ newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); samepage = brin_can_do_samepage_update(buf, origsz, newsz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* * Try to update the tuple. If this doesn't work for whatever * reason, we need to restart from the top; the revmap might be * pointing at a different tuple for this block now, so we need to * recompute to ensure both our new heap tuple and the other * inserter's are covered by the combined tuple. It might be that * we don't need to update at all. */ if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk, buf, off, origtup, origsz, newtup, newsz, samepage)) { /* no luck; start over */ MemoryContextResetAndDeleteChildren(tupcxt); continue; } } /* success! */ break; } brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); if (bdesc != NULL) { brin_free_desc(bdesc); MemoryContextSwitchTo(oldcxt); MemoryContextDelete(tupcxt); } return BoolGetDatum(false); }
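/*
 * Illustrative helper (not part of brin.c): the block-number normalization
 * done at the top of the brininsert() loop above.  Every heap block inside a
 * range maps to the range's first block, which is the key the revmap is
 * consulted with.  For example, with pagesPerRange = 128, heap blocks 0..127
 * all map to 0 and blocks 128..255 map to 128.
 */
static BlockNumber
brin_range_start(BlockNumber heapBlk, BlockNumber pagesPerRange)
{
    return (heapBlk / pagesPerRange) * pagesPerRange;
}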
/* * CREATE SCHEMA */ Oid CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString) { const char *schemaName = stmt->schemaname; const char *authId = stmt->authid; Oid namespaceId; OverrideSearchPath *overridePath; List *parsetree_list; ListCell *parsetree_item; Oid owner_uid; Oid saved_uid; int save_sec_context; AclResult aclresult; GetUserIdAndSecContext(&saved_uid, &save_sec_context); /* * Who is supposed to own the new schema? */ if (authId) owner_uid = get_role_oid(authId, false); else owner_uid = saved_uid; /* * To create a schema, must have schema-create privilege on the current * database and must be able to become the target role (this does not * imply that the target role itself must have create-schema privilege). * The latter provision guards against "giveaway" attacks. Note that a * superuser will always have both of these privileges a fortiori. */ aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_DATABASE, get_database_name(MyDatabaseId)); check_is_member_of_role(saved_uid, owner_uid); /* Additional check to protect reserved schema names */ if (!allowSystemTableMods && IsReservedName(schemaName)) ereport(ERROR, (errcode(ERRCODE_RESERVED_NAME), errmsg("unacceptable schema name \"%s\"", schemaName), errdetail("The prefix \"pg_\" is reserved for system schemas."))); /* * If if_not_exists was given and the schema already exists, bail out. * (Note: we needn't check this when not if_not_exists, because * NamespaceCreate will complain anyway.) We could do this before making * the permissions checks, but since CREATE TABLE IF NOT EXISTS makes its * creation-permission check first, we do likewise. */ if (stmt->if_not_exists && SearchSysCacheExists1(NAMESPACENAME, PointerGetDatum(schemaName))) { ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_SCHEMA), errmsg("schema \"%s\" already exists, skipping", schemaName))); return InvalidOid; } /* * If the requested authorization is different from the current user, * temporarily set the current user so that the object(s) will be created * with the correct ownership. * * (The setting will be restored at the end of this routine, or in case of * error, transaction abort will clean things up.) */ if (saved_uid != owner_uid) SetUserIdAndSecContext(owner_uid, save_sec_context | SECURITY_LOCAL_USERID_CHANGE); /* Create the schema's namespace */ namespaceId = NamespaceCreate(schemaName, owner_uid, false); /* Advance cmd counter to make the namespace visible */ CommandCounterIncrement(); /* * Temporarily make the new namespace be the front of the search path, as * well as the default creation target namespace. This will be undone at * the end of this routine, or upon error. */ overridePath = GetOverrideSearchPath(CurrentMemoryContext); overridePath->schemas = lcons_oid(namespaceId, overridePath->schemas); /* XXX should we clear overridePath->useTemp? */ PushOverrideSearchPath(overridePath); /* * Examine the list of commands embedded in the CREATE SCHEMA command, and * reorganize them into a sequentially executable order with no forward * references. Note that the result is still a list of raw parsetrees --- * we cannot, in general, run parse analysis on one statement until we * have actually executed the prior ones. */ parsetree_list = transformCreateSchemaStmt(stmt); /* * Execute each command contained in the CREATE SCHEMA. 
Since the grammar * allows only utility commands in CREATE SCHEMA, there is no need to pass * them through parse_analyze() or the rewriter; we can just hand them * straight to ProcessUtility. */ foreach(parsetree_item, parsetree_list) { Node *stmt = (Node *) lfirst(parsetree_item); /* do this step */ ProcessUtility(stmt, queryString, PROCESS_UTILITY_SUBCOMMAND, NULL, None_Receiver, NULL); /* make sure later steps can see the object created here */ CommandCounterIncrement(); }
/*
 * Initialize the TABLESAMPLE descriptor and the TABLESAMPLE method.
 */
TableSampleDesc *
tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
{
    FunctionCallInfoData fcinfo;
    int         i;
    List       *args = tablesample->args;
    ListCell   *arg;
    ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
    TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));

    /* Load functions */
    fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
    fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
    fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
    if (OidIsValid(tablesample->tsmexaminetuple))
        fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
    else
        tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
    fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
    fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));

    InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
                             list_length(args) + 2,
                             InvalidOid, NULL, NULL);

    tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
    tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;

    /* First argument for the init function is always the TableSampleDesc */
    fcinfo.arg[0] = PointerGetDatum(tsdesc);
    fcinfo.argnull[0] = false;

    /*
     * Second argument for the init function is always REPEATABLE.
     * When tablesample->repeatable is NULL, no REPEATABLE clause was
     * specified.  When it is specified, the expression must not evaluate to
     * NULL.
     */
    if (tablesample->repeatable)
    {
        ExprState  *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
                                            (PlanState *) scanstate);

        fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
                                     &fcinfo.argnull[1], NULL);
        if (fcinfo.argnull[1])
            ereport(ERROR,
                    (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                     errmsg("REPEATABLE clause must be a non-null numeric value")));
    }
    else
    {
        fcinfo.arg[1] = UInt32GetDatum(random());
        fcinfo.argnull[1] = false;
    }

    /* The remaining arguments come from the user. */
    i = 2;
    foreach(arg, args)
    {
        Expr       *argexpr = (Expr *) lfirst(arg);
        ExprState  *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);

        if (argstate == NULL)
        {
            /* no expression state to evaluate; pass a NULL argument */
            fcinfo.argnull[i] = true;
            fcinfo.arg[i] = (Datum) 0;
        }
        else
            fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
                                         &fcinfo.argnull[i], NULL);
        i++;
    }
Datum plpython_call_handler(PG_FUNCTION_ARGS) { Datum retval; PLyExecutionContext *exec_ctx; ErrorContextCallback plerrcontext; PLy_initialize(); /* Note: SPI_finish() happens in plpy_exec.c, which is dubious design */ if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "SPI_connect failed"); /* * Push execution context onto stack. It is important that this get * popped again, so avoid putting anything that could throw error between * here and the PG_TRY. */ exec_ctx = PLy_push_execution_context(); PG_TRY(); { Oid funcoid = fcinfo->flinfo->fn_oid; PLyProcedure *proc; /* * Setup error traceback support for ereport(). Note that the PG_TRY * structure pops this for us again at exit, so we needn't do that * explicitly, nor do we risk the callback getting called after we've * destroyed the exec_ctx. */ plerrcontext.callback = plpython_error_callback; plerrcontext.arg = exec_ctx; plerrcontext.previous = error_context_stack; error_context_stack = &plerrcontext; if (CALLED_AS_TRIGGER(fcinfo)) { Relation tgrel = ((TriggerData *) fcinfo->context)->tg_relation; HeapTuple trv; proc = PLy_procedure_get(funcoid, RelationGetRelid(tgrel), true); exec_ctx->curr_proc = proc; trv = PLy_exec_trigger(fcinfo, proc); retval = PointerGetDatum(trv); } else { proc = PLy_procedure_get(funcoid, InvalidOid, false); exec_ctx->curr_proc = proc; retval = PLy_exec_function(fcinfo, proc); } } PG_CATCH(); { PLy_pop_execution_context(); PyErr_Clear(); PG_RE_THROW(); } PG_END_TRY(); /* Destroy the execution context */ PLy_pop_execution_context(); return retval; }
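/*
 * Minimal sketch of an error-context callback of the kind installed above.
 * The real plpython_error_callback is defined elsewhere in PL/Python and may
 * differ; in particular, the field access exec_ctx->curr_proc->proname is an
 * assumption made for illustration.  While the callback sits on
 * error_context_stack, any error reported gets a CONTEXT line appended via
 * errcontext().
 */
static void
plpython_error_callback_sketch(void *arg)
{
    PLyExecutionContext *exec_ctx = (PLyExecutionContext *) arg;

    if (exec_ctx->curr_proc)
        errcontext("PL/Python function \"%s\"",
                   exec_ctx->curr_proc->proname);
}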
/* * ConversionCreate * * Add a new tuple to pg_conversion. */ Oid ConversionCreate(const char *conname, Oid connamespace, Oid conowner, int32 conforencoding, int32 contoencoding, Oid conproc, bool def, Oid newOid) { int i; Relation rel; HeapTuple tup; bool nulls[Natts_pg_conversion]; Datum values[Natts_pg_conversion]; NameData cname; Oid oid; ObjectAddress myself, referenced; cqContext cqc; cqContext *pcqCtx; /* sanity checks */ if (!conname) elog(ERROR, "no conversion name supplied"); /* open pg_conversion */ rel = heap_open(ConversionRelationId, RowExclusiveLock); /* make sure there is no existing conversion of same name */ if (caql_getcount( caql_addrel(cqclr(&cqc), rel), cql("SELECT COUNT(*) FROM pg_conversion " " WHERE conname = :1 " " AND connamespace = :2 ", PointerGetDatum((char *) conname), ObjectIdGetDatum(connamespace)))) { ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("conversion \"%s\" already exists", conname), errOmitLocation(true))); } if (def) { /* * make sure there is no existing default <for encoding><to encoding> * pair in this name space */ if (FindDefaultConversion(connamespace, conforencoding, contoencoding)) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("default conversion for %s to %s already exists", pg_encoding_to_char(conforencoding), pg_encoding_to_char(contoencoding)), errOmitLocation(true))); } pcqCtx = caql_beginscan( caql_addrel(cqclr(&cqc), rel), cql("INSERT INTO pg_conversion", NULL)); /* initialize nulls and values */ for (i = 0; i < Natts_pg_conversion; i++) { nulls[i] = false; values[i] = (Datum) 0; } /* form a tuple */ namestrcpy(&cname, conname); values[Anum_pg_conversion_conname - 1] = NameGetDatum(&cname); values[Anum_pg_conversion_connamespace - 1] = ObjectIdGetDatum(connamespace); values[Anum_pg_conversion_conowner - 1] = ObjectIdGetDatum(conowner); values[Anum_pg_conversion_conforencoding - 1] = Int32GetDatum(conforencoding); values[Anum_pg_conversion_contoencoding - 1] = Int32GetDatum(contoencoding); values[Anum_pg_conversion_conproc - 1] = ObjectIdGetDatum(conproc); values[Anum_pg_conversion_condefault - 1] = BoolGetDatum(def); tup = caql_form_tuple(pcqCtx, values, nulls); if (newOid != 0) HeapTupleSetOid(tup, newOid); /* insert a new tuple */ oid = caql_insert(pcqCtx, tup); /* implicit update of index as well */ Assert(OidIsValid(oid)); myself.classId = ConversionRelationId; myself.objectId = HeapTupleGetOid(tup); myself.objectSubId = 0; /* create dependency on conversion procedure */ referenced.classId = ProcedureRelationId; referenced.objectId = conproc; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); /* create dependency on namespace */ referenced.classId = NamespaceRelationId; referenced.objectId = connamespace; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); /* create dependency on owner */ recordDependencyOnOwner(ConversionRelationId, HeapTupleGetOid(tup), conowner); heap_freetuple(tup); caql_endscan(pcqCtx); heap_close(rel, RowExclusiveLock); return oid; }
Datum
get_instance_memory_stats(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    InstanceState *state;
    Datum       values[7];
    bool        nulls[7];
    HeapTuple   tuple;
    MemoryContextStat *ContextStat;

    if (MyBackendProcNo < 0)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("memory statistics collection is not enabled"),
                 errhint("add memstat to shared_preload_libraries")));

    if (SRF_IS_FIRSTCALL())
    {
        TupleDesc   tupdesc;
        MemoryContext oldcontext;

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Build a tuple descriptor for our result type */
        if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("function returning record called in context "
                            "that cannot accept type record")));

        funcctx->tuple_desc = BlessTupleDesc(tupdesc);

        state = palloc0(sizeof(*state));
        state->iBackend = 0;

        /*
         * We copy the backend's stat struct so that we do not lose its
         * statistics if that backend exits while we are printing them.
         */
        state->stat = palloc(BMSSIZE);

        funcctx->user_fctx = state;

        MemoryContextSwitchTo(oldcontext);

        /* at least our own backend will be in the list */
        copyBackendMemoryStat(state, 0);
    }

    funcctx = SRF_PERCALL_SETUP();
    state = (InstanceState *) funcctx->user_fctx;

    if (state->iContext >= state->stat->nContext)
    {
        /* exhausted this backend's contexts; move on to the next backend */
        if (copyBackendMemoryStat(state, state->iBackend + 1) == false)
            SRF_RETURN_DONE(funcctx);
    }

    ContextStat = state->stat->stats + state->iContext;

    memset(nulls, 0, sizeof(nulls));

    /* Fill data */
    values[0] = Int32GetDatum(state->stat->pid);
    values[1] = PointerGetDatum(cstring_to_text(ContextStat->name.data));
    values[2] = Int32GetDatum(ContextStat->level);
    values[3] = Int64GetDatum(ContextStat->stat.nblocks);
    values[4] = Int64GetDatum(ContextStat->stat.freechunks);
    values[5] = Int64GetDatum(ContextStat->stat.totalspace);
    values[6] = Int64GetDatum(ContextStat->stat.freespace);

    /* Data are ready */
    tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);

    /* advance to the next context */
    state->iContext++;

    SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
}
/* * AggregateCreateWithOid */ Oid AggregateCreateWithOid(const char *aggName, Oid aggNamespace, Oid *aggArgTypes, int numArgs, List *aggtransfnName, List *aggprelimfnName, List *aggfinalfnName, List *aggsortopName, Oid aggTransType, const char *agginitval, bool aggordered, Oid procOid) { Relation aggdesc; HeapTuple tup; bool nulls[Natts_pg_aggregate]; Datum values[Natts_pg_aggregate]; Form_pg_proc proc; Oid transfn; Oid invtransfn = InvalidOid; /* MPP windowing optimization */ Oid prelimfn = InvalidOid; /* if omitted, disables MPP 2-stage for this aggregate */ Oid invprelimfn = InvalidOid; /* MPP windowing optimization */ Oid finalfn = InvalidOid; /* can be omitted */ Oid sortop = InvalidOid; /* can be omitted */ bool hasPolyArg; bool hasInternalArg; Oid rettype; Oid finaltype; Oid prelimrettype; Oid *fnArgs; int nargs_transfn; TupleDesc tupDesc; int i; ObjectAddress myself, referenced; /* sanity checks (caller should have caught these) */ if (!aggName) elog(ERROR, "no aggregate name supplied"); if (!aggtransfnName) elog(ERROR, "aggregate must have a transition function"); /* check for polymorphic arguments and INTERNAL arguments */ hasPolyArg = false; hasInternalArg = false; for (i = 0; i < numArgs; i++) { if (IsPolymorphicType(aggArgTypes[i])) hasPolyArg = true; else if (aggArgTypes[i] == INTERNALOID) hasInternalArg = true; } /* * If transtype is polymorphic, must have polymorphic argument also; else * we will have no way to deduce the actual transtype. */ if (IsPolymorphicType(aggTransType) && !hasPolyArg) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("cannot determine transition data type"), errdetail("An aggregate using a polymorphic transition type must have at least one polymorphic argument."))); /* find the transfn */ nargs_transfn = numArgs + 1; fnArgs = (Oid *) palloc(nargs_transfn * sizeof(Oid)); fnArgs[0] = aggTransType; memcpy(fnArgs + 1, aggArgTypes, numArgs * sizeof(Oid)); transfn = lookup_agg_function(aggtransfnName, nargs_transfn, fnArgs, &rettype); elog(DEBUG5,"AggregateCreateWithOid: successfully located transition " "function %s with return type %d", func_signature_string(aggtransfnName, nargs_transfn, fnArgs), rettype); /* * Return type of transfn (possibly after refinement by * enforce_generic_type_consistency, if transtype isn't polymorphic) must * exactly match declared transtype. * * In the non-polymorphic-transtype case, it might be okay to allow a * rettype that's binary-coercible to transtype, but I'm not quite * convinced that it's either safe or useful. When transtype is * polymorphic we *must* demand exact equality. */ if (rettype != aggTransType) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("return type of transition function %s is not %s", NameListToString(aggtransfnName), format_type_be(aggTransType)))); tup = SearchSysCache(PROCOID, ObjectIdGetDatum(transfn), 0, 0, 0); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for function %u", transfn); proc = (Form_pg_proc) GETSTRUCT(tup); /* * If the transfn is strict and the initval is NULL, make sure first input * type and transtype are the same (or at least binary-compatible), so * that it's OK to use the first input value as the initial transValue. 
*/ if (proc->proisstrict && agginitval == NULL) { if (numArgs < 1 || !IsBinaryCoercible(aggArgTypes[0], aggTransType)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("must not omit initial value when transition function is strict and transition type is not compatible with input type"))); } ReleaseSysCache(tup); /* handle prelimfn, if supplied */ if (aggprelimfnName) { /* * The preliminary state function (pfunc) input arguments are the results of the * state transition function (sfunc) and therefore must be of the same types. */ fnArgs[0] = rettype; fnArgs[1] = rettype; /* * Check that such a function name and prototype exists in the catalog. */ prelimfn = lookup_agg_function(aggprelimfnName, 2, fnArgs, &prelimrettype); elog(DEBUG5,"AggregateCreateWithOid: successfully located preliminary " "function %s with return type %d", func_signature_string(aggprelimfnName, 2, fnArgs), prelimrettype); Assert(OidIsValid(prelimrettype)); /* * The preliminary return type must be of the same type as the internal * state. (See similar error checking for transition types above) */ if (prelimrettype != rettype) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("return type of preliminary function %s is not %s", NameListToString(aggprelimfnName), format_type_be(rettype)))); } /* handle finalfn, if supplied */ if (aggfinalfnName) { fnArgs[0] = aggTransType; finalfn = lookup_agg_function(aggfinalfnName, 1, fnArgs, &finaltype); } else { /* * If no finalfn, aggregate result type is type of the state value */ finaltype = aggTransType; } Assert(OidIsValid(finaltype)); /* * If finaltype (i.e. aggregate return type) is polymorphic, inputs must * be polymorphic also, else parser will fail to deduce result type. * (Note: given the previous test on transtype and inputs, this cannot * happen, unless someone has snuck a finalfn definition into the catalogs * that itself violates the rule against polymorphic result with no * polymorphic input.) */ if (IsPolymorphicType(finaltype) && !hasPolyArg) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot determine result data type"), errdetail("An aggregate returning a polymorphic type " "must have at least one polymorphic argument."))); /* * Also, the return type can't be INTERNAL unless there's at least one * INTERNAL argument. This is the same type-safety restriction we * enforce for regular functions, but at the level of aggregates. We * must test this explicitly because we allow INTERNAL as the transtype. */ if (finaltype == INTERNALOID && !hasInternalArg) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("unsafe use of pseudo-type \"internal\""), errdetail("A function returning \"internal\" must have at least one \"internal\" argument."))); /* handle sortop, if supplied */ if (aggsortopName) { if (numArgs != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("sort operator can only be specified for single-argument aggregates"))); sortop = LookupOperName(NULL, aggsortopName, aggArgTypes[0], aggArgTypes[0], false, -1); } /* * Everything looks okay. Try to create the pg_proc entry for the * aggregate. (This could fail if there's already a conflicting entry.) 
*/ procOid = ProcedureCreate(aggName, aggNamespace, false, /* no replacement */ false, /* doesn't return a set */ finaltype, /* returnType */ INTERNALlanguageId, /* languageObjectId */ InvalidOid, /* no validator */ InvalidOid, /* no describe function */ "aggregate_dummy", /* placeholder proc */ NULL, /* probin */ true, /* isAgg */ false, /* isWin */ false, /* security invoker (currently not * definable for agg) */ false, /* isStrict (not needed for agg) */ PROVOLATILE_IMMUTABLE, /* volatility (not * needed for agg) */ buildoidvector(aggArgTypes, numArgs), /* paramTypes */ PointerGetDatum(NULL), /* allParamTypes */ PointerGetDatum(NULL), /* parameterModes */ PointerGetDatum(NULL), /* parameterNames */ NIL, /* parameterDefaults */ PointerGetDatum(NULL), /* proconfig */ 1, /* procost */ 0, /* prorows */ PRODATAACCESS_NONE, /* prodataaccess */ procOid); /* * Okay to create the pg_aggregate entry. */ /* initialize nulls and values */ for (i = 0; i < Natts_pg_aggregate; i++) { nulls[i] = false; values[i] = (Datum) 0; } values[Anum_pg_aggregate_aggfnoid - 1] = ObjectIdGetDatum(procOid); values[Anum_pg_aggregate_aggtransfn - 1] = ObjectIdGetDatum(transfn); values[Anum_pg_aggregate_agginvtransfn - 1] = ObjectIdGetDatum(invtransfn); values[Anum_pg_aggregate_aggprelimfn - 1] = ObjectIdGetDatum(prelimfn); values[Anum_pg_aggregate_agginvprelimfn - 1] = ObjectIdGetDatum(invprelimfn); values[Anum_pg_aggregate_aggfinalfn - 1] = ObjectIdGetDatum(finalfn); values[Anum_pg_aggregate_aggsortop - 1] = ObjectIdGetDatum(sortop); values[Anum_pg_aggregate_aggtranstype - 1] = ObjectIdGetDatum(aggTransType); if (agginitval) values[Anum_pg_aggregate_agginitval - 1] = CStringGetTextDatum(agginitval); else nulls[Anum_pg_aggregate_agginitval - 1] = true; values[Anum_pg_aggregate_aggordered - 1] = BoolGetDatum(aggordered); aggdesc = heap_open(AggregateRelationId, RowExclusiveLock); tupDesc = aggdesc->rd_att; tup = heap_form_tuple(tupDesc, values, nulls); simple_heap_insert(aggdesc, tup); CatalogUpdateIndexes(aggdesc, tup); heap_close(aggdesc, RowExclusiveLock); /* * Create dependencies for the aggregate (above and beyond those already * made by ProcedureCreate). Note: we don't need an explicit dependency * on aggTransType since we depend on it indirectly through transfn. 
*/ myself.classId = ProcedureRelationId; myself.objectId = procOid; myself.objectSubId = 0; /* Depends on transition function */ referenced.classId = ProcedureRelationId; referenced.objectId = transfn; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); /* Depends on inverse transition function, if any */ if (OidIsValid(invtransfn)) { referenced.classId = ProcedureRelationId; referenced.objectId = invtransfn; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } /* Depends on preliminary aggregation function, if any */ if (OidIsValid(prelimfn)) { referenced.classId = ProcedureRelationId; referenced.objectId = prelimfn; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } /* Depends on inverse preliminary aggregation function, if any */ if (OidIsValid(invprelimfn)) { referenced.classId = ProcedureRelationId; referenced.objectId = invprelimfn; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } /* Depends on final function, if any */ if (OidIsValid(finalfn)) { referenced.classId = ProcedureRelationId; referenced.objectId = finalfn; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } /* Depends on sort operator, if any */ if (OidIsValid(sortop)) { referenced.classId = OperatorRelationId; referenced.objectId = sortop; referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } return procOid; }
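/*
 * Hypothetical caller sketch for AggregateCreateWithOid() as declared above:
 * registering a one-argument aggregate over int8 whose transition and final
 * functions are named by unqualified one-element name lists.  The aggregate
 * and function names ("my_agg", "my_trans", "my_final") are illustrative
 * only and would have to exist in the catalogs for the lookups to succeed.
 */
static Oid
create_simple_aggregate(Oid namespaceId)
{
    Oid         argTypes[1] = {INT8OID};

    return AggregateCreateWithOid("my_agg",         /* aggregate name */
                                  namespaceId,
                                  argTypes, 1,      /* one int8 argument */
                                  list_make1(makeString("my_trans")),
                                  NIL,              /* no preliminary fn */
                                  list_make1(makeString("my_final")),
                                  NIL,              /* no sort operator */
                                  INT8OID,          /* transition type */
                                  "0",              /* initial value */
                                  false,            /* not ordered */
                                  InvalidOid);      /* no preassigned OID */
}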
Datum get_local_memory_stats(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; MemoryContextIteratorState *state; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("function returning record called in context " "that cannot accept type record"))); funcctx->tuple_desc = BlessTupleDesc(tupdesc); state = palloc0(sizeof(*state)); state->context = TopMemoryContext; funcctx->user_fctx = state; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); state = (MemoryContextIteratorState*) funcctx->user_fctx; if (state && state->context) { Datum values[6]; bool nulls[6]; HeapTuple tuple; MemoryContextCounters stat; getMemoryContextStat(state->context, &stat); memset(nulls, 0, sizeof(nulls)); /* Fill data */ values[0] = PointerGetDatum(cstring_to_text(state->context->name)); values[1] = Int32GetDatum(state->level); values[2] = Int64GetDatum(stat.nblocks); values[3] = Int64GetDatum(stat.freechunks); values[4] = Int64GetDatum(stat.totalspace); values[5] = Int64GetDatum(stat.freespace); /* Data are ready */ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); /* go next context */ iterateMemoryContext(state); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } else { SRF_RETURN_DONE(funcctx); } }
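/*
 * Sketch (an assumption, not the extension's actual code) of the depth-first
 * walk that iterateMemoryContext() performs over the memory-context tree in
 * get_local_memory_stats() above.  It relies on the firstchild/nextchild/
 * parent links of MemoryContextData and assumes MemoryContextIteratorState
 * holds just the current context and its depth, as the usage above suggests.
 * The walk ends when state->context becomes NULL, which makes the SRF return
 * done on the next call.
 */
static void
iterateMemoryContext(MemoryContextIteratorState *state)
{
    MemoryContext context = state->context;

    if (context->firstchild)
    {
        /* descend to the first child */
        state->context = context->firstchild;
        state->level++;
    }
    else
    {
        /* climb until we find an unvisited sibling, or run off the root */
        while (context && context->nextchild == NULL)
        {
            context = context->parent;
            state->level--;
        }

        state->context = context ? context->nextchild : NULL;
    }
}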
/* ** The GiST PickSplit method for boxes ** We use Guttman's poly time split algorithm */ Datum g_cube_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i, j; NDBOX *datum_alpha, *datum_beta; NDBOX *datum_l, *datum_r; NDBOX *union_d, *union_dl, *union_dr; NDBOX *inter_d; bool firsttime; double size_alpha, size_beta, size_union, size_inter; double size_waste, waste; double size_l, size_r; int nbytes; OffsetNumber seed_1 = 1, seed_2 = 2; OffsetNumber *left, *right; OffsetNumber maxoff; /* * fprintf(stderr, "picksplit\n"); */ maxoff = entryvec->n - 2; nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); firsttime = true; waste = 0.0; for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { datum_alpha = DatumGetNDBOX(entryvec->vector[i].key); for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { datum_beta = DatumGetNDBOX(entryvec->vector[j].key); /* compute the wasted space by unioning these guys */ /* size_waste = size_union - size_inter; */ union_d = cube_union_v0(datum_alpha, datum_beta); rt_cube_size(union_d, &size_union); inter_d = DatumGetNDBOX(DirectFunctionCall2(cube_inter, entryvec->vector[i].key, entryvec->vector[j].key)); rt_cube_size(inter_d, &size_inter); size_waste = size_union - size_inter; /* * are these a more promising split than what we've already seen? */ if (size_waste > waste || firsttime) { waste = size_waste; seed_1 = i; seed_2 = j; firsttime = false; } } } left = v->spl_left; v->spl_nleft = 0; right = v->spl_right; v->spl_nright = 0; datum_alpha = DatumGetNDBOX(entryvec->vector[seed_1].key); datum_l = cube_union_v0(datum_alpha, datum_alpha); rt_cube_size(datum_l, &size_l); datum_beta = DatumGetNDBOX(entryvec->vector[seed_2].key); datum_r = cube_union_v0(datum_beta, datum_beta); rt_cube_size(datum_r, &size_r); /* * Now split up the regions between the two seeds. An important property * of this split algorithm is that the split vector v has the indices of * items to be split in order in its left and right vectors. We exploit * this property by doing a merge in the code that actually splits the * page. * * For efficiency, we also place the new index tuple in this loop. This is * handled at the very end, when we have placed all the existing tuples * and i == maxoff + 1. */ maxoff = OffsetNumberNext(maxoff); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { /* * If we've already decided where to place this item, just put it on * the right list. Otherwise, we need to figure out which page needs * the least enlargement in order to store the item. */ if (i == seed_1) { *left++ = i; v->spl_nleft++; continue; } else if (i == seed_2) { *right++ = i; v->spl_nright++; continue; } /* okay, which page needs least enlargement? 
*/ datum_alpha = DatumGetNDBOX(entryvec->vector[i].key); union_dl = cube_union_v0(datum_l, datum_alpha); union_dr = cube_union_v0(datum_r, datum_alpha); rt_cube_size(union_dl, &size_alpha); rt_cube_size(union_dr, &size_beta); /* pick which page to add it to */ if (size_alpha - size_l < size_beta - size_r) { datum_l = union_dl; size_l = size_alpha; *left++ = i; v->spl_nleft++; } else { datum_r = union_dr; size_r = size_beta; *right++ = i; v->spl_nright++; } } *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ v->spl_ldatum = PointerGetDatum(datum_l); v->spl_rdatum = PointerGetDatum(datum_r); PG_RETURN_POINTER(v); }
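/*
 * Self-contained sketch of the seed-picking step in g_cube_picksplit() above
 * (Guttman's quadratic PickSeeds), reduced to 1-D intervals so it can stand
 * alone; the cube code does the same thing with n-dimensional boxes via
 * cube_union_v0() and cube_inter().  All names here are illustrative.  For
 * each pair, waste = size(union) - size(intersection); the pair wasting the
 * most space when grouped together becomes the two seeds.
 */
#include <stddef.h>

typedef struct interval
{
    double      lo;
    double      hi;
} interval;

static double
interval_waste(interval a, interval b)
{
    double      union_size = (a.hi > b.hi ? a.hi : b.hi) -
                             (a.lo < b.lo ? a.lo : b.lo);
    double      inter_lo = (a.lo > b.lo ? a.lo : b.lo);
    double      inter_hi = (a.hi < b.hi ? a.hi : b.hi);
    double      inter_size = (inter_hi > inter_lo) ? (inter_hi - inter_lo) : 0.0;

    return union_size - inter_size;
}

/* Pick the pair of entries that wastes the most space when grouped together. */
static void
pick_seeds(const interval *entries, size_t n, size_t *seed_1, size_t *seed_2)
{
    double      worst = -1.0;
    size_t      i, j;

    *seed_1 = 0;
    *seed_2 = (n > 1) ? 1 : 0;
    for (i = 0; i < n; i++)
        for (j = i + 1; j < n; j++)
        {
            double      waste = interval_waste(entries[i], entries[j]);

            if (waste > worst)
            {
                worst = waste;
                *seed_1 = i;
                *seed_2 = j;
            }
        }
}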
/* * ExecIndexBuildScanKeys * Build the index scan keys from the index qualification expressions * * The index quals are passed to the index AM in the form of a ScanKey array. * This routine sets up the ScanKeys, fills in all constant fields of the * ScanKeys, and prepares information about the keys that have non-constant * comparison values. We divide index qual expressions into five types: * * 1. Simple operator with constant comparison value ("indexkey op constant"). * For these, we just fill in a ScanKey containing the constant value. * * 2. Simple operator with non-constant value ("indexkey op expression"). * For these, we create a ScanKey with everything filled in except the * expression value, and set up an IndexRuntimeKeyInfo struct to drive * evaluation of the expression at the right times. * * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)"). * For these, we create a header ScanKey plus a subsidiary ScanKey array, * as specified in access/skey.h. The elements of the row comparison * can have either constant or non-constant comparison values. * * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)"). If the index * has rd_am->amsearcharray, we handle these the same as simple operators, * setting the SK_SEARCHARRAY flag to tell the AM to handle them. Otherwise, * we create a ScanKey with everything filled in except the comparison value, * and set up an IndexArrayKeyInfo struct to drive processing of the qual. * (Note that if we use an IndexArrayKeyInfo struct, the array expression is * always treated as requiring runtime evaluation, even if it's a constant.) * * 5. NullTest ("indexkey IS NULL/IS NOT NULL"). We just fill in the * ScanKey properly. * * This code is also used to prepare ORDER BY expressions for amcanorderbyop * indexes. The behavior is exactly the same, except that we have to look up * the operator differently. Note that only cases 1 and 2 are currently * possible for ORDER BY. * * Input params are: * * planstate: executor state node we are working for * index: the index we are building scan keys for * quals: indexquals (or indexorderbys) expressions * isorderby: true if processing ORDER BY exprs, false if processing quals * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none * *numRuntimeKeys: number of pre-existing runtime keys * * Output params are: * * *scanKeys: receives ptr to array of ScanKeys * *numScanKeys: receives number of scankeys * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none * *numRuntimeKeys: receives number of runtime keys * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none * *numArrayKeys: receives number of array keys * * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that * IndexArrayKeyInfos are not supported. */ void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys) { ListCell *qual_cell; ScanKey scan_keys; IndexRuntimeKeyInfo *runtime_keys; IndexArrayKeyInfo *array_keys; int n_scan_keys; int n_runtime_keys; int max_runtime_keys; int n_array_keys; int j; /* Allocate array for ScanKey structs: one per qual */ n_scan_keys = list_length(quals); scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData)); /* * runtime_keys array is dynamically resized as needed. 
We handle it this * way so that the same runtime keys array can be shared between * indexquals and indexorderbys, which will be processed in separate calls * of this function. Caller must be sure to pass in NULL/0 for first * call. */ runtime_keys = *runtimeKeys; n_runtime_keys = max_runtime_keys = *numRuntimeKeys; /* Allocate array_keys as large as it could possibly need to be */ array_keys = (IndexArrayKeyInfo *) palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo)); n_array_keys = 0; /* * for each opclause in the given qual, convert the opclause into a single * scan key */ j = 0; foreach(qual_cell, quals) { Expr *clause = (Expr *) lfirst(qual_cell); ScanKey this_scan_key = &scan_keys[j++]; Oid opno; /* operator's OID */ RegProcedure opfuncid; /* operator proc id used in scan */ Oid opfamily; /* opfamily of index column */ int op_strategy; /* operator's strategy number */ Oid op_lefttype; /* operator's declared input types */ Oid op_righttype; Expr *leftop; /* expr on lhs of operator */ Expr *rightop; /* expr on rhs ... */ AttrNumber varattno; /* att number used in scan */ if (IsA(clause, OpExpr)) { /* indexkey op const or indexkey op expression */ int flags = 0; Datum scanvalue; opno = ((OpExpr *) clause)->opno; opfuncid = ((OpExpr *) clause)->opfuncid; /* * leftop should be the index key Var, possibly relabeled */ leftop = (Expr *) get_leftop(clause); if (leftop && IsA(leftop, RelabelType)) leftop = ((RelabelType *) leftop)->arg; Assert(leftop != NULL); if (!(IsA(leftop, Var) && ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; if (varattno < 1 || varattno > index->rd_index->indnatts) elog(ERROR, "bogus index qualification"); /* * We have to look up the operator's strategy number. This * provides a cross-check that the operator does match the index. */ opfamily = index->rd_opfamily[varattno - 1]; get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); if (isorderby) flags |= SK_ORDER_BY; /* * rightop is the constant or variable comparison value */ rightop = (Expr *) get_rightop(clause); if (rightop && IsA(rightop, RelabelType)) rightop = ((RelabelType *) rightop)->arg; Assert(rightop != NULL); if (IsA(rightop, Const)) { /* OK, simple constant comparison value */ scanvalue = ((Const *) rightop)->constvalue; if (((Const *) rightop)->constisnull) flags |= SK_ISNULL; } else { /* Need to treat this one as a runtime key */ if (n_runtime_keys >= max_runtime_keys) { if (max_runtime_keys == 0) { max_runtime_keys = 8; runtime_keys = (IndexRuntimeKeyInfo *) palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } else { max_runtime_keys *= 2; runtime_keys = (IndexRuntimeKeyInfo *) repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } } runtime_keys[n_runtime_keys].scan_key = this_scan_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); runtime_keys[n_runtime_keys].key_toastable = TypeIsToastable(op_righttype); n_runtime_keys++; scanvalue = (Datum) 0; } /* * initialize the scan key's fields appropriately */ ScanKeyEntryInitialize(this_scan_key, flags, varattno, /* attribute number to scan */ op_strategy, /* op's strategy */ op_righttype, /* strategy subtype */ ((OpExpr *) clause)->inputcollid, /* collation */ opfuncid, /* reg proc to use */ scanvalue); /* constant */ } else if (IsA(clause, RowCompareExpr)) { /* (indexkey, indexkey, ...) op (expression, expression, ...) 
*/ RowCompareExpr *rc = (RowCompareExpr *) clause; ListCell *largs_cell = list_head(rc->largs); ListCell *rargs_cell = list_head(rc->rargs); ListCell *opnos_cell = list_head(rc->opnos); ListCell *collids_cell = list_head(rc->inputcollids); ScanKey first_sub_key; int n_sub_key; Assert(!isorderby); first_sub_key = (ScanKey) palloc(list_length(rc->opnos) * sizeof(ScanKeyData)); n_sub_key = 0; /* Scan RowCompare columns and generate subsidiary ScanKey items */ while (opnos_cell != NULL) { ScanKey this_sub_key = &first_sub_key[n_sub_key]; int flags = SK_ROW_MEMBER; Datum scanvalue; Oid inputcollation; /* * leftop should be the index key Var, possibly relabeled */ leftop = (Expr *) lfirst(largs_cell); largs_cell = lnext(largs_cell); if (leftop && IsA(leftop, RelabelType)) leftop = ((RelabelType *) leftop)->arg; Assert(leftop != NULL); if (!(IsA(leftop, Var) && ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; /* * We have to look up the operator's associated btree support * function */ opno = lfirst_oid(opnos_cell); opnos_cell = lnext(opnos_cell); if (index->rd_rel->relam != BTREE_AM_OID || varattno < 1 || varattno > index->rd_index->indnatts) elog(ERROR, "bogus RowCompare index qualification"); opfamily = index->rd_opfamily[varattno - 1]; get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); if (op_strategy != rc->rctype) elog(ERROR, "RowCompare index qualification contains wrong operator"); opfuncid = get_opfamily_proc(opfamily, op_lefttype, op_righttype, BTORDER_PROC); inputcollation = lfirst_oid(collids_cell); collids_cell = lnext(collids_cell); /* * rightop is the constant or variable comparison value */ rightop = (Expr *) lfirst(rargs_cell); rargs_cell = lnext(rargs_cell); if (rightop && IsA(rightop, RelabelType)) rightop = ((RelabelType *) rightop)->arg; Assert(rightop != NULL); if (IsA(rightop, Const)) { /* OK, simple constant comparison value */ scanvalue = ((Const *) rightop)->constvalue; if (((Const *) rightop)->constisnull) flags |= SK_ISNULL; } else { /* Need to treat this one as a runtime key */ if (n_runtime_keys >= max_runtime_keys) { if (max_runtime_keys == 0) { max_runtime_keys = 8; runtime_keys = (IndexRuntimeKeyInfo *) palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } else { max_runtime_keys *= 2; runtime_keys = (IndexRuntimeKeyInfo *) repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } } runtime_keys[n_runtime_keys].scan_key = this_sub_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); runtime_keys[n_runtime_keys].key_toastable = TypeIsToastable(op_righttype); n_runtime_keys++; scanvalue = (Datum) 0; } /* * initialize the subsidiary scan key's fields appropriately */ ScanKeyEntryInitialize(this_sub_key, flags, varattno, /* attribute number */ op_strategy, /* op's strategy */ op_righttype, /* strategy subtype */ inputcollation, /* collation */ opfuncid, /* reg proc to use */ scanvalue); /* constant */ n_sub_key++; } /* Mark the last subsidiary scankey correctly */ first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END; /* * We don't use ScanKeyEntryInitialize for the header because it * isn't going to contain a valid sk_func pointer. 
*/ MemSet(this_scan_key, 0, sizeof(ScanKeyData)); this_scan_key->sk_flags = SK_ROW_HEADER; this_scan_key->sk_attno = first_sub_key->sk_attno; this_scan_key->sk_strategy = rc->rctype; /* sk_subtype, sk_collation, sk_func not used in a header */ this_scan_key->sk_argument = PointerGetDatum(first_sub_key); } else if (IsA(clause, ScalarArrayOpExpr)) { /* indexkey op ANY (array-expression) */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; int flags = 0; Datum scanvalue; Assert(!isorderby); Assert(saop->useOr); opno = saop->opno; opfuncid = saop->opfuncid; /* * leftop should be the index key Var, possibly relabeled */ leftop = (Expr *) linitial(saop->args); if (leftop && IsA(leftop, RelabelType)) leftop = ((RelabelType *) leftop)->arg; Assert(leftop != NULL); if (!(IsA(leftop, Var) && ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; if (varattno < 1 || varattno > index->rd_index->indnatts) elog(ERROR, "bogus index qualification"); /* * We have to look up the operator's strategy number. This * provides a cross-check that the operator does match the index. */ opfamily = index->rd_opfamily[varattno - 1]; get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); /* * rightop is the constant or variable array value */ rightop = (Expr *) lsecond(saop->args); if (rightop && IsA(rightop, RelabelType)) rightop = ((RelabelType *) rightop)->arg; Assert(rightop != NULL); if (index->rd_am->amsearcharray) { /* Index AM will handle this like a simple operator */ flags |= SK_SEARCHARRAY; if (IsA(rightop, Const)) { /* OK, simple constant comparison value */ scanvalue = ((Const *) rightop)->constvalue; if (((Const *) rightop)->constisnull) flags |= SK_ISNULL; } else { /* Need to treat this one as a runtime key */ if (n_runtime_keys >= max_runtime_keys) { if (max_runtime_keys == 0) { max_runtime_keys = 8; runtime_keys = (IndexRuntimeKeyInfo *) palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } else { max_runtime_keys *= 2; runtime_keys = (IndexRuntimeKeyInfo *) repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); } } runtime_keys[n_runtime_keys].scan_key = this_scan_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); /* * Careful here: the runtime expression is not of * op_righttype, but rather is an array of same; so * TypeIsToastable() isn't helpful. However, we can * assume that all array types are toastable. 
*/ runtime_keys[n_runtime_keys].key_toastable = true; n_runtime_keys++; scanvalue = (Datum) 0; } } else { /* Executor has to expand the array value */ array_keys[n_array_keys].scan_key = this_scan_key; array_keys[n_array_keys].array_expr = ExecInitExpr(rightop, planstate); /* the remaining fields were zeroed by palloc0 */ n_array_keys++; scanvalue = (Datum) 0; } /* * initialize the scan key's fields appropriately */ ScanKeyEntryInitialize(this_scan_key, flags, varattno, /* attribute number to scan */ op_strategy, /* op's strategy */ op_righttype, /* strategy subtype */ saop->inputcollid, /* collation */ opfuncid, /* reg proc to use */ scanvalue); /* constant */ } else if (IsA(clause, NullTest)) { /* indexkey IS NULL or indexkey IS NOT NULL */ NullTest *ntest = (NullTest *) clause; int flags; Assert(!isorderby); /* * argument should be the index key Var, possibly relabeled */ leftop = ntest->arg; if (leftop && IsA(leftop, RelabelType)) leftop = ((RelabelType *) leftop)->arg; Assert(leftop != NULL); if (!(IsA(leftop, Var) && ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "NullTest indexqual has wrong key"); varattno = ((Var *) leftop)->varattno; /* * initialize the scan key's fields appropriately */ switch (ntest->nulltesttype) { case IS_NULL: flags = SK_ISNULL | SK_SEARCHNULL; break; case IS_NOT_NULL: flags = SK_ISNULL | SK_SEARCHNOTNULL; break; default: elog(ERROR, "unrecognized nulltesttype: %d", (int) ntest->nulltesttype); flags = 0; /* keep compiler quiet */ break; } ScanKeyEntryInitialize(this_scan_key, flags, varattno, /* attribute number to scan */ InvalidStrategy, /* no strategy */ InvalidOid, /* no strategy subtype */ InvalidOid, /* no collation */ InvalidOid, /* no reg proc for this */ (Datum) 0); /* constant */ } else elog(ERROR, "unsupported indexqual type: %d", (int) nodeTag(clause)); }
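/*
 * Illustrative standalone sketch (added for exposition, not part of the
 * executor source): the runtime-key handling above grows its array on
 * demand, starting at 8 entries and doubling on overflow, so the same array
 * can be shared between the indexqual and indexorderby calls.  The sketch
 * below shows the same growth pattern with plain malloc/realloc; all names
 * here are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int key_id; } ExampleKey;

static ExampleKey *example_keys = NULL;
static int n_example_keys = 0;
static int max_example_keys = 0;

static void
add_example_key(int key_id)
{
    if (n_example_keys >= max_example_keys)
    {
        if (max_example_keys == 0)
        {
            /* first key: start with a small array */
            max_example_keys = 8;
            example_keys = malloc(max_example_keys * sizeof(ExampleKey));
        }
        else
        {
            /* array full: double the capacity */
            max_example_keys *= 2;
            example_keys = realloc(example_keys,
                                   max_example_keys * sizeof(ExampleKey));
        }
        if (example_keys == NULL)
        {
            fprintf(stderr, "out of memory\n");
            exit(1);
        }
    }
    example_keys[n_example_keys++].key_id = key_id;
}

int
main(void)
{
    for (int i = 0; i < 100; i++)
        add_example_key(i);
    printf("stored %d keys, capacity %d\n", n_example_keys, max_example_keys);
    free(example_keys);
    return 0;
}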
void parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) { int type, lenlemm; char *lemm = NULL; WParserInfo *prsobj = findprs(cfg->prs_id); LexizeData ldata; TSLexeme *norms; prsobj->prs = (void *) DatumGetPointer( FunctionCall2( &(prsobj->start_info), PointerGetDatum(buf), Int32GetDatum(buflen) ) ); LexizeInit(&ldata, cfg); do { type = DatumGetInt32(FunctionCall3( &(prsobj->getlexeme_info), PointerGetDatum(prsobj->prs), PointerGetDatum(&lemm), PointerGetDatum(&lenlemm))); if (type > 0 && lenlemm >= MAXSTRLEN) { #ifdef IGNORE_LONGLEXEME ereport(NOTICE, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("A word you are indexing is too long. It will be ignored."))); continue; #else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("A word you are indexing is too long"))); #endif } LexizeAddLemm(&ldata, type, lemm, lenlemm); while ((norms = LexizeExec(&ldata, NULL)) != NULL) { TSLexeme *ptr = norms; prs->pos++; /* set pos */ while (ptr->lexeme) { if (prs->curwords == prs->lenwords) { prs->lenwords *= 2; prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD)); } if (ptr->flags & TSL_ADDPOS) prs->pos++; prs->words[prs->curwords].len = strlen(ptr->lexeme); prs->words[prs->curwords].word = ptr->lexeme; prs->words[prs->curwords].nvariant = ptr->nvariant; prs->words[prs->curwords].alen = 0; prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); ptr++; prs->curwords++; } pfree(norms); } } while (type > 0); FunctionCall1( &(prsobj->end_info), PointerGetDatum(prsobj->prs) ); }
Datum gist_point_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); bool *recheck = (bool *) PG_GETARG_POINTER(4); bool result; StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset; switch (strategyGroup) { case PointStrategyNumberGroup: result = gist_point_consistent_internal(strategy % GeoStrategyNumberOffset, GIST_LEAF(entry), DatumGetBoxP(entry->key), PG_GETARG_POINT_P(1)); *recheck = false; break; case BoxStrategyNumberGroup: { /* * The only operator in this group is point <@ box (on_pb), so * we needn't examine strategy again. * * For historical reasons, on_pb uses exact rather than fuzzy * comparisons. We could use box_overlap when at an internal * page, but that would lead to possibly visiting child pages * uselessly, because box_overlap uses fuzzy comparisons. * Instead we write a non-fuzzy overlap test. The same code * will also serve for leaf-page tests, since leaf keys have * high == low. */ BOX *query, *key; query = PG_GETARG_BOX_P(1); key = DatumGetBoxP(entry->key); result = (key->high.x >= query->low.x && key->low.x <= query->high.x && key->high.y >= query->low.y && key->low.y <= query->high.y); *recheck = false; } break; case PolygonStrategyNumberGroup: { POLYGON *query = PG_GETARG_POLYGON_P(1); result = DatumGetBool(DirectFunctionCall5( gist_poly_consistent, PointerGetDatum(entry), PolygonPGetDatum(query), Int16GetDatum(RTOverlapStrategyNumber), 0, PointerGetDatum(recheck))); if (GIST_LEAF(entry) && result) { /* * We are on leaf page and quick check shows overlapping * of polygon's bounding box and point */ BOX *box = DatumGetBoxP(entry->key); Assert(box->high.x == box->low.x && box->high.y == box->low.y); result = DatumGetBool(DirectFunctionCall2( poly_contain_pt, PolygonPGetDatum(query), PointPGetDatum(&box->high))); *recheck = false; } } break; case CircleStrategyNumberGroup: { CIRCLE *query = PG_GETARG_CIRCLE_P(1); result = DatumGetBool(DirectFunctionCall5( gist_circle_consistent, PointerGetDatum(entry), CirclePGetDatum(query), Int16GetDatum(RTOverlapStrategyNumber), 0, PointerGetDatum(recheck))); if (GIST_LEAF(entry) && result) { /* * We are on leaf page and quick check shows overlapping * of circle's bounding box and point */ BOX *box = DatumGetBoxP(entry->key); Assert(box->high.x == box->low.x && box->high.y == box->low.y); result = DatumGetBool(DirectFunctionCall2( circle_contain_pt, CirclePGetDatum(query), PointPGetDatum(&box->high))); *recheck = false; } } break; default: elog(ERROR, "unrecognized strategy number: %d", strategy); result = false; /* keep compiler quiet */ break; } PG_RETURN_BOOL(result); }
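/*
 * Illustrative standalone sketch (added for exposition): the
 * BoxStrategyNumberGroup branch above deliberately uses exact comparisons
 * instead of the fuzzy, epsilon-based ones used elsewhere in the geometric
 * code.  The structs and the 1e-6 epsilon below are assumptions for the
 * example only; they are not the backend's definitions.
 */
#include <stdio.h>
#include <stdbool.h>

typedef struct { double x, y; } ExPoint;
typedef struct { ExPoint high, low; } ExBox;

#define EX_EPSILON 1e-6

static bool
box_overlap_exact(const ExBox *a, const ExBox *b)
{
    return a->high.x >= b->low.x && a->low.x <= b->high.x &&
           a->high.y >= b->low.y && a->low.y <= b->high.y;
}

static bool
box_overlap_fuzzy(const ExBox *a, const ExBox *b)
{
    return a->high.x + EX_EPSILON >= b->low.x &&
           a->low.x - EX_EPSILON <= b->high.x &&
           a->high.y + EX_EPSILON >= b->low.y &&
           a->low.y - EX_EPSILON <= b->high.y;
}

int
main(void)
{
    /* boxes that only "touch" within epsilon: fuzzy says overlap, exact says no */
    ExBox a = {{1.0, 1.0}, {0.0, 0.0}};
    ExBox b = {{2.0, 2.0}, {1.0000005, 0.0}};

    printf("exact: %d, fuzzy: %d\n",
           box_overlap_exact(&a, &b), box_overlap_fuzzy(&a, &b));
    return 0;
}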
void init_cfg(Oid id, TSCfgInfo * cfg) { Oid arg[2]; bool isnull; Datum pars[2]; int stat, i, j; text *ptr; text *prsname = NULL; char *nsp = get_namespace(TSNSP_FunctionOid); char buf[1024]; MemoryContext oldcontext; void *plan; arg[0] = OIDOID; arg[1] = OIDOID; pars[0] = ObjectIdGetDatum(id); pars[1] = ObjectIdGetDatum(id); memset(cfg, 0, sizeof(TSCfgInfo)); SPI_connect(); sprintf(buf, "select prs_name from %s.pg_ts_cfg where oid = $1", nsp); plan = SPI_prepare(buf, 1, arg); if (!plan) ts_error(ERROR, "SPI_prepare() failed"); stat = SPI_execp(plan, pars, " ", 1); if (stat < 0) ts_error(ERROR, "SPI_execp return %d", stat); if (SPI_processed > 0) { prsname = (text *) DatumGetPointer( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); oldcontext = MemoryContextSwitchTo(TopMemoryContext); prsname = ptextdup(prsname); MemoryContextSwitchTo(oldcontext); cfg->id = id; } else ts_error(ERROR, "No tsearch cfg with id %d", id); SPI_freeplan(plan); arg[0] = TEXTOID; sprintf(buf, "select lt.tokid, map.dict_name from %s.pg_ts_cfgmap as map, %s.pg_ts_cfg as cfg, %s.token_type( $1 ) as lt where lt.alias = map.tok_alias and map.ts_name = cfg.ts_name and cfg.oid= $2 order by lt.tokid desc;", nsp, nsp, nsp); plan = SPI_prepare(buf, 2, arg); if (!plan) ts_error(ERROR, "SPI_prepare() failed"); pars[0] = PointerGetDatum(prsname); stat = SPI_execp(plan, pars, " ", 0); if (stat < 0) ts_error(ERROR, "SPI_execp return %d", stat); if (SPI_processed <= 0) ts_error(ERROR, "No parser with id %d", id); for (i = 0; i < SPI_processed; i++) { int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull)); ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull)); ArrayType *a; if (!cfg->map) { cfg->len = lexid + 1; cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len); if (!cfg->map) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len); } if (isnull) continue; a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a))); if (ARR_NDIM(a) != 1) ts_error(ERROR, "Wrong dimension"); if (ARRNELEMS(a) < 1) continue; if (ARR_HASNULL(a)) ts_error(ERROR, "Array must not contain nulls"); cfg->map[lexid].len = ARRNELEMS(a); cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len); if (!cfg->map[lexid].dict_id) ts_error(ERROR, "No memory"); memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len); ptr = (text *) ARR_DATA_PTR(a); oldcontext = MemoryContextSwitchTo(TopMemoryContext); for (j = 0; j < cfg->map[lexid].len; j++) { cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr)); ptr = NEXTVAL(ptr); } MemoryContextSwitchTo(oldcontext); if (a != toasted_a) pfree(a); } SPI_freeplan(plan); SPI_finish(); cfg->prs_id = name2id_prs(prsname); pfree(prsname); pfree(nsp); for (i = 0; i < cfg->len; i++) { for (j = 0; j < cfg->map[i].len; j++) { ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]); cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr)); pfree(ptr); } } }
/* * -------------------------------------------------------------------------- * Double sorting split algorithm. This is used for both boxes and points. * * The algorithm finds a split of the boxes by considering splits along each axis. * Each entry is first projected as an interval on the X-axis, and different * ways to split the intervals into two groups are considered, trying to * minimize the overlap of the groups. Then the same is repeated for the * Y-axis, and the overall best split is chosen. The quality of a split is * determined by overlap along that axis and some other criteria (see * g_box_consider_split). * * After that, all the entries are divided into three groups: * * 1) Entries which should be placed to the left group * 2) Entries which should be placed to the right group * 3) "Common entries" which can be placed in either group without affecting * overlap along the selected axis. * * The common entries are distributed by minimizing penalty. * * For details see: * "A new double sorting-based node splitting algorithm for R-tree", A. Korotkov * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36 * -------------------------------------------------------------------------- */ Datum gist_box_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); OffsetNumber i, maxoff; ConsiderSplitContext context; BOX *box, *leftBox, *rightBox; int dim, commonEntriesCount; SplitInterval *intervalsLower, *intervalsUpper; CommonEntry *commonEntries; int nentries; memset(&context, 0, sizeof(ConsiderSplitContext)); maxoff = entryvec->n - 1; nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1; /* Allocate arrays for intervals along axes */ intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval)); intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval)); /* * Calculate the overall minimum bounding box over all the entries. */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { box = DatumGetBoxP(entryvec->vector[i].key); if (i == FirstOffsetNumber) context.boundingBox = *box; else adjustBox(&context.boundingBox, box); } /* * Iterate over axes for optimal split searching. */ context.first = true; /* nothing selected yet */ for (dim = 0; dim < 2; dim++) { double leftUpper, rightLower; int i1, i2; /* Project each entry as an interval on the selected axis. */ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { box = DatumGetBoxP(entryvec->vector[i].key); if (dim == 0) { intervalsLower[i - FirstOffsetNumber].lower = box->low.x; intervalsLower[i - FirstOffsetNumber].upper = box->high.x; } else { intervalsLower[i - FirstOffsetNumber].lower = box->low.y; intervalsLower[i - FirstOffsetNumber].upper = box->high.y; } } /* * Make two arrays of intervals: one sorted by lower bound and another * sorted by upper bound. */ memcpy(intervalsUpper, intervalsLower, sizeof(SplitInterval) * nentries); qsort(intervalsLower, nentries, sizeof(SplitInterval), interval_cmp_lower); qsort(intervalsUpper, nentries, sizeof(SplitInterval), interval_cmp_upper); /*---- * The goal is to form a left and right interval, so that every entry * interval is contained by either left or right interval (or both). * * For example, with the intervals (0,1), (1,3), (2,3), (2,4): * * 0 1 2 3 4 * +-+ * +---+ * +-+ * +---+ * * The left and right intervals are of the form (0,a) and (b,4).
* We first consider splits where b is the lower bound of an entry. * We iterate through all entries, and for each b, calculate the * smallest possible a. Then we consider splits where a is the * upper bound of an entry, and for each a, calculate the greatest * possible b. * * In the above example, the first loop would consider splits: * b=0: (0,1)-(0,4) * b=1: (0,1)-(1,4) * b=2: (0,3)-(2,4) * * And the second loop: * a=1: (0,1)-(1,4) * a=3: (0,3)-(2,4) * a=4: (0,4)-(2,4) */ /* * Iterate over lower bound of right group, finding smallest possible * upper bound of left group. */ i1 = 0; i2 = 0; rightLower = intervalsLower[i1].lower; leftUpper = intervalsUpper[i2].lower; while (true) { /* * Find next lower bound of right group. */ while (i1 < nentries && rightLower == intervalsLower[i1].lower) { leftUpper = Max(leftUpper, intervalsLower[i1].upper); i1++; } if (i1 >= nentries) break; rightLower = intervalsLower[i1].lower; /* * Find count of intervals which anyway should be placed to the * left group. */ while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper) i2++; /* * Consider found split. */ g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2); } /* * Iterate over upper bound of left group finding greatest possible * lower bound of right group. */ i1 = nentries - 1; i2 = nentries - 1; rightLower = intervalsLower[i1].upper; leftUpper = intervalsUpper[i2].upper; while (true) { /* * Find next upper bound of left group. */ while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper) { rightLower = Min(rightLower, intervalsUpper[i2].lower); i2--; } if (i2 < 0) break; leftUpper = intervalsUpper[i2].upper; /* * Find count of intervals which anyway should be placed to the * right group. */ while (i1 >= 0 && intervalsLower[i1].lower >= rightLower) i1--; /* * Consider found split. */ g_box_consider_split(&context, dim, rightLower, i1 + 1, leftUpper, i2 + 1); } } /* * If we failed to find any acceptable splits, use trivial split. */ if (context.first) { fallbackSplit(entryvec, v); PG_RETURN_POINTER(v); } /* * Ok, we have now selected the split across one axis. * * While considering the splits, we already determined that there will be * enough entries in both groups to reach the desired ratio, but we did * not memorize which entries go to which group. So determine that now. */ /* Allocate vectors for results */ v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber)); v->spl_nleft = 0; v->spl_nright = 0; /* Allocate bounding boxes of left and right groups */ leftBox = (BOX *) palloc0(sizeof(BOX)); rightBox = (BOX *) palloc0(sizeof(BOX)); /* * Allocate an array for "common entries" - entries which can be placed to * either group without affecting overlap along selected axis. */ commonEntriesCount = 0; commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry)); /* Helper macros to place an entry in the left or right group */ #define PLACE_LEFT(box, off) \ do { \ if (v->spl_nleft > 0) \ adjustBox(leftBox, box); \ else \ *leftBox = *(box); \ v->spl_left[v->spl_nleft++] = off; \ } while(0) #define PLACE_RIGHT(box, off) \ do { \ if (v->spl_nright > 0) \ adjustBox(rightBox, box); \ else \ *rightBox = *(box); \ v->spl_right[v->spl_nright++] = off; \ } while(0) /* * Distribute entries which can be distributed unambiguously, and collect * common entries.
*/ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { double lower, upper; /* * Get upper and lower bounds along selected axis. */ box = DatumGetBoxP(entryvec->vector[i].key); if (context.dim == 0) { lower = box->low.x; upper = box->high.x; } else { lower = box->low.y; upper = box->high.y; } if (upper <= context.leftUpper) { /* Fits to the left group */ if (lower >= context.rightLower) { /* Fits also to the right group, so "common entry" */ commonEntries[commonEntriesCount++].index = i; } else { /* Doesn't fit to the right group, so join to the left group */ PLACE_LEFT(box, i); } } else { /* * Each entry should fit on either left or right group. Since this * entry didn't fit on the left group, it better fit in the right * group. */ Assert(lower >= context.rightLower); /* Doesn't fit to the left group, so join to the right group */ PLACE_RIGHT(box, i); } } /* * Distribute "common entries", if any. */ if (commonEntriesCount > 0) { /* * Calculate minimum number of entries that must be placed in both * groups, to reach LIMIT_RATIO. */ int m = ceil(LIMIT_RATIO * (double) nentries); /* * Calculate delta between penalties of join "common entries" to * different groups. */ for (i = 0; i < commonEntriesCount; i++) { box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key); commonEntries[i].delta = Abs(box_penalty(leftBox, box) - box_penalty(rightBox, box)); } /* * Sort "common entries" by calculated deltas in order to distribute * the most ambiguous entries first. */ qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp); /* * Distribute "common entries" between groups. */ for (i = 0; i < commonEntriesCount; i++) { box = DatumGetBoxP(entryvec->vector[commonEntries[i].index].key); /* * Check if we have to place this entry in either group to achieve * LIMIT_RATIO. */ if (v->spl_nleft + (commonEntriesCount - i) <= m) PLACE_LEFT(box, commonEntries[i].index); else if (v->spl_nright + (commonEntriesCount - i) <= m) PLACE_RIGHT(box, commonEntries[i].index); else { /* Otherwise select the group by minimal penalty */ if (box_penalty(leftBox, box) < box_penalty(rightBox, box)) PLACE_LEFT(box, commonEntries[i].index); else PLACE_RIGHT(box, commonEntries[i].index); } } } v->spl_ldatum = PointerGetDatum(leftBox); v->spl_rdatum = PointerGetDatum(rightBox); PG_RETURN_POINTER(v); }
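/*
 * Illustrative standalone sketch (added for exposition): the candidate-split
 * enumeration from the double-sorting algorithm above, run on the intervals
 * (0,1), (1,3), (2,3), (2,4) used in the worked example in the comment.
 * Only the first sweep (over lower bounds of the right group) is shown, and
 * penalty/ratio bookkeeping is omitted; it prints the candidates reached
 * after the first advance, i.e. the b=1 and b=2 lines of that example.
 * All names are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct { double lower, upper; } ExInterval;

static int
cmp_lower(const void *a, const void *b)
{
    double x = ((const ExInterval *) a)->lower;
    double y = ((const ExInterval *) b)->lower;
    return (x > y) - (x < y);
}

static int
cmp_upper(const void *a, const void *b)
{
    double x = ((const ExInterval *) a)->upper;
    double y = ((const ExInterval *) b)->upper;
    return (x > y) - (x < y);
}

int
main(void)
{
    ExInterval byLower[] = {{0, 1}, {1, 3}, {2, 3}, {2, 4}};
    ExInterval byUpper[] = {{0, 1}, {1, 3}, {2, 3}, {2, 4}};
    int n = 4, i1 = 0, i2 = 0;
    double rightLower, leftUpper;

    qsort(byLower, n, sizeof(ExInterval), cmp_lower);
    qsort(byUpper, n, sizeof(ExInterval), cmp_upper);

    rightLower = byLower[0].lower;
    leftUpper = byUpper[0].lower;
    for (;;)
    {
        /* advance over entries sharing the current right-group lower bound */
        while (i1 < n && rightLower == byLower[i1].lower)
        {
            if (byLower[i1].upper > leftUpper)
                leftUpper = byLower[i1].upper;
            i1++;
        }
        if (i1 >= n)
            break;
        rightLower = byLower[i1].lower;

        /* entries whose upper bound fits under leftUpper must go to the left */
        while (i2 < n && byUpper[i2].upper <= leftUpper)
            i2++;

        /* 0 and 4 are the overall bounds of this example's intervals */
        printf("candidate split: left group = (0, %g), right group = (%g, 4)\n",
               leftUpper, rightLower);
    }
    return 0;
}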
Datum _ltree_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *retval = entry; if (entry->leafkey) { /* ltree */ ltree_gist *key; ArrayType *val = DatumGetArrayTypeP(entry->key); int4 len = LTG_HDRSIZE + ASIGLEN; int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val)); ltree *item = (ltree *) ARR_DATA_PTR(val); if (ARR_NDIM(val) > 1) ereport(ERROR, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), errmsg("array must be one-dimensional"))); if (ARR_HASNULL(val)) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("array must not contain nulls"))); key = (ltree_gist *) palloc(len); SET_VARSIZE(key, len); key->flag = 0; MemSet(LTG_SIGN(key), 0, ASIGLEN); while (num > 0) { hashing(LTG_SIGN(key), item); num--; item = NEXTVAL(item); } retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, entry->offset, FALSE); } else if (!LTG_ISALLTRUE(entry->key)) { int4 i, len; ltree_gist *key; BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key)); ALOOPBYTE { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } len = LTG_HDRSIZE; key = (ltree_gist *) palloc(len); SET_VARSIZE(key, len); key->flag = LTG_ALLTRUE; retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, entry->offset, FALSE); }
PG_RETURN_POINTER(retval); }
/* * Internal-page consistency for all these types * * We can use the same function since all types use bounding boxes as the * internal-page representation. */ static bool rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy) { bool retval; switch (strategy) { case RTLeftStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_overright, PointerGetDatum(key), PointerGetDatum(query))); break; case RTOverLeftStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_right, PointerGetDatum(key), PointerGetDatum(query))); break; case RTOverlapStrategyNumber: retval = DatumGetBool(DirectFunctionCall2(box_overlap, PointerGetDatum(key), PointerGetDatum(query))); break; case RTOverRightStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_left, PointerGetDatum(key), PointerGetDatum(query))); break; case RTRightStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_overleft, PointerGetDatum(key), PointerGetDatum(query))); break; case RTSameStrategyNumber: case RTContainsStrategyNumber: case RTOldContainsStrategyNumber: retval = DatumGetBool(DirectFunctionCall2(box_contain, PointerGetDatum(key), PointerGetDatum(query))); break; case RTContainedByStrategyNumber: case RTOldContainedByStrategyNumber: retval = DatumGetBool(DirectFunctionCall2(box_overlap, PointerGetDatum(key), PointerGetDatum(query))); break; case RTOverBelowStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_above, PointerGetDatum(key), PointerGetDatum(query))); break; case RTBelowStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_overabove, PointerGetDatum(key), PointerGetDatum(query))); break; case RTAboveStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_overbelow, PointerGetDatum(key), PointerGetDatum(query))); break; case RTOverAboveStrategyNumber: retval = !DatumGetBool(DirectFunctionCall2(box_below, PointerGetDatum(key), PointerGetDatum(query))); break; default: elog(ERROR, "unrecognized strategy number: %d", strategy); retval = false; /* keep compiler quiet */ break; } return retval; }
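/*
 * Illustrative standalone sketch (added for exposition): why the
 * internal-page tests above are written as negations.  An internal key is
 * the bounding box of a whole subtree, so some child can still be strictly
 * left of the query only if that bounding box is NOT over-right of it.  The
 * one-dimensional ranges and names below are assumptions for the example,
 * not the backend's geometric types.
 */
#include <stdio.h>
#include <stdbool.h>

typedef struct { double lo, hi; } ExRange;     /* x-extent of a box */

/* strictly left of: the whole range ends before the query begins */
static bool ex_left(const ExRange *a, const ExRange *b)      { return a->hi < b->lo; }
/* over-right of: the range does not extend to the left of the query's start */
static bool ex_overright(const ExRange *a, const ExRange *b) { return a->lo >= b->lo; }

int
main(void)
{
    ExRange query = {5, 7};
    ExRange child = {1, 2};      /* a leaf value strictly left of the query */
    ExRange subtree = {1, 6};    /* bounding range of the subtree holding child */

    printf("child left of query:               %d\n", ex_left(&child, &query));
    printf("descend into subtree (!overright): %d\n", !ex_overright(&subtree, &query));
    return 0;
}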
Datum heap_page_items(PG_FUNCTION_ARGS) { bytea *raw_page = PG_GETARG_BYTEA_P(0); heap_page_items_state *inter_call_data = NULL; FuncCallContext *fctx; int raw_page_size; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); raw_page_size = VARSIZE(raw_page) - VARHDRSZ; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext mctx; if (raw_page_size < SizeOfPageHeaderData) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("input page too small (%d bytes)", raw_page_size))); fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); inter_call_data = palloc(sizeof(heap_page_items_state)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; inter_call_data->page = VARDATA(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); inter_call_data = fctx->user_fctx; if (fctx->call_cntr < fctx->max_calls) { Page page = inter_call_data->page; HeapTuple resultTuple; Datum result; ItemId id; Datum values[14]; bool nulls[14]; uint16 lp_offset; uint16 lp_flags; uint16 lp_len; memset(nulls, 0, sizeof(nulls)); /* Extract information from the line pointer */ id = PageGetItemId(page, inter_call_data->offset); lp_offset = ItemIdGetOffset(id); lp_flags = ItemIdGetFlags(id); lp_len = ItemIdGetLength(id); values[0] = UInt16GetDatum(inter_call_data->offset); values[1] = UInt16GetDatum(lp_offset); values[2] = UInt16GetDatum(lp_flags); values[3] = UInt16GetDatum(lp_len); /* * We do just enough validity checking to make sure we don't reference * data outside the page passed to us. The page could be corrupt in * many other ways, but at least we won't crash. */ if (ItemIdHasStorage(id) && lp_len >= MinHeapTupleSize && lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); values[8] = UInt32GetDatum(tuphdr->t_infomask2); values[9] = UInt32GetDatum(tuphdr->t_infomask); values[10] = UInt8GetDatum(tuphdr->t_hoff); /* Copy raw tuple data into bytea attribute */ tuple_data_len = lp_len - tuphdr->t_hoff; tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ); SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ); memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff, tuple_data_len); values[13] = PointerGetDatum(tuple_data_bytea); /* * We already checked that the item is completely within the raw * page passed to us, with the length given in the line pointer. * Let's check that t_hoff doesn't point over lp_len, before using * it to access t_bits and oid. 
*/ if (tuphdr->t_hoff >= SizeofHeapTupleHeader && tuphdr->t_hoff <= lp_len && tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff)) { if (tuphdr->t_infomask & HEAP_HASNULL) { int bits_len; bits_len = BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE; values[11] = CStringGetTextDatum( bits_to_text(tuphdr->t_bits, bits_len)); } else nulls[11] = true; if (tuphdr->t_infomask & HEAP_HASOID_OLD) values[12] = HeapTupleHeaderGetOidOld(tuphdr); else nulls[12] = true; } else { nulls[11] = true; nulls[12] = true; } } else { /* * The line pointer is not used, or it's invalid. Set the rest of * the fields to NULL */ int i; for (i = 4; i <= 13; i++) nulls[i] = true; } /* Build and return the result tuple. */ resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls); result = HeapTupleGetDatum(resultTuple); inter_call_data->offset++; SRF_RETURN_NEXT(fctx, result); } else SRF_RETURN_DONE(fctx); }
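/*
 * Illustrative standalone sketch (added for exposition): the bounds and
 * alignment checks applied above before trusting a line pointer - the
 * offset must be MAXALIGN'ed and offset + length must stay inside the page.
 * The 8-byte alignment, page size, and names here are assumptions for the
 * example, not the server's definitions.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define EX_ALIGNOF      8
#define EX_PAGE_SIZE    8192
#define EX_MAXALIGN(x)  (((uintptr_t) (x) + (EX_ALIGNOF - 1)) & ~((uintptr_t) (EX_ALIGNOF - 1)))

static bool
lp_looks_sane(uint16_t lp_off, uint16_t lp_len, uint16_t min_tuple_size)
{
    return lp_len >= min_tuple_size &&
           lp_off == EX_MAXALIGN(lp_off) &&
           (uint32_t) lp_off + lp_len <= EX_PAGE_SIZE;
}

int
main(void)
{
    printf("%d\n", lp_looks_sane(8160, 32, 24));   /* aligned, in bounds */
    printf("%d\n", lp_looks_sane(8161, 32, 24));   /* misaligned offset */
    printf("%d\n", lp_looks_sane(8184, 32, 24));   /* runs past the page */
    return 0;
}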
static bool gbt_macadlt(const void *a, const void *b) { return DatumGetBool(DirectFunctionCall2(macaddr_lt, PointerGetDatum(a), PointerGetDatum(b))); }
/* * Execute the index scan. * * This works by reading index TIDs from the revmap, and obtaining the index * tuples pointed to by them; the summary values in the index tuples are * compared to the scan keys. We return into the TID bitmap all the pages in * ranges corresponding to index tuples that match the scan keys. * * If a TID from the revmap is read as InvalidTID, we know that range is * unsummarized. Pages in those ranges need to be returned regardless of scan * keys. * * XXX see _bt_first on what to do about sk_subtype. */ Datum bringetbitmap(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); Relation idxRel = scan->indexRelation; Buffer buf = InvalidBuffer; BrinDesc *bdesc; Oid heapOid; Relation heapRel; BrinOpaque *opaque; BlockNumber nblocks; BlockNumber heapBlk; int totalpages = 0; int keyno; FmgrInfo *consistentFn; MemoryContext oldcxt; MemoryContext perRangeCxt; opaque = (BrinOpaque *) scan->opaque; bdesc = opaque->bo_bdesc; pgstat_count_index_scan(idxRel); /* * We need to know the size of the table so that we know how long to * iterate on the revmap. */ heapOid = IndexGetRelation(RelationGetRelid(idxRel), false); heapRel = heap_open(heapOid, AccessShareLock); nblocks = RelationGetNumberOfBlocks(heapRel); heap_close(heapRel, AccessShareLock); /* * Obtain consistent functions for all indexed column. Maybe it'd be * possible to do this lazily only the first time we see a scan key that * involves each particular attribute. */ consistentFn = palloc(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts); for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { FmgrInfo *tmp; tmp = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_CONSISTENT); fmgr_info_copy(&consistentFn[keyno], tmp, CurrentMemoryContext); } /* * Setup and use a per-range memory context, which is reset every time we * loop below. This avoids having to free the tuples within the loop. */ perRangeCxt = AllocSetContextCreate(CurrentMemoryContext, "bringetbitmap cxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldcxt = MemoryContextSwitchTo(perRangeCxt); /* * Now scan the revmap. We start by querying for heap page 0, * incrementing by the number of pages per range; this gives us a full * view of the table. */ for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange) { bool addrange; BrinTuple *tup; OffsetNumber off; Size size; CHECK_FOR_INTERRUPTS(); MemoryContextResetAndDeleteChildren(perRangeCxt); tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf, &off, &size, BUFFER_LOCK_SHARE); if (tup) { tup = brin_copy_tuple(tup, size); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } /* * For page ranges with no indexed tuple, we must return the whole * range; otherwise, compare it to the scan keys. */ if (tup == NULL) { addrange = true; } else { BrinMemTuple *dtup; int keyno; dtup = brin_deform_tuple(bdesc, tup); if (dtup->bt_placeholder) { /* * Placeholder tuples are always returned, regardless of the * values stored in them. */ addrange = true; } else { /* * Compare scan keys with summary values stored for the range. * If scan keys are matched, the page range must be added to * the bitmap. We initially assume the range needs to be * added; in particular this serves the case where there are * no keys. 
*/ addrange = true; for (keyno = 0; keyno < scan->numberOfKeys; keyno++) { ScanKey key = &scan->keyData[keyno]; AttrNumber keyattno = key->sk_attno; BrinValues *bval = &dtup->bt_columns[keyattno - 1]; Datum add; /* * The collation of the scan key must match the collation * used in the index column (but only if the search is not * IS NULL/ IS NOT NULL). Otherwise we shouldn't be using * this index ... */ Assert((key->sk_flags & SK_ISNULL) || (key->sk_collation == bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation)); /* * Check whether the scan key is consistent with the page * range values; if so, have the pages in the range added * to the output bitmap. * * When there are multiple scan keys, failure to meet the * criteria for a single one of them is enough to discard * the range as a whole, so break out of the loop as soon * as a false return value is obtained. */ add = FunctionCall3Coll(&consistentFn[keyattno - 1], key->sk_collation, PointerGetDatum(bdesc), PointerGetDatum(bval), PointerGetDatum(key)); addrange = DatumGetBool(add); if (!addrange) break; } } } /* add the pages in the range to the output bitmap, if needed */ if (addrange) { BlockNumber pageno; for (pageno = heapBlk; pageno <= heapBlk + opaque->bo_pagesPerRange - 1; pageno++) { MemoryContextSwitchTo(oldcxt); tbm_add_page(tbm, pageno); totalpages++; MemoryContextSwitchTo(perRangeCxt); } } } MemoryContextSwitchTo(oldcxt); MemoryContextDelete(perRangeCxt); if (buf != InvalidBuffer) ReleaseBuffer(buf); /* * XXX We have an approximation of the number of *pages* that our scan * returns, but we don't have a precise idea of the number of heap tuples * involved. */ PG_RETURN_INT64(totalpages * 10); }
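/*
 * Illustrative standalone sketch (added for exposition): the revmap walk
 * above visits one summary per pagesPerRange heap blocks, and a matching
 * range puts all of its blocks into the bitmap.  The heap size and range
 * size below are made up for the example; the final figure mirrors the
 * rough "ten rows per page" estimate returned at the end of bringetbitmap.
 */
#include <stdio.h>

int
main(void)
{
    unsigned int nblocks = 1000;       /* heap size, in pages */
    unsigned int pagesPerRange = 128;  /* BRIN pages_per_range */
    unsigned int heapBlk;
    long         totalpages = 0;

    for (heapBlk = 0; heapBlk < nblocks; heapBlk += pagesPerRange)
    {
        /* pretend this range matched the scan keys */
        totalpages += pagesPerRange;
        printf("range starting at block %u: add blocks %u..%u\n",
               heapBlk, heapBlk, heapBlk + pagesPerRange - 1);
    }

    printf("pages added: %ld, row estimate: %ld\n", totalpages, totalpages * 10);
    return 0;
}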
/* * Accumulate a new datum for one AO storage option. */ static void accumAOStorageOpt(char *name, char *value, ArrayBuildState *astate, bool *foundAO, bool *aovalue) { text *t; bool boolval; int intval; StringInfoData buf; Assert(astate); initStringInfo(&buf); if (pg_strcasecmp(SOPT_APPENDONLY, name) == 0) { if (!parse_bool(value, &boolval)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid bool value \"%s\" for storage option \"%s\"", value, name))); /* "appendonly" option is explicitly specified. */ if (foundAO != NULL) *foundAO = true; if (aovalue != NULL) *aovalue = boolval; /* * Record value of "appendonly" option as true always. Return * the value specified by user in aovalue. Setting * appendonly=true always in the array of datums enables us to * reuse default_reloptions() and * validateAppendOnlyRelOptions(). If validations are * successful, we keep the user specified value for * appendonly. */ appendStringInfo(&buf, "%s=%s", SOPT_APPENDONLY, "true"); } else if (pg_strcasecmp(SOPT_BLOCKSIZE, name) == 0) { if (!parse_int(value, &intval, 0 /* unit flags */, NULL /* hint message */)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid integer value \"%s\" for storage option \"%s\"", value, name))); appendStringInfo(&buf, "%s=%d", SOPT_BLOCKSIZE, intval); } else if (pg_strcasecmp(SOPT_COMPTYPE, name) == 0) { appendStringInfo(&buf, "%s=%s", SOPT_COMPTYPE, value); } else if (pg_strcasecmp(SOPT_COMPLEVEL, name) == 0) { if (!parse_int(value, &intval, 0 /* unit flags */, NULL /* hint message */)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid integer value \"%s\" for storage option \"%s\"", value, name))); appendStringInfo(&buf, "%s=%d", SOPT_COMPLEVEL, intval); } else if (pg_strcasecmp(SOPT_CHECKSUM, name) == 0) { if (!parse_bool(value, &boolval)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid bool value \"%s\" for storage option \"%s\"", value, name))); appendStringInfo(&buf, "%s=%s", SOPT_CHECKSUM, boolval ? "true" : "false"); } else if (pg_strcasecmp(SOPT_ORIENTATION, name) == 0) { if ((pg_strcasecmp(value, "row") != 0) && (pg_strcasecmp(value, "column") != 0)) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid value \"%s\" for storage option \"%s\"", value, name))); } appendStringInfo(&buf, "%s=%s", SOPT_ORIENTATION, value); } else { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid storage option \"%s\"", name))); } t = cstring_to_text(buf.data); accumArrayResult(astate, PointerGetDatum(t), /* disnull */ false, TEXTOID, CurrentMemoryContext); pfree(t); pfree(buf.data); }
/* * InternalIpcMemoryCreate(memKey, size) * * Attempt to create a new shared memory segment with the specified key. * Will fail (return NULL) if such a segment already exists. If successful, * attach the segment to the current process and return its attached address. * On success, callbacks are registered with on_shmem_exit to detach and * delete the segment when on_shmem_exit is called. * * If we fail with a failure code other than collision-with-existing-segment, * print out an error and abort. Other types of errors are not recoverable. */ static void * InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size) { IpcMemoryId shmid; void *memAddress; shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) { int shmget_errno = errno; /* * Fail quietly if error indicates a collision with existing segment. * One would expect EEXIST, given that we said IPC_EXCL, but perhaps * we could get a permission violation instead? Also, EIDRM might * occur if an old seg is slated for destruction but not gone yet. */ if (shmget_errno == EEXIST || shmget_errno == EACCES #ifdef EIDRM || shmget_errno == EIDRM #endif ) return NULL; /* * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if * there is an existing segment but it's smaller than "size" (this is * a result of poorly-thought-out ordering of error tests). To * distinguish between collision and invalid size in such cases, we * make a second try with size = 0. These kernels do not test size * against SHMMIN in the preexisting-segment case, so we will not get * EINVAL a second time if there is such a segment. */ if (shmget_errno == EINVAL) { shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) { /* As above, fail quietly if we verify a collision */ if (errno == EEXIST || errno == EACCES #ifdef EIDRM || errno == EIDRM #endif ) return NULL; /* Otherwise, fall through to report the original error */ } else { /* * On most platforms we cannot get here because SHMMIN is * greater than zero. However, if we do succeed in creating a * zero-size segment, free it and then fall through to report * the original error. */ if (shmctl(shmid, IPC_RMID, NULL) < 0) elog(LOG, "shmctl(%d, %d, 0) failed: %m", (int) shmid, IPC_RMID); } } /* * Else complain and abort. * * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX * is violated. SHMALL violation might be reported as either ENOMEM * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which * it should be. SHMMNI violation is ENOSPC, per spec. Just plain * not-enough-RAM is ENOMEM. */ errno = shmget_errno; ereport(FATAL, (errmsg("could not create shared memory segment: %m"), errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).", (unsigned long) memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection), (shmget_errno == EINVAL) ? errhint("This error usually means that PostgreSQL's request for a shared memory " "segment exceeded your kernel's SHMMAX parameter, or possibly that " "it is less than " "your kernel's SHMMIN parameter.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0, (shmget_errno == ENOMEM) ? errhint("This error usually means that PostgreSQL's request for a shared " "memory segment exceeded your kernel's SHMALL parameter. You might need " "to reconfigure the kernel with larger SHMALL.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0, (shmget_errno == ENOSPC) ? 
errhint("This error does *not* mean that you have run out of disk space. " "It occurs either if all available shared memory IDs have been taken, " "in which case you need to raise the SHMMNI parameter in your kernel, " "or because the system's overall limit for shared memory has been " "reached.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0)); } /* Register on-exit routine to delete the new segment */ on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid)); /* OK, should be able to attach to the segment */ memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS); if (memAddress == (void *) -1) elog(FATAL, "shmat(id=%d) failed: %m", shmid); /* Register on-exit routine to detach new segment before deleting */ on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress)); /* * Store shmem key and ID in data directory lockfile. Format to try to * keep it the same length always (trailing junk in the lockfile won't * hurt, but might confuse humans). */ { char line[64]; sprintf(line, "%9lu %9lu", (unsigned long) memKey, (unsigned long) shmid); AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line); } return memAddress; }
/* * LargeObjectAlterOwner * * Implementation of ALTER LARGE OBJECT statement */ void LargeObjectAlterOwner(Oid loid, Oid newOwnerId) { Form_pg_largeobject_metadata form_lo_meta; Relation pg_lo_meta; ScanKeyData skey[1]; SysScanDesc scan; HeapTuple oldtup; HeapTuple newtup; pg_lo_meta = heap_open(LargeObjectMetadataRelationId, RowExclusiveLock); ScanKeyInit(&skey[0], ObjectIdAttributeNumber, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(loid)); scan = systable_beginscan(pg_lo_meta, LargeObjectMetadataOidIndexId, true, SnapshotNow, 1, skey); oldtup = systable_getnext(scan); if (!HeapTupleIsValid(oldtup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u does not exist", loid))); form_lo_meta = (Form_pg_largeobject_metadata) GETSTRUCT(oldtup); if (form_lo_meta->lomowner != newOwnerId) { Datum values[Natts_pg_largeobject_metadata]; bool nulls[Natts_pg_largeobject_metadata]; bool replaces[Natts_pg_largeobject_metadata]; Acl *newAcl; Datum aclDatum; bool isnull; /* Superusers can always do it */ if (!superuser()) { /* * lo_compat_privileges is not checked here, because ALTER LARGE * OBJECT ... OWNER did not exist at all prior to PostgreSQL 9.0. * * We must be the owner of the existing object. */ if (!pg_largeobject_ownercheck(loid, GetUserId())) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be owner of large object %u", loid))); /* Must be able to become new owner */ check_is_member_of_role(GetUserId(), newOwnerId); } memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replaces, false, sizeof(nulls)); values[Anum_pg_largeobject_metadata_lomowner - 1] = ObjectIdGetDatum(newOwnerId); replaces[Anum_pg_largeobject_metadata_lomowner - 1] = true; /* * Determine the modified ACL for the new owner. This is only * necessary when the ACL is non-null. */ aclDatum = heap_getattr(oldtup, Anum_pg_largeobject_metadata_lomacl, RelationGetDescr(pg_lo_meta), &isnull); if (!isnull) { newAcl = aclnewowner(DatumGetAclP(aclDatum), form_lo_meta->lomowner, newOwnerId); values[Anum_pg_largeobject_metadata_lomacl - 1] = PointerGetDatum(newAcl); replaces[Anum_pg_largeobject_metadata_lomacl - 1] = true; } newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_lo_meta), values, nulls, replaces); simple_heap_update(pg_lo_meta, &newtup->t_self, newtup); CatalogUpdateIndexes(pg_lo_meta, newtup); heap_freetuple(newtup); /* Update owner dependency reference */ changeDependencyOnOwner(LargeObjectRelationId, loid, newOwnerId); } systable_endscan(scan); heap_close(pg_lo_meta, RowExclusiveLock); }
void inv_truncate(LargeObjectDesc *obj_desc, int64 len) { int32 pageno = (int32) (len / LOBLKSIZE); int32 off; ScanKeyData skey[2]; SysScanDesc sd; HeapTuple oldtuple; Form_pg_largeobject olddata; struct { bytea hdr; char data[LOBLKSIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } workbuf; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; bool nulls[Natts_pg_largeobject]; bool replace[Natts_pg_largeobject]; CatalogIndexState indstate; Assert(PointerIsValid(obj_desc)); /* enforce writability because snapshot is probably wrong otherwise */ Assert(obj_desc->flags & IFS_WRLOCK); /* * use errmsg_internal here because we don't want to expose INT64_FORMAT * in translatable strings; doing better is not worth the trouble */ if (len < 0 || len > MAX_LARGE_OBJECT_SIZE) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg_internal("invalid large object truncation target: " INT64_FORMAT, len))); open_lo_relation(); indstate = CatalogOpenIndexes(lo_heap_r); /* * Set up to find all pages with desired loid and pageno >= target */ ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); ScanKeyInit(&skey[1], Anum_pg_largeobject_pageno, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); sd = systable_beginscan_ordered(lo_heap_r, lo_index_r, obj_desc->snapshot, 2, skey); /* * If possible, get the page the truncation point is in. The truncation * point may be beyond the end of the LO or in a hole. */ olddata = NULL; if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { if (HeapTupleHasNulls(oldtuple)) /* paranoia */ elog(ERROR, "null field found in pg_largeobject"); olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple); Assert(olddata->pageno >= pageno); } /* * If we found the page of the truncation point we need to truncate the * data in it. Otherwise if we're in a hole, we need to create a page to * mark the end of data. */ if (olddata != NULL && olddata->pageno == pageno) { /* First, load old data into workbuf */ bytea *datafield = &(olddata->data); /* see note at top of * file */ bool pfreeit = false; int pagelen; if (VARATT_IS_EXTENDED(datafield)) { datafield = (bytea *) heap_tuple_untoast_attr((struct varlena *) datafield); pfreeit = true; } pagelen = getbytealen(datafield); Assert(pagelen <= LOBLKSIZE); memcpy(workb, VARDATA(datafield), pagelen); if (pfreeit) pfree(datafield); /* * Fill any hole */ off = len % LOBLKSIZE; if (off > pagelen) MemSet(workb + pagelen, 0, off - pagelen); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replace, false, sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = true; newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r), values, nulls, replace); simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } else { /* * If the first page we found was after the truncation point, we're in * a hole that we'll fill, but we need to delete the later page * because the loop below won't visit it again. */ if (olddata != NULL) { Assert(olddata->pageno > pageno); simple_heap_delete(lo_heap_r, &oldtuple->t_self); } /* * Write a brand new page. 
* * Fill the hole up to the truncation point */ off = len % LOBLKSIZE; if (off > 0) MemSet(workb, 0, off); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert new tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls); simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } /* * Delete any pages after the truncation point. If the initial search * didn't find a page, then of course there's nothing more to do. */ if (olddata != NULL) { while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { simple_heap_delete(lo_heap_r, &oldtuple->t_self); } } systable_endscan_ordered(sd); CatalogCloseIndexes(indstate); /* * Advance command counter so that tuple updates will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); }
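/*
 * Illustrative standalone sketch (added for exposition): the page arithmetic
 * used by the large object code above.  A byte position maps to a page
 * number and an offset within that page, and truncating inside a hole means
 * writing a page that is zero-filled up to that offset.  The 2048-byte block
 * size is an assumption for the example; the server uses its own LOBLKSIZE.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define EX_LOBLKSIZE 2048

int
main(void)
{
    int64_t len = 5000;                       /* truncation target in bytes */
    int32_t pageno = (int32_t) (len / EX_LOBLKSIZE);
    int32_t off = (int32_t) (len % EX_LOBLKSIZE);
    char    page[EX_LOBLKSIZE];

    /* a brand-new page covering the truncation point: zeros up to 'off' */
    memset(page, 0, (size_t) off);

    printf("byte %lld lands on page %d at offset %d; new page length %d\n",
           (long long) len, pageno, off, off);
    return 0;
}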
static void hashJsQuery(char *base, int32 pos, pg_crc32 *crc) { int32 type; int32 nextPos; check_stack_depth(); pos = readJsQueryHeader(base, pos, &type, &nextPos); COMP_CRC32(*crc, &type, sizeof(type)); switch(type) { case jqiNull: COMP_CRC32(*crc, "null", 5); break; case jqiKey: case jqiString: { int32 len; read_int32(len, base, pos); if (type == jqiKey) len++; /* include trailing '\0' */ COMP_CRC32(*crc, base + pos, len); } break; case jqiNumeric: *crc ^= (pg_crc32)DatumGetInt32(DirectFunctionCall1( hash_numeric, PointerGetDatum((Numeric)(base + pos)))); break; case jqiBool: { bool v; read_byte(v, base, pos); COMP_CRC32(*crc, &v, 1); } break; case jqiArray: { int32 i, nelems, *arrayPos; read_int32(nelems, base, pos); arrayPos = (int32*)(base + pos); COMP_CRC32(*crc, &nelems, sizeof(nelems)); for(i=0; i<nelems; i++) hashJsQuery(base, arrayPos[i], crc); } break; case jqiAnd: case jqiOr: { int32 left, right; read_int32(left, base, pos); read_int32(right, base, pos); hashJsQuery(base, left, crc); hashJsQuery(base, right, crc); } break; case jqiNot: case jqiEqual: case jqiIn: case jqiLess: case jqiGreater: case jqiLessOrEqual: case jqiGreaterOrEqual: case jqiContains: case jqiContained: case jqiOverlap: { int32 arg; read_int32(arg, base, pos); hashJsQuery(base, arg, crc); } break; case jqiAny: case jqiAnyArray: break; default: elog(ERROR, "Unknown JsQueryItem type: %d", type); } }
/* * compute_tsvector_stats() -- compute statistics for a tsvector column * * This functions computes statistics that are useful for determining @@ * operations' selectivity, along with the fraction of non-null rows and * average width. * * Instead of finding the most common values, as we do for most datatypes, * we're looking for the most common lexemes. This is more useful, because * there most probably won't be any two rows with the same tsvector and thus * the notion of a MCV is a bit bogus with this datatype. With a list of the * most common lexemes we can do a better job at figuring out @@ selectivity. * * For the same reasons we assume that tsvector columns are unique when * determining the number of distinct values. * * The algorithm used is Lossy Counting, as proposed in the paper "Approximate * frequency counts over data streams" by G. S. Manku and R. Motwani, in * Proceedings of the 28th International Conference on Very Large Data Bases, * Hong Kong, China, August 2002, section 4.2. The paper is available at * http://www.vldb.org/conf/2002/S10P03.pdf * * The Lossy Counting (aka LC) algorithm goes like this: * Let s be the threshold frequency for an item (the minimum frequency we * are interested in) and epsilon the error margin for the frequency. Let D * be a set of triples (e, f, delta), where e is an element value, f is that * element's frequency (actually, its current occurrence count) and delta is * the maximum error in f. We start with D empty and process the elements in * batches of size w. (The batch size is also known as "bucket size" and is * equal to 1/epsilon.) Let the current batch number be b_current, starting * with 1. For each element e we either increment its f count, if it's * already in D, or insert a new triple into D with values (e, 1, b_current * - 1). After processing each batch we prune D, by removing from it all * elements with f + delta <= b_current. After the algorithm finishes we * suppress all elements from D that do not satisfy f >= (s - epsilon) * N, * where N is the total number of elements in the input. We emit the * remaining elements with estimated frequency f/N. The LC paper proves * that this algorithm finds all elements with true frequency at least s, * and that no frequency is overestimated or is underestimated by more than * epsilon. Furthermore, given reasonable assumptions about the input * distribution, the required table size is no more than about 7 times w. * * We set s to be the estimated frequency of the K'th word in a natural * language's frequency table, where K is the target number of entries in * the MCELEM array plus an arbitrary constant, meant to reflect the fact * that the most common words in any language would usually be stopwords * so we will not actually see them in the input. We assume that the * distribution of word frequencies (including the stopwords) follows Zipf's * law with an exponent of 1. * * Assuming Zipfian distribution, the frequency of the K'th word is equal * to 1/(K * H(W)) where H(n) is 1/2 + 1/3 + ... + 1/n and W is the number of * words in the language. Putting W as one million, we get roughly 0.07/K. * Assuming top 10 words are stopwords gives s = 0.07/(K + 10). We set * epsilon = s/10, which gives bucket width w = (K + 10)/0.007 and * maximum expected hashtable size of about 1000 * (K + 10). * * Note: in the above discussion, s, epsilon, and f/N are in terms of a * lexeme's frequency as a fraction of all lexemes seen in the input. 
* However, what we actually want to store in the finished pg_statistic * entry is each lexeme's frequency as a fraction of all rows that it occurs * in. Assuming that the input tsvectors are correctly constructed, no * lexeme occurs more than once per tsvector, so the final count f is a * correct estimate of the number of input tsvectors it occurs in, and we * need only change the divisor from N to nonnull_cnt to get the number we * want. */ static void compute_tsvector_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows) { int num_mcelem; int null_cnt = 0; double total_width = 0; /* This is D from the LC algorithm. */ HTAB *lexemes_tab; HASHCTL hash_ctl; HASH_SEQ_STATUS scan_status; /* This is the current bucket number from the LC algorithm */ int b_current; /* This is 'w' from the LC algorithm */ int bucket_width; int vector_no, lexeme_no; LexemeHashKey hash_key; TrackItem *item; /* * We want statistics_target * 10 lexemes in the MCELEM array. This * multiplier is pretty arbitrary, but is meant to reflect the fact that * the number of individual lexeme values tracked in pg_statistic ought to * be more than the number of values for a simple scalar column. */ num_mcelem = stats->attr->attstattarget * 10; /* * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the * comment above. */ bucket_width = (num_mcelem + 10) * 1000 / 7; /* * Create the hashtable. It will be in local memory, so we don't need to * worry about overflowing the initial size. Also we don't need to pay any * attention to locking and memory management. */ MemSet(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(LexemeHashKey); hash_ctl.entrysize = sizeof(TrackItem); hash_ctl.hash = lexeme_hash; hash_ctl.match = lexeme_match; hash_ctl.hcxt = CurrentMemoryContext; lexemes_tab = hash_create("Analyzed lexemes table", num_mcelem, &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); /* Initialize counters. */ b_current = 1; lexeme_no = 0; /* Loop over the tsvectors. */ for (vector_no = 0; vector_no < samplerows; vector_no++) { Datum value; bool isnull; TSVector vector; WordEntry *curentryptr; char *lexemesptr; int j; vacuum_delay_point(); value = fetchfunc(stats, vector_no, &isnull); /* * Check for null/nonnull. */ if (isnull) { null_cnt++; continue; } /* * Add up widths for average-width calculation. Since it's a * tsvector, we know it's varlena. As in the regular * compute_minimal_stats function, we use the toasted width for this * calculation. */ total_width += VARSIZE_ANY(DatumGetPointer(value)); /* * Now detoast the tsvector if needed. */ vector = DatumGetTSVector(value); /* * We loop through the lexemes in the tsvector and add them to our * tracking hashtable. Note: the hashtable entries will point into * the (detoasted) tsvector value, therefore we cannot free that * storage until we're done. 
         */
        lexemesptr = STRPTR(vector);
        curentryptr = ARRPTR(vector);
        for (j = 0; j < vector->size; j++)
        {
            bool        found;

            /* Construct a hash key */
            hash_key.lexeme = lexemesptr + curentryptr->pos;
            hash_key.length = curentryptr->len;

            /* Lookup current lexeme in hashtable, adding it if new */
            item = (TrackItem *) hash_search(lexemes_tab,
                                             (const void *) &hash_key,
                                             HASH_ENTER, &found);

            if (found)
            {
                /* The lexeme is already on the tracking list */
                item->frequency++;
            }
            else
            {
                /* Initialize new tracking list element */
                item->frequency = 1;
                item->delta = b_current - 1;
            }

            /* lexeme_no is the number of elements processed (ie N) */
            lexeme_no++;

            /* We prune the D structure after processing each bucket */
            if (lexeme_no % bucket_width == 0)
            {
                prune_lexemes_hashtable(lexemes_tab, b_current);
                b_current++;
            }

            /* Advance to the next WordEntry in the tsvector */
            curentryptr++;
        }
    }

    /* We can only compute real stats if we found some non-null values. */
    if (null_cnt < samplerows)
    {
        int         nonnull_cnt = samplerows - null_cnt;
        int         i;
        TrackItem **sort_table;
        int         track_len;
        int         cutoff_freq;
        int         minfreq,
                    maxfreq;

        stats->stats_valid = true;
        /* Do the simple null-frac and average width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        stats->stawidth = total_width / (double) nonnull_cnt;

        /* Assume it's a unique column (see notes above) */
        stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);

        /*
         * Construct an array of the interesting hashtable items, that is,
         * those meeting the cutoff frequency (s - epsilon)*N.  Also identify
         * the minimum and maximum frequencies among these items.
         *
         * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
         * frequency is 9*N / bucket_width.
         */
        cutoff_freq = 9 * lexeme_no / bucket_width;

        i = hash_get_num_entries(lexemes_tab);  /* surely enough space */
        sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);

        hash_seq_init(&scan_status, lexemes_tab);
        track_len = 0;
        minfreq = lexeme_no;
        maxfreq = 0;
        while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
        {
            if (item->frequency > cutoff_freq)
            {
                sort_table[track_len++] = item;
                minfreq = Min(minfreq, item->frequency);
                maxfreq = Max(maxfreq, item->frequency);
            }
        }
        Assert(track_len <= i);

        /* emit some statistics for debug purposes */
        elog(DEBUG3, "tsvector_stats: target # mces = %d, bucket width = %d, "
             "# lexemes = %d, hashtable size = %d, usable entries = %d",
             num_mcelem, bucket_width, lexeme_no, i, track_len);

        /*
         * If we obtained more lexemes than we really want, get rid of those
         * with least frequencies. The easiest way is to qsort the array into
         * descending frequency order and truncate the array.
         */
        if (num_mcelem < track_len)
        {
            qsort(sort_table, track_len, sizeof(TrackItem *),
                  trackitem_compare_frequencies_desc);
            /* reset minfreq to the smallest frequency we're keeping */
            minfreq = sort_table[num_mcelem - 1]->frequency;
        }
        else
            num_mcelem = track_len;

        /* Generate MCELEM slot entry */
        if (num_mcelem > 0)
        {
            MemoryContext old_context;
            Datum      *mcelem_values;
            float4     *mcelem_freqs;

            /*
             * We want to store statistics sorted on the lexeme value using
             * first length, then byte-for-byte comparison.  The reason for
             * doing length comparison first is that we don't care about the
             * ordering so long as it's consistent, and comparing lengths
             * first gives us a chance to avoid a strncmp() call.
             *
             * This is different from what we do with scalar statistics --
             * they get sorted on frequencies.  The rationale is that we
             * usually search through most common elements looking for a
             * specific value, so we can grab its frequency.
             * When values are presorted we can employ binary search for
             * that.  See ts_selfuncs.c for a real usage scenario.
             */
            qsort(sort_table, num_mcelem, sizeof(TrackItem *),
                  trackitem_compare_lexemes);

            /* Must copy the target values into anl_context */
            old_context = MemoryContextSwitchTo(stats->anl_context);

            /*
             * We sorted statistics on the lexeme value, but we want to be
             * able to find out the minimal and maximal frequency without
             * going through all the values.  We keep those two extra
             * frequencies in two extra cells in mcelem_freqs.
             *
             * (Note: the MCELEM statistics slot definition allows for a
             * third extra number containing the frequency of nulls, but we
             * don't create that for a tsvector column, since null elements
             * aren't possible.)
             */
            mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
            mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));

            /*
             * See comments above about use of nonnull_cnt as the divisor
             * for the final frequency estimates.
             */
            for (i = 0; i < num_mcelem; i++)
            {
                TrackItem  *item = sort_table[i];

                mcelem_values[i] =
                    PointerGetDatum(cstring_to_text_with_len(item->key.lexeme,
                                                             item->key.length));
                mcelem_freqs[i] = (double) item->frequency / (double) nonnull_cnt;
            }
            mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
            mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
            MemoryContextSwitchTo(old_context);

            stats->stakind[0] = STATISTIC_KIND_MCELEM;
            stats->staop[0] = TextEqualOperator;
            stats->stanumbers[0] = mcelem_freqs;
            /* See above comment about two extra frequency fields */
            stats->numnumbers[0] = num_mcelem + 2;
            stats->stavalues[0] = mcelem_values;
            stats->numvalues[0] = num_mcelem;
            /* We are storing text values */
            stats->statypid[0] = TEXTOID;
            stats->statyplen[0] = -1;   /* typlen, -1 for varlena */
            stats->statypbyval[0] = false;
            stats->statypalign[0] = 'i';
        }
    }
    else
    {
        /* We found only nulls; assume the column is entirely null */
        stats->stats_valid = true;
        stats->stanullfrac = 1.0;
        stats->stawidth = 0;        /* "unknown" */
        stats->stadistinct = 0.0;   /* "unknown" */
    }

    /*
     * We don't need to bother cleaning up any of our temporary palloc's. The
     * hashtable should also go away, as it used a child memory context.
     */
}
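
/*
 * A minimal, self-contained sketch of the Lossy Counting loop described in
 * the header comment of compute_tsvector_stats() above.  It is purely
 * illustrative and is not compiled into the backend (hence the #ifdef
 * guard); it counts plain ints with a linear-scan table rather than lexemes
 * in a dynahash table.  The names lc_count, LCItem and LC_MAX_ITEMS are
 * invented for this sketch only.  With bucket width w = 1/epsilon, each
 * reported count underestimates the true count by at most epsilon * N.
 */
#ifdef TS_TYPANALYZE_LC_SKETCH

#define LC_MAX_ITEMS 1024       /* assumed bound, for the sketch only */

typedef struct LCItem
{
    int         value;          /* element e */
    int         f;              /* occurrence count */
    int         delta;          /* maximum error in f */
} LCItem;

/*
 * Approximately count the n integers in input[], using the given bucket
 * width (= 1/epsilon).  Returns the number of entries left in table[];
 * the caller would then keep only entries with f >= (s - epsilon) * n.
 */
static int
lc_count(const int *input, int n, int bucket_width, LCItem *table)
{
    int         ntracked = 0;
    int         b_current = 1;
    int         i;

    for (i = 0; i < n; i++)
    {
        int         j;

        /* If e is already in D, increment its f count ... */
        for (j = 0; j < ntracked; j++)
        {
            if (table[j].value == input[i])
            {
                table[j].f++;
                break;
            }
        }
        /* ... otherwise insert the triple (e, 1, b_current - 1) */
        if (j == ntracked && ntracked < LC_MAX_ITEMS)
        {
            table[ntracked].value = input[i];
            table[ntracked].f = 1;
            table[ntracked].delta = b_current - 1;
            ntracked++;
        }

        /* After each full bucket, prune entries with f + delta <= b_current */
        if ((i + 1) % bucket_width == 0)
        {
            int         keep = 0;

            for (j = 0; j < ntracked; j++)
            {
                if (table[j].f + table[j].delta > b_current)
                    table[keep++] = table[j];
            }
            ntracked = keep;
            b_current++;
        }
    }

    return ntracked;
}

#endif                          /* TS_TYPANALYZE_LC_SKETCH */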
    /*
     * If CREATE/SET, add new options to array; if RESET, just check that the
     * user didn't say RESET (option=val).  (Must do this because the grammar
     * doesn't enforce it.)
     */
    foreach(cell, defList)
    {
        DefElem    *def = (DefElem *) lfirst(cell);

        if (isReset)
        {
            if (def->arg != NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("RESET must not include values for parameters")));
        }
        else
        {
            text       *t;
            const char *value;
            Size        len;

            /*
             * Error out if the namespace is not valid.  A NULL namespace is
             * always valid.
             */
            if (def->defnamespace != NULL)
            {
                bool        valid = false;
                int         i;

                if (validnsps)
                {
                    for (i = 0; validnsps[i]; i++)
                    {
                        if (pg_strcasecmp(def->defnamespace,
                                          validnsps[i]) == 0)
                        {
                            valid = true;
                            break;
                        }
                    }
                }

                if (!valid)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                             errmsg("unrecognized parameter namespace \"%s\"",
                                    def->defnamespace)));
            }

            if (ignoreOids && pg_strcasecmp(def->defname, "oids") == 0)
                continue;

            /* ignore if not in the same namespace */
            if (namspace == NULL)
            {
                if (def->defnamespace != NULL)
                    continue;
            }
            else if (def->defnamespace == NULL)
                continue;
            else if (pg_strcasecmp(def->defnamespace, namspace) != 0)
                continue;

            /*
             * Flatten the DefElem into a text string like "name=arg". If we
             * have just "name", assume "name=true" is meant.  Note: the
             * namespace is not output.
             */
            if (def->arg != NULL)
                value = defGetString(def);
            else
                value = "true";
            len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
            /* +1 leaves room for sprintf's trailing null */
            t = (text *) palloc(len + 1);
            SET_VARSIZE(t, len);
            sprintf(VARDATA(t), "%s=%s", def->defname, value);

            astate = accumArrayResult(astate, PointerGetDatum(t),
                                      false, TEXTOID,
                                      CurrentMemoryContext);
        }
    }
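
    /*
     * Illustrative worked example (editorial addition, not from the original
     * source): for a clause such as WITH (fillfactor = 70), the DefElem
     * above is flattened into the text value "fillfactor=70"; a bare
     * WITH (autovacuum_enabled) becomes "autovacuum_enabled=true" because of
     * the defaulting just above.  With the usual 4-byte varlena header, the
     * palloc'd size works out as
     *     VARHDRSZ + strlen("fillfactor") + 1 + strlen("70")
     *       = 4 + 10 + 1 + 2 = 17 bytes of varlena data,
     * plus one extra byte so sprintf's trailing NUL has somewhere to go even
     * though it is not counted in the stored varlena length.
     */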